From fc456751102ededf8405446e70c228836d390f81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 20 May 2012 16:32:39 +0200 Subject: [PATCH 1/4] grep: factor out create_grep_pat() Add create_grep_pat(), a shared helper for all grep pattern allocation and initialization needs. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- grep.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/grep.c b/grep.c index d03d9e24c2..2b77c47764 100644 --- a/grep.c +++ b/grep.c @@ -3,15 +3,26 @@ #include "userdiff.h" #include "xdiff-interface.h" -void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field field, const char *pat) +static struct grep_pat *create_grep_pat(const char *pat, size_t patlen, + const char *origin, int no, + enum grep_pat_token t, + enum grep_header_field field) { struct grep_pat *p = xcalloc(1, sizeof(*p)); p->pattern = pat; - p->patternlen = strlen(pat); - p->origin = "header"; - p->no = 0; - p->token = GREP_PATTERN_HEAD; + p->patternlen = patlen; + p->origin = origin; + p->no = no; + p->token = t; p->field = field; + return p; +} + +void append_header_grep_pattern(struct grep_opt *opt, + enum grep_header_field field, const char *pat) +{ + struct grep_pat *p = create_grep_pat(pat, strlen(pat), "header", 0, + GREP_PATTERN_HEAD, field); *opt->header_tail = p; opt->header_tail = &p->next; p->next = NULL; @@ -26,12 +37,7 @@ void append_grep_pattern(struct grep_opt *opt, const char *pat, void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen, const char *origin, int no, enum grep_pat_token t) { - struct grep_pat *p = xcalloc(1, sizeof(*p)); - p->pattern = pat; - p->patternlen = patlen; - p->origin = origin; - p->no = no; - p->token = t; + struct grep_pat *p = create_grep_pat(pat, patlen, origin, no, t, 0); *opt->pattern_tail = p; opt->pattern_tail = &p->next; p->next = NULL; From 2b3873ff34ff937dad729407da4308be6a5bcd66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 20 May 2012 16:32:54 +0200 Subject: [PATCH 2/4] grep: factor out do_append_grep_pat() Add do_append_grep_pat() as a shared function for adding patterns to the header pattern list and the general pattern list. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- grep.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/grep.c b/grep.c index 2b77c47764..c35a7ce57d 100644 --- a/grep.c +++ b/grep.c @@ -18,14 +18,19 @@ static struct grep_pat *create_grep_pat(const char *pat, size_t patlen, return p; } +static void do_append_grep_pat(struct grep_pat ***tail, struct grep_pat *p) +{ + **tail = p; + *tail = &p->next; + p->next = NULL; +} + void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field field, const char *pat) { struct grep_pat *p = create_grep_pat(pat, strlen(pat), "header", 0, GREP_PATTERN_HEAD, field); - *opt->header_tail = p; - opt->header_tail = &p->next; - p->next = NULL; + do_append_grep_pat(&opt->header_tail, p); } void append_grep_pattern(struct grep_opt *opt, const char *pat, @@ -38,9 +43,7 @@ void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen, const char *origin, int no, enum grep_pat_token t) { struct grep_pat *p = create_grep_pat(pat, patlen, origin, no, t, 0); - *opt->pattern_tail = p; - opt->pattern_tail = &p->next; - p->next = NULL; + do_append_grep_pat(&opt->pattern_tail, p); } struct grep_opt *grep_opt_dup(const struct grep_opt *opt) From 526a858a99ace6698823740374edc3e35b87901a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 20 May 2012 16:33:07 +0200 Subject: [PATCH 3/4] grep: support newline separated pattern list Currently, patterns that contain newline characters don't match anything when given to git grep. Regular grep(1) interprets patterns as lists of newline separated search strings instead. Implement this functionality by creating and inserting extra grep_pat structures for patterns consisting of multiple lines when appending to the pattern lists. For simplicity, all pattern strings are duplicated. The original pattern is truncated in place to make it contain only the first line. Requested-by: Torne (Richard Coles) Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- Documentation/git-grep.txt | 4 +++- grep.c | 33 ++++++++++++++++++++++++++++++++- grep.h | 2 +- t/t7810-grep.sh | 5 +++++ 4 files changed, 41 insertions(+), 3 deletions(-) diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt index e150c77cff..c3306f3a3b 100644 --- a/Documentation/git-grep.txt +++ b/Documentation/git-grep.txt @@ -29,7 +29,9 @@ SYNOPSIS DESCRIPTION ----------- Look for specified patterns in the tracked files in the work tree, blobs -registered in the index file, or blobs in given tree objects. +registered in the index file, or blobs in given tree objects. Patterns +are lists of one or more search expressions separated by newline +characters. An empty string as search expression matches all lines. CONFIGURATION diff --git a/grep.c b/grep.c index c35a7ce57d..02258039d9 100644 --- a/grep.c +++ b/grep.c @@ -9,7 +9,7 @@ static struct grep_pat *create_grep_pat(const char *pat, size_t patlen, enum grep_header_field field) { struct grep_pat *p = xcalloc(1, sizeof(*p)); - p->pattern = pat; + p->pattern = xmemdupz(pat, patlen); p->patternlen = patlen; p->origin = origin; p->no = no; @@ -23,6 +23,36 @@ static void do_append_grep_pat(struct grep_pat ***tail, struct grep_pat *p) **tail = p; *tail = &p->next; p->next = NULL; + + switch (p->token) { + case GREP_PATTERN: /* atom */ + case GREP_PATTERN_HEAD: + case GREP_PATTERN_BODY: + for (;;) { + struct grep_pat *new_pat; + size_t len = 0; + char *cp = p->pattern + p->patternlen, *nl = NULL; + while (++len <= p->patternlen) { + if (*(--cp) == '\n') { + nl = cp; + break; + } + } + if (!nl) + break; + new_pat = create_grep_pat(nl + 1, len - 1, p->origin, + p->no, p->token, p->field); + new_pat->next = p->next; + if (!p->next) + *tail = &new_pat->next; + p->next = new_pat; + *nl = '\0'; + p->patternlen -= len; + } + break; + default: + break; + } } void append_header_grep_pattern(struct grep_opt *opt, @@ -408,6 +438,7 @@ void free_grep_patterns(struct grep_opt *opt) free_pcre_regexp(p); else regfree(&p->regexp); + free(p->pattern); break; default: break; diff --git a/grep.h b/grep.h index cd055cdfa8..5b083affe1 100644 --- a/grep.h +++ b/grep.h @@ -35,7 +35,7 @@ struct grep_pat { const char *origin; int no; enum grep_pat_token token; - const char *pattern; + char *pattern; size_t patternlen; enum grep_header_field field; regex_t regexp; diff --git a/t/t7810-grep.sh b/t/t7810-grep.sh index 6379ad60bc..bc9a522085 100755 --- a/t/t7810-grep.sh +++ b/t/t7810-grep.sh @@ -322,6 +322,11 @@ test_expect_success 'grep -f, multiple patterns' ' test_cmp expected actual ' +test_expect_success 'grep, multiple patterns' ' + git grep "$(cat patterns)" >actual && + test_cmp expected actual +' + cat >expected < Date: Mon, 21 May 2012 18:10:09 +0200 Subject: [PATCH 4/4] grep: stop leaking line strings with -f When reading patterns from a file, we pass the lines as allocated string buffers to append_grep_pat() and never free them. That's not a problem because they are needed until the program ends anyway. However, now that the function duplicates the pattern string, we can reuse the strbuf after calling that function. This simplifies the code a bit and plugs a minor memory leak. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- builtin/grep.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 871afaa3c7..f1392014f1 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -681,15 +681,12 @@ static int file_callback(const struct option *opt, const char *arg, int unset) if (!patterns) die_errno(_("cannot open '%s'"), arg); while (strbuf_getline(&sb, patterns, '\n') == 0) { - char *s; - size_t len; - /* ignore empty line like grep does */ if (sb.len == 0) continue; - s = strbuf_detach(&sb, &len); - append_grep_pat(grep_opt, s, len, arg, ++lno, GREP_PATTERN); + append_grep_pat(grep_opt, sb.buf, sb.len, arg, ++lno, + GREP_PATTERN); } if (!from_stdin) fclose(patterns);