From aca20dd558338446336934a4b18516cfbf7d8393 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 22 May 2010 23:26:39 +0200 Subject: [PATCH 1/8] grep: add test script for binary file handling Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- t/t7008-grep-binary.sh | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100755 t/t7008-grep-binary.sh diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh new file mode 100755 index 0000000000..2320e74b69 --- /dev/null +++ b/t/t7008-grep-binary.sh @@ -0,0 +1,42 @@ +#!/bin/sh + +test_description='git grep in binary files' + +. ./test-lib.sh + +test_expect_success 'setup' " + printf 'binary\000file\n' >a && + git add a && + git commit -m. +" + +test_expect_success 'git grep ina a' ' + echo Binary file a matches >expect && + git grep ina a >actual && + test_cmp expect actual +' + +test_expect_success 'git grep -ah ina a' ' + git grep -ah ina a >actual && + test_cmp a actual +' + +test_expect_success 'git grep -I ina a' ' + : >expect && + test_must_fail git grep -I ina a >actual && + test_cmp expect actual +' + +test_expect_success 'git grep -L bar a' ' + echo a >expect && + git grep -L bar a >actual && + test_cmp expect actual +' + +test_expect_success 'git grep -q ina a' ' + : >expect && + git grep -q ina a >actual && + test_cmp expect actual +' + +test_done From 64fcec78b5c52a054eab482e91d58f7b41d1dfaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 22 May 2010 23:28:17 +0200 Subject: [PATCH 2/8] grep: grep: refactor handling of binary mode options Turn the switch inside-out and add labels for each possible value of ->binary. This makes the code easier to read and avoids calling buffer_is_binary() if the option -a was given. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- grep.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/grep.c b/grep.c index 543b1d5378..2a8e879e88 100644 --- a/grep.c +++ b/grep.c @@ -800,17 +800,19 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name, opt->show_hunk_mark = 1; opt->last_shown = 0; - if (buffer_is_binary(buf, size)) { - switch (opt->binary) { - case GREP_BINARY_DEFAULT: + switch (opt->binary) { + case GREP_BINARY_DEFAULT: + if (buffer_is_binary(buf, size)) binary_match_only = 1; - break; - case GREP_BINARY_NOMATCH: + break; + case GREP_BINARY_NOMATCH: + if (buffer_is_binary(buf, size)) return 0; /* Assume unmatch */ - break; - default: - break; - } + break; + case GREP_BINARY_TEXT: + break; + default: + die("bug: unknown binary handling mode"); } memset(&xecfg, 0, sizeof(xecfg)); From c30c10cff1d640ce119596b907c10cc11f83358d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 22 May 2010 23:29:35 +0200 Subject: [PATCH 3/8] grep: --count over binary The intent of showing the message "Binary file xyz matches" for binary files is to avoid annoying users by potentially messing up their terminals by printing control characters. In --count mode, this precaution isn't necessary. Display counts of matches if -c/--count was specified, even if -a was not given. GNU grep does the same. Moving the check for ->count before the code for handling binary file also avoids printing context lines if --count and -[ABC] were used together, so we can remove the part of the comment that mentions this behaviour. Again, GNU grep does the same. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- grep.c | 9 ++++----- t/t7008-grep-binary.sh | 6 ++++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/grep.c b/grep.c index 2a8e879e88..35c18b7e28 100644 --- a/grep.c +++ b/grep.c @@ -873,6 +873,8 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name, count++; if (opt->status_only) return 1; + if (opt->count) + goto next_line; if (binary_match_only) { opt->output(opt, "Binary file ", 12); output_color(opt, name, strlen(name), @@ -886,16 +888,12 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name, } /* Hit at this line. If we haven't shown the * pre-context lines, we would need to show them. - * When asked to do "count", this still show - * the context which is nonsense, but the user - * deserves to get that ;-). */ if (opt->pre_context) show_pre_context(opt, name, buf, bol, lno); else if (opt->funcname) show_funcname_line(opt, name, buf, bol, lno); - if (!opt->count) - show_line(opt, bol, eol, name, lno, ':'); + show_line(opt, bol, eol, name, lno, ':'); last_hit = lno; } else if (last_hit && @@ -939,6 +937,7 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name, output_sep(opt, ':'); snprintf(buf, sizeof(buf), "%u\n", count); opt->output(opt, buf, strlen(buf)); + return 1; } return !!last_hit; } diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh index 2320e74b69..91970eacd6 100755 --- a/t/t7008-grep-binary.sh +++ b/t/t7008-grep-binary.sh @@ -27,6 +27,12 @@ test_expect_success 'git grep -I ina a' ' test_cmp expect actual ' +test_expect_success 'git grep -c ina a' ' + echo a:1 >expect && + git grep -c ina a >actual && + test_cmp expect actual +' + test_expect_success 'git grep -L bar a' ' echo a >expect && git grep -L bar a >actual && From 321ffcc0556a94c461ac84667b35494c193804ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 22 May 2010 23:30:48 +0200 Subject: [PATCH 4/8] grep: --name-only over binary As with the option -c/--count, git grep with the option -l/--name-only should work the same with binary files as with text files because there is no danger of messing up the terminal with control characters from the contents of matching files. GNU grep does the same. Move the check for ->name_only before the one for binary_match_only, thus making the latter irrelevant for git grep -l. Reported-by: Dmitry Potapov Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- grep.c | 8 ++++---- t/t7008-grep-binary.sh | 6 ++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/grep.c b/grep.c index 35c18b7e28..22639cde2c 100644 --- a/grep.c +++ b/grep.c @@ -873,6 +873,10 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name, count++; if (opt->status_only) return 1; + if (opt->name_only) { + show_name(opt, name); + return 1; + } if (opt->count) goto next_line; if (binary_match_only) { @@ -882,10 +886,6 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name, opt->output(opt, " matches\n", 9); return 1; } - if (opt->name_only) { - show_name(opt, name); - return 1; - } /* Hit at this line. If we haven't shown the * pre-context lines, we would need to show them. */ diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh index 91970eacd6..4a12d97922 100755 --- a/t/t7008-grep-binary.sh +++ b/t/t7008-grep-binary.sh @@ -33,6 +33,12 @@ test_expect_success 'git grep -c ina a' ' test_cmp expect actual ' +test_expect_success 'git grep -l ina a' ' + echo a >expect && + git grep -l ina a >actual && + test_cmp expect actual +' + test_expect_success 'git grep -L bar a' ' echo a >expect && git grep -L bar a >actual && From 1baddf4b3781c0c714442adfda496d667e1850cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 22 May 2010 23:32:43 +0200 Subject: [PATCH 5/8] grep: use memmem() for fixed string search Allow searching beyond NUL characters by using memmem() instead of strstr(). Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- grep.c | 16 +++++++++------- t/t7008-grep-binary.sh | 4 ++++ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/grep.c b/grep.c index 22639cde2c..c3affb6caa 100644 --- a/grep.c +++ b/grep.c @@ -329,14 +329,15 @@ static void show_name(struct grep_opt *opt, const char *name) opt->output(opt, opt->null_following_name ? "\0" : "\n", 1); } - -static int fixmatch(const char *pattern, char *line, int ignore_case, regmatch_t *match) +static int fixmatch(const char *pattern, char *line, char *eol, + int ignore_case, regmatch_t *match) { char *hit; + if (ignore_case) hit = strcasestr(line, pattern); else - hit = strstr(line, pattern); + hit = memmem(line, eol - line, pattern, strlen(pattern)); if (!hit) { match->rm_so = match->rm_eo = -1; @@ -399,7 +400,7 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, again: if (p->fixed) - hit = !fixmatch(p->pattern, bol, p->ignore_case, pmatch); + hit = !fixmatch(p->pattern, bol, eol, p->ignore_case, pmatch); else hit = !regexec(&p->regexp, bol, 1, pmatch, eflags); @@ -725,9 +726,10 @@ static int look_ahead(struct grep_opt *opt, int hit; regmatch_t m; - if (p->fixed) - hit = !fixmatch(p->pattern, bol, p->ignore_case, &m); - else { + if (p->fixed) { + hit = !fixmatch(p->pattern, bol, bol + *left_p, + p->ignore_case, &m); + } else { #ifdef REG_STARTEND m.rm_so = 0; m.rm_eo = *left_p; diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh index 4a12d97922..9adc9ed6fe 100755 --- a/t/t7008-grep-binary.sh +++ b/t/t7008-grep-binary.sh @@ -51,4 +51,8 @@ test_expect_success 'git grep -q ina a' ' test_cmp expect actual ' +test_expect_success 'git grep -F ile a' ' + git grep -F ile a +' + test_done From 52d799a79f921cc47823a0455b0e646636410b65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 22 May 2010 23:34:06 +0200 Subject: [PATCH 6/8] grep: continue case insensitive fixed string search after NUL chars Functions for C strings, like strcasestr(), can't see beyond NUL characters. Check if there is such an obstacle on the line and try again behind it. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- grep.c | 12 +++++++++--- t/t7008-grep-binary.sh | 4 ++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/grep.c b/grep.c index c3affb6caa..b95803bbb1 100644 --- a/grep.c +++ b/grep.c @@ -334,9 +334,15 @@ static int fixmatch(const char *pattern, char *line, char *eol, { char *hit; - if (ignore_case) - hit = strcasestr(line, pattern); - else + if (ignore_case) { + char *s = line; + do { + hit = strcasestr(s, pattern); + if (hit) + break; + s += strlen(s) + 1; + } while (s < eol); + } else hit = memmem(line, eol - line, pattern, strlen(pattern)); if (!hit) { diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh index 9adc9ed6fe..9660842c44 100755 --- a/t/t7008-grep-binary.sh +++ b/t/t7008-grep-binary.sh @@ -55,4 +55,8 @@ test_expect_success 'git grep -F ile a' ' git grep -F ile a ' +test_expect_success 'git grep -Fi iLE a' ' + git grep -Fi iLE a +' + test_done From f96e56733ab3e3ce5c79c27c673c746af1519a86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 22 May 2010 23:35:07 +0200 Subject: [PATCH 7/8] grep: use REG_STARTEND for all matching if available Refactor REG_STARTEND handling inlook_ahead() into a new helper, regmatch(), and use it for line matching, too. This allows regex matching beyond NUL characters if regexec() supports the flag. NUL characters themselves are not matched in any way, though. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- grep.c | 24 ++++++++++++++---------- t/t7008-grep-binary.sh | 10 ++++++++++ 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/grep.c b/grep.c index b95803bbb1..70a776f6fe 100644 --- a/grep.c +++ b/grep.c @@ -356,6 +356,17 @@ static int fixmatch(const char *pattern, char *line, char *eol, } } +static int regmatch(const regex_t *preg, char *line, char *eol, + regmatch_t *match, int eflags) +{ +#ifdef REG_STARTEND + match->rm_so = 0; + match->rm_eo = eol - line; + eflags |= REG_STARTEND; +#endif + return regexec(preg, line, 1, match, eflags); +} + static int strip_timestamp(char *bol, char **eol_p) { char *eol = *eol_p; @@ -408,7 +419,7 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, if (p->fixed) hit = !fixmatch(p->pattern, bol, eol, p->ignore_case, pmatch); else - hit = !regexec(&p->regexp, bol, 1, pmatch, eflags); + hit = !regmatch(&p->regexp, bol, eol, pmatch, eflags); if (hit && p->word_regexp) { if ((pmatch[0].rm_so < 0) || @@ -735,15 +746,8 @@ static int look_ahead(struct grep_opt *opt, if (p->fixed) { hit = !fixmatch(p->pattern, bol, bol + *left_p, p->ignore_case, &m); - } else { -#ifdef REG_STARTEND - m.rm_so = 0; - m.rm_eo = *left_p; - hit = !regexec(&p->regexp, bol, 1, &m, REG_STARTEND); -#else - hit = !regexec(&p->regexp, bol, 1, &m, 0); -#endif - } + } else + hit = !regmatch(&p->regexp, bol, bol + *left_p, &m, 0); if (!hit || m.rm_so < 0 || m.rm_eo < 0) continue; if (earliest < 0 || m.rm_so < earliest) diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh index 9660842c44..4f5e74fed7 100755 --- a/t/t7008-grep-binary.sh +++ b/t/t7008-grep-binary.sh @@ -59,4 +59,14 @@ test_expect_success 'git grep -Fi iLE a' ' git grep -Fi iLE a ' +# This test actually passes on platforms where regexec() supports the +# flag REG_STARTEND. +test_expect_failure 'git grep ile a' ' + git grep ile a +' + +test_expect_failure 'git grep .fi a' ' + git grep .fi a +' + test_done From ed40a0951cedb70777669144478166aa5bb2cf9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 22 May 2010 23:43:43 +0200 Subject: [PATCH 8/8] grep: support NUL chars in search strings for -F Search patterns in a file specified with -f can contain NUL characters. The current code ignores all characters on a line after a NUL. Pass the actual length of the line all the way from the pattern file to fixmatch() and use it for case-sensitive fixed string matching. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- builtin/grep.c | 8 ++++++-- grep.c | 33 ++++++++++++++++++++------------- grep.h | 2 ++ t/t7008-grep-binary.sh | 30 ++++++++++++++++++++++++++++++ 4 files changed, 58 insertions(+), 15 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 8e928e2170..7653d8492a 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -724,11 +724,15 @@ static int file_callback(const struct option *opt, const char *arg, int unset) if (!patterns) die_errno("cannot open '%s'", arg); while (strbuf_getline(&sb, patterns, '\n') == 0) { + char *s; + size_t len; + /* ignore empty line like grep does */ if (sb.len == 0) continue; - append_grep_pattern(grep_opt, strbuf_detach(&sb, NULL), arg, - ++lno, GREP_PATTERN); + + s = strbuf_detach(&sb, &len); + append_grep_pat(grep_opt, s, len, arg, ++lno, GREP_PATTERN); } fclose(patterns); strbuf_release(&sb); diff --git a/grep.c b/grep.c index 70a776f6fe..82fb349be6 100644 --- a/grep.c +++ b/grep.c @@ -7,6 +7,7 @@ void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field fie { struct grep_pat *p = xcalloc(1, sizeof(*p)); p->pattern = pat; + p->patternlen = strlen(pat); p->origin = "header"; p->no = 0; p->token = GREP_PATTERN_HEAD; @@ -18,9 +19,16 @@ void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field fie void append_grep_pattern(struct grep_opt *opt, const char *pat, const char *origin, int no, enum grep_pat_token t) +{ + append_grep_pat(opt, pat, strlen(pat), origin, no, t); +} + +void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen, + const char *origin, int no, enum grep_pat_token t) { struct grep_pat *p = xcalloc(1, sizeof(*p)); p->pattern = pat; + p->patternlen = patlen; p->origin = origin; p->no = no; p->token = t; @@ -44,8 +52,8 @@ struct grep_opt *grep_opt_dup(const struct grep_opt *opt) append_header_grep_pattern(ret, pat->field, pat->pattern); else - append_grep_pattern(ret, pat->pattern, pat->origin, - pat->no, pat->token); + append_grep_pat(ret, pat->pattern, pat->patternlen, + pat->origin, pat->no, pat->token); } return ret; @@ -329,21 +337,21 @@ static void show_name(struct grep_opt *opt, const char *name) opt->output(opt, opt->null_following_name ? "\0" : "\n", 1); } -static int fixmatch(const char *pattern, char *line, char *eol, - int ignore_case, regmatch_t *match) +static int fixmatch(struct grep_pat *p, char *line, char *eol, + regmatch_t *match) { char *hit; - if (ignore_case) { + if (p->ignore_case) { char *s = line; do { - hit = strcasestr(s, pattern); + hit = strcasestr(s, p->pattern); if (hit) break; s += strlen(s) + 1; } while (s < eol); } else - hit = memmem(line, eol - line, pattern, strlen(pattern)); + hit = memmem(line, eol - line, p->pattern, p->patternlen); if (!hit) { match->rm_so = match->rm_eo = -1; @@ -351,7 +359,7 @@ static int fixmatch(const char *pattern, char *line, char *eol, } else { match->rm_so = hit - line; - match->rm_eo = match->rm_so + strlen(pattern); + match->rm_eo = match->rm_so + p->patternlen; return 0; } } @@ -417,7 +425,7 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, again: if (p->fixed) - hit = !fixmatch(p->pattern, bol, eol, p->ignore_case, pmatch); + hit = !fixmatch(p, bol, eol, pmatch); else hit = !regmatch(&p->regexp, bol, eol, pmatch, eflags); @@ -743,10 +751,9 @@ static int look_ahead(struct grep_opt *opt, int hit; regmatch_t m; - if (p->fixed) { - hit = !fixmatch(p->pattern, bol, bol + *left_p, - p->ignore_case, &m); - } else + if (p->fixed) + hit = !fixmatch(p, bol, bol + *left_p, &m); + else hit = !regmatch(&p->regexp, bol, bol + *left_p, &m, 0); if (!hit || m.rm_so < 0 || m.rm_eo < 0) continue; diff --git a/grep.h b/grep.h index 89342e5b47..0aebebd966 100644 --- a/grep.h +++ b/grep.h @@ -29,6 +29,7 @@ struct grep_pat { int no; enum grep_pat_token token; const char *pattern; + size_t patternlen; enum grep_header_field field; regex_t regexp; unsigned fixed:1; @@ -104,6 +105,7 @@ struct grep_opt { void *output_priv; }; +extern void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen, const char *origin, int no, enum grep_pat_token t); extern void append_grep_pattern(struct grep_opt *opt, const char *pat, const char *origin, int no, enum grep_pat_token t); extern void append_header_grep_pattern(struct grep_opt *, enum grep_header_field, const char *); extern void compile_grep_patterns(struct grep_opt *opt); diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh index 4f5e74fed7..eb8ca88cce 100755 --- a/t/t7008-grep-binary.sh +++ b/t/t7008-grep-binary.sh @@ -69,4 +69,34 @@ test_expect_failure 'git grep .fi a' ' git grep .fi a ' +test_expect_success 'git grep -F yf a' " + printf 'y\000f' >f && + git grep -f f -F a +" + +test_expect_success 'git grep -F yx a' " + printf 'y\000x' >f && + test_must_fail git grep -f f -F a +" + +test_expect_success 'git grep -Fi Yf a' " + printf 'Y\000f' >f && + git grep -f f -Fi a +" + +test_expect_failure 'git grep -Fi Yx a' " + printf 'Y\000x' >f && + test_must_fail git grep -f f -Fi a +" + +test_expect_success 'git grep yf a' " + printf 'y\000f' >f && + git grep -f f a +" + +test_expect_failure 'git grep yx a' " + printf 'y\000x' >f && + test_must_fail git grep -f f a +" + test_done