Merge branch 'rs/grep-binary'

* rs/grep-binary:
  grep: support NUL chars in search strings for -F
  grep: use REG_STARTEND for all matching if available
  grep: continue case insensitive fixed string search after NUL chars
  grep: use memmem() for fixed string search
  grep: --name-only over binary
  grep: --count over binary
  grep: grep: refactor handling of binary mode options
  grep: add test script for binary file handling
This commit is contained in:
Junio C Hamano 2010-06-13 11:21:44 -07:00
commit 534930807c
4 changed files with 169 additions and 41 deletions

View File

@ -724,11 +724,15 @@ static int file_callback(const struct option *opt, const char *arg, int unset)
if (!patterns)
die_errno("cannot open '%s'", arg);
while (strbuf_getline(&sb, patterns, '\n') == 0) {
char *s;
size_t len;
/* ignore empty line like grep does */
if (sb.len == 0)
continue;
append_grep_pattern(grep_opt, strbuf_detach(&sb, NULL), arg,
++lno, GREP_PATTERN);
s = strbuf_detach(&sb, &len);
append_grep_pat(grep_opt, s, len, arg, ++lno, GREP_PATTERN);
}
fclose(patterns);
strbuf_release(&sb);

84
grep.c
View File

@ -7,6 +7,7 @@ void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field fie
{
struct grep_pat *p = xcalloc(1, sizeof(*p));
p->pattern = pat;
p->patternlen = strlen(pat);
p->origin = "header";
p->no = 0;
p->token = GREP_PATTERN_HEAD;
@ -18,9 +19,16 @@ void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field fie
/*
 * Convenience entry point for NUL-terminated patterns: the length of
 * pat is measured with strlen() and the request is forwarded, together
 * with that length, to append_grep_pat().
 */
void append_grep_pattern(struct grep_opt *opt, const char *pat,
			 const char *origin, int no, enum grep_pat_token t)
{
	size_t patlen = strlen(pat);

	append_grep_pat(opt, pat, patlen, origin, no, t);
}
void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
const char *origin, int no, enum grep_pat_token t)
{
struct grep_pat *p = xcalloc(1, sizeof(*p));
p->pattern = pat;
p->patternlen = patlen;
p->origin = origin;
p->no = no;
p->token = t;
@ -44,8 +52,8 @@ struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
append_header_grep_pattern(ret, pat->field,
pat->pattern);
else
append_grep_pattern(ret, pat->pattern, pat->origin,
pat->no, pat->token);
append_grep_pat(ret, pat->pattern, pat->patternlen,
pat->origin, pat->no, pat->token);
}
return ret;
@ -329,14 +337,21 @@ static void show_name(struct grep_opt *opt, const char *name)
opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
}
static int fixmatch(const char *pattern, char *line, int ignore_case, regmatch_t *match)
static int fixmatch(struct grep_pat *p, char *line, char *eol,
regmatch_t *match)
{
char *hit;
if (ignore_case)
hit = strcasestr(line, pattern);
else
hit = strstr(line, pattern);
if (p->ignore_case) {
char *s = line;
do {
hit = strcasestr(s, p->pattern);
if (hit)
break;
s += strlen(s) + 1;
} while (s < eol);
} else
hit = memmem(line, eol - line, p->pattern, p->patternlen);
if (!hit) {
match->rm_so = match->rm_eo = -1;
@ -344,11 +359,22 @@ static int fixmatch(const char *pattern, char *line, int ignore_case, regmatch_t
}
else {
match->rm_so = hit - line;
match->rm_eo = match->rm_so + strlen(pattern);
match->rm_eo = match->rm_so + p->patternlen;
return 0;
}
}
/*
 * Run the compiled expression preg against the text starting at line.
 *
 * When REG_STARTEND is defined, match[0] is preloaded with the byte
 * range [0, eol - line) and REG_STARTEND is added to the flags passed
 * to regexec().  When it is not defined, eol is not consulted and
 * regexec() is called with eflags unchanged.
 *
 * Returns whatever regexec() returns; match[0] is filled in by it.
 */
static int regmatch(const regex_t *preg, char *line, char *eol,
		    regmatch_t *match, int eflags)
{
	int flags = eflags;

#ifdef REG_STARTEND
	match->rm_so = 0;
	match->rm_eo = eol - line;
	flags |= REG_STARTEND;
#endif
	return regexec(preg, line, 1, match, flags);
}
static int strip_timestamp(char *bol, char **eol_p)
{
char *eol = *eol_p;
@ -399,9 +425,9 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
again:
if (p->fixed)
hit = !fixmatch(p->pattern, bol, p->ignore_case, pmatch);
hit = !fixmatch(p, bol, eol, pmatch);
else
hit = !regexec(&p->regexp, bol, 1, pmatch, eflags);
hit = !regmatch(&p->regexp, bol, eol, pmatch, eflags);
if (hit && p->word_regexp) {
if ((pmatch[0].rm_so < 0) ||
@ -726,16 +752,9 @@ static int look_ahead(struct grep_opt *opt,
regmatch_t m;
if (p->fixed)
hit = !fixmatch(p->pattern, bol, p->ignore_case, &m);
else {
#ifdef REG_STARTEND
m.rm_so = 0;
m.rm_eo = *left_p;
hit = !regexec(&p->regexp, bol, 1, &m, REG_STARTEND);
#else
hit = !regexec(&p->regexp, bol, 1, &m, 0);
#endif
}
hit = !fixmatch(p, bol, bol + *left_p, &m);
else
hit = !regmatch(&p->regexp, bol, bol + *left_p, &m, 0);
if (!hit || m.rm_so < 0 || m.rm_eo < 0)
continue;
if (earliest < 0 || m.rm_so < earliest)
@ -800,17 +819,19 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
opt->show_hunk_mark = 1;
opt->last_shown = 0;
if (buffer_is_binary(buf, size)) {
switch (opt->binary) {
case GREP_BINARY_DEFAULT:
if (buffer_is_binary(buf, size))
binary_match_only = 1;
break;
case GREP_BINARY_NOMATCH:
if (buffer_is_binary(buf, size))
return 0; /* Assume unmatch */
break;
default:
case GREP_BINARY_TEXT:
break;
}
default:
die("bug: unknown binary handling mode");
}
memset(&xecfg, 0, sizeof(xecfg));
@ -871,6 +892,12 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
count++;
if (opt->status_only)
return 1;
if (opt->name_only) {
show_name(opt, name);
return 1;
}
if (opt->count)
goto next_line;
if (binary_match_only) {
opt->output(opt, "Binary file ", 12);
output_color(opt, name, strlen(name),
@ -878,21 +905,13 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
opt->output(opt, " matches\n", 9);
return 1;
}
if (opt->name_only) {
show_name(opt, name);
return 1;
}
/* Hit at this line. If we haven't shown the
* pre-context lines, we would need to show them.
* When asked to do "count", this still show
* the context which is nonsense, but the user
* deserves to get that ;-).
*/
if (opt->pre_context)
show_pre_context(opt, name, buf, bol, lno);
else if (opt->funcname)
show_funcname_line(opt, name, buf, bol, lno);
if (!opt->count)
show_line(opt, bol, eol, name, lno, ':');
last_hit = lno;
}
@ -937,6 +956,7 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
output_sep(opt, ':');
snprintf(buf, sizeof(buf), "%u\n", count);
opt->output(opt, buf, strlen(buf));
return 1;
}
return !!last_hit;
}

2
grep.h
View File

@ -29,6 +29,7 @@ struct grep_pat {
int no;
enum grep_pat_token token;
const char *pattern;
size_t patternlen;
enum grep_header_field field;
regex_t regexp;
unsigned fixed:1;
@ -104,6 +105,7 @@ struct grep_opt {
void *output_priv;
};
extern void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen, const char *origin, int no, enum grep_pat_token t);
extern void append_grep_pattern(struct grep_opt *opt, const char *pat, const char *origin, int no, enum grep_pat_token t);
extern void append_header_grep_pattern(struct grep_opt *, enum grep_header_field, const char *);
extern void compile_grep_patterns(struct grep_opt *opt);

102
t/t7008-grep-binary.sh Executable file
View File

@ -0,0 +1,102 @@
#!/bin/sh
# Exercises 'git grep' against a tracked file that contains a NUL byte,
# covering the binary-related options and patterns that contain NUL.
test_description='git grep in binary files'
. ./test-lib.sh
# Create and commit file 'a': the text "binary", a NUL byte, then "file"
# followed by a newline.
test_expect_success 'setup' "
printf 'binary\000file\n' >a &&
git add a &&
git commit -m.
"
# With no binary-mode option, a hit is reported as "Binary file a matches".
test_expect_success 'git grep ina a' '
echo Binary file a matches >expect &&
git grep ina a >actual &&
test_cmp expect actual
'
# With -a and -h the output is expected to be identical to the content of 'a'.
test_expect_success 'git grep -ah ina a' '
git grep -ah ina a >actual &&
test_cmp a actual
'
# With -I the command must fail and print nothing.
test_expect_success 'git grep -I ina a' '
: >expect &&
test_must_fail git grep -I ina a >actual &&
test_cmp expect actual
'
# -c is expected to print "a:1".
test_expect_success 'git grep -c ina a' '
echo a:1 >expect &&
git grep -c ina a >actual &&
test_cmp expect actual
'
# -l is expected to print only the file name.
test_expect_success 'git grep -l ina a' '
echo a >expect &&
git grep -l ina a >actual &&
test_cmp expect actual
'
# -L with the pattern "bar" is expected to list 'a'.
test_expect_success 'git grep -L bar a' '
echo a >expect &&
git grep -L bar a >actual &&
test_cmp expect actual
'
# -q must succeed and print nothing.
test_expect_success 'git grep -q ina a' '
: >expect &&
git grep -q ina a >actual &&
test_cmp expect actual
'
# Fixed-string searches (-F, -Fi) for text located after the NUL byte
# must succeed.
test_expect_success 'git grep -F ile a' '
git grep -F ile a
'
test_expect_success 'git grep -Fi iLE a' '
git grep -Fi iLE a
'
# This test actually passes on platforms where regexec() supports the
# flag REG_STARTEND.
test_expect_failure 'git grep ile a' '
git grep ile a
'
test_expect_failure 'git grep .fi a' '
git grep .fi a
'
# The remaining tests write a pattern containing a NUL byte to file 'f'
# and pass it with -f.  The "...f" patterns are expected to match 'a';
# for the "...x" patterns git grep is expected to fail.  Cases marked
# test_expect_failure are recorded as known breakages.
test_expect_success 'git grep -F y<NUL>f a' "
printf 'y\000f' >f &&
git grep -f f -F a
"
test_expect_success 'git grep -F y<NUL>x a' "
printf 'y\000x' >f &&
test_must_fail git grep -f f -F a
"
test_expect_success 'git grep -Fi Y<NUL>f a' "
printf 'Y\000f' >f &&
git grep -f f -Fi a
"
test_expect_failure 'git grep -Fi Y<NUL>x a' "
printf 'Y\000x' >f &&
test_must_fail git grep -f f -Fi a
"
test_expect_success 'git grep y<NUL>f a' "
printf 'y\000f' >f &&
git grep -f f a
"
test_expect_failure 'git grep y<NUL>x a' "
printf 'y\000x' >f &&
test_must_fail git grep -f f a
"
test_done