Merge branch 'rs/grep-binary'
* rs/grep-binary:
  grep: support NUL chars in search strings for -F
  grep: use REG_STARTEND for all matching if available
  grep: continue case insensitive fixed string search after NUL chars
  grep: use memmem() for fixed string search
  grep: --name-only over binary
  grep: --count over binary
  grep: grep: refactor handling of binary mode options
  grep: add test script for binary file handling
This commit is contained in:
commit
534930807c
@ -724,11 +724,15 @@ static int file_callback(const struct option *opt, const char *arg, int unset)
|
|||||||
if (!patterns)
|
if (!patterns)
|
||||||
die_errno("cannot open '%s'", arg);
|
die_errno("cannot open '%s'", arg);
|
||||||
while (strbuf_getline(&sb, patterns, '\n') == 0) {
|
while (strbuf_getline(&sb, patterns, '\n') == 0) {
|
||||||
|
char *s;
|
||||||
|
size_t len;
|
||||||
|
|
||||||
/* ignore empty line like grep does */
|
/* ignore empty line like grep does */
|
||||||
if (sb.len == 0)
|
if (sb.len == 0)
|
||||||
continue;
|
continue;
|
||||||
append_grep_pattern(grep_opt, strbuf_detach(&sb, NULL), arg,
|
|
||||||
++lno, GREP_PATTERN);
|
s = strbuf_detach(&sb, &len);
|
||||||
|
append_grep_pat(grep_opt, s, len, arg, ++lno, GREP_PATTERN);
|
||||||
}
|
}
|
||||||
fclose(patterns);
|
fclose(patterns);
|
||||||
strbuf_release(&sb);
|
strbuf_release(&sb);
|
||||||
|
98
grep.c
98
grep.c
@ -7,6 +7,7 @@ void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field fie
|
|||||||
{
|
{
|
||||||
struct grep_pat *p = xcalloc(1, sizeof(*p));
|
struct grep_pat *p = xcalloc(1, sizeof(*p));
|
||||||
p->pattern = pat;
|
p->pattern = pat;
|
||||||
|
p->patternlen = strlen(pat);
|
||||||
p->origin = "header";
|
p->origin = "header";
|
||||||
p->no = 0;
|
p->no = 0;
|
||||||
p->token = GREP_PATTERN_HEAD;
|
p->token = GREP_PATTERN_HEAD;
|
||||||
@ -18,9 +19,16 @@ void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field fie
|
|||||||
|
|
||||||
void append_grep_pattern(struct grep_opt *opt, const char *pat,
|
void append_grep_pattern(struct grep_opt *opt, const char *pat,
|
||||||
const char *origin, int no, enum grep_pat_token t)
|
const char *origin, int no, enum grep_pat_token t)
|
||||||
|
{
|
||||||
|
append_grep_pat(opt, pat, strlen(pat), origin, no, t);
|
||||||
|
}
|
||||||
|
|
||||||
|
void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
|
||||||
|
const char *origin, int no, enum grep_pat_token t)
|
||||||
{
|
{
|
||||||
struct grep_pat *p = xcalloc(1, sizeof(*p));
|
struct grep_pat *p = xcalloc(1, sizeof(*p));
|
||||||
p->pattern = pat;
|
p->pattern = pat;
|
||||||
|
p->patternlen = patlen;
|
||||||
p->origin = origin;
|
p->origin = origin;
|
||||||
p->no = no;
|
p->no = no;
|
||||||
p->token = t;
|
p->token = t;
|
||||||
@ -44,8 +52,8 @@ struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
|
|||||||
append_header_grep_pattern(ret, pat->field,
|
append_header_grep_pattern(ret, pat->field,
|
||||||
pat->pattern);
|
pat->pattern);
|
||||||
else
|
else
|
||||||
append_grep_pattern(ret, pat->pattern, pat->origin,
|
append_grep_pat(ret, pat->pattern, pat->patternlen,
|
||||||
pat->no, pat->token);
|
pat->origin, pat->no, pat->token);
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
@ -329,14 +337,21 @@ static void show_name(struct grep_opt *opt, const char *name)
|
|||||||
opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
|
opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int fixmatch(struct grep_pat *p, char *line, char *eol,
|
||||||
static int fixmatch(const char *pattern, char *line, int ignore_case, regmatch_t *match)
|
regmatch_t *match)
|
||||||
{
|
{
|
||||||
char *hit;
|
char *hit;
|
||||||
if (ignore_case)
|
|
||||||
hit = strcasestr(line, pattern);
|
if (p->ignore_case) {
|
||||||
else
|
char *s = line;
|
||||||
hit = strstr(line, pattern);
|
do {
|
||||||
|
hit = strcasestr(s, p->pattern);
|
||||||
|
if (hit)
|
||||||
|
break;
|
||||||
|
s += strlen(s) + 1;
|
||||||
|
} while (s < eol);
|
||||||
|
} else
|
||||||
|
hit = memmem(line, eol - line, p->pattern, p->patternlen);
|
||||||
|
|
||||||
if (!hit) {
|
if (!hit) {
|
||||||
match->rm_so = match->rm_eo = -1;
|
match->rm_so = match->rm_eo = -1;
|
||||||
@ -344,11 +359,22 @@ static int fixmatch(const char *pattern, char *line, int ignore_case, regmatch_t
|
|||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
match->rm_so = hit - line;
|
match->rm_so = hit - line;
|
||||||
match->rm_eo = match->rm_so + strlen(pattern);
|
match->rm_eo = match->rm_so + p->patternlen;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int regmatch(const regex_t *preg, char *line, char *eol,
|
||||||
|
regmatch_t *match, int eflags)
|
||||||
|
{
|
||||||
|
#ifdef REG_STARTEND
|
||||||
|
match->rm_so = 0;
|
||||||
|
match->rm_eo = eol - line;
|
||||||
|
eflags |= REG_STARTEND;
|
||||||
|
#endif
|
||||||
|
return regexec(preg, line, 1, match, eflags);
|
||||||
|
}
|
||||||
|
|
||||||
static int strip_timestamp(char *bol, char **eol_p)
|
static int strip_timestamp(char *bol, char **eol_p)
|
||||||
{
|
{
|
||||||
char *eol = *eol_p;
|
char *eol = *eol_p;
|
||||||
@ -399,9 +425,9 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
|
|||||||
|
|
||||||
again:
|
again:
|
||||||
if (p->fixed)
|
if (p->fixed)
|
||||||
hit = !fixmatch(p->pattern, bol, p->ignore_case, pmatch);
|
hit = !fixmatch(p, bol, eol, pmatch);
|
||||||
else
|
else
|
||||||
hit = !regexec(&p->regexp, bol, 1, pmatch, eflags);
|
hit = !regmatch(&p->regexp, bol, eol, pmatch, eflags);
|
||||||
|
|
||||||
if (hit && p->word_regexp) {
|
if (hit && p->word_regexp) {
|
||||||
if ((pmatch[0].rm_so < 0) ||
|
if ((pmatch[0].rm_so < 0) ||
|
||||||
@ -726,16 +752,9 @@ static int look_ahead(struct grep_opt *opt,
|
|||||||
regmatch_t m;
|
regmatch_t m;
|
||||||
|
|
||||||
if (p->fixed)
|
if (p->fixed)
|
||||||
hit = !fixmatch(p->pattern, bol, p->ignore_case, &m);
|
hit = !fixmatch(p, bol, bol + *left_p, &m);
|
||||||
else {
|
else
|
||||||
#ifdef REG_STARTEND
|
hit = !regmatch(&p->regexp, bol, bol + *left_p, &m, 0);
|
||||||
m.rm_so = 0;
|
|
||||||
m.rm_eo = *left_p;
|
|
||||||
hit = !regexec(&p->regexp, bol, 1, &m, REG_STARTEND);
|
|
||||||
#else
|
|
||||||
hit = !regexec(&p->regexp, bol, 1, &m, 0);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
if (!hit || m.rm_so < 0 || m.rm_eo < 0)
|
if (!hit || m.rm_so < 0 || m.rm_eo < 0)
|
||||||
continue;
|
continue;
|
||||||
if (earliest < 0 || m.rm_so < earliest)
|
if (earliest < 0 || m.rm_so < earliest)
|
||||||
@ -800,17 +819,19 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
|
|||||||
opt->show_hunk_mark = 1;
|
opt->show_hunk_mark = 1;
|
||||||
opt->last_shown = 0;
|
opt->last_shown = 0;
|
||||||
|
|
||||||
if (buffer_is_binary(buf, size)) {
|
switch (opt->binary) {
|
||||||
switch (opt->binary) {
|
case GREP_BINARY_DEFAULT:
|
||||||
case GREP_BINARY_DEFAULT:
|
if (buffer_is_binary(buf, size))
|
||||||
binary_match_only = 1;
|
binary_match_only = 1;
|
||||||
break;
|
break;
|
||||||
case GREP_BINARY_NOMATCH:
|
case GREP_BINARY_NOMATCH:
|
||||||
|
if (buffer_is_binary(buf, size))
|
||||||
return 0; /* Assume unmatch */
|
return 0; /* Assume unmatch */
|
||||||
break;
|
break;
|
||||||
default:
|
case GREP_BINARY_TEXT:
|
||||||
break;
|
break;
|
||||||
}
|
default:
|
||||||
|
die("bug: unknown binary handling mode");
|
||||||
}
|
}
|
||||||
|
|
||||||
memset(&xecfg, 0, sizeof(xecfg));
|
memset(&xecfg, 0, sizeof(xecfg));
|
||||||
@ -871,6 +892,12 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
|
|||||||
count++;
|
count++;
|
||||||
if (opt->status_only)
|
if (opt->status_only)
|
||||||
return 1;
|
return 1;
|
||||||
|
if (opt->name_only) {
|
||||||
|
show_name(opt, name);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (opt->count)
|
||||||
|
goto next_line;
|
||||||
if (binary_match_only) {
|
if (binary_match_only) {
|
||||||
opt->output(opt, "Binary file ", 12);
|
opt->output(opt, "Binary file ", 12);
|
||||||
output_color(opt, name, strlen(name),
|
output_color(opt, name, strlen(name),
|
||||||
@ -878,22 +905,14 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
|
|||||||
opt->output(opt, " matches\n", 9);
|
opt->output(opt, " matches\n", 9);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
if (opt->name_only) {
|
|
||||||
show_name(opt, name);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
/* Hit at this line. If we haven't shown the
|
/* Hit at this line. If we haven't shown the
|
||||||
* pre-context lines, we would need to show them.
|
* pre-context lines, we would need to show them.
|
||||||
* When asked to do "count", this still show
|
|
||||||
* the context which is nonsense, but the user
|
|
||||||
* deserves to get that ;-).
|
|
||||||
*/
|
*/
|
||||||
if (opt->pre_context)
|
if (opt->pre_context)
|
||||||
show_pre_context(opt, name, buf, bol, lno);
|
show_pre_context(opt, name, buf, bol, lno);
|
||||||
else if (opt->funcname)
|
else if (opt->funcname)
|
||||||
show_funcname_line(opt, name, buf, bol, lno);
|
show_funcname_line(opt, name, buf, bol, lno);
|
||||||
if (!opt->count)
|
show_line(opt, bol, eol, name, lno, ':');
|
||||||
show_line(opt, bol, eol, name, lno, ':');
|
|
||||||
last_hit = lno;
|
last_hit = lno;
|
||||||
}
|
}
|
||||||
else if (last_hit &&
|
else if (last_hit &&
|
||||||
@ -937,6 +956,7 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
|
|||||||
output_sep(opt, ':');
|
output_sep(opt, ':');
|
||||||
snprintf(buf, sizeof(buf), "%u\n", count);
|
snprintf(buf, sizeof(buf), "%u\n", count);
|
||||||
opt->output(opt, buf, strlen(buf));
|
opt->output(opt, buf, strlen(buf));
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
return !!last_hit;
|
return !!last_hit;
|
||||||
}
|
}
|
||||||
|
2
grep.h
2
grep.h
@ -29,6 +29,7 @@ struct grep_pat {
|
|||||||
int no;
|
int no;
|
||||||
enum grep_pat_token token;
|
enum grep_pat_token token;
|
||||||
const char *pattern;
|
const char *pattern;
|
||||||
|
size_t patternlen;
|
||||||
enum grep_header_field field;
|
enum grep_header_field field;
|
||||||
regex_t regexp;
|
regex_t regexp;
|
||||||
unsigned fixed:1;
|
unsigned fixed:1;
|
||||||
@ -104,6 +105,7 @@ struct grep_opt {
|
|||||||
void *output_priv;
|
void *output_priv;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
extern void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen, const char *origin, int no, enum grep_pat_token t);
|
||||||
extern void append_grep_pattern(struct grep_opt *opt, const char *pat, const char *origin, int no, enum grep_pat_token t);
|
extern void append_grep_pattern(struct grep_opt *opt, const char *pat, const char *origin, int no, enum grep_pat_token t);
|
||||||
extern void append_header_grep_pattern(struct grep_opt *, enum grep_header_field, const char *);
|
extern void append_header_grep_pattern(struct grep_opt *, enum grep_header_field, const char *);
|
||||||
extern void compile_grep_patterns(struct grep_opt *opt);
|
extern void compile_grep_patterns(struct grep_opt *opt);
|
||||||
|
102
t/t7008-grep-binary.sh
Executable file
102
t/t7008-grep-binary.sh
Executable file
@ -0,0 +1,102 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
test_description='git grep in binary files'
|
||||||
|
|
||||||
|
. ./test-lib.sh
|
||||||
|
|
||||||
|
test_expect_success 'setup' "
|
||||||
|
printf 'binary\000file\n' >a &&
|
||||||
|
git add a &&
|
||||||
|
git commit -m.
|
||||||
|
"
|
||||||
|
|
||||||
|
test_expect_success 'git grep ina a' '
|
||||||
|
echo Binary file a matches >expect &&
|
||||||
|
git grep ina a >actual &&
|
||||||
|
test_cmp expect actual
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'git grep -ah ina a' '
|
||||||
|
git grep -ah ina a >actual &&
|
||||||
|
test_cmp a actual
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'git grep -I ina a' '
|
||||||
|
: >expect &&
|
||||||
|
test_must_fail git grep -I ina a >actual &&
|
||||||
|
test_cmp expect actual
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'git grep -c ina a' '
|
||||||
|
echo a:1 >expect &&
|
||||||
|
git grep -c ina a >actual &&
|
||||||
|
test_cmp expect actual
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'git grep -l ina a' '
|
||||||
|
echo a >expect &&
|
||||||
|
git grep -l ina a >actual &&
|
||||||
|
test_cmp expect actual
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'git grep -L bar a' '
|
||||||
|
echo a >expect &&
|
||||||
|
git grep -L bar a >actual &&
|
||||||
|
test_cmp expect actual
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'git grep -q ina a' '
|
||||||
|
: >expect &&
|
||||||
|
git grep -q ina a >actual &&
|
||||||
|
test_cmp expect actual
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'git grep -F ile a' '
|
||||||
|
git grep -F ile a
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'git grep -Fi iLE a' '
|
||||||
|
git grep -Fi iLE a
|
||||||
|
'
|
||||||
|
|
||||||
|
# This test actually passes on platforms where regexec() supports the
|
||||||
|
# flag REG_STARTEND.
|
||||||
|
test_expect_failure 'git grep ile a' '
|
||||||
|
git grep ile a
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_failure 'git grep .fi a' '
|
||||||
|
git grep .fi a
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'git grep -F y<NUL>f a' "
|
||||||
|
printf 'y\000f' >f &&
|
||||||
|
git grep -f f -F a
|
||||||
|
"
|
||||||
|
|
||||||
|
test_expect_success 'git grep -F y<NUL>x a' "
|
||||||
|
printf 'y\000x' >f &&
|
||||||
|
test_must_fail git grep -f f -F a
|
||||||
|
"
|
||||||
|
|
||||||
|
test_expect_success 'git grep -Fi Y<NUL>f a' "
|
||||||
|
printf 'Y\000f' >f &&
|
||||||
|
git grep -f f -Fi a
|
||||||
|
"
|
||||||
|
|
||||||
|
test_expect_failure 'git grep -Fi Y<NUL>x a' "
|
||||||
|
printf 'Y\000x' >f &&
|
||||||
|
test_must_fail git grep -f f -Fi a
|
||||||
|
"
|
||||||
|
|
||||||
|
test_expect_success 'git grep y<NUL>f a' "
|
||||||
|
printf 'y\000f' >f &&
|
||||||
|
git grep -f f a
|
||||||
|
"
|
||||||
|
|
||||||
|
test_expect_failure 'git grep y<NUL>x a' "
|
||||||
|
printf 'y\000x' >f &&
|
||||||
|
test_must_fail git grep -f f a
|
||||||
|
"
|
||||||
|
|
||||||
|
test_done
|
Loading…
Reference in New Issue
Block a user