grep: allow to use textconv filters

Recently and not so recently, we made sure that log/grep-type operations
use textconv filters when a user-facing diff would do the same:

ef90ab6 (pickaxe: use textconv for -S counting, 2012-10-28)
b1c2f57 (diff_grep: use textconv buffers for add/deleted files, 2012-10-28)
0508fe5 (combine-diff: respect textconv attributes, 2011-05-23)

"git grep" currently does not use textconv filters at all, that is
neither for displaying the match and context nor for the actual grepping,
even when requested by --textconv.

Introduce an option "--textconv" which makes git grep use any configured
textconv filters for grepping and output purposes. It is off by default.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Michael J Gruber <git@drmicha.warpmail.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Jeff King 2013-05-10 17:10:15 +02:00 committed by Junio C Hamano
parent 97f6a9c975
commit 335ec3bf41
5 changed files with 102 additions and 16 deletions

View File

@ -9,7 +9,7 @@ git-grep - Print lines matching a pattern
SYNOPSIS SYNOPSIS
-------- --------
[verse] [verse]
'git grep' [-a | --text] [-I] [-i | --ignore-case] [-w | --word-regexp] 'git grep' [-a | --text] [-I] [--textconv] [-i | --ignore-case] [-w | --word-regexp]
[-v | --invert-match] [-h|-H] [--full-name] [-v | --invert-match] [-h|-H] [--full-name]
[-E | --extended-regexp] [-G | --basic-regexp] [-E | --extended-regexp] [-G | --basic-regexp]
[-P | --perl-regexp] [-P | --perl-regexp]
@ -80,6 +80,13 @@ OPTIONS
--text:: --text::
Process binary files as if they were text. Process binary files as if they were text.
--textconv::
Honor textconv filter settings.
--no-textconv::
Do not honor textconv filter settings.
This is the default.
-i:: -i::
--ignore-case:: --ignore-case::
Ignore case differences between the patterns and the Ignore case differences between the patterns and the

View File

@ -659,6 +659,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
OPT_SET_INT('I', NULL, &opt.binary, OPT_SET_INT('I', NULL, &opt.binary,
N_("don't match patterns in binary files"), N_("don't match patterns in binary files"),
GREP_BINARY_NOMATCH), GREP_BINARY_NOMATCH),
OPT_BOOL(0, "textconv", &opt.allow_textconv,
N_("process binary files with textconv filters")),
{ OPTION_INTEGER, 0, "max-depth", &opt.max_depth, N_("depth"), { OPTION_INTEGER, 0, "max-depth", &opt.max_depth, N_("depth"),
N_("descend at most <depth> levels"), PARSE_OPT_NONEG, N_("descend at most <depth> levels"), PARSE_OPT_NONEG,
NULL, 1 }, NULL, 1 },

100
grep.c
View File

@ -2,6 +2,8 @@
#include "grep.h" #include "grep.h"
#include "userdiff.h" #include "userdiff.h"
#include "xdiff-interface.h" #include "xdiff-interface.h"
#include "diff.h"
#include "diffcore.h"
static int grep_source_load(struct grep_source *gs); static int grep_source_load(struct grep_source *gs);
static int grep_source_is_binary(struct grep_source *gs); static int grep_source_is_binary(struct grep_source *gs);
@ -1322,6 +1324,58 @@ static void std_output(struct grep_opt *opt, const void *buf, size_t size)
fwrite(buf, size, 1, stdout); fwrite(buf, size, 1, stdout);
} }
/*
 * Populate gs->buf and gs->size with the text that grep should search.
 *
 * When "driver" is NULL or carries no textconv command, this is simply
 * grep_source_load(gs) and its return value is passed through. Otherwise
 * the source is run through fill_textconv(), the converted result is
 * installed in "gs" in place of any data it held before, and 0 is returned.
 */
static int fill_textconv_grep(struct userdiff_driver *driver,
			      struct grep_source *gs)
{
	struct diff_filespec *spec;
	char *converted;
	size_t converted_len;

	if (!(driver && driver->textconv))
		return grep_source_load(gs);

	/*
	 * fill_textconv() only understands diff_filespecs, so build a
	 * throwaway one that describes this grep_source. Unifying the
	 * grep_source and diff_filespec structs would make this shim
	 * unnecessary.
	 */
	spec = alloc_filespec(gs->path);
	if (gs->type == GREP_SOURCE_SHA1)
		fill_filespec(spec, gs->identifier, 1, 0100644);
	else if (gs->type == GREP_SOURCE_FILE)
		fill_filespec(spec, null_sha1, 0, 0100644);
	else
		die("BUG: attempt to textconv something without a path?");

	/*
	 * Serialize the conversion itself: fill_textconv is not thread-safe,
	 * since it may load objects behind the scenes and it modifies the
	 * global diff tempfile structure.
	 */
	grep_read_lock();
	converted_len = fill_textconv(driver, spec, &converted);
	grep_read_unlock();
	free_filespec(spec);

	/*
	 * The diff machinery would normally keep the textconv'd buffer apart
	 * from the diff_filespec, but much of the grep code assumes that
	 * gs->buf is the start of whatever is being searched. Hand the
	 * converted text to the grep_source, clearing its old data first so
	 * that an already-loaded buffer is not leaked.
	 */
	grep_source_clear_data(gs);
	gs->size = converted_len;
	gs->buf = converted;

	return 0;
}
static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits) static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits)
{ {
char *bol; char *bol;
@ -1332,6 +1386,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
unsigned count = 0; unsigned count = 0;
int try_lookahead = 0; int try_lookahead = 0;
int show_function = 0; int show_function = 0;
struct userdiff_driver *textconv = NULL;
enum grep_context ctx = GREP_CONTEXT_HEAD; enum grep_context ctx = GREP_CONTEXT_HEAD;
xdemitconf_t xecfg; xdemitconf_t xecfg;
@ -1353,19 +1408,36 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
} }
opt->last_shown = 0; opt->last_shown = 0;
switch (opt->binary) { if (opt->allow_textconv) {
case GREP_BINARY_DEFAULT: grep_source_load_driver(gs);
if (grep_source_is_binary(gs)) /*
binary_match_only = 1; * We might set up the shared textconv cache data here, which
break; * is not thread-safe.
case GREP_BINARY_NOMATCH: */
if (grep_source_is_binary(gs)) grep_attr_lock();
return 0; /* Assume unmatch */ textconv = userdiff_get_textconv(gs->driver);
break; grep_attr_unlock();
case GREP_BINARY_TEXT: }
break;
default: /*
die("bug: unknown binary handling mode"); * We know the result of a textconv is text, so we only have to care
* about binary handling if we are not using it.
*/
if (!textconv) {
switch (opt->binary) {
case GREP_BINARY_DEFAULT:
if (grep_source_is_binary(gs))
binary_match_only = 1;
break;
case GREP_BINARY_NOMATCH:
if (grep_source_is_binary(gs))
return 0; /* Assume unmatch */
break;
case GREP_BINARY_TEXT:
break;
default:
die("bug: unknown binary handling mode");
}
} }
memset(&xecfg, 0, sizeof(xecfg)); memset(&xecfg, 0, sizeof(xecfg));
@ -1373,7 +1445,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
try_lookahead = should_lookahead(opt); try_lookahead = should_lookahead(opt);
if (grep_source_load(gs) < 0) if (fill_textconv_grep(textconv, gs) < 0)
return 0; return 0;
bol = gs->buf; bol = gs->buf;

1
grep.h
View File

@ -107,6 +107,7 @@ struct grep_opt {
#define GREP_BINARY_NOMATCH 1 #define GREP_BINARY_NOMATCH 1
#define GREP_BINARY_TEXT 2 #define GREP_BINARY_TEXT 2
int binary; int binary;
int allow_textconv;
int extended; int extended;
int use_reflog_filter; int use_reflog_filter;
int pcre; int pcre;

View File

@ -160,7 +160,7 @@ test_expect_success 'grep does not honor textconv' '
test_must_fail git grep Qfile test_must_fail git grep Qfile
' '
test_expect_failure 'grep --textconv honors textconv' ' test_expect_success 'grep --textconv honors textconv' '
echo "a:binaryQfile" >expect && echo "a:binaryQfile" >expect &&
git grep --textconv Qfile >actual && git grep --textconv Qfile >actual &&
test_cmp expect actual test_cmp expect actual
@ -176,4 +176,8 @@ test_expect_failure 'grep --textconv blob honors textconv' '
test_cmp expect actual test_cmp expect actual
' '
test_expect_success 'grep --no-textconv blob does not honor textconv' '
test_must_fail git grep --no-textconv Qfile HEAD:a
'
test_done test_done