Merge branch 'hm/paint-hits-in-log-grep'

"git log --grep=string --author=name" learns to highlight hits just
like "git grep string" does.

* hm/paint-hits-in-log-grep:
  grep/pcre2: fix an edge case concerning ascii patterns and UTF-8 data
  pretty: colorize pattern matches in commit messages
  grep: refactor next_match() and match_one_pattern() for external use
This commit is contained in:
Junio C Hamano 2021-11-01 13:48:08 -07:00
commit b93d720691
6 changed files with 255 additions and 46 deletions

View File

@ -105,9 +105,12 @@ color.grep.<slot>::
`matchContext`;;
matching text in context lines
`matchSelected`;;
matching text in selected lines
matching text in selected lines. Also used to color the text matched by
the following linkgit:git-log[1] options: `--grep`, `--author` and `--committer`.
`selected`;;
non-matching text in selected lines
non-matching text in selected lines. Also used to color the non-matching
text on lines matched by the following linkgit:git-log[1] options:
`--grep`, `--author` and `--committer`.
`separator`;;
separators between fields on a line (`:`, `-`, and `=`)
and between hunks (`--`)

85
grep.c
View File

@ -382,8 +382,10 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
}
options |= PCRE2_CASELESS;
}
if (!opt->ignore_locale && is_utf8_locale() && has_non_ascii(p->pattern) &&
!(!opt->ignore_case && (p->fixed || p->is_fixed)))
if ((!opt->ignore_locale && !has_non_ascii(p->pattern)) ||
(!opt->ignore_locale && is_utf8_locale() &&
has_non_ascii(p->pattern) && !(!opt->ignore_case &&
(p->fixed || p->is_fixed))))
options |= (PCRE2_UTF | PCRE2_MATCH_INVALID_UTF);
#ifdef GIT_PCRE2_VERSION_10_36_OR_HIGHER
@ -944,10 +946,10 @@ static struct {
{ "reflog ", 7 },
};
static int match_one_pattern(struct grep_pat *p,
const char *bol, const char *eol,
enum grep_context ctx,
regmatch_t *pmatch, int eflags)
static int headerless_match_one_pattern(struct grep_pat *p,
const char *bol, const char *eol,
enum grep_context ctx,
regmatch_t *pmatch, int eflags)
{
int hit = 0;
const char *start = bol;
@ -956,25 +958,6 @@ static int match_one_pattern(struct grep_pat *p,
((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
return 0;
if (p->token == GREP_PATTERN_HEAD) {
const char *field;
size_t len;
assert(p->field < ARRAY_SIZE(header_field));
field = header_field[p->field].field;
len = header_field[p->field].len;
if (strncmp(bol, field, len))
return 0;
bol += len;
switch (p->field) {
case GREP_HEADER_AUTHOR:
case GREP_HEADER_COMMITTER:
strip_timestamp(bol, &eol);
break;
default:
break;
}
}
again:
hit = patmatch(p, bol, eol, pmatch, eflags);
@ -1025,6 +1008,36 @@ static int match_one_pattern(struct grep_pat *p,
return hit;
}
/*
 * Match a single pattern against the line [bol, eol).
 *
 * For a header pattern (GREP_PATTERN_HEAD) the line must start with the
 * header name registered in header_field[] for p->field; that prefix is
 * skipped, and for author/committer headers strip_timestamp() is applied
 * to the end of the line before matching. The actual matching is delegated
 * to headerless_match_one_pattern(), whose result is returned as is.
 *
 * Returns 0 without matching when the header prefix is not present.
 */
static int match_one_pattern(struct grep_pat *p,
			     const char *bol, const char *eol,
			     enum grep_context ctx, regmatch_t *pmatch,
			     int eflags)
{
	if (p->token == GREP_PATTERN_HEAD) {
		const char *prefix;
		size_t prefix_len;

		assert(p->field < ARRAY_SIZE(header_field));
		prefix = header_field[p->field].field;
		prefix_len = header_field[p->field].len;

		/* A line that does not carry this header can never match. */
		if (strncmp(bol, prefix, prefix_len) != 0)
			return 0;
		bol += prefix_len;

		/* Only identity headers get strip_timestamp() applied. */
		if (p->field == GREP_HEADER_AUTHOR ||
		    p->field == GREP_HEADER_COMMITTER)
			strip_timestamp(bol, &eol);
	}

	return headerless_match_one_pattern(p, bol, eol, ctx, pmatch, eflags);
}
static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x,
const char *bol, const char *eol,
enum grep_context ctx, ssize_t *col,
@ -1143,7 +1156,7 @@ static int match_next_pattern(struct grep_pat *p,
{
regmatch_t match;
if (!match_one_pattern(p, bol, eol, ctx, &match, eflags))
if (!headerless_match_one_pattern(p, bol, eol, ctx, &match, eflags))
return 0;
if (match.rm_so < 0 || match.rm_eo < 0)
return 0;
@ -1158,19 +1171,26 @@ static int match_next_pattern(struct grep_pat *p,
return 1;
}
static int next_match(struct grep_opt *opt,
const char *bol, const char *eol,
enum grep_context ctx, regmatch_t *pmatch, int eflags)
int grep_next_match(struct grep_opt *opt,
const char *bol, const char *eol,
enum grep_context ctx, regmatch_t *pmatch,
enum grep_header_field field, int eflags)
{
struct grep_pat *p;
int hit = 0;
pmatch->rm_so = pmatch->rm_eo = -1;
if (bol < eol) {
for (p = opt->pattern_list; p; p = p->next) {
for (p = ((ctx == GREP_CONTEXT_HEAD)
? opt->header_list : opt->pattern_list);
p; p = p->next) {
switch (p->token) {
case GREP_PATTERN: /* atom */
case GREP_PATTERN_HEAD:
if ((field != GREP_HEADER_FIELD_MAX) &&
(p->field != field))
continue;
/* fall thru */
case GREP_PATTERN: /* atom */
case GREP_PATTERN_BODY:
hit |= match_next_pattern(p, bol, eol, ctx,
pmatch, eflags);
@ -1261,7 +1281,8 @@ static void show_line(struct grep_opt *opt,
else if (sign == '=')
line_color = opt->colors[GREP_COLOR_FUNCTION];
}
while (next_match(opt, bol, eol, ctx, &match, eflags)) {
while (grep_next_match(opt, bol, eol, ctx, &match,
GREP_HEADER_FIELD_MAX, eflags)) {
if (match.rm_so == match.rm_eo)
break;

9
grep.h
View File

@ -191,6 +191,15 @@ void compile_grep_patterns(struct grep_opt *opt);
void free_grep_patterns(struct grep_opt *opt);
int grep_buffer(struct grep_opt *opt, const char *buf, unsigned long size);
/*
 * Look for the next match of opt's patterns in the text between bol and
 * eol and report it through pmatch; both offsets in pmatch are reset to
 * -1 before searching. The header patterns are consulted when ctx is
 * GREP_CONTEXT_HEAD, the regular patterns otherwise.
 *
 * The field parameter is only used to filter header patterns
 * (where appropriate): a header pattern for a different field is
 * skipped. If filtering isn't desirable, GREP_HEADER_FIELD_MAX
 * should be supplied.
 */
int grep_next_match(struct grep_opt *opt,
const char *bol, const char *eol,
enum grep_context ctx, regmatch_t *pmatch,
enum grep_header_field field, int eflags);
struct grep_source {
char *name;

101
pretty.c
View File

@ -431,6 +431,52 @@ const char *show_ident_date(const struct ident_split *ident,
return show_date(date, tz, mode);
}
/*
 * Append buflen bytes of buf to sb, preceded by the color sequence in
 * color. GIT_COLOR_RESET is appended afterwards only when color is a
 * non-empty string.
 */
static inline void strbuf_add_with_color(struct strbuf *sb, const char *color,
					 const char *buf, size_t buflen)
{
	strbuf_addstr(sb, color);
	strbuf_add(sb, buf, buflen);

	/* An empty color emitted nothing, so there is nothing to reset. */
	if (color[0] == '\0')
		return;
	strbuf_addstr(sb, GIT_COLOR_RESET);
}
/*
 * Append line (linelen bytes) to sb, painting the parts that match the
 * grep patterns in opt.
 *
 * The line is appended verbatim when there is no grep_opt, when color is
 * not wanted, when the match is inverted, or when nothing on the line
 * matches. Otherwise each match is wrapped in the GREP_COLOR_MATCH_SELECTED
 * color and every other piece of the line in the GREP_COLOR_SELECTED color.
 *
 * ctx and field are passed through to grep_next_match().
 */
static void append_line_with_color(struct strbuf *sb, struct grep_opt *opt,
				   const char *line, size_t linelen,
				   int color, enum grep_context ctx,
				   enum grep_header_field field)
{
	const char *cursor = line;
	const char *eol = line + linelen;
	const char *plain_color, *hit_color;
	regmatch_t hit;
	int eflags = 0;

	if (!opt || !want_color(color) || opt->invert) {
		strbuf_add(sb, cursor, eol - cursor);
		return;
	}

	plain_color = opt->colors[GREP_COLOR_SELECTED];
	hit_color = opt->colors[GREP_COLOR_MATCH_SELECTED];

	/* Stop on an empty match so the loop always makes progress. */
	while (grep_next_match(opt, cursor, eol, ctx, &hit, field, eflags) &&
	       hit.rm_so != hit.rm_eo) {
		strbuf_add_with_color(sb, plain_color, cursor, hit.rm_so);
		strbuf_add_with_color(sb, hit_color, cursor + hit.rm_so,
				      hit.rm_eo - hit.rm_so);
		cursor += hit.rm_eo;
		/* The rest of the line no longer starts at its beginning. */
		eflags = REG_NOTBOL;
	}

	/* eflags is only set once at least one match has been painted. */
	if (eflags)
		strbuf_add_with_color(sb, plain_color, cursor, eol - cursor);
	else
		strbuf_add(sb, cursor, eol - cursor);
}
void pp_user_info(struct pretty_print_context *pp,
const char *what, struct strbuf *sb,
const char *line, const char *encoding)
@ -496,9 +542,26 @@ void pp_user_info(struct pretty_print_context *pp,
strbuf_addch(sb, '\n');
strbuf_addf(sb, " <%.*s>\n", (int)maillen, mailbuf);
} else {
strbuf_addf(sb, "%s: %.*s%.*s <%.*s>\n", what,
(pp->fmt == CMIT_FMT_FULLER) ? 4 : 0, " ",
(int)namelen, namebuf, (int)maillen, mailbuf);
struct strbuf id = STRBUF_INIT;
enum grep_header_field field = GREP_HEADER_FIELD_MAX;
struct grep_opt *opt = pp->rev ? &pp->rev->grep_filter : NULL;
if (!strcmp(what, "Author"))
field = GREP_HEADER_AUTHOR;
else if (!strcmp(what, "Commit"))
field = GREP_HEADER_COMMITTER;
strbuf_addf(sb, "%s: ", what);
if (pp->fmt == CMIT_FMT_FULLER)
strbuf_addchars(sb, ' ', 4);
strbuf_addf(&id, "%.*s <%.*s>", (int)namelen, namebuf,
(int)maillen, mailbuf);
append_line_with_color(sb, opt, id.buf, id.len, pp->color,
GREP_CONTEXT_HEAD, field);
strbuf_addch(sb, '\n');
strbuf_release(&id);
}
switch (pp->fmt) {
@ -1935,8 +1998,9 @@ static int pp_utf8_width(const char *start, const char *end)
return width;
}
static void strbuf_add_tabexpand(struct strbuf *sb, int tabwidth,
const char *line, int linelen)
static void strbuf_add_tabexpand(struct strbuf *sb, struct grep_opt *opt,
int color, int tabwidth, const char *line,
int linelen)
{
const char *tab;
@ -1953,7 +2017,9 @@ static void strbuf_add_tabexpand(struct strbuf *sb, int tabwidth,
break;
/* Output the data .. */
strbuf_add(sb, line, tab - line);
append_line_with_color(sb, opt, line, tab - line, color,
GREP_CONTEXT_BODY,
GREP_HEADER_FIELD_MAX);
/* .. and the de-tabified tab */
strbuf_addchars(sb, ' ', tabwidth - (width % tabwidth));
@ -1968,7 +2034,8 @@ static void strbuf_add_tabexpand(struct strbuf *sb, int tabwidth,
* worrying about width - there's nothing more to
* align.
*/
strbuf_add(sb, line, linelen);
append_line_with_color(sb, opt, line, linelen, color, GREP_CONTEXT_BODY,
GREP_HEADER_FIELD_MAX);
}
/*
@ -1980,11 +2047,16 @@ static void pp_handle_indent(struct pretty_print_context *pp,
struct strbuf *sb, int indent,
const char *line, int linelen)
{
struct grep_opt *opt = pp->rev ? &pp->rev->grep_filter : NULL;
strbuf_addchars(sb, ' ', indent);
if (pp->expand_tabs_in_log)
strbuf_add_tabexpand(sb, pp->expand_tabs_in_log, line, linelen);
strbuf_add_tabexpand(sb, opt, pp->color, pp->expand_tabs_in_log,
line, linelen);
else
strbuf_add(sb, line, linelen);
append_line_with_color(sb, opt, line, linelen, pp->color,
GREP_CONTEXT_BODY,
GREP_HEADER_FIELD_MAX);
}
static int is_mboxrd_from(const char *line, int len)
@ -2002,7 +2074,9 @@ void pp_remainder(struct pretty_print_context *pp,
struct strbuf *sb,
int indent)
{
struct grep_opt *opt = pp->rev ? &pp->rev->grep_filter : NULL;
int first = 1;
for (;;) {
const char *line = *msg_p;
int linelen = get_one_line(line);
@ -2023,14 +2097,17 @@ void pp_remainder(struct pretty_print_context *pp,
if (indent)
pp_handle_indent(pp, sb, indent, line, linelen);
else if (pp->expand_tabs_in_log)
strbuf_add_tabexpand(sb, pp->expand_tabs_in_log,
line, linelen);
strbuf_add_tabexpand(sb, opt, pp->color,
pp->expand_tabs_in_log, line,
linelen);
else {
if (pp->fmt == CMIT_FMT_MBOXRD &&
is_mboxrd_from(line, linelen))
strbuf_addch(sb, '>');
strbuf_add(sb, line, linelen);
append_line_with_color(sb, opt, line, linelen,
pp->color, GREP_CONTEXT_BODY,
GREP_HEADER_FIELD_MAX);
}
strbuf_addch(sb, '\n');
}

View File

@ -449,6 +449,57 @@ test_expect_success !FAIL_PREREQS 'log with various grep.patternType configurati
)
'
# With --color=always and no color.grep.* override, the part of the
# "Author:" line matched by --author is expected to be painted bold red.
test_expect_success 'log --author' '
cat >expect <<-\EOF &&
Author: <BOLD;RED>A U<RESET> Thor <author@example.com>
EOF
git log -1 --color=always --author="A U" >log &&
grep Author log >actual.raw &&
test_decode_color <actual.raw >actual &&
test_cmp expect actual
'
# Same as above for --committer; --pretty=fuller is used so that the
# "Commit:" line is shown, and the hit is inside the e-mail address.
test_expect_success 'log --committer' '
cat >expect <<-\EOF &&
Commit: C O Mitter <committer@<BOLD;RED>example<RESET>.com>
EOF
git log -1 --color=always --pretty=fuller --committer="example" >log &&
grep "Commit:" log >actual.raw &&
test_decode_color <actual.raw >actual &&
test_cmp expect actual
'
# A case-insensitive, anchored --grep pattern: both "Second" and "second"
# are expected to have only their leading "Sec"/"sec" highlighted.
test_expect_success 'log -i --grep with color' '
cat >expect <<-\EOF &&
<BOLD;RED>Sec<RESET>ond
<BOLD;RED>sec<RESET>ond
EOF
git log --color=always -i --grep=^sec >log &&
grep -i sec log >actual.raw &&
test_decode_color <actual.raw >actual &&
test_cmp expect actual
'
# color.grep.selected is expected to color the non-matching pieces of a
# line that has a hit (green here), while the hit itself stays bold red.
test_expect_success '-c color.grep.selected log --grep' '
cat >expect <<-\EOF &&
<GREEN>th<RESET><BOLD;RED>ir<RESET><GREEN>d<RESET>
EOF
git -c color.grep.selected="green" log --color=always --grep=ir >log &&
grep ir log >actual.raw &&
test_decode_color <actual.raw >actual &&
test_cmp expect actual
'
# color.grep.matchSelected is expected to color every hit on the line
# (each "i" of "initial" in blue); the text in between is left uncolored.
test_expect_success '-c color.grep.matchSelected log --grep' '
cat >expect <<-\EOF &&
<BLUE>i<RESET>n<BLUE>i<RESET>t<BLUE>i<RESET>al
EOF
git -c color.grep.matchSelected="blue" log --color=always --grep=i >log &&
grep al log >actual.raw &&
test_decode_color <actual.raw >actual &&
test_cmp expect actual
'
cat > expect <<EOF
* Second
* sixth

View File

@ -53,6 +53,54 @@ test_expect_success REGEX_LOCALE 'pickaxe -i on non-ascii' '
test_cmp expected actual
'
# An ASCII-only PCRE pattern matched against a UTF-8 author name: the
# non-ASCII characters before "Thor" must be part of the highlighted hit.
# This test also creates the commit with the message "sécond".
test_expect_success GETTEXT_LOCALE,PCRE 'log --author with an ascii pattern on UTF-8 data' '
cat >expected <<-\EOF &&
Author: <BOLD;RED>À Ú Thor<RESET> <author@example.com>
EOF
test_write_lines "forth" >file4 &&
git add file4 &&
git commit --author="À Ú Thor <author@example.com>" -m sécond &&
git log -1 --color=always --perl-regexp --author=".*Thor" >log &&
grep Author log >actual.raw &&
test_decode_color <actual.raw >actual &&
test_cmp expected actual
'
# An ASCII-only PCRE pattern against a committer name, with the commit
# made under i18n.commitEncoding=latin1 and shown with
# i18n.logOutputEncoding=latin1: the hit is expected to start at the
# space before "O", leaving the leading "Ç" unpainted.
# NOTE(review): GIT_COMMITTER_NAME/EMAIL are assigned in the test body and
# not restored; presumably they stay in effect for later tests -- confirm.
test_expect_success GETTEXT_LOCALE,PCRE 'log --committer with an ascii pattern on ISO-8859-1 data' '
cat >expected <<-\EOF &&
Commit: Ç<BOLD;RED> O Mîtter <committer@example.com><RESET>
EOF
test_write_lines "fifth" >file5 &&
git add file5 &&
GIT_COMMITTER_NAME="Ç O Mîtter" &&
GIT_COMMITTER_EMAIL="committer@example.com" &&
git -c i18n.commitEncoding=latin1 commit -m thïrd &&
git -c i18n.logOutputEncoding=latin1 log -1 --pretty=fuller --color=always --perl-regexp --committer=" O.*" >log &&
grep Commit: log >actual.raw &&
test_decode_color <actual.raw >actual &&
test_cmp expected actual
'
# An ASCII-only PCRE --grep pattern on a message containing a non-ASCII
# character: only "con" is expected to be highlighted, with the "sé"
# before it left intact.
test_expect_success GETTEXT_LOCALE,PCRE 'log --grep with an ascii pattern on UTF-8 data' '
cat >expected <<-\EOF &&
sé<BOLD;RED>con<RESET>d
EOF
git log -1 --color=always --perl-regexp --grep="con" >log &&
grep con log >actual.raw &&
test_decode_color <actual.raw >actual &&
test_cmp expected actual
'
# An ASCII-only PCRE --grep pattern with i18n.logOutputEncoding=latin1:
# the whole "thïrd" subject is expected to be highlighted as one hit.
test_expect_success GETTEXT_LOCALE,PCRE 'log --grep with an ascii pattern on ISO-8859-1 data' '
cat >expected <<-\EOF &&
<BOLD;RED>thïrd<RESET>
EOF
git -c i18n.logOutputEncoding=latin1 log -1 --color=always --perl-regexp --grep="th.*rd" >log &&
grep "th.*rd" log >actual.raw &&
test_decode_color <actual.raw >actual &&
test_cmp expected actual
'
test_expect_success GETTEXT_LOCALE,LIBPCRE2 'PCRE v2: setup invalid UTF-8 data' '
printf "\\200\\n" >invalid-0x80 &&
echo "ævar" >expected &&