Merge branch 'jk/read-commit-buffer-data-after-free'

"git log --grep=<pattern>" used to look for the pattern in literal
bytes of the commit log message and ignored the log-output encoding.

* jk/read-commit-buffer-data-after-free:
  log: re-encode commit messages before grepping
This commit is contained in:
Junio C Hamano 2013-02-17 15:23:20 -08:00
commit d04f998b12
2 changed files with 78 additions and 7 deletions

View File

@ -2268,7 +2268,10 @@ static int commit_rewrite_person(struct strbuf *buf, const char *what, struct st
static int commit_match(struct commit *commit, struct rev_info *opt)
{
int retval;
const char *encoding;
char *message;
struct strbuf buf = STRBUF_INIT;
if (!opt->grep_filter.pattern_list && !opt->grep_filter.header_list)
return 1;
@ -2279,13 +2282,23 @@ static int commit_match(struct commit *commit, struct rev_info *opt)
strbuf_addch(&buf, '\n');
}
/*
* We grep in the user's output encoding, under the assumption that it
* is the encoding they are most likely to write their grep pattern
* for. In addition, it means we will match the "notes" encoding below,
* so we will not end up with a buffer that has two different encodings
* in it.
*/
encoding = get_log_output_encoding();
message = logmsg_reencode(commit, encoding);
/* Copy the commit to temporary if we are using "fake" headers */
if (buf.len)
strbuf_addstr(&buf, commit->buffer);
strbuf_addstr(&buf, message);
if (opt->grep_filter.header_list && opt->mailmap) {
if (!buf.len)
strbuf_addstr(&buf, commit->buffer);
strbuf_addstr(&buf, message);
commit_rewrite_person(&buf, "\nauthor ", opt->mailmap);
commit_rewrite_person(&buf, "\ncommitter ", opt->mailmap);
@ -2294,18 +2307,18 @@ static int commit_match(struct commit *commit, struct rev_info *opt)
/* Append "fake" message parts as needed */
if (opt->show_notes) {
if (!buf.len)
strbuf_addstr(&buf, commit->buffer);
format_display_notes(commit->object.sha1, &buf,
get_log_output_encoding(), 1);
strbuf_addstr(&buf, message);
format_display_notes(commit->object.sha1, &buf, encoding, 1);
}
/* Find either in the commit object, or in the temporary */
/* Find either in the original commit message, or in the temporary */
if (buf.len)
retval = grep_buffer(&opt->grep_filter, buf.buf, buf.len);
else
retval = grep_buffer(&opt->grep_filter,
commit->buffer, strlen(commit->buffer));
message, strlen(message));
strbuf_release(&buf);
logmsg_free(message, commit);
return retval;
}

58
t/t4210-log-i18n.sh Executable file
View File

@ -0,0 +1,58 @@
#!/bin/sh
test_description='test log with i18n features'
. ./test-lib.sh
# two forms of é
utf8_e=$(printf '\303\251')
latin1_e=$(printf '\351')
test_expect_success 'create commits in different encodings' '
test_tick &&
cat >msg <<-EOF &&
utf8
t${utf8_e}st
EOF
git add msg &&
git -c i18n.commitencoding=utf8 commit -F msg &&
cat >msg <<-EOF &&
latin1
t${latin1_e}st
EOF
git add msg &&
git -c i18n.commitencoding=ISO-8859-1 commit -F msg
'
test_expect_success 'log --grep searches in log output encoding (utf8)' '
cat >expect <<-\EOF &&
latin1
utf8
EOF
git log --encoding=utf8 --format=%s --grep=$utf8_e >actual &&
test_cmp expect actual
'
test_expect_success 'log --grep searches in log output encoding (latin1)' '
cat >expect <<-\EOF &&
latin1
utf8
EOF
git log --encoding=ISO-8859-1 --format=%s --grep=$latin1_e >actual &&
test_cmp expect actual
'
test_expect_success 'log --grep does not find non-reencoded values (utf8)' '
>expect &&
git log --encoding=utf8 --format=%s --grep=$latin1_e >actual &&
test_cmp expect actual
'
test_expect_success 'log --grep does not find non-reencoded values (latin1)' '
>expect &&
git log --encoding=ISO-8859-1 --format=%s --grep=$utf8_e >actual &&
test_cmp expect actual
'
test_done