diff --git a/Documentation/blame-options.txt b/Documentation/blame-options.txt index 5428111d73..1ab1b96cf9 100644 --- a/Documentation/blame-options.txt +++ b/Documentation/blame-options.txt @@ -49,6 +49,13 @@ of lines before or after the line given by <start>. Show the result incrementally in a format designed for machine consumption. +--encoding=<encoding>:: + Specifies the encoding used to output author names + and commit summaries. Setting it to `none` makes blame + output unconverted data. For more information see the + discussion about encoding in the linkgit:git-log[1] + manual page. + --contents <file>:: When <rev> is not specified, the command annotates the changes starting backwards from the working tree copy. diff --git a/Documentation/i18n.txt b/Documentation/i18n.txt index d2970f8357..2cdacd94cd 100644 --- a/Documentation/i18n.txt +++ b/Documentation/i18n.txt @@ -37,9 +37,9 @@ of `i18n.commitencoding` in its `encoding` header. This is to help other people who look at them later. Lack of this header implies that the commit log message is encoded in UTF-8. -. 'git-log', 'git-show' and friends looks at the `encoding` - header of a commit object, and tries to re-code the log - message into UTF-8 unless otherwise specified. You can +. 'git-log', 'git-show', 'git-blame' and friends look at the + `encoding` header of a commit object, and try to re-code the + log message into UTF-8 unless otherwise specified.
You can specify the desired output encoding with `i18n.logoutputencoding` in `.git/config` file, like this: + diff --git a/builtin-blame.c b/builtin-blame.c index 48cc0c175d..2457e71fc0 100644 --- a/builtin-blame.c +++ b/builtin-blame.c @@ -1431,7 +1431,7 @@ static void get_commit_info(struct commit *commit, int detailed) { int len; - char *tmp, *endp; + char *tmp, *endp, *reencoded, *message; static char author_buf[1024]; static char committer_buf[1024]; static char summary_buf[1024]; @@ -1449,24 +1449,29 @@ static void get_commit_info(struct commit *commit, die("Cannot read commit %s", sha1_to_hex(commit->object.sha1)); } + reencoded = reencode_commit_message(commit, NULL); + message = reencoded ? reencoded : commit->buffer; ret->author = author_buf; - get_ac_line(commit->buffer, "\nauthor ", + get_ac_line(message, "\nauthor ", sizeof(author_buf), author_buf, &ret->author_mail, &ret->author_time, &ret->author_tz); - if (!detailed) + if (!detailed) { + free(reencoded); return; + } ret->committer = committer_buf; - get_ac_line(commit->buffer, "\ncommitter ", + get_ac_line(message, "\ncommitter ", sizeof(committer_buf), committer_buf, &ret->committer_mail, &ret->committer_time, &ret->committer_tz); ret->summary = summary_buf; - tmp = strstr(commit->buffer, "\n\n"); + tmp = strstr(message, "\n\n"); if (!tmp) { error_out: sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1)); + free(reencoded); return; } tmp += 2; @@ -1478,6 +1483,7 @@ static void get_commit_info(struct commit *commit, goto error_out; memcpy(summary_buf, tmp, len); summary_buf[len] = 0; + free(reencoded); } /* diff --git a/commit.h b/commit.h index 4c05864fb4..3a7b06a828 100644 --- a/commit.h +++ b/commit.h @@ -65,6 +65,8 @@ enum cmit_fmt { extern int non_ascii(int); struct rev_info; /* in revision.h, it circularly uses enum cmit_fmt */ +extern char *reencode_commit_message(const struct commit *commit, + const char **encoding_p); extern void get_commit_format(const char *arg, struct rev_info 
*); extern void format_commit_message(const struct commit *commit, const void *format, struct strbuf *sb, diff --git a/pretty.c b/pretty.c index 1e79943339..f6ff31264b 100644 --- a/pretty.c +++ b/pretty.c @@ -783,6 +783,20 @@ void pp_remainder(enum cmit_fmt fmt, } } +char *reencode_commit_message(const struct commit *commit, const char **encoding_p) +{ + const char *encoding; + + encoding = (git_log_output_encoding + ? git_log_output_encoding + : git_commit_encoding); + if (!encoding) + encoding = "utf-8"; + if (encoding_p) + *encoding_p = encoding; + return logmsg_reencode(commit, encoding); +} + void pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit, struct strbuf *sb, int abbrev, const char *subject, const char *after_subject, @@ -799,12 +813,7 @@ void pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit, return; } - encoding = (git_log_output_encoding - ? git_log_output_encoding - : git_commit_encoding); - if (!encoding) - encoding = "utf-8"; - reencoded = logmsg_reencode(commit, encoding); + reencoded = reencode_commit_message(commit, &encoding); if (reencoded) { msg = reencoded; } diff --git a/t/t8005-blame-i18n.sh b/t/t8005-blame-i18n.sh new file mode 100755 index 0000000000..4470a92bb2 --- /dev/null +++ b/t/t8005-blame-i18n.sh @@ -0,0 +1,92 @@ +#!/bin/sh + +test_description='git blame encoding conversion' +. ./test-lib.sh + +. "$TEST_DIRECTORY"/t8005/utf8.txt +. "$TEST_DIRECTORY"/t8005/cp1251.txt +. 
"$TEST_DIRECTORY"/t8005/sjis.txt + +test_expect_success 'setup the repository' ' + # Create the file + echo "UTF-8 LINE" > file && + git add file && + git commit --author "$UTF8_NAME " -m "$UTF8_MSG" && + + echo "CP1251 LINE" >> file && + git add file && + git config i18n.commitencoding cp1251 && + git commit --author "$CP1251_NAME " -m "$CP1251_MSG" && + + echo "SJIS LINE" >> file && + git add file && + git config i18n.commitencoding shift-jis && + git commit --author "$SJIS_NAME " -m "$SJIS_MSG" +' + +cat >expected < actual && + test_cmp actual expected +' + +cat >expected < actual && + test_cmp actual expected +' + +cat >expected < actual && + test_cmp actual expected +' + +cat >expected < actual && + test_cmp actual expected +' + +test_done diff --git a/t/t8005/cp1251.txt b/t/t8005/cp1251.txt new file mode 100644 index 0000000000..ce41e98b81 --- /dev/null +++ b/t/t8005/cp1251.txt @@ -0,0 +1,2 @@ +CP1251_NAME=" " +CP1251_MSG=" " diff --git a/t/t8005/sjis.txt b/t/t8005/sjis.txt new file mode 100644 index 0000000000..2ccfbad207 --- /dev/null +++ b/t/t8005/sjis.txt @@ -0,0 +1,2 @@ +SJIS_NAME="Irp~ Pury Rytr" +SJIS_MSG="Suru qu~yu" diff --git a/t/t8005/utf8.txt b/t/t8005/utf8.txt new file mode 100644 index 0000000000..f46cfc56d8 --- /dev/null +++ b/t/t8005/utf8.txt @@ -0,0 +1,2 @@ +UTF8_NAME="Иван Петрович Сидоров" +UTF8_MSG="Тестовое сообщение"