diff --git a/Documentation/config.txt b/Documentation/config.txt index 22482d6a94..ffef3abfb6 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -248,6 +248,10 @@ i18n.commitEncoding:: browser (and possibly at other places in the future or in other porcelains). See e.g. gitlink:git-mailinfo[1]. Defaults to 'utf-8'. +i18n.logOutputEncoding:: + Character encoding the commit messages are converted to when + running `git-log` and friends. + log.showroot:: If true, the initial commit will be shown as a big creation event. This is equivalent to a diff against an empty tree. diff --git a/builtin-commit-tree.c b/builtin-commit-tree.c index 33c29f7495..146aaffd28 100644 --- a/builtin-commit-tree.c +++ b/builtin-commit-tree.c @@ -118,7 +118,9 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix) parents++; } - encoding_is_utf8 = !strcmp(git_commit_encoding, "utf-8"); + /* Not having i18n.commitencoding is the same as having utf-8 */ + encoding_is_utf8 = (!git_commit_encoding || + !strcmp(git_commit_encoding, "utf-8")); init_buffer(&buffer, &size); add_buffer(&buffer, &size, "tree %s\n", sha1_to_hex(tree_sha1)); diff --git a/builtin-log.c b/builtin-log.c index b7e47cb5fe..a59b4acef1 100644 --- a/builtin-log.c +++ b/builtin-log.c @@ -33,13 +33,10 @@ static void cmd_log_init(int argc, const char **argv, const char *prefix, const char *arg = argv[i]; if (!strncmp(arg, "--encoding=", 11)) { arg += 11; - if (MAX_ENCODING_LENGTH <= strlen(arg)) - die(" Value of output encoding '%s' too long", - arg); if (strcmp(arg, "none")) - strcpy(git_commit_encoding, arg); + git_log_output_encoding = strdup(arg); else - git_commit_encoding[0] = 0; + git_log_output_encoding = ""; } else die("unrecognized argument: %s", arg); diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c index 507b93f6a7..a67f3eb90b 100644 --- a/builtin-mailinfo.c +++ b/builtin-mailinfo.c @@ -806,7 +806,8 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix) if (!strcmp(argv[1], "-k")) keep_subject = 1; else if (!strcmp(argv[1], "-u")) - metainfo_charset = git_commit_encoding; + metainfo_charset = (git_commit_encoding + ? git_commit_encoding : "utf-8"); else if (!strncmp(argv[1], "--encoding=", 11)) metainfo_charset = argv[1] + 11; else diff --git a/cache.h b/cache.h index 4943056c19..29dd290c92 100644 --- a/cache.h +++ b/cache.h @@ -416,8 +416,8 @@ extern int check_repository_format_version(const char *var, const char *value); extern char git_default_email[MAX_GITNAME]; extern char git_default_name[MAX_GITNAME]; -#define MAX_ENCODING_LENGTH 64 -extern char git_commit_encoding[MAX_ENCODING_LENGTH]; +extern char *git_commit_encoding; +extern char *git_log_output_encoding; extern int copy_fd(int ifd, int ofd); extern void write_or_die(int fd, const void *buf, size_t count); diff --git a/commit.c b/commit.c index df4bc0775a..6f2839a5cd 100644 --- a/commit.c +++ b/commit.c @@ -592,12 +592,20 @@ static char *get_header(const struct commit *commit, const char *key) static char *logmsg_reencode(const struct commit *commit) { - char *encoding = get_header(commit, "encoding"); + char *encoding; char *out; + char *output_encoding = (git_log_output_encoding + ? git_log_output_encoding + : git_commit_encoding); - if (!encoding || !strcmp(encoding, git_commit_encoding)) + if (!output_encoding) return NULL; - out = reencode_string(commit->buffer, git_commit_encoding, encoding); + encoding = get_header(commit, "encoding"); + if (!encoding || !strcmp(encoding, output_encoding)) { + free(encoding); + return NULL; + } + out = reencode_string(commit->buffer, output_encoding, encoding); free(encoding); if (!out) return NULL; @@ -618,15 +626,10 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, int parents_shown = 0; const char *msg = commit->buffer; int plain_non_ascii = 0; - char *reencoded = NULL; + char *reencoded = logmsg_reencode(commit); - if (*git_commit_encoding) { - reencoded = logmsg_reencode(commit); - if (reencoded) { - msg = reencoded; - len = strlen(msg); - } - } + if (reencoded) + msg = reencoded; if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL) indent = 0; @@ -643,7 +646,7 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, for (in_body = i = 0; (ch = msg[i]) && i < len; i++) { if (!in_body) { /* author could be non 7-bit ASCII but - * the log may so; skip over the + * the log may be so; skip over the * header part first. */ if (ch == '\n' && diff --git a/config.c b/config.c index 1662a4626e..fcccf7e2a4 100644 --- a/config.c +++ b/config.c @@ -309,10 +309,16 @@ int git_default_config(const char *var, const char *value) } if (!strcmp(var, "i18n.commitencoding")) { - strlcpy(git_commit_encoding, value, sizeof(git_commit_encoding)); + git_commit_encoding = strdup(value); return 0; } + if (!strcmp(var, "i18n.logoutputencoding")) { + git_log_output_encoding = strdup(value); + return 0; + } + + if (!strcmp(var, "pager.color") || !strcmp(var, "color.pager")) { pager_use_color = git_config_bool(var,value); return 0; diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index 234cd0954b..7c7520ea29 100755 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -711,6 +711,7 @@ _git_repo_config () core.compression core.legacyHeaders i18n.commitEncoding + i18n.logOutputEncoding diff.color color.diff diff.renameLimit diff --git a/environment.c b/environment.c index f8c7dbcead..a1502c4e87 100644 --- a/environment.c +++ b/environment.c @@ -18,7 +18,8 @@ int prefer_symlink_refs; int log_all_ref_updates; int warn_ambiguous_refs = 1; int repository_format_version; -char git_commit_encoding[MAX_ENCODING_LENGTH] = "utf-8"; +char *git_commit_encoding; +char *git_log_output_encoding; int shared_repository = PERM_UMASK; const char *apply_default_whitespace; int zlib_compression_level = Z_DEFAULT_COMPRESSION; diff --git a/t/t3900-i18n-commit.sh b/t/t3900-i18n-commit.sh new file mode 100755 index 0000000000..3606ed2efa --- /dev/null +++ b/t/t3900-i18n-commit.sh @@ -0,0 +1,104 @@ +#!/bin/sh +# +# Copyright (c) 2006 Junio C Hamano +# + +test_description='commit and log output encodings' + +. ./test-lib.sh + +compare_with () { + git-show -s "$1" | sed -e '1,/^$/d' -e 's/^ //' -e '$d' >current && + diff -u current "$2" +} + +test_expect_success setup ' + : >F && + git-add F && + T=$(git-write-tree) && + C=$(git-commit-tree $T <../t3900/1-UTF-8.txt) && + git-update-ref HEAD $C && + git-tag C0 +' + +test_expect_success 'no encoding header for base case' ' + E=$(git-cat-file commit C0 | sed -ne "s/^encoding //p") && + test z = "z$E" +' + +for H in ISO-8859-1 EUCJP ISO2022JP +do + test_expect_success "$H setup" ' + git-repo-config i18n.commitencoding $H && + git-checkout -b $H C0 && + echo $H >F && + git-commit -a -F ../t3900/$H.txt + ' +done + +for H in ISO-8859-1 EUCJP ISO2022JP +do + test_expect_success "check encoding header for $H" ' + E=$(git-cat-file commit '$H' | sed -ne "s/^encoding //p") && + test "z$E" = "z'$H'" + ' +done + +test_expect_success 'repo-config to remove customization' ' + git-repo-config --unset-all i18n.commitencoding && + if Z=$(git-repo-config --get-all i18n.commitencoding) + then + echo Oops, should have failed. + false + else + test z = "z$Z" + fi && + git-repo-config i18n.commitencoding utf-8 +' + +test_expect_success 'ISO-8859-1 should be shown in UTF-8 now' ' + compare_with ISO-8859-1 ../t3900/1-UTF-8.txt +' + +for H in EUCJP ISO2022JP +do + test_expect_success "$H should be shown in UTF-8 now" ' + compare_with '$H' ../t3900/2-UTF-8.txt + ' +done + +test_expect_success 'repo-config to add customization' ' + git-repo-config --unset-all i18n.commitencoding && + if Z=$(git-repo-config --get-all i18n.commitencoding) + then + echo Oops, should have failed. + false + else + test z = "z$Z" + fi +' + +for H in ISO-8859-1 EUCJP ISO2022JP +do + test_expect_success "$H should be shown in itself now" ' + git-repo-config i18n.commitencoding '$H' && + compare_with '$H' ../t3900/'$H'.txt + ' +done + +test_expect_success 'repo-config to tweak customization' ' + git-repo-config i18n.logoutputencoding utf-8 +' + +test_expect_success 'ISO-8859-1 should be shown in UTF-8 now' ' + compare_with ISO-8859-1 ../t3900/1-UTF-8.txt +' + +for H in EUCJP ISO2022JP +do + test_expect_success "$H should be shown in UTF-8 now" ' + compare_with '$H' ../t3900/2-UTF-8.txt + ' +done + +test_done diff --git a/t/t3900/1-UTF-8.txt b/t/t3900/1-UTF-8.txt new file mode 100644 index 0000000000..ee31e19738 --- /dev/null +++ b/t/t3900/1-UTF-8.txt @@ -0,0 +1,3 @@ +ÄËÑÏÖ + +Ábçdèfg diff --git a/t/t3900/2-UTF-8.txt b/t/t3900/2-UTF-8.txt new file mode 100644 index 0000000000..63f4f8f121 --- /dev/null +++ b/t/t3900/2-UTF-8.txt @@ -0,0 +1,4 @@ +はれひほふ + +しているのが、いるので。 +濱浜ほれぷりぽれまびぐりろへ。 diff --git a/t/t3900/EUCJP.txt b/t/t3900/EUCJP.txt new file mode 100644 index 0000000000..546f2aac01 --- /dev/null +++ b/t/t3900/EUCJP.txt @@ -0,0 +1,4 @@ +ϤҤۤ + +ƤΤΤǡ +ͤۤפݤޤӤء diff --git a/t/t3900/ISO-8859-1.txt b/t/t3900/ISO-8859-1.txt new file mode 100644 index 0000000000..7cbef0ee6f --- /dev/null +++ b/t/t3900/ISO-8859-1.txt @@ -0,0 +1,3 @@ + + +bdfg diff --git a/t/t3900/ISO2022JP.txt b/t/t3900/ISO2022JP.txt new file mode 100644 index 0000000000..74b533042f --- /dev/null +++ b/t/t3900/ISO2022JP.txt @@ -0,0 +1,4 @@ +$B$O$l$R$[$U(B + +$B$7$F$$$k$N$,!"$$$k$N$G!#(B +$B_@IM$[$l$W$j$]$l$^$S$0$j$m$X!#(B