pretty: two phase conversion for non utf-8 commits
Always assume format_commit_item() takes a UTF-8 string, for simplicity of string handling (we can handle UTF-8 strings, but not strings in other encodings). If the commit message is not in UTF-8, or the output encoding is not UTF-8, then the commit is first converted to UTF-8, processed, and then the output is converted to the output encoding. This of course only works with encodings that are compatible with Unicode. This also fixes the iso8859-1 test in t6006. It's supposed to create an iso8859-1 commit, but the commit content in t6006 is in UTF-8. t6006 is now converted back in UTF-8 (the downside is we can't put UTF-8 strings there anymore). Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
parent
b782bbab94
commit
7e77df39bf
24
pretty.c
24
pretty.c
@ -954,7 +954,8 @@ static int format_reflog_person(struct strbuf *sb,
|
||||
return format_person_part(sb, part, ident, strlen(ident), dmode);
|
||||
}
|
||||
|
||||
static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
|
||||
static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */
|
||||
const char *placeholder,
|
||||
void *context)
|
||||
{
|
||||
struct format_commit_context *c = context;
|
||||
@ -1193,7 +1194,8 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
|
||||
return 0; /* unknown placeholder */
|
||||
}
|
||||
|
||||
static size_t format_commit_item(struct strbuf *sb, const char *placeholder,
|
||||
static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
|
||||
const char *placeholder,
|
||||
void *context)
|
||||
{
|
||||
int consumed;
|
||||
@ -1273,6 +1275,7 @@ void format_commit_message(const struct commit *commit,
|
||||
{
|
||||
struct format_commit_context context;
|
||||
const char *output_enc = pretty_ctx->output_encoding;
|
||||
const char *utf8 = "UTF-8";
|
||||
|
||||
memset(&context, 0, sizeof(context));
|
||||
context.commit = commit;
|
||||
@ -1285,6 +1288,23 @@ void format_commit_message(const struct commit *commit,
|
||||
strbuf_expand(sb, format, format_commit_item, &context);
|
||||
rewrap_message_tail(sb, &context, 0, 0, 0);
|
||||
|
||||
if (output_enc) {
|
||||
if (same_encoding(utf8, output_enc))
|
||||
output_enc = NULL;
|
||||
} else {
|
||||
if (context.commit_encoding &&
|
||||
!same_encoding(context.commit_encoding, utf8))
|
||||
output_enc = context.commit_encoding;
|
||||
}
|
||||
|
||||
if (output_enc) {
|
||||
int outsz;
|
||||
char *out = reencode_string_len(sb->buf, sb->len,
|
||||
output_enc, utf8, &outsz);
|
||||
if (out)
|
||||
strbuf_attach(sb, out, outsz, outsz + 1);
|
||||
}
|
||||
|
||||
free(context.commit_encoding);
|
||||
logmsg_free(context.message, commit);
|
||||
free(context.signature_check.gpg_output);
|
||||
|
@ -184,7 +184,7 @@ Test printing of complex bodies
|
||||
|
||||
This commit message is much longer than the others,
|
||||
and it will be encoded in iso8859-1. We should therefore
|
||||
include an iso8859 character: ¡bueno!
|
||||
include an iso8859 character: ¡bueno!
|
||||
EOF
|
||||
test_expect_success 'setup complex body' '
|
||||
git config i18n.commitencoding iso8859-1 &&
|
||||
@ -192,14 +192,14 @@ git config i18n.commitencoding iso8859-1 &&
|
||||
'
|
||||
|
||||
test_format complex-encoding %e <<'EOF'
|
||||
commit f58db70b055c5718631e5c61528b28b12090cdea
|
||||
commit 1ed88da4a5b5ed8c449114ac131efc62178734c3
|
||||
iso8859-1
|
||||
commit 131a310eb913d107dd3c09a65d1651175898735d
|
||||
commit 86c75cfd708a0e5868dc876ed5b8bb66c80b4873
|
||||
EOF
|
||||
|
||||
test_format complex-subject %s <<'EOF'
|
||||
commit f58db70b055c5718631e5c61528b28b12090cdea
|
||||
commit 1ed88da4a5b5ed8c449114ac131efc62178734c3
|
||||
Test printing of complex bodies
|
||||
commit 131a310eb913d107dd3c09a65d1651175898735d
|
||||
changed foo
|
||||
@ -208,17 +208,17 @@ added foo
|
||||
EOF
|
||||
|
||||
test_format complex-body %b <<'EOF'
|
||||
commit f58db70b055c5718631e5c61528b28b12090cdea
|
||||
commit 1ed88da4a5b5ed8c449114ac131efc62178734c3
|
||||
This commit message is much longer than the others,
|
||||
and it will be encoded in iso8859-1. We should therefore
|
||||
include an iso8859 character: ¡bueno!
|
||||
include an iso8859 character: ¡bueno!
|
||||
|
||||
commit 131a310eb913d107dd3c09a65d1651175898735d
|
||||
commit 86c75cfd708a0e5868dc876ed5b8bb66c80b4873
|
||||
EOF
|
||||
|
||||
test_expect_success '%x00 shows NUL' '
|
||||
echo >expect commit f58db70b055c5718631e5c61528b28b12090cdea &&
|
||||
echo >expect commit 1ed88da4a5b5ed8c449114ac131efc62178734c3 &&
|
||||
echo >>expect fooQbar &&
|
||||
git rev-list -1 --format=foo%x00bar HEAD >actual.nul &&
|
||||
nul_to_q <actual.nul >actual &&
|
||||
|
Loading…
Reference in New Issue
Block a user