Merge branch 'jc/int'

* jc/int:
  More tests in t3901.
  Consistent message encoding while reusing log from an existing commit.
  t3901: test "format-patch | am" pipe with i18n
  Use log output encoding in --pretty=email headers.
This commit is contained in:
Junio C Hamano 2007-01-14 12:04:25 -08:00
commit f4b6c6b90f
6 changed files with 334 additions and 34 deletions

View File

@ -464,20 +464,29 @@ static int get_one_line(const char *msg, unsigned long len)
return ret;
}
static int is_rfc2047_special(char ch)
/* High bit set, or ISO-2022-INT */
static int non_ascii(int ch)
{
return ((ch & 0x80) || (ch == '=') || (ch == '?') || (ch == '_'));
ch = (ch & 0xff);
return ((ch & 0x80) || (ch == 0x1b));
}
static int add_rfc2047(char *buf, const char *line, int len)
static int is_rfc2047_special(char ch)
{
return (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_'));
}
static int add_rfc2047(char *buf, const char *line, int len,
const char *encoding)
{
char *bp = buf;
int i, needquote;
static const char q_utf8[] = "=?utf-8?q?";
char q_encoding[128];
const char *q_encoding_fmt = "=?%s?q?";
for (i = needquote = 0; !needquote && i < len; i++) {
unsigned ch = line[i];
if (ch & 0x80)
int ch = line[i];
if (non_ascii(ch))
needquote++;
if ((i + 1 < len) &&
(ch == '=' && line[i+1] == '?'))
@ -486,8 +495,11 @@ static int add_rfc2047(char *buf, const char *line, int len)
if (!needquote)
return sprintf(buf, "%.*s", len, line);
memcpy(bp, q_utf8, sizeof(q_utf8)-1);
bp += sizeof(q_utf8)-1;
i = snprintf(q_encoding, sizeof(q_encoding), q_encoding_fmt, encoding);
if (sizeof(q_encoding) < i)
die("Insanely long encoding name %s", encoding);
memcpy(bp, q_encoding, i);
bp += i;
for (i = 0; i < len; i++) {
unsigned ch = line[i] & 0xFF;
if (is_rfc2047_special(ch)) {
@ -505,7 +517,8 @@ static int add_rfc2047(char *buf, const char *line, int len)
}
static int add_user_info(const char *what, enum cmit_fmt fmt, char *buf,
const char *line, int relative_date)
const char *line, int relative_date,
const char *encoding)
{
char *date;
int namelen;
@ -533,7 +546,8 @@ static int add_user_info(const char *what, enum cmit_fmt fmt, char *buf,
filler = "";
strcpy(buf, "From: ");
ret = strlen(buf);
ret += add_rfc2047(buf + ret, line, display_name_length);
ret += add_rfc2047(buf + ret, line, display_name_length,
encoding);
memcpy(buf + ret, name_tail, namelen - display_name_length);
ret += namelen - display_name_length;
buf[ret++] = '\n';
@ -668,21 +682,18 @@ static char *replace_encoding_header(char *buf, char *encoding)
return buf;
}
static char *logmsg_reencode(const struct commit *commit)
static char *logmsg_reencode(const struct commit *commit,
char *output_encoding)
{
char *encoding;
char *out;
char *output_encoding = (git_log_output_encoding
? git_log_output_encoding
: git_commit_encoding);
char *utf8 = "utf-8";
if (!output_encoding)
output_encoding = "utf-8";
else if (!*output_encoding)
if (!*output_encoding)
return NULL;
encoding = get_header(commit, "encoding");
if (!encoding)
return NULL;
encoding = utf8;
if (!strcmp(encoding, output_encoding))
out = strdup(commit->buffer);
else
@ -691,7 +702,8 @@ static char *logmsg_reencode(const struct commit *commit)
if (out)
out = replace_encoding_header(out, output_encoding);
free(encoding);
if (encoding != utf8)
free(encoding);
if (!out)
return NULL;
return out;
@ -711,8 +723,15 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt,
int parents_shown = 0;
const char *msg = commit->buffer;
int plain_non_ascii = 0;
char *reencoded = logmsg_reencode(commit);
char *reencoded;
char *encoding;
encoding = (git_log_output_encoding
? git_log_output_encoding
: git_commit_encoding);
if (!encoding)
encoding = "utf-8";
reencoded = logmsg_reencode(commit, encoding);
if (reencoded)
msg = reencoded;
@ -738,7 +757,7 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt,
i + 1 < len && msg[i+1] == '\n')
in_body = 1;
}
else if (ch & 0x80) {
else if (non_ascii(ch)) {
plain_non_ascii = 1;
break;
}
@ -797,13 +816,15 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt,
offset += add_user_info("Author", fmt,
buf + offset,
line + 7,
relative_date);
relative_date,
encoding);
if (!memcmp(line, "committer ", 10) &&
(fmt == CMIT_FMT_FULL || fmt == CMIT_FMT_FULLER))
offset += add_user_info("Commit", fmt,
buf + offset,
line + 10,
relative_date);
relative_date,
encoding);
continue;
}
@ -826,7 +847,8 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt,
int slen = strlen(subject);
memcpy(buf + offset, subject, slen);
offset += slen;
offset += add_rfc2047(buf + offset, line, linelen);
offset += add_rfc2047(buf + offset, line, linelen,
encoding);
}
else {
memset(buf + offset, ' ', indent);
@ -837,11 +859,17 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt,
if (fmt == CMIT_FMT_ONELINE)
break;
if (subject && plain_non_ascii) {
static const char header[] =
"Content-Type: text/plain; charset=UTF-8\n"
int sz;
char header[512];
const char *header_fmt =
"Content-Type: text/plain; charset=%s\n"
"Content-Transfer-Encoding: 8bit\n";
memcpy(buf + offset, header, sizeof(header)-1);
offset += sizeof(header)-1;
sz = snprintf(header, sizeof(header), header_fmt,
encoding);
if (sizeof(header) < sz)
die("Encoding name %s too long", encoding);
memcpy(buf + offset, header, sz);
offset += sz;
}
if (after_subject) {
int slen = strlen(after_subject);

View File

@ -429,7 +429,9 @@ then
fi
elif test "$use_commit" != ""
then
git-cat-file commit "$use_commit" | sed -e '1,/^$/d'
encoding=$(git repo-config i18n.commitencoding || echo UTF-8)
git show -s --pretty=raw --encoding="$encoding" "$use_commit" |
sed -e '1,/^$/d' -e 's/^ //'
elif test -f "$GIT_DIR/MERGE_MSG"
then
cat "$GIT_DIR/MERGE_MSG"
@ -491,7 +493,8 @@ then
q
}
'
set_author_env=`git-cat-file commit "$use_commit" |
encoding=$(git repo-config i18n.commitencoding || echo UTF-8)
set_author_env=`git show -s --pretty=raw --encoding="$encoding" "$use_commit" |
LANG=C LC_ALL=C sed -ne "$pick_author_script"`
eval "$set_author_env"
export GIT_AUTHOR_NAME

View File

@ -81,6 +81,8 @@ prev=$(git-rev-parse --verify "$commit^1" 2>/dev/null) ||
git-rev-parse --verify "$commit^2" >/dev/null 2>&1 &&
die "Cannot run $me a multi-parent commit."
encoding=$(git repo-config i18n.commitencoding || echo UTF-8)
# "commit" is an existing commit. We would want to apply
# the difference it introduces since its first parent "prev"
# on top of the current HEAD if we are cherry-pick. Or the
@ -88,10 +90,11 @@ git-rev-parse --verify "$commit^2" >/dev/null 2>&1 &&
case "$me" in
revert)
git-rev-list --pretty=oneline --max-count=1 $commit |
git show -s --pretty=oneline --encoding="$encoding" $commit |
sed -e '
s/^[^ ]* /Revert "/
s/$/"/'
s/$/"/
'
echo
echo "This reverts commit $commit."
test "$rev" = "$commit" ||
@ -120,14 +123,17 @@ cherry-pick)
q
}'
set_author_env=`git-cat-file commit "$commit" |
logmsg=`git show -s --pretty=raw --encoding="$encoding" "$commit"`
set_author_env=`echo "$logmsg" |
LANG=C LC_ALL=C sed -ne "$pick_author_script"`
eval "$set_author_env"
export GIT_AUTHOR_NAME
export GIT_AUTHOR_EMAIL
export GIT_AUTHOR_DATE
git-cat-file commit $commit | sed -e '1,/^$/d'
echo "$logmsg" |
sed -e '1,/^$/d' -e 's/^ //'
case "$replay" in
'')
echo "(cherry picked from commit $commit)"

4
t/t3901-8859-1.txt Executable file
View File

@ -0,0 +1,4 @@
: to be sourced in t3901 -- this is latin-1
GIT_AUTHOR_NAME="Áéí óú" &&
GIT_COMMITTER_NAME=$GIT_AUTHOR_NAME &&
export GIT_AUTHOR_NAME GIT_COMMITTER_NAME

255
t/t3901-i18n-patch.sh Executable file
View File

@ -0,0 +1,255 @@
#!/bin/sh
#
# Copyright (c) 2006 Junio C Hamano
#
test_description='i18n settings and format-patch | am pipe'
. ./test-lib.sh
# check_encoding <cnt> [<header>]
#
# Inspect the <cnt> most recent commits reachable through first parents
# of HEAD and verify, for each of them, that
#  - "git format-patch --encoding=UTF-8" produces a From: header that
#    carries the name as an RFC 2047 Q-encoded UTF-8 word, and
#  - the raw commit object contains an "encoding ISO-8859-1" header when
#    <header> is "8859", and contains no such header for any other
#    value of <header> (including when it is omitted).
# The exit status is 0 when every commit passes; the loop stops at the
# first failure and the exit status is then 1.
# cnt, header, i, j and bad are ordinary shell variables, not locals.
check_encoding () {
# Make sure characters are not corrupted
cnt="$1" header="$2" i=1 j=0 bad=0
# Each pass examines one commit: HEAD~$j, selected by the range
# HEAD~$i..HEAD~$j (i is always j + 1).
while test "$i" -le $cnt
do
git format-patch --encoding=UTF-8 --stdout HEAD~$i..HEAD~$j |
grep "^From: =?UTF-8?q?=C3=81=C3=A9=C3=AD_=C3=B3=C3=BA?=" &&
git-cat-file commit HEAD~$j |
case "$header" in
8859)
grep "^encoding ISO-8859-1" ;;
*)
! grep "^encoding ISO-8859-1" ;;
esac || {
bad=1
break
}
# Step one commit further back in history.
j=$i
i=$(($i+1))
done
# Turn the flag into the function's exit status.
(exit $bad)
}
# Build the history used by the tests below:
#  - master: "Initial commit" and "Second on main", both committed with
#    i18n.commitencoding=UTF-8 and the UTF-8 author/committer name;
#  - side (forked from master^): "Second on side" committed as UTF-8,
#    then "Third on side" committed with i18n.commitencoding=ISO-8859-1
#    and the ISO-8859-1 author/committer name.
# i18n.commitencoding is set back to UTF-8 at the end and the "side"
# branch is left checked out.
test_expect_success setup '
git-repo-config i18n.commitencoding UTF-8 &&
# use UTF-8 in author and committer name to match the
# i18n.commitencoding settings
. ../t3901-utf8.txt &&
test_tick &&
echo "$GIT_AUTHOR_NAME" >mine &&
git add mine &&
git commit -s -m "Initial commit" &&
test_tick &&
echo Hello world >mine &&
git add mine &&
git commit -s -m "Second on main" &&
# the first commit on the side branch is UTF-8
test_tick &&
git checkout -b side master^ &&
echo Another file >yours &&
git add yours &&
git commit -s -m "Second on side" &&
# the second one on the side branch is ISO-8859-1
git-repo-config i18n.commitencoding ISO-8859-1 &&
# use author and committer name in ISO-8859-1 to match it.
. ../t3901-8859-1.txt &&
test_tick &&
echo Yet another >theirs &&
git add theirs &&
git commit -s -m "Third on side" &&
# Back to default
git-repo-config i18n.commitencoding UTF-8
'
# With i18n.logoutputencoding=ISO-8859-1, format-patch output for both
# commits on the side branch (master..HEAD^ and HEAD^..HEAD, written to
# out-l1 and out-l2) must declare charset=ISO-8859-1 in Content-Type and
# carry a From: header Q-encoded as ISO-8859-1.
test_expect_success 'format-patch output (ISO-8859-1)' '
git-repo-config i18n.logoutputencoding ISO-8859-1 &&
git format-patch --stdout master..HEAD^ >out-l1 &&
git format-patch --stdout HEAD^ >out-l2 &&
grep "^Content-Type: text/plain; charset=ISO-8859-1" out-l1 &&
grep "^From: =?ISO-8859-1?q?=C1=E9=ED_=F3=FA?=" out-l1 &&
grep "^Content-Type: text/plain; charset=ISO-8859-1" out-l2 &&
grep "^From: =?ISO-8859-1?q?=C1=E9=ED_=F3=FA?=" out-l2
'
# Same two ranges with i18n.logoutputencoding=UTF-8: the Content-Type
# charset and the Q-encoded From: header must both be UTF-8
# (output kept in out-u1 and out-u2).
test_expect_success 'format-patch output (UTF-8)' '
git repo-config i18n.logoutputencoding UTF-8 &&
git format-patch --stdout master..HEAD^ >out-u1 &&
git format-patch --stdout HEAD^ >out-u2 &&
grep "^Content-Type: text/plain; charset=UTF-8" out-u1 &&
grep "^From: =?UTF-8?q?=C3=81=C3=A9=C3=AD_=C3=B3=C3=BA?=" out-u1 &&
grep "^Content-Type: text/plain; charset=UTF-8" out-u2 &&
grep "^From: =?UTF-8?q?=C3=81=C3=A9=C3=AD_=C3=B3=C3=BA?=" out-u2
'
# Plain "git-rebase master" under the four combinations of
# i18n.commitencoding / i18n.logoutputencoding.  In the test titles the
# first letter is the commitencoding and the second the
# logoutputencoding: U = UTF-8, L = ISO-8859-1.  Each test sources the
# name file matching its commitencoding and then runs check_encoding on
# the two rebased commits, passing "8859" when commitencoding is
# ISO-8859-1.
#
# U/U also creates the "test" branch that the later tests reset.
test_expect_success 'rebase (U/U)' '
# We want the result of rebase in UTF-8
git-repo-config i18n.commitencoding UTF-8 &&
# The test is about logoutputencoding not affecting the
# final outcome -- it is used internally to generate the
# patch and the log.
git repo-config i18n.logoutputencoding UTF-8 &&
# The result will be committed by GIT_COMMITTER_NAME --
# we want UTF-8 encoded name.
. ../t3901-utf8.txt &&
git checkout -b test &&
git-rebase master &&
check_encoding 2
'
# commitencoding UTF-8, logoutputencoding ISO-8859-1.
test_expect_success 'rebase (U/L)' '
git-repo-config i18n.commitencoding UTF-8 &&
git repo-config i18n.logoutputencoding ISO-8859-1 &&
. ../t3901-utf8.txt &&
git reset --hard side &&
git-rebase master &&
check_encoding 2
'
# commitencoding ISO-8859-1, logoutputencoding ISO-8859-1.
test_expect_success 'rebase (L/L)' '
# In this test we want ISO-8859-1 encoded commits as the result
git-repo-config i18n.commitencoding ISO-8859-1 &&
git repo-config i18n.logoutputencoding ISO-8859-1 &&
. ../t3901-8859-1.txt &&
git reset --hard side &&
git-rebase master &&
check_encoding 2 8859
'
# commitencoding ISO-8859-1, logoutputencoding UTF-8.
test_expect_success 'rebase (L/U)' '
# This is pathological -- use UTF-8 as intermediate form
# to get ISO-8859-1 results.
git-repo-config i18n.commitencoding ISO-8859-1 &&
git repo-config i18n.logoutputencoding UTF-8 &&
. ../t3901-8859-1.txt &&
git reset --hard side &&
git-rebase master &&
check_encoding 2 8859
'
# cherry-pick and revert under the four combinations of
# i18n.commitencoding / i18n.logoutputencoding (first letter =
# commitencoding, second = logoutputencoding; U = UTF-8,
# L = ISO-8859-1).  Each test resets to master, cherry-picks both
# commits of the side branch, reverts the last one with the editor
# replaced by ":" and then runs check_encoding on the three new
# commits, passing "8859" when commitencoding is ISO-8859-1.
test_expect_success 'cherry-pick(U/U)' '
# Both the commitencoding and logoutputencoding is set to UTF-8.
git-repo-config i18n.commitencoding UTF-8 &&
git repo-config i18n.logoutputencoding UTF-8 &&
. ../t3901-utf8.txt &&
git reset --hard master &&
git cherry-pick side^ &&
git cherry-pick side &&
EDITOR=: VISUAL=: git revert HEAD &&
check_encoding 3
'
# commitencoding ISO-8859-1, logoutputencoding ISO-8859-1.
test_expect_success 'cherry-pick(L/L)' '
# Both the commitencoding and logoutputencoding is set to ISO-8859-1
git-repo-config i18n.commitencoding ISO-8859-1 &&
git repo-config i18n.logoutputencoding ISO-8859-1 &&
. ../t3901-8859-1.txt &&
git reset --hard master &&
git cherry-pick side^ &&
git cherry-pick side &&
EDITOR=: VISUAL=: git revert HEAD &&
check_encoding 3 8859
'
# commitencoding UTF-8, logoutputencoding ISO-8859-1.
test_expect_success 'cherry-pick(U/L)' '
# Commitencoding is set to UTF-8 but logoutputencoding is ISO-8859-1
git-repo-config i18n.commitencoding UTF-8 &&
git repo-config i18n.logoutputencoding ISO-8859-1 &&
. ../t3901-utf8.txt &&
git reset --hard master &&
git cherry-pick side^ &&
git cherry-pick side &&
EDITOR=: VISUAL=: git revert HEAD &&
check_encoding 3
'
# commitencoding ISO-8859-1, logoutputencoding UTF-8.
test_expect_success 'cherry-pick(L/U)' '
# Again, the commitencoding is set to ISO-8859-1 but
# logoutputencoding is set to UTF-8.
git-repo-config i18n.commitencoding ISO-8859-1 &&
git repo-config i18n.logoutputencoding UTF-8 &&
. ../t3901-8859-1.txt &&
git reset --hard master &&
git cherry-pick side^ &&
git cherry-pick side &&
EDITOR=: VISUAL=: git revert HEAD &&
check_encoding 3 8859
'
# "git-rebase --merge master" under the four combinations of
# i18n.commitencoding / i18n.logoutputencoding (first letter =
# commitencoding, second = logoutputencoding; U = UTF-8,
# L = ISO-8859-1).  Each test resets to the side branch, rebases it
# with --merge and runs check_encoding on the two resulting commits,
# passing "8859" when commitencoding is ISO-8859-1.
test_expect_success 'rebase --merge (U/U)' '
git-repo-config i18n.commitencoding UTF-8 &&
git repo-config i18n.logoutputencoding UTF-8 &&
. ../t3901-utf8.txt &&
git reset --hard side &&
git-rebase --merge master &&
check_encoding 2
'
# commitencoding UTF-8, logoutputencoding ISO-8859-1.
test_expect_success 'rebase --merge (U/L)' '
git-repo-config i18n.commitencoding UTF-8 &&
git repo-config i18n.logoutputencoding ISO-8859-1 &&
. ../t3901-utf8.txt &&
git reset --hard side &&
git-rebase --merge master &&
check_encoding 2
'
# commitencoding ISO-8859-1, logoutputencoding ISO-8859-1.
test_expect_success 'rebase --merge (L/L)' '
# In this test we want ISO-8859-1 encoded commits as the result
git-repo-config i18n.commitencoding ISO-8859-1 &&
git repo-config i18n.logoutputencoding ISO-8859-1 &&
. ../t3901-8859-1.txt &&
git reset --hard side &&
git-rebase --merge master &&
check_encoding 2 8859
'
# commitencoding ISO-8859-1, logoutputencoding UTF-8.
test_expect_success 'rebase --merge (L/U)' '
# This is pathological -- use UTF-8 as intermediate form
# to get ISO-8859-1 results.
git-repo-config i18n.commitencoding ISO-8859-1 &&
git repo-config i18n.logoutputencoding UTF-8 &&
. ../t3901-8859-1.txt &&
git reset --hard side &&
git-rebase --merge master &&
check_encoding 2 8859
'
# End of the test script.
test_done

4
t/t3901-utf8.txt Executable file
View File

@ -0,0 +1,4 @@
: to be sourced in t3901 -- this is utf8
GIT_AUTHOR_NAME="Áéí óú" &&
GIT_COMMITTER_NAME=$GIT_AUTHOR_NAME &&
export GIT_AUTHOR_NAME GIT_COMMITTER_NAME