fast-export: do automatic reencoding of commit messages only if requested
Automatic re-encoding of commit messages (and dropping of the encoding header) hurts attempts to do reversible history rewrites (e.g. sha1sum <-> sha256sum transitions, some subtree rewrites), and seems inconsistent with the general principle followed elsewhere in fast-export of requiring explicit user requests to modify the output (e.g. --signed-tags=strip, --tag-of-filtered-object=rewrite). Add a --reencode flag that the user can use to specify, and like other fast-export flags, default it to 'abort'. Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
parent
57a8be2cb0
commit
e80001f8fd
@ -129,6 +129,13 @@ marks the same across runs.
|
|||||||
for intermediary filters (e.g. for rewriting commit messages
|
for intermediary filters (e.g. for rewriting commit messages
|
||||||
which refer to older commits, or for stripping blobs by id).
|
which refer to older commits, or for stripping blobs by id).
|
||||||
|
|
||||||
|
--reencode=(yes|no|abort)::
|
||||||
|
Specify how to handle `encoding` header in commit objects. When
|
||||||
|
asking to 'abort' (which is the default), this program will die
|
||||||
|
when encountering such a commit object. With 'yes', the commit
|
||||||
|
message will be reencoded into UTF-8. With 'no', the original
|
||||||
|
encoding will be preserved.
|
||||||
|
|
||||||
--refspec::
|
--refspec::
|
||||||
Apply the specified refspec to each ref exported. Multiple of them can
|
Apply the specified refspec to each ref exported. Multiple of them can
|
||||||
be specified.
|
be specified.
|
||||||
|
@ -33,6 +33,7 @@ static const char *fast_export_usage[] = {
|
|||||||
static int progress;
|
static int progress;
|
||||||
static enum { SIGNED_TAG_ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = SIGNED_TAG_ABORT;
|
static enum { SIGNED_TAG_ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = SIGNED_TAG_ABORT;
|
||||||
static enum { TAG_FILTERING_ABORT, DROP, REWRITE } tag_of_filtered_mode = TAG_FILTERING_ABORT;
|
static enum { TAG_FILTERING_ABORT, DROP, REWRITE } tag_of_filtered_mode = TAG_FILTERING_ABORT;
|
||||||
|
static enum { REENCODE_ABORT, REENCODE_YES, REENCODE_NO } reencode_mode = REENCODE_ABORT;
|
||||||
static int fake_missing_tagger;
|
static int fake_missing_tagger;
|
||||||
static int use_done_feature;
|
static int use_done_feature;
|
||||||
static int no_data;
|
static int no_data;
|
||||||
@ -77,6 +78,31 @@ static int parse_opt_tag_of_filtered_mode(const struct option *opt,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int parse_opt_reencode_mode(const struct option *opt,
|
||||||
|
const char *arg, int unset)
|
||||||
|
{
|
||||||
|
if (unset) {
|
||||||
|
reencode_mode = REENCODE_ABORT;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (git_parse_maybe_bool(arg)) {
|
||||||
|
case 0:
|
||||||
|
reencode_mode = REENCODE_NO;
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
reencode_mode = REENCODE_YES;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
if (!strcasecmp(arg, "abort"))
|
||||||
|
reencode_mode = REENCODE_ABORT;
|
||||||
|
else
|
||||||
|
return error("Unknown reencoding mode: %s", arg);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static struct decoration idnums;
|
static struct decoration idnums;
|
||||||
static uint32_t last_idnum;
|
static uint32_t last_idnum;
|
||||||
|
|
||||||
@ -633,10 +659,21 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
|
|||||||
}
|
}
|
||||||
|
|
||||||
mark_next_object(&commit->object);
|
mark_next_object(&commit->object);
|
||||||
if (anonymize)
|
if (anonymize) {
|
||||||
reencoded = anonymize_commit_message(message);
|
reencoded = anonymize_commit_message(message);
|
||||||
else if (!is_encoding_utf8(encoding))
|
} else if (encoding) {
|
||||||
reencoded = reencode_string(message, "UTF-8", encoding);
|
switch(reencode_mode) {
|
||||||
|
case REENCODE_YES:
|
||||||
|
reencoded = reencode_string(message, "UTF-8", encoding);
|
||||||
|
break;
|
||||||
|
case REENCODE_NO:
|
||||||
|
break;
|
||||||
|
case REENCODE_ABORT:
|
||||||
|
die("Encountered commit-specific encoding %s in commit "
|
||||||
|
"%s; use --reencode=[yes|no] to handle it",
|
||||||
|
encoding, oid_to_hex(&commit->object.oid));
|
||||||
|
}
|
||||||
|
}
|
||||||
if (!commit->parents)
|
if (!commit->parents)
|
||||||
printf("reset %s\n", refname);
|
printf("reset %s\n", refname);
|
||||||
printf("commit %s\nmark :%"PRIu32"\n", refname, last_idnum);
|
printf("commit %s\nmark :%"PRIu32"\n", refname, last_idnum);
|
||||||
@ -1091,6 +1128,9 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix)
|
|||||||
OPT_CALLBACK(0, "tag-of-filtered-object", &tag_of_filtered_mode, N_("mode"),
|
OPT_CALLBACK(0, "tag-of-filtered-object", &tag_of_filtered_mode, N_("mode"),
|
||||||
N_("select handling of tags that tag filtered objects"),
|
N_("select handling of tags that tag filtered objects"),
|
||||||
parse_opt_tag_of_filtered_mode),
|
parse_opt_tag_of_filtered_mode),
|
||||||
|
OPT_CALLBACK(0, "reencode", &reencode_mode, N_("mode"),
|
||||||
|
N_("select handling of commit messages in an alternate encoding"),
|
||||||
|
parse_opt_reencode_mode),
|
||||||
OPT_STRING(0, "export-marks", &export_filename, N_("file"),
|
OPT_STRING(0, "export-marks", &export_filename, N_("file"),
|
||||||
N_("Dump marks to this file")),
|
N_("Dump marks to this file")),
|
||||||
OPT_STRING(0, "import-marks", &import_filename, N_("file"),
|
OPT_STRING(0, "import-marks", &import_filename, N_("file"),
|
||||||
|
@ -94,14 +94,14 @@ test_expect_success 'fast-export --show-original-ids | git fast-import' '
|
|||||||
test $MUSS = $(git rev-parse --verify refs/tags/muss)
|
test $MUSS = $(git rev-parse --verify refs/tags/muss)
|
||||||
'
|
'
|
||||||
|
|
||||||
test_expect_success 'iso-8859-7' '
|
test_expect_success 'reencoding iso-8859-7' '
|
||||||
|
|
||||||
test_when_finished "git reset --hard HEAD~1" &&
|
test_when_finished "git reset --hard HEAD~1" &&
|
||||||
test_config i18n.commitencoding iso-8859-7 &&
|
test_config i18n.commitencoding iso-8859-7 &&
|
||||||
test_tick &&
|
test_tick &&
|
||||||
echo rosten >file &&
|
echo rosten >file &&
|
||||||
git commit -s -F "$TEST_DIRECTORY/t9350/simple-iso-8859-7-commit-message.txt" file &&
|
git commit -s -F "$TEST_DIRECTORY/t9350/simple-iso-8859-7-commit-message.txt" file &&
|
||||||
git fast-export wer^..wer >iso-8859-7.fi &&
|
git fast-export --reencode=yes wer^..wer >iso-8859-7.fi &&
|
||||||
sed "s/wer/i18n/" iso-8859-7.fi |
|
sed "s/wer/i18n/" iso-8859-7.fi |
|
||||||
(cd new &&
|
(cd new &&
|
||||||
git fast-import &&
|
git fast-import &&
|
||||||
@ -118,13 +118,45 @@ test_expect_success 'iso-8859-7' '
|
|||||||
! grep ^encoding actual)
|
! grep ^encoding actual)
|
||||||
'
|
'
|
||||||
|
|
||||||
|
test_expect_success 'aborting on iso-8859-7' '
|
||||||
|
|
||||||
|
test_when_finished "git reset --hard HEAD~1" &&
|
||||||
|
test_config i18n.commitencoding iso-8859-7 &&
|
||||||
|
echo rosten >file &&
|
||||||
|
git commit -s -F "$TEST_DIRECTORY/t9350/simple-iso-8859-7-commit-message.txt" file &&
|
||||||
|
test_must_fail git fast-export --reencode=abort wer^..wer >iso-8859-7.fi
|
||||||
|
'
|
||||||
|
|
||||||
|
test_expect_success 'preserving iso-8859-7' '
|
||||||
|
|
||||||
|
test_when_finished "git reset --hard HEAD~1" &&
|
||||||
|
test_config i18n.commitencoding iso-8859-7 &&
|
||||||
|
echo rosten >file &&
|
||||||
|
git commit -s -F "$TEST_DIRECTORY/t9350/simple-iso-8859-7-commit-message.txt" file &&
|
||||||
|
git fast-export --reencode=no wer^..wer >iso-8859-7.fi &&
|
||||||
|
sed "s/wer/i18n-no-recoding/" iso-8859-7.fi |
|
||||||
|
(cd new &&
|
||||||
|
git fast-import &&
|
||||||
|
# The commit object, if not re-encoded, is 240 bytes.
|
||||||
|
# Removing the "encoding iso-8859-7\n" header would drops 20
|
||||||
|
# bytes. Re-encoding the Pi character from \xF0 (\360) in
|
||||||
|
# iso-8859-7 to \xCF\x80 (\317\200) in UTF-8 adds a byte.
|
||||||
|
# Check for the expected size...
|
||||||
|
test 240 -eq "$(git cat-file -s i18n-no-recoding)" &&
|
||||||
|
# ...as well as the expected byte.
|
||||||
|
git cat-file commit i18n-no-recoding >actual &&
|
||||||
|
grep $(printf "\360") actual &&
|
||||||
|
# Also make sure the commit has the "encoding" header
|
||||||
|
grep ^encoding actual)
|
||||||
|
'
|
||||||
|
|
||||||
test_expect_success 'encoding preserved if reencoding fails' '
|
test_expect_success 'encoding preserved if reencoding fails' '
|
||||||
|
|
||||||
test_when_finished "git reset --hard HEAD~1" &&
|
test_when_finished "git reset --hard HEAD~1" &&
|
||||||
test_config i18n.commitencoding iso-8859-7 &&
|
test_config i18n.commitencoding iso-8859-7 &&
|
||||||
echo rosten >file &&
|
echo rosten >file &&
|
||||||
git commit -s -F "$TEST_DIRECTORY/t9350/broken-iso-8859-7-commit-message.txt" file &&
|
git commit -s -F "$TEST_DIRECTORY/t9350/broken-iso-8859-7-commit-message.txt" file &&
|
||||||
git fast-export wer^..wer >iso-8859-7.fi &&
|
git fast-export --reencode=yes wer^..wer >iso-8859-7.fi &&
|
||||||
sed "s/wer/i18n-invalid/" iso-8859-7.fi |
|
sed "s/wer/i18n-invalid/" iso-8859-7.fi |
|
||||||
(cd new &&
|
(cd new &&
|
||||||
git fast-import &&
|
git fast-import &&
|
||||||
|
Loading…
Reference in New Issue
Block a user