From 462d97daf69951f968f16b6271de9db34f7dd13c Mon Sep 17 00:00:00 2001 From: Matthieu Moy Date: Thu, 29 Nov 2012 18:00:55 +0100 Subject: [PATCH 1/2] git-remote-mediawiki: escape ", \, and LF in file names A mediawiki page can contain, and even start with a " character, we have to escape it when generating the fast-export stream, as well as \ character. While we're there, also escape newlines, but I don't think we can get them from MediaWiki pages. Signed-off-by: Matthieu Moy Signed-off-by: Junio C Hamano --- contrib/mw-to-git/git-remote-mediawiki | 16 ++++++++++--- contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh | 26 +++++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/contrib/mw-to-git/git-remote-mediawiki b/contrib/mw-to-git/git-remote-mediawiki index 68555d4265..094129de09 100755 --- a/contrib/mw-to-git/git-remote-mediawiki +++ b/contrib/mw-to-git/git-remote-mediawiki @@ -711,6 +711,14 @@ sub fetch_mw_revisions { return ($n, @revisions); } +sub fe_escape_path { + my $path = shift; + $path =~ s/\\/\\\\/g; + $path =~ s/"/\\"/g; + $path =~ s/\n/\\n/g; + return '"' . $path . '"'; +} + sub import_file_revision { my $commit = shift; my %commit = %{$commit}; @@ -738,15 +746,17 @@ sub import_file_revision { print STDOUT "from refs/mediawiki/$remotename/master^0\n"; } if ($content ne DELETED_CONTENT) { - print STDOUT "M 644 inline $title.mw\n"; + print STDOUT "M 644 inline " . + fe_escape_path($title . ".mw") . "\n"; literal_data($content); if (%mediafile) { - print STDOUT "M 644 inline $mediafile{title}\n"; + print STDOUT "M 644 inline " + . fe_escape_path($mediafile{title}) . "\n"; literal_data_raw($mediafile{content}); } print STDOUT "\n\n"; } else { - print STDOUT "D $title.mw\n"; + print STDOUT "D " . fe_escape_path($title . ".mw") . 
"\n"; } # mediawiki revision number in the git note diff --git a/contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh b/contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh index 246d47d8fb..b6405ce262 100755 --- a/contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh +++ b/contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh @@ -318,4 +318,30 @@ test_expect_success 'git push with \ in format control' ' ' +test_expect_success 'fast-import meta-characters in page name (mw -> git)' ' + wiki_reset && + wiki_editpage \"file\"_\\_foo "expect to be called \"file\"_\\_foo" false && + git clone mediawiki::'"$WIKI_URL"' mw_dir_21 && + test_path_is_file mw_dir_21/\"file\"_\\_foo.mw && + wiki_getallpage ref_page_21 && + test_diff_directories mw_dir_21 ref_page_21 +' + + +test_expect_success 'fast-import meta-characters in page name (git -> mw) ' ' + wiki_reset && + git clone mediawiki::'"$WIKI_URL"' mw_dir_22 && + ( + cd mw_dir_22 && + echo "this file is called \"file\"_\\_foo.mw" >\"file\"_\\_foo && + git add . && + git commit -am "file \"file\"_\\_foo" && + git pull && + git push + ) && + wiki_getallpage ref_page_22 && + test_diff_directories mw_dir_22 ref_page_22 +' + + test_done From 7c65b2ebb72fcf9b563be3367a088256757343a6 Mon Sep 17 00:00:00 2001 From: Matthieu Moy Date: Thu, 29 Nov 2012 20:11:32 +0100 Subject: [PATCH 2/2] git-fast-import.txt: improve documentation for quoted paths The documentation mentioned only newlines and double quotes as characters needing escaping, but the backslash also needs it. Also, the documentation was not clearly saying that double quotes around the file name were required (double quotes in the examples could be interpreted as part of the sentence, not part of the actual string). 
Signed-off-by: Matthieu Moy Signed-off-by: Junio C Hamano --- Documentation/git-fast-import.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt index 959e4d3aee..d1844ead4a 100644 --- a/Documentation/git-fast-import.txt +++ b/Documentation/git-fast-import.txt @@ -562,8 +562,12 @@ A `<path>` string must use UNIX-style directory separators (forward slash `/`), may contain any byte other than `LF`, and must not start with double quote (`"`). -If an `LF` or double quote must be encoded into `<path>` shell-style -quoting should be used, e.g. `"path/with\n and \" in it"`. +A path can use C-style string quoting; this is accepted in all cases +and mandatory if the filename starts with double quote or contains +`LF`. In C-style quoting, the complete name should be surrounded with +double quotes, and any `LF`, backslash, or double quote characters +must be escaped by preceding them with a backslash (e.g., +`"path/with\n, \\ and \" in it"`). The value of `<path>` must be in canonical form. That is it must not: