From 0b63fd6965350fd0159f7712b3582ba83cbfca25 Mon Sep 17 00:00:00 2001
From: Alexandr Miloslavskiy
Date: Tue, 24 Sep 2019 03:40:29 -0700
Subject: [PATCH 1/2] t0028: fix test for UTF-16-LE-BOM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

According to its name, the test is designed for UTF-16-LE-BOM.
However, possibly due to copy&paste oversight, it was using UTF-32.

While the test succeeds (extra \000\000 are interpreted as NUL),
I myself had an unrelated problem which caused the test to fail.
When analyzing the failure I was quite puzzled by the fact that the
test is obviously buggy. And it seems that I'm not alone:
https://public-inbox.org/git/CAH8yC8kSakS807d4jc_BtcUJOrcVT4No37AXSz=jePxhw-o9Dg@mail.gmail.com/T/#u

Fix the test to follow its original intention.

Signed-off-by: Alexandr Miloslavskiy
Reviewed-by: Torsten Bögershausen
Signed-off-by: Junio C Hamano
---
 t/t0028-working-tree-encoding.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/t0028-working-tree-encoding.sh b/t/t0028-working-tree-encoding.sh
index 1090e650ed..5493cf3ca9 100755
--- a/t/t0028-working-tree-encoding.sh
+++ b/t/t0028-working-tree-encoding.sh
@@ -40,7 +40,7 @@ test_expect_success 'setup test files' '
 	printf "$text" | write_utf16 >test.utf16.raw &&
 	printf "$text" | write_utf32 >test.utf32.raw &&
 	printf "\377\376" >test.utf16lebom.raw &&
-	printf "$text" | iconv -f UTF-8 -t UTF-32LE >>test.utf16lebom.raw &&
+	printf "$text" | iconv -f UTF-8 -t UTF-16LE >>test.utf16lebom.raw &&
 
 	# Line ending tests
 	printf "one\ntwo\nthree\n" >lf.utf8.raw &&

From d928a8388a237bdaab66b61edda40d25aa08af5f Mon Sep 17 00:00:00 2001
From: Alexandr Miloslavskiy
Date: Tue, 24 Sep 2019 03:40:30 -0700
Subject: [PATCH 2/2] t0028: add more tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After I discovered that UTF-16-LE-BOM test was buggy, I decided that
better tests are required.
Possibly the best option here is to compare git results against
hardcoded ground truth.

The new tests also cover more interesting chars where (ANSI != UTF-8).

Signed-off-by: Alexandr Miloslavskiy
Reviewed-by: Torsten Bögershausen
Signed-off-by: Junio C Hamano
---
 t/t0028-working-tree-encoding.sh | 42 ++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/t/t0028-working-tree-encoding.sh b/t/t0028-working-tree-encoding.sh
index 5493cf3ca9..7aa0945d8d 100755
--- a/t/t0028-working-tree-encoding.sh
+++ b/t/t0028-working-tree-encoding.sh
@@ -280,4 +280,46 @@ test_expect_success ICONV_SHIFT_JIS 'check roundtrip encoding' '
 	git reset
 '
 
+# Commit a test file as UTF-8, check it out again with another
+# working-tree-encoding and compare the result with hardcoded bytes.
+#
+# $1: checkout encoding (value used for working-tree-encoding on checkout)
+# $2: test string (used as a printf format; committed as UTF-8)
+# $3: expected bytes in checkout encoding (printf format, octal escapes)
+test_commit_utf8_checkout_other () {
+	encoding="$1"
+	orig_string="$2"
+	expect_bytes="$3"
+
+	test_expect_success "Commit UTF-8, checkout $encoding" '
+		test_when_finished "git checkout HEAD -- .gitattributes" &&
+
+		test_ext="commit_utf8_checkout_$encoding" &&
+		test_file="test.$test_ext" &&
+
+		# Commit as UTF-8
+		echo "*.$test_ext text working-tree-encoding=UTF-8" >.gitattributes &&
+		printf "$orig_string" >"$test_file" &&
+		git add "$test_file" &&
+		git commit -m "Test data" &&
+
+		# Checkout in tested encoding
+		rm "$test_file" &&
+		echo "*.$test_ext text working-tree-encoding=$encoding" >.gitattributes &&
+		git checkout HEAD -- "$test_file" &&
+
+		# Test (quoted so the bytes always reach printf as one format operand)
+		printf "$expect_bytes" >"$test_file.raw" &&
+		test_cmp_bin "$test_file.raw" "$test_file"
+	'
+}
+
+test_commit_utf8_checkout_other "UTF-8" "Test Тест" "\124\145\163\164\040\320\242\320\265\321\201\321\202"
+test_commit_utf8_checkout_other "UTF-16LE" "Test Тест" "\124\000\145\000\163\000\164\000\040\000\042\004\065\004\101\004\102\004"
+test_commit_utf8_checkout_other "UTF-16BE" "Test Тест" "\000\124\000\145\000\163\000\164\000\040\004\042\004\065\004\101\004\102"
+test_commit_utf8_checkout_other "UTF-16LE-BOM" "Test Тест" "\377\376\124\000\145\000\163\000\164\000\040\000\042\004\065\004\101\004\102\004"
+test_commit_utf8_checkout_other "UTF-16BE-BOM" "Test Тест" "\376\377\000\124\000\145\000\163\000\164\000\040\004\042\004\065\004\101\004\102"
+test_commit_utf8_checkout_other "UTF-32LE" "Test Тест" "\124\000\000\000\145\000\000\000\163\000\000\000\164\000\000\000\040\000\000\000\042\004\000\000\065\004\000\000\101\004\000\000\102\004\000\000"
+test_commit_utf8_checkout_other "UTF-32BE" "Test Тест" "\000\000\000\124\000\000\000\145\000\000\000\163\000\000\000\164\000\000\000\040\000\000\004\042\000\000\004\065\000\000\004\101\000\000\004\102"
+
 test_done