git-p4: handle utf16 filetype properly

One of the filetypes that p4 supports is utf16.  Its behavior is
odd in this case.  The data delivered through "p4 -G print" is
not encoded in utf16, although "p4 print -o" will produce the
proper utf16-encoded file.

When dealing with this filetype, discard the data from -G, and
instead read the real contents by invoking "p4 print -q -o -" on
the depot file.

An alternate approach would be to try to encode the data in
python.  That worked for true utf16 files, but for other files
marked as utf16, p4 delivers mangled text in no recognizable encoding.

Add a test case to check utf16 handling, and +k and +ko handling.

Reported-by: Chris Li <git@chrisli.org>
Acked-by: Luke Diamand <luke@diamand.org>
Signed-off-by: Pete Wyckoff <pw@padd.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Pete Wyckoff 2011-09-17 19:16:14 -04:00 committed by Junio C Hamano
parent fc00233071
commit 55aa5714af
2 changed files with 119 additions and 0 deletions

View File

@ -1238,6 +1238,15 @@ class P4Sync(Command, P4UserMap):
data = ''.join(contents) data = ''.join(contents)
contents = [data[:-1]] contents = [data[:-1]]
if file['type'].startswith("utf16"):
# p4 delivers different text in the python output to -G
# than it does when using "print -o", or normal p4 client
# operations. utf16 is converted to ascii or utf8, perhaps.
# But ascii text saved as -t utf16 is completely mangled.
# Invoke print -o to get the real contents.
text = p4_read_pipe('print -q -o - "%s"' % file['depotFile'])
contents = [ text ]
if self.isWindows and file["type"].endswith("text"): if self.isWindows and file["type"].endswith("text"):
mangled = [] mangled = []
for data in contents: for data in contents:
@ -1245,6 +1254,8 @@ class P4Sync(Command, P4UserMap):
mangled.append(data) mangled.append(data)
contents = mangled contents = mangled
# Note that we do not try to de-mangle keywords on utf16 files,
# even though in theory somebody may want that.
if file['type'] in ('text+ko', 'unicode+ko', 'binary+ko'): if file['type'] in ('text+ko', 'unicode+ko', 'binary+ko'):
contents = map(lambda text: re.sub(r'(?i)\$(Id|Header):[^$]*\$',r'$\1$', text), contents) contents = map(lambda text: re.sub(r'(?i)\$(Id|Header):[^$]*\$',r'$\1$', text), contents)
elif file['type'] in ('text+k', 'ktext', 'kxtext', 'unicode+k', 'binary+k'): elif file['type'] in ('text+k', 'ktext', 'kxtext', 'unicode+k', 'binary+k'):

108
t/t9802-git-p4-filetype.sh Executable file
View File

@ -0,0 +1,108 @@
#!/bin/sh
test_description='git-p4 p4 filetype tests'
. ./lib-git-p4.sh
# Run start_p4d (a helper sourced from lib-git-p4.sh) as its own test so
# that a failure to bring up the server is reported on its own.
test_expect_success 'start p4d' '
	start_p4d
'
# Populate the p4 client with the same three-line text stored four ways:
# ascii bytes and utf-16 bytes, each added once with filetype "text" and
# once with filetype "utf16".
test_expect_success 'utf-16 file create' '
	(
		cd "$cli" &&

		# ascii content, filetype text: p4 saves this verbatim
		printf "three\nline\ntext\n" >f-ascii &&
		p4 add -t text f-ascii &&

		# ascii content, filetype utf16: p4 adds \377\376 header
		cp f-ascii f-ascii-as-utf16 &&
		p4 add -t utf16 f-ascii-as-utf16 &&

		# utf-16 content, filetype utf16: p4 saves this exactly
		# as iconv produced it
		printf "three\nline\ntext\n" | iconv -f ascii -t utf-16 >f-utf16 &&
		p4 add -t utf16 f-utf16 &&

		# utf-16 content, filetype text: this also is unchanged
		cp f-utf16 f-utf16-as-text &&
		p4 add -t text f-utf16-as-text &&

		p4 submit -d "f files" &&

		# force update of client files
		p4 sync -f
	)
'
# Clone the whole depot with git-p4 and require every file in the git
# work tree to be byte-identical to the copy in the p4 client.
test_expect_success 'utf-16 file test' '
	test_when_finished cleanup_git &&
	"$GITP4" clone --dest="$git" //depot@all &&
	(
		cd "$git" &&

		# filetype text
		test_cmp "$cli/f-ascii" f-ascii &&

		# filetype utf16
		test_cmp "$cli/f-ascii-as-utf16" f-ascii-as-utf16 &&
		test_cmp "$cli/f-utf16" f-utf16 &&

		# filetype text holding utf-16 bytes
		test_cmp "$cli/f-utf16-as-text" f-utf16-as-text
	)
'
# Add four files containing the Id and Author keywords to the depot:
# text and utf-16 content, each with the +k and the +ko filetype modifier.
test_expect_success 'keyword file create' '
	(
		cd "$cli" &&

		# ascii content, filetypes text+k and text+ko
		printf "id\n\$Id\$\n\$Author\$\ntext\n" >k-text-k &&
		p4 add -t text+k k-text-k &&
		cp k-text-k k-text-ko &&
		p4 add -t text+ko k-text-ko &&

		# same text converted to utf-16, filetypes utf16+k and utf16+ko;
		# feed iconv by redirection so that a read failure is not
		# hidden behind the exit status of a pipeline
		iconv -f ascii -t utf-16 <k-text-k >k-utf16-k &&
		p4 add -t utf16+k k-utf16-k &&
		cp k-utf16-k k-utf16-ko &&
		p4 add -t utf16+ko k-utf16-ko &&

		p4 submit -d "k files" &&

		# force update of client files
		p4 sync -f
	)
'
# Write two small python filters into the current directory.  Each one
# copies stdin to stdout, collapsing expanded keywords of the form
# "$Name: ...$" back to the bare "$Name$" (matched case-insensitively):
#
#   k_smush.py   Id, Header, Author, Date, DateTime, Change, File, Revision
#   ko_smush.py  Id and Header only
#
# The here-documents use a quoted delimiter (\EOF) so the shell performs
# no expansion on the "$" and backslashes inside the regexes.  The "&&"
# after the first here-document makes the function fail if k_smush.py
# could not be written.
build_smush() {
# filter for the full keyword set
cat >k_smush.py <<-\EOF &&
import re, sys
sys.stdout.write(re.sub(r'(?i)\$(Id|Header|Author|Date|DateTime|Change|File|Revision):[^$]*\$', r'$\1$', sys.stdin.read()))
EOF
# filter for the reduced keyword set (Id and Header)
cat >ko_smush.py <<-\EOF
import re, sys
sys.stdout.write(re.sub(r'(?i)\$(Id|Header):[^$]*\$', r'$\1$', sys.stdin.read()))
EOF
}
# Clone the depot and check keyword handling.  For the text files the
# git copy must equal the p4 client copy after the client copy has been
# passed through the matching smush filter.  The utf16 files must equal
# the p4 client copy unmodified.
test_expect_success 'keyword file test' '
	# register removal of the helper scripts before creating them, so
	# a partial failure in build_smush does not leave one behind
	test_when_finished rm -f k_smush.py ko_smush.py &&
	build_smush &&
	test_when_finished cleanup_git &&
	"$GITP4" clone --dest="$git" //depot@all &&
	(
		cd "$git" &&

		# text, ensure unexpanded
		"$PYTHON_PATH" "$TRASH_DIRECTORY/k_smush.py" <"$cli/k-text-k" >cli-k-text-k-smush &&
		test_cmp cli-k-text-k-smush k-text-k &&
		"$PYTHON_PATH" "$TRASH_DIRECTORY/ko_smush.py" <"$cli/k-text-ko" >cli-k-text-ko-smush &&
		test_cmp cli-k-text-ko-smush k-text-ko &&

		# utf16, even though p4 expands keywords, git-p4 does not
		# try to undo that
		test_cmp "$cli/k-utf16-k" k-utf16-k &&
		test_cmp "$cli/k-utf16-ko" k-utf16-ko
	)
'
# Run kill_p4d (a helper sourced from lib-git-p4.sh) as the last test,
# then finish the script with test_done.
test_expect_success 'kill p4d' '
	kill_p4d
'

test_done