Merge branch 'jh/p4-rcs-expansion-in-bytestring'

The RCS keyword substitution in "git p4" used to be done assuming
that the contents are UTF-8 text, which can trigger decoding
errors.  We now treat the contents as a bytestring for robustness
and correctness.

* jh/p4-rcs-expansion-in-bytestring:
  git-p4: resolve RCS keywords in bytes not utf-8
  git-p4: open temporary patch file for write only
  git-p4: add raw option to read_pipelines
  git-p4: pre-compile RCS keyword regexes
  git-p4: use with statements to close files after use in patchRCSKeywords
This commit is contained in:
Junio C Hamano 2022-01-05 14:01:27 -08:00
commit b58e7bfcd7
2 changed files with 42 additions and 39 deletions

View File

@ -56,6 +56,9 @@ defaultBlockSize = 1<<20
p4_access_checked = False
re_ko_keywords = re.compile(br'\$(Id|Header)(:[^$\n]+)?\$')
re_k_keywords = re.compile(br'\$(Id|Header|Author|Date|DateTime|Change|File|Revision)(:[^$\n]+)?\$')
def p4_build_cmd(cmd):
"""Build a suitable p4 command line.
@ -337,17 +340,19 @@ def p4_read_pipe(c, ignore_error=False, raw=False):
real_cmd = p4_build_cmd(c)
return read_pipe(real_cmd, ignore_error, raw=raw)
def read_pipe_lines(c):
def read_pipe_lines(c, raw=False):
if verbose:
sys.stderr.write('Reading pipe: %s\n' % str(c))
expand = not isinstance(c, list)
p = subprocess.Popen(c, stdout=subprocess.PIPE, shell=expand)
pipe = p.stdout
val = [decode_text_stream(line) for line in pipe.readlines()]
lines = pipe.readlines()
if not raw:
lines = [decode_text_stream(line) for line in lines]
if pipe.close() or p.wait():
die('Command failed: %s' % str(c))
return val
return lines
def p4_read_pipe_lines(c):
"""Specifically invoke p4 on the command supplied. """
@ -577,20 +582,12 @@ def p4_type(f):
#
def p4_keywords_regexp_for_type(base, type_mods):
if base in ("text", "unicode", "binary"):
kwords = None
if "ko" in type_mods:
kwords = 'Id|Header'
return re_ko_keywords
elif "k" in type_mods:
kwords = 'Id|Header|Author|Date|DateTime|Change|File|Revision'
return re_k_keywords
else:
return None
pattern = r"""
\$ # Starts with a dollar, followed by...
(%s) # one of the keywords, followed by...
(:[^$\n]+)? # possibly an old expansion, followed by...
\$ # another dollar
""" % kwords
return pattern
else:
return None
@ -1753,18 +1750,13 @@ class P4Submit(Command, P4UserMap):
return result
def patchRCSKeywords(self, file, pattern):
# Attempt to zap the RCS keywords in a p4 controlled file matching the given pattern
def patchRCSKeywords(self, file, regexp):
# Attempt to zap the RCS keywords in a p4 controlled file matching the given regex
(handle, outFileName) = tempfile.mkstemp(dir='.')
try:
outFile = os.fdopen(handle, "w+")
inFile = open(file, "r")
regexp = re.compile(pattern, re.VERBOSE)
for line in inFile.readlines():
line = regexp.sub(r'$\1$', line)
outFile.write(line)
inFile.close()
outFile.close()
with os.fdopen(handle, "wb") as outFile, open(file, "rb") as inFile:
for line in inFile.readlines():
outFile.write(regexp.sub(br'$\1$', line))
# Forcibly overwrite the original file
os.unlink(file)
shutil.move(outFileName, file)
@ -2091,25 +2083,24 @@ class P4Submit(Command, P4UserMap):
# the patch to see if that's possible.
if gitConfigBool("git-p4.attemptRCSCleanup"):
file = None
pattern = None
kwfiles = {}
for file in editedFiles | filesToDelete:
# did this file's delta contain RCS keywords?
pattern = p4_keywords_regexp_for_file(file)
if pattern:
regexp = p4_keywords_regexp_for_file(file)
if regexp:
# this file is a possibility...look for RCS keywords.
regexp = re.compile(pattern, re.VERBOSE)
for line in read_pipe_lines(["git", "diff", "%s^..%s" % (id, id), file]):
for line in read_pipe_lines(
["git", "diff", "%s^..%s" % (id, id), file],
raw=True):
if regexp.search(line):
if verbose:
print("got keyword match on %s in %s in %s" % (pattern, line, file))
kwfiles[file] = pattern
# Verbose trace: report the keyword pattern, the matching diff line and the file.
print("got keyword match on %s in %s in %s" % (regexp.pattern, line, file))
kwfiles[file] = regexp
break
for file in kwfiles:
for file, regexp in kwfiles.items():
if verbose:
print("zapping %s with %s" % (line,pattern))
print("zapping %s with %s" % (line, regexp.pattern))
# File is being deleted, so not open in p4. Must
# disable the read-only bit on windows.
if self.isWindows and file not in editedFiles:
@ -3029,12 +3020,9 @@ class P4Sync(Command, P4UserMap):
# Note that we do not try to de-mangle keywords on utf16 files,
# even though in theory somebody may want that.
pattern = p4_keywords_regexp_for_type(type_base, type_mods)
if pattern:
regexp = re.compile(pattern, re.VERBOSE)
text = ''.join(decode_text_stream(c) for c in contents)
text = regexp.sub(r'$\1$', text)
contents = [ encode_text_stream(text) ]
regexp = p4_keywords_regexp_for_type(type_base, type_mods)
if regexp:
contents = [regexp.sub(br'$\1$', c) for c in contents]
if self.largeFileSystem:
(git_mode, contents) = self.largeFileSystem.processContent(git_mode, relPath, contents)

View File

@ -4,6 +4,8 @@ test_description='git p4 rcs keywords'
. ./lib-git-p4.sh
CP1252="\223\224"
test_expect_success 'start p4d' '
start_p4d
'
@ -32,6 +34,9 @@ test_expect_success 'init depot' '
p4 submit -d "filek" &&
p4 add -t text+ko fileko &&
p4 submit -d "fileko" &&
printf "$CP1252" >fileko_cp1252 &&
p4 add -t text+ko fileko_cp1252 &&
p4 submit -d "fileko_cp1252" &&
p4 add -t text file_text &&
p4 submit -d "file_text"
)
@ -359,4 +364,14 @@ test_expect_failure 'Add keywords in git which do not match the default p4 value
)
'
test_expect_success 'check cp1252 smart quote are preserved through RCS keyword processing' '
test_when_finished cleanup_git &&
git p4 clone --dest="$git" //depot &&
(
cd "$git" &&
printf "$CP1252" >expect &&
test_cmp_bin expect fileko_cp1252
)
'
test_done