From 8618d322e00f88b7b5528abfc75b26c162032a21 Mon Sep 17 00:00:00 2001 From: Joel Holdsworth Date: Thu, 16 Dec 2021 13:46:15 +0000 Subject: [PATCH 1/5] git-p4: use with statements to close files after use in patchRCSKeywords Python with statements are used to wrap the execution of a block of code so that an object can be safely released when execution leaves the scope. They are desirable for improving code tidiness, and to ensure that objects are properly destroyed even when exceptions are thrown. Signed-off-by: Joel Holdsworth Signed-off-by: Junio C Hamano --- git-p4.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/git-p4.py b/git-p4.py index 2b4500226a..226cdef424 100755 --- a/git-p4.py +++ b/git-p4.py @@ -1757,14 +1757,11 @@ class P4Submit(Command, P4UserMap): # Attempt to zap the RCS keywords in a p4 controlled file matching the given pattern (handle, outFileName) = tempfile.mkstemp(dir='.') try: - outFile = os.fdopen(handle, "w+") - inFile = open(file, "r") - regexp = re.compile(pattern, re.VERBOSE) - for line in inFile.readlines(): - line = regexp.sub(r'$\1$', line) - outFile.write(line) - inFile.close() - outFile.close() + with os.fdopen(handle, "w+") as outFile, open(file, "r") as inFile: + regexp = re.compile(pattern, re.VERBOSE) + for line in inFile.readlines(): + line = regexp.sub(r'$\1$', line) + outFile.write(line) # Forcibly overwrite the original file os.unlink(file) shutil.move(outFileName, file) From e665e98ec1d1558a55a3e7e5fd5e88d70396ec96 Mon Sep 17 00:00:00 2001 From: Joel Holdsworth Date: Thu, 16 Dec 2021 13:46:16 +0000 Subject: [PATCH 2/5] git-p4: pre-compile RCS keyword regexes Previously git-p4.py would compile one of two regular expressions for every RCS keyword-enabled file. This patch simplifies the code by pre-compiling the two regular expressions when the script first loads. 
Signed-off-by: Joel Holdsworth Signed-off-by: Junio C Hamano --- git-p4.py | 48 ++++++++++++++++++------------------------------ 1 file changed, 18 insertions(+), 30 deletions(-) diff --git a/git-p4.py b/git-p4.py index 226cdef424..0af83b9c72 100755 --- a/git-p4.py +++ b/git-p4.py @@ -56,6 +56,9 @@ defaultBlockSize = 1<<20 p4_access_checked = False +re_ko_keywords = re.compile(r'\$(Id|Header)(:[^$\n]+)?\$') +re_k_keywords = re.compile(r'\$(Id|Header|Author|Date|DateTime|Change|File|Revision)(:[^$\n]+)?\$') + def p4_build_cmd(cmd): """Build a suitable p4 command line. @@ -577,20 +580,12 @@ def p4_type(f): # def p4_keywords_regexp_for_type(base, type_mods): if base in ("text", "unicode", "binary"): - kwords = None if "ko" in type_mods: - kwords = 'Id|Header' + return re_ko_keywords elif "k" in type_mods: - kwords = 'Id|Header|Author|Date|DateTime|Change|File|Revision' + return re_k_keywords else: return None - pattern = r""" - \$ # Starts with a dollar, followed by... - (%s) # one of the keywords, followed by... - (:[^$\n]+)? # possibly an old expansion, followed by... - \$ # another dollar - """ % kwords - return pattern else: return None @@ -1753,15 +1748,13 @@ class P4Submit(Command, P4UserMap): return result - def patchRCSKeywords(self, file, pattern): - # Attempt to zap the RCS keywords in a p4 controlled file matching the given pattern + def patchRCSKeywords(self, file, regexp): + # Attempt to zap the RCS keywords in a p4 controlled file matching the given regex (handle, outFileName) = tempfile.mkstemp(dir='.') try: with os.fdopen(handle, "w+") as outFile, open(file, "r") as inFile: - regexp = re.compile(pattern, re.VERBOSE) for line in inFile.readlines(): - line = regexp.sub(r'$\1$', line) - outFile.write(line) + outFile.write(regexp.sub(r'$\1$', line)) # Forcibly overwrite the original file os.unlink(file) shutil.move(outFileName, file) @@ -2088,25 +2081,22 @@ class P4Submit(Command, P4UserMap): # the patch to see if that's possible. 
if gitConfigBool("git-p4.attemptRCSCleanup"): file = None - pattern = None kwfiles = {} for file in editedFiles | filesToDelete: # did this file's delta contain RCS keywords? - pattern = p4_keywords_regexp_for_file(file) - - if pattern: + regexp = p4_keywords_regexp_for_file(file) + if regexp: # this file is a possibility...look for RCS keywords. - regexp = re.compile(pattern, re.VERBOSE) for line in read_pipe_lines(["git", "diff", "%s^..%s" % (id, id), file]): if regexp.search(line): if verbose: - print("got keyword match on %s in %s in %s" % (pattern, line, file)) - kwfiles[file] = pattern + print("got keyword match on %s in %s in %s" % (regex.pattern, line, file)) + kwfiles[file] = regexp break - for file in kwfiles: + for file, regexp in kwfiles.items(): if verbose: - print("zapping %s with %s" % (line,pattern)) + print("zapping %s with %s" % (line, regexp.pattern)) # File is being deleted, so not open in p4. Must # disable the read-only bit on windows. if self.isWindows and file not in editedFiles: @@ -3026,12 +3016,10 @@ class P4Sync(Command, P4UserMap): # Note that we do not try to de-mangle keywords on utf16 files, # even though in theory somebody may want that. - pattern = p4_keywords_regexp_for_type(type_base, type_mods) - if pattern: - regexp = re.compile(pattern, re.VERBOSE) - text = ''.join(decode_text_stream(c) for c in contents) - text = regexp.sub(r'$\1$', text) - contents = [ encode_text_stream(text) ] + regexp = p4_keywords_regexp_for_type(type_base, type_mods) + if regexp: + contents = [encode_text_stream(regexp.sub( + r'$\1$', ''.join(decode_text_stream(c) for c in contents)))] if self.largeFileSystem: (git_mode, contents) = self.largeFileSystem.processContent(git_mode, relPath, contents) From 9732e2229c9a1f5285109eecef0dddff16be0ace Mon Sep 17 00:00:00 2001 From: Joel Holdsworth Date: Thu, 16 Dec 2021 13:46:17 +0000 Subject: [PATCH 3/5] git-p4: add raw option to read_pipe_lines Previously the read_pipe_lines function always decoded the result lines. 
In order to improve support for non-decoded binary processing of data in git-p4.py, this patch adds a raw option to the function that allows decoding to be disabled. Signed-off-by: Joel Holdsworth Signed-off-by: Junio C Hamano --- git-p4.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/git-p4.py b/git-p4.py index 0af83b9c72..509feac2d8 100755 --- a/git-p4.py +++ b/git-p4.py @@ -340,17 +340,19 @@ def p4_read_pipe(c, ignore_error=False, raw=False): real_cmd = p4_build_cmd(c) return read_pipe(real_cmd, ignore_error, raw=raw) -def read_pipe_lines(c): +def read_pipe_lines(c, raw=False): if verbose: sys.stderr.write('Reading pipe: %s\n' % str(c)) expand = not isinstance(c, list) p = subprocess.Popen(c, stdout=subprocess.PIPE, shell=expand) pipe = p.stdout - val = [decode_text_stream(line) for line in pipe.readlines()] + lines = pipe.readlines() + if not raw: + lines = [decode_text_stream(line) for line in lines] if pipe.close() or p.wait(): die('Command failed: %s' % str(c)) - return val + return lines def p4_read_pipe_lines(c): """Specifically invoke p4 on the command supplied. """ From 4cf67ae1b6c80eb8a63cc8dd752bd3951cffa104 Mon Sep 17 00:00:00 2001 From: Joel Holdsworth Date: Thu, 16 Dec 2021 13:46:18 +0000 Subject: [PATCH 4/5] git-p4: open temporary patch file for write only The patchRCSKeywords method creates a temporary file in which to store the patched output data. Previously this file was opened in "w+" mode (write and read), but the code never reads the contents of the file while open, so it only needs to be opened in "w" mode (write-only). 
Signed-off-by: Joel Holdsworth Signed-off-by: Junio C Hamano --- git-p4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git-p4.py b/git-p4.py index 509feac2d8..7845210e69 100755 --- a/git-p4.py +++ b/git-p4.py @@ -1754,7 +1754,7 @@ class P4Submit(Command, P4UserMap): # Attempt to zap the RCS keywords in a p4 controlled file matching the given regex (handle, outFileName) = tempfile.mkstemp(dir='.') try: - with os.fdopen(handle, "w+") as outFile, open(file, "r") as inFile: + with os.fdopen(handle, "w") as outFile, open(file, "r") as inFile: for line in inFile.readlines(): outFile.write(regexp.sub(r'$\1$', line)) # Forcibly overwrite the original file From 70c0d55349a50707166f9fb9a9720ac1c0530217 Mon Sep 17 00:00:00 2001 From: Joel Holdsworth Date: Thu, 16 Dec 2021 13:46:19 +0000 Subject: [PATCH 5/5] git-p4: resolve RCS keywords in bytes not utf-8 RCS keywords are strings that are replaced with information from Perforce. Examples include $Date$, $Author$, $File$, $Change$ etc. Perforce resolves these by replacing them with their expanded values when files are synced, but Git's data model requires these expanded values to be converted back into their unexpanded form. Previously, git-p4.py would implement this behaviour through the use of regular expressions. However, the regular expression substitution was applied using decoded strings i.e. the content of incoming commit diffs was first decoded from bytes as UTF-8 text, processed with regular expressions, then converted back to bytes. Not only is this behaviour inefficient, but it is also the source of a common issue with text files containing invalid UTF-8 data. For files created in Windows, CP1252 Smart Quote Characters (0x93 and 0x94) are seen fairly frequently. These codes are invalid in UTF-8, so if the script encountered any file containing them, on Python 2 the symbols will be corrupted, and on Python 3 the script will fail with an exception. 
This patch replaces this decoding/encoding with bytes object regular expressions, so that the substitution is performed directly upon the source data with no conversions. A test for smart quote handling has been added to the t9810-git-p4-rcs.sh test suite. Signed-off-by: Joel Holdsworth Signed-off-by: Junio C Hamano --- git-p4.py | 15 ++++++++------- t/t9810-git-p4-rcs.sh | 15 +++++++++++++++ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/git-p4.py b/git-p4.py index 7845210e69..986595bef0 100755 --- a/git-p4.py +++ b/git-p4.py @@ -56,8 +56,8 @@ defaultBlockSize = 1<<20 p4_access_checked = False -re_ko_keywords = re.compile(r'\$(Id|Header)(:[^$\n]+)?\$') -re_k_keywords = re.compile(r'\$(Id|Header|Author|Date|DateTime|Change|File|Revision)(:[^$\n]+)?\$') +re_ko_keywords = re.compile(br'\$(Id|Header)(:[^$\n]+)?\$') +re_k_keywords = re.compile(br'\$(Id|Header|Author|Date|DateTime|Change|File|Revision)(:[^$\n]+)?\$') def p4_build_cmd(cmd): """Build a suitable p4 command line. @@ -1754,9 +1754,9 @@ class P4Submit(Command, P4UserMap): # Attempt to zap the RCS keywords in a p4 controlled file matching the given regex (handle, outFileName) = tempfile.mkstemp(dir='.') try: - with os.fdopen(handle, "w") as outFile, open(file, "r") as inFile: + with os.fdopen(handle, "wb") as outFile, open(file, "rb") as inFile: for line in inFile.readlines(): - outFile.write(regexp.sub(r'$\1$', line)) + outFile.write(regexp.sub(br'$\1$', line)) # Forcibly overwrite the original file os.unlink(file) shutil.move(outFileName, file) @@ -2089,7 +2089,9 @@ class P4Submit(Command, P4UserMap): regexp = p4_keywords_regexp_for_file(file) if regexp: # this file is a possibility...look for RCS keywords. 
- for line in read_pipe_lines(["git", "diff", "%s^..%s" % (id, id), file]): + for line in read_pipe_lines( + ["git", "diff", "%s^..%s" % (id, id), file], + raw=True): if regexp.search(line): if verbose: print("got keyword match on %s in %s in %s" % (regex.pattern, line, file)) @@ -3020,8 +3022,7 @@ class P4Sync(Command, P4UserMap): # even though in theory somebody may want that. regexp = p4_keywords_regexp_for_type(type_base, type_mods) if regexp: - contents = [encode_text_stream(regexp.sub( - r'$\1$', ''.join(decode_text_stream(c) for c in contents)))] + contents = [regexp.sub(br'$\1$', c) for c in contents] if self.largeFileSystem: (git_mode, contents) = self.largeFileSystem.processContent(git_mode, relPath, contents) diff --git a/t/t9810-git-p4-rcs.sh b/t/t9810-git-p4-rcs.sh index e3836888ec..5fe83315ec 100755 --- a/t/t9810-git-p4-rcs.sh +++ b/t/t9810-git-p4-rcs.sh @@ -4,6 +4,8 @@ test_description='git p4 rcs keywords' . ./lib-git-p4.sh +CP1252="\223\224" + test_expect_success 'start p4d' ' start_p4d ' @@ -32,6 +34,9 @@ test_expect_success 'init depot' ' p4 submit -d "filek" && p4 add -t text+ko fileko && p4 submit -d "fileko" && + printf "$CP1252" >fileko_cp1252 && + p4 add -t text+ko fileko_cp1252 && + p4 submit -d "fileko_cp1252" && p4 add -t text file_text && p4 submit -d "file_text" ) @@ -359,4 +364,14 @@ test_expect_failure 'Add keywords in git which do not match the default p4 value ) ' +test_expect_success 'check cp1252 smart quote are preserved through RCS keyword processing' ' + test_when_finished cleanup_git && + git p4 clone --dest="$git" //depot && + ( + cd "$git" && + printf "$CP1252" >expect && + test_cmp_bin expect fileko_cp1252 + ) +' + test_done