From 692e17964dd4f8da4b3994c5c7055ec9a8c22a75 Mon Sep 17 00:00:00 2001 From: Lars Schneider Date: Sat, 26 Sep 2015 09:54:58 +0200 Subject: [PATCH 1/7] git-p4: add optional type specifier to gitConfig reader The functions "gitConfig" and "gitConfigBool" are almost identical. Make "gitConfig" more generic by adding an optional type specifier. Use the type specifier "--bool" with "gitConfig" to implement "gitConfigBool. This prepares the implementation of other type specifiers such as "--int". Signed-off-by: Lars Schneider Acked-by: Luke Diamand Signed-off-by: Junio C Hamano --- git-p4.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/git-p4.py b/git-p4.py index 0093fa3d83..206a45f613 100755 --- a/git-p4.py +++ b/git-p4.py @@ -604,9 +604,12 @@ def gitBranchExists(branch): _gitConfig = {} -def gitConfig(key): +def gitConfig(key, typeSpecifier=None): if not _gitConfig.has_key(key): - cmd = [ "git", "config", key ] + cmd = [ "git", "config" ] + if typeSpecifier: + cmd += [ typeSpecifier ] + cmd += [ key ] s = read_pipe(cmd, ignore_error=True) _gitConfig[key] = s.strip() return _gitConfig[key] @@ -617,10 +620,7 @@ def gitConfigBool(key): in the config.""" if not _gitConfig.has_key(key): - cmd = [ "git", "config", "--bool", key ] - s = read_pipe(cmd, ignore_error=True) - v = s.strip() - _gitConfig[key] = v == "true" + _gitConfig[key] = gitConfig(key, '--bool') == "true" return _gitConfig[key] def gitConfigList(key): From cb1dafdfdadca4d1bbe4147d2f9c72b5ef0f4ab7 Mon Sep 17 00:00:00 2001 From: Lars Schneider Date: Sat, 26 Sep 2015 09:54:59 +0200 Subject: [PATCH 2/7] git-p4: add gitConfigInt reader Add a git config reader for integer variables. Please note that the git config implementation automatically supports k, m, and g suffixes. Signed-off-by: Lars Schneider Signed-off-by: Junio C Hamano --- git-p4.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/git-p4.py b/git-p4.py index 206a45f613..c99b077884 100755 --- a/git-p4.py +++ b/git-p4.py @@ -623,6 +623,17 @@ def gitConfigBool(key): _gitConfig[key] = gitConfig(key, '--bool') == "true" return _gitConfig[key] +def gitConfigInt(key): + if not _gitConfig.has_key(key): + cmd = [ "git", "config", "--int", key ] + s = read_pipe(cmd, ignore_error=True) + v = s.strip() + try: + _gitConfig[key] = int(gitConfig(key, '--int')) + except ValueError: + _gitConfig[key] = None + return _gitConfig[key] + def gitConfigList(key): if not _gitConfig.has_key(key): s = read_pipe(["git", "config", "--get-all", key], ignore_error=True) From 7960e70710a97b4d650531614b2839a907f79d95 Mon Sep 17 00:00:00 2001 From: Lars Schneider Date: Sat, 26 Sep 2015 09:55:00 +0200 Subject: [PATCH 3/7] git-p4: return an empty list if a list config has no values Signed-off-by: Lars Schneider Signed-off-by: Junio C Hamano --- git-p4.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/git-p4.py b/git-p4.py index c99b077884..ae1e9ea52e 100755 --- a/git-p4.py +++ b/git-p4.py @@ -638,6 +638,8 @@ def gitConfigList(key): if not _gitConfig.has_key(key): s = read_pipe(["git", "config", "--get-all", key], ignore_error=True) _gitConfig[key] = s.strip().split(os.linesep) + if _gitConfig[key] == ['']: + _gitConfig[key] = [] return _gitConfig[key] def p4BranchesInGit(branchesAreInRemotes=True): From d2176a5060c73a04846e270ab07b90aad4d8b8f0 Mon Sep 17 00:00:00 2001 From: Lars Schneider Date: Sat, 26 Sep 2015 09:55:01 +0200 Subject: [PATCH 4/7] git-p4: add file streaming progress in verbose mode If a file is streamed from P4 to Git then the verbose mode prints continuously the progress as percentage like this: //depot/file.bin 20% (10 MB) Upon completion the progress is overwritten with depot source, local file and size like this: //depot/file.bin --> local/file.bin (10 MB) Signed-off-by: Lars Schneider Signed-off-by: Junio C Hamano --- git-p4.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/git-p4.py b/git-p4.py index ae1e9ea52e..81b0fbc648 100755 --- a/git-p4.py +++ b/git-p4.py @@ -2171,7 +2171,9 @@ class P4Sync(Command, P4UserMap): def streamOneP4File(self, file, contents): relPath = self.stripRepoPath(file['depotFile'], self.branchPrefixes) if verbose: - sys.stderr.write("%s\n" % relPath) + size = int(self.stream_file['fileSize']) + sys.stdout.write('\r%s --> %s (%i MB)\n' % (file['depotFile'], relPath, size/1024/1024)) + sys.stdout.flush() (type_base, type_mods) = split_p4_type(file["type"]) @@ -2248,7 +2250,8 @@ class P4Sync(Command, P4UserMap): def streamOneP4Deletion(self, file): relPath = self.stripRepoPath(file['path'], self.branchPrefixes) if verbose: - sys.stderr.write("delete %s\n" % relPath) + sys.stdout.write("delete %s\n" % relPath) + sys.stdout.flush() self.gitStream.write("D %s\n" % relPath) # handle another chunk of streaming data @@ -2288,10 +2291,23 @@ class P4Sync(Command, P4UserMap): # 'data' field we need to append to our array for k in marshalled.keys(): if k == 'data': + if 'streamContentSize' not in self.stream_file: + self.stream_file['streamContentSize'] = 0 + self.stream_file['streamContentSize'] += len(marshalled['data']) self.stream_contents.append(marshalled['data']) else: self.stream_file[k] = marshalled[k] + if (verbose and + 'streamContentSize' in self.stream_file and + 'fileSize' in self.stream_file and + 'depotFile' in self.stream_file): + size = int(self.stream_file["fileSize"]) + if size > 0: + progress = 100*self.stream_file['streamContentSize']/size + sys.stdout.write('\r%s %d%% (%i MB)' % (self.stream_file['depotFile'], progress, int(size/1024/1024))) + sys.stdout.flush() + self.stream_have_file_info = True # Stream directly from "p4 files" into "git fast-import" From 4d25dc4475639395b4c567d947218613de359f09 Mon Sep 17 00:00:00 2001 From: Lars Schneider Date: Sat, 26 Sep 2015 09:55:02 +0200 Subject: [PATCH 5/7] git-p4: check free space during streaming git-p4 will just halt if there is not enough disk space while streaming content from P4 to Git. Add a check to ensure at least 4 times (arbitrarily chosen) the size of a streamed file is available. Signed-off-by: Lars Schneider Signed-off-by: Junio C Hamano --- git-p4.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/git-p4.py b/git-p4.py index 81b0fbc648..a60c086b18 100755 --- a/git-p4.py +++ b/git-p4.py @@ -104,6 +104,16 @@ def chdir(path, is_client_path=False): path = os.getcwd() os.environ['PWD'] = path +def calcDiskFree(): + """Return free space in bytes on the disk of the given dirname.""" + if platform.system() == 'Windows': + free_bytes = ctypes.c_ulonglong(0) + ctypes.windll.kernel32.GetDiskFreeSpaceExW(ctypes.c_wchar_p(os.getcwd()), None, None, ctypes.pointer(free_bytes)) + return free_bytes.value + else: + st = os.statvfs(os.getcwd()) + return st.f_bavail * st.f_frsize + def die(msg): if verbose: raise Exception(msg) @@ -2263,6 +2273,14 @@ class P4Sync(Command, P4UserMap): if marshalled["code"] == "error": if "data" in marshalled: err = marshalled["data"].rstrip() + + if not err and 'fileSize' in self.stream_file: + required_bytes = int((4 * int(self.stream_file["fileSize"])) - calcDiskFree()) + if required_bytes > 0: + err = 'Not enough space left on %s! Free at least %i MB.' % ( + os.getcwd(), required_bytes/1024/1024 + ) + if err: f = None if self.stream_have_file_info: From a5db4b127bf86b9dbeb3cb079f4a9530ba313941 Mon Sep 17 00:00:00 2001 From: Lars Schneider Date: Sat, 26 Sep 2015 09:55:03 +0200 Subject: [PATCH 6/7] git-p4: add support for large file systems Perforce repositories can contain large (binary) files. Migrating these repositories to Git generates very large local clones. External storage systems such as Git LFS [1], Git Fat [2], Git Media [3], git-annex [4] try to address this problem. Add a generic mechanism to detect large files based on extension, uncompressed size, and/or compressed size. [1] https://git-lfs.github.com/ [2] https://github.com/jedbrown/git-fat [3] https://github.com/alebedev/git-media [4] https://git-annex.branchable.com/ Signed-off-by: Lars Schneider Conflicts: Documentation/git-p4.txt git-p4.py Signed-off-by: Junio C Hamano --- Documentation/git-p4.txt | 32 +++++++ git-p4.py | 139 +++++++++++++++++++++++++-- t/t9823-git-p4-mock-lfs.sh | 192 +++++++++++++++++++++++++++++++++++++ 3 files changed, 353 insertions(+), 10 deletions(-) create mode 100755 t/t9823-git-p4-mock-lfs.sh diff --git a/Documentation/git-p4.txt b/Documentation/git-p4.txt index 82aa5d6073..f34d16d71c 100644 --- a/Documentation/git-p4.txt +++ b/Documentation/git-p4.txt @@ -510,6 +510,38 @@ git-p4.useClientSpec:: option '--use-client-spec'. See the "CLIENT SPEC" section above. This variable is a boolean, not the name of a p4 client. +git-p4.largeFileSystem:: + Specify the system that is used for large (binary) files. Please note + that large file systems do not support the 'git p4 submit' command. + Only Git LFS [1] is implemented right now. Download + and install the Git LFS command line extension to use this option + and configure it like this: ++ +------------- +git config git-p4.largeFileSystem GitLFS +------------- ++ + [1] https://git-lfs.github.com/ + +git-p4.largeFileExtensions:: + All files matching a file extension in the list will be processed + by the large file system. Do not prefix the extensions with '.'. + +git-p4.largeFileThreshold:: + All files with an uncompressed size exceeding the threshold will be + processed by the large file system. By default the threshold is + defined in bytes. Add the suffix k, m, or g to change the unit. + +git-p4.largeFileCompressedThreshold:: + All files with a compressed size exceeding the threshold will be + processed by the large file system. This option might slow down + your clone/sync process. By default the threshold is defined in + bytes. Add the suffix k, m, or g to change the unit. + +git-p4.largeFilePush:: + Boolean variable which defines if large files are automatically + pushed to a server. + Submit variables ~~~~~~~~~~~~~~~~ git-p4.detectRenames:: diff --git a/git-p4.py b/git-p4.py index a60c086b18..02b5e9e362 100755 --- a/git-p4.py +++ b/git-p4.py @@ -22,6 +22,8 @@ import platform import re import shutil import stat +import zipfile +import zlib try: from subprocess import CalledProcessError @@ -932,6 +934,110 @@ def wildcard_present(path): m = re.search("[*#@%]", path) return m is not None +class LargeFileSystem(object): + """Base class for large file system support.""" + + def __init__(self, writeToGitStream): + self.largeFiles = set() + self.writeToGitStream = writeToGitStream + + def generatePointer(self, cloneDestination, contentFile): + """Return the content of a pointer file that is stored in Git instead of + the actual content.""" + assert False, "Method 'generatePointer' required in " + self.__class__.__name__ + + def pushFile(self, localLargeFile): + """Push the actual content which is not stored in the Git repository to + a server.""" + assert False, "Method 'pushFile' required in " + self.__class__.__name__ + + def hasLargeFileExtension(self, relPath): + return reduce( + lambda a, b: a or b, + [relPath.endswith('.' + e) for e in gitConfigList('git-p4.largeFileExtensions')], + False + ) + + def generateTempFile(self, contents): + contentFile = tempfile.NamedTemporaryFile(prefix='git-p4-large-file', delete=False) + for d in contents: + contentFile.write(d) + contentFile.close() + return contentFile.name + + def exceedsLargeFileThreshold(self, relPath, contents): + if gitConfigInt('git-p4.largeFileThreshold'): + contentsSize = sum(len(d) for d in contents) + if contentsSize > gitConfigInt('git-p4.largeFileThreshold'): + return True + if gitConfigInt('git-p4.largeFileCompressedThreshold'): + contentsSize = sum(len(d) for d in contents) + if contentsSize <= gitConfigInt('git-p4.largeFileCompressedThreshold'): + return False + contentTempFile = self.generateTempFile(contents) + compressedContentFile = tempfile.NamedTemporaryFile(prefix='git-p4-large-file', delete=False) + zf = zipfile.ZipFile(compressedContentFile.name, mode='w') + zf.write(contentTempFile, compress_type=zipfile.ZIP_DEFLATED) + zf.close() + compressedContentsSize = zf.infolist()[0].compress_size + os.remove(contentTempFile) + os.remove(compressedContentFile.name) + if compressedContentsSize > gitConfigInt('git-p4.largeFileCompressedThreshold'): + return True + return False + + def addLargeFile(self, relPath): + self.largeFiles.add(relPath) + + def removeLargeFile(self, relPath): + self.largeFiles.remove(relPath) + + def isLargeFile(self, relPath): + return relPath in self.largeFiles + + def processContent(self, git_mode, relPath, contents): + """Processes the content of git fast import. This method decides if a + file is stored in the large file system and handles all necessary + steps.""" + if self.exceedsLargeFileThreshold(relPath, contents) or self.hasLargeFileExtension(relPath): + contentTempFile = self.generateTempFile(contents) + (git_mode, contents, localLargeFile) = self.generatePointer(contentTempFile) + + # Move temp file to final location in large file system + largeFileDir = os.path.dirname(localLargeFile) + if not os.path.isdir(largeFileDir): + os.makedirs(largeFileDir) + shutil.move(contentTempFile, localLargeFile) + self.addLargeFile(relPath) + if gitConfigBool('git-p4.largeFilePush'): + self.pushFile(localLargeFile) + if verbose: + sys.stderr.write("%s moved to large file system (%s)\n" % (relPath, localLargeFile)) + return (git_mode, contents) + +class MockLFS(LargeFileSystem): + """Mock large file system for testing.""" + + def generatePointer(self, contentFile): + """The pointer content is the original content prefixed with "pointer-". + The local filename of the large file storage is derived from the file content. + """ + with open(contentFile, 'r') as f: + content = next(f) + gitMode = '100644' + pointerContents = 'pointer-' + content + localLargeFile = os.path.join(os.getcwd(), '.git', 'mock-storage', 'local', content[:-1]) + return (gitMode, pointerContents, localLargeFile) + + def pushFile(self, localLargeFile): + """The remote filename of the large file storage is the same as the local + one but in a different directory. + """ + remotePath = os.path.join(os.path.dirname(localLargeFile), '..', 'remote') + if not os.path.exists(remotePath): + os.makedirs(remotePath) + shutil.copyfile(localLargeFile, os.path.join(remotePath, os.path.basename(localLargeFile))) + class Command: def __init__(self): self.usage = "usage: %prog [options]" @@ -1105,6 +1211,9 @@ class P4Submit(Command, P4UserMap): self.p4HasMoveCommand = p4_has_move_command() self.branch = None + if gitConfig('git-p4.largeFileSystem'): + die("Large file system not supported for git-p4 submit command. Please remove it from config.") + def check(self): if len(p4CmdList("opened ...")) > 0: die("You have files opened with perforce! Close them before starting the sync.") @@ -2055,6 +2164,13 @@ class P4Sync(Command, P4UserMap): self.clientSpecDirs = None self.tempBranches = [] self.tempBranchLocation = "git-p4-tmp" + self.largeFileSystem = None + + if gitConfig('git-p4.largeFileSystem'): + largeFileSystemConstructor = globals()[gitConfig('git-p4.largeFileSystem')] + self.largeFileSystem = largeFileSystemConstructor( + lambda git_mode, relPath, contents: self.writeToGitStream(git_mode, relPath, contents) + ) if gitConfig("git-p4.syncFromOrigin") == "false": self.syncWithOrigin = False @@ -2175,6 +2291,13 @@ class P4Sync(Command, P4UserMap): return branches + def writeToGitStream(self, gitMode, relPath, contents): + self.gitStream.write('M %s inline %s\n' % (gitMode, relPath)) + self.gitStream.write('data %d\n' % sum(len(d) for d in contents)) + for d in contents: + self.gitStream.write(d) + self.gitStream.write('\n') + # output one file from the P4 stream # - helper for streamP4Files @@ -2245,17 +2368,10 @@ class P4Sync(Command, P4UserMap): text = regexp.sub(r'$\1$', text) contents = [ text ] - self.gitStream.write("M %s inline %s\n" % (git_mode, relPath)) + if self.largeFileSystem: + (git_mode, contents) = self.largeFileSystem.processContent(git_mode, relPath, contents) - # total length... - length = 0 - for d in contents: - length = length + len(d) - - self.gitStream.write("data %d\n" % length) - for d in contents: - self.gitStream.write(d) - self.gitStream.write("\n") + self.writeToGitStream(git_mode, relPath, contents) def streamOneP4Deletion(self, file): relPath = self.stripRepoPath(file['path'], self.branchPrefixes) @@ -2264,6 +2380,9 @@ class P4Sync(Command, P4UserMap): sys.stdout.flush() self.gitStream.write("D %s\n" % relPath) + if self.largeFileSystem and self.largeFileSystem.isLargeFile(relPath): + self.largeFileSystem.removeLargeFile(relPath) + # handle another chunk of streaming data def streamP4FilesCb(self, marshalled): diff --git a/t/t9823-git-p4-mock-lfs.sh b/t/t9823-git-p4-mock-lfs.sh new file mode 100755 index 0000000000..1f2dc369bf --- /dev/null +++ b/t/t9823-git-p4-mock-lfs.sh @@ -0,0 +1,192 @@ +#!/bin/sh + +test_description='Clone repositories and store files in Mock LFS' + +. ./lib-git-p4.sh + +test_file_is_not_in_mock_lfs () { + FILE="$1" && + CONTENT="$2" && + echo "$CONTENT" >expect_content && + test_path_is_file "$FILE" && + test_cmp expect_content "$FILE" +} + +test_file_is_in_mock_lfs () { + FILE="$1" && + CONTENT="$2" && + LOCAL_STORAGE=".git/mock-storage/local/$CONTENT" && + SERVER_STORAGE=".git/mock-storage/remote/$CONTENT" && + echo "pointer-$CONTENT" >expect_pointer && + echo "$CONTENT" >expect_content && + test_path_is_file "$FILE" && + test_path_is_file "$LOCAL_STORAGE" && + test_path_is_file "$SERVER_STORAGE" && + test_cmp expect_pointer "$FILE" && + test_cmp expect_content "$LOCAL_STORAGE" && + test_cmp expect_content "$SERVER_STORAGE" +} + +test_file_is_deleted_in_mock_lfs () { + FILE="$1" && + CONTENT="$2" && + LOCAL_STORAGE=".git/mock-storage/local/$CONTENT" && + SERVER_STORAGE=".git/mock-storage/remote/$CONTENT" && + echo "pointer-$CONTENT" >expect_pointer && + echo "$CONTENT" >expect_content && + test_path_is_missing "$FILE" && + test_path_is_file "$LOCAL_STORAGE" && + test_path_is_file "$SERVER_STORAGE" && + test_cmp expect_content "$LOCAL_STORAGE" && + test_cmp expect_content "$SERVER_STORAGE" +} + +test_file_count_in_dir () { + DIR="$1" && + EXPECTED_COUNT="$2" && + find "$DIR" -type f >actual && + test_line_count = $EXPECTED_COUNT actual +} + +test_expect_success 'start p4d' ' + start_p4d +' + +test_expect_success 'Create repo with binary files' ' + client_view "//depot/... //client/..." && + ( + cd "$cli" && + + echo "content 1 txt 23 bytes" >file1.txt && + p4 add file1.txt && + echo "content 2-3 bin 25 bytes" >file2.dat && + p4 add file2.dat && + p4 submit -d "Add text and binary file" && + + mkdir "path with spaces" && + echo "content 2-3 bin 25 bytes" >"path with spaces/file3.bin" && + p4 add "path with spaces/file3.bin" && + p4 submit -d "Add another binary file with same content and spaces in path" && + + echo "content 4 bin 26 bytes XX" >file4.bin && + p4 add file4.bin && + p4 submit -d "Add another binary file with different content" + ) +' + +test_expect_success 'Store files in Mock LFS based on size (>24 bytes)' ' + client_view "//depot/... //client/..." && + test_when_finished cleanup_git && + ( + cd "$git" && + git init . && + git config git-p4.useClientSpec true && + git config git-p4.largeFileSystem MockLFS && + git config git-p4.largeFileThreshold 24 && + git config git-p4.largeFilePush True && + git p4 clone --destination="$git" //depot@all && + + test_file_is_not_in_mock_lfs file1.txt "content 1 txt 23 bytes" && + test_file_is_in_mock_lfs file2.dat "content 2-3 bin 25 bytes" && + test_file_is_in_mock_lfs "path with spaces/file3.bin" "content 2-3 bin 25 bytes" && + test_file_is_in_mock_lfs file4.bin "content 4 bin 26 bytes XX" && + + test_file_count_in_dir ".git/mock-storage/local" 2 && + test_file_count_in_dir ".git/mock-storage/remote" 2 + ) +' + +test_expect_success 'Store files in Mock LFS based on extension (dat)' ' + client_view "//depot/... //client/..." && + test_when_finished cleanup_git && + ( + cd "$git" && + git init . && + git config git-p4.useClientSpec true && + git config git-p4.largeFileSystem MockLFS && + git config git-p4.largeFileExtensions dat && + git config git-p4.largeFilePush True && + git p4 clone --destination="$git" //depot@all && + + test_file_is_not_in_mock_lfs file1.txt "content 1 txt 23 bytes" && + test_file_is_in_mock_lfs file2.dat "content 2-3 bin 25 bytes" && + test_file_is_not_in_mock_lfs "path with spaces/file3.bin" "content 2-3 bin 25 bytes" && + test_file_is_not_in_mock_lfs file4.bin "content 4 bin 26 bytes XX" && + + test_file_count_in_dir ".git/mock-storage/local" 1 && + test_file_count_in_dir ".git/mock-storage/remote" 1 + ) +' + +test_expect_success 'Store files in Mock LFS based on extension (dat) and use git p4 sync and no client spec' ' + test_when_finished cleanup_git && + ( + cd "$git" && + git init && + git config git-p4.useClientSpec true && + git config git-p4.largeFileSystem MockLFS && + git config git-p4.largeFileExtensions dat && + git config git-p4.largeFilePush True && + git p4 sync //depot && + git checkout p4/master && + + test_file_is_not_in_mock_lfs file1.txt "content 1 txt 23 bytes" && + test_file_is_in_mock_lfs file2.dat "content 2-3 bin 25 bytes" && + test_file_is_not_in_mock_lfs "path with spaces/file3.bin" "content 2-3 bin 25 bytes" && + test_file_is_not_in_mock_lfs file4.bin "content 4 bin 26 bytes XX" && + + test_file_count_in_dir ".git/mock-storage/local" 1 && + test_file_count_in_dir ".git/mock-storage/remote" 1 + ) +' + +test_expect_success 'Remove file from repo and store files in Mock LFS based on size (>24 bytes)' ' + client_view "//depot/... //client/..." && + ( + cd "$cli" && + p4 delete file4.bin && + p4 submit -d "Remove file" + ) && + + client_view "//depot/... //client/..." && + test_when_finished cleanup_git && + ( + cd "$git" && + git init . && + git config git-p4.useClientSpec true && + git config git-p4.largeFileSystem MockLFS && + git config git-p4.largeFileThreshold 24 && + git config git-p4.largeFilePush True && + git p4 clone --destination="$git" //depot@all && + + test_file_is_not_in_mock_lfs file1.txt "content 1 txt 23 bytes" && + test_file_is_in_mock_lfs file2.dat "content 2-3 bin 25 bytes" && + test_file_is_in_mock_lfs "path with spaces/file3.bin" "content 2-3 bin 25 bytes" && + test_file_is_deleted_in_mock_lfs file4.bin "content 4 bin 26 bytes XX" && + + test_file_count_in_dir ".git/mock-storage/local" 2 && + test_file_count_in_dir ".git/mock-storage/remote" 2 + ) +' + +test_expect_success 'Run git p4 submit in repo configured with large file system' ' + client_view "//depot/... //client/..." && + test_when_finished cleanup_git && + ( + cd "$git" && + git init . && + git config git-p4.useClientSpec true && + git config git-p4.largeFileSystem MockLFS && + git config git-p4.largeFileThreshold 24 && + git config git-p4.largeFilePush True && + git p4 clone --destination="$git" //depot@all && + + test_must_fail git p4 submit + ) +' + +test_expect_success 'kill p4d' ' + kill_p4d +' + +test_done From b47d807d20374d8dc929ba4c0ca3bd7e8a4ce1f7 Mon Sep 17 00:00:00 2001 From: Lars Schneider Date: Sat, 26 Sep 2015 09:55:04 +0200 Subject: [PATCH 7/7] git-p4: add Git LFS backend for large file system Add example implementation including test cases for the large file system using Git LFS. Pushing files to the Git LFS server is not tested. Signed-off-by: Lars Schneider Signed-off-by: Junio C Hamano --- git-p4.py | 72 ++++++++++ t/t9824-git-p4-git-lfs.sh | 288 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 360 insertions(+) create mode 100755 t/t9824-git-p4-git-lfs.sh diff --git a/git-p4.py b/git-p4.py index 02b5e9e362..765ea5f064 100755 --- a/git-p4.py +++ b/git-p4.py @@ -1038,6 +1038,78 @@ class MockLFS(LargeFileSystem): os.makedirs(remotePath) shutil.copyfile(localLargeFile, os.path.join(remotePath, os.path.basename(localLargeFile))) +class GitLFS(LargeFileSystem): + """Git LFS as backend for the git-p4 large file system. + See https://git-lfs.github.com/ for details.""" + + def __init__(self, *args): + LargeFileSystem.__init__(self, *args) + self.baseGitAttributes = [] + + def generatePointer(self, contentFile): + """Generate a Git LFS pointer for the content. Return LFS Pointer file + mode and content which is stored in the Git repository instead of + the actual content. Return also the new location of the actual + content. + """ + pointerProcess = subprocess.Popen( + ['git', 'lfs', 'pointer', '--file=' + contentFile], + stdout=subprocess.PIPE + ) + pointerFile = pointerProcess.stdout.read() + if pointerProcess.wait(): + os.remove(contentFile) + die('git-lfs pointer command failed. Did you install the extension?') + pointerContents = [i+'\n' for i in pointerFile.split('\n')[2:][:-1]] + oid = pointerContents[1].split(' ')[1].split(':')[1][:-1] + localLargeFile = os.path.join( + os.getcwd(), + '.git', 'lfs', 'objects', oid[:2], oid[2:4], + oid, + ) + # LFS Spec states that pointer files should not have the executable bit set. + gitMode = '100644' + return (gitMode, pointerContents, localLargeFile) + + def pushFile(self, localLargeFile): + uploadProcess = subprocess.Popen( + ['git', 'lfs', 'push', '--object-id', 'origin', os.path.basename(localLargeFile)] + ) + if uploadProcess.wait(): + die('git-lfs push command failed. Did you define a remote?') + + def generateGitAttributes(self): + return ( + self.baseGitAttributes + + [ + '\n', + '#\n', + '# Git LFS (see https://git-lfs.github.com/)\n', + '#\n', + ] + + ['*.' + f.replace(' ', '[[:space:]]') + ' filter=lfs -text\n' + for f in sorted(gitConfigList('git-p4.largeFileExtensions')) + ] + + ['/' + f.replace(' ', '[[:space:]]') + ' filter=lfs -text\n' + for f in sorted(self.largeFiles) if not self.hasLargeFileExtension(f) + ] + ) + + def addLargeFile(self, relPath): + LargeFileSystem.addLargeFile(self, relPath) + self.writeToGitStream('100644', '.gitattributes', self.generateGitAttributes()) + + def removeLargeFile(self, relPath): + LargeFileSystem.removeLargeFile(self, relPath) + self.writeToGitStream('100644', '.gitattributes', self.generateGitAttributes()) + + def processContent(self, git_mode, relPath, contents): + if relPath == '.gitattributes': + self.baseGitAttributes = contents + return (git_mode, self.generateGitAttributes()) + else: + return LargeFileSystem.processContent(self, git_mode, relPath, contents) + class Command: def __init__(self): self.usage = "usage: %prog [options]" diff --git a/t/t9824-git-p4-git-lfs.sh b/t/t9824-git-p4-git-lfs.sh new file mode 100755 index 0000000000..0b664a377c --- /dev/null +++ b/t/t9824-git-p4-git-lfs.sh @@ -0,0 +1,288 @@ +#!/bin/sh + +test_description='Clone repositories and store files in Git LFS' + +. ./lib-git-p4.sh + +git lfs help >/dev/null 2>&1 || { + skip_all='skipping git p4 Git LFS tests; Git LFS not found' + test_done +} + +test_file_in_lfs () { + FILE="$1" && + SIZE="$2" && + EXPECTED_CONTENT="$3" && + cat "$FILE" | grep "size $SIZE" && + HASH=$(cat "$FILE" | grep "oid sha256:" | sed -e "s/oid sha256://g") && + LFS_FILE=".git/lfs/objects/$(echo "$HASH" | cut -c1-2)/$(echo "$HASH" | cut -c3-4)/$HASH" && + echo $EXPECTED_CONTENT >expect && + test_path_is_file "$FILE" && + test_path_is_file "$LFS_FILE" && + test_cmp expect "$LFS_FILE" +} + +test_file_count_in_dir () { + DIR="$1" && + EXPECTED_COUNT="$2" && + find "$DIR" -type f >actual && + test_line_count = $EXPECTED_COUNT actual +} + +test_expect_success 'start p4d' ' + start_p4d +' + +test_expect_success 'Create repo with binary files' ' + client_view "//depot/... //client/..." && + ( + cd "$cli" && + + echo "content 1 txt 23 bytes" >file1.txt && + p4 add file1.txt && + echo "content 2-3 bin 25 bytes" >file2.dat && + p4 add file2.dat && + p4 submit -d "Add text and binary file" && + + mkdir "path with spaces" && + echo "content 2-3 bin 25 bytes" >"path with spaces/file3.bin" && + p4 add "path with spaces/file3.bin" && + p4 submit -d "Add another binary file with same content and spaces in path" && + + echo "content 4 bin 26 bytes XX" >file4.bin && + p4 add file4.bin && + p4 submit -d "Add another binary file with different content" + ) +' + +test_expect_success 'Store files in LFS based on size (>24 bytes)' ' + client_view "//depot/... //client/..." && + test_when_finished cleanup_git && + ( + cd "$git" && + git init . && + git config git-p4.useClientSpec true && + git config git-p4.largeFileSystem GitLFS && + git config git-p4.largeFileThreshold 24 && + git p4 clone --destination="$git" //depot@all && + + test_file_in_lfs file2.dat 25 "content 2-3 bin 25 bytes" && + test_file_in_lfs "path with spaces/file3.bin" 25 "content 2-3 bin 25 bytes" && + test_file_in_lfs file4.bin 26 "content 4 bin 26 bytes XX" && + + test_file_count_in_dir ".git/lfs/objects" 2 && + + cat >expect <<-\EOF && + + # + # Git LFS (see https://git-lfs.github.com/) + # + /file2.dat filter=lfs -text + /file4.bin filter=lfs -text + /path[[:space:]]with[[:space:]]spaces/file3.bin filter=lfs -text + EOF + test_path_is_file .gitattributes && + test_cmp expect .gitattributes + ) +' + +test_expect_success 'Store files in LFS based on size (>25 bytes)' ' + client_view "//depot/... //client/..." && + test_when_finished cleanup_git && + ( + cd "$git" && + git init . && + git config git-p4.useClientSpec true && + git config git-p4.largeFileSystem GitLFS && + git config git-p4.largeFileThreshold 25 && + git p4 clone --destination="$git" //depot@all && + + test_file_in_lfs file4.bin 26 "content 4 bin 26 bytes XX" && + test_file_count_in_dir ".git/lfs/objects" 1 && + + cat >expect <<-\EOF && + + # + # Git LFS (see https://git-lfs.github.com/) + # + /file4.bin filter=lfs -text + EOF + test_path_is_file .gitattributes && + test_cmp expect .gitattributes + ) +' + +test_expect_success 'Store files in LFS based on extension (dat)' ' + client_view "//depot/... //client/..." && + test_when_finished cleanup_git && + ( + cd "$git" && + git init . && + git config git-p4.useClientSpec true && + git config git-p4.largeFileSystem GitLFS && + git config git-p4.largeFileExtensions dat && + git p4 clone --destination="$git" //depot@all && + + test_file_in_lfs file2.dat 25 "content 2-3 bin 25 bytes" && + test_file_count_in_dir ".git/lfs/objects" 1 && + + cat >expect <<-\EOF && + + # + # Git LFS (see https://git-lfs.github.com/) + # + *.dat filter=lfs -text + EOF + test_path_is_file .gitattributes && + test_cmp expect .gitattributes + ) +' + +test_expect_success 'Store files in LFS based on size (>25 bytes) and extension (dat)' ' + client_view "//depot/... //client/..." && + test_when_finished cleanup_git && + ( + cd "$git" && + git init . && + git config git-p4.useClientSpec true && + git config git-p4.largeFileSystem GitLFS && + git config git-p4.largeFileExtensions dat && + git config git-p4.largeFileThreshold 25 && + git p4 clone --destination="$git" //depot@all && + + test_file_in_lfs file2.dat 25 "content 2-3 bin 25 bytes" && + test_file_in_lfs file4.bin 26 "content 4 bin 26 bytes XX" && + test_file_count_in_dir ".git/lfs/objects" 2 && + + cat >expect <<-\EOF && + + # + # Git LFS (see https://git-lfs.github.com/) + # + *.dat filter=lfs -text + /file4.bin filter=lfs -text + EOF + test_path_is_file .gitattributes && + test_cmp expect .gitattributes + ) +' + +test_expect_success 'Remove file from repo and store files in LFS based on size (>24 bytes)' ' + client_view "//depot/... //client/..." && + ( + cd "$cli" && + p4 delete file4.bin && + p4 submit -d "Remove file" + ) && + + client_view "//depot/... //client/..." && + test_when_finished cleanup_git && + ( + cd "$git" && + git init . && + git config git-p4.useClientSpec true && + git config git-p4.largeFileSystem GitLFS && + git config git-p4.largeFileThreshold 24 && + git p4 clone --destination="$git" //depot@all && + + test_file_in_lfs file2.dat 25 "content 2-3 bin 25 bytes" && + test_file_in_lfs "path with spaces/file3.bin" 25 "content 2-3 bin 25 bytes" && + test_path_is_missing file4.bin && + test_file_count_in_dir ".git/lfs/objects" 2 && + + cat >expect <<-\EOF && + + # + # Git LFS (see https://git-lfs.github.com/) + # + /file2.dat filter=lfs -text + /path[[:space:]]with[[:space:]]spaces/file3.bin filter=lfs -text + EOF + test_path_is_file .gitattributes && + test_cmp expect .gitattributes + ) +' + +test_expect_success 'Add .gitattributes and store files in LFS based on size (>24 bytes)' ' + client_view "//depot/... //client/..." && + ( + cd "$cli" && + echo "*.txt text" >.gitattributes && + p4 add .gitattributes && + p4 submit -d "Add .gitattributes" + ) && + + client_view "//depot/... //client/..." && + test_when_finished cleanup_git && + ( + cd "$git" && + git init . && + git config git-p4.useClientSpec true && + git config git-p4.largeFileSystem GitLFS && + git config git-p4.largeFileThreshold 24 && + git p4 clone --destination="$git" //depot@all && + + test_file_in_lfs file2.dat 25 "content 2-3 bin 25 bytes" && + test_file_in_lfs "path with spaces/file3.bin" 25 "content 2-3 bin 25 bytes" && + test_path_is_missing file4.bin && + test_file_count_in_dir ".git/lfs/objects" 2 && + + cat >expect <<-\EOF && + *.txt text + + # + # Git LFS (see https://git-lfs.github.com/) + # + /file2.dat filter=lfs -text + /path[[:space:]]with[[:space:]]spaces/file3.bin filter=lfs -text + EOF + test_path_is_file .gitattributes && + test_cmp expect .gitattributes + ) +' + +test_expect_success 'Add big files to repo and store files in LFS based on compressed size (>28 bytes)' ' + client_view "//depot/... //client/..." && + ( + cd "$cli" && + echo "content 5 bin 40 bytes XXXXXXXXXXXXXXXX" >file5.bin && + p4 add file5.bin && + p4 submit -d "Add file with small footprint after compression" && + + echo "content 6 bin 39 bytes XXXXXYYYYYZZZZZ" >file6.bin && + p4 add file6.bin && + p4 submit -d "Add file with large footprint after compression" + ) && + + client_view "//depot/... //client/..." && + test_when_finished cleanup_git && + ( + cd "$git" && + git init . && + git config git-p4.useClientSpec true && + git config git-p4.largeFileSystem GitLFS && + git config git-p4.largeFileCompressedThreshold 28 && + # We only import HEAD here ("@all" is missing!) + git p4 clone --destination="$git" //depot && + + test_file_in_lfs file6.bin 13 "content 6 bin 39 bytes XXXXXYYYYYZZZZZ" + test_file_count_in_dir ".git/lfs/objects" 1 && + + cat >expect <<-\EOF && + *.txt text + + # + # Git LFS (see https://git-lfs.github.com/) + # + /file6.bin filter=lfs -text + EOF + test_path_is_file .gitattributes && + test_cmp expect .gitattributes + ) +' + +test_expect_success 'kill p4d' ' + kill_p4d +' + +test_done