git-p4: fixing --changes-block-size handling

The --changes-block-size handling was intended to help when
a user has a limited "maxscanrows" (see "p4 group"). It used
"p4 changes -m $maxchanges" to limit the number of results.

Unfortunately, it turns out that the "maxscanrows" and "maxresults"
limits are actually applied *before* the "-m maxchanges" parameter
is considered (experimentally).

Fix the block-size handling so that it gets blocks of changes
limited by revision number ($Start..$Start+$N, etc). This limits
the number of results early enough that both sets of tests pass.

Note that many other Perforce operations can fail for the same
reason (p4 print, p4 files, etc) and it's probably not possible
to workaround this. In the real world, this is probably not
usually a problem.

Signed-off-by: Luke Diamand <luke@diamand.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Luke Diamand 2015-06-10 08:30:59 +01:00 committed by Junio C Hamano
parent eceafffbec
commit 1051ef0063
2 changed files with 70 additions and 29 deletions

View File

@ -43,6 +43,9 @@ verbose = False
# Only labels/tags matching this will be imported/exported # Only labels/tags matching this will be imported/exported
defaultLabelRegexp = r'[a-zA-Z0-9_\-.]+$' defaultLabelRegexp = r'[a-zA-Z0-9_\-.]+$'
# Grab changes in blocks of this many revisions, unless otherwise requested
defaultBlockSize = 512
def p4_build_cmd(cmd): def p4_build_cmd(cmd):
"""Build a suitable p4 command line. """Build a suitable p4 command line.
@ -249,6 +252,10 @@ def p4_reopen(type, f):
def p4_move(src, dest): def p4_move(src, dest):
p4_system(["move", "-k", wildcard_encode(src), wildcard_encode(dest)]) p4_system(["move", "-k", wildcard_encode(src), wildcard_encode(dest)])
def p4_last_change():
results = p4CmdList(["changes", "-m", "1"])
return int(results[0]['change'])
def p4_describe(change): def p4_describe(change):
"""Make sure it returns a valid result by checking for """Make sure it returns a valid result by checking for
the presence of field "time". Return a dict of the the presence of field "time". Return a dict of the
@ -742,43 +749,77 @@ def createOrUpdateBranchesFromOrigin(localRefPrefix = "refs/remotes/p4/", silent
def originP4BranchesExist(): def originP4BranchesExist():
return gitBranchExists("origin") or gitBranchExists("origin/p4") or gitBranchExists("origin/p4/master") return gitBranchExists("origin") or gitBranchExists("origin/p4") or gitBranchExists("origin/p4/master")
def p4ChangesForPaths(depotPaths, changeRange, block_size):
assert depotPaths
assert block_size
# Parse the change range into start and end def p4ParseNumericChangeRange(parts):
changeStart = int(parts[0][1:])
if parts[1] == '#head':
changeEnd = p4_last_change()
else:
changeEnd = int(parts[1])
return (changeStart, changeEnd)
def chooseBlockSize(blockSize):
if blockSize:
return blockSize
else:
return defaultBlockSize
def p4ChangesForPaths(depotPaths, changeRange, requestedBlockSize):
assert depotPaths
# Parse the change range into start and end. Try to find integer
# revision ranges as these can be broken up into blocks to avoid
# hitting server-side limits (maxrows, maxscanresults). But if
# that doesn't work, fall back to using the raw revision specifier
# strings, without using block mode.
if changeRange is None or changeRange == '': if changeRange is None or changeRange == '':
changeStart = '@1' changeStart = 1
changeEnd = '#head' changeEnd = p4_last_change()
block_size = chooseBlockSize(requestedBlockSize)
else: else:
parts = changeRange.split(',') parts = changeRange.split(',')
assert len(parts) == 2 assert len(parts) == 2
changeStart = parts[0] try:
changeEnd = parts[1] (changeStart, changeEnd) = p4ParseNumericChangeRange(parts)
block_size = chooseBlockSize(requestedBlockSize)
except:
changeStart = parts[0][1:]
changeEnd = parts[1]
if requestedBlockSize:
die("cannot use --changes-block-size with non-numeric revisions")
block_size = None
# Accumulate change numbers in a dictionary to avoid duplicates # Accumulate change numbers in a dictionary to avoid duplicates
changes = {} changes = {}
for p in depotPaths: for p in depotPaths:
# Retrieve changes a block at a time, to prevent running # Retrieve changes a block at a time, to prevent running
# into a MaxScanRows error from the server. # into a MaxResults/MaxScanRows error from the server.
start = changeStart
end = changeEnd while True:
get_another_block = True
while get_another_block:
new_changes = []
cmd = ['changes'] cmd = ['changes']
cmd += ['-m', str(block_size)]
cmd += ["%s...%s,%s" % (p, start, end)] if block_size:
end = min(changeEnd, changeStart + block_size)
revisionRange = "%d,%d" % (changeStart, end)
else:
revisionRange = "%s,%s" % (changeStart, changeEnd)
cmd += ["%s...@%s" % (p, revisionRange)]
for line in p4_read_pipe_lines(cmd): for line in p4_read_pipe_lines(cmd):
changeNum = int(line.split(" ")[1]) changeNum = int(line.split(" ")[1])
new_changes.append(changeNum)
changes[changeNum] = True changes[changeNum] = True
if len(new_changes) == block_size:
get_another_block = True if not block_size:
end = '@' + str(min(new_changes)) break
else:
get_another_block = False if end >= changeEnd:
break
changeStart = end + 1
changelist = changes.keys() changelist = changes.keys()
changelist.sort() changelist.sort()
@ -1974,7 +2015,7 @@ class P4Sync(Command, P4UserMap):
self.syncWithOrigin = True self.syncWithOrigin = True
self.importIntoRemotes = True self.importIntoRemotes = True
self.maxChanges = "" self.maxChanges = ""
self.changes_block_size = 500 self.changes_block_size = None
self.keepRepoPath = False self.keepRepoPath = False
self.depotPaths = None self.depotPaths = None
self.p4BranchesInGit = [] self.p4BranchesInGit = []

View File

@ -49,11 +49,11 @@ test_expect_success 'Default user cannot fetch changes' '
! p4 changes -m 1 //depot/... ! p4 changes -m 1 //depot/...
' '
test_expect_failure 'Clone the repo' ' test_expect_success 'Clone the repo' '
git p4 clone --dest="$git" --changes-block-size=7 --verbose //depot/included@all git p4 clone --dest="$git" --changes-block-size=7 --verbose //depot/included@all
' '
test_expect_failure 'All files are present' ' test_expect_success 'All files are present' '
echo file.txt >expected && echo file.txt >expected &&
test_write_lines outer0.txt outer1.txt outer2.txt outer3.txt outer4.txt >>expected && test_write_lines outer0.txt outer1.txt outer2.txt outer3.txt outer4.txt >>expected &&
test_write_lines outer5.txt >>expected && test_write_lines outer5.txt >>expected &&
@ -61,18 +61,18 @@ test_expect_failure 'All files are present' '
test_cmp expected current test_cmp expected current
' '
test_expect_failure 'file.txt is correct' ' test_expect_success 'file.txt is correct' '
echo 55 >expected && echo 55 >expected &&
test_cmp expected "$git/file.txt" test_cmp expected "$git/file.txt"
' '
test_expect_failure 'Correct number of commits' ' test_expect_success 'Correct number of commits' '
(cd "$git" && git log --oneline) >log && (cd "$git" && git log --oneline) >log &&
wc -l log && wc -l log &&
test_line_count = 43 log test_line_count = 43 log
' '
test_expect_failure 'Previous version of file.txt is correct' ' test_expect_success 'Previous version of file.txt is correct' '
(cd "$git" && git checkout HEAD^^) && (cd "$git" && git checkout HEAD^^) &&
echo 53 >expected && echo 53 >expected &&
test_cmp expected "$git/file.txt" test_cmp expected "$git/file.txt"
@ -102,7 +102,7 @@ test_expect_success 'Add some more files' '
# This should pick up the 10 new files in "included", but not be confused # This should pick up the 10 new files in "included", but not be confused
# by the additional files in "excluded" # by the additional files in "excluded"
test_expect_failure 'Syncing files' ' test_expect_success 'Syncing files' '
( (
cd "$git" && cd "$git" &&
git p4 sync --changes-block-size=7 && git p4 sync --changes-block-size=7 &&