2019-06-18 20:14:28 +02:00
|
|
|
#!/bin/sh
|
|
|
|
|
|
|
|
test_description='split commit graph'
|
|
|
|
. ./test-lib.sh
|
|
|
|
|
|
|
|
GIT_TEST_COMMIT_GRAPH=0
|
2020-04-06 18:59:55 +02:00
|
|
|
GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0
|
2019-06-18 20:14:28 +02:00
|
|
|
|
|
|
|
test_expect_success 'setup repo' '
|
|
|
|
git init &&
|
|
|
|
git config core.commitGraph true &&
|
2019-08-13 20:37:45 +02:00
|
|
|
git config gc.writeCommitGraph false &&
|
2019-06-18 20:14:28 +02:00
|
|
|
infodir=".git/objects/info" &&
|
|
|
|
graphdir="$infodir/commit-graphs" &&
|
2019-12-21 20:49:27 +01:00
|
|
|
test_oid_cache <<-EOM
|
|
|
|
shallow sha1:1760
|
|
|
|
shallow sha256:2064
|
|
|
|
|
|
|
|
base sha1:1376
|
|
|
|
base sha256:1496
|
2020-08-17 16:04:47 +02:00
|
|
|
|
|
|
|
oid_version sha1:1
|
|
|
|
oid_version sha256:2
|
2019-12-21 20:49:27 +01:00
|
|
|
EOM
|
2019-06-18 20:14:28 +02:00
|
|
|
'
|
|
|
|
|
|
|
|
# Check the header that `test-tool read-graph` reports for the current
# commit-graph.
#
# $1 - expected number of commits
# $2 - (optional) expected number of base graphs; defaults to 0
#
# Writes the expectation to ./expect and the tool output to ./output.
# Returns non-zero if any step fails or if the two files differ.
graph_read_expect() {
	# ${2:-0} covers both "unset" and "empty" without relying on the
	# accidental behaviour of an unquoted `test ! -z $2`.
	NUM_BASE=${2:-0}
	# Every step is chained so that a failure to write ./expect is not
	# silently ignored.
	printf "%s\n" \
		"header: 43475048 1 $(test_oid oid_version) 3 $NUM_BASE" \
		"num_commits: $1" \
		"chunks: oid_fanout oid_lookup commit_metadata" >expect &&
	test-tool read-graph >output &&
	test_cmp expect output
}
|
|
|
|
|
2020-04-29 19:36:42 +02:00
|
|
|
test_expect_success POSIXPERM 'tweak umask for modebit tests' '
|
|
|
|
umask 022
|
|
|
|
'
|
|
|
|
|
2019-06-18 20:14:28 +02:00
|
|
|
test_expect_success 'create commits and write commit-graph' '
|
|
|
|
for i in $(test_seq 3)
|
|
|
|
do
|
|
|
|
test_commit $i &&
|
|
|
|
git branch commits/$i || return 1
|
|
|
|
done &&
|
|
|
|
git commit-graph write --reachable &&
|
|
|
|
test_path_is_file $infodir/commit-graph &&
|
|
|
|
graph_read_expect 3
|
|
|
|
'
|
|
|
|
|
|
|
|
# Run one git command twice -- with core.commitGraph enabled (stdout saved
# to ./output) and disabled (stdout saved to ./expect) -- and compare.
#
# $1 - the git subcommand and its arguments as a single string; it is
#      intentionally expanded unquoted so that it is split into words
#
# Returns non-zero if either git invocation fails or the outputs differ.
graph_git_two_modes() {
	# Chain the invocations: without "&&" a git that fails in both modes
	# leaves two empty files that compare equal.
	git -c core.commitGraph=true $1 >output &&
	git -c core.commitGraph=false $1 >expect &&
	test_cmp expect output
}
|
|
|
|
|
|
|
|
# Register a test that runs several read-only git commands through
# graph_git_two_modes, i.e. with the commit-graph enabled and disabled.
#
# $1 - text appended to the test title
# $2 - branch to inspect
# $3 - branch to compare against
#
# MSG, BRANCH and COMPARE are plain (global) variables: the test body is
# single-quoted, so $BRANCH and $COMPARE are not expanded here but,
# presumably, when test_expect_success evaluates the body.
graph_git_behavior() {
	MSG=$1 BRANCH=$2 COMPARE=$3
	test_expect_success "check normal git operations: $MSG" '
		graph_git_two_modes "log --oneline $BRANCH" &&
		graph_git_two_modes "log --topo-order $BRANCH" &&
		graph_git_two_modes "log --graph $COMPARE..$BRANCH" &&
		graph_git_two_modes "branch -vv" &&
		graph_git_two_modes "merge-base -a $BRANCH $COMPARE"
	'
}
|
|
|
|
|
|
|
|
graph_git_behavior 'graph exists' commits/3 commits/1
|
|
|
|
|
|
|
|
# Verify that every layer named in a commit-graph chain exists on disk.
#
# $1 - directory containing "commit-graph-chain" and the
#      "graph-<hash>.graph" files it lists (one hash per line)
#
# Returns non-zero if the chain file is missing or if any listed layer is
# missing.
verify_chain_files_exist() {
	# A missing chain file must be an error: looping over
	# $(cat missing-file) would iterate zero times and succeed vacuously.
	test_path_is_file "$1/commit-graph-chain" || return 1
	while read -r hash || test -n "$hash"
	do
		# Skip blank lines, which word-splitting used to drop.
		test -n "$hash" || continue
		test_path_is_file "$1/graph-$hash.graph" || return 1
	done <"$1/commit-graph-chain"
}
|
|
|
|
|
|
|
|
test_expect_success 'add more commits, and write a new base graph' '
|
|
|
|
git reset --hard commits/1 &&
|
|
|
|
for i in $(test_seq 4 5)
|
|
|
|
do
|
|
|
|
test_commit $i &&
|
|
|
|
git branch commits/$i || return 1
|
|
|
|
done &&
|
|
|
|
git reset --hard commits/2 &&
|
|
|
|
for i in $(test_seq 6 10)
|
|
|
|
do
|
|
|
|
test_commit $i &&
|
|
|
|
git branch commits/$i || return 1
|
|
|
|
done &&
|
|
|
|
git reset --hard commits/2 &&
|
|
|
|
git merge commits/4 &&
|
|
|
|
git branch merge/1 &&
|
|
|
|
git reset --hard commits/4 &&
|
|
|
|
git merge commits/6 &&
|
|
|
|
git branch merge/2 &&
|
|
|
|
git commit-graph write --reachable &&
|
|
|
|
graph_read_expect 12
|
|
|
|
'
|
|
|
|
|
2019-06-18 20:14:30 +02:00
|
|
|
test_expect_success 'fork and fail to base a chain on a commit-graph file' '
|
|
|
|
test_when_finished rm -rf fork &&
|
|
|
|
git clone . fork &&
|
|
|
|
(
|
|
|
|
cd fork &&
|
|
|
|
rm .git/objects/info/commit-graph &&
|
|
|
|
echo "$(pwd)/../.git/objects" >.git/objects/info/alternates &&
|
|
|
|
test_commit new-commit &&
|
|
|
|
git commit-graph write --reachable --split &&
|
|
|
|
test_path_is_file $graphdir/commit-graph-chain &&
|
|
|
|
test_line_count = 1 $graphdir/commit-graph-chain &&
|
|
|
|
verify_chain_files_exist $graphdir
|
|
|
|
)
|
|
|
|
'
|
|
|
|
|
2019-06-18 20:14:28 +02:00
|
|
|
test_expect_success 'add three more commits, write a tip graph' '
|
|
|
|
git reset --hard commits/3 &&
|
|
|
|
git merge merge/1 &&
|
|
|
|
git merge commits/5 &&
|
|
|
|
git merge merge/2 &&
|
|
|
|
git branch merge/3 &&
|
|
|
|
git commit-graph write --reachable --split &&
|
|
|
|
test_path_is_missing $infodir/commit-graph &&
|
|
|
|
test_path_is_file $graphdir/commit-graph-chain &&
|
|
|
|
ls $graphdir/graph-*.graph >graph-files &&
|
|
|
|
test_line_count = 2 graph-files &&
|
|
|
|
verify_chain_files_exist $graphdir
|
|
|
|
'
|
|
|
|
|
|
|
|
graph_git_behavior 'split commit-graph: merge 3 vs 2' merge/3 merge/2
|
|
|
|
|
|
|
|
test_expect_success 'add one commit, write a tip graph' '
|
|
|
|
test_commit 11 &&
|
|
|
|
git branch commits/11 &&
|
|
|
|
git commit-graph write --reachable --split &&
|
|
|
|
test_path_is_missing $infodir/commit-graph &&
|
|
|
|
test_path_is_file $graphdir/commit-graph-chain &&
|
|
|
|
ls $graphdir/graph-*.graph >graph-files &&
|
|
|
|
test_line_count = 3 graph-files &&
|
|
|
|
verify_chain_files_exist $graphdir
|
|
|
|
'
|
|
|
|
|
|
|
|
graph_git_behavior 'three-layer commit-graph: commit 11 vs 6' commits/11 commits/6
|
|
|
|
|
commit-graph: merge commit-graph chains
When searching for a commit in a commit-graph chain of G graphs with N
commits, the search takes O(G log N) time. If we always add a new tip
graph with every write, the linear G term will start to dominate and
slow the lookup process.
To keep lookups fast, but also keep most incremental writes fast, create
a strategy for merging levels of the commit-graph chain. The strategy is
detailed in the commit-graph design document, but is summarized by these
two conditions:
1. If the number of commits we are adding is more than half the number
of commits in the graph below, then merge with that graph.
2. If we are writing more than 64,000 commits into a single graph,
then merge with all lower graphs.
The numeric values in the conditions above are currently constant, but
can become config options in a future update.
As we merge levels of the commit-graph chain, check that the commits
still exist in the repository. A garbage-collection operation may have
removed those commits from the object store and we do not want to
persist them in the commit-graph chain. This is a non-issue if the
'git gc' process wrote a new, single-level commit-graph file.
After we merge levels, the old graph-{hash}.graph files are no longer
referenced by the commit-graph-chain file. We will expire these files in
a future change.
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-06-18 20:14:29 +02:00
|
|
|
test_expect_success 'add one commit, write a merged graph' '
|
|
|
|
test_commit 12 &&
|
|
|
|
git branch commits/12 &&
|
|
|
|
git commit-graph write --reachable --split &&
|
|
|
|
test_path_is_file $graphdir/commit-graph-chain &&
|
|
|
|
test_line_count = 2 $graphdir/commit-graph-chain &&
|
|
|
|
ls $graphdir/graph-*.graph >graph-files &&
|
2019-06-18 20:14:31 +02:00
|
|
|
test_line_count = 2 graph-files &&
|
commit-graph: merge commit-graph chains
When searching for a commit in a commit-graph chain of G graphs with N
commits, the search takes O(G log N) time. If we always add a new tip
graph with every write, the linear G term will start to dominate and
slow the lookup process.
To keep lookups fast, but also keep most incremental writes fast, create
a strategy for merging levels of the commit-graph chain. The strategy is
detailed in the commit-graph design document, but is summarized by these
two conditions:
1. If the number of commits we are adding is more than half the number
of commits in the graph below, then merge with that graph.
2. If we are writing more than 64,000 commits into a single graph,
then merge with all lower graphs.
The numeric values in the conditions above are currently constant, but
can become config options in a future update.
As we merge levels of the commit-graph chain, check that the commits
still exist in the repository. A garbage-collection operation may have
removed those commits from the object store and we do not want to
persist them in the commit-graph chain. This is a non-issue if the
'git gc' process wrote a new, single-level commit-graph file.
After we merge levels, the old graph-{hash}.graph files are no longer
referenced by the commit-graph-chain file. We will expire these files in
a future change.
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-06-18 20:14:29 +02:00
|
|
|
verify_chain_files_exist $graphdir
|
|
|
|
'
|
|
|
|
|
|
|
|
graph_git_behavior 'merged commit-graph: commit 12 vs 6' commits/12 commits/6
|
|
|
|
|
2019-06-18 20:14:30 +02:00
|
|
|
test_expect_success 'create fork and chain across alternate' '
|
|
|
|
git clone . fork &&
|
|
|
|
(
|
|
|
|
cd fork &&
|
|
|
|
git config core.commitGraph true &&
|
|
|
|
rm -rf $graphdir &&
|
|
|
|
echo "$(pwd)/../.git/objects" >.git/objects/info/alternates &&
|
|
|
|
test_commit 13 &&
|
|
|
|
git branch commits/13 &&
|
|
|
|
git commit-graph write --reachable --split &&
|
|
|
|
test_path_is_file $graphdir/commit-graph-chain &&
|
|
|
|
test_line_count = 3 $graphdir/commit-graph-chain &&
|
|
|
|
ls $graphdir/graph-*.graph >graph-files &&
|
|
|
|
test_line_count = 1 graph-files &&
|
|
|
|
git -c core.commitGraph=true rev-list HEAD >expect &&
|
|
|
|
git -c core.commitGraph=false rev-list HEAD >actual &&
|
2019-06-18 20:14:36 +02:00
|
|
|
test_cmp expect actual &&
|
|
|
|
test_commit 14 &&
|
|
|
|
git commit-graph write --reachable --split --object-dir=.git/objects/ &&
|
|
|
|
test_line_count = 3 $graphdir/commit-graph-chain &&
|
|
|
|
ls $graphdir/graph-*.graph >graph-files &&
|
|
|
|
test_line_count = 1 graph-files
|
2019-06-18 20:14:30 +02:00
|
|
|
)
|
|
|
|
'
|
|
|
|
|
|
|
|
graph_git_behavior 'alternate: commit 13 vs 6' commits/13 commits/6
|
|
|
|
|
2019-06-18 20:14:32 +02:00
|
|
|
# Exercise the knobs of the split-graph merge strategy in four fresh clones:
# --size-multiple=2, --size-multiple=10, --size-multiple=10 combined with
# --expire-time, and --max-commits=1.  Each clone checks the resulting
# number of lines in the chain file and, where relevant, which
# graph-*.graph files remain.
test_expect_success 'test merge strategy constants' '
	git clone . merge-2 &&
	(
		cd merge-2 &&
		git config core.commitGraph true &&
		test_line_count = 2 $graphdir/commit-graph-chain &&
		test_commit 14 &&
		git commit-graph write --reachable --split --size-multiple=2 &&
		test_line_count = 3 $graphdir/commit-graph-chain

	) &&
	git clone . merge-10 &&
	(
		cd merge-10 &&
		git config core.commitGraph true &&
		test_line_count = 2 $graphdir/commit-graph-chain &&
		test_commit 14 &&
		git commit-graph write --reachable --split --size-multiple=10 &&
		test_line_count = 1 $graphdir/commit-graph-chain &&
		ls $graphdir/graph-*.graph >graph-files &&
		test_line_count = 1 graph-files
	) &&
	git clone . merge-10-expire &&
	(
		cd merge-10-expire &&
		git config core.commitGraph true &&
		test_line_count = 2 $graphdir/commit-graph-chain &&
		test_commit 15 &&
		touch $graphdir/to-delete.graph $graphdir/to-keep.graph &&
		test-tool chmtime =1546362000 $graphdir/to-delete.graph &&
		test-tool chmtime =1546362001 $graphdir/to-keep.graph &&
		git commit-graph write --reachable --split --size-multiple=10 \
			--expire-time="2019-01-01 12:00 -05:00" &&
		test_line_count = 1 $graphdir/commit-graph-chain &&
		test_path_is_missing $graphdir/to-delete.graph &&
		test_path_is_file $graphdir/to-keep.graph &&
		ls $graphdir/graph-*.graph >graph-files &&
		test_line_count = 3 graph-files
	) &&
	git clone --no-hardlinks . max-commits &&
	(
		cd max-commits &&
		git config core.commitGraph true &&
		test_line_count = 2 $graphdir/commit-graph-chain &&
		test_commit 16 &&
		test_commit 17 &&
		git commit-graph write --reachable --split --max-commits=1 &&
		test_line_count = 1 $graphdir/commit-graph-chain &&
		ls $graphdir/graph-*.graph >graph-files &&
		test_line_count = 1 graph-files
	)
'
|
|
|
|
|
2019-06-18 20:14:33 +02:00
|
|
|
test_expect_success 'remove commit-graph-chain file after flattening' '
|
|
|
|
git clone . flatten &&
|
|
|
|
(
|
|
|
|
cd flatten &&
|
|
|
|
test_line_count = 2 $graphdir/commit-graph-chain &&
|
|
|
|
git commit-graph write --reachable &&
|
|
|
|
test_path_is_missing $graphdir/commit-graph-chain &&
|
|
|
|
ls $graphdir >graph-files &&
|
|
|
|
test_line_count = 0 graph-files
|
|
|
|
)
|
|
|
|
'
|
|
|
|
|
2019-06-18 20:14:32 +02:00
|
|
|
# Overwrite bytes of a file in place, without truncating it.
#
# $1 - file to modify (made writable for everyone first)
# $2 - byte offset at which writing starts
# $3 - (optional) data to write; it is used as the printf format, so
#      escapes such as "\01" are interpreted.  Defaults to "\0".
#
# Returns the status of chmod if it fails, otherwise that of the
# printf | dd pipeline.
corrupt_file() {
	file=$1
	pos=$2
	data=$3
	# Fall back to the default when $3 is unset or empty.
	test -n "$data" || data='\0'
	chmod a+w "$file" || return
	printf "$data" | dd of="$file" bs=1 seek="$pos" conv=notrunc
}
|
|
|
|
|
|
|
|
test_expect_success 'verify hashes along chain, even in shallow' '
|
|
|
|
git clone --no-hardlinks . verify &&
|
|
|
|
(
|
|
|
|
cd verify &&
|
|
|
|
git commit-graph verify &&
|
|
|
|
base_file=$graphdir/graph-$(head -n 1 $graphdir/commit-graph-chain).graph &&
|
2019-12-21 20:49:27 +01:00
|
|
|
corrupt_file "$base_file" $(test_oid shallow) "\01" &&
|
2019-06-18 20:14:32 +02:00
|
|
|
test_must_fail git commit-graph verify --shallow 2>test_err &&
|
|
|
|
grep -v "^+" test_err >err &&
|
|
|
|
test_i18ngrep "incorrect checksum" err
|
|
|
|
)
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'verify --shallow does not check base contents' '
|
|
|
|
git clone --no-hardlinks . verify-shallow &&
|
|
|
|
(
|
|
|
|
cd verify-shallow &&
|
|
|
|
git commit-graph verify &&
|
|
|
|
base_file=$graphdir/graph-$(head -n 1 $graphdir/commit-graph-chain).graph &&
|
|
|
|
corrupt_file "$base_file" 1000 "\01" &&
|
|
|
|
git commit-graph verify --shallow &&
|
|
|
|
test_must_fail git commit-graph verify 2>test_err &&
|
|
|
|
grep -v "^+" test_err >err &&
|
|
|
|
test_i18ngrep "incorrect checksum" err
|
|
|
|
)
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'warn on base graph chunk incorrect' '
|
|
|
|
git clone --no-hardlinks . base-chunk &&
|
|
|
|
(
|
|
|
|
cd base-chunk &&
|
|
|
|
git commit-graph verify &&
|
|
|
|
base_file=$graphdir/graph-$(tail -n 1 $graphdir/commit-graph-chain).graph &&
|
2019-12-21 20:49:27 +01:00
|
|
|
corrupt_file "$base_file" $(test_oid base) "\01" &&
|
2019-06-18 20:14:32 +02:00
|
|
|
git commit-graph verify --shallow 2>test_err &&
|
|
|
|
grep -v "^+" test_err >err &&
|
|
|
|
test_i18ngrep "commit-graph chain does not match" err
|
|
|
|
)
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'verify after commit-graph-chain corruption' '
|
|
|
|
git clone --no-hardlinks . verify-chain &&
|
|
|
|
(
|
|
|
|
cd verify-chain &&
|
|
|
|
corrupt_file "$graphdir/commit-graph-chain" 60 "G" &&
|
|
|
|
git commit-graph verify 2>test_err &&
|
|
|
|
grep -v "^+" test_err >err &&
|
|
|
|
test_i18ngrep "invalid commit-graph chain" err &&
|
|
|
|
corrupt_file "$graphdir/commit-graph-chain" 60 "A" &&
|
|
|
|
git commit-graph verify 2>test_err &&
|
|
|
|
grep -v "^+" test_err >err &&
|
|
|
|
test_i18ngrep "unable to find all commit-graph files" err
|
|
|
|
)
|
|
|
|
'
|
|
|
|
|
2019-06-18 20:14:36 +02:00
|
|
|
test_expect_success 'verify across alternates' '
|
|
|
|
git clone --no-hardlinks . verify-alt &&
|
|
|
|
(
|
|
|
|
cd verify-alt &&
|
|
|
|
rm -rf $graphdir &&
|
|
|
|
altdir="$(pwd)/../.git/objects" &&
|
|
|
|
echo "$altdir" >.git/objects/info/alternates &&
|
|
|
|
git commit-graph verify --object-dir="$altdir/" &&
|
|
|
|
test_commit extra &&
|
|
|
|
git commit-graph write --reachable --split &&
|
|
|
|
tip_file=$graphdir/graph-$(tail -n 1 $graphdir/commit-graph-chain).graph &&
|
|
|
|
corrupt_file "$tip_file" 100 "\01" &&
|
|
|
|
test_must_fail git commit-graph verify --shallow 2>test_err &&
|
|
|
|
grep -v "^+" test_err >err &&
|
|
|
|
test_i18ngrep "commit-graph has incorrect fanout value" err
|
|
|
|
)
|
|
|
|
'
|
|
|
|
|
2019-06-18 20:14:34 +02:00
|
|
|
test_expect_success 'add octopus merge' '
|
|
|
|
git reset --hard commits/10 &&
|
|
|
|
git merge commits/3 commits/4 &&
|
|
|
|
git branch merge/octopus &&
|
|
|
|
git commit-graph write --reachable --split &&
|
2019-08-26 18:29:58 +02:00
|
|
|
git commit-graph verify --progress 2>err &&
|
commit-graph: fix bug around octopus merges
In 1771be90 "commit-graph: merge commit-graph chains" (2019-06-18),
the method sort_and_scan_merged_commits() was added to merge the
commit lists of two commit-graph files in the incremental format.
Unfortunately, there was an off-by-one error in that method around
incrementing num_extra_edges, which leads to an incorrect offset
for the base graph chunk.
When we store an octopus merge in the commit-graph file, we store
the first parent in the normal place, but use the second parent
position to point into the "extra edges" chunk where the remaining
parents exist. This means we should be adding "num_parents - 1"
edges to this list, not "num_parents - 2". That is the basic error.
The reason this was not caught in the test suite is more subtle.
In 5324-split-commit-graph.sh, we test creating an octopus merge
and adding it to the tip of a commit-graph chain, then verify the
result. This _should_ have caught the problem, except that when
we load the commit-graph files we were overly careful to not fail
when the commit-graph chain does not match. This care was on
purpose to avoid race conditions as one process reads the chain
and another process modifies it. In such a case, the reading
process outputs the following message to stderr:
warning: commit-graph chain does not match
These warnings are output in the test suite, but ignored. By
checking the stderr of `git commit-graph verify` to include
the expected progress output, it will now catch this error.
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-08-05 18:43:41 +02:00
|
|
|
test_line_count = 3 err &&
|
|
|
|
test_i18ngrep ! warning err &&
|
2019-06-18 20:14:34 +02:00
|
|
|
test_line_count = 3 $graphdir/commit-graph-chain
|
|
|
|
'
|
|
|
|
|
|
|
|
graph_git_behavior 'graph exists' merge/octopus commits/12
|
|
|
|
|
2019-06-18 20:14:35 +02:00
|
|
|
test_expect_success 'split across alternate where alternate is not split' '
|
|
|
|
git commit-graph write --reachable &&
|
|
|
|
test_path_is_file .git/objects/info/commit-graph &&
|
|
|
|
cp .git/objects/info/commit-graph . &&
|
|
|
|
git clone --no-hardlinks . alt-split &&
|
|
|
|
(
|
|
|
|
cd alt-split &&
|
2019-08-13 20:37:45 +02:00
|
|
|
rm -f .git/objects/info/commit-graph &&
|
2019-06-18 20:14:35 +02:00
|
|
|
echo "$(pwd)"/../.git/objects >.git/objects/info/alternates &&
|
|
|
|
test_commit 18 &&
|
|
|
|
git commit-graph write --reachable --split &&
|
|
|
|
test_line_count = 1 $graphdir/commit-graph-chain
|
|
|
|
) &&
|
|
|
|
test_cmp commit-graph .git/objects/info/commit-graph
|
|
|
|
'
|
|
|
|
|
builtin/commit-graph.c: introduce split strategy 'no-merge'
In the previous commit, we laid the groundwork for supporting different
splitting strategies. In this commit, we introduce the first splitting
strategy: 'no-merge'.
Passing '--split=no-merge' is useful for callers which wish to write a
new incremental commit-graph, but do not want to spend effort condensing
the incremental chain [1]. Previously, this was possible by passing
'--size-multiple=0', but this is no longer the case following 63020f175f
(commit-graph: prefer default size_mult when given zero, 2020-01-02).
When '--split=no-merge' is given, the commit-graph machinery will never
condense an existing chain, and it will always write a new incremental.
[1]: This might occur when, for example, a server administrator running
some program after each push may want to ensure that each job runs
proportional in time to the size of the push, and does not "jump" when
the commit-graph machinery decides to trigger a merge.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-14 06:04:12 +02:00
|
|
|
test_expect_success '--split=no-merge always writes an incremental' '
|
|
|
|
test_when_finished rm -rf a b &&
|
|
|
|
rm -rf $graphdir $infodir/commit-graph &&
|
|
|
|
git reset --hard commits/2 &&
|
|
|
|
git rev-list HEAD~1 >a &&
|
|
|
|
git rev-list HEAD >b &&
|
|
|
|
git commit-graph write --split --stdin-commits <a &&
|
|
|
|
git commit-graph write --split=no-merge --stdin-commits <b &&
|
|
|
|
test_line_count = 2 $graphdir/commit-graph-chain
|
|
|
|
'
|
|
|
|
|
builtin/commit-graph.c: introduce split strategy 'replace'
When using split commit-graphs, it is sometimes useful to completely
replace the commit-graph chain with a new base.
For example, consider a scenario in which a repository builds a new
commit-graph incremental for each push. Occasionally (say, after some
fixed number of pushes), they may wish to rebuild the commit-graph chain
with all reachable commits.
They can do so with
$ git commit-graph write --reachable
but this removes the chain entirely and replaces it with a single
commit-graph in 'objects/info/commit-graph'. Unfortunately, this means
that the next push will have to move this commit-graph into the first
layer of a new chain, and then write its new commits on top.
Avoid such copying entirely by allowing the caller to specify that they
wish to replace the entirety of their commit-graph chain, while also
specifying that the new commit-graph should become the basis of a fresh,
length-one chain.
This addresses the above situation by making it possible for the caller
to instead write:
$ git commit-graph write --reachable --split=replace
which writes a new length-one chain to 'objects/info/commit-graphs',
making the commit-graph incremental generated by the subsequent push
relatively cheap by avoiding the aforementioned copy.
In order to do this, remove an assumption in 'write_commit_graph_file'
that chains are always at least two incrementals long.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-14 06:04:17 +02:00
|
|
|
test_expect_success '--split=replace replaces the chain' '
|
|
|
|
rm -rf $graphdir $infodir/commit-graph &&
|
|
|
|
git reset --hard commits/3 &&
|
|
|
|
git rev-list -1 HEAD~2 >a &&
|
|
|
|
git rev-list -1 HEAD~1 >b &&
|
|
|
|
git rev-list -1 HEAD >c &&
|
|
|
|
git commit-graph write --split=no-merge --stdin-commits <a &&
|
|
|
|
git commit-graph write --split=no-merge --stdin-commits <b &&
|
|
|
|
git commit-graph write --split=no-merge --stdin-commits <c &&
|
|
|
|
test_line_count = 3 $graphdir/commit-graph-chain &&
|
|
|
|
git commit-graph write --stdin-commits --split=replace <b &&
|
|
|
|
test_path_is_missing $infodir/commit-graph &&
|
|
|
|
test_path_is_file $graphdir/commit-graph-chain &&
|
|
|
|
ls $graphdir/graph-*.graph >graph-files &&
|
|
|
|
test_line_count = 1 graph-files &&
|
|
|
|
verify_chain_files_exist $graphdir &&
|
|
|
|
graph_read_expect 2
|
|
|
|
'
|
|
|
|
|
commit-graph.c: gracefully handle file descriptor exhaustion
When writing a layered commit-graph, the commit-graph machinery uses
'commit_graph_filenames_after' and 'commit_graph_hash_after' to keep
track of the layers in the chain that we are in the process of writing.
When the number of commit-graph layers shrinks, we initialize all
entries in the aforementioned arrays, because we know the structure of
the new commit-graph chain immediately (since there are no new layers,
there are no unknown hash values).
But when the number of commit-graph layers grows (i.e., that
'num_commit_graphs_after > num_commit_graphs_before'), then we leave
some entries in the filenames and hashes arrays as uninitialized,
because we will fill them in later as those values become available.
For instance, we rely on 'write_commit_graph_file' to store the
filename and hash of the last layer in the new chain, which is the one
that it is responsible for writing. But, it's possible that
'write_commit_graph_file' may fail, e.g., from file descriptor
exhaustion. In this case it is possible that 'git_mkstemp_mode' will
fail, and that function will return early *before* setting the values
for the last commit-graph layer's filename and hash.
This causes a number of unpleasant side-effects. For instance, trying to
'free()' each entry in 'ctx->commit_graph_filenames_after' (and
similarly for the hashes array) causes us to 'free()' uninitialized
memory, since the area is allocated with 'malloc()' and is therefore
subject to contain garbage (which is left alone when
'write_commit_graph_file' returns early).
This can manifest in other issues, like a general protection fault,
and/or leaving a stray 'commit-graph-chain.lock' around after the
process dies. (The reasoning for this is still a mystery to me, since
we'd otherwise usually expect the kernel to run tempfile.c's 'atexit()'
handlers in the case of a normal death...)
To resolve this, initialize the memory with 'CALLOC_ARRAY' so that
uninitialized entries are filled with zeros, and can thus be 'free()'d
as a noop instead of causing a fault.
Helped-by: Jeff King <peff@peff.net>
Helped-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-23 23:41:09 +02:00
|
|
|
test_expect_success ULIMIT_FILE_DESCRIPTORS 'handles file descriptor exhaustion' '
|
|
|
|
git init ulimit &&
|
|
|
|
(
|
|
|
|
cd ulimit &&
|
|
|
|
for i in $(test_seq 64)
|
|
|
|
do
|
|
|
|
test_commit $i &&
|
2020-07-07 08:04:35 +02:00
|
|
|
run_with_limited_open_files test_might_fail git commit-graph write \
|
commit-graph.c: gracefully handle file descriptor exhaustion
When writing a layered commit-graph, the commit-graph machinery uses
'commit_graph_filenames_after' and 'commit_graph_hash_after' to keep
track of the layers in the chain that we are in the process of writing.
When the number of commit-graph layers shrinks, we initialize all
entries in the aforementioned arrays, because we know the structure of
the new commit-graph chain immediately (since there are no new layers,
there are no unknown hash values).
But when the number of commit-graph layers grows (i.e., that
'num_commit_graphs_after > num_commit_graphs_before'), then we leave
some entries in the filenames and hashes arrays as uninitialized,
because we will fill them in later as those values become available.
For instance, we rely on 'write_commit_graph_file' to store the
filename and hash of the last layer in the new chain, which is the one
that it is responsible for writing. But, it's possible that
'write_commit_graph_file' may fail, e.g., from file descriptor
exhaustion. In this case it is possible that 'git_mkstemp_mode' will
fail, and that function will return early *before* setting the values
for the last commit-graph layer's filename and hash.
This causes a number of unpleasant side-effects. For instance, trying to
'free()' each entry in 'ctx->commit_graph_filenames_after' (and
similarly for the hashes array) causes us to 'free()' uninitialized
memory, since the area is allocated with 'malloc()' and is therefore
subject to contain garbage (which is left alone when
'write_commit_graph_file' returns early).
This can manifest in other issues, like a general protection fault,
and/or leaving a stray 'commit-graph-chain.lock' around after the
process dies. (The reasoning for this is still a mystery to me, since
we'd otherwise usually expect the kernel to run tempfile.c's 'atexit()'
handlers in the case of a normal death...)
To resolve this, initialize the memory with 'CALLOC_ARRAY' so that
uninitialized entries are filled with zeros, and can thus be 'free()'d
as a noop instead of causing a fault.
Helped-by: Jeff King <peff@peff.net>
Helped-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-23 23:41:09 +02:00
|
|
|
--split=no-merge --reachable || return 1
|
|
|
|
done
|
|
|
|
)
|
|
|
|
'
|
|
|
|
|
2020-04-29 19:36:42 +02:00
|
|
|
while read mode modebits
|
|
|
|
do
|
|
|
|
test_expect_success POSIXPERM "split commit-graph respects core.sharedrepository $mode" '
|
|
|
|
rm -rf $graphdir $infodir/commit-graph &&
|
|
|
|
git reset --hard commits/1 &&
|
|
|
|
test_config core.sharedrepository "$mode" &&
|
|
|
|
git commit-graph write --split --reachable &&
|
|
|
|
ls $graphdir/graph-*.graph >graph-files &&
|
|
|
|
test_line_count = 1 graph-files &&
|
|
|
|
echo "$modebits" >expect &&
|
|
|
|
test_modebits $graphdir/graph-*.graph >actual &&
|
2020-04-29 19:36:46 +02:00
|
|
|
test_cmp expect actual &&
|
|
|
|
test_modebits $graphdir/commit-graph-chain >actual &&
|
2020-04-29 19:36:42 +02:00
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
done <<\EOF
|
|
|
|
0666 -r--r--r--
|
|
|
|
0600 -r--------
|
|
|
|
EOF
|
|
|
|
|
commit-graph: introduce 'get_bloom_filter_settings()'
Many places in the code need a pointer to the commit-graph's
'struct bloom_filter_settings', and they typically take the value
from the top-most commit-graph.
In the non-split case, this works as expected. In the split case,
however, things get a little tricky. Not all layers in a chain of
incremental commit-graphs are required to themselves have Bloom data,
and so whether or not some part of the code uses Bloom filters depends
entirely on whether or not the top-most level of the commit-graph chain
has Bloom filters.
This has been the behavior since Bloom filters were introduced, and has
been codified into the tests since a759bfa9ee (t4216: add end to end
tests for git log with Bloom filters, 2020-04-06). In fact, t4216.130
requires that Bloom filters are not used in exactly the case described
earlier.
There is no reason that this needs to be the case, since it is perfectly
valid for commits in an earlier layer to have Bloom filters when commits
in a newer layer do not.
Since Bloom settings are guaranteed in practice to be the same for any
layer in a chain that has Bloom data, it is sufficient to traverse the
'->base_graph' pointer until either (1) a non-null 'struct
bloom_filter_settings *' is found, or (2) we are at the root of
the commit-graph chain.
Introduce a 'get_bloom_filter_settings()' function that does just this,
and use it instead of purely dereferencing the top-most graph's
'->bloom_filter_settings' pointer.
While we're at it, add an additional test in t5324 to guard against code
in the commit-graph writing machinery that doesn't correctly handle a
NULL 'struct bloom_filter *'.
Co-authored-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-09 17:22:44 +02:00
|
|
|
# Build a three-layer chain in which only some layers carry Bloom data
# (the middle layer is written without --changed-paths), then replace the
# whole chain with a single layer written with --changed-paths.  This
# guards against commit-graph writing code that does not cope with a
# missing Bloom filter.
#
# The input files a, b and c are all created here, so the test does not
# depend on a "c" file happening to exist in the trash directory already.
test_expect_success '--split=replace with partial Bloom data' '
	rm -rf $graphdir $infodir/commit-graph &&
	git reset --hard commits/3 &&
	git rev-list -1 HEAD~2 >a &&
	git rev-list -1 HEAD~1 >b &&
	git rev-list -1 HEAD >c &&
	git commit-graph write --split=no-merge --stdin-commits --changed-paths <a &&
	git commit-graph write --split=no-merge --stdin-commits <b &&
	git commit-graph write --split=replace --stdin-commits --changed-paths <c &&
	ls $graphdir/graph-*.graph >graph-files &&
	test_line_count = 1 graph-files &&
	verify_chain_files_exist $graphdir
'
|
|
|
|
|
commit-graph: ignore duplicates when merging layers
Thomas reported [1] that a "git fetch" command was failing with an error
saying "unexpected duplicate commit id". The root cause is that they had
fetch.writeCommitGraph enabled which generates commit-graph chains, and
this instance was merging two layers that both contained the same commit
ID.
[1] https://lore.kernel.org/git/55f8f00c-a61c-67d4-889e-a9501c596c39@virtuell-zuhause.de/
The initial assumption is that Git would not write a commit ID into a
commit-graph layer if it already exists in a lower commit-graph layer.
Somehow, this specific case did get into that situation, leading to this
error.
While unexpected, this isn't actually invalid (as long as the two layers
agree on the metadata for the commit). When we parse a commit that does
not have a graph_pos in the commit_graph_data_slab, we use binary search
in the commit-graph layers to find the commit and set graph_pos. That
position is never used again in this case. However, when we parse a
commit from the commit-graph file, we load its parents from the
commit-graph and assign graph_pos at that point. If those parents were
already parsed from the commit-graph, then nothing needs to be done.
Otherwise, this graph_pos is a valid position in the commit-graph so we
can parse the parents, when necessary.
Thus, this die() is too aggressive. The easiest thing to do would be to
ignore the duplicates.
If we only ignore the duplicates, then we will produce a commit-graph
that has identical commit IDs listed in adjacent positions. This excess
data will never be removed from the commit-graph, which could cascade
into significantly bloated file sizes.
Thankfully, we can collapse the list to erase the duplicate commit
pointers. This allows us to get the end result we want without extra
memory costs and with minimal CPU time.
The root cause is disabling core.commitGraph, which prevents
parsing commits from the lower layers during a 'git commit-graph write
--split' command. Since we use the 'graph_pos' value to determine
whether a commit is in a lower layer, we never discover that those
commits are already in the commit-graph chain and add them to the top
layer. This layer is then merged down, creating duplicates.
The test added in t5324-split-commit-graph.sh fails without this change.
However, we still have not completely removed the need for this
duplicate check. That will come in a follow-up change.
Reported-by: Thomas Braun <thomas.braun@virtuell-zuhause.de>
Helped-by: Taylor Blau <me@ttaylorr.com>
Co-authored-by: Jeff King <peff@peff.net>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-10-09 22:53:51 +02:00
|
|
|
# Two layers of a split commit-graph can end up containing the same commit:
# writing a layer with core.commitGraph disabled keeps the writer from
# noticing that the commit already lives in a lower layer.  Set that
# situation up on purpose, then make sure that merging the layers succeeds
# and yields a commit-graph that passes "commit-graph verify".
test_expect_success 'prevent regression for duplicate commits across layers' '
	git init dup &&
	git -C dup commit --allow-empty -m one &&
	git -C dup -c core.commitGraph=false \
		commit-graph write --split=no-merge --reachable 2>err &&
	test_i18ngrep "attempting to write a commit-graph" err &&
	(
		cd dup &&
		git commit-graph write --split=no-merge --reachable &&
		git commit --allow-empty -m two &&
		git commit-graph write --split=no-merge --reachable &&
		git commit --allow-empty -m three &&
		git commit-graph write --split --reachable &&
		git commit-graph verify
	)
'
|
|
|
|
|
2019-06-18 20:14:28 +02:00
|
|
|
# End of the test script.  NOTE(review): test_done is not defined in this
# file; presumably it comes from the sourced ./test-lib.sh -- confirm.
test_done
|