git-commit-vandalism/t/t5324-split-commit-graph.sh

431 lines
13 KiB
Bash
Raw Normal View History

#!/bin/sh
test_description='split commit graph'
. ./test-lib.sh
GIT_TEST_COMMIT_GRAPH=0
GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0
test_expect_success 'setup repo' '
git init &&
git config core.commitGraph true &&
git config gc.writeCommitGraph false &&
infodir=".git/objects/info" &&
graphdir="$infodir/commit-graphs" &&
test_oid_cache <<-EOM
shallow sha1:1760
shallow sha256:2064
base sha1:1376
base sha256:1496
commit-graph: use the "hash version" byte The commit-graph format reserved a byte among the header of the file to store a "hash version". During the SHA-256 work, this was not modified because file formats are not necessarily intended to work across hash versions. If a repository has SHA-256 as its hash algorithm, it automatically up-shifts the lengths of object names in all necessary formats. However, since we have this byte available for adjusting the version, we can make the file formats more obviously incompatible instead of relying on other context from the repository. Update the oid_version() method in commit-graph.c to add a new value, 2, for sha-256. This automatically writes the new value in a SHA-256 repository _and_ verifies the value is correct. This is a breaking change relative to the current 'master' branch since 092b677 (Merge branch 'bc/sha-256-cvs-svn-updates', 2020-08-13) but it is not breaking relative to any released version of Git. The test impact is relatively minor: the output of 'test-tool read-graph' lists the header information, so those instances of '1' need to be replaced with a variable determined by GIT_TEST_DEFAULT_HASH. A more careful test is added that specifically creates a repository of each type then swaps the commit-graph files. The important value here is that the "git log" command succeeds while writing a message to stderr. Helped-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Reviewed-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-08-17 16:04:47 +02:00
oid_version sha1:1
oid_version sha256:2
EOM
'
graph_read_expect() {
NUM_BASE=0
if test ! -z $2
then
NUM_BASE=$2
fi
cat >expect <<- EOF
commit-graph: use the "hash version" byte The commit-graph format reserved a byte among the header of the file to store a "hash version". During the SHA-256 work, this was not modified because file formats are not necessarily intended to work across hash versions. If a repository has SHA-256 as its hash algorithm, it automatically up-shifts the lengths of object names in all necessary formats. However, since we have this byte available for adjusting the version, we can make the file formats more obviously incompatible instead of relying on other context from the repository. Update the oid_version() method in commit-graph.c to add a new value, 2, for sha-256. This automatically writes the new value in a SHA-256 repository _and_ verifies the value is correct. This is a breaking change relative to the current 'master' branch since 092b677 (Merge branch 'bc/sha-256-cvs-svn-updates', 2020-08-13) but it is not breaking relative to any released version of Git. The test impact is relatively minor: the output of 'test-tool read-graph' lists the header information, so those instances of '1' need to be replaced with a variable determined by GIT_TEST_DEFAULT_HASH. A more careful test is added that specifically creates a repository of each type then swaps the commit-graph files. The important value here is that the "git log" command succeeds while writing a message to stderr. Helped-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Reviewed-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-08-17 16:04:47 +02:00
header: 43475048 1 $(test_oid oid_version) 3 $NUM_BASE
num_commits: $1
chunks: oid_fanout oid_lookup commit_metadata
EOF
test-tool read-graph >output &&
test_cmp expect output
}
test_expect_success POSIXPERM 'tweak umask for modebit tests' '
umask 022
'
test_expect_success 'create commits and write commit-graph' '
for i in $(test_seq 3)
do
test_commit $i &&
git branch commits/$i || return 1
done &&
git commit-graph write --reachable &&
test_path_is_file $infodir/commit-graph &&
graph_read_expect 3
'
graph_git_two_modes() {
git -c core.commitGraph=true $1 >output
git -c core.commitGraph=false $1 >expect
test_cmp expect output
}
graph_git_behavior() {
MSG=$1
BRANCH=$2
COMPARE=$3
test_expect_success "check normal git operations: $MSG" '
graph_git_two_modes "log --oneline $BRANCH" &&
graph_git_two_modes "log --topo-order $BRANCH" &&
graph_git_two_modes "log --graph $COMPARE..$BRANCH" &&
graph_git_two_modes "branch -vv" &&
graph_git_two_modes "merge-base -a $BRANCH $COMPARE"
'
}
graph_git_behavior 'graph exists' commits/3 commits/1
verify_chain_files_exist() {
for hash in $(cat $1/commit-graph-chain)
do
test_path_is_file $1/graph-$hash.graph || return 1
done
}
test_expect_success 'add more commits, and write a new base graph' '
git reset --hard commits/1 &&
for i in $(test_seq 4 5)
do
test_commit $i &&
git branch commits/$i || return 1
done &&
git reset --hard commits/2 &&
for i in $(test_seq 6 10)
do
test_commit $i &&
git branch commits/$i || return 1
done &&
git reset --hard commits/2 &&
git merge commits/4 &&
git branch merge/1 &&
git reset --hard commits/4 &&
git merge commits/6 &&
git branch merge/2 &&
git commit-graph write --reachable &&
graph_read_expect 12
'
test_expect_success 'fork and fail to base a chain on a commit-graph file' '
test_when_finished rm -rf fork &&
git clone . fork &&
(
cd fork &&
rm .git/objects/info/commit-graph &&
echo "$(pwd)/../.git/objects" >.git/objects/info/alternates &&
test_commit new-commit &&
git commit-graph write --reachable --split &&
test_path_is_file $graphdir/commit-graph-chain &&
test_line_count = 1 $graphdir/commit-graph-chain &&
verify_chain_files_exist $graphdir
)
'
test_expect_success 'add three more commits, write a tip graph' '
git reset --hard commits/3 &&
git merge merge/1 &&
git merge commits/5 &&
git merge merge/2 &&
git branch merge/3 &&
git commit-graph write --reachable --split &&
test_path_is_missing $infodir/commit-graph &&
test_path_is_file $graphdir/commit-graph-chain &&
ls $graphdir/graph-*.graph >graph-files &&
test_line_count = 2 graph-files &&
verify_chain_files_exist $graphdir
'
graph_git_behavior 'split commit-graph: merge 3 vs 2' merge/3 merge/2
test_expect_success 'add one commit, write a tip graph' '
test_commit 11 &&
git branch commits/11 &&
git commit-graph write --reachable --split &&
test_path_is_missing $infodir/commit-graph &&
test_path_is_file $graphdir/commit-graph-chain &&
ls $graphdir/graph-*.graph >graph-files &&
test_line_count = 3 graph-files &&
verify_chain_files_exist $graphdir
'
graph_git_behavior 'three-layer commit-graph: commit 11 vs 6' commits/11 commits/6
test_expect_success 'add one commit, write a merged graph' '
test_commit 12 &&
git branch commits/12 &&
git commit-graph write --reachable --split &&
test_path_is_file $graphdir/commit-graph-chain &&
test_line_count = 2 $graphdir/commit-graph-chain &&
ls $graphdir/graph-*.graph >graph-files &&
test_line_count = 2 graph-files &&
verify_chain_files_exist $graphdir
'
graph_git_behavior 'merged commit-graph: commit 12 vs 6' commits/12 commits/6
test_expect_success 'create fork and chain across alternate' '
git clone . fork &&
(
cd fork &&
git config core.commitGraph true &&
rm -rf $graphdir &&
echo "$(pwd)/../.git/objects" >.git/objects/info/alternates &&
test_commit 13 &&
git branch commits/13 &&
git commit-graph write --reachable --split &&
test_path_is_file $graphdir/commit-graph-chain &&
test_line_count = 3 $graphdir/commit-graph-chain &&
ls $graphdir/graph-*.graph >graph-files &&
test_line_count = 1 graph-files &&
git -c core.commitGraph=true rev-list HEAD >expect &&
git -c core.commitGraph=false rev-list HEAD >actual &&
test_cmp expect actual &&
test_commit 14 &&
git commit-graph write --reachable --split --object-dir=.git/objects/ &&
test_line_count = 3 $graphdir/commit-graph-chain &&
ls $graphdir/graph-*.graph >graph-files &&
test_line_count = 1 graph-files
)
'
graph_git_behavior 'alternate: commit 13 vs 6' commits/13 commits/6
test_expect_success 'test merge stragety constants' '
git clone . merge-2 &&
(
cd merge-2 &&
git config core.commitGraph true &&
test_line_count = 2 $graphdir/commit-graph-chain &&
test_commit 14 &&
git commit-graph write --reachable --split --size-multiple=2 &&
test_line_count = 3 $graphdir/commit-graph-chain
) &&
git clone . merge-10 &&
(
cd merge-10 &&
git config core.commitGraph true &&
test_line_count = 2 $graphdir/commit-graph-chain &&
test_commit 14 &&
git commit-graph write --reachable --split --size-multiple=10 &&
test_line_count = 1 $graphdir/commit-graph-chain &&
ls $graphdir/graph-*.graph >graph-files &&
test_line_count = 1 graph-files
) &&
git clone . merge-10-expire &&
(
cd merge-10-expire &&
git config core.commitGraph true &&
test_line_count = 2 $graphdir/commit-graph-chain &&
test_commit 15 &&
touch $graphdir/to-delete.graph $graphdir/to-keep.graph &&
test-tool chmtime =1546362000 $graphdir/to-delete.graph &&
test-tool chmtime =1546362001 $graphdir/to-keep.graph &&
git commit-graph write --reachable --split --size-multiple=10 \
--expire-time="2019-01-01 12:00 -05:00" &&
test_line_count = 1 $graphdir/commit-graph-chain &&
test_path_is_missing $graphdir/to-delete.graph &&
test_path_is_file $graphdir/to-keep.graph &&
ls $graphdir/graph-*.graph >graph-files &&
test_line_count = 3 graph-files
) &&
git clone --no-hardlinks . max-commits &&
(
cd max-commits &&
git config core.commitGraph true &&
test_line_count = 2 $graphdir/commit-graph-chain &&
test_commit 16 &&
test_commit 17 &&
git commit-graph write --reachable --split --max-commits=1 &&
test_line_count = 1 $graphdir/commit-graph-chain &&
ls $graphdir/graph-*.graph >graph-files &&
test_line_count = 1 graph-files
)
'
test_expect_success 'remove commit-graph-chain file after flattening' '
git clone . flatten &&
(
cd flatten &&
test_line_count = 2 $graphdir/commit-graph-chain &&
git commit-graph write --reachable &&
test_path_is_missing $graphdir/commit-graph-chain &&
ls $graphdir >graph-files &&
test_line_count = 0 graph-files
)
'
corrupt_file() {
file=$1
pos=$2
data="${3:-\0}"
chmod a+w "$file" &&
printf "$data" | dd of="$file" bs=1 seek="$pos" conv=notrunc
}
test_expect_success 'verify hashes along chain, even in shallow' '
git clone --no-hardlinks . verify &&
(
cd verify &&
git commit-graph verify &&
base_file=$graphdir/graph-$(head -n 1 $graphdir/commit-graph-chain).graph &&
corrupt_file "$base_file" $(test_oid shallow) "\01" &&
test_must_fail git commit-graph verify --shallow 2>test_err &&
grep -v "^+" test_err >err &&
test_i18ngrep "incorrect checksum" err
)
'
test_expect_success 'verify --shallow does not check base contents' '
git clone --no-hardlinks . verify-shallow &&
(
cd verify-shallow &&
git commit-graph verify &&
base_file=$graphdir/graph-$(head -n 1 $graphdir/commit-graph-chain).graph &&
corrupt_file "$base_file" 1000 "\01" &&
git commit-graph verify --shallow &&
test_must_fail git commit-graph verify 2>test_err &&
grep -v "^+" test_err >err &&
test_i18ngrep "incorrect checksum" err
)
'
test_expect_success 'warn on base graph chunk incorrect' '
git clone --no-hardlinks . base-chunk &&
(
cd base-chunk &&
git commit-graph verify &&
base_file=$graphdir/graph-$(tail -n 1 $graphdir/commit-graph-chain).graph &&
corrupt_file "$base_file" $(test_oid base) "\01" &&
git commit-graph verify --shallow 2>test_err &&
grep -v "^+" test_err >err &&
test_i18ngrep "commit-graph chain does not match" err
)
'
test_expect_success 'verify after commit-graph-chain corruption' '
git clone --no-hardlinks . verify-chain &&
(
cd verify-chain &&
corrupt_file "$graphdir/commit-graph-chain" 60 "G" &&
git commit-graph verify 2>test_err &&
grep -v "^+" test_err >err &&
test_i18ngrep "invalid commit-graph chain" err &&
corrupt_file "$graphdir/commit-graph-chain" 60 "A" &&
git commit-graph verify 2>test_err &&
grep -v "^+" test_err >err &&
test_i18ngrep "unable to find all commit-graph files" err
)
'
test_expect_success 'verify across alternates' '
git clone --no-hardlinks . verify-alt &&
(
cd verify-alt &&
rm -rf $graphdir &&
altdir="$(pwd)/../.git/objects" &&
echo "$altdir" >.git/objects/info/alternates &&
git commit-graph verify --object-dir="$altdir/" &&
test_commit extra &&
git commit-graph write --reachable --split &&
tip_file=$graphdir/graph-$(tail -n 1 $graphdir/commit-graph-chain).graph &&
corrupt_file "$tip_file" 100 "\01" &&
test_must_fail git commit-graph verify --shallow 2>test_err &&
grep -v "^+" test_err >err &&
test_i18ngrep "commit-graph has incorrect fanout value" err
)
'
test_expect_success 'add octopus merge' '
git reset --hard commits/10 &&
git merge commits/3 commits/4 &&
git branch merge/octopus &&
git commit-graph write --reachable --split &&
git commit-graph verify --progress 2>err &&
commit-graph: fix bug around octopus merges In 1771be90 "commit-graph: merge commit-graph chains" (2019-06-18), the method sort_and_scan_merged_commits() was added to merge the commit lists of two commit-graph files in the incremental format. Unfortunately, there was an off-by-one error in that method around incrementing num_extra_edges, which leads to an incorrect offset for the base graph chunk. When we store an octopus merge in the commit-graph file, we store the first parent in the normal place, but use the second parent position to point into the "extra edges" chunk where the remaining parents exist. This means we should be adding "num_parents - 1" edges to this list, not "num_parents - 2". That is the basic error. The reason this was not caught in the test suite is more subtle. In 5324-split-commit-graph.sh, we test creating an octopus merge and adding it to the tip of a commit-graph chain, then verify the result. This _should_ have caught the problem, except that when we load the commit-graph files we were overly careful to not fail when the commit-graph chain does not match. This care was on purpose to avoid race conditions as one process reads the chain and another process modifies it. In such a case, the reading process outputs the following message to stderr: warning: commit-graph chain does not match These warnings are output in the test suite, but ignored. By checking the stderr of `git commit-graph verify` to include the expected progress output, it will now catch this error. Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-08-05 18:43:41 +02:00
test_line_count = 3 err &&
test_i18ngrep ! warning err &&
test_line_count = 3 $graphdir/commit-graph-chain
'
graph_git_behavior 'graph exists' merge/octopus commits/12
test_expect_success 'split across alternate where alternate is not split' '
git commit-graph write --reachable &&
test_path_is_file .git/objects/info/commit-graph &&
cp .git/objects/info/commit-graph . &&
git clone --no-hardlinks . alt-split &&
(
cd alt-split &&
rm -f .git/objects/info/commit-graph &&
echo "$(pwd)"/../.git/objects >.git/objects/info/alternates &&
test_commit 18 &&
git commit-graph write --reachable --split &&
test_line_count = 1 $graphdir/commit-graph-chain
) &&
test_cmp commit-graph .git/objects/info/commit-graph
'
test_expect_success '--split=no-merge always writes an incremental' '
test_when_finished rm -rf a b &&
rm -rf $graphdir $infodir/commit-graph &&
git reset --hard commits/2 &&
git rev-list HEAD~1 >a &&
git rev-list HEAD >b &&
git commit-graph write --split --stdin-commits <a &&
git commit-graph write --split=no-merge --stdin-commits <b &&
test_line_count = 2 $graphdir/commit-graph-chain
'
builtin/commit-graph.c: introduce split strategy 'replace' When using split commit-graphs, it is sometimes useful to completely replace the commit-graph chain with a new base. For example, consider a scenario in which a repository builds a new commit-graph incremental for each push. Occasionally (say, after some fixed number of pushes), they may wish to rebuild the commit-graph chain with all reachable commits. They can do so with $ git commit-graph write --reachable but this removes the chain entirely and replaces it with a single commit-graph in 'objects/info/commit-graph'. Unfortunately, this means that the next push will have to move this commit-graph into the first layer of a new chain, and then write its new commits on top. Avoid such copying entirely by allowing the caller to specify that they wish to replace the entirety of their commit-graph chain, while also specifying that the new commit-graph should become the basis of a fresh, length-one chain. This addresses the above situation by making it possible for the caller to instead write: $ git commit-graph write --reachable --split=replace which writes a new length-one chain to 'objects/info/commit-graphs', making the commit-graph incremental generated by the subsequent push relatively cheap by avoiding the aforementioned copy. In order to do this, remove an assumption in 'write_commit_graph_file' that chains are always at least two incrementals long. Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-14 06:04:17 +02:00
test_expect_success '--split=replace replaces the chain' '
rm -rf $graphdir $infodir/commit-graph &&
git reset --hard commits/3 &&
git rev-list -1 HEAD~2 >a &&
git rev-list -1 HEAD~1 >b &&
git rev-list -1 HEAD >c &&
git commit-graph write --split=no-merge --stdin-commits <a &&
git commit-graph write --split=no-merge --stdin-commits <b &&
git commit-graph write --split=no-merge --stdin-commits <c &&
test_line_count = 3 $graphdir/commit-graph-chain &&
git commit-graph write --stdin-commits --split=replace <b &&
test_path_is_missing $infodir/commit-graph &&
test_path_is_file $graphdir/commit-graph-chain &&
ls $graphdir/graph-*.graph >graph-files &&
test_line_count = 1 graph-files &&
verify_chain_files_exist $graphdir &&
graph_read_expect 2
'
commit-graph.c: gracefully handle file descriptor exhaustion When writing a layered commit-graph, the commit-graph machinery uses 'commit_graph_filenames_after' and 'commit_graph_hash_after' to keep track of the layers in the chain that we are in the process of writing. When the number of commit-graph layers shrinks, we initialize all entries in the aforementioned arrays, because we know the structure of the new commit-graph chain immediately (since there are no new layers, there are no unknown hash values). But when the number of commit-graph layers grows (i.e., that 'num_commit_graphs_after > num_commit_graphs_before'), then we leave some entries in the filenames and hashes arrays as uninitialized, because we will fill them in later as those values become available. For instance, we rely on 'write_commit_graph_file's to store the filename and hash of the last layer in the new chain, which is the one that it is responsible for writing. But, it's possible that 'write_commit_graph_file' may fail, e.g., from file descriptor exhaustion. In this case it is possible that 'git_mkstemp_mode' will fail, and that function will return early *before* setting the values for the last commit-graph layer's filename and hash. This causes a number of upleasant side-effects. For instance, trying to 'free()' each entry in 'ctx->commit_graph_filenames_after' (and similarly for the hashes array) causes us to 'free()' uninitialized memory, since the area is allocated with 'malloc()' and is therefore subject to contain garbage (which is left alone when 'write_commit_graph_file' returns early). This can manifest in other issues, like a general protection fault, and/or leaving a stray 'commit-graph-chain.lock' around after the process dies. (The reasoning for this is still a mystery to me, since we'd otherwise usually expect the kernel to run tempfile.c's 'atexit()' handlers in the case of a normal death...) To resolve this, initialize the memory with 'CALLOC_ARRAY' so that uninitialized entries are filled with zeros, and can thus be 'free()'d as a noop instead of causing a fault. Helped-by: Jeff King <peff@peff.net> Helped-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-23 23:41:09 +02:00
test_expect_success ULIMIT_FILE_DESCRIPTORS 'handles file descriptor exhaustion' '
git init ulimit &&
(
cd ulimit &&
for i in $(test_seq 64)
do
test_commit $i &&
run_with_limited_open_files test_might_fail git commit-graph write \
commit-graph.c: gracefully handle file descriptor exhaustion When writing a layered commit-graph, the commit-graph machinery uses 'commit_graph_filenames_after' and 'commit_graph_hash_after' to keep track of the layers in the chain that we are in the process of writing. When the number of commit-graph layers shrinks, we initialize all entries in the aforementioned arrays, because we know the structure of the new commit-graph chain immediately (since there are no new layers, there are no unknown hash values). But when the number of commit-graph layers grows (i.e., that 'num_commit_graphs_after > num_commit_graphs_before'), then we leave some entries in the filenames and hashes arrays as uninitialized, because we will fill them in later as those values become available. For instance, we rely on 'write_commit_graph_file's to store the filename and hash of the last layer in the new chain, which is the one that it is responsible for writing. But, it's possible that 'write_commit_graph_file' may fail, e.g., from file descriptor exhaustion. In this case it is possible that 'git_mkstemp_mode' will fail, and that function will return early *before* setting the values for the last commit-graph layer's filename and hash. This causes a number of upleasant side-effects. For instance, trying to 'free()' each entry in 'ctx->commit_graph_filenames_after' (and similarly for the hashes array) causes us to 'free()' uninitialized memory, since the area is allocated with 'malloc()' and is therefore subject to contain garbage (which is left alone when 'write_commit_graph_file' returns early). This can manifest in other issues, like a general protection fault, and/or leaving a stray 'commit-graph-chain.lock' around after the process dies. (The reasoning for this is still a mystery to me, since we'd otherwise usually expect the kernel to run tempfile.c's 'atexit()' handlers in the case of a normal death...) To resolve this, initialize the memory with 'CALLOC_ARRAY' so that uninitialized entries are filled with zeros, and can thus be 'free()'d as a noop instead of causing a fault. Helped-by: Jeff King <peff@peff.net> Helped-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-23 23:41:09 +02:00
--split=no-merge --reachable || return 1
done
)
'
while read mode modebits
do
test_expect_success POSIXPERM "split commit-graph respects core.sharedrepository $mode" '
rm -rf $graphdir $infodir/commit-graph &&
git reset --hard commits/1 &&
test_config core.sharedrepository "$mode" &&
git commit-graph write --split --reachable &&
ls $graphdir/graph-*.graph >graph-files &&
test_line_count = 1 graph-files &&
echo "$modebits" >expect &&
test_modebits $graphdir/graph-*.graph >actual &&
test_cmp expect actual &&
test_modebits $graphdir/commit-graph-chain >actual &&
test_cmp expect actual
'
done <<\EOF
0666 -r--r--r--
0600 -r--------
EOF
test_done