2018-04-02 22:34:20 +02:00
|
|
|
#!/bin/sh
|
|
|
|
|
|
|
|
test_description='commit graph'
|
|
|
|
. ./test-lib.sh
|
|
|
|
|
2020-04-06 18:59:55 +02:00
|
|
|
GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0
|
|
|
|
|
2021-08-23 14:30:20 +02:00
|
|
|
test_expect_success 'usage' '
|
|
|
|
test_expect_code 129 git commit-graph write blah &&
|
|
|
|
test_expect_code 129 git commit-graph write verify
|
|
|
|
'
|
|
|
|
|
2018-04-02 22:34:20 +02:00
|
|
|
test_expect_success 'setup full repo' '
|
|
|
|
mkdir full &&
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
git init &&
|
2018-04-10 14:56:05 +02:00
|
|
|
git config core.commitGraph true &&
|
2020-08-17 16:04:47 +02:00
|
|
|
objdir=".git/objects" &&
|
|
|
|
|
|
|
|
test_oid_cache <<-EOF
|
|
|
|
oid_version sha1:1
|
|
|
|
oid_version sha256:2
|
|
|
|
EOF
|
2018-04-02 22:34:20 +02:00
|
|
|
'
|
|
|
|
|
2020-04-29 19:36:38 +02:00
|
|
|
test_expect_success POSIXPERM 'tweak umask for modebit tests' '
|
|
|
|
umask 022
|
|
|
|
'
|
|
|
|
|
2018-06-27 15:24:32 +02:00
|
|
|
test_expect_success 'verify graph with no graph file' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
git commit-graph verify
|
|
|
|
'
|
|
|
|
|
2018-04-02 22:34:20 +02:00
|
|
|
test_expect_success 'write graph with no packs' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
2020-01-31 00:00:43 +01:00
|
|
|
git commit-graph write --object-dir $objdir &&
|
|
|
|
test_path_is_missing $objdir/info/commit-graph
|
2018-04-02 22:34:20 +02:00
|
|
|
'
|
|
|
|
|
2019-08-05 10:02:40 +02:00
|
|
|
test_expect_success 'exit with correct error on bad input to --stdin-packs' '
|
2019-06-12 15:29:37 +02:00
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
echo doesnotexist >in &&
|
2019-08-05 10:02:38 +02:00
|
|
|
test_expect_code 1 git commit-graph write --stdin-packs <in 2>stderr &&
|
2019-06-12 15:29:37 +02:00
|
|
|
test_i18ngrep "error adding pack" stderr
|
|
|
|
'
|
|
|
|
|
2018-04-02 22:34:20 +02:00
|
|
|
test_expect_success 'create commits and repack' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
for i in $(test_seq 3)
|
|
|
|
do
|
|
|
|
test_commit $i &&
|
|
|
|
git branch commits/$i
|
|
|
|
done &&
|
|
|
|
git repack
|
|
|
|
'
|
|
|
|
|
2018-04-10 14:56:05 +02:00
|
|
|
graph_git_two_modes() {
|
2018-06-27 15:24:26 +02:00
|
|
|
git -c core.commitGraph=true $1 >output
|
|
|
|
git -c core.commitGraph=false $1 >expect
|
2018-10-05 23:54:04 +02:00
|
|
|
test_cmp expect output
|
2018-04-10 14:56:05 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
graph_git_behavior() {
|
|
|
|
MSG=$1
|
|
|
|
DIR=$2
|
|
|
|
BRANCH=$3
|
|
|
|
COMPARE=$4
|
|
|
|
test_expect_success "check normal git operations: $MSG" '
|
|
|
|
cd "$TRASH_DIRECTORY/$DIR" &&
|
|
|
|
graph_git_two_modes "log --oneline $BRANCH" &&
|
|
|
|
graph_git_two_modes "log --topo-order $BRANCH" &&
|
|
|
|
graph_git_two_modes "log --graph $COMPARE..$BRANCH" &&
|
|
|
|
graph_git_two_modes "branch -vv" &&
|
|
|
|
graph_git_two_modes "merge-base -a $BRANCH $COMPARE"
|
|
|
|
'
|
|
|
|
}
|
|
|
|
|
|
|
|
graph_git_behavior 'no graph' full commits/3 commits/1
|
|
|
|
|
2018-04-10 14:56:02 +02:00
|
|
|
graph_read_expect() {
|
|
|
|
OPTIONAL=""
|
|
|
|
NUM_CHUNKS=3
|
commit-graph: implement generation data chunk
As discovered by Ævar, we cannot increment graph version to
distinguish between generation numbers v1 and v2 [1]. Thus, one of
pre-requistes before implementing generation number v2 was to
distinguish between graph versions in a backwards compatible manner.
We are going to introduce a new chunk called Generation DATa chunk (or
GDAT). GDAT will store corrected committer date offsets whereas CDAT
will still store topological level.
Old Git does not understand GDAT chunk and would ignore it, reading
topological levels from CDAT. New Git can parse GDAT and take advantage
of newer generation numbers, falling back to topological levels when
GDAT chunk is missing (as it would happen with a commit-graph written
by old Git).
We introduce a test environment variable 'GIT_TEST_COMMIT_GRAPH_NO_GDAT'
which forces commit-graph file to be written without generation data
chunk to emulate a commit-graph file written by old Git.
To minimize the space required to store corrrected commit date, Git
stores corrected commit date offsets into the commit-graph file, instea
of corrected commit dates. This saves us 4 bytes per commit, decreasing
the GDAT chunk size by half, but it's possible for the offset to
overflow the 4-bytes allocated for storage. As such overflows are and
should be exceedingly rare, we use the following overflow management
scheme:
We introduce a new commit-graph chunk, Generation Data OVerflow ('GDOV')
to store corrected commit dates for commits with offsets greater than
GENERATION_NUMBER_V2_OFFSET_MAX.
If the offset is greater than GENERATION_NUMBER_V2_OFFSET_MAX, we set
the MSB of the offset and the other bits store the position of corrected
commit date in GDOV chunk, similar to how Extra Edge List is maintained.
We test the overflow-related code with the following repo history:
F - N - U
/ \
U - N - U N
\ /
N - F - N
Where the commits denoted by U have committer date of zero seconds
since Unix epoch, the commits denoted by N have committer date of
1112354055 (default committer date for the test suite) seconds since
Unix epoch and the commits denoted by F have committer date of
(2 ^ 31 - 2) seconds since Unix epoch.
The largest offset observed is 2 ^ 31, just large enough to overflow.
[1]: https://lore.kernel.org/git/87a7gdspo4.fsf@evledraar.gmail.com/
Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-16 19:11:15 +01:00
|
|
|
if test ! -z "$2"
|
2018-04-10 14:56:02 +02:00
|
|
|
then
|
|
|
|
OPTIONAL=" $2"
|
|
|
|
NUM_CHUNKS=$((3 + $(echo "$2" | wc -w)))
|
|
|
|
fi
|
|
|
|
cat >expect <<- EOF
|
2020-08-17 16:04:47 +02:00
|
|
|
header: 43475048 1 $(test_oid oid_version) $NUM_CHUNKS 0
|
2018-04-10 14:56:02 +02:00
|
|
|
num_commits: $1
|
|
|
|
chunks: oid_fanout oid_lookup commit_metadata$OPTIONAL
|
|
|
|
EOF
|
2019-11-12 17:58:20 +01:00
|
|
|
test-tool read-graph >output &&
|
2018-04-10 14:56:02 +02:00
|
|
|
test_cmp expect output
|
|
|
|
}
|
|
|
|
|
2020-05-13 23:59:51 +02:00
|
|
|
test_expect_success 'exit with correct error on bad input to --stdin-commits' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
commit-graph: drop COMMIT_GRAPH_WRITE_CHECK_OIDS flag
Since 7c5c9b9c57 (commit-graph: error out on invalid commit oids in
'write --stdin-commits', 2019-08-05), the commit-graph builtin dies on
receiving non-commit OIDs as input to '--stdin-commits'.
This behavior can be cumbersome to work around in, say, the case of
piping 'git for-each-ref' to 'git commit-graph write --stdin-commits' if
the caller does not want to cull out non-commits themselves. In this
situation, it would be ideal if 'git commit-graph write' wrote the graph
containing the inputs that did pertain to commits, and silently ignored
the remainder of the input.
Some options have been proposed to the effect of '--[no-]check-oids'
which would allow callers to have the commit-graph builtin do just that.
After some discussion, it is difficult to imagine a caller who wouldn't
want to pass '--no-check-oids', suggesting that we should get rid of the
behavior of complaining about non-commit inputs altogether.
If callers do wish to retain this behavior, they can easily work around
this change by doing the following:
git for-each-ref --format='%(objectname) %(objecttype) %(*objecttype)' |
awk '
!/commit/ { print "not-a-commit:"$1 }
/commit/ { print $1 }
' |
git commit-graph write --stdin-commits
To make it so that valid OIDs that refer to non-existent objects are
indeed an error after loosening the error handling, perform an extra
lookup to make sure that object indeed exists before sending it to the
commit-graph internals.
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-05-13 23:59:55 +02:00
|
|
|
# invalid, non-hex OID
|
|
|
|
echo HEAD >in &&
|
|
|
|
test_expect_code 1 git commit-graph write --stdin-commits <in 2>stderr &&
|
2020-05-13 23:59:51 +02:00
|
|
|
test_i18ngrep "unexpected non-hex object ID: HEAD" stderr &&
|
commit-graph: drop COMMIT_GRAPH_WRITE_CHECK_OIDS flag
Since 7c5c9b9c57 (commit-graph: error out on invalid commit oids in
'write --stdin-commits', 2019-08-05), the commit-graph builtin dies on
receiving non-commit OIDs as input to '--stdin-commits'.
This behavior can be cumbersome to work around in, say, the case of
piping 'git for-each-ref' to 'git commit-graph write --stdin-commits' if
the caller does not want to cull out non-commits themselves. In this
situation, it would be ideal if 'git commit-graph write' wrote the graph
containing the inputs that did pertain to commits, and silently ignored
the remainder of the input.
Some options have been proposed to the effect of '--[no-]check-oids'
which would allow callers to have the commit-graph builtin do just that.
After some discussion, it is difficult to imagine a caller who wouldn't
want to pass '--no-check-oids', suggesting that we should get rid of the
behavior of complaining about non-commit inputs altogether.
If callers do wish to retain this behavior, they can easily work around
this change by doing the following:
git for-each-ref --format='%(objectname) %(objecttype) %(*objecttype)' |
awk '
!/commit/ { print "not-a-commit:"$1 }
/commit/ { print $1 }
' |
git commit-graph write --stdin-commits
To make it so that valid OIDs that refer to non-existent objects are
indeed an error after loosening the error handling, perform an extra
lookup to make sure that object indeed exists before sending it to the
commit-graph internals.
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-05-13 23:59:55 +02:00
|
|
|
# non-existent OID
|
|
|
|
echo $ZERO_OID >in &&
|
|
|
|
test_expect_code 1 git commit-graph write --stdin-commits <in 2>stderr &&
|
|
|
|
test_i18ngrep "invalid object" stderr &&
|
|
|
|
# valid commit and tree OID
|
|
|
|
git rev-parse HEAD HEAD^{tree} >in &&
|
|
|
|
git commit-graph write --stdin-commits <in &&
|
commit-graph: implement generation data chunk
As discovered by Ævar, we cannot increment graph version to
distinguish between generation numbers v1 and v2 [1]. Thus, one of
pre-requistes before implementing generation number v2 was to
distinguish between graph versions in a backwards compatible manner.
We are going to introduce a new chunk called Generation DATa chunk (or
GDAT). GDAT will store corrected committer date offsets whereas CDAT
will still store topological level.
Old Git does not understand GDAT chunk and would ignore it, reading
topological levels from CDAT. New Git can parse GDAT and take advantage
of newer generation numbers, falling back to topological levels when
GDAT chunk is missing (as it would happen with a commit-graph written
by old Git).
We introduce a test environment variable 'GIT_TEST_COMMIT_GRAPH_NO_GDAT'
which forces commit-graph file to be written without generation data
chunk to emulate a commit-graph file written by old Git.
To minimize the space required to store corrrected commit date, Git
stores corrected commit date offsets into the commit-graph file, instea
of corrected commit dates. This saves us 4 bytes per commit, decreasing
the GDAT chunk size by half, but it's possible for the offset to
overflow the 4-bytes allocated for storage. As such overflows are and
should be exceedingly rare, we use the following overflow management
scheme:
We introduce a new commit-graph chunk, Generation Data OVerflow ('GDOV')
to store corrected commit dates for commits with offsets greater than
GENERATION_NUMBER_V2_OFFSET_MAX.
If the offset is greater than GENERATION_NUMBER_V2_OFFSET_MAX, we set
the MSB of the offset and the other bits store the position of corrected
commit date in GDOV chunk, similar to how Extra Edge List is maintained.
We test the overflow-related code with the following repo history:
F - N - U
/ \
U - N - U N
\ /
N - F - N
Where the commits denoted by U have committer date of zero seconds
since Unix epoch, the commits denoted by N have committer date of
1112354055 (default committer date for the test suite) seconds since
Unix epoch and the commits denoted by F have committer date of
(2 ^ 31 - 2) seconds since Unix epoch.
The largest offset observed is 2 ^ 31, just large enough to overflow.
[1]: https://lore.kernel.org/git/87a7gdspo4.fsf@evledraar.gmail.com/
Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-16 19:11:15 +01:00
|
|
|
graph_read_expect 3 generation_data
|
2020-05-13 23:59:51 +02:00
|
|
|
'
|
|
|
|
|
2018-04-02 22:34:20 +02:00
|
|
|
test_expect_success 'write graph' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
2019-03-22 15:27:25 +01:00
|
|
|
git commit-graph write &&
|
2018-04-10 14:56:02 +02:00
|
|
|
test_path_is_file $objdir/info/commit-graph &&
|
commit-graph: implement generation data chunk
As discovered by Ævar, we cannot increment graph version to
distinguish between generation numbers v1 and v2 [1]. Thus, one of
pre-requistes before implementing generation number v2 was to
distinguish between graph versions in a backwards compatible manner.
We are going to introduce a new chunk called Generation DATa chunk (or
GDAT). GDAT will store corrected committer date offsets whereas CDAT
will still store topological level.
Old Git does not understand GDAT chunk and would ignore it, reading
topological levels from CDAT. New Git can parse GDAT and take advantage
of newer generation numbers, falling back to topological levels when
GDAT chunk is missing (as it would happen with a commit-graph written
by old Git).
We introduce a test environment variable 'GIT_TEST_COMMIT_GRAPH_NO_GDAT'
which forces commit-graph file to be written without generation data
chunk to emulate a commit-graph file written by old Git.
To minimize the space required to store corrrected commit date, Git
stores corrected commit date offsets into the commit-graph file, instea
of corrected commit dates. This saves us 4 bytes per commit, decreasing
the GDAT chunk size by half, but it's possible for the offset to
overflow the 4-bytes allocated for storage. As such overflows are and
should be exceedingly rare, we use the following overflow management
scheme:
We introduce a new commit-graph chunk, Generation Data OVerflow ('GDOV')
to store corrected commit dates for commits with offsets greater than
GENERATION_NUMBER_V2_OFFSET_MAX.
If the offset is greater than GENERATION_NUMBER_V2_OFFSET_MAX, we set
the MSB of the offset and the other bits store the position of corrected
commit date in GDOV chunk, similar to how Extra Edge List is maintained.
We test the overflow-related code with the following repo history:
F - N - U
/ \
U - N - U N
\ /
N - F - N
Where the commits denoted by U have committer date of zero seconds
since Unix epoch, the commits denoted by N have committer date of
1112354055 (default committer date for the test suite) seconds since
Unix epoch and the commits denoted by F have committer date of
(2 ^ 31 - 2) seconds since Unix epoch.
The largest offset observed is 2 ^ 31, just large enough to overflow.
[1]: https://lore.kernel.org/git/87a7gdspo4.fsf@evledraar.gmail.com/
Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-16 19:11:15 +01:00
|
|
|
graph_read_expect "3" generation_data
|
2018-04-02 22:34:20 +02:00
|
|
|
'
|
|
|
|
|
2020-04-29 19:36:38 +02:00
|
|
|
test_expect_success POSIXPERM 'write graph has correct permissions' '
|
|
|
|
test_path_is_file $objdir/info/commit-graph &&
|
|
|
|
echo "-r--r--r--" >expect &&
|
|
|
|
test_modebits $objdir/info/commit-graph >actual &&
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
2018-04-10 14:56:05 +02:00
|
|
|
graph_git_behavior 'graph exists' full commits/3 commits/1
|
|
|
|
|
2018-04-02 22:34:20 +02:00
|
|
|
test_expect_success 'Add more commits' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
git reset --hard commits/1 &&
|
|
|
|
for i in $(test_seq 4 5)
|
|
|
|
do
|
|
|
|
test_commit $i &&
|
|
|
|
git branch commits/$i
|
|
|
|
done &&
|
|
|
|
git reset --hard commits/2 &&
|
|
|
|
for i in $(test_seq 6 7)
|
|
|
|
do
|
|
|
|
test_commit $i &&
|
|
|
|
git branch commits/$i
|
|
|
|
done &&
|
|
|
|
git reset --hard commits/2 &&
|
|
|
|
git merge commits/4 &&
|
|
|
|
git branch merge/1 &&
|
|
|
|
git reset --hard commits/4 &&
|
|
|
|
git merge commits/6 &&
|
|
|
|
git branch merge/2 &&
|
|
|
|
git reset --hard commits/3 &&
|
|
|
|
git merge commits/5 commits/7 &&
|
|
|
|
git branch merge/3 &&
|
|
|
|
git repack
|
|
|
|
'
|
|
|
|
|
2019-08-26 18:29:58 +02:00
|
|
|
test_expect_success 'commit-graph write progress off for redirected stderr' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
git commit-graph write 2>err &&
|
2020-06-04 00:21:07 +02:00
|
|
|
test_must_be_empty err
|
2019-08-26 18:29:58 +02:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'commit-graph write force progress on for stderr' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
2019-11-25 22:28:22 +01:00
|
|
|
GIT_PROGRESS_DELAY=0 git commit-graph write --progress 2>err &&
|
2019-08-26 18:29:58 +02:00
|
|
|
test_file_not_empty err
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'commit-graph write with the --no-progress option' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
git commit-graph write --no-progress 2>err &&
|
2020-06-04 00:21:07 +02:00
|
|
|
test_must_be_empty err
|
2019-08-26 18:29:58 +02:00
|
|
|
'
|
|
|
|
|
2020-06-01 20:01:31 +02:00
|
|
|
test_expect_success 'commit-graph write --stdin-commits progress off for redirected stderr' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
git rev-parse commits/5 >in &&
|
|
|
|
git commit-graph write --stdin-commits <in 2>err &&
|
|
|
|
test_must_be_empty err
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'commit-graph write --stdin-commits force progress on for stderr' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
git rev-parse commits/5 >in &&
|
|
|
|
GIT_PROGRESS_DELAY=0 git commit-graph write --stdin-commits --progress <in 2>err &&
|
|
|
|
test_i18ngrep "Collecting commits from input" err
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'commit-graph write --stdin-commits with the --no-progress option' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
git rev-parse commits/5 >in &&
|
|
|
|
git commit-graph write --stdin-commits --no-progress <in 2>err &&
|
|
|
|
test_must_be_empty err
|
2019-08-26 18:29:58 +02:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'commit-graph verify progress off for redirected stderr' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
git commit-graph verify 2>err &&
|
2020-06-04 00:21:07 +02:00
|
|
|
test_must_be_empty err
|
2019-08-26 18:29:58 +02:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'commit-graph verify force progress on for stderr' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
2019-11-25 22:28:22 +01:00
|
|
|
GIT_PROGRESS_DELAY=0 git commit-graph verify --progress 2>err &&
|
2019-08-26 18:29:58 +02:00
|
|
|
test_file_not_empty err
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'commit-graph verify with the --no-progress option' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
git commit-graph verify --no-progress 2>err &&
|
2020-06-04 00:21:07 +02:00
|
|
|
test_must_be_empty err
|
2019-08-26 18:29:58 +02:00
|
|
|
'
|
|
|
|
|
2018-04-02 22:34:20 +02:00
|
|
|
# Current graph structure:
|
|
|
|
#
|
|
|
|
# __M3___
|
|
|
|
# / | \
|
|
|
|
# 3 M1 5 M2 7
|
|
|
|
# |/ \|/ \|
|
|
|
|
# 2 4 6
|
|
|
|
# |___/____/
|
|
|
|
# 1
|
|
|
|
|
|
|
|
test_expect_success 'write graph with merges' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
git commit-graph write &&
|
2018-04-10 14:56:02 +02:00
|
|
|
test_path_is_file $objdir/info/commit-graph &&
|
commit-graph: implement generation data chunk
As discovered by Ævar, we cannot increment graph version to
distinguish between generation numbers v1 and v2 [1]. Thus, one of
pre-requistes before implementing generation number v2 was to
distinguish between graph versions in a backwards compatible manner.
We are going to introduce a new chunk called Generation DATa chunk (or
GDAT). GDAT will store corrected committer date offsets whereas CDAT
will still store topological level.
Old Git does not understand GDAT chunk and would ignore it, reading
topological levels from CDAT. New Git can parse GDAT and take advantage
of newer generation numbers, falling back to topological levels when
GDAT chunk is missing (as it would happen with a commit-graph written
by old Git).
We introduce a test environment variable 'GIT_TEST_COMMIT_GRAPH_NO_GDAT'
which forces commit-graph file to be written without generation data
chunk to emulate a commit-graph file written by old Git.
To minimize the space required to store corrrected commit date, Git
stores corrected commit date offsets into the commit-graph file, instea
of corrected commit dates. This saves us 4 bytes per commit, decreasing
the GDAT chunk size by half, but it's possible for the offset to
overflow the 4-bytes allocated for storage. As such overflows are and
should be exceedingly rare, we use the following overflow management
scheme:
We introduce a new commit-graph chunk, Generation Data OVerflow ('GDOV')
to store corrected commit dates for commits with offsets greater than
GENERATION_NUMBER_V2_OFFSET_MAX.
If the offset is greater than GENERATION_NUMBER_V2_OFFSET_MAX, we set
the MSB of the offset and the other bits store the position of corrected
commit date in GDOV chunk, similar to how Extra Edge List is maintained.
We test the overflow-related code with the following repo history:
F - N - U
/ \
U - N - U N
\ /
N - F - N
Where the commits denoted by U have committer date of zero seconds
since Unix epoch, the commits denoted by N have committer date of
1112354055 (default committer date for the test suite) seconds since
Unix epoch and the commits denoted by F have committer date of
(2 ^ 31 - 2) seconds since Unix epoch.
The largest offset observed is 2 ^ 31, just large enough to overflow.
[1]: https://lore.kernel.org/git/87a7gdspo4.fsf@evledraar.gmail.com/
Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-16 19:11:15 +01:00
|
|
|
graph_read_expect "10" "generation_data extra_edges"
|
2018-04-02 22:34:20 +02:00
|
|
|
'
|
|
|
|
|
2018-04-10 14:56:05 +02:00
|
|
|
graph_git_behavior 'merge 1 vs 2' full merge/1 merge/2
|
|
|
|
graph_git_behavior 'merge 1 vs 3' full merge/1 merge/3
|
|
|
|
graph_git_behavior 'merge 2 vs 3' full merge/2 merge/3
|
|
|
|
|
2018-04-02 22:34:20 +02:00
|
|
|
test_expect_success 'Add one more commit' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
test_commit 8 &&
|
|
|
|
git branch commits/8 &&
|
|
|
|
ls $objdir/pack | grep idx >existing-idx &&
|
|
|
|
git repack &&
|
2018-08-24 17:20:16 +02:00
|
|
|
ls $objdir/pack| grep idx | grep -v -f existing-idx >new-idx
|
2018-04-02 22:34:20 +02:00
|
|
|
'
|
|
|
|
|
|
|
|
# Current graph structure:
|
|
|
|
#
|
|
|
|
# 8
|
|
|
|
# |
|
|
|
|
# __M3___
|
|
|
|
# / | \
|
|
|
|
# 3 M1 5 M2 7
|
|
|
|
# |/ \|/ \|
|
|
|
|
# 2 4 6
|
|
|
|
# |___/____/
|
|
|
|
# 1
|
|
|
|
|
2018-04-10 14:56:05 +02:00
|
|
|
graph_git_behavior 'mixed mode, commit 8 vs merge 1' full commits/8 merge/1
|
|
|
|
graph_git_behavior 'mixed mode, commit 8 vs merge 2' full commits/8 merge/2
|
|
|
|
|
2018-04-02 22:34:20 +02:00
|
|
|
test_expect_success 'write graph with new commit' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
git commit-graph write &&
|
2018-04-10 14:56:02 +02:00
|
|
|
test_path_is_file $objdir/info/commit-graph &&
|
commit-graph: implement generation data chunk
As discovered by Ævar, we cannot increment graph version to
distinguish between generation numbers v1 and v2 [1]. Thus, one of
pre-requistes before implementing generation number v2 was to
distinguish between graph versions in a backwards compatible manner.
We are going to introduce a new chunk called Generation DATa chunk (or
GDAT). GDAT will store corrected committer date offsets whereas CDAT
will still store topological level.
Old Git does not understand GDAT chunk and would ignore it, reading
topological levels from CDAT. New Git can parse GDAT and take advantage
of newer generation numbers, falling back to topological levels when
GDAT chunk is missing (as it would happen with a commit-graph written
by old Git).
We introduce a test environment variable 'GIT_TEST_COMMIT_GRAPH_NO_GDAT'
which forces commit-graph file to be written without generation data
chunk to emulate a commit-graph file written by old Git.
To minimize the space required to store corrrected commit date, Git
stores corrected commit date offsets into the commit-graph file, instea
of corrected commit dates. This saves us 4 bytes per commit, decreasing
the GDAT chunk size by half, but it's possible for the offset to
overflow the 4-bytes allocated for storage. As such overflows are and
should be exceedingly rare, we use the following overflow management
scheme:
We introduce a new commit-graph chunk, Generation Data OVerflow ('GDOV')
to store corrected commit dates for commits with offsets greater than
GENERATION_NUMBER_V2_OFFSET_MAX.
If the offset is greater than GENERATION_NUMBER_V2_OFFSET_MAX, we set
the MSB of the offset and the other bits store the position of corrected
commit date in GDOV chunk, similar to how Extra Edge List is maintained.
We test the overflow-related code with the following repo history:
F - N - U
/ \
U - N - U N
\ /
N - F - N
Where the commits denoted by U have committer date of zero seconds
since Unix epoch, the commits denoted by N have committer date of
1112354055 (default committer date for the test suite) seconds since
Unix epoch and the commits denoted by F have committer date of
(2 ^ 31 - 2) seconds since Unix epoch.
The largest offset observed is 2 ^ 31, just large enough to overflow.
[1]: https://lore.kernel.org/git/87a7gdspo4.fsf@evledraar.gmail.com/
Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-16 19:11:15 +01:00
|
|
|
graph_read_expect "11" "generation_data extra_edges"
|
2018-04-02 22:34:20 +02:00
|
|
|
'
|
|
|
|
|
2018-04-10 14:56:05 +02:00
|
|
|
graph_git_behavior 'full graph, commit 8 vs merge 1' full commits/8 merge/1
|
|
|
|
graph_git_behavior 'full graph, commit 8 vs merge 2' full commits/8 merge/2
|
|
|
|
|
2018-04-02 22:34:20 +02:00
|
|
|
test_expect_success 'write graph with nothing new' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
git commit-graph write &&
|
2018-04-10 14:56:02 +02:00
|
|
|
test_path_is_file $objdir/info/commit-graph &&
|
commit-graph: implement generation data chunk
As discovered by Ævar, we cannot increment graph version to
distinguish between generation numbers v1 and v2 [1]. Thus, one of
pre-requistes before implementing generation number v2 was to
distinguish between graph versions in a backwards compatible manner.
We are going to introduce a new chunk called Generation DATa chunk (or
GDAT). GDAT will store corrected committer date offsets whereas CDAT
will still store topological level.
Old Git does not understand GDAT chunk and would ignore it, reading
topological levels from CDAT. New Git can parse GDAT and take advantage
of newer generation numbers, falling back to topological levels when
GDAT chunk is missing (as it would happen with a commit-graph written
by old Git).
We introduce a test environment variable 'GIT_TEST_COMMIT_GRAPH_NO_GDAT'
which forces commit-graph file to be written without generation data
chunk to emulate a commit-graph file written by old Git.
To minimize the space required to store corrrected commit date, Git
stores corrected commit date offsets into the commit-graph file, instea
of corrected commit dates. This saves us 4 bytes per commit, decreasing
the GDAT chunk size by half, but it's possible for the offset to
overflow the 4-bytes allocated for storage. As such overflows are and
should be exceedingly rare, we use the following overflow management
scheme:
We introduce a new commit-graph chunk, Generation Data OVerflow ('GDOV')
to store corrected commit dates for commits with offsets greater than
GENERATION_NUMBER_V2_OFFSET_MAX.
If the offset is greater than GENERATION_NUMBER_V2_OFFSET_MAX, we set
the MSB of the offset and the other bits store the position of corrected
commit date in GDOV chunk, similar to how Extra Edge List is maintained.
We test the overflow-related code with the following repo history:
F - N - U
/ \
U - N - U N
\ /
N - F - N
Where the commits denoted by U have committer date of zero seconds
since Unix epoch, the commits denoted by N have committer date of
1112354055 (default committer date for the test suite) seconds since
Unix epoch and the commits denoted by F have committer date of
(2 ^ 31 - 2) seconds since Unix epoch.
The largest offset observed is 2 ^ 31, just large enough to overflow.
[1]: https://lore.kernel.org/git/87a7gdspo4.fsf@evledraar.gmail.com/
Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-16 19:11:15 +01:00
|
|
|
graph_read_expect "11" "generation_data extra_edges"
|
2018-04-02 22:34:20 +02:00
|
|
|
'
|
|
|
|
|
2018-04-10 14:56:05 +02:00
|
|
|
graph_git_behavior 'cleared graph, commit 8 vs merge 1' full commits/8 merge/1
|
|
|
|
graph_git_behavior 'cleared graph, commit 8 vs merge 2' full commits/8 merge/2
|
|
|
|
|
2018-04-10 14:56:06 +02:00
|
|
|
test_expect_success 'build graph from latest pack with closure' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
cat new-idx | git commit-graph write --stdin-packs &&
|
|
|
|
test_path_is_file $objdir/info/commit-graph &&
|
commit-graph: implement generation data chunk
As discovered by Ævar, we cannot increment graph version to
distinguish between generation numbers v1 and v2 [1]. Thus, one of
pre-requistes before implementing generation number v2 was to
distinguish between graph versions in a backwards compatible manner.
We are going to introduce a new chunk called Generation DATa chunk (or
GDAT). GDAT will store corrected committer date offsets whereas CDAT
will still store topological level.
Old Git does not understand GDAT chunk and would ignore it, reading
topological levels from CDAT. New Git can parse GDAT and take advantage
of newer generation numbers, falling back to topological levels when
GDAT chunk is missing (as it would happen with a commit-graph written
by old Git).
We introduce a test environment variable 'GIT_TEST_COMMIT_GRAPH_NO_GDAT'
which forces commit-graph file to be written without generation data
chunk to emulate a commit-graph file written by old Git.
To minimize the space required to store corrrected commit date, Git
stores corrected commit date offsets into the commit-graph file, instea
of corrected commit dates. This saves us 4 bytes per commit, decreasing
the GDAT chunk size by half, but it's possible for the offset to
overflow the 4-bytes allocated for storage. As such overflows are and
should be exceedingly rare, we use the following overflow management
scheme:
We introduce a new commit-graph chunk, Generation Data OVerflow ('GDOV')
to store corrected commit dates for commits with offsets greater than
GENERATION_NUMBER_V2_OFFSET_MAX.
If the offset is greater than GENERATION_NUMBER_V2_OFFSET_MAX, we set
the MSB of the offset and the other bits store the position of corrected
commit date in GDOV chunk, similar to how Extra Edge List is maintained.
We test the overflow-related code with the following repo history:
F - N - U
/ \
U - N - U N
\ /
N - F - N
Where the commits denoted by U have committer date of zero seconds
since Unix epoch, the commits denoted by N have committer date of
1112354055 (default committer date for the test suite) seconds since
Unix epoch and the commits denoted by F have committer date of
(2 ^ 31 - 2) seconds since Unix epoch.
The largest offset observed is 2 ^ 31, just large enough to overflow.
[1]: https://lore.kernel.org/git/87a7gdspo4.fsf@evledraar.gmail.com/
Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-16 19:11:15 +01:00
|
|
|
graph_read_expect "9" "generation_data extra_edges"
|
2018-04-10 14:56:06 +02:00
|
|
|
'
|
|
|
|
|
|
|
|
graph_git_behavior 'graph from pack, commit 8 vs merge 1' full commits/8 merge/1
|
|
|
|
graph_git_behavior 'graph from pack, commit 8 vs merge 2' full commits/8 merge/2
|
|
|
|
|
2018-04-10 14:56:07 +02:00
|
|
|
test_expect_success 'build graph from commits with closure' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
git tag -a -m "merge" tag/merge merge/2 &&
|
|
|
|
git rev-parse tag/merge >commits-in &&
|
|
|
|
git rev-parse merge/1 >>commits-in &&
|
|
|
|
cat commits-in | git commit-graph write --stdin-commits &&
|
|
|
|
test_path_is_file $objdir/info/commit-graph &&
|
commit-graph: implement generation data chunk
As discovered by Ævar, we cannot increment graph version to
distinguish between generation numbers v1 and v2 [1]. Thus, one of
pre-requistes before implementing generation number v2 was to
distinguish between graph versions in a backwards compatible manner.
We are going to introduce a new chunk called Generation DATa chunk (or
GDAT). GDAT will store corrected committer date offsets whereas CDAT
will still store topological level.
Old Git does not understand GDAT chunk and would ignore it, reading
topological levels from CDAT. New Git can parse GDAT and take advantage
of newer generation numbers, falling back to topological levels when
GDAT chunk is missing (as it would happen with a commit-graph written
by old Git).
We introduce a test environment variable 'GIT_TEST_COMMIT_GRAPH_NO_GDAT'
which forces commit-graph file to be written without generation data
chunk to emulate a commit-graph file written by old Git.
To minimize the space required to store corrrected commit date, Git
stores corrected commit date offsets into the commit-graph file, instea
of corrected commit dates. This saves us 4 bytes per commit, decreasing
the GDAT chunk size by half, but it's possible for the offset to
overflow the 4-bytes allocated for storage. As such overflows are and
should be exceedingly rare, we use the following overflow management
scheme:
We introduce a new commit-graph chunk, Generation Data OVerflow ('GDOV')
to store corrected commit dates for commits with offsets greater than
GENERATION_NUMBER_V2_OFFSET_MAX.
If the offset is greater than GENERATION_NUMBER_V2_OFFSET_MAX, we set
the MSB of the offset and the other bits store the position of corrected
commit date in GDOV chunk, similar to how Extra Edge List is maintained.
We test the overflow-related code with the following repo history:
F - N - U
/ \
U - N - U N
\ /
N - F - N
Where the commits denoted by U have committer date of zero seconds
since Unix epoch, the commits denoted by N have committer date of
1112354055 (default committer date for the test suite) seconds since
Unix epoch and the commits denoted by F have committer date of
(2 ^ 31 - 2) seconds since Unix epoch.
The largest offset observed is 2 ^ 31, just large enough to overflow.
[1]: https://lore.kernel.org/git/87a7gdspo4.fsf@evledraar.gmail.com/
Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-16 19:11:15 +01:00
|
|
|
graph_read_expect "6" "generation_data"
|
2018-04-10 14:56:07 +02:00
|
|
|
'
|
|
|
|
|
|
|
|
graph_git_behavior 'graph from commits, commit 8 vs merge 1' full commits/8 merge/1
|
|
|
|
graph_git_behavior 'graph from commits, commit 8 vs merge 2' full commits/8 merge/2
|
|
|
|
|
2018-04-10 14:56:08 +02:00
|
|
|
test_expect_success 'build graph from commits with append' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
git rev-parse merge/3 | git commit-graph write --stdin-commits --append &&
|
|
|
|
test_path_is_file $objdir/info/commit-graph &&
|
commit-graph: implement generation data chunk
As discovered by Ævar, we cannot increment graph version to
distinguish between generation numbers v1 and v2 [1]. Thus, one of
pre-requistes before implementing generation number v2 was to
distinguish between graph versions in a backwards compatible manner.
We are going to introduce a new chunk called Generation DATa chunk (or
GDAT). GDAT will store corrected committer date offsets whereas CDAT
will still store topological level.
Old Git does not understand GDAT chunk and would ignore it, reading
topological levels from CDAT. New Git can parse GDAT and take advantage
of newer generation numbers, falling back to topological levels when
GDAT chunk is missing (as it would happen with a commit-graph written
by old Git).
We introduce a test environment variable 'GIT_TEST_COMMIT_GRAPH_NO_GDAT'
which forces commit-graph file to be written without generation data
chunk to emulate a commit-graph file written by old Git.
To minimize the space required to store corrrected commit date, Git
stores corrected commit date offsets into the commit-graph file, instea
of corrected commit dates. This saves us 4 bytes per commit, decreasing
the GDAT chunk size by half, but it's possible for the offset to
overflow the 4-bytes allocated for storage. As such overflows are and
should be exceedingly rare, we use the following overflow management
scheme:
We introduce a new commit-graph chunk, Generation Data OVerflow ('GDOV')
to store corrected commit dates for commits with offsets greater than
GENERATION_NUMBER_V2_OFFSET_MAX.
If the offset is greater than GENERATION_NUMBER_V2_OFFSET_MAX, we set
the MSB of the offset and the other bits store the position of corrected
commit date in GDOV chunk, similar to how Extra Edge List is maintained.
We test the overflow-related code with the following repo history:
F - N - U
/ \
U - N - U N
\ /
N - F - N
Where the commits denoted by U have committer date of zero seconds
since Unix epoch, the commits denoted by N have committer date of
1112354055 (default committer date for the test suite) seconds since
Unix epoch and the commits denoted by F have committer date of
(2 ^ 31 - 2) seconds since Unix epoch.
The largest offset observed is 2 ^ 31, just large enough to overflow.
[1]: https://lore.kernel.org/git/87a7gdspo4.fsf@evledraar.gmail.com/
Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-16 19:11:15 +01:00
|
|
|
graph_read_expect "10" "generation_data extra_edges"
|
2018-04-10 14:56:08 +02:00
|
|
|
'
|
|
|
|
|
|
|
|
graph_git_behavior 'append graph, commit 8 vs merge 1' full commits/8 merge/1
|
|
|
|
graph_git_behavior 'append graph, commit 8 vs merge 2' full commits/8 merge/2
|
|
|
|
|
2018-06-27 15:24:45 +02:00
|
|
|
test_expect_success 'build graph using --reachable' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
git commit-graph write --reachable &&
|
|
|
|
test_path_is_file $objdir/info/commit-graph &&
|
commit-graph: implement generation data chunk
As discovered by Ævar, we cannot increment graph version to
distinguish between generation numbers v1 and v2 [1]. Thus, one of
pre-requistes before implementing generation number v2 was to
distinguish between graph versions in a backwards compatible manner.
We are going to introduce a new chunk called Generation DATa chunk (or
GDAT). GDAT will store corrected committer date offsets whereas CDAT
will still store topological level.
Old Git does not understand GDAT chunk and would ignore it, reading
topological levels from CDAT. New Git can parse GDAT and take advantage
of newer generation numbers, falling back to topological levels when
GDAT chunk is missing (as it would happen with a commit-graph written
by old Git).
We introduce a test environment variable 'GIT_TEST_COMMIT_GRAPH_NO_GDAT'
which forces commit-graph file to be written without generation data
chunk to emulate a commit-graph file written by old Git.
To minimize the space required to store corrrected commit date, Git
stores corrected commit date offsets into the commit-graph file, instea
of corrected commit dates. This saves us 4 bytes per commit, decreasing
the GDAT chunk size by half, but it's possible for the offset to
overflow the 4-bytes allocated for storage. As such overflows are and
should be exceedingly rare, we use the following overflow management
scheme:
We introduce a new commit-graph chunk, Generation Data OVerflow ('GDOV')
to store corrected commit dates for commits with offsets greater than
GENERATION_NUMBER_V2_OFFSET_MAX.
If the offset is greater than GENERATION_NUMBER_V2_OFFSET_MAX, we set
the MSB of the offset and the other bits store the position of corrected
commit date in GDOV chunk, similar to how Extra Edge List is maintained.
We test the overflow-related code with the following repo history:
F - N - U
/ \
U - N - U N
\ /
N - F - N
Where the commits denoted by U have committer date of zero seconds
since Unix epoch, the commits denoted by N have committer date of
1112354055 (default committer date for the test suite) seconds since
Unix epoch and the commits denoted by F have committer date of
(2 ^ 31 - 2) seconds since Unix epoch.
The largest offset observed is 2 ^ 31, just large enough to overflow.
[1]: https://lore.kernel.org/git/87a7gdspo4.fsf@evledraar.gmail.com/
Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-16 19:11:15 +01:00
|
|
|
graph_read_expect "11" "generation_data extra_edges"
|
2018-06-27 15:24:45 +02:00
|
|
|
'
|
|
|
|
|
|
|
|
graph_git_behavior 'append graph, commit 8 vs merge 1' full commits/8 merge/1
|
|
|
|
graph_git_behavior 'append graph, commit 8 vs merge 2' full commits/8 merge/2
|
|
|
|
|
2018-04-02 22:34:20 +02:00
|
|
|
test_expect_success 'setup bare repo' '
|
|
|
|
cd "$TRASH_DIRECTORY" &&
|
|
|
|
git clone --bare --no-local full bare &&
|
|
|
|
cd bare &&
|
2018-04-10 14:56:05 +02:00
|
|
|
git config core.commitGraph true &&
|
2018-04-02 22:34:20 +02:00
|
|
|
baredir="./objects"
|
|
|
|
'
|
|
|
|
|
2018-04-10 14:56:05 +02:00
|
|
|
graph_git_behavior 'bare repo, commit 8 vs merge 1' bare commits/8 merge/1
|
|
|
|
graph_git_behavior 'bare repo, commit 8 vs merge 2' bare commits/8 merge/2
|
|
|
|
|
2018-04-02 22:34:20 +02:00
|
|
|
test_expect_success 'write graph in bare repo' '
|
|
|
|
cd "$TRASH_DIRECTORY/bare" &&
|
|
|
|
git commit-graph write &&
|
2018-04-10 14:56:02 +02:00
|
|
|
test_path_is_file $baredir/info/commit-graph &&
|
commit-graph: implement generation data chunk
As discovered by Ævar, we cannot increment graph version to
distinguish between generation numbers v1 and v2 [1]. Thus, one of
pre-requistes before implementing generation number v2 was to
distinguish between graph versions in a backwards compatible manner.
We are going to introduce a new chunk called Generation DATa chunk (or
GDAT). GDAT will store corrected committer date offsets whereas CDAT
will still store topological level.
Old Git does not understand GDAT chunk and would ignore it, reading
topological levels from CDAT. New Git can parse GDAT and take advantage
of newer generation numbers, falling back to topological levels when
GDAT chunk is missing (as it would happen with a commit-graph written
by old Git).
We introduce a test environment variable 'GIT_TEST_COMMIT_GRAPH_NO_GDAT'
which forces commit-graph file to be written without generation data
chunk to emulate a commit-graph file written by old Git.
To minimize the space required to store corrrected commit date, Git
stores corrected commit date offsets into the commit-graph file, instea
of corrected commit dates. This saves us 4 bytes per commit, decreasing
the GDAT chunk size by half, but it's possible for the offset to
overflow the 4-bytes allocated for storage. As such overflows are and
should be exceedingly rare, we use the following overflow management
scheme:
We introduce a new commit-graph chunk, Generation Data OVerflow ('GDOV')
to store corrected commit dates for commits with offsets greater than
GENERATION_NUMBER_V2_OFFSET_MAX.
If the offset is greater than GENERATION_NUMBER_V2_OFFSET_MAX, we set
the MSB of the offset and the other bits store the position of corrected
commit date in GDOV chunk, similar to how Extra Edge List is maintained.
We test the overflow-related code with the following repo history:
F - N - U
/ \
U - N - U N
\ /
N - F - N
Where the commits denoted by U have committer date of zero seconds
since Unix epoch, the commits denoted by N have committer date of
1112354055 (default committer date for the test suite) seconds since
Unix epoch and the commits denoted by F have committer date of
(2 ^ 31 - 2) seconds since Unix epoch.
The largest offset observed is 2 ^ 31, just large enough to overflow.
[1]: https://lore.kernel.org/git/87a7gdspo4.fsf@evledraar.gmail.com/
Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-16 19:11:15 +01:00
|
|
|
graph_read_expect "11" "generation_data extra_edges"
|
2018-04-02 22:34:20 +02:00
|
|
|
'
|
|
|
|
|
2018-04-10 14:56:05 +02:00
|
|
|
graph_git_behavior 'bare repo with graph, commit 8 vs merge 1' bare commits/8 merge/1
|
|
|
|
graph_git_behavior 'bare repo with graph, commit 8 vs merge 2' bare commits/8 merge/2
|
|
|
|
|
2018-05-01 14:47:23 +02:00
|
|
|
test_expect_success 'perform fast-forward merge in full repo' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
git checkout -b merge-5-to-8 commits/5 &&
|
|
|
|
git merge commits/8 &&
|
|
|
|
git show-ref -s merge-5-to-8 >output &&
|
|
|
|
git show-ref -s commits/8 >expect &&
|
|
|
|
test_cmp expect output
|
|
|
|
'
|
|
|
|
|
2018-06-27 15:24:46 +02:00
|
|
|
test_expect_success 'check that gc computes commit-graph' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
git commit --allow-empty -m "blank" &&
|
|
|
|
git commit-graph write --reachable &&
|
|
|
|
cp $objdir/info/commit-graph commit-graph-before-gc &&
|
|
|
|
git reset --hard HEAD~1 &&
|
|
|
|
git config gc.writeCommitGraph true &&
|
|
|
|
git gc &&
|
|
|
|
cp $objdir/info/commit-graph commit-graph-after-gc &&
|
2018-08-13 13:52:43 +02:00
|
|
|
! test_cmp_bin commit-graph-before-gc commit-graph-after-gc &&
|
2018-06-27 15:24:46 +02:00
|
|
|
git commit-graph write --reachable &&
|
2018-08-13 13:52:43 +02:00
|
|
|
test_cmp_bin commit-graph-after-gc $objdir/info/commit-graph
|
2018-06-27 15:24:46 +02:00
|
|
|
'
|
|
|
|
|
2018-08-20 20:24:27 +02:00
|
|
|
test_expect_success 'replace-objects invalidates commit-graph' '
|
|
|
|
cd "$TRASH_DIRECTORY" &&
|
|
|
|
test_when_finished rm -rf replace &&
|
|
|
|
git clone full replace &&
|
|
|
|
(
|
|
|
|
cd replace &&
|
|
|
|
git commit-graph write --reachable &&
|
|
|
|
test_path_is_file .git/objects/info/commit-graph &&
|
|
|
|
git replace HEAD~1 HEAD~2 &&
|
|
|
|
git -c core.commitGraph=false log >expect &&
|
|
|
|
git -c core.commitGraph=true log >actual &&
|
|
|
|
test_cmp expect actual &&
|
|
|
|
git commit-graph write --reachable &&
|
|
|
|
git -c core.commitGraph=false --no-replace-objects log >expect &&
|
|
|
|
git -c core.commitGraph=true --no-replace-objects log >actual &&
|
|
|
|
test_cmp expect actual &&
|
|
|
|
rm -rf .git/objects/info/commit-graph &&
|
|
|
|
git commit-graph write --reachable &&
|
|
|
|
test_path_is_file .git/objects/info/commit-graph
|
|
|
|
)
|
|
|
|
'
|
|
|
|
|
2018-08-20 20:24:30 +02:00
|
|
|
test_expect_success 'commit grafts invalidate commit-graph' '
|
|
|
|
cd "$TRASH_DIRECTORY" &&
|
|
|
|
test_when_finished rm -rf graft &&
|
|
|
|
git clone full graft &&
|
|
|
|
(
|
|
|
|
cd graft &&
|
|
|
|
git commit-graph write --reachable &&
|
|
|
|
test_path_is_file .git/objects/info/commit-graph &&
|
|
|
|
H1=$(git rev-parse --verify HEAD~1) &&
|
|
|
|
H3=$(git rev-parse --verify HEAD~3) &&
|
|
|
|
echo "$H1 $H3" >.git/info/grafts &&
|
|
|
|
git -c core.commitGraph=false log >expect &&
|
|
|
|
git -c core.commitGraph=true log >actual &&
|
|
|
|
test_cmp expect actual &&
|
|
|
|
git commit-graph write --reachable &&
|
|
|
|
git -c core.commitGraph=false --no-replace-objects log >expect &&
|
|
|
|
git -c core.commitGraph=true --no-replace-objects log >actual &&
|
|
|
|
test_cmp expect actual &&
|
|
|
|
rm -rf .git/objects/info/commit-graph &&
|
|
|
|
git commit-graph write --reachable &&
|
|
|
|
test_path_is_missing .git/objects/info/commit-graph
|
|
|
|
)
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'replace-objects invalidates commit-graph' '
|
|
|
|
cd "$TRASH_DIRECTORY" &&
|
|
|
|
test_when_finished rm -rf shallow &&
|
|
|
|
git clone --depth 2 "file://$TRASH_DIRECTORY/full" shallow &&
|
|
|
|
(
|
|
|
|
cd shallow &&
|
|
|
|
git commit-graph write --reachable &&
|
|
|
|
test_path_is_missing .git/objects/info/commit-graph &&
|
|
|
|
git fetch origin --unshallow &&
|
|
|
|
git commit-graph write --reachable &&
|
|
|
|
test_path_is_file .git/objects/info/commit-graph
|
|
|
|
)
|
|
|
|
'
|
|
|
|
|
2020-08-17 16:04:47 +02:00
|
|
|
test_expect_success 'warn on improper hash version' '
|
|
|
|
git init --object-format=sha1 sha1 &&
|
|
|
|
(
|
|
|
|
cd sha1 &&
|
|
|
|
test_commit 1 &&
|
|
|
|
git commit-graph write --reachable &&
|
|
|
|
mv .git/objects/info/commit-graph ../cg-sha1
|
|
|
|
) &&
|
|
|
|
git init --object-format=sha256 sha256 &&
|
|
|
|
(
|
|
|
|
cd sha256 &&
|
|
|
|
test_commit 1 &&
|
|
|
|
git commit-graph write --reachable &&
|
|
|
|
mv .git/objects/info/commit-graph ../cg-sha256
|
|
|
|
) &&
|
|
|
|
(
|
|
|
|
cd sha1 &&
|
|
|
|
mv ../cg-sha256 .git/objects/info/commit-graph &&
|
|
|
|
git log -1 2>err &&
|
|
|
|
test_i18ngrep "commit-graph hash version 2 does not match version 1" err
|
|
|
|
) &&
|
|
|
|
(
|
|
|
|
cd sha256 &&
|
|
|
|
mv ../cg-sha1 .git/objects/info/commit-graph &&
|
|
|
|
git log -1 2>err &&
|
|
|
|
test_i18ngrep "commit-graph hash version 1 does not match version 2" err
|
|
|
|
)
|
|
|
|
'
|
|
|
|
|
2021-02-02 04:01:23 +01:00
|
|
|
test_expect_success 'lower layers have overflow chunk' '
|
2021-02-01 18:15:04 +01:00
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
UNIX_EPOCH_ZERO="@0 +0000" &&
|
|
|
|
FUTURE_DATE="@2147483646 +0000" &&
|
|
|
|
rm -f .git/objects/info/commit-graph &&
|
|
|
|
test_commit --date "$FUTURE_DATE" future-1 &&
|
|
|
|
test_commit --date "$UNIX_EPOCH_ZERO" old-1 &&
|
|
|
|
git commit-graph write --reachable &&
|
|
|
|
test_commit --date "$FUTURE_DATE" future-2 &&
|
|
|
|
test_commit --date "$UNIX_EPOCH_ZERO" old-2 &&
|
|
|
|
git commit-graph write --reachable --split=no-merge &&
|
|
|
|
test_commit extra &&
|
|
|
|
git commit-graph write --reachable --split=no-merge &&
|
|
|
|
git commit-graph write --reachable &&
|
|
|
|
graph_read_expect 16 "generation_data generation_data_overflow extra_edges" &&
|
|
|
|
mv .git/objects/info/commit-graph commit-graph-upgraded &&
|
|
|
|
git commit-graph write --reachable &&
|
|
|
|
graph_read_expect 16 "generation_data generation_data_overflow extra_edges" &&
|
|
|
|
test_cmp .git/objects/info/commit-graph commit-graph-upgraded
|
|
|
|
'
|
|
|
|
|
2018-06-27 15:24:33 +02:00
|
|
|
# the verify tests below expect the commit-graph to contain
|
|
|
|
# exactly the commits reachable from the commits/8 branch.
|
|
|
|
# If the file changes the set of commits in the list, then the
|
|
|
|
# offsets into the binary file will result in different edits
|
|
|
|
# and the tests will likely break.
|
|
|
|
|
2018-06-27 15:24:32 +02:00
|
|
|
test_expect_success 'git commit-graph verify' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
2021-02-25 19:19:43 +01:00
|
|
|
git rev-parse commits/8 | git -c commitGraph.generationVersion=1 commit-graph write --stdin-commits &&
|
commit-graph: implement generation data chunk
As discovered by Ævar, we cannot increment graph version to
distinguish between generation numbers v1 and v2 [1]. Thus, one of
pre-requistes before implementing generation number v2 was to
distinguish between graph versions in a backwards compatible manner.
We are going to introduce a new chunk called Generation DATa chunk (or
GDAT). GDAT will store corrected committer date offsets whereas CDAT
will still store topological level.
Old Git does not understand GDAT chunk and would ignore it, reading
topological levels from CDAT. New Git can parse GDAT and take advantage
of newer generation numbers, falling back to topological levels when
GDAT chunk is missing (as it would happen with a commit-graph written
by old Git).
We introduce a test environment variable 'GIT_TEST_COMMIT_GRAPH_NO_GDAT'
which forces commit-graph file to be written without generation data
chunk to emulate a commit-graph file written by old Git.
To minimize the space required to store corrrected commit date, Git
stores corrected commit date offsets into the commit-graph file, instea
of corrected commit dates. This saves us 4 bytes per commit, decreasing
the GDAT chunk size by half, but it's possible for the offset to
overflow the 4-bytes allocated for storage. As such overflows are and
should be exceedingly rare, we use the following overflow management
scheme:
We introduce a new commit-graph chunk, Generation Data OVerflow ('GDOV')
to store corrected commit dates for commits with offsets greater than
GENERATION_NUMBER_V2_OFFSET_MAX.
If the offset is greater than GENERATION_NUMBER_V2_OFFSET_MAX, we set
the MSB of the offset and the other bits store the position of corrected
commit date in GDOV chunk, similar to how Extra Edge List is maintained.
We test the overflow-related code with the following repo history:
F - N - U
/ \
U - N - U N
\ /
N - F - N
Where the commits denoted by U have committer date of zero seconds
since Unix epoch, the commits denoted by N have committer date of
1112354055 (default committer date for the test suite) seconds since
Unix epoch and the commits denoted by F have committer date of
(2 ^ 31 - 2) seconds since Unix epoch.
The largest offset observed is 2 ^ 31, just large enough to overflow.
[1]: https://lore.kernel.org/git/87a7gdspo4.fsf@evledraar.gmail.com/
Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-16 19:11:15 +01:00
|
|
|
git commit-graph verify >output &&
|
|
|
|
graph_read_expect 9 extra_edges
|
2018-06-27 15:24:32 +02:00
|
|
|
'
|
|
|
|
|
2018-06-27 15:24:36 +02:00
|
|
|
NUM_COMMITS=9
|
2018-06-27 15:24:41 +02:00
|
|
|
NUM_OCTOPUS_EDGES=2
|
2018-09-13 07:17:42 +02:00
|
|
|
HASH_LEN="$(test_oid rawsz)"
|
2018-06-27 15:24:33 +02:00
|
|
|
GRAPH_BYTE_VERSION=4
|
|
|
|
GRAPH_BYTE_HASH=5
|
2018-06-27 15:24:34 +02:00
|
|
|
GRAPH_BYTE_CHUNK_COUNT=6
|
|
|
|
GRAPH_CHUNK_LOOKUP_OFFSET=8
|
|
|
|
GRAPH_CHUNK_LOOKUP_WIDTH=12
|
|
|
|
GRAPH_CHUNK_LOOKUP_ROWS=5
|
|
|
|
GRAPH_BYTE_OID_FANOUT_ID=$GRAPH_CHUNK_LOOKUP_OFFSET
|
|
|
|
GRAPH_BYTE_OID_LOOKUP_ID=$(($GRAPH_CHUNK_LOOKUP_OFFSET + \
|
|
|
|
1 * $GRAPH_CHUNK_LOOKUP_WIDTH))
|
|
|
|
GRAPH_BYTE_COMMIT_DATA_ID=$(($GRAPH_CHUNK_LOOKUP_OFFSET + \
|
|
|
|
2 * $GRAPH_CHUNK_LOOKUP_WIDTH))
|
2018-06-27 15:24:35 +02:00
|
|
|
GRAPH_FANOUT_OFFSET=$(($GRAPH_CHUNK_LOOKUP_OFFSET + \
|
|
|
|
$GRAPH_CHUNK_LOOKUP_WIDTH * $GRAPH_CHUNK_LOOKUP_ROWS))
|
|
|
|
GRAPH_BYTE_FANOUT1=$(($GRAPH_FANOUT_OFFSET + 4 * 4))
|
|
|
|
GRAPH_BYTE_FANOUT2=$(($GRAPH_FANOUT_OFFSET + 4 * 255))
|
|
|
|
GRAPH_OID_LOOKUP_OFFSET=$(($GRAPH_FANOUT_OFFSET + 4 * 256))
|
|
|
|
GRAPH_BYTE_OID_LOOKUP_ORDER=$(($GRAPH_OID_LOOKUP_OFFSET + $HASH_LEN * 8))
|
2018-06-27 15:24:36 +02:00
|
|
|
GRAPH_BYTE_OID_LOOKUP_MISSING=$(($GRAPH_OID_LOOKUP_OFFSET + $HASH_LEN * 4 + 10))
|
2018-06-27 15:24:37 +02:00
|
|
|
GRAPH_COMMIT_DATA_OFFSET=$(($GRAPH_OID_LOOKUP_OFFSET + $HASH_LEN * $NUM_COMMITS))
|
|
|
|
GRAPH_BYTE_COMMIT_TREE=$GRAPH_COMMIT_DATA_OFFSET
|
2018-06-27 15:24:38 +02:00
|
|
|
GRAPH_BYTE_COMMIT_PARENT=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN))
|
|
|
|
GRAPH_BYTE_COMMIT_EXTRA_PARENT=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 4))
|
|
|
|
GRAPH_BYTE_COMMIT_WRONG_PARENT=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 3))
|
2018-06-27 15:24:39 +02:00
|
|
|
GRAPH_BYTE_COMMIT_GENERATION=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 11))
|
2018-06-27 15:24:40 +02:00
|
|
|
GRAPH_BYTE_COMMIT_DATE=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 12))
|
2018-06-27 15:24:41 +02:00
|
|
|
GRAPH_COMMIT_DATA_WIDTH=$(($HASH_LEN + 16))
|
|
|
|
GRAPH_OCTOPUS_DATA_OFFSET=$(($GRAPH_COMMIT_DATA_OFFSET + \
|
|
|
|
$GRAPH_COMMIT_DATA_WIDTH * $NUM_COMMITS))
|
|
|
|
GRAPH_BYTE_OCTOPUS=$(($GRAPH_OCTOPUS_DATA_OFFSET + 4))
|
2018-06-27 15:24:42 +02:00
|
|
|
GRAPH_BYTE_FOOTER=$(($GRAPH_OCTOPUS_DATA_OFFSET + 4 * $NUM_OCTOPUS_EDGES))
|
2018-06-27 15:24:33 +02:00
|
|
|
|
2019-02-21 23:37:46 +01:00
|
|
|
corrupt_graph_setup() {
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
test_when_finished mv commit-graph-backup $objdir/info/commit-graph &&
|
2020-04-29 19:36:38 +02:00
|
|
|
cp $objdir/info/commit-graph commit-graph-backup &&
|
|
|
|
chmod u+w $objdir/info/commit-graph
|
2019-02-21 23:37:46 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
corrupt_graph_verify() {
|
|
|
|
grepstr=$1
|
|
|
|
test_must_fail git commit-graph verify 2>test_err &&
|
|
|
|
grep -v "^+" test_err >err &&
|
commit-graph: fix segfault on e.g. "git status"
When core.commitGraph=true is set, various common commands now consult
the commit graph. Because the commit-graph code is very trusting of
its input data, it's possibly to construct a graph that'll cause an
immediate segfault on e.g. "status" (and e.g. "log", "blame", ...). In
some other cases where git immediately exits with a cryptic error
about the graph being broken.
The root cause of this is that while the "commit-graph verify"
sub-command exhaustively verifies the graph, other users of the graph
simply trust the graph, and will e.g. deference data found at certain
offsets as pointers, causing segfaults.
This change does the bare minimum to ensure that we don't segfault in
the common fill_commit_in_graph() codepath called by
e.g. setup_revisions(), to do this instrument the "commit-graph
verify" tests to always check if "status" would subsequently
segfault. This fixes the following tests which would previously
segfault:
not ok 50 - detect low chunk count
not ok 51 - detect missing OID fanout chunk
not ok 52 - detect missing OID lookup chunk
not ok 53 - detect missing commit data chunk
Those happened because with the commit-graph enabled setup_revisions()
would eventually call fill_commit_in_graph(), where e.g.
g->chunk_commit_data is used early as an offset (and will be
0x0). With this change we get far enough to detect that the graph is
broken, and show an error instead. E.g.:
$ git status; echo $?
error: commit-graph is missing the Commit Data chunk
1
That also sucks, we should *warn* and not hard-fail "status" just
because the commit-graph is corrupt, but fixing is left to a follow-up
change.
A side-effect of changing the reporting from graph_report() to error()
is that we now have an "error: " prefix for these even for
"commit-graph verify". Pseudo-diff before/after:
$ git commit-graph verify
-commit-graph is missing the Commit Data chunk
+error: commit-graph is missing the Commit Data chunk
Changing that is OK. Various errors it emits now early on are prefixed
with "error: ", moving these over and changing the output doesn't
break anything.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-25 13:08:29 +01:00
|
|
|
test_i18ngrep "$grepstr" err &&
|
commit-graph write: don't die if the existing graph is corrupt
When the commit-graph is written we end up calling
parse_commit(). This will in turn invoke code that'll consult the
existing commit-graph about the commit, if the graph is corrupted we
die.
We thus get into a state where a failing "commit-graph verify" can't
be followed-up with a "commit-graph write" if core.commitGraph=true is
set, the graph either needs to be manually removed to proceed, or
core.commitGraph needs to be set to "false".
Change the "commit-graph write" codepath to use a new
parse_commit_no_graph() helper instead of parse_commit() to avoid
this. The latter will call repo_parse_commit_internal() with
use_commit_graph=1 as seen in 177722b344 ("commit: integrate commit
graph with commit parsing", 2018-04-10).
Not using the old graph at all slows down the writing of the new graph
by some small amount, but is a sensible way to prevent an error in the
existing commit-graph from spreading.
Just fixing the current issue would be likely to result in code that's
inadvertently broken in the future. New code might use the
commit-graph at a distance. To detect such cases introduce a
"GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD" setting used when we do our
corruption tests, and test that a "write/verify" combo works after
every one of our current test cases where we now detect commit-graph
corruption.
Some of the code changes here might be strictly unnecessary, e.g. I
was unable to find cases where the parse_commit() called from
write_graph_chunk_data() didn't exit early due to
"item->object.parsed" being true in
repo_parse_commit_internal() (before the use_commit_graph=1 has any
effect). But let's also convert those cases for good measure, we do
not have exhaustive tests for all possible types of commit-graph
corruption.
This might need to be re-visited if we learn to write the commit-graph
incrementally, but probably not. Hopefully we'll just start by finding
out what commits we have in total, then read the old graph(s) to see
what they cover, and finally write a new graph file with everything
that's missing. In that case the new graph writing code just needs to
continue to use e.g. a parse_commit() that doesn't consult the
existing commit-graphs.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-25 13:08:33 +01:00
|
|
|
if test "$2" != "no-copy"
|
|
|
|
then
|
|
|
|
cp $objdir/info/commit-graph commit-graph-pre-write-test
|
|
|
|
fi &&
|
|
|
|
git status --short &&
|
2020-06-23 19:47:01 +02:00
|
|
|
GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE=true git commit-graph write &&
|
2020-04-29 19:36:38 +02:00
|
|
|
chmod u+w $objdir/info/commit-graph &&
|
commit-graph write: don't die if the existing graph is corrupt
When the commit-graph is written we end up calling
parse_commit(). This will in turn invoke code that'll consult the
existing commit-graph about the commit, if the graph is corrupted we
die.
We thus get into a state where a failing "commit-graph verify" can't
be followed-up with a "commit-graph write" if core.commitGraph=true is
set, the graph either needs to be manually removed to proceed, or
core.commitGraph needs to be set to "false".
Change the "commit-graph write" codepath to use a new
parse_commit_no_graph() helper instead of parse_commit() to avoid
this. The latter will call repo_parse_commit_internal() with
use_commit_graph=1 as seen in 177722b344 ("commit: integrate commit
graph with commit parsing", 2018-04-10).
Not using the old graph at all slows down the writing of the new graph
by some small amount, but is a sensible way to prevent an error in the
existing commit-graph from spreading.
Just fixing the current issue would be likely to result in code that's
inadvertently broken in the future. New code might use the
commit-graph at a distance. To detect such cases introduce a
"GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD" setting used when we do our
corruption tests, and test that a "write/verify" combo works after
every one of our current test cases where we now detect commit-graph
corruption.
Some of the code changes here might be strictly unnecessary, e.g. I
was unable to find cases where the parse_commit() called from
write_graph_chunk_data() didn't exit early due to
"item->object.parsed" being true in
repo_parse_commit_internal() (before the use_commit_graph=1 has any
effect). But let's also convert those cases for good measure, we do
not have exhaustive tests for all possible types of commit-graph
corruption.
This might need to be re-visited if we learn to write the commit-graph
incrementally, but probably not. Hopefully we'll just start by finding
out what commits we have in total, then read the old graph(s) to see
what they cover, and finally write a new graph file with everything
that's missing. In that case the new graph writing code just needs to
continue to use e.g. a parse_commit() that doesn't consult the
existing commit-graphs.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-25 13:08:33 +01:00
|
|
|
git commit-graph verify
|
2019-02-21 23:37:46 +01:00
|
|
|
}
|
|
|
|
|
2019-01-15 23:25:51 +01:00
|
|
|
# usage: corrupt_graph_and_verify <position> <data> <string> [<zero_pos>]
|
2018-06-27 15:24:33 +02:00
|
|
|
# Manipulates the commit-graph file at the position
|
2019-01-15 23:25:51 +01:00
|
|
|
# by inserting the data, optionally zeroing the file
|
|
|
|
# starting at <zero_pos>, then runs 'git commit-graph verify'
|
2018-06-27 15:24:33 +02:00
|
|
|
# and places the output in the file 'err'. Test 'err' for
|
|
|
|
# the given string.
|
|
|
|
corrupt_graph_and_verify() {
|
|
|
|
pos=$1
|
|
|
|
data="${2:-\0}"
|
|
|
|
grepstr=$3
|
2019-02-21 23:37:46 +01:00
|
|
|
corrupt_graph_setup &&
|
2019-01-15 23:25:51 +01:00
|
|
|
orig_size=$(wc -c < $objdir/info/commit-graph) &&
|
|
|
|
zero_pos=${4:-${orig_size}} &&
|
2018-06-27 15:24:33 +02:00
|
|
|
printf "$data" | dd of="$objdir/info/commit-graph" bs=1 seek="$pos" conv=notrunc &&
|
2019-02-21 20:28:49 +01:00
|
|
|
dd of="$objdir/info/commit-graph" bs=1 seek="$zero_pos" if=/dev/null &&
|
2021-02-09 22:41:51 +01:00
|
|
|
test-tool genzeros $(($orig_size - $zero_pos)) >>"$objdir/info/commit-graph" &&
|
2019-02-21 23:37:46 +01:00
|
|
|
corrupt_graph_verify "$grepstr"
|
|
|
|
|
2018-06-27 15:24:33 +02:00
|
|
|
}
|
|
|
|
|
2019-03-25 13:08:32 +01:00
|
|
|
test_expect_success POSIXPERM,SANITY 'detect permission problem' '
|
|
|
|
corrupt_graph_setup &&
|
|
|
|
chmod 000 $objdir/info/commit-graph &&
|
commit-graph write: don't die if the existing graph is corrupt
When the commit-graph is written we end up calling
parse_commit(). This will in turn invoke code that'll consult the
existing commit-graph about the commit, if the graph is corrupted we
die.
We thus get into a state where a failing "commit-graph verify" can't
be followed-up with a "commit-graph write" if core.commitGraph=true is
set, the graph either needs to be manually removed to proceed, or
core.commitGraph needs to be set to "false".
Change the "commit-graph write" codepath to use a new
parse_commit_no_graph() helper instead of parse_commit() to avoid
this. The latter will call repo_parse_commit_internal() with
use_commit_graph=1 as seen in 177722b344 ("commit: integrate commit
graph with commit parsing", 2018-04-10).
Not using the old graph at all slows down the writing of the new graph
by some small amount, but is a sensible way to prevent an error in the
existing commit-graph from spreading.
Just fixing the current issue would be likely to result in code that's
inadvertently broken in the future. New code might use the
commit-graph at a distance. To detect such cases introduce a
"GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD" setting used when we do our
corruption tests, and test that a "write/verify" combo works after
every one of our current test cases where we now detect commit-graph
corruption.
Some of the code changes here might be strictly unnecessary, e.g. I
was unable to find cases where the parse_commit() called from
write_graph_chunk_data() didn't exit early due to
"item->object.parsed" being true in
repo_parse_commit_internal() (before the use_commit_graph=1 has any
effect). But let's also convert those cases for good measure, we do
not have exhaustive tests for all possible types of commit-graph
corruption.
This might need to be re-visited if we learn to write the commit-graph
incrementally, but probably not. Hopefully we'll just start by finding
out what commits we have in total, then read the old graph(s) to see
what they cover, and finally write a new graph file with everything
that's missing. In that case the new graph writing code just needs to
continue to use e.g. a parse_commit() that doesn't consult the
existing commit-graphs.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-25 13:08:33 +01:00
|
|
|
corrupt_graph_verify "Could not open" "no-copy"
|
2019-03-25 13:08:32 +01:00
|
|
|
'
|
|
|
|
|
2019-02-21 23:37:47 +01:00
|
|
|
test_expect_success 'detect too small' '
|
|
|
|
corrupt_graph_setup &&
|
|
|
|
echo "a small graph" >$objdir/info/commit-graph &&
|
|
|
|
corrupt_graph_verify "too small"
|
|
|
|
'
|
|
|
|
|
2018-06-27 15:24:33 +02:00
|
|
|
test_expect_success 'detect bad signature' '
|
|
|
|
corrupt_graph_and_verify 0 "\0" \
|
|
|
|
"graph signature"
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'detect bad version' '
|
|
|
|
corrupt_graph_and_verify $GRAPH_BYTE_VERSION "\02" \
|
|
|
|
"graph version"
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'detect bad hash version' '
|
2019-12-21 20:49:24 +01:00
|
|
|
corrupt_graph_and_verify $GRAPH_BYTE_HASH "\03" \
|
2018-06-27 15:24:33 +02:00
|
|
|
"hash version"
|
|
|
|
'
|
|
|
|
|
2018-06-27 15:24:34 +02:00
|
|
|
test_expect_success 'detect low chunk count' '
|
commit-graph: fix parsing the Chunk Lookup table
The commit-graph file format specifies that the chunks may be in any
order. However, if the OID Lookup chunk happens to be the last one in
the file, then any command attempting to access the commit-graph data
will fail with:
fatal: invalid commit position. commit-graph is likely corrupt
In this case the error is wrong, the commit-graph file does conform to
the specification, but the parsing of the Chunk Lookup table is a bit
buggy, and leaves the field holding the number of commits in the
commit-graph zero-initialized.
The number of commits in the commit-graph is determined while parsing
the Chunk Lookup table, by dividing the size of the OID Lookup chunk
with the hash size. However, the Chunk Lookup table doesn't actually
store the size of the chunks, but it stores their starting offset.
Consequently, the size of a chunk can only be calculated by
subtracting the starting offsets of that chunk from the offset of the
subsequent chunk, or in case of the last chunk from the offset
recorded in the terminating label. This is currenly implemented in a
bit complicated way: as we iterate over the entries of the Chunk
Lookup table, we check the ID of each chunk and store its starting
offset, then we check the ID of the last seen chunk and calculate its
size using its previously saved offset if necessary (at the moment
it's only necessary for the OID Lookup chunk). Alas, while parsing
the Chunk Lookup table we only interate through the "real" chunks, but
never look at the terminating label, thus don't even check whether
it's necessary to calulate the size of the last chunk. Consequently,
if the OID Lookup chunk is the last one, then we don't calculate its
size and turn don't run the piece of code determining the number of
commits in the commit graph, leaving the field holding that number
unchanged (i.e. zero-initialized), eventually triggering the sanity
check in load_oid_from_graph().
Fix this by iterating through all entries in the Chunk Lookup table,
including the terminating label.
Note that this is the minimal fix, suitable for the maintenance track.
A better fix would be to simplify how the chunk sizes are calculated,
but that is a more invasive change, less suitable for 'maint', so that
will be done in later patches.
This additional flexibility of scanning more chunks breaks a test for
"git commit-graph verify" so alter that test to mutate the commit-graph
to have an even lower chunk count.
Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-06-05 15:00:24 +02:00
|
|
|
corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\01" \
|
2021-02-18 15:07:35 +01:00
|
|
|
"final chunk has non-zero id"
|
2018-06-27 15:24:34 +02:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'detect missing OID fanout chunk' '
|
|
|
|
corrupt_graph_and_verify $GRAPH_BYTE_OID_FANOUT_ID "\0" \
|
|
|
|
"missing the OID Fanout chunk"
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'detect missing OID lookup chunk' '
|
|
|
|
corrupt_graph_and_verify $GRAPH_BYTE_OID_LOOKUP_ID "\0" \
|
|
|
|
"missing the OID Lookup chunk"
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'detect missing commit data chunk' '
|
|
|
|
corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_DATA_ID "\0" \
|
|
|
|
"missing the Commit Data chunk"
|
|
|
|
'
|
|
|
|
|
2018-06-27 15:24:35 +02:00
|
|
|
test_expect_success 'detect incorrect fanout' '
|
|
|
|
corrupt_graph_and_verify $GRAPH_BYTE_FANOUT1 "\01" \
|
|
|
|
"fanout value"
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'detect incorrect fanout final value' '
|
|
|
|
corrupt_graph_and_verify $GRAPH_BYTE_FANOUT2 "\01" \
|
|
|
|
"fanout value"
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'detect incorrect OID order' '
|
|
|
|
corrupt_graph_and_verify $GRAPH_BYTE_OID_LOOKUP_ORDER "\01" \
|
|
|
|
"incorrect OID order"
|
|
|
|
'
|
|
|
|
|
2018-06-27 15:24:36 +02:00
|
|
|
test_expect_success 'detect OID not in object database' '
|
|
|
|
corrupt_graph_and_verify $GRAPH_BYTE_OID_LOOKUP_MISSING "\01" \
|
|
|
|
"from object database"
|
|
|
|
'
|
|
|
|
|
2018-06-27 15:24:37 +02:00
|
|
|
test_expect_success 'detect incorrect tree OID' '
|
|
|
|
corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_TREE "\01" \
|
|
|
|
"root tree OID for commit"
|
|
|
|
'
|
|
|
|
|
2018-06-27 15:24:38 +02:00
|
|
|
test_expect_success 'detect incorrect parent int-id' '
|
|
|
|
corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_PARENT "\01" \
|
|
|
|
"invalid parent"
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'detect extra parent int-id' '
|
|
|
|
corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_EXTRA_PARENT "\00" \
|
|
|
|
"is too long"
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'detect wrong parent' '
|
|
|
|
corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_WRONG_PARENT "\01" \
|
|
|
|
"commit-graph parent for"
|
|
|
|
'
|
|
|
|
|
2018-06-27 15:24:39 +02:00
|
|
|
test_expect_success 'detect incorrect generation number' '
|
|
|
|
corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_GENERATION "\070" \
|
|
|
|
"generation for commit"
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'detect incorrect generation number' '
|
|
|
|
corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_GENERATION "\01" \
|
|
|
|
"non-zero generation number"
|
|
|
|
'
|
|
|
|
|
2018-06-27 15:24:40 +02:00
|
|
|
test_expect_success 'detect incorrect commit date' '
|
|
|
|
corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_DATE "\01" \
|
|
|
|
"commit date"
|
|
|
|
'
|
|
|
|
|
2018-06-27 15:24:41 +02:00
|
|
|
test_expect_success 'detect incorrect parent for octopus merge' '
|
|
|
|
corrupt_graph_and_verify $GRAPH_BYTE_OCTOPUS "\01" \
|
|
|
|
"invalid parent"
|
|
|
|
'
|
|
|
|
|
2018-06-27 15:24:42 +02:00
|
|
|
test_expect_success 'detect invalid checksum hash' '
|
|
|
|
corrupt_graph_and_verify $GRAPH_BYTE_FOOTER "\00" \
|
|
|
|
"incorrect checksum"
|
|
|
|
'
|
|
|
|
|
2019-01-15 23:25:51 +01:00
|
|
|
test_expect_success 'detect incorrect chunk count' '
|
|
|
|
corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\377" \
|
commit-graph: simplify parse_commit_graph() #1
While we iterate over all entries of the Chunk Lookup table we make
sure that we don't attempt to read past the end of the mmap-ed
commit-graph file, and check in each iteration that the chunk ID and
offset we are about to read is still within the mmap-ed memory region.
However, these checks in each iteration are not really necessary,
because the number of chunks in the commit-graph file is already known
before this loop from the just parsed commit-graph header.
So let's check that the commit-graph file is large enough for all
entries in the Chunk Lookup table before we start iterating over those
entries, and drop those per-iteration checks. While at it, take into
account the size of everything that is necessary to have a valid
commit-graph file, i.e. the size of the header, the size of the
mandatory OID Fanout chunk, and the size of the signature in the
trailer as well.
Note that this necessitates the change of the error message as well,
and, consequently, have to update the 'detect incorrect chunk count'
test in 't5318-commit-graph.sh' as well.
Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-06-05 15:00:29 +02:00
|
|
|
"commit-graph file is too small to hold [0-9]* chunks" \
|
|
|
|
$GRAPH_CHUNK_LOOKUP_OFFSET
|
2019-01-15 23:25:51 +01:00
|
|
|
'
|
|
|
|
|
2018-06-27 15:24:43 +02:00
|
|
|
test_expect_success 'git fsck (checks commit-graph)' '
|
|
|
|
cd "$TRASH_DIRECTORY/full" &&
|
|
|
|
git fsck &&
|
|
|
|
corrupt_graph_and_verify $GRAPH_BYTE_FOOTER "\00" \
|
|
|
|
"incorrect checksum" &&
|
commit-graph write: don't die if the existing graph is corrupt
When the commit-graph is written we end up calling
parse_commit(). This will in turn invoke code that'll consult the
existing commit-graph about the commit, if the graph is corrupted we
die.
We thus get into a state where a failing "commit-graph verify" can't
be followed-up with a "commit-graph write" if core.commitGraph=true is
set, the graph either needs to be manually removed to proceed, or
core.commitGraph needs to be set to "false".
Change the "commit-graph write" codepath to use a new
parse_commit_no_graph() helper instead of parse_commit() to avoid
this. The latter will call repo_parse_commit_internal() with
use_commit_graph=1 as seen in 177722b344 ("commit: integrate commit
graph with commit parsing", 2018-04-10).
Not using the old graph at all slows down the writing of the new graph
by some small amount, but is a sensible way to prevent an error in the
existing commit-graph from spreading.
Just fixing the current issue would be likely to result in code that's
inadvertently broken in the future. New code might use the
commit-graph at a distance. To detect such cases introduce a
"GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD" setting used when we do our
corruption tests, and test that a "write/verify" combo works after
every one of our current test cases where we now detect commit-graph
corruption.
Some of the code changes here might be strictly unnecessary, e.g. I
was unable to find cases where the parse_commit() called from
write_graph_chunk_data() didn't exit early due to
"item->object.parsed" being true in
repo_parse_commit_internal() (before the use_commit_graph=1 has any
effect). But let's also convert those cases for good measure, we do
not have exhaustive tests for all possible types of commit-graph
corruption.
This might need to be re-visited if we learn to write the commit-graph
incrementally, but probably not. Hopefully we'll just start by finding
out what commits we have in total, then read the old graph(s) to see
what they cover, and finally write a new graph file with everything
that's missing. In that case the new graph writing code just needs to
continue to use e.g. a parse_commit() that doesn't consult the
existing commit-graphs.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-25 13:08:33 +01:00
|
|
|
cp commit-graph-pre-write-test $objdir/info/commit-graph &&
|
2018-06-27 15:24:43 +02:00
|
|
|
test_must_fail git fsck
|
|
|
|
'
|
|
|
|
|
2018-07-12 00:42:42 +02:00
|
|
|
test_expect_success 'setup non-the_repository tests' '
|
|
|
|
rm -rf repo &&
|
|
|
|
git init repo &&
|
|
|
|
test_commit -C repo one &&
|
|
|
|
test_commit -C repo two &&
|
|
|
|
git -C repo config core.commitGraph true &&
|
|
|
|
git -C repo rev-parse two | \
|
|
|
|
git -C repo commit-graph write --stdin-commits
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'parse_commit_in_graph works for non-the_repository' '
|
|
|
|
test-tool repository parse_commit_in_graph \
|
|
|
|
repo/.git repo "$(git -C repo rev-parse two)" >actual &&
|
2018-08-13 02:30:10 +02:00
|
|
|
{
|
|
|
|
git -C repo log --pretty=format:"%ct " -1 &&
|
|
|
|
git -C repo rev-parse one
|
|
|
|
} >expect &&
|
2018-07-12 00:42:42 +02:00
|
|
|
test_cmp expect actual &&
|
|
|
|
|
|
|
|
test-tool repository parse_commit_in_graph \
|
|
|
|
repo/.git repo "$(git -C repo rev-parse one)" >actual &&
|
2018-08-13 02:30:10 +02:00
|
|
|
git -C repo log --pretty="%ct" -1 one >expect &&
|
2018-07-12 00:42:42 +02:00
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'get_commit_tree_in_graph works for non-the_repository' '
|
|
|
|
test-tool repository get_commit_tree_in_graph \
|
|
|
|
repo/.git repo "$(git -C repo rev-parse two)" >actual &&
|
2018-08-13 02:30:10 +02:00
|
|
|
git -C repo rev-parse two^{tree} >expect &&
|
2018-07-12 00:42:42 +02:00
|
|
|
test_cmp expect actual &&
|
|
|
|
|
|
|
|
test-tool repository get_commit_tree_in_graph \
|
|
|
|
repo/.git repo "$(git -C repo rev-parse one)" >actual &&
|
2018-08-13 02:30:10 +02:00
|
|
|
git -C repo rev-parse one^{tree} >expect &&
|
2018-07-12 00:42:42 +02:00
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
commit-graph.c: handle commit parsing errors
To write a commit graph chunk, 'write_graph_chunk_data()' takes a list
of commits to write and parses each one before writing the necessary
data, and continuing on to the next commit in the list.
Since the majority of these commits are not parsed ahead of time (an
exception is made for the *last* commit in the list, which is parsed
early within 'copy_oids_to_commits'), it is possible that calling
'parse_commit_no_graph()' on them may return an error. Failing to catch
these errors before de-referencing later calls can result in a undefined
memory access and a SIGSEGV.
One such example of this is 'get_commit_tree_oid()', which expects a
parsed object as its input (in this case, the commit-graph code passes
'*list'). If '*list' causes a parse error, the subsequent call will
fail.
Prevent such an issue by checking the return value of
'parse_commit_no_graph()' to avoid passing an unparsed object to a
function which expects a parsed object, thus preventing a segfault.
It is worth noting that this fix is really skirting around the issue in
object.c's 'parse_object()', which makes it difficult to tell how
corrupt an object is without digging into it. Presumably one could
change the meaning of 'parse_object' returns, but this would require
adjusting each callsite accordingly. Instead of that, add an additional
check to the object parsed.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-06 00:04:55 +02:00
|
|
|
test_expect_success 'corrupt commit-graph write (broken parent)' '
|
2019-09-06 00:04:53 +02:00
|
|
|
rm -rf repo &&
|
|
|
|
git init repo &&
|
|
|
|
(
|
|
|
|
cd repo &&
|
|
|
|
empty="$(git mktree </dev/null)" &&
|
|
|
|
cat >broken <<-EOF &&
|
|
|
|
tree $empty
|
2020-02-07 01:52:49 +01:00
|
|
|
parent $ZERO_OID
|
2019-09-06 00:04:53 +02:00
|
|
|
author whatever <whatever@example.com> 1234 -0000
|
|
|
|
committer whatever <whatever@example.com> 1234 -0000
|
|
|
|
|
|
|
|
broken commit
|
|
|
|
EOF
|
|
|
|
broken="$(git hash-object -w -t commit --literally broken)" &&
|
|
|
|
git commit-tree -p "$broken" -m "good commit" "$empty" >good &&
|
|
|
|
test_must_fail git commit-graph write --stdin-commits \
|
|
|
|
<good 2>test_err &&
|
|
|
|
test_i18ngrep "unable to parse commit" test_err
|
|
|
|
)
|
|
|
|
'
|
|
|
|
|
2019-09-06 00:04:57 +02:00
|
|
|
test_expect_success 'corrupt commit-graph write (missing tree)' '
|
2019-09-06 00:04:53 +02:00
|
|
|
rm -rf repo &&
|
|
|
|
git init repo &&
|
|
|
|
(
|
|
|
|
cd repo &&
|
|
|
|
tree="$(git mktree </dev/null)" &&
|
|
|
|
cat >broken <<-EOF &&
|
2020-02-07 01:52:49 +01:00
|
|
|
parent $ZERO_OID
|
2019-09-06 00:04:53 +02:00
|
|
|
author whatever <whatever@example.com> 1234 -0000
|
|
|
|
committer whatever <whatever@example.com> 1234 -0000
|
|
|
|
|
|
|
|
broken commit
|
|
|
|
EOF
|
|
|
|
broken="$(git hash-object -w -t commit --literally broken)" &&
|
|
|
|
git commit-tree -p "$broken" -m "good" "$tree" >good &&
|
|
|
|
test_must_fail git commit-graph write --stdin-commits \
|
|
|
|
<good 2>test_err &&
|
commit, tag: don't set parsed bit for parse failures
If we can't parse a commit, then parse_commit() will return an error
code. But it _also_ sets the "parsed" flag, which tells us not to bother
trying to re-parse the object. That means that subsequent parses have no
idea that the information in the struct may be bogus. I.e., doing this:
parse_commit(commit);
...
if (parse_commit(commit) < 0)
die("commit is broken");
will never trigger the die(). The second parse_commit() will see the
"parsed" flag and quietly return success.
There are two obvious ways to fix this:
1. Stop setting "parsed" until we've successfully parsed.
2. Keep a second "corrupt" flag to indicate that we saw an error (and
when the parsed flag is set, return 0/-1 depending on the corrupt
flag).
This patch does option 1. The obvious downside versus option 2 is that
we might continually re-parse a broken object. But in practice,
corruption like this is rare, and we typically die() or return an error
in the caller. So it's OK not to worry about optimizing for corruption.
And it's much simpler: we don't need to use an extra bit in the object
struct, and callers which check the "parsed" flag don't need to learn
about the corrupt bit, too.
There's no new test here, because this case is already covered in t5318.
Note that we do need to update the expected message there, because we
now detect the problem in the return from "parse_commit()", and not with
a separate check for a NULL tree. In fact, we can now ditch that
explicit tree check entirely, as we're covered robustly by this change
(and the previous recent change to treat a NULL tree as a parse error).
We'll also give tags the same treatment. I don't know offhand of any
cases where the problem can be triggered (it implies somebody ignoring a
parse error earlier in the process), but consistently returning an error
should cause the least surprise.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-10-25 23:20:20 +02:00
|
|
|
test_i18ngrep "unable to parse commit" test_err
|
2019-09-06 00:04:53 +02:00
|
|
|
)
|
|
|
|
'
|
|
|
|
|
commit-graph: implement generation data chunk
As discovered by Ævar, we cannot increment graph version to
distinguish between generation numbers v1 and v2 [1]. Thus, one of
pre-requistes before implementing generation number v2 was to
distinguish between graph versions in a backwards compatible manner.
We are going to introduce a new chunk called Generation DATa chunk (or
GDAT). GDAT will store corrected committer date offsets whereas CDAT
will still store topological level.
Old Git does not understand GDAT chunk and would ignore it, reading
topological levels from CDAT. New Git can parse GDAT and take advantage
of newer generation numbers, falling back to topological levels when
GDAT chunk is missing (as it would happen with a commit-graph written
by old Git).
We introduce a test environment variable 'GIT_TEST_COMMIT_GRAPH_NO_GDAT'
which forces commit-graph file to be written without generation data
chunk to emulate a commit-graph file written by old Git.
To minimize the space required to store corrrected commit date, Git
stores corrected commit date offsets into the commit-graph file, instea
of corrected commit dates. This saves us 4 bytes per commit, decreasing
the GDAT chunk size by half, but it's possible for the offset to
overflow the 4-bytes allocated for storage. As such overflows are and
should be exceedingly rare, we use the following overflow management
scheme:
We introduce a new commit-graph chunk, Generation Data OVerflow ('GDOV')
to store corrected commit dates for commits with offsets greater than
GENERATION_NUMBER_V2_OFFSET_MAX.
If the offset is greater than GENERATION_NUMBER_V2_OFFSET_MAX, we set
the MSB of the offset and the other bits store the position of corrected
commit date in GDOV chunk, similar to how Extra Edge List is maintained.
We test the overflow-related code with the following repo history:
F - N - U
/ \
U - N - U N
\ /
N - F - N
Where the commits denoted by U have committer date of zero seconds
since Unix epoch, the commits denoted by N have committer date of
1112354055 (default committer date for the test suite) seconds since
Unix epoch and the commits denoted by F have committer date of
(2 ^ 31 - 2) seconds since Unix epoch.
The largest offset observed is 2 ^ 31, just large enough to overflow.
[1]: https://lore.kernel.org/git/87a7gdspo4.fsf@evledraar.gmail.com/
Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-16 19:11:15 +01:00
|
|
|
# We test the overflow-related code with the following repo history:
|
|
|
|
#
|
|
|
|
# 4:F - 5:N - 6:U
|
|
|
|
# / \
|
|
|
|
# 1:U - 2:N - 3:U M:N
|
|
|
|
# \ /
|
|
|
|
# 7:N - 8:F - 9:N
|
|
|
|
#
|
|
|
|
# Here the commits denoted by U have committer date of zero seconds
|
|
|
|
# since Unix epoch, the commits denoted by N have committer date
|
|
|
|
# starting from 1112354055 seconds since Unix epoch (default committer
|
|
|
|
# date for the test suite), and the commits denoted by F have committer
|
|
|
|
# date of (2 ^ 31 - 2) seconds since Unix epoch.
|
|
|
|
#
|
|
|
|
# The largest offset observed is 2 ^ 31, just large enough to overflow.
|
|
|
|
#
|
|
|
|
|
|
|
|
test_expect_success 'set up and verify repo with generation data overflow chunk' '
|
|
|
|
objdir=".git/objects" &&
|
|
|
|
UNIX_EPOCH_ZERO="@0 +0000" &&
|
|
|
|
FUTURE_DATE="@2147483646 +0000" &&
|
|
|
|
test_oid_cache <<-EOF &&
|
|
|
|
oid_version sha1:1
|
|
|
|
oid_version sha256:2
|
|
|
|
EOF
|
|
|
|
cd "$TRASH_DIRECTORY" &&
|
|
|
|
mkdir repo &&
|
|
|
|
cd repo &&
|
|
|
|
git init &&
|
|
|
|
test_commit --date "$UNIX_EPOCH_ZERO" 1 &&
|
|
|
|
test_commit 2 &&
|
|
|
|
test_commit --date "$UNIX_EPOCH_ZERO" 3 &&
|
|
|
|
git commit-graph write --reachable &&
|
|
|
|
graph_read_expect 3 generation_data &&
|
|
|
|
test_commit --date "$FUTURE_DATE" 4 &&
|
|
|
|
test_commit 5 &&
|
|
|
|
test_commit --date "$UNIX_EPOCH_ZERO" 6 &&
|
|
|
|
git branch left &&
|
|
|
|
git reset --hard 3 &&
|
|
|
|
test_commit 7 &&
|
|
|
|
test_commit --date "$FUTURE_DATE" 8 &&
|
|
|
|
test_commit 9 &&
|
|
|
|
git branch right &&
|
|
|
|
git reset --hard 3 &&
|
|
|
|
test_merge M left right &&
|
|
|
|
git commit-graph write --reachable &&
|
|
|
|
graph_read_expect 10 "generation_data generation_data_overflow" &&
|
|
|
|
git commit-graph verify
|
|
|
|
'
|
|
|
|
|
|
|
|
graph_git_behavior 'generation data overflow chunk repo' repo left right
|
|
|
|
|
2018-04-02 22:34:20 +02:00
|
|
|
test_done
|