2009-01-30 09:33:00 +01:00
|
|
|
#!/bin/sh
|
|
|
|
|
2010-09-07 03:47:07 +02:00
|
|
|
test_description='git fsck random collection of tests
|
|
|
|
|
|
|
|
* (HEAD) B
|
|
|
|
* (master) A
|
|
|
|
'
|
2009-01-30 09:33:00 +01:00
|
|
|
|
|
|
|
. ./test-lib.sh
|
|
|
|
|
|
|
|
test_expect_success setup '
|
2010-09-07 03:47:07 +02:00
|
|
|
git config gc.auto 0 &&
|
2010-05-26 23:50:34 +02:00
|
|
|
git config i18n.commitencoding ISO-8859-1 &&
|
2009-01-30 09:33:00 +01:00
|
|
|
test_commit A fileA one &&
|
2010-05-26 23:50:34 +02:00
|
|
|
git config --unset i18n.commitencoding &&
|
2009-01-30 09:33:00 +01:00
|
|
|
git checkout HEAD^0 &&
|
|
|
|
test_commit B fileB two &&
|
|
|
|
git tag -d A B &&
|
2010-09-07 03:47:07 +02:00
|
|
|
git reflog expire --expire=now --all &&
|
|
|
|
>empty
|
2009-01-30 09:33:00 +01:00
|
|
|
'
|
|
|
|
|
2009-01-30 09:50:54 +01:00
|
|
|
test_expect_success 'loose objects borrowed from alternate are not missing' '
|
|
|
|
mkdir another &&
|
|
|
|
(
|
|
|
|
cd another &&
|
|
|
|
git init &&
|
|
|
|
echo ../../../.git/objects >.git/objects/info/alternates &&
|
|
|
|
test_commit C fileC one &&
|
2012-02-28 23:55:39 +01:00
|
|
|
git fsck --no-dangling >../actual 2>&1
|
2010-09-07 03:47:07 +02:00
|
|
|
) &&
|
|
|
|
test_cmp empty actual
|
2009-01-30 09:50:54 +01:00
|
|
|
'
|
|
|
|
|
2010-09-07 03:47:07 +02:00
|
|
|
test_expect_success 'HEAD is part of refs, valid objects appear valid' '
|
|
|
|
git fsck >actual 2>&1 &&
|
|
|
|
test_cmp empty actual
|
2010-05-26 23:50:34 +02:00
|
|
|
'
|
|
|
|
|
2009-02-19 12:13:39 +01:00
|
|
|
# Corruption tests follow. Make sure to remove all traces of the
|
|
|
|
# specific corruption you test afterwards, lest a later test trip over
|
|
|
|
# it.
|
|
|
|
|
2010-09-07 03:47:07 +02:00
|
|
|
test_expect_success 'setup: helpers for corruption tests' '
|
|
|
|
sha1_file() {
|
t1450: refactor loose-object removal
Commit 90cf590f5 (fsck: optionally show more helpful info
for broken links, 2016-07-17) added a remove_loose_object()
helper, but we already had a remove_object() helper that did
the same thing. Let's combine these into one.
The implementations had a few subtle differences, so I've
tried to take the best of both:
- the original used "sed", but the newer version avoids
spawning an extra process
- the original processed "$*", which was nonsense, as it
assumed only a single sha1. Use "$1" to make that more
clear.
- the newer version ran an extra rev-parse, but it was not
necessary; its sole caller already converted the
argument into a raw sha1
- the original used "rm -f", whereas the new one uses
"rm". The latter is better because it may notice a bug
or other unexpected failure in the test. (The original
does check that the object exists before we remove it,
which is good, but that's a subset of the possible
unexpected conditions).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-01-13 18:54:10 +01:00
|
|
|
remainder=${1#??} &&
|
|
|
|
firsttwo=${1%$remainder} &&
|
|
|
|
echo ".git/objects/$firsttwo/$remainder"
|
2010-09-07 03:47:07 +02:00
|
|
|
} &&
|
|
|
|
|
|
|
|
remove_object() {
|
t1450: refactor loose-object removal
Commit 90cf590f5 (fsck: optionally show more helpful info
for broken links, 2016-07-17) added a remove_loose_object()
helper, but we already had a remove_object() helper that did
the same thing. Let's combine these into one.
The implementations had a few subtle differences, so I've
tried to take the best of both:
- the original used "sed", but the newer version avoids
spawning an extra process
- the original processed "$*", which was nonsense, as it
assumed only a single sha1. Use "$1" to make that more
clear.
- the newer version ran an extra rev-parse, but it was not
necessary; its sole caller already converted the
argument into a raw sha1
- the original used "rm -f", whereas the new one uses
"rm". The latter is better because it may notice a bug
or other unexpected failure in the test. (The original
does check that the object exists before we remove it,
which is good, but that's a subset of the possible
unexpected conditions).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-01-13 18:54:10 +01:00
|
|
|
rm "$(sha1_file "$1")"
|
2010-09-07 03:47:07 +02:00
|
|
|
}
|
|
|
|
'
|
|
|
|
|
2009-02-19 12:13:39 +01:00
|
|
|
test_expect_success 'object with bad sha1' '
|
|
|
|
sha=$(echo blob | git hash-object -w --stdin) &&
|
|
|
|
old=$(echo $sha | sed "s+^..+&/+") &&
|
|
|
|
new=$(dirname $old)/ffffffffffffffffffffffffffffffffffffff &&
|
2010-10-31 02:46:54 +01:00
|
|
|
sha="$(dirname $new)$(basename $new)" &&
|
2009-02-19 12:13:39 +01:00
|
|
|
mv .git/objects/$old .git/objects/$new &&
|
2010-09-07 03:47:07 +02:00
|
|
|
test_when_finished "remove_object $sha" &&
|
2009-02-19 12:13:39 +01:00
|
|
|
git update-index --add --cacheinfo 100644 $sha foo &&
|
2010-09-07 03:47:07 +02:00
|
|
|
test_when_finished "git read-tree -u --reset HEAD" &&
|
2009-02-19 12:13:39 +01:00
|
|
|
tree=$(git write-tree) &&
|
2010-09-07 03:47:07 +02:00
|
|
|
test_when_finished "remove_object $tree" &&
|
2009-02-19 12:13:39 +01:00
|
|
|
cmt=$(echo bogus | git commit-tree $tree) &&
|
2010-09-07 03:47:07 +02:00
|
|
|
test_when_finished "remove_object $cmt" &&
|
2009-02-19 12:13:39 +01:00
|
|
|
git update-ref refs/heads/bogus $cmt &&
|
2010-09-07 03:47:07 +02:00
|
|
|
test_when_finished "git update-ref -d refs/heads/bogus" &&
|
|
|
|
|
2014-08-29 22:31:46 +02:00
|
|
|
test_must_fail git fsck 2>out &&
|
2010-09-07 03:47:07 +02:00
|
|
|
cat out &&
|
|
|
|
grep "$sha.*corrupt" out
|
2009-02-19 12:13:39 +01:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'branch pointing to non-commit' '
|
2010-09-07 03:47:07 +02:00
|
|
|
git rev-parse HEAD^{tree} >.git/refs/heads/invalid &&
|
|
|
|
test_when_finished "git update-ref -d refs/heads/invalid" &&
|
2015-09-23 22:46:39 +02:00
|
|
|
test_must_fail git fsck 2>out &&
|
2010-09-07 03:47:07 +02:00
|
|
|
cat out &&
|
|
|
|
grep "not a commit" out
|
2009-02-19 12:13:39 +01:00
|
|
|
'
|
|
|
|
|
2015-09-23 22:46:39 +02:00
|
|
|
# Overwrite HEAD with the all-zero object id and check that fsck fails
# with a "detached HEAD points" diagnostic. The real HEAD is moved aside
# first and moved back when the test finishes.
test_expect_success 'HEAD link pointing at a funny object' '
	test_when_finished "mv .git/SAVED_HEAD .git/HEAD" &&
	mv .git/HEAD .git/SAVED_HEAD &&
	echo 0000000000000000000000000000000000000000 >.git/HEAD &&
	# avoid corrupt/broken HEAD from interfering with repo discovery
	test_must_fail env GIT_DIR=.git git fsck 2>out &&
	cat out &&
	grep "detached HEAD points" out
'
|
|
|
|
|
|
|
|
# Make HEAD a symbolic ref to refs/funny/place and check that fsck fails
# with a "HEAD points to something strange" diagnostic. The real HEAD is
# moved aside first and moved back when the test finishes.
test_expect_success 'HEAD link pointing at a funny place' '
	test_when_finished "mv .git/SAVED_HEAD .git/HEAD" &&
	mv .git/HEAD .git/SAVED_HEAD &&
	echo "ref: refs/funny/place" >.git/HEAD &&
	# avoid corrupt/broken HEAD from interfering with repo discovery
	test_must_fail env GIT_DIR=.git git fsck 2>out &&
	cat out &&
	grep "HEAD points to something strange" out
'
|
|
|
|
|
2010-04-24 18:06:08 +02:00
|
|
|
test_expect_success 'email without @ is okay' '
|
|
|
|
git cat-file commit HEAD >basis &&
|
|
|
|
sed "s/@/AT/" basis >okay &&
|
|
|
|
new=$(git hash-object -t commit -w --stdin <okay) &&
|
2010-09-07 03:47:07 +02:00
|
|
|
test_when_finished "remove_object $new" &&
|
2010-04-24 18:06:08 +02:00
|
|
|
git update-ref refs/heads/bogus "$new" &&
|
2010-09-07 03:47:07 +02:00
|
|
|
test_when_finished "git update-ref -d refs/heads/bogus" &&
|
2010-04-24 18:06:08 +02:00
|
|
|
git fsck 2>out &&
|
|
|
|
cat out &&
|
2010-09-07 03:47:07 +02:00
|
|
|
! grep "commit $new" out
|
2010-04-24 18:06:08 +02:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'email with embedded > is not okay' '
|
|
|
|
git cat-file commit HEAD >basis &&
|
|
|
|
sed "s/@[a-z]/&>/" basis >bad-email &&
|
|
|
|
new=$(git hash-object -t commit -w --stdin <bad-email) &&
|
2010-09-07 03:47:07 +02:00
|
|
|
test_when_finished "remove_object $new" &&
|
2010-04-24 18:06:08 +02:00
|
|
|
git update-ref refs/heads/bogus "$new" &&
|
2010-09-07 03:47:07 +02:00
|
|
|
test_when_finished "git update-ref -d refs/heads/bogus" &&
|
2014-08-29 22:31:46 +02:00
|
|
|
test_must_fail git fsck 2>out &&
|
2010-04-24 18:06:08 +02:00
|
|
|
cat out &&
|
|
|
|
grep "error in commit $new" out
|
|
|
|
'
|
2009-02-19 12:13:39 +01:00
|
|
|
|
2011-08-11 12:21:10 +02:00
|
|
|
test_expect_success 'missing < email delimiter is reported nicely' '
|
2011-08-11 12:21:09 +02:00
|
|
|
git cat-file commit HEAD >basis &&
|
|
|
|
sed "s/<//" basis >bad-email-2 &&
|
|
|
|
new=$(git hash-object -t commit -w --stdin <bad-email-2) &&
|
|
|
|
test_when_finished "remove_object $new" &&
|
|
|
|
git update-ref refs/heads/bogus "$new" &&
|
|
|
|
test_when_finished "git update-ref -d refs/heads/bogus" &&
|
2014-08-29 22:31:46 +02:00
|
|
|
test_must_fail git fsck 2>out &&
|
2011-08-11 12:21:09 +02:00
|
|
|
cat out &&
|
|
|
|
grep "error in commit $new.* - bad name" out
|
|
|
|
'
|
|
|
|
|
2011-08-11 12:21:10 +02:00
|
|
|
test_expect_success 'missing email is reported nicely' '
|
2011-08-11 12:21:09 +02:00
|
|
|
git cat-file commit HEAD >basis &&
|
|
|
|
sed "s/[a-z]* <[^>]*>//" basis >bad-email-3 &&
|
|
|
|
new=$(git hash-object -t commit -w --stdin <bad-email-3) &&
|
|
|
|
test_when_finished "remove_object $new" &&
|
|
|
|
git update-ref refs/heads/bogus "$new" &&
|
|
|
|
test_when_finished "git update-ref -d refs/heads/bogus" &&
|
2014-08-29 22:31:46 +02:00
|
|
|
test_must_fail git fsck 2>out &&
|
2011-08-11 12:21:09 +02:00
|
|
|
cat out &&
|
|
|
|
grep "error in commit $new.* - missing email" out
|
|
|
|
'
|
|
|
|
|
2011-08-11 12:21:10 +02:00
|
|
|
test_expect_success '> in name is reported' '
|
2011-08-11 12:21:09 +02:00
|
|
|
git cat-file commit HEAD >basis &&
|
|
|
|
sed "s/ </> </" basis >bad-email-4 &&
|
|
|
|
new=$(git hash-object -t commit -w --stdin <bad-email-4) &&
|
|
|
|
test_when_finished "remove_object $new" &&
|
|
|
|
git update-ref refs/heads/bogus "$new" &&
|
|
|
|
test_when_finished "git update-ref -d refs/heads/bogus" &&
|
2014-08-29 22:31:46 +02:00
|
|
|
test_must_fail git fsck 2>out &&
|
2011-08-11 12:21:09 +02:00
|
|
|
cat out &&
|
|
|
|
grep "error in commit $new" out
|
|
|
|
'
|
|
|
|
|
fsck: report integer overflow in author timestamps
When we check commit objects, we complain if commit->date is
ULONG_MAX, which is an indication that we saw integer
overflow when parsing it. However, we do not do any check at
all for author lines, which also contain a timestamp.
Let's actually check the timestamps on each ident line
with strtoul. This catches both author and committer lines,
and we can get rid of the now-redundant commit->date check.
Note that like the existing check, we compare only against
ULONG_MAX. Now that we are calling strtoul at the site of
the check, we could be slightly more careful and also check
that errno is set to ERANGE. However, this will make further
refactoring in future patches a little harder, and it
doesn't really matter in practice.
For 32-bit systems, one would have to create a commit at the
exact wrong second in 2038. But by the time we get close to
that, all systems will hopefully have moved to 64-bit (and
if they haven't, they have a real problem one second later).
For 64-bit systems, by the time we get close to ULONG_MAX,
all systems will hopefully have been consumed in the fiery
wrath of our expanding Sun.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-02-24 08:39:04 +01:00
|
|
|
# date is 2^64 + 1
|
|
|
|
test_expect_success 'integer overflow in timestamps is reported' '
|
|
|
|
git cat-file commit HEAD >basis &&
|
|
|
|
sed "s/^\\(author .*>\\) [0-9]*/\\1 18446744073709551617/" \
|
|
|
|
<basis >bad-timestamp &&
|
|
|
|
new=$(git hash-object -t commit -w --stdin <bad-timestamp) &&
|
|
|
|
test_when_finished "remove_object $new" &&
|
|
|
|
git update-ref refs/heads/bogus "$new" &&
|
|
|
|
test_when_finished "git update-ref -d refs/heads/bogus" &&
|
2014-08-29 22:31:46 +02:00
|
|
|
test_must_fail git fsck 2>out &&
|
fsck: report integer overflow in author timestamps
When we check commit objects, we complain if commit->date is
ULONG_MAX, which is an indication that we saw integer
overflow when parsing it. However, we do not do any check at
all for author lines, which also contain a timestamp.
Let's actually check the timestamps on each ident line
with strtoul. This catches both author and committer lines,
and we can get rid of the now-redundant commit->date check.
Note that like the existing check, we compare only against
ULONG_MAX. Now that we are calling strtoul at the site of
the check, we could be slightly more careful and also check
that errno is set to ERANGE. However, this will make further
refactoring in future patches a little harder, and it
doesn't really matter in practice.
For 32-bit systems, one would have to create a commit at the
exact wrong second in 2038. But by the time we get close to
that, all systems will hopefully have moved to 64-bit (and
if they haven't, they have a real problem one second later).
For 64-bit systems, by the time we get close to ULONG_MAX,
all systems will hopefully have been consumed in the fiery
wrath of our expanding Sun.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-02-24 08:39:04 +01:00
|
|
|
cat out &&
|
|
|
|
grep "error in commit $new.*integer overflow" out
|
|
|
|
'
|
|
|
|
|
2015-11-19 17:20:14 +01:00
|
|
|
# Take the HEAD commit, replace the character after "author " with a Q
# placeholder, pipe it through q_to_nul, and store the result as a new
# commit object. fsck must fail and name that commit with an
# "unterminated header: NUL at offset" error. The object and the
# temporary refs/heads/bogus branch are removed when the test finishes.
test_expect_success 'commit with NUL in header' '
	git cat-file commit HEAD >basis &&
	sed "s/author ./author Q/" <basis | q_to_nul >commit-NUL-header &&
	new=$(git hash-object -t commit -w --stdin <commit-NUL-header) &&
	test_when_finished "remove_object $new" &&
	git update-ref refs/heads/bogus "$new" &&
	test_when_finished "git update-ref -d refs/heads/bogus" &&
	test_must_fail git fsck 2>out &&
	cat out &&
	grep "error in commit $new.*unterminated header: NUL at offset" out
'
|
|
|
|
|
2016-09-27 22:59:51 +02:00
|
|
|
test_expect_success 'tree object with duplicate entries' '
|
2017-01-16 22:24:03 +01:00
|
|
|
test_when_finished "for i in \$T; do remove_object \$i; done" &&
|
2014-08-29 22:31:46 +02:00
|
|
|
T=$(
|
|
|
|
GIT_INDEX_FILE=test-index &&
|
|
|
|
export GIT_INDEX_FILE &&
|
|
|
|
rm -f test-index &&
|
|
|
|
>x &&
|
|
|
|
git add x &&
|
2017-01-16 22:24:03 +01:00
|
|
|
git rev-parse :x &&
|
2014-08-29 22:31:46 +02:00
|
|
|
T=$(git write-tree) &&
|
2017-01-16 22:24:03 +01:00
|
|
|
echo $T &&
|
2014-08-29 22:31:46 +02:00
|
|
|
(
|
|
|
|
git cat-file tree $T &&
|
|
|
|
git cat-file tree $T
|
|
|
|
) |
|
|
|
|
git hash-object -w -t tree --stdin
|
|
|
|
) &&
|
|
|
|
test_must_fail git fsck 2>out &&
|
|
|
|
grep "error in tree .*contains duplicate file entries" out
|
|
|
|
'
|
|
|
|
|
2016-09-27 22:59:51 +02:00
|
|
|
# Store a tree whose single entry has mode 100644, an empty filename and
# twenty bytes of junk where the object id belongs, then point a commit
# and refs/heads/wrong at it. fsck must fail with an "error:" (not a
# "fatal:") about the empty filename and must mention the tree id.
# The cleanup commands use escaped variables so they are expanded only
# when the cleanup runs, after the ids have been assigned.
test_expect_success 'unparseable tree object' '
	test_when_finished "git update-ref -d refs/heads/wrong" &&
	test_when_finished "remove_object \$tree_sha1" &&
	test_when_finished "remove_object \$commit_sha1" &&
	tree_sha1=$(printf "100644 \0twenty-bytes-of-junk" | git hash-object -t tree --stdin -w --literally) &&
	commit_sha1=$(git commit-tree $tree_sha1) &&
	git update-ref refs/heads/wrong $commit_sha1 &&
	test_must_fail git fsck 2>out &&
	test_i18ngrep "error: empty filename in tree entry" out &&
	test_i18ngrep "$tree_sha1" out &&
	test_i18ngrep ! "fatal: empty filename in tree entry" out
'
|
|
|
|
|
2017-10-05 21:41:26 +02:00
|
|
|
hex2oct() {
|
|
|
|
perl -ne 'printf "\\%03o", hex for /../g'
|
|
|
|
}
|
|
|
|
|
|
|
|
# Build a tree by hand in which the entry "dir" has the directory mode
# 40000 but carries the id of a blob (the entry "file" points at the
# same blob with mode 100644). A commit and refs/heads/type_mismatch
# make the tree reachable. fsck must fail, report "is a blob, not a
# tree", and must not report the blob as dangling.
# The cleanup commands use escaped variables so they are expanded only
# when the cleanup runs, after the ids have been assigned.
test_expect_success 'tree entry with type mismatch' '
	test_when_finished "remove_object \$blob" &&
	test_when_finished "remove_object \$tree" &&
	test_when_finished "remove_object \$commit" &&
	test_when_finished "git update-ref -d refs/heads/type_mismatch" &&
	blob=$(echo blob | git hash-object -w --stdin) &&
	blob_bin=$(echo $blob | hex2oct) &&
	tree=$(
		printf "40000 dir\0${blob_bin}100644 file\0${blob_bin}" |
		git hash-object -t tree --stdin -w --literally
	) &&
	commit=$(git commit-tree $tree) &&
	git update-ref refs/heads/type_mismatch $commit &&
	test_must_fail git fsck >out 2>&1 &&
	test_i18ngrep "is a blob, not a tree" out &&
	test_i18ngrep ! "dangling blob" out
'
|
|
|
|
|
2010-02-20 01:18:44 +01:00
|
|
|
test_expect_success 'tag pointing to nonexistent' '
|
2010-10-31 02:46:54 +01:00
|
|
|
cat >invalid-tag <<-\EOF &&
|
2010-09-07 03:47:07 +02:00
|
|
|
object ffffffffffffffffffffffffffffffffffffffff
|
|
|
|
type commit
|
|
|
|
tag invalid
|
|
|
|
tagger T A Gger <tagger@example.com> 1234567890 -0000
|
|
|
|
|
|
|
|
This is an invalid tag.
|
|
|
|
EOF
|
|
|
|
|
|
|
|
tag=$(git hash-object -t tag -w --stdin <invalid-tag) &&
|
|
|
|
test_when_finished "remove_object $tag" &&
|
|
|
|
echo $tag >.git/refs/tags/invalid &&
|
|
|
|
test_when_finished "git update-ref -d refs/tags/invalid" &&
|
2010-02-20 01:18:44 +01:00
|
|
|
test_must_fail git fsck --tags >out &&
|
2009-02-19 12:13:39 +01:00
|
|
|
cat out &&
|
2010-09-07 03:47:07 +02:00
|
|
|
grep "broken link" out
|
2009-02-19 12:13:39 +01:00
|
|
|
'
|
|
|
|
|
2010-02-20 01:18:44 +01:00
|
|
|
test_expect_success 'tag pointing to something else than its type' '
|
2010-09-07 03:47:07 +02:00
|
|
|
sha=$(echo blob | git hash-object -w --stdin) &&
|
|
|
|
test_when_finished "remove_object $sha" &&
|
|
|
|
cat >wrong-tag <<-EOF &&
|
|
|
|
object $sha
|
|
|
|
type commit
|
|
|
|
tag wrong
|
|
|
|
tagger T A Gger <tagger@example.com> 1234567890 -0000
|
|
|
|
|
|
|
|
This is an invalid tag.
|
|
|
|
EOF
|
|
|
|
|
|
|
|
tag=$(git hash-object -t tag -w --stdin <wrong-tag) &&
|
|
|
|
test_when_finished "remove_object $tag" &&
|
|
|
|
echo $tag >.git/refs/tags/wrong &&
|
|
|
|
test_when_finished "git update-ref -d refs/tags/wrong" &&
|
t1450: the order the objects are checked is undefined
When a tag T points at an object X that is of a type that is
different from what the tag records as, fsck should report it as an
error.
However, depending on the order X and T are checked individually,
the actual error message can be different. If X is checked first,
fsck remembers X's type and then when it checks T, it notices that T
records X as a wrong type (i.e. the complaint is about a broken tag
T). If T is checked first, on the other hand, fsck remembers that we
need to verify X is of the type tag records, and when it later
checks X, it notices that X is of a wrong type (i.e. the complaint
is about a broken object X).
The important thing is that fsck notices such an error and diagnoses
the issue on object X, but the test was expecting that we happen to
check objects in the order to make us detect issues with tag T, not
with object X. Remove this unwarranted assumption.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-10-03 00:08:16 +02:00
|
|
|
test_must_fail git fsck --tags
|
2009-02-19 12:13:39 +01:00
|
|
|
'
|
|
|
|
|
2014-09-11 16:26:41 +02:00
|
|
|
test_expect_success 'tag with incorrect tag name & missing tagger' '
|
|
|
|
sha=$(git rev-parse HEAD) &&
|
|
|
|
cat >wrong-tag <<-EOF &&
|
|
|
|
object $sha
|
|
|
|
type commit
|
|
|
|
tag wrong name format
|
|
|
|
|
|
|
|
This is an invalid tag.
|
|
|
|
EOF
|
|
|
|
|
|
|
|
tag=$(git hash-object -t tag -w --stdin <wrong-tag) &&
|
|
|
|
test_when_finished "remove_object $tag" &&
|
|
|
|
echo $tag >.git/refs/tags/wrong &&
|
|
|
|
test_when_finished "git update-ref -d refs/tags/wrong" &&
|
|
|
|
git fsck --tags 2>out &&
|
2014-12-08 06:48:13 +01:00
|
|
|
|
|
|
|
cat >expect <<-EOF &&
|
2015-06-22 17:25:52 +02:00
|
|
|
warning in tag $tag: badTagName: invalid '\''tag'\'' name: wrong name format
|
|
|
|
warning in tag $tag: missingTaggerEntry: invalid format - expected '\''tagger'\'' line
|
2014-12-08 06:48:13 +01:00
|
|
|
EOF
|
|
|
|
test_cmp expect out
|
2014-09-11 16:26:41 +02:00
|
|
|
'
|
|
|
|
|
2014-09-11 23:16:36 +02:00
|
|
|
# Write a tag object for HEAD whose tagger line has a name only (no
# email, no timestamp), store it with --literally, and point
# refs/tags/wrong at it. "fsck --tags" must fail with an
# "invalid author/committer" error for the tag.
test_expect_success 'tag with bad tagger' '
	sha=$(git rev-parse HEAD) &&
	cat >wrong-tag <<-EOF &&
	object $sha
	type commit
	tag not-quite-wrong
	tagger Bad Tagger Name

	This is an invalid tag.
	EOF

	tag=$(git hash-object --literally -t tag -w --stdin <wrong-tag) &&
	test_when_finished "remove_object $tag" &&
	echo $tag >.git/refs/tags/wrong &&
	test_when_finished "git update-ref -d refs/tags/wrong" &&
	test_must_fail git fsck --tags 2>out &&
	grep "error in tag .*: invalid author/committer" out
'
|
|
|
|
|
2015-11-19 17:25:31 +01:00
|
|
|
test_expect_success 'tag with NUL in header' '
|
2015-11-19 17:20:14 +01:00
|
|
|
sha=$(git rev-parse HEAD) &&
|
|
|
|
q_to_nul >tag-NUL-header <<-EOF &&
|
|
|
|
object $sha
|
|
|
|
type commit
|
|
|
|
tag contains-Q-in-header
|
|
|
|
tagger T A Gger <tagger@example.com> 1234567890 -0000
|
|
|
|
|
|
|
|
This is an invalid tag.
|
|
|
|
EOF
|
|
|
|
|
|
|
|
tag=$(git hash-object --literally -t tag -w --stdin <tag-NUL-header) &&
|
|
|
|
test_when_finished "remove_object $tag" &&
|
|
|
|
echo $tag >.git/refs/tags/wrong &&
|
|
|
|
test_when_finished "git update-ref -d refs/tags/wrong" &&
|
|
|
|
test_must_fail git fsck --tags 2>out &&
|
|
|
|
cat out &&
|
|
|
|
grep "error in tag $tag.*unterminated header: NUL at offset" out
|
|
|
|
'
|
|
|
|
|
2010-09-07 03:47:07 +02:00
|
|
|
# Sanity check after the corruption tests: fsck must succeed and print
# nothing at all (stdout and stderr are compared against the empty file
# named "empty").
test_expect_success 'cleaned up' '
	git fsck >actual 2>&1 &&
	test_cmp empty actual
'
|
2009-02-19 12:13:39 +01:00
|
|
|
|
2012-02-13 21:17:11 +01:00
|
|
|
# On an uncorrupted repository, "rev-list --verify-objects --all" must
# succeed and write nothing to stderr.
test_expect_success 'rev-list --verify-objects' '
	git rev-list --verify-objects --all >/dev/null 2>out &&
	test_cmp empty out
'
|
|
|
|
|
|
|
|
# Rename a freshly written loose blob so that its file name no longer
# matches its content: the two-character fan-out directory is kept and
# the remaining 38 hex digits are replaced by "f"s. $sha is then reset
# to that bogus id, which is referenced from the index, a tree, a commit
# and refs/heads/bogus. "rev-list --verify-objects" may or may not exit
# non-zero, but it must report a sha1 mismatch for the bogus id.
# The expected id is taken from $sha instead of being spelled out, so
# the check does not depend on the leading digits of the blob id.
test_expect_success 'rev-list --verify-objects with bad sha1' '
	sha=$(echo blob | git hash-object -w --stdin) &&
	old=$(echo $sha | sed "s+^..+&/+") &&
	new=$(dirname $old)/ffffffffffffffffffffffffffffffffffffff &&
	sha="$(dirname $new)$(basename $new)" &&
	mv .git/objects/$old .git/objects/$new &&
	test_when_finished "remove_object $sha" &&
	git update-index --add --cacheinfo 100644 $sha foo &&
	test_when_finished "git read-tree -u --reset HEAD" &&
	tree=$(git write-tree) &&
	test_when_finished "remove_object $tree" &&
	cmt=$(echo bogus | git commit-tree $tree) &&
	test_when_finished "remove_object $cmt" &&
	git update-ref refs/heads/bogus $cmt &&
	test_when_finished "git update-ref -d refs/heads/bogus" &&

	test_might_fail git rev-list --verify-objects refs/heads/bogus >/dev/null 2>out &&
	cat out &&
	test_i18ngrep -q "error: sha1 mismatch $sha" out
'
|
|
|
|
|
2015-06-22 17:27:06 +02:00
|
|
|
# Duplicate the author line of the HEAD commit (sed joins two copies
# with a comma, tr turns the comma into a newline) and store the result
# as a new commit on refs/heads/bogus. A plain fsck must fail, while
# fsck with fsck.multipleAuthors=ignore must succeed.
test_expect_success 'force fsck to ignore double author' '
	git cat-file commit HEAD >basis &&
	sed "s/^author .*/&,&/" <basis | tr , \\n >multiple-authors &&
	new=$(git hash-object -t commit -w --stdin <multiple-authors) &&
	test_when_finished "remove_object $new" &&
	git update-ref refs/heads/bogus "$new" &&
	test_when_finished "git update-ref -d refs/heads/bogus" &&
	test_must_fail git fsck &&
	git -c fsck.multipleAuthors=ignore fsck
'
|
|
|
|
|
2012-07-28 17:06:29 +02:00
|
|
|
# printf escape sequences for NUL bytes: _bz is one "\0", _bz5 is five
# of them and _bz20 is twenty. Passed to printf as part of the format
# string, _bz20 yields twenty NUL bytes.
_bz='\0'
_bz5="$_bz$_bz$_bz$_bz$_bz"
_bz20="$_bz5$_bz5$_bz5$_bz5"
|
|
|
|
|
|
|
|
# In a fresh repository "null-blob", store a tree with one regular-file
# entry (mode 100644, name "file") followed by twenty NUL bytes as its
# object id. fsck must succeed but emit a warning mentioning
# "null sha1". Everything runs in a subshell so the directory change
# does not leak into later tests.
test_expect_success 'fsck notices blob entry pointing to null sha1' '
	(
		git init null-blob &&
		cd null-blob &&
		sha=$(printf "100644 file$_bz$_bz20" |
		      git hash-object -w --stdin -t tree) &&
		git fsck 2>out &&
		cat out &&
		grep "warning.*null sha1" out
	)
'
|
|
|
|
|
|
|
|
# In a fresh repository "null-commit", store a tree with one gitlink
# entry (mode 160000, name "submodule") followed by twenty NUL bytes as
# its object id. fsck must succeed but emit a warning mentioning
# "null sha1". Everything runs in a subshell so the directory change
# does not leak into later tests.
test_expect_success 'fsck notices submodule entry pointing to null sha1' '
	(
		git init null-commit &&
		cd null-commit &&
		sha=$(printf "160000 submodule$_bz$_bz20" |
		      git hash-object -w --stdin -t tree) &&
		git fsck 2>out &&
		cat out &&
		grep "warning.*null sha1" out
	)
'
|
|
|
|
|
fsck: complain about HFS+ ".git" aliases in trees
Now that the index can block pathnames that case-fold to
".git" on HFS+, it would be helpful for fsck to notice such
problematic paths. This lets servers which use
receive.fsckObjects block them before the damage spreads.
Note that the fsck check is always on, even for systems
without core.protectHFS set. This is technically more
restrictive than we need to be, as a set of users on ext4
could happily use these odd filenames without caring about
HFS+.
However, on balance, it's helpful for all servers to block
these (because the paths can be used for mischief, and
servers which bother to fsck would want to stop the spread
whether they are on HFS+ themselves or not), and hardly
anybody will be affected (because the blocked names are
variants of .git with invisible Unicode code-points mixed
in, meaning mischief is almost certainly what the tree
author had in mind).
Ideally these would be controlled by a separate
"fsck.protectHFS" flag. However, it would be much nicer to
be able to enable/disable _any_ fsck flag individually, and
any scheme we choose should match such a system. Given the
likelihood of anybody using such a path in practice, it is
not unreasonable to wait until such a system materializes.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-12-16 00:21:57 +01:00
|
|
|
while read name path pretty; do
|
2014-11-24 19:40:11 +01:00
|
|
|
while read mode type; do
|
fsck: complain about HFS+ ".git" aliases in trees
Now that the index can block pathnames that case-fold to
".git" on HFS+, it would be helpful for fsck to notice such
problematic paths. This lets servers which use
receive.fsckObjects block them before the damage spreads.
Note that the fsck check is always on, even for systems
without core.protectHFS set. This is technically more
restrictive than we need to be, as a set of users on ext4
could happily use these odd filenames without caring about
HFS+.
However, on balance, it's helpful for all servers to block
these (because the paths can be used for mischief, and
servers which bother to fsck would want to stop the spread
whether they are on HFS+ themselves or not), and hardly
anybody will be affected (because the blocked names are
variants of .git with invisible Unicode code-points mixed
in, meaning mischief is almost certainly what the tree
author had in mind).
Ideally these would be controlled by a separate
"fsck.protectHFS" flag. However, it would be much nicer to
be able to enable/disable _any_ fsck flag individually, and
any scheme we choose should match such a system. Given the
likelihood of anybody using such a path in practice, it is
not unreasonable to wait until such a system materializes.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-12-16 00:21:57 +01:00
|
|
|
: ${pretty:=$path}
|
|
|
|
test_expect_success "fsck notices $pretty as $type" '
|
2014-11-24 19:40:11 +01:00
|
|
|
(
|
|
|
|
git init $name-$type &&
|
|
|
|
cd $name-$type &&
|
|
|
|
echo content >file &&
|
|
|
|
git add file &&
|
|
|
|
git commit -m base &&
|
|
|
|
blob=$(git rev-parse :file) &&
|
|
|
|
tree=$(git rev-parse HEAD^{tree}) &&
|
|
|
|
value=$(eval "echo \$$type") &&
|
|
|
|
printf "$mode $type %s\t%s" "$value" "$path" >bad &&
|
fsck: complain about NTFS ".git" aliases in trees
Now that the index can block pathnames that can be mistaken
to mean ".git" on NTFS and FAT32, it would be helpful for
fsck to notice such problematic paths. This lets servers
which use receive.fsckObjects block them before the damage
spreads.
Note that the fsck check is always on, even for systems
without core.protectNTFS set. This is technically more
restrictive than we need to be, as a set of users on ext4
could happily use these odd filenames without caring about
NTFS.
However, on balance, it's helpful for all servers to block
these (because the paths can be used for mischief, and
servers which bother to fsck would want to stop the spread
whether they are on NTFS themselves or not), and hardly
anybody will be affected (because the blocked names are
variants of .git or git~1, meaning mischief is almost
certainly what the tree author had in mind).
Ideally these would be controlled by a separate
"fsck.protectNTFS" flag. However, it would be much nicer to
be able to enable/disable _any_ fsck flag individually, and
any scheme we choose should match such a system. Given the
likelihood of anybody using such a path in practice, it is
not unreasonable to wait until such a system materializes.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-12-10 22:28:27 +01:00
|
|
|
bad_tree=$(git mktree <bad) &&
|
2014-11-24 19:40:11 +01:00
|
|
|
git fsck 2>out &&
|
|
|
|
cat out &&
|
fsck: complain about NTFS ".git" aliases in trees
Now that the index can block pathnames that can be mistaken
to mean ".git" on NTFS and FAT32, it would be helpful for
fsck to notice such problematic paths. This lets servers
which use receive.fsckObjects block them before the damage
spreads.
Note that the fsck check is always on, even for systems
without core.protectNTFS set. This is technically more
restrictive than we need to be, as a set of users on ext4
could happily use these odd filenames without caring about
NTFS.
However, on balance, it's helpful for all servers to block
these (because the paths can be used for mischief, and
servers which bother to fsck would want to stop the spread
whether they are on NTFS themselves or not), and hardly
anybody will be affected (because the blocked names are
variants of .git or git~1, meaning mischief is almost
certainly what the tree author had in mind).
Ideally these would be controlled by a separate
"fsck.protectNTFS" flag. However, it would be much nicer to
be able to enable/disable _any_ fsck flag individually, and
any scheme we choose should match such a system. Given the
likelihood of anybody using such a path in practice, it is
not unreasonable to wait until such a system materializes.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-12-10 22:28:27 +01:00
|
|
|
grep "warning.*tree $bad_tree" out
|
2014-11-24 19:40:11 +01:00
|
|
|
)'
|
|
|
|
done <<-\EOF
|
|
|
|
100644 blob
|
|
|
|
040000 tree
|
|
|
|
EOF
|
fsck: complain about HFS+ ".git" aliases in trees
Now that the index can block pathnames that case-fold to
".git" on HFS+, it would be helpful for fsck to notice such
problematic paths. This lets servers which use
receive.fsckObjects block them before the damage spreads.
Note that the fsck check is always on, even for systems
without core.protectHFS set. This is technically more
restrictive than we need to be, as a set of users on ext4
could happily use these odd filenames without caring about
HFS+.
However, on balance, it's helpful for all servers to block
these (because the paths can be used for mischief, and
servers which bother to fsck would want to stop the spread
whether they are on HFS+ themselves or not), and hardly
anybody will be affected (because the blocked names are
variants of .git with invisible Unicode code-points mixed
in, meaning mischief is almost certainly what the tree
author had in mind).
Ideally these would be controlled by a separate
"fsck.protectHFS" flag. However, it would be much nicer to
be able to enable/disable _any_ fsck flag individually, and
any scheme we choose should match such a system. Given the
likelihood of anybody using such a path in practice, it is
not unreasonable to wait until such a system materializes.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-12-16 00:21:57 +01:00
|
|
|
done <<-EOF
|
2014-11-24 19:40:11 +01:00
|
|
|
dot .
|
|
|
|
dotdot ..
|
|
|
|
dotgit .git
|
2014-11-24 19:40:44 +01:00
|
|
|
dotgit-case .GIT
|
fsck: complain about HFS+ ".git" aliases in trees
Now that the index can block pathnames that case-fold to
".git" on HFS+, it would be helpful for fsck to notice such
problematic paths. This lets servers which use
receive.fsckObjects block them before the damage spreads.
Note that the fsck check is always on, even for systems
without core.protectHFS set. This is technically more
restrictive than we need to be, as a set of users on ext4
could happily use these odd filenames without caring about
HFS+.
However, on balance, it's helpful for all servers to block
these (because the paths can be used for mischief, and
servers which bother to fsck would want to stop the spread
whether they are on HFS+ themselves or not), and hardly
anybody will be affected (because the blocked names are
variants of .git with invisible Unicode code-points mixed
in, meaning mischief is almost certainly what the tree
author had in mind).
Ideally these would be controlled by a separate
"fsck.protectHFS" flag. However, it would be much nicer to
be able to enable/disable _any_ fsck flag individually, and
any scheme we choose should match such a system. Given the
low likelihood of anybody using such a path in practice, it is
not unreasonable to wait until such a system materializes.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-12-16 00:21:57 +01:00
|
|
|
dotgit-unicode .gI${u200c}T .gI{u200c}T
|
fsck: complain about NTFS ".git" aliases in trees
Now that the index can block pathnames that can be mistaken
to mean ".git" on NTFS and FAT32, it would be helpful for
fsck to notice such problematic paths. This lets servers
which use receive.fsckObjects block them before the damage
spreads.
Note that the fsck check is always on, even for systems
without core.protectNTFS set. This is technically more
restrictive than we need to be, as a set of users on ext4
could happily use these odd filenames without caring about
NTFS.
However, on balance, it's helpful for all servers to block
these (because the paths can be used for mischief, and
servers which bother to fsck would want to stop the spread
whether they are on NTFS themselves or not), and hardly
anybody will be affected (because the blocked names are
variants of .git or git~1, meaning mischief is almost
certainly what the tree author had in mind).
Ideally these would be controlled by a separate
"fsck.protectNTFS" flag. However, it would be much nicer to
be able to enable/disable _any_ fsck flag individually, and
any scheme we choose should match such a system. Given the
low likelihood of anybody using such a path in practice, it is
not unreasonable to wait until such a system materializes.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-12-10 22:28:27 +01:00
|
|
|
dotgit-case2 .Git
|
|
|
|
git-tilde1 git~1
|
|
|
|
dotgitdot .git.
|
|
|
|
dot-backslash-case .\\\\.GIT\\\\foobar
|
|
|
|
dotgit-case-backslash .git\\\\foobar
|
2014-11-24 19:40:11 +01:00
|
|
|
EOF
|
2012-11-28 22:35:29 +01:00
|
|
|
|
2014-12-23 09:45:36 +01:00
|
|
|
test_expect_success 'fsck allows .Ňit' '
	git init not-dotgit &&
	(
		cd not-dotgit &&

		# A committed blob gives the hand-made tree entry something to point at.
		echo content >file &&
		git add file &&
		git commit -m base &&
		blob=$(git rev-parse :file) &&

		# Tree entry named ".", bytes 0305 0207 (octal), "it": it resembles
		# ".git" but must not be reported by fsck.
		printf "100644 blob $blob\t.\\305\\207it" >tree &&
		tree=$(git mktree <tree) &&

		# fsck has to succeed and stay silent on stderr.
		git fsck 2>err &&
		test_line_count = 0 err
	)
'
|
|
|
|
|
2016-04-14 19:58:22 +02:00
|
|
|
test_expect_success 'NUL in commit' '
	rm -fr nul-in-commit &&
	git init nul-in-commit &&
	(
		cd nul-in-commit &&

		# Write a commit whose message carries a "Q" placeholder, then let
		# q_to_nul rewrite the raw object and store the munged result.
		git commit --allow-empty -m "initial commitQNUL after message" &&
		git cat-file commit HEAD >original &&
		q_to_nul <original >munged &&
		git hash-object -w -t commit --stdin <munged >name &&
		git branch bad $(cat name) &&

		# With the check raised to "error", fsck must fail and name it.
		test_must_fail git -c fsck.nulInCommit=error fsck 2>warn.1 &&
		grep nulInCommit warn.1 &&

		# With default settings fsck succeeds but still mentions it.
		git fsck 2>warn.2 &&
		grep nulInCommit warn.2
	)
'
|
|
|
|
|
2014-09-12 05:38:30 +02:00
|
|
|
# Build a fresh repository in "missing" and break it by deleting the
# loose object file for exactly one object.
#
# $1 - revision expression naming the object to delete; it is resolved
#      with rev-parse inside the new repository.
create_repo_missing () {
	rm -rf missing &&
	git init missing &&
	(
		cd missing &&

		# Two commits, the second one adding subdir/file.
		git commit -m one --allow-empty &&
		mkdir subdir &&
		echo content >subdir/file &&
		git add subdir/file &&
		git commit -m two &&

		# An annotated tag pointing at a blob unrelated to the history.
		tagged=$(echo unrelated | git hash-object --stdin -w) &&
		git tag -m foo tag $tagged &&

		# Turn the object name into its loose path (first two hex
		# digits, a slash, then the rest) and delete that file.
		oid=$(git rev-parse --verify "$1") &&
		rest=${oid#??} &&
		rm .git/objects/${oid%"$rest"}/$rest
	)
}
|
|
|
|
|
|
|
|
test_expect_success 'fsck notices missing blob' '
	# Remove the blob stored at subdir/file in HEAD.
	create_repo_missing HEAD:subdir/file &&
	test_must_fail git -C missing fsck
'
|
|
|
|
|
|
|
|
test_expect_success 'fsck notices missing subtree' '
	# Remove the tree object for subdir in HEAD.
	create_repo_missing HEAD:subdir &&
	test_must_fail git -C missing fsck
'
|
|
|
|
|
|
|
|
test_expect_success 'fsck notices missing root tree' '
	# Remove the top-level tree of HEAD.
	create_repo_missing HEAD^{tree} &&
	test_must_fail git -C missing fsck
'
|
|
|
|
|
|
|
|
test_expect_success 'fsck notices missing parent' '
	# Remove the parent commit of HEAD.
	create_repo_missing HEAD^ &&
	test_must_fail git -C missing fsck
'
|
|
|
|
|
|
|
|
test_expect_success 'fsck notices missing tagged object' '
	# Remove the blob that the annotated tag "tag" points at.
	create_repo_missing tag^{blob} &&
	test_must_fail git -C missing fsck
'
|
|
|
|
|
|
|
|
test_expect_success 'fsck notices ref pointing to missing commit' '
	# Remove the commit that HEAD resolves to.
	create_repo_missing HEAD &&
	test_must_fail git -C missing fsck
'
|
|
|
|
|
|
|
|
test_expect_success 'fsck notices ref pointing to missing tag' '
	# Remove the annotated tag object itself.
	create_repo_missing tag &&
	test_must_fail git -C missing fsck
'
|
|
|
|
|
2015-06-22 17:27:12 +02:00
|
|
|
test_expect_success 'fsck --connectivity-only' '
	rm -rf connectivity-only &&
	git init connectivity-only &&
	(
		cd connectivity-only &&
		touch empty &&
		git add empty &&
		test_commit empty &&

		# Drop the index now; we want to be sure that we
		# recursively notice the broken objects
		# because they are reachable from refs, not because
		# they are in the index.
		rm -f .git/index &&

		# corrupt the blob, but in a way that we can still identify
		# its type. That lets us see that --connectivity-only is
		# not actually looking at the contents, but leaves it
		# free to examine the type if it chooses.
		empty_path=.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 &&
		other=$(echo unrelated | git hash-object -w --stdin) &&
		mv -f $(sha1_file $other) $empty_path &&

		# A full check sees the mismatch; a connectivity-only check
		# does not look at the contents and so passes.
		test_must_fail git fsck --strict &&
		git fsck --strict --connectivity-only &&

		# Now overwrite the loose file of the root tree with junk;
		# even the connectivity-only check has to fail on that.
		oid=$(git rev-parse HEAD:) &&
		rest=${oid#??} &&
		tree_path=.git/objects/${oid%$rest}/$rest &&
		rm -f $tree_path &&
		echo invalid >$tree_path &&
		test_must_fail git fsck --strict --connectivity-only
	)
'
|
|
|
|
|
fsck: prepare dummy objects for --connectivity-check
Normally fsck makes a pass over all objects to check their
integrity, and then follows up with a reachability check to
make sure we have all of the referenced objects (and to know
which ones are dangling). The latter checks for the HAS_OBJ
flag in obj->flags to see if we found the object in the
first pass.
Commit 02976bf85 (fsck: introduce `git fsck --connectivity-only`,
2015-06-22) taught fsck to skip the initial pass, and to
fall back to has_sha1_file() instead of the HAS_OBJ check.
However, it converted only one HAS_OBJ check to use
has_sha1_file(). But there are many other places in
builtin/fsck.c that assume that the flag is set (or that
lookup_object() will return an object at all). This leads to
several bugs with --connectivity-only:
1. mark_object() will not queue objects for examination,
so recursively following links from commits to trees,
etc, did nothing. I.e., we were checking the
reachability of hardly anything at all.
2. When a set of heads is given on the command-line, we
use lookup_object() to see if they exist. But without
the initial pass, we assume nothing exists.
3. When loading reflog entries, we do a similar
lookup_object() check, and complain that the reflog is
broken if the object doesn't exist in our hash.
So in short, --connectivity-only is broken pretty badly, and
will claim that your repository is fine when it's not.
Presumably nobody noticed for a few reasons.
One is that the embedded test does not actually test the
recursive nature of the reachability check. All of the
missing objects are still in the index, and we directly
check items from the index. This patch modifies the test to
delete the index, which shows off breakage (1).
Another is that --connectivity-only just skips the initial
pass for loose objects. So on a real repository, the packed
objects were still checked correctly. But on the flipside,
it means that "git fsck --connectivity-only" still checks
the sha1 of all of the packed objects, nullifying its
original purpose of being a faster git-fsck.
And of course the final problem is that the bug only shows
up when there _is_ corruption, which is rare. So anybody
running "git fsck --connectivity-only" proactively would
assume it was being thorough, when it was not.
One possibility for fixing this is to find all of the spots
that rely on HAS_OBJ and tweak them for the connectivity-only
case. But besides the risk that we might miss a spot (and I
found three already, corresponding to the three bugs above),
there are other parts of fsck that _can't_ work without a
full list of objects. E.g., the list of dangling objects.
Instead, let's make the connectivity-only case look more
like the normal case. Rather than skip the initial pass
completely, we'll do an abbreviated one that sets up the
HAS_OBJ flag for each object, without actually loading the
object data.
That's simple and fast, and we don't have to care about the
connectivity_only flag in the rest of the code at all.
While we're at it, let's make sure we treat loose and packed
objects the same (i.e., setting up dummy objects for both
and skipping the actual sha1 check). That makes the
connectivity-only check actually fast on a real repo (40
seconds versus 180 seconds on my copy of linux.git).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-01-17 22:32:57 +01:00
|
|
|
test_expect_success 'fsck --connectivity-only with explicit head' '
	rm -rf connectivity-only &&
	git init connectivity-only &&
	(
		cd connectivity-only &&
		test_commit foo &&

		# Without an index, the blob is only reachable through the tree.
		rm -f .git/index &&
		root=$(git rev-parse HEAD^{tree}) &&
		remove_object $(git rev-parse HEAD:foo.t) &&

		# Checking just that tree must still notice the missing blob.
		test_must_fail git fsck --connectivity-only $root
	)
'
|
|
|
|
|
2016-07-17 13:00:02 +02:00
|
|
|
test_expect_success 'fsck --name-objects' '
	rm -rf name-objects &&
	git init name-objects &&
	(
		cd name-objects &&

		# Three commits; the first one ("julius") stores caesar.t.
		test_commit julius caesar.t &&
		test_commit augustus &&
		test_commit caesar &&

		# Break the history by deleting the blob of julius:caesar.t.
		remove_object $(git rev-parse julius:caesar.t) &&
		test_must_fail git fsck --name-objects >out &&

		# The tree that references the missing blob must be reported
		# together with a name derived from a ref.
		# "grep -E" replaces the obsolescent "egrep" spelling.
		tree=$(git rev-parse --verify julius:) &&
		grep -E "$tree \((refs/heads/master|HEAD)@\{[0-9]*\}:" out
	)
'
|
|
|
|
|
2017-01-13 18:54:39 +01:00
|
|
|
test_expect_success 'alternate objects are correctly blamed' '
	test_when_finished "rm -rf alt.git .git/objects/info/alternates" &&

	# Borrow objects from a bare repository through an alternates file.
	git init --bare alt.git &&
	echo "../../alt.git/objects" >.git/objects/info/alternates &&

	# Plant an empty file where a loose object would live in the alternate.
	mkdir alt.git/objects/12 &&
	: >alt.git/objects/12/34567890123456789012345678901234567890 &&

	# fsck must fail, and its output must point at the alternate.
	test_must_fail git fsck >out 2>&1 &&
	grep alt.git out
'
|
|
|
|
|
2017-01-13 18:55:55 +01:00
|
|
|
test_expect_success 'fsck errors in packed objects' '
	# Derive two commits from HEAD by replacing the first "<" in the
	# raw commit text.
	git cat-file commit HEAD >basis &&
	sed "s/</one/" basis >one &&
	sed "s/</foo/" basis >two &&
	one=$(git hash-object -t commit -w one) &&
	two=$(git hash-object -t commit -w two) &&

	# Pack both, then drop the loose copies so only the pack has them.
	pack=$(printf "%s\n" $one $two | git pack-objects .git/objects/pack/pack) &&
	test_when_finished "rm -f .git/objects/pack/pack-$pack.*" &&
	remove_object $one &&
	remove_object $two &&

	# Both commits must be reported, without any "corrupt" complaint.
	test_must_fail git fsck 2>out &&
	grep "error in commit $one.* - bad name" out &&
	grep "error in commit $two.* - bad name" out &&
	! grep corrupt out
'
|
|
|
|
|
2017-07-28 22:08:02 +02:00
|
|
|
test_expect_success 'fsck fails on corrupt packfile' '
	hsh=$(git commit-tree -m mycommit HEAD^{tree}) &&
	pack=$(echo $hsh | git pack-objects .git/objects/pack/pack) &&

	# Register the cleanup as soon as the pack exists, so a failure in
	# the corruption step below cannot leave it behind for later tests.
	test_when_finished "rm -f .git/objects/pack/pack-$pack.*" &&

	# Corrupt the first byte of the first object. (It contains 3 type bits,
	# at least one of which is not zero, so setting the first byte to 0 is
	# sufficient.)
	#
	# The printf argument must be double-quoted: this whole body already
	# sits inside single quotes, so a single-quoted backslash-zero would
	# end the outer quoting and printf would emit an ASCII "0" character
	# rather than a NUL byte.
	chmod a+w .git/objects/pack/pack-$pack.pack &&
	printf "\0" | dd of=.git/objects/pack/pack-$pack.pack bs=1 conv=notrunc seek=12 &&

	# Drop the loose copy so that only the damaged pack holds the commit.
	remove_object $hsh &&
	test_must_fail git fsck 2>out &&
	test_i18ngrep "checksum mismatch" out
'
|
|
|
|
|
fsck: parse loose object paths directly
When we iterate over the list of loose objects to check, we
get the actual path of each object. But we then throw it
away and pass just the sha1 to fsck_sha1(), which will do a
fresh lookup. Usually it would find the same object, but it
may not if an object exists both as a loose and a packed
object. We may end up checking the packed object twice, and
never look at the loose one.
In practice this isn't too terrible, because if fsck doesn't
complain, it means you have at least one good copy. But
since the point of fsck is to look for corruption, we should
be thorough.
The new read_loose_object() interface can help us get the
data from disk, and then we replace parse_object() with
parse_object_buffer(). As a bonus, our error messages now
mention the path to a corrupted object, which should make it
easier to track down errors when they do happen.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-01-13 18:59:44 +01:00
|
|
|
test_expect_success 'fsck finds problems in duplicate loose objects' '
	rm -rf broken-duplicate &&
	git init broken-duplicate &&
	(
		cd broken-duplicate &&
		test_commit duplicate &&

		# no "-d" here, so we end up with duplicates
		git repack &&

		# now corrupt the loose copy
		loose=$(sha1_file "$(git rev-parse HEAD)") &&
		rm "$loose" &&
		echo broken >"$loose" &&

		test_must_fail git fsck
	)
'
|
|
|
|
|
2017-01-13 19:00:25 +01:00
|
|
|
test_expect_success 'fsck detects trailing loose garbage (commit)' '
	# Make a new commit object by appending a line to the text of HEAD.
	git cat-file commit HEAD >basis &&
	echo bump-commit-sha1 >>basis &&
	commit=$(git hash-object -w -t commit basis) &&
	test_when_finished "remove_object $commit" &&

	# Append junk after the end of its loose object file.
	file=$(sha1_file $commit) &&
	chmod +w "$file" &&
	echo garbage >>"$file" &&

	test_must_fail git fsck 2>out &&
	test_i18ngrep "garbage.*$commit" out
'
|
|
|
|
|
|
|
|
test_expect_success 'fsck detects trailing loose garbage (blob)' '
	blob=$(echo trailing | git hash-object -w --stdin) &&
	test_when_finished "remove_object $blob" &&

	# Append junk after the end of the loose object file of the blob.
	file=$(sha1_file $blob) &&
	chmod +w "$file" &&
	echo garbage >>"$file" &&

	test_must_fail git fsck 2>out &&
	test_i18ngrep "garbage.*$blob" out
'
|
|
|
|
|
2017-01-16 22:25:35 +01:00
|
|
|
# For each object type, we have one version which is referenced by another object
|
|
|
|
# (and so while unreachable, not dangling), and another variant which really is
|
|
|
|
# dangling.
|
|
|
|
test_expect_success 'fsck notices dangling objects' '
	git init dangling &&
	(
		cd dangling &&

		# The "d"-prefixed objects are referenced by nothing; their
		# counterparts are referenced by another object.
		blob=$(echo not-dangling | git hash-object -w --stdin) &&
		dblob=$(echo dangling | git hash-object -w --stdin) &&
		tree=$(printf "100644 blob %s\t%s\n" $blob one | git mktree) &&
		dtree=$(printf "100644 blob %s\t%s\n" $blob two | git mktree) &&
		commit=$(git commit-tree $tree) &&
		dcommit=$(git commit-tree -p $commit $tree) &&

		# One "dangling <type> <oid>" line per unreferenced object.
		printf "dangling %s %s\n" \
			blob $dblob \
			commit $dcommit \
			tree $dtree >expect &&

		git fsck >actual &&
		# the output order is non-deterministic, as it comes from a hash
		sort actual >actual.sorted &&
		test_cmp expect actual.sorted
	)
'
|
|
|
|
|
2017-01-16 22:33:29 +01:00
|
|
|
test_expect_success 'fsck $name notices bogus $name' '
	# Neither an unresolvable name nor the all-zero object id may pass.
	test_must_fail git fsck bogus &&
	test_must_fail git fsck $ZERO_OID
'
|
|
|
|
|
2017-01-16 22:34:21 +01:00
|
|
|
test_expect_success 'bogus head does not fallback to all heads' '
	# set up a case that will cause a reachability complaint
	echo to-be-deleted >foo &&
	git add foo &&
	blob=$(git rev-parse :foo) &&
	test_when_finished "git rm --cached foo" &&
	remove_object $blob &&

	# fsck is given only a bogus head: it must fail, and its output
	# must not mention the blob that was removed above.
	test_must_fail git fsck $ZERO_OID >out 2>&1 &&
	! grep $blob out
'
|
|
|
|
|
2017-04-25 20:41:09 +02:00
|
|
|
# Corrupt the checksum on the index.
|
|
|
|
# Add 1 to the last byte in the SHA.
|
|
|
|
corrupt_index_checksum () {
|
|
|
|
perl -w -e '
|
|
|
|
use Fcntl ":seek";
|
|
|
|
open my $fh, "+<", ".git/index" or die "open: $!";
|
|
|
|
binmode $fh;
|
|
|
|
seek $fh, -1, SEEK_END or die "seek: $!";
|
|
|
|
read $fh, my $in_byte, 1 or die "read: $!";
|
|
|
|
|
|
|
|
$in_value = unpack("C", $in_byte);
|
|
|
|
$out_value = ($in_value + 1) & 255;
|
|
|
|
|
|
|
|
$out_byte = pack("C", $out_value);
|
|
|
|
|
|
|
|
seek $fh, -1, SEEK_END or die "seek: $!";
|
|
|
|
print $fh $out_byte;
|
|
|
|
close $fh or die "close: $!";
|
|
|
|
'
|
|
|
|
}
|
|
|
|
|
|
|
|
# Corrupt the checksum on the index and then
|
|
|
|
# verify that only fsck notices.
|
2017-04-14 22:32:21 +02:00
|
|
|
test_expect_success 'detect corrupt index file in fsck' '
	# Keep a pristine copy and put it back once the test is over.
	cp .git/index .git/index.backup &&
	test_when_finished "mv .git/index.backup .git/index" &&

	corrupt_index_checksum &&

	# fsck must fail and name the index as the culprit.
	test_must_fail git fsck --cache 2>errors &&
	grep "bad index file" errors
'
|
|
|
|
|
2009-01-30 09:33:00 +01:00
|
|
|
# End of the script; test_done presumably comes from the test-lib.sh
# sourced at the top and wraps up the run (confirm against test-lib.sh).
test_done
|