2008-11-12 18:59:02 +01:00
|
|
|
#!/bin/sh
|
|
|
|
|
|
|
|
test_description='git repack works correctly'
|
|
|
|
|
|
|
|
. ./test-lib.sh
|
builtin/repack.c: support writing a MIDX while repacking
Teach `git repack` a new `--write-midx` option for callers that wish to
persist a multi-pack index in their repository while repacking.
There are two existing alternatives to this new flag, but they don't
cover our particular use-case. These alternatives are:
- Call 'git multi-pack-index write' after running 'git repack', or
- Set 'GIT_TEST_MULTI_PACK_INDEX=1' in your environment when running
'git repack'.
The former works, but introduces a gap in bitmap coverage between
repacking and writing a new MIDX (since the repack may have deleted a
pack included in the existing MIDX, invalidating it altogether).
Setting the 'GIT_TEST_' environment variable is obviously unsupported.
In fact, even if it were supported officially, it still wouldn't work,
because it generates the MIDX *after* redundant packs have been dropped,
leading to the same issue as above.
Introduce a new option which eliminates this race by teaching `git
repack` to generate the MIDX at the critical point: after the new packs
have been written and moved into place, but before the redundant packs
have been removed.
This option is compatible with `git repack`'s '--bitmap' option (it
changes the interpretation to be: "write a bitmap corresponding to the
MIDX after one has been generated").
There is a little bit of additional noise in the patch below to avoid
repeating ourselves when selecting which packs to delete. Instead of a
single loop as before (where we iterate over 'existing_packs', decide if
a pack is worth deleting, and if so, delete it), we have two loops (the
first where we decide which ones are worth deleting, and the second
where we actually do the deleting). This makes it so we have a single
check we can make consistently when (1) telling the MIDX which packs we
want to exclude, and (2) actually unlinking the redundant packs.
There is also a tiny change to short-circuit the body of
write_midx_included_packs() when no packs remain in the case of an empty
repository. The MIDX code does not handle this, so avoid trying to
generate a MIDX covering zero packs in the first place.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-09-29 03:55:18 +02:00
|
|
|
. "${TEST_DIRECTORY}/lib-bitmap.sh"
|
|
|
|
. "${TEST_DIRECTORY}/lib-midx.sh"
|
2021-12-20 15:48:11 +01:00
|
|
|
. "${TEST_DIRECTORY}/lib-terminal.sh"
|
2008-11-12 18:59:02 +01:00
|
|
|
|
2019-12-04 23:03:09 +01:00
|
|
|
# Run test_commit with the given arguments (its stdout is diverted to
# stderr so that it does not pollute our own output), then invoke
# "git pack-objects --all --unpacked --incremental" writing under
# .git/objects/pack/pack, and print the name of the resulting packfile
# ("pack-<id>.pack") on stdout.
#
# Sets the global $incrpackid as a side effect.
commit_and_pack () {
	test_commit "$@" >&2 || return
	incrpackid=$(
		git pack-objects --all --unpacked --incremental \
			.git/objects/pack/pack </dev/null
	) || return
	echo pack-$incrpackid.pack
}
|
|
|
|
|
2019-12-04 23:03:09 +01:00
|
|
|
# Succeed only if every object ID listed by "git verify-pack -v" for the
# packs under alt_objects/pack is also listed for the (single) index found
# under .git/objects/pack.
#
# Leaves orig.raw, orig, dest.raw, dest and missing behind in the current
# directory; "missing" holds the IDs present in orig but absent from dest.
test_no_missing_in_packs () {
	# test_path_is_file is handed the whole "ls" output as one argument,
	# so more than one local index makes the check fail.
	myidx=$(ls -1 .git/objects/pack/*.idx) || return
	test_path_is_file "$myidx" || return

	# Object IDs available from the alternate's packs.
	git verify-pack -v alt_objects/pack/*.idx >orig.raw || return
	sed -n -e "s/^\($OID_REGEX\).*/\1/p" <orig.raw | sort >orig || return

	# First column of the verify-pack listing for the local pack.
	git verify-pack -v $myidx >dest.raw || return
	cut -d" " -f1 <dest.raw | sort >dest || return

	comm -23 orig dest >missing || return
	test_must_be_empty missing
}
|
|
|
|
|
2019-12-04 23:03:24 +01:00
|
|
|
# Usage: test_has_duplicate_object (true|false)
#
# Looks through every pack index under .git/objects/pack except
# pack-$packid.idx for a "git verify-pack -v" line starting with $oid, and
# succeeds when the outcome ("true" if found, "false" otherwise) equals
# the first argument.
#
# Callers must have set $packid and $oid beforehand.  The listing of the
# most recently examined pack is left in the file "packlist".
test_has_duplicate_object () {
	want_duplicate_object="$1"
	found_duplicate_object=false
	for p in .git/objects/pack/*.idx
	do
		idx=${p##*/}
		case "$idx" in
		"pack-$packid.idx")
			# the pack the object is known to live in
			continue
			;;
		esac
		git verify-pack -v $p >packlist || return
		grep "^$oid" packlist || continue
		found_duplicate_object=true
		echo "DUPLICATE OBJECT FOUND"
		break
	done &&
	test "$want_duplicate_object" = "$found_duplicate_object"
}
|
|
|
|
|
2008-11-12 18:59:05 +01:00
|
|
|
# Split the initial history into two hand-built packs, put a .keep marker
# next to the one holding the file2 entry, and check that after
# "repack -A -d -l" no other pack lists the first object of the kept pack.
# Leaves $packid and $oid set; the next two tests build on them.
test_expect_success 'objects in packs marked .keep are not repacked' '
	echo content1 >file1 &&
	echo content2 >file2 &&
	git add . &&
	test_tick &&
	git commit -m initial_commit &&

	# Two packs: the first receives every listed object except the
	# file2 entry ...
	git rev-list --objects --all >objs &&
	grep -v file2 objs | git pack-objects pack &&

	# ... and the second receives only that excluded entry and is the
	# one marked with .keep.
	packid=$(grep file2 objs | git pack-objects pack) &&
	>pack-$packid.keep &&
	git verify-pack -v pack-$packid.idx >packlist &&
	oid=$(head -n 1 packlist | sed -e "s/^\($OID_REGEX\).*/\1/") &&
	mv pack-* .git/objects/pack/ &&

	git repack -A -d -l &&
	git prune-packed &&
	test_has_duplicate_object false
'
|
|
|
|
|
2014-06-10 22:09:23 +02:00
|
|
|
# With bitmaps requested through the -b command-line flag, the object from
# the kept pack is expected to show up in another pack as well.
test_expect_success 'writing bitmaps via command-line can duplicate .keep objects' '
	# relies on $oid, $packid and the .keep file left by the previous test
	GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \
		git repack -Adbl &&
	test_has_duplicate_object true
'
|
|
|
|
|
2014-06-10 22:09:23 +02:00
|
|
|
# Same expectation as the command-line variant, but bitmaps are requested
# through the repack.writebitmaps configuration instead of -b.
test_expect_success 'writing bitmaps via config can duplicate .keep objects' '
	# relies on $oid, $packid and the .keep file left by the earlier tests
	GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \
		git -c repack.writebitmaps=true repack -Adl &&
	test_has_duplicate_object true
'
|
|
|
|
|
2008-11-10 06:59:58 +01:00
|
|
|
# Register alt_objects as an alternate, write the file3 blob there as a
# loose object, commit file3, and check that "repack -a -d -l" does not
# place that blob ($oid) in a pack other than pack-$packid.
test_expect_success 'loose objects in alternate ODB are not repacked' '
	mkdir alt_objects &&
	echo $(pwd)/alt_objects >.git/objects/info/alternates &&

	# the blob is written into the alternate object directory
	echo content3 >file3 &&
	oid=$(GIT_OBJECT_DIRECTORY=alt_objects git hash-object -w file3) &&

	git add file3 &&
	test_tick &&
	git commit -m commit_file3 &&

	git repack -a -d -l &&
	git prune-packed &&
	test_has_duplicate_object false
'
|
|
|
|
|
object-file: use real paths when adding alternates
When adding an alternate ODB, we check if the alternate has the same
path as the object dir, and if so, we do nothing. However, that
comparison does not resolve symlinks. This makes it possible to add the
object dir as an alternate, which may result in bad behavior. For
example, it can trick "git repack -a -l -d" (possibly run by "git gc")
into thinking that all packs come from an alternate and delete all
objects.
rm -rf test &&
git clone https://github.com/git/git test &&
(
cd test &&
ln -s objects .git/alt-objects &&
# -c repack.updateserverinfo=false silences a warning about not
# being able to update "info/refs", it isn't needed to show the
# bad behavior
GIT_ALTERNATE_OBJECT_DIRECTORIES=".git/alt-objects" git \
-c repack.updateserverinfo=false repack -a -l -d &&
# It's broken!
git status
# Because there are no more objects!
ls .git/objects/pack
)
Fix this by resolving symlinks and relative paths before comparing the
alternate and object dir. This lets us clean up a number of issues noted
in 37a95862c6 (alternates: re-allow relative paths from environment,
2016-11-07):
- Now that we compare the real paths, duplicate detection is no longer
foiled by relative paths.
- Using strbuf_realpath() allows us to "normalize" paths that
strbuf_normalize_path() can't, so we can stop silently ignoring errors
when "normalizing" paths from the environment.
- We now store an absolute path based on getcwd() (the "future
direction" named in 37a95862c6), so chdir()-ing in the process no
longer changes the directory pointed to by the alternate. This is a
change in behavior, but a desirable one.
Signed-off-by: Glen Choo <chooglen@google.com>
Acked-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-11-24 01:55:31 +01:00
|
|
|
# Point the alternates file at a symlink that resolves to the repository's
# own object directory and check that "repack -a -d -l" leaves the list of
# packfiles unchanged.  Requires the SYMLINKS prerequisite.
test_expect_success SYMLINKS '--local keeps packs when alternate is objectdir ' '
	test_when_finished "rm -rf repo" &&
	git init repo &&
	test_commit -C repo A &&
	(
		cd repo &&
		git repack -a &&
		ls .git/objects/pack/*.pack >../expect &&

		# .git/alt_objects is a symlink to .git/objects
		ln -s objects .git/alt_objects &&
		echo "$(pwd)/.git/alt_objects" >.git/objects/info/alternates &&

		git repack -a -d -l &&
		ls .git/objects/pack/*.pack >../actual
	) &&
	test_cmp expect actual
'
|
|
|
|
|
2008-11-13 01:50:26 +01:00
|
|
|
# Move every local pack file into the alternate, run "repack -a", and
# check that the resulting local pack lists all objects of the packs in
# the alternate.
test_expect_success 'packed obs in alt ODB are repacked even when local repo is packless' '
	mkdir alt_objects/pack &&
	mv .git/objects/pack/* alt_objects/pack &&

	git repack -a &&
	test_no_missing_in_packs
'
|
|
|
|
|
2009-04-25 01:18:53 +02:00
|
|
|
# Same check as the packless case, but a plain "git repack" first creates
# a local pack before "repack -a -d" runs.
test_expect_success 'packed obs in alt ODB are repacked when local repo has packs' '
	rm -f .git/objects/pack/* &&

	echo new_content >>file1 &&
	git add file1 &&
	test_tick &&
	git commit -m more_content &&

	# first a plain repack, then the full one under test
	git repack &&
	git repack -a -d &&
	test_no_missing_in_packs
'
|
|
|
|
|
2009-03-20 04:47:51 +01:00
|
|
|
# Toggle the .keep marker of every pack in the alternate (remove it where
# present, create it where absent), then check that "repack -a -d" still
# produces a local pack listing all objects of the alternate packs.
test_expect_success 'packed obs in alternate ODB kept pack are repacked' '
	# swap the .keep so the commit object is in the pack with .keep
	for p in alt_objects/pack/*.pack
	do
		base_name=$(basename $p .pack) &&
		if test_path_is_file alt_objects/pack/$base_name.keep
		then
			rm alt_objects/pack/$base_name.keep
		else
			touch alt_objects/pack/$base_name.keep
		fi || return 1
	done &&

	git repack -a -d &&
	test_no_missing_in_packs
'
|
|
|
|
|
2009-03-20 04:47:52 +01:00
|
|
|
# Move all local packs to the alternate, drop the tip commit ($coid) from
# history and reflogs, repack with --unpack-unreachable, and check that
# $coid is in no local pack and cannot be shown once the alternates file
# has been blanked.  Leaves $coid set for the next test.
test_expect_success 'packed unreachable obs in alternate ODB are not loosened' '
	rm -f alt_objects/pack/*.keep &&
	mv .git/objects/pack/* alt_objects/pack/ &&

	coid=$(git rev-parse HEAD^{commit}) &&
	git reset --hard HEAD^ &&
	test_tick &&
	git reflog expire --expire=$test_tick --expire-unreachable=$test_tick --all &&

	# The pack-objects call on the next line is equivalent to
	# git repack -A -d without the call to prune-packed
	git pack-objects --honor-pack-keep --non-empty --all --reflog \
	    --unpack-unreachable </dev/null pack &&
	rm -f .git/objects/pack/* &&
	mv pack-* .git/objects/pack/ &&

	git verify-pack -v -- .git/objects/pack/*.idx >packlist &&
	! grep "^$coid " packlist &&

	# blank out the alternates file before trying to show the commit
	echo >.git/objects/info/alternates &&
	test_must_fail git show $coid
'
|
|
|
|
|
2009-03-21 23:26:11 +01:00
|
|
|
# Pack $coid (set by the preceding test) into a local pack while the
# alternate is configured, repack with --unpack-unreachable, and check
# that $coid is in no local pack and cannot be shown once the alternates
# file has been blanked.
test_expect_success 'local packed unreachable obs that exist in alternate ODB are not loosened' '
	echo $(pwd)/alt_objects >.git/objects/info/alternates &&

	# create a local pack that explicitly includes $coid
	echo "$coid" | git pack-objects --non-empty --all --reflog pack &&
	rm -f .git/objects/pack/* &&
	mv pack-* .git/objects/pack/ &&

	# The pack-objects call on the next line is equivalent to
	# git repack -A -d without the call to prune-packed
	git pack-objects --honor-pack-keep --non-empty --all --reflog \
	    --unpack-unreachable </dev/null pack &&
	rm -f .git/objects/pack/* &&
	mv pack-* .git/objects/pack/ &&

	# grep must be given the listing explicitly; without a file operand
	# it would read standard input and the negated check would not
	# examine the listing at all.
	git verify-pack -v -- .git/objects/pack/*.idx >packlist &&
	! grep "^$coid " packlist &&

	echo >.git/objects/info/alternates &&
	test_must_fail git show $coid
'
|
|
|
|
|
2009-07-23 17:33:49 +02:00
|
|
|
# Add a graft that makes HEAD point directly at HEAD^^, expire the
# reflogs, repack with -a -d, and check that the commit that was HEAD^
# can still be read.
test_expect_success 'objects made unreachable by grafts only are kept' '
	test_tick &&
	git commit --allow-empty -m "commit 4" &&

	H0=$(git rev-parse HEAD) &&
	H1=$(git rev-parse HEAD^) &&
	H2=$(git rev-parse HEAD^^) &&

	# graft line: H0 gets H2 as its only parent
	echo "$H0 $H2" >.git/info/grafts &&
	git reflog expire --expire=$test_tick --expire-unreachable=$test_tick --all &&

	git repack -a -d &&
	git cat-file -t $H1
'
|
2009-07-23 17:33:45 +02:00
|
|
|
|
2018-04-15 17:36:13 +02:00
|
|
|
# Create four packs in a fresh repository, repack with two of them named
# via --keep-pack, and check that those two are still present, that three
# packs remain in total, and that the repository passes fsck.
test_expect_success 'repack --keep-pack' '
	test_create_repo keep-pack &&
	(
		cd keep-pack &&

		P1=$(commit_and_pack 1) &&
		P2=$(commit_and_pack 2) &&
		P3=$(commit_and_pack 3) &&
		P4=$(commit_and_pack 4) &&
		ls .git/objects/pack/*.pack >old-counts &&
		test_line_count = 4 old-counts &&

		git repack -a -d --keep-pack $P1 --keep-pack $P4 &&

		ls .git/objects/pack/*.pack >new-counts &&
		grep -q $P1 new-counts &&
		grep -q $P4 new-counts &&
		test_line_count = 3 new-counts &&
		git fsck
	)
'
|
|
|
|
|
2019-03-14 10:12:54 +01:00
|
|
|
# Clone into a bare repository, remove any existing bitmap files, run
# "repack -ad" there, and check that a bitmap file exists afterwards.
test_expect_success 'bitmaps are created by default in bare repos' '
	git clone --bare .git bare.git &&
	rm -f bare.git/objects/pack/*.bitmap &&

	GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \
		git -C bare.git repack -ad &&

	bitmap=$(ls bare.git/objects/pack/*.bitmap) &&
	test_path_is_file "$bitmap"
'
|
|
|
|
|
|
|
|
# A quiet repack without -a in the bare repository must write nothing to
# stderr.
test_expect_success 'incremental repack does not complain' '
	git -C bare.git repack -q 2>repack.err &&
	test_must_be_empty repack.err
'
|
2008-11-12 18:59:02 +01:00
|
|
|
|
2019-03-14 10:12:54 +01:00
|
|
|
# With repack.writeBitmaps=false, "repack -ad" in the bare repository must
# leave no bitmap file behind.
test_expect_success 'bitmaps can be disabled on bare repos' '
	GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \
		git -c repack.writeBitmaps=false -C bare.git repack -ad &&

	# a failing ls (nothing matched) is tolerated here
	bitmap=$(ls bare.git/objects/pack/*.bitmap || :) &&
	test -z "$bitmap"
'
|
|
|
|
|
2019-06-29 21:13:59 +02:00
|
|
|
# Mark the single pack of the bare repository with .keep, run
# "repack -ad", and check that stderr stays empty and no bitmap file
# exists.  The .keep file is removed again when the test finishes.
test_expect_success 'no bitmaps created if .keep files present' '
	pack=$(ls bare.git/objects/pack/*.pack) &&
	test_path_is_file "$pack" &&

	keep=${pack%.pack}.keep &&
	test_when_finished "rm -f \"\$keep\"" &&
	>"$keep" &&

	GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \
		git -C bare.git repack -ad 2>stderr &&
	test_must_be_empty stderr &&

	find bare.git/objects/pack/ -type f -name "*.bitmap" >actual &&
	test_must_be_empty actual
'
|
|
|
|
|
2019-07-31 07:39:27 +02:00
|
|
|
# Set pack.packSizeLimit to 1M, add a 1 MiB random blob referenced by a
# tag, run "repack -ad", and check that stderr stays empty and no bitmap
# file exists.
test_expect_success 'auto-bitmaps do not complain if unavailable' '
	test_config -C bare.git pack.packSizeLimit 1M &&

	blob=$(test-tool genrandom big $((1024*1024)) |
	       git -C bare.git hash-object -w --stdin) &&
	git -C bare.git update-ref refs/tags/big $blob &&

	GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \
		git -C bare.git repack -ad 2>stderr &&
	test_must_be_empty stderr &&

	find bare.git/objects/pack -type f -name "*.bitmap" >actual &&
	test_must_be_empty actual
'
|
|
|
|
|
builtin/repack.c: support writing a MIDX while repacking
Teach `git repack` a new `--write-midx` option for callers that wish to
persist a multi-pack index in their repository while repacking.
There are two existing alternatives to this new flag, but they don't
cover our particular use-case. These alternatives are:
- Call 'git multi-pack-index write' after running 'git repack', or
- Set 'GIT_TEST_MULTI_PACK_INDEX=1' in your environment when running
'git repack'.
The former works, but introduces a gap in bitmap coverage between
repacking and writing a new MIDX (since the repack may have deleted a
pack included in the existing MIDX, invalidating it altogether).
Setting the 'GIT_TEST_' environment variable is obviously unsupported.
In fact, even if it were supported officially, it still wouldn't work,
because it generates the MIDX *after* redundant packs have been dropped,
leading to the same issue as above.
Introduce a new option which eliminates this race by teaching `git
repack` to generate the MIDX at the critical point: after the new packs
have been written and moved into place, but before the redundant packs
have been removed.
This option is compatible with `git repack`'s '--bitmap' option (it
changes the interpretation to be: "write a bitmap corresponding to the
MIDX after one has been generated").
There is a little bit of additional noise in the patch below to avoid
repeating ourselves when selecting which packs to delete. Instead of a
single loop as before (where we iterate over 'existing_packs', decide if
a pack is worth deleting, and if so, delete it), we have two loops (the
first where we decide which ones are worth deleting, and the second
where we actually do the deleting). This makes it so we have a single
check we can make consistently when (1) telling the MIDX which packs we
want to exclude, and (2) actually unlinking the redundant packs.
There is also a tiny change to short-circuit the body of
write_midx_included_packs() when no packs remain in the case of an empty
repository. The MIDX code does not handle this, so avoid trying to
generate a MIDX covering zero packs in the first place.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-09-29 03:55:18 +02:00
|
|
|
# Paths used by the --write-midx tests below, relative to the top of the
# repository each test changes into.
objdir='.git/objects'
midx="${objdir}/pack/multi-pack-index"
|
|
|
|
|
|
|
|
# Create the "midx" repository shared by the following --write-midx tests:
# core.multiPackIndex enabled and a single commit.
test_expect_success 'setup for --write-midx tests' '
	git init midx &&
	(
		cd midx &&
		git config core.multiPackIndex true &&

		test_commit base
	)
'
|
|
|
|
|
|
|
|
# A plain repack must not leave a MIDX or MIDX bitmap behind; repeating it
# with --write-midx must create a consistent MIDX, still without a bitmap.
test_expect_success '--write-midx unchanged' '
	(
		cd midx &&

		# without --write-midx: neither file may exist
		GIT_TEST_MULTI_PACK_INDEX=0 git repack &&
		test_path_is_missing $midx &&
		test_path_is_missing $midx-*.bitmap &&

		# with --write-midx: only the MIDX itself appears
		GIT_TEST_MULTI_PACK_INDEX=0 git repack --write-midx &&

		test_path_is_file $midx &&
		test_path_is_missing $midx-*.bitmap &&
		test_midx_consistent $objdir
	)
'
|
|
|
|
|
|
|
|
# After one more commit, "repack --write-midx" must leave a consistent
# MIDX and no MIDX bitmap.
test_expect_success '--write-midx with a new pack' '
	(
		cd midx &&
		test_commit loose &&

		GIT_TEST_MULTI_PACK_INDEX=0 git repack --write-midx &&

		test_path_is_file $midx &&
		test_path_is_missing $midx-*.bitmap &&
		test_midx_consistent $objdir
	)
'
|
|
|
|
|
|
|
|
# "repack -mb" must leave both a consistent MIDX and a MIDX bitmap file.
test_expect_success '--write-midx with -b' '
	(
		cd midx &&
		GIT_TEST_MULTI_PACK_INDEX=0 git repack -mb &&

		test_path_is_file $midx &&
		test_path_is_file $midx-*.bitmap &&
		test_midx_consistent $objdir
	)
'
|
|
|
|
|
|
|
|
# After another commit, "repack -Ad --write-midx" must leave a consistent
# MIDX and no MIDX bitmap file.
test_expect_success '--write-midx with -d' '
	(
		cd midx &&
		test_commit repack &&

		GIT_TEST_MULTI_PACK_INDEX=0 git repack -Ad --write-midx &&

		test_path_is_file $midx &&
		test_path_is_missing $midx-*.bitmap &&
		test_midx_consistent $objdir
	)
'
|
|
|
|
|
|
|
|
# Three rounds of "repack -Adb": the first two with --write-midx (the
# second must replace the bitmap named after the first checksum), the
# third without it (no multi-pack-index* file may remain afterwards).
test_expect_success 'cleans up MIDX when appropriate' '
	(
		cd midx &&

		# round 1: MIDX plus a bitmap named after its checksum
		test_commit repack-2 &&
		GIT_TEST_MULTI_PACK_INDEX=0 git repack -Adb --write-midx &&

		checksum=$(midx_checksum $objdir) &&
		test_path_is_file $midx &&
		test_path_is_file $midx-$checksum.bitmap &&

		# round 2: the old bitmap is gone, one for the new checksum exists
		test_commit repack-3 &&
		GIT_TEST_MULTI_PACK_INDEX=0 git repack -Adb --write-midx &&

		test_path_is_file $midx &&
		test_path_is_missing $midx-$checksum.bitmap &&
		test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&

		# round 3: no --write-midx, so nothing MIDX-related is left
		test_commit repack-4 &&
		GIT_TEST_MULTI_PACK_INDEX=0 git repack -Adb &&

		find $objdir/pack -type f -name "multi-pack-index*" >files &&
		test_must_be_empty files
	)
'
|
|
|
|
|
2021-10-02 00:38:10 +02:00
|
|
|
# Write a MIDX bitmap for 103 tagged commits and record which commit has
# no bitmap; then configure pack.preferBitmapTips from the refs pointing
# at that commit, rewrite MIDX and bitmap, and check that the set of
# commits without a bitmap has changed.
test_expect_success '--write-midx with preferred bitmap tips' '
	git init midx-preferred-tips &&
	test_when_finished "rm -fr midx-preferred-tips" &&
	(
		cd midx-preferred-tips &&

		test_commit_bulk --message="%s" 103 &&

		git log --format="%H" >commits.raw &&
		sort <commits.raw >commits &&

		# one tag per commit
		git log --format="create refs/tags/%s/%s %H" HEAD >refs &&
		git update-ref --stdin <refs &&

		git repack --write-midx --write-bitmap-index &&
		test_path_is_file $midx &&
		test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&

		# exactly one commit is expected to lack a bitmap at this point
		test-tool bitmap list-commits | sort >bitmaps &&
		comm -13 bitmaps commits >before &&
		test_line_count = 1 before &&

		rm -fr $midx-$(midx_checksum $objdir).bitmap &&
		rm -fr $midx &&

		# instead of constructing the snapshot ourselves (c.f., the test
		# "write a bitmap with --refs-snapshot (preferred tips)" in
		# t5326), mark the missing commit as preferred by adding it to
		# the pack.preferBitmapTips configuration.
		git for-each-ref --format="%(refname:rstrip=1)" \
			--points-at="$(cat before)" >missing &&
		git config pack.preferBitmapTips "$(cat missing)" &&
		git repack --write-midx --write-bitmap-index &&

		test-tool bitmap list-commits | sort >bitmaps &&
		comm -13 bitmaps commits >after &&

		! test_cmp before after
	)
'
|
|
|
|
|
2022-03-25 20:02:46 +01:00
|
|
|
# Usage: get_sorted_objects_from_pack <file>
#
# <file> must contain the path of a pack .idx file.  That index is fed to
# "git show-index" and the second space-separated field of every output
# line is printed on stdout.  The full show-index output is left in the
# file "raw" in the current directory.
#
# Returns non-zero if show-index (or reading the index) fails.
get_sorted_objects_from_pack () {
	# The redirect target is quoted: bash rejects an unquoted target
	# that expands to several words ("ambiguous redirect"), which would
	# happen for an index path containing whitespace.
	git show-index <"$(cat "$1")" >raw &&
	cut -d" " -f2 raw
}
|
|
|
|
|
2021-12-20 15:48:10 +01:00
|
|
|
# With one kept pack, one ordinary pack and some loose objects present,
# "repack --write-midx -a -b -d" must end with exactly two packs (the kept
# one and a new one) that have no object in common.
test_expect_success '--write-midx -b packs non-kept objects' '
	git init repo &&
	test_when_finished "rm -fr repo" &&
	(
		cd repo &&

		# Create a kept pack-file
		test_commit base &&
		git repack -ad &&
		find $objdir/pack -name "*.idx" >before &&
		test_line_count = 1 before &&
		before_name=$(cat before) &&
		>${before_name%.idx}.keep &&

		# Create a non-kept pack-file
		test_commit other &&
		git repack &&

		# Create loose objects
		test_commit loose &&

		# Repack everything
		git repack --write-midx -a -b -d &&

		# There should be two pack-files now, the
		# old, kept pack and the new, non-kept pack.
		find $objdir/pack -name "*.idx" | sort >after &&
		test_line_count = 2 after &&
		find $objdir/pack -name "*.keep" >kept &&
		kept_name=$(cat kept) &&
		echo ${kept_name%.keep}.idx >kept-idx &&
		test_cmp before kept-idx &&

		# Get object list from the kept pack.
		get_sorted_objects_from_pack before >old.objects &&

		# Get object list from the one non-kept pack-file
		comm -13 before after >new-pack &&
		test_line_count = 1 new-pack &&
		get_sorted_objects_from_pack new-pack >new.objects &&

		# None of the objects in the new pack should
		# exist within the kept pack.
		comm -12 old.objects new.objects >shared.objects &&
		test_must_be_empty shared.objects
	)
'
|
|
|
|
|
2022-10-18 04:45:12 +02:00
|
|
|
# Once a MIDX bitmap has been written, the older single-pack bitmap
# must be gone.
test_expect_success '--write-midx removes stale pack-based bitmaps' '
	rm -fr repo &&
	git init repo &&
	test_when_finished "rm -fr repo" &&
	(
		cd repo &&
		test_commit base &&
		GIT_TEST_MULTI_PACK_INDEX=0 git repack -Ab &&

		pack_bitmap=$(ls $objdir/pack/pack-*.bitmap) &&
		test_path_is_file "$pack_bitmap" &&

		test_commit tip &&
		GIT_TEST_MULTI_PACK_INDEX=0 git repack -bm &&

		test_path_is_file $midx &&
		test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
		test_path_is_missing "$pack_bitmap"
	)
'
|
|
|
|
|
repack: don't remove .keep packs with `--pack-kept-objects`
`git repack` supports a `--pack-kept-objects` flag which more or less
translates to whether or not we pass `--honor-pack-keep` down to `git
pack-objects` when assembling a new pack.
This behavior has existed since ee34a2bead (repack: add
`repack.packKeptObjects` config var, 2014-03-03). In that commit, the
documentation was extended to say:
[...] Note that we still do not delete `.keep` packs after
`pack-objects` finishes.
Unfortunately, this is not the case when `--pack-kept-objects` is
combined with a `--geometric` repack. When doing a geometric repack, we
include `.keep` packs when enumerating available packs only when
`pack_kept_objects` is set.
So this all works fine when `--no-pack-kept-objects` (or similar) is
given. Kept packs are excluded from the geometric roll-up, so when we go
to delete redundant packs (with `-d`), no `.keep` packs appear "below
the split" in our geometric progression.
But when `--pack-kept-objects` is given, things can go awry. Namely,
when a kept pack is included in the list of packs tracked by the
`pack_geometry` struct *and* part of the pack roll-up, we will delete
the `.keep` pack when we shouldn't.
Note that this *doesn't* result in object corruption, since the `.keep`
pack's objects are still present in the new pack. But the `.keep` pack
itself is removed, which violates our promise from back in ee34a2bead.
But there's more. Because `repack` computes the geometric roll-up
independently from selecting which packs belong in a MIDX (with
`--write-midx`), this can lead to odd behavior. Consider when a `.keep`
pack appears below the geometric split (ie., its objects will be part of
the new pack we generate).
We'll write a MIDX containing the new pack along with the existing
`.keep` pack. But because the `.keep` pack appears below the geometric
split line, we'll (incorrectly) try to remove it. While this doesn't
corrupt the repository, it does cause us to remove the MIDX we just
wrote, since removing that pack would invalidate the new MIDX.
Funny enough, this behavior became far less noticeable after e4d0c11c04
(repack: respect kept objects with '--write-midx -b', 2021-12-20), which
made `pack_kept_objects` be enabled by default only when we were writing
a non-MIDX bitmap.
But e4d0c11c04 didn't resolve this bug, it just made it harder to notice
unless callers explicitly passed `--pack-kept-objects`.
The solution is to avoid trying to remove `.keep` packs during
`--geometric` repacks, even when they appear below the geometric split
line, which is the approach this patch implements.
Co-authored-by: Victoria Dye <vdye@github.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-10-18 04:26:06 +02:00
|
|
|
# A geometric repack with --pack-kept-objects must leave the .keep
# file in place and still produce a MIDX plus its bitmap.
test_expect_success '--write-midx with --pack-kept-objects' '
	git init repo &&
	test_when_finished "rm -fr repo" &&
	(
		cd repo &&

		test_commit one &&
		test_commit two &&

		one="$(echo "one" | git pack-objects --revs $objdir/pack/pack)" &&
		two="$(echo "one..two" | git pack-objects --revs $objdir/pack/pack)" &&

		keep="$objdir/pack/pack-$one.keep" &&
		touch "$keep" &&

		git repack --write-midx --write-bitmap-index --geometric=2 -d \
			--pack-kept-objects &&

		test_path_is_file "$keep" &&
		test_path_is_file $midx &&
		test_path_is_file $midx-$(midx_checksum $objdir).bitmap
	)
'
|
|
|
|
|
2021-12-20 15:48:11 +01:00
|
|
|
# Even on a terminal and with no progress delay, --quiet must keep
# stderr empty.
test_expect_success TTY '--quiet disables progress' '
	test_terminal env GIT_PROGRESS_DELAY=0 \
		git -C midx repack -ad --quiet --write-midx 2>stderr &&
	test_must_be_empty stderr
'
|
|
|
|
|
repack: use tempfiles for signal cleanup
When git-repack exits due to a signal, it tries to clean up by calling
its remove_temporary_files() function, which walks through the packs dir
looking for ".tmp-$$-pack-*" files to delete (where "$$" is the pid of
the current process).
The biggest problem here is that remove_temporary_files() is not safe to
call in a signal handler. It uses opendir(), which isn't on the POSIX
async-signal-safe list. The details will be platform-specific, but a
likely issue is that it needs to allocate memory; if we receive a signal
while inside malloc(), etc, we'll conflict on the allocator lock and
deadlock with ourselves.
We can fix this by just cleaning up the files directly, without walking
the directory. We already know the complete list of .tmp-* files that
were generated, because we recorded them via populate_pack_exts(). When
we find files there, we can use register_tempfile() to record the
filenames. If we receive a signal, then the tempfile API will clean them
up for us, and it's async-safe and pretty battle-tested.
Note that this is slightly racier than the existing scheme. We don't
record the filenames until pack-objects tells us the hash over stdout.
So during the period between it generating the file and reporting the
hash, we'd fail to clean up. However, that period is very small. During
most of the pack generation process pack-objects is using its own
internal tempfiles. It's only at the very end that it moves them into
the names git-repack expects, and then it immediately reports the name
to us. Given that cleanup like this is best effort (after all, we may
get SIGKILL), this level of race is acceptable.
When we register the tempfiles, we'll record them locally and use the
result to call rename_tempfile(), rather than renaming by hand. This
isn't strictly necessary, as once we've renamed the files they're gone,
and the tempfile API's cleanup unlink() would simply become a pointless
noop. But managing the lifetimes of the tempfile objects is the cleanest
thing to do, and the tempfile pointers naturally fill the same role as
the old booleans.
This patch also fixes another small problem. We only hook signals, and
don't set up an atexit handler. So if we see an error that causes us to
die(), we'll leave the .tmp-* files in place. But since the tempfile API
handles this for us, this is now fixed for free. The new test covers
this by stimulating a failure of pack-objects when generating a cruft
pack. Before this patch, the .tmp-* file for the main pack would have
been left, but now we correctly clean it up.
Two small subtleties on the implementation:
- in the renaming loop, we can stop re-constructing fname_old; we only
use it when we have a tempfile to rename, so we can just ask the
tempfile for its path (which, barring bugs, should be identical)
- when renaming fails, our error message mentions fname_old. But since
a failed rename_tempfile() invalidates the tempfile struct, we'll
lose access to that string. Instead, let's mention the destination
filename, which is what most other callers do.
Reported-by: Jan Pokorný <poki@fnusa.cz>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-10-22 02:21:54 +02:00
|
|
|
# A repack that fails (here through a bogus repack.cruftwindow value)
# must not leave .tmp-* files behind in the pack directory.
test_expect_success 'clean up .tmp-* packs on error' '
	test_must_fail ok=sigpipe git \
		-c repack.cruftwindow=bogus \
		repack -ad --cruft &&
	# double quotes: single quotes here would end the test body and
	# leave the pattern open to expansion by the shell
	find $objdir/pack -name ".tmp-*" >tmpfiles &&
	test_must_be_empty tmpfiles
'
|
|
|
|
|
repack: drop remove_temporary_files()
After we've successfully finished the repack, we call
remove_temporary_files(), which looks for and removes any files matching
".tmp-$$-pack-*", where $$ is the pid of the current process. But this
is pointless. If we make it this far in the process, we've already
renamed these tempfiles into place, and there is nothing left to delete.
Nor is there a point in trying to call it to clean up when we _aren't_
successful. It's not safe for using in a signal handler, and the
previous commit already handed that job over to the tempfile API.
It might seem like it would be useful to clean up stray .tmp files left
by other invocations of git-repack. But it won't clean those files; it
only matches ones with its pid, and leaves the rest. Fortunately, those
are cleaned up naturally by successive calls to git-repack; we'll
consider .tmp-*.pack the same as normal packfiles, so "repack -ad", etc,
will roll up their contents and eventually delete them.
The one case that could matter is if pack-objects generates an extension
we don't know about, like ".tmp-pack-$$-$hash.some-new-ext". The current
code will quietly delete such a file, while after this patch we'd leave
it in place. In practice this doesn't happen, and would be indicative of
a bug. Leaving the file as cruft is arguably a better behavior, as it
means somebody is more likely to eventually notice and fix the bug. If
we really wanted to be paranoid, we could scan for and warn about such
files, but that seems like overkill.
There's nothing to test with regard to the removal of this function. It
was doing nothing, so the behavior should be the same. However, we can
verify (and protect) our assumption that "repack -ad" will eventually
remove stray files by adding a test for that.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-10-22 02:21:58 +02:00
|
|
|
# A stray .tmp-* pack written by hand must be gone after
# "git repack -ad".
test_expect_success 'repack -ad cleans up old .tmp-* packs' '
	git rev-parse HEAD >input &&
	git pack-objects $objdir/pack/.tmp-1234 <input &&
	git repack -ad &&
	# double quotes: single quotes here would end the test body and
	# leave the pattern open to expansion by the shell
	find $objdir/pack -name ".tmp-*" >tmpfiles &&
	test_must_be_empty tmpfiles
'
|
|
|
|
|
2022-03-14 08:42:46 +01:00
|
|
|
# Create the repository used by the update-server-info tests below.
test_expect_success 'setup for update-server-info' '
	git init update-server-info &&
	test_commit -C update-server-info message
'
|
|
|
|
|
|
|
|
# Succeed only when both server-info files of the update-server-info
# repository (objects/info/packs and info/refs) exist.
test_server_info_present () {
	test_path_is_file update-server-info/.git/objects/info/packs &&
	test_path_is_file update-server-info/.git/info/refs
}
|
|
|
|
|
|
|
|
# Succeed only when neither server-info file of the update-server-info
# repository (objects/info/packs, info/refs) exists.
test_server_info_missing () {
	test_path_is_missing update-server-info/.git/objects/info/packs &&
	test_path_is_missing update-server-info/.git/info/refs
}
|
|
|
|
|
|
|
|
# Remove both server-info files of the update-server-info repository
# and verify that they are really gone, so that each test starts from
# a known state.
test_server_info_cleanup () {
	rm -f update-server-info/.git/objects/info/packs \
		update-server-info/.git/info/refs &&
	test_server_info_missing
}
|
|
|
|
|
|
|
|
# A plain repack is expected to (re)generate the server-info files.
test_expect_success 'updates server info by default' '
	test_server_info_cleanup &&
	git -C update-server-info repack &&
	test_server_info_present
'
|
|
|
|
|
|
|
|
# With -n the server-info files must not be written.
test_expect_success '-n skips updating server info' '
	test_server_info_cleanup &&
	git -C update-server-info repack -n &&
	test_server_info_missing
'
|
|
|
|
|
2022-03-14 08:42:51 +01:00
|
|
|
# Explicitly enabling repack.updateServerInfo writes the files.
test_expect_success 'repack.updateServerInfo=true updates server info' '
	test_server_info_cleanup &&
	git -C update-server-info -c repack.updateServerInfo=true repack &&
	test_server_info_present
'
|
|
|
|
|
|
|
|
# Disabling repack.updateServerInfo leaves the files unwritten.
test_expect_success 'repack.updateServerInfo=false skips updating server info' '
	test_server_info_cleanup &&
	git -C update-server-info -c repack.updateServerInfo=false repack &&
	test_server_info_missing
'
|
|
|
|
|
|
|
|
# The command-line -n wins over repack.updateServerInfo=true.
test_expect_success '-n overrides repack.updateServerInfo=true' '
	test_server_info_cleanup &&
	git -C update-server-info -c repack.updateServerInfo=true repack -n &&
	test_server_info_missing
'
|
|
|
|
|
builtin/repack.c: implement `--expire-to` for storing pruned objects
When pruning objects with `--cruft`, `git repack` offers some
flexibility when selecting the set of which objects are pruned via the
`--cruft-expiration` option.
This is useful for expiring objects which are older than the grace
period, making races where to-be-pruned objects become reachable and
then ancestors of freshly pushed objects, leaving the repository in a
corrupt state after pruning substantially less likely [1].
But in practice, such races are impossible to avoid entirely, no matter
how long the grace period is. To prevent this race, it is often
advisable to temporarily put a repository into a read-only state. But in
practice, this is not always practical, and so some middle ground would
be nice.
This patch introduces a new option, `--expire-to`, which teaches `git
repack` to write an additional cruft pack containing just the objects
which were pruned from the repository. The caller can specify a
directory outside of the current repository as the destination for this
second cruft pack.
This makes it possible to prune objects from a repository, while still
holding onto a supplemental copy of them outside of the original
repository. Having this copy on-disk makes it substantially easier to
recover objects when the aforementioned race is encountered.
`--expire-to` is implemented in a somewhat convoluted manner, which is
to take advantage of the fact that the first time `write_cruft_pack()`
is called, it adds the name of the cruft pack to the `names` string
list. That means the second time we call `write_cruft_pack()`, objects
in the previously-written cruft pack will be excluded.
As long as the caller ensures that no objects are expired during the
second pass, this is sufficient to generate a cruft pack containing all
objects which don't appear in any of the new packs written by `git
repack`, including the cruft pack. In other words, all of the objects
which are about to be pruned from the repository.
It is important to note that the destination in `--expire-to` does not
necessarily need to be a Git repository (though it can be). Notably, the
expired packs do not contain all ancestors of expired objects. So if the
source repository contains something like:
<unreachable>
/
C1 --- C2
\
refs/heads/master
where C2 is unreachable, but has a parent (C1) which is reachable, and
C2 would be pruned, then the expiry pack will contain only C2, not C1.
[1]: https://lore.kernel.org/git/20190319001829.GL29661@sigill.intra.peff.net/
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-10-24 20:43:12 +02:00
|
|
|
# With --cruft-expiration=now, every unreachable object is expected to
# end up in the pack written to the --expire-to location.
test_expect_success '--expire-to stores pruned objects (now)' '
	git init expire-to-now &&
	(
		cd expire-to-now &&

		git branch -M main &&

		test_commit base &&

		git checkout -b cruft &&
		test_commit --no-tag cruft &&

		git rev-list --objects --no-object-names main..cruft >moved.raw &&
		sort moved.raw >moved.want &&

		git rev-list --all --objects --no-object-names >expect.raw &&
		sort expect.raw >expect &&

		# make the objects on "cruft" unreachable
		git checkout main &&
		git branch -D cruft &&
		git reflog expire --all --expire=all &&

		git init --bare expired.git &&
		git repack -d \
			--cruft --cruft-expiration="now" \
			--expire-to="expired.git/objects/pack/pack" &&

		expired="$(ls expired.git/objects/pack/pack-*.idx)" &&
		test_path_is_file "${expired%.idx}.mtimes" &&

		# Since the `--cruft-expiration` is "now", the effective
		# behavior is to move _all_ unreachable objects out to
		# the location in `--expire-to`.
		git show-index <"$expired" >expired.raw &&
		cut -d" " -f2 expired.raw | sort >expired.objects &&
		git rev-list --all --objects --no-object-names \
			>remaining.objects &&

		# ...in other words, the combined contents of this
		# repository and expired.git should be the same as the
		# set of objects we started with.
		sort expired.objects remaining.objects >actual &&
		test_cmp expect actual &&

		# The "moved" objects (i.e., those in expired.git)
		# should be the same as the cruft objects which were
		# expired in the previous step.
		test_cmp moved.want expired.objects
	)
'
|
|
|
|
|
|
|
|
# With a 5-minute cruft expiration, only the unreachable objects whose
# pack was back-dated are expected to be moved to the --expire-to
# location; the recent ones stay in the repository.
test_expect_success '--expire-to stores pruned objects (5.minutes.ago)' '
	git init expire-to-5.minutes.ago &&
	(
		cd expire-to-5.minutes.ago &&

		git branch -M main &&

		test_commit base &&

		# Create two classes of unreachable objects, one which
		# is older than 5 minutes (stale), and another which is
		# newer (recent).
		for kind in stale recent
		do
			git checkout -b $kind main &&
			test_commit --no-tag $kind || exit 1
		done &&

		# pack the stale objects on their own and back-date that pack
		git rev-list --objects --no-object-names main..stale >in &&
		stale="$(git pack-objects $objdir/pack/pack <in)" &&
		mtime="$(test-tool chmtime --get =-600 $objdir/pack/pack-$stale.pack)" &&

		# expect holds the set of objects we expect to find in
		# this repository after repacking
		git rev-list --objects --no-object-names recent >expect.raw &&
		sort expect.raw >expect &&

		# moved.want holds the set of objects we expect to find
		# in expired.git
		git rev-list --objects --no-object-names main..stale >out &&
		sort out >moved.want &&

		git checkout main &&
		git branch -D stale recent &&
		git reflog expire --all --expire=all &&
		git prune-packed &&

		git init --bare expired.git &&
		git repack -d \
			--cruft --cruft-expiration=5.minutes.ago \
			--expire-to="expired.git/objects/pack/pack" &&

		# Some of the remaining objects in this repository are
		# unreachable, so use `cat-file --batch-all-objects`
		# instead of `rev-list` to get their names
		git cat-file --batch-all-objects --batch-check="%(objectname)" \
			>remaining.objects &&
		sort remaining.objects >actual &&
		test_cmp expect actual &&

		(
			cd expired.git &&

			expired="$(ls objects/pack/pack-*.mtimes)" &&
			test-tool pack-mtimes $(basename "$expired") >out &&
			cut -d" " -f1 out | sort >../moved.got &&

			# Ensure that there are as many objects with the
			# expected mtime as were moved to expired.git.
			#
			# In other words, ensure that the recorded
			# mtimes of any moved objects was written
			# correctly.
			grep " $mtime$" out >matching &&
			test_line_count = $(wc -l <../moved.want) matching
		) &&
		test_cmp moved.want moved.got
	)
'
|
|
|
|
|
2019-03-14 10:12:54 +01:00
|
|
|
test_done
|