Merge branch 'tb/enable-cruft-packs-by-default'

When "gc" needs to retain unreachable objects, packing them into
cruft packs (instead of exploding them into loose object files) has
been offered as a more efficient option for some time.  Now the use
of cruft packs has been made the default and is no longer considered
an experimental feature.

* tb/enable-cruft-packs-by-default:
  repository.h: drop unused `gc_cruft_packs`
  builtin/gc.c: make `gc.cruftPacks` enabled by default
  t/t9300-fast-import.sh: prepare for `gc --cruft` by default
  t/t6500-gc.sh: add additional test cases
  t/t6500-gc.sh: refactor cruft pack tests
  t/t6501-freshen-objects.sh: prepare for `gc --cruft` by default
  t/t5304-prune.sh: prepare for `gc --cruft` by default
  builtin/gc.c: ignore cruft packs with `--keep-largest-pack`
  builtin/repack.c: fix incorrect reference to '-C'
  pack-write.c: plug a leak in stage_tmp_packfiles()
This commit is contained in:
Junio C Hamano 2023-04-28 16:03:03 -07:00
commit fc23c397c7
13 changed files with 120 additions and 127 deletions

View File

@ -14,9 +14,6 @@ feature.experimental::
+
* `fetch.negotiationAlgorithm=skipping` may improve fetch negotiation times by
skipping more commits at a time, reducing the number of round trips.
+
* `gc.cruftPacks=true` reduces disk space used by unreachable objects during
garbage collection, preventing loose object explosions.
feature.manyFiles::
Enable config options that optimize for repos with many files in the

View File

@ -43,11 +43,11 @@ gc.autoDetach::
if the system supports it. Default is true.
gc.bigPackThreshold::
If non-zero, all packs larger than this limit are kept when
`git gc` is run. This is very similar to `--keep-largest-pack`
except that all packs that meet the threshold are kept, not
just the largest pack. Defaults to zero. Common unit suffixes of
'k', 'm', or 'g' are supported.
If non-zero, all non-cruft packs larger than this limit are kept
when `git gc` is run. This is very similar to
`--keep-largest-pack` except that all non-cruft packs that meet
the threshold are kept, not just the largest pack. Defaults to
zero. Common unit suffixes of 'k', 'm', or 'g' are supported.
+
Note that if the number of kept packs is more than gc.autoPackLimit,
this configuration variable is ignored, all packs except the base pack
@ -84,7 +84,7 @@ gc.packRefs::
gc.cruftPacks::
Store unreachable objects in a cruft pack (see
linkgit:git-repack[1]) instead of as loose objects. The default
is `false`.
is `true`.
gc.pruneExpire::
When 'git gc' is run, it will call 'prune --expire 2.weeks.ago'

View File

@ -54,9 +54,10 @@ other housekeeping tasks (e.g. rerere, working trees, reflog...) will
be performed as well.
--cruft::
--[no-]cruft::
When expiring unreachable objects, pack them separately into a
cruft pack instead of storing them as loose objects.
cruft pack instead of storing them as loose objects. `--cruft`
is on by default.
--prune=<date>::
Prune loose objects older than date (default is 2 weeks ago,
@ -77,9 +78,10 @@ be performed as well.
instance running on this repository.
--keep-largest-pack::
All packs except the largest pack and those marked with a
`.keep` files are consolidated into a single pack. When this
option is used, `gc.bigPackThreshold` is ignored.
All packs except the largest non-cruft pack, any packs marked
with a `.keep` file, and any cruft pack(s) are consolidated into
a single pack. When this option is used, `gc.bigPackThreshold`
is ignored.
AGGRESSIVE
----------

View File

@ -611,8 +611,8 @@ result of repeatedly resetting the objects' mtimes to the present time.
If you are GC-ing repositories in a mixed version environment, consider omitting
the `--cruft` option when using linkgit:git-repack[1] and linkgit:git-gc[1], and
leaving the `gc.cruftPacks` configuration unset until all writers understand
cruft packs.
setting the `gc.cruftPacks` configuration to "false" until all writers
understand cruft packs.
=== Alternatives

View File

@ -50,7 +50,7 @@ static const char * const builtin_gc_usage[] = {
static int pack_refs = 1;
static int prune_reflogs = 1;
static int cruft_packs = -1;
static int cruft_packs = 1;
static int aggressive_depth = 50;
static int aggressive_window = 250;
static int gc_auto_threshold = 6700;
@ -221,7 +221,7 @@ static struct packed_git *find_base_packs(struct string_list *packs,
struct packed_git *p, *base = NULL;
for (p = get_all_packs(the_repository); p; p = p->next) {
if (!p->pack_local)
if (!p->pack_local || p->is_cruft)
continue;
if (limit) {
if (p->pack_size >= limit)
@ -610,10 +610,6 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
if (prune_expire && parse_expiry_date(prune_expire, &dummy))
die(_("failed to parse prune expiry value %s"), prune_expire);
prepare_repo_settings(the_repository);
if (cruft_packs < 0)
cruft_packs = the_repository->settings.gc_cruft_packs;
if (aggressive) {
strvec_push(&repack, "-f");
if (aggressive_depth > 0)

View File

@ -810,7 +810,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
N_("same as -a, pack unreachable cruft objects separately"),
PACK_CRUFT),
OPT_STRING(0, "cruft-expiration", &cruft_expiration, N_("approxidate"),
N_("with -C, expire objects older than this")),
N_("with --cruft, expire objects older than this")),
OPT_BOOL('d', NULL, &delete_redundant,
N_("remove redundant packs, and run git-prune-packed")),
OPT_BOOL('f', NULL, &po_args.no_reuse_delta,

View File

@ -314,13 +314,13 @@ static void write_mtimes_trailer(struct hashfile *f, const unsigned char *hash)
hashwrite(f, hash, the_hash_algo->rawsz);
}
static const char *write_mtimes_file(struct packing_data *to_pack,
struct pack_idx_entry **objects,
uint32_t nr_objects,
const unsigned char *hash)
static char *write_mtimes_file(struct packing_data *to_pack,
struct pack_idx_entry **objects,
uint32_t nr_objects,
const unsigned char *hash)
{
struct strbuf tmp_file = STRBUF_INIT;
const char *mtimes_name;
char *mtimes_name;
struct hashfile *f;
int fd;
@ -546,7 +546,7 @@ void stage_tmp_packfiles(struct strbuf *name_buffer,
char **idx_tmp_name)
{
const char *rev_tmp_name = NULL;
const char *mtimes_tmp_name = NULL;
char *mtimes_tmp_name = NULL;
if (adjust_shared_perm(pack_tmp_name))
die_errno("unable to make temporary pack file readable");
@ -572,6 +572,7 @@ void stage_tmp_packfiles(struct strbuf *name_buffer,
rename_tmp_packfile(name_buffer, mtimes_tmp_name, "mtimes");
free((char *)rev_tmp_name);
free(mtimes_tmp_name);
}
void write_promisor_file(const char *promisor_name, struct ref **sought, int nr_sought)

View File

@ -41,10 +41,8 @@ void prepare_repo_settings(struct repository *r)
repo_cfg_bool(r, "feature.experimental", &experimental, 0);
/* Defaults modified by feature.* */
if (experimental) {
if (experimental)
r->settings.fetch_negotiation_algorithm = FETCH_NEGOTIATION_SKIPPING;
r->settings.gc_cruft_packs = 1;
}
if (manyfiles) {
r->settings.index_version = 4;
r->settings.index_skip_hash = 1;

View File

@ -33,7 +33,6 @@ struct repo_settings {
int commit_graph_generation_version;
int commit_graph_read_changed_paths;
int gc_write_commit_graph;
int gc_cruft_packs;
int fetch_write_commit_graph;
int command_requires_full_index;
int sparse_index;

View File

@ -62,11 +62,11 @@ test_expect_success 'prune --expire' '
test_expect_success 'gc: implicit prune --expire' '
add_blob &&
test-tool chmtime =-$((2*$week-30)) $BLOB_FILE &&
git gc &&
git gc --no-cruft &&
verbose test $((1 + $before)) = $(git count-objects | sed "s/ .*//") &&
test_path_is_file $BLOB_FILE &&
test-tool chmtime =-$((2*$week+1)) $BLOB_FILE &&
git gc &&
git gc --no-cruft &&
verbose test $before = $(git count-objects | sed "s/ .*//") &&
test_path_is_missing $BLOB_FILE
'
@ -86,7 +86,7 @@ test_expect_success 'gc: refuse to start with invalid gc.pruneExpire' '
test_expect_success 'gc: start with ok gc.pruneExpire' '
git config gc.pruneExpire 2.days.ago &&
git gc
git gc --no-cruft
'
test_expect_success 'prune: prune nonsense parameters' '
@ -137,44 +137,44 @@ test_expect_success 'gc --no-prune' '
add_blob &&
test-tool chmtime =-$((5001*$day)) $BLOB_FILE &&
git config gc.pruneExpire 2.days.ago &&
git gc --no-prune &&
git gc --no-prune --no-cruft &&
verbose test 1 = $(git count-objects | sed "s/ .*//") &&
test_path_is_file $BLOB_FILE
'
test_expect_success 'gc respects gc.pruneExpire' '
git config gc.pruneExpire 5002.days.ago &&
git gc &&
git gc --no-cruft &&
test_path_is_file $BLOB_FILE &&
git config gc.pruneExpire 5000.days.ago &&
git gc &&
git gc --no-cruft &&
test_path_is_missing $BLOB_FILE
'
test_expect_success 'gc --prune=<date>' '
add_blob &&
test-tool chmtime =-$((5001*$day)) $BLOB_FILE &&
git gc --prune=5002.days.ago &&
git gc --prune=5002.days.ago --no-cruft &&
test_path_is_file $BLOB_FILE &&
git gc --prune=5000.days.ago &&
git gc --prune=5000.days.ago --no-cruft &&
test_path_is_missing $BLOB_FILE
'
test_expect_success 'gc --prune=never' '
add_blob &&
git gc --prune=never &&
git gc --prune=never --no-cruft &&
test_path_is_file $BLOB_FILE &&
git gc --prune=now &&
git gc --prune=now --no-cruft &&
test_path_is_missing $BLOB_FILE
'
test_expect_success 'gc respects gc.pruneExpire=never' '
git config gc.pruneExpire never &&
add_blob &&
git gc &&
git gc --no-cruft &&
test_path_is_file $BLOB_FILE &&
git config gc.pruneExpire now &&
git gc &&
git gc --no-cruft &&
test_path_is_missing $BLOB_FILE
'
@ -194,7 +194,7 @@ test_expect_success 'gc: prune old objects after local clone' '
cd aclone &&
verbose test 1 = $(git count-objects | sed "s/ .*//") &&
test_path_is_file $BLOB_FILE &&
git gc --prune &&
git gc --prune --no-cruft &&
verbose test 0 = $(git count-objects | sed "s/ .*//") &&
test_path_is_missing $BLOB_FILE
)
@ -237,7 +237,7 @@ test_expect_success 'clean pack garbage with gc' '
>.git/objects/pack/fake2.keep &&
>.git/objects/pack/fake2.idx &&
>.git/objects/pack/fake3.keep &&
git gc &&
git gc --no-cruft &&
git count-objects -v 2>stderr &&
grep "^warning:" stderr | sort >actual &&
cat >expected <<\EOF &&

View File

@ -210,90 +210,95 @@ prepare_cruft_history () {
git reset HEAD^^
}
assert_cruft_packs () {
find .git/objects/pack -name "*.mtimes" >mtimes &&
sed -e 's/\.mtimes$/\.pack/g' mtimes >packs &&
test_file_not_empty packs &&
while read pack
do
test_path_is_file "$pack" || return 1
done <packs
}
assert_no_cruft_packs () {
find .git/objects/pack -name "*.mtimes" >mtimes &&
test_must_be_empty mtimes
}
test_expect_success 'gc --cruft generates a cruft pack' '
test_when_finished "rm -fr crufts" &&
git init crufts &&
(
cd crufts &&
for argv in \
"gc" \
"-c gc.cruftPacks=true gc" \
"-c gc.cruftPacks=false gc --cruft"
do
test_expect_success "git $argv generates a cruft pack" '
test_when_finished "rm -fr repo" &&
git init repo &&
(
cd repo &&
prepare_cruft_history &&
git $argv &&
find .git/objects/pack -name "*.mtimes" >mtimes &&
sed -e 's/\.mtimes$/\.pack/g' mtimes >packs &&
test_file_not_empty packs &&
while read pack
do
test_path_is_file "$pack" || return 1
done <packs
)
'
done
for argv in \
"gc --no-cruft" \
"-c gc.cruftPacks=false gc" \
"-c gc.cruftPacks=true gc --no-cruft"
do
test_expect_success "git $argv does not generate a cruft pack" '
test_when_finished "rm -fr repo" &&
git init repo &&
(
cd repo &&
prepare_cruft_history &&
git $argv &&
assert_no_cruft_packs
)
'
done
test_expect_success '--keep-largest-pack ignores cruft packs' '
test_when_finished "rm -fr repo" &&
git init repo &&
(
cd repo &&
# Generate a pack for reachable objects (of which there
# are 3), and one for unreachable objects (of which
# there are 6).
prepare_cruft_history &&
git gc --cruft &&
assert_cruft_packs
)
'
test_expect_success 'gc.cruftPacks=true generates a cruft pack' '
test_when_finished "rm -fr crufts" &&
git init crufts &&
(
cd crufts &&
mtimes="$(find .git/objects/pack -type f -name "pack-*.mtimes")" &&
sz="$(test_file_size "${mtimes%.mtimes}.pack")" &&
prepare_cruft_history &&
git -c gc.cruftPacks=true gc &&
assert_cruft_packs
)
'
# Ensure that the cruft pack gets removed (due to
# `--prune=now`) despite it being the largest pack.
git -c gc.bigPackThreshold=$sz gc --cruft --prune=now &&
test_expect_success 'feature.experimental=true generates a cruft pack' '
git init crufts &&
test_when_finished "rm -fr crufts" &&
(
cd crufts &&
prepare_cruft_history &&
git -c feature.experimental=true gc &&
assert_cruft_packs
)
'
test_expect_success 'feature.experimental=false allows explicit cruft packs' '
git init crufts &&
test_when_finished "rm -fr crufts" &&
(
cd crufts &&
prepare_cruft_history &&
git -c gc.cruftPacks=true -c feature.experimental=false gc &&
assert_cruft_packs
)
'
test_expect_success 'feature.experimental=true can be overridden' '
git init crufts &&
test_when_finished "rm -fr crufts" &&
(
cd crufts &&
prepare_cruft_history &&
git -c feature.expiremental=true -c gc.cruftPacks=false gc &&
assert_no_cruft_packs
)
'
test_expect_success 'feature.experimental=false avoids cruft packs by default' '
git init crufts &&
test_when_finished "rm -fr crufts" &&
test_expect_success 'gc.bigPackThreshold ignores cruft packs' '
test_when_finished "rm -fr repo" &&
git init repo &&
(
cd crufts &&
cd repo &&
# Generate a pack for reachable objects (of which there
# are 3), and one for unreachable objects (of which
# there are 6).
prepare_cruft_history &&
git -c feature.experimental=false gc &&
git gc --cruft &&
# Ensure that the cruft pack gets removed (due to
# `--prune=now`) despite it being the largest pack.
git gc --cruft --prune=now --keep-largest-pack &&
assert_no_cruft_packs
)
'

View File

@ -101,7 +101,7 @@ do
'
test_expect_success "simultaneous gc ($title)" '
git gc --prune=12.hours.ago
git gc --no-cruft --prune=12.hours.ago
'
test_expect_success "finish writing out commit ($title)" '
@ -131,7 +131,7 @@ do
'
test_expect_success "simultaneous gc ($title)" '
git gc --prune=12.hours.ago
git gc --no-cruft --prune=12.hours.ago
'
# tree should have been refreshed by write-tree
@ -151,7 +151,7 @@ test_expect_success 'do not complain about existing broken links (commit)' '
some message
EOF
commit=$(git hash-object -t commit -w broken-commit) &&
git gc -q 2>stderr &&
git gc --no-cruft -q 2>stderr &&
verbose git cat-file -e $commit &&
test_must_be_empty stderr
'
@ -161,7 +161,7 @@ test_expect_success 'do not complain about existing broken links (tree)' '
100644 blob $(test_oid 003) foo
EOF
tree=$(git mktree --missing <broken-tree) &&
git gc -q 2>stderr &&
git gc --no-cruft -q 2>stderr &&
git cat-file -e $tree &&
test_must_be_empty stderr
'
@ -176,7 +176,7 @@ test_expect_success 'do not complain about existing broken links (tag)' '
this is a broken tag
EOF
tag=$(git hash-object -t tag -w broken-tag) &&
git gc -q 2>stderr &&
git gc --no-cruft -q 2>stderr &&
git cat-file -e $tag &&
test_must_be_empty stderr
'

View File

@ -388,9 +388,7 @@ test_expect_success 'B: accept branch name "TEMP_TAG"' '
INPUT_END
test_when_finished "rm -f .git/TEMP_TAG
git gc
git prune" &&
test_when_finished "rm -f .git/TEMP_TAG && git gc --prune=now" &&
git fast-import <input &&
test $(test-tool ref-store main resolve-ref TEMP_TAG 0 | cut -f1 -d " " ) != "$ZERO_OID" &&
test $(git rev-parse main) = $(git rev-parse TEMP_TAG^)
@ -406,8 +404,7 @@ test_expect_success 'B: accept empty committer' '
INPUT_END
test_when_finished "git update-ref -d refs/heads/empty-committer-1
git gc
git prune" &&
git gc --prune=now" &&
git fast-import <input &&
out=$(git fsck) &&
echo "$out" &&
@ -452,8 +449,7 @@ test_expect_success 'B: accept and fixup committer with no name' '
INPUT_END
test_when_finished "git update-ref -d refs/heads/empty-committer-2
git gc
git prune" &&
git gc --prune=now" &&
git fast-import <input &&
out=$(git fsck) &&
echo "$out" &&
@ -1778,8 +1774,7 @@ test_expect_success 'P: verbatim SHA gitlinks' '
INPUT_END
git branch -D sub &&
git gc &&
git prune &&
git gc --prune=now &&
git fast-import <input &&
test $(git rev-parse --verify subuse2) = $(git rev-parse --verify subuse1)
'