Merge branch 'dk/gc-idx-wo-pack'

Having leftover .idx files without corresponding .pack files in
the repository hurts performance; "git gc" learned to prune them.

* dk/gc-idx-wo-pack:
  gc: remove garbage .idx files from pack dir
  t5304: test cleaning pack garbage
  prepare_packed_git(): refactor garbage reporting in pack directory
This commit is contained in:
Jeff King 2015-11-20 06:55:34 -05:00
commit 45014beac0
6 changed files with 78 additions and 22 deletions

View File

@ -15,9 +15,31 @@ static int verbose;
static unsigned long loose, packed, packed_loose;
static off_t loose_size;
static void real_report_garbage(const char *desc, const char *path)
/*
 * Map a set of PACKDIR_FILE_* bits to the text used when warning about
 * a file in the pack directory.
 *
 * Returns a static string for the four reportable cases (no bits set,
 * garbage only, .pack only, .idx only).  Returns NULL when both the
 * .pack and .idx bits are set, and for any other bit combination.
 */
static const char *bits_to_msg(unsigned seen_bits)
{
	if (seen_bits == 0)
		return "no corresponding .idx or .pack";
	if (seen_bits == PACKDIR_FILE_GARBAGE)
		return "garbage found";
	if (seen_bits == PACKDIR_FILE_PACK)
		return "no corresponding .idx";
	if (seen_bits == PACKDIR_FILE_IDX)
		return "no corresponding .pack";

	/* PACKDIR_FILE_PACK|PACKDIR_FILE_IDX, or a combination not listed above */
	return NULL;
}
static void real_report_garbage(unsigned seen_bits, const char *path)
{
struct stat st;
const char *desc = bits_to_msg(seen_bits);
if (!desc)
return;
if (!stat(path, &st))
size_garbage += st.st_size;
warning("%s: %s", desc, path);
@ -27,7 +49,7 @@ static void real_report_garbage(const char *desc, const char *path)
static void loose_garbage(const char *path)
{
if (verbose)
report_garbage("garbage found", path);
report_garbage(PACKDIR_FILE_GARBAGE, path);
}
static int count_loose(const unsigned char *sha1, const char *path, void *data)

View File

@ -46,6 +46,22 @@ static struct argv_array rerere = ARGV_ARRAY_INIT;
static struct tempfile pidfile;
static struct lock_file log_lock;
static struct string_list pack_garbage = STRING_LIST_INIT_DUP;
/*
 * Call unlink_or_warn() on every path currently queued in pack_garbage,
 * in list order, and then clear the list.
 */
static void clean_pack_garbage(void)
{
	int idx = 0;

	while (idx < pack_garbage.nr) {
		unlink_or_warn(pack_garbage.items[idx].string);
		idx++;
	}

	string_list_clear(&pack_garbage, 0);
}
/*
 * Garbage-report callback: remember `path` in pack_garbage when it was
 * reported with seen_bits equal to exactly PACKDIR_FILE_IDX.  Every
 * other value of seen_bits is ignored.
 */
static void report_pack_garbage(unsigned seen_bits, const char *path)
{
	if (seen_bits != PACKDIR_FILE_IDX)
		return;

	string_list_append(&pack_garbage, path);
}
static void git_config_date_string(const char *key, const char **output)
{
if (git_config_get_string_const(key, output))
@ -416,6 +432,11 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
if (run_command_v_opt(rerere.argv, RUN_GIT_CMD))
return error(FAILED_RUN, rerere.argv[0]);
report_garbage = report_pack_garbage;
reprepare_packed_git();
if (pack_garbage.nr > 0)
clean_pack_garbage();
if (auto_gc && too_many_loose_objects())
warning(_("There are too many unreachable loose objects; "
"run 'git prune' to remove them."));

View File

@ -1289,8 +1289,11 @@ struct pack_entry {
extern struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path);
/* A hook for count-objects to report invalid files in pack directory */
extern void (*report_garbage)(const char *desc, const char *path);
/* A hook to report invalid files in pack directory */
#define PACKDIR_FILE_PACK 1
#define PACKDIR_FILE_IDX 2
#define PACKDIR_FILE_GARBAGE 4
extern void (*report_garbage)(unsigned seen_bits, const char *path);
extern void prepare_packed_git(void);
extern void reprepare_packed_git(void);

2
path.c
View File

@ -363,7 +363,7 @@ void report_linked_checkout_garbage(void)
strbuf_setlen(&sb, len);
strbuf_addstr(&sb, path);
if (file_exists(sb.buf))
report_garbage("unused in linked checkout", sb.buf);
report_garbage(PACKDIR_FILE_GARBAGE, sb.buf);
}
strbuf_release(&sb);
}

View File

@ -1217,27 +1217,16 @@ void install_packed_git(struct packed_git *pack)
packed_git = pack;
}
void (*report_garbage)(const char *desc, const char *path);
void (*report_garbage)(unsigned seen_bits, const char *path);
static void report_helper(const struct string_list *list,
int seen_bits, int first, int last)
{
const char *msg;
switch (seen_bits) {
case 0:
msg = "no corresponding .idx or .pack";
break;
case 1:
msg = "no corresponding .idx";
break;
case 2:
msg = "no corresponding .pack";
break;
default:
if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX))
return;
}
for (; first < last; first++)
report_garbage(msg, list->items[first].string);
report_garbage(seen_bits, list->items[first].string);
}
static void report_pack_garbage(struct string_list *list)
@ -1260,7 +1249,7 @@ static void report_pack_garbage(struct string_list *list)
if (baselen == -1) {
const char *dot = strrchr(path, '.');
if (!dot) {
report_garbage("garbage found", path);
report_garbage(PACKDIR_FILE_GARBAGE, path);
continue;
}
baselen = dot - path + 1;
@ -1332,7 +1321,7 @@ static void prepare_packed_git_one(char *objdir, int local)
ends_with(de->d_name, ".keep"))
string_list_append(&garbage, path.buf);
else
report_garbage("garbage found", path.buf);
report_garbage(PACKDIR_FILE_GARBAGE, path.buf);
}
closedir(dir);
report_pack_garbage(&garbage);

View File

@ -219,6 +219,7 @@ test_expect_success 'gc: prune old objects after local clone' '
test_expect_success 'garbage report in count-objects -v' '
test_when_finished "rm -f .git/objects/pack/fake*" &&
test_when_finished "rm -f .git/objects/pack/foo*" &&
: >.git/objects/pack/foo &&
: >.git/objects/pack/foo.bar &&
: >.git/objects/pack/foo.keep &&
@ -244,6 +245,26 @@ EOF
test_cmp expected actual
'
# Create empty foo.keep, foo.pack, fake.idx, fake2.keep, fake2.idx and
# fake3.keep in the pack directory, run "git gc", and then check that
# "git count-objects -v" warns only about fake3.keep, foo.keep and
# foo.pack.  fake.idx, fake2.idx and fake2.keep must no longer be
# reported after gc has run.
test_expect_success 'clean pack garbage with gc' '
test_when_finished "rm -f .git/objects/pack/fake*" &&
test_when_finished "rm -f .git/objects/pack/foo*" &&
: >.git/objects/pack/foo.keep &&
: >.git/objects/pack/foo.pack &&
: >.git/objects/pack/fake.idx &&
: >.git/objects/pack/fake2.keep &&
: >.git/objects/pack/fake2.idx &&
: >.git/objects/pack/fake3.keep &&
git gc &&
git count-objects -v 2>stderr &&
grep "^warning:" stderr | sort >actual &&
cat >expected <<\EOF &&
warning: no corresponding .idx or .pack: .git/objects/pack/fake3.keep
warning: no corresponding .idx: .git/objects/pack/foo.keep
warning: no corresponding .idx: .git/objects/pack/foo.pack
EOF
test_cmp expected actual
'
test_expect_success 'prune .git/shallow' '
SHA1=`echo hi|git commit-tree HEAD^{tree}` &&
echo $SHA1 >.git/shallow &&