packfile.c: speed up loading lots of packfiles
When loading packfiles on start-up, we traverse the internal packfile list once per file to avoid reloading packfiles that have already been loaded. This check runs in quadratic time, so for poorly maintained repos with a large number of packfiles, it can be pretty slow. Add a hashmap containing the packfile names as we load them so that the average runtime cost of checking for already-loaded packs becomes constant. Add a perf test to p5303 to show speed-up. The existing p5303 test runtimes are dominated by other factors and do not show an appreciable speed-up. The new test in p5303 clearly exposes a speed-up in bad cases. In this test we create 10,000 packfiles and measure the start-up time of git rev-parse, which does little else besides load in the packs. Here are the numbers for the new p5303 test: Test HEAD^ HEAD --------------------------------------------------------------------- 5303.12: load 10,000 packs 1.03(0.92+0.10) 0.12(0.02+0.09) -88.3% Signed-off-by: Colin Stolley <cstolley@runbox.com> Helped-by: Jeff King <peff@peff.net> [jc: squashed the change to call hashmap in install_packed_git() by peff] Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
parent
d9f6f3b619
commit
ec48540fe8
@ -60,6 +60,7 @@ struct oid_array *odb_loose_cache(struct object_directory *odb,
|
||||
void odb_clear_loose_cache(struct object_directory *odb);
|
||||
|
||||
struct packed_git {
|
||||
struct hashmap_entry packmap_ent;
|
||||
struct packed_git *next;
|
||||
struct list_head mru;
|
||||
struct pack_window *windows;
|
||||
@ -88,6 +89,20 @@ struct packed_git {
|
||||
|
||||
struct multi_pack_index;
|
||||
|
||||
static inline int pack_map_entry_cmp(const void *unused_cmp_data,
|
||||
const struct hashmap_entry *entry,
|
||||
const struct hashmap_entry *entry2,
|
||||
const void *keydata)
|
||||
{
|
||||
const char *key = keydata;
|
||||
const struct packed_git *pg1, *pg2;
|
||||
|
||||
pg1 = container_of(entry, const struct packed_git, packmap_ent);
|
||||
pg2 = container_of(entry2, const struct packed_git, packmap_ent);
|
||||
|
||||
return strcmp(pg1->pack_name, key ? key : pg2->pack_name);
|
||||
}
|
||||
|
||||
struct raw_object_store {
|
||||
/*
|
||||
* Set of all object directories; the main directory is first (and
|
||||
@ -131,6 +146,12 @@ struct raw_object_store {
|
||||
/* A most-recently-used ordered version of the packed_git list. */
|
||||
struct list_head packed_git_mru;
|
||||
|
||||
/*
|
||||
* A map of packfiles to packed_git structs for tracking which
|
||||
* packs have been loaded already.
|
||||
*/
|
||||
struct hashmap pack_map;
|
||||
|
||||
/*
|
||||
* A fast, rough count of the number of objects in the repository.
|
||||
* These two fields are not meant for direct access. Use
|
||||
|
3
object.c
3
object.c
@ -479,6 +479,7 @@ struct raw_object_store *raw_object_store_new(void)
|
||||
|
||||
memset(o, 0, sizeof(*o));
|
||||
INIT_LIST_HEAD(&o->packed_git_mru);
|
||||
hashmap_init(&o->pack_map, pack_map_entry_cmp, NULL, 0);
|
||||
return o;
|
||||
}
|
||||
|
||||
@ -518,6 +519,8 @@ void raw_object_store_clear(struct raw_object_store *o)
|
||||
INIT_LIST_HEAD(&o->packed_git_mru);
|
||||
close_object_store(o);
|
||||
o->packed_git = NULL;
|
||||
|
||||
hashmap_free(&o->pack_map);
|
||||
}
|
||||
|
||||
void parsed_object_pool_clear(struct parsed_object_pool *o)
|
||||
|
19
packfile.c
19
packfile.c
@ -757,6 +757,9 @@ void install_packed_git(struct repository *r, struct packed_git *pack)
|
||||
|
||||
pack->next = r->objects->packed_git;
|
||||
r->objects->packed_git = pack;
|
||||
|
||||
hashmap_entry_init(&pack->packmap_ent, strhash(pack->pack_name));
|
||||
hashmap_add(&r->objects->pack_map, &pack->packmap_ent);
|
||||
}
|
||||
|
||||
void (*report_garbage)(unsigned seen_bits, const char *path);
|
||||
@ -856,20 +859,18 @@ static void prepare_pack(const char *full_name, size_t full_name_len,
|
||||
|
||||
if (strip_suffix_mem(full_name, &base_len, ".idx") &&
|
||||
!(data->m && midx_contains_pack(data->m, file_name))) {
|
||||
/* Don't reopen a pack we already have. */
|
||||
for (p = data->r->objects->packed_git; p; p = p->next) {
|
||||
size_t len;
|
||||
if (strip_suffix(p->pack_name, ".pack", &len) &&
|
||||
len == base_len &&
|
||||
!memcmp(p->pack_name, full_name, len))
|
||||
break;
|
||||
}
|
||||
struct hashmap_entry hent;
|
||||
char *pack_name = xstrfmt("%.*s.pack", (int)base_len, full_name);
|
||||
unsigned int hash = strhash(pack_name);
|
||||
hashmap_entry_init(&hent, hash);
|
||||
|
||||
if (!p) {
|
||||
/* Don't reopen a pack we already have. */
|
||||
if (!hashmap_get(&data->r->objects->pack_map, &hent, pack_name)) {
|
||||
p = add_packed_git(full_name, full_name_len, data->local);
|
||||
if (p)
|
||||
install_packed_git(data->r, p);
|
||||
}
|
||||
free(pack_name);
|
||||
}
|
||||
|
||||
if (!report_garbage)
|
||||
|
@ -84,4 +84,22 @@ do
|
||||
'
|
||||
done
|
||||
|
||||
# Measure pack loading with 10,000 packs.
|
||||
test_expect_success 'generate lots of packs' '
|
||||
for i in $(test_seq 10000); do
|
||||
echo "blob"
|
||||
echo "data <<EOF"
|
||||
echo "blob $i"
|
||||
echo "EOF"
|
||||
echo "checkpoint"
|
||||
done |
|
||||
git -c fastimport.unpackLimit=0 fast-import
|
||||
'
|
||||
|
||||
# The purpose of this test is to evaluate load time for a large number
|
||||
# of packs while doing as little other work as possible.
|
||||
test_perf "load 10,000 packs" '
|
||||
git rev-parse --verify "HEAD^{commit}"
|
||||
'
|
||||
|
||||
test_done
|
||||
|
Loading…
Reference in New Issue
Block a user