Merge branch 'ds/midx-expire-repack'

"git multi-pack-index" learned expire and repack subcommands.

* ds/midx-expire-repack:
  t5319: use 'test-tool path-utils' instead of 'ls -l'
  t5319-multi-pack-index.sh: test batch size zero
  midx: add test that 'expire' respects .keep files
  multi-pack-index: test expire while adding packs
  midx: implement midx_repack()
  multi-pack-index: prepare 'repack' subcommand
  multi-pack-index: implement 'expire' subcommand
  midx: refactor permutation logic and pack sorting
  midx: simplify computation of pack name lengths
  multi-pack-index: prepare for 'expire' subcommand
  Docs: rearrange subcommands for multi-pack-index
  repack: refactor pack deletion for future use
Junio C Hamano 2019-07-19 11:30:19 -07:00
commit 4308d81d45
8 changed files with 602 additions and 119 deletions

Documentation/git-multi-pack-index.txt

@@ -9,7 +9,7 @@ git-multi-pack-index - Write and verify multi-pack-indexes
 SYNOPSIS
 --------
 [verse]
-'git multi-pack-index' [--object-dir=<dir>] <verb>
+'git multi-pack-index' [--object-dir=<dir>] <subcommand>
 
 DESCRIPTION
 -----------
@@ -23,13 +23,35 @@ OPTIONS
    `<dir>/packs/multi-pack-index` for the current MIDX file, and
    `<dir>/packs` for the pack-files to index.
 
+The following subcommands are available:
+
 write::
-   When given as the verb, write a new MIDX file to
-   `<dir>/packs/multi-pack-index`.
+   Write a new MIDX file.
 
 verify::
-   When given as the verb, verify the contents of the MIDX file
-   at `<dir>/packs/multi-pack-index`.
+   Verify the contents of the MIDX file.
+
+expire::
+   Delete the pack-files that are tracked by the MIDX file but
+   have no objects referenced by the MIDX. Rewrite the MIDX file
+   afterward to remove all references to these pack-files.
+
+repack::
+   Create a new pack-file containing objects in small pack-files
+   referenced by the multi-pack-index. If the size given by the
+   `--batch-size=<size>` argument is zero, then create a pack
+   containing all objects referenced by the multi-pack-index. For
+   a non-zero batch size, select the pack-files by examining packs
+   from oldest to newest, computing each pack's "expected size" as
+   the number of objects in the pack that are referenced by the
+   multi-pack-index, divided by the total number of objects in the
+   pack, multiplied by the pack size. Packs whose expected size is
+   below the batch size are selected until the selected packs have
+   a total expected size of at least the batch size. If the total
+   expected size does not reach the batch size, then do nothing.
+   If a new pack-file is created, rewrite the multi-pack-index to
+   reference the new pack-file. A later run of
+   'git multi-pack-index expire' will delete the pack-files that
+   were part of this batch.
 
 EXAMPLES
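
To make the repack selection rule above concrete, here is a small, self-contained C sketch of the documented "expected size" computation. The struct and function names are illustrative only; they are not the structures or helpers used in midx.c.

    #include <stddef.h>
    #include <stdint.h>

    /* Illustrative sketch only: mirrors the documented formula. */
    struct pack_summary {
        size_t pack_size;            /* on-disk size of the .pack file */
        uint32_t total_objects;      /* objects stored in the pack */
        uint32_t referenced_objects; /* objects the multi-pack-index maps to this pack */
    };

    /* expected size = pack size * (referenced objects / total objects) */
    size_t expected_pack_size(const struct pack_summary *p)
    {
        if (!p->total_objects)
            return 0;
        return p->pack_size * p->referenced_objects / p->total_objects;
    }

    /*
     * Example: a 100 MB pack holding 1000 objects, 250 of which are still
     * referenced by the multi-pack-index, has an expected size of 25 MB.
     * With --batch-size=30m it is a candidate for repacking; with
     * --batch-size=20m it is skipped, because its expected size is not
     * below the batch size.
     */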

builtin/multi-pack-index.c

@@ -6,12 +6,13 @@
 #include "trace2.h"
 
 static char const * const builtin_multi_pack_index_usage[] = {
-    N_("git multi-pack-index [--object-dir=<dir>] (write|verify)"),
+    N_("git multi-pack-index [--object-dir=<dir>] (write|verify|expire|repack --batch-size=<size>)"),
    NULL
 };
 
 static struct opts_multi_pack_index {
    const char *object_dir;
+   unsigned long batch_size;
 } opts;
 
 int cmd_multi_pack_index(int argc, const char **argv,
@@ -20,6 +21,8 @@ int cmd_multi_pack_index(int argc, const char **argv,
    static struct option builtin_multi_pack_index_options[] = {
        OPT_FILENAME(0, "object-dir", &opts.object_dir,
          N_("object directory containing set of packfile and pack-index pairs")),
+       OPT_MAGNITUDE(0, "batch-size", &opts.batch_size,
+         N_("during repack, collect pack-files of smaller size into a batch that is larger than this size")),
        OPT_END(),
    };
 
@@ -43,10 +46,17 @@ int cmd_multi_pack_index(int argc, const char **argv,
 
    trace2_cmd_mode(argv[0]);
 
+   if (!strcmp(argv[0], "repack"))
+       return midx_repack(the_repository, opts.object_dir, (size_t)opts.batch_size);
+   if (opts.batch_size)
+       die(_("--batch-size option is only for 'repack' subcommand"));
+
    if (!strcmp(argv[0], "write"))
        return write_midx_file(opts.object_dir);
    if (!strcmp(argv[0], "verify"))
        return verify_midx_file(the_repository, opts.object_dir);
+   if (!strcmp(argv[0], "expire"))
+       return expire_midx_packs(the_repository, opts.object_dir);
 
-   die(_("unrecognized verb: %s"), argv[0]);
+   die(_("unrecognized subcommand: %s"), argv[0]);
 }

builtin/repack.c

@@ -129,19 +129,9 @@ static void get_non_kept_pack_filenames(struct string_list *fname_list,
 
 static void remove_redundant_pack(const char *dir_name, const char *base_name)
 {
-   const char *exts[] = {".pack", ".idx", ".keep", ".bitmap", ".promisor"};
-   int i;
    struct strbuf buf = STRBUF_INIT;
-   size_t plen;
-
-   strbuf_addf(&buf, "%s/%s", dir_name, base_name);
-   plen = buf.len;
-
-   for (i = 0; i < ARRAY_SIZE(exts); i++) {
-       strbuf_setlen(&buf, plen);
-       strbuf_addstr(&buf, exts[i]);
-       unlink(buf.buf);
-   }
-
+   strbuf_addf(&buf, "%s/%s.pack", dir_name, base_name);
+   unlink_pack_path(buf.buf, 1);
    strbuf_release(&buf);
 }

midx.c

@@ -9,6 +9,7 @@
 #include "midx.h"
 #include "progress.h"
 #include "trace2.h"
+#include "run-command.h"
 
 #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
 #define MIDX_VERSION 1
@@ -34,6 +35,8 @@
 #define MIDX_CHUNK_LARGE_OFFSET_WIDTH (sizeof(uint64_t))
 #define MIDX_LARGE_OFFSET_NEEDED 0x80000000
 
+#define PACK_EXPIRED UINT_MAX
+
 static char *get_midx_filename(const char *object_dir)
 {
    return xstrfmt("%s/pack/multi-pack-index", object_dir);
@@ -427,13 +430,24 @@ static size_t write_midx_header(struct hashfile *f,
    return MIDX_HEADER_SIZE;
 }
 
+struct pack_info {
+   uint32_t orig_pack_int_id;
+   char *pack_name;
+   struct packed_git *p;
+   unsigned expired : 1;
+};
+
+static int pack_info_compare(const void *_a, const void *_b)
+{
+   struct pack_info *a = (struct pack_info *)_a;
+   struct pack_info *b = (struct pack_info *)_b;
+   return strcmp(a->pack_name, b->pack_name);
+}
+
 struct pack_list {
-   struct packed_git **list;
-   char **names;
+   struct pack_info *info;
    uint32_t nr;
-   uint32_t alloc_list;
-   uint32_t alloc_names;
-   size_t pack_name_concat_len;
+   uint32_t alloc;
    struct multi_pack_index *m;
 };
 
@@ -446,67 +460,33 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
        if (packs->m && midx_contains_pack(packs->m, file_name))
            return;
 
-       ALLOC_GROW(packs->list, packs->nr + 1, packs->alloc_list);
-       ALLOC_GROW(packs->names, packs->nr + 1, packs->alloc_names);
+       ALLOC_GROW(packs->info, packs->nr + 1, packs->alloc);
 
-       packs->list[packs->nr] = add_packed_git(full_path,
-                                               full_path_len,
-                                               0);
+       packs->info[packs->nr].p = add_packed_git(full_path,
+                                                 full_path_len,
+                                                 0);
 
-       if (!packs->list[packs->nr]) {
+       if (!packs->info[packs->nr].p) {
            warning(_("failed to add packfile '%s'"),
                full_path);
            return;
        }
 
-       if (open_pack_index(packs->list[packs->nr])) {
+       if (open_pack_index(packs->info[packs->nr].p)) {
            warning(_("failed to open pack-index '%s'"),
                full_path);
-           close_pack(packs->list[packs->nr]);
-           FREE_AND_NULL(packs->list[packs->nr]);
+           close_pack(packs->info[packs->nr].p);
+           FREE_AND_NULL(packs->info[packs->nr].p);
            return;
        }
 
-       packs->names[packs->nr] = xstrdup(file_name);
-       packs->pack_name_concat_len += strlen(file_name) + 1;
+       packs->info[packs->nr].pack_name = xstrdup(file_name);
+       packs->info[packs->nr].orig_pack_int_id = packs->nr;
+       packs->info[packs->nr].expired = 0;
        packs->nr++;
    }
 }
 
-struct pack_pair {
-   uint32_t pack_int_id;
-   char *pack_name;
-};
-
-static int pack_pair_compare(const void *_a, const void *_b)
-{
-   struct pack_pair *a = (struct pack_pair *)_a;
-   struct pack_pair *b = (struct pack_pair *)_b;
-   return strcmp(a->pack_name, b->pack_name);
-}
-
-static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *perm)
-{
-   uint32_t i;
-   struct pack_pair *pairs;
-
-   ALLOC_ARRAY(pairs, nr_packs);
-
-   for (i = 0; i < nr_packs; i++) {
-       pairs[i].pack_int_id = i;
-       pairs[i].pack_name = pack_names[i];
-   }
-
-   QSORT(pairs, nr_packs, pack_pair_compare);
-
-   for (i = 0; i < nr_packs; i++) {
-       pack_names[i] = pairs[i].pack_name;
-       perm[pairs[i].pack_int_id] = i;
-   }
-
-   free(pairs);
-}
-
 struct pack_midx_entry {
    struct object_id oid;
    uint32_t pack_int_id;
@@ -532,7 +512,6 @@ static int midx_oid_compare(const void *_a, const void *_b)
 }
 
 static int nth_midxed_pack_midx_entry(struct multi_pack_index *m,
-                     uint32_t *pack_perm,
                      struct pack_midx_entry *e,
                      uint32_t pos)
 {
@@ -540,7 +519,7 @@ static int nth_midxed_pack_midx_entry(struct multi_pack_index *m,
        return 1;
 
    nth_midxed_object_oid(&e->oid, m, pos);
-   e->pack_int_id = pack_perm[nth_midxed_pack_int_id(m, pos)];
+   e->pack_int_id = nth_midxed_pack_int_id(m, pos);
    e->offset = nth_midxed_offset(m, pos);
 
    /* consider objects in midx to be from "old" packs */
@@ -574,8 +553,7 @@ static void fill_pack_entry(uint32_t pack_int_id,
  * of a packfile containing the object).
  */
 static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
-                         struct packed_git **p,
-                         uint32_t *perm,
+                         struct pack_info *info,
                          uint32_t nr_packs,
                          uint32_t *nr_objects)
 {
@@ -586,7 +564,7 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
    uint32_t start_pack = m ? m->num_packs : 0;
 
    for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++)
-       total_objects += p[cur_pack]->num_objects;
+       total_objects += info[cur_pack].p->num_objects;
 
    /*
     * As we de-duplicate by fanout value, we expect the fanout
@@ -611,7 +589,7 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
            for (cur_object = start; cur_object < end; cur_object++) {
                ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout);
-               nth_midxed_pack_midx_entry(m, perm,
+               nth_midxed_pack_midx_entry(m,
                               &entries_by_fanout[nr_fanout],
                               cur_object);
                nr_fanout++;
@@ -622,12 +600,12 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
            uint32_t start = 0, end;
 
            if (cur_fanout)
-               start = get_pack_fanout(p[cur_pack], cur_fanout - 1);
-           end = get_pack_fanout(p[cur_pack], cur_fanout);
+               start = get_pack_fanout(info[cur_pack].p, cur_fanout - 1);
+           end = get_pack_fanout(info[cur_pack].p, cur_fanout);
 
            for (cur_object = start; cur_object < end; cur_object++) {
                ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout);
-               fill_pack_entry(perm[cur_pack], p[cur_pack], cur_object, &entries_by_fanout[nr_fanout]);
+               fill_pack_entry(cur_pack, info[cur_pack].p, cur_object, &entries_by_fanout[nr_fanout]);
                nr_fanout++;
            }
        }
@@ -656,7 +634,7 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
 }
 
 static size_t write_midx_pack_names(struct hashfile *f,
-                   char **pack_names,
+                   struct pack_info *info,
                    uint32_t num_packs)
 {
    uint32_t i;
@@ -664,14 +642,18 @@ static size_t write_midx_pack_names(struct hashfile *f,
    size_t written = 0;
 
    for (i = 0; i < num_packs; i++) {
-       size_t writelen = strlen(pack_names[i]) + 1;
+       size_t writelen;
+
+       if (info[i].expired)
+           continue;
 
-       if (i && strcmp(pack_names[i], pack_names[i - 1]) <= 0)
+       if (i && strcmp(info[i].pack_name, info[i - 1].pack_name) <= 0)
            BUG("incorrect pack-file order: %s before %s",
-               pack_names[i - 1],
-               pack_names[i]);
+               info[i - 1].pack_name,
+               info[i].pack_name);
 
-       hashwrite(f, pack_names[i], writelen);
+       writelen = strlen(info[i].pack_name) + 1;
+       hashwrite(f, info[i].pack_name, writelen);
        written += writelen;
    }
 
@@ -742,6 +724,7 @@ static size_t write_midx_oid_lookup(struct hashfile *f, unsigned char hash_len,
 }
 
 static size_t write_midx_object_offsets(struct hashfile *f, int large_offset_needed,
+                   uint32_t *perm,
                    struct pack_midx_entry *objects, uint32_t nr_objects)
 {
    struct pack_midx_entry *list = objects;
@@ -751,7 +734,12 @@ static size_t write_midx_object_offsets(struct hashfile *f, int large_offset_nee
    for (i = 0; i < nr_objects; i++) {
        struct pack_midx_entry *obj = list++;
 
-       hashwrite_be32(f, obj->pack_int_id);
+       if (perm[obj->pack_int_id] == PACK_EXPIRED)
+           BUG("object %s is in an expired pack with int-id %d",
+               oid_to_hex(&obj->oid),
+               obj->pack_int_id);
+
+       hashwrite_be32(f, perm[obj->pack_int_id]);
 
        if (large_offset_needed && obj->offset >> 31)
            hashwrite_be32(f, MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++);
@@ -797,7 +785,8 @@ static size_t write_midx_large_offsets(struct hashfile *f, uint32_t nr_large_off
    return written;
 }
 
-int write_midx_file(const char *object_dir)
+static int write_midx_internal(const char *object_dir, struct multi_pack_index *m,
+                  struct string_list *packs_to_drop)
 {
    unsigned char cur_chunk, num_chunks = 0;
    char *midx_name;
@@ -812,6 +801,9 @@ int write_midx_file(const char *object_dir)
    uint32_t nr_entries, num_large_offsets = 0;
    struct pack_midx_entry *entries = NULL;
    int large_offsets_needed = 0;
+   int pack_name_concat_len = 0;
+   int dropped_packs = 0;
+   int result = 0;
 
    midx_name = get_midx_filename(object_dir);
    if (safe_create_leading_directories(midx_name)) {
@@ -820,42 +812,34 @@ int write_midx_file(const char *object_dir)
            midx_name);
    }
 
-   packs.m = load_multi_pack_index(object_dir, 1);
+   if (m)
+       packs.m = m;
+   else
+       packs.m = load_multi_pack_index(object_dir, 1);
 
    packs.nr = 0;
-   packs.alloc_list = packs.m ? packs.m->num_packs : 16;
-   packs.alloc_names = packs.alloc_list;
-   packs.list = NULL;
-   packs.names = NULL;
-   packs.pack_name_concat_len = 0;
-   ALLOC_ARRAY(packs.list, packs.alloc_list);
-   ALLOC_ARRAY(packs.names, packs.alloc_names);
+   packs.alloc = packs.m ? packs.m->num_packs : 16;
+   packs.info = NULL;
+   ALLOC_ARRAY(packs.info, packs.alloc);
 
    if (packs.m) {
        for (i = 0; i < packs.m->num_packs; i++) {
-           ALLOC_GROW(packs.list, packs.nr + 1, packs.alloc_list);
-           ALLOC_GROW(packs.names, packs.nr + 1, packs.alloc_names);
+           ALLOC_GROW(packs.info, packs.nr + 1, packs.alloc);
 
-           packs.list[packs.nr] = NULL;
-           packs.names[packs.nr] = xstrdup(packs.m->pack_names[i]);
-           packs.pack_name_concat_len += strlen(packs.names[packs.nr]) + 1;
+           packs.info[packs.nr].orig_pack_int_id = i;
+           packs.info[packs.nr].pack_name = xstrdup(packs.m->pack_names[i]);
+           packs.info[packs.nr].p = NULL;
+           packs.info[packs.nr].expired = 0;
            packs.nr++;
        }
    }
 
    for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &packs);
 
-   if (packs.m && packs.nr == packs.m->num_packs)
+   if (packs.m && packs.nr == packs.m->num_packs && !packs_to_drop)
        goto cleanup;
 
-   if (packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT)
-       packs.pack_name_concat_len += MIDX_CHUNK_ALIGNMENT -
-                         (packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT);
-
-   ALLOC_ARRAY(pack_perm, packs.nr);
-   sort_packs_by_name(packs.names, packs.nr, pack_perm);
-
-   entries = get_sorted_entries(packs.m, packs.list, pack_perm, packs.nr, &nr_entries);
+   entries = get_sorted_entries(packs.m, packs.info, packs.nr, &nr_entries);
 
    for (i = 0; i < nr_entries; i++) {
        if (entries[i].offset > 0x7fffffff)
@@ -864,6 +848,61 @@ int write_midx_file(const char *object_dir)
            large_offsets_needed = 1;
    }
 
QSORT(packs.info, packs.nr, pack_info_compare);
if (packs_to_drop && packs_to_drop->nr) {
int drop_index = 0;
int missing_drops = 0;
for (i = 0; i < packs.nr && drop_index < packs_to_drop->nr; i++) {
int cmp = strcmp(packs.info[i].pack_name,
packs_to_drop->items[drop_index].string);
if (!cmp) {
drop_index++;
packs.info[i].expired = 1;
} else if (cmp > 0) {
error(_("did not see pack-file %s to drop"),
packs_to_drop->items[drop_index].string);
drop_index++;
missing_drops++;
i--;
} else {
packs.info[i].expired = 0;
}
}
if (missing_drops) {
result = 1;
goto cleanup;
}
}
/*
* pack_perm stores a permutation between pack-int-ids from the
* previous multi-pack-index to the new one we are writing:
*
* pack_perm[old_id] = new_id
*/
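/*
 * For example (illustrative values only, assuming the sorted order
 * matches the original order): with four packs where the pack with
 * old id 1 is being expired, the loop below produces
 *
 *   pack_perm[0] = 0
 *   pack_perm[1] = PACK_EXPIRED
 *   pack_perm[2] = 1
 *   pack_perm[3] = 2
 */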
ALLOC_ARRAY(pack_perm, packs.nr);
for (i = 0; i < packs.nr; i++) {
if (packs.info[i].expired) {
dropped_packs++;
pack_perm[packs.info[i].orig_pack_int_id] = PACK_EXPIRED;
} else {
pack_perm[packs.info[i].orig_pack_int_id] = i - dropped_packs;
}
}
for (i = 0; i < packs.nr; i++) {
if (!packs.info[i].expired)
pack_name_concat_len += strlen(packs.info[i].pack_name) + 1;
}
if (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT)
pack_name_concat_len += MIDX_CHUNK_ALIGNMENT -
(pack_name_concat_len % MIDX_CHUNK_ALIGNMENT);
    hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR);
    f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
    FREE_AND_NULL(midx_name);
@@ -874,14 +913,14 @@ int write_midx_file(const char *object_dir)
    cur_chunk = 0;
    num_chunks = large_offsets_needed ? 5 : 4;
 
-   written = write_midx_header(f, num_chunks, packs.nr);
+   written = write_midx_header(f, num_chunks, packs.nr - dropped_packs);
 
    chunk_ids[cur_chunk] = MIDX_CHUNKID_PACKNAMES;
    chunk_offsets[cur_chunk] = written + (num_chunks + 1) * MIDX_CHUNKLOOKUP_WIDTH;
 
    cur_chunk++;
    chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDFANOUT;
-   chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + packs.pack_name_concat_len;
+   chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + pack_name_concat_len;
 
    cur_chunk++;
    chunk_ids[cur_chunk] = MIDX_CHUNKID_OIDLOOKUP;
@@ -929,7 +968,7 @@ int write_midx_file(const char *object_dir)
        switch (chunk_ids[i]) {
        case MIDX_CHUNKID_PACKNAMES:
-           written += write_midx_pack_names(f, packs.names, packs.nr);
+           written += write_midx_pack_names(f, packs.info, packs.nr);
            break;
 
        case MIDX_CHUNKID_OIDFANOUT:
@@ -941,7 +980,7 @@ int write_midx_file(const char *object_dir)
            break;
 
        case MIDX_CHUNKID_OBJECTOFFSETS:
-           written += write_midx_object_offsets(f, large_offsets_needed, entries, nr_entries);
+           written += write_midx_object_offsets(f, large_offsets_needed, pack_perm, entries, nr_entries);
            break;
 
        case MIDX_CHUNKID_LARGEOFFSETS:
@@ -964,19 +1003,23 @@ int write_midx_file(const char *object_dir)
 
 cleanup:
    for (i = 0; i < packs.nr; i++) {
-       if (packs.list[i]) {
-           close_pack(packs.list[i]);
-           free(packs.list[i]);
+       if (packs.info[i].p) {
+           close_pack(packs.info[i].p);
+           free(packs.info[i].p);
        }
-       free(packs.names[i]);
+       free(packs.info[i].pack_name);
    }
 
-   free(packs.list);
-   free(packs.names);
+   free(packs.info);
    free(entries);
    free(pack_perm);
    free(midx_name);
-   return 0;
+   return result;
+}
+
+int write_midx_file(const char *object_dir)
+{
+   return write_midx_internal(object_dir, NULL, NULL);
 }
 
 void clear_midx_file(struct repository *r)
@@ -1140,3 +1183,200 @@ int verify_midx_file(struct repository *r, const char *object_dir)
    return verify_midx_error;
 }
int expire_midx_packs(struct repository *r, const char *object_dir)
{
uint32_t i, *count, result = 0;
struct string_list packs_to_drop = STRING_LIST_INIT_DUP;
struct multi_pack_index *m = load_multi_pack_index(object_dir, 1);
if (!m)
return 0;
count = xcalloc(m->num_packs, sizeof(uint32_t));
for (i = 0; i < m->num_objects; i++) {
int pack_int_id = nth_midxed_pack_int_id(m, i);
count[pack_int_id]++;
}
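/*
 * A pack with no objects referenced by the MIDX (and no .keep file)
 * can be deleted and dropped from the MIDX.
 */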
for (i = 0; i < m->num_packs; i++) {
char *pack_name;
if (count[i])
continue;
if (prepare_midx_pack(r, m, i))
continue;
if (m->packs[i]->pack_keep)
continue;
pack_name = xstrdup(m->packs[i]->pack_name);
close_pack(m->packs[i]);
string_list_insert(&packs_to_drop, m->pack_names[i]);
unlink_pack_path(pack_name, 0);
free(pack_name);
}
free(count);
if (packs_to_drop.nr)
result = write_midx_internal(object_dir, m, &packs_to_drop);
string_list_clear(&packs_to_drop, 0);
return result;
}
struct repack_info {
timestamp_t mtime;
uint32_t referenced_objects;
uint32_t pack_int_id;
};
static int compare_by_mtime(const void *a_, const void *b_)
{
const struct repack_info *a, *b;
a = (const struct repack_info *)a_;
b = (const struct repack_info *)b_;
if (a->mtime < b->mtime)
return -1;
if (a->mtime > b->mtime)
return 1;
return 0;
}
static int fill_included_packs_all(struct multi_pack_index *m,
unsigned char *include_pack)
{
uint32_t i;
for (i = 0; i < m->num_packs; i++)
include_pack[i] = 1;
return m->num_packs < 2;
}
static int fill_included_packs_batch(struct repository *r,
struct multi_pack_index *m,
unsigned char *include_pack,
size_t batch_size)
{
uint32_t i, packs_to_repack;
size_t total_size;
struct repack_info *pack_info = xcalloc(m->num_packs, sizeof(struct repack_info));
for (i = 0; i < m->num_packs; i++) {
pack_info[i].pack_int_id = i;
if (prepare_midx_pack(r, m, i))
continue;
pack_info[i].mtime = m->packs[i]->mtime;
}
for (i = 0; batch_size && i < m->num_objects; i++) {
uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);
pack_info[pack_int_id].referenced_objects++;
}
QSORT(pack_info, m->num_packs, compare_by_mtime);
total_size = 0;
packs_to_repack = 0;
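/*
 * Walk packs from oldest to newest, collecting packs whose expected
 * size is below the batch size until the batch size is reached.
 */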
for (i = 0; total_size < batch_size && i < m->num_packs; i++) {
int pack_int_id = pack_info[i].pack_int_id;
struct packed_git *p = m->packs[pack_int_id];
size_t expected_size;
if (!p)
continue;
if (open_pack_index(p) || !p->num_objects)
continue;
expected_size = (size_t)(p->pack_size
* pack_info[i].referenced_objects);
expected_size /= p->num_objects;
if (expected_size >= batch_size)
continue;
packs_to_repack++;
total_size += expected_size;
include_pack[pack_int_id] = 1;
}
free(pack_info);
if (total_size < batch_size || packs_to_repack < 2)
return 1;
return 0;
}
int midx_repack(struct repository *r, const char *object_dir, size_t batch_size)
{
int result = 0;
uint32_t i;
unsigned char *include_pack;
struct child_process cmd = CHILD_PROCESS_INIT;
struct strbuf base_name = STRBUF_INIT;
struct multi_pack_index *m = load_multi_pack_index(object_dir, 1);
if (!m)
return 0;
include_pack = xcalloc(m->num_packs, sizeof(unsigned char));
if (batch_size) {
if (fill_included_packs_batch(r, m, include_pack, batch_size))
goto cleanup;
} else if (fill_included_packs_all(m, include_pack))
goto cleanup;
argv_array_push(&cmd.args, "pack-objects");
strbuf_addstr(&base_name, object_dir);
strbuf_addstr(&base_name, "/pack/pack");
argv_array_push(&cmd.args, base_name.buf);
strbuf_release(&base_name);
cmd.git_cmd = 1;
cmd.in = cmd.out = -1;
if (start_command(&cmd)) {
error(_("could not start pack-objects"));
result = 1;
goto cleanup;
}
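/*
 * Feed the object IDs from every selected pack to 'pack-objects'
 * on its standard input.
 */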
for (i = 0; i < m->num_objects; i++) {
struct object_id oid;
uint32_t pack_int_id = nth_midxed_pack_int_id(m, i);
if (!include_pack[pack_int_id])
continue;
nth_midxed_object_oid(&oid, m, i);
xwrite(cmd.in, oid_to_hex(&oid), the_hash_algo->hexsz);
xwrite(cmd.in, "\n", 1);
}
close(cmd.in);
if (finish_command(&cmd)) {
error(_("could not finish pack-objects"));
result = 1;
goto cleanup;
}
result = write_midx_internal(object_dir, m, NULL);
m = NULL;
cleanup:
if (m)
close_midx(m);
free(include_pack);
return result;
}

midx.h

@@ -50,6 +50,8 @@ int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, i
 int write_midx_file(const char *object_dir);
 void clear_midx_file(struct repository *r);
 int verify_midx_file(struct repository *r, const char *object_dir);
+int expire_midx_packs(struct repository *r, const char *object_dir);
+int midx_repack(struct repository *r, const char *object_dir, size_t batch_size);
 
 void close_midx(struct multi_pack_index *m);

packfile.c

@@ -355,6 +355,34 @@ void close_object_store(struct raw_object_store *o)
    close_commit_graph(o);
 }
void unlink_pack_path(const char *pack_name, int force_delete)
{
static const char *exts[] = {".pack", ".idx", ".keep", ".bitmap", ".promisor"};
int i;
struct strbuf buf = STRBUF_INIT;
size_t plen;
strbuf_addstr(&buf, pack_name);
strip_suffix_mem(buf.buf, &buf.len, ".pack");
plen = buf.len;
if (!force_delete) {
strbuf_addstr(&buf, ".keep");
if (!access(buf.buf, F_OK)) {
strbuf_release(&buf);
return;
}
}
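/*
 * Delete the .pack file and any auxiliary files that share its
 * basename.
 */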
for (i = 0; i < ARRAY_SIZE(exts); i++) {
strbuf_setlen(&buf, plen);
strbuf_addstr(&buf, exts[i]);
unlink(buf.buf);
}
strbuf_release(&buf);
}
 /*
  * The LRU pack is the one with the oldest MRU window, preferring packs
  * with no used windows, or the oldest mtime if it has no windows allocated.

packfile.h

@@ -95,6 +95,13 @@ void unuse_pack(struct pack_window **);
 void clear_delta_base_cache(void);
 struct packed_git *add_packed_git(const char *path, size_t path_len, int local);
 
+/*
+ * Unlink the .pack and associated extension files.
+ * Does not unlink if 'force_delete' is false and the pack-file is
+ * marked as ".keep".
+ */
+extern void unlink_pack_path(const char *pack_name, int force_delete);
+
 /*
  * Make sure that a pointer access into an mmap'd index file is within bounds,
  * and can provide at least 8 bytes of data.

t5319-multi-pack-index.sh

@@ -363,4 +363,188 @@ test_expect_success 'verify incorrect 64-bit offset' '
        "incorrect object offset"
 '
test_expect_success 'setup expire tests' '
mkdir dup &&
(
cd dup &&
git init &&
test-tool genrandom "data" 4096 >large_file.txt &&
git update-index --add large_file.txt &&
for i in $(test_seq 1 20)
do
test_commit $i
done &&
git branch A HEAD &&
git branch B HEAD~8 &&
git branch C HEAD~13 &&
git branch D HEAD~16 &&
git branch E HEAD~18 &&
git pack-objects --revs .git/objects/pack/pack-A <<-EOF &&
refs/heads/A
^refs/heads/B
EOF
git pack-objects --revs .git/objects/pack/pack-B <<-EOF &&
refs/heads/B
^refs/heads/C
EOF
git pack-objects --revs .git/objects/pack/pack-C <<-EOF &&
refs/heads/C
^refs/heads/D
EOF
git pack-objects --revs .git/objects/pack/pack-D <<-EOF &&
refs/heads/D
^refs/heads/E
EOF
git pack-objects --revs .git/objects/pack/pack-E <<-EOF &&
refs/heads/E
EOF
git multi-pack-index write &&
cp -r .git/objects/pack .git/objects/pack-backup
)
'
test_expect_success 'expire does not remove any packs' '
(
cd dup &&
ls .git/objects/pack >expect &&
git multi-pack-index expire &&
ls .git/objects/pack >actual &&
test_cmp expect actual
)
'
test_expect_success 'expire removes unreferenced packs' '
(
cd dup &&
git pack-objects --revs .git/objects/pack/pack-combined <<-EOF &&
refs/heads/A
^refs/heads/C
EOF
git multi-pack-index write &&
ls .git/objects/pack | grep -v -e pack-[AB] >expect &&
git multi-pack-index expire &&
ls .git/objects/pack >actual &&
test_cmp expect actual &&
ls .git/objects/pack/ | grep idx >expect-idx &&
test-tool read-midx .git/objects | grep idx >actual-midx &&
test_cmp expect-idx actual-midx &&
git multi-pack-index verify &&
git fsck
)
'
test_expect_success 'repack with minimum size does not alter existing packs' '
(
cd dup &&
rm -rf .git/objects/pack &&
mv .git/objects/pack-backup .git/objects/pack &&
touch -m -t 201901010000 .git/objects/pack/pack-D* &&
touch -m -t 201901010001 .git/objects/pack/pack-C* &&
touch -m -t 201901010002 .git/objects/pack/pack-B* &&
touch -m -t 201901010003 .git/objects/pack/pack-A* &&
ls .git/objects/pack >expect &&
MINSIZE=$(test-tool path-utils file-size .git/objects/pack/*pack | sort -n | head -n 1) &&
git multi-pack-index repack --batch-size=$MINSIZE &&
ls .git/objects/pack >actual &&
test_cmp expect actual
)
'
test_expect_success 'repack creates a new pack' '
(
cd dup &&
ls .git/objects/pack/*idx >idx-list &&
test_line_count = 5 idx-list &&
THIRD_SMALLEST_SIZE=$(test-tool path-utils file-size .git/objects/pack/*pack | sort -n | head -n 3 | tail -n 1) &&
BATCH_SIZE=$(($THIRD_SMALLEST_SIZE + 1)) &&
git multi-pack-index repack --batch-size=$BATCH_SIZE &&
ls .git/objects/pack/*idx >idx-list &&
test_line_count = 6 idx-list &&
test-tool read-midx .git/objects | grep idx >midx-list &&
test_line_count = 6 midx-list
)
'
test_expect_success 'expire removes repacked packs' '
(
cd dup &&
ls -al .git/objects/pack/*pack &&
ls -S .git/objects/pack/*pack | head -n 4 >expect &&
git multi-pack-index expire &&
ls -S .git/objects/pack/*pack >actual &&
test_cmp expect actual &&
test-tool read-midx .git/objects | grep idx >midx-list &&
test_line_count = 4 midx-list
)
'
test_expect_success 'expire works when adding new packs' '
(
cd dup &&
git pack-objects --revs .git/objects/pack/pack-combined <<-EOF &&
refs/heads/A
^refs/heads/B
EOF
git pack-objects --revs .git/objects/pack/pack-combined <<-EOF &&
refs/heads/B
^refs/heads/C
EOF
git pack-objects --revs .git/objects/pack/pack-combined <<-EOF &&
refs/heads/C
^refs/heads/D
EOF
git multi-pack-index write &&
git pack-objects --revs .git/objects/pack/a-pack <<-EOF &&
refs/heads/D
^refs/heads/E
EOF
git multi-pack-index write &&
git pack-objects --revs .git/objects/pack/z-pack <<-EOF &&
refs/heads/E
EOF
git multi-pack-index expire &&
ls .git/objects/pack/ | grep idx >expect &&
test-tool read-midx .git/objects | grep idx >actual &&
test_cmp expect actual &&
git multi-pack-index verify
)
'
test_expect_success 'expire respects .keep files' '
(
cd dup &&
git pack-objects --revs .git/objects/pack/pack-all <<-EOF &&
refs/heads/A
EOF
git multi-pack-index write &&
PACKA=$(ls .git/objects/pack/a-pack*\.pack | sed s/\.pack\$//) &&
touch $PACKA.keep &&
git multi-pack-index expire &&
ls -S .git/objects/pack/a-pack* | grep $PACKA >a-pack-files &&
test_line_count = 3 a-pack-files &&
test-tool read-midx .git/objects | grep idx >midx-list &&
test_line_count = 2 midx-list
)
'
test_expect_success 'repack --batch-size=0 repacks everything' '
(
cd dup &&
rm .git/objects/pack/*.keep &&
ls .git/objects/pack/*idx >idx-list &&
test_line_count = 2 idx-list &&
git multi-pack-index repack --batch-size=0 &&
ls .git/objects/pack/*idx >idx-list &&
test_line_count = 3 idx-list &&
test-tool read-midx .git/objects | grep idx >midx-list &&
test_line_count = 3 midx-list &&
git multi-pack-index expire &&
ls -al .git/objects/pack/*idx >idx-list &&
test_line_count = 1 idx-list &&
git multi-pack-index repack --batch-size=0 &&
ls -al .git/objects/pack/*idx >new-idx-list &&
test_cmp idx-list new-idx-list
)
'
test_done