Merge branch 'tb/geometric-repack'

"git repack" so far has been only capable of repacking everything
under the sun into a single pack (or split by size).  A cleverer
strategy to reduce the cost of repacking a repository has been
introduced.

* tb/geometric-repack:
  builtin/pack-objects.c: ignore missing links with --stdin-packs
  builtin/repack.c: reword comment around pack-objects flags
  builtin/repack.c: be more conservative with unsigned overflows
  builtin/repack.c: assign pack split later
  t7703: test --geometric repack with loose objects
  builtin/repack.c: do not repack single packs with --geometric
  builtin/repack.c: add '--geometric' option
  packfile: add kept-pack cache for find_kept_pack_entry()
  builtin/pack-objects.c: rewrite honor-pack-keep logic
  p5303: measure time to repack with keep
  p5303: add missing &&-chains
  builtin/pack-objects.c: add '--stdin-packs' option
  revision: learn '--no-kept-objects'
  packfile: introduce 'find_kept_pack_entry()'
This commit is contained in:
Junio C Hamano 2021-03-24 14:36:27 -07:00
commit 2744383cbd
13 changed files with 1029 additions and 60 deletions

View File

@ -85,6 +85,16 @@ base-name::
reference was included in the resulting packfile. This
can be useful to send new tags to native Git clients.
--stdin-packs::
Read the basenames of packfiles (e.g., `pack-1234abcd.pack`)
from the standard input, instead of object names or revision
arguments. The resulting pack contains all objects listed in the
included packs (those not beginning with `^`), excluding any
objects listed in the excluded packs (beginning with `^`).
+
Incompatible with `--revs`, or options that imply `--revs` (such as
`--all`), with the exception of `--unpacked`, which is compatible.
--window=<n>::
--depth=<n>::
These two options affect how the objects contained in

View File

@ -165,6 +165,29 @@ depth is 4095.
Pass the `--delta-islands` option to `git-pack-objects`, see
linkgit:git-pack-objects[1].
-g<factor>::
--geometric=<factor>::
Arrange resulting pack structure so that each successive pack
contains at least `<factor>` times the number of objects as the
next-largest pack.
+
`git repack` ensures this by determining a "cut" of packfiles that need
to be repacked into one in order to ensure a geometric progression. It
picks the smallest set of packfiles such that as many of the larger
packfiles (by count of objects contained in that pack) may be left
intact.
+
Unlike other repack modes, the set of objects to pack is determined
uniquely by the set of packs being "rolled-up"; in other words, the
packs determined to need to be combined in order to restore a geometric
progression.
+
When `--unpacked` is specified, loose objects are implicitly included in
this "roll-up", without respect to their reachability. This is subject
to change in the future. This option (implying a drastically different
repack mode) is not guaranteed to work with all other combinations of
options to `git repack`.
CONFIGURATION
-------------

View File

@ -1188,7 +1188,8 @@ static int have_duplicate_entry(const struct object_id *oid,
return 1;
}
static int want_found_object(int exclude, struct packed_git *p)
static int want_found_object(const struct object_id *oid, int exclude,
struct packed_git *p)
{
if (exclude)
return 1;
@ -1204,27 +1205,82 @@ static int want_found_object(int exclude, struct packed_git *p)
* make sure no copy of this object appears in _any_ pack that makes us
* to omit the object, so we need to check all the packs.
*
* We can however first check whether these options can possible matter;
* We can however first check whether these options can possibly matter;
* if they do not matter we know we want the object in generated pack.
* Otherwise, we signal "-1" at the end to tell the caller that we do
* not know either way, and it needs to check more packs.
*/
if (!ignore_packed_keep_on_disk &&
!ignore_packed_keep_in_core &&
(!local || !have_non_local_packs))
return 1;
/*
* Objects in packs borrowed from elsewhere are discarded regardless of
* if they appear in other packs that weren't borrowed.
*/
if (local && !p->pack_local)
return 0;
if (p->pack_local &&
((ignore_packed_keep_on_disk && p->pack_keep) ||
(ignore_packed_keep_in_core && p->pack_keep_in_core)))
return 0;
/*
* Then handle .keep first, as we have a fast(er) path there.
*/
if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core) {
/*
* Set the flags for the kept-pack cache to be the ones we want
* to ignore.
*
* That is, if we are ignoring objects in on-disk keep packs,
* then we want to search through the on-disk keep and ignore
* the in-core ones.
*/
unsigned flags = 0;
if (ignore_packed_keep_on_disk)
flags |= ON_DISK_KEEP_PACKS;
if (ignore_packed_keep_in_core)
flags |= IN_CORE_KEEP_PACKS;
if (ignore_packed_keep_on_disk && p->pack_keep)
return 0;
if (ignore_packed_keep_in_core && p->pack_keep_in_core)
return 0;
if (has_object_kept_pack(oid, flags))
return 0;
}
/*
* At this point we know definitively that either we don't care about
* keep-packs, or the object is not in one. Keep checking other
* conditions...
*/
if (!local || !have_non_local_packs)
return 1;
/* we don't know yet; keep looking for more packs */
return -1;
}
/*
 * Decide whether the object named by `oid` is wanted, judging by pack `p`.
 *
 * Returns -1 ("keep looking in other packs") when the object is not found
 * in `p`, or when no pack has been recorded yet and `p` is not valid.
 * Otherwise the first pack the object is found in is recorded in
 * `*found_pack` / `*found_offset`, and the verdict of want_found_object()
 * for `p` is returned.
 */
static int want_object_in_pack_one(struct packed_git *p,
				   const struct object_id *oid,
				   int exclude,
				   struct packed_git **found_pack,
				   off_t *found_offset)
{
	/* reuse the already-known offset rather than searching `p` again */
	off_t where = (p == *found_pack) ? *found_offset
					 : find_pack_entry_one(oid->hash, p);

	if (!where)
		return -1;

	if (!*found_pack) {
		if (!is_pack_valid(p))
			return -1;
		*found_offset = where;
		*found_pack = p;
	}

	return want_found_object(oid, exclude, p);
}
/*
* Check whether we want the object in the pack (e.g., we do not want
* objects found in non-local stores if the "--local" option was used).
@ -1252,7 +1308,7 @@ static int want_object_in_pack(const struct object_id *oid,
* are present we will determine the answer right now.
*/
if (*found_pack) {
want = want_found_object(exclude, *found_pack);
want = want_found_object(oid, exclude, *found_pack);
if (want != -1)
return want;
}
@ -1260,51 +1316,20 @@ static int want_object_in_pack(const struct object_id *oid,
for (m = get_multi_pack_index(the_repository); m; m = m->next) {
struct pack_entry e;
if (fill_midx_entry(the_repository, oid, &e, m)) {
struct packed_git *p = e.p;
off_t offset;
if (p == *found_pack)
offset = *found_offset;
else
offset = find_pack_entry_one(oid->hash, p);
if (offset) {
if (!*found_pack) {
if (!is_pack_valid(p))
continue;
*found_offset = offset;
*found_pack = p;
}
want = want_found_object(exclude, p);
if (want != -1)
return want;
}
want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset);
if (want != -1)
return want;
}
}
list_for_each(pos, get_packed_git_mru(the_repository)) {
struct packed_git *p = list_entry(pos, struct packed_git, mru);
off_t offset;
if (p == *found_pack)
offset = *found_offset;
else
offset = find_pack_entry_one(oid->hash, p);
if (offset) {
if (!*found_pack) {
if (!is_pack_valid(p))
continue;
*found_offset = offset;
*found_pack = p;
}
want = want_found_object(exclude, p);
if (!exclude && want > 0)
list_move(&p->mru,
get_packed_git_mru(the_repository));
if (want != -1)
return want;
}
want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset);
if (!exclude && want > 0)
list_move(&p->mru,
get_packed_git_mru(the_repository));
if (want != -1)
return want;
}
if (uri_protocols.nr) {
@ -2986,6 +3011,191 @@ static int git_pack_config(const char *k, const char *v, void *cb)
return git_default_config(k, v, cb);
}
/* Counters for trace2 output when in --stdin-packs mode. */
static int stdin_packs_found_nr;
static int stdin_packs_hints_nr;
static int add_object_entry_from_pack(const struct object_id *oid,
struct packed_git *p,
uint32_t pos,
void *_data)
{
struct rev_info *revs = _data;
struct object_info oi = OBJECT_INFO_INIT;
off_t ofs;
enum object_type type;
display_progress(progress_state, ++nr_seen);
if (have_duplicate_entry(oid, 0))
return 0;
ofs = nth_packed_object_offset(p, pos);
if (!want_object_in_pack(oid, 0, &p, &ofs))
return 0;
oi.typep = &type;
if (packed_object_info(the_repository, p, ofs, &oi) < 0)
die(_("could not get type of object %s in pack %s"),
oid_to_hex(oid), p->pack_name);
else if (type == OBJ_COMMIT) {
/*
* commits in included packs are used as starting points for the
* subsequent revision walk
*/
add_pending_oid(revs, NULL, oid, 0);
}
stdin_packs_found_nr++;
create_object_entry(oid, type, 0, 0, 0, p, ofs);
return 0;
}
/*
 * Commit callback for the name-hash hinting walk that
 * read_packs_list_from_stdin() runs via traverse_commit_list().
 */
static void show_commit_pack_hint(struct commit *commit, void *_data)
{
/* nothing to do; commits don't have a namehash */
}
/*
 * Object callback for the name-hash hinting walk in --stdin-packs mode.
 *
 * The 'to_pack' list was built by iterating over every object stored in
 * the included packs, so its entries lack the name hash that a
 * reachability traversal would normally supply.  For objects that are in
 * 'to_pack', make a best-effort attempt to fill in ->hash and
 * ->no_try_delta from the path name reported by the walk, in order to
 * perhaps improve the delta selection process.  Objects that are not in
 * 'to_pack' are ignored.
 */
static void show_object_pack_hint(struct object *object, const char *name,
				  void *_data)
{
	struct object_entry *entry = packlist_find(&to_pack, &object->oid);

	if (entry) {
		entry->hash = pack_name_hash(name);
		entry->no_try_delta = name && no_try_delta(name);

		stdin_packs_hints_nr++;
	}
}
/*
 * QSORT() comparator over string_list_item entries whose ->util points at
 * a packed_git.  Packs with a larger mtime sort first (descending mtime),
 * so that objects are laid out roughly as newest-to-oldest.
 */
static int pack_mtime_cmp(const void *_a, const void *_b)
{
	const struct string_list_item *lhs = _a;
	const struct string_list_item *rhs = _b;
	struct packed_git *a = lhs->util;
	struct packed_git *b = rhs->util;

	if (b->mtime < a->mtime)
		return -1;
	if (a->mtime < b->mtime)
		return 1;
	return 0;
}
/*
 * Build the packing list for --stdin-packs mode.
 *
 * Every non-empty line read from stdin names a pack by its basename; a
 * leading '^' puts the pack on the exclude list, anything else on the
 * include list.  Excluded packs are marked as kept in-core, every object
 * of every included pack is fed to add_object_entry_from_pack(), and a
 * revision walk is then run purely to fill in name-hash hints for delta
 * selection.  Dies if a listed pack cannot be matched to a pack of the
 * repository.
 */
static void read_packs_list_from_stdin(void)
{
struct strbuf buf = STRBUF_INIT;
struct string_list include_packs = STRING_LIST_INIT_DUP;
struct string_list exclude_packs = STRING_LIST_INIT_DUP;
struct string_list_item *item = NULL;
struct packed_git *p;
struct rev_info revs;
repo_init_revisions(the_repository, &revs, NULL);
/*
 * Use a revision walk to fill in the namehash of objects in the include
 * packs. To save time, we'll avoid traversing through objects that are
 * in excluded packs.
 *
 * That may cause us to avoid populating all of the namehash fields of
 * all included objects, but our goal is best-effort, since this is only
 * an optimization during delta selection.
 */
revs.no_kept_objects = 1;
revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
revs.blob_objects = 1;
revs.tree_objects = 1;
revs.tag_objects = 1;
revs.ignore_missing_links = 1;
/* sort each input line into the include or the exclude list */
while (strbuf_getline(&buf, stdin) != EOF) {
if (!buf.len)
continue;
if (*buf.buf == '^')
string_list_append(&exclude_packs, buf.buf + 1);
else
string_list_append(&include_packs, buf.buf);
strbuf_reset(&buf);
}
/* string_list_lookup() below needs sorted lists */
string_list_sort(&include_packs);
string_list_sort(&exclude_packs);
/*
 * Attach each pack of the repository to the list entry that names it;
 * the include list is consulted before the exclude list.
 */
for (p = get_all_packs(the_repository); p; p = p->next) {
const char *pack_name = pack_basename(p);
item = string_list_lookup(&include_packs, pack_name);
if (!item)
item = string_list_lookup(&exclude_packs, pack_name);
if (item)
item->util = p;
}
/*
 * First handle all of the excluded packs, marking them as kept in-core
 * so that later calls to add_object_entry() discard any objects that
 * are also found in excluded packs.
 */
for_each_string_list_item(item, &exclude_packs) {
struct packed_git *p = item->util;
if (!p)
die(_("could not find pack '%s'"), item->string);
p->pack_keep_in_core = 1;
}
/*
 * Order packs by descending mtime (see pack_mtime_cmp()); use QSORT
 * directly to access the string_list_item's ->util pointer, which
 * string_list_sort() does not provide.
 */
QSORT(include_packs.items, include_packs.nr, pack_mtime_cmp);
for_each_string_list_item(item, &include_packs) {
struct packed_git *p = item->util;
if (!p)
die(_("could not find pack '%s'"), item->string);
for_each_object_in_pack(p,
add_object_entry_from_pack,
&revs,
FOR_EACH_OBJECT_PACK_ORDER);
}
/* walk from the commits queued by add_object_entry_from_pack() */
if (prepare_revision_walk(&revs))
die(_("revision walk setup failed"));
traverse_commit_list(&revs,
show_commit_pack_hint,
show_object_pack_hint,
NULL);
trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found",
stdin_packs_found_nr);
trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints",
stdin_packs_hints_nr);
strbuf_release(&buf);
string_list_clear(&include_packs, 0);
string_list_clear(&exclude_packs, 0);
}
static void read_object_list_from_stdin(void)
{
char line[GIT_MAX_HEXSZ + 1 + PATH_MAX + 2];
@ -3489,6 +3699,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
struct strvec rp = STRVEC_INIT;
int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
int rev_list_index = 0;
int stdin_packs = 0;
struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
struct option pack_objects_options[] = {
OPT_SET_INT('q', "quiet", &progress,
@ -3539,6 +3750,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
OPT_SET_INT_F(0, "indexed-objects", &rev_list_index,
N_("include objects referred to by the index"),
1, PARSE_OPT_NONEG),
OPT_BOOL(0, "stdin-packs", &stdin_packs,
N_("read packs from stdin")),
OPT_BOOL(0, "stdout", &pack_to_stdout,
N_("output pack to stdout")),
OPT_BOOL(0, "include-tag", &include_tag,
@ -3645,7 +3858,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
use_internal_rev_list = 1;
strvec_push(&rp, "--indexed-objects");
}
if (rev_list_unpacked) {
if (rev_list_unpacked && !stdin_packs) {
use_internal_rev_list = 1;
strvec_push(&rp, "--unpacked");
}
@ -3690,8 +3903,13 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (filter_options.choice) {
if (!pack_to_stdout)
die(_("cannot use --filter without --stdout"));
if (stdin_packs)
die(_("cannot use --filter with --stdin-packs"));
}
if (stdin_packs && use_internal_rev_list)
die(_("cannot use internal rev list with --stdin-packs"));
/*
* "soft" reasons not to use bitmaps - for on-disk repack by default we want
*
@ -3750,7 +3968,13 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (progress)
progress_state = start_progress(_("Enumerating objects"), 0);
if (!use_internal_rev_list)
if (stdin_packs) {
/* avoids adding objects in excluded packs */
ignore_packed_keep_in_core = 1;
read_packs_list_from_stdin();
if (rev_list_unpacked)
add_unreachable_loose_objects();
} else if (!use_internal_rev_list)
read_object_list_from_stdin();
else {
get_object_list(rp.nr, rp.v);

View File

@ -297,6 +297,142 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
#define ALL_INTO_ONE 1
#define LOOSEN_UNREACHABLE 2
/*
 * The set of packs considered by "git repack --geometric".
 */
struct pack_geometry {
/* candidate packs; init_pack_geometry() sorts them by ascending object count */
struct packed_git **pack;
/* number of entries used in, and allocated for, 'pack' */
uint32_t pack_nr, pack_alloc;
/*
 * Set by split_pack_geometry(): packs at indexes below 'split' are
 * rolled up into the new pack, the remaining ones are left alone.
 */
uint32_t split;
};
/*
 * The weight of a pack within the geometric progression: the number of
 * objects it contains.  Dies if the pack's index cannot be opened.
 */
static uint32_t geometry_pack_weight(struct packed_git *p)
{
	int failed = open_pack_index(p);

	if (failed)
		die(_("cannot open index for %s"), p->pack_name);

	return p->num_objects;
}
/*
 * QSORT() comparator over an array of 'struct packed_git *': orders packs
 * by ascending weight (see geometry_pack_weight()).
 */
static int geometry_cmp(const void *va, const void *vb)
{
	struct packed_git *const *lhs = va;
	struct packed_git *const *rhs = vb;
	uint32_t lhs_weight = geometry_pack_weight(*lhs);
	uint32_t rhs_weight = geometry_pack_weight(*rhs);

	return (lhs_weight > rhs_weight) - (lhs_weight < rhs_weight);
}
/*
 * Allocate a pack_geometry, store it in '*geometry_p', and fill it with
 * the repository's packs ordered by ascending weight.
 *
 * Packs flagged 'pack_keep' are left out unless 'pack_kept_objects' is
 * set.
 */
static void init_pack_geometry(struct pack_geometry **geometry_p)
{
	struct pack_geometry *geo = xcalloc(1, sizeof(struct pack_geometry));
	struct packed_git *pack;

	*geometry_p = geo;

	for (pack = get_all_packs(the_repository); pack; pack = pack->next) {
		if (pack->pack_keep && !pack_kept_objects)
			continue;

		ALLOC_GROW(geo->pack, geo->pack_nr + 1, geo->pack_alloc);
		geo->pack[geo->pack_nr++] = pack;
	}

	QSORT(geo->pack, geo->pack_nr, geometry_cmp);
}
/*
 * Compute 'geometry->split': the number of packs, counted from the start
 * of 'geometry->pack', that have to be rolled up into one new pack so
 * that the packs which are left alone form a geometric progression with
 * the given 'factor'.
 *
 * Expects 'geometry->pack' to be ordered by ascending weight, as
 * init_pack_geometry() leaves it.  Dies when a weight computation would
 * overflow.
 */
static void split_pack_geometry(struct pack_geometry *geometry, int factor)
{
uint32_t i;
uint32_t split;
off_t total_size = 0;
/* no packs at all: nothing to roll up */
if (!geometry->pack_nr) {
geometry->split = geometry->pack_nr;
return;
}
/*
 * First, count the number of packs (in descending order of size) which
 * already form a geometric progression.
 */
for (i = geometry->pack_nr - 1; i > 0; i--) {
struct packed_git *ours = geometry->pack[i];
struct packed_git *prev = geometry->pack[i - 1];
if (unsigned_mult_overflows(factor, geometry_pack_weight(prev)))
die(_("pack %s too large to consider in geometric "
"progression"),
prev->pack_name);
/* stop at the first adjacent pair that breaks the progression */
if (geometry_pack_weight(ours) < factor * geometry_pack_weight(prev))
break;
}
split = i;
if (split) {
/*
 * Move the split one to the right, since the top element in the
 * last-compared pair can't be in the progression. Only do this
 * when we split in the middle of the array (otherwise if we got
 * to the end, then the split is in the right place).
 */
split++;
}
/*
 * Then, anything to the left of 'split' must be in a new pack. But,
 * creating that new pack may cause packs in the heavy half to no longer
 * form a geometric progression.
 *
 * Compute an expected size of the new pack, and then determine how many
 * packs in the heavy half need to be joined into it (if any) to restore
 * the geometric progression.
 */
for (i = 0; i < split; i++) {
struct packed_git *p = geometry->pack[i];
if (unsigned_add_overflows(total_size, geometry_pack_weight(p)))
die(_("pack %s too large to roll up"), p->pack_name);
total_size += geometry_pack_weight(p);
}
/* absorb heavier packs for as long as the new pack would outgrow them */
for (i = split; i < geometry->pack_nr; i++) {
struct packed_git *ours = geometry->pack[i];
if (unsigned_mult_overflows(factor, total_size))
die(_("pack %s too large to roll up"), ours->pack_name);
if (geometry_pack_weight(ours) < factor * total_size) {
if (unsigned_add_overflows(total_size,
geometry_pack_weight(ours)))
die(_("pack %s too large to roll up"),
ours->pack_name);
split++;
total_size += geometry_pack_weight(ours);
} else
break;
}
geometry->split = split;
}
/*
 * Release the pack array owned by 'geometry' and reset its bookkeeping.
 * A NULL 'geometry' is accepted and ignored.
 *
 * The array pointer is cleared together with the counters so that the
 * structure never holds a dangling pointer: with pack_nr and pack_alloc
 * back at zero, a later ALLOC_GROW() on a stale pointer would hand
 * already-freed memory to realloc().
 *
 * The structure itself is not freed here.
 */
static void clear_pack_geometry(struct pack_geometry *geometry)
{
	if (!geometry)
		return;

	free(geometry->pack);
	geometry->pack = NULL;
	geometry->pack_nr = 0;
	geometry->pack_alloc = 0;
	geometry->split = 0;
}
int cmd_repack(int argc, const char **argv, const char *prefix)
{
struct child_process cmd = CHILD_PROCESS_INIT;
@ -304,6 +440,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
struct string_list names = STRING_LIST_INIT_DUP;
struct string_list rollback = STRING_LIST_INIT_NODUP;
struct string_list existing_packs = STRING_LIST_INIT_DUP;
struct pack_geometry *geometry = NULL;
struct strbuf line = STRBUF_INIT;
int i, ext, ret;
FILE *out;
@ -316,6 +453,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
int no_update_server_info = 0;
struct pack_objects_args po_args = {NULL};
int geometric_factor = 0;
struct option builtin_repack_options[] = {
OPT_BIT('a', NULL, &pack_everything,
@ -356,6 +494,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
N_("repack objects in packs marked with .keep")),
OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
N_("do not repack this pack")),
OPT_INTEGER('g', "geometric", &geometric_factor,
N_("find a geometric progression with factor <N>")),
OPT_END()
};
@ -382,6 +522,13 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
if (write_bitmaps && !(pack_everything & ALL_INTO_ONE))
die(_(incremental_bitmap_conflict_error));
if (geometric_factor) {
if (pack_everything)
die(_("--geometric is incompatible with -A, -a"));
init_pack_geometry(&geometry);
split_pack_geometry(geometry, geometric_factor);
}
packdir = mkpathdup("%s/pack", get_object_directory());
packtmp = mkpathdup("%s/.tmp-%d-pack", packdir, (int)getpid());
@ -396,9 +543,21 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
strvec_pushf(&cmd.args, "--keep-pack=%s",
keep_pack_list.items[i].string);
strvec_push(&cmd.args, "--non-empty");
strvec_push(&cmd.args, "--all");
strvec_push(&cmd.args, "--reflog");
strvec_push(&cmd.args, "--indexed-objects");
if (!geometry) {
/*
* We need to grab all reachable objects, including those that
* are reachable from reflogs and the index.
*
* When repacking into a geometric progression of packs,
* however, we ask 'git pack-objects --stdin-packs', and it is
* not about packing objects based on reachability but about
* repacking all the objects in specified packs and loose ones
* (indeed, --stdin-packs is incompatible with these options).
*/
strvec_push(&cmd.args, "--all");
strvec_push(&cmd.args, "--reflog");
strvec_push(&cmd.args, "--indexed-objects");
}
if (has_promisor_remote())
strvec_push(&cmd.args, "--exclude-promisor-objects");
if (write_bitmaps > 0)
@ -429,17 +588,37 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
strvec_push(&cmd.env_array, "GIT_REF_PARANOIA=1");
}
}
} else if (geometry) {
strvec_push(&cmd.args, "--stdin-packs");
strvec_push(&cmd.args, "--unpacked");
} else {
strvec_push(&cmd.args, "--unpacked");
strvec_push(&cmd.args, "--incremental");
}
cmd.no_stdin = 1;
if (geometry)
cmd.in = -1;
else
cmd.no_stdin = 1;
ret = start_command(&cmd);
if (ret)
return ret;
if (geometry) {
FILE *in = xfdopen(cmd.in, "w");
/*
* The resulting pack should contain all objects in packs that
* are going to be rolled up, but exclude objects in packs which
* are being left alone.
*/
for (i = 0; i < geometry->split; i++)
fprintf(in, "%s\n", pack_basename(geometry->pack[i]));
for (i = geometry->split; i < geometry->pack_nr; i++)
fprintf(in, "^%s\n", pack_basename(geometry->pack[i]));
fclose(in);
}
out = xfdopen(cmd.out, "r");
while (strbuf_getline_lf(&line, out) != EOF) {
if (line.len != the_hash_algo->hexsz)
@ -507,6 +686,25 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
if (!string_list_has_string(&names, sha1))
remove_redundant_pack(packdir, item->string);
}
if (geometry) {
struct strbuf buf = STRBUF_INIT;
uint32_t i;
for (i = 0; i < geometry->split; i++) {
struct packed_git *p = geometry->pack[i];
if (string_list_has_string(&names,
hash_to_hex(p->hash)))
continue;
strbuf_reset(&buf);
strbuf_addstr(&buf, pack_basename(p));
strbuf_strip_suffix(&buf, ".pack");
remove_redundant_pack(packdir, buf.buf);
}
strbuf_release(&buf);
}
if (!po_args.quiet && isatty(2))
opts |= PRUNE_PACKED_VERBOSE;
prune_packed_objects(opts);
@ -528,6 +726,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
string_list_clear(&names, 0);
string_list_clear(&rollback, 0);
string_list_clear(&existing_packs, 0);
clear_pack_geometry(geometry);
strbuf_release(&line);
return 0;

View File

@ -153,6 +153,11 @@ struct raw_object_store {
/* A most-recently-used ordered version of the packed_git list. */
struct list_head packed_git_mru;
struct {
struct packed_git **packs;
unsigned flags;
} kept_pack_cache;
/*
* A map of packfiles to packed_git structs for tracking which
* packs have been loaded already.

View File

@ -2066,12 +2066,79 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa
return 0;
}
/*
 * Drop the repository's kept-pack cache if one exists and it was built
 * for a different set of 'flags' than the one requested now.
 */
static void maybe_invalidate_kept_pack_cache(struct repository *r,
					     unsigned flags)
{
	int cached = r->objects->kept_pack_cache.packs != NULL;

	if (cached && r->objects->kept_pack_cache.flags != flags) {
		FREE_AND_NULL(r->objects->kept_pack_cache.packs);
		r->objects->kept_pack_cache.flags = 0;
	}
}
/*
 * Return the NULL-terminated array of kept packs selected by 'flags'
 * (ON_DISK_KEEP_PACKS and/or IN_CORE_KEEP_PACKS), building and caching it
 * in the repository's object store when no matching cache exists yet.
 */
static struct packed_git **kept_pack_cache(struct repository *r, unsigned flags)
{
	struct packed_git **list = NULL;
	size_t used = 0, capacity = 0;
	struct packed_git *pack;

	maybe_invalidate_kept_pack_cache(r, flags);

	if (r->objects->kept_pack_cache.packs)
		return r->objects->kept_pack_cache.packs;

	/*
	 * We want "all" packs here, because we need to cover ones that
	 * are used by a midx, as well. We need to look in every one of
	 * them (instead of the midx itself) to cover duplicates. It's
	 * possible that an object is found in two packs that the midx
	 * covers, one kept and one not kept, but the midx returns only
	 * the non-kept version.
	 */
	for (pack = get_all_packs(r); pack; pack = pack->next) {
		int on_disk = pack->pack_keep && (flags & ON_DISK_KEEP_PACKS);
		int in_core = pack->pack_keep_in_core &&
			      (flags & IN_CORE_KEEP_PACKS);

		if (!on_disk && !in_core)
			continue;

		ALLOC_GROW(list, used + 1, capacity);
		list[used++] = pack;
	}

	/* terminate the array so callers can iterate without a count */
	ALLOC_GROW(list, used + 1, capacity);
	list[used] = NULL;

	r->objects->kept_pack_cache.packs = list;
	r->objects->kept_pack_cache.flags = flags;

	return list;
}
/*
 * Look for 'oid' in the kept packs selected by 'flags'.  Returns 1 as soon
 * as fill_pack_entry() succeeds for one of them (with 'e' as its output
 * argument), and 0 if none of the kept packs yields the object.
 */
int find_kept_pack_entry(struct repository *r,
			 const struct object_id *oid,
			 unsigned flags,
			 struct pack_entry *e)
{
	struct packed_git **kept = kept_pack_cache(r, flags);
	size_t i;

	for (i = 0; kept[i]; i++)
		if (fill_pack_entry(oid, e, kept[i]))
			return 1;

	return 0;
}
int has_object_pack(const struct object_id *oid)
{
struct pack_entry e;
return find_pack_entry(the_repository, oid, &e);
}
int has_object_kept_pack(const struct object_id *oid, unsigned flags)
{
struct pack_entry e;
return find_kept_pack_entry(the_repository, oid, flags, &e);
}
int has_pack_index(const unsigned char *sha1)
{
struct stat st;

View File

@ -162,13 +162,18 @@ int packed_object_info(struct repository *r,
void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1);
const struct packed_git *has_packed_and_bad(struct repository *r, const unsigned char *sha1);
/*
 * Flags selecting which kept packs find_kept_pack_entry() and
 * has_object_kept_pack() look at: packs with 'pack_keep' set
 * (ON_DISK_KEEP_PACKS) and/or packs with 'pack_keep_in_core' set
 * (IN_CORE_KEEP_PACKS).  The two may be OR-ed together.
 */
#define ON_DISK_KEEP_PACKS 1
#define IN_CORE_KEEP_PACKS 2
/*
 * Iff a pack file in the given repository contains the object named by oid,
 * return true and store its location to e.
 */
int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e);
/*
 * Like find_pack_entry(), but only the kept packs selected by flags are
 * searched.
 */
int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e);
/* Wrappers around the two lookups above that operate on the_repository. */
int has_object_pack(const struct object_id *oid);
int has_object_kept_pack(const struct object_id *oid, unsigned flags);
int has_pack_index(const unsigned char *sha1);

View File

@ -2336,6 +2336,16 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg
revs->unpacked = 1;
} else if (starts_with(arg, "--unpacked=")) {
die(_("--unpacked=<packfile> no longer supported"));
} else if (!strcmp(arg, "--no-kept-objects")) {
revs->no_kept_objects = 1;
revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS;
} else if (skip_prefix(arg, "--no-kept-objects=", &optarg)) {
revs->no_kept_objects = 1;
if (!strcmp(optarg, "in-core"))
revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
if (!strcmp(optarg, "on-disk"))
revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS;
} else if (!strcmp(arg, "-r")) {
revs->diff = 1;
revs->diffopt.flags.recursive = 1;
@ -3795,6 +3805,11 @@ enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi
return commit_ignore;
if (revs->unpacked && has_object_pack(&commit->object.oid))
return commit_ignore;
if (revs->no_kept_objects) {
if (has_object_kept_pack(&commit->object.oid,
revs->keep_pack_cache_flags))
return commit_ignore;
}
if (commit->object.flags & UNINTERESTING)
return commit_ignore;
if (revs->line_level_traverse && !want_ancestry(revs)) {

View File

@ -148,6 +148,7 @@ struct rev_info {
edge_hint_aggressive:1,
limited:1,
unpacked:1,
no_kept_objects:1,
boundary:2,
count:1,
left_right:1,
@ -317,6 +318,9 @@ struct rev_info {
* This is loaded from the commit-graph being used.
*/
struct bloom_filter_settings *bloom_filter_settings;
/* misc. flags related to '--no-kept-objects' */
unsigned keep_pack_cache_flags;
};
int ref_excluded(struct string_list *, const char *path);

View File

@ -28,11 +28,18 @@ repack_into_n () {
push @commits, $_ if $. % 5 == 1;
}
print reverse @commits;
' "$1" >pushes
' "$1" >pushes &&
# create base packfile
head -n 1 pushes |
git pack-objects --delta-base-offset --revs staging/pack
base_pack=$(
head -n 1 pushes |
git pack-objects --delta-base-offset --revs staging/pack
) &&
test_export base_pack &&
# create an empty packfile
empty_pack=$(git pack-objects staging/pack </dev/null) &&
test_export empty_pack &&
# and then incrementals between each pair of commits
last= &&
@ -49,6 +56,12 @@ repack_into_n () {
last=$rev
done <pushes &&
(
find staging -type f -name 'pack-*.pack' |
xargs -n 1 basename | grep -v "$base_pack" &&
printf "^pack-%s.pack\n" $base_pack
) >stdin.packs
# and install the whole thing
rm -f .git/objects/pack/* &&
mv staging/* .git/objects/pack/
@ -91,6 +104,23 @@ do
--reflog --indexed-objects --delta-base-offset \
--stdout </dev/null >/dev/null
'
test_perf "repack with kept ($nr_packs)" '
git pack-objects --keep-true-parents \
--keep-pack=pack-$empty_pack.pack \
--honor-pack-keep --non-empty --all \
--reflog --indexed-objects --delta-base-offset \
--stdout </dev/null >/dev/null
'
test_perf "repack with --stdin-packs ($nr_packs)" '
git pack-objects \
--keep-true-parents \
--stdin-packs \
--non-empty \
--delta-base-offset \
--stdout <stdin.packs >/dev/null
'
done
# Measure pack loading with 10,000 packs.

View File

@ -532,4 +532,139 @@ test_expect_success 'prefetch objects' '
test_line_count = 1 donelines
'
test_expect_success 'setup for --stdin-packs tests' '
	git init stdin-packs &&
	(
		cd stdin-packs &&

		test_commit A &&
		test_commit B &&
		test_commit C &&

		# A failing command inside the loop body would be masked by
		# the iterations that follow it, so leave the subshell as
		# soon as one pack-objects invocation fails.
		for id in A B C
		do
			git pack-objects .git/objects/pack/pack-$id \
				--incremental --revs <<-EOF || exit 1
			refs/tags/$id
			EOF
		done &&

		ls -la .git/objects/pack
	)
'
test_expect_success '--stdin-packs with excluded packs' '
(
cd stdin-packs &&
PACK_A="$(basename .git/objects/pack/pack-A-*.pack)" &&
PACK_B="$(basename .git/objects/pack/pack-B-*.pack)" &&
PACK_C="$(basename .git/objects/pack/pack-C-*.pack)" &&
git pack-objects test --stdin-packs <<-EOF &&
$PACK_A
^$PACK_B
$PACK_C
EOF
(
git show-index <$(ls .git/objects/pack/pack-A-*.idx) &&
git show-index <$(ls .git/objects/pack/pack-C-*.idx)
) >expect.raw &&
git show-index <$(ls test-*.idx) >actual.raw &&
cut -d" " -f2 <expect.raw | sort >expect &&
cut -d" " -f2 <actual.raw | sort >actual &&
test_cmp expect actual
)
'
test_expect_success '--stdin-packs is incompatible with --filter' '
(
cd stdin-packs &&
test_must_fail git pack-objects --stdin-packs --stdout \
--filter=blob:none </dev/null 2>err &&
test_i18ngrep "cannot use --filter with --stdin-packs" err
)
'
test_expect_success '--stdin-packs is incompatible with --revs' '
(
cd stdin-packs &&
test_must_fail git pack-objects --stdin-packs --revs out \
</dev/null 2>err &&
test_i18ngrep "cannot use internal rev list with --stdin-packs" err
)
'
test_expect_success '--stdin-packs with loose objects' '
(
cd stdin-packs &&
PACK_A="$(basename .git/objects/pack/pack-A-*.pack)" &&
PACK_B="$(basename .git/objects/pack/pack-B-*.pack)" &&
PACK_C="$(basename .git/objects/pack/pack-C-*.pack)" &&
test_commit D && # loose
git pack-objects test2 --stdin-packs --unpacked <<-EOF &&
$PACK_A
^$PACK_B
$PACK_C
EOF
(
git show-index <$(ls .git/objects/pack/pack-A-*.idx) &&
git show-index <$(ls .git/objects/pack/pack-C-*.idx) &&
git rev-list --objects --no-object-names \
refs/tags/C..refs/tags/D
) >expect.raw &&
ls -la . &&
git show-index <$(ls test2-*.idx) >actual.raw &&
cut -d" " -f2 <expect.raw | sort >expect &&
cut -d" " -f2 <actual.raw | sort >actual &&
test_cmp expect actual
)
'
test_expect_success '--stdin-packs with broken links' '
(
cd stdin-packs &&
# make an unreachable object with a bogus parent
git cat-file -p HEAD >commit &&
sed "s/$(git rev-parse HEAD^)/$(test_oid zero)/" <commit |
git hash-object -w -t commit --stdin >in &&
git pack-objects .git/objects/pack/pack-D <in &&
PACK_A="$(basename .git/objects/pack/pack-A-*.pack)" &&
PACK_B="$(basename .git/objects/pack/pack-B-*.pack)" &&
PACK_C="$(basename .git/objects/pack/pack-C-*.pack)" &&
PACK_D="$(basename .git/objects/pack/pack-D-*.pack)" &&
git pack-objects test3 --stdin-packs --unpacked <<-EOF &&
$PACK_A
^$PACK_B
$PACK_C
$PACK_D
EOF
(
git show-index <$(ls .git/objects/pack/pack-A-*.idx) &&
git show-index <$(ls .git/objects/pack/pack-C-*.idx) &&
git show-index <$(ls .git/objects/pack/pack-D-*.idx) &&
git rev-list --objects --no-object-names \
refs/tags/C..refs/tags/D
) >expect.raw &&
git show-index <$(ls test3-*.idx) >actual.raw &&
cut -d" " -f2 <expect.raw | sort >expect &&
cut -d" " -f2 <actual.raw | sort >actual &&
test_cmp expect actual
)
'
test_done

69
t/t6114-keep-packs.sh Executable file
View File

@ -0,0 +1,69 @@
#!/bin/sh
# Checks how "git rev-list --no-kept-objects" treats objects stored in
# packs that are marked with a .keep file.
test_description='rev-list with .keep packs'
# Pulls in the test helpers used below (test_expect_success, test_commit, ...).
. ./test-lib.sh
test_expect_success 'setup' '
	test_commit loose &&
	test_commit packed &&
	test_commit kept &&

	# One pack for what "kept" adds on top of "packed" ...
	KEPT_PACK=$(git pack-objects --revs .git/objects/pack/pack <<-EOF
	refs/tags/kept
	^refs/tags/packed
	EOF
	) &&

	# ... and one for what "packed" adds on top of "loose". Nothing
	# here packs the objects reachable from "loose".
	MISC_PACK=$(git pack-objects --revs .git/objects/pack/pack <<-EOF
	refs/tags/packed
	^refs/tags/loose
	EOF
	) &&

	# Only the first pack gets a .keep file. KEPT_PACK and MISC_PACK
	# are read by the later tests in this script.
	touch .git/objects/pack/pack-$KEPT_PACK.keep
'
# Run "git rev-list" with the given arguments and print its output sorted.
# The unsorted output is left behind in the file "out"; if rev-list fails,
# its exit status is returned and nothing is printed.
rev_list_objects () {
	git rev-list "$@" >out || return
	sort out
}
# Print, sorted, the second space-separated field of every line that
# "git show-index" emits for the pack index file named by $1.
# The raw show-index output is left behind in the file "expect-idx".
idx_objects () {
	# Quote the redirection target: left unquoted, bash reports an
	# "ambiguous redirect" if the path expands to more than one word.
	git show-index <"$1" >expect-idx &&
	cut -d" " -f2 <expect-idx | sort
}
test_expect_success '--no-kept-objects excludes trees and blobs in .keep packs' '
	# What the kept pack contains ...
	idx_objects .git/objects/pack/pack-$KEPT_PACK.idx >expect &&

	# ... must be exactly what separates the full listing from the
	# --no-kept-objects listing.
	rev_list_objects --objects --all --no-object-names >kept &&
	rev_list_objects --objects --all --no-object-names --no-kept-objects >no-kept &&
	comm -3 kept no-kept >actual &&

	test_cmp expect actual
'
test_expect_success '--no-kept-objects excludes kept non-MIDX object' '
	test_config core.multiPackIndex true &&

	# Create a pack holding just the commit object at "kept", and do not
	# mark it as kept (even though that commit also appears in
	# $KEPT_PACK, which does have a .keep file).
	MIDX_PACK=$(git pack-objects .git/objects/pack/pack <<-EOF
	$(git rev-parse kept)
	EOF
	) &&

	# Write a MIDX containing all packs, but use the version of the commit
	# at "kept" in a non-kept pack by touching $MIDX_PACK.
	touch .git/objects/pack/pack-$MIDX_PACK.pack &&
	git multi-pack-index write &&

	rev_list_objects --objects --no-object-names --no-kept-objects HEAD >actual &&

	# Expected: the contents of the non-kept pack from setup plus
	# everything reachable from "loose".
	(
		idx_objects .git/objects/pack/pack-$MISC_PACK.idx &&
		git rev-list --objects --no-object-names refs/tags/loose
	) | sort >expect &&

	test_cmp expect actual
'
# End of the script: no test cases follow.
test_done

183
t/t7703-repack-geometric.sh Executable file
View File

@ -0,0 +1,183 @@
#!/bin/sh
# Exercises "git repack --geometric" against repositories whose packs do
# or do not already form a geometric progression by object count.
test_description='git repack --geometric works correctly'
. ./test-lib.sh
# NOTE(review): this is a plain assignment with no "export", so child
# processes see the value only if the variable was already exported by the
# environment that runs this script.
GIT_TEST_MULTI_PACK_INDEX=0
# Object directory, relative to the repository each test enters with "cd".
objdir=.git/objects
# NOTE(review): $midx is not referenced by any test in this file.
midx=$objdir/pack/multi-pack-index
test_expect_success '--geometric with no packs' '
	git init geometric &&
	test_when_finished "rm -fr geometric" &&
	(
		cd geometric &&

		# Nothing has been committed to the fresh repository, so the
		# repack is expected to report that there is nothing to do.
		git repack --geometric 2 >out &&
		test_i18ngrep "Nothing new to pack" out
	)
'
test_expect_success '--geometric with one pack' '
	git init geometric &&
	test_when_finished "rm -fr geometric" &&
	(
		cd geometric &&

		# Pack the objects of a single commit with a plain repack
		# first.
		test_commit "base" &&
		git repack -d &&

		# The geometric repack that follows should find nothing to do.
		git repack --geometric 2 >out &&
		test_i18ngrep "Nothing new to pack" out
	)
'
test_expect_success '--geometric with an intact progression' '
	git init geometric &&
	test_when_finished "rm -fr geometric" &&
	(
		cd geometric &&

		# These packs already form a geometric progression.
		test_commit_bulk --start=1 1 && # 3 objects
		test_commit_bulk --start=2 2 && # 6 objects
		test_commit_bulk --start=4 4 && # 12 objects

		# The set of pack files must be the same before and after.
		find $objdir/pack -name "*.pack" | sort >expect &&
		git repack --geometric 2 -d &&
		find $objdir/pack -name "*.pack" | sort >actual &&

		test_cmp expect actual
	)
'
test_expect_success '--geometric with loose objects' '
	git init geometric &&
	test_when_finished "rm -fr geometric" &&
	(
		cd geometric &&

		# These packs already form a geometric progression.
		test_commit_bulk --start=1 1 && # 3 objects
		test_commit_bulk --start=2 2 && # 6 objects
		# The loose objects are packed together, breaking the
		# progression.
		test_commit loose && # 3 objects

		find $objdir/pack -name "*.pack" | sort >before &&
		git repack --geometric 2 -d &&
		find $objdir/pack -name "*.pack" | sort >after &&

		# First run: exactly one pack is added and none is removed.
		comm -13 before after >new &&
		comm -23 before after >removed &&
		test_line_count = 1 new &&
		test_must_be_empty removed &&

		# Second run.
		git repack --geometric 2 -d &&
		find $objdir/pack -name "*.pack" | sort >after &&

		# The progression (3, 3, 6) is combined into one new pack.
		test_line_count = 1 after
	)
'
test_expect_success '--geometric with small-pack rollup' '
	git init geometric &&
	test_when_finished "rm -fr geometric" &&
	(
		cd geometric &&

		test_commit_bulk --start=1 1 && # 3 objects
		test_commit_bulk --start=2 1 && # 3 objects
		find $objdir/pack -name "*.pack" | sort >small &&
		test_commit_bulk --start=3 4 && # 12 objects
		test_commit_bulk --start=7 8 && # 24 objects
		find $objdir/pack -name "*.pack" | sort >before &&

		git repack --geometric 2 -d &&

		# Three packs in total; two of the existing large ones, and one
		# new one.
		find $objdir/pack -name "*.pack" | sort >after &&
		test_line_count = 3 after &&

		# "large" lists the packs present before the repack that are
		# not among the small ones.
		comm -3 small before | tr -d "\t" >large &&
		test_line_count = 2 large &&

		# Every large pack must survive. Matching with grep -f would
		# succeed as soon as any one of them is still present, so
		# check for missing entries line by line instead.
		comm -23 large after >missing &&
		test_must_be_empty missing
	)
'
test_expect_success '--geometric with small- and large-pack rollup' '
	git init geometric &&
	test_when_finished "rm -fr geometric" &&
	(
		cd geometric &&

		# size(small1) + size(small2) > size(medium) / 2
		test_commit_bulk --start=1 1 && # 3 objects
		test_commit_bulk --start=2 1 && # 3 objects
		test_commit_bulk --start=2 3 && # 7 objects
		test_commit_bulk --start=6 9 && # 27 objects

		find $objdir/pack -name "*.pack" | sort >before &&
		git repack --geometric 2 -d &&
		find $objdir/pack -name "*.pack" | sort >after &&

		# Two packs in total; the largest pack from before running "git
		# repack", and one new one.
		comm -12 before after >untouched &&
		test_line_count = 1 untouched &&
		test_line_count = 2 after
	)
'
test_expect_success '--geometric ignores kept packs' '
	git init geometric &&
	test_when_finished "rm -fr geometric" &&
	(
		cd geometric &&

		test_commit kept && # 3 objects
		test_commit pack && # 3 objects

		kept_pack=$(git pack-objects --revs $objdir/pack/pack <<-EOF
		refs/tags/kept
		EOF
		) &&
		other_pack=$(git pack-objects --revs $objdir/pack/pack <<-EOF
		refs/tags/pack
		^refs/tags/kept
		EOF
		) &&

		# Neither pack contains more than twice the number of objects
		# in the other, so they should be combined. But marking one
		# with a .keep file on disk will "freeze" it, so the pack
		# structure should remain unchanged.
		touch $objdir/pack/pack-$kept_pack.keep &&

		find $objdir/pack -name "*.pack" | sort >before &&
		git repack --geometric 2 -d &&
		find $objdir/pack -name "*.pack" | sort >after &&

		# both packs should still exist
		test_path_is_file $objdir/pack/pack-$kept_pack.pack &&
		test_path_is_file $objdir/pack/pack-$other_pack.pack &&
		# and no new packs should be created
		test_cmp before after &&

		# Passing --pack-kept-objects causes packs with a .keep file to
		# be repacked, too.
		git repack --geometric 2 -d --pack-kept-objects &&
		find $objdir/pack -name "*.pack" >after &&
		test_line_count = 1 after
	)
'
# End of the script: no test cases follow.
test_done