Merge branch 'nd/pack-objects-pack-struct'

"git pack-objects" needs to allocate tons of "struct object_entry"
while doing its work, and shrinking its size helps the performance
quite a bit.

* nd/pack-objects-pack-struct:
  ci: exercise the whole test suite with uncommon code in pack-objects
  pack-objects: reorder members to shrink struct object_entry
  pack-objects: shrink delta_size field in struct object_entry
  pack-objects: shrink size field in struct object_entry
  pack-objects: clarify the use of object_entry::size
  pack-objects: don't check size when the object is bad
  pack-objects: shrink z_delta_size field in struct object_entry
  pack-objects: refer to delta objects by index instead of pointer
  pack-objects: move in_pack out of struct object_entry
  pack-objects: move in_pack_pos out of struct object_entry
  pack-objects: use bitfield for object_entry::depth
  pack-objects: use bitfield for object_entry::dfs_state
  pack-objects: turn type and in_pack_type to bitfields
  pack-objects: a bit of document about struct object_entry
  read-cache.c: make $GIT_TEST_SPLIT_INDEX boolean
Junio C Hamano 2018-05-23 14:38:19 +09:00
commit ad635e82d6
16 changed files with 656 additions and 167 deletions
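The core idea of the series, sketched outside of Git's code (illustration only; the names and widths below are hypothetical stand-ins, not the actual object_entry layout): pointer-sized links and unsigned long sizes are replaced by 32-bit indices into a shared object array and by narrow bitfields, so the per-object bookkeeping struct shrinks, and pack-objects allocates one such struct per object it packs.

/*
 * Illustration only -- not Git code. Compare a "fat" layout built from
 * pointers and longs with a "slim" layout built from indices and bitfields.
 */
#include <stdint.h>
#include <stdio.h>

struct fat_entry {			/* roughly the old shape */
	unsigned long size;
	struct fat_entry *delta;
	struct fat_entry *delta_child;
	struct fat_entry *delta_sibling;
	unsigned long delta_size;
	int depth;
	unsigned int in_pack_pos;
};

struct slim_entry {			/* roughly the new idea */
	uint32_t delta_idx;		/* 1-based index, 0 means "none" */
	uint32_t delta_child_idx;
	uint32_t delta_sibling_idx;
	unsigned size_:31;
	unsigned size_valid:1;
	unsigned delta_size_:20;
	unsigned depth:12;
};

int main(void)
{
	/* on a typical LP64 system the slim layout is well under half the size */
	printf("fat:  %zu bytes\n", sizeof(struct fat_entry));
	printf("slim: %zu bytes\n", sizeof(struct slim_entry));
	return 0;
}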


@@ -2472,6 +2472,7 @@ pack.window::
 pack.depth::
 	The maximum delta depth used by linkgit:git-pack-objects[1] when no
 	maximum depth is given on the command line. Defaults to 50.
+	Maximum value is 4095.
 
 pack.windowMemory::
 	The maximum size of memory that is consumed by each thread
@@ -2508,7 +2509,8 @@ pack.deltaCacheLimit::
 	The maximum size of a delta, that is cached in
 	linkgit:git-pack-objects[1]. This cache is used to speed up the
 	writing object phase by not having to recompute the final delta
-	result once the best match for all objects is found. Defaults to 1000.
+	result once the best match for all objects is found.
+	Defaults to 1000. Maximum value is 65535.
 
 pack.threads::
 	Specifies the number of threads to spawn when searching for best


@@ -96,7 +96,9 @@ base-name::
 	it too deep affects the performance on the unpacker
 	side, because delta data needs to be applied that many
 	times to get to the necessary object.
-	The default value for --window is 10 and --depth is 50.
++
+The default value for --window is 10 and --depth is 50. The maximum
+depth is 4095.
 
 --window-memory=<n>::
 	This option provides an additional limit on top of `--window`;


@@ -90,7 +90,9 @@ other objects in that pack they already have locally.
 	space. `--depth` limits the maximum delta depth; making it too deep
 	affects the performance on the unpacker side, because delta data needs
 	to be applied that many times to get to the necessary object.
-	The default value for --window is 10 and --depth is 50.
++
+The default value for --window is 10 and --depth is 50. The maximum
+depth is 4095.
 
 --threads=<n>::
 	This option is passed through to `git pack-objects`.
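The new documented ceiling of 4095 is simply the largest value that fits in the 12-bit depth field introduced later in this diff (OE_DEPTH_BITS); a minimal check, assuming nothing beyond that bit width:

/* Illustration only: the documented maximum depth is the largest
 * value a 12-bit field can hold. */
#include <stdio.h>

#define OE_DEPTH_BITS 12	/* value taken from pack-objects.h below */

int main(void)
{
	printf("%d\n", (1 << OE_DEPTH_BITS) - 1);	/* prints 4095 */
	return 0;
}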


@@ -32,6 +32,18 @@
 #include "object-store.h"
 #include "dir.h"
 
+#define IN_PACK(obj) oe_in_pack(&to_pack, obj)
+#define SIZE(obj) oe_size(&to_pack, obj)
+#define SET_SIZE(obj,size) oe_set_size(&to_pack, obj, size)
+#define DELTA_SIZE(obj) oe_delta_size(&to_pack, obj)
+#define DELTA(obj) oe_delta(&to_pack, obj)
+#define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
+#define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
+#define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
+#define SET_DELTA_SIZE(obj, val) oe_set_delta_size(&to_pack, obj, val)
+#define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
+#define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)
+
 static const char *pack_usage[] = {
 	N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"),
 	N_("git pack-objects [<options>...] <base-name> [< <ref-list> | < <object-list>]"),
@ -129,13 +141,14 @@ static void *get_delta(struct object_entry *entry)
buf = read_object_file(&entry->idx.oid, &type, &size); buf = read_object_file(&entry->idx.oid, &type, &size);
if (!buf) if (!buf)
die("unable to read %s", oid_to_hex(&entry->idx.oid)); die("unable to read %s", oid_to_hex(&entry->idx.oid));
base_buf = read_object_file(&entry->delta->idx.oid, &type, &base_size); base_buf = read_object_file(&DELTA(entry)->idx.oid, &type,
&base_size);
if (!base_buf) if (!base_buf)
die("unable to read %s", die("unable to read %s",
oid_to_hex(&entry->delta->idx.oid)); oid_to_hex(&DELTA(entry)->idx.oid));
delta_buf = diff_delta(base_buf, base_size, delta_buf = diff_delta(base_buf, base_size,
buf, size, &delta_size, 0); buf, size, &delta_size, 0);
if (!delta_buf || delta_size != entry->delta_size) if (!delta_buf || delta_size != DELTA_SIZE(entry))
die("delta size changed"); die("delta size changed");
free(buf); free(buf);
free(base_buf); free(base_buf);
@ -268,8 +281,8 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
struct git_istream *st = NULL; struct git_istream *st = NULL;
if (!usable_delta) { if (!usable_delta) {
if (entry->type == OBJ_BLOB && if (oe_type(entry) == OBJ_BLOB &&
entry->size > big_file_threshold && oe_size_greater_than(&to_pack, entry, big_file_threshold) &&
(st = open_istream(&entry->idx.oid, &type, &size, NULL)) != NULL) (st = open_istream(&entry->idx.oid, &type, &size, NULL)) != NULL)
buf = NULL; buf = NULL;
else { else {
@ -285,15 +298,15 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
FREE_AND_NULL(entry->delta_data); FREE_AND_NULL(entry->delta_data);
entry->z_delta_size = 0; entry->z_delta_size = 0;
} else if (entry->delta_data) { } else if (entry->delta_data) {
size = entry->delta_size; size = DELTA_SIZE(entry);
buf = entry->delta_data; buf = entry->delta_data;
entry->delta_data = NULL; entry->delta_data = NULL;
type = (allow_ofs_delta && entry->delta->idx.offset) ? type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
OBJ_OFS_DELTA : OBJ_REF_DELTA; OBJ_OFS_DELTA : OBJ_REF_DELTA;
} else { } else {
buf = get_delta(entry); buf = get_delta(entry);
size = entry->delta_size; size = DELTA_SIZE(entry);
type = (allow_ofs_delta && entry->delta->idx.offset) ? type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
OBJ_OFS_DELTA : OBJ_REF_DELTA; OBJ_OFS_DELTA : OBJ_REF_DELTA;
} }
@ -317,7 +330,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
* encoding of the relative offset for the delta * encoding of the relative offset for the delta
* base from this object's position in the pack. * base from this object's position in the pack.
*/ */
off_t ofs = entry->idx.offset - entry->delta->idx.offset; off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
unsigned pos = sizeof(dheader) - 1; unsigned pos = sizeof(dheader) - 1;
dheader[pos] = ofs & 127; dheader[pos] = ofs & 127;
while (ofs >>= 7) while (ofs >>= 7)
@ -343,7 +356,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
return 0; return 0;
} }
hashwrite(f, header, hdrlen); hashwrite(f, header, hdrlen);
hashwrite(f, entry->delta->idx.oid.hash, 20); hashwrite(f, DELTA(entry)->idx.oid.hash, 20);
hdrlen += 20; hdrlen += 20;
} else { } else {
if (limit && hdrlen + datalen + 20 >= limit) { if (limit && hdrlen + datalen + 20 >= limit) {
@ -369,21 +382,22 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry, static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
unsigned long limit, int usable_delta) unsigned long limit, int usable_delta)
{ {
struct packed_git *p = entry->in_pack; struct packed_git *p = IN_PACK(entry);
struct pack_window *w_curs = NULL; struct pack_window *w_curs = NULL;
struct revindex_entry *revidx; struct revindex_entry *revidx;
off_t offset; off_t offset;
enum object_type type = entry->type; enum object_type type = oe_type(entry);
off_t datalen; off_t datalen;
unsigned char header[MAX_PACK_OBJECT_HEADER], unsigned char header[MAX_PACK_OBJECT_HEADER],
dheader[MAX_PACK_OBJECT_HEADER]; dheader[MAX_PACK_OBJECT_HEADER];
unsigned hdrlen; unsigned hdrlen;
unsigned long entry_size = SIZE(entry);
if (entry->delta) if (DELTA(entry))
type = (allow_ofs_delta && entry->delta->idx.offset) ? type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
OBJ_OFS_DELTA : OBJ_REF_DELTA; OBJ_OFS_DELTA : OBJ_REF_DELTA;
hdrlen = encode_in_pack_object_header(header, sizeof(header), hdrlen = encode_in_pack_object_header(header, sizeof(header),
type, entry->size); type, entry_size);
offset = entry->in_pack_offset; offset = entry->in_pack_offset;
revidx = find_pack_revindex(p, offset); revidx = find_pack_revindex(p, offset);
@ -400,7 +414,7 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
datalen -= entry->in_pack_header_size; datalen -= entry->in_pack_header_size;
if (!pack_to_stdout && p->index_version == 1 && if (!pack_to_stdout && p->index_version == 1 &&
check_pack_inflate(p, &w_curs, offset, datalen, entry->size)) { check_pack_inflate(p, &w_curs, offset, datalen, entry_size)) {
error("corrupt packed object for %s", error("corrupt packed object for %s",
oid_to_hex(&entry->idx.oid)); oid_to_hex(&entry->idx.oid));
unuse_pack(&w_curs); unuse_pack(&w_curs);
@ -408,7 +422,7 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
} }
if (type == OBJ_OFS_DELTA) { if (type == OBJ_OFS_DELTA) {
off_t ofs = entry->idx.offset - entry->delta->idx.offset; off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
unsigned pos = sizeof(dheader) - 1; unsigned pos = sizeof(dheader) - 1;
dheader[pos] = ofs & 127; dheader[pos] = ofs & 127;
while (ofs >>= 7) while (ofs >>= 7)
@ -427,7 +441,7 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
return 0; return 0;
} }
hashwrite(f, header, hdrlen); hashwrite(f, header, hdrlen);
hashwrite(f, entry->delta->idx.oid.hash, 20); hashwrite(f, DELTA(entry)->idx.oid.hash, 20);
hdrlen += 20; hdrlen += 20;
reused_delta++; reused_delta++;
} else { } else {
@ -467,28 +481,29 @@ static off_t write_object(struct hashfile *f,
else else
limit = pack_size_limit - write_offset; limit = pack_size_limit - write_offset;
if (!entry->delta) if (!DELTA(entry))
usable_delta = 0; /* no delta */ usable_delta = 0; /* no delta */
else if (!pack_size_limit) else if (!pack_size_limit)
usable_delta = 1; /* unlimited packfile */ usable_delta = 1; /* unlimited packfile */
else if (entry->delta->idx.offset == (off_t)-1) else if (DELTA(entry)->idx.offset == (off_t)-1)
usable_delta = 0; /* base was written to another pack */ usable_delta = 0; /* base was written to another pack */
else if (entry->delta->idx.offset) else if (DELTA(entry)->idx.offset)
usable_delta = 1; /* base already exists in this pack */ usable_delta = 1; /* base already exists in this pack */
else else
usable_delta = 0; /* base could end up in another pack */ usable_delta = 0; /* base could end up in another pack */
if (!reuse_object) if (!reuse_object)
to_reuse = 0; /* explicit */ to_reuse = 0; /* explicit */
else if (!entry->in_pack) else if (!IN_PACK(entry))
to_reuse = 0; /* can't reuse what we don't have */ to_reuse = 0; /* can't reuse what we don't have */
else if (entry->type == OBJ_REF_DELTA || entry->type == OBJ_OFS_DELTA) else if (oe_type(entry) == OBJ_REF_DELTA ||
oe_type(entry) == OBJ_OFS_DELTA)
/* check_object() decided it for us ... */ /* check_object() decided it for us ... */
to_reuse = usable_delta; to_reuse = usable_delta;
/* ... but pack split may override that */ /* ... but pack split may override that */
else if (entry->type != entry->in_pack_type) else if (oe_type(entry) != entry->in_pack_type)
to_reuse = 0; /* pack has delta which is unusable */ to_reuse = 0; /* pack has delta which is unusable */
else if (entry->delta) else if (DELTA(entry))
to_reuse = 0; /* we want to pack afresh */ to_reuse = 0; /* we want to pack afresh */
else else
to_reuse = 1; /* we have it in-pack undeltified, to_reuse = 1; /* we have it in-pack undeltified,
@ -540,12 +555,12 @@ static enum write_one_status write_one(struct hashfile *f,
} }
/* if we are deltified, write out base object first. */ /* if we are deltified, write out base object first. */
if (e->delta) { if (DELTA(e)) {
e->idx.offset = 1; /* now recurse */ e->idx.offset = 1; /* now recurse */
switch (write_one(f, e->delta, offset)) { switch (write_one(f, DELTA(e), offset)) {
case WRITE_ONE_RECURSIVE: case WRITE_ONE_RECURSIVE:
/* we cannot depend on this one */ /* we cannot depend on this one */
e->delta = NULL; SET_DELTA(e, NULL);
break; break;
default: default:
break; break;
@ -607,34 +622,34 @@ static void add_descendants_to_write_order(struct object_entry **wo,
/* add this node... */ /* add this node... */
add_to_write_order(wo, endp, e); add_to_write_order(wo, endp, e);
/* all its siblings... */ /* all its siblings... */
for (s = e->delta_sibling; s; s = s->delta_sibling) { for (s = DELTA_SIBLING(e); s; s = DELTA_SIBLING(s)) {
add_to_write_order(wo, endp, s); add_to_write_order(wo, endp, s);
} }
} }
/* drop down a level to add left subtree nodes if possible */ /* drop down a level to add left subtree nodes if possible */
if (e->delta_child) { if (DELTA_CHILD(e)) {
add_to_order = 1; add_to_order = 1;
e = e->delta_child; e = DELTA_CHILD(e);
} else { } else {
add_to_order = 0; add_to_order = 0;
/* our sibling might have some children, it is next */ /* our sibling might have some children, it is next */
if (e->delta_sibling) { if (DELTA_SIBLING(e)) {
e = e->delta_sibling; e = DELTA_SIBLING(e);
continue; continue;
} }
/* go back to our parent node */ /* go back to our parent node */
e = e->delta; e = DELTA(e);
while (e && !e->delta_sibling) { while (e && !DELTA_SIBLING(e)) {
/* we're on the right side of a subtree, keep /* we're on the right side of a subtree, keep
* going up until we can go right again */ * going up until we can go right again */
e = e->delta; e = DELTA(e);
} }
if (!e) { if (!e) {
/* done- we hit our original root node */ /* done- we hit our original root node */
return; return;
} }
/* pass it off to sibling at this level */ /* pass it off to sibling at this level */
e = e->delta_sibling; e = DELTA_SIBLING(e);
} }
}; };
} }
@ -645,7 +660,7 @@ static void add_family_to_write_order(struct object_entry **wo,
{ {
struct object_entry *root; struct object_entry *root;
for (root = e; root->delta; root = root->delta) for (root = e; DELTA(root); root = DELTA(root))
; /* nothing */ ; /* nothing */
add_descendants_to_write_order(wo, endp, root); add_descendants_to_write_order(wo, endp, root);
} }
@ -660,8 +675,8 @@ static struct object_entry **compute_write_order(void)
for (i = 0; i < to_pack.nr_objects; i++) { for (i = 0; i < to_pack.nr_objects; i++) {
objects[i].tagged = 0; objects[i].tagged = 0;
objects[i].filled = 0; objects[i].filled = 0;
objects[i].delta_child = NULL; SET_DELTA_CHILD(&objects[i], NULL);
objects[i].delta_sibling = NULL; SET_DELTA_SIBLING(&objects[i], NULL);
} }
/* /*
@ -671,11 +686,11 @@ static struct object_entry **compute_write_order(void)
*/ */
for (i = to_pack.nr_objects; i > 0;) { for (i = to_pack.nr_objects; i > 0;) {
struct object_entry *e = &objects[--i]; struct object_entry *e = &objects[--i];
if (!e->delta) if (!DELTA(e))
continue; continue;
/* Mark me as the first child */ /* Mark me as the first child */
e->delta_sibling = e->delta->delta_child; e->delta_sibling_idx = DELTA(e)->delta_child_idx;
e->delta->delta_child = e; SET_DELTA_CHILD(DELTA(e), e);
} }
/* /*
@ -707,8 +722,8 @@ static struct object_entry **compute_write_order(void)
* And then all remaining commits and tags. * And then all remaining commits and tags.
*/ */
for (i = last_untagged; i < to_pack.nr_objects; i++) { for (i = last_untagged; i < to_pack.nr_objects; i++) {
if (objects[i].type != OBJ_COMMIT && if (oe_type(&objects[i]) != OBJ_COMMIT &&
objects[i].type != OBJ_TAG) oe_type(&objects[i]) != OBJ_TAG)
continue; continue;
add_to_write_order(wo, &wo_end, &objects[i]); add_to_write_order(wo, &wo_end, &objects[i]);
} }
@ -717,7 +732,7 @@ static struct object_entry **compute_write_order(void)
* And then all the trees. * And then all the trees.
*/ */
for (i = last_untagged; i < to_pack.nr_objects; i++) { for (i = last_untagged; i < to_pack.nr_objects; i++) {
if (objects[i].type != OBJ_TREE) if (oe_type(&objects[i]) != OBJ_TREE)
continue; continue;
add_to_write_order(wo, &wo_end, &objects[i]); add_to_write_order(wo, &wo_end, &objects[i]);
} }
@ -880,7 +895,8 @@ static void write_pack_file(void)
if (write_bitmap_index) { if (write_bitmap_index) {
bitmap_writer_set_checksum(oid.hash); bitmap_writer_set_checksum(oid.hash);
bitmap_writer_build_type_index(written_list, nr_written); bitmap_writer_build_type_index(
&to_pack, written_list, nr_written);
} }
finish_tmp_packfile(&tmpname, pack_tmp_name, finish_tmp_packfile(&tmpname, pack_tmp_name,
@ -1071,14 +1087,13 @@ static void create_object_entry(const struct object_id *oid,
entry = packlist_alloc(&to_pack, oid->hash, index_pos); entry = packlist_alloc(&to_pack, oid->hash, index_pos);
entry->hash = hash; entry->hash = hash;
if (type) oe_set_type(entry, type);
entry->type = type;
if (exclude) if (exclude)
entry->preferred_base = 1; entry->preferred_base = 1;
else else
nr_result++; nr_result++;
if (found_pack) { if (found_pack) {
entry->in_pack = found_pack; oe_set_in_pack(&to_pack, entry, found_pack);
entry->in_pack_offset = found_offset; entry->in_pack_offset = found_offset;
} }
@ -1403,8 +1418,10 @@ static void cleanup_preferred_base(void)
static void check_object(struct object_entry *entry) static void check_object(struct object_entry *entry)
{ {
if (entry->in_pack) { unsigned long canonical_size;
struct packed_git *p = entry->in_pack;
if (IN_PACK(entry)) {
struct packed_git *p = IN_PACK(entry);
struct pack_window *w_curs = NULL; struct pack_window *w_curs = NULL;
const unsigned char *base_ref = NULL; const unsigned char *base_ref = NULL;
struct object_entry *base_entry; struct object_entry *base_entry;
@ -1412,6 +1429,8 @@ static void check_object(struct object_entry *entry)
unsigned long avail; unsigned long avail;
off_t ofs; off_t ofs;
unsigned char *buf, c; unsigned char *buf, c;
enum object_type type;
unsigned long in_pack_size;
buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail); buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);
@ -1420,11 +1439,15 @@ static void check_object(struct object_entry *entry)
* since non-delta representations could still be reused. * since non-delta representations could still be reused.
*/ */
used = unpack_object_header_buffer(buf, avail, used = unpack_object_header_buffer(buf, avail,
&entry->in_pack_type, &type,
&entry->size); &in_pack_size);
if (used == 0) if (used == 0)
goto give_up; goto give_up;
if (type < 0)
BUG("invalid type %d", type);
entry->in_pack_type = type;
/* /*
* Determine if this is a delta and if so whether we can * Determine if this is a delta and if so whether we can
* reuse it or not. Otherwise let's find out as cheaply as * reuse it or not. Otherwise let's find out as cheaply as
@ -1433,9 +1456,10 @@ static void check_object(struct object_entry *entry)
switch (entry->in_pack_type) { switch (entry->in_pack_type) {
default: default:
/* Not a delta hence we've already got all we need. */ /* Not a delta hence we've already got all we need. */
entry->type = entry->in_pack_type; oe_set_type(entry, entry->in_pack_type);
SET_SIZE(entry, in_pack_size);
entry->in_pack_header_size = used; entry->in_pack_header_size = used;
if (entry->type < OBJ_COMMIT || entry->type > OBJ_BLOB) if (oe_type(entry) < OBJ_COMMIT || oe_type(entry) > OBJ_BLOB)
goto give_up; goto give_up;
unuse_pack(&w_curs); unuse_pack(&w_curs);
return; return;
@ -1489,25 +1513,29 @@ static void check_object(struct object_entry *entry)
* deltify other objects against, in order to avoid * deltify other objects against, in order to avoid
* circular deltas. * circular deltas.
*/ */
entry->type = entry->in_pack_type; oe_set_type(entry, entry->in_pack_type);
entry->delta = base_entry; SET_SIZE(entry, in_pack_size); /* delta size */
entry->delta_size = entry->size; SET_DELTA(entry, base_entry);
entry->delta_sibling = base_entry->delta_child; SET_DELTA_SIZE(entry, in_pack_size);
base_entry->delta_child = entry; entry->delta_sibling_idx = base_entry->delta_child_idx;
SET_DELTA_CHILD(base_entry, entry);
unuse_pack(&w_curs); unuse_pack(&w_curs);
return; return;
} }
if (entry->type) { if (oe_type(entry)) {
off_t delta_pos;
/* /*
* This must be a delta and we already know what the * This must be a delta and we already know what the
* final object type is. Let's extract the actual * final object type is. Let's extract the actual
* object size from the delta header. * object size from the delta header.
*/ */
entry->size = get_size_from_delta(p, &w_curs, delta_pos = entry->in_pack_offset + entry->in_pack_header_size;
entry->in_pack_offset + entry->in_pack_header_size); canonical_size = get_size_from_delta(p, &w_curs, delta_pos);
if (entry->size == 0) if (canonical_size == 0)
goto give_up; goto give_up;
SET_SIZE(entry, canonical_size);
unuse_pack(&w_curs); unuse_pack(&w_curs);
return; return;
} }
@ -1521,28 +1549,34 @@ static void check_object(struct object_entry *entry)
unuse_pack(&w_curs); unuse_pack(&w_curs);
} }
entry->type = oid_object_info(the_repository, &entry->idx.oid, oe_set_type(entry,
&entry->size); oid_object_info(the_repository, &entry->idx.oid, &canonical_size));
if (entry->type_valid) {
SET_SIZE(entry, canonical_size);
} else {
/* /*
* The error condition is checked in prepare_pack(). This is * Bad object type is checked in prepare_pack(). This is
* to permit a missing preferred base object to be ignored * to permit a missing preferred base object to be ignored
* as a preferred base. Doing so can result in a larger * as a preferred base. Doing so can result in a larger
* pack file, but the transfer will still take place. * pack file, but the transfer will still take place.
*/ */
}
} }
static int pack_offset_sort(const void *_a, const void *_b) static int pack_offset_sort(const void *_a, const void *_b)
{ {
const struct object_entry *a = *(struct object_entry **)_a; const struct object_entry *a = *(struct object_entry **)_a;
const struct object_entry *b = *(struct object_entry **)_b; const struct object_entry *b = *(struct object_entry **)_b;
const struct packed_git *a_in_pack = IN_PACK(a);
const struct packed_git *b_in_pack = IN_PACK(b);
/* avoid filesystem trashing with loose objects */ /* avoid filesystem trashing with loose objects */
if (!a->in_pack && !b->in_pack) if (!a_in_pack && !b_in_pack)
return oidcmp(&a->idx.oid, &b->idx.oid); return oidcmp(&a->idx.oid, &b->idx.oid);
if (a->in_pack < b->in_pack) if (a_in_pack < b_in_pack)
return -1; return -1;
if (a->in_pack > b->in_pack) if (a_in_pack > b_in_pack)
return 1; return 1;
return a->in_pack_offset < b->in_pack_offset ? -1 : return a->in_pack_offset < b->in_pack_offset ? -1 :
(a->in_pack_offset > b->in_pack_offset); (a->in_pack_offset > b->in_pack_offset);
@ -1563,31 +1597,37 @@ static int pack_offset_sort(const void *_a, const void *_b)
*/ */
static void drop_reused_delta(struct object_entry *entry) static void drop_reused_delta(struct object_entry *entry)
{ {
struct object_entry **p = &entry->delta->delta_child; unsigned *idx = &to_pack.objects[entry->delta_idx - 1].delta_child_idx;
struct object_info oi = OBJECT_INFO_INIT; struct object_info oi = OBJECT_INFO_INIT;
enum object_type type;
unsigned long size;
while (*p) { while (*idx) {
if (*p == entry) struct object_entry *oe = &to_pack.objects[*idx - 1];
*p = (*p)->delta_sibling;
if (oe == entry)
*idx = oe->delta_sibling_idx;
else else
p = &(*p)->delta_sibling; idx = &oe->delta_sibling_idx;
} }
entry->delta = NULL; SET_DELTA(entry, NULL);
entry->depth = 0; entry->depth = 0;
oi.sizep = &entry->size; oi.sizep = &size;
oi.typep = &entry->type; oi.typep = &type;
if (packed_object_info(the_repository, entry->in_pack, if (packed_object_info(the_repository, IN_PACK(entry), entry->in_pack_offset, &oi) < 0) {
entry->in_pack_offset, &oi) < 0) {
/* /*
* We failed to get the info from this pack for some reason; * We failed to get the info from this pack for some reason;
* fall back to sha1_object_info, which may find another copy. * fall back to sha1_object_info, which may find another copy.
* And if that fails, the error will be recorded in entry->type * And if that fails, the error will be recorded in oe_type(entry)
* and dealt with in prepare_pack(). * and dealt with in prepare_pack().
*/ */
entry->type = oid_object_info(the_repository, &entry->idx.oid, oe_set_type(entry,
&entry->size); oid_object_info(the_repository, &entry->idx.oid, &size));
} else {
oe_set_type(entry, type);
} }
SET_SIZE(entry, size);
} }
/* /*
@ -1611,7 +1651,7 @@ static void break_delta_chains(struct object_entry *entry)
for (cur = entry, total_depth = 0; for (cur = entry, total_depth = 0;
cur; cur;
cur = cur->delta, total_depth++) { cur = DELTA(cur), total_depth++) {
if (cur->dfs_state == DFS_DONE) { if (cur->dfs_state == DFS_DONE) {
/* /*
* We've already seen this object and know it isn't * We've already seen this object and know it isn't
@ -1636,7 +1676,7 @@ static void break_delta_chains(struct object_entry *entry)
* it's not a delta, we're done traversing, but we'll mark it * it's not a delta, we're done traversing, but we'll mark it
* done to save time on future traversals. * done to save time on future traversals.
*/ */
if (!cur->delta) { if (!DELTA(cur)) {
cur->dfs_state = DFS_DONE; cur->dfs_state = DFS_DONE;
break; break;
} }
@ -1659,7 +1699,7 @@ static void break_delta_chains(struct object_entry *entry)
* We keep all commits in the chain that we examined. * We keep all commits in the chain that we examined.
*/ */
cur->dfs_state = DFS_ACTIVE; cur->dfs_state = DFS_ACTIVE;
if (cur->delta->dfs_state == DFS_ACTIVE) { if (DELTA(cur)->dfs_state == DFS_ACTIVE) {
drop_reused_delta(cur); drop_reused_delta(cur);
cur->dfs_state = DFS_DONE; cur->dfs_state = DFS_DONE;
break; break;
@ -1674,7 +1714,7 @@ static void break_delta_chains(struct object_entry *entry)
* an extra "next" pointer to keep going after we reset cur->delta. * an extra "next" pointer to keep going after we reset cur->delta.
*/ */
for (cur = entry; cur; cur = next) { for (cur = entry; cur; cur = next) {
next = cur->delta; next = DELTA(cur);
/* /*
* We should have a chain of zero or more ACTIVE states down to * We should have a chain of zero or more ACTIVE states down to
@ -1731,7 +1771,8 @@ static void get_object_details(void)
for (i = 0; i < to_pack.nr_objects; i++) { for (i = 0; i < to_pack.nr_objects; i++) {
struct object_entry *entry = sorted_by_offset[i]; struct object_entry *entry = sorted_by_offset[i];
check_object(entry); check_object(entry);
if (big_file_threshold < entry->size) if (entry->type_valid &&
oe_size_greater_than(&to_pack, entry, big_file_threshold))
entry->no_try_delta = 1; entry->no_try_delta = 1;
display_progress(progress_state, i + 1); display_progress(progress_state, i + 1);
} }
@ -1760,10 +1801,14 @@ static int type_size_sort(const void *_a, const void *_b)
{ {
const struct object_entry *a = *(struct object_entry **)_a; const struct object_entry *a = *(struct object_entry **)_a;
const struct object_entry *b = *(struct object_entry **)_b; const struct object_entry *b = *(struct object_entry **)_b;
enum object_type a_type = oe_type(a);
enum object_type b_type = oe_type(b);
unsigned long a_size = SIZE(a);
unsigned long b_size = SIZE(b);
if (a->type > b->type) if (a_type > b_type)
return -1; return -1;
if (a->type < b->type) if (a_type < b_type)
return 1; return 1;
if (a->hash > b->hash) if (a->hash > b->hash)
return -1; return -1;
@ -1773,9 +1818,9 @@ static int type_size_sort(const void *_a, const void *_b)
return -1; return -1;
if (a->preferred_base < b->preferred_base) if (a->preferred_base < b->preferred_base)
return 1; return 1;
if (a->size > b->size) if (a_size > b_size)
return -1; return -1;
if (a->size < b->size) if (a_size < b_size)
return 1; return 1;
return a < b ? -1 : (a > b); /* newest first */ return a < b ? -1 : (a > b); /* newest first */
} }
@@ -1828,6 +1873,46 @@ static pthread_mutex_t progress_mutex;
 
 #endif
 
+/*
+ * Return the size of the object without doing any delta
+ * reconstruction (so non-deltas are true object sizes, but deltas
+ * return the size of the delta data).
+ */
+unsigned long oe_get_size_slow(struct packing_data *pack,
+			       const struct object_entry *e)
+{
+	struct packed_git *p;
+	struct pack_window *w_curs;
+	unsigned char *buf;
+	enum object_type type;
+	unsigned long used, avail, size;
+
+	if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) {
+		read_lock();
+		if (oid_object_info(the_repository, &e->idx.oid, &size) < 0)
+			die(_("unable to get size of %s"),
+			    oid_to_hex(&e->idx.oid));
+		read_unlock();
+		return size;
+	}
+
+	p = oe_in_pack(pack, e);
+	if (!p)
+		BUG("when e->type is a delta, it must belong to a pack");
+
+	read_lock();
+	w_curs = NULL;
+	buf = use_pack(p, &w_curs, e->in_pack_offset, &avail);
+	used = unpack_object_header_buffer(buf, avail, &type, &size);
+	if (used == 0)
+		die(_("unable to parse object header of %s"),
+		    oid_to_hex(&e->idx.oid));
+
+	unuse_pack(&w_curs);
+	read_unlock();
+	return size;
+}
+
 static int try_delta(struct unpacked *trg, struct unpacked *src,
 		     unsigned max_depth, unsigned long *mem_usage)
 {
@ -1839,7 +1924,7 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
void *delta_buf; void *delta_buf;
/* Don't bother doing diffs between different types */ /* Don't bother doing diffs between different types */
if (trg_entry->type != src_entry->type) if (oe_type(trg_entry) != oe_type(src_entry))
return -1; return -1;
/* /*
@ -1850,8 +1935,8 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
* it, we will still save the transfer cost, as we already know * it, we will still save the transfer cost, as we already know
* the other side has it and we won't send src_entry at all. * the other side has it and we won't send src_entry at all.
*/ */
if (reuse_delta && trg_entry->in_pack && if (reuse_delta && IN_PACK(trg_entry) &&
trg_entry->in_pack == src_entry->in_pack && IN_PACK(trg_entry) == IN_PACK(src_entry) &&
!src_entry->preferred_base && !src_entry->preferred_base &&
trg_entry->in_pack_type != OBJ_REF_DELTA && trg_entry->in_pack_type != OBJ_REF_DELTA &&
trg_entry->in_pack_type != OBJ_OFS_DELTA) trg_entry->in_pack_type != OBJ_OFS_DELTA)
@ -1862,19 +1947,19 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
return 0; return 0;
/* Now some size filtering heuristics. */ /* Now some size filtering heuristics. */
trg_size = trg_entry->size; trg_size = SIZE(trg_entry);
if (!trg_entry->delta) { if (!DELTA(trg_entry)) {
max_size = trg_size/2 - 20; max_size = trg_size/2 - 20;
ref_depth = 1; ref_depth = 1;
} else { } else {
max_size = trg_entry->delta_size; max_size = DELTA_SIZE(trg_entry);
ref_depth = trg->depth; ref_depth = trg->depth;
} }
max_size = (uint64_t)max_size * (max_depth - src->depth) / max_size = (uint64_t)max_size * (max_depth - src->depth) /
(max_depth - ref_depth + 1); (max_depth - ref_depth + 1);
if (max_size == 0) if (max_size == 0)
return 0; return 0;
src_size = src_entry->size; src_size = SIZE(src_entry);
sizediff = src_size < trg_size ? trg_size - src_size : 0; sizediff = src_size < trg_size ? trg_size - src_size : 0;
if (sizediff >= max_size) if (sizediff >= max_size)
return 0; return 0;
@ -1936,10 +2021,14 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size); delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
if (!delta_buf) if (!delta_buf)
return 0; return 0;
if (delta_size >= (1U << OE_DELTA_SIZE_BITS)) {
free(delta_buf);
return 0;
}
if (trg_entry->delta) { if (DELTA(trg_entry)) {
/* Prefer only shallower same-sized deltas. */ /* Prefer only shallower same-sized deltas. */
if (delta_size == trg_entry->delta_size && if (delta_size == DELTA_SIZE(trg_entry) &&
src->depth + 1 >= trg->depth) { src->depth + 1 >= trg->depth) {
free(delta_buf); free(delta_buf);
return 0; return 0;
@ -1954,7 +2043,7 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
free(trg_entry->delta_data); free(trg_entry->delta_data);
cache_lock(); cache_lock();
if (trg_entry->delta_data) { if (trg_entry->delta_data) {
delta_cache_size -= trg_entry->delta_size; delta_cache_size -= DELTA_SIZE(trg_entry);
trg_entry->delta_data = NULL; trg_entry->delta_data = NULL;
} }
if (delta_cacheable(src_size, trg_size, delta_size)) { if (delta_cacheable(src_size, trg_size, delta_size)) {
@ -1966,8 +2055,8 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
free(delta_buf); free(delta_buf);
} }
trg_entry->delta = src_entry; SET_DELTA(trg_entry, src_entry);
trg_entry->delta_size = delta_size; SET_DELTA_SIZE(trg_entry, delta_size);
trg->depth = src->depth + 1; trg->depth = src->depth + 1;
return 1; return 1;
@ -1975,13 +2064,13 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
static unsigned int check_delta_limit(struct object_entry *me, unsigned int n) static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
{ {
struct object_entry *child = me->delta_child; struct object_entry *child = DELTA_CHILD(me);
unsigned int m = n; unsigned int m = n;
while (child) { while (child) {
unsigned int c = check_delta_limit(child, n + 1); unsigned int c = check_delta_limit(child, n + 1);
if (m < c) if (m < c)
m = c; m = c;
child = child->delta_sibling; child = DELTA_SIBLING(child);
} }
return m; return m;
} }
@ -1992,7 +2081,7 @@ static unsigned long free_unpacked(struct unpacked *n)
free_delta_index(n->index); free_delta_index(n->index);
n->index = NULL; n->index = NULL;
if (n->data) { if (n->data) {
freed_mem += n->entry->size; freed_mem += SIZE(n->entry);
FREE_AND_NULL(n->data); FREE_AND_NULL(n->data);
} }
n->entry = NULL; n->entry = NULL;
@ -2050,7 +2139,7 @@ static void find_deltas(struct object_entry **list, unsigned *list_size,
* otherwise they would become too deep. * otherwise they would become too deep.
*/ */
max_depth = depth; max_depth = depth;
if (entry->delta_child) { if (DELTA_CHILD(entry)) {
max_depth -= check_delta_limit(entry, 0); max_depth -= check_delta_limit(entry, 0);
if (max_depth <= 0) if (max_depth <= 0)
goto next; goto next;
@ -2088,19 +2177,26 @@ static void find_deltas(struct object_entry **list, unsigned *list_size,
* between writes at that moment. * between writes at that moment.
*/ */
if (entry->delta_data && !pack_to_stdout) { if (entry->delta_data && !pack_to_stdout) {
entry->z_delta_size = do_compress(&entry->delta_data, unsigned long size;
entry->delta_size);
size = do_compress(&entry->delta_data, DELTA_SIZE(entry));
if (size < (1U << OE_Z_DELTA_BITS)) {
entry->z_delta_size = size;
cache_lock(); cache_lock();
delta_cache_size -= entry->delta_size; delta_cache_size -= DELTA_SIZE(entry);
delta_cache_size += entry->z_delta_size; delta_cache_size += entry->z_delta_size;
cache_unlock(); cache_unlock();
} else {
FREE_AND_NULL(entry->delta_data);
entry->z_delta_size = 0;
}
} }
/* if we made n a delta, and if n is already at max /* if we made n a delta, and if n is already at max
* depth, leaving it in the window is pointless. we * depth, leaving it in the window is pointless. we
* should evict it first. * should evict it first.
*/ */
if (entry->delta && max_depth <= n->depth) if (DELTA(entry) && max_depth <= n->depth)
continue; continue;
/* /*
@ -2108,7 +2204,7 @@ static void find_deltas(struct object_entry **list, unsigned *list_size,
* currently deltified object, to keep it longer. It will * currently deltified object, to keep it longer. It will
* be the first base object to be attempted next. * be the first base object to be attempted next.
*/ */
if (entry->delta) { if (DELTA(entry)) {
struct unpacked swap = array[best_base]; struct unpacked swap = array[best_base];
int dist = (window + idx - best_base) % window; int dist = (window + idx - best_base) % window;
int dst = best_base; int dst = best_base;
@ -2429,13 +2525,14 @@ static void prepare_pack(int window, int depth)
for (i = 0; i < to_pack.nr_objects; i++) { for (i = 0; i < to_pack.nr_objects; i++) {
struct object_entry *entry = to_pack.objects + i; struct object_entry *entry = to_pack.objects + i;
if (entry->delta) if (DELTA(entry))
/* This happens if we decided to reuse existing /* This happens if we decided to reuse existing
* delta from a pack. "reuse_delta &&" is implied. * delta from a pack. "reuse_delta &&" is implied.
*/ */
continue; continue;
if (entry->size < 50) if (!entry->type_valid ||
oe_size_less_than(&to_pack, entry, 50))
continue; continue;
if (entry->no_try_delta) if (entry->no_try_delta)
@ -2443,11 +2540,11 @@ static void prepare_pack(int window, int depth)
if (!entry->preferred_base) { if (!entry->preferred_base) {
nr_deltas++; nr_deltas++;
if (entry->type < 0) if (oe_type(entry) < 0)
die("unable to get type of object %s", die("unable to get type of object %s",
oid_to_hex(&entry->idx.oid)); oid_to_hex(&entry->idx.oid));
} else { } else {
if (entry->type < 0) { if (oe_type(entry) < 0) {
/* /*
* This object is not found, but we * This object is not found, but we
* don't have to include it anyway. * don't have to include it anyway.
@ -2556,7 +2653,7 @@ static void read_object_list_from_stdin(void)
die("expected object ID, got garbage:\n %s", line); die("expected object ID, got garbage:\n %s", line);
add_preferred_base_object(p + 1); add_preferred_base_object(p + 1);
add_object_entry(&oid, 0, p + 1, 0); add_object_entry(&oid, OBJ_NONE, p + 1, 0);
} }
} }
@@ -3083,6 +3180,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 		OPT_END(),
 	};
 
+	if (DFS_NUM_STATES > (1 << OE_DFS_STATE_BITS))
+		BUG("too many dfs states, increase OE_DFS_STATE_BITS");
+
 	check_replace_refs = 0;
 
 	reset_pack_idx_option(&pack_idx_opts);
@@ -3099,6 +3199,17 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 	if (pack_to_stdout != !base_name || argc)
 		usage_with_options(pack_usage, pack_objects_options);
 
+	if (depth >= (1 << OE_DEPTH_BITS)) {
+		warning(_("delta chain depth %d is too deep, forcing %d"),
+			depth, (1 << OE_DEPTH_BITS) - 1);
+		depth = (1 << OE_DEPTH_BITS) - 1;
+	}
+	if (cache_max_small_delta_size >= (1U << OE_Z_DELTA_BITS)) {
+		warning(_("pack.deltaCacheLimit is too high, forcing %d"),
+			(1U << OE_Z_DELTA_BITS) - 1);
+		cache_max_small_delta_size = (1U << OE_Z_DELTA_BITS) - 1;
+	}
+
 	argv_array_push(&rp, "pack-objects");
 	if (thin) {
 		use_internal_rev_list = 1;
@@ -3217,6 +3328,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 		}
 	}
 
+	prepare_packing_data(&to_pack);
+
 	if (progress)
 		progress_state = start_progress(_("Enumerating objects"), 0);
 	if (!use_internal_rev_list)
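Throughout the file above, call sites stop reading entry->size directly and go through SIZE(), oe_size_greater_than() and oe_size_less_than(), because the cached size now lives in a 31-bit slot that can be too small for very large objects. A sketch of the underlying pattern with hypothetical names (a cached value plus a valid bit, and a slow recomputation path on a miss):

/* Illustration only -- simplified stand-in for the size_/size_valid scheme. */
#include <stdint.h>
#include <stdio.h>

struct entry {
	unsigned size_:31;	/* cached size, meaningful only if size_valid */
	unsigned size_valid:1;
	uint64_t real_size;	/* stand-in for "re-read the header from the pack" */
};

static uint64_t entry_size(const struct entry *e)
{
	if (e->size_valid)
		return e->size_;
	return e->real_size;	/* rare slow path, like oe_get_size_slow() */
}

static void entry_set_size(struct entry *e, uint64_t size)
{
	e->real_size = size;
	if (size < ((uint64_t)1 << 31)) {
		e->size_ = (unsigned)size;
		e->size_valid = 1;
	} else {
		e->size_valid = 0;	/* readers take the slow path */
	}
}

int main(void)
{
	struct entry e = { 0 };

	entry_set_size(&e, 123);
	printf("%llu\n", (unsigned long long)entry_size(&e));	/* 123 */

	entry_set_size(&e, (uint64_t)3 << 31);			/* does not fit in 31 bits */
	printf("%llu\n", (unsigned long long)entry_size(&e));	/* 6442450944 */
	return 0;
}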


@@ -373,6 +373,8 @@ extern void free_name_hash(struct index_state *istate);
 #define read_blob_data_from_cache(path, sz) read_blob_data_from_index(&the_index, (path), (sz))
 #endif
 
+#define TYPE_BITS 3
+
 /*
  * Values in this enum (except those outside the 3 bit range) are part
  * of pack file format. See Documentation/technical/pack-format.txt


@@ -11,7 +11,10 @@ make --jobs=2
 make --quiet test
 if test "$jobname" = "linux-gcc"
 then
-	GIT_TEST_SPLIT_INDEX=YesPlease make --quiet test
+	export GIT_TEST_SPLIT_INDEX=yes
+	export GIT_TEST_FULL_IN_PACK_ARRAY=true
+	export GIT_TEST_OE_SIZE=10
+	make --quiet test
 fi
 
 check_unignored_build_artifacts


@@ -71,6 +71,7 @@ struct packed_git {
 	int index_version;
 	time_t mtime;
 	int pack_fd;
+	int index;		/* for builtin/pack-objects.c */
 	unsigned pack_local:1,
 		 pack_keep:1,
 		 pack_keep_in_core:1,


@@ -25,7 +25,6 @@ struct object_array {
 
 #define OBJECT_ARRAY_INIT { 0, 0, NULL }
 
-#define TYPE_BITS 3
 /*
  * object flag allocation:
  * revision.h: 0---------10 26


@@ -48,7 +48,8 @@ void bitmap_writer_show_progress(int show)
 /**
  * Build the initial type index for the packfile
  */
-void bitmap_writer_build_type_index(struct pack_idx_entry **index,
+void bitmap_writer_build_type_index(struct packing_data *to_pack,
+				    struct pack_idx_entry **index,
 				    uint32_t index_nr)
 {
 	uint32_t i;
@@ -57,19 +58,20 @@ void bitmap_writer_build_type_index(struct pack_idx_entry **index,
 	writer.trees = ewah_new();
 	writer.blobs = ewah_new();
 	writer.tags = ewah_new();
+	ALLOC_ARRAY(to_pack->in_pack_pos, to_pack->nr_objects);
 
 	for (i = 0; i < index_nr; ++i) {
 		struct object_entry *entry = (struct object_entry *)index[i];
 		enum object_type real_type;
 
-		entry->in_pack_pos = i;
+		oe_set_in_pack_pos(to_pack, entry, i);
 
-		switch (entry->type) {
+		switch (oe_type(entry)) {
 		case OBJ_COMMIT:
 		case OBJ_TREE:
 		case OBJ_BLOB:
 		case OBJ_TAG:
-			real_type = entry->type;
+			real_type = oe_type(entry);
 			break;
 
 		default:
@@ -98,7 +100,7 @@ void bitmap_writer_build_type_index(struct pack_idx_entry **index,
 		default:
 			die("Missing type information for %s (%d/%d)",
 			    oid_to_hex(&entry->idx.oid), real_type,
-			    entry->type);
+			    oe_type(entry));
 		}
 	}
 }
@@ -147,7 +149,7 @@ static uint32_t find_object_pos(const unsigned char *sha1)
 		    "(object %s is missing)", sha1_to_hex(sha1));
 	}
 
-	return entry->in_pack_pos;
+	return oe_in_pack_pos(writer.to_pack, entry);
 }
 
 static void show_object(struct object *object, const char *name, void *data)
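in_pack_pos used to be a field every object_entry paid for; after this change it becomes an array owned by the packing data, indexed by the entry's position in the main object array, so only the bitmap writer carries the cost. A trimmed-down sketch of that parallel-array pattern (hypothetical names), mirroring the pointer arithmetic oe_in_pack_pos() and oe_set_in_pack_pos() use:

/* Illustration only -- a side array indexed by "entry minus array base". */
#include <stdio.h>
#include <stdlib.h>

struct entry { int payload; };

struct pack_data {
	struct entry *objects;
	unsigned int nr_objects;
	unsigned int *in_pack_pos;	/* parallel to objects[] */
};

static void set_pos(struct pack_data *pack, const struct entry *e, unsigned int pos)
{
	pack->in_pack_pos[e - pack->objects] = pos;
}

static unsigned int get_pos(const struct pack_data *pack, const struct entry *e)
{
	return pack->in_pack_pos[e - pack->objects];
}

int main(void)
{
	struct pack_data pack;

	pack.nr_objects = 4;
	pack.objects = calloc(pack.nr_objects, sizeof(*pack.objects));
	pack.in_pack_pos = calloc(pack.nr_objects, sizeof(*pack.in_pack_pos));

	set_pos(&pack, &pack.objects[2], 7);
	printf("%u\n", get_pos(&pack, &pack.objects[2]));	/* 7 */

	free(pack.objects);
	free(pack.in_pack_pos);
	return 0;
}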


@@ -1033,7 +1033,7 @@ int rebuild_existing_bitmaps(struct packing_data *mapping,
 
 		oe = packlist_find(mapping, sha1, NULL);
 		if (oe)
-			reposition[i] = oe->in_pack_pos + 1;
+			reposition[i] = oe_in_pack_pos(mapping, oe) + 1;
 	}
 
 	rebuild = bitmap_new();


@@ -44,7 +44,9 @@ int rebuild_existing_bitmaps(struct packing_data *mapping, khash_sha1 *reused_bi
 void bitmap_writer_show_progress(int show);
 void bitmap_writer_set_checksum(unsigned char *sha1);
-void bitmap_writer_build_type_index(struct pack_idx_entry **index, uint32_t index_nr);
+void bitmap_writer_build_type_index(struct packing_data *to_pack,
+				    struct pack_idx_entry **index,
+				    uint32_t index_nr);
 void bitmap_writer_reuse_bitmaps(struct packing_data *to_pack);
 void bitmap_writer_select_commits(struct commit **indexed_commits,
 				  unsigned int indexed_commits_nr, int max_bitmaps);


@ -2,6 +2,8 @@
#include "object.h" #include "object.h"
#include "pack.h" #include "pack.h"
#include "pack-objects.h" #include "pack-objects.h"
#include "packfile.h"
#include "config.h"
static uint32_t locate_object_entry_hash(struct packing_data *pdata, static uint32_t locate_object_entry_hash(struct packing_data *pdata,
const unsigned char *sha1, const unsigned char *sha1,
@ -86,6 +88,66 @@ struct object_entry *packlist_find(struct packing_data *pdata,
return &pdata->objects[pdata->index[i] - 1]; return &pdata->objects[pdata->index[i] - 1];
} }
static void prepare_in_pack_by_idx(struct packing_data *pdata)
{
struct packed_git **mapping, *p;
int cnt = 0, nr = 1U << OE_IN_PACK_BITS;
ALLOC_ARRAY(mapping, nr);
/*
* oe_in_pack() on an all-zero'd object_entry
* (i.e. in_pack_idx also zero) should return NULL.
*/
mapping[cnt++] = NULL;
for (p = get_packed_git(the_repository); p; p = p->next, cnt++) {
if (cnt == nr) {
free(mapping);
return;
}
p->index = cnt;
mapping[cnt] = p;
}
pdata->in_pack_by_idx = mapping;
}
/*
* A new pack appears after prepare_in_pack_by_idx() has been
* run. This is likely a race.
*
* We could map this new pack to in_pack_by_idx[] array, but then we
* have to deal with full array anyway. And since it's hard to test
* this fall back code, just stay simple and fall back to using
* in_pack[] array.
*/
void oe_map_new_pack(struct packing_data *pack,
struct packed_git *p)
{
uint32_t i;
REALLOC_ARRAY(pack->in_pack, pack->nr_alloc);
for (i = 0; i < pack->nr_objects; i++)
pack->in_pack[i] = oe_in_pack(pack, pack->objects + i);
FREE_AND_NULL(pack->in_pack_by_idx);
}
/* assume pdata is already zero'd by caller */
void prepare_packing_data(struct packing_data *pdata)
{
if (git_env_bool("GIT_TEST_FULL_IN_PACK_ARRAY", 0)) {
/*
* do not initialize in_pack_by_idx[] to force the
* slow path in oe_in_pack()
*/
} else {
prepare_in_pack_by_idx(pdata);
}
pdata->oe_size_limit = git_env_ulong("GIT_TEST_OE_SIZE",
1U << OE_SIZE_BITS);
}
struct object_entry *packlist_alloc(struct packing_data *pdata, struct object_entry *packlist_alloc(struct packing_data *pdata,
const unsigned char *sha1, const unsigned char *sha1,
uint32_t index_pos) uint32_t index_pos)
@ -95,6 +157,9 @@ struct object_entry *packlist_alloc(struct packing_data *pdata,
if (pdata->nr_objects >= pdata->nr_alloc) { if (pdata->nr_objects >= pdata->nr_alloc) {
pdata->nr_alloc = (pdata->nr_alloc + 1024) * 3 / 2; pdata->nr_alloc = (pdata->nr_alloc + 1024) * 3 / 2;
REALLOC_ARRAY(pdata->objects, pdata->nr_alloc); REALLOC_ARRAY(pdata->objects, pdata->nr_alloc);
if (!pdata->in_pack_by_idx)
REALLOC_ARRAY(pdata->in_pack, pdata->nr_alloc);
} }
new_entry = pdata->objects + pdata->nr_objects++; new_entry = pdata->objects + pdata->nr_objects++;
@ -107,5 +172,8 @@ struct object_entry *packlist_alloc(struct packing_data *pdata,
else else
pdata->index[index_pos] = pdata->nr_objects; pdata->index[index_pos] = pdata->nr_objects;
if (pdata->in_pack)
pdata->in_pack[pdata->nr_objects - 1] = NULL;
return new_entry; return new_entry;
} }
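prepare_in_pack_by_idx() above assigns each source pack a small integer id and keeps a translation table, with id 0 reserved so that a zero-initialized entry means "not in any pack"; entries can then store a 10-bit id instead of an 8-byte pointer, and the code falls back to a plain pointer-per-entry array if the table ever fills up. A reduced sketch of the id-table part (hypothetical names, fallback omitted):

/* Illustration only -- small-id mapping with slot 0 reserved for "no pack". */
#include <stdio.h>

#define IN_PACK_BITS 10

struct pack { const char *name; int id; };

static struct pack *by_idx[1 << IN_PACK_BITS];	/* by_idx[0] stays NULL */
static int nr_ids = 1;				/* id 0 is reserved */

static int assign_id(struct pack *p)
{
	if (nr_ids == (1 << IN_PACK_BITS))
		return -1;			/* table full: caller must fall back */
	p->id = nr_ids;
	by_idx[nr_ids++] = p;
	return p->id;
}

int main(void)
{
	struct pack a = { "pack-a", 0 };
	unsigned entry_in_pack_idx = 0;		/* the narrow field stored per entry */

	assign_id(&a);
	entry_in_pack_idx = a.id;

	printf("%s\n", by_idx[entry_in_pack_idx] ? by_idx[entry_in_pack_idx]->name
						 : "(no pack)");
	return 0;
}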


@@ -1,47 +1,125 @@
 #ifndef PACK_OBJECTS_H
 #define PACK_OBJECTS_H
 
+#include "object-store.h"
+
 #define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024)
 
-struct object_entry {
-	struct pack_idx_entry idx;
-	unsigned long size;	/* uncompressed size */
-	struct packed_git *in_pack;	/* already in pack */
-	off_t in_pack_offset;
-	struct object_entry *delta;	/* delta base object */
-	struct object_entry *delta_child; /* deltified objects who bases me */
-	struct object_entry *delta_sibling; /* other deltified objects who
-					     * uses the same base as me
-					     */
-	void *delta_data;	/* cached delta (uncompressed) */
-	unsigned long delta_size;	/* delta data size (uncompressed) */
-	unsigned long z_delta_size;	/* delta data size (compressed) */
-	enum object_type type;
-	enum object_type in_pack_type;	/* could be delta */
-	uint32_t hash;		/* name hint hash */
-	unsigned int in_pack_pos;
-	unsigned char in_pack_header_size;
-	unsigned preferred_base:1; /*
-				    * we do not pack this, but is available
-				    * to be used as the base object to delta
-				    * objects against.
-				    */
-	unsigned no_try_delta:1;
-	unsigned tagged:1; /* near the very tip of refs */
-	unsigned filled:1; /* assigned write-order */
-
-	/*
-	 * State flags for depth-first search used for analyzing delta cycles.
-	 *
-	 * The depth is measured in delta-links to the base (so if A is a delta
-	 * against B, then A has a depth of 1, and B a depth of 0).
-	 */
-	enum {
-		DFS_NONE = 0,
-		DFS_ACTIVE,
-		DFS_DONE
-	} dfs_state;
-	int depth;
+#define OE_DFS_STATE_BITS	2
+#define OE_DEPTH_BITS		12
+#define OE_IN_PACK_BITS		10
+#define OE_Z_DELTA_BITS		20
+/*
+ * Note that oe_set_size() becomes expensive when the given size is
+ * above this limit. Don't lower it too much.
+ */
+#define OE_SIZE_BITS		31
+#define OE_DELTA_SIZE_BITS	20
+
+/*
+ * State flags for depth-first search used for analyzing delta cycles.
+ *
+ * The depth is measured in delta-links to the base (so if A is a delta
+ * against B, then A has a depth of 1, and B a depth of 0).
+ */
+enum dfs_state {
+	DFS_NONE = 0,
+	DFS_ACTIVE,
+	DFS_DONE,
+	DFS_NUM_STATES
+};
+
+/*
+ * The size of struct nearly determines pack-objects's memory
+ * consumption. This struct is packed tight for that reason. When you
+ * add or reorder something in this struct, think a bit about this.
+ *
+ * basic object info
+ * -----------------
+ * idx.oid is filled up before delta searching starts. idx.crc32 is
+ * only valid after the object is written out and will be used for
+ * generating the index. idx.offset will be both gradually set and
+ * used in writing phase (base objects get offset first, then deltas
+ * refer to them)
+ *
+ * "size" is the uncompressed object size. Compressed size of the raw
+ * data for an object in a pack is not stored anywhere but is computed
+ * and made available when reverse .idx is made. Note that when a
+ * delta is reused, "size" is the uncompressed _delta_ size, not the
+ * canonical one after the delta has been applied.
+ *
+ * "hash" contains a path name hash which is used for sorting the
+ * delta list and also during delta searching. Once prepare_pack()
+ * returns it's no longer needed.
+ *
+ * source pack info
+ * ----------------
+ * The (in_pack, in_pack_offset) tuple contains the location of the
+ * object in the source pack. in_pack_header_size allows quickly
+ * skipping the header and going straight to the zlib stream.
+ *
+ * "type" and "in_pack_type" both describe object type. in_pack_type
+ * may contain a delta type, while type is always the canonical type.
+ *
+ * deltas
+ * ------
+ * Delta links (delta, delta_child and delta_sibling) are created to
+ * reflect that delta graph from the source pack then updated or added
+ * during delta searching phase when we find better deltas.
+ *
+ * delta_child and delta_sibling are last needed in
+ * compute_write_order(). "delta" and "delta_size" must remain valid
+ * at object writing phase in case the delta is not cached.
+ *
+ * If a delta is cached in memory and is compressed, delta_data points
+ * to the data and z_delta_size contains the compressed size. If it's
+ * uncompressed [1], z_delta_size must be zero. delta_size is always
+ * the uncompressed size and must be valid even if the delta is not
+ * cached.
+ *
+ * [1] during try_delta phase we don't bother with compressing because
+ * the delta could be quickly replaced with a better one.
+ */
+struct object_entry {
+	struct pack_idx_entry idx;
+	void *delta_data;	/* cached delta (uncompressed) */
+	off_t in_pack_offset;
+	uint32_t hash;		/* name hint hash */
+	unsigned size_:OE_SIZE_BITS;
+	unsigned size_valid:1;
+	uint32_t delta_idx;	/* delta base object */
+	uint32_t delta_child_idx; /* deltified objects who bases me */
+	uint32_t delta_sibling_idx; /* other deltified objects who
+				     * uses the same base as me
+				     */
+	unsigned delta_size_:OE_DELTA_SIZE_BITS; /* delta data size (uncompressed) */
+	unsigned delta_size_valid:1;
+	unsigned in_pack_idx:OE_IN_PACK_BITS;	/* already in pack */
+	unsigned z_delta_size:OE_Z_DELTA_BITS;
+	unsigned type_valid:1;
+	unsigned type_:TYPE_BITS;
+	unsigned no_try_delta:1;
+	unsigned in_pack_type:TYPE_BITS; /* could be delta */
+	unsigned preferred_base:1; /*
+				    * we do not pack this, but is available
+				    * to be used as the base object to delta
+				    * objects against.
+				    */
+	unsigned tagged:1; /* near the very tip of refs */
+	unsigned filled:1; /* assigned write-order */
+	unsigned dfs_state:OE_DFS_STATE_BITS;
+	unsigned char in_pack_header_size;
+	unsigned depth:OE_DEPTH_BITS;
+
+	/*
+	 * pahole results on 64-bit linux (gcc and clang)
+	 *
+	 *   size: 80, bit_padding: 20 bits, holes: 8 bits
+	 *
+	 * and on 32-bit (gcc)
+	 *
+	 *   size: 76, bit_padding: 20 bits, holes: 8 bits
+	 */
 };
 
 struct packing_data {
@@ -50,8 +128,22 @@ struct packing_data {
 	int32_t *index;
 	uint32_t index_size;
 
+	unsigned int *in_pack_pos;
+
+	/*
+	 * Only one of these can be non-NULL and they have different
+	 * sizes. if in_pack_by_idx is allocated, oe_in_pack() returns
+	 * the pack of an object using in_pack_idx field. If not,
+	 * in_pack[] array is used the same way as in_pack_pos[]
+	 */
+	struct packed_git **in_pack_by_idx;
+	struct packed_git **in_pack;
+
+	uintmax_t oe_size_limit;
 };
 
+void prepare_packing_data(struct packing_data *pdata);
+
 struct object_entry *packlist_alloc(struct packing_data *pdata,
 				    const unsigned char *sha1,
 				    uint32_t index_pos);
@ -80,4 +172,178 @@ static inline uint32_t pack_name_hash(const char *name)
return hash; return hash;
} }
static inline enum object_type oe_type(const struct object_entry *e)
{
return e->type_valid ? e->type_ : OBJ_BAD;
}
static inline void oe_set_type(struct object_entry *e,
enum object_type type)
{
if (type >= OBJ_ANY)
BUG("OBJ_ANY cannot be set in pack-objects code");
e->type_valid = type >= OBJ_NONE;
e->type_ = (unsigned)type;
}
static inline unsigned int oe_in_pack_pos(const struct packing_data *pack,
const struct object_entry *e)
{
return pack->in_pack_pos[e - pack->objects];
}
static inline void oe_set_in_pack_pos(const struct packing_data *pack,
const struct object_entry *e,
unsigned int pos)
{
pack->in_pack_pos[e - pack->objects] = pos;
}
static inline struct packed_git *oe_in_pack(const struct packing_data *pack,
const struct object_entry *e)
{
if (pack->in_pack_by_idx)
return pack->in_pack_by_idx[e->in_pack_idx];
else
return pack->in_pack[e - pack->objects];
}
void oe_map_new_pack(struct packing_data *pack,
struct packed_git *p);
static inline void oe_set_in_pack(struct packing_data *pack,
struct object_entry *e,
struct packed_git *p)
{
if (!p->index)
oe_map_new_pack(pack, p);
if (pack->in_pack_by_idx)
e->in_pack_idx = p->index;
else
pack->in_pack[e - pack->objects] = p;
}
static inline struct object_entry *oe_delta(
const struct packing_data *pack,
const struct object_entry *e)
{
if (e->delta_idx)
return &pack->objects[e->delta_idx - 1];
return NULL;
}
static inline void oe_set_delta(struct packing_data *pack,
struct object_entry *e,
struct object_entry *delta)
{
if (delta)
e->delta_idx = (delta - pack->objects) + 1;
else
e->delta_idx = 0;
}
static inline struct object_entry *oe_delta_child(
const struct packing_data *pack,
const struct object_entry *e)
{
if (e->delta_child_idx)
return &pack->objects[e->delta_child_idx - 1];
return NULL;
}
static inline void oe_set_delta_child(struct packing_data *pack,
struct object_entry *e,
struct object_entry *delta)
{
if (delta)
e->delta_child_idx = (delta - pack->objects) + 1;
else
e->delta_child_idx = 0;
}
static inline struct object_entry *oe_delta_sibling(
const struct packing_data *pack,
const struct object_entry *e)
{
if (e->delta_sibling_idx)
return &pack->objects[e->delta_sibling_idx - 1];
return NULL;
}
static inline void oe_set_delta_sibling(struct packing_data *pack,
struct object_entry *e,
struct object_entry *delta)
{
if (delta)
e->delta_sibling_idx = (delta - pack->objects) + 1;
else
e->delta_sibling_idx = 0;
}
unsigned long oe_get_size_slow(struct packing_data *pack,
const struct object_entry *e);
static inline unsigned long oe_size(struct packing_data *pack,
const struct object_entry *e)
{
if (e->size_valid)
return e->size_;
return oe_get_size_slow(pack, e);
}
static inline int oe_size_less_than(struct packing_data *pack,
const struct object_entry *lhs,
unsigned long rhs)
{
if (lhs->size_valid)
return lhs->size_ < rhs;
if (rhs < pack->oe_size_limit) /* rhs < 2^x <= lhs ? */
return 0;
return oe_get_size_slow(pack, lhs) < rhs;
}
static inline int oe_size_greater_than(struct packing_data *pack,
const struct object_entry *lhs,
unsigned long rhs)
{
if (lhs->size_valid)
return lhs->size_ > rhs;
if (rhs < pack->oe_size_limit) /* rhs < 2^x <= lhs ? */
return 1;
return oe_get_size_slow(pack, lhs) > rhs;
}
static inline void oe_set_size(struct packing_data *pack,
struct object_entry *e,
unsigned long size)
{
if (size < pack->oe_size_limit) {
e->size_ = size;
e->size_valid = 1;
} else {
e->size_valid = 0;
if (oe_get_size_slow(pack, e) != size)
BUG("'size' is supposed to be the object size!");
}
}
static inline unsigned long oe_delta_size(struct packing_data *pack,
const struct object_entry *e)
{
if (e->delta_size_valid)
return e->delta_size_;
return oe_size(pack, e);
}
static inline void oe_set_delta_size(struct packing_data *pack,
struct object_entry *e,
unsigned long size)
{
e->delta_size_ = size;
e->delta_size_valid = e->delta_size_ == size;
if (!e->delta_size_valid && size != oe_size(pack, e))
BUG("this can only happen in check_object() "
"where delta size is the same as entry size");
}
#endif #endif
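The delta links in the new struct are 1-based indices into to_pack.objects rather than pointers, with 0 meaning "no delta"; that is the encoding oe_delta() and oe_set_delta() above apply and undo. A self-contained sketch of the same encoding with hypothetical names:

/* Illustration only -- 1-based index links into a shared array, 0 = NULL. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct entry { uint32_t delta_idx; };

struct pack_data { struct entry *objects; };

static void set_delta(struct pack_data *pack, struct entry *e, struct entry *base)
{
	e->delta_idx = base ? (uint32_t)(base - pack->objects) + 1 : 0;
}

static struct entry *get_delta(struct pack_data *pack, struct entry *e)
{
	return e->delta_idx ? &pack->objects[e->delta_idx - 1] : NULL;
}

int main(void)
{
	struct pack_data pack;

	pack.objects = calloc(4, sizeof(*pack.objects));	/* zeroed: no bases yet */

	set_delta(&pack, &pack.objects[3], &pack.objects[1]);
	printf("%td\n", get_delta(&pack, &pack.objects[3]) - pack.objects);	/* 1 */

	set_delta(&pack, &pack.objects[3], NULL);
	printf("%s\n", get_delta(&pack, &pack.objects[3]) ? "has base" : "no base");

	free(pack.objects);
	return 0;
}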


@@ -2268,7 +2268,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 
 	if (!istate->version) {
 		istate->version = get_index_format_default();
-		if (getenv("GIT_TEST_SPLIT_INDEX"))
+		if (git_env_bool("GIT_TEST_SPLIT_INDEX", 0))
 			init_split_index(istate);
 	}
 
@@ -2559,7 +2559,7 @@ int write_locked_index(struct index_state *istate, struct lock_file *lock,
 		goto out;
 	}
 
-	if (getenv("GIT_TEST_SPLIT_INDEX")) {
+	if (git_env_bool("GIT_TEST_SPLIT_INDEX", 0)) {
 		int v = si->base_sha1[0];
 
 		if ((v & 15) < 6)
 			istate->cache_changed |= SPLIT_INDEX_ORDERED;


@@ -293,6 +293,28 @@ and know what setup is needed for it. Or when you want to run
 everything up to a certain test.
 
+Running tests with special setups
+---------------------------------
+
+The whole test suite could be run to test some special features
+that cannot be easily covered by a few specific test cases. These
+could be enabled by running the test suite with correct GIT_TEST_
+environment set.
+
+GIT_TEST_SPLIT_INDEX=<boolean> forces split-index mode on the whole
+test suite. Accept any boolean values that are accepted by git-config.
+
+GIT_TEST_FULL_IN_PACK_ARRAY=<boolean> exercises the uncommon
+pack-objects code path where there are more than 1024 packs even if
+the actual number of packs in repository is below this limit. Accept
+any boolean values that are accepted by git-config.
+
+GIT_TEST_OE_SIZE=<n> exercises the uncommon pack-objects code path
+where we do not cache object size in memory and read it from existing
+packs on demand. This normally only happens when the object size is
+over 2GB. This variable forces the code path on any object larger than
+<n> bytes.
+
 Naming Tests
 ------------


@@ -457,6 +457,11 @@ test_expect_success !PTHREADS,C_LOCALE_OUTPUT 'pack-objects --threads=N or pack.
 	grep -F "no threads support, ignoring pack.threads" err
 '
 
+test_expect_success 'pack-objects in too-many-packs mode' '
+	GIT_TEST_FULL_IN_PACK_ARRAY=1 git repack -ad &&
+	git fsck
+'
+
 #
 # WARNING!
 #