midx.c: extract struct midx_fanout

To build up a list of objects (along with their packs, and the offsets
within those packs that each object appears at), the MIDX code
implements `get_sorted_entries()` which builds up a list of candidates,
sorts them, and then removes duplicate entries.

To do this, it keeps an array of `pack_midx_entry` structures that it
builds up once for each fanout level (ie., for all possible values of
the first byte of each object's ID).

This array is a function-local variable of `get_sorted_entries()`. Since
it uses the ALLOC_GROW() macro, having the `alloc_fanout` variable also
be local to that function, and only modified within that function is
convenient.

However, subsequent changes will extract the two ways this array is
filled (from a pack at some fanout value, and from an existing MIDX at
some fanout value) into separate functions. Instead of passing around
pointers to the entries array, along with `nr_fanout` and
`alloc_fanout`, encapsulate these three into a structure instead. Then
pass around a pointer to this structure instead.

This patch does not yet extract the above two functions, but sets us up
to begin doing so in the following commit. For now, the implementation
of get_sorted_entries() is only modified to replace `entries_by_fanout`
with `fanout.entries`, `nr_fanout` with `fanout.nr`, and so on.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Taylor Blau 2022-08-22 15:50:38 -04:00 committed by Junio C Hamano
parent 0b6203c4ef
commit 989d9cbd5c

54
midx.c
View File

@ -577,6 +577,22 @@ static void fill_pack_entry(uint32_t pack_int_id,
entry->preferred = !!preferred;
}
struct midx_fanout {
struct pack_midx_entry *entries;
uint32_t nr;
uint32_t alloc;
};
static void midx_fanout_grow(struct midx_fanout *fanout, uint32_t nr)
{
ALLOC_GROW(fanout->entries, nr, fanout->alloc);
}
static void midx_fanout_sort(struct midx_fanout *fanout)
{
QSORT(fanout->entries, fanout->nr, midx_oid_compare);
}
/*
* It is possible to artificially get into a state where there are many
* duplicate copies of objects. That can create high memory pressure if
@ -595,8 +611,8 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
int preferred_pack)
{
uint32_t cur_fanout, cur_pack, cur_object;
uint32_t alloc_fanout, alloc_objects, total_objects = 0;
struct pack_midx_entry *entries_by_fanout = NULL;
uint32_t alloc_objects, total_objects = 0;
struct midx_fanout fanout = { 0 };
struct pack_midx_entry *deduplicated_entries = NULL;
uint32_t start_pack = m ? m->num_packs : 0;
@ -608,14 +624,14 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
* slices to be evenly distributed, with some noise. Hence,
* allocate slightly more than one 256th.
*/
alloc_objects = alloc_fanout = total_objects > 3200 ? total_objects / 200 : 16;
alloc_objects = fanout.alloc = total_objects > 3200 ? total_objects / 200 : 16;
ALLOC_ARRAY(entries_by_fanout, alloc_fanout);
ALLOC_ARRAY(fanout.entries, fanout.alloc);
ALLOC_ARRAY(deduplicated_entries, alloc_objects);
*nr_objects = 0;
for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) {
uint32_t nr_fanout = 0;
fanout.nr = 0;
if (m) {
uint32_t start = 0, end;
@ -625,15 +641,15 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
end = ntohl(m->chunk_oid_fanout[cur_fanout]);
for (cur_object = start; cur_object < end; cur_object++) {
ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout);
midx_fanout_grow(&fanout, fanout.nr + 1);
nth_midxed_pack_midx_entry(m,
&entries_by_fanout[nr_fanout],
&fanout.entries[fanout.nr],
cur_object);
if (nth_midxed_pack_int_id(m, cur_object) == preferred_pack)
entries_by_fanout[nr_fanout].preferred = 1;
fanout.entries[fanout.nr].preferred = 1;
else
entries_by_fanout[nr_fanout].preferred = 0;
nr_fanout++;
fanout.entries[fanout.nr].preferred = 0;
fanout.nr++;
}
}
@ -646,36 +662,36 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
end = get_pack_fanout(info[cur_pack].p, cur_fanout);
for (cur_object = start; cur_object < end; cur_object++) {
ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout);
midx_fanout_grow(&fanout, fanout.nr + 1);
fill_pack_entry(cur_pack,
info[cur_pack].p,
cur_object,
&entries_by_fanout[nr_fanout],
&fanout.entries[fanout.nr],
preferred);
nr_fanout++;
fanout.nr++;
}
}
QSORT(entries_by_fanout, nr_fanout, midx_oid_compare);
midx_fanout_sort(&fanout);
/*
* The batch is now sorted by OID and then mtime (descending).
* Take only the first duplicate.
*/
for (cur_object = 0; cur_object < nr_fanout; cur_object++) {
if (cur_object && oideq(&entries_by_fanout[cur_object - 1].oid,
&entries_by_fanout[cur_object].oid))
for (cur_object = 0; cur_object < fanout.nr; cur_object++) {
if (cur_object && oideq(&fanout.entries[cur_object - 1].oid,
&fanout.entries[cur_object].oid))
continue;
ALLOC_GROW(deduplicated_entries, *nr_objects + 1, alloc_objects);
memcpy(&deduplicated_entries[*nr_objects],
&entries_by_fanout[cur_object],
&fanout.entries[cur_object],
sizeof(struct pack_midx_entry));
(*nr_objects)++;
}
}
free(entries_by_fanout);
free(fanout.entries);
return deduplicated_entries;
}