From 989d9cbd5c32fad54391a4ed06b1271f0a891077 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 22 Aug 2022 15:50:38 -0400 Subject: [PATCH] midx.c: extract `struct midx_fanout` To build up a list of objects (along with their packs, and the offsets within those packs that each object appears at), the MIDX code implements `get_sorted_entries()` which builds up a list of candidates, sorts them, and then removes duplicate entries. To do this, it keeps an array of `pack_midx_entry` structures that it builds up once for each fanout level (ie., for all possible values of the first byte of each object's ID). This array is a function-local variable of `get_sorted_entries()`. Since it uses the ALLOC_GROW() macro, having the `alloc_fanout` variable also be local to that function, and only modified within that function is convenient. However, subsequent changes will extract the two ways this array is filled (from a pack at some fanout value, and from an existing MIDX at some fanout value) into separate functions. Instead of passing around pointers to the entries array, along with `nr_fanout` and `alloc_fanout`, encapsulate these three into a structure instead. Then pass around a pointer to this structure instead. This patch does not yet extract the above two functions, but sets us up to begin doing so in the following commit. For now, the implementation of get_sorted_entries() is only modified to replace `entries_by_fanout` with `fanout.entries`, `nr_fanout` with `fanout.nr`, and so on. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 54 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/midx.c b/midx.c index 5f0dd386b0..b4f6e37cd0 100644 --- a/midx.c +++ b/midx.c @@ -577,6 +577,22 @@ static void fill_pack_entry(uint32_t pack_int_id, entry->preferred = !!preferred; } +struct midx_fanout { + struct pack_midx_entry *entries; + uint32_t nr; + uint32_t alloc; +}; + +static void midx_fanout_grow(struct midx_fanout *fanout, uint32_t nr) +{ + ALLOC_GROW(fanout->entries, nr, fanout->alloc); +} + +static void midx_fanout_sort(struct midx_fanout *fanout) +{ + QSORT(fanout->entries, fanout->nr, midx_oid_compare); +} + /* * It is possible to artificially get into a state where there are many * duplicate copies of objects. That can create high memory pressure if @@ -595,8 +611,8 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, int preferred_pack) { uint32_t cur_fanout, cur_pack, cur_object; - uint32_t alloc_fanout, alloc_objects, total_objects = 0; - struct pack_midx_entry *entries_by_fanout = NULL; + uint32_t alloc_objects, total_objects = 0; + struct midx_fanout fanout = { 0 }; struct pack_midx_entry *deduplicated_entries = NULL; uint32_t start_pack = m ? m->num_packs : 0; @@ -608,14 +624,14 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, * slices to be evenly distributed, with some noise. Hence, * allocate slightly more than one 256th. */ - alloc_objects = alloc_fanout = total_objects > 3200 ? total_objects / 200 : 16; + alloc_objects = fanout.alloc = total_objects > 3200 ? total_objects / 200 : 16; - ALLOC_ARRAY(entries_by_fanout, alloc_fanout); + ALLOC_ARRAY(fanout.entries, fanout.alloc); ALLOC_ARRAY(deduplicated_entries, alloc_objects); *nr_objects = 0; for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) { - uint32_t nr_fanout = 0; + fanout.nr = 0; if (m) { uint32_t start = 0, end; @@ -625,15 +641,15 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, end = ntohl(m->chunk_oid_fanout[cur_fanout]); for (cur_object = start; cur_object < end; cur_object++) { - ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout); + midx_fanout_grow(&fanout, fanout.nr + 1); nth_midxed_pack_midx_entry(m, - &entries_by_fanout[nr_fanout], + &fanout.entries[fanout.nr], cur_object); if (nth_midxed_pack_int_id(m, cur_object) == preferred_pack) - entries_by_fanout[nr_fanout].preferred = 1; + fanout.entries[fanout.nr].preferred = 1; else - entries_by_fanout[nr_fanout].preferred = 0; - nr_fanout++; + fanout.entries[fanout.nr].preferred = 0; + fanout.nr++; } } @@ -646,36 +662,36 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, end = get_pack_fanout(info[cur_pack].p, cur_fanout); for (cur_object = start; cur_object < end; cur_object++) { - ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout); + midx_fanout_grow(&fanout, fanout.nr + 1); fill_pack_entry(cur_pack, info[cur_pack].p, cur_object, - &entries_by_fanout[nr_fanout], + &fanout.entries[fanout.nr], preferred); - nr_fanout++; + fanout.nr++; } } - QSORT(entries_by_fanout, nr_fanout, midx_oid_compare); + midx_fanout_sort(&fanout); /* * The batch is now sorted by OID and then mtime (descending). * Take only the first duplicate. */ - for (cur_object = 0; cur_object < nr_fanout; cur_object++) { - if (cur_object && oideq(&entries_by_fanout[cur_object - 1].oid, - &entries_by_fanout[cur_object].oid)) + for (cur_object = 0; cur_object < fanout.nr; cur_object++) { + if (cur_object && oideq(&fanout.entries[cur_object - 1].oid, + &fanout.entries[cur_object].oid)) continue; ALLOC_GROW(deduplicated_entries, *nr_objects + 1, alloc_objects); memcpy(&deduplicated_entries[*nr_objects], - &entries_by_fanout[cur_object], + &fanout.entries[cur_object], sizeof(struct pack_midx_entry)); (*nr_objects)++; } } - free(entries_by_fanout); + free(fanout.entries); return deduplicated_entries; }