pack-bitmap: prepare to read lookup table extension

Earlier change teaches Git to write bitmap lookup table. But Git
does not know how to parse them.

Teach Git to parse the existing bitmap lookup table. The older
versions of Git are not affected by it. Those versions ignore the
lookup table.

Mentored-by: Taylor Blau <me@ttaylorr.com>
Co-Mentored-by: Kaartic Sivaraam <kaartic.sivaraam@gmail.com>
Signed-off-by: Abhradeep Chakraborty <chakrabortyabhradeep79@gmail.com>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Abhradeep Chakraborty 2022-08-14 16:55:10 +00:00 committed by Junio C Hamano
parent 76f14b777c
commit 28cd730680
3 changed files with 312 additions and 9 deletions

View File

@ -83,6 +83,12 @@ struct bitmap_index {
/* The checksum of the packfile or MIDX; points into map. */ /* The checksum of the packfile or MIDX; points into map. */
const unsigned char *checksum; const unsigned char *checksum;
/*
* If not NULL, this point into the commit table extension
* (within the memory mapped region `map`).
*/
unsigned char *table_lookup;
/* /*
* Extended index. * Extended index.
* *
@ -186,6 +192,16 @@ static int load_bitmap_header(struct bitmap_index *index)
index->hashes = (void *)(index_end - cache_size); index->hashes = (void *)(index_end - cache_size);
index_end -= cache_size; index_end -= cache_size;
} }
if (flags & BITMAP_OPT_LOOKUP_TABLE) {
size_t table_size = st_mult(ntohl(header->entry_count),
BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH);
if (table_size > index_end - index->map - header_size)
return error(_("corrupted bitmap index file (too short to fit lookup table)"));
if (git_env_bool("GIT_TEST_READ_COMMIT_TABLE", 1))
index->table_lookup = (void *)(index_end - table_size);
index_end -= table_size;
}
} }
index->entry_count = ntohl(header->entry_count); index->entry_count = ntohl(header->entry_count);
@ -212,9 +228,11 @@ static struct stored_bitmap *store_bitmap(struct bitmap_index *index,
hash_pos = kh_put_oid_map(index->bitmaps, stored->oid, &ret); hash_pos = kh_put_oid_map(index->bitmaps, stored->oid, &ret);
/* a 0 return code means the insertion succeeded with no changes, /*
* because the SHA1 already existed on the map. this is bad, there * A 0 return code means the insertion succeeded with no changes,
* shouldn't be duplicated commits in the index */ * because the SHA1 already existed on the map. This is bad, there
* shouldn't be duplicated commits in the index.
*/
if (ret == 0) { if (ret == 0) {
error(_("duplicate entry in bitmap index: '%s'"), oid_to_hex(oid)); error(_("duplicate entry in bitmap index: '%s'"), oid_to_hex(oid));
return NULL; return NULL;
@ -482,7 +500,7 @@ static int load_bitmap(struct bitmap_index *bitmap_git)
!(bitmap_git->tags = read_bitmap_1(bitmap_git))) !(bitmap_git->tags = read_bitmap_1(bitmap_git)))
goto failed; goto failed;
if (load_bitmap_entries_v1(bitmap_git) < 0) if (!bitmap_git->table_lookup && load_bitmap_entries_v1(bitmap_git) < 0)
goto failed; goto failed;
return 0; return 0;
@ -570,13 +588,256 @@ struct include_data {
struct bitmap *seen; struct bitmap *seen;
}; };
struct bitmap_lookup_table_triplet {
uint32_t commit_pos;
uint64_t offset;
uint32_t xor_row;
};
struct bitmap_lookup_table_xor_item {
struct object_id oid;
uint64_t offset;
};
/*
* Given a `triplet` struct pointer and pointer `p`, this
* function reads the triplet beginning at `p` into the struct.
* Note that this function assumes that there is enough memory
* left for filling the `triplet` struct from `p`.
*/
static int bitmap_lookup_table_get_triplet_by_pointer(struct bitmap_lookup_table_triplet *triplet,
const unsigned char *p)
{
if (!triplet)
return -1;
triplet->commit_pos = get_be32(p);
p += sizeof(uint32_t);
triplet->offset = get_be64(p);
p += sizeof(uint64_t);
triplet->xor_row = get_be32(p);
return 0;
}
/*
* This function gets the raw triplet from `row`'th row in the
* lookup table and fills that data to the `triplet`.
*/
static int bitmap_lookup_table_get_triplet(struct bitmap_index *bitmap_git,
uint32_t pos,
struct bitmap_lookup_table_triplet *triplet)
{
unsigned char *p = NULL;
if (pos >= bitmap_git->entry_count)
return error(_("corrupt bitmap lookup table: triplet position out of index"));
p = bitmap_git->table_lookup + st_mult(pos, BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH);
return bitmap_lookup_table_get_triplet_by_pointer(triplet, p);
}
/*
* Searches for a matching triplet. `commit_pos` is a pointer
* to the wanted commit position value. `table_entry` points to
* a triplet in lookup table. The first 4 bytes of each
* triplet (pointed by `table_entry`) are compared with `*commit_pos`.
*/
static int triplet_cmp(const void *commit_pos, const void *table_entry)
{
uint32_t a = *(uint32_t *)commit_pos;
uint32_t b = get_be32(table_entry);
if (a > b)
return 1;
else if (a < b)
return -1;
return 0;
}
static uint32_t bitmap_bsearch_pos(struct bitmap_index *bitmap_git,
struct object_id *oid,
uint32_t *result)
{
int found;
if (bitmap_is_midx(bitmap_git))
found = bsearch_midx(oid, bitmap_git->midx, result);
else
found = bsearch_pack(oid, bitmap_git->pack, result);
return found;
}
/*
* `bsearch_triplet_by_pos` function searches for the raw triplet
* having commit position same as `commit_pos` and fills `triplet`
* object from the raw triplet. Returns 1 on success and 0 on
* failure.
*/
static int bitmap_bsearch_triplet_by_pos(uint32_t commit_pos,
struct bitmap_index *bitmap_git,
struct bitmap_lookup_table_triplet *triplet)
{
unsigned char *p = bsearch(&commit_pos, bitmap_git->table_lookup, bitmap_git->entry_count,
BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH, triplet_cmp);
if (!p)
return -1;
return bitmap_lookup_table_get_triplet_by_pointer(triplet, p);
}
static struct stored_bitmap *lazy_bitmap_for_commit(struct bitmap_index *bitmap_git,
struct commit *commit)
{
uint32_t commit_pos, xor_row;
uint64_t offset;
int flags;
struct bitmap_lookup_table_triplet triplet;
struct object_id *oid = &commit->object.oid;
struct ewah_bitmap *bitmap;
struct stored_bitmap *xor_bitmap = NULL;
const int bitmap_header_size = 6;
static struct bitmap_lookup_table_xor_item *xor_items = NULL;
static size_t xor_items_nr = 0, xor_items_alloc = 0;
static int is_corrupt = 0;
int xor_flags;
khiter_t hash_pos;
struct bitmap_lookup_table_xor_item *xor_item;
if (is_corrupt)
return NULL;
if (!bitmap_bsearch_pos(bitmap_git, oid, &commit_pos))
return NULL;
if (bitmap_bsearch_triplet_by_pos(commit_pos, bitmap_git, &triplet) < 0)
return NULL;
xor_items_nr = 0;
offset = triplet.offset;
xor_row = triplet.xor_row;
while (xor_row != 0xffffffff) {
ALLOC_GROW(xor_items, xor_items_nr + 1, xor_items_alloc);
if (xor_items_nr + 1 >= bitmap_git->entry_count) {
error(_("corrupt bitmap lookup table: xor chain exceed entry count"));
goto corrupt;
}
if (bitmap_lookup_table_get_triplet(bitmap_git, xor_row, &triplet) < 0)
goto corrupt;
xor_item = &xor_items[xor_items_nr];
xor_item->offset = triplet.offset;
if (nth_bitmap_object_oid(bitmap_git, &xor_item->oid, triplet.commit_pos) < 0) {
error(_("corrupt bitmap lookup table: commit index %u out of range"),
triplet.commit_pos);
goto corrupt;
}
hash_pos = kh_get_oid_map(bitmap_git->bitmaps, xor_item->oid);
/*
* If desired bitmap is already stored, we don't need
* to iterate further. Because we know that bitmaps
* that are needed to be parsed to parse this bitmap
* has already been stored. So, assign this stored bitmap
* to the xor_bitmap.
*/
if (hash_pos < kh_end(bitmap_git->bitmaps) &&
(xor_bitmap = kh_value(bitmap_git->bitmaps, hash_pos)))
break;
xor_items_nr++;
xor_row = triplet.xor_row;
}
while (xor_items_nr) {
xor_item = &xor_items[xor_items_nr - 1];
bitmap_git->map_pos = xor_item->offset;
if (bitmap_git->map_size - bitmap_git->map_pos < bitmap_header_size) {
error(_("corrupt ewah bitmap: truncated header for bitmap of commit \"%s\""),
oid_to_hex(&xor_item->oid));
goto corrupt;
}
bitmap_git->map_pos += sizeof(uint32_t) + sizeof(uint8_t);
xor_flags = read_u8(bitmap_git->map, &bitmap_git->map_pos);
bitmap = read_bitmap_1(bitmap_git);
if (!bitmap)
goto corrupt;
xor_bitmap = store_bitmap(bitmap_git, bitmap, &xor_item->oid, xor_bitmap, xor_flags);
xor_items_nr--;
}
bitmap_git->map_pos = offset;
if (bitmap_git->map_size - bitmap_git->map_pos < bitmap_header_size) {
error(_("corrupt ewah bitmap: truncated header for bitmap of commit \"%s\""),
oid_to_hex(oid));
goto corrupt;
}
/*
* Don't bother reading the commit's index position or its xor
* offset:
*
* - The commit's index position is irrelevant to us, since
* load_bitmap_entries_v1 only uses it to learn the object
* id which is used to compute the hashmap's key. We already
* have an object id, so no need to look it up again.
*
* - The xor_offset is unusable for us, since it specifies how
* many entries previous to ours we should look at. This
* makes sense when reading the bitmaps sequentially (as in
* load_bitmap_entries_v1()), since we can keep track of
* each bitmap as we read them.
*
* But it can't work for us, since the bitmap's don't have a
* fixed size. So we learn the position of the xor'd bitmap
* from the commit table (and resolve it to a bitmap in the
* above if-statement).
*
* Instead, we can skip ahead and immediately read the flags and
* ewah bitmap.
*/
bitmap_git->map_pos += sizeof(uint32_t) + sizeof(uint8_t);
flags = read_u8(bitmap_git->map, &bitmap_git->map_pos);
bitmap = read_bitmap_1(bitmap_git);
if (!bitmap)
goto corrupt;
return store_bitmap(bitmap_git, bitmap, oid, xor_bitmap, flags);
corrupt:
free(xor_items);
is_corrupt = 1;
return NULL;
}
struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git, struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git,
struct commit *commit) struct commit *commit)
{ {
khiter_t hash_pos = kh_get_oid_map(bitmap_git->bitmaps, khiter_t hash_pos = kh_get_oid_map(bitmap_git->bitmaps,
commit->object.oid); commit->object.oid);
if (hash_pos >= kh_end(bitmap_git->bitmaps)) if (hash_pos >= kh_end(bitmap_git->bitmaps)) {
return NULL; struct stored_bitmap *bitmap = NULL;
if (!bitmap_git->table_lookup)
return NULL;
trace2_region_enter("pack-bitmap", "reading_lookup_table", the_repository);
/* NEEDSWORK: cache misses aren't recorded */
bitmap = lazy_bitmap_for_commit(bitmap_git, commit);
trace2_region_leave("pack-bitmap", "reading_lookup_table", the_repository);
if (!bitmap)
return NULL;
return lookup_stored_bitmap(bitmap);
}
return lookup_stored_bitmap(kh_value(bitmap_git->bitmaps, hash_pos)); return lookup_stored_bitmap(kh_value(bitmap_git->bitmaps, hash_pos));
} }
@ -1712,8 +1973,10 @@ void test_bitmap_walk(struct rev_info *revs)
if (revs->pending.nr != 1) if (revs->pending.nr != 1)
die(_("you must specify exactly one commit to test")); die(_("you must specify exactly one commit to test"));
fprintf_ln(stderr, "Bitmap v%d test (%d entries loaded)", fprintf_ln(stderr, "Bitmap v%d test (%d entries%s)",
bitmap_git->version, bitmap_git->entry_count); bitmap_git->version,
bitmap_git->entry_count,
bitmap_git->table_lookup ? "" : " loaded");
root = revs->pending.objects[0].item; root = revs->pending.objects[0].item;
bm = bitmap_for_commit(bitmap_git, (struct commit *)root); bm = bitmap_for_commit(bitmap_git, (struct commit *)root);
@ -1766,13 +2029,22 @@ void test_bitmap_walk(struct rev_info *revs)
int test_bitmap_commits(struct repository *r) int test_bitmap_commits(struct repository *r)
{ {
struct bitmap_index *bitmap_git = prepare_bitmap_git(r);
struct object_id oid; struct object_id oid;
MAYBE_UNUSED void *value; MAYBE_UNUSED void *value;
struct bitmap_index *bitmap_git = prepare_bitmap_git(r);
if (!bitmap_git) if (!bitmap_git)
die(_("failed to load bitmap indexes")); die(_("failed to load bitmap indexes"));
/*
* As this function is only used to print bitmap selected
* commits, we don't have to read the commit table.
*/
if (bitmap_git->table_lookup) {
if (load_bitmap_entries_v1(bitmap_git) < 0)
die(_("failed to load bitmap indexes"));
}
kh_foreach(bitmap_git->bitmaps, oid, value, { kh_foreach(bitmap_git->bitmaps, oid, value, {
printf_ln("%s", oid_to_hex(&oid)); printf_ln("%s", oid_to_hex(&oid));
}); });

View File

@ -23,6 +23,15 @@ struct bitmap_disk_header {
#define NEEDS_BITMAP (1u<<22) #define NEEDS_BITMAP (1u<<22)
/*
* The width in bytes of a single triplet in the lookup table
* extension:
* (commit_pos, offset, xor_row)
*
* whose fields ar 32-, 64-, 32- bits wide, respectively.
*/
#define BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH (16)
enum pack_bitmap_opts { enum pack_bitmap_opts {
BITMAP_OPT_FULL_DAG = 0x1, BITMAP_OPT_FULL_DAG = 0x1,
BITMAP_OPT_HASH_CACHE = 0x4, BITMAP_OPT_HASH_CACHE = 0x4,

View File

@ -258,6 +258,7 @@ test_bitmap_cases () {
test_expect_success 'truncated bitmap fails gracefully (ewah)' ' test_expect_success 'truncated bitmap fails gracefully (ewah)' '
test_config pack.writebitmaphashcache false && test_config pack.writebitmaphashcache false &&
test_config pack.writebitmaplookuptable false &&
git repack -ad && git repack -ad &&
git rev-list --use-bitmap-index --count --all >expect && git rev-list --use-bitmap-index --count --all >expect &&
bitmap=$(ls .git/objects/pack/*.bitmap) && bitmap=$(ls .git/objects/pack/*.bitmap) &&
@ -270,6 +271,7 @@ test_bitmap_cases () {
' '
test_expect_success 'truncated bitmap fails gracefully (cache)' ' test_expect_success 'truncated bitmap fails gracefully (cache)' '
git config pack.writeBitmapLookupTable '"$writeLookupTable"' &&
git repack -ad && git repack -ad &&
git rev-list --use-bitmap-index --count --all >expect && git rev-list --use-bitmap-index --count --all >expect &&
bitmap=$(ls .git/objects/pack/*.bitmap) && bitmap=$(ls .git/objects/pack/*.bitmap) &&
@ -453,4 +455,24 @@ test_expect_success 'verify writing bitmap lookup table when enabled' '
grep "\"label\":\"writing_lookup_table\"" trace2 grep "\"label\":\"writing_lookup_table\"" trace2
' '
test_expect_success 'lookup table is actually used to traverse objects' '
git repack -adb &&
GIT_TRACE2_EVENT="$(pwd)/trace3" \
git rev-list --use-bitmap-index --count --all &&
grep "\"label\":\"reading_lookup_table\"" trace3
'
test_expect_success 'truncated bitmap fails gracefully (lookup table)' '
test_config pack.writebitmaphashcache false &&
git repack -adb &&
git rev-list --use-bitmap-index --count --all >expect &&
bitmap=$(ls .git/objects/pack/*.bitmap) &&
test_when_finished "rm -f $bitmap" &&
test_copy_bytes 512 <$bitmap >$bitmap.tmp &&
mv -f $bitmap.tmp $bitmap &&
git rev-list --use-bitmap-index --count --all >actual 2>stderr &&
test_cmp expect actual &&
test_i18ngrep corrupted.bitmap.index stderr
'
test_done test_done