git-commit-vandalism/t/helper/test-bloom.c
Garima Singh 1217c03e7b commit-graph: reuse existing Bloom filters during write
Add logic to
a) parse Bloom filter information from the commit graph file and,
b) re-use existing Bloom filters.

See Documentation/technical/commit-graph-format for the format in which
the Bloom filter information is written to the commit graph file.

To read the Bloom filter for a given commit with lexicographic position
'i', we need to:
1. Read BIDX[i] which essentially gives us the starting index in BDAT for
   filter of commit i+1. It is essentially the index past the end
   of the filter of commit i. It is called end_index in the code.

2. For i>0, read BIDX[i-1] which will give us the starting index in BDAT
   for filter of commit i. It is called the start_index in the code.
   For the first commit, where i = 0, Bloom filter data starts at the
   beginning, just past the header in the BDAT chunk. Hence, start_index
   will be 0.

3. The length of the filter will be end_index - start_index, because
   BIDX[i] gives the cumulative number of 8-byte words up to and
   including the ith commit's filter.

We toggle whether Bloom filters should be recomputed based on the
compute_if_not_present flag.

Helped-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Garima Singh <garima.singh@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-06 11:08:37 -07:00

81 lines
1.9 KiB
C

#include "git-compat-util.h"
#include "bloom.h"
#include "test-tool.h"
#include "commit.h"
/*
 * Bloom filter parameters used by the filter-building code in this
 * helper (hash count, bits per entry), initialized from
 * DEFAULT_BLOOM_FILTER_SETTINGS.
 */
struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS;
/*
 * Build a Bloom key for the NUL-terminated string `data` using the
 * file-level `settings`, print every hash of that key on one
 * "Hashes:" line, and then add the key to `filter`.
 */
static void add_string_to_filter(const char *data, struct bloom_filter *filter)
{
	struct bloom_key key;
	int n = 0;

	fill_bloom_key(data, strlen(data), &key, &settings);

	/* One "0x%08x|" field per hash, settings.num_hashes fields in total. */
	printf("Hashes:");
	while (n < settings.num_hashes) {
		printf("0x%08x|", key.hashes[n]);
		n++;
	}
	printf("\n");

	add_key_to_filter(&key, filter, &settings);
}
/*
 * Print `filter` to stdout: its length on a "Filter_Length:" line,
 * followed by a "Filter_Data:" line holding each data element as
 * two hex digits and a '|' separator.
 *
 * A NULL `filter` prints "No filter." instead.
 */
static void print_bloom_filter(struct bloom_filter *filter)
{
	int pos = 0;

	if (!filter) {
		printf("No filter.\n");
		return;
	}

	printf("Filter_Length:%d\n", (int)filter->len);

	printf("Filter_Data:");
	while (pos < filter->len) {
		printf("%02x|", filter->data[pos]);
		pos++;
	}
	printf("\n");
}
/*
 * Set up the git directory, look up the commit named by `commit_oid`
 * in the_repository, fetch its Bloom filter and print it with
 * print_bloom_filter().
 */
static void get_bloom_filter_for_commit(const struct object_id *commit_oid)
{
	struct commit *commit;

	setup_git_directory();
	commit = lookup_commit(the_repository, commit_oid);

	/*
	 * NOTE(review): the trailing 1 presumably asks get_bloom_filter()
	 * to compute a filter when none is stored -- confirm against its
	 * declaration in bloom.h.
	 */
	print_bloom_filter(get_bloom_filter(the_repository, commit, 1));
}
/*
 * Entry point for the "bloom" test helper.
 *
 * argv[1] selects the subcommand:
 *   get_murmur3 <string>         print murmur3_seeded() of <string>, seed 0
 *   generate_filter <string>...  add each string to a fresh filter, print it
 *   get_filter_for_commit <oid>  print the Bloom filter of the given commit
 *
 * Dies if the subcommand or one of its required arguments is missing,
 * or if <oid> cannot be parsed. An unrecognized subcommand is ignored.
 * Returns 0.
 */
int cmd__bloom(int argc, const char **argv)
{
	/* argv[1] is dereferenced below; refuse to run without it. */
	if (argc < 2)
		die("subcommand expected: get_murmur3, generate_filter or get_filter_for_commit");

	if (!strcmp(argv[1], "get_murmur3")) {
		uint32_t hashed;

		if (argc < 3)
			die("get_murmur3: input string expected");

		hashed = murmur3_seeded(0, argv[2], strlen(argv[2]));
		printf("Murmur3 Hash with seed=0:0x%08x\n", hashed);
	}

	if (!strcmp(argv[1], "generate_filter")) {
		struct bloom_filter filter;
		int i;

		/* Validate before allocating so the error path owns nothing. */
		if (argc < 3)
			die("at least one input string expected");

		/* Round bits_per_entry up to a whole number of words. */
		filter.len = (settings.bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD;
		filter.data = xcalloc(filter.len, sizeof(unsigned char));

		for (i = 2; i < argc; i++)
			add_string_to_filter(argv[i], &filter);

		print_bloom_filter(&filter);
		free(filter.data);
	}

	if (!strcmp(argv[1], "get_filter_for_commit")) {
		struct object_id oid;
		const char *end;

		if (argc < 3)
			die("get_filter_for_commit: commit oid expected");
		if (parse_oid_hex(argv[2], &oid, &end))
			die("cannot parse oid '%s'", argv[2]);

		init_bloom_filters();
		get_bloom_filter_for_commit(&oid);
	}

	return 0;
}