1217c03e7b
Add logic to a) parse Bloom filter information from the commit graph file and, b) re-use existing Bloom filters. See Documentation/technical/commit-graph-format for the format in which the Bloom filter information is written to the commit graph file. To read Bloom filter for a given commit with lexicographic position 'i' we need to: 1. Read BIDX[i] which essentially gives us the starting index in BDAT for filter of commit i+1. It is essentially the index past the end of the filter of commit i. It is called end_index in the code. 2. For i>0, read BIDX[i-1] which will give us the starting index in BDAT for filter of commit i. It is called the start_index in the code. For the first commit, where i = 0, Bloom filter data starts at the beginning, just past the header in the BDAT chunk. Hence, start_index will be 0. 3. The length of the filter will be end_index - start_index, because BIDX[i] gives the cumulative 8-byte words including the ith commit's filter. We toggle whether Bloom filters should be recomputed based on the compute_if_not_present flag. Helped-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Garima Singh <garima.singh@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
81 lines
1.9 KiB
C
81 lines
1.9 KiB
C
#include "git-compat-util.h"
|
|
#include "bloom.h"
|
|
#include "test-tool.h"
|
|
#include "commit.h"
|
|
|
|
/*
 * Bloom filter parameters used throughout this helper: they are passed to
 * fill_bloom_key() and add_key_to_filter(), bound the hash-printing loop
 * (num_hashes), and size the filter built by "generate_filter"
 * (bits_per_entry). Initialized from DEFAULT_BLOOM_FILTER_SETTINGS.
 */
struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS;
|
|
|
|
static void add_string_to_filter(const char *data, struct bloom_filter *filter) {
|
|
struct bloom_key key;
|
|
int i;
|
|
|
|
fill_bloom_key(data, strlen(data), &key, &settings);
|
|
printf("Hashes:");
|
|
for (i = 0; i < settings.num_hashes; i++){
|
|
printf("0x%08x|", key.hashes[i]);
|
|
}
|
|
printf("\n");
|
|
add_key_to_filter(&key, filter, &settings);
|
|
}
|
|
|
|
static void print_bloom_filter(struct bloom_filter *filter) {
|
|
int i;
|
|
|
|
if (!filter) {
|
|
printf("No filter.\n");
|
|
return;
|
|
}
|
|
printf("Filter_Length:%d\n", (int)filter->len);
|
|
printf("Filter_Data:");
|
|
for (i = 0; i < filter->len; i++){
|
|
printf("%02x|", filter->data[i]);
|
|
}
|
|
printf("\n");
|
|
}
|
|
|
|
static void get_bloom_filter_for_commit(const struct object_id *commit_oid)
|
|
{
|
|
struct commit *c;
|
|
struct bloom_filter *filter;
|
|
setup_git_directory();
|
|
c = lookup_commit(the_repository, commit_oid);
|
|
filter = get_bloom_filter(the_repository, c, 1);
|
|
print_bloom_filter(filter);
|
|
}
|
|
|
|
int cmd__bloom(int argc, const char **argv)
|
|
{
|
|
if (!strcmp(argv[1], "get_murmur3")) {
|
|
uint32_t hashed = murmur3_seeded(0, argv[2], strlen(argv[2]));
|
|
printf("Murmur3 Hash with seed=0:0x%08x\n", hashed);
|
|
}
|
|
|
|
if (!strcmp(argv[1], "generate_filter")) {
|
|
struct bloom_filter filter;
|
|
int i = 2;
|
|
filter.len = (settings.bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD;
|
|
filter.data = xcalloc(filter.len, sizeof(unsigned char));
|
|
|
|
if (!argv[2]){
|
|
die("at least one input string expected");
|
|
}
|
|
|
|
while (argv[i]) {
|
|
add_string_to_filter(argv[i], &filter);
|
|
i++;
|
|
}
|
|
|
|
print_bloom_filter(&filter);
|
|
}
|
|
|
|
if (!strcmp(argv[1], "get_filter_for_commit")) {
|
|
struct object_id oid;
|
|
const char *end;
|
|
if (parse_oid_hex(argv[2], &oid, &end))
|
|
die("cannot parse oid '%s'", argv[2]);
|
|
init_bloom_filters();
|
|
get_bloom_filter_for_commit(&oid);
|
|
}
|
|
|
|
return 0;
|
|
} |