commit-graph: reuse existing Bloom filters during write
Add logic to a) parse Bloom filter information from the commit graph file and, b) re-use existing Bloom filters. See Documentation/technical/commit-graph-format for the format in which the Bloom filter information is written to the commit graph file. To read Bloom filter for a given commit with lexicographic position 'i' we need to: 1. Read BIDX[i] which essentially gives us the starting index in BDAT for filter of commit i+1. It is essentially the index past the end of the filter of commit i. It is called end_index in the code. 2. For i>0, read BIDX[i-1] which will give us the starting index in BDAT for filter of commit i. It is called the start_index in the code. For the first commit, where i = 0, Bloom filter data starts at the beginning, just past the header in the BDAT chunk. Hence, start_index will be 0. 3. The length of the filter will be end_index - start_index, because BIDX[i] gives the cumulative 8-byte words including the ith commit's filter. We toggle whether Bloom filters should be recomputed based on the compute_if_not_present flag. Helped-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Garima Singh <garima.singh@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
parent
76ffbca71a
commit
1217c03e7b
49
bloom.c
49
bloom.c
@ -4,6 +4,8 @@
|
|||||||
#include "diffcore.h"
|
#include "diffcore.h"
|
||||||
#include "revision.h"
|
#include "revision.h"
|
||||||
#include "hashmap.h"
|
#include "hashmap.h"
|
||||||
|
#include "commit-graph.h"
|
||||||
|
#include "commit.h"
|
||||||
|
|
||||||
define_commit_slab(bloom_filter_slab, struct bloom_filter);
|
define_commit_slab(bloom_filter_slab, struct bloom_filter);
|
||||||
|
|
||||||
@ -26,6 +28,36 @@ static inline unsigned char get_bitmask(uint32_t pos)
|
|||||||
return ((unsigned char)1) << (pos & (BITS_PER_WORD - 1));
|
return ((unsigned char)1) << (pos & (BITS_PER_WORD - 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int load_bloom_filter_from_graph(struct commit_graph *g,
|
||||||
|
struct bloom_filter *filter,
|
||||||
|
struct commit *c)
|
||||||
|
{
|
||||||
|
uint32_t lex_pos, start_index, end_index;
|
||||||
|
|
||||||
|
while (c->graph_pos < g->num_commits_in_base)
|
||||||
|
g = g->base_graph;
|
||||||
|
|
||||||
|
/* The commit graph commit 'c' lives in doesn't carry bloom filters. */
|
||||||
|
if (!g->chunk_bloom_indexes)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
lex_pos = c->graph_pos - g->num_commits_in_base;
|
||||||
|
|
||||||
|
end_index = get_be32(g->chunk_bloom_indexes + 4 * lex_pos);
|
||||||
|
|
||||||
|
if (lex_pos > 0)
|
||||||
|
start_index = get_be32(g->chunk_bloom_indexes + 4 * (lex_pos - 1));
|
||||||
|
else
|
||||||
|
start_index = 0;
|
||||||
|
|
||||||
|
filter->len = end_index - start_index;
|
||||||
|
filter->data = (unsigned char *)(g->chunk_bloom_data +
|
||||||
|
sizeof(unsigned char) * start_index +
|
||||||
|
BLOOMDATA_CHUNK_HEADER_SIZE);
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Calculate the murmur3 32-bit hash value for the given data
|
* Calculate the murmur3 32-bit hash value for the given data
|
||||||
* using the given seed.
|
* using the given seed.
|
||||||
@ -127,7 +159,8 @@ void init_bloom_filters(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct bloom_filter *get_bloom_filter(struct repository *r,
|
struct bloom_filter *get_bloom_filter(struct repository *r,
|
||||||
struct commit *c)
|
struct commit *c,
|
||||||
|
int compute_if_not_present)
|
||||||
{
|
{
|
||||||
struct bloom_filter *filter;
|
struct bloom_filter *filter;
|
||||||
struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS;
|
struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS;
|
||||||
@ -140,6 +173,20 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
|
|||||||
|
|
||||||
filter = bloom_filter_slab_at(&bloom_filters, c);
|
filter = bloom_filter_slab_at(&bloom_filters, c);
|
||||||
|
|
||||||
|
if (!filter->data) {
|
||||||
|
load_commit_graph_info(r, c);
|
||||||
|
if (c->graph_pos != COMMIT_NOT_FROM_GRAPH &&
|
||||||
|
r->objects->commit_graph->chunk_bloom_indexes) {
|
||||||
|
if (load_bloom_filter_from_graph(r->objects->commit_graph, filter, c))
|
||||||
|
return filter;
|
||||||
|
else
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (filter->data || !compute_if_not_present)
|
||||||
|
return filter;
|
||||||
|
|
||||||
repo_diff_setup(r, &diffopt);
|
repo_diff_setup(r, &diffopt);
|
||||||
diffopt.flags.recursive = 1;
|
diffopt.flags.recursive = 1;
|
||||||
diffopt.max_changes = max_changes;
|
diffopt.max_changes = max_changes;
|
||||||
|
4
bloom.h
4
bloom.h
@ -32,6 +32,7 @@ struct bloom_filter_settings {
|
|||||||
|
|
||||||
#define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10 }
|
#define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10 }
|
||||||
#define BITS_PER_WORD 8
|
#define BITS_PER_WORD 8
|
||||||
|
#define BLOOMDATA_CHUNK_HEADER_SIZE 3 * sizeof(uint32_t)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A bloom_filter struct represents a data segment to
|
* A bloom_filter struct represents a data segment to
|
||||||
@ -79,6 +80,7 @@ void add_key_to_filter(const struct bloom_key *key,
|
|||||||
void init_bloom_filters(void);
|
void init_bloom_filters(void);
|
||||||
|
|
||||||
struct bloom_filter *get_bloom_filter(struct repository *r,
|
struct bloom_filter *get_bloom_filter(struct repository *r,
|
||||||
struct commit *c);
|
struct commit *c,
|
||||||
|
int compute_if_not_present);
|
||||||
|
|
||||||
#endif
|
#endif
|
@ -1086,7 +1086,7 @@ static void write_graph_chunk_bloom_indexes(struct hashfile *f,
|
|||||||
ctx->commits.nr);
|
ctx->commits.nr);
|
||||||
|
|
||||||
while (list < last) {
|
while (list < last) {
|
||||||
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list);
|
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
|
||||||
cur_pos += filter->len;
|
cur_pos += filter->len;
|
||||||
display_progress(progress, ++i);
|
display_progress(progress, ++i);
|
||||||
hashwrite_be32(f, cur_pos);
|
hashwrite_be32(f, cur_pos);
|
||||||
@ -1115,7 +1115,7 @@ static void write_graph_chunk_bloom_data(struct hashfile *f,
|
|||||||
hashwrite_be32(f, settings->bits_per_entry);
|
hashwrite_be32(f, settings->bits_per_entry);
|
||||||
|
|
||||||
while (list < last) {
|
while (list < last) {
|
||||||
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list);
|
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
|
||||||
display_progress(progress, ++i);
|
display_progress(progress, ++i);
|
||||||
hashwrite(f, filter->data, filter->len * sizeof(unsigned char));
|
hashwrite(f, filter->data, filter->len * sizeof(unsigned char));
|
||||||
list++;
|
list++;
|
||||||
@ -1296,7 +1296,7 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
|
|||||||
|
|
||||||
for (i = 0; i < ctx->commits.nr; i++) {
|
for (i = 0; i < ctx->commits.nr; i++) {
|
||||||
struct commit *c = sorted_commits[i];
|
struct commit *c = sorted_commits[i];
|
||||||
struct bloom_filter *filter = get_bloom_filter(ctx->r, c);
|
struct bloom_filter *filter = get_bloom_filter(ctx->r, c, 1);
|
||||||
ctx->total_bloom_filter_data_size += sizeof(unsigned char) * filter->len;
|
ctx->total_bloom_filter_data_size += sizeof(unsigned char) * filter->len;
|
||||||
display_progress(progress, i + 1);
|
display_progress(progress, i + 1);
|
||||||
}
|
}
|
||||||
|
@ -39,7 +39,7 @@ static void get_bloom_filter_for_commit(const struct object_id *commit_oid)
|
|||||||
struct bloom_filter *filter;
|
struct bloom_filter *filter;
|
||||||
setup_git_directory();
|
setup_git_directory();
|
||||||
c = lookup_commit(the_repository, commit_oid);
|
c = lookup_commit(the_repository, commit_oid);
|
||||||
filter = get_bloom_filter(the_repository, c);
|
filter = get_bloom_filter(the_repository, c, 1);
|
||||||
print_bloom_filter(filter);
|
print_bloom_filter(filter);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user