c3a02824cf
"git log -L..." now takes advantage of the "which paths are touched by this commit?" info stored in the commit-graph system. * ds/line-log-on-bloom: line-log: integrate with changed-path Bloom filters line-log: try to use generation number-based topo-ordering line-log: more responsive, incremental 'git log -L' t4211-line-log: add tests for parent oids line-log: remove unused fields from 'struct line_log_data'
92 lines
2.4 KiB
C
92 lines
2.4 KiB
C
#ifndef BLOOM_H
|
|
#define BLOOM_H
|
|
|
|
struct commit;
|
|
struct repository;
|
|
|
|
struct bloom_filter_settings {
|
|
/*
|
|
* The version of the hashing technique being used.
|
|
* We currently only support version = 1 which is
|
|
* the seeded murmur3 hashing technique implemented
|
|
* in bloom.c.
|
|
*/
|
|
uint32_t hash_version;
|
|
|
|
/*
|
|
* The number of times a path is hashed, i.e. the
|
|
* number of bit positions tht cumulatively
|
|
* determine whether a path is present in the
|
|
* Bloom filter.
|
|
*/
|
|
uint32_t num_hashes;
|
|
|
|
/*
|
|
* The minimum number of bits per entry in the Bloom
|
|
* filter. If the filter contains 'n' entries, then
|
|
* filter size is the minimum number of 8-bit words
|
|
* that contain n*b bits.
|
|
*/
|
|
uint32_t bits_per_entry;
|
|
};
|
|
|
|
#define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10 }
|
|
#define BITS_PER_WORD 8
|
|
#define BLOOMDATA_CHUNK_HEADER_SIZE 3 * sizeof(uint32_t)
|
|
|
|
/*
|
|
* A bloom_filter struct represents a data segment to
|
|
* use when testing hash values. The 'len' member
|
|
* dictates how many entries are stored in
|
|
* 'data'.
|
|
*/
|
|
struct bloom_filter {
|
|
unsigned char *data;
|
|
size_t len;
|
|
};
|
|
|
|
/*
|
|
* A bloom_key represents the k hash values for a
|
|
* given string. These can be precomputed and
|
|
* stored in a bloom_key for re-use when testing
|
|
* against a bloom_filter. The number of hashes is
|
|
* given by the Bloom filter settings and is the same
|
|
* for all Bloom filters and keys interacting with
|
|
* the loaded version of the commit graph file and
|
|
* the Bloom data chunks.
|
|
*/
|
|
struct bloom_key {
|
|
uint32_t *hashes;
|
|
};
|
|
|
|
/*
|
|
* Calculate the murmur3 32-bit hash value for the given data
|
|
* using the given seed.
|
|
* Produces a uniformly distributed hash value.
|
|
* Not considered to be cryptographically secure.
|
|
* Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
|
|
*/
|
|
uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len);
|
|
|
|
void fill_bloom_key(const char *data,
|
|
size_t len,
|
|
struct bloom_key *key,
|
|
const struct bloom_filter_settings *settings);
|
|
void clear_bloom_key(struct bloom_key *key);
|
|
|
|
void add_key_to_filter(const struct bloom_key *key,
|
|
struct bloom_filter *filter,
|
|
const struct bloom_filter_settings *settings);
|
|
|
|
void init_bloom_filters(void);
|
|
|
|
struct bloom_filter *get_bloom_filter(struct repository *r,
|
|
struct commit *c,
|
|
int compute_if_not_present);
|
|
|
|
int bloom_filter_contains(const struct bloom_filter *filter,
|
|
const struct bloom_key *key,
|
|
const struct bloom_filter_settings *settings);
|
|
|
|
#endif
|