bloom.c: add the murmur3 hash implementation
In preparation for computing changed paths Bloom filters, implement the Murmur3 hash algorithm as described in [1]. It hashes the given data using the given seed and produces a uniformly distributed hash value. [1] https://en.wikipedia.org/wiki/MurmurHash#Algorithm Helped-by: Derrick Stolee <dstolee@microsoft.com> Helped-by: Szeder Gábor <szeder.dev@gmail.com> Reviewed-by: Jakub Narębski <jnareb@gmail.com> Signed-off-by: Garima Singh <garima.singh@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
parent
3be7efcafc
commit
f52207a45c
2
Makefile
2
Makefile
@ -695,6 +695,7 @@ X =
|
||||
PROGRAMS += $(patsubst %.o,git-%$X,$(PROGRAM_OBJS))
|
||||
|
||||
TEST_BUILTINS_OBJS += test-advise.o
|
||||
TEST_BUILTINS_OBJS += test-bloom.o
|
||||
TEST_BUILTINS_OBJS += test-chmtime.o
|
||||
TEST_BUILTINS_OBJS += test-config.o
|
||||
TEST_BUILTINS_OBJS += test-ctype.o
|
||||
@ -840,6 +841,7 @@ LIB_OBJS += base85.o
|
||||
LIB_OBJS += bisect.o
|
||||
LIB_OBJS += blame.o
|
||||
LIB_OBJS += blob.o
|
||||
LIB_OBJS += bloom.o
|
||||
LIB_OBJS += branch.o
|
||||
LIB_OBJS += bulk-checkin.o
|
||||
LIB_OBJS += bundle.o
|
||||
|
73
bloom.c
Normal file
73
bloom.c
Normal file
@ -0,0 +1,73 @@
|
||||
#include "git-compat-util.h"
|
||||
#include "bloom.h"
|
||||
|
||||
static uint32_t rotate_left(uint32_t value, int32_t count)
|
||||
{
|
||||
uint32_t mask = 8 * sizeof(uint32_t) - 1;
|
||||
count &= mask;
|
||||
return ((value << count) | (value >> ((-count) & mask)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate the murmur3 32-bit hash value for the given data
|
||||
* using the given seed.
|
||||
* Produces a uniformly distributed hash value.
|
||||
* Not considered to be cryptographically secure.
|
||||
* Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
|
||||
*/
|
||||
uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len)
|
||||
{
|
||||
const uint32_t c1 = 0xcc9e2d51;
|
||||
const uint32_t c2 = 0x1b873593;
|
||||
const uint32_t r1 = 15;
|
||||
const uint32_t r2 = 13;
|
||||
const uint32_t m = 5;
|
||||
const uint32_t n = 0xe6546b64;
|
||||
int i;
|
||||
uint32_t k1 = 0;
|
||||
const char *tail;
|
||||
|
||||
int len4 = len / sizeof(uint32_t);
|
||||
|
||||
uint32_t k;
|
||||
for (i = 0; i < len4; i++) {
|
||||
uint32_t byte1 = (uint32_t)data[4*i];
|
||||
uint32_t byte2 = ((uint32_t)data[4*i + 1]) << 8;
|
||||
uint32_t byte3 = ((uint32_t)data[4*i + 2]) << 16;
|
||||
uint32_t byte4 = ((uint32_t)data[4*i + 3]) << 24;
|
||||
k = byte1 | byte2 | byte3 | byte4;
|
||||
k *= c1;
|
||||
k = rotate_left(k, r1);
|
||||
k *= c2;
|
||||
|
||||
seed ^= k;
|
||||
seed = rotate_left(seed, r2) * m + n;
|
||||
}
|
||||
|
||||
tail = (data + len4 * sizeof(uint32_t));
|
||||
|
||||
switch (len & (sizeof(uint32_t) - 1)) {
|
||||
case 3:
|
||||
k1 ^= ((uint32_t)tail[2]) << 16;
|
||||
/*-fallthrough*/
|
||||
case 2:
|
||||
k1 ^= ((uint32_t)tail[1]) << 8;
|
||||
/*-fallthrough*/
|
||||
case 1:
|
||||
k1 ^= ((uint32_t)tail[0]) << 0;
|
||||
k1 *= c1;
|
||||
k1 = rotate_left(k1, r1);
|
||||
k1 *= c2;
|
||||
seed ^= k1;
|
||||
break;
|
||||
}
|
||||
|
||||
seed ^= (uint32_t)len;
|
||||
seed ^= (seed >> 16);
|
||||
seed *= 0x85ebca6b;
|
||||
seed ^= (seed >> 13);
|
||||
seed *= 0xc2b2ae35;
|
||||
seed ^= (seed >> 16);
|
||||
|
||||
return seed;
|
||||
}
|
13
bloom.h
Normal file
13
bloom.h
Normal file
@ -0,0 +1,13 @@
|
||||
#ifndef BLOOM_H
|
||||
#define BLOOM_H
|
||||
|
||||
/*
|
||||
* Calculate the murmur3 32-bit hash value for the given data
|
||||
* using the given seed.
|
||||
* Produces a uniformly distributed hash value.
|
||||
* Not considered to be cryptographically secure.
|
||||
* Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
|
||||
*/
|
||||
uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len);
|
||||
|
||||
#endif
|
13
t/helper/test-bloom.c
Normal file
13
t/helper/test-bloom.c
Normal file
@ -0,0 +1,13 @@
|
||||
#include "git-compat-util.h"
|
||||
#include "bloom.h"
|
||||
#include "test-tool.h"
|
||||
|
||||
int cmd__bloom(int argc, const char **argv)
|
||||
{
|
||||
if (!strcmp(argv[1], "get_murmur3")) {
|
||||
uint32_t hashed = murmur3_seeded(0, argv[2], strlen(argv[2]));
|
||||
printf("Murmur3 Hash with seed=0:0x%08x\n", hashed);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -15,6 +15,7 @@ struct test_cmd {
|
||||
|
||||
static struct test_cmd cmds[] = {
|
||||
{ "advise", cmd__advise_if_enabled },
|
||||
{ "bloom", cmd__bloom },
|
||||
{ "chmtime", cmd__chmtime },
|
||||
{ "config", cmd__config },
|
||||
{ "ctype", cmd__ctype },
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "git-compat-util.h"
|
||||
|
||||
int cmd__advise_if_enabled(int argc, const char **argv);
|
||||
int cmd__bloom(int argc, const char **argv);
|
||||
int cmd__chmtime(int argc, const char **argv);
|
||||
int cmd__config(int argc, const char **argv);
|
||||
int cmd__ctype(int argc, const char **argv);
|
||||
|
30
t/t0095-bloom.sh
Executable file
30
t/t0095-bloom.sh
Executable file
@ -0,0 +1,30 @@
|
||||
#!/bin/sh
|
||||
|
||||
test_description='Testing the various Bloom filter computations in bloom.c'
|
||||
. ./test-lib.sh
|
||||
|
||||
test_expect_success 'compute unseeded murmur3 hash for empty string' '
|
||||
cat >expect <<-\EOF &&
|
||||
Murmur3 Hash with seed=0:0x00000000
|
||||
EOF
|
||||
test-tool bloom get_murmur3 "" >actual &&
|
||||
test_cmp expect actual
|
||||
'
|
||||
|
||||
test_expect_success 'compute unseeded murmur3 hash for test string 1' '
|
||||
cat >expect <<-\EOF &&
|
||||
Murmur3 Hash with seed=0:0x627b0c2c
|
||||
EOF
|
||||
test-tool bloom get_murmur3 "Hello world!" >actual &&
|
||||
test_cmp expect actual
|
||||
'
|
||||
|
||||
test_expect_success 'compute unseeded murmur3 hash for test string 2' '
|
||||
cat >expect <<-\EOF &&
|
||||
Murmur3 Hash with seed=0:0x2e4ff723
|
||||
EOF
|
||||
test-tool bloom get_murmur3 "The quick brown fox jumps over the lazy dog" >actual &&
|
||||
test_cmp expect actual
|
||||
'
|
||||
|
||||
test_done
|
Loading…
Reference in New Issue
Block a user