Merge branch 'dk/diff-delta'
* dk/diff-delta: diff-delta.c: Rationalize culling of hash buckets diff-delta.c: pack the index structure
This commit is contained in:
commit
eb6d54fc79
109
diff-delta.c
109
diff-delta.c
@ -115,7 +115,11 @@ static const unsigned int U[256] = {
|
||||
struct index_entry {
|
||||
const unsigned char *ptr;
|
||||
unsigned int val;
|
||||
struct index_entry *next;
|
||||
};
|
||||
|
||||
struct unpacked_index_entry {
|
||||
struct index_entry entry;
|
||||
struct unpacked_index_entry *next;
|
||||
};
|
||||
|
||||
struct delta_index {
|
||||
@ -131,7 +135,8 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
|
||||
unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
|
||||
const unsigned char *data, *buffer = buf;
|
||||
struct delta_index *index;
|
||||
struct index_entry *entry, **hash;
|
||||
struct unpacked_index_entry *entry, **hash;
|
||||
struct index_entry *packed_entry, **packed_hash;
|
||||
void *mem;
|
||||
unsigned long memsize;
|
||||
|
||||
@ -148,28 +153,21 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
|
||||
hmask = hsize - 1;
|
||||
|
||||
/* allocate lookup index */
|
||||
memsize = sizeof(*index) +
|
||||
sizeof(*hash) * hsize +
|
||||
memsize = sizeof(*hash) * hsize +
|
||||
sizeof(*entry) * entries;
|
||||
mem = malloc(memsize);
|
||||
if (!mem)
|
||||
return NULL;
|
||||
index = mem;
|
||||
mem = index + 1;
|
||||
hash = mem;
|
||||
mem = hash + hsize;
|
||||
entry = mem;
|
||||
|
||||
index->memsize = memsize;
|
||||
index->src_buf = buf;
|
||||
index->src_size = bufsize;
|
||||
index->hash_mask = hmask;
|
||||
memset(hash, 0, hsize * sizeof(*hash));
|
||||
|
||||
/* allocate an array to count hash entries */
|
||||
hash_count = calloc(hsize, sizeof(*hash_count));
|
||||
if (!hash_count) {
|
||||
free(index);
|
||||
free(hash);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -183,12 +181,13 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
|
||||
val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
|
||||
if (val == prev_val) {
|
||||
/* keep the lowest of consecutive identical blocks */
|
||||
entry[-1].ptr = data + RABIN_WINDOW;
|
||||
entry[-1].entry.ptr = data + RABIN_WINDOW;
|
||||
--entries;
|
||||
} else {
|
||||
prev_val = val;
|
||||
i = val & hmask;
|
||||
entry->ptr = data + RABIN_WINDOW;
|
||||
entry->val = val;
|
||||
entry->entry.ptr = data + RABIN_WINDOW;
|
||||
entry->entry.val = val;
|
||||
entry->next = hash[i];
|
||||
hash[i] = entry++;
|
||||
hash_count[i]++;
|
||||
@ -208,20 +207,84 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
|
||||
* the reference buffer.
|
||||
*/
|
||||
for (i = 0; i < hsize; i++) {
|
||||
if (hash_count[i] < HASH_LIMIT)
|
||||
int acc;
|
||||
|
||||
if (hash_count[i] <= HASH_LIMIT)
|
||||
continue;
|
||||
|
||||
entries -= hash_count[i] - HASH_LIMIT;
|
||||
/* We leave exactly HASH_LIMIT entries in the bucket */
|
||||
|
||||
entry = hash[i];
|
||||
acc = 0;
|
||||
do {
|
||||
struct index_entry *keep = entry;
|
||||
int skip = hash_count[i] / HASH_LIMIT;
|
||||
do {
|
||||
entry = entry->next;
|
||||
} while(--skip && entry);
|
||||
keep->next = entry;
|
||||
} while(entry);
|
||||
acc += hash_count[i] - HASH_LIMIT;
|
||||
if (acc > 0) {
|
||||
struct unpacked_index_entry *keep = entry;
|
||||
do {
|
||||
entry = entry->next;
|
||||
acc -= HASH_LIMIT;
|
||||
} while (acc > 0);
|
||||
keep->next = entry->next;
|
||||
}
|
||||
entry = entry->next;
|
||||
} while (entry);
|
||||
|
||||
/* Assume that this loop is gone through exactly
|
||||
* HASH_LIMIT times and is entered and left with
|
||||
* acc==0. So the first statement in the loop
|
||||
* contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT
|
||||
* to the accumulator, and the inner loop consequently
|
||||
* is run (hash_count[i]-HASH_LIMIT) times, removing
|
||||
* one element from the list each time. Since acc
|
||||
* balances out to 0 at the final run, the inner loop
|
||||
* body can't be left with entry==NULL. So we indeed
|
||||
* encounter entry==NULL in the outer loop only.
|
||||
*/
|
||||
}
|
||||
free(hash_count);
|
||||
|
||||
/* Now create the packed index in array form rather than
|
||||
* linked lists */
|
||||
|
||||
memsize = sizeof(*index)
|
||||
+ sizeof(*packed_hash) * (hsize+1)
|
||||
+ sizeof(*packed_entry) * entries;
|
||||
|
||||
mem = malloc(memsize);
|
||||
|
||||
if (!mem) {
|
||||
free(hash);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
index = mem;
|
||||
index->memsize = memsize;
|
||||
index->src_buf = buf;
|
||||
index->src_size = bufsize;
|
||||
index->hash_mask = hmask;
|
||||
|
||||
mem = index + 1;
|
||||
packed_hash = mem;
|
||||
mem = packed_hash + (hsize+1);
|
||||
packed_entry = mem;
|
||||
|
||||
/* Coalesce all entries belonging to one linked list into
|
||||
* consecutive array entries */
|
||||
|
||||
for (i = 0; i < hsize; i++) {
|
||||
packed_hash[i] = packed_entry;
|
||||
for (entry = hash[i]; entry; entry = entry->next)
|
||||
*packed_entry++ = entry->entry;
|
||||
}
|
||||
|
||||
/* Sentinel value to indicate the length of the last hash
|
||||
* bucket */
|
||||
|
||||
packed_hash[hsize] = packed_entry;
|
||||
assert(packed_entry - (struct index_entry *)mem == entries);
|
||||
free(hash);
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
@ -302,7 +365,7 @@ create_delta(const struct delta_index *index,
|
||||
val ^= U[data[-RABIN_WINDOW]];
|
||||
val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
|
||||
i = val & index->hash_mask;
|
||||
for (entry = index->hash[i]; entry; entry = entry->next) {
|
||||
for (entry = index->hash[i]; entry < index->hash[i+1]; entry++) {
|
||||
const unsigned char *ref = entry->ptr;
|
||||
const unsigned char *src = data;
|
||||
unsigned int ref_size = ref_top - ref;
|
||||
|
Loading…
Reference in New Issue
Block a user