split the diff-delta interface
This patch splits the diff-delta interface into index creation and delta generation. A wrapper is provided to preserve the diff-delta() call. This will allow for an optimization in pack-objects.c where the source object could be fixed and a full window of objects tentatively tried against that same source object without recomputing the source index each time. This patch only restructure things, plus a couple cleanups for good measure. There is no performance change yet. Signed-off-by: Nicolas Pitre <nico@cam.org>
This commit is contained in:
parent
36932eab77
commit
08abe669c0
75
delta.h
75
delta.h
@ -1,12 +1,73 @@
|
||||
#ifndef DELTA_H
|
||||
#define DELTA_H
|
||||
|
||||
/* handling of delta buffers */
|
||||
extern void *diff_delta(void *from_buf, unsigned long from_size,
|
||||
void *to_buf, unsigned long to_size,
|
||||
unsigned long *delta_size, unsigned long max_size);
|
||||
extern void *patch_delta(void *src_buf, unsigned long src_size,
|
||||
void *delta_buf, unsigned long delta_size,
|
||||
/* opaque object for delta index */
|
||||
struct delta_index;
|
||||
|
||||
/*
|
||||
* create_delta_index: compute index data from given buffer
|
||||
*
|
||||
* This returns a pointer to a struct delta_index that should be passed to
|
||||
* subsequent create_delta() calls, or to free_delta_index(). A NULL pointer
|
||||
* is returned on failure. The given buffer must not be freed nor altered
|
||||
* before free_delta_index() is called. The returned pointer must be freed
|
||||
* using free_delta_index().
|
||||
*/
|
||||
extern struct delta_index *
|
||||
create_delta_index(const void *buf, unsigned long bufsize);
|
||||
|
||||
/*
|
||||
* free_delta_index: free the index created by create_delta_index()
|
||||
*/
|
||||
extern void free_delta_index(struct delta_index *index);
|
||||
|
||||
/*
|
||||
* create_delta: create a delta from given index for the given buffer
|
||||
*
|
||||
* This function may be called multiple times with different buffers using
|
||||
* the same delta_index pointer. If max_delta_size is non-zero and the
|
||||
* resulting delta is to be larger than max_delta_size then NULL is returned.
|
||||
* On success, a non-NULL pointer to the buffer with the delta data is
|
||||
* returned and *delta_size is updated with its size. The returned buffer
|
||||
* must be freed by the caller.
|
||||
*/
|
||||
extern void *
|
||||
create_delta(const struct delta_index *index,
|
||||
const void *buf, unsigned long bufsize,
|
||||
unsigned long *delta_size, unsigned long max_delta_size);
|
||||
|
||||
/*
|
||||
* diff_delta: create a delta from source buffer to target buffer
|
||||
*
|
||||
* If max_delta_size is non-zero and the resulting delta is to be larger
|
||||
* than max_delta_size then NULL is returned. On success, a non-NULL
|
||||
* pointer to the buffer with the delta data is returned and *delta_size is
|
||||
* updated with its size. The returned buffer must be freed by the caller.
|
||||
*/
|
||||
static inline void *
|
||||
diff_delta(const void *src_buf, unsigned long src_bufsize,
|
||||
const void *trg_buf, unsigned long trg_bufsize,
|
||||
unsigned long *delta_size, unsigned long max_delta_size)
|
||||
{
|
||||
struct delta_index *index = create_delta_index(src_buf, src_bufsize);
|
||||
if (index) {
|
||||
void *delta = create_delta(index, trg_buf, trg_bufsize,
|
||||
delta_size, max_delta_size);
|
||||
free_delta_index(index);
|
||||
return delta;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* patch_delta: recreate target buffer given source buffer and delta data
|
||||
*
|
||||
* On success, a non-NULL pointer to the target buffer is returned and
|
||||
* *trg_bufsize is updated with its size. On failure a NULL pointer is
|
||||
* returned. The returned buffer must be freed by the caller.
|
||||
*/
|
||||
extern void *patch_delta(const void *src_buf, unsigned long src_size,
|
||||
const void *delta_buf, unsigned long delta_size,
|
||||
unsigned long *dst_size);
|
||||
|
||||
/* the smallest possible delta size is 4 bytes */
|
||||
@ -14,7 +75,7 @@ extern void *patch_delta(void *src_buf, unsigned long src_size,
|
||||
|
||||
/*
|
||||
* This must be called twice on the delta data buffer, first to get the
|
||||
* expected reference buffer size, and again to get the result buffer size.
|
||||
* expected source buffer size, and again to get the target buffer size.
|
||||
*/
|
||||
static inline unsigned long get_delta_hdr_size(const unsigned char **datap,
|
||||
const unsigned char *top)
|
||||
|
168
diff-delta.c
168
diff-delta.c
@ -27,53 +27,70 @@
|
||||
/* block size: min = 16, max = 64k, power of 2 */
|
||||
#define BLK_SIZE 16
|
||||
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
/* maximum hash entry list for the same hash bucket */
|
||||
#define HASH_LIMIT 64
|
||||
|
||||
#define GR_PRIME 0x9e370001
|
||||
#define HASH(v, shift) (((unsigned int)(v) * GR_PRIME) >> (shift))
|
||||
|
||||
struct index {
|
||||
struct index_entry {
|
||||
const unsigned char *ptr;
|
||||
unsigned int val;
|
||||
struct index *next;
|
||||
struct index_entry *next;
|
||||
};
|
||||
|
||||
static struct index ** delta_index(const unsigned char *buf,
|
||||
unsigned long bufsize,
|
||||
unsigned long trg_bufsize,
|
||||
unsigned int *hash_shift)
|
||||
struct delta_index {
|
||||
const void *src_buf;
|
||||
unsigned long src_size;
|
||||
unsigned int hash_shift;
|
||||
struct index_entry *hash[0];
|
||||
};
|
||||
|
||||
struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
|
||||
{
|
||||
unsigned int i, hsize, hshift, hlimit, entries, *hash_count;
|
||||
const unsigned char *data;
|
||||
struct index *entry, **hash;
|
||||
unsigned int i, hsize, hshift, entries, *hash_count;
|
||||
const unsigned char *data, *buffer = buf;
|
||||
struct delta_index *index;
|
||||
struct index_entry *entry, **hash;
|
||||
void *mem;
|
||||
|
||||
if (!buf || !bufsize)
|
||||
return NULL;
|
||||
|
||||
/* determine index hash size */
|
||||
entries = bufsize / BLK_SIZE;
|
||||
hsize = entries / 4;
|
||||
for (i = 4; (1 << i) < hsize && i < 31; i++);
|
||||
hsize = 1 << i;
|
||||
hshift = 32 - i;
|
||||
*hash_shift = hshift;
|
||||
|
||||
/* allocate lookup index */
|
||||
mem = malloc(hsize * sizeof(*hash) + entries * sizeof(*entry));
|
||||
mem = malloc(sizeof(*index) +
|
||||
sizeof(*hash) * hsize +
|
||||
sizeof(*entry) * entries);
|
||||
if (!mem)
|
||||
return NULL;
|
||||
index = mem;
|
||||
mem = index + 1;
|
||||
hash = mem;
|
||||
entry = mem + hsize * sizeof(*hash);
|
||||
mem = hash + hsize;
|
||||
entry = mem;
|
||||
|
||||
index->src_buf = buf;
|
||||
index->src_size = bufsize;
|
||||
index->hash_shift = hshift;
|
||||
memset(hash, 0, hsize * sizeof(*hash));
|
||||
|
||||
/* allocate an array to count hash entries */
|
||||
hash_count = calloc(hsize, sizeof(*hash_count));
|
||||
if (!hash_count) {
|
||||
free(hash);
|
||||
free(index);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* then populate the index */
|
||||
data = buf + entries * BLK_SIZE - BLK_SIZE;
|
||||
while (data >= buf) {
|
||||
data = buffer + entries * BLK_SIZE - BLK_SIZE;
|
||||
while (data >= buffer) {
|
||||
unsigned int val = adler32(0, data, BLK_SIZE);
|
||||
i = HASH(val, hshift);
|
||||
entry->ptr = data;
|
||||
@ -91,27 +108,18 @@ static struct index ** delta_index(const unsigned char *buf,
|
||||
* bucket that would bring us to O(m*n) computing costs (m and n
|
||||
* corresponding to reference and target buffer sizes).
|
||||
*
|
||||
* The more the target buffer is large, the more it is important to
|
||||
* have small entry lists for each hash buckets. With such a limit
|
||||
* the cost is bounded to something more like O(m+n).
|
||||
*/
|
||||
hlimit = (1 << 26) / trg_bufsize;
|
||||
if (hlimit < 4*BLK_SIZE)
|
||||
hlimit = 4*BLK_SIZE;
|
||||
|
||||
/*
|
||||
* Now make sure none of the hash buckets has more entries than
|
||||
* Make sure none of the hash buckets has more entries than
|
||||
* we're willing to test. Otherwise we cull the entry list
|
||||
* uniformly to still preserve a good repartition across
|
||||
* the reference buffer.
|
||||
*/
|
||||
for (i = 0; i < hsize; i++) {
|
||||
if (hash_count[i] < hlimit)
|
||||
if (hash_count[i] < HASH_LIMIT)
|
||||
continue;
|
||||
entry = hash[i];
|
||||
do {
|
||||
struct index *keep = entry;
|
||||
int skip = hash_count[i] / hlimit / 2;
|
||||
struct index_entry *keep = entry;
|
||||
int skip = hash_count[i] / HASH_LIMIT / 2;
|
||||
do {
|
||||
entry = entry->next;
|
||||
} while(--skip && entry);
|
||||
@ -120,7 +128,12 @@ static struct index ** delta_index(const unsigned char *buf,
|
||||
}
|
||||
free(hash_count);
|
||||
|
||||
return hash;
|
||||
return index;
|
||||
}
|
||||
|
||||
void free_delta_index(struct delta_index *index)
|
||||
{
|
||||
free(index);
|
||||
}
|
||||
|
||||
/* provide the size of the copy opcode given the block offset and size */
|
||||
@ -131,21 +144,17 @@ static struct index ** delta_index(const unsigned char *buf,
|
||||
/* the maximum size for any opcode */
|
||||
#define MAX_OP_SIZE COPYOP_SIZE(0xffffffff, 0xffffffff)
|
||||
|
||||
void *diff_delta(void *from_buf, unsigned long from_size,
|
||||
void *to_buf, unsigned long to_size,
|
||||
unsigned long *delta_size,
|
||||
unsigned long max_size)
|
||||
void *
|
||||
create_delta(const struct delta_index *index,
|
||||
const void *trg_buf, unsigned long trg_size,
|
||||
unsigned long *delta_size, unsigned long max_size)
|
||||
{
|
||||
unsigned int i, outpos, outsize, hash_shift;
|
||||
int inscnt;
|
||||
const unsigned char *ref_data, *ref_top, *data, *top;
|
||||
unsigned char *out;
|
||||
struct index *entry, **hash;
|
||||
|
||||
if (!from_size || !to_size)
|
||||
return NULL;
|
||||
hash = delta_index(from_buf, from_size, to_size, &hash_shift);
|
||||
if (!hash)
|
||||
if (!trg_buf || !trg_size)
|
||||
return NULL;
|
||||
|
||||
outpos = 0;
|
||||
@ -153,60 +162,55 @@ void *diff_delta(void *from_buf, unsigned long from_size,
|
||||
if (max_size && outsize >= max_size)
|
||||
outsize = max_size + MAX_OP_SIZE + 1;
|
||||
out = malloc(outsize);
|
||||
if (!out) {
|
||||
free(hash);
|
||||
if (!out)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ref_data = from_buf;
|
||||
ref_top = from_buf + from_size;
|
||||
data = to_buf;
|
||||
top = to_buf + to_size;
|
||||
|
||||
/* store reference buffer size */
|
||||
out[outpos++] = from_size;
|
||||
from_size >>= 7;
|
||||
while (from_size) {
|
||||
out[outpos - 1] |= 0x80;
|
||||
out[outpos++] = from_size;
|
||||
from_size >>= 7;
|
||||
i = index->src_size;
|
||||
while (i >= 0x80) {
|
||||
out[outpos++] = i | 0x80;
|
||||
i >>= 7;
|
||||
}
|
||||
out[outpos++] = i;
|
||||
|
||||
/* store target buffer size */
|
||||
out[outpos++] = to_size;
|
||||
to_size >>= 7;
|
||||
while (to_size) {
|
||||
out[outpos - 1] |= 0x80;
|
||||
out[outpos++] = to_size;
|
||||
to_size >>= 7;
|
||||
i = trg_size;
|
||||
while (i >= 0x80) {
|
||||
out[outpos++] = i | 0x80;
|
||||
i >>= 7;
|
||||
}
|
||||
out[outpos++] = i;
|
||||
|
||||
ref_data = index->src_buf;
|
||||
ref_top = ref_data + index->src_size;
|
||||
data = trg_buf;
|
||||
top = trg_buf + trg_size;
|
||||
hash_shift = index->hash_shift;
|
||||
inscnt = 0;
|
||||
|
||||
while (data < top) {
|
||||
unsigned int moff = 0, msize = 0;
|
||||
if (data + BLK_SIZE <= top) {
|
||||
unsigned int val = adler32(0, data, BLK_SIZE);
|
||||
i = HASH(val, hash_shift);
|
||||
for (entry = hash[i]; entry; entry = entry->next) {
|
||||
const unsigned char *ref = entry->ptr;
|
||||
const unsigned char *src = data;
|
||||
unsigned int ref_size = ref_top - ref;
|
||||
if (entry->val != val)
|
||||
continue;
|
||||
if (ref_size > top - src)
|
||||
ref_size = top - src;
|
||||
if (ref_size > 0x10000)
|
||||
ref_size = 0x10000;
|
||||
if (ref_size <= msize)
|
||||
break;
|
||||
while (ref_size-- && *src++ == *ref)
|
||||
ref++;
|
||||
if (msize < ref - entry->ptr) {
|
||||
/* this is our best match so far */
|
||||
msize = ref - entry->ptr;
|
||||
moff = entry->ptr - ref_data;
|
||||
}
|
||||
struct index_entry *entry;
|
||||
unsigned int val = adler32(0, data, BLK_SIZE);
|
||||
i = HASH(val, hash_shift);
|
||||
for (entry = index->hash[i]; entry; entry = entry->next) {
|
||||
const unsigned char *ref = entry->ptr;
|
||||
const unsigned char *src = data;
|
||||
unsigned int ref_size = ref_top - ref;
|
||||
if (entry->val != val)
|
||||
continue;
|
||||
if (ref_size > top - src)
|
||||
ref_size = top - src;
|
||||
if (ref_size > 0x10000)
|
||||
ref_size = 0x10000;
|
||||
if (ref_size <= msize)
|
||||
break;
|
||||
while (ref_size-- && *src++ == *ref)
|
||||
ref++;
|
||||
if (msize < ref - entry->ptr) {
|
||||
/* this is our best match so far */
|
||||
msize = ref - entry->ptr;
|
||||
moff = entry->ptr - ref_data;
|
||||
}
|
||||
}
|
||||
|
||||
@ -271,7 +275,6 @@ void *diff_delta(void *from_buf, unsigned long from_size,
|
||||
out = realloc(out, outsize);
|
||||
if (!out) {
|
||||
free(tmp);
|
||||
free(hash);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
@ -280,7 +283,6 @@ void *diff_delta(void *from_buf, unsigned long from_size,
|
||||
if (inscnt)
|
||||
out[outpos - inscnt - 1] = inscnt;
|
||||
|
||||
free(hash);
|
||||
*delta_size = outpos;
|
||||
return out;
|
||||
}
|
||||
|
@ -13,8 +13,8 @@
|
||||
#include <string.h>
|
||||
#include "delta.h"
|
||||
|
||||
void *patch_delta(void *src_buf, unsigned long src_size,
|
||||
void *delta_buf, unsigned long delta_size,
|
||||
void *patch_delta(const void *src_buf, unsigned long src_size,
|
||||
const void *delta_buf, unsigned long delta_size,
|
||||
unsigned long *dst_size)
|
||||
{
|
||||
const unsigned char *data, *top;
|
||||
|
Loading…
Reference in New Issue
Block a user