diff-delta: allow reusing of the reference buffer index

When a reference buffer is used multiple times, its index can be
computed once and then reused.  This patch adds an extra
pointer-to-pointer argument (from_index) to diff_delta() for this purpose.

If from_index is NULL then the behavior is the same as before: the index
is built and freed within the call.

If from_index is non-NULL and *from_index is NULL, then the index is
created and its location is stored in *from_index.  In this case the
caller is responsible for freeing the memory pointed to by *from_index.

If both from_index and *from_index are non-NULL, then the existing index
is reused as is.

This currently saves about 10% of the CPU time needed to repack the git
archive.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
This commit is contained in:
Nicolas Pitre 2006-02-27 23:38:28 -05:00 committed by Junio C Hamano
parent 5bb86b82ba
commit 38fd0721d0
6 changed files with 40 additions and 21 deletions

View File

@ -4,7 +4,8 @@
/* handling of delta buffers */ /* handling of delta buffers */
extern void *diff_delta(void *from_buf, unsigned long from_size, extern void *diff_delta(void *from_buf, unsigned long from_size,
void *to_buf, unsigned long to_size, void *to_buf, unsigned long to_size,
unsigned long *delta_size, unsigned long max_size); unsigned long *delta_size, unsigned long max_size,
void **from_index);
extern void *patch_delta(void *src_buf, unsigned long src_size, extern void *patch_delta(void *src_buf, unsigned long src_size,
void *delta_buf, unsigned long delta_size, void *delta_buf, unsigned long delta_size,
unsigned long *dst_size); unsigned long *dst_size);

View File

@ -30,8 +30,7 @@ struct index {
static struct index ** delta_index(const unsigned char *buf, static struct index ** delta_index(const unsigned char *buf,
unsigned long bufsize, unsigned long bufsize,
unsigned long trg_bufsize, unsigned long trg_bufsize)
unsigned int *hash_shift)
{ {
unsigned long hsize; unsigned long hsize;
unsigned int i, hshift, hlimit, *hash_count; unsigned int i, hshift, hlimit, *hash_count;
@ -44,14 +43,17 @@ static struct index ** delta_index(const unsigned char *buf,
for (i = 8; (1 << i) < hsize && i < 24; i += 2); for (i = 8; (1 << i) < hsize && i < 24; i += 2);
hsize = 1 << i; hsize = 1 << i;
hshift = (i - 8) / 2; hshift = (i - 8) / 2;
*hash_shift = hshift;
/* allocate lookup index */ /*
mem = malloc(hsize * sizeof(*hash) + bufsize * sizeof(*entry)); * Allocate lookup index. Note the first hash pointer
* is used to store the hash shift value.
*/
mem = malloc((1 + hsize) * sizeof(*hash) + bufsize * sizeof(*entry));
if (!mem) if (!mem)
return NULL; return NULL;
hash = mem; hash = mem;
entry = mem + hsize * sizeof(*hash); *hash++ = (void *)hshift;
entry = mem + (1 + hsize) * sizeof(*hash);
memset(hash, 0, hsize * sizeof(*hash)); memset(hash, 0, hsize * sizeof(*hash));
/* allocate an array to count hash entries */ /* allocate an array to count hash entries */
@ -107,7 +109,7 @@ static struct index ** delta_index(const unsigned char *buf,
} }
free(hash_count); free(hash_count);
return hash; return hash-1;
} }
/* provide the size of the copy opcode given the block offset and size */ /* provide the size of the copy opcode given the block offset and size */
@ -121,7 +123,8 @@ static struct index ** delta_index(const unsigned char *buf,
void *diff_delta(void *from_buf, unsigned long from_size, void *diff_delta(void *from_buf, unsigned long from_size,
void *to_buf, unsigned long to_size, void *to_buf, unsigned long to_size,
unsigned long *delta_size, unsigned long *delta_size,
unsigned long max_size) unsigned long max_size,
void **from_index)
{ {
unsigned int i, outpos, outsize, inscnt, hash_shift; unsigned int i, outpos, outsize, inscnt, hash_shift;
const unsigned char *ref_data, *ref_top, *data, *top; const unsigned char *ref_data, *ref_top, *data, *top;
@ -130,9 +133,16 @@ void *diff_delta(void *from_buf, unsigned long from_size,
if (!from_size || !to_size) if (!from_size || !to_size)
return NULL; return NULL;
hash = delta_index(from_buf, from_size, to_size, &hash_shift); if (from_index && *from_index) {
hash = *from_index;
} else {
hash = delta_index(from_buf, from_size, to_size);
if (!hash) if (!hash)
return NULL; return NULL;
if (from_index)
*from_index = hash;
}
hash_shift = (unsigned int)(*hash++);
outpos = 0; outpos = 0;
outsize = 8192; outsize = 8192;
@ -140,7 +150,8 @@ void *diff_delta(void *from_buf, unsigned long from_size,
outsize = max_size + MAX_OP_SIZE + 1; outsize = max_size + MAX_OP_SIZE + 1;
out = malloc(outsize); out = malloc(outsize);
if (!out) { if (!out) {
free(hash); if (!from_index)
free(hash-1);
return NULL; return NULL;
} }
@ -241,7 +252,8 @@ void *diff_delta(void *from_buf, unsigned long from_size,
out = realloc(out, outsize); out = realloc(out, outsize);
if (!out) { if (!out) {
free(tmp); free(tmp);
free(hash); if (!from_index)
free(hash-1);
return NULL; return NULL;
} }
} }
@ -250,7 +262,8 @@ void *diff_delta(void *from_buf, unsigned long from_size,
if (inscnt) if (inscnt)
out[outpos - inscnt - 1] = inscnt; out[outpos - inscnt - 1] = inscnt;
free(hash); if (!from_index)
free(hash-1);
*delta_size = outpos; *delta_size = outpos;
return out; return out;
} }

View File

@ -71,7 +71,7 @@ static int should_break(struct diff_filespec *src,
delta = diff_delta(src->data, src->size, delta = diff_delta(src->data, src->size,
dst->data, dst->size, dst->data, dst->size,
&delta_size, 0); &delta_size, 0, NULL);
if (!delta) if (!delta)
return 0; /* error but caught downstream */ return 0; /* error but caught downstream */

View File

@ -168,7 +168,7 @@ static int estimate_similarity(struct diff_filespec *src,
delta_limit = base_size * (MAX_SCORE-minimum_score) / MAX_SCORE; delta_limit = base_size * (MAX_SCORE-minimum_score) / MAX_SCORE;
delta = diff_delta(src->data, src->size, delta = diff_delta(src->data, src->size,
dst->data, dst->size, dst->data, dst->size,
&delta_size, delta_limit); &delta_size, delta_limit, NULL);
if (!delta) if (!delta)
/* If delta_limit is exceeded, we have too much differences */ /* If delta_limit is exceeded, we have too much differences */
return 0; return 0;

View File

@ -204,7 +204,7 @@ static void *delta_against(void *buf, unsigned long size, struct object_entry *e
if (!otherbuf) if (!otherbuf)
die("unable to read %s", sha1_to_hex(entry->delta->sha1)); die("unable to read %s", sha1_to_hex(entry->delta->sha1));
delta_buf = diff_delta(otherbuf, othersize, delta_buf = diff_delta(otherbuf, othersize,
buf, size, &delta_size, 0); buf, size, &delta_size, 0, NULL);
if (!delta_buf || delta_size != entry->delta_size) if (!delta_buf || delta_size != entry->delta_size)
die("delta size changed"); die("delta size changed");
free(buf); free(buf);
@ -810,6 +810,7 @@ static int type_size_sort(const struct object_entry *a, const struct object_entr
struct unpacked { struct unpacked {
struct object_entry *entry; struct object_entry *entry;
void *data; void *data;
void **delta_index;
}; };
/* /*
@ -891,7 +892,8 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
if (sizediff >= max_size) if (sizediff >= max_size)
return -1; return -1;
delta_buf = diff_delta(old->data, oldsize, delta_buf = diff_delta(old->data, oldsize,
cur->data, size, &delta_size, max_size); cur->data, size, &delta_size,
max_size, old->delta_index);
if (!delta_buf) if (!delta_buf)
return 0; return 0;
cur_entry->delta = old_entry; cur_entry->delta = old_entry;
@ -948,6 +950,7 @@ static void find_deltas(struct object_entry **list, int window, int depth)
*/ */
continue; continue;
free(n->delta_index);
free(n->data); free(n->data);
n->entry = entry; n->entry = entry;
n->data = read_sha1_file(entry->sha1, type, &size); n->data = read_sha1_file(entry->sha1, type, &size);
@ -974,8 +977,10 @@ static void find_deltas(struct object_entry **list, int window, int depth)
if (progress) if (progress)
fputc('\n', stderr); fputc('\n', stderr);
for (i = 0; i < window; ++i) for (i = 0; i < window; ++i) {
free(array[i].delta_index);
free(array[i].data); free(array[i].data);
}
free(array); free(array);
} }

View File

@ -63,7 +63,7 @@ int main(int argc, char *argv[])
if (argv[1][1] == 'd') if (argv[1][1] == 'd')
out_buf = diff_delta(from_buf, from_size, out_buf = diff_delta(from_buf, from_size,
data_buf, data_size, data_buf, data_size,
&out_size, 0); &out_size, 0, NULL);
else else
out_buf = patch_delta(from_buf, from_size, out_buf = patch_delta(from_buf, from_size,
data_buf, data_size, data_buf, data_size,