diff-delta: allow reusing of the reference buffer index
When a reference buffer is used multiple times, its index can be computed once and reused. This patch adds an extra pointer-to-pointer argument (from_index) to diff_delta() for this purpose. If from_index is NULL then everything behaves as before. If from_index is non-NULL and *from_index is NULL then the index is created and its location is stored in *from_index. In this case the caller is responsible for freeing the memory pointed to by *from_index. If both from_index and *from_index are non-NULL then the index is reused as is. This currently saves about 10% of CPU time when repacking the git archive. Signed-off-by: Nicolas Pitre <nico@cam.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
This commit is contained in:
parent
5bb86b82ba
commit
38fd0721d0
3
delta.h
3
delta.h
@ -4,7 +4,8 @@
|
|||||||
/* handling of delta buffers */
|
/* handling of delta buffers */
|
||||||
extern void *diff_delta(void *from_buf, unsigned long from_size,
|
extern void *diff_delta(void *from_buf, unsigned long from_size,
|
||||||
void *to_buf, unsigned long to_size,
|
void *to_buf, unsigned long to_size,
|
||||||
unsigned long *delta_size, unsigned long max_size);
|
unsigned long *delta_size, unsigned long max_size,
|
||||||
|
void **from_index);
|
||||||
extern void *patch_delta(void *src_buf, unsigned long src_size,
|
extern void *patch_delta(void *src_buf, unsigned long src_size,
|
||||||
void *delta_buf, unsigned long delta_size,
|
void *delta_buf, unsigned long delta_size,
|
||||||
unsigned long *dst_size);
|
unsigned long *dst_size);
|
||||||
|
41
diff-delta.c
41
diff-delta.c
@ -30,8 +30,7 @@ struct index {
|
|||||||
|
|
||||||
static struct index ** delta_index(const unsigned char *buf,
|
static struct index ** delta_index(const unsigned char *buf,
|
||||||
unsigned long bufsize,
|
unsigned long bufsize,
|
||||||
unsigned long trg_bufsize,
|
unsigned long trg_bufsize)
|
||||||
unsigned int *hash_shift)
|
|
||||||
{
|
{
|
||||||
unsigned long hsize;
|
unsigned long hsize;
|
||||||
unsigned int i, hshift, hlimit, *hash_count;
|
unsigned int i, hshift, hlimit, *hash_count;
|
||||||
@ -44,14 +43,17 @@ static struct index ** delta_index(const unsigned char *buf,
|
|||||||
for (i = 8; (1 << i) < hsize && i < 24; i += 2);
|
for (i = 8; (1 << i) < hsize && i < 24; i += 2);
|
||||||
hsize = 1 << i;
|
hsize = 1 << i;
|
||||||
hshift = (i - 8) / 2;
|
hshift = (i - 8) / 2;
|
||||||
*hash_shift = hshift;
|
|
||||||
|
|
||||||
/* allocate lookup index */
|
/*
|
||||||
mem = malloc(hsize * sizeof(*hash) + bufsize * sizeof(*entry));
|
* Allocate lookup index. Note the first hash pointer
|
||||||
|
* is used to store the hash shift value.
|
||||||
|
*/
|
||||||
|
mem = malloc((1 + hsize) * sizeof(*hash) + bufsize * sizeof(*entry));
|
||||||
if (!mem)
|
if (!mem)
|
||||||
return NULL;
|
return NULL;
|
||||||
hash = mem;
|
hash = mem;
|
||||||
entry = mem + hsize * sizeof(*hash);
|
*hash++ = (void *)hshift;
|
||||||
|
entry = mem + (1 + hsize) * sizeof(*hash);
|
||||||
memset(hash, 0, hsize * sizeof(*hash));
|
memset(hash, 0, hsize * sizeof(*hash));
|
||||||
|
|
||||||
/* allocate an array to count hash entries */
|
/* allocate an array to count hash entries */
|
||||||
@ -107,7 +109,7 @@ static struct index ** delta_index(const unsigned char *buf,
|
|||||||
}
|
}
|
||||||
free(hash_count);
|
free(hash_count);
|
||||||
|
|
||||||
return hash;
|
return hash-1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* provide the size of the copy opcode given the block offset and size */
|
/* provide the size of the copy opcode given the block offset and size */
|
||||||
@ -121,7 +123,8 @@ static struct index ** delta_index(const unsigned char *buf,
|
|||||||
void *diff_delta(void *from_buf, unsigned long from_size,
|
void *diff_delta(void *from_buf, unsigned long from_size,
|
||||||
void *to_buf, unsigned long to_size,
|
void *to_buf, unsigned long to_size,
|
||||||
unsigned long *delta_size,
|
unsigned long *delta_size,
|
||||||
unsigned long max_size)
|
unsigned long max_size,
|
||||||
|
void **from_index)
|
||||||
{
|
{
|
||||||
unsigned int i, outpos, outsize, inscnt, hash_shift;
|
unsigned int i, outpos, outsize, inscnt, hash_shift;
|
||||||
const unsigned char *ref_data, *ref_top, *data, *top;
|
const unsigned char *ref_data, *ref_top, *data, *top;
|
||||||
@ -130,9 +133,16 @@ void *diff_delta(void *from_buf, unsigned long from_size,
|
|||||||
|
|
||||||
if (!from_size || !to_size)
|
if (!from_size || !to_size)
|
||||||
return NULL;
|
return NULL;
|
||||||
hash = delta_index(from_buf, from_size, to_size, &hash_shift);
|
if (from_index && *from_index) {
|
||||||
if (!hash)
|
hash = *from_index;
|
||||||
return NULL;
|
} else {
|
||||||
|
hash = delta_index(from_buf, from_size, to_size);
|
||||||
|
if (!hash)
|
||||||
|
return NULL;
|
||||||
|
if (from_index)
|
||||||
|
*from_index = hash;
|
||||||
|
}
|
||||||
|
hash_shift = (unsigned int)(*hash++);
|
||||||
|
|
||||||
outpos = 0;
|
outpos = 0;
|
||||||
outsize = 8192;
|
outsize = 8192;
|
||||||
@ -140,7 +150,8 @@ void *diff_delta(void *from_buf, unsigned long from_size,
|
|||||||
outsize = max_size + MAX_OP_SIZE + 1;
|
outsize = max_size + MAX_OP_SIZE + 1;
|
||||||
out = malloc(outsize);
|
out = malloc(outsize);
|
||||||
if (!out) {
|
if (!out) {
|
||||||
free(hash);
|
if (!from_index)
|
||||||
|
free(hash-1);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -241,7 +252,8 @@ void *diff_delta(void *from_buf, unsigned long from_size,
|
|||||||
out = realloc(out, outsize);
|
out = realloc(out, outsize);
|
||||||
if (!out) {
|
if (!out) {
|
||||||
free(tmp);
|
free(tmp);
|
||||||
free(hash);
|
if (!from_index)
|
||||||
|
free(hash-1);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -250,7 +262,8 @@ void *diff_delta(void *from_buf, unsigned long from_size,
|
|||||||
if (inscnt)
|
if (inscnt)
|
||||||
out[outpos - inscnt - 1] = inscnt;
|
out[outpos - inscnt - 1] = inscnt;
|
||||||
|
|
||||||
free(hash);
|
if (!from_index)
|
||||||
|
free(hash-1);
|
||||||
*delta_size = outpos;
|
*delta_size = outpos;
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
@ -71,7 +71,7 @@ static int should_break(struct diff_filespec *src,
|
|||||||
|
|
||||||
delta = diff_delta(src->data, src->size,
|
delta = diff_delta(src->data, src->size,
|
||||||
dst->data, dst->size,
|
dst->data, dst->size,
|
||||||
&delta_size, 0);
|
&delta_size, 0, NULL);
|
||||||
if (!delta)
|
if (!delta)
|
||||||
return 0; /* error but caught downstream */
|
return 0; /* error but caught downstream */
|
||||||
|
|
||||||
|
@ -168,7 +168,7 @@ static int estimate_similarity(struct diff_filespec *src,
|
|||||||
delta_limit = base_size * (MAX_SCORE-minimum_score) / MAX_SCORE;
|
delta_limit = base_size * (MAX_SCORE-minimum_score) / MAX_SCORE;
|
||||||
delta = diff_delta(src->data, src->size,
|
delta = diff_delta(src->data, src->size,
|
||||||
dst->data, dst->size,
|
dst->data, dst->size,
|
||||||
&delta_size, delta_limit);
|
&delta_size, delta_limit, NULL);
|
||||||
if (!delta)
|
if (!delta)
|
||||||
/* If delta_limit is exceeded, we have too much differences */
|
/* If delta_limit is exceeded, we have too much differences */
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -204,7 +204,7 @@ static void *delta_against(void *buf, unsigned long size, struct object_entry *e
|
|||||||
if (!otherbuf)
|
if (!otherbuf)
|
||||||
die("unable to read %s", sha1_to_hex(entry->delta->sha1));
|
die("unable to read %s", sha1_to_hex(entry->delta->sha1));
|
||||||
delta_buf = diff_delta(otherbuf, othersize,
|
delta_buf = diff_delta(otherbuf, othersize,
|
||||||
buf, size, &delta_size, 0);
|
buf, size, &delta_size, 0, NULL);
|
||||||
if (!delta_buf || delta_size != entry->delta_size)
|
if (!delta_buf || delta_size != entry->delta_size)
|
||||||
die("delta size changed");
|
die("delta size changed");
|
||||||
free(buf);
|
free(buf);
|
||||||
@ -810,6 +810,7 @@ static int type_size_sort(const struct object_entry *a, const struct object_entr
|
|||||||
struct unpacked {
|
struct unpacked {
|
||||||
struct object_entry *entry;
|
struct object_entry *entry;
|
||||||
void *data;
|
void *data;
|
||||||
|
void **delta_index;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -891,7 +892,8 @@ static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_de
|
|||||||
if (sizediff >= max_size)
|
if (sizediff >= max_size)
|
||||||
return -1;
|
return -1;
|
||||||
delta_buf = diff_delta(old->data, oldsize,
|
delta_buf = diff_delta(old->data, oldsize,
|
||||||
cur->data, size, &delta_size, max_size);
|
cur->data, size, &delta_size,
|
||||||
|
max_size, old->delta_index);
|
||||||
if (!delta_buf)
|
if (!delta_buf)
|
||||||
return 0;
|
return 0;
|
||||||
cur_entry->delta = old_entry;
|
cur_entry->delta = old_entry;
|
||||||
@ -948,6 +950,7 @@ static void find_deltas(struct object_entry **list, int window, int depth)
|
|||||||
*/
|
*/
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
free(n->delta_index);
|
||||||
free(n->data);
|
free(n->data);
|
||||||
n->entry = entry;
|
n->entry = entry;
|
||||||
n->data = read_sha1_file(entry->sha1, type, &size);
|
n->data = read_sha1_file(entry->sha1, type, &size);
|
||||||
@ -974,8 +977,10 @@ static void find_deltas(struct object_entry **list, int window, int depth)
|
|||||||
if (progress)
|
if (progress)
|
||||||
fputc('\n', stderr);
|
fputc('\n', stderr);
|
||||||
|
|
||||||
for (i = 0; i < window; ++i)
|
for (i = 0; i < window; ++i) {
|
||||||
|
free(array[i].delta_index);
|
||||||
free(array[i].data);
|
free(array[i].data);
|
||||||
|
}
|
||||||
free(array);
|
free(array);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,7 +63,7 @@ int main(int argc, char *argv[])
|
|||||||
if (argv[1][1] == 'd')
|
if (argv[1][1] == 'd')
|
||||||
out_buf = diff_delta(from_buf, from_size,
|
out_buf = diff_delta(from_buf, from_size,
|
||||||
data_buf, data_size,
|
data_buf, data_size,
|
||||||
&out_size, 0);
|
&out_size, 0, NULL);
|
||||||
else
|
else
|
||||||
out_buf = patch_delta(from_buf, from_size,
|
out_buf = patch_delta(from_buf, from_size,
|
||||||
data_buf, data_size,
|
data_buf, data_size,
|
||||||
|
Loading…
Reference in New Issue
Block a user