introduce delta objects with offset to base
This adds a new object type, namely OBJ_OFS_DELTA, renames OBJ_DELTA to OBJ_REF_DELTA to better make the distinction between those two delta objects, and adds support for the handling of those new delta objects in sha1_file.c only. The OBJ_OFS_DELTA contains a relative offset from the delta object's position in a pack instead of the 20-byte SHA1 reference to identify the base object. Since the base is likely not far away, the relative offset is more likely to have a smaller encoding on average than an absolute offset. And for those delta objects the base must always be stored first because there is no way to know the distance of later objects when streaming a pack. Hence this relative offset is always meant to be negative. The offset encoding is slightly denser than the one used for object size -- credits to <linux@horizon.com> (whoever this is) for bringing it to my attention. This allows for a pack size reduction ranging from 3.2% (Linux-2.6) to over 5% (linux-historic). Runtime pack access should be faster too, since delta replay skips a search in the pack index for each delta in a chain. Signed-off-by: Nicolas Pitre <nico@cam.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
This commit is contained in:
parent
4a0641b7cf
commit
eb32d236df
@ -232,7 +232,7 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha
|
|||||||
int n = 1;
|
int n = 1;
|
||||||
unsigned char c;
|
unsigned char c;
|
||||||
|
|
||||||
if (type < OBJ_COMMIT || type > OBJ_DELTA)
|
if (type < OBJ_COMMIT || type > OBJ_REF_DELTA)
|
||||||
die("bad type %d", type);
|
die("bad type %d", type);
|
||||||
|
|
||||||
c = (type << 4) | (size & 15);
|
c = (type << 4) | (size & 15);
|
||||||
@ -297,7 +297,7 @@ static int revalidate_pack_entry(struct object_entry *entry, unsigned char *data
|
|||||||
used = unpack_object_header_gently(data, len, &type, &size);
|
used = unpack_object_header_gently(data, len, &type, &size);
|
||||||
if (!used)
|
if (!used)
|
||||||
return -1;
|
return -1;
|
||||||
if (type == OBJ_DELTA)
|
if (type == OBJ_REF_DELTA)
|
||||||
used += 20; /* skip base object name */
|
used += 20; /* skip base object name */
|
||||||
data += used;
|
data += used;
|
||||||
len -= used;
|
len -= used;
|
||||||
@ -340,7 +340,7 @@ static unsigned long write_object(struct sha1file *f,
|
|||||||
obj_type = entry->type;
|
obj_type = entry->type;
|
||||||
if (! entry->in_pack)
|
if (! entry->in_pack)
|
||||||
to_reuse = 0; /* can't reuse what we don't have */
|
to_reuse = 0; /* can't reuse what we don't have */
|
||||||
else if (obj_type == OBJ_DELTA)
|
else if (obj_type == OBJ_REF_DELTA)
|
||||||
to_reuse = 1; /* check_object() decided it for us */
|
to_reuse = 1; /* check_object() decided it for us */
|
||||||
else if (obj_type != entry->in_pack_type)
|
else if (obj_type != entry->in_pack_type)
|
||||||
to_reuse = 0; /* pack has delta which is unusable */
|
to_reuse = 0; /* pack has delta which is unusable */
|
||||||
@ -380,7 +380,7 @@ static unsigned long write_object(struct sha1file *f,
|
|||||||
if (entry->delta) {
|
if (entry->delta) {
|
||||||
buf = delta_against(buf, size, entry);
|
buf = delta_against(buf, size, entry);
|
||||||
size = entry->delta_size;
|
size = entry->delta_size;
|
||||||
obj_type = OBJ_DELTA;
|
obj_type = OBJ_REF_DELTA;
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
* The object header is a byte of 'type' followed by zero or
|
* The object header is a byte of 'type' followed by zero or
|
||||||
@ -409,11 +409,11 @@ static unsigned long write_object(struct sha1file *f,
|
|||||||
sha1write(f, buf, datalen);
|
sha1write(f, buf, datalen);
|
||||||
unuse_packed_git(p);
|
unuse_packed_git(p);
|
||||||
hdrlen = 0; /* not really */
|
hdrlen = 0; /* not really */
|
||||||
if (obj_type == OBJ_DELTA)
|
if (obj_type == OBJ_REF_DELTA)
|
||||||
reused_delta++;
|
reused_delta++;
|
||||||
reused++;
|
reused++;
|
||||||
}
|
}
|
||||||
if (obj_type == OBJ_DELTA)
|
if (obj_type == OBJ_REF_DELTA)
|
||||||
written_delta++;
|
written_delta++;
|
||||||
written++;
|
written++;
|
||||||
return hdrlen + datalen;
|
return hdrlen + datalen;
|
||||||
@ -916,7 +916,7 @@ static void check_object(struct object_entry *entry)
|
|||||||
* delta.
|
* delta.
|
||||||
*/
|
*/
|
||||||
if (!no_reuse_delta &&
|
if (!no_reuse_delta &&
|
||||||
entry->in_pack_type == OBJ_DELTA &&
|
entry->in_pack_type == OBJ_REF_DELTA &&
|
||||||
(base_entry = locate_object_entry(base)) &&
|
(base_entry = locate_object_entry(base)) &&
|
||||||
(!base_entry->preferred_base)) {
|
(!base_entry->preferred_base)) {
|
||||||
|
|
||||||
@ -929,7 +929,7 @@ static void check_object(struct object_entry *entry)
|
|||||||
/* uncompressed size of the delta data */
|
/* uncompressed size of the delta data */
|
||||||
entry->size = entry->delta_size = size;
|
entry->size = entry->delta_size = size;
|
||||||
entry->delta = base_entry;
|
entry->delta = base_entry;
|
||||||
entry->type = OBJ_DELTA;
|
entry->type = OBJ_REF_DELTA;
|
||||||
|
|
||||||
entry->delta_sibling = base_entry->delta_child;
|
entry->delta_sibling = base_entry->delta_child;
|
||||||
base_entry->delta_child = entry;
|
base_entry->delta_child = entry;
|
||||||
|
@ -241,7 +241,7 @@ static void unpack_one(unsigned nr, unsigned total)
|
|||||||
case OBJ_TAG:
|
case OBJ_TAG:
|
||||||
unpack_non_delta_entry(type, size);
|
unpack_non_delta_entry(type, size);
|
||||||
return;
|
return;
|
||||||
case OBJ_DELTA:
|
case OBJ_REF_DELTA:
|
||||||
unpack_delta_entry(size);
|
unpack_delta_entry(size);
|
||||||
return;
|
return;
|
||||||
default:
|
default:
|
||||||
|
5
cache.h
5
cache.h
@ -274,8 +274,9 @@ enum object_type {
|
|||||||
OBJ_TREE = 2,
|
OBJ_TREE = 2,
|
||||||
OBJ_BLOB = 3,
|
OBJ_BLOB = 3,
|
||||||
OBJ_TAG = 4,
|
OBJ_TAG = 4,
|
||||||
/* 5/6 for future expansion */
|
/* 5 for future expansion */
|
||||||
OBJ_DELTA = 7,
|
OBJ_OFS_DELTA = 6,
|
||||||
|
OBJ_REF_DELTA = 7,
|
||||||
OBJ_BAD,
|
OBJ_BAD,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -158,7 +158,7 @@ static void *unpack_raw_entry(unsigned long offset,
|
|||||||
}
|
}
|
||||||
|
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case OBJ_DELTA:
|
case OBJ_REF_DELTA:
|
||||||
if (pos + 20 >= pack_limit)
|
if (pos + 20 >= pack_limit)
|
||||||
bad_object(offset, "object extends past end of pack");
|
bad_object(offset, "object extends past end of pack");
|
||||||
hashcpy(delta_base, pack_base + pos);
|
hashcpy(delta_base, pack_base + pos);
|
||||||
@ -301,7 +301,7 @@ static void parse_pack_objects(void)
|
|||||||
data = unpack_raw_entry(offset, &obj->type, &data_size,
|
data = unpack_raw_entry(offset, &obj->type, &data_size,
|
||||||
base_sha1, &offset);
|
base_sha1, &offset);
|
||||||
obj->real_type = obj->type;
|
obj->real_type = obj->type;
|
||||||
if (obj->type == OBJ_DELTA) {
|
if (obj->type == OBJ_REF_DELTA) {
|
||||||
struct delta_entry *delta = &deltas[nr_deltas++];
|
struct delta_entry *delta = &deltas[nr_deltas++];
|
||||||
delta->obj = obj;
|
delta->obj = obj;
|
||||||
hashcpy(delta->base_sha1, base_sha1);
|
hashcpy(delta->base_sha1, base_sha1);
|
||||||
@ -328,7 +328,7 @@ static void parse_pack_objects(void)
|
|||||||
struct object_entry *obj = &objects[i];
|
struct object_entry *obj = &objects[i];
|
||||||
int j, first, last;
|
int j, first, last;
|
||||||
|
|
||||||
if (obj->type == OBJ_DELTA)
|
if (obj->type == OBJ_REF_DELTA)
|
||||||
continue;
|
continue;
|
||||||
if (find_deltas_based_on_sha1(obj->sha1, &first, &last))
|
if (find_deltas_based_on_sha1(obj->sha1, &first, &last))
|
||||||
continue;
|
continue;
|
||||||
@ -341,7 +341,7 @@ static void parse_pack_objects(void)
|
|||||||
|
|
||||||
/* Check for unresolved deltas */
|
/* Check for unresolved deltas */
|
||||||
for (i = 0; i < nr_deltas; i++) {
|
for (i = 0; i < nr_deltas; i++) {
|
||||||
if (deltas[i].obj->real_type == OBJ_DELTA)
|
if (deltas[i].obj->real_type == OBJ_REF_DELTA)
|
||||||
die("packfile '%s' has unresolved deltas", pack_name);
|
die("packfile '%s' has unresolved deltas", pack_name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
96
sha1_file.c
96
sha1_file.c
@ -883,26 +883,61 @@ void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned l
|
|||||||
return unpack_sha1_rest(&stream, hdr, *size);
|
return unpack_sha1_rest(&stream, hdr, *size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static unsigned long get_delta_base(struct packed_git *p,
|
||||||
|
unsigned long offset,
|
||||||
|
enum object_type kind,
|
||||||
|
unsigned long delta_obj_offset,
|
||||||
|
unsigned long *base_obj_offset)
|
||||||
|
{
|
||||||
|
unsigned char *base_info = (unsigned char *) p->pack_base + offset;
|
||||||
|
unsigned long base_offset;
|
||||||
|
|
||||||
|
/* there must be at least 20 bytes left regardless of delta type */
|
||||||
|
if (p->pack_size <= offset + 20)
|
||||||
|
die("truncated pack file");
|
||||||
|
|
||||||
|
if (kind == OBJ_OFS_DELTA) {
|
||||||
|
unsigned used = 0;
|
||||||
|
unsigned char c = base_info[used++];
|
||||||
|
base_offset = c & 127;
|
||||||
|
while (c & 128) {
|
||||||
|
base_offset += 1;
|
||||||
|
if (!base_offset || base_offset & ~(~0UL >> 7))
|
||||||
|
die("offset value overflow for delta base object");
|
||||||
|
c = base_info[used++];
|
||||||
|
base_offset = (base_offset << 7) + (c & 127);
|
||||||
|
}
|
||||||
|
base_offset = delta_obj_offset - base_offset;
|
||||||
|
if (base_offset >= delta_obj_offset)
|
||||||
|
die("delta base offset out of bound");
|
||||||
|
offset += used;
|
||||||
|
} else if (kind == OBJ_REF_DELTA) {
|
||||||
|
/* The base entry _must_ be in the same pack */
|
||||||
|
base_offset = find_pack_entry_one(base_info, p);
|
||||||
|
if (!base_offset)
|
||||||
|
die("failed to find delta-pack base object %s",
|
||||||
|
sha1_to_hex(base_info));
|
||||||
|
offset += 20;
|
||||||
|
} else
|
||||||
|
die("I am totally screwed");
|
||||||
|
*base_obj_offset = base_offset;
|
||||||
|
return offset;
|
||||||
|
}
|
||||||
|
|
||||||
/* forward declaration for a mutually recursive function */
|
/* forward declaration for a mutually recursive function */
|
||||||
static int packed_object_info(struct packed_git *p, unsigned long offset,
|
static int packed_object_info(struct packed_git *p, unsigned long offset,
|
||||||
char *type, unsigned long *sizep);
|
char *type, unsigned long *sizep);
|
||||||
|
|
||||||
static int packed_delta_info(struct packed_git *p,
|
static int packed_delta_info(struct packed_git *p,
|
||||||
unsigned long offset,
|
unsigned long offset,
|
||||||
|
enum object_type kind,
|
||||||
|
unsigned long obj_offset,
|
||||||
char *type,
|
char *type,
|
||||||
unsigned long *sizep)
|
unsigned long *sizep)
|
||||||
{
|
{
|
||||||
unsigned long base_offset;
|
unsigned long base_offset;
|
||||||
unsigned char *base_sha1 = (unsigned char *) p->pack_base + offset;
|
|
||||||
|
|
||||||
if (p->pack_size < offset + 20)
|
offset = get_delta_base(p, offset, kind, obj_offset, &base_offset);
|
||||||
die("truncated pack file");
|
|
||||||
/* The base entry _must_ be in the same pack */
|
|
||||||
base_offset = find_pack_entry_one(base_sha1, p);
|
|
||||||
if (!base_offset)
|
|
||||||
die("failed to find delta-pack base object %s",
|
|
||||||
sha1_to_hex(base_sha1));
|
|
||||||
offset += 20;
|
|
||||||
|
|
||||||
/* We choose to only get the type of the base object and
|
/* We choose to only get the type of the base object and
|
||||||
* ignore potentially corrupt pack file that expects the delta
|
* ignore potentially corrupt pack file that expects the delta
|
||||||
@ -975,7 +1010,7 @@ int check_reuse_pack_delta(struct packed_git *p, unsigned long offset,
|
|||||||
use_packed_git(p);
|
use_packed_git(p);
|
||||||
ptr = offset;
|
ptr = offset;
|
||||||
ptr = unpack_object_header(p, ptr, kindp, sizep);
|
ptr = unpack_object_header(p, ptr, kindp, sizep);
|
||||||
if (*kindp != OBJ_DELTA)
|
if (*kindp != OBJ_REF_DELTA)
|
||||||
goto done;
|
goto done;
|
||||||
hashcpy(base, (unsigned char *) p->pack_base + ptr);
|
hashcpy(base, (unsigned char *) p->pack_base + ptr);
|
||||||
status = 0;
|
status = 0;
|
||||||
@ -992,11 +1027,12 @@ void packed_object_info_detail(struct packed_git *p,
|
|||||||
unsigned int *delta_chain_length,
|
unsigned int *delta_chain_length,
|
||||||
unsigned char *base_sha1)
|
unsigned char *base_sha1)
|
||||||
{
|
{
|
||||||
unsigned long val;
|
unsigned long obj_offset, val;
|
||||||
unsigned char *next_sha1;
|
unsigned char *next_sha1;
|
||||||
enum object_type kind;
|
enum object_type kind;
|
||||||
|
|
||||||
*delta_chain_length = 0;
|
*delta_chain_length = 0;
|
||||||
|
obj_offset = offset;
|
||||||
offset = unpack_object_header(p, offset, &kind, size);
|
offset = unpack_object_header(p, offset, &kind, size);
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
@ -1011,7 +1047,13 @@ void packed_object_info_detail(struct packed_git *p,
|
|||||||
strcpy(type, type_names[kind]);
|
strcpy(type, type_names[kind]);
|
||||||
*store_size = 0; /* notyet */
|
*store_size = 0; /* notyet */
|
||||||
return;
|
return;
|
||||||
case OBJ_DELTA:
|
case OBJ_OFS_DELTA:
|
||||||
|
get_delta_base(p, offset, kind, obj_offset, &offset);
|
||||||
|
if (*delta_chain_length == 0) {
|
||||||
|
/* TODO: find base_sha1 as pointed by offset */
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case OBJ_REF_DELTA:
|
||||||
if (p->pack_size <= offset + 20)
|
if (p->pack_size <= offset + 20)
|
||||||
die("pack file %s records an incomplete delta base",
|
die("pack file %s records an incomplete delta base",
|
||||||
p->pack_name);
|
p->pack_name);
|
||||||
@ -1021,6 +1063,7 @@ void packed_object_info_detail(struct packed_git *p,
|
|||||||
offset = find_pack_entry_one(next_sha1, p);
|
offset = find_pack_entry_one(next_sha1, p);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
obj_offset = offset;
|
||||||
offset = unpack_object_header(p, offset, &kind, &val);
|
offset = unpack_object_header(p, offset, &kind, &val);
|
||||||
(*delta_chain_length)++;
|
(*delta_chain_length)++;
|
||||||
}
|
}
|
||||||
@ -1029,15 +1072,15 @@ void packed_object_info_detail(struct packed_git *p,
|
|||||||
static int packed_object_info(struct packed_git *p, unsigned long offset,
|
static int packed_object_info(struct packed_git *p, unsigned long offset,
|
||||||
char *type, unsigned long *sizep)
|
char *type, unsigned long *sizep)
|
||||||
{
|
{
|
||||||
unsigned long size;
|
unsigned long size, obj_offset = offset;
|
||||||
enum object_type kind;
|
enum object_type kind;
|
||||||
|
|
||||||
offset = unpack_object_header(p, offset, &kind, &size);
|
offset = unpack_object_header(p, offset, &kind, &size);
|
||||||
|
|
||||||
if (kind == OBJ_DELTA)
|
|
||||||
return packed_delta_info(p, offset, type, sizep);
|
|
||||||
|
|
||||||
switch (kind) {
|
switch (kind) {
|
||||||
|
case OBJ_OFS_DELTA:
|
||||||
|
case OBJ_REF_DELTA:
|
||||||
|
return packed_delta_info(p, offset, kind, obj_offset, type, sizep);
|
||||||
case OBJ_COMMIT:
|
case OBJ_COMMIT:
|
||||||
case OBJ_TREE:
|
case OBJ_TREE:
|
||||||
case OBJ_BLOB:
|
case OBJ_BLOB:
|
||||||
@ -1083,23 +1126,15 @@ static void *unpack_compressed_entry(struct packed_git *p,
|
|||||||
static void *unpack_delta_entry(struct packed_git *p,
|
static void *unpack_delta_entry(struct packed_git *p,
|
||||||
unsigned long offset,
|
unsigned long offset,
|
||||||
unsigned long delta_size,
|
unsigned long delta_size,
|
||||||
|
enum object_type kind,
|
||||||
|
unsigned long obj_offset,
|
||||||
char *type,
|
char *type,
|
||||||
unsigned long *sizep)
|
unsigned long *sizep)
|
||||||
{
|
{
|
||||||
void *delta_data, *result, *base;
|
void *delta_data, *result, *base;
|
||||||
unsigned long result_size, base_size, base_offset;
|
unsigned long result_size, base_size, base_offset;
|
||||||
unsigned char *base_sha1;
|
|
||||||
|
|
||||||
if (p->pack_size < offset + 20)
|
|
||||||
die("truncated pack file");
|
|
||||||
/* The base entry _must_ be in the same pack */
|
|
||||||
base_sha1 = (unsigned char*)p->pack_base + offset;
|
|
||||||
base_offset = find_pack_entry_one(base_sha1, p);
|
|
||||||
if (!base_offset)
|
|
||||||
die("failed to find delta-pack base object %s",
|
|
||||||
sha1_to_hex(base_sha1));
|
|
||||||
offset += 20;
|
|
||||||
|
|
||||||
|
offset = get_delta_base(p, offset, kind, obj_offset, &base_offset);
|
||||||
base = unpack_entry_gently(p, base_offset, type, &base_size);
|
base = unpack_entry_gently(p, base_offset, type, &base_size);
|
||||||
if (!base)
|
if (!base)
|
||||||
die("failed to read delta base object at %lu from %s",
|
die("failed to read delta base object at %lu from %s",
|
||||||
@ -1136,13 +1171,14 @@ static void *unpack_entry(struct pack_entry *entry,
|
|||||||
void *unpack_entry_gently(struct packed_git *p, unsigned long offset,
|
void *unpack_entry_gently(struct packed_git *p, unsigned long offset,
|
||||||
char *type, unsigned long *sizep)
|
char *type, unsigned long *sizep)
|
||||||
{
|
{
|
||||||
unsigned long size;
|
unsigned long size, obj_offset = offset;
|
||||||
enum object_type kind;
|
enum object_type kind;
|
||||||
|
|
||||||
offset = unpack_object_header(p, offset, &kind, &size);
|
offset = unpack_object_header(p, offset, &kind, &size);
|
||||||
switch (kind) {
|
switch (kind) {
|
||||||
case OBJ_DELTA:
|
case OBJ_OFS_DELTA:
|
||||||
return unpack_delta_entry(p, offset, size, type, sizep);
|
case OBJ_REF_DELTA:
|
||||||
|
return unpack_delta_entry(p, offset, size, kind, obj_offset, type, sizep);
|
||||||
case OBJ_COMMIT:
|
case OBJ_COMMIT:
|
||||||
case OBJ_TREE:
|
case OBJ_TREE:
|
||||||
case OBJ_BLOB:
|
case OBJ_BLOB:
|
||||||
|
Loading…
Reference in New Issue
Block a user