From 16854571aae6302f457c5fbee41ac64669b09595 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 21 Sep 2006 00:11:59 -0400 Subject: [PATCH 1/2] move pack creation to version 3 It's been quite a while now that GIT is able to read version 3 packs. Let's create them at last. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- diff-delta.c | 8 ++++++-- pack.h | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/diff-delta.c b/diff-delta.c index fa16d06c8d..51df4608a8 100644 --- a/diff-delta.c +++ b/diff-delta.c @@ -308,8 +308,8 @@ create_delta(const struct delta_index *index, continue; if (ref_size > top - src) ref_size = top - src; - if (ref_size > 0x10000) - ref_size = 0x10000; + if (ref_size > 0xffffff) + ref_size = 0xffffff; if (ref_size <= msize) break; while (ref_size-- && *src++ == *ref) @@ -318,6 +318,8 @@ create_delta(const struct delta_index *index, /* this is our best match so far */ msize = ref - entry->ptr; moff = entry->ptr - ref_data; + if (msize >= 0x10000) + break; /* this is good enough */ } } @@ -381,6 +383,8 @@ create_delta(const struct delta_index *index, if (msize & 0xff) { out[outpos++] = msize; i |= 0x10; } msize >>= 8; if (msize & 0xff) { out[outpos++] = msize; i |= 0x20; } + msize >>= 8; + if (msize & 0xff) { out[outpos++] = msize; i |= 0x40; } *op = i; } diff --git a/pack.h b/pack.h index eb07b033ae..05557da152 100644 --- a/pack.h +++ b/pack.h @@ -7,7 +7,7 @@ * Packed object header */ #define PACK_SIGNATURE 0x5041434b /* "PACK" */ -#define PACK_VERSION 2 +#define PACK_VERSION 3 #define pack_version_ok(v) ((v) == htonl(2) || (v) == htonl(3)) struct pack_header { unsigned int hdr_signature; From 43057304c0bbaf7bc7511daaf81df08c7909a90b Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 21 Sep 2006 00:05:37 -0400 Subject: [PATCH 2/2] many cleanups to sha1_file.c Those cleanups are mainly to set the table for the support of deltas with base objects referenced by offsets instead of sha1. This means that many pack lookup functions are converted to take a pack/offset tuple instead of a sha1. This eliminates many struct pack_entry usages since this structure carried redundent information in many cases, and it increased stack footprint needlessly for a couple recursively called functions that used to declare a local copy of it for every recursion loop. In the process, packed_object_info_detail() has been reorganized as well so to look much saner and more amenable to deltas with offset support. Finally the appropriate adjustments have been made to functions that depend on the above changes. But there is no functionality changes yet simply some code refactoring at this point. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 8 +- cache.h | 6 +- pack-check.c | 19 +++-- sha1_file.c | 185 +++++++++++++++++++---------------------- 4 files changed, 104 insertions(+), 114 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 8d7a1209d5..96c069a81d 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -597,15 +597,15 @@ static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclud if (!exclude) { for (p = packed_git; p; p = p->next) { - struct pack_entry e; - if (find_pack_entry_one(sha1, &e, p)) { + unsigned long offset = find_pack_entry_one(sha1, p); + if (offset) { if (incremental) return 0; if (local && !p->pack_local) return 0; if (!found_pack) { - found_offset = e.offset; - found_pack = e.p; + found_offset = offset; + found_pack = p; } } } diff --git a/cache.h b/cache.h index ef2e581475..97debd03c5 100644 --- a/cache.h +++ b/cache.h @@ -390,10 +390,10 @@ extern void unuse_packed_git(struct packed_git *); extern struct packed_git *add_packed_git(char *, int, int); extern int num_packed_objects(const struct packed_git *p); extern int nth_packed_object_sha1(const struct packed_git *, int, unsigned char*); -extern int find_pack_entry_one(const unsigned char *, struct pack_entry *, struct packed_git *); -extern void *unpack_entry_gently(struct pack_entry *, char *, unsigned long *); +extern unsigned long find_pack_entry_one(const unsigned char *, struct packed_git *); +extern void *unpack_entry_gently(struct packed_git *, unsigned long, char *, unsigned long *); extern unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep); -extern void packed_object_info_detail(struct pack_entry *, char *, unsigned long *, unsigned long *, unsigned int *, unsigned char *); +extern void packed_object_info_detail(struct packed_git *, unsigned long, char *, unsigned long *, unsigned long *, unsigned int *, unsigned char *); /* Dumb servers support */ extern int update_server_info(int); diff --git a/pack-check.c b/pack-check.c index 04c6c00821..c0caaee093 100644 --- a/pack-check.c +++ b/pack-check.c @@ -42,16 +42,16 @@ static int verify_packfile(struct packed_git *p) */ for (i = err = 0; i < nr_objects; i++) { unsigned char sha1[20]; - struct pack_entry e; void *data; char type[20]; - unsigned long size; + unsigned long size, offset; if (nth_packed_object_sha1(p, i, sha1)) die("internal error pack-check nth-packed-object"); - if (!find_pack_entry_one(sha1, &e, p)) + offset = find_pack_entry_one(sha1, p); + if (!offset) die("internal error pack-check find-pack-entry-one"); - data = unpack_entry_gently(&e, type, &size); + data = unpack_entry_gently(p, offset, type, &size); if (!data) { err = error("cannot unpack %s from %s", sha1_to_hex(sha1), p->pack_name); @@ -84,25 +84,26 @@ static void show_pack_info(struct packed_git *p) for (i = 0; i < nr_objects; i++) { unsigned char sha1[20], base_sha1[20]; - struct pack_entry e; char type[20]; unsigned long size; unsigned long store_size; + unsigned long offset; unsigned int delta_chain_length; if (nth_packed_object_sha1(p, i, sha1)) die("internal error pack-check nth-packed-object"); - if (!find_pack_entry_one(sha1, &e, p)) + offset = find_pack_entry_one(sha1, p); + if (!offset) die("internal error pack-check find-pack-entry-one"); - packed_object_info_detail(&e, type, &size, &store_size, + packed_object_info_detail(p, offset, type, &size, &store_size, &delta_chain_length, base_sha1); printf("%s ", sha1_to_hex(sha1)); if (!delta_chain_length) - printf("%-6s %lu %u\n", type, size, e.offset); + printf("%-6s %lu %lu\n", type, size, offset); else { - printf("%-6s %lu %u %u %s\n", type, size, e.offset, + printf("%-6s %lu %lu %u %s\n", type, size, offset, delta_chain_length, sha1_to_hex(base_sha1)); if (delta_chain_length < MAX_CHAIN) chain_histogram[delta_chain_length]++; diff --git a/sha1_file.c b/sha1_file.c index 0f9c2b6218..27b1ebb720 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -884,33 +884,32 @@ void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned l } /* forward declaration for a mutually recursive function */ -static int packed_object_info(struct pack_entry *entry, +static int packed_object_info(struct packed_git *p, unsigned long offset, char *type, unsigned long *sizep); -static int packed_delta_info(unsigned char *base_sha1, - unsigned long delta_size, - unsigned long left, +static int packed_delta_info(struct packed_git *p, + unsigned long offset, char *type, - unsigned long *sizep, - struct packed_git *p) + unsigned long *sizep) { - struct pack_entry base_ent; + unsigned long base_offset; + unsigned char *base_sha1 = (unsigned char *) p->pack_base + offset; - if (left < 20) + if (p->pack_size < offset + 20) die("truncated pack file"); - /* The base entry _must_ be in the same pack */ - if (!find_pack_entry_one(base_sha1, &base_ent, p)) + base_offset = find_pack_entry_one(base_sha1, p); + if (!base_offset) die("failed to find delta-pack base object %s", sha1_to_hex(base_sha1)); + offset += 20; /* We choose to only get the type of the base object and * ignore potentially corrupt pack file that expects the delta * based on a base with a wrong size. This saves tons of * inflate() calls. */ - - if (packed_object_info(&base_ent, type, NULL)) + if (packed_object_info(p, base_offset, type, NULL)) die("cannot get info for delta-pack base"); if (sizep) { @@ -922,8 +921,8 @@ static int packed_delta_info(unsigned char *base_sha1, memset(&stream, 0, sizeof(stream)); - data = stream.next_in = base_sha1 + 20; - stream.avail_in = left - 20; + stream.next_in = (unsigned char *) p->pack_base + offset; + stream.avail_in = p->pack_size - offset; stream.next_out = delta_head; stream.avail_out = sizeof(delta_head); @@ -985,75 +984,60 @@ int check_reuse_pack_delta(struct packed_git *p, unsigned long offset, return status; } -void packed_object_info_detail(struct pack_entry *e, +void packed_object_info_detail(struct packed_git *p, + unsigned long offset, char *type, unsigned long *size, unsigned long *store_size, unsigned int *delta_chain_length, unsigned char *base_sha1) { - struct packed_git *p = e->p; - unsigned long offset; - unsigned char *pack; + unsigned long val; + unsigned char *next_sha1; enum object_type kind; - offset = unpack_object_header(p, e->offset, &kind, size); - pack = (unsigned char *) p->pack_base + offset; - if (kind != OBJ_DELTA) - *delta_chain_length = 0; - else { - unsigned int chain_length = 0; - if (p->pack_size <= offset + 20) - die("pack file %s records an incomplete delta base", - p->pack_name); - hashcpy(base_sha1, pack); - do { - struct pack_entry base_ent; - unsigned long junk; + *delta_chain_length = 0; + offset = unpack_object_header(p, offset, &kind, size); - find_pack_entry_one(pack, &base_ent, p); - offset = unpack_object_header(p, base_ent.offset, - &kind, &junk); - pack = (unsigned char *) p->pack_base + offset; - chain_length++; - } while (kind == OBJ_DELTA); - *delta_chain_length = chain_length; + for (;;) { + switch (kind) { + default: + die("corrupted pack file %s containing object of kind %d", + p->pack_name, kind); + case OBJ_COMMIT: + case OBJ_TREE: + case OBJ_BLOB: + case OBJ_TAG: + strcpy(type, type_names[kind]); + *store_size = 0; /* notyet */ + return; + case OBJ_DELTA: + if (p->pack_size <= offset + 20) + die("pack file %s records an incomplete delta base", + p->pack_name); + next_sha1 = (unsigned char *) p->pack_base + offset; + if (*delta_chain_length == 0) + hashcpy(base_sha1, next_sha1); + offset = find_pack_entry_one(next_sha1, p); + break; + } + offset = unpack_object_header(p, offset, &kind, &val); + (*delta_chain_length)++; } - switch (kind) { - case OBJ_COMMIT: - case OBJ_TREE: - case OBJ_BLOB: - case OBJ_TAG: - strcpy(type, type_names[kind]); - break; - default: - die("corrupted pack file %s containing object of kind %d", - p->pack_name, kind); - } - *store_size = 0; /* notyet */ } -static int packed_object_info(struct pack_entry *entry, +static int packed_object_info(struct packed_git *p, unsigned long offset, char *type, unsigned long *sizep) { - struct packed_git *p = entry->p; - unsigned long offset, size, left; - unsigned char *pack; + unsigned long size; enum object_type kind; - int retval; - if (use_packed_git(p)) - die("cannot map packed file"); + offset = unpack_object_header(p, offset, &kind, &size); - offset = unpack_object_header(p, entry->offset, &kind, &size); - pack = (unsigned char *) p->pack_base + offset; - left = p->pack_size - offset; + if (kind == OBJ_DELTA) + return packed_delta_info(p, offset, type, sizep); switch (kind) { - case OBJ_DELTA: - retval = packed_delta_info(pack, size, left, type, sizep, p); - unuse_packed_git(p); - return retval; case OBJ_COMMIT: case OBJ_TREE: case OBJ_BLOB: @@ -1066,7 +1050,6 @@ static int packed_object_info(struct pack_entry *entry, } if (sizep) *sizep = size; - unuse_packed_git(p); return 0; } @@ -1103,25 +1086,26 @@ static void *unpack_delta_entry(struct packed_git *p, char *type, unsigned long *sizep) { - struct pack_entry base_ent; void *delta_data, *result, *base; - unsigned long result_size, base_size; - unsigned char* base_sha1; + unsigned long result_size, base_size, base_offset; + unsigned char *base_sha1; - if ((offset + 20) >= p->pack_size) + if (p->pack_size < offset + 20) die("truncated pack file"); - /* The base entry _must_ be in the same pack */ base_sha1 = (unsigned char*)p->pack_base + offset; - if (!find_pack_entry_one(base_sha1, &base_ent, p)) + base_offset = find_pack_entry_one(base_sha1, p); + if (!base_offset) die("failed to find delta-pack base object %s", sha1_to_hex(base_sha1)); - base = unpack_entry_gently(&base_ent, type, &base_size); - if (!base) - die("failed to read delta-pack base object %s", - sha1_to_hex(base_sha1)); + offset += 20; - delta_data = unpack_compressed_entry(p, offset + 20, delta_size); + base = unpack_entry_gently(p, base_offset, type, &base_size); + if (!base) + die("failed to read delta base object at %lu from %s", + base_offset, p->pack_name); + + delta_data = unpack_compressed_entry(p, offset, delta_size); result = patch_delta(base, base_size, delta_data, delta_size, &result_size); @@ -1141,7 +1125,7 @@ static void *unpack_entry(struct pack_entry *entry, if (use_packed_git(p)) die("cannot map packed file"); - retval = unpack_entry_gently(entry, type, sizep); + retval = unpack_entry_gently(p, entry->offset, type, sizep); unuse_packed_git(p); if (!retval) die("corrupted pack file %s", p->pack_name); @@ -1149,14 +1133,13 @@ static void *unpack_entry(struct pack_entry *entry, } /* The caller is responsible for use_packed_git()/unuse_packed_git() pair */ -void *unpack_entry_gently(struct pack_entry *entry, +void *unpack_entry_gently(struct packed_git *p, unsigned long offset, char *type, unsigned long *sizep) { - struct packed_git *p = entry->p; - unsigned long offset, size; + unsigned long size; enum object_type kind; - offset = unpack_object_header(p, entry->offset, &kind, &size); + offset = unpack_object_header(p, offset, &kind, &size); switch (kind) { case OBJ_DELTA: return unpack_delta_entry(p, offset, size, type, sizep); @@ -1188,8 +1171,8 @@ int nth_packed_object_sha1(const struct packed_git *p, int n, return 0; } -int find_pack_entry_one(const unsigned char *sha1, - struct pack_entry *e, struct packed_git *p) +unsigned long find_pack_entry_one(const unsigned char *sha1, + struct packed_git *p) { unsigned int *level1_ofs = p->index_base; int hi = ntohl(level1_ofs[*sha1]); @@ -1199,12 +1182,8 @@ int find_pack_entry_one(const unsigned char *sha1, do { int mi = (lo + hi) / 2; int cmp = hashcmp((unsigned char *)index + (24 * mi) + 4, sha1); - if (!cmp) { - e->offset = ntohl(*((unsigned int *) ((char *) index + (24 * mi)))); - hashcpy(e->sha1, sha1); - e->p = p; - return 1; - } + if (!cmp) + return ntohl(*((unsigned int *) ((char *) index + (24 * mi)))); if (cmp > 0) hi = mi; else @@ -1216,6 +1195,8 @@ int find_pack_entry_one(const unsigned char *sha1, static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e, const char **ignore_packed) { struct packed_git *p; + unsigned long offset; + prepare_packed_git(); for (p = packed_git; p; p = p->next) { @@ -1227,8 +1208,13 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e, cons if (*ig) continue; } - if (find_pack_entry_one(sha1, e, p)) + offset = find_pack_entry_one(sha1, p); + if (offset) { + e->offset = offset; + e->p = p; + hashcpy(e->sha1, sha1); return 1; + } } return 0; } @@ -1237,10 +1223,9 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1, struct packed_git *packs) { struct packed_git *p; - struct pack_entry e; for (p = packs; p; p = p->next) { - if (find_pack_entry_one(sha1, &e, p)) + if (find_pack_entry_one(sha1, p)) return p; } return NULL; @@ -1259,12 +1244,16 @@ int sha1_object_info(const unsigned char *sha1, char *type, unsigned long *sizep if (!map) { struct pack_entry e; - if (find_pack_entry(sha1, &e, NULL)) - return packed_object_info(&e, type, sizep); - reprepare_packed_git(); - if (find_pack_entry(sha1, &e, NULL)) - return packed_object_info(&e, type, sizep); - return error("unable to find %s", sha1_to_hex(sha1)); + if (!find_pack_entry(sha1, &e, NULL)) { + reprepare_packed_git(); + if (!find_pack_entry(sha1, &e, NULL)) + return error("unable to find %s", sha1_to_hex(sha1)); + } + if (use_packed_git(e.p)) + die("cannot map packed file"); + status = packed_object_info(e.p, e.offset, type, sizep); + unuse_packed_git(e.p); + return status; } if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) status = error("unable to unpack %s header",