Merge branch 'np/pack'
* np/pack: add the capability for index-pack to read from a stream index-pack: compare only the first 20-bytes of the key. git-repack: repo.usedeltabaseoffset pack-objects: document --delta-base-offset option allow delta data reuse even if base object is a preferred base zap a debug remnant let the GIT native protocol use offsets to delta base when possible make pack data reuse compatible with both delta types make git-pack-objects able to create deltas with offset to base teach git-index-pack about deltas with offset to base teach git-unpack-objects about deltas with offset to base introduce delta objects with offset to base
This commit is contained in:
commit
05eb811aa1
@ -230,6 +230,10 @@ pull.octopus::
|
||||
pull.twohead::
|
||||
The default merge strategy to use when pulling a single branch.
|
||||
|
||||
repack.usedeltabaseoffset::
|
||||
Allow gitlink:git-repack[1] to create packs that use
|
||||
delta-base offset. Defaults to false.
|
||||
|
||||
show.difftree::
|
||||
The default gitlink:git-diff-tree[1] arguments to be used
|
||||
for gitlink:git-show[1].
|
||||
|
@ -9,7 +9,7 @@ git-pack-objects - Create a packed archive of objects
|
||||
SYNOPSIS
|
||||
--------
|
||||
[verse]
|
||||
'git-pack-objects' [-q] [--no-reuse-delta] [--non-empty]
|
||||
'git-pack-objects' [-q] [--no-reuse-delta] [--delta-base-offset] [--non-empty]
|
||||
[--local] [--incremental] [--window=N] [--depth=N]
|
||||
[--revs [--unpacked | --all]*] [--stdout | base-name] < object-list
|
||||
|
||||
@ -111,6 +111,17 @@ base-name::
|
||||
This flag tells the command not to reuse existing deltas
|
||||
but compute them from scratch.
|
||||
|
||||
--delta-base-offset::
|
||||
A packed archive can express base object of a delta as
|
||||
either a 20-byte object name or an offset in the
|
||||
stream, but older versions of git do not understand the
|
||||
latter. By default, git-pack-objects only uses the
|
||||
former format for better compatibility. This option
|
||||
allows the command to use the latter format for
|
||||
compactness. Depending on the average delta chain
|
||||
length, this option typically shrinks the resulting
|
||||
packfile by 3-5 percent.
|
||||
|
||||
|
||||
Author
|
||||
------
|
||||
|
@ -67,6 +67,20 @@ OPTIONS
|
||||
The default value for both --window and --depth is 10.
|
||||
|
||||
|
||||
Configuration
|
||||
-------------
|
||||
|
||||
When configuration variable `repack.UseDeltaBaseOffset` is set
|
||||
for the repository, the command passes `--delta-base-offset`
|
||||
option to `git-pack-objects`; this typically results in slightly
|
||||
smaller packs, but the generated packs are incompatible with
|
||||
versions of git older than (and including) v1.4.3; do not set
|
||||
the variable in a repository that older versions of git need to
|
||||
be able to read (this includes repositories from which packs can
|
||||
be copied out over http or rsync, and people who obtained packs
|
||||
that way can try to use older git with it).
|
||||
|
||||
|
||||
Author
|
||||
------
|
||||
Written by Linus Torvalds <torvalds@osdl.org>
|
||||
|
@ -15,7 +15,7 @@
|
||||
#include <sys/time.h>
|
||||
#include <signal.h>
|
||||
|
||||
static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] [--revs [--unpacked | --all]*] [--stdout | base-name] <ref-list | <object-list]";
|
||||
static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--delta-base-offset] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] [--revs [--unpacked | --all]*] [--stdout | base-name] <ref-list | <object-list]";
|
||||
|
||||
struct object_entry {
|
||||
unsigned char sha1[20];
|
||||
@ -29,6 +29,7 @@ struct object_entry {
|
||||
enum object_type type;
|
||||
enum object_type in_pack_type; /* could be delta */
|
||||
unsigned long delta_size; /* delta data size (uncompressed) */
|
||||
#define in_pack_header_size delta_size /* only when reusing pack data */
|
||||
struct object_entry *delta; /* delta base object */
|
||||
struct packed_git *in_pack; /* already in pack */
|
||||
unsigned int in_pack_offset;
|
||||
@ -60,6 +61,8 @@ static int non_empty;
|
||||
static int no_reuse_delta;
|
||||
static int local;
|
||||
static int incremental;
|
||||
static int allow_ofs_delta;
|
||||
|
||||
static struct object_entry **sorted_by_sha, **sorted_by_type;
|
||||
static struct object_entry *objects;
|
||||
static int nr_objects, nr_alloc, nr_result;
|
||||
@ -84,17 +87,25 @@ static int object_ix_hashsz;
|
||||
* Pack index for existing packs give us easy access to the offsets into
|
||||
* corresponding pack file where each object's data starts, but the entries
|
||||
* do not store the size of the compressed representation (uncompressed
|
||||
* size is easily available by examining the pack entry header). We build
|
||||
* a hashtable of existing packs (pack_revindex), and keep reverse index
|
||||
* here -- pack index file is sorted by object name mapping to offset; this
|
||||
* pack_revindex[].revindex array is an ordered list of offsets, so if you
|
||||
* know the offset of an object, next offset is where its packed
|
||||
* representation ends.
|
||||
* size is easily available by examining the pack entry header). It is
|
||||
* also rather expensive to find the sha1 for an object given its offset.
|
||||
*
|
||||
* We build a hashtable of existing packs (pack_revindex), and keep reverse
|
||||
* index here -- pack index file is sorted by object name mapping to offset;
|
||||
* this pack_revindex[].revindex array is a list of offset/index_nr pairs
|
||||
* ordered by offset, so if you know the offset of an object, next offset
|
||||
* is where its packed representation ends and the index_nr can be used to
|
||||
* get the object sha1 from the main index.
|
||||
*/
|
||||
/*
 * One element of a pack's reverse index.  prepare_pack_revindex() fills
 * an array of these (one per packed object plus a trailing sentinel)
 * and sorts the object entries by offset.
 */
struct revindex_entry {
|
||||
/* byte offset inside the pack file where the object's entry starts */
unsigned int offset;
|
||||
/* position of the object's record in the pack index; the sentinel stores -1 */
unsigned int nr;
|
||||
};
|
||||
struct pack_revindex {
|
||||
struct packed_git *p;
|
||||
unsigned long *revindex;
|
||||
} *pack_revindex = NULL;
|
||||
struct revindex_entry *revindex;
|
||||
};
|
||||
static struct pack_revindex *pack_revindex;
|
||||
static int pack_revindex_hashsz;
|
||||
|
||||
/*
|
||||
@ -141,14 +152,9 @@ static void prepare_pack_ix(void)
|
||||
|
||||
static int cmp_offset(const void *a_, const void *b_)
|
||||
{
|
||||
unsigned long a = *(unsigned long *) a_;
|
||||
unsigned long b = *(unsigned long *) b_;
|
||||
if (a < b)
|
||||
return -1;
|
||||
else if (a == b)
|
||||
return 0;
|
||||
else
|
||||
return 1;
|
||||
const struct revindex_entry *a = a_;
|
||||
const struct revindex_entry *b = b_;
|
||||
return (a->offset < b->offset) ? -1 : (a->offset > b->offset) ? 1 : 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -161,25 +167,27 @@ static void prepare_pack_revindex(struct pack_revindex *rix)
|
||||
int i;
|
||||
void *index = p->index_base + 256;
|
||||
|
||||
rix->revindex = xmalloc(sizeof(unsigned long) * (num_ent + 1));
|
||||
rix->revindex = xmalloc(sizeof(*rix->revindex) * (num_ent + 1));
|
||||
for (i = 0; i < num_ent; i++) {
|
||||
unsigned int hl = *((unsigned int *)((char *) index + 24*i));
|
||||
rix->revindex[i] = ntohl(hl);
|
||||
rix->revindex[i].offset = ntohl(hl);
|
||||
rix->revindex[i].nr = i;
|
||||
}
|
||||
/* This knows the pack format -- the 20-byte trailer
|
||||
* follows immediately after the last object data.
|
||||
*/
|
||||
rix->revindex[num_ent] = p->pack_size - 20;
|
||||
qsort(rix->revindex, num_ent, sizeof(unsigned long), cmp_offset);
|
||||
rix->revindex[num_ent].offset = p->pack_size - 20;
|
||||
rix->revindex[num_ent].nr = -1;
|
||||
qsort(rix->revindex, num_ent, sizeof(*rix->revindex), cmp_offset);
|
||||
}
|
||||
|
||||
static unsigned long find_packed_object_size(struct packed_git *p,
|
||||
unsigned long ofs)
|
||||
static struct revindex_entry * find_packed_object(struct packed_git *p,
|
||||
unsigned int ofs)
|
||||
{
|
||||
int num;
|
||||
int lo, hi;
|
||||
struct pack_revindex *rix;
|
||||
unsigned long *revindex;
|
||||
struct revindex_entry *revindex;
|
||||
num = pack_revindex_ix(p);
|
||||
if (num < 0)
|
||||
die("internal error: pack revindex uninitialized");
|
||||
@ -191,10 +199,10 @@ static unsigned long find_packed_object_size(struct packed_git *p,
|
||||
hi = num_packed_objects(p) + 1;
|
||||
do {
|
||||
int mi = (lo + hi) / 2;
|
||||
if (revindex[mi] == ofs) {
|
||||
return revindex[mi+1] - ofs;
|
||||
if (revindex[mi].offset == ofs) {
|
||||
return revindex + mi;
|
||||
}
|
||||
else if (ofs < revindex[mi])
|
||||
else if (ofs < revindex[mi].offset)
|
||||
hi = mi;
|
||||
else
|
||||
lo = mi + 1;
|
||||
@ -202,6 +210,20 @@ static unsigned long find_packed_object_size(struct packed_git *p,
|
||||
die("internal error: pack revindex corrupt");
|
||||
}
|
||||
|
||||
static unsigned long find_packed_object_size(struct packed_git *p,
|
||||
unsigned long ofs)
|
||||
{
|
||||
struct revindex_entry *entry = find_packed_object(p, ofs);
|
||||
return entry[1].offset - ofs;
|
||||
}
|
||||
|
||||
static unsigned char *find_packed_object_name(struct packed_git *p,
|
||||
unsigned long ofs)
|
||||
{
|
||||
struct revindex_entry *entry = find_packed_object(p, ofs);
|
||||
return (unsigned char *)(p->index_base + 256) + 24 * entry->nr + 4;
|
||||
}
|
||||
|
||||
static void *delta_against(void *buf, unsigned long size, struct object_entry *entry)
|
||||
{
|
||||
unsigned long othersize, delta_size;
|
||||
@ -232,7 +254,7 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha
|
||||
int n = 1;
|
||||
unsigned char c;
|
||||
|
||||
if (type < OBJ_COMMIT || type > OBJ_DELTA)
|
||||
if (type < OBJ_COMMIT || type > OBJ_REF_DELTA)
|
||||
die("bad type %d", type);
|
||||
|
||||
c = (type << 4) | (size & 15);
|
||||
@ -247,6 +269,10 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha
|
||||
return n;
|
||||
}
|
||||
|
||||
/*
|
||||
* we are going to reuse the existing object data as is. make
|
||||
* sure it is not corrupt.
|
||||
*/
|
||||
static int check_inflate(unsigned char *data, unsigned long len, unsigned long expect)
|
||||
{
|
||||
z_stream stream;
|
||||
@ -278,32 +304,6 @@ static int check_inflate(unsigned char *data, unsigned long len, unsigned long e
|
||||
return st;
|
||||
}
|
||||
|
||||
/*
|
||||
* we are going to reuse the existing pack entry data. make
|
||||
* sure it is not corrupt.
|
||||
*/
|
||||
static int revalidate_pack_entry(struct object_entry *entry, unsigned char *data, unsigned long len)
|
||||
{
|
||||
enum object_type type;
|
||||
unsigned long size, used;
|
||||
|
||||
if (pack_to_stdout)
|
||||
return 0;
|
||||
|
||||
/* the caller has already called use_packed_git() for us,
|
||||
* so it is safe to access the pack data from mmapped location.
|
||||
* make sure the entry inflates correctly.
|
||||
*/
|
||||
used = unpack_object_header_gently(data, len, &type, &size);
|
||||
if (!used)
|
||||
return -1;
|
||||
if (type == OBJ_DELTA)
|
||||
used += 20; /* skip base object name */
|
||||
data += used;
|
||||
len -= used;
|
||||
return check_inflate(data, len, entry->size);
|
||||
}
|
||||
|
||||
static int revalidate_loose_object(struct object_entry *entry,
|
||||
unsigned char *map,
|
||||
unsigned long mapsize)
|
||||
@ -334,13 +334,10 @@ static unsigned long write_object(struct sha1file *f,
|
||||
enum object_type obj_type;
|
||||
int to_reuse = 0;
|
||||
|
||||
if (entry->preferred_base)
|
||||
return 0;
|
||||
|
||||
obj_type = entry->type;
|
||||
if (! entry->in_pack)
|
||||
to_reuse = 0; /* can't reuse what we don't have */
|
||||
else if (obj_type == OBJ_DELTA)
|
||||
else if (obj_type == OBJ_REF_DELTA || obj_type == OBJ_OFS_DELTA)
|
||||
to_reuse = 1; /* check_object() decided it for us */
|
||||
else if (obj_type != entry->in_pack_type)
|
||||
to_reuse = 0; /* pack has delta which is unusable */
|
||||
@ -380,18 +377,35 @@ static unsigned long write_object(struct sha1file *f,
|
||||
if (entry->delta) {
|
||||
buf = delta_against(buf, size, entry);
|
||||
size = entry->delta_size;
|
||||
obj_type = OBJ_DELTA;
|
||||
obj_type = (allow_ofs_delta && entry->delta->offset) ?
|
||||
OBJ_OFS_DELTA : OBJ_REF_DELTA;
|
||||
}
|
||||
/*
|
||||
* The object header is a byte of 'type' followed by zero or
|
||||
* more bytes of length. For deltas, the 20 bytes of delta
|
||||
* sha1 follows that.
|
||||
* more bytes of length.
|
||||
*/
|
||||
hdrlen = encode_header(obj_type, size, header);
|
||||
sha1write(f, header, hdrlen);
|
||||
|
||||
if (entry->delta) {
|
||||
sha1write(f, entry->delta, 20);
|
||||
if (obj_type == OBJ_OFS_DELTA) {
|
||||
/*
|
||||
* Deltas with relative base contain an additional
|
||||
* encoding of the relative offset for the delta
|
||||
* base from this object's position in the pack.
|
||||
*/
|
||||
unsigned long ofs = entry->offset - entry->delta->offset;
|
||||
unsigned pos = sizeof(header) - 1;
|
||||
header[pos] = ofs & 127;
|
||||
while (ofs >>= 7)
|
||||
header[--pos] = 128 | (--ofs & 127);
|
||||
sha1write(f, header + pos, sizeof(header) - pos);
|
||||
hdrlen += sizeof(header) - pos;
|
||||
} else if (obj_type == OBJ_REF_DELTA) {
|
||||
/*
|
||||
* Deltas with a base reference contain
|
||||
* an additional 20 bytes for the base sha1.
|
||||
*/
|
||||
sha1write(f, entry->delta->sha1, 20);
|
||||
hdrlen += 20;
|
||||
}
|
||||
datalen = sha1write_compressed(f, buf, size);
|
||||
@ -399,21 +413,40 @@ static unsigned long write_object(struct sha1file *f,
|
||||
}
|
||||
else {
|
||||
struct packed_git *p = entry->in_pack;
|
||||
|
||||
if (entry->delta) {
|
||||
obj_type = (allow_ofs_delta && entry->delta->offset) ?
|
||||
OBJ_OFS_DELTA : OBJ_REF_DELTA;
|
||||
reused_delta++;
|
||||
}
|
||||
hdrlen = encode_header(obj_type, entry->size, header);
|
||||
sha1write(f, header, hdrlen);
|
||||
if (obj_type == OBJ_OFS_DELTA) {
|
||||
unsigned long ofs = entry->offset - entry->delta->offset;
|
||||
unsigned pos = sizeof(header) - 1;
|
||||
header[pos] = ofs & 127;
|
||||
while (ofs >>= 7)
|
||||
header[--pos] = 128 | (--ofs & 127);
|
||||
sha1write(f, header + pos, sizeof(header) - pos);
|
||||
hdrlen += sizeof(header) - pos;
|
||||
} else if (obj_type == OBJ_REF_DELTA) {
|
||||
sha1write(f, entry->delta->sha1, 20);
|
||||
hdrlen += 20;
|
||||
}
|
||||
|
||||
use_packed_git(p);
|
||||
|
||||
datalen = find_packed_object_size(p, entry->in_pack_offset);
|
||||
buf = (char *) p->pack_base + entry->in_pack_offset;
|
||||
|
||||
if (revalidate_pack_entry(entry, buf, datalen))
|
||||
buf = (char *) p->pack_base
|
||||
+ entry->in_pack_offset
|
||||
+ entry->in_pack_header_size;
|
||||
datalen = find_packed_object_size(p, entry->in_pack_offset)
|
||||
- entry->in_pack_header_size;
|
||||
if (!pack_to_stdout && check_inflate(buf, datalen, entry->size))
|
||||
die("corrupt delta in pack %s", sha1_to_hex(entry->sha1));
|
||||
sha1write(f, buf, datalen);
|
||||
unuse_packed_git(p);
|
||||
hdrlen = 0; /* not really */
|
||||
if (obj_type == OBJ_DELTA)
|
||||
reused_delta++;
|
||||
reused++;
|
||||
}
|
||||
if (obj_type == OBJ_DELTA)
|
||||
if (entry->delta)
|
||||
written_delta++;
|
||||
written++;
|
||||
return hdrlen + datalen;
|
||||
@ -423,17 +456,16 @@ static unsigned long write_one(struct sha1file *f,
|
||||
struct object_entry *e,
|
||||
unsigned long offset)
|
||||
{
|
||||
if (e->offset)
|
||||
if (e->offset || e->preferred_base)
|
||||
/* offset starts from header size and cannot be zero
|
||||
* if it is written already.
|
||||
*/
|
||||
return offset;
|
||||
e->offset = offset;
|
||||
offset += write_object(f, e);
|
||||
/* if we are deltified, write out its base object. */
|
||||
/* if we are deltified, write out its base object first. */
|
||||
if (e->delta)
|
||||
offset = write_one(f, e->delta, offset);
|
||||
return offset;
|
||||
e->offset = offset;
|
||||
return offset + write_object(f, e);
|
||||
}
|
||||
|
||||
static void write_pack_file(void)
|
||||
@ -899,26 +931,64 @@ static void check_object(struct object_entry *entry)
|
||||
char type[20];
|
||||
|
||||
if (entry->in_pack && !entry->preferred_base) {
|
||||
unsigned char base[20];
|
||||
unsigned long size;
|
||||
struct object_entry *base_entry;
|
||||
struct packed_git *p = entry->in_pack;
|
||||
unsigned long left = p->pack_size - entry->in_pack_offset;
|
||||
unsigned long size, used;
|
||||
unsigned char *buf;
|
||||
struct object_entry *base_entry = NULL;
|
||||
|
||||
use_packed_git(p);
|
||||
buf = p->pack_base;
|
||||
buf += entry->in_pack_offset;
|
||||
|
||||
/* We want in_pack_type even if we do not reuse delta.
|
||||
* There is no point not reusing non-delta representations.
|
||||
*/
|
||||
check_reuse_pack_delta(entry->in_pack,
|
||||
entry->in_pack_offset,
|
||||
base, &size,
|
||||
&entry->in_pack_type);
|
||||
used = unpack_object_header_gently(buf, left,
|
||||
&entry->in_pack_type, &size);
|
||||
if (!used || left - used <= 20)
|
||||
die("corrupt pack for %s", sha1_to_hex(entry->sha1));
|
||||
|
||||
/* Check if it is delta, and the base is also an object
|
||||
* we are going to pack. If so we will reuse the existing
|
||||
* delta.
|
||||
*/
|
||||
if (!no_reuse_delta &&
|
||||
entry->in_pack_type == OBJ_DELTA &&
|
||||
(base_entry = locate_object_entry(base)) &&
|
||||
(!base_entry->preferred_base)) {
|
||||
if (!no_reuse_delta) {
|
||||
unsigned char c, *base_name;
|
||||
unsigned long ofs;
|
||||
/* there is at least 20 bytes left in the pack */
|
||||
switch (entry->in_pack_type) {
|
||||
case OBJ_REF_DELTA:
|
||||
base_name = buf + used;
|
||||
used += 20;
|
||||
break;
|
||||
case OBJ_OFS_DELTA:
|
||||
c = buf[used++];
|
||||
ofs = c & 127;
|
||||
while (c & 128) {
|
||||
ofs += 1;
|
||||
if (!ofs || ofs & ~(~0UL >> 7))
|
||||
die("delta base offset overflow in pack for %s",
|
||||
sha1_to_hex(entry->sha1));
|
||||
c = buf[used++];
|
||||
ofs = (ofs << 7) + (c & 127);
|
||||
}
|
||||
if (ofs >= entry->in_pack_offset)
|
||||
die("delta base offset out of bound for %s",
|
||||
sha1_to_hex(entry->sha1));
|
||||
ofs = entry->in_pack_offset - ofs;
|
||||
base_name = find_packed_object_name(p, ofs);
|
||||
break;
|
||||
default:
|
||||
base_name = NULL;
|
||||
}
|
||||
if (base_name)
|
||||
base_entry = locate_object_entry(base_name);
|
||||
}
|
||||
unuse_packed_git(p);
|
||||
entry->in_pack_header_size = used;
|
||||
|
||||
if (base_entry) {
|
||||
|
||||
/* Depth value does not matter - find_deltas()
|
||||
* will never consider reused delta as the
|
||||
@ -927,9 +997,9 @@ static void check_object(struct object_entry *entry)
|
||||
*/
|
||||
|
||||
/* uncompressed size of the delta data */
|
||||
entry->size = entry->delta_size = size;
|
||||
entry->size = size;
|
||||
entry->delta = base_entry;
|
||||
entry->type = OBJ_DELTA;
|
||||
entry->type = entry->in_pack_type;
|
||||
|
||||
entry->delta_sibling = base_entry->delta_child;
|
||||
base_entry->delta_child = entry;
|
||||
@ -1484,6 +1554,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
|
||||
no_reuse_delta = 1;
|
||||
continue;
|
||||
}
|
||||
if (!strcmp("--delta-base-offset", arg)) {
|
||||
allow_ofs_delta = 1;
|
||||
continue;
|
||||
}
|
||||
if (!strcmp("--stdout", arg)) {
|
||||
pack_to_stdout = 1;
|
||||
continue;
|
||||
|
@ -15,7 +15,7 @@ static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-fil
|
||||
|
||||
/* We always read in 4kB chunks. */
|
||||
static unsigned char buffer[4096];
|
||||
static unsigned long offset, len;
|
||||
static unsigned long offset, len, consumed_bytes;
|
||||
static SHA_CTX ctx;
|
||||
|
||||
/*
|
||||
@ -51,6 +51,7 @@ static void use(int bytes)
|
||||
die("used more bytes than were available");
|
||||
len -= bytes;
|
||||
offset += bytes;
|
||||
consumed_bytes += bytes;
|
||||
}
|
||||
|
||||
static void *get_data(unsigned long size)
|
||||
@ -89,35 +90,49 @@ static void *get_data(unsigned long size)
|
||||
|
||||
/*
 * A delta whose base object was not available when the delta was read.
 * Entries are chained on delta_list and resolved by added_object() once
 * an object matching base_sha1 or base_offset has been written.
 */
struct delta_info {
|
||||
/* name of the base object; null_sha1 when the base is known only by offset */
unsigned char base_sha1[20];
|
||||
/* pack offset of the base object; 0 when the base is known only by name */
unsigned long base_offset;
|
||||
/* size of the delta data */
unsigned long size;
|
||||
/* the delta data itself; freed by resolve_delta() */
void *delta;
|
||||
/* index into obj_list of the object this delta will produce */
unsigned nr;
|
||||
/* next pending delta on delta_list */
struct delta_info *next;
|
||||
};
|
||||
|
||||
static struct delta_info *delta_list;
|
||||
|
||||
static void add_delta_to_list(unsigned char *base_sha1, void *delta, unsigned long size)
|
||||
static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1,
|
||||
unsigned long base_offset,
|
||||
void *delta, unsigned long size)
|
||||
{
|
||||
struct delta_info *info = xmalloc(sizeof(*info));
|
||||
|
||||
hashcpy(info->base_sha1, base_sha1);
|
||||
info->base_offset = base_offset;
|
||||
info->size = size;
|
||||
info->delta = delta;
|
||||
info->nr = nr;
|
||||
info->next = delta_list;
|
||||
delta_list = info;
|
||||
}
|
||||
|
||||
static void added_object(unsigned char *sha1, const char *type, void *data, unsigned long size);
|
||||
|
||||
static void write_object(void *buf, unsigned long size, const char *type)
|
||||
{
|
||||
struct obj_info {
|
||||
unsigned long offset;
|
||||
unsigned char sha1[20];
|
||||
if (write_sha1_file(buf, size, type, sha1) < 0)
|
||||
};
|
||||
|
||||
static struct obj_info *obj_list;
|
||||
|
||||
static void added_object(unsigned nr, const char *type, void *data,
|
||||
unsigned long size);
|
||||
|
||||
static void write_object(unsigned nr, void *buf, unsigned long size,
|
||||
const char *type)
|
||||
{
|
||||
if (write_sha1_file(buf, size, type, obj_list[nr].sha1) < 0)
|
||||
die("failed to write object");
|
||||
added_object(sha1, type, buf, size);
|
||||
added_object(nr, type, buf, size);
|
||||
}
|
||||
|
||||
static void resolve_delta(const char *type,
|
||||
static void resolve_delta(unsigned nr, const char *type,
|
||||
void *base, unsigned long base_size,
|
||||
void *delta, unsigned long delta_size)
|
||||
{
|
||||
@ -130,20 +145,23 @@ static void resolve_delta(const char *type,
|
||||
if (!result)
|
||||
die("failed to apply delta");
|
||||
free(delta);
|
||||
write_object(result, result_size, type);
|
||||
write_object(nr, result, result_size, type);
|
||||
free(result);
|
||||
}
|
||||
|
||||
static void added_object(unsigned char *sha1, const char *type, void *data, unsigned long size)
|
||||
static void added_object(unsigned nr, const char *type, void *data,
|
||||
unsigned long size)
|
||||
{
|
||||
struct delta_info **p = &delta_list;
|
||||
struct delta_info *info;
|
||||
|
||||
while ((info = *p) != NULL) {
|
||||
if (!hashcmp(info->base_sha1, sha1)) {
|
||||
if (!hashcmp(info->base_sha1, obj_list[nr].sha1) ||
|
||||
info->base_offset == obj_list[nr].offset) {
|
||||
*p = info->next;
|
||||
p = &delta_list;
|
||||
resolve_delta(type, data, size, info->delta, info->size);
|
||||
resolve_delta(info->nr, type, data, size,
|
||||
info->delta, info->size);
|
||||
free(info);
|
||||
continue;
|
||||
}
|
||||
@ -151,7 +169,8 @@ static void added_object(unsigned char *sha1, const char *type, void *data, unsi
|
||||
}
|
||||
}
|
||||
|
||||
static void unpack_non_delta_entry(enum object_type kind, unsigned long size)
|
||||
static void unpack_non_delta_entry(enum object_type kind, unsigned long size,
|
||||
unsigned nr)
|
||||
{
|
||||
void *buf = get_data(size);
|
||||
const char *type;
|
||||
@ -164,30 +183,80 @@ static void unpack_non_delta_entry(enum object_type kind, unsigned long size)
|
||||
default: die("bad type %d", kind);
|
||||
}
|
||||
if (!dry_run && buf)
|
||||
write_object(buf, size, type);
|
||||
write_object(nr, buf, size, type);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
static void unpack_delta_entry(unsigned long delta_size)
|
||||
static void unpack_delta_entry(enum object_type kind, unsigned long delta_size,
|
||||
unsigned nr)
|
||||
{
|
||||
void *delta_data, *base;
|
||||
unsigned long base_size;
|
||||
char type[20];
|
||||
unsigned char base_sha1[20];
|
||||
|
||||
hashcpy(base_sha1, fill(20));
|
||||
use(20);
|
||||
if (kind == OBJ_REF_DELTA) {
|
||||
hashcpy(base_sha1, fill(20));
|
||||
use(20);
|
||||
delta_data = get_data(delta_size);
|
||||
if (dry_run || !delta_data) {
|
||||
free(delta_data);
|
||||
return;
|
||||
}
|
||||
if (!has_sha1_file(base_sha1)) {
|
||||
hashcpy(obj_list[nr].sha1, null_sha1);
|
||||
add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
unsigned base_found = 0;
|
||||
unsigned char *pack, c;
|
||||
unsigned long base_offset;
|
||||
unsigned lo, mid, hi;
|
||||
|
||||
delta_data = get_data(delta_size);
|
||||
if (dry_run || !delta_data) {
|
||||
free(delta_data);
|
||||
return;
|
||||
pack = fill(1);
|
||||
c = *pack;
|
||||
use(1);
|
||||
base_offset = c & 127;
|
||||
while (c & 128) {
|
||||
base_offset += 1;
|
||||
if (!base_offset || base_offset & ~(~0UL >> 7))
|
||||
die("offset value overflow for delta base object");
|
||||
pack = fill(1);
|
||||
c = *pack;
|
||||
use(1);
|
||||
base_offset = (base_offset << 7) + (c & 127);
|
||||
}
|
||||
base_offset = obj_list[nr].offset - base_offset;
|
||||
|
||||
delta_data = get_data(delta_size);
|
||||
if (dry_run || !delta_data) {
|
||||
free(delta_data);
|
||||
return;
|
||||
}
|
||||
lo = 0;
|
||||
hi = nr;
|
||||
while (lo < hi) {
|
||||
mid = (lo + hi)/2;
|
||||
if (base_offset < obj_list[mid].offset) {
|
||||
hi = mid;
|
||||
} else if (base_offset > obj_list[mid].offset) {
|
||||
lo = mid + 1;
|
||||
} else {
|
||||
hashcpy(base_sha1, obj_list[mid].sha1);
|
||||
base_found = !is_null_sha1(base_sha1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!base_found) {
|
||||
/* The delta base object is itself a delta that
|
||||
has not been resolved yet. */
|
||||
hashcpy(obj_list[nr].sha1, null_sha1);
|
||||
add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (!has_sha1_file(base_sha1)) {
|
||||
add_delta_to_list(base_sha1, delta_data, delta_size);
|
||||
return;
|
||||
}
|
||||
base = read_sha1_file(base_sha1, type, &base_size);
|
||||
if (!base) {
|
||||
error("failed to read delta-pack base object %s",
|
||||
@ -197,7 +266,7 @@ static void unpack_delta_entry(unsigned long delta_size)
|
||||
has_errors = 1;
|
||||
return;
|
||||
}
|
||||
resolve_delta(type, base, base_size, delta_data, delta_size);
|
||||
resolve_delta(nr, type, base, base_size, delta_data, delta_size);
|
||||
free(base);
|
||||
}
|
||||
|
||||
@ -208,6 +277,8 @@ static void unpack_one(unsigned nr, unsigned total)
|
||||
unsigned long size;
|
||||
enum object_type type;
|
||||
|
||||
obj_list[nr].offset = consumed_bytes;
|
||||
|
||||
pack = fill(1);
|
||||
c = *pack;
|
||||
use(1);
|
||||
@ -216,7 +287,7 @@ static void unpack_one(unsigned nr, unsigned total)
|
||||
shift = 4;
|
||||
while (c & 0x80) {
|
||||
pack = fill(1);
|
||||
c = *pack++;
|
||||
c = *pack;
|
||||
use(1);
|
||||
size += (c & 0x7f) << shift;
|
||||
shift += 7;
|
||||
@ -225,13 +296,14 @@ static void unpack_one(unsigned nr, unsigned total)
|
||||
static unsigned long last_sec;
|
||||
static unsigned last_percent;
|
||||
struct timeval now;
|
||||
unsigned percentage = (nr * 100) / total;
|
||||
unsigned percentage = ((nr+1) * 100) / total;
|
||||
|
||||
gettimeofday(&now, NULL);
|
||||
if (percentage != last_percent || now.tv_sec != last_sec) {
|
||||
last_sec = now.tv_sec;
|
||||
last_percent = percentage;
|
||||
fprintf(stderr, "%4u%% (%u/%u) done\r", percentage, nr, total);
|
||||
fprintf(stderr, "%4u%% (%u/%u) done\r",
|
||||
percentage, (nr+1), total);
|
||||
}
|
||||
}
|
||||
switch (type) {
|
||||
@ -239,10 +311,11 @@ static void unpack_one(unsigned nr, unsigned total)
|
||||
case OBJ_TREE:
|
||||
case OBJ_BLOB:
|
||||
case OBJ_TAG:
|
||||
unpack_non_delta_entry(type, size);
|
||||
unpack_non_delta_entry(type, size, nr);
|
||||
return;
|
||||
case OBJ_DELTA:
|
||||
unpack_delta_entry(size);
|
||||
case OBJ_REF_DELTA:
|
||||
case OBJ_OFS_DELTA:
|
||||
unpack_delta_entry(type, size, nr);
|
||||
return;
|
||||
default:
|
||||
error("bad object type %d", type);
|
||||
@ -265,9 +338,10 @@ static void unpack_all(void)
|
||||
die("unknown pack file version %d", ntohl(hdr->hdr_version));
|
||||
fprintf(stderr, "Unpacking %d objects\n", nr_objects);
|
||||
|
||||
obj_list = xmalloc(nr_objects * sizeof(*obj_list));
|
||||
use(sizeof(struct pack_header));
|
||||
for (i = 0; i < nr_objects; i++)
|
||||
unpack_one(i+1, nr_objects);
|
||||
unpack_one(i, nr_objects);
|
||||
if (delta_list)
|
||||
die("unresolved deltas left after unpacking");
|
||||
}
|
||||
|
5
cache.h
5
cache.h
@ -269,8 +269,9 @@ enum object_type {
|
||||
OBJ_TREE = 2,
|
||||
OBJ_BLOB = 3,
|
||||
OBJ_TAG = 4,
|
||||
/* 5/6 for future expansion */
|
||||
OBJ_DELTA = 7,
|
||||
/* 5 for future expansion */
|
||||
OBJ_OFS_DELTA = 6,
|
||||
OBJ_REF_DELTA = 7,
|
||||
OBJ_BAD,
|
||||
};
|
||||
|
||||
|
@ -166,12 +166,13 @@ static int find_common(int fd[2], unsigned char *result_sha1,
|
||||
}
|
||||
|
||||
if (!fetching)
|
||||
packet_write(fd[1], "want %s%s%s%s%s\n",
|
||||
packet_write(fd[1], "want %s%s%s%s%s%s\n",
|
||||
sha1_to_hex(remote),
|
||||
(multi_ack ? " multi_ack" : ""),
|
||||
(use_sideband == 2 ? " side-band-64k" : ""),
|
||||
(use_sideband == 1 ? " side-band" : ""),
|
||||
(use_thin_pack ? " thin-pack" : ""));
|
||||
(use_thin_pack ? " thin-pack" : ""),
|
||||
" ofs-delta");
|
||||
else
|
||||
packet_write(fd[1], "want %s\n", sha1_to_hex(remote));
|
||||
fetching++;
|
||||
|
@ -3,7 +3,7 @@
|
||||
# Copyright (c) 2005 Linus Torvalds
|
||||
#
|
||||
|
||||
USAGE='[-a] [-d] [-f] [-l] [-n] [-q]'
|
||||
USAGE='[-a] [-d] [-f] [-l] [-n] [-q] [--window=N] [--depth=N]'
|
||||
SUBDIRECTORY_OK='Yes'
|
||||
. git-sh-setup
|
||||
|
||||
@ -25,6 +25,15 @@ do
|
||||
shift
|
||||
done
|
||||
|
||||
# Later we will default repack.UseDeltaBaseOffset to true
|
||||
default_dbo=false
|
||||
|
||||
case "`git repo-config --bool repack.usedeltabaseoffset ||
|
||||
echo $default_dbo`" in
|
||||
true)
|
||||
extra="$extra --delta-base-offset" ;;
|
||||
esac
|
||||
|
||||
PACKDIR="$GIT_OBJECT_DIRECTORY/pack"
|
||||
PACKTMP="$GIT_DIR/.tmp-$$-pack"
|
||||
rm -f "$PACKTMP"-*
|
||||
|
331
index-pack.c
331
index-pack.c
@ -13,63 +13,93 @@ static const char index_pack_usage[] =
|
||||
/*
 * Per-object record kept while indexing a pack.
 * NOTE(review): the code that fills these fields is outside this view;
 * the field notes below are assumptions drawn from the names -- confirm.
 */
struct object_entry
|
||||
{
|
||||
/* presumably the offset of the object's entry within the pack -- confirm */
unsigned long offset;
|
||||
/* presumably the object size taken from the entry header -- confirm */
unsigned long size;
|
||||
/* presumably the length of the entry header preceding the data -- confirm */
unsigned int hdr_size;
|
||||
/* presumably the type as stored in the pack (may be a delta type) -- confirm */
enum object_type type;
|
||||
/* presumably the type after delta resolution -- confirm */
enum object_type real_type;
|
||||
/* 20-byte object name */
unsigned char sha1[20];
|
||||
};
|
||||
|
||||
/*
 * Identifies the base of a delta either by object name or by offset.
 * Both members share storage, so a value can be compared as raw bytes.
 * NOTE(review): which member is valid presumably depends on the delta
 * type (ref vs. offset) -- confirm against the users of this union.
 */
union delta_base {
|
||||
/* base given as a 20-byte object name */
unsigned char sha1[20];
|
||||
/* base given as an offset */
unsigned long offset;
|
||||
};
|
||||
|
||||
/*
|
||||
* Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want
|
||||
* to memcmp() only the first 20 bytes.
|
||||
*/
|
||||
#define UNION_BASE_SZ 20
|
||||
|
||||
struct delta_entry
|
||||
{
|
||||
struct object_entry *obj;
|
||||
unsigned char base_sha1[20];
|
||||
union delta_base base;
|
||||
};
|
||||
|
||||
static const char *pack_name;
|
||||
static unsigned char *pack_base;
|
||||
static unsigned long pack_size;
|
||||
static struct object_entry *objects;
|
||||
static struct delta_entry *deltas;
|
||||
static int nr_objects;
|
||||
static int nr_deltas;
|
||||
|
||||
/* We always read in 4kB chunks. */
|
||||
static unsigned char input_buffer[4096];
|
||||
static unsigned long input_offset, input_len, consumed_bytes;
|
||||
static SHA_CTX input_ctx;
|
||||
static int input_fd;
|
||||
|
||||
/*
|
||||
* Make sure at least "min" bytes are available in the buffer, and
|
||||
* return the pointer to the buffer.
|
||||
*/
|
||||
static void * fill(int min)
|
||||
{
|
||||
if (min <= input_len)
|
||||
return input_buffer + input_offset;
|
||||
if (min > sizeof(input_buffer))
|
||||
die("cannot fill %d bytes", min);
|
||||
if (input_offset) {
|
||||
SHA1_Update(&input_ctx, input_buffer, input_offset);
|
||||
memcpy(input_buffer, input_buffer + input_offset, input_len);
|
||||
input_offset = 0;
|
||||
}
|
||||
do {
|
||||
int ret = xread(input_fd, input_buffer + input_len,
|
||||
sizeof(input_buffer) - input_len);
|
||||
if (ret <= 0) {
|
||||
if (!ret)
|
||||
die("early EOF");
|
||||
die("read error on input: %s", strerror(errno));
|
||||
}
|
||||
input_len += ret;
|
||||
} while (input_len < min);
|
||||
return input_buffer;
|
||||
}
|
||||
|
||||
static void use(int bytes)
|
||||
{
|
||||
if (bytes > input_len)
|
||||
die("used more bytes than were available");
|
||||
input_len -= bytes;
|
||||
input_offset += bytes;
|
||||
consumed_bytes += bytes;
|
||||
}
|
||||
|
||||
static void open_pack_file(void)
|
||||
{
|
||||
int fd;
|
||||
struct stat st;
|
||||
|
||||
fd = open(pack_name, O_RDONLY);
|
||||
if (fd < 0)
|
||||
input_fd = open(pack_name, O_RDONLY);
|
||||
if (input_fd < 0)
|
||||
die("cannot open packfile '%s': %s", pack_name,
|
||||
strerror(errno));
|
||||
if (fstat(fd, &st)) {
|
||||
int err = errno;
|
||||
close(fd);
|
||||
die("cannot fstat packfile '%s': %s", pack_name,
|
||||
strerror(err));
|
||||
}
|
||||
pack_size = st.st_size;
|
||||
pack_base = mmap(NULL, pack_size, PROT_READ, MAP_PRIVATE, fd, 0);
|
||||
if (pack_base == MAP_FAILED) {
|
||||
int err = errno;
|
||||
close(fd);
|
||||
die("cannot mmap packfile '%s': %s", pack_name,
|
||||
strerror(err));
|
||||
}
|
||||
close(fd);
|
||||
SHA1_Init(&input_ctx);
|
||||
}
|
||||
|
||||
static void parse_pack_header(void)
|
||||
{
|
||||
const struct pack_header *hdr;
|
||||
unsigned char sha1[20];
|
||||
SHA_CTX ctx;
|
||||
|
||||
/* Ensure there are enough bytes for the header and final SHA1 */
|
||||
if (pack_size < sizeof(struct pack_header) + 20)
|
||||
die("packfile '%s' is too small", pack_name);
|
||||
struct pack_header *hdr = fill(sizeof(struct pack_header));
|
||||
|
||||
/* Header consistency check */
|
||||
hdr = (void *)pack_base;
|
||||
if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
|
||||
die("packfile '%s' signature mismatch", pack_name);
|
||||
if (!pack_version_ok(hdr->hdr_version))
|
||||
@ -77,13 +107,8 @@ static void parse_pack_header(void)
|
||||
pack_name, ntohl(hdr->hdr_version));
|
||||
|
||||
nr_objects = ntohl(hdr->hdr_entries);
|
||||
|
||||
/* Check packfile integrity */
|
||||
SHA1_Init(&ctx);
|
||||
SHA1_Update(&ctx, pack_base, pack_size - 20);
|
||||
SHA1_Final(sha1, &ctx);
|
||||
if (hashcmp(sha1, pack_base + pack_size - 20))
|
||||
die("packfile '%s' SHA1 mismatch", pack_name);
|
||||
use(sizeof(struct pack_header));
|
||||
/*fprintf(stderr, "Indexing %d objects\n", nr_objects);*/
|
||||
}
|
||||
|
||||
static void bad_object(unsigned long offset, const char *format,
|
||||
@ -101,86 +126,121 @@ static void bad_object(unsigned long offset, const char *format, ...)
|
||||
pack_name, offset, buf);
|
||||
}
|
||||
|
||||
static void *unpack_entry_data(unsigned long offset,
|
||||
unsigned long *current_pos, unsigned long size)
|
||||
static void *unpack_entry_data(unsigned long offset, unsigned long size)
|
||||
{
|
||||
unsigned long pack_limit = pack_size - 20;
|
||||
unsigned long pos = *current_pos;
|
||||
z_stream stream;
|
||||
void *buf = xmalloc(size);
|
||||
|
||||
memset(&stream, 0, sizeof(stream));
|
||||
stream.next_out = buf;
|
||||
stream.avail_out = size;
|
||||
stream.next_in = pack_base + pos;
|
||||
stream.avail_in = pack_limit - pos;
|
||||
stream.next_in = fill(1);
|
||||
stream.avail_in = input_len;
|
||||
inflateInit(&stream);
|
||||
|
||||
for (;;) {
|
||||
int ret = inflate(&stream, 0);
|
||||
if (ret == Z_STREAM_END)
|
||||
use(input_len - stream.avail_in);
|
||||
if (stream.total_out == size && ret == Z_STREAM_END)
|
||||
break;
|
||||
if (ret != Z_OK)
|
||||
bad_object(offset, "inflate returned %d", ret);
|
||||
stream.next_in = fill(1);
|
||||
stream.avail_in = input_len;
|
||||
}
|
||||
inflateEnd(&stream);
|
||||
if (stream.total_out != size)
|
||||
bad_object(offset, "size mismatch (expected %lu, got %lu)",
|
||||
size, stream.total_out);
|
||||
*current_pos = pack_limit - stream.avail_in;
|
||||
return buf;
|
||||
}
|
||||
|
||||
static void *unpack_raw_entry(unsigned long offset,
|
||||
enum object_type *obj_type,
|
||||
unsigned long *obj_size,
|
||||
unsigned char *delta_base,
|
||||
unsigned long *next_obj_offset)
|
||||
static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_base)
|
||||
{
|
||||
unsigned long pack_limit = pack_size - 20;
|
||||
unsigned long pos = offset;
|
||||
unsigned char c;
|
||||
unsigned long size;
|
||||
unsigned char *p, c;
|
||||
unsigned long size, base_offset;
|
||||
unsigned shift;
|
||||
enum object_type type;
|
||||
void *data;
|
||||
|
||||
c = pack_base[pos++];
|
||||
type = (c >> 4) & 7;
|
||||
obj->offset = consumed_bytes;
|
||||
|
||||
p = fill(1);
|
||||
c = *p;
|
||||
use(1);
|
||||
obj->type = (c >> 4) & 7;
|
||||
size = (c & 15);
|
||||
shift = 4;
|
||||
while (c & 0x80) {
|
||||
if (pos >= pack_limit)
|
||||
bad_object(offset, "object extends past end of pack");
|
||||
c = pack_base[pos++];
|
||||
p = fill(1);
|
||||
c = *p;
|
||||
use(1);
|
||||
size += (c & 0x7fUL) << shift;
|
||||
shift += 7;
|
||||
}
|
||||
obj->size = size;
|
||||
|
||||
switch (type) {
|
||||
case OBJ_DELTA:
|
||||
if (pos + 20 >= pack_limit)
|
||||
bad_object(offset, "object extends past end of pack");
|
||||
hashcpy(delta_base, pack_base + pos);
|
||||
pos += 20;
|
||||
/* fallthru */
|
||||
switch (obj->type) {
|
||||
case OBJ_REF_DELTA:
|
||||
hashcpy(delta_base->sha1, fill(20));
|
||||
use(20);
|
||||
break;
|
||||
case OBJ_OFS_DELTA:
|
||||
memset(delta_base, 0, sizeof(*delta_base));
|
||||
p = fill(1);
|
||||
c = *p;
|
||||
use(1);
|
||||
base_offset = c & 127;
|
||||
while (c & 128) {
|
||||
base_offset += 1;
|
||||
if (!base_offset || base_offset & ~(~0UL >> 7))
|
||||
bad_object(obj->offset, "offset value overflow for delta base object");
|
||||
p = fill(1);
|
||||
c = *p;
|
||||
use(1);
|
||||
base_offset = (base_offset << 7) + (c & 127);
|
||||
}
|
||||
delta_base->offset = obj->offset - base_offset;
|
||||
if (delta_base->offset >= obj->offset)
|
||||
bad_object(obj->offset, "delta base offset is out of bound");
|
||||
break;
|
||||
case OBJ_COMMIT:
|
||||
case OBJ_TREE:
|
||||
case OBJ_BLOB:
|
||||
case OBJ_TAG:
|
||||
data = unpack_entry_data(offset, &pos, size);
|
||||
break;
|
||||
default:
|
||||
bad_object(offset, "bad object type %d", type);
|
||||
bad_object(obj->offset, "bad object type %d", obj->type);
|
||||
}
|
||||
obj->hdr_size = consumed_bytes - obj->offset;
|
||||
|
||||
*obj_type = type;
|
||||
*obj_size = size;
|
||||
*next_obj_offset = pos;
|
||||
return unpack_entry_data(obj->offset, obj->size);
|
||||
}
|
||||
|
||||
static void * get_data_from_pack(struct object_entry *obj)
|
||||
{
|
||||
unsigned long from = obj[0].offset + obj[0].hdr_size;
|
||||
unsigned long len = obj[1].offset - from;
|
||||
unsigned pg_offset = from % getpagesize();
|
||||
unsigned char *map, *data;
|
||||
z_stream stream;
|
||||
int st;
|
||||
|
||||
map = mmap(NULL, len + pg_offset, PROT_READ, MAP_PRIVATE,
|
||||
input_fd, from - pg_offset);
|
||||
if (map == MAP_FAILED)
|
||||
die("cannot mmap packfile '%s': %s", pack_name, strerror(errno));
|
||||
data = xmalloc(obj->size);
|
||||
memset(&stream, 0, sizeof(stream));
|
||||
stream.next_out = data;
|
||||
stream.avail_out = obj->size;
|
||||
stream.next_in = map + pg_offset;
|
||||
stream.avail_in = len;
|
||||
inflateInit(&stream);
|
||||
while ((st = inflate(&stream, Z_FINISH)) == Z_OK);
|
||||
inflateEnd(&stream);
|
||||
if (st != Z_STREAM_END || stream.total_out != obj->size)
|
||||
die("serious inflate inconsistency");
|
||||
munmap(map, len + pg_offset);
|
||||
return data;
|
||||
}
|
||||
|
||||
static int find_delta(const unsigned char *base_sha1)
|
||||
static int find_delta(const union delta_base *base)
|
||||
{
|
||||
int first = 0, last = nr_deltas;
|
||||
|
||||
@ -189,7 +249,7 @@ static int find_delta(const unsigned char *base_sha1)
|
||||
struct delta_entry *delta = &deltas[next];
|
||||
int cmp;
|
||||
|
||||
cmp = hashcmp(base_sha1, delta->base_sha1);
|
||||
cmp = memcmp(base, &delta->base, UNION_BASE_SZ);
|
||||
if (!cmp)
|
||||
return next;
|
||||
if (cmp < 0) {
|
||||
@ -201,18 +261,18 @@ static int find_delta(const unsigned char *base_sha1)
|
||||
return -first-1;
|
||||
}
|
||||
|
||||
static int find_deltas_based_on_sha1(const unsigned char *base_sha1,
|
||||
int *first_index, int *last_index)
|
||||
static int find_delta_childs(const union delta_base *base,
|
||||
int *first_index, int *last_index)
|
||||
{
|
||||
int first = find_delta(base_sha1);
|
||||
int first = find_delta(base);
|
||||
int last = first;
|
||||
int end = nr_deltas - 1;
|
||||
|
||||
if (first < 0)
|
||||
return -1;
|
||||
while (first > 0 && !hashcmp(deltas[first - 1].base_sha1, base_sha1))
|
||||
while (first > 0 && !memcmp(&deltas[first - 1].base, base, UNION_BASE_SZ))
|
||||
--first;
|
||||
while (last < end && !hashcmp(deltas[last + 1].base_sha1, base_sha1))
|
||||
while (last < end && !memcmp(&deltas[last + 1].base, base, UNION_BASE_SZ))
|
||||
++last;
|
||||
*first_index = first;
|
||||
*last_index = last;
|
||||
@ -252,25 +312,34 @@ static void resolve_delta(struct delta_entry *delta, void *base_data,
|
||||
unsigned long delta_size;
|
||||
void *result;
|
||||
unsigned long result_size;
|
||||
enum object_type delta_type;
|
||||
unsigned char base_sha1[20];
|
||||
unsigned long next_obj_offset;
|
||||
union delta_base delta_base;
|
||||
int j, first, last;
|
||||
|
||||
obj->real_type = type;
|
||||
delta_data = unpack_raw_entry(obj->offset, &delta_type,
|
||||
&delta_size, base_sha1,
|
||||
&next_obj_offset);
|
||||
delta_data = get_data_from_pack(obj);
|
||||
delta_size = obj->size;
|
||||
result = patch_delta(base_data, base_size, delta_data, delta_size,
|
||||
&result_size);
|
||||
free(delta_data);
|
||||
if (!result)
|
||||
bad_object(obj->offset, "failed to apply delta");
|
||||
sha1_object(result, result_size, type, obj->sha1);
|
||||
if (!find_deltas_based_on_sha1(obj->sha1, &first, &last)) {
|
||||
|
||||
hashcpy(delta_base.sha1, obj->sha1);
|
||||
if (!find_delta_childs(&delta_base, &first, &last)) {
|
||||
for (j = first; j <= last; j++)
|
||||
resolve_delta(&deltas[j], result, result_size, type);
|
||||
if (deltas[j].obj->type == OBJ_REF_DELTA)
|
||||
resolve_delta(&deltas[j], result, result_size, type);
|
||||
}
|
||||
|
||||
memset(&delta_base, 0, sizeof(delta_base));
|
||||
delta_base.offset = obj->offset;
|
||||
if (!find_delta_childs(&delta_base, &first, &last)) {
|
||||
for (j = first; j <= last; j++)
|
||||
if (deltas[j].obj->type == OBJ_OFS_DELTA)
|
||||
resolve_delta(&deltas[j], result, result_size, type);
|
||||
}
|
||||
|
||||
free(result);
|
||||
}
|
||||
|
||||
@ -278,16 +347,16 @@ static int compare_delta_entry(const void *a, const void *b)
|
||||
{
|
||||
const struct delta_entry *delta_a = a;
|
||||
const struct delta_entry *delta_b = b;
|
||||
return hashcmp(delta_a->base_sha1, delta_b->base_sha1);
|
||||
return memcmp(&delta_a->base, &delta_b->base, UNION_BASE_SZ);
|
||||
}
|
||||
|
||||
static void parse_pack_objects(void)
|
||||
/* Parse all objects and return the pack content SHA1 hash */
|
||||
static void parse_pack_objects(unsigned char *sha1)
|
||||
{
|
||||
int i;
|
||||
unsigned long offset = sizeof(struct pack_header);
|
||||
unsigned char base_sha1[20];
|
||||
struct delta_entry *delta = deltas;
|
||||
void *data;
|
||||
unsigned long data_size;
|
||||
struct stat st;
|
||||
|
||||
/*
|
||||
* First pass:
|
||||
@ -297,22 +366,32 @@ static void parse_pack_objects(void)
|
||||
*/
|
||||
for (i = 0; i < nr_objects; i++) {
|
||||
struct object_entry *obj = &objects[i];
|
||||
obj->offset = offset;
|
||||
data = unpack_raw_entry(offset, &obj->type, &data_size,
|
||||
base_sha1, &offset);
|
||||
data = unpack_raw_entry(obj, &delta->base);
|
||||
obj->real_type = obj->type;
|
||||
if (obj->type == OBJ_DELTA) {
|
||||
struct delta_entry *delta = &deltas[nr_deltas++];
|
||||
if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) {
|
||||
nr_deltas++;
|
||||
delta->obj = obj;
|
||||
hashcpy(delta->base_sha1, base_sha1);
|
||||
delta++;
|
||||
} else
|
||||
sha1_object(data, data_size, obj->type, obj->sha1);
|
||||
sha1_object(data, obj->size, obj->type, obj->sha1);
|
||||
free(data);
|
||||
}
|
||||
if (offset != pack_size - 20)
|
||||
objects[i].offset = consumed_bytes;
|
||||
|
||||
/* Check pack integrity */
|
||||
SHA1_Update(&input_ctx, input_buffer, input_offset);
|
||||
SHA1_Final(sha1, &input_ctx);
|
||||
if (hashcmp(fill(20), sha1))
|
||||
die("packfile '%s' SHA1 mismatch", pack_name);
|
||||
use(20);
|
||||
|
||||
/* If input_fd is a file, we should have reached its end now. */
|
||||
if (fstat(input_fd, &st))
|
||||
die("cannot fstat packfile '%s': %s", pack_name, strerror(errno));
|
||||
if (S_ISREG(st.st_mode) && st.st_size != consumed_bytes)
|
||||
die("packfile '%s' has junk at the end", pack_name);
|
||||
|
||||
/* Sort deltas by base SHA1 for fast searching */
|
||||
/* Sort deltas by base SHA1/offset for fast searching */
|
||||
qsort(deltas, nr_deltas, sizeof(struct delta_entry),
|
||||
compare_delta_entry);
|
||||
|
||||
@ -326,22 +405,36 @@ static void parse_pack_objects(void)
|
||||
*/
|
||||
for (i = 0; i < nr_objects; i++) {
|
||||
struct object_entry *obj = &objects[i];
|
||||
int j, first, last;
|
||||
union delta_base base;
|
||||
int j, ref, ref_first, ref_last, ofs, ofs_first, ofs_last;
|
||||
|
||||
if (obj->type == OBJ_DELTA)
|
||||
if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA)
|
||||
continue;
|
||||
if (find_deltas_based_on_sha1(obj->sha1, &first, &last))
|
||||
hashcpy(base.sha1, obj->sha1);
|
||||
ref = !find_delta_childs(&base, &ref_first, &ref_last);
|
||||
memset(&base, 0, sizeof(base));
|
||||
base.offset = obj->offset;
|
||||
ofs = !find_delta_childs(&base, &ofs_first, &ofs_last);
|
||||
if (!ref && !ofs)
|
||||
continue;
|
||||
data = unpack_raw_entry(obj->offset, &obj->type, &data_size,
|
||||
base_sha1, &offset);
|
||||
for (j = first; j <= last; j++)
|
||||
resolve_delta(&deltas[j], data, data_size, obj->type);
|
||||
data = get_data_from_pack(obj);
|
||||
if (ref)
|
||||
for (j = ref_first; j <= ref_last; j++)
|
||||
if (deltas[j].obj->type == OBJ_REF_DELTA)
|
||||
resolve_delta(&deltas[j], data,
|
||||
obj->size, obj->type);
|
||||
if (ofs)
|
||||
for (j = ofs_first; j <= ofs_last; j++)
|
||||
if (deltas[j].obj->type == OBJ_OFS_DELTA)
|
||||
resolve_delta(&deltas[j], data,
|
||||
obj->size, obj->type);
|
||||
free(data);
|
||||
}
|
||||
|
||||
/* Check for unresolved deltas */
|
||||
for (i = 0; i < nr_deltas; i++) {
|
||||
if (deltas[i].obj->real_type == OBJ_DELTA)
|
||||
if (deltas[i].obj->real_type == OBJ_REF_DELTA ||
|
||||
deltas[i].obj->real_type == OBJ_OFS_DELTA)
|
||||
die("packfile '%s' has unresolved deltas", pack_name);
|
||||
}
|
||||
}
|
||||
@ -353,6 +446,10 @@ static int sha1_compare(const void *_a, const void *_b)
|
||||
return hashcmp(a->sha1, b->sha1);
|
||||
}
|
||||
|
||||
/*
|
||||
* On entry *sha1 contains the pack content SHA1 hash, on exit it is
|
||||
* the SHA1 hash of sorted object names.
|
||||
*/
|
||||
static void write_index_file(const char *index_name, unsigned char *sha1)
|
||||
{
|
||||
struct sha1file *f;
|
||||
@ -412,7 +509,7 @@ static void write_index_file(const char *index_name, unsigned char *sha1)
|
||||
sha1write(f, obj->sha1, 20);
|
||||
SHA1_Update(&ctx, obj->sha1, 20);
|
||||
}
|
||||
sha1write(f, pack_base + pack_size - 20, 20);
|
||||
sha1write(f, sha1, 20);
|
||||
sha1close(f, NULL, 1);
|
||||
free(sorted_by_sha);
|
||||
SHA1_Final(sha1, &ctx);
|
||||
@ -458,9 +555,9 @@ int main(int argc, char **argv)
|
||||
|
||||
open_pack_file();
|
||||
parse_pack_header();
|
||||
objects = xcalloc(nr_objects, sizeof(struct object_entry));
|
||||
objects = xcalloc(nr_objects + 1, sizeof(struct object_entry));
|
||||
deltas = xcalloc(nr_objects, sizeof(struct delta_entry));
|
||||
parse_pack_objects();
|
||||
parse_pack_objects(sha1);
|
||||
free(deltas);
|
||||
write_index_file(index_name, sha1);
|
||||
free(objects);
|
||||
|
3
pack.h
3
pack.h
@ -16,7 +16,4 @@ struct pack_header {
|
||||
};
|
||||
|
||||
extern int verify_pack(struct packed_git *, int);
|
||||
extern int check_reuse_pack_delta(struct packed_git *, unsigned long,
|
||||
unsigned char *, unsigned long *,
|
||||
enum object_type *);
|
||||
#endif
|
||||
|
113
sha1_file.c
113
sha1_file.c
@ -877,26 +877,61 @@ void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned l
|
||||
return unpack_sha1_rest(&stream, hdr, *size);
|
||||
}
|
||||
|
||||
static unsigned long get_delta_base(struct packed_git *p,
|
||||
unsigned long offset,
|
||||
enum object_type kind,
|
||||
unsigned long delta_obj_offset,
|
||||
unsigned long *base_obj_offset)
|
||||
{
|
||||
unsigned char *base_info = (unsigned char *) p->pack_base + offset;
|
||||
unsigned long base_offset;
|
||||
|
||||
/* there must be at least 20 bytes left regardless of delta type */
|
||||
if (p->pack_size <= offset + 20)
|
||||
die("truncated pack file");
|
||||
|
||||
if (kind == OBJ_OFS_DELTA) {
|
||||
unsigned used = 0;
|
||||
unsigned char c = base_info[used++];
|
||||
base_offset = c & 127;
|
||||
while (c & 128) {
|
||||
base_offset += 1;
|
||||
if (!base_offset || base_offset & ~(~0UL >> 7))
|
||||
die("offset value overflow for delta base object");
|
||||
c = base_info[used++];
|
||||
base_offset = (base_offset << 7) + (c & 127);
|
||||
}
|
||||
base_offset = delta_obj_offset - base_offset;
|
||||
if (base_offset >= delta_obj_offset)
|
||||
die("delta base offset out of bound");
|
||||
offset += used;
|
||||
} else if (kind == OBJ_REF_DELTA) {
|
||||
/* The base entry _must_ be in the same pack */
|
||||
base_offset = find_pack_entry_one(base_info, p);
|
||||
if (!base_offset)
|
||||
die("failed to find delta-pack base object %s",
|
||||
sha1_to_hex(base_info));
|
||||
offset += 20;
|
||||
} else
|
||||
die("I am totally screwed");
|
||||
*base_obj_offset = base_offset;
|
||||
return offset;
|
||||
}
|
||||
|
||||
/* forward declaration for a mutually recursive function */
|
||||
static int packed_object_info(struct packed_git *p, unsigned long offset,
|
||||
char *type, unsigned long *sizep);
|
||||
|
||||
static int packed_delta_info(struct packed_git *p,
|
||||
unsigned long offset,
|
||||
enum object_type kind,
|
||||
unsigned long obj_offset,
|
||||
char *type,
|
||||
unsigned long *sizep)
|
||||
{
|
||||
unsigned long base_offset;
|
||||
unsigned char *base_sha1 = (unsigned char *) p->pack_base + offset;
|
||||
|
||||
if (p->pack_size < offset + 20)
|
||||
die("truncated pack file");
|
||||
/* The base entry _must_ be in the same pack */
|
||||
base_offset = find_pack_entry_one(base_sha1, p);
|
||||
if (!base_offset)
|
||||
die("failed to find delta-pack base object %s",
|
||||
sha1_to_hex(base_sha1));
|
||||
offset += 20;
|
||||
offset = get_delta_base(p, offset, kind, obj_offset, &base_offset);
|
||||
|
||||
/* We choose to only get the type of the base object and
|
||||
* ignore potentially corrupt pack file that expects the delta
|
||||
@ -959,25 +994,6 @@ static unsigned long unpack_object_header(struct packed_git *p, unsigned long of
|
||||
return offset + used;
|
||||
}
|
||||
|
||||
int check_reuse_pack_delta(struct packed_git *p, unsigned long offset,
|
||||
unsigned char *base, unsigned long *sizep,
|
||||
enum object_type *kindp)
|
||||
{
|
||||
unsigned long ptr;
|
||||
int status = -1;
|
||||
|
||||
use_packed_git(p);
|
||||
ptr = offset;
|
||||
ptr = unpack_object_header(p, ptr, kindp, sizep);
|
||||
if (*kindp != OBJ_DELTA)
|
||||
goto done;
|
||||
hashcpy(base, (unsigned char *) p->pack_base + ptr);
|
||||
status = 0;
|
||||
done:
|
||||
unuse_packed_git(p);
|
||||
return status;
|
||||
}
|
||||
|
||||
void packed_object_info_detail(struct packed_git *p,
|
||||
unsigned long offset,
|
||||
char *type,
|
||||
@ -986,11 +1002,12 @@ void packed_object_info_detail(struct packed_git *p,
|
||||
unsigned int *delta_chain_length,
|
||||
unsigned char *base_sha1)
|
||||
{
|
||||
unsigned long val;
|
||||
unsigned long obj_offset, val;
|
||||
unsigned char *next_sha1;
|
||||
enum object_type kind;
|
||||
|
||||
*delta_chain_length = 0;
|
||||
obj_offset = offset;
|
||||
offset = unpack_object_header(p, offset, &kind, size);
|
||||
|
||||
for (;;) {
|
||||
@ -1005,7 +1022,13 @@ void packed_object_info_detail(struct packed_git *p,
|
||||
strcpy(type, type_names[kind]);
|
||||
*store_size = 0; /* notyet */
|
||||
return;
|
||||
case OBJ_DELTA:
|
||||
case OBJ_OFS_DELTA:
|
||||
get_delta_base(p, offset, kind, obj_offset, &offset);
|
||||
if (*delta_chain_length == 0) {
|
||||
/* TODO: find base_sha1 as pointed by offset */
|
||||
}
|
||||
break;
|
||||
case OBJ_REF_DELTA:
|
||||
if (p->pack_size <= offset + 20)
|
||||
die("pack file %s records an incomplete delta base",
|
||||
p->pack_name);
|
||||
@ -1015,6 +1038,7 @@ void packed_object_info_detail(struct packed_git *p,
|
||||
offset = find_pack_entry_one(next_sha1, p);
|
||||
break;
|
||||
}
|
||||
obj_offset = offset;
|
||||
offset = unpack_object_header(p, offset, &kind, &val);
|
||||
(*delta_chain_length)++;
|
||||
}
|
||||
@ -1023,15 +1047,15 @@ void packed_object_info_detail(struct packed_git *p,
|
||||
static int packed_object_info(struct packed_git *p, unsigned long offset,
|
||||
char *type, unsigned long *sizep)
|
||||
{
|
||||
unsigned long size;
|
||||
unsigned long size, obj_offset = offset;
|
||||
enum object_type kind;
|
||||
|
||||
offset = unpack_object_header(p, offset, &kind, &size);
|
||||
|
||||
if (kind == OBJ_DELTA)
|
||||
return packed_delta_info(p, offset, type, sizep);
|
||||
|
||||
switch (kind) {
|
||||
case OBJ_OFS_DELTA:
|
||||
case OBJ_REF_DELTA:
|
||||
return packed_delta_info(p, offset, kind, obj_offset, type, sizep);
|
||||
case OBJ_COMMIT:
|
||||
case OBJ_TREE:
|
||||
case OBJ_BLOB:
|
||||
@ -1077,23 +1101,15 @@ static void *unpack_compressed_entry(struct packed_git *p,
|
||||
static void *unpack_delta_entry(struct packed_git *p,
|
||||
unsigned long offset,
|
||||
unsigned long delta_size,
|
||||
enum object_type kind,
|
||||
unsigned long obj_offset,
|
||||
char *type,
|
||||
unsigned long *sizep)
|
||||
{
|
||||
void *delta_data, *result, *base;
|
||||
unsigned long result_size, base_size, base_offset;
|
||||
unsigned char *base_sha1;
|
||||
|
||||
if (p->pack_size < offset + 20)
|
||||
die("truncated pack file");
|
||||
/* The base entry _must_ be in the same pack */
|
||||
base_sha1 = (unsigned char*)p->pack_base + offset;
|
||||
base_offset = find_pack_entry_one(base_sha1, p);
|
||||
if (!base_offset)
|
||||
die("failed to find delta-pack base object %s",
|
||||
sha1_to_hex(base_sha1));
|
||||
offset += 20;
|
||||
|
||||
offset = get_delta_base(p, offset, kind, obj_offset, &base_offset);
|
||||
base = unpack_entry_gently(p, base_offset, type, &base_size);
|
||||
if (!base)
|
||||
die("failed to read delta base object at %lu from %s",
|
||||
@ -1130,13 +1146,14 @@ static void *unpack_entry(struct pack_entry *entry,
|
||||
void *unpack_entry_gently(struct packed_git *p, unsigned long offset,
|
||||
char *type, unsigned long *sizep)
|
||||
{
|
||||
unsigned long size;
|
||||
unsigned long size, obj_offset = offset;
|
||||
enum object_type kind;
|
||||
|
||||
offset = unpack_object_header(p, offset, &kind, &size);
|
||||
switch (kind) {
|
||||
case OBJ_DELTA:
|
||||
return unpack_delta_entry(p, offset, size, type, sizep);
|
||||
case OBJ_OFS_DELTA:
|
||||
case OBJ_REF_DELTA:
|
||||
return unpack_delta_entry(p, offset, size, kind, obj_offset, type, sizep);
|
||||
case OBJ_COMMIT:
|
||||
case OBJ_TREE:
|
||||
case OBJ_BLOB:
|
||||
|
@ -16,7 +16,7 @@ static const char upload_pack_usage[] = "git-upload-pack [--strict] [--timeout=n
|
||||
#define OUR_REF (1U << 1)
|
||||
#define WANTED (1U << 2)
|
||||
static int multi_ack, nr_our_refs;
|
||||
static int use_thin_pack;
|
||||
static int use_thin_pack, use_ofs_delta;
|
||||
static struct object_array have_obj;
|
||||
static struct object_array want_obj;
|
||||
static unsigned int timeout;
|
||||
@ -137,7 +137,9 @@ static void create_pack_file(void)
|
||||
close(pu_pipe[1]);
|
||||
close(pe_pipe[0]);
|
||||
close(pe_pipe[1]);
|
||||
execl_git_cmd("pack-objects", "--stdout", "--progress", NULL);
|
||||
execl_git_cmd("pack-objects", "--stdout", "--progress",
|
||||
use_ofs_delta ? "--delta-base-offset" : NULL,
|
||||
NULL);
|
||||
kill(pid_rev_list, SIGKILL);
|
||||
die("git-upload-pack: unable to exec git-pack-objects");
|
||||
}
|
||||
@ -393,6 +395,8 @@ static void receive_needs(void)
|
||||
multi_ack = 1;
|
||||
if (strstr(line+45, "thin-pack"))
|
||||
use_thin_pack = 1;
|
||||
if (strstr(line+45, "ofs-delta"))
|
||||
use_ofs_delta = 1;
|
||||
if (strstr(line+45, "side-band-64k"))
|
||||
use_sideband = LARGE_PACKET_MAX;
|
||||
else if (strstr(line+45, "side-band"))
|
||||
@ -418,7 +422,7 @@ static void receive_needs(void)
|
||||
|
||||
static int send_ref(const char *refname, const unsigned char *sha1)
|
||||
{
|
||||
static const char *capabilities = "multi_ack thin-pack side-band side-band-64k";
|
||||
static const char *capabilities = "multi_ack thin-pack side-band side-band-64k ofs-delta";
|
||||
struct object *o = parse_object(sha1);
|
||||
|
||||
if (!o)
|
||||
|
Loading…
Reference in New Issue
Block a user