Merge branch 'np/fast-import-idx-v2' into maint
* np/fast-import-idx-v2:
  fast-import: use the diff_delta() max_delta_size argument
  fast-import: honor pack.indexversion and pack.packsizelimit config vars
  fast-import: make default pack size unlimited
  fast-import: use write_idx_file() instead of custom code
  fast-import: use sha1write() for pack data
  fast-import: start using struct pack_idx_entry
This commit is contained in:
commit
f54555ca29
@ -45,10 +45,7 @@ OPTIONS
|
||||
|
||||
--max-pack-size=<n>::
|
||||
Maximum size of each output packfile.
|
||||
The default is 4 GiB as that is the maximum allowed
|
||||
packfile size (due to file format limitations). Some
|
||||
importers may wish to lower this, such as to ensure the
|
||||
resulting packfiles fit on CDs.
|
||||
The default is unlimited.
|
||||
|
||||
--big-file-threshold=<n>::
|
||||
Maximum size of a blob that fast-import will attempt to
|
||||
|
174
fast-import.c
174
fast-import.c
@ -164,12 +164,11 @@ Format of STDIN stream:
|
||||
|
||||
struct object_entry
|
||||
{
|
||||
struct pack_idx_entry idx;
|
||||
struct object_entry *next;
|
||||
uint32_t offset;
|
||||
uint32_t type : TYPE_BITS,
|
||||
pack_id : PACK_ID_BITS,
|
||||
depth : DEPTH_BITS;
|
||||
unsigned char sha1[20];
|
||||
};
|
||||
|
||||
struct object_entry_pool
|
||||
@ -192,7 +191,7 @@ struct mark_set
|
||||
struct last_object
|
||||
{
|
||||
struct strbuf data;
|
||||
uint32_t offset;
|
||||
off_t offset;
|
||||
unsigned int depth;
|
||||
unsigned no_swap : 1;
|
||||
};
|
||||
@ -280,7 +279,7 @@ struct recent_command
|
||||
|
||||
/* Configured limits on output */
|
||||
static unsigned long max_depth = 10;
|
||||
static off_t max_packsize = (1LL << 32) - 1;
|
||||
static off_t max_packsize;
|
||||
static uintmax_t big_file_threshold = 512 * 1024 * 1024;
|
||||
static int force_update;
|
||||
static int pack_compression_level = Z_DEFAULT_COMPRESSION;
|
||||
@ -313,9 +312,10 @@ static struct atom_str **atom_table;
|
||||
|
||||
/* The .pack file being generated */
|
||||
static unsigned int pack_id;
|
||||
static struct sha1file *pack_file;
|
||||
static struct packed_git *pack_data;
|
||||
static struct packed_git **all_packs;
|
||||
static unsigned long pack_size;
|
||||
static off_t pack_size;
|
||||
|
||||
/* Table of objects we've written. */
|
||||
static unsigned int object_entry_alloc = 5000;
|
||||
@ -521,7 +521,7 @@ static struct object_entry *new_object(unsigned char *sha1)
|
||||
alloc_objects(object_entry_alloc);
|
||||
|
||||
e = blocks->next_free++;
|
||||
hashcpy(e->sha1, sha1);
|
||||
hashcpy(e->idx.sha1, sha1);
|
||||
return e;
|
||||
}
|
||||
|
||||
@ -530,7 +530,7 @@ static struct object_entry *find_object(unsigned char *sha1)
|
||||
unsigned int h = sha1[0] << 8 | sha1[1];
|
||||
struct object_entry *e;
|
||||
for (e = object_table[h]; e; e = e->next)
|
||||
if (!hashcmp(sha1, e->sha1))
|
||||
if (!hashcmp(sha1, e->idx.sha1))
|
||||
return e;
|
||||
return NULL;
|
||||
}
|
||||
@ -542,7 +542,7 @@ static struct object_entry *insert_object(unsigned char *sha1)
|
||||
struct object_entry *p = NULL;
|
||||
|
||||
while (e) {
|
||||
if (!hashcmp(sha1, e->sha1))
|
||||
if (!hashcmp(sha1, e->idx.sha1))
|
||||
return e;
|
||||
p = e;
|
||||
e = e->next;
|
||||
@ -550,7 +550,7 @@ static struct object_entry *insert_object(unsigned char *sha1)
|
||||
|
||||
e = new_object(sha1);
|
||||
e->next = NULL;
|
||||
e->offset = 0;
|
||||
e->idx.offset = 0;
|
||||
if (p)
|
||||
p->next = e;
|
||||
else
|
||||
@ -839,11 +839,12 @@ static void start_packfile(void)
|
||||
p = xcalloc(1, sizeof(*p) + strlen(tmpfile) + 2);
|
||||
strcpy(p->pack_name, tmpfile);
|
||||
p->pack_fd = pack_fd;
|
||||
pack_file = sha1fd(pack_fd, p->pack_name);
|
||||
|
||||
hdr.hdr_signature = htonl(PACK_SIGNATURE);
|
||||
hdr.hdr_version = htonl(2);
|
||||
hdr.hdr_entries = 0;
|
||||
write_or_die(p->pack_fd, &hdr, sizeof(hdr));
|
||||
sha1write(pack_file, &hdr, sizeof(hdr));
|
||||
|
||||
pack_data = p;
|
||||
pack_size = sizeof(hdr);
|
||||
@ -853,67 +854,30 @@ static void start_packfile(void)
|
||||
all_packs[pack_id] = p;
|
||||
}
|
||||
|
||||
static int oecmp (const void *a_, const void *b_)
|
||||
static const char *create_index(void)
|
||||
{
|
||||
struct object_entry *a = *((struct object_entry**)a_);
|
||||
struct object_entry *b = *((struct object_entry**)b_);
|
||||
return hashcmp(a->sha1, b->sha1);
|
||||
}
|
||||
|
||||
static char *create_index(void)
|
||||
{
|
||||
static char tmpfile[PATH_MAX];
|
||||
git_SHA_CTX ctx;
|
||||
struct sha1file *f;
|
||||
struct object_entry **idx, **c, **last, *e;
|
||||
const char *tmpfile;
|
||||
struct pack_idx_entry **idx, **c, **last;
|
||||
struct object_entry *e;
|
||||
struct object_entry_pool *o;
|
||||
uint32_t array[256];
|
||||
int i, idx_fd;
|
||||
|
||||
/* Build the sorted table of object IDs. */
|
||||
idx = xmalloc(object_count * sizeof(struct object_entry*));
|
||||
/* Build the table of object IDs. */
|
||||
idx = xmalloc(object_count * sizeof(*idx));
|
||||
c = idx;
|
||||
for (o = blocks; o; o = o->next_pool)
|
||||
for (e = o->next_free; e-- != o->entries;)
|
||||
if (pack_id == e->pack_id)
|
||||
*c++ = e;
|
||||
*c++ = &e->idx;
|
||||
last = idx + object_count;
|
||||
if (c != last)
|
||||
die("internal consistency error creating the index");
|
||||
qsort(idx, object_count, sizeof(struct object_entry*), oecmp);
|
||||
|
||||
/* Generate the fan-out array. */
|
||||
c = idx;
|
||||
for (i = 0; i < 256; i++) {
|
||||
struct object_entry **next = c;
|
||||
while (next < last) {
|
||||
if ((*next)->sha1[0] != i)
|
||||
break;
|
||||
next++;
|
||||
}
|
||||
array[i] = htonl(next - idx);
|
||||
c = next;
|
||||
}
|
||||
|
||||
idx_fd = odb_mkstemp(tmpfile, sizeof(tmpfile),
|
||||
"pack/tmp_idx_XXXXXX");
|
||||
f = sha1fd(idx_fd, tmpfile);
|
||||
sha1write(f, array, 256 * sizeof(int));
|
||||
git_SHA1_Init(&ctx);
|
||||
for (c = idx; c != last; c++) {
|
||||
uint32_t offset = htonl((*c)->offset);
|
||||
sha1write(f, &offset, 4);
|
||||
sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
|
||||
git_SHA1_Update(&ctx, (*c)->sha1, 20);
|
||||
}
|
||||
sha1write(f, pack_data->sha1, sizeof(pack_data->sha1));
|
||||
sha1close(f, NULL, CSUM_FSYNC);
|
||||
tmpfile = write_idx_file(NULL, idx, object_count, pack_data->sha1);
|
||||
free(idx);
|
||||
git_SHA1_Final(pack_data->sha1, &ctx);
|
||||
return tmpfile;
|
||||
}
|
||||
|
||||
static char *keep_pack(char *curr_index_name)
|
||||
static char *keep_pack(const char *curr_index_name)
|
||||
{
|
||||
static char name[PATH_MAX];
|
||||
static const char *keep_msg = "fast-import";
|
||||
@ -935,6 +899,7 @@ static char *keep_pack(char *curr_index_name)
|
||||
get_object_directory(), sha1_to_hex(pack_data->sha1));
|
||||
if (move_temp_to_file(curr_index_name, name))
|
||||
die("cannot store index file");
|
||||
free((void *)curr_index_name);
|
||||
return name;
|
||||
}
|
||||
|
||||
@ -957,15 +922,17 @@ static void end_packfile(void)
|
||||
|
||||
clear_delta_base_cache();
|
||||
if (object_count) {
|
||||
unsigned char cur_pack_sha1[20];
|
||||
char *idx_name;
|
||||
int i;
|
||||
struct branch *b;
|
||||
struct tag *t;
|
||||
|
||||
close_pack_windows(pack_data);
|
||||
sha1close(pack_file, cur_pack_sha1, 0);
|
||||
fixup_pack_header_footer(pack_data->pack_fd, pack_data->sha1,
|
||||
pack_data->pack_name, object_count,
|
||||
NULL, 0);
|
||||
cur_pack_sha1, pack_size);
|
||||
close(pack_data->pack_fd);
|
||||
idx_name = keep_pack(create_index());
|
||||
|
||||
@ -1063,25 +1030,21 @@ static int store_object(
|
||||
e = insert_object(sha1);
|
||||
if (mark)
|
||||
insert_mark(mark, e);
|
||||
if (e->offset) {
|
||||
if (e->idx.offset) {
|
||||
duplicate_count_by_type[type]++;
|
||||
return 1;
|
||||
} else if (find_sha1_pack(sha1, packed_git)) {
|
||||
e->type = type;
|
||||
e->pack_id = MAX_PACK_ID;
|
||||
e->offset = 1; /* just not zero! */
|
||||
e->idx.offset = 1; /* just not zero! */
|
||||
duplicate_count_by_type[type]++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (last && last->data.buf && last->depth < max_depth) {
|
||||
if (last && last->data.buf && last->depth < max_depth && dat->len > 20) {
|
||||
delta = diff_delta(last->data.buf, last->data.len,
|
||||
dat->buf, dat->len,
|
||||
&deltalen, 0);
|
||||
if (delta && deltalen >= dat->len) {
|
||||
free(delta);
|
||||
delta = NULL;
|
||||
}
|
||||
&deltalen, dat->len - 20);
|
||||
} else
|
||||
delta = NULL;
|
||||
|
||||
@ -1101,7 +1064,7 @@ static int store_object(
|
||||
deflateEnd(&s);
|
||||
|
||||
/* Determine if we should auto-checkpoint. */
|
||||
if ((pack_size + 60 + s.total_out) > max_packsize
|
||||
if ((max_packsize && (pack_size + 60 + s.total_out) > max_packsize)
|
||||
|| (pack_size + 60 + s.total_out) < pack_size) {
|
||||
|
||||
/* This new object needs to *not* have the current pack_id. */
|
||||
@ -1127,36 +1090,40 @@ static int store_object(
|
||||
|
||||
e->type = type;
|
||||
e->pack_id = pack_id;
|
||||
e->offset = pack_size;
|
||||
e->idx.offset = pack_size;
|
||||
object_count++;
|
||||
object_count_by_type[type]++;
|
||||
|
||||
crc32_begin(pack_file);
|
||||
|
||||
if (delta) {
|
||||
unsigned long ofs = e->offset - last->offset;
|
||||
off_t ofs = e->idx.offset - last->offset;
|
||||
unsigned pos = sizeof(hdr) - 1;
|
||||
|
||||
delta_count_by_type[type]++;
|
||||
e->depth = last->depth + 1;
|
||||
|
||||
hdrlen = encode_header(OBJ_OFS_DELTA, deltalen, hdr);
|
||||
write_or_die(pack_data->pack_fd, hdr, hdrlen);
|
||||
sha1write(pack_file, hdr, hdrlen);
|
||||
pack_size += hdrlen;
|
||||
|
||||
hdr[pos] = ofs & 127;
|
||||
while (ofs >>= 7)
|
||||
hdr[--pos] = 128 | (--ofs & 127);
|
||||
write_or_die(pack_data->pack_fd, hdr + pos, sizeof(hdr) - pos);
|
||||
sha1write(pack_file, hdr + pos, sizeof(hdr) - pos);
|
||||
pack_size += sizeof(hdr) - pos;
|
||||
} else {
|
||||
e->depth = 0;
|
||||
hdrlen = encode_header(type, dat->len, hdr);
|
||||
write_or_die(pack_data->pack_fd, hdr, hdrlen);
|
||||
sha1write(pack_file, hdr, hdrlen);
|
||||
pack_size += hdrlen;
|
||||
}
|
||||
|
||||
write_or_die(pack_data->pack_fd, out, s.total_out);
|
||||
sha1write(pack_file, out, s.total_out);
|
||||
pack_size += s.total_out;
|
||||
|
||||
e->idx.crc32 = crc32_end(pack_file);
|
||||
|
||||
free(out);
|
||||
free(delta);
|
||||
if (last) {
|
||||
@ -1165,18 +1132,23 @@ static int store_object(
|
||||
} else {
|
||||
strbuf_swap(&last->data, dat);
|
||||
}
|
||||
last->offset = e->offset;
|
||||
last->offset = e->idx.offset;
|
||||
last->depth = e->depth;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void truncate_pack(off_t to)
|
||||
static void truncate_pack(off_t to, git_SHA_CTX *ctx)
|
||||
{
|
||||
if (ftruncate(pack_data->pack_fd, to)
|
||||
|| lseek(pack_data->pack_fd, to, SEEK_SET) != to)
|
||||
die_errno("cannot truncate pack to skip duplicate");
|
||||
pack_size = to;
|
||||
|
||||
/* yes this is a layering violation */
|
||||
pack_file->total = to;
|
||||
pack_file->offset = 0;
|
||||
pack_file->ctx = *ctx;
|
||||
}
|
||||
|
||||
static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
|
||||
@ -1189,16 +1161,21 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
|
||||
unsigned long hdrlen;
|
||||
off_t offset;
|
||||
git_SHA_CTX c;
|
||||
git_SHA_CTX pack_file_ctx;
|
||||
z_stream s;
|
||||
int status = Z_OK;
|
||||
|
||||
/* Determine if we should auto-checkpoint. */
|
||||
if ((pack_size + 60 + len) > max_packsize
|
||||
if ((max_packsize && (pack_size + 60 + len) > max_packsize)
|
||||
|| (pack_size + 60 + len) < pack_size)
|
||||
cycle_packfile();
|
||||
|
||||
offset = pack_size;
|
||||
|
||||
/* preserve the pack_file SHA1 ctx in case we have to truncate later */
|
||||
sha1flush(pack_file);
|
||||
pack_file_ctx = pack_file->ctx;
|
||||
|
||||
hdrlen = snprintf((char *)out_buf, out_sz, "blob %" PRIuMAX, len) + 1;
|
||||
if (out_sz <= hdrlen)
|
||||
die("impossibly large object header");
|
||||
@ -1206,6 +1183,8 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
|
||||
git_SHA1_Init(&c);
|
||||
git_SHA1_Update(&c, out_buf, hdrlen);
|
||||
|
||||
crc32_begin(pack_file);
|
||||
|
||||
memset(&s, 0, sizeof(s));
|
||||
deflateInit(&s, pack_compression_level);
|
||||
|
||||
@ -1233,7 +1212,7 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
|
||||
|
||||
if (!s.avail_out || status == Z_STREAM_END) {
|
||||
size_t n = s.next_out - out_buf;
|
||||
write_or_die(pack_data->pack_fd, out_buf, n);
|
||||
sha1write(pack_file, out_buf, n);
|
||||
pack_size += n;
|
||||
s.next_out = out_buf;
|
||||
s.avail_out = out_sz;
|
||||
@ -1259,22 +1238,23 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
|
||||
if (mark)
|
||||
insert_mark(mark, e);
|
||||
|
||||
if (e->offset) {
|
||||
if (e->idx.offset) {
|
||||
duplicate_count_by_type[OBJ_BLOB]++;
|
||||
truncate_pack(offset);
|
||||
truncate_pack(offset, &pack_file_ctx);
|
||||
|
||||
} else if (find_sha1_pack(sha1, packed_git)) {
|
||||
e->type = OBJ_BLOB;
|
||||
e->pack_id = MAX_PACK_ID;
|
||||
e->offset = 1; /* just not zero! */
|
||||
e->idx.offset = 1; /* just not zero! */
|
||||
duplicate_count_by_type[OBJ_BLOB]++;
|
||||
truncate_pack(offset);
|
||||
truncate_pack(offset, &pack_file_ctx);
|
||||
|
||||
} else {
|
||||
e->depth = 0;
|
||||
e->type = OBJ_BLOB;
|
||||
e->pack_id = pack_id;
|
||||
e->offset = offset;
|
||||
e->idx.offset = offset;
|
||||
e->idx.crc32 = crc32_end(pack_file);
|
||||
object_count++;
|
||||
object_count_by_type[OBJ_BLOB]++;
|
||||
}
|
||||
@ -1317,6 +1297,7 @@ static void *gfi_unpack_entry(
|
||||
* the newly written data.
|
||||
*/
|
||||
close_pack_windows(p);
|
||||
sha1flush(pack_file);
|
||||
|
||||
/* We have to offer 20 bytes additional on the end of
|
||||
* the packfile as the core unpacker code assumes the
|
||||
@ -1326,7 +1307,7 @@ static void *gfi_unpack_entry(
|
||||
*/
|
||||
p->pack_size = pack_size + 20;
|
||||
}
|
||||
return unpack_entry(p, oe->offset, &type, sizep);
|
||||
return unpack_entry(p, oe->idx.offset, &type, sizep);
|
||||
}
|
||||
|
||||
static const char *get_mode(const char *str, uint16_t *modep)
|
||||
@ -1457,7 +1438,7 @@ static void store_tree(struct tree_entry *root)
|
||||
if (S_ISDIR(root->versions[0].mode) && le && le->pack_id == pack_id) {
|
||||
mktree(t, 0, &old_tree);
|
||||
lo.data = old_tree;
|
||||
lo.offset = le->offset;
|
||||
lo.offset = le->idx.offset;
|
||||
lo.depth = t->delta_depth;
|
||||
}
|
||||
|
||||
@ -1715,7 +1696,7 @@ static void dump_marks_helper(FILE *f,
|
||||
for (k = 0; k < 1024; k++) {
|
||||
if (m->data.marked[k])
|
||||
fprintf(f, ":%" PRIuMAX " %s\n", base + k,
|
||||
sha1_to_hex(m->data.marked[k]->sha1));
|
||||
sha1_to_hex(m->data.marked[k]->idx.sha1));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1798,7 +1779,7 @@ static void read_marks(void)
|
||||
e = insert_object(sha1);
|
||||
e->type = type;
|
||||
e->pack_id = MAX_PACK_ID;
|
||||
e->offset = 1; /* just not zero! */
|
||||
e->idx.offset = 1; /* just not zero! */
|
||||
}
|
||||
insert_mark(mark, e);
|
||||
}
|
||||
@ -2183,7 +2164,7 @@ static void file_change_m(struct branch *b)
|
||||
if (*p == ':') {
|
||||
char *x;
|
||||
oe = find_mark(strtoumax(p + 1, &x, 10));
|
||||
hashcpy(sha1, oe->sha1);
|
||||
hashcpy(sha1, oe->idx.sha1);
|
||||
p = x;
|
||||
} else if (!prefixcmp(p, "inline")) {
|
||||
inline_data = 1;
|
||||
@ -2316,7 +2297,7 @@ static void note_change_n(struct branch *b, unsigned char old_fanout)
|
||||
if (*p == ':') {
|
||||
char *x;
|
||||
oe = find_mark(strtoumax(p + 1, &x, 10));
|
||||
hashcpy(sha1, oe->sha1);
|
||||
hashcpy(sha1, oe->idx.sha1);
|
||||
p = x;
|
||||
} else if (!prefixcmp(p, "inline")) {
|
||||
inline_data = 1;
|
||||
@ -2339,7 +2320,7 @@ static void note_change_n(struct branch *b, unsigned char old_fanout)
|
||||
struct object_entry *commit_oe = find_mark(commit_mark);
|
||||
if (commit_oe->type != OBJ_COMMIT)
|
||||
die("Mark :%" PRIuMAX " not a commit", commit_mark);
|
||||
hashcpy(commit_sha1, commit_oe->sha1);
|
||||
hashcpy(commit_sha1, commit_oe->idx.sha1);
|
||||
} else if (!get_sha1(p, commit_sha1)) {
|
||||
unsigned long size;
|
||||
char *buf = read_object_with_reference(commit_sha1,
|
||||
@ -2446,7 +2427,7 @@ static int parse_from(struct branch *b)
|
||||
struct object_entry *oe = find_mark(idnum);
|
||||
if (oe->type != OBJ_COMMIT)
|
||||
die("Mark :%" PRIuMAX " not a commit", idnum);
|
||||
hashcpy(b->sha1, oe->sha1);
|
||||
hashcpy(b->sha1, oe->idx.sha1);
|
||||
if (oe->pack_id != MAX_PACK_ID) {
|
||||
unsigned long size;
|
||||
char *buf = gfi_unpack_entry(oe, &size);
|
||||
@ -2481,7 +2462,7 @@ static struct hash_list *parse_merge(unsigned int *count)
|
||||
struct object_entry *oe = find_mark(idnum);
|
||||
if (oe->type != OBJ_COMMIT)
|
||||
die("Mark :%" PRIuMAX " not a commit", idnum);
|
||||
hashcpy(n->sha1, oe->sha1);
|
||||
hashcpy(n->sha1, oe->idx.sha1);
|
||||
} else if (!get_sha1(from, n->sha1)) {
|
||||
unsigned long size;
|
||||
char *buf = read_object_with_reference(n->sha1,
|
||||
@ -2639,7 +2620,7 @@ static void parse_new_tag(void)
|
||||
from_mark = strtoumax(from + 1, NULL, 10);
|
||||
oe = find_mark(from_mark);
|
||||
type = oe->type;
|
||||
hashcpy(sha1, oe->sha1);
|
||||
hashcpy(sha1, oe->idx.sha1);
|
||||
} else if (!get_sha1(from, sha1)) {
|
||||
unsigned long size;
|
||||
char *buf;
|
||||
@ -2891,6 +2872,17 @@ static int git_pack_config(const char *k, const char *v, void *cb)
|
||||
pack_compression_seen = 1;
|
||||
return 0;
|
||||
}
|
||||
if (!strcmp(k, "pack.indexversion")) {
|
||||
pack_idx_default_version = git_config_int(k, v);
|
||||
if (pack_idx_default_version > 2)
|
||||
die("bad pack.indexversion=%"PRIu32,
|
||||
pack_idx_default_version);
|
||||
return 0;
|
||||
}
|
||||
if (!strcmp(k, "pack.packsizelimit")) {
|
||||
max_packsize = git_config_ulong(k, v);
|
||||
return 0;
|
||||
}
|
||||
if (!strcmp(k, "core.bigfilethreshold")) {
|
||||
long n = git_config_int(k, v);
|
||||
big_file_threshold = 0 < n ? n : 0;
|
||||
|
Loading…
Reference in New Issue
Block a user