Merge branch 'dh/repack' (early part)

* 'dh/repack' (early part):
  Ensure git-repack -a -d --max-pack-size=N deletes correct packs
  pack-objects: clarification & option checks for --max-pack-size
  git-repack --max-pack-size: add option parsing to enable feature
  git-repack --max-pack-size: split packs as asked by write_{object,one}()
  git-repack --max-pack-size: write_{object,one}() respect pack limit
  git-repack --max-pack-size: new file statics and code restructuring
  Alter sha1close() 3rd argument to request flush only
This commit is contained in:
Junio C Hamano 2007-05-29 01:16:28 -07:00
commit a77a33a51d
5 changed files with 252 additions and 118 deletions

View File

@ -85,6 +85,11 @@ base-name::
times to get to the necessary object.
The default value for --window is 10 and --depth is 50.
--max-pack-size=<n>::
Maximum size of each output packfile, expressed in MiB.
If specified, multiple packfiles may be created.
The default is unlimited.
--incremental::
This flag causes an object already in a pack ignored
even if it appears in the standard input.

View File

@ -65,6 +65,11 @@ OPTIONS
to be applied that many times to get to the necessary object.
The default value for --window is 10 and --depth is 50.
--max-pack-size=<n>::
Maximum size of each output packfile, expressed in MiB.
If specified, multiple packfiles may be created.
The default is unlimited.
Configuration
-------------

View File

@ -16,7 +16,7 @@
#include "progress.h"
static const char pack_usage[] = "\
git-pack-objects [{ -q | --progress | --all-progress }] \n\
git-pack-objects [{ -q | --progress | --all-progress }] [--max-pack-size=N] \n\
[--local] [--incremental] [--window=N] [--depth=N] \n\
[--no-reuse-delta] [--no-reuse-object] [--delta-base-offset] \n\
[--non-empty] [--revs [--unpacked | --all]*] [--reflog] \n\
@ -54,7 +54,8 @@ struct object_entry {
* nice "minimum seek" order.
*/
static struct object_entry *objects;
static uint32_t nr_objects, nr_alloc, nr_result;
static struct object_entry **written_list;
static uint32_t nr_objects, nr_alloc, nr_result, nr_written;
static int non_empty;
static int no_reuse_delta, no_reuse_object;
@ -63,9 +64,11 @@ static int incremental;
static int allow_ofs_delta;
static const char *pack_tmp_name, *idx_tmp_name;
static char tmpname[PATH_MAX];
static const char *base_name;
static unsigned char pack_file_sha1[20];
static int progress = 1;
static int window = 10;
static uint32_t pack_size_limit;
static int depth = 50;
static int pack_to_stdout;
static int num_preferred_base;
@ -351,16 +354,31 @@ static void copy_pack_data(struct sha1file *f,
}
static unsigned long write_object(struct sha1file *f,
struct object_entry *entry)
struct object_entry *entry,
off_t write_offset)
{
unsigned long size;
enum object_type type;
void *buf;
unsigned char header[10];
unsigned char dheader[10];
unsigned hdrlen;
off_t datalen;
enum object_type obj_type;
int to_reuse = 0;
/* write limit if limited packsize and not first object */
unsigned long limit = pack_size_limit && nr_written ?
pack_size_limit - write_offset : 0;
/* no if no delta */
int usable_delta = !entry->delta ? 0 :
/* yes if unlimited packfile */
!pack_size_limit ? 1 :
/* no if base written to previous pack */
entry->delta->offset == (off_t)-1 ? 0 :
/* otherwise double-check written to this
* pack, like we do below
*/
entry->delta->offset ? 1 : 0;
if (!pack_to_stdout)
crc32_begin(f);
@ -371,7 +389,9 @@ static unsigned long write_object(struct sha1file *f,
else if (!entry->in_pack)
to_reuse = 0; /* can't reuse what we don't have */
else if (obj_type == OBJ_REF_DELTA || obj_type == OBJ_OFS_DELTA)
to_reuse = 1; /* check_object() decided it for us */
/* check_object() decided it for us ... */
to_reuse = usable_delta;
/* ... but pack split may override that */
else if (obj_type != entry->in_pack_type)
to_reuse = 0; /* pack has delta which is unusable */
else if (entry->delta)
@ -382,24 +402,48 @@ static unsigned long write_object(struct sha1file *f,
*/
if (!to_reuse) {
z_stream stream;
unsigned long maxsize;
void *out;
buf = read_sha1_file(entry->sha1, &type, &size);
if (!buf)
die("unable to read %s", sha1_to_hex(entry->sha1));
if (size != entry->size)
die("object %s size inconsistency (%lu vs %lu)",
sha1_to_hex(entry->sha1), size, entry->size);
if (entry->delta) {
if (usable_delta) {
buf = delta_against(buf, size, entry);
size = entry->delta_size;
obj_type = (allow_ofs_delta && entry->delta->offset) ?
OBJ_OFS_DELTA : OBJ_REF_DELTA;
} else {
/*
* recover real object type in case
* check_object() wanted to re-use a delta,
* but we couldn't since base was in previous split pack
*/
obj_type = type;
}
/* compress the data to store and put compressed length in datalen */
memset(&stream, 0, sizeof(stream));
deflateInit(&stream, pack_compression_level);
maxsize = deflateBound(&stream, size);
out = xmalloc(maxsize);
/* Compress it */
stream.next_in = buf;
stream.avail_in = size;
stream.next_out = out;
stream.avail_out = maxsize;
while (deflate(&stream, Z_FINISH) == Z_OK)
/* nothing */;
deflateEnd(&stream);
datalen = stream.total_out;
deflateEnd(&stream);
/*
* The object header is a byte of 'type' followed by zero or
* more bytes of length.
*/
hdrlen = encode_header(obj_type, size, header);
sha1write(f, header, hdrlen);
if (obj_type == OBJ_OFS_DELTA) {
/*
@ -408,21 +452,41 @@ static unsigned long write_object(struct sha1file *f,
* base from this object's position in the pack.
*/
off_t ofs = entry->offset - entry->delta->offset;
unsigned pos = sizeof(header) - 1;
header[pos] = ofs & 127;
unsigned pos = sizeof(dheader) - 1;
dheader[pos] = ofs & 127;
while (ofs >>= 7)
header[--pos] = 128 | (--ofs & 127);
sha1write(f, header + pos, sizeof(header) - pos);
hdrlen += sizeof(header) - pos;
dheader[--pos] = 128 | (--ofs & 127);
if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) {
free(out);
free(buf);
return 0;
}
sha1write(f, header, hdrlen);
sha1write(f, dheader + pos, sizeof(dheader) - pos);
hdrlen += sizeof(dheader) - pos;
} else if (obj_type == OBJ_REF_DELTA) {
/*
* Deltas with a base reference contain
* an additional 20 bytes for the base sha1.
*/
if (limit && hdrlen + 20 + datalen + 20 >= limit) {
free(out);
free(buf);
return 0;
}
sha1write(f, header, hdrlen);
sha1write(f, entry->delta->sha1, 20);
hdrlen += 20;
} else {
if (limit && hdrlen + datalen + 20 >= limit) {
free(out);
free(buf);
return 0;
}
sha1write(f, header, hdrlen);
}
datalen = sha1write_compressed(f, buf, size, pack_compression_level);
sha1write(f, out, datalen);
free(out);
free(buf);
}
else {
@ -437,20 +501,6 @@ static unsigned long write_object(struct sha1file *f,
reused_delta++;
}
hdrlen = encode_header(obj_type, entry->size, header);
sha1write(f, header, hdrlen);
if (obj_type == OBJ_OFS_DELTA) {
off_t ofs = entry->offset - entry->delta->offset;
unsigned pos = sizeof(header) - 1;
header[pos] = ofs & 127;
while (ofs >>= 7)
header[--pos] = 128 | (--ofs & 127);
sha1write(f, header + pos, sizeof(header) - pos);
hdrlen += sizeof(header) - pos;
} else if (obj_type == OBJ_REF_DELTA) {
sha1write(f, entry->delta->sha1, 20);
hdrlen += 20;
}
offset = entry->in_pack_offset;
revidx = find_packed_object(p, offset);
datalen = revidx[1].offset - offset;
@ -459,6 +509,29 @@ static unsigned long write_object(struct sha1file *f,
die("bad packed object CRC for %s", sha1_to_hex(entry->sha1));
offset += entry->in_pack_header_size;
datalen -= entry->in_pack_header_size;
if (obj_type == OBJ_OFS_DELTA) {
off_t ofs = entry->offset - entry->delta->offset;
unsigned pos = sizeof(dheader) - 1;
dheader[pos] = ofs & 127;
while (ofs >>= 7)
dheader[--pos] = 128 | (--ofs & 127);
if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit)
return 0;
sha1write(f, header, hdrlen);
sha1write(f, dheader + pos, sizeof(dheader) - pos);
hdrlen += sizeof(dheader) - pos;
} else if (obj_type == OBJ_REF_DELTA) {
if (limit && hdrlen + 20 + datalen + 20 >= limit)
return 0;
sha1write(f, header, hdrlen);
sha1write(f, entry->delta->sha1, 20);
hdrlen += 20;
} else {
if (limit && hdrlen + datalen + 20 >= limit)
return 0;
sha1write(f, header, hdrlen);
}
if (!pack_to_stdout && p->index_version == 1 &&
check_pack_inflate(p, &w_curs, offset, datalen, entry->size))
die("corrupt packed object for %s", sha1_to_hex(entry->sha1));
@ -466,7 +539,7 @@ static unsigned long write_object(struct sha1file *f,
unuse_pack(&w_curs);
reused++;
}
if (entry->delta)
if (usable_delta)
written_delta++;
written++;
if (!pack_to_stdout)
@ -485,11 +558,19 @@ static off_t write_one(struct sha1file *f,
return offset;
/* if we are deltified, write out base object first. */
if (e->delta)
if (e->delta) {
offset = write_one(f, e->delta, offset);
if (!offset)
return 0;
}
e->offset = offset;
size = write_object(f, e);
size = write_object(f, e, offset);
if (!size) {
e->offset = 0;
return 0;
}
written_list[nr_written++] = e;
/* make sure off_t is sufficiently large not to wrap */
if (offset > offset + size)
@ -503,49 +584,113 @@ static int open_object_dir_tmp(const char *path)
return mkstemp(tmpname);
}
static off_t write_pack_file(void)
{
uint32_t i;
struct sha1file *f;
off_t offset, last_obj_offset = 0;
struct pack_header hdr;
int do_progress = progress;
/* forward declarations for write_pack_file */
static void write_index_file(off_t last_obj_offset, unsigned char *sha1);
static int adjust_perm(const char *path, mode_t mode);
if (pack_to_stdout) {
f = sha1fd(1, "<stdout>");
do_progress >>= 1;
} else {
int fd = open_object_dir_tmp("tmp_pack_XXXXXX");
if (fd < 0)
die("unable to create %s: %s\n", tmpname, strerror(errno));
pack_tmp_name = xstrdup(tmpname);
f = sha1fd(fd, pack_tmp_name);
}
static void write_pack_file(void)
{
uint32_t i = 0, j;
struct sha1file *f;
off_t offset, offset_one, last_obj_offset = 0;
struct pack_header hdr;
int do_progress = progress >> pack_to_stdout;
uint32_t nr_remaining = nr_result;
if (do_progress)
start_progress(&progress_state, "Writing %u objects...", "", nr_result);
written_list = xmalloc(nr_objects * sizeof(struct object_entry *));
hdr.hdr_signature = htonl(PACK_SIGNATURE);
hdr.hdr_version = htonl(PACK_VERSION);
hdr.hdr_entries = htonl(nr_result);
sha1write(f, &hdr, sizeof(hdr));
offset = sizeof(hdr);
if (!nr_result)
goto done;
for (i = 0; i < nr_objects; i++) {
last_obj_offset = offset;
offset = write_one(f, objects + i, offset);
if (do_progress)
display_progress(&progress_state, written);
}
do {
if (pack_to_stdout) {
f = sha1fd(1, "<stdout>");
} else {
int fd = open_object_dir_tmp("tmp_pack_XXXXXX");
if (fd < 0)
die("unable to create %s: %s\n", tmpname, strerror(errno));
pack_tmp_name = xstrdup(tmpname);
f = sha1fd(fd, pack_tmp_name);
}
hdr.hdr_signature = htonl(PACK_SIGNATURE);
hdr.hdr_version = htonl(PACK_VERSION);
hdr.hdr_entries = htonl(nr_remaining);
sha1write(f, &hdr, sizeof(hdr));
offset = sizeof(hdr);
nr_written = 0;
for (; i < nr_objects; i++) {
last_obj_offset = offset;
offset_one = write_one(f, objects + i, offset);
if (!offset_one)
break;
offset = offset_one;
if (do_progress)
display_progress(&progress_state, written);
}
/*
* Did we write the wrong # entries in the header?
* If so, rewrite it like in fast-import
*/
if (pack_to_stdout || nr_written == nr_remaining) {
sha1close(f, pack_file_sha1, 1);
} else {
sha1close(f, pack_file_sha1, 0);
fixup_pack_header_footer(f->fd, pack_file_sha1, pack_tmp_name, nr_written);
close(f->fd);
}
if (!pack_to_stdout) {
unsigned char object_list_sha1[20];
mode_t mode = umask(0);
umask(mode);
mode = 0444 & ~mode;
write_index_file(last_obj_offset, object_list_sha1);
snprintf(tmpname, sizeof(tmpname), "%s-%s.pack",
base_name, sha1_to_hex(object_list_sha1));
if (adjust_perm(pack_tmp_name, mode))
die("unable to make temporary pack file readable: %s",
strerror(errno));
if (rename(pack_tmp_name, tmpname))
die("unable to rename temporary pack file: %s",
strerror(errno));
snprintf(tmpname, sizeof(tmpname), "%s-%s.idx",
base_name, sha1_to_hex(object_list_sha1));
if (adjust_perm(idx_tmp_name, mode))
die("unable to make temporary index file readable: %s",
strerror(errno));
if (rename(idx_tmp_name, tmpname))
die("unable to rename temporary index file: %s",
strerror(errno));
puts(sha1_to_hex(object_list_sha1));
}
/* mark written objects as written to previous pack */
for (j = 0; j < nr_written; j++) {
written_list[j]->offset = (off_t)-1;
}
nr_remaining -= nr_written;
} while (nr_remaining && i < nr_objects);
free(written_list);
if (do_progress)
stop_progress(&progress_state);
done:
if (written != nr_result)
die("wrote %u objects while expecting %u", written, nr_result);
sha1close(f, pack_file_sha1, 1);
return last_obj_offset;
/*
* We have scanned through [0 ... i). Since we have written
* the correct number of objects, the remaining [i ... nr_objects)
* items must be either already written (due to out-of-order delta base)
* or a preferred base. Count those which are neither and complain if any.
*/
for (j = 0; i < nr_objects; i++) {
struct object_entry *e = objects + i;
j += !e->offset && !e->preferred_base;
}
if (j)
die("wrote %u objects as expected but %u unwritten", written, j);
}
static int sha1_sort(const void *_a, const void *_b)
@ -572,18 +717,11 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
idx_tmp_name = xstrdup(tmpname);
f = sha1fd(fd, idx_tmp_name);
if (nr_result) {
uint32_t j = 0;
sorted_by_sha =
xcalloc(nr_result, sizeof(struct object_entry *));
for (i = 0; i < nr_objects; i++)
if (!objects[i].preferred_base)
sorted_by_sha[j++] = objects + i;
if (j != nr_result)
die("listed %u objects while expecting %u", j, nr_result);
qsort(sorted_by_sha, nr_result, sizeof(*sorted_by_sha), sha1_sort);
if (nr_written) {
sorted_by_sha = written_list;
qsort(sorted_by_sha, nr_written, sizeof(*sorted_by_sha), sha1_sort);
list = sorted_by_sha;
last = sorted_by_sha + nr_result;
last = sorted_by_sha + nr_written;
} else
sorted_by_sha = list = last = NULL;
@ -621,7 +759,7 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
/* Write the actual SHA1 entries. */
list = sorted_by_sha;
for (i = 0; i < nr_result; i++) {
for (i = 0; i < nr_written; i++) {
struct object_entry *entry = *list++;
if (index_version < 2) {
uint32_t offset = htonl(entry->offset);
@ -636,7 +774,7 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
/* write the crc32 table */
list = sorted_by_sha;
for (i = 0; i < nr_objects; i++) {
for (i = 0; i < nr_written; i++) {
struct object_entry *entry = *list++;
uint32_t crc32_val = htonl(entry->crc32);
sha1write(f, &crc32_val, 4);
@ -644,7 +782,7 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
/* write the 32-bit offset table */
list = sorted_by_sha;
for (i = 0; i < nr_objects; i++) {
for (i = 0; i < nr_written; i++) {
struct object_entry *entry = *list++;
uint32_t offset = (entry->offset <= index_off32_limit) ?
entry->offset : (0x80000000 | nr_large_offset++);
@ -669,7 +807,6 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1)
sha1write(f, pack_file_sha1, 20);
sha1close(f, NULL, 1);
free(sorted_by_sha);
SHA1_Final(sha1, &ctx);
}
@ -1569,8 +1706,6 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
int use_internal_rev_list = 0;
int thin = 0;
uint32_t i;
off_t last_obj_offset;
const char *base_name = NULL;
const char **rp_av;
int rp_ac_alloc = 64;
int rp_ac;
@ -1616,6 +1751,13 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
pack_compression_level = level;
continue;
}
if (!prefixcmp(arg, "--max-pack-size=")) {
char *end;
pack_size_limit = strtoul(arg+16, &end, 0) * 1024 * 1024;
if (!arg[16] || *end)
usage(pack_usage);
continue;
}
if (!prefixcmp(arg, "--window=")) {
char *end;
window = strtoul(arg+9, &end, 0);
@ -1714,6 +1856,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (pack_to_stdout != !base_name)
usage(pack_usage);
if (pack_to_stdout && pack_size_limit)
die("--max-pack-size cannot be used to build a pack for transfer.");
if (!pack_to_stdout && thin)
die("--thin cannot be used to build an indexable pack.");
@ -1739,33 +1884,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
fprintf(stderr, "Result has %u objects.\n", nr_result);
if (nr_result)
prepare_pack(window, depth);
last_obj_offset = write_pack_file();
if (!pack_to_stdout) {
unsigned char object_list_sha1[20];
mode_t mode = umask(0);
umask(mode);
mode = 0444 & ~mode;
write_index_file(last_obj_offset, object_list_sha1);
snprintf(tmpname, sizeof(tmpname), "%s-%s.pack",
base_name, sha1_to_hex(object_list_sha1));
if (adjust_perm(pack_tmp_name, mode))
die("unable to make temporary pack file readable: %s",
strerror(errno));
if (rename(pack_tmp_name, tmpname))
die("unable to rename temporary pack file: %s",
strerror(errno));
snprintf(tmpname, sizeof(tmpname), "%s-%s.idx",
base_name, sha1_to_hex(object_list_sha1));
if (adjust_perm(idx_tmp_name, mode))
die("unable to make temporary index file readable: %s",
strerror(errno));
if (rename(idx_tmp_name, tmpname))
die("unable to rename temporary index file: %s",
strerror(errno));
puts(sha1_to_hex(object_list_sha1));
}
write_pack_file();
if (progress)
fprintf(stderr, "Total %u (delta %u), reused %u (delta %u)\n",
written, written_delta, reused, reused_delta);

View File

@ -29,18 +29,20 @@ static void sha1flush(struct sha1file *f, unsigned int count)
}
}
int sha1close(struct sha1file *f, unsigned char *result, int update)
int sha1close(struct sha1file *f, unsigned char *result, int final)
{
unsigned offset = f->offset;
if (offset) {
SHA1_Update(&f->ctx, f->buffer, offset);
sha1flush(f, offset);
f->offset = 0;
}
if (!final)
return 0; /* only want to flush (no checksum write, no close) */
SHA1_Final(f->buffer, &f->ctx);
if (result)
hashcpy(result, f->buffer);
if (update)
sha1flush(f, 20);
sha1flush(f, 20);
if (close(f->fd))
die("%s: sha1 file error on close (%s)", f->name, strerror(errno));
free(f);

View File

@ -3,7 +3,7 @@
# Copyright (c) 2005 Linus Torvalds
#
USAGE='[-a] [-d] [-f] [-l] [-n] [-q] [--window=N] [--depth=N]'
USAGE='[-a] [-d] [-f] [-l] [-n] [-q] [--max-pack-size=N] [--window=N] [--depth=N]'
SUBDIRECTORY_OK='Yes'
. git-sh-setup
@ -18,6 +18,7 @@ do
-q) quiet=-q ;;
-f) no_reuse=--no-reuse-object ;;
-l) local=--local ;;
--max-pack-size=*) extra="$extra $1" ;;
--window=*) extra="$extra $1" ;;
--depth=*) extra="$extra $1" ;;
*) usage ;;
@ -35,7 +36,7 @@ true)
esac
PACKDIR="$GIT_OBJECT_DIRECTORY/pack"
PACKTMP="$GIT_DIR/.tmp-$$-pack"
PACKTMP="$GIT_OBJECT_DIRECTORY/.tmp-$$-pack"
rm -f "$PACKTMP"-*
trap 'rm -f "$PACKTMP"-*' 0 1 2 3 15
@ -62,11 +63,13 @@ case ",$all_into_one," in
esac
args="$args $local $quiet $no_reuse$extra"
name=$(git-pack-objects --non-empty --all --reflog $args </dev/null "$PACKTMP") ||
names=$(git-pack-objects --non-empty --all --reflog $args </dev/null "$PACKTMP") ||
exit 1
if [ -z "$name" ]; then
if [ -z "$names" ]; then
echo Nothing new to pack.
else
fi
for name in $names ; do
fullbases="$fullbases pack-$name"
chmod a-w "$PACKTMP-$name.pack"
chmod a-w "$PACKTMP-$name.idx"
if test "$quiet" != '-q'; then
@ -92,7 +95,7 @@ else
exit 1
}
rm -f "$PACKDIR/old-pack-$name.pack" "$PACKDIR/old-pack-$name.idx"
fi
done
if test "$remove_redundant" = t
then
@ -103,8 +106,8 @@ then
( cd "$PACKDIR" &&
for e in $existing
do
case "$e" in
pack-$name) ;;
case " $fullbases " in
*" $e "*) ;;
*) rm -f "$e.pack" "$e.idx" "$e.keep" ;;
esac
done