Merge branch 'jc/stream-to-pack'
* jc/stream-to-pack: bulk-checkin: replace fast-import based implementation csum-file: introduce sha1file_checkpoint finish_tmp_packfile(): a helper function create_tmp_packfile(): a helper function write_pack_header(): a helper function Conflicts: pack.h
This commit is contained in:
commit
48b303675a
2
Makefile
2
Makefile
@ -511,6 +511,7 @@ LIB_H += argv-array.h
|
||||
LIB_H += attr.h
|
||||
LIB_H += blob.h
|
||||
LIB_H += builtin.h
|
||||
LIB_H += bulk-checkin.h
|
||||
LIB_H += cache.h
|
||||
LIB_H += cache-tree.h
|
||||
LIB_H += color.h
|
||||
@ -600,6 +601,7 @@ LIB_OBJS += base85.o
|
||||
LIB_OBJS += bisect.o
|
||||
LIB_OBJS += blob.o
|
||||
LIB_OBJS += branch.o
|
||||
LIB_OBJS += bulk-checkin.o
|
||||
LIB_OBJS += bundle.o
|
||||
LIB_OBJS += cache-tree.o
|
||||
LIB_OBJS += color.o
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include "diff.h"
|
||||
#include "diffcore.h"
|
||||
#include "revision.h"
|
||||
#include "bulk-checkin.h"
|
||||
|
||||
static const char * const builtin_add_usage[] = {
|
||||
"git add [options] [--] <filepattern>...",
|
||||
@ -458,11 +459,15 @@ int cmd_add(int argc, const char **argv, const char *prefix)
|
||||
free(seen);
|
||||
}
|
||||
|
||||
plug_bulk_checkin();
|
||||
|
||||
exit_status |= add_files_to_cache(prefix, pathspec, flags);
|
||||
|
||||
if (add_new_files)
|
||||
exit_status |= add_files(&dir, flags);
|
||||
|
||||
unplug_bulk_checkin();
|
||||
|
||||
finish:
|
||||
if (active_cache_changed) {
|
||||
if (write_cache(newfd, active_cache, active_nr) ||
|
||||
|
@ -76,7 +76,7 @@ static struct pack_idx_option pack_idx_opts;
|
||||
static const char *base_name;
|
||||
static int progress = 1;
|
||||
static int window = 10;
|
||||
static unsigned long pack_size_limit, pack_size_limit_cfg;
|
||||
static unsigned long pack_size_limit;
|
||||
static int depth = 50;
|
||||
static int delta_search_threads;
|
||||
static int pack_to_stdout;
|
||||
@ -638,7 +638,6 @@ static void write_pack_file(void)
|
||||
uint32_t i = 0, j;
|
||||
struct sha1file *f;
|
||||
off_t offset;
|
||||
struct pack_header hdr;
|
||||
uint32_t nr_remaining = nr_result;
|
||||
time_t last_mtime = 0;
|
||||
struct object_entry **write_order;
|
||||
@ -652,22 +651,14 @@ static void write_pack_file(void)
|
||||
unsigned char sha1[20];
|
||||
char *pack_tmp_name = NULL;
|
||||
|
||||
if (pack_to_stdout) {
|
||||
if (pack_to_stdout)
|
||||
f = sha1fd_throughput(1, "<stdout>", progress_state);
|
||||
} else {
|
||||
char tmpname[PATH_MAX];
|
||||
int fd;
|
||||
fd = odb_mkstemp(tmpname, sizeof(tmpname),
|
||||
"pack/tmp_pack_XXXXXX");
|
||||
pack_tmp_name = xstrdup(tmpname);
|
||||
f = sha1fd(fd, pack_tmp_name);
|
||||
}
|
||||
else
|
||||
f = create_tmp_packfile(&pack_tmp_name);
|
||||
|
||||
hdr.hdr_signature = htonl(PACK_SIGNATURE);
|
||||
hdr.hdr_version = htonl(PACK_VERSION);
|
||||
hdr.hdr_entries = htonl(nr_remaining);
|
||||
sha1write(f, &hdr, sizeof(hdr));
|
||||
offset = sizeof(hdr);
|
||||
offset = write_pack_header(f, nr_remaining);
|
||||
if (!offset)
|
||||
die_errno("unable to write pack header");
|
||||
nr_written = 0;
|
||||
for (; i < nr_objects; i++) {
|
||||
struct object_entry *e = write_order[i];
|
||||
@ -693,20 +684,8 @@ static void write_pack_file(void)
|
||||
|
||||
if (!pack_to_stdout) {
|
||||
struct stat st;
|
||||
const char *idx_tmp_name;
|
||||
char tmpname[PATH_MAX];
|
||||
|
||||
idx_tmp_name = write_idx_file(NULL, written_list, nr_written,
|
||||
&pack_idx_opts, sha1);
|
||||
|
||||
snprintf(tmpname, sizeof(tmpname), "%s-%s.pack",
|
||||
base_name, sha1_to_hex(sha1));
|
||||
free_pack_by_name(tmpname);
|
||||
if (adjust_shared_perm(pack_tmp_name))
|
||||
die_errno("unable to make temporary pack file readable");
|
||||
if (rename(pack_tmp_name, tmpname))
|
||||
die_errno("unable to rename temporary pack file");
|
||||
|
||||
/*
|
||||
* Packs are runtime accessed in their mtime
|
||||
* order since newer packs are more likely to contain
|
||||
@ -714,28 +693,27 @@ static void write_pack_file(void)
|
||||
* packs then we should modify the mtime of later ones
|
||||
* to preserve this property.
|
||||
*/
|
||||
if (stat(tmpname, &st) < 0) {
|
||||
if (stat(pack_tmp_name, &st) < 0) {
|
||||
warning("failed to stat %s: %s",
|
||||
tmpname, strerror(errno));
|
||||
pack_tmp_name, strerror(errno));
|
||||
} else if (!last_mtime) {
|
||||
last_mtime = st.st_mtime;
|
||||
} else {
|
||||
struct utimbuf utb;
|
||||
utb.actime = st.st_atime;
|
||||
utb.modtime = --last_mtime;
|
||||
if (utime(tmpname, &utb) < 0)
|
||||
if (utime(pack_tmp_name, &utb) < 0)
|
||||
warning("failed utime() on %s: %s",
|
||||
tmpname, strerror(errno));
|
||||
}
|
||||
|
||||
snprintf(tmpname, sizeof(tmpname), "%s-%s.idx",
|
||||
base_name, sha1_to_hex(sha1));
|
||||
if (adjust_shared_perm(idx_tmp_name))
|
||||
die_errno("unable to make temporary index file readable");
|
||||
if (rename(idx_tmp_name, tmpname))
|
||||
die_errno("unable to rename temporary index file");
|
||||
|
||||
free((void *) idx_tmp_name);
|
||||
/* Enough space for "-<sha-1>.pack"? */
|
||||
if (sizeof(tmpname) <= strlen(base_name) + 50)
|
||||
die("pack base name '%s' too long", base_name);
|
||||
snprintf(tmpname, sizeof(tmpname), "%s-", base_name);
|
||||
finish_tmp_packfile(tmpname, pack_tmp_name,
|
||||
written_list, nr_written,
|
||||
&pack_idx_opts, sha1);
|
||||
free(pack_tmp_name);
|
||||
puts(sha1_to_hex(sha1));
|
||||
}
|
||||
@ -2098,10 +2076,6 @@ static int git_pack_config(const char *k, const char *v, void *cb)
|
||||
pack_idx_opts.version);
|
||||
return 0;
|
||||
}
|
||||
if (!strcmp(k, "pack.packsizelimit")) {
|
||||
pack_size_limit_cfg = git_config_ulong(k, v);
|
||||
return 0;
|
||||
}
|
||||
return git_default_config(k, v, cb);
|
||||
}
|
||||
|
||||
|
275
bulk-checkin.c
Normal file
275
bulk-checkin.c
Normal file
@ -0,0 +1,275 @@
|
||||
/*
|
||||
* Copyright (c) 2011, Google Inc.
|
||||
*/
|
||||
#include "bulk-checkin.h"
|
||||
#include "csum-file.h"
|
||||
#include "pack.h"
|
||||
|
||||
static int pack_compression_level = Z_DEFAULT_COMPRESSION;
|
||||
|
||||
static struct bulk_checkin_state {
|
||||
unsigned plugged:1;
|
||||
|
||||
char *pack_tmp_name;
|
||||
struct sha1file *f;
|
||||
off_t offset;
|
||||
struct pack_idx_option pack_idx_opts;
|
||||
|
||||
struct pack_idx_entry **written;
|
||||
uint32_t alloc_written;
|
||||
uint32_t nr_written;
|
||||
} state;
|
||||
|
||||
static void finish_bulk_checkin(struct bulk_checkin_state *state)
|
||||
{
|
||||
unsigned char sha1[20];
|
||||
char packname[PATH_MAX];
|
||||
int i;
|
||||
|
||||
if (!state->f)
|
||||
return;
|
||||
|
||||
if (state->nr_written == 0) {
|
||||
close(state->f->fd);
|
||||
unlink(state->pack_tmp_name);
|
||||
goto clear_exit;
|
||||
} else if (state->nr_written == 1) {
|
||||
sha1close(state->f, sha1, CSUM_FSYNC);
|
||||
} else {
|
||||
int fd = sha1close(state->f, sha1, 0);
|
||||
fixup_pack_header_footer(fd, sha1, state->pack_tmp_name,
|
||||
state->nr_written, sha1,
|
||||
state->offset);
|
||||
close(fd);
|
||||
}
|
||||
|
||||
sprintf(packname, "%s/pack/pack-", get_object_directory());
|
||||
finish_tmp_packfile(packname, state->pack_tmp_name,
|
||||
state->written, state->nr_written,
|
||||
&state->pack_idx_opts, sha1);
|
||||
for (i = 0; i < state->nr_written; i++)
|
||||
free(state->written[i]);
|
||||
|
||||
clear_exit:
|
||||
free(state->written);
|
||||
memset(state, 0, sizeof(*state));
|
||||
|
||||
/* Make objects we just wrote available to ourselves */
|
||||
reprepare_packed_git();
|
||||
}
|
||||
|
||||
static int already_written(struct bulk_checkin_state *state, unsigned char sha1[])
|
||||
{
|
||||
int i;
|
||||
|
||||
/* The object may already exist in the repository */
|
||||
if (has_sha1_file(sha1))
|
||||
return 1;
|
||||
|
||||
/* Might want to keep the list sorted */
|
||||
for (i = 0; i < state->nr_written; i++)
|
||||
if (!hashcmp(state->written[i]->sha1, sha1))
|
||||
return 1;
|
||||
|
||||
/* This is a new object we need to keep */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Read the contents from fd for size bytes, streaming it to the
|
||||
* packfile in state while updating the hash in ctx. Signal a failure
|
||||
* by returning a negative value when the resulting pack would exceed
|
||||
* the pack size limit and this is not the first object in the pack,
|
||||
* so that the caller can discard what we wrote from the current pack
|
||||
* by truncating it and opening a new one. The caller will then call
|
||||
* us again after rewinding the input fd.
|
||||
*
|
||||
* The already_hashed_to pointer is kept untouched by the caller to
|
||||
* make sure we do not hash the same byte when we are called
|
||||
* again. This way, the caller does not have to checkpoint its hash
|
||||
* status before calling us just in case we ask it to call us again
|
||||
* with a new pack.
|
||||
*/
|
||||
static int stream_to_pack(struct bulk_checkin_state *state,
|
||||
git_SHA_CTX *ctx, off_t *already_hashed_to,
|
||||
int fd, size_t size, enum object_type type,
|
||||
const char *path, unsigned flags)
|
||||
{
|
||||
git_zstream s;
|
||||
unsigned char obuf[16384];
|
||||
unsigned hdrlen;
|
||||
int status = Z_OK;
|
||||
int write_object = (flags & HASH_WRITE_OBJECT);
|
||||
off_t offset = 0;
|
||||
|
||||
memset(&s, 0, sizeof(s));
|
||||
git_deflate_init(&s, pack_compression_level);
|
||||
|
||||
hdrlen = encode_in_pack_object_header(type, size, obuf);
|
||||
s.next_out = obuf + hdrlen;
|
||||
s.avail_out = sizeof(obuf) - hdrlen;
|
||||
|
||||
while (status != Z_STREAM_END) {
|
||||
unsigned char ibuf[16384];
|
||||
|
||||
if (size && !s.avail_in) {
|
||||
ssize_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf);
|
||||
if (xread(fd, ibuf, rsize) != rsize)
|
||||
die("failed to read %d bytes from '%s'",
|
||||
(int)rsize, path);
|
||||
offset += rsize;
|
||||
if (*already_hashed_to < offset) {
|
||||
size_t hsize = offset - *already_hashed_to;
|
||||
if (rsize < hsize)
|
||||
hsize = rsize;
|
||||
if (hsize)
|
||||
git_SHA1_Update(ctx, ibuf, hsize);
|
||||
*already_hashed_to = offset;
|
||||
}
|
||||
s.next_in = ibuf;
|
||||
s.avail_in = rsize;
|
||||
size -= rsize;
|
||||
}
|
||||
|
||||
status = git_deflate(&s, size ? 0 : Z_FINISH);
|
||||
|
||||
if (!s.avail_out || status == Z_STREAM_END) {
|
||||
if (write_object) {
|
||||
size_t written = s.next_out - obuf;
|
||||
|
||||
/* would we bust the size limit? */
|
||||
if (state->nr_written &&
|
||||
pack_size_limit_cfg &&
|
||||
pack_size_limit_cfg < state->offset + written) {
|
||||
git_deflate_abort(&s);
|
||||
return -1;
|
||||
}
|
||||
|
||||
sha1write(state->f, obuf, written);
|
||||
state->offset += written;
|
||||
}
|
||||
s.next_out = obuf;
|
||||
s.avail_out = sizeof(obuf);
|
||||
}
|
||||
|
||||
switch (status) {
|
||||
case Z_OK:
|
||||
case Z_BUF_ERROR:
|
||||
case Z_STREAM_END:
|
||||
continue;
|
||||
default:
|
||||
die("unexpected deflate failure: %d", status);
|
||||
}
|
||||
}
|
||||
git_deflate_end(&s);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Lazily create backing packfile for the state */
|
||||
static void prepare_to_stream(struct bulk_checkin_state *state,
|
||||
unsigned flags)
|
||||
{
|
||||
if (!(flags & HASH_WRITE_OBJECT) || state->f)
|
||||
return;
|
||||
|
||||
state->f = create_tmp_packfile(&state->pack_tmp_name);
|
||||
reset_pack_idx_option(&state->pack_idx_opts);
|
||||
|
||||
/* Pretend we are going to write only one object */
|
||||
state->offset = write_pack_header(state->f, 1);
|
||||
if (!state->offset)
|
||||
die_errno("unable to write pack header");
|
||||
}
|
||||
|
||||
static int deflate_to_pack(struct bulk_checkin_state *state,
|
||||
unsigned char result_sha1[],
|
||||
int fd, size_t size,
|
||||
enum object_type type, const char *path,
|
||||
unsigned flags)
|
||||
{
|
||||
off_t seekback, already_hashed_to;
|
||||
git_SHA_CTX ctx;
|
||||
unsigned char obuf[16384];
|
||||
unsigned header_len;
|
||||
struct sha1file_checkpoint checkpoint;
|
||||
struct pack_idx_entry *idx = NULL;
|
||||
|
||||
seekback = lseek(fd, 0, SEEK_CUR);
|
||||
if (seekback == (off_t) -1)
|
||||
return error("cannot find the current offset");
|
||||
|
||||
header_len = sprintf((char *)obuf, "%s %" PRIuMAX,
|
||||
typename(type), (uintmax_t)size) + 1;
|
||||
git_SHA1_Init(&ctx);
|
||||
git_SHA1_Update(&ctx, obuf, header_len);
|
||||
|
||||
/* Note: idx is non-NULL when we are writing */
|
||||
if ((flags & HASH_WRITE_OBJECT) != 0)
|
||||
idx = xcalloc(1, sizeof(*idx));
|
||||
|
||||
already_hashed_to = 0;
|
||||
|
||||
while (1) {
|
||||
prepare_to_stream(state, flags);
|
||||
if (idx) {
|
||||
sha1file_checkpoint(state->f, &checkpoint);
|
||||
idx->offset = state->offset;
|
||||
crc32_begin(state->f);
|
||||
}
|
||||
if (!stream_to_pack(state, &ctx, &already_hashed_to,
|
||||
fd, size, type, path, flags))
|
||||
break;
|
||||
/*
|
||||
* Writing this object to the current pack will make
|
||||
* it too big; we need to truncate it, start a new
|
||||
* pack, and write into it.
|
||||
*/
|
||||
if (!idx)
|
||||
die("BUG: should not happen");
|
||||
sha1file_truncate(state->f, &checkpoint);
|
||||
state->offset = checkpoint.offset;
|
||||
finish_bulk_checkin(state);
|
||||
if (lseek(fd, seekback, SEEK_SET) == (off_t) -1)
|
||||
return error("cannot seek back");
|
||||
}
|
||||
git_SHA1_Final(result_sha1, &ctx);
|
||||
if (!idx)
|
||||
return 0;
|
||||
|
||||
idx->crc32 = crc32_end(state->f);
|
||||
if (already_written(state, result_sha1)) {
|
||||
sha1file_truncate(state->f, &checkpoint);
|
||||
state->offset = checkpoint.offset;
|
||||
free(idx);
|
||||
} else {
|
||||
hashcpy(idx->sha1, result_sha1);
|
||||
ALLOC_GROW(state->written,
|
||||
state->nr_written + 1,
|
||||
state->alloc_written);
|
||||
state->written[state->nr_written++] = idx;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int index_bulk_checkin(unsigned char *sha1,
|
||||
int fd, size_t size, enum object_type type,
|
||||
const char *path, unsigned flags)
|
||||
{
|
||||
int status = deflate_to_pack(&state, sha1, fd, size, type,
|
||||
path, flags);
|
||||
if (!state.plugged)
|
||||
finish_bulk_checkin(&state);
|
||||
return status;
|
||||
}
|
||||
|
||||
void plug_bulk_checkin(void)
|
||||
{
|
||||
state.plugged = 1;
|
||||
}
|
||||
|
||||
void unplug_bulk_checkin(void)
|
||||
{
|
||||
state.plugged = 0;
|
||||
if (state.f)
|
||||
finish_bulk_checkin(&state);
|
||||
}
|
16
bulk-checkin.h
Normal file
16
bulk-checkin.h
Normal file
@ -0,0 +1,16 @@
|
||||
/*
|
||||
* Copyright (c) 2011, Google Inc.
|
||||
*/
|
||||
#ifndef BULK_CHECKIN_H
|
||||
#define BULK_CHECKIN_H
|
||||
|
||||
#include "cache.h"
|
||||
|
||||
/*
 * Hash "size" bytes read from fd as an object of the given type and
 * store its name in sha1; with HASH_WRITE_OBJECT in flags the object
 * is also streamed into a packfile.
 */
extern int index_bulk_checkin(unsigned char sha1[],
			      int fd, size_t size, enum object_type type,
			      const char *path, unsigned flags);

/*
 * Between plug and unplug, objects written by index_bulk_checkin()
 * share a packfile instead of getting one pack each.
 */
extern void plug_bulk_checkin(void);
extern void unplug_bulk_checkin(void);
|
||||
|
||||
#endif
|
2
cache.h
2
cache.h
@ -35,6 +35,7 @@ int git_inflate(git_zstream *, int flush);
|
||||
void git_deflate_init(git_zstream *, int level);
|
||||
void git_deflate_init_gzip(git_zstream *, int level);
|
||||
void git_deflate_end(git_zstream *);
|
||||
int git_deflate_abort(git_zstream *);
|
||||
int git_deflate_end_gently(git_zstream *);
|
||||
int git_deflate(git_zstream *, int flush);
|
||||
unsigned long git_deflate_bound(git_zstream *, unsigned long);
|
||||
@ -597,6 +598,7 @@ extern size_t packed_git_window_size;
|
||||
extern size_t packed_git_limit;
|
||||
extern size_t delta_base_cache_limit;
|
||||
extern unsigned long big_file_threshold;
|
||||
extern unsigned long pack_size_limit_cfg;
|
||||
extern int read_replace_refs;
|
||||
extern int fsync_object_files;
|
||||
extern int core_preload_index;
|
||||
|
4
config.c
4
config.c
@ -818,6 +818,10 @@ int git_default_config(const char *var, const char *value, void *dummy)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!strcmp(var, "pack.packsizelimit")) {
|
||||
pack_size_limit_cfg = git_config_ulong(var, value);
|
||||
return 0;
|
||||
}
|
||||
/* Add other config variables here and to Documentation/config.txt. */
|
||||
return 0;
|
||||
}
|
||||
|
20
csum-file.c
20
csum-file.c
@ -158,6 +158,26 @@ struct sha1file *sha1fd_throughput(int fd, const char *name, struct progress *tp
|
||||
return f;
|
||||
}
|
||||
|
||||
void sha1file_checkpoint(struct sha1file *f, struct sha1file_checkpoint *checkpoint)
|
||||
{
|
||||
sha1flush(f);
|
||||
checkpoint->offset = f->total;
|
||||
checkpoint->ctx = f->ctx;
|
||||
}
|
||||
|
||||
int sha1file_truncate(struct sha1file *f, struct sha1file_checkpoint *checkpoint)
|
||||
{
|
||||
off_t offset = checkpoint->offset;
|
||||
|
||||
if (ftruncate(f->fd, offset) ||
|
||||
lseek(f->fd, offset, SEEK_SET) != offset)
|
||||
return -1;
|
||||
f->total = offset;
|
||||
f->ctx = checkpoint->ctx;
|
||||
f->offset = 0; /* sha1flush() was called in checkpoint */
|
||||
return 0;
|
||||
}
|
||||
|
||||
void crc32_begin(struct sha1file *f)
|
||||
{
|
||||
f->crc32 = crc32(0, NULL, 0);
|
||||
|
@ -17,6 +17,15 @@ struct sha1file {
|
||||
unsigned char buffer[8192];
|
||||
};
|
||||
|
||||
/* Checkpoint */
|
||||
struct sha1file_checkpoint {
|
||||
off_t offset;
|
||||
git_SHA_CTX ctx;
|
||||
};
|
||||
|
||||
extern void sha1file_checkpoint(struct sha1file *, struct sha1file_checkpoint *);
|
||||
extern int sha1file_truncate(struct sha1file *, struct sha1file_checkpoint *);
|
||||
|
||||
/* sha1close flags */
|
||||
#define CSUM_CLOSE 1
|
||||
#define CSUM_FSYNC 2
|
||||
|
@ -62,6 +62,7 @@ int grafts_replace_parents = 1;
|
||||
int core_apply_sparse_checkout;
|
||||
int merge_log_config = -1;
|
||||
struct startup_info *startup_info;
|
||||
unsigned long pack_size_limit_cfg;
|
||||
|
||||
/* Parallel index stat data preload? */
|
||||
int core_preload_index = 0;
|
||||
|
@ -1143,17 +1143,11 @@ static int store_object(
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void truncate_pack(off_t to, git_SHA_CTX *ctx)
|
||||
static void truncate_pack(struct sha1file_checkpoint *checkpoint)
|
||||
{
|
||||
if (ftruncate(pack_data->pack_fd, to)
|
||||
|| lseek(pack_data->pack_fd, to, SEEK_SET) != to)
|
||||
if (sha1file_truncate(pack_file, checkpoint))
|
||||
die_errno("cannot truncate pack to skip duplicate");
|
||||
pack_size = to;
|
||||
|
||||
/* yes this is a layering violation */
|
||||
pack_file->total = to;
|
||||
pack_file->offset = 0;
|
||||
pack_file->ctx = *ctx;
|
||||
pack_size = checkpoint->offset;
|
||||
}
|
||||
|
||||
static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
|
||||
@ -1166,8 +1160,8 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
|
||||
unsigned long hdrlen;
|
||||
off_t offset;
|
||||
git_SHA_CTX c;
|
||||
git_SHA_CTX pack_file_ctx;
|
||||
git_zstream s;
|
||||
struct sha1file_checkpoint checkpoint;
|
||||
int status = Z_OK;
|
||||
|
||||
/* Determine if we should auto-checkpoint. */
|
||||
@ -1175,11 +1169,8 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
|
||||
|| (pack_size + 60 + len) < pack_size)
|
||||
cycle_packfile();
|
||||
|
||||
offset = pack_size;
|
||||
|
||||
/* preserve the pack_file SHA1 ctx in case we have to truncate later */
|
||||
sha1flush(pack_file);
|
||||
pack_file_ctx = pack_file->ctx;
|
||||
sha1file_checkpoint(pack_file, &checkpoint);
|
||||
offset = checkpoint.offset;
|
||||
|
||||
hdrlen = snprintf((char *)out_buf, out_sz, "blob %" PRIuMAX, len) + 1;
|
||||
if (out_sz <= hdrlen)
|
||||
@ -1245,14 +1236,14 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
|
||||
|
||||
if (e->idx.offset) {
|
||||
duplicate_count_by_type[OBJ_BLOB]++;
|
||||
truncate_pack(offset, &pack_file_ctx);
|
||||
truncate_pack(&checkpoint);
|
||||
|
||||
} else if (find_sha1_pack(sha1, packed_git)) {
|
||||
e->type = OBJ_BLOB;
|
||||
e->pack_id = MAX_PACK_ID;
|
||||
e->idx.offset = 1; /* just not zero! */
|
||||
duplicate_count_by_type[OBJ_BLOB]++;
|
||||
truncate_pack(offset, &pack_file_ctx);
|
||||
truncate_pack(&checkpoint);
|
||||
|
||||
} else {
|
||||
e->depth = 0;
|
||||
|
53
pack-write.c
53
pack-write.c
@ -182,6 +182,18 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec
|
||||
return index_name;
|
||||
}
|
||||
|
||||
off_t write_pack_header(struct sha1file *f, uint32_t nr_entries)
|
||||
{
|
||||
struct pack_header hdr;
|
||||
|
||||
hdr.hdr_signature = htonl(PACK_SIGNATURE);
|
||||
hdr.hdr_version = htonl(PACK_VERSION);
|
||||
hdr.hdr_entries = htonl(nr_entries);
|
||||
if (sha1write(f, &hdr, sizeof(hdr)))
|
||||
return 0;
|
||||
return sizeof(hdr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Update pack header with object_count and compute new SHA1 for pack data
|
||||
* associated to pack_fd, and write that SHA1 at the end. That new SHA1
|
||||
@ -320,3 +332,44 @@ int encode_in_pack_object_header(enum object_type type, uintmax_t size, unsigned
|
||||
*hdr = c;
|
||||
return n;
|
||||
}
|
||||
|
||||
struct sha1file *create_tmp_packfile(char **pack_tmp_name)
|
||||
{
|
||||
char tmpname[PATH_MAX];
|
||||
int fd;
|
||||
|
||||
fd = odb_mkstemp(tmpname, sizeof(tmpname), "pack/tmp_pack_XXXXXX");
|
||||
*pack_tmp_name = xstrdup(tmpname);
|
||||
return sha1fd(fd, *pack_tmp_name);
|
||||
}
|
||||
|
||||
/*
 * Write the index for a finished temporary pack and rename both files
 * to their final names.
 *
 * name_buffer holds the common prefix of the final names on entry;
 * "<sha-1>.pack" and then "<sha-1>.idx" are appended to it in place,
 * so the caller must supply a buffer with room for the suffix.  Any
 * failure to adjust permissions or rename is fatal.
 */
void finish_tmp_packfile(char *name_buffer,
			 const char *pack_tmp_name,
			 struct pack_idx_entry **written_list,
			 uint32_t nr_written,
			 struct pack_idx_option *pack_idx_opts,
			 unsigned char sha1[])
{
	/* where the per-file suffix gets written, just past the prefix */
	char *suffix = name_buffer + strlen(name_buffer);
	const char *idx_name;

	if (adjust_shared_perm(pack_tmp_name))
		die_errno("unable to make temporary pack file readable");

	idx_name = write_idx_file(NULL, written_list, nr_written,
				  pack_idx_opts, sha1);
	if (adjust_shared_perm(idx_name))
		die_errno("unable to make temporary index file readable");

	/* the pack goes into place first, then its index */
	sprintf(suffix, "%s.pack", sha1_to_hex(sha1));
	free_pack_by_name(name_buffer);

	if (rename(pack_tmp_name, name_buffer))
		die_errno("unable to rename temporary pack file");

	sprintf(suffix, "%s.idx", sha1_to_hex(sha1));
	if (rename(idx_name, name_buffer))
		die_errno("unable to rename temporary index file");

	free((void *)idx_name);
}
|
||||
|
6
pack.h
6
pack.h
@ -2,6 +2,7 @@
|
||||
#define PACK_H
|
||||
|
||||
#include "object.h"
|
||||
#include "csum-file.h"
|
||||
|
||||
/*
|
||||
* Packed object header
|
||||
@ -79,6 +80,7 @@ extern const char *write_idx_file(const char *index_name, struct pack_idx_entry
|
||||
extern int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, off_t offset, off_t len, unsigned int nr);
|
||||
extern int verify_pack_index(struct packed_git *);
|
||||
extern int verify_pack(struct packed_git *, verify_fn fn, struct progress *, uint32_t);
|
||||
extern off_t write_pack_header(struct sha1file *f, uint32_t);
|
||||
extern void fixup_pack_header_footer(int, unsigned char *, const char *, uint32_t, unsigned char *, off_t);
|
||||
extern char *index_pack_lockfile(int fd);
|
||||
extern int encode_in_pack_object_header(enum object_type, uintmax_t, unsigned char *);
|
||||
@ -87,4 +89,8 @@ extern int encode_in_pack_object_header(enum object_type, uintmax_t, unsigned ch
|
||||
#define PH_ERROR_PACK_SIGNATURE (-2)
|
||||
#define PH_ERROR_PROTOCOL (-3)
|
||||
extern int read_pack_header(int fd, struct pack_header *);
|
||||
|
||||
extern struct sha1file *create_tmp_packfile(char **pack_tmp_name);
|
||||
extern void finish_tmp_packfile(char *name_buffer, const char *pack_tmp_name, struct pack_idx_entry **written_list, uint32_t nr_written, struct pack_idx_option *pack_idx_opts, unsigned char sha1[]);
|
||||
|
||||
#endif
|
||||
|
67
sha1_file.c
67
sha1_file.c
@ -18,6 +18,7 @@
|
||||
#include "refs.h"
|
||||
#include "pack-revindex.h"
|
||||
#include "sha1-lookup.h"
|
||||
#include "bulk-checkin.h"
|
||||
|
||||
#ifndef O_NOATIME
|
||||
#if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
|
||||
@ -2680,10 +2681,8 @@ static int index_core(unsigned char *sha1, int fd, size_t size,
|
||||
}
|
||||
|
||||
/*
|
||||
* This creates one packfile per large blob, because the caller
|
||||
* immediately wants the result sha1, and fast-import can report the
|
||||
* object name via marks mechanism only by closing the created
|
||||
* packfile.
|
||||
* This creates one packfile per large blob unless bulk-checkin
|
||||
* machinery is "plugged".
|
||||
*
|
||||
* This also bypasses the usual "convert-to-git" dance, and that is on
|
||||
* purpose. We could write a streaming version of the converting
|
||||
@ -2697,65 +2696,7 @@ static int index_stream(unsigned char *sha1, int fd, size_t size,
|
||||
enum object_type type, const char *path,
|
||||
unsigned flags)
|
||||
{
|
||||
struct child_process fast_import;
|
||||
char export_marks[512];
|
||||
const char *argv[] = { "fast-import", "--quiet", export_marks, NULL };
|
||||
char tmpfile[512];
|
||||
char fast_import_cmd[512];
|
||||
char buf[512];
|
||||
int len, tmpfd;
|
||||
|
||||
strcpy(tmpfile, git_path("hashstream_XXXXXX"));
|
||||
tmpfd = git_mkstemp_mode(tmpfile, 0600);
|
||||
if (tmpfd < 0)
|
||||
die_errno("cannot create tempfile: %s", tmpfile);
|
||||
if (close(tmpfd))
|
||||
die_errno("cannot close tempfile: %s", tmpfile);
|
||||
sprintf(export_marks, "--export-marks=%s", tmpfile);
|
||||
|
||||
memset(&fast_import, 0, sizeof(fast_import));
|
||||
fast_import.in = -1;
|
||||
fast_import.argv = argv;
|
||||
fast_import.git_cmd = 1;
|
||||
if (start_command(&fast_import))
|
||||
die_errno("index-stream: git fast-import failed");
|
||||
|
||||
len = sprintf(fast_import_cmd, "blob\nmark :1\ndata %lu\n",
|
||||
(unsigned long) size);
|
||||
write_or_whine(fast_import.in, fast_import_cmd, len,
|
||||
"index-stream: feeding fast-import");
|
||||
while (size) {
|
||||
char buf[10240];
|
||||
size_t sz = size < sizeof(buf) ? size : sizeof(buf);
|
||||
ssize_t actual;
|
||||
|
||||
actual = read_in_full(fd, buf, sz);
|
||||
if (actual < 0)
|
||||
die_errno("index-stream: reading input");
|
||||
if (write_in_full(fast_import.in, buf, actual) != actual)
|
||||
die_errno("index-stream: feeding fast-import");
|
||||
size -= actual;
|
||||
}
|
||||
if (close(fast_import.in))
|
||||
die_errno("index-stream: closing fast-import");
|
||||
if (finish_command(&fast_import))
|
||||
die_errno("index-stream: finishing fast-import");
|
||||
|
||||
tmpfd = open(tmpfile, O_RDONLY);
|
||||
if (tmpfd < 0)
|
||||
die_errno("index-stream: cannot open fast-import mark");
|
||||
len = read(tmpfd, buf, sizeof(buf));
|
||||
if (len < 0)
|
||||
die_errno("index-stream: reading fast-import mark");
|
||||
if (close(tmpfd) < 0)
|
||||
die_errno("index-stream: closing fast-import mark");
|
||||
if (unlink(tmpfile))
|
||||
die_errno("index-stream: unlinking fast-import mark");
|
||||
if (len != 44 ||
|
||||
memcmp(":1 ", buf, 3) ||
|
||||
get_sha1_hex(buf + 3, sha1))
|
||||
die_errno("index-stream: unexpected fast-import mark: <%s>", buf);
|
||||
return 0;
|
||||
return index_bulk_checkin(sha1, fd, size, type, path, flags);
|
||||
}
|
||||
|
||||
int index_fd(unsigned char *sha1, int fd, struct stat *st,
|
||||
|
@ -7,21 +7,97 @@ test_description='adding and checking out large blobs'
|
||||
|
||||
test_expect_success setup '
|
||||
git config core.bigfilethreshold 200k &&
|
||||
echo X | dd of=large bs=1k seek=2000
|
||||
echo X | dd of=large1 bs=1k seek=2000 &&
|
||||
echo X | dd of=large2 bs=1k seek=2000 &&
|
||||
echo X | dd of=large3 bs=1k seek=2000 &&
|
||||
echo Y | dd of=huge bs=1k seek=2500
|
||||
'
|
||||
|
||||
test_expect_success 'add a large file' '
|
||||
git add large &&
|
||||
# make sure we got a packfile and no loose objects
|
||||
test -f .git/objects/pack/pack-*.pack &&
|
||||
test ! -f .git/objects/??/??????????????????????????????????????
|
||||
test_expect_success 'add a large file or two' '
|
||||
git add large1 huge large2 &&
|
||||
# make sure we got a single packfile and no loose objects
|
||||
bad= count=0 idx= &&
|
||||
for p in .git/objects/pack/pack-*.pack
|
||||
do
|
||||
count=$(( $count + 1 ))
|
||||
if test -f "$p" && idx=${p%.pack}.idx && test -f "$idx"
|
||||
then
|
||||
continue
|
||||
fi
|
||||
bad=t
|
||||
done &&
|
||||
test -z "$bad" &&
|
||||
test $count = 1 &&
|
||||
cnt=$(git show-index <"$idx" | wc -l) &&
|
||||
test $cnt = 2 &&
|
||||
for l in .git/objects/??/??????????????????????????????????????
|
||||
do
|
||||
test -f "$l" || continue
|
||||
bad=t
|
||||
done &&
|
||||
test -z "$bad" &&
|
||||
|
||||
# attempt to add another copy of the same
|
||||
git add large3 &&
|
||||
bad= count=0 &&
|
||||
for p in .git/objects/pack/pack-*.pack
|
||||
do
|
||||
count=$(( $count + 1 ))
|
||||
if test -f "$p" && idx=${p%.pack}.idx && test -f "$idx"
|
||||
then
|
||||
continue
|
||||
fi
|
||||
bad=t
|
||||
done &&
|
||||
test -z "$bad" &&
|
||||
test $count = 1
|
||||
'
|
||||
|
||||
test_expect_success 'checkout a large file' '
|
||||
large=$(git rev-parse :large) &&
|
||||
git update-index --add --cacheinfo 100644 $large another &&
|
||||
large1=$(git rev-parse :large1) &&
|
||||
git update-index --add --cacheinfo 100644 $large1 another &&
|
||||
git checkout another &&
|
||||
cmp large another ;# this must not be test_cmp
|
||||
cmp large1 another ;# this must not be test_cmp
|
||||
'
|
||||
|
||||
test_expect_success 'packsize limit' '
	test_create_repo mid &&
	(
		cd mid &&
		git config core.bigfilethreshold 64k &&
		git config pack.packsizelimit 256k &&

		# mid1 and mid2 will fit within 256k limit but
		# appending mid3 will bust the limit and will
		# result in a separate packfile.
		test-genrandom "a" $(( 66 * 1024 )) >mid1 &&
		test-genrandom "b" $(( 80 * 1024 )) >mid2 &&
		test-genrandom "c" $(( 128 * 1024 )) >mid3 &&
		git add mid1 mid2 mid3 &&

		# keep the &&-chain intact so that a failure in any of
		# the steps above is not masked by what follows
		count=0 &&
		for pi in .git/objects/pack/pack-*.idx
		do
			test -f "$pi" && count=$(( $count + 1 ))
		done &&
		test $count = 2 &&

		(
			git hash-object --stdin <mid1 &&
			git hash-object --stdin <mid2 &&
			git hash-object --stdin <mid3
		) |
		sort >expect &&

		for pi in .git/objects/pack/pack-*.idx
		do
			git show-index <"$pi"
		done |
		sed -e "s/^[0-9]* \([0-9a-f]*\) .*/\1/" |
		sort >actual &&

		test_cmp expect actual
	)
'
|
||||
|
||||
test_done
|
||||
|
9
zlib.c
9
zlib.c
@ -188,13 +188,20 @@ void git_deflate_init_gzip(git_zstream *strm, int level)
|
||||
strm->z.msg ? strm->z.msg : "no message");
|
||||
}
|
||||
|
||||
void git_deflate_end(git_zstream *strm)
|
||||
/*
 * Tear down the deflate stream and hand the status of deflateEnd()
 * back to the caller without reporting it.
 */
int git_deflate_abort(git_zstream *strm)
{
	int rc;

	zlib_pre_call(strm);
	rc = deflateEnd(&strm->z);
	zlib_post_call(strm);

	return rc;
}
|
||||
|
||||
void git_deflate_end(git_zstream *strm)
|
||||
{
|
||||
int status = git_deflate_abort(strm);
|
||||
|
||||
if (status == Z_OK)
|
||||
return;
|
||||
error("deflateEnd: %s (%s)", zerr_to_string(status),
|
||||
|
Loading…
Reference in New Issue
Block a user