Merge branch 'hx/unpack-streaming'
Allow large objects read from a packstream to be streamed into a
loose object file straight, without having to keep it in-core as a
whole.

* hx/unpack-streaming:
  unpack-objects: use stream_loose_object() to unpack large objects
  core doc: modernize core.bigFileThreshold documentation
  object-file.c: add "stream_loose_object()" to handle large object
  object-file.c: factor out deflate part of write_loose_object()
  object-file.c: refactor write_loose_object() to several steps
  unpack-objects: low memory footprint for get_data() in dry_run mode
commit 73b9ef6ab1
--- a/Documentation/config/core.txt
+++ b/Documentation/config/core.txt
@@ -444,17 +444,32 @@ You probably do not need to adjust this value.
 Common unit suffixes of 'k', 'm', or 'g' are supported.
 
 core.bigFileThreshold::
-	Files larger than this size are stored deflated, without
-	attempting delta compression. Storing large files without
-	delta compression avoids excessive memory usage, at the
-	slight expense of increased disk usage. Additionally files
-	larger than this size are always treated as binary.
+	The size of files considered "big", which as discussed below
+	changes the behavior of numerous git commands, as well as how
+	such files are stored within the repository. The default is
+	512 MiB. Common unit suffixes of 'k', 'm', or 'g' are
+	supported.
 +
-Default is 512 MiB on all platforms. This should be reasonable
-for most projects as source code and other text files can still
-be delta compressed, but larger binary media files won't be.
+Files above the configured limit will be:
 +
-Common unit suffixes of 'k', 'm', or 'g' are supported.
+* Stored deflated in packfiles, without attempting delta compression.
++
+The default limit is primarily set with this use-case in mind. With it,
+most projects will have their source code and other text files delta
+compressed, but not larger binary media files.
++
+Storing large files without delta compression avoids excessive memory
+usage, at the slight expense of increased disk usage.
++
+* Will be treated as if they were labeled "binary" (see
+  linkgit:gitattributes[5]). e.g. linkgit:git-log[1] and
+  linkgit:git-diff[1] will not compute diffs for files above this limit.
++
+* Will generally be streamed when written, which avoids excessive
+  memory usage, at the cost of some fixed overhead. Commands that make
+  use of this include linkgit:git-archive[1],
+  linkgit:git-fast-import[1], linkgit:git-index-pack[1],
+  linkgit:git-unpack-objects[1] and linkgit:git-fsck[1].
 
 core.excludesFile::
 	Specifies the pathname to the file that contains patterns to
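Note: the revised text makes explicit that the threshold is configurable with the usual unit suffixes. For example, `git config core.bigFileThreshold 100m` lowers the bar so more blobs take the no-delta, streamed paths described above (the t5351 tests added below set it as low as 1m).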
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -97,15 +97,27 @@ static void use(int bytes)
 		display_throughput(progress, consumed_bytes);
 }
 
+/*
+ * Decompress zstream from the standard input into a newly
+ * allocated buffer of specified size and return the buffer.
+ * The caller is responsible to free the returned buffer.
+ *
+ * But for dry_run mode, "get_data()" is only used to check the
+ * integrity of data, and the returned buffer is not used at all.
+ * Therefore, in dry_run mode, "get_data()" will release the small
+ * allocated buffer which is reused to hold temporary zstream output
+ * and return NULL instead of returning garbage data.
+ */
 static void *get_data(unsigned long size)
 {
 	git_zstream stream;
-	void *buf = xmallocz(size);
+	unsigned long bufsize = dry_run && size > 8192 ? 8192 : size;
+	void *buf = xmallocz(bufsize);
 
 	memset(&stream, 0, sizeof(stream));
 
 	stream.next_out = buf;
-	stream.avail_out = size;
+	stream.avail_out = bufsize;
 	stream.next_in = fill(1);
 	stream.avail_in = len;
 	git_inflate_init(&stream);
@@ -125,8 +137,17 @@ static void *get_data(unsigned long size)
 		}
 		stream.next_in = fill(1);
 		stream.avail_in = len;
+		if (dry_run) {
+			/* reuse the buffer in dry_run mode */
+			stream.next_out = buf;
+			stream.avail_out = bufsize > size - stream.total_out ?
+						   size - stream.total_out :
+						   bufsize;
+		}
 	}
 	git_inflate_end(&stream);
+	if (dry_run)
+		FREE_AND_NULL(buf);
 	return buf;
 }
 
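The dry_run change above caps get_data()'s allocation at 8K and keeps reusing that one buffer just to drive the inflate loop, discarding the output. The same idea in plain zlib terms, as a standalone sketch (not part of this commit; check_inflate() is a hypothetical name):

    #include <string.h>
    #include <zlib.h>

    /*
     * Inflate "in" through a small fixed buffer, discarding the output,
     * and report whether the stream is intact and inflates to exactly
     * "expect" bytes. Memory use stays at the size of one small buffer.
     */
    static int check_inflate(const unsigned char *in, size_t inlen,
                             unsigned long expect)
    {
        unsigned char out[8192];
        z_stream zs;
        int status;

        memset(&zs, 0, sizeof(zs));
        if (inflateInit(&zs) != Z_OK)
            return -1;
        zs.next_in = (unsigned char *)in;
        zs.avail_in = inlen;
        do {
            /* rewind the output pointers; the bytes are thrown away */
            zs.next_out = out;
            zs.avail_out = sizeof(out);
            status = inflate(&zs, Z_NO_FLUSH);
        } while (status == Z_OK);
        inflateEnd(&zs);
        return (status == Z_STREAM_END && zs.total_out == expect) ? 0 : -1;
    }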
@@ -326,10 +347,70 @@ static void unpack_non_delta_entry(enum object_type type, unsigned long size,
 {
 	void *buf = get_data(size);
 
-	if (!dry_run && buf)
+	if (buf)
 		write_object(nr, type, buf, size);
-	else
-		free(buf);
+}
+
+struct input_zstream_data {
+	git_zstream *zstream;
+	unsigned char buf[8192];
+	int status;
+};
+
+static const void *feed_input_zstream(struct input_stream *in_stream,
+				      unsigned long *readlen)
+{
+	struct input_zstream_data *data = in_stream->data;
+	git_zstream *zstream = data->zstream;
+	void *in = fill(1);
+
+	if (in_stream->is_finished) {
+		*readlen = 0;
+		return NULL;
+	}
+
+	zstream->next_out = data->buf;
+	zstream->avail_out = sizeof(data->buf);
+	zstream->next_in = in;
+	zstream->avail_in = len;
+
+	data->status = git_inflate(zstream, 0);
+
+	in_stream->is_finished = data->status != Z_OK;
+	use(len - zstream->avail_in);
+	*readlen = sizeof(data->buf) - zstream->avail_out;
+
+	return data->buf;
+}
+
+static void stream_blob(unsigned long size, unsigned nr)
+{
+	git_zstream zstream = { 0 };
+	struct input_zstream_data data = { 0 };
+	struct input_stream in_stream = {
+		.read = feed_input_zstream,
+		.data = &data,
+	};
+	struct obj_info *info = &obj_list[nr];
+
+	data.zstream = &zstream;
+	git_inflate_init(&zstream);
+
+	if (stream_loose_object(&in_stream, size, &info->oid))
+		die(_("failed to write object in stream"));
+
+	if (data.status != Z_STREAM_END)
+		die(_("inflate returned (%d)"), data.status);
+	git_inflate_end(&zstream);
+
+	if (strict) {
+		struct blob *blob = lookup_blob(the_repository, &info->oid);
+
+		if (!blob)
+			die(_("invalid blob object from stream"));
+		blob->object.flags |= FLAG_WRITTEN;
+	}
+	info->obj = NULL;
 }
 
 static int resolve_against_held(unsigned nr, const struct object_id *base,
@@ -359,10 +440,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 	oidread(&base_oid, fill(the_hash_algo->rawsz));
 	use(the_hash_algo->rawsz);
 	delta_data = get_data(delta_size);
-	if (dry_run || !delta_data) {
-		free(delta_data);
+	if (!delta_data)
 		return;
-	}
 	if (has_object_file(&base_oid))
 		; /* Ok we have this one */
 	else if (resolve_against_held(nr, &base_oid,
@@ -398,10 +477,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 			die("offset value out of bound for delta base object");
 
 		delta_data = get_data(delta_size);
-		if (dry_run || !delta_data) {
-			free(delta_data);
+		if (!delta_data)
 			return;
-		}
 		lo = 0;
 		hi = nr;
 		while (lo < hi) {
@@ -468,9 +545,14 @@ static void unpack_one(unsigned nr)
 	}
 
 	switch (type) {
+	case OBJ_BLOB:
+		if (!dry_run && size > big_file_threshold) {
+			stream_blob(size, nr);
+			return;
+		}
+		/* fallthrough */
 	case OBJ_COMMIT:
 	case OBJ_TREE:
-	case OBJ_BLOB:
 	case OBJ_TAG:
 		unpack_non_delta_entry(type, size, nr);
 		return;
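With this dispatch, a blob above core.bigFileThreshold never goes through get_data() in a real run: stream_blob() inflates the pack data 8K at a time via feed_input_zstream() and lets stream_loose_object() write it out incrementally. Only dry-run blobs still fall through to unpack_non_delta_entry(), where get_data() now verifies the zstream with its small reusable buffer and returns NULL.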
--- a/object-file.c
+++ b/object-file.c
@@ -1951,6 +1951,96 @@ static int create_tmpfile(struct strbuf *tmp, const char *filename)
 	return fd;
 }
 
+/**
+ * Common steps for loose object writers to start writing loose
+ * objects:
+ *
+ * - Create tmpfile for the loose object.
+ * - Setup zlib stream for compression.
+ * - Start to feed header to zlib stream.
+ *
+ * Returns a "fd", which should later be provided to
+ * end_loose_object_common().
+ */
+static int start_loose_object_common(struct strbuf *tmp_file,
+				     const char *filename, unsigned flags,
+				     git_zstream *stream,
+				     unsigned char *buf, size_t buflen,
+				     git_hash_ctx *c,
+				     char *hdr, int hdrlen)
+{
+	int fd;
+
+	fd = create_tmpfile(tmp_file, filename);
+	if (fd < 0) {
+		if (flags & HASH_SILENT)
+			return -1;
+		else if (errno == EACCES)
+			return error(_("insufficient permission for adding "
+				       "an object to repository database %s"),
+				     get_object_directory());
+		else
+			return error_errno(
+				_("unable to create temporary file"));
+	}
+
+	/* Setup zlib stream for compression */
+	git_deflate_init(stream, zlib_compression_level);
+	stream->next_out = buf;
+	stream->avail_out = buflen;
+	the_hash_algo->init_fn(c);
+
+	/* Start to feed header to zlib stream */
+	stream->next_in = (unsigned char *)hdr;
+	stream->avail_in = hdrlen;
+	while (git_deflate(stream, 0) == Z_OK)
+		; /* nothing */
+	the_hash_algo->update_fn(c, hdr, hdrlen);
+
+	return fd;
+}
+
+/**
+ * Common steps for the inner git_deflate() loop for writing loose
+ * objects. Returns what git_deflate() returns.
+ */
+static int write_loose_object_common(git_hash_ctx *c,
+				     git_zstream *stream, const int flush,
+				     unsigned char *in0, const int fd,
+				     unsigned char *compressed,
+				     const size_t compressed_len)
+{
+	int ret;
+
+	ret = git_deflate(stream, flush ? Z_FINISH : 0);
+	the_hash_algo->update_fn(c, in0, stream->next_in - in0);
+	if (write_buffer(fd, compressed, stream->next_out - compressed) < 0)
+		die(_("unable to write loose object file"));
+	stream->next_out = compressed;
+	stream->avail_out = compressed_len;
+
+	return ret;
+}
+
+/**
+ * Common steps for loose object writers to end writing loose objects:
+ *
+ * - End the compression of zlib stream.
+ * - Get the calculated oid to "oid".
+ */
+static int end_loose_object_common(git_hash_ctx *c, git_zstream *stream,
+				   struct object_id *oid)
+{
+	int ret;
+
+	ret = git_deflate_end_gently(stream);
+	if (ret != Z_OK)
+		return ret;
+	the_hash_algo->final_oid_fn(oid, c);
+
+	return Z_OK;
+}
+
 static int write_loose_object(const struct object_id *oid, char *hdr,
 			      int hdrlen, const void *buf, unsigned long len,
 			      time_t mtime, unsigned flags)
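The split is deliberately three-phase: start_loose_object_common() creates the tmpfile, initializes deflate, and hashes and compresses the header; write_loose_object_common() is one step of the deflate-hash-write loop; end_loose_object_common() finishes the zlib stream and finalizes the oid. write_loose_object() below and the new stream_loose_object() differ only in where their input bytes come from between start and end.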
@@ -1968,50 +2058,29 @@ static int write_loose_object(const struct object_id *oid, char *hdr,
 
 	loose_object_path(the_repository, &filename, oid);
 
-	fd = create_tmpfile(&tmp_file, filename.buf);
-	if (fd < 0) {
-		if (flags & HASH_SILENT)
-			return -1;
-		else if (errno == EACCES)
-			return error(_("insufficient permission for adding an object to repository database %s"), get_object_directory());
-		else
-			return error_errno(_("unable to create temporary file"));
-	}
-
-	/* Set it up */
-	git_deflate_init(&stream, zlib_compression_level);
-	stream.next_out = compressed;
-	stream.avail_out = sizeof(compressed);
-	the_hash_algo->init_fn(&c);
-
-	/* First header.. */
-	stream.next_in = (unsigned char *)hdr;
-	stream.avail_in = hdrlen;
-	while (git_deflate(&stream, 0) == Z_OK)
-		; /* nothing */
-	the_hash_algo->update_fn(&c, hdr, hdrlen);
+	fd = start_loose_object_common(&tmp_file, filename.buf, flags,
+				       &stream, compressed, sizeof(compressed),
+				       &c, hdr, hdrlen);
+	if (fd < 0)
+		return -1;
 
 	/* Then the data itself.. */
 	stream.next_in = (void *)buf;
 	stream.avail_in = len;
 	do {
 		unsigned char *in0 = stream.next_in;
-		ret = git_deflate(&stream, Z_FINISH);
-		the_hash_algo->update_fn(&c, in0, stream.next_in - in0);
-		if (write_buffer(fd, compressed, stream.next_out - compressed) < 0)
-			die(_("unable to write loose object file"));
-		stream.next_out = compressed;
-		stream.avail_out = sizeof(compressed);
+
+		ret = write_loose_object_common(&c, &stream, 1, in0, fd,
+						compressed, sizeof(compressed));
 	} while (ret == Z_OK);
 
 	if (ret != Z_STREAM_END)
 		die(_("unable to deflate new object %s (%d)"), oid_to_hex(oid),
 		    ret);
-	ret = git_deflate_end_gently(&stream);
+	ret = end_loose_object_common(&c, &stream, &parano_oid);
 	if (ret != Z_OK)
 		die(_("deflateEnd on object %s failed (%d)"), oid_to_hex(oid),
 		    ret);
-	the_hash_algo->final_oid_fn(&parano_oid, &c);
 	if (!oideq(oid, &parano_oid))
 		die(_("confused by unstable object source data for %s"),
 		    oid_to_hex(oid));
@@ -2050,6 +2119,110 @@ static int freshen_packed_object(const struct object_id *oid)
 	return 1;
 }
 
+int stream_loose_object(struct input_stream *in_stream, size_t len,
+			struct object_id *oid)
+{
+	int fd, ret, err = 0, flush = 0;
+	unsigned char compressed[4096];
+	git_zstream stream;
+	git_hash_ctx c;
+	struct strbuf tmp_file = STRBUF_INIT;
+	struct strbuf filename = STRBUF_INIT;
+	int dirlen;
+	char hdr[MAX_HEADER_LEN];
+	int hdrlen;
+
+	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
+		prepare_loose_object_bulk_checkin();
+
+	/* Since oid is not determined, save tmp file to odb path. */
+	strbuf_addf(&filename, "%s/", get_object_directory());
+	hdrlen = format_object_header(hdr, sizeof(hdr), OBJ_BLOB, len);
+
+	/*
+	 * Common steps for write_loose_object and stream_loose_object to
+	 * start writing loose objects:
+	 *
+	 *  - Create tmpfile for the loose object.
+	 *  - Setup zlib stream for compression.
+	 *  - Start to feed header to zlib stream.
+	 */
+	fd = start_loose_object_common(&tmp_file, filename.buf, 0,
+				       &stream, compressed, sizeof(compressed),
+				       &c, hdr, hdrlen);
+	if (fd < 0) {
+		err = -1;
+		goto cleanup;
+	}
+
+	/* Then the data itself.. */
+	do {
+		unsigned char *in0 = stream.next_in;
+
+		if (!stream.avail_in && !in_stream->is_finished) {
+			const void *in = in_stream->read(in_stream, &stream.avail_in);
+			stream.next_in = (void *)in;
+			in0 = (unsigned char *)in;
+			/* All data has been read. */
+			if (in_stream->is_finished)
+				flush = 1;
+		}
+		ret = write_loose_object_common(&c, &stream, flush, in0, fd,
+						compressed, sizeof(compressed));
+		/*
+		 * Unlike write_loose_object(), we do not have the entire
+		 * buffer. If we get Z_BUF_ERROR due to too few input bytes,
+		 * then we'll replenish them in the next input_stream->read()
+		 * call when we loop.
+		 */
+	} while (ret == Z_OK || ret == Z_BUF_ERROR);
+
+	if (stream.total_in != len + hdrlen)
+		die(_("write stream object %ld != %"PRIuMAX), stream.total_in,
+		    (uintmax_t)len + hdrlen);
+
+	/*
+	 * Common steps for write_loose_object and stream_loose_object to
+	 * end writing loose object:
+	 *
+	 *  - End the compression of zlib stream.
+	 *  - Get the calculated oid.
+	 */
+	if (ret != Z_STREAM_END)
+		die(_("unable to stream deflate new object (%d)"), ret);
+	ret = end_loose_object_common(&c, &stream, oid);
+	if (ret != Z_OK)
+		die(_("deflateEnd on stream object failed (%d)"), ret);
+	close_loose_object(fd, tmp_file.buf);
+
+	if (freshen_packed_object(oid) || freshen_loose_object(oid)) {
+		unlink_or_warn(tmp_file.buf);
+		goto cleanup;
+	}
+
+	loose_object_path(the_repository, &filename, oid);
+
+	/* We finally know the object path, and create the missing dir. */
+	dirlen = directory_size(filename.buf);
+	if (dirlen) {
+		struct strbuf dir = STRBUF_INIT;
+		strbuf_add(&dir, filename.buf, dirlen);
+
+		if (mkdir_in_gitdir(dir.buf) && errno != EEXIST) {
+			err = error_errno(_("unable to create directory %s"), dir.buf);
+			strbuf_release(&dir);
+			goto cleanup;
+		}
+		strbuf_release(&dir);
+	}
+
+	err = finalize_object_file(tmp_file.buf, filename.buf);
+cleanup:
+	strbuf_release(&tmp_file);
+	strbuf_release(&filename);
+	return err;
+}
+
 int write_object_file_flags(const void *buf, unsigned long len,
 			    enum object_type type, struct object_id *oid,
 			    unsigned flags)
--- a/object-store.h
+++ b/object-store.h
@@ -46,6 +46,12 @@ struct object_directory {
 	char *path;
 };
 
+struct input_stream {
+	const void *(*read)(struct input_stream *, unsigned long *len);
+	void *data;
+	int is_finished;
+};
+
 KHASH_INIT(odb_path_map, const char * /* key: odb_path */,
 	struct object_directory *, 1, fspathhash, fspatheq)
 
@@ -269,6 +275,8 @@ static inline int write_object_file(const void *buf, unsigned long len,
 int write_object_file_literally(const void *buf, unsigned long len,
 				const char *type, struct object_id *oid,
 				unsigned flags);
+int stream_loose_object(struct input_stream *in_stream, size_t len,
+			struct object_id *oid);
 
 /*
  * Add an object file to the in-memory object store, without writing it
--- /dev/null
+++ b/t/t5351-unpack-large-objects.sh
new file mode 100755
@@ -0,0 +1,76 @@
+#!/bin/sh
+#
+# Copyright (c) 2022 Han Xin
+#
+
+test_description='git unpack-objects with large objects'
+
+. ./test-lib.sh
+
+prepare_dest () {
+	test_when_finished "rm -rf dest.git" &&
+	git init --bare dest.git &&
+	git -C dest.git config core.bigFileThreshold "$1"
+}
+
+test_expect_success "create large objects (1.5 MB) and PACK" '
+	test-tool genrandom foo 1500000 >big-blob &&
+	test_commit --append foo big-blob &&
+	test-tool genrandom bar 1500000 >big-blob &&
+	test_commit --append bar big-blob &&
+	PACK=$(echo HEAD | git pack-objects --revs pack) &&
+	git verify-pack -v pack-$PACK.pack >out &&
+	sed -n -e "s/^\([0-9a-f][0-9a-f]*\).*\(commit\|tree\|blob\).*/\1/p" \
+		<out >obj-list
+'
+
+test_expect_success 'set memory limitation to 1MB' '
+	GIT_ALLOC_LIMIT=1m &&
+	export GIT_ALLOC_LIMIT
+'
+
+test_expect_success 'unpack-objects failed under memory limitation' '
+	prepare_dest 2m &&
+	test_must_fail git -C dest.git unpack-objects <pack-$PACK.pack 2>err &&
+	grep "fatal: attempting to allocate" err
+'
+
+test_expect_success 'unpack-objects works with memory limitation in dry-run mode' '
+	prepare_dest 2m &&
+	git -C dest.git unpack-objects -n <pack-$PACK.pack &&
+	test_stdout_line_count = 0 find dest.git/objects -type f &&
+	test_dir_is_empty dest.git/objects/pack
+'
+
+test_expect_success 'unpack big object in stream' '
+	prepare_dest 1m &&
+	git -C dest.git unpack-objects <pack-$PACK.pack &&
+	test_dir_is_empty dest.git/objects/pack
+'
+
+BATCH_CONFIGURATION='-c core.fsync=loose-object -c core.fsyncmethod=batch'
+
+test_expect_success 'unpack big object in stream (core.fsyncmethod=batch)' '
+	prepare_dest 1m &&
+	GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
+	git -C dest.git $BATCH_CONFIGURATION unpack-objects <pack-$PACK.pack &&
+	grep fsync/hardware-flush trace2.txt &&
+	test_dir_is_empty dest.git/objects/pack &&
+	git -C dest.git cat-file --batch-check="%(objectname)" <obj-list >current &&
+	cmp obj-list current
+'
+
+test_expect_success 'do not unpack existing large objects' '
+	prepare_dest 1m &&
+	git -C dest.git index-pack --stdin <pack-$PACK.pack &&
+	git -C dest.git unpack-objects <pack-$PACK.pack &&
+
+	# The destination came up with the exact same pack...
+	DEST_PACK=$(echo dest.git/objects/pack/pack-*.pack) &&
+	test_cmp pack-$PACK.pack $DEST_PACK &&
+
+	# ...and wrote no loose objects
+	test_stdout_line_count = 0 find dest.git/objects -type f ! -name "pack-*"
+'
+
+test_done
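The memory-limit tests lean on GIT_ALLOC_LIMIT, which makes xmalloc() and friends die ("attempting to allocate ... over limit") for any single allocation above the limit; unpacking 1.5 MB blobs under a 1 MB cap therefore only passes if no code path ever buffers a whole object.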