diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt
index 87e4c04836..3ea3124f7f 100644
--- a/Documentation/config/core.txt
+++ b/Documentation/config/core.txt
@@ -468,8 +468,8 @@ usage, at the slight expense of increased disk usage.
 * Will generally be streamed when written, which avoids excessive
 memory usage, at the cost of some fixed overhead. Commands that make
 use of this include linkgit:git-archive[1],
-linkgit:git-fast-import[1], linkgit:git-index-pack[1] and
-linkgit:git-fsck[1].
+linkgit:git-fast-import[1], linkgit:git-index-pack[1],
+linkgit:git-unpack-objects[1] and linkgit:git-fsck[1].
 
 core.excludesFile::
 	Specifies the pathname to the file that contains patterns to
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 32e8b47059..43789b8ef2 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -351,6 +351,95 @@ static void unpack_non_delta_entry(enum object_type type, unsigned long size,
 		write_object(nr, type, buf, size);
 }
 
+/*
+ * State shared between stream_blob() and its read callback: the zlib
+ * stream being inflated, a scratch buffer that receives each batch of
+ * inflated bytes, and the status of the most recent git_inflate() call.
+ */
+struct input_zstream_data {
+	git_zstream *zstream;
+	unsigned char buf[8192];
+	int status;
+};
+
+/*
+ * "read" callback for struct input_stream: inflate the next piece of
+ * the pack input into data->buf, store the number of bytes produced in
+ * *readlen and return the buffer.  Once the stream has been marked
+ * finished, *readlen is set to 0 and NULL is returned.
+ */
+static const void *feed_input_zstream(struct input_stream *in_stream,
+				      unsigned long *readlen)
+{
+	struct input_zstream_data *data = in_stream->data;
+	git_zstream *zstream = data->zstream;
+	void *in;
+
+	if (in_stream->is_finished) {
+		*readlen = 0;
+		return NULL;
+	}
+
+	/*
+	 * Ask for more input only when there is something left to
+	 * inflate.  fill(), use() and len are defined outside this
+	 * hunk; presumably fill() may read from the pack input, so it
+	 * is kept out of the "already finished" path.
+	 */
+	in = fill(1);
+
+	zstream->next_out = data->buf;
+	zstream->avail_out = sizeof(data->buf);
+	zstream->next_in = in;
+	zstream->avail_in = len;
+
+	data->status = git_inflate(zstream, 0);
+
+	/* Any status but Z_OK (end of stream or error) stops the feed. */
+	in_stream->is_finished = data->status != Z_OK;
+	use(len - zstream->avail_in);
+	*readlen = sizeof(data->buf) - zstream->avail_out;
+
+	return data->buf;
+}
+
+/*
+ * Write blob number "nr" of "size" bytes by handing stream_loose_object()
+ * a callback that inflates the pack input piece by piece, instead of
+ * going through unpack_non_delta_entry().  Dies if the write fails or if
+ * inflation did not end with Z_STREAM_END.
+ */
+static void stream_blob(unsigned long size, unsigned nr)
+{
+	git_zstream zstream = { 0 };
+	struct input_zstream_data data = { 0 };
+	struct input_stream in_stream = {
+		.read = feed_input_zstream,
+		.data = &data,
+	};
+	struct obj_info *info = &obj_list[nr];
+
+	data.zstream = &zstream;
+	git_inflate_init(&zstream);
+
+	if (stream_loose_object(&in_stream, size, &info->oid))
+		die(_("failed to write object in stream"));
+
+	/* The last inflate status must be a clean end of stream. */
+	if (data.status != Z_STREAM_END)
+		die(_("inflate returned (%d)"), data.status);
+	git_inflate_end(&zstream);
+
+	if (strict) {
+		struct blob *blob = lookup_blob(the_repository, &info->oid);
+
+		if (!blob)
+			die(_("invalid blob object from stream"));
+		blob->object.flags |= FLAG_WRITTEN;
+	}
+	info->obj = NULL;
+}
+
 static int resolve_against_held(unsigned nr, const struct object_id *base,
 				void *delta_data, unsigned long delta_size)
 {
@@ -483,9 +572,15 @@ static void unpack_one(unsigned nr)
 	}
 
 	switch (type) {
+	case OBJ_BLOB:
+		/* Stream big blobs, except in dry-run mode. */
+		if (!dry_run && size > big_file_threshold) {
+			stream_blob(size, nr);
+			return;
+		}
+		/* fallthrough */
 	case OBJ_COMMIT:
 	case OBJ_TREE:
-	case OBJ_BLOB:
 	case OBJ_TAG:
 		unpack_non_delta_entry(type, size, nr);
 		return;
diff --git a/t/t5351-unpack-large-objects.sh b/t/t5351-unpack-large-objects.sh
index 8d84313221..8ce8aa3b14 100755
--- a/t/t5351-unpack-large-objects.sh
+++ b/t/t5351-unpack-large-objects.sh
@@ -9,7 +9,8 @@ test_description='git unpack-objects with large objects'
 
 prepare_dest () {
 	test_when_finished "rm -rf dest.git" &&
-	git init --bare dest.git
+	git init --bare dest.git &&
+	git -C dest.git config core.bigFileThreshold "$1"
 }
 
 test_expect_success "create large objects (1.5 MB) and PACK" '
@@ -17,7 +18,10 @@ test_expect_success "create large objects (1.5 MB) and PACK" '
 	test_commit --append foo big-blob &&
 	test-tool genrandom bar 1500000 >big-blob &&
 	test_commit --append bar big-blob &&
-	PACK=$(echo HEAD | git pack-objects --revs pack)
+	PACK=$(echo HEAD | git pack-objects --revs pack) &&
+	git verify-pack -v pack-$PACK.pack >out &&
+	sed -n -e "s/^\([0-9a-f][0-9a-f]*\).*\(commit\|tree\|blob\).*/\1/p" \
+		<out >obj-list
 '
 
 test_expect_success 'set memory limitation to 1MB' '
@@ -26,16 +30,47 @@ test_expect_success 'set memory limitation to 1MB' '
 '
 
 test_expect_success
'unpack-objects failed under memory limitation' ' - prepare_dest && + prepare_dest 2m && test_must_fail git -C dest.git unpack-objects err && grep "fatal: attempting to allocate" err ' test_expect_success 'unpack-objects works with memory limitation in dry-run mode' ' - prepare_dest && + prepare_dest 2m && git -C dest.git unpack-objects -n current && + cmp obj-list current +' + +test_expect_success 'do not unpack existing large objects' ' + prepare_dest 1m && + git -C dest.git index-pack --stdin