unpack-objects: low memory footprint for get_data() in dry_run mode
As the name implies, "get_data(size)" will allocate and return the given
amount of memory. Allocating memory for a large blob object may therefore
cause the system to run out of memory. In preparation for replacing the
"get_data()" call used to unpack large blob objects in later commits,
refactor "get_data()" to reduce its memory footprint in dry_run mode.
Because in dry_run mode "get_data()" is only used to check the integrity
of the data and the returned buffer is not used at all, we can allocate a
smaller buffer and use it as the zstream output. Make the function return
NULL in dry_run mode, as no caller uses the returned buffer.
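For illustration only, here is a minimal sketch of the same bounded-buffer idea using plain zlib rather than Git's internal git_zstream wrappers; the 8192-byte window, the helper name and its signature are assumptions made for the example:

	/*
	 * Sketch: verify that a deflated stream inflates to exactly
	 * "size" bytes without holding the whole result in memory,
	 * by reusing one small output window.
	 */
	#include <string.h>
	#include <zlib.h>

	static int check_inflate_size(unsigned char *in, unsigned long in_len,
				      unsigned long size)
	{
		unsigned char window[8192];	/* small, reused output buffer */
		z_stream s;
		int ret;

		memset(&s, 0, sizeof(s));
		if (inflateInit(&s) != Z_OK)
			return -1;
		s.next_in = in;
		s.avail_in = in_len;

		do {
			/* discard previous output; only the byte count matters */
			s.next_out = window;
			s.avail_out = sizeof(window);
			ret = inflate(&s, Z_NO_FLUSH);
		} while (ret == Z_OK);

		inflateEnd(&s);
		/* success only if the stream ended with the expected size */
		return (ret == Z_STREAM_END && s.total_out == size) ? 0 : -1;
	}

The point of the sketch is the same as in the patch below: only total_out and the end-of-stream state are needed to judge integrity, so the output buffer never has to grow with the object size.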
The "find [...]objects/?? -type f | wc -l" test idiom being used here
is adapted from the same "find" use added to another test in
d9545c7f46
(fast-import: implement unpack limit, 2016-04-25).
Suggested-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Han Xin <chiyutianyi@gmail.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
parent ab336e8f1c
commit a1bf5ca29f
builtin/unpack-objects.c
@@ -97,15 +97,27 @@ static void use(int bytes)
 	display_throughput(progress, consumed_bytes);
 }
 
+/*
+ * Decompress zstream from the standard input into a newly
+ * allocated buffer of specified size and return the buffer.
+ * The caller is responsible to free the returned buffer.
+ *
+ * But for dry_run mode, "get_data()" is only used to check the
+ * integrity of data, and the returned buffer is not used at all.
+ * Therefore, in dry_run mode, "get_data()" will release the small
+ * allocated buffer which is reused to hold temporary zstream output
+ * and return NULL instead of returning garbage data.
+ */
 static void *get_data(unsigned long size)
 {
 	git_zstream stream;
-	void *buf = xmallocz(size);
+	unsigned long bufsize = dry_run && size > 8192 ? 8192 : size;
+	void *buf = xmallocz(bufsize);
 
 	memset(&stream, 0, sizeof(stream));
 
 	stream.next_out = buf;
-	stream.avail_out = size;
+	stream.avail_out = bufsize;
 	stream.next_in = fill(1);
 	stream.avail_in = len;
 	git_inflate_init(&stream);
@@ -125,8 +137,17 @@ static void *get_data(unsigned long size)
 		}
 		stream.next_in = fill(1);
 		stream.avail_in = len;
+		if (dry_run) {
+			/* reuse the buffer in dry_run mode */
+			stream.next_out = buf;
+			stream.avail_out = bufsize > size - stream.total_out ?
+						size - stream.total_out :
+						bufsize;
+		}
 	}
 	git_inflate_end(&stream);
+	if (dry_run)
+		FREE_AND_NULL(buf);
 	return buf;
 }
 
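A brief worked example of the avail_out clamp in the hunk above (numbers are illustrative): with size = 20000 and bufsize = 8192, successive iterations set avail_out to 8192, 8192 and finally 3616 (the remaining 20000 - 16384 bytes), so stream.total_out can never exceed size and the completion check earlier in the loop (total_out == size with Z_STREAM_END, not shown in this hunk) still catches truncated or corrupt input.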
@@ -326,10 +347,8 @@ static void unpack_non_delta_entry(enum object_type type, unsigned long size,
 {
 	void *buf = get_data(size);
 
-	if (!dry_run && buf)
+	if (buf)
 		write_object(nr, type, buf, size);
-	else
-		free(buf);
 }
 
 static int resolve_against_held(unsigned nr, const struct object_id *base,
@@ -359,10 +378,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 		oidread(&base_oid, fill(the_hash_algo->rawsz));
 		use(the_hash_algo->rawsz);
 		delta_data = get_data(delta_size);
-		if (dry_run || !delta_data) {
-			free(delta_data);
+		if (!delta_data)
 			return;
-		}
 		if (has_object_file(&base_oid))
 			; /* Ok we have this one */
 		else if (resolve_against_held(nr, &base_oid,
@@ -398,10 +415,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 			die("offset value out of bound for delta base object");
 
 		delta_data = get_data(delta_size);
-		if (dry_run || !delta_data) {
-			free(delta_data);
+		if (!delta_data)
 			return;
-		}
 		lo = 0;
 		hi = nr;
 		while (lo < hi) {
t/t5351-unpack-large-objects.sh (new executable file, 41 lines)
@@ -0,0 +1,41 @@
+#!/bin/sh
+#
+# Copyright (c) 2022 Han Xin
+#
+
+test_description='git unpack-objects with large objects'
+
+. ./test-lib.sh
+
+prepare_dest () {
+	test_when_finished "rm -rf dest.git" &&
+	git init --bare dest.git
+}
+
+test_expect_success "create large objects (1.5 MB) and PACK" '
+	test-tool genrandom foo 1500000 >big-blob &&
+	test_commit --append foo big-blob &&
+	test-tool genrandom bar 1500000 >big-blob &&
+	test_commit --append bar big-blob &&
+	PACK=$(echo HEAD | git pack-objects --revs pack)
+'
+
+test_expect_success 'set memory limitation to 1MB' '
+	GIT_ALLOC_LIMIT=1m &&
+	export GIT_ALLOC_LIMIT
+'
+
+test_expect_success 'unpack-objects failed under memory limitation' '
+	prepare_dest &&
+	test_must_fail git -C dest.git unpack-objects <pack-$PACK.pack 2>err &&
+	grep "fatal: attempting to allocate" err
+'
+
+test_expect_success 'unpack-objects works with memory limitation in dry-run mode' '
+	prepare_dest &&
+	git -C dest.git unpack-objects -n <pack-$PACK.pack &&
+	test_stdout_line_count = 0 find dest.git/objects -type f &&
+	test_dir_is_empty dest.git/objects/pack
+'
+
+test_done
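As a usage illustration mirroring the tests above (dest.git and the pack-$PACK.pack name come from the test setup and are not fixed paths), a pack can now be integrity-checked under a tight allocation limit without writing any objects:

	GIT_ALLOC_LIMIT=1m git -C dest.git unpack-objects -n <pack-$PACK.pack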