2011-05-08 10:47:35 +02:00
|
|
|
#!/bin/sh
|
|
|
|
# Copyright (c) 2011, Google Inc.
|
|
|
|
|
|
|
|
test_description='adding and checking out large blobs'
|
|
|
|
|
|
|
|
. ./test-lib.sh
|
|
|
|
|
|
|
|
# Create the fixtures shared by the rest of this script.
#
# The big files are written with printf rather than dd, because dd was seen
# to segfault randomly on a Windows box.  They are space-padded text, so
# they compress well and keep the run time short; since they are larger
# than core.bigfilethreshold, Git still diagnoses them as binary.
test_expect_success setup '
	# clone does not allow us to pass core.bigfilethreshold to
	# new repos, so set core.bigfilethreshold globally
	git config --global core.bigfilethreshold 200k &&

	# large1, large2 and large3 are exact copies of each other
	printf "%2000000s" X >large1 &&
	for copy in large2 large3
	do
		cp large1 "$copy" || return 1
	done &&
	printf "%2500000s" Y >huge &&

	# the limit is lower than the size of any fixture created above;
	# it is exported so that git commands run after this point see it
	GIT_ALLOC_LIMIT=1500k &&
	export GIT_ALLOC_LIMIT
'
|
|
|
|
|
2021-11-11 06:18:55 +01:00
|
|
|
# Run fsck over a throwaway bare repository with the threshold lowered to
# 4 bytes, so that even the tiny blob written below is above it.
test_expect_success 'enter "large" codepath, with small core.bigFileThreshold' '
	test_when_finished "rm -rf repo" &&

	git init --bare repo &&
	# six bytes of content ("large" plus a newline)
	printf "large\n" | git -C repo hash-object -w --stdin &&
	git -C repo -c core.bigfilethreshold=4 fsck
'
|
|
|
|
|
2016-11-16 02:42:40 +01:00
|
|
|
# add a large file with different settings
#
# Each line of the here-document below reads "<expect> <config>".  <config>
# is passed to git ahead of the "add" subcommand, and <expect> states how
# big the resulting packfile must be:
#
#   small - at most 100000 bytes
#   large - at least 100000 bytes
#
# Any other keyword makes the test fail, so that a typo in the table cannot
# turn a row into a test that checks nothing.
while read expect config
do
	test_expect_success "add with $config" '
		# start every row from a clean slate: no packs, no index
		test_when_finished "rm -f .git/objects/pack/pack-*.* .git/index" &&
		# $config is deliberately unquoted; it holds several words
		git $config add large1 &&
		sz=$(test_file_size .git/objects/pack/pack-*.pack) &&
		case "$expect" in
		small) test "$sz" -le 100000 ;;
		large) test "$sz" -ge 100000 ;;
		*) echo >&2 "unknown expectation: $expect"; false ;;
		esac
	'
done <<\EOF
large -c core.compression=0
small -c core.compression=9
large -c core.compression=0 -c pack.compression=0
large -c core.compression=9 -c pack.compression=0
small -c core.compression=0 -c pack.compression=9
small -c core.compression=9 -c pack.compression=9
large -c pack.compression=0
small -c pack.compression=9
EOF
|
|
|
|
|
2011-10-28 23:48:40 +02:00
|
|
|
# Succeed only if .git/objects/pack holds exactly one packfile and that
# packfile has a matching .idx next to it.  The path of the index is left
# in $idx for the caller.  An unmatched glob leaves the literal pattern in
# $p, which then fails the file check.
check_single_pack () {
	count=0 idx= &&
	for p in .git/objects/pack/pack-*.pack
	do
		count=$(( $count + 1 )) &&
		test_path_is_file "$p" &&
		idx=${p%.pack}.idx &&
		test_path_is_file "$idx" || return 1
	done &&
	test $count = 1
}

# Succeed only if no path matching .git/objects/$OIDPATH_REGEX is a regular
# file.  Plain "test -f" is used here because finding nothing is the
# expected outcome.
check_no_loose_objects () {
	for l in .git/objects/$OIDPATH_REGEX
	do
		if test -f "$l"
		then
			echo >&2 "unexpected loose object: $l"
			return 1
		fi
	done
}

test_expect_success 'add a large file or two' '
	git add large1 huge large2 &&
	# make sure we got a single packfile and no loose objects
	check_single_pack &&
	# $cnt stays unquoted so that any padding printed by wc is dropped
	cnt=$(git show-index <"$idx" | wc -l) &&
	test $cnt = 2 &&
	check_no_loose_objects &&

	# attempt to add another copy of the same
	git add large3 &&
	check_single_pack
'
|
|
|
|
|
|
|
|
# Register a second index entry, "another", that points at the blob already
# staged as large1, check it out, and compare the result with large1.
test_expect_success 'checkout a large file' '
	blob=$(git rev-parse :large1) &&
	git update-index --add --cacheinfo 100644 "$blob" another &&
	git checkout another &&
	test_cmp large1 another
'
|
|
|
|
|
|
|
|
# Add three files that are each above core.bigfilethreshold to a repository
# whose pack.packsizelimit only has room for the first two, then verify that
# two packs were written and that together they hold exactly the three
# blobs.
#
# The output of git commands is collected in files before it is filtered,
# so that a failing git command fails the test instead of having its exit
# status hidden by a pipe.
test_expect_success 'packsize limit' '
	test_create_repo mid &&
	(
		cd mid &&
		git config core.bigfilethreshold 64k &&
		git config pack.packsizelimit 256k &&

		# mid1 and mid2 will fit within 256k limit but
		# appending mid3 will bust the limit and will
		# result in a separate packfile.
		test-tool genrandom "a" $(( 66 * 1024 )) >mid1 &&
		test-tool genrandom "b" $(( 80 * 1024 )) >mid2 &&
		test-tool genrandom "c" $(( 128 * 1024 )) >mid3 &&
		git add mid1 mid2 mid3 &&

		count=0 &&
		for pi in .git/objects/pack/pack-*.idx
		do
			test_path_is_file "$pi" && count=$(( $count + 1 ))
		done &&
		test $count = 2 &&

		# object names we expect to find in the packs
		git hash-object --stdin <mid1 >expect.raw &&
		git hash-object --stdin <mid2 >>expect.raw &&
		git hash-object --stdin <mid3 >>expect.raw &&
		sort expect.raw >expect &&

		# object names actually listed by the pack indexes
		for pi in .git/objects/pack/pack-*.idx
		do
			git show-index <"$pi" || exit 1
		done >actual.raw &&
		sed -e "s/^[0-9]* \([0-9a-f]*\) .*/\1/" actual.raw |
		sort >actual &&

		test_cmp expect actual
	)
'
|
|
|
|
|
2012-03-07 11:54:16 +01:00
|
|
|
# Record two commits that differ only in large1, then make sure a raw diff
# against the first of them runs to completion.  The later diff tests reuse
# these two commits as HEAD^ and HEAD.
test_expect_success 'diff --raw' '
	git commit -q -m initial &&

	printf "modified\n" >>large1 &&
	git add large1 &&
	git commit -q -m modified &&

	git diff --raw HEAD^
'
|
|
|
|
|
2014-08-16 05:08:05 +02:00
|
|
|
# Diff the two commits that differ in large1.  The file is larger than the
# core.bigfilethreshold configured by the setup test, and the patch-format
# tests check that it is reported as a binary change.

test_expect_success 'diff --stat' '
	git diff --stat HEAD^ HEAD
'

test_expect_success 'diff' '
	git diff HEAD^ HEAD >actual &&
	grep "Binary files.*differ" actual
'

test_expect_success 'diff --cached' '
	git diff --cached HEAD^ >actual &&
	grep "Binary files.*differ" actual
'
|
|
|
|
|
2012-03-07 11:54:16 +01:00
|
|
|
# Commands that read the large blob, either from the worktree file or from
# the index entry for large1.  Each test only requires the command to exit
# successfully; where a command prints the blob, the output is discarded.

test_expect_success 'hash-object' '
	git hash-object large1
'

test_expect_success 'cat-file a large file' '
	git cat-file blob :large1 >/dev/null
'

test_expect_success 'cat-file a large file from a tag' '
	# an annotated tag that points directly at the blob
	git tag -m largefile largefiletag :large1 &&
	git cat-file blob largefiletag >/dev/null
'

test_expect_success 'git-show a large file' '
	git show :large1 >/dev/null

'
|
|
|
|
|
2012-05-23 16:09:47 +02:00
|
|
|
# Clone this repository through a file:// URL and verify the packs in the
# clone with index-pack.  GIT_DIR names a path that is never created in
# this script; presumably this makes index-pack run without a usable
# repository -- confirm against the index-pack documentation.
test_expect_success 'index-pack' '
	git clone "file://$(pwd)/.git" foo &&
	GIT_DIR=non-existent git index-pack --object-format="$(test_oid algo)" \
		--strict --verify foo/.git/objects/pack/*.pack
'
|
|
|
|
|
2012-03-07 11:54:21 +01:00
|
|
|
# Repack the repository that contains the large blobs; the test only
# requires the command to exit successfully.
test_expect_success 'repack' '
	git repack -a -d
'
|
|
|
|
|
2012-05-26 12:28:01 +02:00
|
|
|
# Move the blob for "huge" into a fresh repository ("loose") by way of
# unpack-objects, pack it again from there, install that pack into a second
# fresh repository ("packed") and check that the blob read back from it is
# identical to the original file.
#
# The pack stream is written to a file instead of being piped straight into
# unpack-objects, so that a failure of pack-objects is not hidden by the
# pipe.  The file name is chosen so that it does not match the "pack-*"
# glob used further down.
test_expect_success 'pack-objects with large loose object' '
	SHA1=$(git hash-object huge) &&
	test_create_repo loose &&
	echo "$SHA1" | git pack-objects --stdout >huge.pack &&
	# NOTE(review): GIT_ALLOC_LIMIT=0 presumably lifts the limit that
	# the setup test exported -- confirm
	GIT_ALLOC_LIMIT=0 GIT_DIR=loose/.git git unpack-objects <huge.pack &&
	echo "$SHA1" | GIT_DIR=loose/.git git pack-objects pack &&
	test_create_repo packed &&
	mv pack-* packed/.git/objects/pack &&
	GIT_DIR=packed/.git git cat-file blob "$SHA1" >actual &&
	test_cmp huge actual
'
|
|
|
|
|
2019-11-05 18:07:25 +01:00
|
|
|
# Archive HEAD, which contains the large blobs, in each supported layout.
# Only the exit status matters, so the archive itself is thrown away.

test_expect_success 'tar archiving' '
	git archive --format=tar HEAD >/dev/null
'

# -0 is what the test title calls "store only"
test_expect_success 'zip archiving, store only' '
	git archive --format=zip -0 HEAD >/dev/null
'

test_expect_success 'zip archiving, deflate' '
	git archive --format=zip HEAD >/dev/null
'
|
|
|
|
|
fsck: use streaming interface for large blobs in pack
For blobs, we want to make sure the on-disk data is not corrupted
(i.e. can be inflated and produce the expected SHA-1). Blob content is
opaque, there's nothing else inside to check for.
For really large blobs, we may want to avoid unpacking the entire blob
in memory, just to check whether it produces the same SHA-1. On 32-bit
systems, we may not have enough virtual address space for such memory
allocation. And even on 64-bit where it's not a problem, allocating a
lot more memory could result in kicking other parts of systems to swap
file, generating lots of I/O and slowing everything down.
For this particular operation, not unpacking the blob and letting
check_sha1_signature, which supports streaming interface, do the job
is sufficient. check_sha1_signature() is not shown in the diff,
unfortunately. But it will be called when "data_valid && !data" is
false.
We will call the callback function "fn" with NULL as "data". The only
callback of this function is fsck_obj_buffer(), which does not touch
"data" at all if it's a blob.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-13 17:44:04 +02:00
|
|
|
# fsck must succeed on the repository holding the large blobs and must not
# write anything to its standard error.
test_expect_success 'fsck large blobs' '
	git fsck 2>err &&
	test_must_be_empty err
'
|
|
|
|
|
2011-05-08 10:47:35 +02:00
|
|
|
test_done
|