2005-06-02 22:50:17 +02:00
|
|
|
#!/bin/sh
|
|
|
|
#
|
|
|
|
# Copyright (C) 2005 Rene Scharfe
|
|
|
|
#
|
|
|
|
|
2013-11-10 16:47:29 +01:00
|
|
|
# Help text shown for this test script; it summarizes what the tests
# below cover.
test_description='git archive and git get-tar-commit-id test

This test covers the topics of file contents, commit date handling and
commit id embedding:

  The contents of the repository are compared to the extracted tar
  archive.  The repository contains simple text files, symlinks, a
  binary file (a/bin/sh), a file name longer than 100 characters and
  a deeply nested path.

  git archive applies the commit date to every file in the archive it
  creates.  The test sets the commit date to a specific value and checks
  if the tar archive contains that value.

  When giving git archive a commit id (in contrast to a tree id) it
  embeds this commit id into the tar archive as a comment.  The test
  checks the ability of git get-tar-commit-id to figure it out from the
  tar file.

'
|
|
|
|
|
2022-06-03 13:15:06 +02:00
|
|
|
TEST_CREATE_REPO_NO_TEMPLATE=1
|
2005-06-02 22:50:17 +02:00
|
|
|
. ./test-lib.sh
|
|
|
|
|
2007-09-06 18:51:11 +02:00
|
|
|
SUBSTFORMAT=%H%n
|
2007-09-03 20:07:01 +02:00
|
|
|
|
2013-05-20 11:58:29 +02:00
|
|
|
# Satisfied when extracting the sample archive t5000/pax.tar leaves a
# plain file named PaxHeaders.1791/file behind.  Presumably that means
# "$TAR" does not interpret pax extended headers itself (TODO confirm).
test_lazy_prereq TAR_NEEDS_PAX_FALLBACK '
	mkdir pax &&
	(
		cd pax &&
		"$TAR" xf "$TEST_DIRECTORY/t5000/pax.tar" &&
		test -f PaxHeaders.1791/file
	)
'
|
|
|
|
|
t5000: simplify gzip prerequisite checks
In t5000, we test the built-in ".tar.gz" config for
git-archive. To make our tests portable, we check that we
have a way to both gzip and gunzip, and we respected
environment variables to point to alternate commands for
doing these operations.
However, the $GZIP variable did not actually do anything, as
changing it would not affect the baked-in value in
archive-tar.c. Moreover, setting the variable $GZIP
influences gzip itself. From the gzip man page:
The environment variable GZIP can hold a set of default
options for gzip. These options are interpreted first and
can be overwritten by explicit command line parameters.
We could rename this variable, and use it to set up custom
config (or even have a Makefile knob to affect the built
binary), but it is not worth the trouble; nobody has ever
reported a problem with the baked-in default, and they can
always change it via config if they need to. Let's just drop
the variable and use "gzip" in the test (keeping the
prerequisite, of course).
While we're at it, we can drop the GUNZIP variable and
prerequisite; it uses "gzip -d", so if we have GZIP, we
will have both.
We can also use test_lazy_prereq for the gzip prerequisite,
which is simpler and behaves more consistently with the rest
of git (e.g., by making output available when the test is
run with "-v").
Noticed-by: Christian Hesse <mail@eworm.de>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-03 14:21:40 +01:00
|
|
|
test_lazy_prereq GZIP 'gzip --version'
|
|
|
|
|
2013-05-20 11:58:29 +02:00
|
|
|
# get_pax_header <file> <keyword>
#
# Print, one per line, the value of every "<keyword>=<value>" record in
# the pax extended header <file>.
#
# A record is a line of the form "<len> <keyword>=<value>", where <len>
# must equal the size of the whole line including the length field and
# the terminating newline; lines failing that check are ignored.  Values
# that contain newlines are therefore not supported.
#
# Sets the global variables file, header, line, len and rest.
get_pax_header() {
	file=$1
	header=$2=

	# IFS= and -r keep the line byte-for-byte: backslashes and trailing
	# blanks are part of the value and count towards the record length.
	while IFS= read -r line
	do
		len=${line%% *}
		rest=${line#* }

		# printf never interprets backslash escapes, unlike some echo
		# implementations.  The command substitution stays unquoted on
		# purpose so that padding printed by wc is split off.
		if test "$len" = $(printf "%s\n" "$line" | wc -c)
		then
			# Quote the keyword so it is matched literally and not
			# as a glob pattern.
			case "$rest" in
			"$header"*)
				printf "%s\n" "${rest#"$header"}"
				;;
			esac
		fi
	done <"$file"
}
|
|
|
|
|
2013-05-20 11:58:26 +02:00
|
|
|
# check_tar <name> [<prefix>]
#
# Register tests that extract <name>.tar into the new directory <name>
# and compare the tree "<prefix>a" found in there with the reference
# tree "a" (contents) and with "a.lst" (sorted list of file names).
# The list of extracted names is written to <name>.lst.
#
# When the TAR_NEEDS_PAX_FALLBACK prerequisite is met, each extracted
# "*.data" entry that has a "*.paxheader" sibling is renamed to the
# "path" value recorded in that header.
#
# Sets the globals tarfile, listfile, dir and dir_with_prefix; the
# single-quoted test bodies read them when they are evaluated.  All
# expansions are quoted so that names with blanks or glob characters
# are handled as single words.
check_tar() {
	tarfile=$1.tar
	listfile=$1.lst
	dir=$1
	dir_with_prefix=$dir/$2

	test_expect_success ' extract tar archive' '
		(mkdir "$dir" && cd "$dir" && "$TAR" xf -) <"$tarfile"
	'

	test_expect_success TAR_NEEDS_PAX_FALLBACK ' interpret pax headers' '
		(
			cd "$dir" &&
			for header in *.paxheader
			do
				data=${header%.paxheader}.data &&
				if test -h "$data" || test -e "$data"
				then
					path=$(get_pax_header "$header" path) &&
					if test -n "$path"
					then
						mv "$data" "$path" || exit 1
					fi
				fi
			done
		)
	'

	test_expect_success ' validate filenames' '
		(cd "${dir_with_prefix}a" && find .) | sort >"$listfile" &&
		test_cmp a.lst "$listfile"
	'

	test_expect_success ' validate file contents' '
		diff -r a "${dir_with_prefix}a"
	'
}
|
|
|
|
|
2020-09-19 23:23:42 +02:00
|
|
|
# check_added <dir> <path_in_fs> <path_in_archive>
#
# Register a test that compares <path_in_fs> with the entry
# <path_in_archive> below the extraction directory <dir> using
# "diff -r".
#
# Sets the globals dir, path_in_fs and path_in_archive; the test body
# reads them when it is evaluated.  The paths are quoted so that blanks
# or glob characters in them do not split or expand.
check_added() {
	dir=$1
	path_in_fs=$2
	path_in_archive=$3

	test_expect_success " validate extra file $path_in_archive" '
		diff -r "$path_in_fs" "$dir/$path_in_archive"
	'
}
|
|
|
|
|
2019-06-29 00:59:22 +02:00
|
|
|
# Feed the sha1 and sha256 values of the key "obj" to test_oid_cache.
test_expect_success 'setup' '
	test_oid_cache <<-\EOF
	obj sha1:19f9c8273ec45a8938e6999cb59b3ff66739902a
	obj sha256:3c666f798798601571f5cec0adb57ce4aba8546875e7693177e0535f34d2c49b
	EOF
'
|
|
|
|
|
2021-05-31 18:56:24 +02:00
|
|
|
# Build the reference tree "a" and record its sorted list of names in
# a.lst; check_tar compares extracted archives against both.
test_expect_success 'populate workdir' '
	mkdir a &&
	echo simple textfile >a/a &&

	# a file name longer than 100 characters
	ten=0123456789 &&
	hundred="$ten$ten$ten$ten$ten$ten$ten$ten$ten$ten" &&
	echo long filename >"a/four$hundred" &&

	# generated data standing in for a binary file
	mkdir a/bin &&
	test-tool genrandom "frotz" 500000 >a/bin/sh &&

	printf "A\$Format:%s\$O" "$SUBSTFORMAT" >a/substfile1 &&
	printf "A not substituted O" >a/substfile2 &&

	# a regular file takes the place of the link without SYMLINKS
	if test_have_prereq SYMLINKS
	then
		ln -s a a/l1
	else
		printf %s a >a/l1
	fi &&

	# five nested directories with a file at the bottom
	(
		component=long_path_to_a_file &&
		cd a &&
		for level in 1 2 3 4 5
		do
			mkdir "$component" &&
			cd "$component" || exit 1
		done &&
		echo text >file_with_long_path
	) &&

	(cd a && find .) | sort >a.lst
'
|
2005-06-02 22:50:17 +02:00
|
|
|
|
2008-06-08 18:42:33 +02:00
|
|
|
# Create a/ignored and mark it export-ignore in .git/info/attributes.
test_expect_success 'add ignored file' '
	echo ignore me >a/ignored &&
	mkdir .git/info &&
	echo ignored export-ignore >.git/info/attributes
'
|
2008-06-08 18:42:33 +02:00
|
|
|
|
2014-07-05 21:35:01 +02:00
|
|
|
# Commit the tree with a fixed committer date; the modification time
# test further down checks for that date.
test_expect_success 'add files to repository' '
	git add a &&
	GIT_COMMITTER_DATE="2005-05-27 22:00" \
		git commit -m initial
'
|
2005-06-02 22:50:17 +02:00
|
|
|
|
2013-05-20 11:58:24 +02:00
|
|
|
# Mark substfile? as export-subst and store the expected expansion of
# its placeholder in the reference tree.
test_expect_success 'setup export-subst' '
	echo "substfile?" export-subst >>.git/info/attributes &&
	git log -1 --pretty="format:A${SUBSTFORMAT}O" HEAD >a/substfile1
'
|
|
|
|
|
2021-05-31 18:56:24 +02:00
|
|
|
# Clone into bare.git without a template and copy the attributes file
# over, since info/ is not created for us.
test_expect_success 'create bare clone' '
	git clone --template= --bare . bare.git &&
	mkdir bare.git/info &&
	cp .git/info/attributes bare.git/info/attributes
'
|
2008-10-25 17:38:14 +02:00
|
|
|
|
2021-05-31 18:56:24 +02:00
|
|
|
# Drop a/ignored from the reference tree before archives are compared
# against it.
test_expect_success 'remove ignored file' '
	rm a/ignored
'
|
2008-06-08 18:42:33 +02:00
|
|
|
|
2021-05-31 18:56:24 +02:00
|
|
|
# Plain tar archive of HEAD; b.tar is the baseline most later tests
# compare against.
test_expect_success 'git archive' '
	git archive HEAD >b.tar
'

check_tar b
|
|
|
|
|
2013-05-20 11:58:27 +02:00
|
|
|
# Prefix that ends in a slash, i.e. a leading directory.
test_expect_success 'git archive --prefix=prefix/' '
	git archive --prefix=prefix/ HEAD >with_prefix.tar
'

check_tar with_prefix prefix/
|
|
|
|
|
|
|
|
# Prefix without a trailing slash, prepended to the top-level names.
test_expect_success 'git-archive --prefix=olde-' '
	git archive --prefix=olde- HEAD >with_olde-prefix.tar
'

check_tar with_olde-prefix olde-
|
|
|
|
|
2020-09-19 23:23:42 +02:00
|
|
|
# Add one untracked file to the archive and verify it next to the
# regular contents.
test_expect_success 'git archive --add-file' '
	echo untracked >untracked &&
	git archive --add-file=untracked HEAD >with_untracked.tar
'

check_tar with_untracked
check_added with_untracked untracked untracked
|
|
|
|
|
|
|
|
# Add the same untracked file under two different prefixes, then reset
# the prefix for the tree itself.
test_expect_success 'git archive --add-file twice' '
	echo untracked >untracked &&
	git archive \
		--prefix=one/ --add-file=untracked \
		--prefix=two/ --add-file=untracked \
		--prefix= HEAD >with_untracked2.tar
'

check_tar with_untracked2
check_added with_untracked2 untracked one/untracked
check_added with_untracked2 untracked two/untracked
|
|
|
|
|
2012-05-03 03:51:04 +02:00
|
|
|
# With core.bigfilethreshold set to 1 the archive must still be
# byte-identical to the baseline.
test_expect_success 'git archive on large files' '
	test_config core.bigfilethreshold 1 &&
	git archive HEAD >b3.tar &&
	test_cmp_bin b.tar b3.tar
'
|
|
|
|
|
2021-05-31 18:56:24 +02:00
|
|
|
# Archive from the bare clone (overwrites b3.tar) ...
test_expect_success 'git archive in a bare repo' '
	git --git-dir bare.git archive HEAD >b3.tar
'

# ... and require the result to match the baseline byte for byte.
test_expect_success 'git archive vs. the same in a bare repo' '
	test_cmp_bin b.tar b3.tar
'
|
2008-10-25 17:38:14 +02:00
|
|
|
|
2021-05-31 18:56:24 +02:00
|
|
|
# Writing through --output must give the same bytes as writing to
# standard output.
test_expect_success 'git archive with --output' '
	git archive --output=b4.tar HEAD &&
	test_cmp_bin b.tar b4.tar
'
|
2009-02-16 18:20:25 +01:00
|
|
|
|
2021-05-31 18:56:24 +02:00
|
|
|
# Use the current repository as the remote.
test_expect_success 'git archive --remote' '
	git archive --remote=. HEAD >b5.tar &&
	test_cmp_bin b.tar b5.tar
'
|
2009-06-27 20:47:43 +02:00
|
|
|
|
2016-11-22 22:37:04 +01:00
|
|
|
# Refer to the remote by its configured name while running from the
# subdirectory "a".
test_expect_success 'git archive --remote with configured remote' '
	git config remote.foo.url . &&
	(
		cd a &&
		git archive --remote=foo --output=../b5-nick.tar HEAD
	) &&
	test_cmp_bin b.tar b5-nick.tar
'
|
|
|
|
|
2021-05-31 18:56:24 +02:00
|
|
|
# 1117231200 corresponds to 2005-05-27 22:00 UTC, the committer date
# given when the initial commit was created.
test_expect_success 'validate file modification time' '
	echo "1117231200" >expected.mtime &&
	mkdir extract &&
	"$TAR" xf b.tar -C extract a/a &&
	test-tool chmtime --get extract/a/a >b.mtime &&
	test_cmp expected.mtime b.mtime
'
|
2005-06-02 22:50:17 +02:00
|
|
|
|
2021-05-31 18:56:25 +02:00
|
|
|
# The commit id read back from the archive must be the id of HEAD.
test_expect_success 'git get-tar-commit-id' '
	git rev-parse HEAD >expect &&
	git get-tar-commit-id <b.tar >actual &&
	test_cmp expect actual
'
|
2005-06-02 22:50:17 +02:00
|
|
|
|
2010-02-08 00:30:20 +01:00
|
|
|
# An explicit --format=tar wins over the .zip extension of the output
# file name.
test_expect_success 'git archive with --output, override inferred format' '
	git archive --format=tar --output=d4.zip HEAD &&
	test_cmp_bin b.tar d4.zip
'
|
|
|
|
|
2018-10-25 22:32:14 +02:00
|
|
|
# The .tgz extension must select gzipped output for a remote archive
# too; after decompression it has to equal the baseline.
test_expect_success GZIP 'git archive with --output and --remote creates .tgz' '
	git archive --output=d5.tgz --remote=. HEAD &&
	gzip -dc <d5.tgz >d5.tar &&
	test_cmp_bin b.tar d5.tar
'
|
|
|
|
|
2016-11-22 22:37:04 +01:00
|
|
|
# Listing the formats must succeed when run through the nongit helper.
test_expect_success 'git archive --list outside of a git repo' '
	nongit git archive --list
'
|
|
|
|
|
|
|
|
# A remote archive requested through the nongit helper must match a
# local one.
test_expect_success 'git archive --remote outside of a git repo' '
	git archive HEAD >expect.tar &&
	nongit git archive --remote="$PWD" HEAD >actual.tar &&
	test_cmp_bin expect.tar actual.tar
'
|
2007-04-05 22:55:43 +02:00
|
|
|
|
archive: don't let remote clients get unreachable commits
Usually git is careful not to allow clients to fetch
arbitrary objects from the database; for example, objects
received via upload-pack must be reachable from a ref.
Upload-archive breaks this by feeding the client's tree-ish
directly to get_sha1, which will accept arbitrary hex sha1s,
reflogs, etc.
This is not a problem if all of your objects are publicly
reachable anyway (or at least public to anybody who can run
upload-archive). Or if you are making the repo available by
dumb protocols like http or rsync (in which case the client
can read your whole object db directly).
But for sites which allow access only through smart
protocols, clients may be able to fetch trees from commits
that exist in the server's object database but are not
referenced (e.g., because history was rewound).
This patch tightens upload-archive's lookup to use dwim_ref
rather than get_sha1. This means a remote client can only
fetch the tip of a named ref, not an arbitrary sha1 or
reflog entry.
This also restricts some legitimate requests, too:
1. Reachable non-tip commits, like:
git archive --remote=$url v1.0~5
2. Sub-trees of reachable commits, like:
git archive --remote=$url v1.7.7:Documentation
Local requests continue to use get_sha1, and are not
restricted at all.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-11-18 00:04:22 +01:00
|
|
|
# Create a commit, rewind past it, then check that it can still be
# archived locally but not via --remote.
test_expect_success 'clients cannot access unreachable commits' '
	test_commit unreachable &&
	sha1=$(git rev-parse HEAD) &&
	git reset --hard HEAD^ &&

	git archive $sha1 >remote.tar &&
	test_must_fail git archive --remote=. $sha1 >remote.tar
'
|
|
|
|
|
add uploadarchive.allowUnreachable option
In commit ee27ca4, we started restricting remote git-archive
invocations to only accessing reachable commits. This
matches what upload-pack allows, but does restrict some
useful cases (e.g., HEAD:foo). We loosened this in 0f544ee,
which allows `foo:bar` as long as `foo` is a ref tip.
However, that still doesn't allow many useful things, like:
1. Commits accessible from a ref, like `foo^:bar`, which
are reachable
2. Arbitrary sha1s, even if they are reachable.
We can do a full object-reachability check for these cases,
but it can be quite expensive if the client has sent us the
sha1 of a tree; we have to visit every sub-tree of every
commit in the worst case.
Let's instead give site admins an escape hatch, in case they
prefer the more liberal behavior. For many sites, the full
object database is public anyway (e.g., if you allow dumb
walker access), or the site admin may simply decide the
security/convenience tradeoff is not worth it.
This patch adds a new config option to disable the
restrictions added in ee27ca4. It defaults to off, meaning
there is no change in behavior by default.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-02-28 11:04:19 +01:00
|
|
|
# Same scenario as above, but with uploadarchive.allowUnreachable
# enabled the remote request has to succeed.
test_expect_success 'upload-archive can allow unreachable commits' '
	test_commit unreachable1 &&
	sha1=$(git rev-parse HEAD) &&
	git reset --hard HEAD^ &&

	git archive $sha1 >remote.tar &&
	test_config uploadarchive.allowUnreachable true &&
	git archive --remote=. $sha1 >remote.tar
'
|
|
|
|
|
2011-06-22 03:26:31 +02:00
|
|
|
# Two user-defined formats that swap "a" and "b" in the tar stream;
# only "bar" is enabled for remote use.  tar.invalid has no format
# name and no variable part.
test_expect_success 'setup tar filters' '
	git config tar.tar.foo.command "tr ab ba" &&

	git config tar.bar.command "tr ab ba" &&
	git config tar.bar.remote true &&

	git config tar.invalid baz
'
|
|
|
|
|
|
|
|
# Both configured formats have to show up in the local format list.
test_expect_success 'archive --list mentions user filter' '
	git archive --list >output &&
	grep "^tar\.foo\$" output &&
	grep "^bar\$" output
'
|
|
|
|
|
2011-11-19 08:40:04 +01:00
|
|
|
# Over --remote only "bar" (tar.bar.remote is true) may be listed.
test_expect_success 'archive --list shows only enabled remote filters' '
	git archive --list --remote=. >output &&
	! grep "^tar\.foo\$" output &&
	grep "^bar\$" output
'
|
|
|
|
|
|
|
|
# Applying the same tr command again undoes the filter, so the result
# has to equal the baseline archive.
test_expect_success 'invoke tar filter by format' '
	git archive --format=tar.foo HEAD >config.tar.foo &&
	tr ab ba <config.tar.foo >config.tar &&
	test_cmp_bin b.tar config.tar &&

	git archive --format=bar HEAD >config.bar &&
	tr ab ba <config.bar >config.tar &&
	test_cmp_bin b.tar config.tar
'
|
|
|
|
|
|
|
|
# The output file extension alone has to select the matching filter.
test_expect_success 'invoke tar filter by extension' '
	git archive -o config-implicit.tar.foo HEAD &&
	test_cmp_bin config.tar.foo config-implicit.tar.foo &&

	git archive -o config-implicit.bar HEAD &&
	test_cmp_bin config.tar.foo config-implicit.bar
'
|
|
|
|
|
|
|
|
# The unknown extension .baz selects no filter: plain tar is written.
test_expect_success 'default output format remains tar' '
	git archive -o config-implicit.baz HEAD &&
	test_cmp_bin b.tar config-implicit.baz
'
|
|
|
|
|
|
|
|
# "...tar.foo" without a dot in front of "tar" must not select the
# tar.foo filter.
test_expect_success 'extension matching requires dot' '
	git archive -o config-implicittar.foo HEAD &&
	test_cmp_bin b.tar config-implicittar.foo
'
|
|
|
|
|
2011-11-19 08:40:04 +01:00
|
|
|
# tar.foo is not enabled for remote use and must be refused; bar is
# enabled and must produce the same output as locally.
test_expect_success 'only enabled filters are available remotely' '
	test_must_fail git archive --remote=. --format=tar.foo HEAD \
		>remote.tar.foo &&
	git archive --remote=. --format=bar HEAD >remote.bar &&
	test_cmp_bin remote.bar config.bar
'
|
|
|
|
|
2022-06-15 19:05:03 +02:00
|
|
|
# j.tgz is the reference gzipped archive for the tests that follow.
test_expect_success 'git archive --format=tgz' '
	git archive --format=tgz HEAD >j.tgz
'

# tar.gz and tgz have to produce identical bytes.
test_expect_success 'git archive --format=tar.gz' '
	git archive --format=tar.gz HEAD >j1.tar.gz &&
	test_cmp_bin j.tgz j1.tar.gz
'

# The format is taken from the output file name when --format is absent.
test_expect_success 'infer tgz from .tgz filename' '
	git archive --output=j2.tgz HEAD &&
	test_cmp_bin j.tgz j2.tgz
'

test_expect_success 'infer tgz from .tar.gz filename' '
	git archive --output=j3.tar.gz HEAD &&
	test_cmp_bin j.tgz j3.tar.gz
'
|
|
|
|
|
t5000: simplify gzip prerequisite checks
In t5000, we test the built-in ".tar.gz" config for
git-archive. To make our tests portable, we check that we
have a way to both gzip and gunzip, and we respected
environment variables to point to alternate commands for
doing these operations.
However, the $GZIP variable did not actually do anything, as
changing it would not affect the baked-in value in
archive-tar.c. Moreover, setting the variable $GZIP
influences gzip itself. From the gzip man page:
The environment variable GZIP can hold a set of default
options for gzip. These options are interpreted first and
can be overwritten by explicit command line parameters.
We could rename this variable, and use it to set up custom
config (or even have a Makefile knob to affect the built
binary), but it is not worth the trouble; nobody has ever
reported a problem with the baked-in default, and they can
always change it via config if they need to. Let's just drop
the variable and use "gzip" in the test (keeping the
prerequisite, of course).
While we're at it, we can drop the GUNZIP variable and
prerequisite; it uses "gzip -d", so if we have GZIP, we
will have both.
We can also use test_lazy_prereq for the gzip prerequisite,
which is simpler and behaves more consistently with the rest
of git (e.g., by making output available when the test is
run with "-v").
Noticed-by: Christian Hesse <mail@eworm.de>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-03 14:21:40 +01:00
|
|
|
# Decompressing the reference tgz has to give back the baseline tar.
test_expect_success GZIP 'extract tgz file' '
	gzip -dc <j.tgz >j.tar &&
	test_cmp_bin b.tar j.tar
'
|
|
|
|
|
2022-06-15 19:05:03 +02:00
|
|
|
# Without any configuration a remote tar.gz request succeeds and
# matches the local result.
test_expect_success 'remote tar.gz is allowed by default' '
	git archive --remote=. --format=tar.gz HEAD >remote.tar.gz &&
	test_cmp_bin j.tgz remote.tar.gz
'
|
|
|
|
|
2022-06-15 19:05:03 +02:00
|
|
|
# Setting tar.tar.gz.remote to false must make the remote request fail.
test_expect_success 'remote tar.gz can be disabled' '
	git config tar.tar.gz.remote false &&
	test_must_fail git archive --remote=. --format=tar.gz HEAD >remote.tar.gz
'
|
|
|
|
|
2022-06-15 19:05:03 +02:00
|
|
|
# Configure "gzip -cn" as the tgz filter command for this test and
# keep the result for the comparisons below.
test_expect_success GZIP 'git archive --format=tgz (external gzip)' '
	test_config tar.tgz.command "gzip -cn" &&
	git archive --format=tgz HEAD >external_gzip.tgz
'
|
|
|
|
|
2022-06-15 19:05:03 +02:00
|
|
|
# The same filter command configured for tar.gz has to give the same
# bytes as it did for tgz.
test_expect_success GZIP 'git archive --format=tar.gz (external gzip)' '
	test_config tar.tar.gz.command "gzip -cn" &&
	git archive --format=tar.gz HEAD >external_gzip.tar.gz &&
	test_cmp_bin external_gzip.tgz external_gzip.tar.gz
'
|
|
|
|
|
2022-06-15 19:05:03 +02:00
|
|
|
test_expect_success GZIP 'extract tgz file (external gzip)' '
|
|
|
|
gzip -d -c <external_gzip.tgz >external_gzip.tar &&
|
|
|
|
test_cmp_bin b.tar external_gzip.tar
|
archive-tar: add internal gzip implementation
Git uses zlib for its own object store, but calls gzip when creating tgz
archives. Add an option to perform the gzip compression for the latter
using zlib, without depending on the external gzip binary.
Plug it in by making write_block a function pointer and switching to a
compressing variant if the filter command has the magic value "git
archive gzip". Does that indirection slow down tar creation? Not
really, at least not in this test:
$ hyperfine -w3 -L rev HEAD,origin/main -p 'git checkout {rev} && make' \
'./git -C ../linux archive --format=tar HEAD # {rev}'
Benchmark #1: ./git -C ../linux archive --format=tar HEAD # HEAD
Time (mean ± σ): 4.044 s ± 0.007 s [User: 3.901 s, System: 0.137 s]
Range (min … max): 4.038 s … 4.059 s 10 runs
Benchmark #2: ./git -C ../linux archive --format=tar HEAD # origin/main
Time (mean ± σ): 4.047 s ± 0.009 s [User: 3.903 s, System: 0.138 s]
Range (min … max): 4.038 s … 4.066 s 10 runs
How does tgz creation perform?
$ hyperfine -w3 -L command 'gzip -cn','git archive gzip' \
'./git -c tar.tgz.command="{command}" -C ../linux archive --format=tgz HEAD'
Benchmark #1: ./git -c tar.tgz.command="gzip -cn" -C ../linux archive --format=tgz HEAD
Time (mean ± σ): 20.404 s ± 0.006 s [User: 23.943 s, System: 0.401 s]
Range (min … max): 20.395 s … 20.414 s 10 runs
Benchmark #2: ./git -c tar.tgz.command="git archive gzip" -C ../linux archive --format=tgz HEAD
Time (mean ± σ): 23.807 s ± 0.023 s [User: 23.655 s, System: 0.145 s]
Range (min … max): 23.782 s … 23.857 s 10 runs
Summary
'./git -c tar.tgz.command="gzip -cn" -C ../linux archive --format=tgz HEAD' ran
1.17 ± 0.00 times faster than './git -c tar.tgz.command="git archive gzip" -C ../linux archive --format=tgz HEAD'
So the internal implementation takes 17% longer on the Linux repo, but
uses 2% less CPU time. That's because the external gzip can run in
parallel on its own processor, while the internal one works sequentially
and avoids the inter-process communication overhead.
What are the benefits? Only an internal sequential implementation can
offer this eco mode, and it allows avoiding the gzip(1) requirement.
This implementation uses the helper functions from our zlib.c instead of
the convenient gz* functions from zlib, because the latter doesn't give
the control over the generated gzip header that the next patch requires.
Original-patch-by: Rohit Ashiwal <rohit.ashiwal265@gmail.com>
Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-06-15 19:02:33 +02:00
|
|
|
'
|
|
|
|
|
2014-09-21 05:55:06 +02:00
|
|
|
# A ":(glob)" pathspec must select the matching file together with its
# leading directories.  The verbose listing that "git archive -v" writes to
# stderr is captured and compared against the expected entries.
test_expect_success 'archive and :(glob)' '
	git archive -v HEAD -- ":(glob)**/sh" >/dev/null 2>actual &&
	cat >expect <<-\EOF &&
	a/
	a/bin/
	a/bin/sh
	EOF
	test_cmp expect actual
'
|
|
|
|
|
|
|
|
# "git archive" is expected to fail when given the pathspec "*.abc"; the
# archive stream itself is irrelevant here and is discarded.
test_expect_success 'catch non-matching pathspec' '
	test_must_fail git archive -v HEAD -- "*.abc" >/dev/null
'
|
|
|
|
|
t5000: test tar files that overflow ustar headers
The ustar format only has room for 11 (or 12, depending on
some implementations) octal digits for the size and mtime of
each file. For values larger than this, we have to add pax
extended headers to specify the real data, and git does not
yet know how to do so.
Before fixing that, let's start off with some test
infrastructure, as designing portable and efficient tests
for this is non-trivial.
We want to use the system tar to check our output (because
what we really care about is interoperability), but we can't
rely on it:
1. being able to read pax headers
2. being able to handle huge sizes or mtimes
3. supporting a "t" format we can parse
So as a prerequisite, we can feed the system tar a reference
tarball to make sure it can handle these features. The
reference tar here was created with:
dd if=/dev/zero seek=64G bs=1 count=1 of=huge
touch -d @68719476737 huge
tar cf - --format=pax |
head -c 2048
using GNU tar. Note that this is not a complete tarfile, but
it's enough to contain the headers we want to examine.
Likewise, we need to convince git that it has a 64GB blob to
output. Running "git add" on that 64GB file takes many
minutes of CPU, and even compressed, the result is 64MB. So
again, I pre-generated that loose object, and then took only
the first 2k of it. That should be enough to generate 2MB of
data before hitting an inflate error, which is plenty for us
to generate the tar header (and then die of SIGPIPE while
streaming the rest out).
The tests are split so that we test as much as we can even
with an uncooperative system tar. This actually catches the
current breakage (which is that we die("BUG") trying to
write the ustar header) on every system, and then on systems
where we can, we go farther and actually verify the result.
Helped-by: Robin H. Johnson <robbat2@gentoo.org>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-30 11:08:57 +02:00
|
|
|
# Pull the size and date of each entry in a tarfile using the system tar.
#
# We'll pull out only the year from the date; that avoids any question of
# timezones impacting the result (as long as we keep our test times away from a
# year boundary; our reference times are all in August).
#
# The output of tar_info is expected to be "<size> <year>", both in decimal. It
# ignores the return value of tar. We have to do this, because some of our test
# input is only partial (the real data is 64GB in some cases).
#
# Arguments: $1 - path of the tar file to list
# Outputs:   one "<size> <year>" line per listed entry, on stdout
# Returns:   the exit status of awk (tar sits upstream in the pipeline, so
#            its status is not propagated)
tar_info () {
	# In the "tvf" listing, field 3 is taken as the size and field 4 as a
	# dash-separated date whose first component is the year.
	"$TAR" tvf "$1" |
	awk '{
		split($4, date, "-")
		print $3 " " date[1]
	}'
}
|
|
|
|
|
|
|
|
# See if our system tar can handle a tar file with huge sizes and dates far in
# the future, and that we can actually parse its output.
#
# The reference file was generated by GNU tar, and the magic time and size are
# both octal 01000000000001, which overflows normal ustar fields.
#
# That value is 68719476737 in decimal; tar_info is expected to report it as
# the size, together with the year 4147 for the timestamp.
test_lazy_prereq TAR_HUGE '
	echo "68719476737 4147" >expect &&
	tar_info "$TEST_DIRECTORY"/t5000/huge-and-future.tar >actual &&
	test_cmp expect actual
'
|
|
|
|
|
2016-07-14 22:04:43 +02:00
|
|
|
test_expect_success LONG_IS_64BIT 'set up repository with huge blob' '
|
2019-06-29 00:59:22 +02:00
|
|
|
obj=$(test_oid obj) &&
|
|
|
|
path=$(test_oid_to_path $obj) &&
|
|
|
|
mkdir -p .git/objects/$(dirname $path) &&
|
|
|
|
cp "$TEST_DIRECTORY"/t5000/huge-object .git/objects/$path &&
|
t5000: test tar files that overflow ustar headers
The ustar format only has room for 11 (or 12, depending on
some implementations) octal digits for the size and mtime of
each file. For values larger than this, we have to add pax
extended headers to specify the real data, and git does not
yet know how to do so.
Before fixing that, let's start off with some test
infrastructure, as designing portable and efficient tests
for this is non-trivial.
We want to use the system tar to check our output (because
what we really care about is interoperability), but we can't
rely on it:
1. being able to read pax headers
2. being able to handle huge sizes or mtimes
3. supporting a "t" format we can parse
So as a prerequisite, we can feed the system tar a reference
tarball to make sure it can handle these features. The
reference tar here was created with:
dd if=/dev/zero seek=64G bs=1 count=1 of=huge
touch -d @68719476737 huge
tar cf - --format=pax |
head -c 2048
using GNU tar. Note that this is not a complete tarfile, but
it's enough to contain the headers we want to examine.
Likewise, we need to convince git that it has a 64GB blob to
output. Running "git add" on that 64GB file takes many
minutes of CPU, and even compressed, the result is 64MB. So
again, I pre-generated that loose object, and then took only
the first 2k of it. That should be enough to generate 2MB of
data before hitting an inflate error, which is plenty for us
to generate the tar header (and then die of SIGPIPE while
streaming the rest out).
The tests are split so that we test as much as we can even
with an uncooperative system tar. This actually catches the
current breakage (which is that we die("BUG") trying to
write the ustar header) on every system, and then on systems
where we can, we go farther and actually verify the result.
Helped-by: Robin H. Johnson <robbat2@gentoo.org>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-30 11:08:57 +02:00
|
|
|
rm -f .git/index &&
|
|
|
|
git update-index --add --cacheinfo 100644,$obj,huge &&
|
|
|
|
git commit -m huge
|
|
|
|
'
|
|
|
|
|
|
|
|
# We expect git to die with SIGPIPE here (otherwise we
|
|
|
|
# would generate the whole 64GB).
|
2016-07-14 22:04:43 +02:00
|
|
|
test_expect_success LONG_IS_64BIT 'generate tar with huge size' '
|
t5000: test tar files that overflow ustar headers
The ustar format only has room for 11 (or 12, depending on
some implementations) octal digits for the size and mtime of
each file. For values larger than this, we have to add pax
extended headers to specify the real data, and git does not
yet know how to do so.
Before fixing that, let's start off with some test
infrastructure, as designing portable and efficient tests
for this is non-trivial.
We want to use the system tar to check our output (because
what we really care about is interoperability), but we can't
rely on it:
1. being able to read pax headers
2. being able to handle huge sizes or mtimes
3. supporting a "t" format we can parse
So as a prerequisite, we can feed the system tar a reference
tarball to make sure it can handle these features. The
reference tar here was created with:
dd if=/dev/zero seek=64G bs=1 count=1 of=huge
touch -d @68719476737 huge
tar cf - --format=pax |
head -c 2048
using GNU tar. Note that this is not a complete tarfile, but
it's enough to contain the headers we want to examine.
Likewise, we need to convince git that it has a 64GB blob to
output. Running "git add" on that 64GB file takes many
minutes of CPU, and even compressed, the result is 64MB. So
again, I pre-generated that loose object, and then took only
the first 2k of it. That should be enough to generate 2MB of
data before hitting an inflate error, which is plenty for us
to generate the tar header (and then die of SIGPIPE while
streaming the rest out).
The tests are split so that we test as much as we can even
with an uncooperative system tar. This actually catches the
current breakage (which is that we die("BUG") trying to
write the ustar header) on every system, and then on systems
where we can, we go farther and actually verify the result.
Helped-by: Robin H. Johnson <robbat2@gentoo.org>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-30 11:08:57 +02:00
|
|
|
{
|
|
|
|
git archive HEAD
|
|
|
|
echo $? >exit-code
|
|
|
|
} | test_copy_bytes 4096 >huge.tar &&
|
|
|
|
echo 141 >expect &&
|
|
|
|
test_cmp expect exit-code
|
|
|
|
'
|
|
|
|
|
2016-07-14 22:04:43 +02:00
|
|
|
test_expect_success TAR_HUGE,LONG_IS_64BIT 'system tar can read our huge size' '
|
t5000: test tar files that overflow ustar headers
The ustar format only has room for 11 (or 12, depending on
some implementations) octal digits for the size and mtime of
each file. For values larger than this, we have to add pax
extended headers to specify the real data, and git does not
yet know how to do so.
Before fixing that, let's start off with some test
infrastructure, as designing portable and efficient tests
for this is non-trivial.
We want to use the system tar to check our output (because
what we really care about is interoperability), but we can't
rely on it:
1. being able to read pax headers
2. being able to handle huge sizes or mtimes
3. supporting a "t" format we can parse
So as a prerequisite, we can feed the system tar a reference
tarball to make sure it can handle these features. The
reference tar here was created with:
dd if=/dev/zero seek=64G bs=1 count=1 of=huge
touch -d @68719476737 huge
tar cf - --format=pax |
head -c 2048
using GNU tar. Note that this is not a complete tarfile, but
it's enough to contain the headers we want to examine.
Likewise, we need to convince git that it has a 64GB blob to
output. Running "git add" on that 64GB file takes many
minutes of CPU, and even compressed, the result is 64MB. So
again, I pre-generated that loose object, and then took only
the first 2k of it. That should be enough to generate 2MB of
data before hitting an inflate error, which is plenty for us
to generate the tar header (and then die of SIGPIPE while
streaming the rest out).
The tests are split so that we test as much as we can even
with an uncooperative system tar. This actually catches the
current breakage (which is that we die("BUG") trying to
write the ustar header) on every system, and then on systems
where we can, we go farther and actually verify the result.
Helped-by: Robin H. Johnson <robbat2@gentoo.org>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-30 11:08:57 +02:00
|
|
|
echo 68719476737 >expect &&
|
|
|
|
tar_info huge.tar | cut -d" " -f1 >actual &&
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
commit-graph: consolidate fill_commit_graph_info
Both fill_commit_graph_info() and fill_commit_in_graph() parse
information present in commit data chunk. Let's simplify the
implementation by calling fill_commit_graph_info() within
fill_commit_in_graph().
fill_commit_graph_info() used to not load committer data from commit data
chunk. However, with the upcoming switch to using corrected committer
date as generation number v2, we will have to load committer date to
compute generation number value anyway.
e51217e15 (t5000: test tar files that overflow ustar headers,
30-06-2016) introduced a test 'generate tar with future mtime' that
creates a commit with committer date of (2^36 + 1) seconds since
EPOCH. The CDAT chunk provides 34-bits for storing committer date, thus
committer time overflows into generation number (within CDAT chunk) and
has undefined behavior.
The test used to pass as fill_commit_graph_info() would not set struct
member `date` of struct commit and load committer date from the object
database, generating a tar file with the expected mtime.
However, with corrected commit date, we will load the committer date
from CDAT chunk (truncated to lower 34-bits) to populate the generation
number. Thus, Git sets date and generates tar file with the truncated
mtime.
The ustar format (the header format used by most modern tar programs)
only has room for 11 (or 12, depending on some implementations) octal
digits for the size and mtime of each file.
As a date that overflows 12 octal digits also overflows the CDAT chunk,
while one that overflows only 11 octal digits need not, we split the
existing tests to test both implementations separately and add a new
explicit test for 11-digit implementation.
To test the 11-octal digit implementation, we create a future commit
with committer date of 2^34 - 1, which overflows 11-octal digits without
overflowing 34-bits of the Commit Date chunks.
To test the 12-octal digit implementation, the smallest committer date
possible is 2^36 + 1, which overflows the CDAT chunk and thus
commit-graph must be disabled for the test.
Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-16 19:11:10 +01:00
|
|
|
# Create a commit whose committer date is 2^34 - 1 = 17179869183 seconds after
# the epoch: too large for 11 octal digits, yet still within 34 bits.
# The index is removed first so the commit contains only "file".
test_expect_success TIME_IS_64BIT 'set up repository with far-future (2^34 - 1) commit' '
	rm -f .git/index &&
	echo foo >file &&
	git add file &&
	GIT_COMMITTER_DATE="@17179869183 +0000" \
		git commit -m "tempori parendum"
'
|
|
|
|
|
|
|
|
# Archive the far-future commit; future.tar is inspected by the test that
# follows.
test_expect_success TIME_IS_64BIT 'generate tar with far-future mtime' '
	git archive HEAD >future.tar
'
|
|
|
|
|
|
|
|
# Check that the system tar reports the year 2514 for the entry in future.tar,
# i.e. for the committer date of 17179869183 seconds after the epoch.  Only the
# year column of the tar_info output is compared.
#
# The title says "far-future" to keep it distinct from the later check of the
# 2^36 + 1 ("far-far-future") timestamp.
test_expect_success TAR_HUGE,TIME_IS_64BIT,TIME_T_IS_64BIT 'system tar can read our far-future mtime' '
	echo 2514 >expect &&
	tar_info future.tar | cut -d" " -f2 >actual &&
	test_cmp expect actual
'
|
|
|
|
|
|
|
|
test_expect_success TIME_IS_64BIT 'set up repository with far-far-future (2^36 + 1) commit' '
|
t5000: test tar files that overflow ustar headers
The ustar format only has room for 11 (or 12, depending on
some implementations) octal digits for the size and mtime of
each file. For values larger than this, we have to add pax
extended headers to specify the real data, and git does not
yet know how to do so.
Before fixing that, let's start off with some test
infrastructure, as designing portable and efficient tests
for this is non-trivial.
We want to use the system tar to check our output (because
what we really care about is interoperability), but we can't
rely on it:
1. being able to read pax headers
2. being able to handle huge sizes or mtimes
3. supporting a "t" format we can parse
So as a prerequisite, we can feed the system tar a reference
tarball to make sure it can handle these features. The
reference tar here was created with:
dd if=/dev/zero seek=64G bs=1 count=1 of=huge
touch -d @68719476737 huge
tar cf - --format=pax |
head -c 2048
using GNU tar. Note that this is not a complete tarfile, but
it's enough to contain the headers we want to examine.
Likewise, we need to convince git that it has a 64GB blob to
output. Running "git add" on that 64GB file takes many
minutes of CPU, and even compressed, the result is 64MB. So
again, I pre-generated that loose object, and then took only
the first 2k of it. That should be enough to generate 2MB of
data before hitting an inflate error, which is plenty for us
to generate the tar header (and then die of SIGPIPE while
streaming the rest out).
The tests are split so that we test as much as we can even
with an uncooperative system tar. This actually catches the
current breakage (which is that we die("BUG") trying to
write the ustar header) on every system, and then on systems
where we can, we go farther and actually verify the result.
Helped-by: Robin H. Johnson <robbat2@gentoo.org>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-30 11:08:57 +02:00
|
|
|
rm -f .git/index &&
|
|
|
|
echo content >file &&
|
|
|
|
git add file &&
|
commit-graph: consolidate fill_commit_graph_info
Both fill_commit_graph_info() and fill_commit_in_graph() parse
information present in commit data chunk. Let's simplify the
implementation by calling fill_commit_graph_info() within
fill_commit_in_graph().
fill_commit_graph_info() used to not load committer data from commit data
chunk. However, with the upcoming switch to using corrected committer
date as generation number v2, we will have to load committer date to
compute generation number value anyway.
e51217e15 (t5000: test tar files that overflow ustar headers,
30-06-2016) introduced a test 'generate tar with future mtime' that
creates a commit with committer date of (2^36 + 1) seconds since
EPOCH. The CDAT chunk provides 34-bits for storing committer date, thus
committer time overflows into generation number (within CDAT chunk) and
has undefined behavior.
The test used to pass as fill_commit_graph_info() would not set struct
member `date` of struct commit and load committer date from the object
database, generating a tar file with the expected mtime.
However, with corrected commit date, we will load the committer date
from CDAT chunk (truncated to lower 34-bits) to populate the generation
number. Thus, Git sets date and generates tar file with the truncated
mtime.
The ustar format (the header format used by most modern tar programs)
only has room for 11 (or 12, depending on some implementations) octal
digits for the size and mtime of each file.
As a date that overflows 12 octal digits also overflows the CDAT chunk,
while one that overflows only 11 octal digits need not, we split the
existing tests to test both implementations separately and add a new
explicit test for 11-digit implementation.
To test the 11-octal digit implementation, we create a future commit
with committer date of 2^34 - 1, which overflows 11-octal digits without
overflowing 34-bits of the Commit Date chunks.
To test the 12-octal digit implementation, the smallest committer date
possible is 2^36 + 1, which overflows the CDAT chunk and thus
commit-graph must be disabled for the test.
Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-16 19:11:10 +01:00
|
|
|
GIT_TEST_COMMIT_GRAPH=0 GIT_COMMITTER_DATE="@68719476737 +0000" \
|
t5000: test tar files that overflow ustar headers
The ustar format only has room for 11 (or 12, depending on
some implementations) octal digits for the size and mtime of
each file. For values larger than this, we have to add pax
extended headers to specify the real data, and git does not
yet know how to do so.
Before fixing that, let's start off with some test
infrastructure, as designing portable and efficient tests
for this is non-trivial.
We want to use the system tar to check our output (because
what we really care about is interoperability), but we can't
rely on it:
1. being able to read pax headers
2. being able to handle huge sizes or mtimes
3. supporting a "t" format we can parse
So as a prerequisite, we can feed the system tar a reference
tarball to make sure it can handle these features. The
reference tar here was created with:
dd if=/dev/zero seek=64G bs=1 count=1 of=huge
touch -d @68719476737 huge
tar cf - --format=pax |
head -c 2048
using GNU tar. Note that this is not a complete tarfile, but
it's enough to contain the headers we want to examine.
Likewise, we need to convince git that it has a 64GB blob to
output. Running "git add" on that 64GB file takes many
minutes of CPU, and even compressed, the result is 64MB. So
again, I pre-generated that loose object, and then took only
the first 2k of it. That should be enough to generate 2MB of
data before hitting an inflate error, which is plenty for us
to generate the tar header (and then die of SIGPIPE while
streaming the rest out).
The tests are split so that we test as much as we can even
with an uncooperative system tar. This actually catches the
current breakage (which is that we die("BUG") trying to
write the ustar header) on every system, and then on systems
where we can, we go farther and actually verify the result.
Helped-by: Robin H. Johnson <robbat2@gentoo.org>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-30 11:08:57 +02:00
|
|
|
git commit -m "tempori parendum"
|
|
|
|
'
|
|
|
|
|
commit-graph: consolidate fill_commit_graph_info
Both fill_commit_graph_info() and fill_commit_in_graph() parse
information present in commit data chunk. Let's simplify the
implementation by calling fill_commit_graph_info() within
fill_commit_in_graph().
fill_commit_graph_info() used to not load committer data from commit data
chunk. However, with the upcoming switch to using corrected committer
date as generation number v2, we will have to load committer date to
compute generation number value anyway.
e51217e15 (t5000: test tar files that overflow ustar headers,
30-06-2016) introduced a test 'generate tar with future mtime' that
creates a commit with committer date of (2^36 + 1) seconds since
EPOCH. The CDAT chunk provides 34-bits for storing committer date, thus
committer time overflows into generation number (within CDAT chunk) and
has undefined behavior.
The test used to pass as fill_commit_graph_info() would not set struct
member `date` of struct commit and load committer date from the object
database, generating a tar file with the expected mtime.
However, with corrected commit date, we will load the committer date
from CDAT chunk (truncated to lower 34-bits) to populate the generation
number. Thus, Git sets date and generates tar file with the truncated
mtime.
The ustar format (the header format used by most modern tar programs)
only has room for 11 (or 12, depending on some implementations) octal
digits for the size and mtime of each file.
As a date that overflows 12 octal digits also overflows the CDAT chunk,
while one that overflows only 11 octal digits need not, we split the
existing tests to test both implementations separately and add a new
explicit test for 11-digit implementation.
To test the 11-octal digit implementation, we create a future commit
with committer date of 2^34 - 1, which overflows 11-octal digits without
overflowing 34-bits of the Commit Date chunks.
To test the 12-octal digit implementation, the smallest committer date
possible is 2^36 + 1, which overflows the CDAT chunk and thus
commit-graph must be disabled for the test.
Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-16 19:11:10 +01:00
|
|
|
test_expect_success TIME_IS_64BIT 'generate tar with far-far-future mtime' '
|
t5000: test tar files that overflow ustar headers
The ustar format only has room for 11 (or 12, depending on
some implementations) octal digits for the size and mtime of
each file. For values larger than this, we have to add pax
extended headers to specify the real data, and git does not
yet know how to do so.
Before fixing that, let's start off with some test
infrastructure, as designing portable and efficient tests
for this is non-trivial.
We want to use the system tar to check our output (because
what we really care about is interoperability), but we can't
rely on it:
1. being able to read pax headers
2. being able to handle huge sizes or mtimes
3. supporting a "t" format we can parse
So as a prerequisite, we can feed the system tar a reference
tarball to make sure it can handle these features. The
reference tar here was created with:
dd if=/dev/zero seek=64G bs=1 count=1 of=huge
touch -d @68719476737 huge
tar cf - --format=pax |
head -c 2048
using GNU tar. Note that this is not a complete tarfile, but
it's enough to contain the headers we want to examine.
Likewise, we need to convince git that it has a 64GB blob to
output. Running "git add" on that 64GB file takes many
minutes of CPU, and even compressed, the result is 64MB. So
again, I pre-generated that loose object, and then took only
the first 2k of it. That should be enough to generate 2MB of
data before hitting an inflate error, which is plenty for us
to generate the tar header (and then die of SIGPIPE while
streaming the rest out).
The tests are split so that we test as much as we can even
with an uncooperative system tar. This actually catches the
current breakage (which is that we die("BUG") trying to
write the ustar header) on every system, and then on systems
where we can, we go farther and actually verify the result.
Helped-by: Robin H. Johnson <robbat2@gentoo.org>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-30 11:08:57 +02:00
|
|
|
git archive HEAD >future.tar
|
|
|
|
'
|
|
|
|
|
2017-04-20 22:58:21 +02:00
|
|
|
test_expect_success TAR_HUGE,TIME_IS_64BIT,TIME_T_IS_64BIT 'system tar can read our future mtime' '
|
t5000: test tar files that overflow ustar headers
The ustar format only has room for 11 (or 12, depending on
some implementations) octal digits for the size and mtime of
each file. For values larger than this, we have to add pax
extended headers to specify the real data, and git does not
yet know how to do so.
Before fixing that, let's start off with some test
infrastructure, as designing portable and efficient tests
for this is non-trivial.
We want to use the system tar to check our output (because
what we really care about is interoperability), but we can't
rely on it:
1. being able to read pax headers
2. being able to handle huge sizes or mtimes
3. supporting a "t" format we can parse
So as a prerequisite, we can feed the system tar a reference
tarball to make sure it can handle these features. The
reference tar here was created with:
dd if=/dev/zero seek=64G bs=1 count=1 of=huge
touch -d @68719476737 huge
tar cf - --format=pax |
head -c 2048
using GNU tar. Note that this is not a complete tarfile, but
it's enough to contain the headers we want to examine.
Likewise, we need to convince git that it has a 64GB blob to
output. Running "git add" on that 64GB file takes many
minutes of CPU, and even compressed, the result is 64MB. So
again, I pre-generated that loose object, and then took only
the first 2k of it. That should be enough to generate 2MB of
data before hitting an inflate error, which is plenty for us
to generate the tar header (and then die of SIGPIPE while
streaming the rest out).
The tests are split so that we test as much as we can even
with an uncooperative system tar. This actually catches the
current breakage (which is that we die("BUG") trying to
write the ustar header) on every system, and then on systems
where we can, we go farther and actually verify the result.
Helped-by: Robin H. Johnson <robbat2@gentoo.org>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-30 11:08:57 +02:00
|
|
|
echo 4147 >expect &&
|
|
|
|
tar_info future.tar | cut -d" " -f2 >actual &&
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
2005-06-02 22:50:17 +02:00
|
|
|
test_done
|