2017-12-08 16:58:49 +01:00
|
|
|
#!/bin/sh

test_description='git partial clone'

# Source the shared test library from the current directory; it provides
# the test_* helpers used throughout this script.
. ./test-lib.sh
|
|
|
|
|
|
|
|
# create a normal "src" repo where we can later create new commits.
# expect_1.oids will contain a list of the OIDs of all blobs.
test_expect_success 'setup normal src repo' '
	# Tiny awk programs reused by later tests to pick a column.
	echo "{print \$1}" >print_1.awk &&
	echo "{print \$2}" >print_2.awk &&

	git init src &&
	# Chain every step and bail out explicitly: without this, only the
	# exit status of the last command of the last iteration would count.
	for n in 1 2 3 4
	do
		echo "This is file: $n" >src/file.$n.txt &&
		git -C src add file.$n.txt &&
		git -C src commit -m "file $n" &&
		git -C src ls-files -s file.$n.txt >>temp || return 1
	done &&
	# Column 2 of "ls-files -s" output is the blob OID.
	awk -f print_2.awk <temp | sort >expect_1.oids &&
	test_line_count = 4 expect_1.oids
'
|
|
|
|
|
|
|
|
# bare clone "src" giving "srv.bare" for use as our server.
# The two uploadpack.* settings are enabled so that clients may request
# filters and arbitrary object IDs from it.
test_expect_success 'setup bare clone for server' '
	git clone --bare "file://$(pwd)/src" srv.bare &&
	git -C srv.bare config --local uploadpack.allowfilter 1 &&
	git -C srv.bare config --local uploadpack.allowanysha1inwant 1
'
|
|
|
|
|
|
|
|
# do basic partial clone from "srv.bare"
# confirm we are missing all of the known blobs.
# confirm partial clone was registered in the local config.
test_expect_success 'do partial clone 1' '
	git clone --no-checkout --filter=blob:none "file://$(pwd)/srv.bare" pc1 &&

	# Missing objects are printed with a leading "?"; strip it so the
	# list can be compared with the OIDs recorded during setup.
	git -C pc1 rev-list --quiet --objects --missing=print HEAD >revs &&
	awk -f print_1.awk revs |
	sed "s/?//" |
	sort >observed.oids &&

	test_cmp expect_1.oids observed.oids &&
	test "$(git -C pc1 config --local core.repositoryformatversion)" = "1" &&
	test "$(git -C pc1 config --local remote.origin.promisor)" = "true" &&
	test "$(git -C pc1 config --local remote.origin.partialclonefilter)" = "blob:none"
'
|
|
|
|
|
|
|
|
# checkout master to force dynamic object fetch of blobs at HEAD.
# The missing-object list must shrink from 4 entries to none.
test_expect_success 'verify checkout with dynamic object fetch' '
	git -C pc1 rev-list --quiet --objects --missing=print HEAD >observed &&
	test_line_count = 4 observed &&
	git -C pc1 checkout master &&
	git -C pc1 rev-list --quiet --objects --missing=print HEAD >observed &&
	test_line_count = 0 observed
'
|
|
|
|
|
|
|
|
# create new commits in "src" repo to establish a blame history on file.1.txt
# and push to "srv.bare".
test_expect_success 'push new commits to server' '
	git -C src remote add srv "file://$(pwd)/srv.bare" &&
	# Chain the loop body so that a failing step fails the test.
	for x in a b c d e
	do
		echo "Mod file.1.txt $x" >>src/file.1.txt &&
		git -C src add file.1.txt &&
		git -C src commit -m "mod $x" || return 1
	done &&
	# Saved for comparison by the later blame test.
	git -C src blame master -- file.1.txt >expect.blame &&
	git -C src push -u srv master
'
|
|
|
|
|
|
|
|
# (partial) fetch in the partial clone repo from the promisor remote.
# verify that fetch inherited the filter-spec from the config and DOES NOT
# have the new blobs.
test_expect_success 'partial fetch inherits filter settings' '
	git -C pc1 fetch origin &&
	git -C pc1 rev-list --quiet --objects --missing=print \
		master..origin/master >observed &&
	test_line_count = 5 observed
'
|
|
|
|
|
|
|
|
# force dynamic object fetch using diff.
# we should only get 1 new blob (for the file in origin/master).
test_expect_success 'verify diff causes dynamic object fetch' '
	git -C pc1 diff master..origin/master -- file.1.txt &&
	git -C pc1 rev-list --quiet --objects --missing=print \
		master..origin/master >observed &&
	test_line_count = 4 observed
'
|
|
|
|
|
|
|
|
# force full dynamic object fetch of the file's history using blame.
# we should get the intermediate blobs for the file.
test_expect_success 'verify blame causes dynamic object fetch' '
	git -C pc1 blame origin/master -- file.1.txt >observed.blame &&
	test_cmp expect.blame observed.blame &&
	git -C pc1 rev-list --quiet --objects --missing=print \
		master..origin/master >observed &&
	test_line_count = 0 observed
'
|
|
|
|
|
2017-12-08 16:58:50 +01:00
|
|
|
# create new commits in "src" repo to establish a history on file.2.txt
# and push to "srv.bare".
test_expect_success 'push new commits to server for file.2.txt' '
	# Chain the loop body so that a failing step fails the test.
	for x in a b c d e f
	do
		echo "Mod file.2.txt $x" >>src/file.2.txt &&
		git -C src add file.2.txt &&
		git -C src commit -m "mod $x" || return 1
	done &&
	git -C src push -u srv master
'
|
|
|
|
|
2017-12-08 16:58:51 +01:00
|
|
|
# Do FULL fetch by disabling inherited filter-spec using --no-filter.
# Verify we have all the new blobs.
test_expect_success 'override inherited filter-spec using --no-filter' '
	git -C pc1 fetch --no-filter origin &&
	git -C pc1 rev-list --quiet --objects --missing=print \
		master..origin/master >observed &&
	test_line_count = 0 observed
'
|
|
|
|
|
2017-12-08 16:58:51 +01:00
|
|
|
# create new commits in "src" repo to establish a history on file.3.txt
# and push to "srv.bare".
test_expect_success 'push new commits to server for file.3.txt' '
	# Chain the loop body so that a failing step fails the test.
	for x in a b c d e f
	do
		echo "Mod file.3.txt $x" >>src/file.3.txt &&
		git -C src add file.3.txt &&
		git -C src commit -m "mod $x" || return 1
	done &&
	git -C src push -u srv master
'
|
|
|
|
|
|
|
|
# Do a partial fetch and then try to manually fetch the missing objects.
# This can be used as the basis of a pre-command hook to bulk fetch objects
# perhaps combined with a command in dry-run mode.
test_expect_success 'manual prefetch of missing objects' '
	git -C pc1 fetch --filter=blob:none origin &&

	# Collect the OIDs reported as missing (leading "?" stripped).
	git -C pc1 rev-list --quiet --objects --missing=print \
		master..origin/master >revs &&
	awk -f print_1.awk revs |
	sed "s/?//" |
	sort >observed.oids &&

	test_line_count = 6 observed.oids &&
	# Ask the server for exactly those objects.
	git -C pc1 fetch-pack --stdin "file://$(pwd)/srv.bare" <observed.oids &&

	# Nothing may be reported as missing afterwards.
	git -C pc1 rev-list --quiet --objects --missing=print \
		master..origin/master >revs &&
	awk -f print_1.awk revs |
	sed "s/?//" |
	sort >observed.oids &&

	test_line_count = 0 observed.oids
'
|
|
|
|
|
2018-03-14 19:42:41 +01:00
|
|
|
# Clone with transfer.fsckobjects=1 while tracing, and check that the trace
# shows index-pack being run with --fsck-objects.
test_expect_success 'partial clone with transfer.fsckobjects=1 uses index-pack --fsck-objects' '
	git init src &&
	test_commit -C src x &&
	test_config -C src uploadpack.allowfilter 1 &&
	test_config -C src uploadpack.allowanysha1inwant 1 &&

	GIT_TRACE="$(pwd)/trace" git -c transfer.fsckobjects=1 \
		clone --filter="blob:none" "file://$(pwd)/src" dst &&
	grep "git index-pack.*--fsck-objects" trace
'
|
|
|
|
|
2018-10-05 23:31:27 +02:00
|
|
|
# Clone with --filter=tree:0, then check that fsck passes both before and
# after trees and blobs are fetched on demand.
test_expect_success 'use fsck before and after manually fetching a missing subtree' '
	# push new commit so server has a subtree
	mkdir src/dir &&
	echo "in dir" >src/dir/file.txt &&
	git -C src add dir/file.txt &&
	git -C src commit -m "file in dir" &&
	git -C src push -u srv master &&
	SUBTREE=$(git -C src rev-parse HEAD:dir) &&

	rm -rf dst &&
	git clone --no-checkout --filter=tree:0 "file://$(pwd)/srv.bare" dst &&
	git -C dst fsck &&

	# Make sure we only have commits, and all trees and blobs are missing.
	git -C dst rev-list --missing=allow-any --objects master \
		>fetched_objects &&
	awk -f print_1.awk fetched_objects |
	xargs -n1 git -C dst cat-file -t >fetched_types &&

	sort -u fetched_types >unique_types.observed &&
	echo commit >unique_types.expected &&
	test_cmp unique_types.expected unique_types.observed &&

	# Auto-fetch a tree with cat-file.
	git -C dst cat-file -p "$SUBTREE" >tree_contents &&
	grep file.txt tree_contents &&

	# fsck still works after an auto-fetch of a tree.
	git -C dst fsck &&

	# Auto-fetch all remaining trees and blobs with --missing=error
	git -C dst rev-list --missing=error --objects master >fetched_objects &&
	test_line_count = 70 fetched_objects &&

	awk -f print_1.awk fetched_objects |
	xargs -n1 git -C dst cat-file -t >fetched_types &&

	sort -u fetched_types >unique_types.observed &&
	test_write_lines blob commit tree >unique_types.expected &&
	test_cmp unique_types.expected unique_types.observed
'
|
|
|
|
|
2018-07-06 21:34:09 +02:00
|
|
|
# A blob referenced directly by a tag is created in a fresh "src"; a
# blob:none clone must not report a ref pointing to an invalid object, and
# fsck of the clone must pass.
test_expect_success 'partial clone fetches blobs pointed to by refs even if normally filtered out' '
	rm -rf src dst &&
	git init src &&
	test_commit -C src x &&
	test_config -C src uploadpack.allowfilter 1 &&
	test_config -C src uploadpack.allowanysha1inwant 1 &&

	# Create a tag pointing to a blob.
	BLOB=$(echo blob-contents | git -C src hash-object --stdin -w) &&
	git -C src tag myblob "$BLOB" &&

	git clone --filter="blob:none" "file://$(pwd)/src" dst 2>err &&
	! grep "does not point to a valid object" err &&
	git -C dst fsck
'
|
|
|
|
|
2018-09-21 20:22:38 +02:00
|
|
|
# After a blob:none bare clone the blob of foo.t is listed as missing;
# naming its OID on the fetch command line must make it present.
test_expect_success 'fetch what is specified on CLI even if already promised' '
	rm -rf src dst.git &&
	git init src &&
	test_commit -C src foo &&
	test_config -C src uploadpack.allowfilter 1 &&
	test_config -C src uploadpack.allowanysha1inwant 1 &&

	# Record the OID of the blob without relying on the clone.
	git hash-object --stdin <src/foo.t >blob &&

	git clone --bare --filter=blob:none "file://$(pwd)/src" dst.git &&
	git -C dst.git rev-list --objects --quiet --missing=print HEAD >missing_before &&
	grep "?$(cat blob)" missing_before &&
	git -C dst.git fetch origin "$(cat blob)" &&
	git -C dst.git rev-list --objects --quiet --missing=print HEAD >missing_after &&
	! grep "?$(cat blob)" missing_after
'
|
|
|
|
|
2018-07-06 21:34:10 +02:00
|
|
|
# Load lib-httpd.sh and call start_httpd before the tests below, which
# clone from $HTTPD_URL.
. "$TEST_DIRECTORY"/lib-httpd.sh
start_httpd
|
|
|
|
|
2019-05-14 23:10:54 +02:00
|
|
|
# Converts bytes into their hexadecimal representation. For example,
|
|
|
|
# "printf 'ab\r\n' | hex_unpack" results in '61620d0a'.
|
|
|
|
hex_unpack () {
|
|
|
|
perl -e '$/ = undef; $input = <>; print unpack("H2" x length($input), $input)'
|
|
|
|
}
|
|
|
|
|
|
|
|
# Inserts $1 at the start of the string and every 2 characters thereafter.
#
# Filters stdin to stdout. $1 is placed verbatim in the replacement part of
# a sed "s" command, so characters that are special there (backslash, "&",
# "/") are interpreted by sed; callers wanting a literal backslash must
# pass it doubled. A trailing odd character is left without a prefix.
intersperse () {
	# Keep the whole script in one quoted word so that a separator
	# containing whitespace or glob characters is not split or expanded.
	sed "s/\(..\)/$1\1/g"
}
|
|
|
|
|
|
|
|
# Create a one-time-sed command to replace the existing packfile with $1.
#
# $1 is the path of the pack to serve instead. The sed script is written
# to "$HTTPD_ROOT_PATH/one-time-sed" and has the form
#   1,/packfile/!c <len><payload>0000
# where <len> is four hex digits and <payload> is the pack, each byte
# spelled as a \xNN escape.
replace_packfile () {
	# The protocol requires that the packfile be sent in sideband 1, hence
	# the extra \x01 byte at the beginning.
	#
	# The length is the pack size plus 5: presumably the 4 length digits
	# themselves plus the sideband byte.
	# "$1" is quoted so that a pack path containing spaces still works.
	printf "1,/packfile/!c %04x\\\\x01%s0000" \
		"$(($(wc -c <"$1") + 5))" \
		"$(hex_unpack <"$1" | intersperse '\\x')" \
		>"$HTTPD_ROOT_PATH/one-time-sed"
}
|
|
|
|
|
|
|
|
# Serve a pack that lacks the blob a tag points to and expect the partial
# clone to fail, reporting that not all necessary objects were sent.
test_expect_success 'upon cloning, check that all refs point to objects' '
	SERVER="$HTTPD_DOCUMENT_ROOT_PATH/server" &&
	rm -rf "$SERVER" repo &&
	test_create_repo "$SERVER" &&
	test_commit -C "$SERVER" foo &&
	test_config -C "$SERVER" uploadpack.allowfilter 1 &&
	test_config -C "$SERVER" uploadpack.allowanysha1inwant 1 &&

	# Create a tag pointing to a blob.
	BLOB=$(echo blob-contents | git -C "$SERVER" hash-object --stdin -w) &&
	git -C "$SERVER" tag myblob "$BLOB" &&

	# Craft a packfile not including that blob.
	git -C "$SERVER" rev-parse HEAD |
	git -C "$SERVER" pack-objects --stdout >incomplete.pack &&

	# Replace the existing packfile with the crafted one. The protocol
	# requires that the packfile be sent in sideband 1, hence the extra
	# \x01 byte at the beginning.
	replace_packfile incomplete.pack &&

	# Use protocol v2 because the sed command looks for the "packfile"
	# section header.
	test_config -C "$SERVER" protocol.version 2 &&
	test_must_fail git -c protocol.version=2 clone \
		--filter=blob:none "$HTTPD_URL/one_time_sed/server" repo 2>err &&

	test_i18ngrep "did not send all necessary objects" err &&

	# Ensure that the one-time-sed script was used.
	! test -e "$HTTPD_ROOT_PATH/one-time-sed"
'
|
|
|
|
|
2018-07-13 02:03:06 +02:00
|
|
|
# Serve a pack that contains an annotated tag but not the blob it points
# to; the partial clone must succeed without a missing-object complaint.
test_expect_success 'when partial cloning, tolerate server not sending target of tag' '
	SERVER="$HTTPD_DOCUMENT_ROOT_PATH/server" &&
	rm -rf "$SERVER" repo &&
	test_create_repo "$SERVER" &&
	test_commit -C "$SERVER" foo &&
	test_config -C "$SERVER" uploadpack.allowfilter 1 &&
	test_config -C "$SERVER" uploadpack.allowanysha1inwant 1 &&

	# Create an annotated tag pointing to a blob.
	BLOB=$(echo blob-contents | git -C "$SERVER" hash-object --stdin -w) &&
	git -C "$SERVER" tag -m message -a myblob "$BLOB" &&

	# Craft a packfile including the tag, but not the blob it points to.
	# Also, omit objects referenced from HEAD in order to force a second
	# fetch (to fetch missing objects) upon the automatic checkout that
	# happens after a clone.
	printf "%s\n%s\n--not\n%s\n%s\n" \
		$(git -C "$SERVER" rev-parse HEAD) \
		$(git -C "$SERVER" rev-parse myblob) \
		$(git -C "$SERVER" rev-parse HEAD^{tree}) \
		$(git -C "$SERVER" rev-parse myblob^{blob}) |
		git -C "$SERVER" pack-objects --thin --stdout >incomplete.pack &&

	# Replace the existing packfile with the crafted one. The protocol
	# requires that the packfile be sent in sideband 1, hence the extra
	# \x01 byte at the beginning.
	replace_packfile incomplete.pack &&

	# Use protocol v2 because the sed command looks for the "packfile"
	# section header.
	test_config -C "$SERVER" protocol.version 2 &&

	# Exercise to make sure it works.
	git -c protocol.version=2 clone \
		--filter=blob:none "$HTTPD_URL/one_time_sed/server" repo 2>err &&
	! grep "missing object referenced by" err &&

	# Ensure that the one-time-sed script was used.
	! test -e "$HTTPD_ROOT_PATH/one-time-sed"
'
|
|
|
|
|
index-pack: prefetch missing REF_DELTA bases
When fetching, the client sends "have" commit IDs indicating that the
server does not need to send any object referenced by those commits,
reducing network I/O. When the client is a partial clone, the client
still sends "have"s in this way, even if it does not have every object
referenced by a commit it sent as "have".
If a server omits such an object, it is fine: the client could lazily
fetch that object before this fetch, and it can still do so after.
The issue is when the server sends a thin pack containing an object that
is a REF_DELTA against such a missing object: index-pack fails to fix
the thin pack. When support for lazily fetching missing objects was
added in 8b4c0103a9 ("sha1_file: support lazily fetching missing
objects", 2017-12-08), support in index-pack was turned off in the
belief that it accesses the repo only to do hash collision checks.
However, this is not true: it also needs to access the repo to resolve
REF_DELTA bases.
Support for lazy fetching should still generally be turned off in
index-pack because it is used as part of the lazy fetching process
itself (if not, infinite loops may occur), but we do need to fetch the
REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that
those are REF_DELTA themselves, because we do not send "have" when
making such fetches.)
To resolve this, prefetch all missing REF_DELTA bases before attempting
to resolve them. This both ensures that all bases are attempted to be
fetched, and ensures that we make only one request per index-pack
invocation, and not one request per missing object.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
|
|
|
test_expect_success 'tolerate server sending REF_DELTA against missing promisor objects' '
|
|
|
|
SERVER="$HTTPD_DOCUMENT_ROOT_PATH/server" &&
|
|
|
|
rm -rf "$SERVER" repo &&
|
|
|
|
test_create_repo "$SERVER" &&
|
|
|
|
test_config -C "$SERVER" uploadpack.allowfilter 1 &&
|
|
|
|
test_config -C "$SERVER" uploadpack.allowanysha1inwant 1 &&
|
|
|
|
|
2019-06-11 23:06:47 +02:00
|
|
|
# Create a commit with 2 blobs to be used as delta bases.
|
index-pack: prefetch missing REF_DELTA bases
When fetching, the client sends "have" commit IDs indicating that the
server does not need to send any object referenced by those commits,
reducing network I/O. When the client is a partial clone, the client
still sends "have"s in this way, even if it does not have every object
referenced by a commit it sent as "have".
If a server omits such an object, it is fine: the client could lazily
fetch that object before this fetch, and it can still do so after.
The issue is when the server sends a thin pack containing an object that
is a REF_DELTA against such a missing object: index-pack fails to fix
the thin pack. When support for lazily fetching missing objects was
added in 8b4c0103a9 ("sha1_file: support lazily fetching missing
objects", 2017-12-08), support in index-pack was turned off in the
belief that it accesses the repo only to do hash collision checks.
However, this is not true: it also needs to access the repo to resolve
REF_DELTA bases.
Support for lazy fetching should still generally be turned off in
index-pack because it is used as part of the lazy fetching process
itself (if not, infinite loops may occur), but we do need to fetch the
REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that
those are REF_DELTA themselves, because we do not send "have" when
making such fetches.)
To resolve this, prefetch all missing REF_DELTA bases before attempting
to resolve them. This both ensures that all bases are attempted to be
fetched, and ensures that we make only one request per index-pack
invocation, and not one request per missing object.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
|
|
|
for i in $(test_seq 10)
|
|
|
|
do
|
2019-06-11 23:06:47 +02:00
|
|
|
echo "this is a line" >>"$SERVER/foo.txt" &&
|
|
|
|
echo "this is another line" >>"$SERVER/have.txt"
|
index-pack: prefetch missing REF_DELTA bases
When fetching, the client sends "have" commit IDs indicating that the
server does not need to send any object referenced by those commits,
reducing network I/O. When the client is a partial clone, the client
still sends "have"s in this way, even if it does not have every object
referenced by a commit it sent as "have".
If a server omits such an object, it is fine: the client could lazily
fetch that object before this fetch, and it can still do so after.
The issue is when the server sends a thin pack containing an object that
is a REF_DELTA against such a missing object: index-pack fails to fix
the thin pack. When support for lazily fetching missing objects was
added in 8b4c0103a9 ("sha1_file: support lazily fetching missing
objects", 2017-12-08), support in index-pack was turned off in the
belief that it accesses the repo only to do hash collision checks.
However, this is not true: it also needs to access the repo to resolve
REF_DELTA bases.
Support for lazy fetching should still generally be turned off in
index-pack because it is used as part of the lazy fetching process
itself (if not, infinite loops may occur), but we do need to fetch the
REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that
those are REF_DELTA themselves, because we do not send "have" when
making such fetches.)
To resolve this, prefetch all missing REF_DELTA bases before attempting
to resolve them. This both ensures that all bases are attempted to be
fetched, and ensures that we make only one request per index-pack
invocation, and not one request per missing object.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
|
|
|
done &&
|
2019-06-11 23:06:47 +02:00
|
|
|
git -C "$SERVER" add foo.txt have.txt &&
|
index-pack: prefetch missing REF_DELTA bases
When fetching, the client sends "have" commit IDs indicating that the
server does not need to send any object referenced by those commits,
reducing network I/O. When the client is a partial clone, the client
still sends "have"s in this way, even if it does not have every object
referenced by a commit it sent as "have".
If a server omits such an object, it is fine: the client could lazily
fetch that object before this fetch, and it can still do so after.
The issue is when the server sends a thin pack containing an object that
is a REF_DELTA against such a missing object: index-pack fails to fix
the thin pack. When support for lazily fetching missing objects was
added in 8b4c0103a9 ("sha1_file: support lazily fetching missing
objects", 2017-12-08), support in index-pack was turned off in the
belief that it accesses the repo only to do hash collision checks.
However, this is not true: it also needs to access the repo to resolve
REF_DELTA bases.
Support for lazy fetching should still generally be turned off in
index-pack because it is used as part of the lazy fetching process
itself (if not, infinite loops may occur), but we do need to fetch the
REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that
those are REF_DELTA themselves, because we do not send "have" when
making such fetches.)
To resolve this, prefetch all missing REF_DELTA bases before attempting
to resolve them. This both ensures that all bases are attempted to be
fetched, and ensures that we make only one request per index-pack
invocation, and not one request per missing object.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
|
|
|
git -C "$SERVER" commit -m bar &&
|
2019-06-11 23:06:47 +02:00
|
|
|
git -C "$SERVER" rev-parse HEAD:foo.txt >deltabase_missing &&
|
|
|
|
git -C "$SERVER" rev-parse HEAD:have.txt >deltabase_have &&
|
index-pack: prefetch missing REF_DELTA bases
When fetching, the client sends "have" commit IDs indicating that the
server does not need to send any object referenced by those commits,
reducing network I/O. When the client is a partial clone, the client
still sends "have"s in this way, even if it does not have every object
referenced by a commit it sent as "have".
If a server omits such an object, it is fine: the client could lazily
fetch that object before this fetch, and it can still do so after.
The issue is when the server sends a thin pack containing an object that
is a REF_DELTA against such a missing object: index-pack fails to fix
the thin pack. When support for lazily fetching missing objects was
added in 8b4c0103a9 ("sha1_file: support lazily fetching missing
objects", 2017-12-08), support in index-pack was turned off in the
belief that it accesses the repo only to do hash collision checks.
However, this is not true: it also needs to access the repo to resolve
REF_DELTA bases.
Support for lazy fetching should still generally be turned off in
index-pack because it is used as part of the lazy fetching process
itself (if not, infinite loops may occur), but we do need to fetch the
REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that
those are REF_DELTA themselves, because we do not send "have" when
making such fetches.)
To resolve this, prefetch all missing REF_DELTA bases before attempting
to resolve them. This both ensures that all bases are attempted to be
fetched, and ensures that we make only one request per index-pack
invocation, and not one request per missing object.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
|
|
|
|
2019-06-11 23:06:47 +02:00
|
|
|
# Clone. The client has deltabase_have but not deltabase_missing.
|
index-pack: prefetch missing REF_DELTA bases
When fetching, the client sends "have" commit IDs indicating that the
server does not need to send any object referenced by those commits,
reducing network I/O. When the client is a partial clone, the client
still sends "have"s in this way, even if it does not have every object
referenced by a commit it sent as "have".
If a server omits such an object, it is fine: the client could lazily
fetch that object before this fetch, and it can still do so after.
The issue is when the server sends a thin pack containing an object that
is a REF_DELTA against such a missing object: index-pack fails to fix
the thin pack. When support for lazily fetching missing objects was
added in 8b4c0103a9 ("sha1_file: support lazily fetching missing
objects", 2017-12-08), support in index-pack was turned off in the
belief that it accesses the repo only to do hash collision checks.
However, this is not true: it also needs to access the repo to resolve
REF_DELTA bases.
Support for lazy fetching should still generally be turned off in
index-pack because it is used as part of the lazy fetching process
itself (if not, infinite loops may occur), but we do need to fetch the
REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that
those are REF_DELTA themselves, because we do not send "have" when
making such fetches.)
To resolve this, prefetch all missing REF_DELTA bases before attempting
to resolve them. This both ensures that all bases are attempted to be
fetched, and ensures that we make only one request per index-pack
invocation, and not one request per missing object.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
|
|
|
git -c protocol.version=2 clone --no-checkout \
|
|
|
|
--filter=blob:none $HTTPD_URL/one_time_sed/server repo &&
|
2019-06-11 23:06:47 +02:00
|
|
|
git -C repo hash-object -w -- "$SERVER/have.txt" &&
|
index-pack: prefetch missing REF_DELTA bases
When fetching, the client sends "have" commit IDs indicating that the
server does not need to send any object referenced by those commits,
reducing network I/O. When the client is a partial clone, the client
still sends "have"s in this way, even if it does not have every object
referenced by a commit it sent as "have".
If a server omits such an object, it is fine: the client could lazily
fetch that object before this fetch, and it can still do so after.
The issue is when the server sends a thin pack containing an object that
is a REF_DELTA against such a missing object: index-pack fails to fix
the thin pack. When support for lazily fetching missing objects was
added in 8b4c0103a9 ("sha1_file: support lazily fetching missing
objects", 2017-12-08), support in index-pack was turned off in the
belief that it accesses the repo only to do hash collision checks.
However, this is not true: it also needs to access the repo to resolve
REF_DELTA bases.
Support for lazy fetching should still generally be turned off in
index-pack because it is used as part of the lazy fetching process
itself (if not, infinite loops may occur), but we do need to fetch the
REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that
those are REF_DELTA themselves, because we do not send "have" when
making such fetches.)
To resolve this, prefetch all missing REF_DELTA bases before attempting
to resolve them. This both ensures that all bases are attempted to be
fetched, and ensures that we make only one request per index-pack
invocation, and not one request per missing object.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
|
|
|
|
2019-06-11 23:06:47 +02:00
|
|
|
# Sanity check to ensure that the client does not have
|
|
|
|
# deltabase_missing.
|
2019-06-11 23:06:46 +02:00
|
|
|
git -C repo rev-list --objects --ignore-missing \
|
2019-06-11 23:06:47 +02:00
|
|
|
-- $(cat deltabase_missing) >objlist &&
|
index-pack: prefetch missing REF_DELTA bases
When fetching, the client sends "have" commit IDs indicating that the
server does not need to send any object referenced by those commits,
reducing network I/O. When the client is a partial clone, the client
still sends "have"s in this way, even if it does not have every object
referenced by a commit it sent as "have".
If a server omits such an object, it is fine: the client could lazily
fetch that object before this fetch, and it can still do so after.
The issue is when the server sends a thin pack containing an object that
is a REF_DELTA against such a missing object: index-pack fails to fix
the thin pack. When support for lazily fetching missing objects was
added in 8b4c0103a9 ("sha1_file: support lazily fetching missing
objects", 2017-12-08), support in index-pack was turned off in the
belief that it accesses the repo only to do hash collision checks.
However, this is not true: it also needs to access the repo to resolve
REF_DELTA bases.
Support for lazy fetching should still generally be turned off in
index-pack because it is used as part of the lazy fetching process
itself (if not, infinite loops may occur), but we do need to fetch the
REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that
those are REF_DELTA themselves, because we do not send "have" when
making such fetches.)
To resolve this, prefetch all missing REF_DELTA bases before attempting
to resolve them. This both ensures that all bases are attempted to be
fetched, and ensures that we make only one request per index-pack
invocation, and not one request per missing object.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
|
|
|
test_line_count = 0 objlist &&
|
|
|
|
|
|
|
|
# Another commit. This commit will be fetched by the client.
|
|
|
|
echo "abcdefghijklmnopqrstuvwxyz" >>"$SERVER/foo.txt" &&
|
2019-06-11 23:06:47 +02:00
|
|
|
echo "abcdefghijklmnopqrstuvwxyz" >>"$SERVER/have.txt" &&
|
|
|
|
git -C "$SERVER" add foo.txt have.txt &&
|
index-pack: prefetch missing REF_DELTA bases
When fetching, the client sends "have" commit IDs indicating that the
server does not need to send any object referenced by those commits,
reducing network I/O. When the client is a partial clone, the client
still sends "have"s in this way, even if it does not have every object
referenced by a commit it sent as "have".
If a server omits such an object, it is fine: the client could lazily
fetch that object before this fetch, and it can still do so after.
The issue is when the server sends a thin pack containing an object that
is a REF_DELTA against such a missing object: index-pack fails to fix
the thin pack. When support for lazily fetching missing objects was
added in 8b4c0103a9 ("sha1_file: support lazily fetching missing
objects", 2017-12-08), support in index-pack was turned off in the
belief that it accesses the repo only to do hash collision checks.
However, this is not true: it also needs to access the repo to resolve
REF_DELTA bases.
Support for lazy fetching should still generally be turned off in
index-pack because it is used as part of the lazy fetching process
itself (if not, infinite loops may occur), but we do need to fetch the
REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that
those are REF_DELTA themselves, because we do not send "have" when
making such fetches.)
To resolve this, prefetch all missing REF_DELTA bases before attempting
to resolve them. This ensures both that we attempt to fetch every
missing base and that we make only one request per index-pack
invocation, rather than one request per missing object.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
|
|
|
git -C "$SERVER" commit -m baz &&
|
|
|
|
|
|
|
|
# Pack a thin pack containing, among other things, HEAD:foo.txt
|
2019-06-11 23:06:47 +02:00
|
|
|
# delta-ed against HEAD^:foo.txt and HEAD:have.txt delta-ed against
|
|
|
|
# HEAD^:have.txt.
|
index-pack: prefetch missing REF_DELTA bases
When fetching, the client sends "have" commit IDs indicating that the
server does not need to send any object referenced by those commits,
reducing network I/O. When the client is a partial clone, the client
still sends "have"s in this way, even if it does not have every object
referenced by a commit it sent as "have".
If a server omits such an object, it is fine: the client could lazily
fetch that object before this fetch, and it can still do so after.
The issue is when the server sends a thin pack containing an object that
is a REF_DELTA against such a missing object: index-pack fails to fix
the thin pack. When support for lazily fetching missing objects was
added in 8b4c0103a9 ("sha1_file: support lazily fetching missing
objects", 2017-12-08), support in index-pack was turned off in the
belief that it accesses the repo only to do hash collision checks.
However, this is not true: it also needs to access the repo to resolve
REF_DELTA bases.
Support for lazy fetching should still generally be turned off in
index-pack because it is used as part of the lazy fetching process
itself (if not, infinite loops may occur), but we do need to fetch the
REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that
those are REF_DELTA themselves, because we do not send "have" when
making such fetches.)
To resolve this, prefetch all missing REF_DELTA bases before attempting
to resolve them. This ensures both that we attempt to fetch every
missing base and that we make only one request per index-pack
invocation, rather than one request per missing object.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
|
|
|
printf "%s\n--not\n%s\n" \
|
|
|
|
$(git -C "$SERVER" rev-parse HEAD) \
|
|
|
|
$(git -C "$SERVER" rev-parse HEAD^) |
|
|
|
|
git -C "$SERVER" pack-objects --thin --stdout >thin.pack &&
|
|
|
|
|
|
|
|
# Ensure that the pack contains one delta against HEAD^:foo.txt. Since
|
|
|
|
# the delta contains at least 26 novel characters, the size cannot be
|
|
|
|
# contained in 4 bits, so the object header will take up 2 bytes. The
|
|
|
|
# most significant nybble of the first byte is 0b1111 (0b1 to indicate
|
|
|
|
# that the header continues, and 0b111 to indicate REF_DELTA), followed
|
|
|
|
# by any 3 nybbles, then the OID of the delta base.
|
2019-06-11 23:06:47 +02:00
|
|
|
printf "f.,..%s" $(intersperse "," <deltabase_missing) >want &&
|
|
|
|
hex_unpack <thin.pack | intersperse "," >have &&
|
|
|
|
grep $(cat want) have &&
|
|
|
|
|
|
|
|
# Ensure that the pack contains one delta against HEAD^:have.txt,
|
|
|
|
# similar to the above.
|
|
|
|
printf "f.,..%s" $(intersperse "," <deltabase_have) >want &&
|
index-pack: prefetch missing REF_DELTA bases
When fetching, the client sends "have" commit IDs indicating that the
server does not need to send any object referenced by those commits,
reducing network I/O. When the client is a partial clone, the client
still sends "have"s in this way, even if it does not have every object
referenced by a commit it sent as "have".
If a server omits such an object, it is fine: the client could lazily
fetch that object before this fetch, and it can still do so after.
The issue is when the server sends a thin pack containing an object that
is a REF_DELTA against such a missing object: index-pack fails to fix
the thin pack. When support for lazily fetching missing objects was
added in 8b4c0103a9 ("sha1_file: support lazily fetching missing
objects", 2017-12-08), support in index-pack was turned off in the
belief that it accesses the repo only to do hash collision checks.
However, this is not true: it also needs to access the repo to resolve
REF_DELTA bases.
Support for lazy fetching should still generally be turned off in
index-pack because it is used as part of the lazy fetching process
itself (if not, infinite loops may occur), but we do need to fetch the
REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that
those are REF_DELTA themselves, because we do not send "have" when
making such fetches.)
To resolve this, prefetch all missing REF_DELTA bases before attempting
to resolve them. This ensures both that we attempt to fetch every
missing base and that we make only one request per index-pack
invocation, rather than one request per missing object.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
|
|
|
hex_unpack <thin.pack | intersperse "," >have &&
|
|
|
|
grep $(cat want) have &&
|
|
|
|
|
|
|
|
replace_packfile thin.pack &&
|
|
|
|
|
|
|
|
# Use protocol v2 because the sed command looks for the "packfile"
|
|
|
|
# section header.
|
|
|
|
test_config -C "$SERVER" protocol.version 2 &&
|
|
|
|
|
|
|
|
# Fetch the thin pack and ensure that index-pack is able to handle the
|
|
|
|
# REF_DELTA object with a missing promisor delta base.
|
2019-06-11 23:06:47 +02:00
|
|
|
GIT_TRACE_PACKET="$(pwd)/trace" git -C repo -c protocol.version=2 fetch &&
|
|
|
|
|
|
|
|
# Ensure that the missing delta base was directly fetched, but not the
|
|
|
|
# one that the client has.
|
|
|
|
grep "want $(cat deltabase_missing)" trace &&
|
|
|
|
! grep "want $(cat deltabase_have)" trace &&
|
index-pack: prefetch missing REF_DELTA bases
When fetching, the client sends "have" commit IDs indicating that the
server does not need to send any object referenced by those commits,
reducing network I/O. When the client is a partial clone, the client
still sends "have"s in this way, even if it does not have every object
referenced by a commit it sent as "have".
If a server omits such an object, it is fine: the client could lazily
fetch that object before this fetch, and it can still do so after.
The issue is when the server sends a thin pack containing an object that
is a REF_DELTA against such a missing object: index-pack fails to fix
the thin pack. When support for lazily fetching missing objects was
added in 8b4c0103a9 ("sha1_file: support lazily fetching missing
objects", 2017-12-08), support in index-pack was turned off in the
belief that it accesses the repo only to do hash collision checks.
However, this is not true: it also needs to access the repo to resolve
REF_DELTA bases.
Support for lazy fetching should still generally be turned off in
index-pack because it is used as part of the lazy fetching process
itself (if not, infinite loops may occur), but we do need to fetch the
REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that
those are REF_DELTA themselves, because we do not send "have" when
making such fetches.)
To resolve this, prefetch all missing REF_DELTA bases before attempting
to resolve them. This ensures both that we attempt to fetch every
missing base and that we make only one request per index-pack
invocation, rather than one request per missing object.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
|
|
|
|
|
|
|
# Ensure that the one-time-sed script was used.
|
|
|
|
! test -e "$HTTPD_ROOT_PATH/one-time-sed"
|
|
|
|
'
|
|
|
|
|
2017-12-08 16:58:49 +01:00
|
|
|
# End of the test script. NOTE(review): test_done is not defined in this
# file; presumably it comes from the ./test-lib.sh sourced at the top.
test_done
|