git-commit-vandalism/t/t5616-partial-clone.sh

642 lines
23 KiB
Bash
Raw Normal View History

#!/bin/sh
test_description='git partial clone'
. ./test-lib.sh
# create a normal "src" repo where we can later create new commits.
# expect_1.oids will contain a list of the OIDs of all blobs.
test_expect_success 'setup normal src repo' '
echo "{print \$1}" >print_1.awk &&
echo "{print \$2}" >print_2.awk &&
git init src &&
for n in 1 2 3 4
do
echo "This is file: $n" > src/file.$n.txt
git -C src add file.$n.txt
git -C src commit -m "file $n"
git -C src ls-files -s file.$n.txt >>temp
done &&
awk -f print_2.awk <temp | sort >expect_1.oids &&
test_line_count = 4 expect_1.oids
'
# bare clone "src" giving "srv.bare" for use as our server.
test_expect_success 'setup bare clone for server' '
git clone --bare "file://$(pwd)/src" srv.bare &&
git -C srv.bare config --local uploadpack.allowfilter 1 &&
git -C srv.bare config --local uploadpack.allowanysha1inwant 1
'
# do basic partial clone from "srv.bare"
# confirm we are missing all of the known blobs.
# confirm partial clone was registered in the local config.
test_expect_success 'do partial clone 1' '
git clone --no-checkout --filter=blob:none "file://$(pwd)/srv.bare" pc1 &&
git -C pc1 rev-list --quiet --objects --missing=print HEAD >revs &&
awk -f print_1.awk revs |
sed "s/?//" |
sort >observed.oids &&
test_cmp expect_1.oids observed.oids &&
test "$(git -C pc1 config --local core.repositoryformatversion)" = "1" &&
test "$(git -C pc1 config --local remote.origin.promisor)" = "true" &&
test "$(git -C pc1 config --local remote.origin.partialclonefilter)" = "blob:none"
'
test_expect_success 'verify that .promisor file contains refs fetched' '
ls pc1/.git/objects/pack/pack-*.promisor >promisorlist &&
test_line_count = 1 promisorlist &&
git -C srv.bare rev-parse --verify HEAD >headhash &&
grep "$(cat headhash) HEAD" $(cat promisorlist) &&
grep "$(cat headhash) refs/heads/master" $(cat promisorlist)
'
# checkout master to force dynamic object fetch of blobs at HEAD.
test_expect_success 'verify checkout with dynamic object fetch' '
git -C pc1 rev-list --quiet --objects --missing=print HEAD >observed &&
test_line_count = 4 observed &&
git -C pc1 checkout master &&
git -C pc1 rev-list --quiet --objects --missing=print HEAD >observed &&
test_line_count = 0 observed
'
# create new commits in "src" repo to establish a blame history on file.1.txt
# and push to "srv.bare".
test_expect_success 'push new commits to server' '
git -C src remote add srv "file://$(pwd)/srv.bare" &&
for x in a b c d e
do
echo "Mod file.1.txt $x" >>src/file.1.txt
git -C src add file.1.txt
git -C src commit -m "mod $x"
done &&
git -C src blame master -- file.1.txt >expect.blame &&
git -C src push -u srv master
'
# (partial) fetch in the partial clone repo from the promisor remote.
# verify that fetch inherited the filter-spec from the config and DOES NOT
# have the new blobs.
test_expect_success 'partial fetch inherits filter settings' '
git -C pc1 fetch origin &&
git -C pc1 rev-list --quiet --objects --missing=print \
master..origin/master >observed &&
test_line_count = 5 observed
'
# force dynamic object fetch using diff.
# we should only get 1 new blob (for the file in origin/master).
test_expect_success 'verify diff causes dynamic object fetch' '
git -C pc1 diff master..origin/master -- file.1.txt &&
git -C pc1 rev-list --quiet --objects --missing=print \
master..origin/master >observed &&
test_line_count = 4 observed
'
# force full dynamic object fetch of the file's history using blame.
# we should get the intermediate blobs for the file.
test_expect_success 'verify blame causes dynamic object fetch' '
git -C pc1 blame origin/master -- file.1.txt >observed.blame &&
test_cmp expect.blame observed.blame &&
git -C pc1 rev-list --quiet --objects --missing=print \
master..origin/master >observed &&
test_line_count = 0 observed
'
# create new commits in "src" repo to establish a history on file.2.txt
# and push to "srv.bare".
test_expect_success 'push new commits to server for file.2.txt' '
for x in a b c d e f
do
echo "Mod file.2.txt $x" >>src/file.2.txt
git -C src add file.2.txt
git -C src commit -m "mod $x"
done &&
git -C src push -u srv master
'
# Do FULL fetch by disabling inherited filter-spec using --no-filter.
# Verify we have all the new blobs.
test_expect_success 'override inherited filter-spec using --no-filter' '
git -C pc1 fetch --no-filter origin &&
git -C pc1 rev-list --quiet --objects --missing=print \
master..origin/master >observed &&
test_line_count = 0 observed
'
# create new commits in "src" repo to establish a history on file.3.txt
# and push to "srv.bare".
test_expect_success 'push new commits to server for file.3.txt' '
for x in a b c d e f
do
echo "Mod file.3.txt $x" >>src/file.3.txt
git -C src add file.3.txt
git -C src commit -m "mod $x"
done &&
git -C src push -u srv master
'
# Do a partial fetch and then try to manually fetch the missing objects.
# This can be used as the basis of a pre-command hook to bulk fetch objects
# perhaps combined with a command in dry-run mode.
test_expect_success 'manual prefetch of missing objects' '
git -C pc1 fetch --filter=blob:none origin &&
git -C pc1 rev-list --quiet --objects --missing=print \
master..origin/master >revs &&
awk -f print_1.awk revs |
sed "s/?//" |
sort >observed.oids &&
test_line_count = 6 observed.oids &&
git -C pc1 fetch-pack --stdin "file://$(pwd)/srv.bare" <observed.oids &&
git -C pc1 rev-list --quiet --objects --missing=print \
master..origin/master >revs &&
awk -f print_1.awk revs |
sed "s/?//" |
sort >observed.oids &&
test_line_count = 0 observed.oids
'
test_expect_success 'partial clone with transfer.fsckobjects=1 uses index-pack --fsck-objects' '
git init src &&
test_commit -C src x &&
test_config -C src uploadpack.allowfilter 1 &&
test_config -C src uploadpack.allowanysha1inwant 1 &&
GIT_TRACE="$(pwd)/trace" git -c transfer.fsckobjects=1 \
clone --filter="blob:none" "file://$(pwd)/src" dst &&
grep "git index-pack.*--fsck-objects" trace
'
test_expect_success 'use fsck before and after manually fetching a missing subtree' '
# push new commit so server has a subtree
mkdir src/dir &&
echo "in dir" >src/dir/file.txt &&
git -C src add dir/file.txt &&
git -C src commit -m "file in dir" &&
git -C src push -u srv master &&
SUBTREE=$(git -C src rev-parse HEAD:dir) &&
rm -rf dst &&
git clone --no-checkout --filter=tree:0 "file://$(pwd)/srv.bare" dst &&
git -C dst fsck &&
# Make sure we only have commits, and all trees and blobs are missing.
git -C dst rev-list --missing=allow-any --objects master \
>fetched_objects &&
awk -f print_1.awk fetched_objects |
xargs -n1 git -C dst cat-file -t >fetched_types &&
sort -u fetched_types >unique_types.observed &&
echo commit >unique_types.expected &&
test_cmp unique_types.expected unique_types.observed &&
# Auto-fetch a tree with cat-file.
git -C dst cat-file -p $SUBTREE >tree_contents &&
grep file.txt tree_contents &&
# fsck still works after an auto-fetch of a tree.
git -C dst fsck &&
# Auto-fetch all remaining trees and blobs with --missing=error
git -C dst rev-list --missing=error --objects master >fetched_objects &&
test_line_count = 70 fetched_objects &&
awk -f print_1.awk fetched_objects |
xargs -n1 git -C dst cat-file -t >fetched_types &&
sort -u fetched_types >unique_types.observed &&
test_write_lines blob commit tree >unique_types.expected &&
test_cmp unique_types.expected unique_types.observed
'
test_expect_success 'implicitly construct combine: filter with repeated flags' '
GIT_TRACE=$(pwd)/trace git clone --bare \
--filter=blob:none --filter=tree:1 \
"file://$(pwd)/srv.bare" pc2 &&
grep "trace:.* git pack-objects .*--filter=combine:blob:none+tree:1" \
trace &&
git -C pc2 rev-list --objects --missing=allow-any HEAD >objects &&
# We should have gotten some root trees.
grep " $" objects &&
# Should not have gotten any non-root trees or blobs.
! grep " ." objects &&
xargs -n 1 git -C pc2 cat-file -t <objects >types &&
sort -u types >unique_types.actual &&
test_write_lines commit tree >unique_types.expected &&
test_cmp unique_types.expected unique_types.actual
'
upload-pack.c: allow banning certain object filter(s) Git clients may ask the server for a partial set of objects, where the set of objects being requested is refined by one or more object filters. Server administrators can configure 'git upload-pack' to allow or ban these filters by setting the 'uploadpack.allowFilter' variable to 'true' or 'false', respectively. However, administrators using bitmaps may wish to allow certain kinds of object filters, but ban others. Specifically, they may wish to allow object filters that can be optimized by the use of bitmaps, while rejecting other object filters which aren't and represent a perceived performance degradation (as well as an increased load factor on the server). Allow configuring 'git upload-pack' to support object filters on a case-by-case basis by introducing two new configuration variables: - 'uploadpackfilter.allow' - 'uploadpackfilter.<kind>.allow' where '<kind>' may be one of 'blobNone', 'blobLimit', 'tree', and so on. Setting the second configuration variable for any valid value of '<kind>' explicitly allows or disallows restricting that kind of object filter. If a client requests the object filter <kind> and the respective configuration value is not set, 'git upload-pack' will default to the value of 'uploadpackfilter.allow', which itself defaults to 'true' to maintain backwards compatibility. Note that this differs from 'uploadpack.allowfilter', which controls whether or not the 'filter' capability is advertised. Helped-by: Jeff King <peff@peff.net> Signed-off-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-08-03 20:00:10 +02:00
test_expect_success 'upload-pack fails banned object filters' '
test_config -C srv.bare uploadpackfilter.blob:none.allow false &&
test_must_fail ok=sigpipe git clone --no-checkout --filter=blob:none \
"file://$(pwd)/srv.bare" pc3 2>err &&
grep "filter '\''blob:none'\'' not supported" err
'
test_expect_success 'upload-pack fails banned combine object filters' '
test_config -C srv.bare uploadpackfilter.allow false &&
test_config -C srv.bare uploadpackfilter.combine.allow true &&
test_config -C srv.bare uploadpackfilter.tree.allow true &&
test_config -C srv.bare uploadpackfilter.blob:none.allow false &&
test_must_fail ok=sigpipe git clone --no-checkout --filter=tree:1 \
--filter=blob:none "file://$(pwd)/srv.bare" pc3 2>err &&
grep "filter '\''blob:none'\'' not supported" err
'
test_expect_success 'upload-pack fails banned object filters with fallback' '
test_config -C srv.bare uploadpackfilter.allow false &&
test_must_fail ok=sigpipe git clone --no-checkout --filter=blob:none \
"file://$(pwd)/srv.bare" pc3 2>err &&
grep "filter '\''blob:none'\'' not supported" err
'
upload-pack.c: introduce 'uploadpackfilter.tree.maxDepth' In b79cf959b2 (upload-pack.c: allow banning certain object filter(s), 2020-02-26), we introduced functionality to disallow certain object filters from being chosen from within 'git upload-pack'. Traditionally, administrators use this functionality to disallow filters that are known to perform slowly, for e.g., those that do not have bitmap-level filtering. In the past, the '--filter=tree:<n>' was one such filter that does not have bitmap-level filtering support, and so was likely to be banned by administrators. However, in the previous couple of commits, we introduced bitmap-level filtering for the case when 'n' is equal to '0', i.e., as if we had a '--filter=tree:none' choice. While it would be sufficient to simply write $ git config uploadpackfilter.tree.allow true (since it would allow all values of 'n'), we would like to be able to allow this filter for certain values of 'n', i.e., those no greater than some pre-specified maximum. In order to do this, introduce a new configuration key, as follows: $ git config uploadpackfilter.tree.maxDepth <m> where '<m>' specifies the maximum allowed value of 'n' in the filter 'tree:n'. Administrators who wish to allow for only the value '0' can write: $ git config uploadpackfilter.tree.allow true $ git config uploadpackfilter.tree.maxDepth 0 which allows '--filter=tree:0', but no other values. Signed-off-by: Taylor Blau <me@ttaylorr.com> Acked-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-08-03 20:00:17 +02:00
test_expect_success 'upload-pack limits tree depth filters' '
test_config -C srv.bare uploadpackfilter.allow false &&
test_config -C srv.bare uploadpackfilter.tree.allow true &&
test_config -C srv.bare uploadpackfilter.tree.maxDepth 0 &&
test_must_fail ok=sigpipe git clone --no-checkout --filter=tree:1 \
"file://$(pwd)/srv.bare" pc3 2>err &&
grep "tree filter allows max depth 0, but got 1" err
'
test_expect_success 'partial clone fetches blobs pointed to by refs even if normally filtered out' '
rm -rf src dst &&
git init src &&
test_commit -C src x &&
test_config -C src uploadpack.allowfilter 1 &&
test_config -C src uploadpack.allowanysha1inwant 1 &&
# Create a tag pointing to a blob.
BLOB=$(echo blob-contents | git -C src hash-object --stdin -w) &&
git -C src tag myblob "$BLOB" &&
git clone --filter="blob:none" "file://$(pwd)/src" dst 2>err &&
! grep "does not point to a valid object" err &&
git -C dst fsck
'
test_expect_success 'fetch what is specified on CLI even if already promised' '
rm -rf src dst.git &&
git init src &&
test_commit -C src foo &&
test_config -C src uploadpack.allowfilter 1 &&
test_config -C src uploadpack.allowanysha1inwant 1 &&
git hash-object --stdin <src/foo.t >blob &&
git clone --bare --filter=blob:none "file://$(pwd)/src" dst.git &&
git -C dst.git rev-list --objects --quiet --missing=print HEAD >missing_before &&
grep "?$(cat blob)" missing_before &&
git -C dst.git fetch origin $(cat blob) &&
git -C dst.git rev-list --objects --quiet --missing=print HEAD >missing_after &&
! grep "?$(cat blob)" missing_after
'
test_expect_success 'setup src repo for sparse filter' '
git init sparse-src &&
git -C sparse-src config --local uploadpack.allowfilter 1 &&
git -C sparse-src config --local uploadpack.allowanysha1inwant 1 &&
test_commit -C sparse-src one &&
test_commit -C sparse-src two &&
echo /one.t >sparse-src/only-one &&
git -C sparse-src add . &&
git -C sparse-src commit -m "add sparse checkout files"
'
list-objects-filter: delay parsing of sparse oid The list-objects-filter code has two steps to its initialization: 1. parse_list_objects_filter() makes sure the spec is a filter we know about and is syntactically correct. This step is done by "rev-list" or "upload-pack" that is going to apply a filter, but also by "git clone" or "git fetch" before they send the spec across the wire. 2. list_objects_filter__init() runs the type-specific initialization (using function pointers established in step 1). This happens at the start of traverse_commit_list_filtered(), when we're about to actually use the filter. It's a good idea to parse as much as we can in step 1, in order to catch problems early (e.g., a blob size limit that isn't a number). But one thing we _shouldn't_ do is resolve any oids at that step (e.g., for sparse-file contents specified by oid). In the case of a fetch, the oid has to be resolved on the remote side. The current code does resolve the oid during the parse phase, but ignores any error (which we must do, because we might just be sending the spec across the wire). This leads to two bugs: - if we're not in a repository (e.g., because it's git-clone parsing the spec), then we trigger a BUG() trying to resolve the name - if we did hit the error case, we still have to notice that later and bail. The code path in rev-list handles this, but the one in upload-pack does not, leading to a segfault. We can fix both by moving the oid resolution into the sparse-oid init function. At that point we know we have a repository (because we're about to traverse), and handling the error there fixes the segfault. As a bonus, we can drop the NULL sparse_oid_value check in rev-list, since this is now handled in the sparse-oid-filter init function. Signed-off-by: Jeff King <peff@peff.net> Acked-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-15 18:12:44 +02:00
test_expect_success 'partial clone with sparse filter succeeds' '
rm -rf dst.git &&
git clone --no-local --bare \
--filter=sparse:oid=master:only-one \
sparse-src dst.git &&
(
cd dst.git &&
git rev-list --objects --missing=print HEAD >out &&
grep "^$(git rev-parse HEAD:one.t)" out &&
grep "^?$(git rev-parse HEAD:two.t)" out
)
'
list-objects-filter: delay parsing of sparse oid The list-objects-filter code has two steps to its initialization: 1. parse_list_objects_filter() makes sure the spec is a filter we know about and is syntactically correct. This step is done by "rev-list" or "upload-pack" that is going to apply a filter, but also by "git clone" or "git fetch" before they send the spec across the wire. 2. list_objects_filter__init() runs the type-specific initialization (using function pointers established in step 1). This happens at the start of traverse_commit_list_filtered(), when we're about to actually use the filter. It's a good idea to parse as much as we can in step 1, in order to catch problems early (e.g., a blob size limit that isn't a number). But one thing we _shouldn't_ do is resolve any oids at that step (e.g., for sparse-file contents specified by oid). In the case of a fetch, the oid has to be resolved on the remote side. The current code does resolve the oid during the parse phase, but ignores any error (which we must do, because we might just be sending the spec across the wire). This leads to two bugs: - if we're not in a repository (e.g., because it's git-clone parsing the spec), then we trigger a BUG() trying to resolve the name - if we did hit the error case, we still have to notice that later and bail. The code path in rev-list handles this, but the one in upload-pack does not, leading to a segfault. We can fix both by moving the oid resolution into the sparse-oid init function. At that point we know we have a repository (because we're about to traverse), and handling the error there fixes the segfault. As a bonus, we can drop the NULL sparse_oid_value check in rev-list, since this is now handled in the sparse-oid-filter init function. Signed-off-by: Jeff King <peff@peff.net> Acked-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-15 18:12:44 +02:00
test_expect_success 'partial clone with unresolvable sparse filter fails cleanly' '
rm -rf dst.git &&
test_must_fail git clone --no-local --bare \
--filter=sparse:oid=master:no-such-name \
sparse-src dst.git 2>err &&
test_i18ngrep "unable to access sparse blob in .master:no-such-name" err &&
test_must_fail git clone --no-local --bare \
--filter=sparse:oid=master \
sparse-src dst.git 2>err &&
test_i18ngrep "unable to parse sparse filter data in" err
'
setup_triangle () {
rm -rf big-blob.txt server client promisor-remote &&
printf "line %d\n" $(test_seq 1 100) >big-blob.txt &&
# Create a server with 2 commits: a commit with a big tree and a child
# commit with an incremental change. Also, create a partial clone
# client that only contains the first commit.
git init server &&
git -C server config --local uploadpack.allowfilter 1 &&
for i in $(test_seq 1 100)
do
echo "make the tree big" >server/file$i &&
git -C server add file$i
done &&
git -C server commit -m "initial" &&
git clone --bare --filter=tree:0 "file://$(pwd)/server" client &&
echo another line >>server/file1 &&
git -C server commit -am "incremental change" &&
# Create a promisor remote that only contains the tree and blob from
# the first commit.
git init promisor-remote &&
git -C server config --local uploadpack.allowanysha1inwant 1 &&
TREE_HASH=$(git -C server rev-parse HEAD~1^{tree}) &&
git -C promisor-remote fetch --keep "file://$(pwd)/server" "$TREE_HASH" &&
git -C promisor-remote count-objects -v >object-count &&
test_i18ngrep "count: 0" object-count &&
test_i18ngrep "in-pack: 2" object-count &&
# Set it as the promisor remote of client. Thus, whenever
# the client lazy fetches, the lazy fetch will succeed only if it is
# for this tree or blob.
test_commit -C promisor-remote one && # so that ref advertisement is not empty
git -C promisor-remote config --local uploadpack.allowanysha1inwant 1 &&
git -C client remote set-url origin "file://$(pwd)/promisor-remote"
}
# NEEDSWORK: The tests beginning with "fetch lazy-fetches" below only
# test that "fetch" avoid fetching trees and blobs, but not commits or
# tags. Revisit this if Git is ever taught to support partial clones
# with commits and/or tags filtered out.
test_expect_success 'fetch lazy-fetches only to resolve deltas' '
setup_triangle &&
# Exercise to make sure it works. Git will not fetch anything from the
# promisor remote other than for the big tree (because it needs to
# resolve the delta).
GIT_TRACE_PACKET="$(pwd)/trace" git -C client \
fetch "file://$(pwd)/server" master &&
# Verify the assumption that the client needed to fetch the delta base
# to resolve the delta.
git -C server rev-parse HEAD~1^{tree} >hash &&
grep "want $(cat hash)" trace
'
test_expect_success 'fetch lazy-fetches only to resolve deltas, protocol v2' '
setup_triangle &&
git -C server config --local protocol.version 2 &&
git -C client config --local protocol.version 2 &&
git -C promisor-remote config --local protocol.version 2 &&
# Exercise to make sure it works. Git will not fetch anything from the
# promisor remote other than for the big blob (because it needs to
# resolve the delta).
GIT_TRACE_PACKET="$(pwd)/trace" git -C client \
fetch "file://$(pwd)/server" master &&
# Verify that protocol version 2 was used.
grep "fetch< version 2" trace &&
# Verify the assumption that the client needed to fetch the delta base
# to resolve the delta.
git -C server rev-parse HEAD~1^{tree} >hash &&
grep "want $(cat hash)" trace
'
# The following two tests must be in this order. It is important that
# the srv.bare repository did not have tags during clone, but has tags
partial-clone: demonstrate bugs in partial fetch While testing partial clone, I noticed some odd behavior. I was testing a way of running 'git init', followed by manually configuring the remote for partial clone, and then running 'git fetch'. Astonishingly, I saw the 'git fetch' process start asking the server for multiple rounds of pack-file downloads! When tweaking the situation a little more, I discovered that I could cause the remote to hang up with an error. Add two tests that demonstrate these two issues. In the first test, we find that when fetching with blob filters from a repository that previously did not have any tags, the 'git fetch --tags origin' command fails because the server sends "multiple filter-specs cannot be combined". This only happens when using protocol v2. In the second test, we see that a 'git fetch origin' request with several ref updates results in multiple pack-file downloads. This must be due to Git trying to fault-in the objects pointed by the refs. What makes this matter particularly nasty is that this goes through the do_oid_object_info_extended() method, so there are no "haves" in the negotiation. This leads the remote to send every reachable commit and tree from each new ref, providing a quadratic amount of data transfer! This test is fixed if we revert 6462d5eb9a (fetch: remove fetch_if_missing=0, 2019-11-05), but that revert causes other test failures. The real fix will need more care. The tests are ordered in this way because if I swap the test order the tag test will succeed instead of fail. I believe this is because somehow we need the srv.bare repo to not have any tags when we clone, but then have tags in our next fetch. Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-21 22:47:27 +01:00
# in the fetch.
test_expect_success 'verify fetch succeeds when asking for new tags' '
partial-clone: demonstrate bugs in partial fetch While testing partial clone, I noticed some odd behavior. I was testing a way of running 'git init', followed by manually configuring the remote for partial clone, and then running 'git fetch'. Astonishingly, I saw the 'git fetch' process start asking the server for multiple rounds of pack-file downloads! When tweaking the situation a little more, I discovered that I could cause the remote to hang up with an error. Add two tests that demonstrate these two issues. In the first test, we find that when fetching with blob filters from a repository that previously did not have any tags, the 'git fetch --tags origin' command fails because the server sends "multiple filter-specs cannot be combined". This only happens when using protocol v2. In the second test, we see that a 'git fetch origin' request with several ref updates results in multiple pack-file downloads. This must be due to Git trying to fault-in the objects pointed by the refs. What makes this matter particularly nasty is that this goes through the do_oid_object_info_extended() method, so there are no "haves" in the negotiation. This leads the remote to send every reachable commit and tree from each new ref, providing a quadratic amount of data transfer! This test is fixed if we revert 6462d5eb9a (fetch: remove fetch_if_missing=0, 2019-11-05), but that revert causes other test failures. The real fix will need more care. The tests are ordered in this way because if I swap the test order the tag test will succeed instead of fail. I believe this is because somehow we need the srv.bare repo to not have any tags when we clone, but then have tags in our next fetch. Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-21 22:47:27 +01:00
git clone --filter=blob:none "file://$(pwd)/srv.bare" tag-test &&
for i in I J K
do
test_commit -C src $i &&
git -C src branch $i || return 1
done &&
git -C srv.bare fetch --tags origin +refs/heads/*:refs/heads/* &&
git -C tag-test -c protocol.version=2 fetch --tags origin
'
test_expect_success 'verify fetch downloads only one pack when updating refs' '
partial-clone: demonstrate bugs in partial fetch While testing partial clone, I noticed some odd behavior. I was testing a way of running 'git init', followed by manually configuring the remote for partial clone, and then running 'git fetch'. Astonishingly, I saw the 'git fetch' process start asking the server for multiple rounds of pack-file downloads! When tweaking the situation a little more, I discovered that I could cause the remote to hang up with an error. Add two tests that demonstrate these two issues. In the first test, we find that when fetching with blob filters from a repository that previously did not have any tags, the 'git fetch --tags origin' command fails because the server sends "multiple filter-specs cannot be combined". This only happens when using protocol v2. In the second test, we see that a 'git fetch origin' request with several ref updates results in multiple pack-file downloads. This must be due to Git trying to fault-in the objects pointed by the refs. What makes this matter particularly nasty is that this goes through the do_oid_object_info_extended() method, so there are no "haves" in the negotiation. This leads the remote to send every reachable commit and tree from each new ref, providing a quadratic amount of data transfer! This test is fixed if we revert 6462d5eb9a (fetch: remove fetch_if_missing=0, 2019-11-05), but that revert causes other test failures. The real fix will need more care. The tests are ordered in this way because if I swap the test order the tag test will succeed instead of fail. I believe this is because somehow we need the srv.bare repo to not have any tags when we clone, but then have tags in our next fetch. Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-21 22:47:27 +01:00
git clone --filter=blob:none "file://$(pwd)/srv.bare" pack-test &&
ls pack-test/.git/objects/pack/*pack >pack-list &&
test_line_count = 2 pack-list &&
for i in A B C
do
test_commit -C src $i &&
git -C src branch $i || return 1
done &&
git -C srv.bare fetch origin +refs/heads/*:refs/heads/* &&
git -C pack-test fetch origin &&
ls pack-test/.git/objects/pack/*pack >pack-list &&
test_line_count = 3 pack-list
'
test_expect_success 'single-branch tag following respects partial clone' '
git clone --single-branch -b B --filter=blob:none \
"file://$(pwd)/srv.bare" single &&
git -C single rev-parse --verify refs/tags/B &&
git -C single rev-parse --verify refs/tags/A &&
test_must_fail git -C single rev-parse --verify refs/tags/C
'
. "$TEST_DIRECTORY"/lib-httpd.sh
start_httpd
# Converts bytes into their hexadecimal representation. For example,
# "printf 'ab\r\n' | hex_unpack" results in '61620d0a'.
hex_unpack () {
perl -e '$/ = undef; $input = <>; print unpack("H2" x length($input), $input)'
}
# Inserts $1 at the start of the string and every 2 characters thereafter.
intersperse () {
sed 's/\(..\)/'$1'\1/g'
}
t/lib-httpd: avoid using macOS' sed Among other differences relative to GNU sed, macOS' sed always ends its output with a trailing newline, even if the input did not have such a trailing newline. Surprisingly, this makes three httpd-based tests fail on macOS: t5616, t5702 and t5703. ("Surprisingly" because those tests have been around for some time, but apparently nobody runs them on macOS with a working Apache2 setup.) The reason is that we use `sed` in those tests to filter the response of the web server. Apart from the fact that we use GNU constructs (such as using a space after the `c` command instead of a backslash and a newline), we have another problem: macOS' sed LF-only newlines while webservers are supposed to use CR/LF ones. Even worse, t5616 uses `sed` to replace a binary part of the response with a new binary part (kind of hoping that the replaced binary part does not contain a 0x0a byte which would be interpreted as a newline). To that end, it calls on Perl to read the binary pack file and hex-encode it, then calls on `sed` to prefix every hex digit pair with a `\x` in order to construct the text that the `c` statement of the `sed` invocation is supposed to insert. So we call Perl and sed to construct a sed statement. The final nail in the coffin is that macOS' sed does not even interpret those `\x<hex>` constructs. Let's just replace all of that by Perl snippets. With Perl, at least, we do not have to deal with GNU vs macOS semantics, we do not have to worry about unwanted trailing newlines, and we do not have to spawn commands to construct arguments for other commands to be spawned (i.e. we can avoid a whole lot of shell scripting complexity). The upshot is that this fixes t5616, t5702 and t5703 on macOS with Apache2. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-27 14:23:11 +01:00
# Create a one-time-perl command to replace the existing packfile with $1.
replace_packfile () {
# The protocol requires that the packfile be sent in sideband 1, hence
# the extra \x01 byte at the beginning.
t/lib-httpd: avoid using macOS' sed Among other differences relative to GNU sed, macOS' sed always ends its output with a trailing newline, even if the input did not have such a trailing newline. Surprisingly, this makes three httpd-based tests fail on macOS: t5616, t5702 and t5703. ("Surprisingly" because those tests have been around for some time, but apparently nobody runs them on macOS with a working Apache2 setup.) The reason is that we use `sed` in those tests to filter the response of the web server. Apart from the fact that we use GNU constructs (such as using a space after the `c` command instead of a backslash and a newline), we have another problem: macOS' sed LF-only newlines while webservers are supposed to use CR/LF ones. Even worse, t5616 uses `sed` to replace a binary part of the response with a new binary part (kind of hoping that the replaced binary part does not contain a 0x0a byte which would be interpreted as a newline). To that end, it calls on Perl to read the binary pack file and hex-encode it, then calls on `sed` to prefix every hex digit pair with a `\x` in order to construct the text that the `c` statement of the `sed` invocation is supposed to insert. So we call Perl and sed to construct a sed statement. The final nail in the coffin is that macOS' sed does not even interpret those `\x<hex>` constructs. Let's just replace all of that by Perl snippets. With Perl, at least, we do not have to deal with GNU vs macOS semantics, we do not have to worry about unwanted trailing newlines, and we do not have to spawn commands to construct arguments for other commands to be spawned (i.e. we can avoid a whole lot of shell scripting complexity). The upshot is that this fixes t5616, t5702 and t5703 on macOS with Apache2. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-27 14:23:11 +01:00
cp $1 "$HTTPD_ROOT_PATH/one-time-pack" &&
echo 'if (/packfile/) {
print;
my $length = -s "one-time-pack";
printf "%04x\x01", $length + 5;
print `cat one-time-pack` . "0000";
last
}' >"$HTTPD_ROOT_PATH/one-time-perl"
}
test_expect_success 'upon cloning, check that all refs point to objects' '
SERVER="$HTTPD_DOCUMENT_ROOT_PATH/server" &&
rm -rf "$SERVER" repo &&
test_create_repo "$SERVER" &&
test_commit -C "$SERVER" foo &&
test_config -C "$SERVER" uploadpack.allowfilter 1 &&
test_config -C "$SERVER" uploadpack.allowanysha1inwant 1 &&
# Create a tag pointing to a blob.
BLOB=$(echo blob-contents | git -C "$SERVER" hash-object --stdin -w) &&
git -C "$SERVER" tag myblob "$BLOB" &&
# Craft a packfile not including that blob.
git -C "$SERVER" rev-parse HEAD |
git -C "$SERVER" pack-objects --stdout >incomplete.pack &&
# Replace the existing packfile with the crafted one. The protocol
# requires that the packfile be sent in sideband 1, hence the extra
# \x01 byte at the beginning.
replace_packfile incomplete.pack &&
t/lib-httpd: avoid using macOS' sed Among other differences relative to GNU sed, macOS' sed always ends its output with a trailing newline, even if the input did not have such a trailing newline. Surprisingly, this makes three httpd-based tests fail on macOS: t5616, t5702 and t5703. ("Surprisingly" because those tests have been around for some time, but apparently nobody runs them on macOS with a working Apache2 setup.) The reason is that we use `sed` in those tests to filter the response of the web server. Apart from the fact that we use GNU constructs (such as using a space after the `c` command instead of a backslash and a newline), we have another problem: macOS' sed LF-only newlines while webservers are supposed to use CR/LF ones. Even worse, t5616 uses `sed` to replace a binary part of the response with a new binary part (kind of hoping that the replaced binary part does not contain a 0x0a byte which would be interpreted as a newline). To that end, it calls on Perl to read the binary pack file and hex-encode it, then calls on `sed` to prefix every hex digit pair with a `\x` in order to construct the text that the `c` statement of the `sed` invocation is supposed to insert. So we call Perl and sed to construct a sed statement. The final nail in the coffin is that macOS' sed does not even interpret those `\x<hex>` constructs. Let's just replace all of that by Perl snippets. With Perl, at least, we do not have to deal with GNU vs macOS semantics, we do not have to worry about unwanted trailing newlines, and we do not have to spawn commands to construct arguments for other commands to be spawned (i.e. we can avoid a whole lot of shell scripting complexity). The upshot is that this fixes t5616, t5702 and t5703 on macOS with Apache2. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-27 14:23:11 +01:00
# Use protocol v2 because the perl command looks for the "packfile"
# section header.
test_config -C "$SERVER" protocol.version 2 &&
test_must_fail git -c protocol.version=2 clone \
t/lib-httpd: avoid using macOS' sed Among other differences relative to GNU sed, macOS' sed always ends its output with a trailing newline, even if the input did not have such a trailing newline. Surprisingly, this makes three httpd-based tests fail on macOS: t5616, t5702 and t5703. ("Surprisingly" because those tests have been around for some time, but apparently nobody runs them on macOS with a working Apache2 setup.) The reason is that we use `sed` in those tests to filter the response of the web server. Apart from the fact that we use GNU constructs (such as using a space after the `c` command instead of a backslash and a newline), we have another problem: macOS' sed LF-only newlines while webservers are supposed to use CR/LF ones. Even worse, t5616 uses `sed` to replace a binary part of the response with a new binary part (kind of hoping that the replaced binary part does not contain a 0x0a byte which would be interpreted as a newline). To that end, it calls on Perl to read the binary pack file and hex-encode it, then calls on `sed` to prefix every hex digit pair with a `\x` in order to construct the text that the `c` statement of the `sed` invocation is supposed to insert. So we call Perl and sed to construct a sed statement. The final nail in the coffin is that macOS' sed does not even interpret those `\x<hex>` constructs. Let's just replace all of that by Perl snippets. With Perl, at least, we do not have to deal with GNU vs macOS semantics, we do not have to worry about unwanted trailing newlines, and we do not have to spawn commands to construct arguments for other commands to be spawned (i.e. we can avoid a whole lot of shell scripting complexity). The upshot is that this fixes t5616, t5702 and t5703 on macOS with Apache2. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-27 14:23:11 +01:00
--filter=blob:none $HTTPD_URL/one_time_perl/server repo 2>err &&
test_i18ngrep "did not send all necessary objects" err &&
t/lib-httpd: avoid using macOS' sed Among other differences relative to GNU sed, macOS' sed always ends its output with a trailing newline, even if the input did not have such a trailing newline. Surprisingly, this makes three httpd-based tests fail on macOS: t5616, t5702 and t5703. ("Surprisingly" because those tests have been around for some time, but apparently nobody runs them on macOS with a working Apache2 setup.) The reason is that we use `sed` in those tests to filter the response of the web server. Apart from the fact that we use GNU constructs (such as using a space after the `c` command instead of a backslash and a newline), we have another problem: macOS' sed LF-only newlines while webservers are supposed to use CR/LF ones. Even worse, t5616 uses `sed` to replace a binary part of the response with a new binary part (kind of hoping that the replaced binary part does not contain a 0x0a byte which would be interpreted as a newline). To that end, it calls on Perl to read the binary pack file and hex-encode it, then calls on `sed` to prefix every hex digit pair with a `\x` in order to construct the text that the `c` statement of the `sed` invocation is supposed to insert. So we call Perl and sed to construct a sed statement. The final nail in the coffin is that macOS' sed does not even interpret those `\x<hex>` constructs. Let's just replace all of that by Perl snippets. With Perl, at least, we do not have to deal with GNU vs macOS semantics, we do not have to worry about unwanted trailing newlines, and we do not have to spawn commands to construct arguments for other commands to be spawned (i.e. we can avoid a whole lot of shell scripting complexity). The upshot is that this fixes t5616, t5702 and t5703 on macOS with Apache2. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-27 14:23:11 +01:00
# Ensure that the one-time-perl script was used.
! test -e "$HTTPD_ROOT_PATH/one-time-perl"
'
test_expect_success 'when partial cloning, tolerate server not sending target of tag' '
SERVER="$HTTPD_DOCUMENT_ROOT_PATH/server" &&
rm -rf "$SERVER" repo &&
test_create_repo "$SERVER" &&
test_commit -C "$SERVER" foo &&
test_config -C "$SERVER" uploadpack.allowfilter 1 &&
test_config -C "$SERVER" uploadpack.allowanysha1inwant 1 &&
# Create an annotated tag pointing to a blob.
BLOB=$(echo blob-contents | git -C "$SERVER" hash-object --stdin -w) &&
git -C "$SERVER" tag -m message -a myblob "$BLOB" &&
# Craft a packfile including the tag, but not the blob it points to.
# Also, omit objects referenced from HEAD in order to force a second
# fetch (to fetch missing objects) upon the automatic checkout that
# happens after a clone.
printf "%s\n%s\n--not\n%s\n%s\n" \
$(git -C "$SERVER" rev-parse HEAD) \
$(git -C "$SERVER" rev-parse myblob) \
$(git -C "$SERVER" rev-parse HEAD^{tree}) \
$(git -C "$SERVER" rev-parse myblob^{blob}) |
git -C "$SERVER" pack-objects --thin --stdout >incomplete.pack &&
# Replace the existing packfile with the crafted one. The protocol
# requires that the packfile be sent in sideband 1, hence the extra
# \x01 byte at the beginning.
replace_packfile incomplete.pack &&
t/lib-httpd: avoid using macOS' sed Among other differences relative to GNU sed, macOS' sed always ends its output with a trailing newline, even if the input did not have such a trailing newline. Surprisingly, this makes three httpd-based tests fail on macOS: t5616, t5702 and t5703. ("Surprisingly" because those tests have been around for some time, but apparently nobody runs them on macOS with a working Apache2 setup.) The reason is that we use `sed` in those tests to filter the response of the web server. Apart from the fact that we use GNU constructs (such as using a space after the `c` command instead of a backslash and a newline), we have another problem: macOS' sed LF-only newlines while webservers are supposed to use CR/LF ones. Even worse, t5616 uses `sed` to replace a binary part of the response with a new binary part (kind of hoping that the replaced binary part does not contain a 0x0a byte which would be interpreted as a newline). To that end, it calls on Perl to read the binary pack file and hex-encode it, then calls on `sed` to prefix every hex digit pair with a `\x` in order to construct the text that the `c` statement of the `sed` invocation is supposed to insert. So we call Perl and sed to construct a sed statement. The final nail in the coffin is that macOS' sed does not even interpret those `\x<hex>` constructs. Let's just replace all of that by Perl snippets. With Perl, at least, we do not have to deal with GNU vs macOS semantics, we do not have to worry about unwanted trailing newlines, and we do not have to spawn commands to construct arguments for other commands to be spawned (i.e. we can avoid a whole lot of shell scripting complexity). The upshot is that this fixes t5616, t5702 and t5703 on macOS with Apache2. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-27 14:23:11 +01:00
# Use protocol v2 because the perl command looks for the "packfile"
# section header.
test_config -C "$SERVER" protocol.version 2 &&
# Exercise to make sure it works.
git -c protocol.version=2 clone \
t/lib-httpd: avoid using macOS' sed Among other differences relative to GNU sed, macOS' sed always ends its output with a trailing newline, even if the input did not have such a trailing newline. Surprisingly, this makes three httpd-based tests fail on macOS: t5616, t5702 and t5703. ("Surprisingly" because those tests have been around for some time, but apparently nobody runs them on macOS with a working Apache2 setup.) The reason is that we use `sed` in those tests to filter the response of the web server. Apart from the fact that we use GNU constructs (such as using a space after the `c` command instead of a backslash and a newline), we have another problem: macOS' sed LF-only newlines while webservers are supposed to use CR/LF ones. Even worse, t5616 uses `sed` to replace a binary part of the response with a new binary part (kind of hoping that the replaced binary part does not contain a 0x0a byte which would be interpreted as a newline). To that end, it calls on Perl to read the binary pack file and hex-encode it, then calls on `sed` to prefix every hex digit pair with a `\x` in order to construct the text that the `c` statement of the `sed` invocation is supposed to insert. So we call Perl and sed to construct a sed statement. The final nail in the coffin is that macOS' sed does not even interpret those `\x<hex>` constructs. Let's just replace all of that by Perl snippets. With Perl, at least, we do not have to deal with GNU vs macOS semantics, we do not have to worry about unwanted trailing newlines, and we do not have to spawn commands to construct arguments for other commands to be spawned (i.e. we can avoid a whole lot of shell scripting complexity). The upshot is that this fixes t5616, t5702 and t5703 on macOS with Apache2. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-27 14:23:11 +01:00
--filter=blob:none $HTTPD_URL/one_time_perl/server repo 2> err &&
! grep "missing object referenced by" err &&
t/lib-httpd: avoid using macOS' sed Among other differences relative to GNU sed, macOS' sed always ends its output with a trailing newline, even if the input did not have such a trailing newline. Surprisingly, this makes three httpd-based tests fail on macOS: t5616, t5702 and t5703. ("Surprisingly" because those tests have been around for some time, but apparently nobody runs them on macOS with a working Apache2 setup.) The reason is that we use `sed` in those tests to filter the response of the web server. Apart from the fact that we use GNU constructs (such as using a space after the `c` command instead of a backslash and a newline), we have another problem: macOS' sed LF-only newlines while webservers are supposed to use CR/LF ones. Even worse, t5616 uses `sed` to replace a binary part of the response with a new binary part (kind of hoping that the replaced binary part does not contain a 0x0a byte which would be interpreted as a newline). To that end, it calls on Perl to read the binary pack file and hex-encode it, then calls on `sed` to prefix every hex digit pair with a `\x` in order to construct the text that the `c` statement of the `sed` invocation is supposed to insert. So we call Perl and sed to construct a sed statement. The final nail in the coffin is that macOS' sed does not even interpret those `\x<hex>` constructs. Let's just replace all of that by Perl snippets. With Perl, at least, we do not have to deal with GNU vs macOS semantics, we do not have to worry about unwanted trailing newlines, and we do not have to spawn commands to construct arguments for other commands to be spawned (i.e. we can avoid a whole lot of shell scripting complexity). The upshot is that this fixes t5616, t5702 and t5703 on macOS with Apache2. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-27 14:23:11 +01:00
# Ensure that the one-time-perl script was used.
! test -e "$HTTPD_ROOT_PATH/one-time-perl"
'
index-pack: prefetch missing REF_DELTA bases When fetching, the client sends "have" commit IDs indicating that the server does not need to send any object referenced by those commits, reducing network I/O. When the client is a partial clone, the client still sends "have"s in this way, even if it does not have every object referenced by a commit it sent as "have". If a server omits such an object, it is fine: the client could lazily fetch that object before this fetch, and it can still do so after. The issue is when the server sends a thin pack containing an object that is a REF_DELTA against such a missing object: index-pack fails to fix the thin pack. When support for lazily fetching missing objects was added in 8b4c0103a9 ("sha1_file: support lazily fetching missing objects", 2017-12-08), support in index-pack was turned off in the belief that it accesses the repo only to do hash collision checks. However, this is not true: it also needs to access the repo to resolve REF_DELTA bases. Support for lazy fetching should still generally be turned off in index-pack because it is used as part of the lazy fetching process itself (if not, infinite loops may occur), but we do need to fetch the REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that those are REF_DELTA themselves, because we do not send "have" when making such fetches.) To resolve this, prefetch all missing REF_DELTA bases before attempting to resolve them. This both ensures that all bases are attempted to be fetched, and ensures that we make only one request per index-pack invocation, and not one request per missing object. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
test_expect_success 'tolerate server sending REF_DELTA against missing promisor objects' '
SERVER="$HTTPD_DOCUMENT_ROOT_PATH/server" &&
rm -rf "$SERVER" repo &&
test_create_repo "$SERVER" &&
test_config -C "$SERVER" uploadpack.allowfilter 1 &&
test_config -C "$SERVER" uploadpack.allowanysha1inwant 1 &&
# Create a commit with 2 blobs to be used as delta bases.
index-pack: prefetch missing REF_DELTA bases When fetching, the client sends "have" commit IDs indicating that the server does not need to send any object referenced by those commits, reducing network I/O. When the client is a partial clone, the client still sends "have"s in this way, even if it does not have every object referenced by a commit it sent as "have". If a server omits such an object, it is fine: the client could lazily fetch that object before this fetch, and it can still do so after. The issue is when the server sends a thin pack containing an object that is a REF_DELTA against such a missing object: index-pack fails to fix the thin pack. When support for lazily fetching missing objects was added in 8b4c0103a9 ("sha1_file: support lazily fetching missing objects", 2017-12-08), support in index-pack was turned off in the belief that it accesses the repo only to do hash collision checks. However, this is not true: it also needs to access the repo to resolve REF_DELTA bases. Support for lazy fetching should still generally be turned off in index-pack because it is used as part of the lazy fetching process itself (if not, infinite loops may occur), but we do need to fetch the REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that those are REF_DELTA themselves, because we do not send "have" when making such fetches.) To resolve this, prefetch all missing REF_DELTA bases before attempting to resolve them. This both ensures that all bases are attempted to be fetched, and ensures that we make only one request per index-pack invocation, and not one request per missing object. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
for i in $(test_seq 10)
do
echo "this is a line" >>"$SERVER/foo.txt" &&
echo "this is another line" >>"$SERVER/have.txt"
index-pack: prefetch missing REF_DELTA bases When fetching, the client sends "have" commit IDs indicating that the server does not need to send any object referenced by those commits, reducing network I/O. When the client is a partial clone, the client still sends "have"s in this way, even if it does not have every object referenced by a commit it sent as "have". If a server omits such an object, it is fine: the client could lazily fetch that object before this fetch, and it can still do so after. The issue is when the server sends a thin pack containing an object that is a REF_DELTA against such a missing object: index-pack fails to fix the thin pack. When support for lazily fetching missing objects was added in 8b4c0103a9 ("sha1_file: support lazily fetching missing objects", 2017-12-08), support in index-pack was turned off in the belief that it accesses the repo only to do hash collision checks. However, this is not true: it also needs to access the repo to resolve REF_DELTA bases. Support for lazy fetching should still generally be turned off in index-pack because it is used as part of the lazy fetching process itself (if not, infinite loops may occur), but we do need to fetch the REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that those are REF_DELTA themselves, because we do not send "have" when making such fetches.) To resolve this, prefetch all missing REF_DELTA bases before attempting to resolve them. This both ensures that all bases are attempted to be fetched, and ensures that we make only one request per index-pack invocation, and not one request per missing object. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
done &&
git -C "$SERVER" add foo.txt have.txt &&
index-pack: prefetch missing REF_DELTA bases When fetching, the client sends "have" commit IDs indicating that the server does not need to send any object referenced by those commits, reducing network I/O. When the client is a partial clone, the client still sends "have"s in this way, even if it does not have every object referenced by a commit it sent as "have". If a server omits such an object, it is fine: the client could lazily fetch that object before this fetch, and it can still do so after. The issue is when the server sends a thin pack containing an object that is a REF_DELTA against such a missing object: index-pack fails to fix the thin pack. When support for lazily fetching missing objects was added in 8b4c0103a9 ("sha1_file: support lazily fetching missing objects", 2017-12-08), support in index-pack was turned off in the belief that it accesses the repo only to do hash collision checks. However, this is not true: it also needs to access the repo to resolve REF_DELTA bases. Support for lazy fetching should still generally be turned off in index-pack because it is used as part of the lazy fetching process itself (if not, infinite loops may occur), but we do need to fetch the REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that those are REF_DELTA themselves, because we do not send "have" when making such fetches.) To resolve this, prefetch all missing REF_DELTA bases before attempting to resolve them. This both ensures that all bases are attempted to be fetched, and ensures that we make only one request per index-pack invocation, and not one request per missing object. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
git -C "$SERVER" commit -m bar &&
git -C "$SERVER" rev-parse HEAD:foo.txt >deltabase_missing &&
git -C "$SERVER" rev-parse HEAD:have.txt >deltabase_have &&
index-pack: prefetch missing REF_DELTA bases When fetching, the client sends "have" commit IDs indicating that the server does not need to send any object referenced by those commits, reducing network I/O. When the client is a partial clone, the client still sends "have"s in this way, even if it does not have every object referenced by a commit it sent as "have". If a server omits such an object, it is fine: the client could lazily fetch that object before this fetch, and it can still do so after. The issue is when the server sends a thin pack containing an object that is a REF_DELTA against such a missing object: index-pack fails to fix the thin pack. When support for lazily fetching missing objects was added in 8b4c0103a9 ("sha1_file: support lazily fetching missing objects", 2017-12-08), support in index-pack was turned off in the belief that it accesses the repo only to do hash collision checks. However, this is not true: it also needs to access the repo to resolve REF_DELTA bases. Support for lazy fetching should still generally be turned off in index-pack because it is used as part of the lazy fetching process itself (if not, infinite loops may occur), but we do need to fetch the REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that those are REF_DELTA themselves, because we do not send "have" when making such fetches.) To resolve this, prefetch all missing REF_DELTA bases before attempting to resolve them. This both ensures that all bases are attempted to be fetched, and ensures that we make only one request per index-pack invocation, and not one request per missing object. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
# Clone. The client has deltabase_have but not deltabase_missing.
index-pack: prefetch missing REF_DELTA bases When fetching, the client sends "have" commit IDs indicating that the server does not need to send any object referenced by those commits, reducing network I/O. When the client is a partial clone, the client still sends "have"s in this way, even if it does not have every object referenced by a commit it sent as "have". If a server omits such an object, it is fine: the client could lazily fetch that object before this fetch, and it can still do so after. The issue is when the server sends a thin pack containing an object that is a REF_DELTA against such a missing object: index-pack fails to fix the thin pack. When support for lazily fetching missing objects was added in 8b4c0103a9 ("sha1_file: support lazily fetching missing objects", 2017-12-08), support in index-pack was turned off in the belief that it accesses the repo only to do hash collision checks. However, this is not true: it also needs to access the repo to resolve REF_DELTA bases. Support for lazy fetching should still generally be turned off in index-pack because it is used as part of the lazy fetching process itself (if not, infinite loops may occur), but we do need to fetch the REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that those are REF_DELTA themselves, because we do not send "have" when making such fetches.) To resolve this, prefetch all missing REF_DELTA bases before attempting to resolve them. This both ensures that all bases are attempted to be fetched, and ensures that we make only one request per index-pack invocation, and not one request per missing object. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
git -c protocol.version=2 clone --no-checkout \
t/lib-httpd: avoid using macOS' sed Among other differences relative to GNU sed, macOS' sed always ends its output with a trailing newline, even if the input did not have such a trailing newline. Surprisingly, this makes three httpd-based tests fail on macOS: t5616, t5702 and t5703. ("Surprisingly" because those tests have been around for some time, but apparently nobody runs them on macOS with a working Apache2 setup.) The reason is that we use `sed` in those tests to filter the response of the web server. Apart from the fact that we use GNU constructs (such as using a space after the `c` command instead of a backslash and a newline), we have another problem: macOS' sed LF-only newlines while webservers are supposed to use CR/LF ones. Even worse, t5616 uses `sed` to replace a binary part of the response with a new binary part (kind of hoping that the replaced binary part does not contain a 0x0a byte which would be interpreted as a newline). To that end, it calls on Perl to read the binary pack file and hex-encode it, then calls on `sed` to prefix every hex digit pair with a `\x` in order to construct the text that the `c` statement of the `sed` invocation is supposed to insert. So we call Perl and sed to construct a sed statement. The final nail in the coffin is that macOS' sed does not even interpret those `\x<hex>` constructs. Let's just replace all of that by Perl snippets. With Perl, at least, we do not have to deal with GNU vs macOS semantics, we do not have to worry about unwanted trailing newlines, and we do not have to spawn commands to construct arguments for other commands to be spawned (i.e. we can avoid a whole lot of shell scripting complexity). The upshot is that this fixes t5616, t5702 and t5703 on macOS with Apache2. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-27 14:23:11 +01:00
--filter=blob:none $HTTPD_URL/one_time_perl/server repo &&
git -C repo hash-object -w -- "$SERVER/have.txt" &&
index-pack: prefetch missing REF_DELTA bases When fetching, the client sends "have" commit IDs indicating that the server does not need to send any object referenced by those commits, reducing network I/O. When the client is a partial clone, the client still sends "have"s in this way, even if it does not have every object referenced by a commit it sent as "have". If a server omits such an object, it is fine: the client could lazily fetch that object before this fetch, and it can still do so after. The issue is when the server sends a thin pack containing an object that is a REF_DELTA against such a missing object: index-pack fails to fix the thin pack. When support for lazily fetching missing objects was added in 8b4c0103a9 ("sha1_file: support lazily fetching missing objects", 2017-12-08), support in index-pack was turned off in the belief that it accesses the repo only to do hash collision checks. However, this is not true: it also needs to access the repo to resolve REF_DELTA bases. Support for lazy fetching should still generally be turned off in index-pack because it is used as part of the lazy fetching process itself (if not, infinite loops may occur), but we do need to fetch the REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that those are REF_DELTA themselves, because we do not send "have" when making such fetches.) To resolve this, prefetch all missing REF_DELTA bases before attempting to resolve them. This both ensures that all bases are attempted to be fetched, and ensures that we make only one request per index-pack invocation, and not one request per missing object. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
# Sanity check to ensure that the client does not have
# deltabase_missing.
git -C repo rev-list --objects --ignore-missing \
-- $(cat deltabase_missing) >objlist &&
index-pack: prefetch missing REF_DELTA bases When fetching, the client sends "have" commit IDs indicating that the server does not need to send any object referenced by those commits, reducing network I/O. When the client is a partial clone, the client still sends "have"s in this way, even if it does not have every object referenced by a commit it sent as "have". If a server omits such an object, it is fine: the client could lazily fetch that object before this fetch, and it can still do so after. The issue is when the server sends a thin pack containing an object that is a REF_DELTA against such a missing object: index-pack fails to fix the thin pack. When support for lazily fetching missing objects was added in 8b4c0103a9 ("sha1_file: support lazily fetching missing objects", 2017-12-08), support in index-pack was turned off in the belief that it accesses the repo only to do hash collision checks. However, this is not true: it also needs to access the repo to resolve REF_DELTA bases. Support for lazy fetching should still generally be turned off in index-pack because it is used as part of the lazy fetching process itself (if not, infinite loops may occur), but we do need to fetch the REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that those are REF_DELTA themselves, because we do not send "have" when making such fetches.) To resolve this, prefetch all missing REF_DELTA bases before attempting to resolve them. This both ensures that all bases are attempted to be fetched, and ensures that we make only one request per index-pack invocation, and not one request per missing object. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
test_line_count = 0 objlist &&
# Another commit. This commit will be fetched by the client.
echo "abcdefghijklmnopqrstuvwxyz" >>"$SERVER/foo.txt" &&
echo "abcdefghijklmnopqrstuvwxyz" >>"$SERVER/have.txt" &&
git -C "$SERVER" add foo.txt have.txt &&
index-pack: prefetch missing REF_DELTA bases When fetching, the client sends "have" commit IDs indicating that the server does not need to send any object referenced by those commits, reducing network I/O. When the client is a partial clone, the client still sends "have"s in this way, even if it does not have every object referenced by a commit it sent as "have". If a server omits such an object, it is fine: the client could lazily fetch that object before this fetch, and it can still do so after. The issue is when the server sends a thin pack containing an object that is a REF_DELTA against such a missing object: index-pack fails to fix the thin pack. When support for lazily fetching missing objects was added in 8b4c0103a9 ("sha1_file: support lazily fetching missing objects", 2017-12-08), support in index-pack was turned off in the belief that it accesses the repo only to do hash collision checks. However, this is not true: it also needs to access the repo to resolve REF_DELTA bases. Support for lazy fetching should still generally be turned off in index-pack because it is used as part of the lazy fetching process itself (if not, infinite loops may occur), but we do need to fetch the REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that those are REF_DELTA themselves, because we do not send "have" when making such fetches.) To resolve this, prefetch all missing REF_DELTA bases before attempting to resolve them. This both ensures that all bases are attempted to be fetched, and ensures that we make only one request per index-pack invocation, and not one request per missing object. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
git -C "$SERVER" commit -m baz &&
# Pack a thin pack containing, among other things, HEAD:foo.txt
# delta-ed against HEAD^:foo.txt and HEAD:have.txt delta-ed against
# HEAD^:have.txt.
index-pack: prefetch missing REF_DELTA bases When fetching, the client sends "have" commit IDs indicating that the server does not need to send any object referenced by those commits, reducing network I/O. When the client is a partial clone, the client still sends "have"s in this way, even if it does not have every object referenced by a commit it sent as "have". If a server omits such an object, it is fine: the client could lazily fetch that object before this fetch, and it can still do so after. The issue is when the server sends a thin pack containing an object that is a REF_DELTA against such a missing object: index-pack fails to fix the thin pack. When support for lazily fetching missing objects was added in 8b4c0103a9 ("sha1_file: support lazily fetching missing objects", 2017-12-08), support in index-pack was turned off in the belief that it accesses the repo only to do hash collision checks. However, this is not true: it also needs to access the repo to resolve REF_DELTA bases. Support for lazy fetching should still generally be turned off in index-pack because it is used as part of the lazy fetching process itself (if not, infinite loops may occur), but we do need to fetch the REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that those are REF_DELTA themselves, because we do not send "have" when making such fetches.) To resolve this, prefetch all missing REF_DELTA bases before attempting to resolve them. This both ensures that all bases are attempted to be fetched, and ensures that we make only one request per index-pack invocation, and not one request per missing object. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
printf "%s\n--not\n%s\n" \
$(git -C "$SERVER" rev-parse HEAD) \
$(git -C "$SERVER" rev-parse HEAD^) |
git -C "$SERVER" pack-objects --thin --stdout >thin.pack &&
# Ensure that the pack contains one delta against HEAD^:foo.txt. Since
# the delta contains at least 26 novel characters, the size cannot be
# contained in 4 bits, so the object header will take up 2 bytes. The
# most significant nybble of the first byte is 0b1111 (0b1 to indicate
# that the header continues, and 0b111 to indicate REF_DELTA), followed
# by any 3 nybbles, then the OID of the delta base.
printf "f.,..%s" $(intersperse "," <deltabase_missing) >want &&
hex_unpack <thin.pack | intersperse "," >have &&
grep $(cat want) have &&
# Ensure that the pack contains one delta against HEAD^:have.txt,
# similar to the above.
printf "f.,..%s" $(intersperse "," <deltabase_have) >want &&
index-pack: prefetch missing REF_DELTA bases When fetching, the client sends "have" commit IDs indicating that the server does not need to send any object referenced by those commits, reducing network I/O. When the client is a partial clone, the client still sends "have"s in this way, even if it does not have every object referenced by a commit it sent as "have". If a server omits such an object, it is fine: the client could lazily fetch that object before this fetch, and it can still do so after. The issue is when the server sends a thin pack containing an object that is a REF_DELTA against such a missing object: index-pack fails to fix the thin pack. When support for lazily fetching missing objects was added in 8b4c0103a9 ("sha1_file: support lazily fetching missing objects", 2017-12-08), support in index-pack was turned off in the belief that it accesses the repo only to do hash collision checks. However, this is not true: it also needs to access the repo to resolve REF_DELTA bases. Support for lazy fetching should still generally be turned off in index-pack because it is used as part of the lazy fetching process itself (if not, infinite loops may occur), but we do need to fetch the REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that those are REF_DELTA themselves, because we do not send "have" when making such fetches.) To resolve this, prefetch all missing REF_DELTA bases before attempting to resolve them. This both ensures that all bases are attempted to be fetched, and ensures that we make only one request per index-pack invocation, and not one request per missing object. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
hex_unpack <thin.pack | intersperse "," >have &&
grep $(cat want) have &&
replace_packfile thin.pack &&
t/lib-httpd: avoid using macOS' sed Among other differences relative to GNU sed, macOS' sed always ends its output with a trailing newline, even if the input did not have such a trailing newline. Surprisingly, this makes three httpd-based tests fail on macOS: t5616, t5702 and t5703. ("Surprisingly" because those tests have been around for some time, but apparently nobody runs them on macOS with a working Apache2 setup.) The reason is that we use `sed` in those tests to filter the response of the web server. Apart from the fact that we use GNU constructs (such as using a space after the `c` command instead of a backslash and a newline), we have another problem: macOS' sed LF-only newlines while webservers are supposed to use CR/LF ones. Even worse, t5616 uses `sed` to replace a binary part of the response with a new binary part (kind of hoping that the replaced binary part does not contain a 0x0a byte which would be interpreted as a newline). To that end, it calls on Perl to read the binary pack file and hex-encode it, then calls on `sed` to prefix every hex digit pair with a `\x` in order to construct the text that the `c` statement of the `sed` invocation is supposed to insert. So we call Perl and sed to construct a sed statement. The final nail in the coffin is that macOS' sed does not even interpret those `\x<hex>` constructs. Let's just replace all of that by Perl snippets. With Perl, at least, we do not have to deal with GNU vs macOS semantics, we do not have to worry about unwanted trailing newlines, and we do not have to spawn commands to construct arguments for other commands to be spawned (i.e. we can avoid a whole lot of shell scripting complexity). The upshot is that this fixes t5616, t5702 and t5703 on macOS with Apache2. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-27 14:23:11 +01:00
# Use protocol v2 because the perl command looks for the "packfile"
index-pack: prefetch missing REF_DELTA bases When fetching, the client sends "have" commit IDs indicating that the server does not need to send any object referenced by those commits, reducing network I/O. When the client is a partial clone, the client still sends "have"s in this way, even if it does not have every object referenced by a commit it sent as "have". If a server omits such an object, it is fine: the client could lazily fetch that object before this fetch, and it can still do so after. The issue is when the server sends a thin pack containing an object that is a REF_DELTA against such a missing object: index-pack fails to fix the thin pack. When support for lazily fetching missing objects was added in 8b4c0103a9 ("sha1_file: support lazily fetching missing objects", 2017-12-08), support in index-pack was turned off in the belief that it accesses the repo only to do hash collision checks. However, this is not true: it also needs to access the repo to resolve REF_DELTA bases. Support for lazy fetching should still generally be turned off in index-pack because it is used as part of the lazy fetching process itself (if not, infinite loops may occur), but we do need to fetch the REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that those are REF_DELTA themselves, because we do not send "have" when making such fetches.) To resolve this, prefetch all missing REF_DELTA bases before attempting to resolve them. This both ensures that all bases are attempted to be fetched, and ensures that we make only one request per index-pack invocation, and not one request per missing object. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
# section header.
test_config -C "$SERVER" protocol.version 2 &&
# Fetch the thin pack and ensure that index-pack is able to handle the
# REF_DELTA object with a missing promisor delta base.
GIT_TRACE_PACKET="$(pwd)/trace" git -C repo -c protocol.version=2 fetch &&
# Ensure that the missing delta base was directly fetched, but not the
# one that the client has.
grep "want $(cat deltabase_missing)" trace &&
! grep "want $(cat deltabase_have)" trace &&
index-pack: prefetch missing REF_DELTA bases When fetching, the client sends "have" commit IDs indicating that the server does not need to send any object referenced by those commits, reducing network I/O. When the client is a partial clone, the client still sends "have"s in this way, even if it does not have every object referenced by a commit it sent as "have". If a server omits such an object, it is fine: the client could lazily fetch that object before this fetch, and it can still do so after. The issue is when the server sends a thin pack containing an object that is a REF_DELTA against such a missing object: index-pack fails to fix the thin pack. When support for lazily fetching missing objects was added in 8b4c0103a9 ("sha1_file: support lazily fetching missing objects", 2017-12-08), support in index-pack was turned off in the belief that it accesses the repo only to do hash collision checks. However, this is not true: it also needs to access the repo to resolve REF_DELTA bases. Support for lazy fetching should still generally be turned off in index-pack because it is used as part of the lazy fetching process itself (if not, infinite loops may occur), but we do need to fetch the REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that those are REF_DELTA themselves, because we do not send "have" when making such fetches.) To resolve this, prefetch all missing REF_DELTA bases before attempting to resolve them. This both ensures that all bases are attempted to be fetched, and ensures that we make only one request per index-pack invocation, and not one request per missing object. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
t/lib-httpd: avoid using macOS' sed Among other differences relative to GNU sed, macOS' sed always ends its output with a trailing newline, even if the input did not have such a trailing newline. Surprisingly, this makes three httpd-based tests fail on macOS: t5616, t5702 and t5703. ("Surprisingly" because those tests have been around for some time, but apparently nobody runs them on macOS with a working Apache2 setup.) The reason is that we use `sed` in those tests to filter the response of the web server. Apart from the fact that we use GNU constructs (such as using a space after the `c` command instead of a backslash and a newline), we have another problem: macOS' sed LF-only newlines while webservers are supposed to use CR/LF ones. Even worse, t5616 uses `sed` to replace a binary part of the response with a new binary part (kind of hoping that the replaced binary part does not contain a 0x0a byte which would be interpreted as a newline). To that end, it calls on Perl to read the binary pack file and hex-encode it, then calls on `sed` to prefix every hex digit pair with a `\x` in order to construct the text that the `c` statement of the `sed` invocation is supposed to insert. So we call Perl and sed to construct a sed statement. The final nail in the coffin is that macOS' sed does not even interpret those `\x<hex>` constructs. Let's just replace all of that by Perl snippets. With Perl, at least, we do not have to deal with GNU vs macOS semantics, we do not have to worry about unwanted trailing newlines, and we do not have to spawn commands to construct arguments for other commands to be spawned (i.e. we can avoid a whole lot of shell scripting complexity). The upshot is that this fixes t5616, t5702 and t5703 on macOS with Apache2. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-02-27 14:23:11 +01:00
# Ensure that the one-time-perl script was used.
! test -e "$HTTPD_ROOT_PATH/one-time-perl"
index-pack: prefetch missing REF_DELTA bases When fetching, the client sends "have" commit IDs indicating that the server does not need to send any object referenced by those commits, reducing network I/O. When the client is a partial clone, the client still sends "have"s in this way, even if it does not have every object referenced by a commit it sent as "have". If a server omits such an object, it is fine: the client could lazily fetch that object before this fetch, and it can still do so after. The issue is when the server sends a thin pack containing an object that is a REF_DELTA against such a missing object: index-pack fails to fix the thin pack. When support for lazily fetching missing objects was added in 8b4c0103a9 ("sha1_file: support lazily fetching missing objects", 2017-12-08), support in index-pack was turned off in the belief that it accesses the repo only to do hash collision checks. However, this is not true: it also needs to access the repo to resolve REF_DELTA bases. Support for lazy fetching should still generally be turned off in index-pack because it is used as part of the lazy fetching process itself (if not, infinite loops may occur), but we do need to fetch the REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that those are REF_DELTA themselves, because we do not send "have" when making such fetches.) To resolve this, prefetch all missing REF_DELTA bases before attempting to resolve them. This both ensures that all bases are attempted to be fetched, and ensures that we make only one request per index-pack invocation, and not one request per missing object. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-14 23:10:55 +02:00
'
# DO NOT add non-httpd-specific tests here, because the last part of this
# test script is only executed when httpd is available and enabled.
test_done