From 385d1bfd7ad5d49783a3956bba19d9feea9955b6 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Tue, 14 May 2019 14:10:54 -0700 Subject: [PATCH 1/4] t5616: refactor packfile replacement A subsequent patch will perform the same packfile replacement that is already done twice, so refactor it into its own function. Also, the same subsequent patch will use, in another way, part of the packfile replacement functionality, so extract those out too. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- t/t5616-partial-clone.sh | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 9a8f9886b3..7cc0c71556 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -244,11 +244,25 @@ test_expect_success 'fetch what is specified on CLI even if already promised' ' . "$TEST_DIRECTORY"/lib-httpd.sh start_httpd -# Converts bytes into a form suitable for inclusion in a sed command. For -# example, "printf 'ab\r\n' | hex_unpack" results in '\x61\x62\x0d\x0a'. -sed_escape () { - perl -e '$/ = undef; $input = <>; print unpack("H2" x length($input), $input)' | - sed 's/\(..\)/\\x\1/g' +# Converts bytes into their hexadecimal representation. For example, +# "printf 'ab\r\n' | hex_unpack" results in '61620d0a'. +hex_unpack () { + perl -e '$/ = undef; $input = <>; print unpack("H2" x length($input), $input)' +} + +# Inserts $1 at the start of the string and every 2 characters thereafter. +intersperse () { + sed 's/\(..\)/'$1'\1/g' +} + +# Create a one-time-sed command to replace the existing packfile with $1. +replace_packfile () { + # The protocol requires that the packfile be sent in sideband 1, hence + # the extra \x01 byte at the beginning. + printf "1,/packfile/!c %04x\\\\x01%s0000" \ + "$(($(wc -c <$1) + 5))" \ + "$(hex_unpack <$1 | intersperse '\\x')" \ + >"$HTTPD_ROOT_PATH/one-time-sed" } test_expect_success 'upon cloning, check that all refs point to objects' ' @@ -270,10 +284,7 @@ test_expect_success 'upon cloning, check that all refs point to objects' ' # Replace the existing packfile with the crafted one. The protocol # requires that the packfile be sent in sideband 1, hence the extra # \x01 byte at the beginning. - printf "1,/packfile/!c %04x\\\\x01%s0000" \ - "$(($(wc -c "$HTTPD_ROOT_PATH/one-time-sed" && + replace_packfile incomplete.pack && # Use protocol v2 because the sed command looks for the "packfile" # section header. @@ -313,10 +324,7 @@ test_expect_success 'when partial cloning, tolerate server not sending target of # Replace the existing packfile with the crafted one. The protocol # requires that the packfile be sent in sideband 1, hence the extra # \x01 byte at the beginning. - printf "1,/packfile/!c %04x\\\\x01%s0000" \ - "$(($(wc -c "$HTTPD_ROOT_PATH/one-time-sed" && + replace_packfile incomplete.pack && # Use protocol v2 because the sed command looks for the "packfile" # section header. From 8a30a1efd11bcfb7b0f5b8543492a09fc495ae60 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Tue, 14 May 2019 14:10:55 -0700 Subject: [PATCH 2/4] index-pack: prefetch missing REF_DELTA bases When fetching, the client sends "have" commit IDs indicating that the server does not need to send any object referenced by those commits, reducing network I/O. When the client is a partial clone, the client still sends "have"s in this way, even if it does not have every object referenced by a commit it sent as "have". If a server omits such an object, it is fine: the client could lazily fetch that object before this fetch, and it can still do so after. The issue is when the server sends a thin pack containing an object that is a REF_DELTA against such a missing object: index-pack fails to fix the thin pack. When support for lazily fetching missing objects was added in 8b4c0103a9 ("sha1_file: support lazily fetching missing objects", 2017-12-08), support in index-pack was turned off in the belief that it accesses the repo only to do hash collision checks. However, this is not true: it also needs to access the repo to resolve REF_DELTA bases. Support for lazy fetching should still generally be turned off in index-pack because it is used as part of the lazy fetching process itself (if not, infinite loops may occur), but we do need to fetch the REF_DELTA bases. (When fetching REF_DELTA bases, it is unlikely that those are REF_DELTA themselves, because we do not send "have" when making such fetches.) To resolve this, prefetch all missing REF_DELTA bases before attempting to resolve them. This both ensures that all bases are attempted to be fetched, and ensures that we make only one request per index-pack invocation, and not one request per missing object. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 26 +++++++++++++++-- t/t5616-partial-clone.sh | 61 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 2 deletions(-) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index ccf4eb7e9b..0d55f73b0b 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -14,6 +14,7 @@ #include "thread-utils.h" #include "packfile.h" #include "object-store.h" +#include "fetch-object.h" static const char index_pack_usage[] = "git index-pack [-v] [-o ] [--keep | --keep=] [--verify] [--strict] ( | --stdin [--fix-thin] [])"; @@ -1351,6 +1352,25 @@ static void fix_unresolved_deltas(struct hashfile *f) sorted_by_pos[i] = &ref_deltas[i]; QSORT(sorted_by_pos, nr_ref_deltas, delta_pos_compare); + if (repository_format_partial_clone) { + /* + * Prefetch the delta bases. + */ + struct oid_array to_fetch = OID_ARRAY_INIT; + for (i = 0; i < nr_ref_deltas; i++) { + struct ref_delta_entry *d = sorted_by_pos[i]; + if (!oid_object_info_extended(the_repository, &d->oid, + NULL, + OBJECT_INFO_FOR_PREFETCH)) + continue; + oid_array_append(&to_fetch, &d->oid); + } + if (to_fetch.nr) + fetch_objects(repository_format_partial_clone, + to_fetch.oid, to_fetch.nr); + oid_array_clear(&to_fetch); + } + for (i = 0; i < nr_ref_deltas; i++) { struct ref_delta_entry *d = sorted_by_pos[i]; enum object_type type; @@ -1650,8 +1670,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) int report_end_of_input = 0; /* - * index-pack never needs to fetch missing objects, since it only - * accesses the repo to do hash collision checks + * index-pack never needs to fetch missing objects except when + * REF_DELTA bases are missing (which are explicitly handled). It only + * accesses the repo to do hash collision checks and to check which + * REF_DELTA bases need to be fetched. */ fetch_if_missing = 0; diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 7cc0c71556..f1baf83502 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -339,4 +339,65 @@ test_expect_success 'when partial cloning, tolerate server not sending target of ! test -e "$HTTPD_ROOT_PATH/one-time-sed" ' +test_expect_success 'tolerate server sending REF_DELTA against missing promisor objects' ' + SERVER="$HTTPD_DOCUMENT_ROOT_PATH/server" && + rm -rf "$SERVER" repo && + test_create_repo "$SERVER" && + test_config -C "$SERVER" uploadpack.allowfilter 1 && + test_config -C "$SERVER" uploadpack.allowanysha1inwant 1 && + + # Create a commit with a blob to be used as a delta base. + for i in $(test_seq 10) + do + echo "this is a line" >>"$SERVER/foo.txt" + done && + git -C "$SERVER" add foo.txt && + git -C "$SERVER" commit -m bar && + git -C "$SERVER" rev-parse HEAD:foo.txt >deltabase && + + git -c protocol.version=2 clone --no-checkout \ + --filter=blob:none $HTTPD_URL/one_time_sed/server repo && + + # Sanity check to ensure that the client does not have that blob. + git -C repo rev-list --objects --exclude-promisor-objects \ + -- $(cat deltabase) >objlist && + test_line_count = 0 objlist && + + # Another commit. This commit will be fetched by the client. + echo "abcdefghijklmnopqrstuvwxyz" >>"$SERVER/foo.txt" && + git -C "$SERVER" add foo.txt && + git -C "$SERVER" commit -m baz && + + # Pack a thin pack containing, among other things, HEAD:foo.txt + # delta-ed against HEAD^:foo.txt. + printf "%s\n--not\n%s\n" \ + $(git -C "$SERVER" rev-parse HEAD) \ + $(git -C "$SERVER" rev-parse HEAD^) | + git -C "$SERVER" pack-objects --thin --stdout >thin.pack && + + # Ensure that the pack contains one delta against HEAD^:foo.txt. Since + # the delta contains at least 26 novel characters, the size cannot be + # contained in 4 bits, so the object header will take up 2 bytes. The + # most significant nybble of the first byte is 0b1111 (0b1 to indicate + # that the header continues, and 0b111 to indicate REF_DELTA), followed + # by any 3 nybbles, then the OID of the delta base. + git -C "$SERVER" rev-parse HEAD^:foo.txt >deltabase && + printf "f.,..%s" $(intersperse "," want && + hex_unpack have && + grep $(cat want) have && + + replace_packfile thin.pack && + + # Use protocol v2 because the sed command looks for the "packfile" + # section header. + test_config -C "$SERVER" protocol.version 2 && + + # Fetch the thin pack and ensure that index-pack is able to handle the + # REF_DELTA object with a missing promisor delta base. + git -C repo -c protocol.version=2 fetch && + + # Ensure that the one-time-sed script was used. + ! test -e "$HTTPD_ROOT_PATH/one-time-sed" +' + test_done From 5718c53d0ab47cc750e05d2af4a401197117c2f0 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Tue, 11 Jun 2019 14:06:46 -0700 Subject: [PATCH 3/4] t5616: use correct flag to check object is missing If we want to check whether an object is missing, the correct flag to pass to rev-list is --ignore-missing; --exclude-promisor-objects will exclude any object that came from the promisor remote, whether it is present or missing. Use the correct flag. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- t/t5616-partial-clone.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index f1baf83502..8b9b471a62 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -359,7 +359,7 @@ test_expect_success 'tolerate server sending REF_DELTA against missing promisor --filter=blob:none $HTTPD_URL/one_time_sed/server repo && # Sanity check to ensure that the client does not have that blob. - git -C repo rev-list --objects --exclude-promisor-objects \ + git -C repo rev-list --objects --ignore-missing \ -- $(cat deltabase) >objlist && test_line_count = 0 objlist && From 810e19322d0165e89faf560e65de61b9cd5d63dc Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Tue, 11 Jun 2019 14:06:47 -0700 Subject: [PATCH 4/4] t5616: cover case of client having delta base When fetching into a partial clone, Git first prefetches missing REF_DELTA bases from the promisor remote. (This feature was introduced in [1].) But as can be seen in a recent test coverage report [2], the case in which a REF_DELTA base is already present is not covered by tests. Extend the tests slightly to cover this case. [1] 8a30a1efd1 ("index-pack: prefetch missing REF_DELTA bases", 2019-05-15). [2] https://public-inbox.org/git/396091fc-5572-19a5-4f18-61c258590dd5@gmail.com/ Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- t/t5616-partial-clone.sh | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 8b9b471a62..b91ef548f8 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -346,30 +346,37 @@ test_expect_success 'tolerate server sending REF_DELTA against missing promisor test_config -C "$SERVER" uploadpack.allowfilter 1 && test_config -C "$SERVER" uploadpack.allowanysha1inwant 1 && - # Create a commit with a blob to be used as a delta base. + # Create a commit with 2 blobs to be used as delta bases. for i in $(test_seq 10) do - echo "this is a line" >>"$SERVER/foo.txt" + echo "this is a line" >>"$SERVER/foo.txt" && + echo "this is another line" >>"$SERVER/have.txt" done && - git -C "$SERVER" add foo.txt && + git -C "$SERVER" add foo.txt have.txt && git -C "$SERVER" commit -m bar && - git -C "$SERVER" rev-parse HEAD:foo.txt >deltabase && + git -C "$SERVER" rev-parse HEAD:foo.txt >deltabase_missing && + git -C "$SERVER" rev-parse HEAD:have.txt >deltabase_have && + # Clone. The client has deltabase_have but not deltabase_missing. git -c protocol.version=2 clone --no-checkout \ --filter=blob:none $HTTPD_URL/one_time_sed/server repo && + git -C repo hash-object -w -- "$SERVER/have.txt" && - # Sanity check to ensure that the client does not have that blob. + # Sanity check to ensure that the client does not have + # deltabase_missing. git -C repo rev-list --objects --ignore-missing \ - -- $(cat deltabase) >objlist && + -- $(cat deltabase_missing) >objlist && test_line_count = 0 objlist && # Another commit. This commit will be fetched by the client. echo "abcdefghijklmnopqrstuvwxyz" >>"$SERVER/foo.txt" && - git -C "$SERVER" add foo.txt && + echo "abcdefghijklmnopqrstuvwxyz" >>"$SERVER/have.txt" && + git -C "$SERVER" add foo.txt have.txt && git -C "$SERVER" commit -m baz && # Pack a thin pack containing, among other things, HEAD:foo.txt - # delta-ed against HEAD^:foo.txt. + # delta-ed against HEAD^:foo.txt and HEAD:have.txt delta-ed against + # HEAD^:have.txt. printf "%s\n--not\n%s\n" \ $(git -C "$SERVER" rev-parse HEAD) \ $(git -C "$SERVER" rev-parse HEAD^) | @@ -381,8 +388,13 @@ test_expect_success 'tolerate server sending REF_DELTA against missing promisor # most significant nybble of the first byte is 0b1111 (0b1 to indicate # that the header continues, and 0b111 to indicate REF_DELTA), followed # by any 3 nybbles, then the OID of the delta base. - git -C "$SERVER" rev-parse HEAD^:foo.txt >deltabase && - printf "f.,..%s" $(intersperse "," want && + printf "f.,..%s" $(intersperse "," want && + hex_unpack have && + grep $(cat want) have && + + # Ensure that the pack contains one delta against HEAD^:have.txt, + # similar to the above. + printf "f.,..%s" $(intersperse "," want && hex_unpack have && grep $(cat want) have && @@ -394,7 +406,12 @@ test_expect_success 'tolerate server sending REF_DELTA against missing promisor # Fetch the thin pack and ensure that index-pack is able to handle the # REF_DELTA object with a missing promisor delta base. - git -C repo -c protocol.version=2 fetch && + GIT_TRACE_PACKET="$(pwd)/trace" git -C repo -c protocol.version=2 fetch && + + # Ensure that the missing delta base was directly fetched, but not the + # one that the client has. + grep "want $(cat deltabase_missing)" trace && + ! grep "want $(cat deltabase_have)" trace && # Ensure that the one-time-sed script was used. ! test -e "$HTTPD_ROOT_PATH/one-time-sed"