Merge branch 'jt/non-blob-lazy-fetch'

A partial clone that is configured to lazily fetch missing objects
issues a "git fetch" request to the originating repository on demand
to fill in objects it has not yet obtained.  The request has been
optimized for the case of requesting a tree object (and not the leaf
blob objects contained in it) by telling the originating repository
that no blobs are needed.

* jt/non-blob-lazy-fetch:
  fetch-pack: exclude blobs when lazy-fetching trees
  fetch-pack: avoid object flags if no_dependents

This commit is contained in:
Junio C Hamano 2018-10-19 13:34:07 +09:00
commit fa54cccf1f
3 changed files with 121 additions and 42 deletions

View File

@ -253,8 +253,10 @@ static int find_common(struct fetch_negotiator *negotiator,
if (args->stateless_rpc && multi_ack == 1) if (args->stateless_rpc && multi_ack == 1)
die(_("--stateless-rpc requires multi_ack_detailed")); die(_("--stateless-rpc requires multi_ack_detailed"));
mark_tips(negotiator, args->negotiation_tips); if (!args->no_dependents) {
for_each_cached_alternate(negotiator, insert_one_alternate_object); mark_tips(negotiator, args->negotiation_tips);
for_each_cached_alternate(negotiator, insert_one_alternate_object);
}
fetching = 0; fetching = 0;
for ( ; refs ; refs = refs->next) { for ( ; refs ; refs = refs->next) {
@ -271,8 +273,12 @@ static int find_common(struct fetch_negotiator *negotiator,
* We use lookup_object here because we are only * We use lookup_object here because we are only
* interested in the case we *know* the object is * interested in the case we *know* the object is
* reachable and we have already scanned it. * reachable and we have already scanned it.
*
* Do this only if args->no_dependents is false (if it is true,
* we cannot trust the object flags).
*/ */
if (((o = lookup_object(the_repository, remote->hash)) != NULL) && if (!args->no_dependents &&
((o = lookup_object(the_repository, remote->hash)) != NULL) &&
(o->flags & COMPLETE)) { (o->flags & COMPLETE)) {
continue; continue;
} }
@ -707,31 +713,29 @@ static void mark_complete_and_common_ref(struct fetch_negotiator *negotiator,
oidset_clear(&loose_oid_set); oidset_clear(&loose_oid_set);
if (!args->no_dependents) { if (!args->deepen) {
if (!args->deepen) { for_each_ref(mark_complete_oid, NULL);
for_each_ref(mark_complete_oid, NULL); for_each_cached_alternate(NULL, mark_alternate_complete);
for_each_cached_alternate(NULL, mark_alternate_complete); commit_list_sort_by_date(&complete);
commit_list_sort_by_date(&complete); if (cutoff)
if (cutoff) mark_recent_complete_commits(args, cutoff);
mark_recent_complete_commits(args, cutoff); }
}
/* /*
* Mark all complete remote refs as common refs. * Mark all complete remote refs as common refs.
* Don't mark them common yet; the server has to be told so first. * Don't mark them common yet; the server has to be told so first.
*/ */
for (ref = *refs; ref; ref = ref->next) { for (ref = *refs; ref; ref = ref->next) {
struct object *o = deref_tag(the_repository, struct object *o = deref_tag(the_repository,
lookup_object(the_repository, lookup_object(the_repository,
ref->old_oid.hash), ref->old_oid.hash),
NULL, 0); NULL, 0);
if (!o || o->type != OBJ_COMMIT || !(o->flags & COMPLETE)) if (!o || o->type != OBJ_COMMIT || !(o->flags & COMPLETE))
continue; continue;
negotiator->known_common(negotiator, negotiator->known_common(negotiator,
(struct commit *)o); (struct commit *)o);
}
} }
save_commit_buffer = old_save_commit_buffer; save_commit_buffer = old_save_commit_buffer;
@ -987,11 +991,15 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
if (!server_supports("deepen-relative") && args->deepen_relative) if (!server_supports("deepen-relative") && args->deepen_relative)
die(_("Server does not support --deepen")); die(_("Server does not support --deepen"));
mark_complete_and_common_ref(&negotiator, args, &ref); if (!args->no_dependents) {
filter_refs(args, &ref, sought, nr_sought); mark_complete_and_common_ref(&negotiator, args, &ref);
if (everything_local(args, &ref)) { filter_refs(args, &ref, sought, nr_sought);
packet_flush(fd[1]); if (everything_local(args, &ref)) {
goto all_done; packet_flush(fd[1]);
goto all_done;
}
} else {
filter_refs(args, &ref, sought, nr_sought);
} }
if (find_common(&negotiator, args, fd, &oid, ref) < 0) if (find_common(&negotiator, args, fd, &oid, ref) < 0)
if (!args->keep_pack) if (!args->keep_pack)
@ -1037,7 +1045,7 @@ static void add_shallow_requests(struct strbuf *req_buf,
} }
} }
static void add_wants(const struct ref *wants, struct strbuf *req_buf) static void add_wants(int no_dependents, const struct ref *wants, struct strbuf *req_buf)
{ {
int use_ref_in_want = server_supports_feature("fetch", "ref-in-want", 0); int use_ref_in_want = server_supports_feature("fetch", "ref-in-want", 0);
@ -1054,8 +1062,12 @@ static void add_wants(const struct ref *wants, struct strbuf *req_buf)
* We use lookup_object here because we are only * We use lookup_object here because we are only
* interested in the case we *know* the object is * interested in the case we *know* the object is
* reachable and we have already scanned it. * reachable and we have already scanned it.
*
* Do this only if no_dependents is false (if it is true,
* we cannot trust the object flags).
*/ */
if (((o = lookup_object(the_repository, remote->hash)) != NULL) && if (!no_dependents &&
((o = lookup_object(the_repository, remote->hash)) != NULL) &&
(o->flags & COMPLETE)) { (o->flags & COMPLETE)) {
continue; continue;
} }
@ -1152,7 +1164,7 @@ static int send_fetch_request(struct fetch_negotiator *negotiator, int fd_out,
} }
/* add wants */ /* add wants */
add_wants(wants, &req_buf); add_wants(args->no_dependents, wants, &req_buf);
if (args->no_dependents) { if (args->no_dependents) {
packet_buf_write(&req_buf, "done"); packet_buf_write(&req_buf, "done");
@ -1343,16 +1355,21 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
args->deepen = 1; args->deepen = 1;
/* Filter 'ref' by 'sought' and those that aren't local */ /* Filter 'ref' by 'sought' and those that aren't local */
mark_complete_and_common_ref(&negotiator, args, &ref); if (!args->no_dependents) {
filter_refs(args, &ref, sought, nr_sought); mark_complete_and_common_ref(&negotiator, args, &ref);
if (everything_local(args, &ref)) filter_refs(args, &ref, sought, nr_sought);
state = FETCH_DONE; if (everything_local(args, &ref))
else state = FETCH_DONE;
state = FETCH_SEND_REQUEST; else
state = FETCH_SEND_REQUEST;
mark_tips(&negotiator, args->negotiation_tips); mark_tips(&negotiator, args->negotiation_tips);
for_each_cached_alternate(&negotiator, for_each_cached_alternate(&negotiator,
insert_one_alternate_object); insert_one_alternate_object);
} else {
filter_refs(args, &ref, sought, nr_sought);
state = FETCH_SEND_REQUEST;
}
break; break;
case FETCH_SEND_REQUEST: case FETCH_SEND_REQUEST:
if (send_fetch_request(&negotiator, fd[1], args, ref, if (send_fetch_request(&negotiator, fd[1], args, ref,
@ -1595,6 +1612,20 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
if (nr_sought) if (nr_sought)
nr_sought = remove_duplicates_in_refs(sought, nr_sought); nr_sought = remove_duplicates_in_refs(sought, nr_sought);
if (args->no_dependents && !args->filter_options.choice) {
/*
* The protocol does not support requesting that only the
* wanted objects be sent, so approximate this by setting a
* "blob:none" filter if no filter is already set. This works
* for all object types: note that wanted blobs will still be
* sent because they are directly specified as a "want".
*
* NEEDSWORK: Add an option in the protocol to request that
* only the wanted objects be sent, and implement it.
*/
parse_list_objects_filter(&args->filter_options, "blob:none");
}
if (!ref) { if (!ref) {
packet_flush(fd[1]); packet_flush(fd[1]);
die(_("no matching remote head")); die(_("no matching remote head"));

View File

@ -43,6 +43,13 @@ struct fetch_pack_args {
unsigned from_promisor:1; unsigned from_promisor:1;
/* /*
* Attempt to fetch only the wanted objects, and not any objects
* referred to by them. Due to protocol limitations, extraneous
* objects may still be included. (When fetching non-blob
* objects, only blobs are excluded; when fetching a blob, the
* blob itself will still be sent. The client does not need to
* know whether a wanted object is a blob or not.)
*
* If 1, fetch_pack() will also not modify any object flags. * If 1, fetch_pack() will also not modify any object flags.
* This allows fetch_pack() to safely be called by any function, * This allows fetch_pack() to safely be called by any function,
* regardless of which object flags it uses (if any). * regardless of which object flags it uses (if any).

View File

@ -182,6 +182,47 @@ test_expect_success 'fetching of missing objects works with ref-in-want enabled'
grep "git< fetch=.*ref-in-want" trace grep "git< fetch=.*ref-in-want" trace
' '
# Lazy fetch of a single blob.
#
# Builds a one-commit server repository, clones it, records the blob ID
# of foo.t, and then deletes everything under the clone's object
# directory.  The server is configured with uploadpack.allowanysha1inwant
# and uploadpack.allowfilter, and the clone is marked as a partial clone
# of "origin" (repository format version 1 plus extensions.partialclone).
# The final cat-file asks for the blob that is no longer present
# locally; the test passes only if that command succeeds.
test_expect_success 'fetching of missing blobs works' '
rm -rf server repo &&
test_create_repo server &&
test_commit -C server foo &&
git -C server repack -a -d --write-bitmap-index &&
git clone "file://$(pwd)/server" repo &&
git hash-object repo/foo.t >blobhash &&
rm -rf repo/.git/objects/* &&
git -C server config uploadpack.allowanysha1inwant 1 &&
git -C server config uploadpack.allowfilter 1 &&
git -C repo config core.repositoryformatversion 1 &&
git -C repo config extensions.partialclone "origin" &&
git -C repo cat-file -p $(cat blobhash)
'
# Lazy fetch of a tree must not drag in the blobs it references.
#
# Same setup as a blob lazy-fetch: one-commit server, clone, record the
# IDs of foo's tree and of the foo.t blob, wipe the clone's object
# directory, allow arbitrary wants and filters on the server, and mark
# the clone as a partial clone of "origin".  cat-file is then run on the
# tree, which is no longer present locally.  Afterwards rev-list
# --objects --missing=print is run from that tree and its output is
# checked for two things: a line starting with the tree ID (tree is
# present) and a line starting with "?" followed by the blob ID (blob is
# reported as missing).
test_expect_success 'fetching of missing trees does not fetch blobs' '
rm -rf server repo &&
test_create_repo server &&
test_commit -C server foo &&
git -C server repack -a -d --write-bitmap-index &&
git clone "file://$(pwd)/server" repo &&
git -C repo rev-parse foo^{tree} >treehash &&
git hash-object repo/foo.t >blobhash &&
rm -rf repo/.git/objects/* &&
git -C server config uploadpack.allowanysha1inwant 1 &&
git -C server config uploadpack.allowfilter 1 &&
git -C repo config core.repositoryformatversion 1 &&
git -C repo config extensions.partialclone "origin" &&
git -C repo cat-file -p $(cat treehash) &&
# Ensure that the tree, but not the blob, is fetched
git -C repo rev-list --objects --missing=print $(cat treehash) >objects &&
grep "^$(cat treehash)" objects &&
grep "^[?]$(cat blobhash)" objects
'
test_expect_success 'rev-list stops traversal at missing and promised commit' ' test_expect_success 'rev-list stops traversal at missing and promised commit' '
rm -rf repo && rm -rf repo &&
test_create_repo repo && test_create_repo repo &&