From f447a499dbb8be3a9f76f8099938129c74fcbd32 Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Mon, 13 Aug 2018 11:14:28 -0700 Subject: [PATCH 01/10] list-objects: store common func args in struct This will make utility functions easier to create, as done by the next patch. Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- list-objects.c | 156 +++++++++++++++++++++++-------------------------- 1 file changed, 73 insertions(+), 83 deletions(-) diff --git a/list-objects.c b/list-objects.c index c99c47ac18..584518a3fa 100644 --- a/list-objects.c +++ b/list-objects.c @@ -12,20 +12,25 @@ #include "packfile.h" #include "object-store.h" -static void process_blob(struct rev_info *revs, +struct traversal_context { + struct rev_info *revs; + show_object_fn show_object; + show_commit_fn show_commit; + void *show_data; + filter_object_fn filter_fn; + void *filter_data; +}; + +static void process_blob(struct traversal_context *ctx, struct blob *blob, - show_object_fn show, struct strbuf *path, - const char *name, - void *cb_data, - filter_object_fn filter_fn, - void *filter_data) + const char *name) { struct object *obj = &blob->object; size_t pathlen; enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW; - if (!revs->blob_objects) + if (!ctx->revs->blob_objects) return; if (!obj) die("bad blob object"); @@ -41,21 +46,21 @@ static void process_blob(struct rev_info *revs, * may cause the actual filter to report an incomplete list * of missing objects. */ - if (revs->exclude_promisor_objects && + if (ctx->revs->exclude_promisor_objects && !has_object_file(&obj->oid) && is_promisor_object(&obj->oid)) return; pathlen = path->len; strbuf_addstr(path, name); - if (!(obj->flags & USER_GIVEN) && filter_fn) - r = filter_fn(LOFS_BLOB, obj, - path->buf, &path->buf[pathlen], - filter_data); + if (!(obj->flags & USER_GIVEN) && ctx->filter_fn) + r = ctx->filter_fn(LOFS_BLOB, obj, + path->buf, &path->buf[pathlen], + ctx->filter_data); if (r & LOFR_MARK_SEEN) obj->flags |= SEEN; if (r & LOFR_DO_SHOW) - show(obj, path->buf, cb_data); + ctx->show_object(obj, path->buf, ctx->show_data); strbuf_setlen(path, pathlen); } @@ -81,26 +86,21 @@ static void process_blob(struct rev_info *revs, * the link, and how to do it. Whether it necessarily makes * any sense what-so-ever to ever do that is another issue. */ -static void process_gitlink(struct rev_info *revs, +static void process_gitlink(struct traversal_context *ctx, const unsigned char *sha1, - show_object_fn show, struct strbuf *path, - const char *name, - void *cb_data) + const char *name) { /* Nothing to do */ } -static void process_tree(struct rev_info *revs, +static void process_tree(struct traversal_context *ctx, struct tree *tree, - show_object_fn show, struct strbuf *base, - const char *name, - void *cb_data, - filter_object_fn filter_fn, - void *filter_data) + const char *name) { struct object *obj = &tree->object; + struct rev_info *revs = ctx->revs; struct tree_desc desc; struct name_entry entry; enum interesting match = revs->diffopt.pathspec.nr == 0 ? @@ -133,14 +133,14 @@ static void process_tree(struct rev_info *revs, } strbuf_addstr(base, name); - if (!(obj->flags & USER_GIVEN) && filter_fn) - r = filter_fn(LOFS_BEGIN_TREE, obj, - base->buf, &base->buf[baselen], - filter_data); + if (!(obj->flags & USER_GIVEN) && ctx->filter_fn) + r = ctx->filter_fn(LOFS_BEGIN_TREE, obj, + base->buf, &base->buf[baselen], + ctx->filter_data); if (r & LOFR_MARK_SEEN) obj->flags |= SEEN; if (r & LOFR_DO_SHOW) - show(obj, base->buf, cb_data); + ctx->show_object(obj, base->buf, ctx->show_data); if (base->len) strbuf_addch(base, '/'); @@ -157,29 +157,25 @@ static void process_tree(struct rev_info *revs, } if (S_ISDIR(entry.mode)) - process_tree(revs, + process_tree(ctx, lookup_tree(the_repository, entry.oid), - show, base, entry.path, - cb_data, filter_fn, filter_data); + base, entry.path); else if (S_ISGITLINK(entry.mode)) - process_gitlink(revs, entry.oid->hash, - show, base, entry.path, - cb_data); + process_gitlink(ctx, entry.oid->hash, base, entry.path); else - process_blob(revs, + process_blob(ctx, lookup_blob(the_repository, entry.oid), - show, base, entry.path, - cb_data, filter_fn, filter_data); + base, entry.path); } - if (!(obj->flags & USER_GIVEN) && filter_fn) { - r = filter_fn(LOFS_END_TREE, obj, - base->buf, &base->buf[baselen], - filter_data); + if (!(obj->flags & USER_GIVEN) && ctx->filter_fn) { + r = ctx->filter_fn(LOFS_END_TREE, obj, + base->buf, &base->buf[baselen], + ctx->filter_data); if (r & LOFR_MARK_SEEN) obj->flags |= SEEN; if (r & LOFR_DO_SHOW) - show(obj, base->buf, cb_data); + ctx->show_object(obj, base->buf, ctx->show_data); } strbuf_setlen(base, baselen); @@ -242,19 +238,15 @@ static void add_pending_tree(struct rev_info *revs, struct tree *tree) add_pending_object(revs, &tree->object, ""); } -static void traverse_trees_and_blobs(struct rev_info *revs, - struct strbuf *base, - show_object_fn show_object, - void *show_data, - filter_object_fn filter_fn, - void *filter_data) +static void traverse_trees_and_blobs(struct traversal_context *ctx, + struct strbuf *base) { int i; assert(base->len == 0); - for (i = 0; i < revs->pending.nr; i++) { - struct object_array_entry *pending = revs->pending.objects + i; + for (i = 0; i < ctx->revs->pending.nr; i++) { + struct object_array_entry *pending = ctx->revs->pending.objects + i; struct object *obj = pending->item; const char *name = pending->name; const char *path = pending->path; @@ -262,62 +254,49 @@ static void traverse_trees_and_blobs(struct rev_info *revs, continue; if (obj->type == OBJ_TAG) { obj->flags |= SEEN; - show_object(obj, name, show_data); + ctx->show_object(obj, name, ctx->show_data); continue; } if (!path) path = ""; if (obj->type == OBJ_TREE) { - process_tree(revs, (struct tree *)obj, show_object, - base, path, show_data, - filter_fn, filter_data); + process_tree(ctx, (struct tree *)obj, base, path); continue; } if (obj->type == OBJ_BLOB) { - process_blob(revs, (struct blob *)obj, show_object, - base, path, show_data, - filter_fn, filter_data); + process_blob(ctx, (struct blob *)obj, base, path); continue; } die("unknown pending object %s (%s)", oid_to_hex(&obj->oid), name); } - object_array_clear(&revs->pending); + object_array_clear(&ctx->revs->pending); } -static void do_traverse(struct rev_info *revs, - show_commit_fn show_commit, - show_object_fn show_object, - void *show_data, - filter_object_fn filter_fn, - void *filter_data) +static void do_traverse(struct traversal_context *ctx) { struct commit *commit; struct strbuf csp; /* callee's scratch pad */ strbuf_init(&csp, PATH_MAX); - while ((commit = get_revision(revs)) != NULL) { + while ((commit = get_revision(ctx->revs)) != NULL) { /* * an uninteresting boundary commit may not have its tree * parsed yet, but we are not going to show them anyway */ if (get_commit_tree(commit)) - add_pending_tree(revs, get_commit_tree(commit)); - show_commit(commit, show_data); + add_pending_tree(ctx->revs, get_commit_tree(commit)); + ctx->show_commit(commit, ctx->show_data); - if (revs->tree_blobs_in_commit_order) + if (ctx->revs->tree_blobs_in_commit_order) /* * NEEDSWORK: Adding the tree and then flushing it here * needs a reallocation for each commit. Can we pass the * tree directory without allocation churn? */ - traverse_trees_and_blobs(revs, &csp, - show_object, show_data, - filter_fn, filter_data); + traverse_trees_and_blobs(ctx, &csp); } - traverse_trees_and_blobs(revs, &csp, - show_object, show_data, - filter_fn, filter_data); + traverse_trees_and_blobs(ctx, &csp); strbuf_release(&csp); } @@ -326,7 +305,14 @@ void traverse_commit_list(struct rev_info *revs, show_object_fn show_object, void *show_data) { - do_traverse(revs, show_commit, show_object, show_data, NULL, NULL); + struct traversal_context ctx; + ctx.revs = revs; + ctx.show_commit = show_commit; + ctx.show_object = show_object; + ctx.show_data = show_data; + ctx.filter_fn = NULL; + ctx.filter_data = NULL; + do_traverse(&ctx); } void traverse_commit_list_filtered( @@ -337,14 +323,18 @@ void traverse_commit_list_filtered( void *show_data, struct oidset *omitted) { - filter_object_fn filter_fn = NULL; + struct traversal_context ctx; filter_free_fn filter_free_fn = NULL; - void *filter_data = NULL; - filter_data = list_objects_filter__init(omitted, filter_options, - &filter_fn, &filter_free_fn); - do_traverse(revs, show_commit, show_object, show_data, - filter_fn, filter_data); - if (filter_data && filter_free_fn) - filter_free_fn(filter_data); + ctx.revs = revs; + ctx.show_object = show_object; + ctx.show_commit = show_commit; + ctx.show_data = show_data; + ctx.filter_fn = NULL; + + ctx.filter_data = list_objects_filter__init(omitted, filter_options, + &ctx.filter_fn, &filter_free_fn); + do_traverse(&ctx); + if (ctx.filter_data && filter_free_fn) + filter_free_fn(ctx.filter_data); } From 9202489174a110f82867edbac601f12480a4e284 Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Mon, 13 Aug 2018 11:14:29 -0700 Subject: [PATCH 02/10] list-objects: refactor to process_tree_contents This will be used in a follow-up patch to reduce indentation needed when invoking the logic conditionally. i.e. rather than: if (foo) { while (...) { /* this is very indented */ } } we will have: if (foo) process_tree_contents(...); Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- list-objects.c | 68 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 27 deletions(-) diff --git a/list-objects.c b/list-objects.c index 584518a3fa..ccc529e5e3 100644 --- a/list-objects.c +++ b/list-objects.c @@ -94,6 +94,46 @@ static void process_gitlink(struct traversal_context *ctx, /* Nothing to do */ } +static void process_tree(struct traversal_context *ctx, + struct tree *tree, + struct strbuf *base, + const char *name); + +static void process_tree_contents(struct traversal_context *ctx, + struct tree *tree, + struct strbuf *base) +{ + struct tree_desc desc; + struct name_entry entry; + enum interesting match = ctx->revs->diffopt.pathspec.nr == 0 ? + all_entries_interesting : entry_not_interesting; + + init_tree_desc(&desc, tree->buffer, tree->size); + + while (tree_entry(&desc, &entry)) { + if (match != all_entries_interesting) { + match = tree_entry_interesting(&entry, base, 0, + &ctx->revs->diffopt.pathspec); + if (match == all_entries_not_interesting) + break; + if (match == entry_not_interesting) + continue; + } + + if (S_ISDIR(entry.mode)) + process_tree(ctx, + lookup_tree(the_repository, entry.oid), + base, entry.path); + else if (S_ISGITLINK(entry.mode)) + process_gitlink(ctx, entry.oid->hash, + base, entry.path); + else + process_blob(ctx, + lookup_blob(the_repository, entry.oid), + base, entry.path); + } +} + static void process_tree(struct traversal_context *ctx, struct tree *tree, struct strbuf *base, @@ -101,10 +141,6 @@ static void process_tree(struct traversal_context *ctx, { struct object *obj = &tree->object; struct rev_info *revs = ctx->revs; - struct tree_desc desc; - struct name_entry entry; - enum interesting match = revs->diffopt.pathspec.nr == 0 ? - all_entries_interesting: entry_not_interesting; int baselen = base->len; enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW; int gently = revs->ignore_missing_links || @@ -144,29 +180,7 @@ static void process_tree(struct traversal_context *ctx, if (base->len) strbuf_addch(base, '/'); - init_tree_desc(&desc, tree->buffer, tree->size); - - while (tree_entry(&desc, &entry)) { - if (match != all_entries_interesting) { - match = tree_entry_interesting(&entry, base, 0, - &revs->diffopt.pathspec); - if (match == all_entries_not_interesting) - break; - if (match == entry_not_interesting) - continue; - } - - if (S_ISDIR(entry.mode)) - process_tree(ctx, - lookup_tree(the_repository, entry.oid), - base, entry.path); - else if (S_ISGITLINK(entry.mode)) - process_gitlink(ctx, entry.oid->hash, base, entry.path); - else - process_blob(ctx, - lookup_blob(the_repository, entry.oid), - base, entry.path); - } + process_tree_contents(ctx, tree, base); if (!(obj->flags & USER_GIVEN) && ctx->filter_fn) { r = ctx->filter_fn(LOFS_END_TREE, obj, From f1d02daacfe657fd175634174b4928a645d537f4 Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Tue, 14 Aug 2018 17:22:52 -0700 Subject: [PATCH 03/10] list-objects: always parse trees gently If parsing fails when revs->ignore_missing_links and revs->exclude_promisor_objects are both false, we print the OID anyway in the die("bad tree object...") call, so any message printed by parse_tree_gently() is superfluous. Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- list-objects.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/list-objects.c b/list-objects.c index ccc529e5e3..f9b51db7a7 100644 --- a/list-objects.c +++ b/list-objects.c @@ -143,8 +143,6 @@ static void process_tree(struct traversal_context *ctx, struct rev_info *revs = ctx->revs; int baselen = base->len; enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW; - int gently = revs->ignore_missing_links || - revs->exclude_promisor_objects; if (!revs->tree_objects) return; @@ -152,7 +150,7 @@ static void process_tree(struct traversal_context *ctx, die("bad tree object"); if (obj->flags & (UNINTERESTING | SEEN)) return; - if (parse_tree_gently(tree, gently) < 0) { + if (parse_tree_gently(tree, 1) < 0) { if (revs->ignore_missing_links) return; From 7c0fe330d5f3d2fc7aac57a19c7580ea2543c799 Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Fri, 5 Oct 2018 14:31:23 -0700 Subject: [PATCH 04/10] rev-list: handle missing tree objects properly Previously, we assumed only blob objects could be missing. This patch makes rev-list handle missing trees like missing blobs. The --missing=* and --exclude-promisor-objects flags now work for trees as they already do for blobs. This is demonstrated in t6112. Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- builtin/rev-list.c | 11 ++++--- list-objects.c | 11 +++++-- revision.h | 15 +++++++++ t/t0410-partial-clone.sh | 45 ++++++++++++++++++++++++++ t/t5317-pack-objects-filter-objects.sh | 13 ++++++++ t/t6112-rev-list-filters-objects.sh | 22 +++++++++++++ 6 files changed, 110 insertions(+), 7 deletions(-) diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 5b07f3f4a2..49d6deed70 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -6,6 +6,7 @@ #include "list-objects.h" #include "list-objects-filter.h" #include "list-objects-filter-options.h" +#include "object.h" #include "object-store.h" #include "pack.h" #include "pack-bitmap.h" @@ -209,7 +210,8 @@ static inline void finish_object__ma(struct object *obj) */ switch (arg_missing_action) { case MA_ERROR: - die("missing blob object '%s'", oid_to_hex(&obj->oid)); + die("missing %s object '%s'", + type_name(obj->type), oid_to_hex(&obj->oid)); return; case MA_ALLOW_ANY: @@ -222,8 +224,8 @@ static inline void finish_object__ma(struct object *obj) case MA_ALLOW_PROMISOR: if (is_promisor_object(&obj->oid)) return; - die("unexpected missing blob object '%s'", - oid_to_hex(&obj->oid)); + die("unexpected missing %s object '%s'", + type_name(obj->type), oid_to_hex(&obj->oid)); return; default: @@ -235,7 +237,7 @@ static inline void finish_object__ma(struct object *obj) static int finish_object(struct object *obj, const char *name, void *cb_data) { struct rev_list_info *info = cb_data; - if (obj->type == OBJ_BLOB && !has_object_file(&obj->oid)) { + if (!has_object_file(&obj->oid)) { finish_object__ma(obj); return 1; } @@ -373,6 +375,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) init_revisions(&revs, prefix); revs.abbrev = DEFAULT_ABBREV; revs.commit_format = CMIT_FMT_UNSPECIFIED; + revs.do_not_die_on_missing_tree = 1; /* * Scan the argument list before invoking setup_revisions(), so that we diff --git a/list-objects.c b/list-objects.c index f9b51db7a7..243192af53 100644 --- a/list-objects.c +++ b/list-objects.c @@ -143,6 +143,7 @@ static void process_tree(struct traversal_context *ctx, struct rev_info *revs = ctx->revs; int baselen = base->len; enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW; + int failed_parse; if (!revs->tree_objects) return; @@ -150,7 +151,9 @@ static void process_tree(struct traversal_context *ctx, die("bad tree object"); if (obj->flags & (UNINTERESTING | SEEN)) return; - if (parse_tree_gently(tree, 1) < 0) { + + failed_parse = parse_tree_gently(tree, 1); + if (failed_parse) { if (revs->ignore_missing_links) return; @@ -163,7 +166,8 @@ static void process_tree(struct traversal_context *ctx, is_promisor_object(&obj->oid)) return; - die("bad tree object %s", oid_to_hex(&obj->oid)); + if (!revs->do_not_die_on_missing_tree) + die("bad tree object %s", oid_to_hex(&obj->oid)); } strbuf_addstr(base, name); @@ -178,7 +182,8 @@ static void process_tree(struct traversal_context *ctx, if (base->len) strbuf_addch(base, '/'); - process_tree_contents(ctx, tree, base); + if (!failed_parse) + process_tree_contents(ctx, tree, base); if (!(obj->flags & USER_GIVEN) && ctx->filter_fn) { r = ctx->filter_fn(LOFS_END_TREE, obj, diff --git a/revision.h b/revision.h index c599c34da9..5118aaaa92 100644 --- a/revision.h +++ b/revision.h @@ -125,6 +125,21 @@ struct rev_info { line_level_traverse:1, tree_blobs_in_commit_order:1, + /* + * Blobs are shown without regard for their existence. + * But not so for trees: unless exclude_promisor_objects + * is set and the tree in question is a promisor object; + * OR ignore_missing_links is set, the revision walker + * dies with a "bad tree object HASH" message when + * encountering a missing tree. For callers that can + * handle missing trees and want them to be filterable + * and showable, set this to true. The revision walker + * will filter and show such a missing tree as usual, + * but will not attempt to recurse into this tree + * object. + */ + do_not_die_on_missing_tree:1, + /* for internal use only */ exclude_promisor_objects:1; diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh index 4984ca583d..2f4ea487a4 100755 --- a/t/t0410-partial-clone.sh +++ b/t/t0410-partial-clone.sh @@ -186,6 +186,51 @@ test_expect_success 'rev-list stops traversal at missing and promised commit' ' ! grep $FOO out ' +test_expect_success 'missing tree objects with --missing=allow-promisor and --exclude-promisor-objects' ' + rm -rf repo && + test_create_repo repo && + test_commit -C repo foo && + test_commit -C repo bar && + test_commit -C repo baz && + + promise_and_delete $(git -C repo rev-parse bar^{tree}) && + promise_and_delete $(git -C repo rev-parse foo^{tree}) && + + git -C repo config core.repositoryformatversion 1 && + git -C repo config extensions.partialclone "arbitrary string" && + + git -C repo rev-list --missing=allow-promisor --objects HEAD >objs 2>rev_list_err && + test_must_be_empty rev_list_err && + # 3 commits, 3 blobs, and 1 tree + test_line_count = 7 objs && + + # Do the same for --exclude-promisor-objects, but with all trees gone. + promise_and_delete $(git -C repo rev-parse baz^{tree}) && + git -C repo rev-list --exclude-promisor-objects --objects HEAD >objs 2>rev_list_err && + test_must_be_empty rev_list_err && + # 3 commits, no blobs or trees + test_line_count = 3 objs +' + +test_expect_success 'missing non-root tree object and rev-list' ' + rm -rf repo && + test_create_repo repo && + mkdir repo/dir && + echo foo >repo/dir/foo && + git -C repo add dir/foo && + git -C repo commit -m "commit dir/foo" && + + promise_and_delete $(git -C repo rev-parse HEAD:dir) && + + git -C repo config core.repositoryformatversion 1 && + git -C repo config extensions.partialclone "arbitrary string" && + + git -C repo rev-list --missing=allow-any --objects HEAD >objs 2>rev_list_err && + test_must_be_empty rev_list_err && + # 1 commit and 1 tree + test_line_count = 2 objs +' + test_expect_success 'rev-list stops traversal at missing and promised tree' ' rm -rf repo && test_create_repo repo && diff --git a/t/t5317-pack-objects-filter-objects.sh b/t/t5317-pack-objects-filter-objects.sh index 6710c8bc8c..9839b48c1c 100755 --- a/t/t5317-pack-objects-filter-objects.sh +++ b/t/t5317-pack-objects-filter-objects.sh @@ -59,6 +59,19 @@ test_expect_success 'verify normal and blob:none packfiles have same commits/tre test_cmp observed expected ' +test_expect_success 'get an error for missing tree object' ' + git init r5 && + echo foo >r5/foo && + git -C r5 add foo && + git -C r5 commit -m "foo" && + del=$(git -C r5 rev-parse HEAD^{tree} | sed "s|..|&/|") && + rm r5/.git/objects/$del && + test_must_fail git -C r5 pack-objects --rev --stdout 2>bad_tree <<-EOF && + HEAD + EOF + grep -q "bad tree object" bad_tree +' + # Test blob:limit=[kmg] filter. # We boundary test around the size parameter. The filter is strictly less than # the value, so size 500 and 1000 should have the same results, but 1001 should diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh index 0a37dd5f97..6d69e6a0aa 100755 --- a/t/t6112-rev-list-filters-objects.sh +++ b/t/t6112-rev-list-filters-objects.sh @@ -196,6 +196,28 @@ test_expect_success 'verify sparse:oid=oid-ish omits top-level files' ' test_cmp observed expected ' +test_expect_success 'rev-list W/ --missing=print and --missing=allow-any for trees' ' + TREE=$(git -C r3 rev-parse HEAD:dir1) && + + # Create a spare repo because we will be deleting objects from this one. + git clone r3 r3.b && + + rm r3.b/.git/objects/$(echo $TREE | sed "s|^..|&/|") && + + git -C r3.b rev-list --quiet --missing=print --objects HEAD \ + >missing_objs 2>rev_list_err && + echo "?$TREE" >expected && + test_cmp expected missing_objs && + + # do not complain when a missing tree cannot be parsed + test_must_be_empty rev_list_err && + + git -C r3.b rev-list --missing=allow-any --objects HEAD \ + >objs 2>rev_list_err && + ! grep $TREE objs && + test_must_be_empty rev_list_err +' + # Delete some loose objects and use rev-list, but WITHOUT any filtering. # This models previously omitted objects that we did not receive. From 99c9aa9579ae970c0d273ced8fb8efe9eed70a75 Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Fri, 5 Oct 2018 14:31:24 -0700 Subject: [PATCH 05/10] revision: mark non-user-given objects instead Currently, list-objects.c incorrectly treats all root trees of commits as USER_GIVEN. Also, it would be easier to mark objects that are non-user-given instead of user-given, since the places in the code where we access an object through a reference are more obvious than the places where we access an object that was given by the user. Resolve these two problems by introducing a flag NOT_USER_GIVEN that marks blobs and trees that are non-user-given, replacing USER_GIVEN. (Only blobs and trees are marked because this mark is only used when filtering objects, and filtering of other types of objects is not supported yet.) This fixes a bug in that git rev-list behaved differently from git pack-objects. pack-objects would *not* filter objects given explicitly on the command line and rev-list would filter. This was because the two commands used a different function to add objects to the rev_info struct. This seems to have been an oversight, and pack-objects has the correct behavior, so I added a test to make sure that rev-list now behaves properly. Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- list-objects.c | 31 +++++++++++++++++------------ revision.c | 1 - revision.h | 11 ++++++++-- t/t6112-rev-list-filters-objects.sh | 12 +++++++++++ 4 files changed, 39 insertions(+), 16 deletions(-) diff --git a/list-objects.c b/list-objects.c index 243192af53..7a1a0929db 100644 --- a/list-objects.c +++ b/list-objects.c @@ -53,7 +53,7 @@ static void process_blob(struct traversal_context *ctx, pathlen = path->len; strbuf_addstr(path, name); - if (!(obj->flags & USER_GIVEN) && ctx->filter_fn) + if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn) r = ctx->filter_fn(LOFS_BLOB, obj, path->buf, &path->buf[pathlen], ctx->filter_data); @@ -120,17 +120,19 @@ static void process_tree_contents(struct traversal_context *ctx, continue; } - if (S_ISDIR(entry.mode)) - process_tree(ctx, - lookup_tree(the_repository, entry.oid), - base, entry.path); + if (S_ISDIR(entry.mode)) { + struct tree *t = lookup_tree(the_repository, entry.oid); + t->object.flags |= NOT_USER_GIVEN; + process_tree(ctx, t, base, entry.path); + } else if (S_ISGITLINK(entry.mode)) process_gitlink(ctx, entry.oid->hash, base, entry.path); - else - process_blob(ctx, - lookup_blob(the_repository, entry.oid), - base, entry.path); + else { + struct blob *b = lookup_blob(the_repository, entry.oid); + b->object.flags |= NOT_USER_GIVEN; + process_blob(ctx, b, base, entry.path); + } } } @@ -171,7 +173,7 @@ static void process_tree(struct traversal_context *ctx, } strbuf_addstr(base, name); - if (!(obj->flags & USER_GIVEN) && ctx->filter_fn) + if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn) r = ctx->filter_fn(LOFS_BEGIN_TREE, obj, base->buf, &base->buf[baselen], ctx->filter_data); @@ -185,7 +187,7 @@ static void process_tree(struct traversal_context *ctx, if (!failed_parse) process_tree_contents(ctx, tree, base); - if (!(obj->flags & USER_GIVEN) && ctx->filter_fn) { + if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn) { r = ctx->filter_fn(LOFS_END_TREE, obj, base->buf, &base->buf[baselen], ctx->filter_data); @@ -301,8 +303,11 @@ static void do_traverse(struct traversal_context *ctx) * an uninteresting boundary commit may not have its tree * parsed yet, but we are not going to show them anyway */ - if (get_commit_tree(commit)) - add_pending_tree(ctx->revs, get_commit_tree(commit)); + if (get_commit_tree(commit)) { + struct tree *tree = get_commit_tree(commit); + tree->object.flags |= NOT_USER_GIVEN; + add_pending_tree(ctx->revs, tree); + } ctx->show_commit(commit, ctx->show_data); if (ctx->revs->tree_blobs_in_commit_order) diff --git a/revision.c b/revision.c index 0627494378..6d355b43c3 100644 --- a/revision.c +++ b/revision.c @@ -175,7 +175,6 @@ static void add_pending_object_with_path(struct rev_info *revs, strbuf_release(&buf); return; /* do not add the commit itself */ } - obj->flags |= USER_GIVEN; add_object_array_with_path(obj, name, &revs->pending, mode, path); } diff --git a/revision.h b/revision.h index 5118aaaa92..4dc45bb9ad 100644 --- a/revision.h +++ b/revision.h @@ -20,9 +20,16 @@ #define SYMMETRIC_LEFT (1u<<8) #define PATCHSAME (1u<<9) #define BOTTOM (1u<<10) -#define USER_GIVEN (1u<<25) /* given directly by the user */ +/* + * Indicates object was reached by traversal. i.e. not given by user on + * command-line or stdin. + * NEEDSWORK: NOT_USER_GIVEN doesn't apply to commits because we only support + * filtering trees and blobs, but it may be useful to support filtering commits + * in the future. + */ +#define NOT_USER_GIVEN (1u<<25) #define TRACK_LINEAR (1u<<26) -#define ALL_REV_FLAGS (((1u<<11)-1) | USER_GIVEN | TRACK_LINEAR) +#define ALL_REV_FLAGS (((1u<<11)-1) | NOT_USER_GIVEN | TRACK_LINEAR) #define DECORATE_SHORT_REFS 1 #define DECORATE_FULL_REFS 2 diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh index 6d69e6a0aa..0d4d43894b 100755 --- a/t/t6112-rev-list-filters-objects.sh +++ b/t/t6112-rev-list-filters-objects.sh @@ -30,6 +30,18 @@ test_expect_success 'verify blob:none omits all 5 blobs' ' test_cmp observed expected ' +test_expect_success 'specify blob explicitly prevents filtering' ' + file_3=$(git -C r1 ls-files -s file.3 | + awk -f print_2.awk) && + + file_4=$(git -C r1 ls-files -s file.4 | + awk -f print_2.awk) && + + git -C r1 rev-list --objects --filter=blob:none HEAD $file_3 >observed && + grep -q "$file_3" observed && + test_must_fail grep -q "$file_4" observed +' + test_expect_success 'verify emitted+omitted == all' ' git -C r1 rev-list HEAD --objects \ | awk -f print_1.awk \ From 696aa73905c8c11b06735f9db830e55bc369849a Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Fri, 5 Oct 2018 14:31:25 -0700 Subject: [PATCH 06/10] list-objects-filter: use BUG rather than die In some cases in this file, BUG makes more sense than die. In such cases, a we get there from a coding error rather than a user error. 'return' has been removed following some instances of BUG since BUG does not return. Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- list-objects-filter.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/list-objects-filter.c b/list-objects-filter.c index a0ba78b20c..5f8b1a002f 100644 --- a/list-objects-filter.c +++ b/list-objects-filter.c @@ -44,8 +44,7 @@ static enum list_objects_filter_result filter_blobs_none( switch (filter_situation) { default: - die("unknown filter_situation"); - return LOFR_ZERO; + BUG("unknown filter_situation: %d", filter_situation); case LOFS_BEGIN_TREE: assert(obj->type == OBJ_TREE); @@ -102,8 +101,7 @@ static enum list_objects_filter_result filter_blobs_limit( switch (filter_situation) { default: - die("unknown filter_situation"); - return LOFR_ZERO; + BUG("unknown filter_situation: %d", filter_situation); case LOFS_BEGIN_TREE: assert(obj->type == OBJ_TREE); @@ -208,8 +206,7 @@ static enum list_objects_filter_result filter_sparse( switch (filter_situation) { default: - die("unknown filter_situation"); - return LOFR_ZERO; + BUG("unknown filter_situation: %d", filter_situation); case LOFS_BEGIN_TREE: assert(obj->type == OBJ_TREE); @@ -389,7 +386,7 @@ void *list_objects_filter__init( assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT); if (filter_options->choice >= LOFC__COUNT) - die("invalid list-objects filter choice: %d", + BUG("invalid list-objects filter choice: %d", filter_options->choice); init_fn = s_filters[filter_options->choice]; From cc0b05a4cc54c30a5355a9da5d76b1879d960628 Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Fri, 5 Oct 2018 14:31:26 -0700 Subject: [PATCH 07/10] list-objects-filter-options: do not over-strbuf_init The function gently_parse_list_objects_filter is either called with errbuf=STRBUF_INIT or errbuf=NULL, but that function calls strbuf_init when errbuf is not NULL. strbuf_init is only necessary if errbuf contains garbage, and risks a memory leak if errbuf already has a non-STRBUF_INIT state. It should be the caller's responsibility to make sure errbuf is not garbage, since garbage content is easily avoidable with STRBUF_INIT. Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- list-objects-filter-options.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index c0e2bd6a06..d259bdb2c1 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -30,7 +30,6 @@ static int gently_parse_list_objects_filter( if (filter_options->choice) { if (errbuf) { - strbuf_init(errbuf, 0); strbuf_addstr( errbuf, _("multiple filter-specs cannot be combined")); @@ -71,10 +70,9 @@ static int gently_parse_list_objects_filter( return 0; } - if (errbuf) { - strbuf_init(errbuf, 0); + if (errbuf) strbuf_addf(errbuf, "invalid filter-spec '%s'", arg); - } + memset(filter_options, 0, sizeof(*filter_options)); return 1; } From bc5975d24f33c974d53b3d94c5697fadaec5b000 Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Fri, 5 Oct 2018 14:31:27 -0700 Subject: [PATCH 08/10] list-objects-filter: implement filter tree:0 Teach list-objects the "tree:0" filter which allows for filtering out all tree and blob objects (unless other objects are explicitly specified by the user). The purpose of this patch is to allow smaller partial clones. The name of this filter - tree:0 - does not explicitly specify that it also filters out all blobs, but this should not cause much confusion because blobs are not at all useful without the trees that refer to them. I also considered only:commits as a name, but this is inaccurate because it suggests that annotated tags are omitted, but actually they are included. The name "tree:0" allows later filtering based on depth, i.e. "tree:1" would filter out all but the root tree and blobs. In order to avoid confusion between 0 and capital O, the documentation was worded in a somewhat round-about way that also hints at this future improvement to the feature. Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- Documentation/rev-list-options.txt | 5 +++ list-objects-filter-options.c | 13 +++++++ list-objects-filter-options.h | 1 + list-objects-filter.c | 49 ++++++++++++++++++++++++++ t/t5317-pack-objects-filter-objects.sh | 28 +++++++++++++++ t/t5616-partial-clone.sh | 42 ++++++++++++++++++++++ t/t6112-rev-list-filters-objects.sh | 15 ++++++++ 7 files changed, 153 insertions(+) diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index 7b273635de..5f1672913b 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -731,6 +731,11 @@ the requested refs. + The form '--filter=sparse:path=' similarly uses a sparse-checkout specification contained in . ++ +The form '--filter=tree:' omits all blobs and trees whose depth +from the root tree is >= (minimum depth if an object is located +at multiple depths in the commits traversed). Currently, only =0 +is supported, which omits all blobs and trees. --no-filter:: Turn off any previous `--filter=` argument. diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index d259bdb2c1..e8da2e8581 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -49,6 +49,19 @@ static int gently_parse_list_objects_filter( return 0; } + } else if (skip_prefix(arg, "tree:", &v0)) { + unsigned long depth; + if (!git_parse_ulong(v0, &depth) || depth != 0) { + if (errbuf) { + strbuf_addstr( + errbuf, + _("only 'tree:0' is supported")); + } + return 1; + } + filter_options->choice = LOFC_TREE_NONE; + return 0; + } else if (skip_prefix(arg, "sparse:oid=", &v0)) { struct object_context oc; struct object_id sparse_oid; diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h index 0000a61f82..af64e5c66f 100644 --- a/list-objects-filter-options.h +++ b/list-objects-filter-options.h @@ -10,6 +10,7 @@ enum list_objects_filter_choice { LOFC_DISABLED = 0, LOFC_BLOB_NONE, LOFC_BLOB_LIMIT, + LOFC_TREE_NONE, LOFC_SPARSE_OID, LOFC_SPARSE_PATH, LOFC__COUNT /* must be last */ diff --git a/list-objects-filter.c b/list-objects-filter.c index 5f8b1a002f..09b2b05d54 100644 --- a/list-objects-filter.c +++ b/list-objects-filter.c @@ -79,6 +79,54 @@ static void *filter_blobs_none__init( return d; } +/* + * A filter for list-objects to omit ALL trees and blobs from the traversal. + * Can OPTIONALLY collect a list of the omitted OIDs. + */ +struct filter_trees_none_data { + struct oidset *omits; +}; + +static enum list_objects_filter_result filter_trees_none( + enum list_objects_filter_situation filter_situation, + struct object *obj, + const char *pathname, + const char *filename, + void *filter_data_) +{ + struct filter_trees_none_data *filter_data = filter_data_; + + switch (filter_situation) { + default: + BUG("unknown filter_situation: %d", filter_situation); + + case LOFS_BEGIN_TREE: + case LOFS_BLOB: + if (filter_data->omits) + oidset_insert(filter_data->omits, &obj->oid); + return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */ + + case LOFS_END_TREE: + assert(obj->type == OBJ_TREE); + return LOFR_ZERO; + + } +} + +static void* filter_trees_none__init( + struct oidset *omitted, + struct list_objects_filter_options *filter_options, + filter_object_fn *filter_fn, + filter_free_fn *filter_free_fn) +{ + struct filter_trees_none_data *d = xcalloc(1, sizeof(*d)); + d->omits = omitted; + + *filter_fn = filter_trees_none; + *filter_free_fn = free; + return d; +} + /* * A filter for list-objects to omit large blobs. * And to OPTIONALLY collect a list of the omitted OIDs. @@ -371,6 +419,7 @@ static filter_init_fn s_filters[] = { NULL, filter_blobs_none__init, filter_blobs_limit__init, + filter_trees_none__init, filter_sparse_oid__init, filter_sparse_path__init, }; diff --git a/t/t5317-pack-objects-filter-objects.sh b/t/t5317-pack-objects-filter-objects.sh index 9839b48c1c..510d3537f6 100755 --- a/t/t5317-pack-objects-filter-objects.sh +++ b/t/t5317-pack-objects-filter-objects.sh @@ -72,6 +72,34 @@ test_expect_success 'get an error for missing tree object' ' grep -q "bad tree object" bad_tree ' +test_expect_success 'setup for tests of tree:0' ' + mkdir r1/subtree && + echo "This is a file in a subtree" >r1/subtree/file && + git -C r1 add subtree/file && + git -C r1 commit -m subtree +' + +test_expect_success 'verify tree:0 packfile has no blobs or trees' ' + git -C r1 pack-objects --rev --stdout --filter=tree:0 >commitsonly.pack <<-EOF && + HEAD + EOF + git -C r1 index-pack ../commitsonly.pack && + git -C r1 verify-pack -v ../commitsonly.pack >objs && + ! grep -E "tree|blob" objs +' + +test_expect_success 'grab tree directly when using tree:0' ' + # We should get the tree specified directly but not its blobs or subtrees. + git -C r1 pack-objects --rev --stdout --filter=tree:0 >commitsonly.pack <<-EOF && + HEAD: + EOF + git -C r1 index-pack ../commitsonly.pack && + git -C r1 verify-pack -v ../commitsonly.pack >objs && + awk "/tree|blob/{print \$1}" objs >trees_and_blobs && + git -C r1 rev-parse HEAD: >expected && + test_cmp expected trees_and_blobs +' + # Test blob:limit=[kmg] filter. # We boundary test around the size parameter. The filter is strictly less than # the value, so size 500 and 1000 should have the same results, but 1001 should diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index bbbe7537df..53fbf7db88 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -154,6 +154,48 @@ test_expect_success 'partial clone with transfer.fsckobjects=1 uses index-pack - grep "git index-pack.*--fsck-objects" trace ' +test_expect_success 'use fsck before and after manually fetching a missing subtree' ' + # push new commit so server has a subtree + mkdir src/dir && + echo "in dir" >src/dir/file.txt && + git -C src add dir/file.txt && + git -C src commit -m "file in dir" && + git -C src push -u srv master && + SUBTREE=$(git -C src rev-parse HEAD:dir) && + + rm -rf dst && + git clone --no-checkout --filter=tree:0 "file://$(pwd)/srv.bare" dst && + git -C dst fsck && + + # Make sure we only have commits, and all trees and blobs are missing. + git -C dst rev-list --missing=allow-any --objects master \ + >fetched_objects && + awk -f print_1.awk fetched_objects | + xargs -n1 git -C dst cat-file -t >fetched_types && + + sort -u fetched_types >unique_types.observed && + echo commit >unique_types.expected && + test_cmp unique_types.expected unique_types.observed && + + # Auto-fetch a tree with cat-file. + git -C dst cat-file -p $SUBTREE >tree_contents && + grep file.txt tree_contents && + + # fsck still works after an auto-fetch of a tree. + git -C dst fsck && + + # Auto-fetch all remaining trees and blobs with --missing=error + git -C dst rev-list --missing=error --objects master >fetched_objects && + test_line_count = 70 fetched_objects && + + awk -f print_1.awk fetched_objects | + xargs -n1 git -C dst cat-file -t >fetched_types && + + sort -u fetched_types >unique_types.observed && + printf "blob\ncommit\ntree\n" >unique_types.expected && + test_cmp unique_types.expected unique_types.observed +' + test_expect_success 'partial clone fetches blobs pointed to by refs even if normally filtered out' ' rm -rf src dst && git init src && diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh index 0d4d43894b..2cbb81d3bb 100755 --- a/t/t6112-rev-list-filters-objects.sh +++ b/t/t6112-rev-list-filters-objects.sh @@ -230,6 +230,21 @@ test_expect_success 'rev-list W/ --missing=print and --missing=allow-any for tre test_must_be_empty rev_list_err ' +# Test tree:0 filter. + +test_expect_success 'verify tree:0 includes trees in "filtered" output' ' + git -C r3 rev-list --quiet --objects --filter-print-omitted \ + --filter=tree:0 HEAD >revs && + + awk -f print_1.awk revs | + sed s/~// | + xargs -n1 git -C r3 cat-file -t >unsorted_filtered_types && + + sort -u unsorted_filtered_types >filtered_types && + printf "blob\ntree\n" >expected && + test_cmp expected filtered_types +' + # Delete some loose objects and use rev-list, but WITHOUT any filtering. # This models previously omitted objects that we did not receive. From d9e6d0942bb9f9fe9e4cca9670181e5b59074bcb Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Fri, 12 Oct 2018 13:01:41 -0700 Subject: [PATCH 09/10] filter-trees: code clean-up of tests A few trivial updates to test to match the current best practices. - avoid "grep -q" that strips potentially useful output from tests running under "-v". - use test_write_lines to prepare multi-line expected output file. - reserve use of test_must_fail to "git" commands. Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- t/t5317-pack-objects-filter-objects.sh | 2 +- t/t5616-partial-clone.sh | 2 +- t/t6112-rev-list-filters-objects.sh | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/t/t5317-pack-objects-filter-objects.sh b/t/t5317-pack-objects-filter-objects.sh index 510d3537f6..d9dccf4d4d 100755 --- a/t/t5317-pack-objects-filter-objects.sh +++ b/t/t5317-pack-objects-filter-objects.sh @@ -69,7 +69,7 @@ test_expect_success 'get an error for missing tree object' ' test_must_fail git -C r5 pack-objects --rev --stdout 2>bad_tree <<-EOF && HEAD EOF - grep -q "bad tree object" bad_tree + grep "bad tree object" bad_tree ' test_expect_success 'setup for tests of tree:0' ' diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 53fbf7db88..392caa08fd 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -192,7 +192,7 @@ test_expect_success 'use fsck before and after manually fetching a missing subtr xargs -n1 git -C dst cat-file -t >fetched_types && sort -u fetched_types >unique_types.observed && - printf "blob\ncommit\ntree\n" >unique_types.expected && + test_write_lines blob commit tree >unique_types.expected && test_cmp unique_types.expected unique_types.observed ' diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh index 2cbb81d3bb..d24f9d5b5a 100755 --- a/t/t6112-rev-list-filters-objects.sh +++ b/t/t6112-rev-list-filters-objects.sh @@ -38,8 +38,8 @@ test_expect_success 'specify blob explicitly prevents filtering' ' awk -f print_2.awk) && git -C r1 rev-list --objects --filter=blob:none HEAD $file_3 >observed && - grep -q "$file_3" observed && - test_must_fail grep -q "$file_4" observed + grep "$file_3" observed && + ! grep "$file_4" observed ' test_expect_success 'verify emitted+omitted == all' ' @@ -241,7 +241,7 @@ test_expect_success 'verify tree:0 includes trees in "filtered" output' ' xargs -n1 git -C r3 cat-file -t >unsorted_filtered_types && sort -u unsorted_filtered_types >filtered_types && - printf "blob\ntree\n" >expected && + test_write_lines blob tree >expected && test_cmp expected filtered_types ' From 8b10a206f090e01ce1ac4d9a10ec769e2409e2b0 Mon Sep 17 00:00:00 2001 From: Matthew DeVore Date: Wed, 17 Oct 2018 17:39:15 -0700 Subject: [PATCH 10/10] list-objects: support for skipping tree traversal The tree:0 filter does not need to traverse the trees that it has filtered out, so optimize list-objects and list-objects-filter to skip traversing the trees entirely. Before this patch, we iterated over all children of the tree, and did nothing for all of them, which was wasteful. Signed-off-by: Matthew DeVore Signed-off-by: Junio C Hamano --- list-objects-filter.c | 11 +++++++++-- list-objects-filter.h | 6 ++++++ list-objects.c | 5 ++++- t/t6112-rev-list-filters-objects.sh | 13 +++++++++++++ 4 files changed, 32 insertions(+), 3 deletions(-) diff --git a/list-objects-filter.c b/list-objects-filter.c index 09b2b05d54..765f3df3b0 100644 --- a/list-objects-filter.c +++ b/list-objects-filter.c @@ -102,9 +102,16 @@ static enum list_objects_filter_result filter_trees_none( case LOFS_BEGIN_TREE: case LOFS_BLOB: - if (filter_data->omits) + if (filter_data->omits) { oidset_insert(filter_data->omits, &obj->oid); - return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */ + /* _MARK_SEEN but not _DO_SHOW (hard omit) */ + return LOFR_MARK_SEEN; + } else { + /* + * Not collecting omits so no need to to traverse tree. + */ + return LOFR_SKIP_TREE | LOFR_MARK_SEEN; + } case LOFS_END_TREE: assert(obj->type == OBJ_TREE); diff --git a/list-objects-filter.h b/list-objects-filter.h index a963d0274c..9c19875a41 100644 --- a/list-objects-filter.h +++ b/list-objects-filter.h @@ -20,6 +20,11 @@ * In general, objects should only be shown once, but * this result DOES NOT imply that we mark it SEEN. * + * _SKIP_TREE : Used in LOFS_BEGIN_TREE situation - indicates that + * the tree's children should not be iterated over. This + * is used as an optimization when all children will + * definitely be ignored. + * * Most of the time, you want the combination (_MARK_SEEN | _DO_SHOW) * but they can be used independently, such as when sparse-checkout * pattern matching is being applied. @@ -41,6 +46,7 @@ enum list_objects_filter_result { LOFR_ZERO = 0, LOFR_MARK_SEEN = 1<<0, LOFR_DO_SHOW = 1<<1, + LOFR_SKIP_TREE = 1<<2, }; enum list_objects_filter_situation { diff --git a/list-objects.c b/list-objects.c index 7a1a0929db..d1e3d217c5 100644 --- a/list-objects.c +++ b/list-objects.c @@ -11,6 +11,7 @@ #include "list-objects-filter-options.h" #include "packfile.h" #include "object-store.h" +#include "trace.h" struct traversal_context { struct rev_info *revs; @@ -184,7 +185,9 @@ static void process_tree(struct traversal_context *ctx, if (base->len) strbuf_addch(base, '/'); - if (!failed_parse) + if (r & LOFR_SKIP_TREE) + trace_printf("Skipping contents of tree %s...\n", base->buf); + else if (!failed_parse) process_tree_contents(ctx, tree, base); if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn) { diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh index d24f9d5b5a..c6aae93b57 100755 --- a/t/t6112-rev-list-filters-objects.sh +++ b/t/t6112-rev-list-filters-objects.sh @@ -245,6 +245,19 @@ test_expect_success 'verify tree:0 includes trees in "filtered" output' ' test_cmp expected filtered_types ' +# Make sure tree:0 does not iterate through any trees. + +test_expect_success 'filter a GIANT tree through tree:0' ' + GIT_TRACE=1 git -C r3 rev-list \ + --objects --filter=tree:0 HEAD 2>filter_trace && + grep "Skipping contents of tree [.][.][.]" filter_trace >actual && + # One line for each commit traversed. + test_line_count = 2 actual && + + # Make sure no other trees were considered besides the root. + ! grep "Skipping contents of tree [^.]" filter_trace +' + # Delete some loose objects and use rev-list, but WITHOUT any filtering. # This models previously omitted objects that we did not receive.