From 1f52cdfacb6af203cdcc6a5101fc8cb5fff72735 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 9 Mar 2022 16:01:31 +0000 Subject: [PATCH 01/13] index-pack: document and test the --promisor option The --promisor option of 'git index-pack' was created in 88e2f9e (introduce fetch-object: fetch one promisor object, 2017-12-05) but was untested. It is currently unused within the Git codebase, but that will change in an upcoming change to 'git bundle unbundle' when there is a filter capability. For now, add documentation about the option and add a test to ensure it is working as expected. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-index-pack.txt | 8 ++++++++ t/t5300-pack-object.sh | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Documentation/git-index-pack.txt b/Documentation/git-index-pack.txt index 1f1e359225..4e71c256ec 100644 --- a/Documentation/git-index-pack.txt +++ b/Documentation/git-index-pack.txt @@ -122,6 +122,14 @@ This option cannot be used with --stdin. + include::object-format-disclaimer.txt[] +--promisor[=<message>]:: + Before committing the pack-index, create a .promisor file for this + pack. Particularly helpful when writing a promisor pack with --fix-thin + since the name of the pack is not final until the pack has been fully + written. If a `<message>` is provided, then that content will be + written to the .promisor file for future reference. See + link:technical/partial-clone.html[partial clone] for more information. 
+ NOTES ----- diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh index 2fd845187e..a11d61206a 100755 --- a/t/t5300-pack-object.sh +++ b/t/t5300-pack-object.sh @@ -315,8 +315,10 @@ test_expect_success \ git index-pack -o tmp.idx test-3.pack && cmp tmp.idx test-1-${packname_1}.idx && - git index-pack test-3.pack && + git index-pack --promisor=message test-3.pack && cmp test-3.idx test-1-${packname_1}.idx && + echo message >expect && + test_cmp expect test-3.promisor && cat test-2-${packname_2}.pack >test-3.pack && git index-pack -o tmp.idx test-2-${packname_2}.pack && From 4a4c3f9b6389168033843814ad45e69bf0b92b1d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 9 Mar 2022 16:01:32 +0000 Subject: [PATCH 02/13] list-objects-filter-options: create copy helper As we add more embedded members with type 'struct list_objects_filter_options', it will be important to easily perform a deep copy across multiple such structs. Create list_objects_filter_copy() to satisfy this need. This method is recursive to match the recursive nature of the struct. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- list-objects-filter-options.c | 19 +++++++++++++++++++ list-objects-filter-options.h | 4 ++++ 2 files changed, 23 insertions(+) diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index fd8d59f653..449d53af69 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -415,3 +415,22 @@ void partial_clone_get_default_filter_spec( &errbuf); strbuf_release(&errbuf); } + +void list_objects_filter_copy( + struct list_objects_filter_options *dest, + const struct list_objects_filter_options *src) +{ + int i; + struct string_list_item *item; + + /* Copy everything. We will overwrite the pointers shortly. 
*/ + memcpy(dest, src, sizeof(struct list_objects_filter_options)); + + string_list_init_dup(&dest->filter_spec); + for_each_string_list_item(item, &src->filter_spec) + string_list_append(&dest->filter_spec, item->string); + + ALLOC_ARRAY(dest->sub, dest->sub_alloc); + for (i = 0; i < src->sub_nr; i++) + list_objects_filter_copy(&dest->sub[i], &src->sub[i]); +} diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h index da5b6737e2..425c38cae9 100644 --- a/list-objects-filter-options.h +++ b/list-objects-filter-options.h @@ -132,4 +132,8 @@ void partial_clone_get_default_filter_spec( struct list_objects_filter_options *filter_options, const char *remote); +void list_objects_filter_copy( + struct list_objects_filter_options *dest, + const struct list_objects_filter_options *src); + #endif /* LIST_OBJECTS_FILTER_OPTIONS_H */ From ffaa137f646803749cf81017d3208be2ce8a2964 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 9 Mar 2022 16:01:33 +0000 Subject: [PATCH 03/13] revision: put object filter into struct rev_info Placing a 'struct list_objects_filter_options' within 'struct rev_info' will assist making some bookkeeping around object filters in the future. For now, let's use this new member to remove a static global instance of the struct from builtin/rev-list.c. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/rev-list.c | 26 ++++++++++++-------------- revision.h | 7 +++++++ 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 777558e9b0..1beb578cc5 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -62,7 +62,6 @@ static const char rev_list_usage[] = static struct progress *progress; static unsigned progress_counter; -static struct list_objects_filter_options filter_options; static struct oidset omitted_objects; static int arg_print_omitted; /* print objects omitted by filter */ @@ -400,7 +399,6 @@ static inline int parse_missing_action_value(const char *value) } static int try_bitmap_count(struct rev_info *revs, - struct list_objects_filter_options *filter, int filter_provided_objects) { uint32_t commit_count = 0, @@ -436,7 +434,8 @@ static int try_bitmap_count(struct rev_info *revs, */ max_count = revs->max_count; - bitmap_git = prepare_bitmap_walk(revs, filter, filter_provided_objects); + bitmap_git = prepare_bitmap_walk(revs, &revs->filter, + filter_provided_objects); if (!bitmap_git) return -1; @@ -453,7 +452,6 @@ static int try_bitmap_count(struct rev_info *revs, } static int try_bitmap_traversal(struct rev_info *revs, - struct list_objects_filter_options *filter, int filter_provided_objects) { struct bitmap_index *bitmap_git; @@ -465,7 +463,8 @@ static int try_bitmap_traversal(struct rev_info *revs, if (revs->max_count >= 0) return -1; - bitmap_git = prepare_bitmap_walk(revs, filter, filter_provided_objects); + bitmap_git = prepare_bitmap_walk(revs, &revs->filter, + filter_provided_objects); if (!bitmap_git) return -1; @@ -475,7 +474,6 @@ static int try_bitmap_traversal(struct rev_info *revs, } static int try_bitmap_disk_usage(struct rev_info *revs, - struct list_objects_filter_options *filter, int filter_provided_objects) { struct bitmap_index *bitmap_git; @@ -483,7 +481,7 @@ static int try_bitmap_disk_usage(struct rev_info 
*revs, if (!show_disk_usage) return -1; - bitmap_git = prepare_bitmap_walk(revs, filter, filter_provided_objects); + bitmap_git = prepare_bitmap_walk(revs, &revs->filter, filter_provided_objects); if (!bitmap_git) return -1; @@ -597,13 +595,13 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) } if (skip_prefix(arg, ("--" CL_ARG__FILTER "="), &arg)) { - parse_list_objects_filter(&filter_options, arg); - if (filter_options.choice && !revs.blob_objects) + parse_list_objects_filter(&revs.filter, arg); + if (revs.filter.choice && !revs.blob_objects) die(_("object filtering requires --objects")); continue; } if (!strcmp(arg, ("--no-" CL_ARG__FILTER))) { - list_objects_filter_set_no_filter(&filter_options); + list_objects_filter_set_no_filter(&revs.filter); continue; } if (!strcmp(arg, "--filter-provided-objects")) { @@ -688,11 +686,11 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) progress = start_delayed_progress(show_progress, 0); if (use_bitmap_index) { - if (!try_bitmap_count(&revs, &filter_options, filter_provided_objects)) + if (!try_bitmap_count(&revs, filter_provided_objects)) return 0; - if (!try_bitmap_disk_usage(&revs, &filter_options, filter_provided_objects)) + if (!try_bitmap_disk_usage(&revs, filter_provided_objects)) return 0; - if (!try_bitmap_traversal(&revs, &filter_options, filter_provided_objects)) + if (!try_bitmap_traversal(&revs, filter_provided_objects)) return 0; } @@ -733,7 +731,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) oidset_init(&missing_objects, DEFAULT_OIDSET_SIZE); traverse_commit_list_filtered( - &filter_options, &revs, show_commit, show_object, &info, + &revs.filter, &revs, show_commit, show_object, &info, (arg_print_omitted ? 
&omitted_objects : NULL)); if (arg_print_omitted) { diff --git a/revision.h b/revision.h index b9c2421687..5bc59c7bfe 100644 --- a/revision.h +++ b/revision.h @@ -8,6 +8,7 @@ #include "pretty.h" #include "diff.h" #include "commit-slab-decl.h" +#include "list-objects-filter-options.h" /** * The revision walking API offers functions to build a list of revisions @@ -94,6 +95,12 @@ struct rev_info { /* The end-points specified by the end user */ struct rev_cmdline_info cmdline; + /* + * Object filter options. No filtering is specified + * if and only if filter.choice is zero. + */ + struct list_objects_filter_options filter; + /* excluding from --branches, --refs, etc. expansion */ struct string_list *ref_excludes; From 7940941de1f23d02c60581cc90a193988e5e2fe0 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 9 Mar 2022 16:01:34 +0000 Subject: [PATCH 04/13] pack-objects: use rev.filter when possible In builtin/pack-objects.c, we use a 'filter_options' global to populate the --filter= argument. The previous change embedded a filter options struct in 'struct rev_info', so we can use that member here as a start to simplifying some usage of object filters. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 87cb7b45c3..da0de0c3fe 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3651,7 +3651,7 @@ static int pack_options_allow_reuse(void) static int get_object_list_from_bitmap(struct rev_info *revs) { - if (!(bitmap_git = prepare_bitmap_walk(revs, &filter_options, 0))) + if (!(bitmap_git = prepare_bitmap_walk(revs, &revs->filter, 0))) return -1; if (pack_options_allow_reuse() && @@ -3727,6 +3727,7 @@ static void get_object_list(int ac, const char **av) repo_init_revisions(the_repository, &revs, NULL); save_commit_buffer = 0; setup_revisions(ac, av, &revs, &s_r_opt); + list_objects_filter_copy(&revs.filter, &filter_options); /* make sure shallows are read */ is_repository_shallow(the_repository); @@ -3777,7 +3778,7 @@ static void get_object_list(int ac, const char **av) if (!fn_show_object) fn_show_object = show_object; - traverse_commit_list_filtered(&filter_options, &revs, + traverse_commit_list_filtered(&revs.filter, &revs, show_commit, fn_show_object, NULL, NULL); From 09d4a79effac002399557392e21c9f8829056ca3 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 9 Mar 2022 16:01:35 +0000 Subject: [PATCH 05/13] pack-bitmap: drop filter in prepare_bitmap_walk() Now that all consumers of prepare_bitmap_walk() have populated the 'filter' member of 'struct rev_info', we can drop that extra parameter from the method and access it directly from the 'struct rev_info'. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 2 +- builtin/rev-list.c | 8 +++----- pack-bitmap.c | 20 +++++++++----------- pack-bitmap.h | 2 -- reachable.c | 2 +- 5 files changed, 14 insertions(+), 20 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index da0de0c3fe..ddd698a1fe 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3651,7 +3651,7 @@ static int pack_options_allow_reuse(void) static int get_object_list_from_bitmap(struct rev_info *revs) { - if (!(bitmap_git = prepare_bitmap_walk(revs, &revs->filter, 0))) + if (!(bitmap_git = prepare_bitmap_walk(revs, 0))) return -1; if (pack_options_allow_reuse() && diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 1beb578cc5..ab7558bd66 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -434,8 +434,7 @@ static int try_bitmap_count(struct rev_info *revs, */ max_count = revs->max_count; - bitmap_git = prepare_bitmap_walk(revs, &revs->filter, - filter_provided_objects); + bitmap_git = prepare_bitmap_walk(revs, filter_provided_objects); if (!bitmap_git) return -1; @@ -463,8 +462,7 @@ static int try_bitmap_traversal(struct rev_info *revs, if (revs->max_count >= 0) return -1; - bitmap_git = prepare_bitmap_walk(revs, &revs->filter, - filter_provided_objects); + bitmap_git = prepare_bitmap_walk(revs, filter_provided_objects); if (!bitmap_git) return -1; @@ -481,7 +479,7 @@ static int try_bitmap_disk_usage(struct rev_info *revs, if (!show_disk_usage) return -1; - bitmap_git = prepare_bitmap_walk(revs, &revs->filter, filter_provided_objects); + bitmap_git = prepare_bitmap_walk(revs, filter_provided_objects); if (!bitmap_git) return -1; diff --git a/pack-bitmap.c b/pack-bitmap.c index 9c666cdb8b..37fa490579 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -739,8 +739,7 @@ static int add_commit_to_bitmap(struct bitmap_index *bitmap_git, static struct bitmap *find_objects(struct bitmap_index *bitmap_git, struct rev_info *revs, 
struct object_list *roots, - struct bitmap *seen, - struct list_objects_filter_options *filter) + struct bitmap *seen) { struct bitmap *base = NULL; int needs_walk = 0; @@ -823,7 +822,7 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, show_data.bitmap_git = bitmap_git; show_data.base = base; - traverse_commit_list_filtered(filter, revs, + traverse_commit_list_filtered(&revs->filter, revs, show_commit, show_object, &show_data, NULL); @@ -1219,7 +1218,6 @@ static int can_filter_bitmap(struct list_objects_filter_options *filter) } struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, - struct list_objects_filter_options *filter, int filter_provided_objects) { unsigned int i; @@ -1240,7 +1238,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, if (revs->prune) return NULL; - if (!can_filter_bitmap(filter)) + if (!can_filter_bitmap(&revs->filter)) return NULL; /* try to open a bitmapped pack, but don't parse it yet @@ -1297,8 +1295,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, if (haves) { revs->ignore_missing_links = 1; - haves_bitmap = find_objects(bitmap_git, revs, haves, NULL, - filter); + haves_bitmap = find_objects(bitmap_git, revs, haves, NULL); reset_revision_walk(); revs->ignore_missing_links = 0; @@ -1306,8 +1303,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, BUG("failed to perform bitmap walk"); } - wants_bitmap = find_objects(bitmap_git, revs, wants, haves_bitmap, - filter); + wants_bitmap = find_objects(bitmap_git, revs, wants, haves_bitmap); if (!wants_bitmap) BUG("failed to perform bitmap walk"); @@ -1315,8 +1311,10 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, if (haves_bitmap) bitmap_and_not(wants_bitmap, haves_bitmap); - filter_bitmap(bitmap_git, (filter && filter_provided_objects) ? NULL : wants, - wants_bitmap, filter); + filter_bitmap(bitmap_git, + (revs->filter.choice && filter_provided_objects) ? 
NULL : wants, + wants_bitmap, + &revs->filter); bitmap_git->result = wants_bitmap; bitmap_git->haves = haves_bitmap; diff --git a/pack-bitmap.h b/pack-bitmap.h index 19a63fa1ab..3d3ddd7734 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -10,7 +10,6 @@ struct commit; struct repository; struct rev_info; -struct list_objects_filter_options; static const char BITMAP_IDX_SIGNATURE[] = {'B', 'I', 'T', 'M'}; @@ -54,7 +53,6 @@ void test_bitmap_walk(struct rev_info *revs); int test_bitmap_commits(struct repository *r); int test_bitmap_hashes(struct repository *r); struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, - struct list_objects_filter_options *filter, int filter_provided_objects); uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git); int reuse_partial_packfile_from_bitmap(struct bitmap_index *, diff --git a/reachable.c b/reachable.c index 84e3d0d75e..b9f4ad886e 100644 --- a/reachable.c +++ b/reachable.c @@ -205,7 +205,7 @@ void mark_reachable_objects(struct rev_info *revs, int mark_reflog, cp.progress = progress; cp.count = 0; - bitmap_git = prepare_bitmap_walk(revs, NULL, 0); + bitmap_git = prepare_bitmap_walk(revs, 0); if (bitmap_git) { traverse_bitmap_commit_list(bitmap_git, revs, mark_object_seen); free_bitmap_index(bitmap_git); From 3e0370a8d2251742d583ce095072b7dcc34b3c01 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 9 Mar 2022 16:01:36 +0000 Subject: [PATCH 06/13] list-objects: consolidate traverse_commit_list[_filtered] Now that all consumers of traverse_commit_list_filtered() populate the 'filter' member of 'struct rev_info', we can drop that parameter from the method prototype to simplify things. In addition, the only thing different now between traverse_commit_list_filtered() and traverse_commit_list() is the presence of the 'omitted' parameter, which is only non-NULL for one caller. 
We can consolidate these two methods by having one call the other and use the simpler form everywhere the 'omitted' parameter would be NULL. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 6 +++--- builtin/rev-list.c | 2 +- list-objects.c | 34 ++++++++++++---------------------- list-objects.h | 12 ++++++++++-- pack-bitmap.c | 6 +++--- 5 files changed, 29 insertions(+), 31 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index ddd698a1fe..ae7dbc8c8a 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3778,9 +3778,9 @@ static void get_object_list(int ac, const char **av) if (!fn_show_object) fn_show_object = show_object; - traverse_commit_list_filtered(&revs.filter, &revs, - show_commit, fn_show_object, NULL, - NULL); + traverse_commit_list(&revs, + show_commit, fn_show_object, + NULL); if (unpack_unreachable_expiration) { revs.ignore_missing_links = 1; diff --git a/builtin/rev-list.c b/builtin/rev-list.c index ab7558bd66..ec433cb6d3 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -729,7 +729,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) oidset_init(&missing_objects, DEFAULT_OIDSET_SIZE); traverse_commit_list_filtered( - &revs.filter, &revs, show_commit, show_object, &info, + &revs, show_commit, show_object, &info, (arg_print_omitted ? 
&omitted_objects : NULL)); if (arg_print_omitted) { diff --git a/list-objects.c b/list-objects.c index 2f623f8211..117f734398 100644 --- a/list-objects.c +++ b/list-objects.c @@ -416,35 +416,25 @@ static void do_traverse(struct traversal_context *ctx) strbuf_release(&csp); } -void traverse_commit_list(struct rev_info *revs, - show_commit_fn show_commit, - show_object_fn show_object, - void *show_data) -{ - struct traversal_context ctx; - ctx.revs = revs; - ctx.show_commit = show_commit; - ctx.show_object = show_object; - ctx.show_data = show_data; - ctx.filter = NULL; - do_traverse(&ctx); -} - void traverse_commit_list_filtered( - struct list_objects_filter_options *filter_options, struct rev_info *revs, show_commit_fn show_commit, show_object_fn show_object, void *show_data, struct oidset *omitted) { - struct traversal_context ctx; + struct traversal_context ctx = { + .revs = revs, + .show_object = show_object, + .show_commit = show_commit, + .show_data = show_data, + }; + + if (revs->filter.choice) + ctx.filter = list_objects_filter__init(omitted, &revs->filter); - ctx.revs = revs; - ctx.show_object = show_object; - ctx.show_commit = show_commit; - ctx.show_data = show_data; - ctx.filter = list_objects_filter__init(omitted, filter_options); do_traverse(&ctx); - list_objects_filter__free(ctx.filter); + + if (ctx.filter) + list_objects_filter__free(ctx.filter); } diff --git a/list-objects.h b/list-objects.h index a952680e46..9eaf4de844 100644 --- a/list-objects.h +++ b/list-objects.h @@ -7,7 +7,6 @@ struct rev_info; typedef void (*show_commit_fn)(struct commit *, void *); typedef void (*show_object_fn)(struct object *, const char *, void *); -void traverse_commit_list(struct rev_info *, show_commit_fn, show_object_fn, void *); typedef void (*show_edge_fn)(struct commit *); void mark_edges_uninteresting(struct rev_info *revs, @@ -18,11 +17,20 @@ struct oidset; struct list_objects_filter_options; void traverse_commit_list_filtered( - struct 
list_objects_filter_options *filter_options, struct rev_info *revs, show_commit_fn show_commit, show_object_fn show_object, void *show_data, struct oidset *omitted); +static inline void traverse_commit_list( + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + void *show_data) +{ + traverse_commit_list_filtered(revs, show_commit, + show_object, show_data, NULL); +} + #endif /* LIST_OBJECTS_H */ diff --git a/pack-bitmap.c b/pack-bitmap.c index 37fa490579..97909d48da 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -822,9 +822,9 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, show_data.bitmap_git = bitmap_git; show_data.base = base; - traverse_commit_list_filtered(&revs->filter, revs, - show_commit, show_object, - &show_data, NULL); + traverse_commit_list(revs, + show_commit, show_object, + &show_data); revs->include_check = NULL; revs->include_check_obj = NULL; From f0d2f84919885d4dfb940e79a8e340bd2ad1887d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 9 Mar 2022 16:01:37 +0000 Subject: [PATCH 07/13] MyFirstObjectWalk: update recommended usage The previous change consolidated traverse_commit_list() and traverse_commit_list_filtered(). This allows us to simplify the recommended usage in MyFirstObjectWalk.txt to use this new set of values. While here, add some clarification on the difference between the two methods. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/MyFirstObjectWalk.txt | 44 +++++++++++------------------ 1 file changed, 16 insertions(+), 28 deletions(-) diff --git a/Documentation/MyFirstObjectWalk.txt b/Documentation/MyFirstObjectWalk.txt index ca267941f3..8d9e85566e 100644 --- a/Documentation/MyFirstObjectWalk.txt +++ b/Documentation/MyFirstObjectWalk.txt @@ -522,24 +522,25 @@ function shows that the all-object walk is being performed by `traverse_commit_list()` or `traverse_commit_list_filtered()`. 
Those two functions reside in `list-objects.c`; examining the source shows that, despite the name, these functions traverse all kinds of objects. Let's have a look at -the arguments to `traverse_commit_list_filtered()`, which are a superset of the -arguments to the unfiltered version. +the arguments to `traverse_commit_list()`. -- `struct list_objects_filter_options *filter_options`: This is a struct which - stores a filter-spec as outlined in `Documentation/rev-list-options.txt`. -- `struct rev_info *revs`: This is the `rev_info` used for the walk. +- `struct rev_info *revs`: This is the `rev_info` used for the walk. If + its `filter` member is not `NULL`, then `filter` contains information for + how to filter the object list. - `show_commit_fn show_commit`: A callback which will be used to handle each individual commit object. - `show_object_fn show_object`: A callback which will be used to handle each non-commit object (so each blob, tree, or tag). - `void *show_data`: A context buffer which is passed in turn to `show_commit` and `show_object`. + +In addition, `traverse_commit_list_filtered()` has an additional parameter: + - `struct oidset *omitted`: A linked-list of object IDs which the provided filter caused to be omitted. -It looks like this `traverse_commit_list_filtered()` uses callbacks we provide -instead of needing us to call it repeatedly ourselves. Cool! Let's add the -callbacks first. +It looks like these methods use callbacks we provide instead of needing us +to call them repeatedly ourselves. Cool! Let's add the callbacks first. For the sake of this tutorial, we'll simply keep track of how many of each kind of object we find. At file scope in `builtin/walken.c` add the following @@ -712,20 +713,9 @@ help understand. In our case, that means we omit trees and blobs not directly referenced by `HEAD` or `HEAD`'s history, because we begin the walk with only `HEAD` in the `pending` list.) 
-First, we'll need to `#include "list-objects-filter-options.h"` and set up the -`struct list_objects_filter_options` at the top of the function. - ----- -static void walken_object_walk(struct rev_info *rev) -{ - struct list_objects_filter_options filter_options = { 0 }; - - ... ----- - For now, we are not going to track the omitted objects, so we'll replace those parameters with `NULL`. For the sake of simplicity, we'll add a simple -build-time branch to use our filter or not. Replace the line calling +build-time branch to use our filter or not. Preface the line calling `traverse_commit_list()` with the following, which will remind us which kind of walk we've just performed: @@ -733,19 +723,17 @@ walk we've just performed: if (0) { /* Unfiltered: */ trace_printf(_("Unfiltered object walk.\n")); - traverse_commit_list(rev, walken_show_commit, - walken_show_object, NULL); } else { trace_printf( _("Filtered object walk with filterspec 'tree:1'.\n")); - parse_list_objects_filter(&filter_options, "tree:1"); - - traverse_commit_list_filtered(&filter_options, rev, - walken_show_commit, walken_show_object, NULL, NULL); + CALLOC_ARRAY(rev->filter, 1); + parse_list_objects_filter(rev->filter, "tree:1"); } + traverse_commit_list(rev, walken_show_commit, + walken_show_object, NULL); ---- -`struct list_objects_filter_options` is usually built directly from a command +The `rev->filter` member is usually built directly from a command line argument, so the module provides an easy way to build one from a string. Even though we aren't taking user input right now, we can still build one with a hardcoded string using `parse_list_objects_filter()`. @@ -784,7 +772,7 @@ object: ---- ... - traverse_commit_list_filtered(&filter_options, rev, + traverse_commit_list_filtered(rev, walken_show_commit, walken_show_object, NULL, &omitted); ... 
From 4f33a6345f2bd6e47253d1dbefd01874d895ab2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Wed, 9 Mar 2022 16:01:38 +0000 Subject: [PATCH 08/13] list-objects: handle NULL function pointers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a caller to traverse_commit_list() specifies the options for the --objects flag but does not specify a show_object function pointer, the result is a segfault. This is currently visible by running 'git bundle create --objects HEAD'. We could fix this problem by supplying a no-op callback in builtin/bundle.c, but that only solves the problem for one builtin, leaving this segfault open for other callers. Replace all callers of the show_commit and show_object function pointers in list-objects.c to call helper functions show_commit() and show_object() which check that the given context has non-NULL functions before passing the necessary data. One extra benefit is that it reduces duplication due to passing ctx->show_data to every caller. Test that this segfault no longer occurs for 'git bundle'. 
Co-authored-by: Derrick Stolee Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- bundle.c | 2 ++ list-objects.c | 27 ++++++++++++++++++++++----- t/t6020-bundle-misc.sh | 12 ++++++++++++ 3 files changed, 36 insertions(+), 5 deletions(-) diff --git a/bundle.c b/bundle.c index a0bb687b0f..7ba60a573d 100644 --- a/bundle.c +++ b/bundle.c @@ -544,6 +544,8 @@ int create_bundle(struct repository *r, const char *path, die("revision walk setup failed"); bpi.fd = bundle_fd; bpi.pending = &revs_copy.pending; + + revs.blob_objects = revs.tree_objects = 0; traverse_commit_list(&revs, write_bundle_prerequisites, NULL, &bpi); object_array_remove_duplicates(&revs_copy.pending); diff --git a/list-objects.c b/list-objects.c index 117f734398..250d9de41c 100644 --- a/list-objects.c +++ b/list-objects.c @@ -21,6 +21,23 @@ struct traversal_context { struct filter *filter; }; +static void show_commit(struct traversal_context *ctx, + struct commit *commit) +{ + if (!ctx->show_commit) + return; + ctx->show_commit(commit, ctx->show_data); +} + +static void show_object(struct traversal_context *ctx, + struct object *object, + const char *name) +{ + if (!ctx->show_object) + return; + ctx->show_object(object, name, ctx->show_data); +} + static void process_blob(struct traversal_context *ctx, struct blob *blob, struct strbuf *path, @@ -60,7 +77,7 @@ static void process_blob(struct traversal_context *ctx, if (r & LOFR_MARK_SEEN) obj->flags |= SEEN; if (r & LOFR_DO_SHOW) - ctx->show_object(obj, path->buf, ctx->show_data); + show_object(ctx, obj, path->buf); strbuf_setlen(path, pathlen); } @@ -194,7 +211,7 @@ static void process_tree(struct traversal_context *ctx, if (r & LOFR_MARK_SEEN) obj->flags |= SEEN; if (r & LOFR_DO_SHOW) - ctx->show_object(obj, base->buf, ctx->show_data); + show_object(ctx, obj, base->buf); if (base->len) strbuf_addch(base, '/'); @@ -210,7 +227,7 @@ static void process_tree(struct traversal_context *ctx, if (r & 
LOFR_MARK_SEEN) obj->flags |= SEEN; if (r & LOFR_DO_SHOW) - ctx->show_object(obj, base->buf, ctx->show_data); + show_object(ctx, obj, base->buf); strbuf_setlen(base, baselen); free_tree_buffer(tree); @@ -228,7 +245,7 @@ static void process_tag(struct traversal_context *ctx, if (r & LOFR_MARK_SEEN) tag->object.flags |= SEEN; if (r & LOFR_DO_SHOW) - ctx->show_object(&tag->object, name, ctx->show_data); + show_object(ctx, &tag->object, name); } static void mark_edge_parents_uninteresting(struct commit *commit, @@ -402,7 +419,7 @@ static void do_traverse(struct traversal_context *ctx) if (r & LOFR_MARK_SEEN) commit->object.flags |= SEEN; if (r & LOFR_DO_SHOW) - ctx->show_commit(commit, ctx->show_data); + show_commit(ctx, commit); if (ctx->revs->tree_blobs_in_commit_order) /* diff --git a/t/t6020-bundle-misc.sh b/t/t6020-bundle-misc.sh index b13e8a52a9..df5ff561fa 100755 --- a/t/t6020-bundle-misc.sh +++ b/t/t6020-bundle-misc.sh @@ -475,4 +475,16 @@ test_expect_success 'clone from bundle' ' test_cmp expect actual ' +test_expect_success 'unfiltered bundle with --objects' ' + git bundle create all-objects.bdl \ + --all --objects && + git bundle create all.bdl \ + --all && + + # Compare the headers of these files. + sed -n -e "/^$/q" -e "p" all.bdl >expect && + sed -n -e "/^$/q" -e "p" all-objects.bdl >actual && + test_cmp expect actual +' + test_done From 105c6f14ad34b417c1e78bc9a8704dcda7b059f2 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 9 Mar 2022 16:01:39 +0000 Subject: [PATCH 09/13] bundle: parse filter capability The v3 bundle format has capabilities, allowing newer versions of Git to create bundles with newer features. Older versions that do not understand these new capabilities will fail with a helpful warning. Create a new capability allowing Git to understand that the contained pack-file is filtered according to some object filter. Typically, this filter will be "blob:none" for a blobless partial clone. 
This change teaches Git to parse this capability, place its value in the bundle header, and demonstrate this understanding by adding a message to 'git bundle verify'. Since we will use gently_parse_list_objects_filter() outside of list-objects-filter-options.c, make it an external method and move its API documentation to before its declaration. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-bundle.txt | 7 +++++-- Documentation/technical/bundle-format.txt | 11 ++++++++--- bundle.c | 15 ++++++++++++++- bundle.h | 2 ++ list-objects-filter-options.c | 17 +---------------- list-objects-filter-options.h | 20 ++++++++++++++++++++ 6 files changed, 50 insertions(+), 22 deletions(-) diff --git a/Documentation/git-bundle.txt b/Documentation/git-bundle.txt index 72ab813905..ac4c4352aa 100644 --- a/Documentation/git-bundle.txt +++ b/Documentation/git-bundle.txt @@ -75,8 +75,11 @@ verify :: cleanly to the current repository. This includes checks on the bundle format itself as well as checking that the prerequisite commits exist and are fully linked in the current repository. - 'git bundle' prints a list of missing commits, if any, and exits - with a non-zero status. + Information about additional capabilities, such as "object filter", + is printed. See "Capabilities" in link:technical/bundle-format.html + for more information. Finally, 'git bundle' prints a list of + missing commits, if any. The exit code is zero for success, but + will be nonzero if the bundle file is invalid. list-heads :: Lists the references defined in the bundle. If followed by a diff --git a/Documentation/technical/bundle-format.txt b/Documentation/technical/bundle-format.txt index bac558d049..b9be8644cf 100644 --- a/Documentation/technical/bundle-format.txt +++ b/Documentation/technical/bundle-format.txt @@ -71,6 +71,11 @@ and the Git bundle v2 format cannot represent a shallow clone repository. 
== Capabilities Because there is no opportunity for negotiation, unknown capabilities cause 'git -bundle' to abort. The only known capability is `object-format`, which specifies -the hash algorithm in use, and can take the same values as the -`extensions.objectFormat` configuration value. +bundle' to abort. + +* `object-format` specifies the hash algorithm in use, and can take the same + values as the `extensions.objectFormat` configuration value. + +* `filter` specifies an object filter as in the `--filter` option in + linkgit:git-rev-list[1]. The resulting pack-file must be marked as a + `.promisor` pack-file after it is unbundled. diff --git a/bundle.c b/bundle.c index 7ba60a573d..41e75efab9 100644 --- a/bundle.c +++ b/bundle.c @@ -11,7 +11,7 @@ #include "run-command.h" #include "refs.h" #include "strvec.h" - +#include "list-objects-filter-options.h" static const char v2_bundle_signature[] = "# v2 git bundle\n"; static const char v3_bundle_signature[] = "# v3 git bundle\n"; @@ -33,6 +33,7 @@ void bundle_header_release(struct bundle_header *header) { string_list_clear(&header->prerequisites, 1); string_list_clear(&header->references, 1); + list_objects_filter_release(&header->filter); } static int parse_capability(struct bundle_header *header, const char *capability) @@ -45,6 +46,10 @@ static int parse_capability(struct bundle_header *header, const char *capability header->hash_algo = &hash_algos[algo]; return 0; } + if (skip_prefix(capability, "filter=", &arg)) { + parse_list_objects_filter(&header->filter, arg); + return 0; + } return error(_("unknown capability '%s'"), capability); } @@ -220,6 +225,8 @@ int verify_bundle(struct repository *r, req_nr = revs.pending.nr; setup_revisions(2, argv, &revs, NULL); + list_objects_filter_copy(&revs.filter, &header->filter); + if (prepare_revision_walk(&revs)) die(_("revision walk setup failed")); @@ -259,6 +266,12 @@ int verify_bundle(struct repository *r, r->nr), r->nr); list_refs(r, 0, NULL); + + if 
(header->filter.choice) { + printf_ln("The bundle uses this filter: %s", + list_objects_filter_spec(&header->filter)); + } + r = &header->prerequisites; if (!r->nr) { printf_ln(_("The bundle records a complete history.")); diff --git a/bundle.h b/bundle.h index 06009fe6b1..7fef2108f4 100644 --- a/bundle.h +++ b/bundle.h @@ -4,12 +4,14 @@ #include "strvec.h" #include "cache.h" #include "string-list.h" +#include "list-objects-filter-options.h" struct bundle_header { unsigned version; struct string_list prerequisites; struct string_list references; const struct git_hash_algo *hash_algo; + struct list_objects_filter_options filter; }; #define BUNDLE_HEADER_INIT \ diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 449d53af69..f02d8df142 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -40,22 +40,7 @@ const char *list_object_filter_config_name(enum list_objects_filter_choice c) BUG("list_object_filter_config_name: invalid argument '%d'", c); } -/* - * Parse value of the argument to the "filter" keyword. - * On the command line this looks like: - * --filter= - * and in the pack protocol as: - * "filter" SP - * - * The filter keyword will be used by many commands. - * See Documentation/rev-list-options.txt for allowed values for . - * - * Capture the given arg as the "filter_spec". This can be forwarded to - * subordinate commands when necessary (although it's better to pass it through - * expand_list_objects_filter_spec() first). We also "intern" the arg for the - * convenience of the current command. 
- */ -static int gently_parse_list_objects_filter( +int gently_parse_list_objects_filter( struct list_objects_filter_options *filter_options, const char *arg, struct strbuf *errbuf) diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h index 425c38cae9..2eb6c98394 100644 --- a/list-objects-filter-options.h +++ b/list-objects-filter-options.h @@ -72,6 +72,26 @@ struct list_objects_filter_options { /* Normalized command line arguments */ #define CL_ARG__FILTER "filter" +/* + * Parse value of the argument to the "filter" keyword. + * On the command line this looks like: + * --filter= + * and in the pack protocol as: + * "filter" SP + * + * The filter keyword will be used by many commands. + * See Documentation/rev-list-options.txt for allowed values for . + * + * Capture the given arg as the "filter_spec". This can be forwarded to + * subordinate commands when necessary (although it's better to pass it through + * expand_list_objects_filter_spec() first). We also "intern" the arg for the + * convenience of the current command. + */ +int gently_parse_list_objects_filter( + struct list_objects_filter_options *filter_options, + const char *arg, + struct strbuf *errbuf); + void list_objects_filter_die_if_populated( struct list_objects_filter_options *filter_options); From c4ea513f4aea64e1ab03309dddff046852bddb74 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 9 Mar 2022 16:01:40 +0000 Subject: [PATCH 10/13] rev-list: move --filter parsing into revision.c Now that 'struct rev_info' has a 'filter' member and most consumers of object filtering are using that member instead of an external struct, move the parsing of the '--filter' option out of builtin/rev-list.c and into revision.c. This use within handle_revision_pseudo_opt() allows us to find the option within setup_revisions() if the arguments are passed directly. 
In the case of a command such as 'git blame', the arguments are first scanned and checked with parse_revision_opt(), which complains about the option, so 'git blame --filter=blob:none <file>' does not become valid with this change. Some commands, such as 'git diff', gain this option without it having any effect. And 'git diff --objects' was already possible, but does not actually make sense in that builtin. The key addition that is coming is 'git bundle create --filter=<filter-spec>' so we can create bundles containing promisor packs. More work is required to make them fully functional, but that will follow. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/rev-list.c | 11 ----------- revision.c | 7 +++++++ 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/builtin/rev-list.c b/builtin/rev-list.c index ec433cb6d3..640828149c 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -591,17 +591,6 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) show_progress = arg; continue; } - - if (skip_prefix(arg, ("--" CL_ARG__FILTER "="), &arg)) { - parse_list_objects_filter(&revs.filter, arg); - if (revs.filter.choice && !revs.blob_objects) - die(_("object filtering requires --objects")); - continue; - } - if (!strcmp(arg, ("--no-" CL_ARG__FILTER))) { - list_objects_filter_set_no_filter(&revs.filter); - continue; - } if (!strcmp(arg, "--filter-provided-objects")) { filter_provided_objects = 1; continue; diff --git a/revision.c b/revision.c index 4c55c55880..58b8717262 100644 --- a/revision.c +++ b/revision.c @@ -32,6 +32,7 @@ #include "utf8.h" #include "bloom.h" #include "json-writer.h" +#include "list-objects-filter-options.h" volatile show_early_output_fn_t show_early_output; @@ -2690,6 +2691,10 @@ static int handle_revision_pseudo_opt(struct rev_info *revs, revs->no_walk = 0; } else if (!strcmp(arg, "--single-worktree")) { revs->single_worktree = 1; + } else if (skip_prefix(arg, ("--" CL_ARG__FILTER "="), &arg)) { +
parse_list_objects_filter(&revs->filter, arg); + } else if (!strcmp(arg, ("--no-" CL_ARG__FILTER))) { + list_objects_filter_set_no_filter(&revs->filter); } else { return 0; } @@ -2894,6 +2899,8 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s die("cannot combine --walk-reflogs with history-limiting options"); if (revs->rewrite_parents && revs->children.name) die(_("options '%s' and '%s' cannot be used together"), "--parents", "--children"); + if (revs->filter.choice && !revs->blob_objects) + die(_("object filtering requires --objects")); /* * Limitations on the graph functionality From f18b512bbba8ee508fec64698f581920bfa5b46d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 9 Mar 2022 16:01:41 +0000 Subject: [PATCH 11/13] bundle: create filtered bundles A previous change allowed Git to parse bundles with the 'filter' capability. Now, teach Git to create bundles with this option. Some rearranging of code is required to get the option parsing in the correct spot. There are now two reasons why we might need capabilities (a new hash algorithm or an object filter) so that is pulled out into a place where we can check both at the same time. The --filter option is parsed as part of setup_revisions(), but it expected the --objects flag, too. That flag is somewhat implied by 'git bundle' because it creates a pack-file walking objects, but there is also a walk that walks the revision range expecting only commits. Make this parsing work by setting 'revs.tree_objects' and 'revs.blob_objects' before the call to setup_revisions(). 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- bundle.c | 53 +++++++++++++++++++++++++++++++++--------- t/t6020-bundle-misc.sh | 48 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 11 deletions(-) diff --git a/bundle.c b/bundle.c index 41e75efab9..9370a6e307 100644 --- a/bundle.c +++ b/bundle.c @@ -332,6 +332,9 @@ static int write_pack_data(int bundle_fd, struct rev_info *revs, struct strvec * "--stdout", "--thin", "--delta-base-offset", NULL); strvec_pushv(&pack_objects.args, pack_options->v); + if (revs->filter.choice) + strvec_pushf(&pack_objects.args, "--filter=%s", + list_objects_filter_spec(&revs->filter)); pack_objects.in = -1; pack_objects.out = bundle_fd; pack_objects.git_cmd = 1; @@ -499,10 +502,37 @@ int create_bundle(struct repository *r, const char *path, int bundle_to_stdout; int ref_count = 0; struct rev_info revs, revs_copy; - int min_version = the_hash_algo == &hash_algos[GIT_HASH_SHA1] ? 2 : 3; + int min_version = 2; struct bundle_prerequisites_info bpi; int i; + /* init revs to list objects for pack-objects later */ + save_commit_buffer = 0; + repo_init_revisions(r, &revs, NULL); + + /* + * Pre-initialize the '--objects' flag so we can parse a + * --filter option successfully. + */ + revs.tree_objects = revs.blob_objects = 1; + + argc = setup_revisions(argc, argv, &revs, NULL); + + /* + * Reasons to require version 3: + * + * 1. @object-format is required because our hash algorithm is not + * SHA1. + * 2. @filter is required because we parsed an object filter. 
+ */ + if (the_hash_algo != &hash_algos[GIT_HASH_SHA1] || revs.filter.choice) + min_version = 3; + + if (argc > 1) { + error(_("unrecognized argument: %s"), argv[1]); + goto err; + } + bundle_to_stdout = !strcmp(path, "-"); if (bundle_to_stdout) bundle_fd = 1; @@ -525,17 +555,14 @@ int create_bundle(struct repository *r, const char *path, write_or_die(bundle_fd, capability, strlen(capability)); write_or_die(bundle_fd, the_hash_algo->name, strlen(the_hash_algo->name)); write_or_die(bundle_fd, "\n", 1); - } - /* init revs to list objects for pack-objects later */ - save_commit_buffer = 0; - repo_init_revisions(r, &revs, NULL); - - argc = setup_revisions(argc, argv, &revs, NULL); - - if (argc > 1) { - error(_("unrecognized argument: %s"), argv[1]); - goto err; + if (revs.filter.choice) { + const char *value = expand_list_objects_filter_spec(&revs.filter); + capability = "@filter="; + write_or_die(bundle_fd, capability, strlen(capability)); + write_or_die(bundle_fd, value, strlen(value)); + write_or_die(bundle_fd, "\n", 1); + } } /* save revs.pending in revs_copy for later use */ @@ -558,6 +585,10 @@ int create_bundle(struct repository *r, const char *path, bpi.fd = bundle_fd; bpi.pending = &revs_copy.pending; + /* + * Remove any object walking here. We only care about commits and + * tags here. The revs_copy has the right instances of these values. 
+ */ revs.blob_objects = revs.tree_objects = 0; traverse_commit_list(&revs, write_bundle_prerequisites, NULL, &bpi); object_array_remove_duplicates(&revs_copy.pending); diff --git a/t/t6020-bundle-misc.sh b/t/t6020-bundle-misc.sh index df5ff561fa..6e97c044ee 100755 --- a/t/t6020-bundle-misc.sh +++ b/t/t6020-bundle-misc.sh @@ -487,4 +487,52 @@ test_expect_success 'unfiltered bundle with --objects' ' test_cmp expect actual ' +for filter in "blob:none" "tree:0" "tree:1" "blob:limit=100" +do + test_expect_success "filtered bundle: $filter" ' + test_when_finished rm -rf .git/objects/pack cloned unbundled && + git bundle create partial.bdl \ + --all \ + --filter=$filter && + + git bundle verify partial.bdl >unfiltered && + make_user_friendly_and_stable_output actual && + + cat >expect <<-EOF && + The bundle contains these 10 refs: + refs/heads/main + refs/heads/release + refs/heads/topic/1 + refs/heads/topic/2 + refs/pull/1/head + refs/pull/2/head + refs/tags/v1 + refs/tags/v2 + refs/tags/v3 + HEAD + The bundle uses this filter: $filter + The bundle records a complete history. + EOF + test_cmp expect actual && + + test_config uploadpack.allowfilter 1 && + test_config uploadpack.allowanysha1inwant 1 && + git clone --no-local --filter=$filter --bare "file://$(pwd)" cloned && + + git init unbundled && + git -C unbundled bundle unbundle ../partial.bdl >ref-list.txt && + + # Count the same number of reachable objects. 
+ reflist=$(git for-each-ref --format="%(objectname)") && + git rev-list --objects --filter=$filter --missing=allow-any \ + $reflist >expect && + for repo in cloned unbundled + do + git -C $repo rev-list --objects --missing=allow-any \ + $reflist >actual && + test_cmp expect actual || return 1 + done + ' +done + test_done From 4f39eb031af8690eb86eb781e43b10141dd47da9 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 9 Mar 2022 16:01:42 +0000 Subject: [PATCH 12/13] bundle: unbundle promisor packs In order to have a valid pack-file after unbundling a bundle that has the 'filter' capability, we need to generate a .promisor file. The bundle does not promise _where_ the objects can be found, but we can expect that these bundles will be unbundled in repositories with appropriate promisor remotes that can find those missing objects. Use the 'git index-pack --promisor=<message>' option to create this .promisor file. Add "from-bundle" as the message to help anyone diagnose issues with these promisor packs. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- bundle.c | 4 ++++ t/t6020-bundle-misc.sh | 2 ++ 2 files changed, 6 insertions(+) diff --git a/bundle.c b/bundle.c index 9370a6e307..56681c2113 100644 --- a/bundle.c +++ b/bundle.c @@ -620,6 +620,10 @@ int unbundle(struct repository *r, struct bundle_header *header, struct child_process ip = CHILD_PROCESS_INIT; strvec_pushl(&ip.args, "index-pack", "--fix-thin", "--stdin", NULL); + /* If there is a filter, then we need to create the promisor pack.
*/ + if (header->filter.choice) + strvec_push(&ip.args, "--promisor=from-bundle"); + if (extra_index_pack_args) { strvec_pushv(&ip.args, extra_index_pack_args->v); strvec_clear(extra_index_pack_args); diff --git a/t/t6020-bundle-misc.sh b/t/t6020-bundle-misc.sh index 6e97c044ee..7c6db67022 100755 --- a/t/t6020-bundle-misc.sh +++ b/t/t6020-bundle-misc.sh @@ -521,6 +521,8 @@ do git init unbundled && git -C unbundled bundle unbundle ../partial.bdl >ref-list.txt && + ls unbundled/.git/objects/pack/pack-*.promisor >promisor && + test_line_count = 1 promisor && # Count the same number of reachable objects. reflist=$(git for-each-ref --format="%(objectname)") && From 86fdd94d723b7fa1870a64e8080d3540a434f29b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 9 Mar 2022 16:01:43 +0000 Subject: [PATCH 13/13] clone: fail gracefully when cloning filtered bundle Users can create a new repository using 'git clone <bundle-file>'. The new "@filter" capability for bundles means that we can generate a bundle that does not contain all reachable objects, even if the header has no negative commit OIDs. It is feasible to think that we could make a filtered bundle work with the command git clone --filter=$filter --bare <bundle-file> or possibly replacing --bare with --no-checkout. However, this requires having some repository-global config that specifies the object filter and notifies Git about the existence of promisor pack-files. Without a remote, that is currently impossible. As a stop-gap, parse the bundle header during 'git clone' and die() with a helpful error message instead of the current behavior of failing due to "missing objects". Most of the existing logic for handling bundle clones actually happens in fetch-pack.c, but that logic is the same as if the user specified 'git fetch <bundle-file>', so we want to avoid failing to fetch a filtered bundle when in an existing repository that has the proper config set up for at least one remote.
Carefully comment around the test that this is not the desired long-term behavior of 'git clone' in this case, but instead that we need to do more work before that is possible. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/clone.c | 13 +++++++++++++ t/t6020-bundle-misc.sh | 12 ++++++++++++ 2 files changed, 25 insertions(+) diff --git a/builtin/clone.c b/builtin/clone.c index 0d80b135c9..cf8f348865 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -33,6 +33,7 @@ #include "packfile.h" #include "list-objects-filter-options.h" #include "hook.h" +#include "bundle.h" /* * Overall FIXMEs: @@ -1138,6 +1139,18 @@ int cmd_clone(int argc, const char **argv, const char *prefix) warning(_("--local is ignored")); transport->cloning = 1; + if (is_bundle) { + struct bundle_header header = BUNDLE_HEADER_INIT; + int fd = read_bundle_header(path, &header); + int has_filter = header.filter.choice != LOFC_DISABLED; + + if (fd > 0) + close(fd); + bundle_header_release(&header); + if (has_filter) + die(_("cannot clone from filtered bundle")); + } + transport_set_option(transport, TRANS_OPT_KEEP, "yes"); if (reject_shallow) diff --git a/t/t6020-bundle-misc.sh b/t/t6020-bundle-misc.sh index 7c6db67022..ed95d19542 100755 --- a/t/t6020-bundle-misc.sh +++ b/t/t6020-bundle-misc.sh @@ -537,4 +537,16 @@ do ' done +# NEEDSWORK: 'git clone --bare' should be able to clone from a filtered +# bundle, but that requires a change to promisor/filter config options. +# For now, we fail gracefully with a helpful error. This behavior can be +# changed in the future to succeed as much as possible. +test_expect_success 'cloning from filtered bundle has useful error' ' + git bundle create partial.bdl \ + --all \ + --filter=blob:none && + test_must_fail git clone --bare partial.bdl partial 2>err && + grep "cannot clone from filtered bundle" err +' + test_done