From 161b1cf3bdca7fecf5ec03ffdb74260312cd0229 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Wed, 28 Nov 2018 16:27:48 -0800 Subject: [PATCH 1/9] sha1-array: provide oid_array_filter Helped-by: Junio C Hamano Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- Documentation/technical/api-oid-array.txt | 5 +++++ sha1-array.c | 17 +++++++++++++++++ sha1-array.h | 3 +++ 3 files changed, 25 insertions(+) diff --git a/Documentation/technical/api-oid-array.txt b/Documentation/technical/api-oid-array.txt index 9febfb1d52..c97428c2c3 100644 --- a/Documentation/technical/api-oid-array.txt +++ b/Documentation/technical/api-oid-array.txt @@ -48,6 +48,11 @@ Functions is not sorted, this function has the side effect of sorting it. +`oid_array_filter`:: + Apply the callback function `want` to each entry in the array, + retaining only the entries for which the function returns true. + Preserve the order of the entries that are retained. + Examples -------- diff --git a/sha1-array.c b/sha1-array.c index b94e0ec0f5..d922e94e3f 100644 --- a/sha1-array.c +++ b/sha1-array.c @@ -77,3 +77,20 @@ int oid_array_for_each_unique(struct oid_array *array, } return 0; } + +void oid_array_filter(struct oid_array *array, + for_each_oid_fn want, + void *cb_data) +{ + unsigned nr = array->nr, src, dst; + struct object_id *oids = array->oid; + + for (src = dst = 0; src < nr; src++) { + if (want(&oids[src], cb_data)) { + if (src != dst) + oidcpy(&oids[dst], &oids[src]); + dst++; + } + } + array->nr = dst; +} diff --git a/sha1-array.h b/sha1-array.h index 232bf95017..55d016c4bf 100644 --- a/sha1-array.h +++ b/sha1-array.h @@ -22,5 +22,8 @@ int oid_array_for_each(struct oid_array *array, int oid_array_for_each_unique(struct oid_array *array, for_each_oid_fn fn, void *data); +void oid_array_filter(struct oid_array *array, + for_each_oid_fn want, + void *cbdata); #endif /* SHA1_ARRAY_H */ From 25e3d28efd56124b51e02a3f8496401d4e8fb40b Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Wed, 28 Nov 2018 16:27:49 -0800 Subject: [PATCH 2/9] submodule.c: fix indentation The submodule subsystem is really bad at staying within 80 characters. Fix it while we are here. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- submodule.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/submodule.c b/submodule.c index 6415cc5580..bc48ea3b68 100644 --- a/submodule.c +++ b/submodule.c @@ -1271,7 +1271,8 @@ static int get_next_submodule(struct child_process *cp, if (!submodule) { const char *name = default_name_or_path(ce->name); if (name) { - default_submodule.path = default_submodule.name = name; + default_submodule.path = name; + default_submodule.name = name; submodule = &default_submodule; } } @@ -1281,8 +1282,10 @@ static int get_next_submodule(struct child_process *cp, default: case RECURSE_SUBMODULES_DEFAULT: case RECURSE_SUBMODULES_ON_DEMAND: - if (!submodule || !unsorted_string_list_lookup(&changed_submodule_names, - submodule->name)) + if (!submodule || + !unsorted_string_list_lookup( + &changed_submodule_names, + submodule->name)) continue; default_argv = "on-demand"; break; From 08a297bd4996480f0b03830058f413bb24f0d37c Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Wed, 28 Nov 2018 16:27:50 -0800 Subject: [PATCH 3/9] submodule.c: sort changed_submodule_names before searching it We can string_list_insert() to maintain sorted-ness of the list as we find new items, or we can string_list_append() to build an unsorted list and sort it at the end just once. As we do not rely on the sortedness while building the list, we pick the "append and sort at the end" as it has better worst case execution times. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- submodule.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/submodule.c b/submodule.c index bc48ea3b68..3c388f85cc 100644 --- a/submodule.c +++ b/submodule.c @@ -1283,7 +1283,7 @@ static int get_next_submodule(struct child_process *cp, case RECURSE_SUBMODULES_DEFAULT: case RECURSE_SUBMODULES_ON_DEMAND: if (!submodule || - !unsorted_string_list_lookup( + !string_list_lookup( &changed_submodule_names, submodule->name)) continue; @@ -1377,6 +1377,7 @@ int fetch_populated_submodules(struct repository *r, /* default value, "--submodule-prefix" and its value are added later */ calculate_changed_submodule_paths(r); + string_list_sort(&changed_submodule_names); run_processes_parallel(max_parallel_jobs, get_next_submodule, fetch_start_failure, From 16dd6fe133cc3693f0daad0d10c4f65713e36de4 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Wed, 28 Nov 2018 16:27:51 -0800 Subject: [PATCH 4/9] submodule.c: tighten scope of changed_submodule_names struct The `changed_submodule_names` are only used for fetching, so let's make it part of the struct that is passed around for fetching submodules. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- submodule.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/submodule.c b/submodule.c index 3c388f85cc..f93f0aff82 100644 --- a/submodule.c +++ b/submodule.c @@ -25,7 +25,6 @@ #include "commit-reach.h" static int config_update_recurse_submodules = RECURSE_SUBMODULES_OFF; -static struct string_list changed_submodule_names = STRING_LIST_INIT_DUP; static int initialized_fetch_ref_tips; static struct oid_array ref_tips_before_fetch; static struct oid_array ref_tips_after_fetch; @@ -1136,7 +1135,8 @@ void check_for_new_submodule_commits(struct object_id *oid) oid_array_append(&ref_tips_after_fetch, oid); } -static void calculate_changed_submodule_paths(struct repository *r) +static void calculate_changed_submodule_paths(struct repository *r, + struct string_list *changed_submodule_names) { struct argv_array argv = ARGV_ARRAY_INIT; struct string_list changed_submodules = STRING_LIST_INIT_DUP; @@ -1174,7 +1174,8 @@ static void calculate_changed_submodule_paths(struct repository *r) continue; if (!submodule_has_commits(r, path, commits)) - string_list_append(&changed_submodule_names, name->string); + string_list_append(changed_submodule_names, + name->string); } free_submodules_oids(&changed_submodules); @@ -1221,8 +1222,10 @@ struct submodule_parallel_fetch { int default_option; int quiet; int result; + + struct string_list changed_submodule_names; }; -#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0} +#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0, STRING_LIST_INIT_DUP } static int get_fetch_recurse_config(const struct submodule *submodule, struct submodule_parallel_fetch *spf) @@ -1284,7 +1287,7 @@ static int get_next_submodule(struct child_process *cp, case RECURSE_SUBMODULES_ON_DEMAND: if (!submodule || !string_list_lookup( - &changed_submodule_names, + &spf->changed_submodule_names, submodule->name)) continue; default_argv = "on-demand"; @@ -1376,8 +1379,8 @@ int fetch_populated_submodules(struct repository *r, argv_array_push(&spf.args, "--recurse-submodules-default"); /* default value, "--submodule-prefix" and its value are added later */ - calculate_changed_submodule_paths(r); - string_list_sort(&changed_submodule_names); + calculate_changed_submodule_paths(r, &spf.changed_submodule_names); + string_list_sort(&spf.changed_submodule_names); run_processes_parallel(max_parallel_jobs, get_next_submodule, fetch_start_failure, @@ -1386,7 +1389,7 @@ int fetch_populated_submodules(struct repository *r, argv_array_clear(&spf.args); out: - string_list_clear(&changed_submodule_names, 1); + string_list_clear(&spf.changed_submodule_names, 1); return spf.result; } From bcd7337243f4f20091d478c71682d68dd2100207 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Wed, 28 Nov 2018 16:27:52 -0800 Subject: [PATCH 5/9] submodule: store OIDs in changed_submodule_names 'calculate_changed_submodule_paths' uses a local list to compute the changed submodules, and then produces the result by copying appropriate items into the result list. Instead use the result list directly and prune items afterwards using string_list_remove_empty_items. By doing so we'll have access to the util pointer for longer that contains the commits that we need to fetch, which will be useful in a later patch. Signed-off-by: Stefan Beller Reviewed-by: Jonathan Tan Signed-off-by: Junio C Hamano --- submodule.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/submodule.c b/submodule.c index f93f0aff82..0c81aca6f2 100644 --- a/submodule.c +++ b/submodule.c @@ -1139,8 +1139,7 @@ static void calculate_changed_submodule_paths(struct repository *r, struct string_list *changed_submodule_names) { struct argv_array argv = ARGV_ARRAY_INIT; - struct string_list changed_submodules = STRING_LIST_INIT_DUP; - const struct string_list_item *name; + struct string_list_item *name; /* No need to check if there are no submodules configured */ if (!submodule_from_path(r, NULL, NULL)) @@ -1157,9 +1156,9 @@ static void calculate_changed_submodule_paths(struct repository *r, * Collect all submodules (whether checked out or not) for which new * commits have been recorded upstream in "changed_submodule_names". */ - collect_changed_submodules(r, &changed_submodules, &argv); + collect_changed_submodules(r, changed_submodule_names, &argv); - for_each_string_list_item(name, &changed_submodules) { + for_each_string_list_item(name, changed_submodule_names) { struct oid_array *commits = name->util; const struct submodule *submodule; const char *path = NULL; @@ -1173,12 +1172,14 @@ static void calculate_changed_submodule_paths(struct repository *r, if (!path) continue; - if (!submodule_has_commits(r, path, commits)) - string_list_append(changed_submodule_names, - name->string); + if (submodule_has_commits(r, path, commits)) { + oid_array_clear(commits); + *name->string = '\0'; + } } - free_submodules_oids(&changed_submodules); + string_list_remove_empty_items(changed_submodule_names, 1); + argv_array_clear(&argv); oid_array_clear(&ref_tips_before_fetch); oid_array_clear(&ref_tips_after_fetch); @@ -1389,7 +1390,7 @@ int fetch_populated_submodules(struct repository *r, argv_array_clear(&spf.args); out: - string_list_clear(&spf.changed_submodule_names, 1); + free_submodules_oids(&spf.changed_submodule_names); return spf.result; } From d5498e087175ad7bbb0460a40cfb55cc3726226d Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Wed, 28 Nov 2018 16:27:53 -0800 Subject: [PATCH 6/9] repository: repo_submodule_init to take a submodule struct When constructing a struct repository for a submodule for some revision of the superproject where the submodule is not contained in the index, it may not be present in the working tree currently either. In that situation giving a 'path' argument is not useful. Upgrade the repo_submodule_init function to take a struct submodule instead. The submodule struct can be obtained via submodule_from_{path, name} or an artificial submodule struct can be passed in. While we are at it, rename the repository struct in the repo_submodule_init function, which is to be initialized, to a name that is not confused with the struct submodule as easily. Perform such renames in similar functions as well. Also move its documentation into the header file. Reviewed-by: Jonathan Tan Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- builtin/grep.c | 17 +++++++----- builtin/ls-files.c | 12 +++++---- builtin/submodule--helper.c | 2 +- repository.c | 27 ++++++++------------ repository.h | 12 +++++++-- t/helper/test-submodule-nested-repo-config.c | 8 +++--- 6 files changed, 43 insertions(+), 35 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 71df52a333..d6bd887b2d 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -404,7 +404,10 @@ static int grep_submodule(struct grep_opt *opt, struct repository *superproject, const struct object_id *oid, const char *filename, const char *path) { - struct repository submodule; + struct repository subrepo; + const struct submodule *sub = submodule_from_path(superproject, + &null_oid, path); + int hit; /* @@ -420,12 +423,12 @@ static int grep_submodule(struct grep_opt *opt, struct repository *superproject, return 0; } - if (repo_submodule_init(&submodule, superproject, path)) { + if (repo_submodule_init(&subrepo, superproject, sub)) { grep_read_unlock(); return 0; } - repo_read_gitmodules(&submodule); + repo_read_gitmodules(&subrepo); /* * NEEDSWORK: This adds the submodule's object directory to the list of @@ -437,7 +440,7 @@ static int grep_submodule(struct grep_opt *opt, struct repository *superproject, * store is no longer global and instead is a member of the repository * object. */ - add_to_alternates_memory(submodule.objects->objectdir); + add_to_alternates_memory(subrepo.objects->objectdir); grep_read_unlock(); if (oid) { @@ -462,14 +465,14 @@ static int grep_submodule(struct grep_opt *opt, struct repository *superproject, init_tree_desc(&tree, data, size); hit = grep_tree(opt, pathspec, &tree, &base, base.len, - object->type == OBJ_COMMIT, &submodule); + object->type == OBJ_COMMIT, &subrepo); strbuf_release(&base); free(data); } else { - hit = grep_cache(opt, &submodule, pathspec, 1); + hit = grep_cache(opt, &subrepo, pathspec, 1); } - repo_clear(&submodule); + repo_clear(&subrepo); return hit; } diff --git a/builtin/ls-files.c b/builtin/ls-files.c index c70a9c7158..583a0e1ca2 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -206,17 +206,19 @@ static void show_files(struct repository *repo, struct dir_struct *dir); static void show_submodule(struct repository *superproject, struct dir_struct *dir, const char *path) { - struct repository submodule; + struct repository subrepo; + const struct submodule *sub = submodule_from_path(superproject, + &null_oid, path); - if (repo_submodule_init(&submodule, superproject, path)) + if (repo_submodule_init(&subrepo, superproject, sub)) return; - if (repo_read_index(&submodule) < 0) + if (repo_read_index(&subrepo) < 0) die("index file corrupt"); - show_files(&submodule, dir); + show_files(&subrepo, dir); - repo_clear(&submodule); + repo_clear(&subrepo); } static void show_ce(struct repository *repo, struct dir_struct *dir, diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index d38113a31a..4eceb8f040 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -2053,7 +2053,7 @@ static int ensure_core_worktree(int argc, const char **argv, const char *prefix) if (!sub) BUG("We could get the submodule handle before?"); - if (repo_submodule_init(&subrepo, the_repository, path)) + if (repo_submodule_init(&subrepo, the_repository, sub)) die(_("could not get a repository handle for submodule '%s'"), path); if (!repo_config_get_string(&subrepo, "core.worktree", &cw)) { diff --git a/repository.c b/repository.c index 5dd1486718..aabe64ee5d 100644 --- a/repository.c +++ b/repository.c @@ -166,30 +166,23 @@ error: return -1; } -/* - * Initialize 'submodule' as the submodule given by 'path' in parent repository - * 'superproject'. - * Return 0 upon success and a non-zero value upon failure. - */ -int repo_submodule_init(struct repository *submodule, +int repo_submodule_init(struct repository *subrepo, struct repository *superproject, - const char *path) + const struct submodule *sub) { - const struct submodule *sub; struct strbuf gitdir = STRBUF_INIT; struct strbuf worktree = STRBUF_INIT; int ret = 0; - sub = submodule_from_path(superproject, &null_oid, path); if (!sub) { ret = -1; goto out; } - strbuf_repo_worktree_path(&gitdir, superproject, "%s/.git", path); - strbuf_repo_worktree_path(&worktree, superproject, "%s", path); + strbuf_repo_worktree_path(&gitdir, superproject, "%s/.git", sub->path); + strbuf_repo_worktree_path(&worktree, superproject, "%s", sub->path); - if (repo_init(submodule, gitdir.buf, worktree.buf)) { + if (repo_init(subrepo, gitdir.buf, worktree.buf)) { /* * If initilization fails then it may be due to the submodule * not being populated in the superproject's worktree. Instead @@ -201,16 +194,16 @@ int repo_submodule_init(struct repository *submodule, strbuf_repo_git_path(&gitdir, superproject, "modules/%s", sub->name); - if (repo_init(submodule, gitdir.buf, NULL)) { + if (repo_init(subrepo, gitdir.buf, NULL)) { ret = -1; goto out; } } - submodule->submodule_prefix = xstrfmt("%s%s/", - superproject->submodule_prefix ? - superproject->submodule_prefix : - "", path); + subrepo->submodule_prefix = xstrfmt("%s%s/", + superproject->submodule_prefix ? + superproject->submodule_prefix : + "", sub->path); out: strbuf_release(&gitdir); diff --git a/repository.h b/repository.h index 9f16c42c1e..0e482b7d49 100644 --- a/repository.h +++ b/repository.h @@ -116,9 +116,17 @@ void repo_set_worktree(struct repository *repo, const char *path); void repo_set_hash_algo(struct repository *repo, int algo); void initialize_the_repository(void); int repo_init(struct repository *r, const char *gitdir, const char *worktree); -int repo_submodule_init(struct repository *submodule, + +/* + * Initialize the repository 'subrepo' as the submodule given by the + * struct submodule 'sub' in parent repository 'superproject'. + * Return 0 upon success and a non-zero value upon failure, which may happen + * if the submodule is not found, or 'sub' is NULL. + */ +struct submodule; +int repo_submodule_init(struct repository *subrepo, struct repository *superproject, - const char *path); + const struct submodule *sub); void repo_clear(struct repository *repo); /* diff --git a/t/helper/test-submodule-nested-repo-config.c b/t/helper/test-submodule-nested-repo-config.c index a31e2a9bea..bc97929bbc 100644 --- a/t/helper/test-submodule-nested-repo-config.c +++ b/t/helper/test-submodule-nested-repo-config.c @@ -10,19 +10,21 @@ static void die_usage(int argc, const char **argv, const char *msg) int cmd__submodule_nested_repo_config(int argc, const char **argv) { - struct repository submodule; + struct repository subrepo; + const struct submodule *sub; if (argc < 3) die_usage(argc, argv, "Wrong number of arguments."); setup_git_directory(); - if (repo_submodule_init(&submodule, the_repository, argv[1])) { + sub = submodule_from_path(the_repository, &null_oid, argv[1]); + if (repo_submodule_init(&subrepo, the_repository, sub)) { die_usage(argc, argv, "Submodule not found."); } /* Read the config of _child_ submodules. */ - print_config_from_gitmodules(&submodule, argv[2]); + print_config_from_gitmodules(&subrepo, argv[2]); submodule_free(the_repository); From 26f80ccfc15bb1e591b634173bdb7092da33da12 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Wed, 28 Nov 2018 16:27:54 -0800 Subject: [PATCH 7/9] submodule: migrate get_next_submodule to use repository structs We used to recurse into submodules, even if they were broken having only an objects directory. The child process executed in the submodule would fail though if the submodule was broken. This is tested via "fetching submodule into a broken repository" in t5526. This patch tightens the check upfront, such that we do not need to spawn a child process to find out if the submodule is broken. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- submodule.c | 56 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/submodule.c b/submodule.c index 0c81aca6f2..77ace5e784 100644 --- a/submodule.c +++ b/submodule.c @@ -1253,6 +1253,30 @@ static int get_fetch_recurse_config(const struct submodule *submodule, return spf->default_option; } +static struct repository *get_submodule_repo_for(struct repository *r, + const struct submodule *sub) +{ + struct repository *ret = xmalloc(sizeof(*ret)); + + if (repo_submodule_init(ret, r, sub)) { + /* + * No entry in .gitmodules? Technically not a submodule, + * but historically we supported repositories that happen to be + * in-place where a gitlink is. Keep supporting them. + */ + struct strbuf gitdir = STRBUF_INIT; + strbuf_repo_worktree_path(&gitdir, r, "%s/.git", sub->path); + if (repo_init(ret, gitdir.buf, NULL)) { + strbuf_release(&gitdir); + free(ret); + return NULL; + } + strbuf_release(&gitdir); + } + + return ret; +} + static int get_next_submodule(struct child_process *cp, struct strbuf *err, void *data, void **task_cb) { @@ -1260,12 +1284,11 @@ static int get_next_submodule(struct child_process *cp, struct submodule_parallel_fetch *spf = data; for (; spf->count < spf->r->index->cache_nr; spf->count++) { - struct strbuf submodule_path = STRBUF_INIT; - struct strbuf submodule_git_dir = STRBUF_INIT; struct strbuf submodule_prefix = STRBUF_INIT; const struct cache_entry *ce = spf->r->index->cache[spf->count]; - const char *git_dir, *default_argv; + const char *default_argv; const struct submodule *submodule; + struct repository *repo; struct submodule default_submodule = SUBMODULE_INIT; if (!S_ISGITLINK(ce->ce_mode)) @@ -1300,15 +1323,11 @@ static int get_next_submodule(struct child_process *cp, continue; } - strbuf_repo_worktree_path(&submodule_path, spf->r, "%s", ce->name); - strbuf_addf(&submodule_git_dir, "%s/.git", submodule_path.buf); strbuf_addf(&submodule_prefix, "%s%s/", spf->prefix, ce->name); - git_dir = read_gitfile(submodule_git_dir.buf); - if (!git_dir) - git_dir = submodule_git_dir.buf; - if (is_directory(git_dir)) { + repo = get_submodule_repo_for(spf->r, submodule); + if (repo) { child_process_init(cp); - cp->dir = strbuf_detach(&submodule_path, NULL); + cp->dir = xstrdup(repo->worktree); prepare_submodule_repo_env(&cp->env_array); cp->git_cmd = 1; if (!spf->quiet) @@ -1319,10 +1338,23 @@ static int get_next_submodule(struct child_process *cp, argv_array_push(&cp->args, default_argv); argv_array_push(&cp->args, "--submodule-prefix"); argv_array_push(&cp->args, submodule_prefix.buf); + + repo_clear(repo); + free(repo); ret = 1; + } else { + /* + * An empty directory is normal, + * the submodule is not initialized + */ + if (S_ISGITLINK(ce->ce_mode) && + !is_empty_dir(ce->name)) { + spf->result = 1; + strbuf_addf(err, + _("Could not access submodule '%s'"), + ce->name); + } } - strbuf_release(&submodule_path); - strbuf_release(&submodule_git_dir); strbuf_release(&submodule_prefix); if (ret) { spf->count++; From a62387b3fc9f5aeeb04a2db278121d33a9caafa7 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Wed, 28 Nov 2018 16:27:55 -0800 Subject: [PATCH 8/9] submodule.c: fetch in submodules git directory instead of in worktree Keep the properties introduced in 10f5c52656 (submodule: avoid auto-discovery in prepare_submodule_repo_env(), 2016-09-01), by fixating the git directory of the submodule. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- submodule.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/submodule.c b/submodule.c index 77ace5e784..d1b6646f42 100644 --- a/submodule.c +++ b/submodule.c @@ -494,6 +494,12 @@ void prepare_submodule_repo_env(struct argv_array *out) DEFAULT_GIT_DIR_ENVIRONMENT); } +static void prepare_submodule_repo_env_in_gitdir(struct argv_array *out) +{ + prepare_submodule_repo_env_no_git_dir(out); + argv_array_pushf(out, "%s=.", GIT_DIR_ENVIRONMENT); +} + /* Helper function to display the submodule header line prior to the full * summary output. If it can locate the submodule objects directory it will * attempt to lookup both the left and right commits and put them into the @@ -1327,8 +1333,8 @@ static int get_next_submodule(struct child_process *cp, repo = get_submodule_repo_for(spf->r, submodule); if (repo) { child_process_init(cp); - cp->dir = xstrdup(repo->worktree); - prepare_submodule_repo_env(&cp->env_array); + cp->dir = xstrdup(repo->gitdir); + prepare_submodule_repo_env_in_gitdir(&cp->env_array); cp->git_cmd = 1; if (!spf->quiet) strbuf_addf(err, "Fetching submodule %s%s\n", From be76c2128234d94b47f7087152ee55d08bb65d88 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Thu, 6 Dec 2018 13:26:55 -0800 Subject: [PATCH 9/9] fetch: ensure submodule objects fetched Currently when git-fetch is asked to recurse into submodules, it dispatches a plain "git-fetch -C " (with some submodule related options such as prefix and recusing strategy, but) without any information of the remote or the tip that should be fetched. But this default fetch is not sufficient, as a newly fetched commit in the superproject could point to a commit in the submodule that is not in the default refspec. This is common in workflows like Gerrit's. When fetching a Gerrit change under review (from refs/changes/??), the commits in that change likely point to submodule commits that have not been merged to a branch yet. Fetch a submodule object by id if the object that the superproject points to, cannot be found. For now this object is fetched from the 'origin' remote as we defer getting the default remote to a later patch. A list of new submodule commits are already generated in certain conditions (by check_for_new_submodule_commits()); this new feature invokes that function in more situations. The submodule checks were done only when a ref in the superproject changed, these checks were extended to also be performed when fetching into FETCH_HEAD for completeness, and add a test for that too. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- builtin/fetch.c | 11 +- submodule.c | 208 ++++++++++++++++++++++++++++++------ t/t5526-fetch-submodules.sh | 117 ++++++++++++++++++++ 3 files changed, 297 insertions(+), 39 deletions(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index e0140327aa..91f9b7d9c8 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -763,9 +763,6 @@ static int update_local_ref(struct ref *ref, what = _("[new ref]"); } - if ((recurse_submodules != RECURSE_SUBMODULES_OFF) && - (recurse_submodules != RECURSE_SUBMODULES_ON)) - check_for_new_submodule_commits(&ref->new_oid); r = s_update_ref(msg, ref, 0); format_display(display, r ? '!' : '*', what, r ? _("unable to update local ref") : NULL, @@ -779,9 +776,6 @@ static int update_local_ref(struct ref *ref, strbuf_add_unique_abbrev(&quickref, ¤t->object.oid, DEFAULT_ABBREV); strbuf_addstr(&quickref, ".."); strbuf_add_unique_abbrev(&quickref, &ref->new_oid, DEFAULT_ABBREV); - if ((recurse_submodules != RECURSE_SUBMODULES_OFF) && - (recurse_submodules != RECURSE_SUBMODULES_ON)) - check_for_new_submodule_commits(&ref->new_oid); r = s_update_ref("fast-forward", ref, 1); format_display(display, r ? '!' : ' ', quickref.buf, r ? _("unable to update local ref") : NULL, @@ -794,9 +788,6 @@ static int update_local_ref(struct ref *ref, strbuf_add_unique_abbrev(&quickref, ¤t->object.oid, DEFAULT_ABBREV); strbuf_addstr(&quickref, "..."); strbuf_add_unique_abbrev(&quickref, &ref->new_oid, DEFAULT_ABBREV); - if ((recurse_submodules != RECURSE_SUBMODULES_OFF) && - (recurse_submodules != RECURSE_SUBMODULES_ON)) - check_for_new_submodule_commits(&ref->new_oid); r = s_update_ref("forced-update", ref, 1); format_display(display, r ? '!' : '+', quickref.buf, r ? _("unable to update local ref") : _("forced update"), @@ -892,6 +883,8 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, ref->force = rm->peer_ref->force; } + if (recurse_submodules != RECURSE_SUBMODULES_OFF) + check_for_new_submodule_commits(&rm->old_oid); if (!strcmp(rm->name, "HEAD")) { kind = ""; diff --git a/submodule.c b/submodule.c index d1b6646f42..b88343d977 100644 --- a/submodule.c +++ b/submodule.c @@ -1231,8 +1231,14 @@ struct submodule_parallel_fetch { int result; struct string_list changed_submodule_names; + + /* Pending fetches by OIDs */ + struct fetch_task **oid_fetch_tasks; + int oid_fetch_tasks_nr, oid_fetch_tasks_alloc; }; -#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0, STRING_LIST_INIT_DUP } +#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0, 0, \ + STRING_LIST_INIT_DUP, \ + NULL, 0, 0} static int get_fetch_recurse_config(const struct submodule *submodule, struct submodule_parallel_fetch *spf) @@ -1259,6 +1265,76 @@ static int get_fetch_recurse_config(const struct submodule *submodule, return spf->default_option; } +/* + * Fetch in progress (if callback data) or + * pending (if in oid_fetch_tasks in struct submodule_parallel_fetch) + */ +struct fetch_task { + struct repository *repo; + const struct submodule *sub; + unsigned free_sub : 1; /* Do we need to free the submodule? */ + + struct oid_array *commits; /* Ensure these commits are fetched */ +}; + +/** + * When a submodule is not defined in .gitmodules, we cannot access it + * via the regular submodule-config. Create a fake submodule, which we can + * work on. + */ +static const struct submodule *get_non_gitmodules_submodule(const char *path) +{ + struct submodule *ret = NULL; + const char *name = default_name_or_path(path); + + if (!name) + return NULL; + + ret = xmalloc(sizeof(*ret)); + memset(ret, 0, sizeof(*ret)); + ret->path = name; + ret->name = name; + + return (const struct submodule *) ret; +} + +static struct fetch_task *fetch_task_create(struct repository *r, + const char *path) +{ + struct fetch_task *task = xmalloc(sizeof(*task)); + memset(task, 0, sizeof(*task)); + + task->sub = submodule_from_path(r, &null_oid, path); + if (!task->sub) { + /* + * No entry in .gitmodules? Technically not a submodule, + * but historically we supported repositories that happen to be + * in-place where a gitlink is. Keep supporting them. + */ + task->sub = get_non_gitmodules_submodule(path); + if (!task->sub) { + free(task); + return NULL; + } + + task->free_sub = 1; + } + + return task; +} + +static void fetch_task_release(struct fetch_task *p) +{ + if (p->free_sub) + free((void*)p->sub); + p->free_sub = 0; + p->sub = NULL; + + if (p->repo) + repo_clear(p->repo); + FREE_AND_NULL(p->repo); +} + static struct repository *get_submodule_repo_for(struct repository *r, const struct submodule *sub) { @@ -1286,39 +1362,29 @@ static struct repository *get_submodule_repo_for(struct repository *r, static int get_next_submodule(struct child_process *cp, struct strbuf *err, void *data, void **task_cb) { - int ret = 0; struct submodule_parallel_fetch *spf = data; for (; spf->count < spf->r->index->cache_nr; spf->count++) { - struct strbuf submodule_prefix = STRBUF_INIT; const struct cache_entry *ce = spf->r->index->cache[spf->count]; const char *default_argv; - const struct submodule *submodule; - struct repository *repo; - struct submodule default_submodule = SUBMODULE_INIT; + struct fetch_task *task; if (!S_ISGITLINK(ce->ce_mode)) continue; - submodule = submodule_from_path(spf->r, &null_oid, ce->name); - if (!submodule) { - const char *name = default_name_or_path(ce->name); - if (name) { - default_submodule.path = name; - default_submodule.name = name; - submodule = &default_submodule; - } - } + task = fetch_task_create(spf->r, ce->name); + if (!task) + continue; - switch (get_fetch_recurse_config(submodule, spf)) + switch (get_fetch_recurse_config(task->sub, spf)) { default: case RECURSE_SUBMODULES_DEFAULT: case RECURSE_SUBMODULES_ON_DEMAND: - if (!submodule || + if (!task->sub || !string_list_lookup( &spf->changed_submodule_names, - submodule->name)) + task->sub->name)) continue; default_argv = "on-demand"; break; @@ -1329,11 +1395,11 @@ static int get_next_submodule(struct child_process *cp, continue; } - strbuf_addf(&submodule_prefix, "%s%s/", spf->prefix, ce->name); - repo = get_submodule_repo_for(spf->r, submodule); - if (repo) { + task->repo = get_submodule_repo_for(spf->r, task->sub); + if (task->repo) { + struct strbuf submodule_prefix = STRBUF_INIT; child_process_init(cp); - cp->dir = xstrdup(repo->gitdir); + cp->dir = task->repo->gitdir; prepare_submodule_repo_env_in_gitdir(&cp->env_array); cp->git_cmd = 1; if (!spf->quiet) @@ -1343,12 +1409,22 @@ static int get_next_submodule(struct child_process *cp, argv_array_pushv(&cp->args, spf->args.argv); argv_array_push(&cp->args, default_argv); argv_array_push(&cp->args, "--submodule-prefix"); + + strbuf_addf(&submodule_prefix, "%s%s/", + spf->prefix, + task->sub->path); argv_array_push(&cp->args, submodule_prefix.buf); - repo_clear(repo); - free(repo); - ret = 1; + spf->count++; + *task_cb = task; + + strbuf_release(&submodule_prefix); + return 1; } else { + + fetch_task_release(task); + free(task); + /* * An empty directory is normal, * the submodule is not initialized @@ -1361,12 +1437,38 @@ static int get_next_submodule(struct child_process *cp, ce->name); } } - strbuf_release(&submodule_prefix); - if (ret) { - spf->count++; - return 1; - } } + + if (spf->oid_fetch_tasks_nr) { + struct fetch_task *task = + spf->oid_fetch_tasks[spf->oid_fetch_tasks_nr - 1]; + struct strbuf submodule_prefix = STRBUF_INIT; + spf->oid_fetch_tasks_nr--; + + strbuf_addf(&submodule_prefix, "%s%s/", + spf->prefix, task->sub->path); + + child_process_init(cp); + prepare_submodule_repo_env_in_gitdir(&cp->env_array); + cp->git_cmd = 1; + cp->dir = task->repo->gitdir; + + argv_array_init(&cp->args); + argv_array_pushv(&cp->args, spf->args.argv); + argv_array_push(&cp->args, "on-demand"); + argv_array_push(&cp->args, "--submodule-prefix"); + argv_array_push(&cp->args, submodule_prefix.buf); + + /* NEEDSWORK: have get_default_remote from submodule--helper */ + argv_array_push(&cp->args, "origin"); + oid_array_for_each_unique(task->commits, + append_oid_to_argv, &cp->args); + + *task_cb = task; + strbuf_release(&submodule_prefix); + return 1; + } + return 0; } @@ -1374,20 +1476,66 @@ static int fetch_start_failure(struct strbuf *err, void *cb, void *task_cb) { struct submodule_parallel_fetch *spf = cb; + struct fetch_task *task = task_cb; spf->result = 1; + fetch_task_release(task); return 0; } +static int commit_missing_in_sub(const struct object_id *oid, void *data) +{ + struct repository *subrepo = data; + + enum object_type type = oid_object_info(subrepo, oid, NULL); + + return type != OBJ_COMMIT; +} + static int fetch_finish(int retvalue, struct strbuf *err, void *cb, void *task_cb) { struct submodule_parallel_fetch *spf = cb; + struct fetch_task *task = task_cb; + + struct string_list_item *it; + struct oid_array *commits; if (retvalue) spf->result = 1; + if (!task || !task->sub) + BUG("callback cookie bogus"); + + /* Is this the second time we process this submodule? */ + if (task->commits) + goto out; + + it = string_list_lookup(&spf->changed_submodule_names, task->sub->name); + if (!it) + /* Could be an unchanged submodule, not contained in the list */ + goto out; + + commits = it->util; + oid_array_filter(commits, + commit_missing_in_sub, + task->repo); + + /* Are there commits we want, but do not exist? */ + if (commits->nr) { + task->commits = commits; + ALLOC_GROW(spf->oid_fetch_tasks, + spf->oid_fetch_tasks_nr + 1, + spf->oid_fetch_tasks_alloc); + spf->oid_fetch_tasks[spf->oid_fetch_tasks_nr] = task; + spf->oid_fetch_tasks_nr++; + return 0; + } + +out: + fetch_task_release(task); + return 0; } diff --git a/t/t5526-fetch-submodules.sh b/t/t5526-fetch-submodules.sh index 6c2f9b2ba2..9f8c744eb5 100755 --- a/t/t5526-fetch-submodules.sh +++ b/t/t5526-fetch-submodules.sh @@ -600,4 +600,121 @@ test_expect_success "fetch new commits when submodule got renamed" ' test_cmp expect actual ' +test_expect_success "fetch new submodule commits on-demand outside standard refspec" ' + # add a second submodule and ensure it is around in downstream first + git clone submodule sub1 && + git submodule add ./sub1 && + git commit -m "adding a second submodule" && + git -C downstream pull && + git -C downstream submodule update --init --recursive && + + git checkout --detach && + + C=$(git -C submodule commit-tree -m "new change outside refs/heads" HEAD^{tree}) && + git -C submodule update-ref refs/changes/1 $C && + git update-index --cacheinfo 160000 $C submodule && + test_tick && + + D=$(git -C sub1 commit-tree -m "new change outside refs/heads" HEAD^{tree}) && + git -C sub1 update-ref refs/changes/2 $D && + git update-index --cacheinfo 160000 $D sub1 && + + git commit -m "updated submodules outside of refs/heads" && + E=$(git rev-parse HEAD) && + git update-ref refs/changes/3 $E && + ( + cd downstream && + git fetch --recurse-submodules origin refs/changes/3:refs/heads/my_branch && + git -C submodule cat-file -t $C && + git -C sub1 cat-file -t $D && + git checkout --recurse-submodules FETCH_HEAD + ) +' + +test_expect_success 'fetch new submodule commit on-demand in FETCH_HEAD' ' + # depends on the previous test for setup + + C=$(git -C submodule commit-tree -m "another change outside refs/heads" HEAD^{tree}) && + git -C submodule update-ref refs/changes/4 $C && + git update-index --cacheinfo 160000 $C submodule && + test_tick && + + D=$(git -C sub1 commit-tree -m "another change outside refs/heads" HEAD^{tree}) && + git -C sub1 update-ref refs/changes/5 $D && + git update-index --cacheinfo 160000 $D sub1 && + + git commit -m "updated submodules outside of refs/heads" && + E=$(git rev-parse HEAD) && + git update-ref refs/changes/6 $E && + ( + cd downstream && + git fetch --recurse-submodules origin refs/changes/6 && + git -C submodule cat-file -t $C && + git -C sub1 cat-file -t $D && + git checkout --recurse-submodules FETCH_HEAD + ) +' + +test_expect_success 'fetch new submodule commits on-demand without .gitmodules entry' ' + # depends on the previous test for setup + + git config -f .gitmodules --remove-section submodule.sub1 && + git add .gitmodules && + git commit -m "delete gitmodules file" && + git checkout -B master && + git -C downstream fetch && + git -C downstream checkout origin/master && + + C=$(git -C submodule commit-tree -m "yet another change outside refs/heads" HEAD^{tree}) && + git -C submodule update-ref refs/changes/7 $C && + git update-index --cacheinfo 160000 $C submodule && + test_tick && + + D=$(git -C sub1 commit-tree -m "yet another change outside refs/heads" HEAD^{tree}) && + git -C sub1 update-ref refs/changes/8 $D && + git update-index --cacheinfo 160000 $D sub1 && + + git commit -m "updated submodules outside of refs/heads" && + E=$(git rev-parse HEAD) && + git update-ref refs/changes/9 $E && + ( + cd downstream && + git fetch --recurse-submodules origin refs/changes/9 && + git -C submodule cat-file -t $C && + git -C sub1 cat-file -t $D && + git checkout --recurse-submodules FETCH_HEAD + ) +' + +test_expect_success 'fetch new submodule commit intermittently referenced by superproject' ' + # depends on the previous test for setup + + D=$(git -C sub1 commit-tree -m "change 10 outside refs/heads" HEAD^{tree}) && + E=$(git -C sub1 commit-tree -m "change 11 outside refs/heads" HEAD^{tree}) && + F=$(git -C sub1 commit-tree -m "change 12 outside refs/heads" HEAD^{tree}) && + + git -C sub1 update-ref refs/changes/10 $D && + git update-index --cacheinfo 160000 $D sub1 && + git commit -m "updated submodules outside of refs/heads" && + + git -C sub1 update-ref refs/changes/11 $E && + git update-index --cacheinfo 160000 $E sub1 && + git commit -m "updated submodules outside of refs/heads" && + + git -C sub1 update-ref refs/changes/12 $F && + git update-index --cacheinfo 160000 $F sub1 && + git commit -m "updated submodules outside of refs/heads" && + + G=$(git rev-parse HEAD) && + git update-ref refs/changes/13 $G && + ( + cd downstream && + git fetch --recurse-submodules origin refs/changes/13 && + + git -C sub1 cat-file -t $D && + git -C sub1 cat-file -t $E && + git -C sub1 cat-file -t $F + ) +' + test_done