From 419fd7866c7636e4f2e3606888273b38d8cdacd4 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Fri, 28 Apr 2017 16:53:57 -0700 Subject: [PATCH 1/6] submodule: rename add_sha1_to_array() Rename 'add_sha1_to_array()' to 'append_oid_to_array()' to more accurately describe what the function does, since it handles 'struct object_id' and not sha1 character arrays. Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano --- submodule.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/submodule.c b/submodule.c index d3299e29c0..be0f5d8478 100644 --- a/submodule.c +++ b/submodule.c @@ -951,17 +951,18 @@ static void submodule_collect_changed_cb(struct diff_queue_struct *q, } } -static int add_sha1_to_array(const char *ref, const struct object_id *oid, - int flags, void *data) +static int append_oid_to_array(const char *ref, const struct object_id *oid, + int flags, void *data) { - oid_array_append(data, oid); + struct oid_array *array = data; + oid_array_append(array, oid); return 0; } void check_for_new_submodule_commits(struct object_id *oid) { if (!initialized_fetch_ref_tips) { - for_each_ref(add_sha1_to_array, &ref_tips_before_fetch); + for_each_ref(append_oid_to_array, &ref_tips_before_fetch); initialized_fetch_ref_tips = 1; } From 610b233704638372834777ebf6ed3ab8829ec259 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Fri, 28 Apr 2017 16:53:58 -0700 Subject: [PATCH 2/6] submodule: rename free_submodules_sha1s() Rename 'free_submodules_sha1s()' to 'free_submodules_oids()' since the function frees a 'struct string_list' which has a 'struct oid_array' stored in the 'util' field. 
Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano --- submodule.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/submodule.c b/submodule.c index be0f5d8478..46abd52b14 100644 --- a/submodule.c +++ b/submodule.c @@ -738,7 +738,7 @@ static void find_unpushed_submodule_commits(struct commit *commit, diff_tree_combined_merge(commit, 1, &rev); } -static void free_submodules_sha1s(struct string_list *submodules) +static void free_submodules_oids(struct string_list *submodules) { struct string_list_item *item; for_each_string_list_item(item, submodules) @@ -779,7 +779,8 @@ int find_unpushed_submodules(struct oid_array *commits, if (submodule_needs_pushing(submodule->string, commits)) string_list_insert(needs_pushing, submodule->string); } - free_submodules_sha1s(&submodules); + + free_submodules_oids(&submodules); return needs_pushing->nr; } From d1a8460caac1a19457e352a6b610b7bc89cd56d9 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Fri, 28 Apr 2017 16:53:59 -0700 Subject: [PATCH 3/6] submodule: remove add_oid_to_argv() The function 'add_oid_to_argv()' provides the same functionality as 'append_oid_to_argv()'. Remove this duplicate function and instead use 'append_oid_to_argv()' where 'add_oid_to_argv()' was previously used. 
Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano --- submodule.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/submodule.c b/submodule.c index 46abd52b14..7baa28ae05 100644 --- a/submodule.c +++ b/submodule.c @@ -970,12 +970,6 @@ void check_for_new_submodule_commits(struct object_id *oid) oid_array_append(&ref_tips_after_fetch, oid); } -static int add_oid_to_argv(const struct object_id *oid, void *data) -{ - argv_array_push(data, oid_to_hex(oid)); - return 0; -} - static void calculate_changed_submodule_paths(void) { struct rev_info rev; @@ -989,10 +983,10 @@ static void calculate_changed_submodule_paths(void) init_revisions(&rev, NULL); argv_array_push(&argv, "--"); /* argv[0] program name */ oid_array_for_each_unique(&ref_tips_after_fetch, - add_oid_to_argv, &argv); + append_oid_to_argv, &argv); argv_array_push(&argv, "--not"); oid_array_for_each_unique(&ref_tips_before_fetch, - add_oid_to_argv, &argv); + append_oid_to_argv, &argv); setup_revisions(argv.argc, argv.argv, &rev, NULL); if (prepare_revision_walk(&rev)) die("revision walk setup failed"); From a6bb78c3b19697133a01122c13993986b66ef28b Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Fri, 28 Apr 2017 16:54:00 -0700 Subject: [PATCH 4/6] submodule: change string_list changed_submodule_paths Eliminate a call to 'xstrdup()' by changing the string_list 'changed_submodule_paths' to duplicate strings added to it. 
Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano --- submodule.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/submodule.c b/submodule.c index 7baa28ae05..3bcf44521b 100644 --- a/submodule.c +++ b/submodule.c @@ -20,7 +20,7 @@ static int config_fetch_recurse_submodules = RECURSE_SUBMODULES_ON_DEMAND; static int config_update_recurse_submodules = RECURSE_SUBMODULES_DEFAULT; static int parallel_jobs = 1; -static struct string_list changed_submodule_paths = STRING_LIST_INIT_NODUP; +static struct string_list changed_submodule_paths = STRING_LIST_INIT_DUP; static int initialized_fetch_ref_tips; static struct oid_array ref_tips_before_fetch; static struct oid_array ref_tips_after_fetch; @@ -939,7 +939,7 @@ static void submodule_collect_changed_cb(struct diff_queue_struct *q, struct string_list_item *path; path = unsorted_string_list_lookup(&changed_submodule_paths, p->two->path); if (!path && !is_submodule_commit_present(p->two->path, p->two->oid.hash)) - string_list_append(&changed_submodule_paths, xstrdup(p->two->path)); + string_list_append(&changed_submodule_paths, p->two->path); } else { /* Submodule is new or was moved here */ /* NEEDSWORK: When the .git directories of submodules From 7c8d2b00f2516a0b30aaa3f59dceaf4fe83f8729 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Mon, 1 May 2017 18:02:38 -0700 Subject: [PATCH 5/6] submodule: improve submodule_has_commits() Teach 'submodule_has_commits()' to ensure that if a commit exists in a submodule, that it is also reachable from a ref. This is a preparatory step prior to merging the logic which checks for changed submodules when fetching or pushing. 
Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano --- submodule.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/submodule.c b/submodule.c index 3bcf44521b..057695e645 100644 --- a/submodule.c +++ b/submodule.c @@ -644,10 +644,44 @@ static int submodule_has_commits(const char *path, struct oid_array *commits) { int has_commit = 1; + /* + * Perform a cheap, but incorrect check for the existance of 'commits'. + * This is done by adding the submodule's object store to the in-core + * object store, and then querying for each commit's existance. If we + * do not have the commit object anywhere, there is no chance we have + * it in the object store of the correct submodule and have it + * reachable from a ref, so we can fail early without spawning rev-list + * which is expensive. + */ if (add_submodule_odb(path)) return 0; oid_array_for_each_unique(commits, check_has_commit, &has_commit); + + if (has_commit) { + /* + * Even if the submodule is checked out and the commit is + * present, make sure it exists in the submodule's object store + * and that it is reachable from a ref. + */ + struct child_process cp = CHILD_PROCESS_INIT; + struct strbuf out = STRBUF_INIT; + + argv_array_pushl(&cp.args, "rev-list", "-n", "1", NULL); + oid_array_for_each_unique(commits, append_oid_to_argv, &cp.args); + argv_array_pushl(&cp.args, "--not", "--all", NULL); + + prepare_submodule_repo_env(&cp.env_array); + cp.git_cmd = 1; + cp.no_stdin = 1; + cp.dir = path; + + if (capture_command(&cp, &out, GIT_MAX_HEXSZ + 1) || out.len) + has_commit = 0; + + strbuf_release(&out); + } + return has_commit; } From aacc5c1a81c004efccff8075edb78acdf5f15264 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Mon, 1 May 2017 18:02:39 -0700 Subject: [PATCH 6/6] submodule: refactor logic to determine changed submodules There are currently two instances (fetch and push) where we want to determine if submodules have changed given some revision specification. 
These two instances don't use the same logic to generate a list of changed submodules and as a result there is a fair amount of code duplication. This patch refactors these two code paths such that they both use the same logic to generate a list of changed submodules. This also makes it easier for future callers to be able to reuse this logic as they only need to create an argv_array with the revision specification to be used during the revision walk. Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano --- submodule.c | 247 ++++++++++++++++++++++------------------------------ 1 file changed, 105 insertions(+), 142 deletions(-) diff --git a/submodule.c b/submodule.c index 057695e645..7eaa3d384e 100644 --- a/submodule.c +++ b/submodule.c @@ -617,6 +617,94 @@ const struct submodule *submodule_from_ce(const struct cache_entry *ce) return submodule_from_path(null_sha1, ce->name); } +static struct oid_array *submodule_commits(struct string_list *submodules, + const char *path) +{ + struct string_list_item *item; + + item = string_list_insert(submodules, path); + if (item->util) + return (struct oid_array *) item->util; + + /* NEEDSWORK: should we have oid_array_init()? */ + item->util = xcalloc(1, sizeof(struct oid_array)); + return (struct oid_array *) item->util; +} + +static void collect_changed_submodules_cb(struct diff_queue_struct *q, + struct diff_options *options, + void *data) +{ + int i; + struct string_list *changed = data; + + for (i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + struct oid_array *commits; + if (!S_ISGITLINK(p->two->mode)) + continue; + + if (S_ISGITLINK(p->one->mode)) { + /* + * NEEDSWORK: We should honor the name configured in + * the .gitmodules file of the commit we are examining + * here to be able to correctly follow submodules + * being moved around. 
+ */ + commits = submodule_commits(changed, p->two->path); + oid_array_append(commits, &p->two->oid); + } else { + /* Submodule is new or was moved here */ + /* + * NEEDSWORK: When the .git directories of submodules + * live inside the superprojects .git directory some + * day we should fetch new submodules directly into + * that location too when config or options request + * that so they can be checked out from there. + */ + continue; + } + } +} + +/* + * Collect the paths of submodules in 'changed' which have changed based on + * the revisions as specified in 'argv'. Each entry in 'changed' will also + * have a corresponding 'struct oid_array' (in the 'util' field) which lists + * what the submodule pointers were updated to during the change. + */ +static void collect_changed_submodules(struct string_list *changed, + struct argv_array *argv) +{ + struct rev_info rev; + const struct commit *commit; + + init_revisions(&rev, NULL); + setup_revisions(argv->argc, argv->argv, &rev, NULL); + if (prepare_revision_walk(&rev)) + die("revision walk setup failed"); + + while ((commit = get_revision(&rev))) { + struct rev_info diff_rev; + + init_revisions(&diff_rev, NULL); + diff_rev.diffopt.output_format |= DIFF_FORMAT_CALLBACK; + diff_rev.diffopt.format_callback = collect_changed_submodules_cb; + diff_rev.diffopt.format_callback_data = changed; + diff_tree_combined_merge(commit, 1, &diff_rev); + } + + reset_revision_walk(); +} + +static void free_submodules_oids(struct string_list *submodules) +{ + struct string_list_item *item; + for_each_string_list_item(item, submodules) + oid_array_clear((struct oid_array *) item->util); + string_list_clear(submodules, 1); +} + static int has_remote(const char *refname, const struct object_id *oid, int flags, void *cb_data) { @@ -729,92 +817,31 @@ static int submodule_needs_pushing(const char *path, struct oid_array *commits) return 0; } -static struct oid_array *submodule_commits(struct string_list *submodules, - const char *path) -{ 
- struct string_list_item *item; - - item = string_list_insert(submodules, path); - if (item->util) - return (struct oid_array *) item->util; - - /* NEEDSWORK: should we have oid_array_init()? */ - item->util = xcalloc(1, sizeof(struct oid_array)); - return (struct oid_array *) item->util; -} - -static void collect_submodules_from_diff(struct diff_queue_struct *q, - struct diff_options *options, - void *data) -{ - int i; - struct string_list *submodules = data; - - for (i = 0; i < q->nr; i++) { - struct diff_filepair *p = q->queue[i]; - struct oid_array *commits; - if (!S_ISGITLINK(p->two->mode)) - continue; - commits = submodule_commits(submodules, p->two->path); - oid_array_append(commits, &p->two->oid); - } -} - -static void find_unpushed_submodule_commits(struct commit *commit, - struct string_list *needs_pushing) -{ - struct rev_info rev; - - init_revisions(&rev, NULL); - rev.diffopt.output_format |= DIFF_FORMAT_CALLBACK; - rev.diffopt.format_callback = collect_submodules_from_diff; - rev.diffopt.format_callback_data = needs_pushing; - diff_tree_combined_merge(commit, 1, &rev); -} - -static void free_submodules_oids(struct string_list *submodules) -{ - struct string_list_item *item; - for_each_string_list_item(item, submodules) - oid_array_clear((struct oid_array *) item->util); - string_list_clear(submodules, 1); -} - int find_unpushed_submodules(struct oid_array *commits, const char *remotes_name, struct string_list *needs_pushing) { - struct rev_info rev; - struct commit *commit; struct string_list submodules = STRING_LIST_INIT_DUP; struct string_list_item *submodule; struct argv_array argv = ARGV_ARRAY_INIT; - init_revisions(&rev, NULL); - /* argv.argv[0] will be ignored by setup_revisions */ argv_array_push(&argv, "find_unpushed_submodules"); oid_array_for_each_unique(commits, append_oid_to_argv, &argv); argv_array_push(&argv, "--not"); argv_array_pushf(&argv, "--remotes=%s", remotes_name); - setup_revisions(argv.argc, argv.argv, &rev, NULL); - if 
(prepare_revision_walk(&rev)) - die("revision walk setup failed"); - - while ((commit = get_revision(&rev)) != NULL) - find_unpushed_submodule_commits(commit, &submodules); - - reset_revision_walk(); - argv_array_clear(&argv); + collect_changed_submodules(&submodules, &argv); for_each_string_list_item(submodule, &submodules) { - struct oid_array *commits = (struct oid_array *) submodule->util; + struct oid_array *commits = submodule->util; + const char *path = submodule->string; - if (submodule_needs_pushing(submodule->string, commits)) - string_list_insert(needs_pushing, submodule->string); + if (submodule_needs_pushing(path, commits)) + string_list_insert(needs_pushing, path); } free_submodules_oids(&submodules); + argv_array_clear(&argv); return needs_pushing->nr; } @@ -931,61 +958,6 @@ int push_unpushed_submodules(struct oid_array *commits, return ret; } -static int is_submodule_commit_present(const char *path, unsigned char sha1[20]) -{ - int is_present = 0; - if (!add_submodule_odb(path) && lookup_commit_reference(sha1)) { - /* Even if the submodule is checked out and the commit is - * present, make sure it is reachable from a ref. 
*/ - struct child_process cp = CHILD_PROCESS_INIT; - const char *argv[] = {"rev-list", "-n", "1", NULL, "--not", "--all", NULL}; - struct strbuf buf = STRBUF_INIT; - - argv[3] = sha1_to_hex(sha1); - cp.argv = argv; - prepare_submodule_repo_env(&cp.env_array); - cp.git_cmd = 1; - cp.no_stdin = 1; - cp.dir = path; - if (!capture_command(&cp, &buf, 1024) && !buf.len) - is_present = 1; - - strbuf_release(&buf); - } - return is_present; -} - -static void submodule_collect_changed_cb(struct diff_queue_struct *q, - struct diff_options *options, - void *data) -{ - int i; - for (i = 0; i < q->nr; i++) { - struct diff_filepair *p = q->queue[i]; - if (!S_ISGITLINK(p->two->mode)) - continue; - - if (S_ISGITLINK(p->one->mode)) { - /* NEEDSWORK: We should honor the name configured in - * the .gitmodules file of the commit we are examining - * here to be able to correctly follow submodules - * being moved around. */ - struct string_list_item *path; - path = unsorted_string_list_lookup(&changed_submodule_paths, p->two->path); - if (!path && !is_submodule_commit_present(p->two->path, p->two->oid.hash)) - string_list_append(&changed_submodule_paths, p->two->path); - } else { - /* Submodule is new or was moved here */ - /* NEEDSWORK: When the .git directories of submodules - * live inside the superprojects .git directory some - * day we should fetch new submodules directly into - * that location too when config or options request - * that so they can be checked out from there. 
*/ - continue; - } - } -} - static int append_oid_to_array(const char *ref, const struct object_id *oid, int flags, void *data) { @@ -1006,45 +978,36 @@ void check_for_new_submodule_commits(struct object_id *oid) static void calculate_changed_submodule_paths(void) { - struct rev_info rev; - struct commit *commit; struct argv_array argv = ARGV_ARRAY_INIT; + struct string_list changed_submodules = STRING_LIST_INIT_DUP; + const struct string_list_item *item; /* No need to check if there are no submodules configured */ if (!submodule_from_path(NULL, NULL)) return; - init_revisions(&rev, NULL); argv_array_push(&argv, "--"); /* argv[0] program name */ oid_array_for_each_unique(&ref_tips_after_fetch, append_oid_to_argv, &argv); argv_array_push(&argv, "--not"); oid_array_for_each_unique(&ref_tips_before_fetch, append_oid_to_argv, &argv); - setup_revisions(argv.argc, argv.argv, &rev, NULL); - if (prepare_revision_walk(&rev)) - die("revision walk setup failed"); /* * Collect all submodules (whether checked out or not) for which new * commits have been recorded upstream in "changed_submodule_paths". 
*/ - while ((commit = get_revision(&rev))) { - struct commit_list *parent = commit->parents; - while (parent) { - struct diff_options diff_opts; - diff_setup(&diff_opts); - DIFF_OPT_SET(&diff_opts, RECURSIVE); - diff_opts.output_format |= DIFF_FORMAT_CALLBACK; - diff_opts.format_callback = submodule_collect_changed_cb; - diff_setup_done(&diff_opts); - diff_tree_sha1(parent->item->object.oid.hash, commit->object.oid.hash, "", &diff_opts); - diffcore_std(&diff_opts); - diff_flush(&diff_opts); - parent = parent->next; - } + collect_changed_submodules(&changed_submodules, &argv); + + for_each_string_list_item(item, &changed_submodules) { + struct oid_array *commits = item->util; + const char *path = item->string; + + if (!submodule_has_commits(path, commits)) + string_list_append(&changed_submodule_paths, path); } + free_submodules_oids(&changed_submodules); argv_array_clear(&argv); oid_array_clear(&ref_tips_before_fetch); oid_array_clear(&ref_tips_after_fetch);