git-commit-vandalism/submodule.c

2327 lines
60 KiB
C
Raw Normal View History

#include "cache.h"
#include "repository.h"
#include "config.h"
#include "submodule-config.h"
#include "submodule.h"
#include "dir.h"
#include "diff.h"
#include "commit.h"
#include "revision.h"
#include "run-command.h"
#include "diffcore.h"
#include "refs.h"
Submodules: Add the new "ignore" config option for diff and status The new "ignore" config option controls the default behavior for "git status" and the diff family. It specifies under what circumstances they consider submodules as modified and can be set separately for each submodule. The command line option "--ignore-submodules=" has been extended to accept the new parameter "none" for both status and diff. Users that chose submodules to get rid of long work tree scanning times might want to set the "dirty" option for those submodules. This brings back the pre 1.7.0 behavior, where submodule work trees were never scanned for modifications. By using "--ignore-submodules=none" on the command line the status and diff commands can be told to do a full scan. This option can be set to the following values (which have the same name and meaning as for the "--ignore-submodules" option of status and diff): "all": All changes to the submodule will be ignored. "dirty": Only differences of the commit recorded in the superproject and the submodules HEAD will be considered modifications, all changes to the work tree of the submodule will be ignored. When using this value, the submodule will not be scanned for work tree changes at all, leading to a performance benefit on large submodules. "untracked": Only untracked files in the submodules work tree are ignored, a changed HEAD and/or modified files in the submodule will mark it as modified. "none" (which is the default): Either untracked or modified files in a submodules work tree or a difference between the subdmodules HEAD and the commit recorded in the superproject will make it show up as changed. This value is added as a new parameter for the "--ignore-submodules" option of the diff family and "git status" so the user can override the settings in the configuration. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-08-06 00:39:25 +02:00
#include "string-list.h"
#include "oid-array.h"
#include "strvec.h"
#include "blob.h"
#include "thread-utils.h"
#include "quote.h"
#include "remote.h"
#include "worktree.h"
#include "parse-options.h"
#include "object-store.h"
#include "commit-reach.h"
Submodules: Add the new "ignore" config option for diff and status The new "ignore" config option controls the default behavior for "git status" and the diff family. It specifies under what circumstances they consider submodules as modified and can be set separately for each submodule. The command line option "--ignore-submodules=" has been extended to accept the new parameter "none" for both status and diff. Users that chose submodules to get rid of long work tree scanning times might want to set the "dirty" option for those submodules. This brings back the pre 1.7.0 behavior, where submodule work trees were never scanned for modifications. By using "--ignore-submodules=none" on the command line the status and diff commands can be told to do a full scan. This option can be set to the following values (which have the same name and meaning as for the "--ignore-submodules" option of status and diff): "all": All changes to the submodule will be ignored. "dirty": Only differences of the commit recorded in the superproject and the submodules HEAD will be considered modifications, all changes to the work tree of the submodule will be ignored. When using this value, the submodule will not be scanned for work tree changes at all, leading to a performance benefit on large submodules. "untracked": Only untracked files in the submodules work tree are ignored, a changed HEAD and/or modified files in the submodule will mark it as modified. "none" (which is the default): Either untracked or modified files in a submodules work tree or a difference between the subdmodules HEAD and the commit recorded in the superproject will make it show up as changed. This value is added as a new parameter for the "--ignore-submodules" option of the diff family and "git status" so the user can override the settings in the configuration. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-08-06 00:39:25 +02:00
static int config_update_recurse_submodules = RECURSE_SUBMODULES_OFF;
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-09-12 21:56:52 +02:00
static int initialized_fetch_ref_tips;
static struct oid_array ref_tips_before_fetch;
static struct oid_array ref_tips_after_fetch;
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-09-12 21:56:52 +02:00
/*
* Check if the .gitmodules file is unmerged. Parsing of the .gitmodules file
* will be disabled because we can't guess what might be configured in
* .gitmodules unless the user resolves the conflict.
*/
int is_gitmodules_unmerged(struct index_state *istate)
{
int pos = index_name_pos(istate, GITMODULES_FILE, strlen(GITMODULES_FILE));
if (pos < 0) { /* .gitmodules not found or isn't merged */
pos = -1 - pos;
if (istate->cache_nr > pos) { /* there is a .gitmodules */
const struct cache_entry *ce = istate->cache[pos];
if (ce_namelen(ce) == strlen(GITMODULES_FILE) &&
!strcmp(ce->name, GITMODULES_FILE))
return 1;
}
}
return 0;
}
/*
* Check if the .gitmodules file is safe to write.
*
* Writing to the .gitmodules file requires that the file exists in the
* working tree or, if it doesn't, that a brand new .gitmodules file is going
* to be created (i.e. it's neither in the index nor in the current branch).
*
* It is not safe to write to .gitmodules if it's not in the working tree but
* it is in the index or in the current branch, because writing new values
* (and staging them) would blindly overwrite ALL the old content.
*/
int is_writing_gitmodules_ok(void)
{
struct object_id oid;
return file_exists(GITMODULES_FILE) ||
(get_oid(GITMODULES_INDEX, &oid) < 0 && get_oid(GITMODULES_HEAD, &oid) < 0);
}
/*
* Check if the .gitmodules file has unstaged modifications. This must be
* checked before allowing modifications to the .gitmodules file with the
* intention to stage them later, because when continuing we would stage the
* modifications the user didn't stage herself too. That might change in a
* future version when we learn to stage the changes we do ourselves without
* staging any previous modifications.
*/
int is_staging_gitmodules_ok(struct index_state *istate)
{
int pos = index_name_pos(istate, GITMODULES_FILE, strlen(GITMODULES_FILE));
if ((pos >= 0) && (pos < istate->cache_nr)) {
struct stat st;
if (lstat(GITMODULES_FILE, &st) == 0 &&
ie_modified(istate, istate->cache[pos], &st, 0) & DATA_CHANGED)
return 0;
}
return 1;
}
static int for_each_remote_ref_submodule(const char *submodule,
each_ref_fn fn, void *cb_data)
{
return refs_for_each_remote_ref(get_submodule_ref_store(submodule),
fn, cb_data);
}
/*
* Try to update the "path" entry in the "submodule.<name>" section of the
* .gitmodules file. Return 0 only if a .gitmodules file was found, a section
* with the correct path=<oldpath> setting was found and we could update it.
*/
int update_path_in_gitmodules(const char *oldpath, const char *newpath)
{
struct strbuf entry = STRBUF_INIT;
const struct submodule *submodule;
int ret;
if (!file_exists(GITMODULES_FILE)) /* Do nothing without .gitmodules */
return -1;
if (is_gitmodules_unmerged(the_repository->index))
die(_("Cannot change unmerged .gitmodules, resolve merge conflicts first"));
submodule = submodule_from_path(the_repository, null_oid(), oldpath);
if (!submodule || !submodule->name) {
warning(_("Could not find section in .gitmodules where path=%s"), oldpath);
return -1;
}
strbuf_addstr(&entry, "submodule.");
strbuf_addstr(&entry, submodule->name);
strbuf_addstr(&entry, ".path");
ret = config_set_in_gitmodules_file_gently(entry.buf, newpath);
strbuf_release(&entry);
return ret;
}
rm: delete .gitmodules entry of submodules removed from the work tree Currently using "git rm" on a submodule removes the submodule's work tree from that of the superproject and the gitlink from the index. But the submodule's section in .gitmodules is left untouched, which is a leftover of the now removed submodule and might irritate users (as opposed to the setting in .git/config, this must stay as a reminder that the user showed interest in this submodule so it will be repopulated later when an older commit is checked out). Let "git rm" help the user by not only removing the submodule from the work tree but by also removing the "submodule.<submodule name>" section from the .gitmodules file and stage both. This doesn't happen when the "--cached" option is used, as it would modify the work tree. This also silently does nothing when no .gitmodules file is found and only issues a warning when it doesn't have a section for this submodule. This is because the user might just use plain gitlinks without the .gitmodules file or has already removed the section by hand before issuing the "git rm" command (in which case the warning reminds him that rm would have done that for him). Only when .gitmodules is found and contains merge conflicts the rm command will fail and tell the user to resolve the conflict before trying again. Also extend the man page to inform the user about this new feature. While at it promote the submodule sub-section to a chapter as it made not much sense under "REMOVING FILES THAT HAVE DISAPPEARED FROM THE FILESYSTEM". In t7610 three uses of "git rm submod" had to be replaced with "git rm --cached submod" because that test expects .gitmodules and the work tree to stay untouched. Also in t7400 the tests for the remaining settings in the .gitmodules file had to be changed to assert that these settings are missing. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-06 21:15:25 +02:00
/*
* Try to remove the "submodule.<name>" section from .gitmodules where the given
* path is configured. Return 0 only if a .gitmodules file was found, a section
* with the correct path=<path> setting was found and we could remove it.
*/
int remove_path_from_gitmodules(const char *path)
{
struct strbuf sect = STRBUF_INIT;
const struct submodule *submodule;
rm: delete .gitmodules entry of submodules removed from the work tree Currently using "git rm" on a submodule removes the submodule's work tree from that of the superproject and the gitlink from the index. But the submodule's section in .gitmodules is left untouched, which is a leftover of the now removed submodule and might irritate users (as opposed to the setting in .git/config, this must stay as a reminder that the user showed interest in this submodule so it will be repopulated later when an older commit is checked out). Let "git rm" help the user by not only removing the submodule from the work tree but by also removing the "submodule.<submodule name>" section from the .gitmodules file and stage both. This doesn't happen when the "--cached" option is used, as it would modify the work tree. This also silently does nothing when no .gitmodules file is found and only issues a warning when it doesn't have a section for this submodule. This is because the user might just use plain gitlinks without the .gitmodules file or has already removed the section by hand before issuing the "git rm" command (in which case the warning reminds him that rm would have done that for him). Only when .gitmodules is found and contains merge conflicts the rm command will fail and tell the user to resolve the conflict before trying again. Also extend the man page to inform the user about this new feature. While at it promote the submodule sub-section to a chapter as it made not much sense under "REMOVING FILES THAT HAVE DISAPPEARED FROM THE FILESYSTEM". In t7610 three uses of "git rm submod" had to be replaced with "git rm --cached submod" because that test expects .gitmodules and the work tree to stay untouched. Also in t7400 the tests for the remaining settings in the .gitmodules file had to be changed to assert that these settings are missing. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-06 21:15:25 +02:00
if (!file_exists(GITMODULES_FILE)) /* Do nothing without .gitmodules */
rm: delete .gitmodules entry of submodules removed from the work tree Currently using "git rm" on a submodule removes the submodule's work tree from that of the superproject and the gitlink from the index. But the submodule's section in .gitmodules is left untouched, which is a leftover of the now removed submodule and might irritate users (as opposed to the setting in .git/config, this must stay as a reminder that the user showed interest in this submodule so it will be repopulated later when an older commit is checked out). Let "git rm" help the user by not only removing the submodule from the work tree but by also removing the "submodule.<submodule name>" section from the .gitmodules file and stage both. This doesn't happen when the "--cached" option is used, as it would modify the work tree. This also silently does nothing when no .gitmodules file is found and only issues a warning when it doesn't have a section for this submodule. This is because the user might just use plain gitlinks without the .gitmodules file or has already removed the section by hand before issuing the "git rm" command (in which case the warning reminds him that rm would have done that for him). Only when .gitmodules is found and contains merge conflicts the rm command will fail and tell the user to resolve the conflict before trying again. Also extend the man page to inform the user about this new feature. While at it promote the submodule sub-section to a chapter as it made not much sense under "REMOVING FILES THAT HAVE DISAPPEARED FROM THE FILESYSTEM". In t7610 three uses of "git rm submod" had to be replaced with "git rm --cached submod" because that test expects .gitmodules and the work tree to stay untouched. Also in t7400 the tests for the remaining settings in the .gitmodules file had to be changed to assert that these settings are missing. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-06 21:15:25 +02:00
return -1;
if (is_gitmodules_unmerged(the_repository->index))
rm: delete .gitmodules entry of submodules removed from the work tree Currently using "git rm" on a submodule removes the submodule's work tree from that of the superproject and the gitlink from the index. But the submodule's section in .gitmodules is left untouched, which is a leftover of the now removed submodule and might irritate users (as opposed to the setting in .git/config, this must stay as a reminder that the user showed interest in this submodule so it will be repopulated later when an older commit is checked out). Let "git rm" help the user by not only removing the submodule from the work tree but by also removing the "submodule.<submodule name>" section from the .gitmodules file and stage both. This doesn't happen when the "--cached" option is used, as it would modify the work tree. This also silently does nothing when no .gitmodules file is found and only issues a warning when it doesn't have a section for this submodule. This is because the user might just use plain gitlinks without the .gitmodules file or has already removed the section by hand before issuing the "git rm" command (in which case the warning reminds him that rm would have done that for him). Only when .gitmodules is found and contains merge conflicts the rm command will fail and tell the user to resolve the conflict before trying again. Also extend the man page to inform the user about this new feature. While at it promote the submodule sub-section to a chapter as it made not much sense under "REMOVING FILES THAT HAVE DISAPPEARED FROM THE FILESYSTEM". In t7610 three uses of "git rm submod" had to be replaced with "git rm --cached submod" because that test expects .gitmodules and the work tree to stay untouched. Also in t7400 the tests for the remaining settings in the .gitmodules file had to be changed to assert that these settings are missing. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-06 21:15:25 +02:00
die(_("Cannot change unmerged .gitmodules, resolve merge conflicts first"));
submodule = submodule_from_path(the_repository, null_oid(), path);
if (!submodule || !submodule->name) {
rm: delete .gitmodules entry of submodules removed from the work tree Currently using "git rm" on a submodule removes the submodule's work tree from that of the superproject and the gitlink from the index. But the submodule's section in .gitmodules is left untouched, which is a leftover of the now removed submodule and might irritate users (as opposed to the setting in .git/config, this must stay as a reminder that the user showed interest in this submodule so it will be repopulated later when an older commit is checked out). Let "git rm" help the user by not only removing the submodule from the work tree but by also removing the "submodule.<submodule name>" section from the .gitmodules file and stage both. This doesn't happen when the "--cached" option is used, as it would modify the work tree. This also silently does nothing when no .gitmodules file is found and only issues a warning when it doesn't have a section for this submodule. This is because the user might just use plain gitlinks without the .gitmodules file or has already removed the section by hand before issuing the "git rm" command (in which case the warning reminds him that rm would have done that for him). Only when .gitmodules is found and contains merge conflicts the rm command will fail and tell the user to resolve the conflict before trying again. Also extend the man page to inform the user about this new feature. While at it promote the submodule sub-section to a chapter as it made not much sense under "REMOVING FILES THAT HAVE DISAPPEARED FROM THE FILESYSTEM". In t7610 three uses of "git rm submod" had to be replaced with "git rm --cached submod" because that test expects .gitmodules and the work tree to stay untouched. Also in t7400 the tests for the remaining settings in the .gitmodules file had to be changed to assert that these settings are missing. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-06 21:15:25 +02:00
warning(_("Could not find section in .gitmodules where path=%s"), path);
return -1;
}
strbuf_addstr(&sect, "submodule.");
strbuf_addstr(&sect, submodule->name);
if (git_config_rename_section_in_file(GITMODULES_FILE, sect.buf, NULL) < 0) {
rm: delete .gitmodules entry of submodules removed from the work tree Currently using "git rm" on a submodule removes the submodule's work tree from that of the superproject and the gitlink from the index. But the submodule's section in .gitmodules is left untouched, which is a leftover of the now removed submodule and might irritate users (as opposed to the setting in .git/config, this must stay as a reminder that the user showed interest in this submodule so it will be repopulated later when an older commit is checked out). Let "git rm" help the user by not only removing the submodule from the work tree but by also removing the "submodule.<submodule name>" section from the .gitmodules file and stage both. This doesn't happen when the "--cached" option is used, as it would modify the work tree. This also silently does nothing when no .gitmodules file is found and only issues a warning when it doesn't have a section for this submodule. This is because the user might just use plain gitlinks without the .gitmodules file or has already removed the section by hand before issuing the "git rm" command (in which case the warning reminds him that rm would have done that for him). Only when .gitmodules is found and contains merge conflicts the rm command will fail and tell the user to resolve the conflict before trying again. Also extend the man page to inform the user about this new feature. While at it promote the submodule sub-section to a chapter as it made not much sense under "REMOVING FILES THAT HAVE DISAPPEARED FROM THE FILESYSTEM". In t7610 three uses of "git rm submod" had to be replaced with "git rm --cached submod" because that test expects .gitmodules and the work tree to stay untouched. Also in t7400 the tests for the remaining settings in the .gitmodules file had to be changed to assert that these settings are missing. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-06 21:15:25 +02:00
/* Maybe the user already did that, don't error out here */
warning(_("Could not remove .gitmodules entry for %s"), path);
strbuf_release(&sect);
return -1;
}
strbuf_release(&sect);
return 0;
}
void stage_updated_gitmodules(struct index_state *istate)
{
if (add_file_to_index(istate, GITMODULES_FILE, 0))
die(_("staging updated .gitmodules failed"));
}
submodule: lazily add submodule ODBs as alternates Teach Git to add submodule ODBs as alternates to the object store of the_repository only upon the first access of an object not in the_repository, and not when add_submodule_odb() is called. This provides a means of gradually migrating from accessing a submodule's object through alternates to accessing a submodule's object by explicitly passing its repository object. Any Git command can declare that it might access submodule objects by calling add_submodule_odb() (as they do now), but the submodule ODBs themselves will not be added until needed, so individual commands and/or combinations of arguments can be migrated one by one. [The advantage of explicit repository-object passing is code clarity (it is clear which repository an object read is from), performance (there is no need to linearly search through all submodule ODBs whenever an object is accessed from any repository, whether superproject or submodule), and the possibility of future features like partial clone submodules (which right now is not possible because if an object is missing, we do not know which repository to lazy-fetch into).] This commit also introduces an environment variable that a test may set to make the actual registration of alternates fatal, in order to demonstrate that its codepaths do not need this registration. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Reviewed-by: Emily Shaffer <emilyshaffer@google.com> Reviewed-by: Matheus Tavares <matheus.bernardino@usp.br> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-08-16 23:09:51 +02:00
static struct string_list added_submodule_odb_paths = STRING_LIST_INIT_NODUP;
/* TODO: remove this function, use repo_submodule_init instead. */
int add_submodule_odb(const char *path)
{
struct strbuf objects_directory = STRBUF_INIT;
int ret = 0;
allow do_submodule_path to work even if submodule isn't checked out Currently, do_submodule_path will attempt locating the .git directory by using read_gitfile on <path>/.git. If this fails it just assumes the <path>/.git is actually a git directory. This is good because it allows for handling submodules which were cloned in a regular manner first before being added to the superproject. Unfortunately this fails if the <path> is not actually checked out any longer, such as by removing the directory. Fix this by checking if the directory we found is actually a gitdir. In the case it is not, attempt to lookup the submodule configuration and find the name of where it is stored in the .git/modules/ directory of the superproject. If we can't locate the submodule configuration, this might occur because for example a submodule gitlink was added but the corresponding .gitmodules file was not properly updated. A die() here would not be pleasant to the users of submodule diff formats, so instead, modify do_submodule_path() to return an error code: - git_pathdup_submodule() returns NULL when we fail to find a path. - strbuf_git_path_submodule() propagates the error code to the caller. Modify the callers of these functions to check the error code and fail properly. This ensures we don't attempt to use a bad path that doesn't match the corresponding submodule. Because this change fixes add_submodule_odb() to work even if the submodule is not checked out, update the wording of the submodule log diff format to correctly display that the submodule is "not initialized" instead of "not checked out" Add tests to ensure this change works as expected. Signed-off-by: Jacob Keller <jacob.keller@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-01 01:27:22 +02:00
ret = strbuf_git_path_submodule(&objects_directory, path, "objects/");
if (ret)
goto done;
if (!is_directory(objects_directory.buf)) {
ret = -1;
goto done;
}
submodule: lazily add submodule ODBs as alternates Teach Git to add submodule ODBs as alternates to the object store of the_repository only upon the first access of an object not in the_repository, and not when add_submodule_odb() is called. This provides a means of gradually migrating from accessing a submodule's object through alternates to accessing a submodule's object by explicitly passing its repository object. Any Git command can declare that it might access submodule objects by calling add_submodule_odb() (as they do now), but the submodule ODBs themselves will not be added until needed, so individual commands and/or combinations of arguments can be migrated one by one. [The advantage of explicit repository-object passing is code clarity (it is clear which repository an object read is from), performance (there is no need to linearly search through all submodule ODBs whenever an object is accessed from any repository, whether superproject or submodule), and the possibility of future features like partial clone submodules (which right now is not possible because if an object is missing, we do not know which repository to lazy-fetch into).] This commit also introduces an environment variable that a test may set to make the actual registration of alternates fatal, in order to demonstrate that its codepaths do not need this registration. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Reviewed-by: Emily Shaffer <emilyshaffer@google.com> Reviewed-by: Matheus Tavares <matheus.bernardino@usp.br> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-08-16 23:09:51 +02:00
string_list_insert(&added_submodule_odb_paths,
strbuf_detach(&objects_directory, NULL));
done:
strbuf_release(&objects_directory);
return ret;
}
void add_submodule_odb_by_path(const char *path)
{
string_list_insert(&added_submodule_odb_paths, xstrdup(path));
}
submodule: lazily add submodule ODBs as alternates Teach Git to add submodule ODBs as alternates to the object store of the_repository only upon the first access of an object not in the_repository, and not when add_submodule_odb() is called. This provides a means of gradually migrating from accessing a submodule's object through alternates to accessing a submodule's object by explicitly passing its repository object. Any Git command can declare that it might access submodule objects by calling add_submodule_odb() (as they do now), but the submodule ODBs themselves will not be added until needed, so individual commands and/or combinations of arguments can be migrated one by one. [The advantage of explicit repository-object passing is code clarity (it is clear which repository an object read is from), performance (there is no need to linearly search through all submodule ODBs whenever an object is accessed from any repository, whether superproject or submodule), and the possibility of future features like partial clone submodules (which right now is not possible because if an object is missing, we do not know which repository to lazy-fetch into).] This commit also introduces an environment variable that a test may set to make the actual registration of alternates fatal, in order to demonstrate that its codepaths do not need this registration. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Reviewed-by: Emily Shaffer <emilyshaffer@google.com> Reviewed-by: Matheus Tavares <matheus.bernardino@usp.br> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-08-16 23:09:51 +02:00
int register_all_submodule_odb_as_alternates(void)
{
int i;
int ret = added_submodule_odb_paths.nr;
for (i = 0; i < added_submodule_odb_paths.nr; i++)
add_to_alternates_memory(added_submodule_odb_paths.items[i].string);
if (ret) {
string_list_clear(&added_submodule_odb_paths, 0);
if (git_env_bool("GIT_TEST_FATAL_REGISTER_SUBMODULE_ODB", 0))
BUG("register_all_submodule_odb_as_alternates() called");
}
return ret;
}
Submodules: Add the new "ignore" config option for diff and status The new "ignore" config option controls the default behavior for "git status" and the diff family. It specifies under what circumstances they consider submodules as modified and can be set separately for each submodule. The command line option "--ignore-submodules=" has been extended to accept the new parameter "none" for both status and diff. Users that chose submodules to get rid of long work tree scanning times might want to set the "dirty" option for those submodules. This brings back the pre 1.7.0 behavior, where submodule work trees were never scanned for modifications. By using "--ignore-submodules=none" on the command line the status and diff commands can be told to do a full scan. This option can be set to the following values (which have the same name and meaning as for the "--ignore-submodules" option of status and diff): "all": All changes to the submodule will be ignored. "dirty": Only differences of the commit recorded in the superproject and the submodules HEAD will be considered modifications, all changes to the work tree of the submodule will be ignored. When using this value, the submodule will not be scanned for work tree changes at all, leading to a performance benefit on large submodules. "untracked": Only untracked files in the submodules work tree are ignored, a changed HEAD and/or modified files in the submodule will mark it as modified. "none" (which is the default): Either untracked or modified files in a submodules work tree or a difference between the subdmodules HEAD and the commit recorded in the superproject will make it show up as changed. This value is added as a new parameter for the "--ignore-submodules" option of the diff family and "git status" so the user can override the settings in the configuration. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-08-06 00:39:25 +02:00
void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt,
const char *path)
{
const struct submodule *submodule = submodule_from_path(the_repository,
null_oid(),
path);
if (submodule) {
const char *ignore;
char *key;
Submodules: Add the new "ignore" config option for diff and status The new "ignore" config option controls the default behavior for "git status" and the diff family. It specifies under what circumstances they consider submodules as modified and can be set separately for each submodule. The command line option "--ignore-submodules=" has been extended to accept the new parameter "none" for both status and diff. Users that chose submodules to get rid of long work tree scanning times might want to set the "dirty" option for those submodules. This brings back the pre 1.7.0 behavior, where submodule work trees were never scanned for modifications. By using "--ignore-submodules=none" on the command line the status and diff commands can be told to do a full scan. This option can be set to the following values (which have the same name and meaning as for the "--ignore-submodules" option of status and diff): "all": All changes to the submodule will be ignored. "dirty": Only differences of the commit recorded in the superproject and the submodules HEAD will be considered modifications, all changes to the work tree of the submodule will be ignored. When using this value, the submodule will not be scanned for work tree changes at all, leading to a performance benefit on large submodules. "untracked": Only untracked files in the submodules work tree are ignored, a changed HEAD and/or modified files in the submodule will mark it as modified. "none" (which is the default): Either untracked or modified files in a submodules work tree or a difference between the subdmodules HEAD and the commit recorded in the superproject will make it show up as changed. This value is added as a new parameter for the "--ignore-submodules" option of the diff family and "git status" so the user can override the settings in the configuration. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-08-06 00:39:25 +02:00
key = xstrfmt("submodule.%s.ignore", submodule->name);
config: fix leaks from git_config_get_string_const() There are two functions to get a single config string: - git_config_get_string() - git_config_get_string_const() One might naively think that the first one allocates a new string and the second one just points us to the internal configset storage. But in fact they both allocate a new copy; the second one exists only to avoid having to cast when using it with a const global which we never intend to free. The documentation for the function explains that clearly, but it seems I'm not alone in being surprised by this. Of 17 calls to the function, 13 of them leak the resulting value. We could obviously fix these by adding the appropriate free(). But it would be simpler still if we actually had a non-allocating way to get the string. There's git_config_get_value() but that doesn't quite do what we want. If the config key is present but is a boolean with no value (e.g., "[foo]bar" in the file), then we'll get NULL (whereas the string versions will print an error and die). So let's introduce a new variant, git_config_get_string_tmp(), that behaves as these callers expect. We need a new name because we have new semantics but the same function signature (so even if we converted the four remaining callers, topics in flight might be surprised). The "tmp" is because this value should only be held onto for a short time. In practice it's rare for us to clear and refresh the configset, invalidating the pointer, but hopefully the "tmp" makes callers think about the lifetime. In each of the converted cases here the value only needs to last within the local function or its immediate caller. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-08-14 18:17:36 +02:00
if (repo_config_get_string_tmp(the_repository, key, &ignore))
ignore = submodule->ignore;
free(key);
if (ignore)
handle_ignore_submodules_arg(diffopt, ignore);
else if (is_gitmodules_unmerged(the_repository->index))
diff: make struct diff_flags members lowercase Now that the flags stored in struct diff_flags are being accessed directly and not through macros, change all struct members from being uppercase to lowercase. This conversion is done using the following semantic patch: @@ expression E; @@ - E.RECURSIVE + E.recursive @@ expression E; @@ - E.TREE_IN_RECURSIVE + E.tree_in_recursive @@ expression E; @@ - E.BINARY + E.binary @@ expression E; @@ - E.TEXT + E.text @@ expression E; @@ - E.FULL_INDEX + E.full_index @@ expression E; @@ - E.SILENT_ON_REMOVE + E.silent_on_remove @@ expression E; @@ - E.FIND_COPIES_HARDER + E.find_copies_harder @@ expression E; @@ - E.FOLLOW_RENAMES + E.follow_renames @@ expression E; @@ - E.RENAME_EMPTY + E.rename_empty @@ expression E; @@ - E.HAS_CHANGES + E.has_changes @@ expression E; @@ - E.QUICK + E.quick @@ expression E; @@ - E.NO_INDEX + E.no_index @@ expression E; @@ - E.ALLOW_EXTERNAL + E.allow_external @@ expression E; @@ - E.EXIT_WITH_STATUS + E.exit_with_status @@ expression E; @@ - E.REVERSE_DIFF + E.reverse_diff @@ expression E; @@ - E.CHECK_FAILED + E.check_failed @@ expression E; @@ - E.RELATIVE_NAME + E.relative_name @@ expression E; @@ - E.IGNORE_SUBMODULES + E.ignore_submodules @@ expression E; @@ - E.DIRSTAT_CUMULATIVE + E.dirstat_cumulative @@ expression E; @@ - E.DIRSTAT_BY_FILE + E.dirstat_by_file @@ expression E; @@ - E.ALLOW_TEXTCONV + E.allow_textconv @@ expression E; @@ - E.TEXTCONV_SET_VIA_CMDLINE + E.textconv_set_via_cmdline @@ expression E; @@ - E.DIFF_FROM_CONTENTS + E.diff_from_contents @@ expression E; @@ - E.DIRTY_SUBMODULES + E.dirty_submodules @@ expression E; @@ - E.IGNORE_UNTRACKED_IN_SUBMODULES + E.ignore_untracked_in_submodules @@ expression E; @@ - E.IGNORE_DIRTY_SUBMODULES + E.ignore_dirty_submodules @@ expression E; @@ - E.OVERRIDE_SUBMODULE_CONFIG + E.override_submodule_config @@ expression E; @@ - E.DIRSTAT_BY_LINE + E.dirstat_by_line @@ expression E; @@ - E.FUNCCONTEXT + E.funccontext @@ expression E; @@ - E.PICKAXE_IGNORE_CASE + E.pickaxe_ignore_case @@ expression E; @@ - E.DEFAULT_FOLLOW_RENAMES + E.default_follow_renames Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-31 19:19:11 +01:00
diffopt->flags.ignore_submodules = 1;
}
}
/* Cheap function that only determines if we're interested in submodules at all */
int git_default_submodule_config(const char *var, const char *value, void *cb)
{
if (!strcmp(var, "submodule.recurse")) {
int v = git_config_bool(var, value) ?
RECURSE_SUBMODULES_ON : RECURSE_SUBMODULES_OFF;
config_update_recurse_submodules = v;
}
return 0;
submodule loading: separate code path for .gitmodules and config overlay The .gitmodules file is not supposed to have all the options available, that are available in the configuration so separate it out. A configuration option such as the hypothetical submodule.color.diff that determines in which color a submodule change is printed, is a very user specific thing, that the .gitmodules file should not tamper with. The .gitmodules file should only be used for settings that required to setup the project in which the .gitmodules file is tracked. As the minimum this would only include the name<->path mapping of the submodule and its URL and branch. Any further setting (such as 'fetch.recursesubmodules' or 'submodule.<name>.{update, ignore, shallow}') is not specific to the project setup requirements, but rather is a distribution of suggested developer configurations. In other areas of Git a suggested developer configuration is not transported in-tree but via other means. In an organisation this could be done by deploying an opinionated system wide config (/etc/gitconfig) or by putting the settings in the users home directory when they start at the organisation. In open source projects this is often accomplished via extensive READMEs (cf. our SubmittingPatches/CodingGuidlines). As a later patch in this series wants to introduce a generic submodule recursion option, we want to make sure that switch is not exposed via the gitmodules file. Signed-off-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-26 21:10:13 +02:00
}
int option_parse_recurse_submodules_worktree_updater(const struct option *opt,
const char *arg, int unset)
{
if (unset) {
config_update_recurse_submodules = RECURSE_SUBMODULES_OFF;
return 0;
}
if (arg)
config_update_recurse_submodules =
parse_update_recurse_submodules_arg(opt->long_name,
arg);
else
config_update_recurse_submodules = RECURSE_SUBMODULES_ON;
return 0;
}
/*
* Determine if a submodule has been initialized at a given 'path'
*/
/*
* NEEDSWORK: Emit a warning if submodule.active exists, but is valueless,
* ie, the config looks like: "[submodule] active\n".
* Since that is an invalid pathspec, we should inform the user.
*/
int is_submodule_active(struct repository *repo, const char *path)
{
int ret = 0;
submodule: decouple url and submodule interest Currently the submodule.<name>.url config option is used to determine if a given submodule is of interest to the user. This ends up being cumbersome in a world where we want to have different submodules checked out in different worktrees or a more generalized mechanism to select which submodules are of interest. In a future with worktree support for submodules, there will be multiple working trees, each of which may only need a subset of the submodules checked out. The URL (which is where the submodule repository can be obtained) should not differ between different working trees. It may also be convenient for users to more easily specify groups of submodules they are interested in as opposed to running "git submodule init <path>" on each submodule they want checked out in their working tree. To this end two config options are introduced, submodule.active and submodule.<name>.active. The submodule.active config holds a pathspec that specifies which submodules should exist in the working tree. The submodule.<name>.active config is a boolean flag used to indicate if that particular submodule should exist in the working tree. Its important to note that submodule.active functions differently than the other configuration options since it takes a pathspec. This allows users to adopt at least two new workflows: 1. Submodules can be grouped with a leading directory, such that a pathspec e.g. 'lib/' would cover all library-ish modules to allow those who are interested in library-ish modules to set "submodule.active = lib/" just once to say any and all modules in 'lib/' are interesting. 2. Once the pathspec-attribute feature is invented, users can label submodules with attributes to group them, so that a broad pathspec with attribute requirements, e.g. ':(attr:lib)', can be used to say any and all modules with the 'lib' attribute are interesting. Since the .gitattributes file, just like the .gitmodules file, is tracked by the superproject, when a submodule moves in the superproject tree, the project can adjust which path gets the attribute in .gitattributes, just like it can adjust which path has the submodule in .gitmodules. Neither of these two additional configuration options solve the problem of wanting different submodules checked out in different worktrees because multiple worktrees share .git/config. Only once per-worktree configurations become a reality can this be solved, but this is a necessary preparatory step for that future. Given these multiple ways to check if a submodule is of interest, the more fine-grained submodule.<name>.active option has the highest order of precedence followed by the pathspec check against submodule.active. To ensure backwards compatibility, if neither of these options are set, git falls back to checking the submodule.<name>.url option to determine if a submodule is interesting. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-17 23:38:01 +01:00
char *key = NULL;
char *value = NULL;
const struct string_list *sl;
const struct submodule *module;
module = submodule_from_path(repo, null_oid(), path);
submodule: decouple url and submodule interest Currently the submodule.<name>.url config option is used to determine if a given submodule is of interest to the user. This ends up being cumbersome in a world where we want to have different submodules checked out in different worktrees or a more generalized mechanism to select which submodules are of interest. In a future with worktree support for submodules, there will be multiple working trees, each of which may only need a subset of the submodules checked out. The URL (which is where the submodule repository can be obtained) should not differ between different working trees. It may also be convenient for users to more easily specify groups of submodules they are interested in as opposed to running "git submodule init <path>" on each submodule they want checked out in their working tree. To this end two config options are introduced, submodule.active and submodule.<name>.active. The submodule.active config holds a pathspec that specifies which submodules should exist in the working tree. The submodule.<name>.active config is a boolean flag used to indicate if that particular submodule should exist in the working tree. Its important to note that submodule.active functions differently than the other configuration options since it takes a pathspec. This allows users to adopt at least two new workflows: 1. Submodules can be grouped with a leading directory, such that a pathspec e.g. 'lib/' would cover all library-ish modules to allow those who are interested in library-ish modules to set "submodule.active = lib/" just once to say any and all modules in 'lib/' are interesting. 2. Once the pathspec-attribute feature is invented, users can label submodules with attributes to group them, so that a broad pathspec with attribute requirements, e.g. ':(attr:lib)', can be used to say any and all modules with the 'lib' attribute are interesting. Since the .gitattributes file, just like the .gitmodules file, is tracked by the superproject, when a submodule moves in the superproject tree, the project can adjust which path gets the attribute in .gitattributes, just like it can adjust which path has the submodule in .gitmodules. Neither of these two additional configuration options solve the problem of wanting different submodules checked out in different worktrees because multiple worktrees share .git/config. Only once per-worktree configurations become a reality can this be solved, but this is a necessary preparatory step for that future. Given these multiple ways to check if a submodule is of interest, the more fine-grained submodule.<name>.active option has the highest order of precedence followed by the pathspec check against submodule.active. To ensure backwards compatibility, if neither of these options are set, git falls back to checking the submodule.<name>.url option to determine if a submodule is interesting. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-17 23:38:01 +01:00
/* early return if there isn't a path->module mapping */
if (!module)
return 0;
submodule: decouple url and submodule interest Currently the submodule.<name>.url config option is used to determine if a given submodule is of interest to the user. This ends up being cumbersome in a world where we want to have different submodules checked out in different worktrees or a more generalized mechanism to select which submodules are of interest. In a future with worktree support for submodules, there will be multiple working trees, each of which may only need a subset of the submodules checked out. The URL (which is where the submodule repository can be obtained) should not differ between different working trees. It may also be convenient for users to more easily specify groups of submodules they are interested in as opposed to running "git submodule init <path>" on each submodule they want checked out in their working tree. To this end two config options are introduced, submodule.active and submodule.<name>.active. The submodule.active config holds a pathspec that specifies which submodules should exist in the working tree. The submodule.<name>.active config is a boolean flag used to indicate if that particular submodule should exist in the working tree. Its important to note that submodule.active functions differently than the other configuration options since it takes a pathspec. This allows users to adopt at least two new workflows: 1. Submodules can be grouped with a leading directory, such that a pathspec e.g. 'lib/' would cover all library-ish modules to allow those who are interested in library-ish modules to set "submodule.active = lib/" just once to say any and all modules in 'lib/' are interesting. 2. Once the pathspec-attribute feature is invented, users can label submodules with attributes to group them, so that a broad pathspec with attribute requirements, e.g. ':(attr:lib)', can be used to say any and all modules with the 'lib' attribute are interesting. Since the .gitattributes file, just like the .gitmodules file, is tracked by the superproject, when a submodule moves in the superproject tree, the project can adjust which path gets the attribute in .gitattributes, just like it can adjust which path has the submodule in .gitmodules. Neither of these two additional configuration options solve the problem of wanting different submodules checked out in different worktrees because multiple worktrees share .git/config. Only once per-worktree configurations become a reality can this be solved, but this is a necessary preparatory step for that future. Given these multiple ways to check if a submodule is of interest, the more fine-grained submodule.<name>.active option has the highest order of precedence followed by the pathspec check against submodule.active. To ensure backwards compatibility, if neither of these options are set, git falls back to checking the submodule.<name>.url option to determine if a submodule is interesting. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-17 23:38:01 +01:00
/* submodule.<name>.active is set */
key = xstrfmt("submodule.%s.active", module->name);
if (!repo_config_get_bool(repo, key, &ret)) {
submodule: decouple url and submodule interest Currently the submodule.<name>.url config option is used to determine if a given submodule is of interest to the user. This ends up being cumbersome in a world where we want to have different submodules checked out in different worktrees or a more generalized mechanism to select which submodules are of interest. In a future with worktree support for submodules, there will be multiple working trees, each of which may only need a subset of the submodules checked out. The URL (which is where the submodule repository can be obtained) should not differ between different working trees. It may also be convenient for users to more easily specify groups of submodules they are interested in as opposed to running "git submodule init <path>" on each submodule they want checked out in their working tree. To this end two config options are introduced, submodule.active and submodule.<name>.active. The submodule.active config holds a pathspec that specifies which submodules should exist in the working tree. The submodule.<name>.active config is a boolean flag used to indicate if that particular submodule should exist in the working tree. Its important to note that submodule.active functions differently than the other configuration options since it takes a pathspec. This allows users to adopt at least two new workflows: 1. Submodules can be grouped with a leading directory, such that a pathspec e.g. 'lib/' would cover all library-ish modules to allow those who are interested in library-ish modules to set "submodule.active = lib/" just once to say any and all modules in 'lib/' are interesting. 2. Once the pathspec-attribute feature is invented, users can label submodules with attributes to group them, so that a broad pathspec with attribute requirements, e.g. ':(attr:lib)', can be used to say any and all modules with the 'lib' attribute are interesting. Since the .gitattributes file, just like the .gitmodules file, is tracked by the superproject, when a submodule moves in the superproject tree, the project can adjust which path gets the attribute in .gitattributes, just like it can adjust which path has the submodule in .gitmodules. Neither of these two additional configuration options solve the problem of wanting different submodules checked out in different worktrees because multiple worktrees share .git/config. Only once per-worktree configurations become a reality can this be solved, but this is a necessary preparatory step for that future. Given these multiple ways to check if a submodule is of interest, the more fine-grained submodule.<name>.active option has the highest order of precedence followed by the pathspec check against submodule.active. To ensure backwards compatibility, if neither of these options are set, git falls back to checking the submodule.<name>.url option to determine if a submodule is interesting. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-17 23:38:01 +01:00
free(key);
return ret;
}
free(key);
submodule: decouple url and submodule interest Currently the submodule.<name>.url config option is used to determine if a given submodule is of interest to the user. This ends up being cumbersome in a world where we want to have different submodules checked out in different worktrees or a more generalized mechanism to select which submodules are of interest. In a future with worktree support for submodules, there will be multiple working trees, each of which may only need a subset of the submodules checked out. The URL (which is where the submodule repository can be obtained) should not differ between different working trees. It may also be convenient for users to more easily specify groups of submodules they are interested in as opposed to running "git submodule init <path>" on each submodule they want checked out in their working tree. To this end two config options are introduced, submodule.active and submodule.<name>.active. The submodule.active config holds a pathspec that specifies which submodules should exist in the working tree. The submodule.<name>.active config is a boolean flag used to indicate if that particular submodule should exist in the working tree. Its important to note that submodule.active functions differently than the other configuration options since it takes a pathspec. This allows users to adopt at least two new workflows: 1. Submodules can be grouped with a leading directory, such that a pathspec e.g. 'lib/' would cover all library-ish modules to allow those who are interested in library-ish modules to set "submodule.active = lib/" just once to say any and all modules in 'lib/' are interesting. 2. Once the pathspec-attribute feature is invented, users can label submodules with attributes to group them, so that a broad pathspec with attribute requirements, e.g. ':(attr:lib)', can be used to say any and all modules with the 'lib' attribute are interesting. Since the .gitattributes file, just like the .gitmodules file, is tracked by the superproject, when a submodule moves in the superproject tree, the project can adjust which path gets the attribute in .gitattributes, just like it can adjust which path has the submodule in .gitmodules. Neither of these two additional configuration options solve the problem of wanting different submodules checked out in different worktrees because multiple worktrees share .git/config. Only once per-worktree configurations become a reality can this be solved, but this is a necessary preparatory step for that future. Given these multiple ways to check if a submodule is of interest, the more fine-grained submodule.<name>.active option has the highest order of precedence followed by the pathspec check against submodule.active. To ensure backwards compatibility, if neither of these options are set, git falls back to checking the submodule.<name>.url option to determine if a submodule is interesting. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-17 23:38:01 +01:00
/* submodule.active is set */
sl = repo_config_get_value_multi(repo, "submodule.active");
submodule: decouple url and submodule interest Currently the submodule.<name>.url config option is used to determine if a given submodule is of interest to the user. This ends up being cumbersome in a world where we want to have different submodules checked out in different worktrees or a more generalized mechanism to select which submodules are of interest. In a future with worktree support for submodules, there will be multiple working trees, each of which may only need a subset of the submodules checked out. The URL (which is where the submodule repository can be obtained) should not differ between different working trees. It may also be convenient for users to more easily specify groups of submodules they are interested in as opposed to running "git submodule init <path>" on each submodule they want checked out in their working tree. To this end two config options are introduced, submodule.active and submodule.<name>.active. The submodule.active config holds a pathspec that specifies which submodules should exist in the working tree. The submodule.<name>.active config is a boolean flag used to indicate if that particular submodule should exist in the working tree. Its important to note that submodule.active functions differently than the other configuration options since it takes a pathspec. This allows users to adopt at least two new workflows: 1. Submodules can be grouped with a leading directory, such that a pathspec e.g. 'lib/' would cover all library-ish modules to allow those who are interested in library-ish modules to set "submodule.active = lib/" just once to say any and all modules in 'lib/' are interesting. 2. Once the pathspec-attribute feature is invented, users can label submodules with attributes to group them, so that a broad pathspec with attribute requirements, e.g. ':(attr:lib)', can be used to say any and all modules with the 'lib' attribute are interesting. Since the .gitattributes file, just like the .gitmodules file, is tracked by the superproject, when a submodule moves in the superproject tree, the project can adjust which path gets the attribute in .gitattributes, just like it can adjust which path has the submodule in .gitmodules. Neither of these two additional configuration options solve the problem of wanting different submodules checked out in different worktrees because multiple worktrees share .git/config. Only once per-worktree configurations become a reality can this be solved, but this is a necessary preparatory step for that future. Given these multiple ways to check if a submodule is of interest, the more fine-grained submodule.<name>.active option has the highest order of precedence followed by the pathspec check against submodule.active. To ensure backwards compatibility, if neither of these options are set, git falls back to checking the submodule.<name>.url option to determine if a submodule is interesting. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-17 23:38:01 +01:00
if (sl) {
struct pathspec ps;
struct strvec args = STRVEC_INIT;
submodule: decouple url and submodule interest Currently the submodule.<name>.url config option is used to determine if a given submodule is of interest to the user. This ends up being cumbersome in a world where we want to have different submodules checked out in different worktrees or a more generalized mechanism to select which submodules are of interest. In a future with worktree support for submodules, there will be multiple working trees, each of which may only need a subset of the submodules checked out. The URL (which is where the submodule repository can be obtained) should not differ between different working trees. It may also be convenient for users to more easily specify groups of submodules they are interested in as opposed to running "git submodule init <path>" on each submodule they want checked out in their working tree. To this end two config options are introduced, submodule.active and submodule.<name>.active. The submodule.active config holds a pathspec that specifies which submodules should exist in the working tree. The submodule.<name>.active config is a boolean flag used to indicate if that particular submodule should exist in the working tree. Its important to note that submodule.active functions differently than the other configuration options since it takes a pathspec. This allows users to adopt at least two new workflows: 1. Submodules can be grouped with a leading directory, such that a pathspec e.g. 'lib/' would cover all library-ish modules to allow those who are interested in library-ish modules to set "submodule.active = lib/" just once to say any and all modules in 'lib/' are interesting. 2. Once the pathspec-attribute feature is invented, users can label submodules with attributes to group them, so that a broad pathspec with attribute requirements, e.g. ':(attr:lib)', can be used to say any and all modules with the 'lib' attribute are interesting. Since the .gitattributes file, just like the .gitmodules file, is tracked by the superproject, when a submodule moves in the superproject tree, the project can adjust which path gets the attribute in .gitattributes, just like it can adjust which path has the submodule in .gitmodules. Neither of these two additional configuration options solve the problem of wanting different submodules checked out in different worktrees because multiple worktrees share .git/config. Only once per-worktree configurations become a reality can this be solved, but this is a necessary preparatory step for that future. Given these multiple ways to check if a submodule is of interest, the more fine-grained submodule.<name>.active option has the highest order of precedence followed by the pathspec check against submodule.active. To ensure backwards compatibility, if neither of these options are set, git falls back to checking the submodule.<name>.url option to determine if a submodule is interesting. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-17 23:38:01 +01:00
const struct string_list_item *item;
submodule: decouple url and submodule interest Currently the submodule.<name>.url config option is used to determine if a given submodule is of interest to the user. This ends up being cumbersome in a world where we want to have different submodules checked out in different worktrees or a more generalized mechanism to select which submodules are of interest. In a future with worktree support for submodules, there will be multiple working trees, each of which may only need a subset of the submodules checked out. The URL (which is where the submodule repository can be obtained) should not differ between different working trees. It may also be convenient for users to more easily specify groups of submodules they are interested in as opposed to running "git submodule init <path>" on each submodule they want checked out in their working tree. To this end two config options are introduced, submodule.active and submodule.<name>.active. The submodule.active config holds a pathspec that specifies which submodules should exist in the working tree. The submodule.<name>.active config is a boolean flag used to indicate if that particular submodule should exist in the working tree. Its important to note that submodule.active functions differently than the other configuration options since it takes a pathspec. This allows users to adopt at least two new workflows: 1. Submodules can be grouped with a leading directory, such that a pathspec e.g. 'lib/' would cover all library-ish modules to allow those who are interested in library-ish modules to set "submodule.active = lib/" just once to say any and all modules in 'lib/' are interesting. 2. Once the pathspec-attribute feature is invented, users can label submodules with attributes to group them, so that a broad pathspec with attribute requirements, e.g. ':(attr:lib)', can be used to say any and all modules with the 'lib' attribute are interesting. Since the .gitattributes file, just like the .gitmodules file, is tracked by the superproject, when a submodule moves in the superproject tree, the project can adjust which path gets the attribute in .gitattributes, just like it can adjust which path has the submodule in .gitmodules. Neither of these two additional configuration options solve the problem of wanting different submodules checked out in different worktrees because multiple worktrees share .git/config. Only once per-worktree configurations become a reality can this be solved, but this is a necessary preparatory step for that future. Given these multiple ways to check if a submodule is of interest, the more fine-grained submodule.<name>.active option has the highest order of precedence followed by the pathspec check against submodule.active. To ensure backwards compatibility, if neither of these options are set, git falls back to checking the submodule.<name>.url option to determine if a submodule is interesting. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-17 23:38:01 +01:00
for_each_string_list_item(item, sl) {
strvec_push(&args, item->string);
submodule: decouple url and submodule interest Currently the submodule.<name>.url config option is used to determine if a given submodule is of interest to the user. This ends up being cumbersome in a world where we want to have different submodules checked out in different worktrees or a more generalized mechanism to select which submodules are of interest. In a future with worktree support for submodules, there will be multiple working trees, each of which may only need a subset of the submodules checked out. The URL (which is where the submodule repository can be obtained) should not differ between different working trees. It may also be convenient for users to more easily specify groups of submodules they are interested in as opposed to running "git submodule init <path>" on each submodule they want checked out in their working tree. To this end two config options are introduced, submodule.active and submodule.<name>.active. The submodule.active config holds a pathspec that specifies which submodules should exist in the working tree. The submodule.<name>.active config is a boolean flag used to indicate if that particular submodule should exist in the working tree. Its important to note that submodule.active functions differently than the other configuration options since it takes a pathspec. This allows users to adopt at least two new workflows: 1. Submodules can be grouped with a leading directory, such that a pathspec e.g. 'lib/' would cover all library-ish modules to allow those who are interested in library-ish modules to set "submodule.active = lib/" just once to say any and all modules in 'lib/' are interesting. 2. Once the pathspec-attribute feature is invented, users can label submodules with attributes to group them, so that a broad pathspec with attribute requirements, e.g. ':(attr:lib)', can be used to say any and all modules with the 'lib' attribute are interesting. Since the .gitattributes file, just like the .gitmodules file, is tracked by the superproject, when a submodule moves in the superproject tree, the project can adjust which path gets the attribute in .gitattributes, just like it can adjust which path has the submodule in .gitmodules. Neither of these two additional configuration options solve the problem of wanting different submodules checked out in different worktrees because multiple worktrees share .git/config. Only once per-worktree configurations become a reality can this be solved, but this is a necessary preparatory step for that future. Given these multiple ways to check if a submodule is of interest, the more fine-grained submodule.<name>.active option has the highest order of precedence followed by the pathspec check against submodule.active. To ensure backwards compatibility, if neither of these options are set, git falls back to checking the submodule.<name>.url option to determine if a submodule is interesting. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-17 23:38:01 +01:00
}
parse_pathspec(&ps, 0, 0, NULL, args.v);
ret = match_pathspec(repo->index, &ps, path, strlen(path), 0, NULL, 1);
submodule: decouple url and submodule interest Currently the submodule.<name>.url config option is used to determine if a given submodule is of interest to the user. This ends up being cumbersome in a world where we want to have different submodules checked out in different worktrees or a more generalized mechanism to select which submodules are of interest. In a future with worktree support for submodules, there will be multiple working trees, each of which may only need a subset of the submodules checked out. The URL (which is where the submodule repository can be obtained) should not differ between different working trees. It may also be convenient for users to more easily specify groups of submodules they are interested in as opposed to running "git submodule init <path>" on each submodule they want checked out in their working tree. To this end two config options are introduced, submodule.active and submodule.<name>.active. The submodule.active config holds a pathspec that specifies which submodules should exist in the working tree. The submodule.<name>.active config is a boolean flag used to indicate if that particular submodule should exist in the working tree. Its important to note that submodule.active functions differently than the other configuration options since it takes a pathspec. This allows users to adopt at least two new workflows: 1. Submodules can be grouped with a leading directory, such that a pathspec e.g. 'lib/' would cover all library-ish modules to allow those who are interested in library-ish modules to set "submodule.active = lib/" just once to say any and all modules in 'lib/' are interesting. 2. Once the pathspec-attribute feature is invented, users can label submodules with attributes to group them, so that a broad pathspec with attribute requirements, e.g. ':(attr:lib)', can be used to say any and all modules with the 'lib' attribute are interesting. Since the .gitattributes file, just like the .gitmodules file, is tracked by the superproject, when a submodule moves in the superproject tree, the project can adjust which path gets the attribute in .gitattributes, just like it can adjust which path has the submodule in .gitmodules. Neither of these two additional configuration options solve the problem of wanting different submodules checked out in different worktrees because multiple worktrees share .git/config. Only once per-worktree configurations become a reality can this be solved, but this is a necessary preparatory step for that future. Given these multiple ways to check if a submodule is of interest, the more fine-grained submodule.<name>.active option has the highest order of precedence followed by the pathspec check against submodule.active. To ensure backwards compatibility, if neither of these options are set, git falls back to checking the submodule.<name>.url option to determine if a submodule is interesting. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-17 23:38:01 +01:00
strvec_clear(&args);
submodule: decouple url and submodule interest Currently the submodule.<name>.url config option is used to determine if a given submodule is of interest to the user. This ends up being cumbersome in a world where we want to have different submodules checked out in different worktrees or a more generalized mechanism to select which submodules are of interest. In a future with worktree support for submodules, there will be multiple working trees, each of which may only need a subset of the submodules checked out. The URL (which is where the submodule repository can be obtained) should not differ between different working trees. It may also be convenient for users to more easily specify groups of submodules they are interested in as opposed to running "git submodule init <path>" on each submodule they want checked out in their working tree. To this end two config options are introduced, submodule.active and submodule.<name>.active. The submodule.active config holds a pathspec that specifies which submodules should exist in the working tree. The submodule.<name>.active config is a boolean flag used to indicate if that particular submodule should exist in the working tree. Its important to note that submodule.active functions differently than the other configuration options since it takes a pathspec. This allows users to adopt at least two new workflows: 1. Submodules can be grouped with a leading directory, such that a pathspec e.g. 'lib/' would cover all library-ish modules to allow those who are interested in library-ish modules to set "submodule.active = lib/" just once to say any and all modules in 'lib/' are interesting. 2. Once the pathspec-attribute feature is invented, users can label submodules with attributes to group them, so that a broad pathspec with attribute requirements, e.g. ':(attr:lib)', can be used to say any and all modules with the 'lib' attribute are interesting. Since the .gitattributes file, just like the .gitmodules file, is tracked by the superproject, when a submodule moves in the superproject tree, the project can adjust which path gets the attribute in .gitattributes, just like it can adjust which path has the submodule in .gitmodules. Neither of these two additional configuration options solve the problem of wanting different submodules checked out in different worktrees because multiple worktrees share .git/config. Only once per-worktree configurations become a reality can this be solved, but this is a necessary preparatory step for that future. Given these multiple ways to check if a submodule is of interest, the more fine-grained submodule.<name>.active option has the highest order of precedence followed by the pathspec check against submodule.active. To ensure backwards compatibility, if neither of these options are set, git falls back to checking the submodule.<name>.url option to determine if a submodule is interesting. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-17 23:38:01 +01:00
clear_pathspec(&ps);
return ret;
}
submodule: decouple url and submodule interest Currently the submodule.<name>.url config option is used to determine if a given submodule is of interest to the user. This ends up being cumbersome in a world where we want to have different submodules checked out in different worktrees or a more generalized mechanism to select which submodules are of interest. In a future with worktree support for submodules, there will be multiple working trees, each of which may only need a subset of the submodules checked out. The URL (which is where the submodule repository can be obtained) should not differ between different working trees. It may also be convenient for users to more easily specify groups of submodules they are interested in as opposed to running "git submodule init <path>" on each submodule they want checked out in their working tree. To this end two config options are introduced, submodule.active and submodule.<name>.active. The submodule.active config holds a pathspec that specifies which submodules should exist in the working tree. The submodule.<name>.active config is a boolean flag used to indicate if that particular submodule should exist in the working tree. Its important to note that submodule.active functions differently than the other configuration options since it takes a pathspec. This allows users to adopt at least two new workflows: 1. Submodules can be grouped with a leading directory, such that a pathspec e.g. 'lib/' would cover all library-ish modules to allow those who are interested in library-ish modules to set "submodule.active = lib/" just once to say any and all modules in 'lib/' are interesting. 2. Once the pathspec-attribute feature is invented, users can label submodules with attributes to group them, so that a broad pathspec with attribute requirements, e.g. ':(attr:lib)', can be used to say any and all modules with the 'lib' attribute are interesting. Since the .gitattributes file, just like the .gitmodules file, is tracked by the superproject, when a submodule moves in the superproject tree, the project can adjust which path gets the attribute in .gitattributes, just like it can adjust which path has the submodule in .gitmodules. Neither of these two additional configuration options solve the problem of wanting different submodules checked out in different worktrees because multiple worktrees share .git/config. Only once per-worktree configurations become a reality can this be solved, but this is a necessary preparatory step for that future. Given these multiple ways to check if a submodule is of interest, the more fine-grained submodule.<name>.active option has the highest order of precedence followed by the pathspec check against submodule.active. To ensure backwards compatibility, if neither of these options are set, git falls back to checking the submodule.<name>.url option to determine if a submodule is interesting. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-17 23:38:01 +01:00
/* fallback to checking if the URL is set */
key = xstrfmt("submodule.%s.url", module->name);
ret = !repo_config_get_string(repo, key, &value);
submodule: decouple url and submodule interest Currently the submodule.<name>.url config option is used to determine if a given submodule is of interest to the user. This ends up being cumbersome in a world where we want to have different submodules checked out in different worktrees or a more generalized mechanism to select which submodules are of interest. In a future with worktree support for submodules, there will be multiple working trees, each of which may only need a subset of the submodules checked out. The URL (which is where the submodule repository can be obtained) should not differ between different working trees. It may also be convenient for users to more easily specify groups of submodules they are interested in as opposed to running "git submodule init <path>" on each submodule they want checked out in their working tree. To this end two config options are introduced, submodule.active and submodule.<name>.active. The submodule.active config holds a pathspec that specifies which submodules should exist in the working tree. The submodule.<name>.active config is a boolean flag used to indicate if that particular submodule should exist in the working tree. Its important to note that submodule.active functions differently than the other configuration options since it takes a pathspec. This allows users to adopt at least two new workflows: 1. Submodules can be grouped with a leading directory, such that a pathspec e.g. 'lib/' would cover all library-ish modules to allow those who are interested in library-ish modules to set "submodule.active = lib/" just once to say any and all modules in 'lib/' are interesting. 2. Once the pathspec-attribute feature is invented, users can label submodules with attributes to group them, so that a broad pathspec with attribute requirements, e.g. ':(attr:lib)', can be used to say any and all modules with the 'lib' attribute are interesting. Since the .gitattributes file, just like the .gitmodules file, is tracked by the superproject, when a submodule moves in the superproject tree, the project can adjust which path gets the attribute in .gitattributes, just like it can adjust which path has the submodule in .gitmodules. Neither of these two additional configuration options solve the problem of wanting different submodules checked out in different worktrees because multiple worktrees share .git/config. Only once per-worktree configurations become a reality can this be solved, but this is a necessary preparatory step for that future. Given these multiple ways to check if a submodule is of interest, the more fine-grained submodule.<name>.active option has the highest order of precedence followed by the pathspec check against submodule.active. To ensure backwards compatibility, if neither of these options are set, git falls back to checking the submodule.<name>.url option to determine if a submodule is interesting. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-17 23:38:01 +01:00
free(value);
free(key);
return ret;
}
int is_submodule_populated_gently(const char *path, int *return_error_code)
{
int ret = 0;
char *gitdir = xstrfmt("%s/.git", path);
if (resolve_gitdir_gently(gitdir, return_error_code))
ret = 1;
free(gitdir);
return ret;
}
/*
* Dies if the provided 'prefix' corresponds to an unpopulated submodule
*/
void die_in_unpopulated_submodule(struct index_state *istate,
const char *prefix)
{
int i, prefixlen;
if (!prefix)
return;
prefixlen = strlen(prefix);
for (i = 0; i < istate->cache_nr; i++) {
struct cache_entry *ce = istate->cache[i];
int ce_len = ce_namelen(ce);
if (!S_ISGITLINK(ce->ce_mode))
continue;
if (prefixlen <= ce_len)
continue;
if (strncmp(ce->name, prefix, ce_len))
continue;
if (prefix[ce_len] != '/')
continue;
die(_("in unpopulated submodule '%s'"), ce->name);
}
}
/*
* Dies if any paths in the provided pathspec descends into a submodule
*/
void die_path_inside_submodule(struct index_state *istate,
const struct pathspec *ps)
{
int i, j;
for (i = 0; i < istate->cache_nr; i++) {
struct cache_entry *ce = istate->cache[i];
int ce_len = ce_namelen(ce);
if (!S_ISGITLINK(ce->ce_mode))
continue;
for (j = 0; j < ps->nr ; j++) {
const struct pathspec_item *item = &ps->items[j];
if (item->len <= ce_len)
continue;
if (item->match[ce_len] != '/')
continue;
if (strncmp(ce->name, item->match, ce_len))
continue;
if (item->len == ce_len + 1)
continue;
die(_("Pathspec '%s' is in submodule '%.*s'"),
item->original, ce_len, ce->name);
}
}
}
enum submodule_update_type parse_submodule_update_type(const char *value)
{
if (!strcmp(value, "none"))
return SM_UPDATE_NONE;
else if (!strcmp(value, "checkout"))
return SM_UPDATE_CHECKOUT;
else if (!strcmp(value, "rebase"))
return SM_UPDATE_REBASE;
else if (!strcmp(value, "merge"))
return SM_UPDATE_MERGE;
else if (*value == '!')
return SM_UPDATE_COMMAND;
else
return SM_UPDATE_UNSPECIFIED;
}
int parse_submodule_update_strategy(const char *value,
struct submodule_update_strategy *dst)
{
enum submodule_update_type type;
free((void*)dst->command);
dst->command = NULL;
type = parse_submodule_update_type(value);
if (type == SM_UPDATE_UNSPECIFIED)
return -1;
dst->type = type;
if (type == SM_UPDATE_COMMAND)
dst->command = xstrdup(value + 1);
return 0;
}
submodule: port init from shell to C By having the `submodule init` functionality in C, we can reference it easier from other parts in the code in later patches. The code is split up to have one function to initialize one submodule and a calling function that takes care of the rest, such as argument handling and translating the arguments to the paths of the submodules. This is the first submodule subcommand that is fully converted to C except for the usage string, so this is actually removing a call to the `submodule--helper list` function, which is supposed to be used in this transition. Instead we'll make a direct call to `module_list_compute`. An explanation why we need to edit the prefixes in cmd_update in git-submodule.sh in this patch: By having no processing in the shell part, we need to convey the notion of wt_prefix and prefix to the C parts, which former patches punted on and did the processing of displaying path in the shell. `wt_prefix` used to hold the path from the repository root to the current directory, e.g. wt_prefix would be t/ if the user invoked the `git submodule` command in ~/repo/t and ~repo is the GIT_DIR. `prefix` used to hold the relative path from the repository root to the operation, e.g. if you have recursive submodules, the shell script would modify the `prefix` in each recursive step by adding the submodule path. We will pass `wt_prefix` into the C helper via `git -C <dir>` as that will setup git in the directory the user actually called git-submodule.sh from. The `prefix` will be passed in via the `--prefix` option. Having `prefix` and `wt_prefix` relative to the GIT_DIR of the calling superproject is unfortunate with this patch as the C code doesn't know about a possible recursion from a superproject via `submodule update --init --recursive`. To fix this, we change the meaning of `wt_prefix` to point to the current project instead of the superproject and `prefix` to include any relative paths issues in the superproject. That way `prefix` will become the leading part for displaying paths and `wt_prefix` will be empty in recursive calls for now. The new notion of `wt_prefix` and `prefix` still allows us to reconstruct the calling directory in the superproject by just traveling reverse of `prefix`. Signed-off-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-04-16 02:50:13 +02:00
const char *submodule_strategy_to_string(const struct submodule_update_strategy *s)
{
struct strbuf sb = STRBUF_INIT;
switch (s->type) {
case SM_UPDATE_CHECKOUT:
return "checkout";
case SM_UPDATE_MERGE:
return "merge";
case SM_UPDATE_REBASE:
return "rebase";
case SM_UPDATE_NONE:
return "none";
case SM_UPDATE_UNSPECIFIED:
return NULL;
case SM_UPDATE_COMMAND:
strbuf_addf(&sb, "!%s", s->command);
return strbuf_detach(&sb, NULL);
}
return NULL;
}
Add the option "--ignore-submodules" to "git status" In some use cases it is not desirable that "git status" considers submodules that only contain untracked content as dirty. This may happen e.g. when the submodule is not under the developers control and not all build generated files have been added to .gitignore by the upstream developers. Using the "untracked" parameter for the "--ignore-submodules" option disables checking for untracked content and lets git diff report them as changed only when they have new commits or modified content. Sometimes it is not wanted to have submodules show up as changed when they just contain changes to their work tree (this was the behavior before 1.7.0). An example for that are scripts which just want to check for submodule commits while ignoring any changes to the work tree. Also users having large submodules known not to change might want to use this option, as the - sometimes substantial - time it takes to scan the submodule work tree(s) is saved when using the "dirty" parameter. And if you want to ignore any changes to submodules, you can now do that by using this option without parameters or with "all" (when the config option status.submodulesummary is set, using "all" will also suppress the output of the submodule summary). A new function handle_ignore_submodules_arg() is introduced to parse this option new to "git status" in a single location, as "git diff" already knew it. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-06-25 16:56:47 +02:00
void handle_ignore_submodules_arg(struct diff_options *diffopt,
const char *arg)
{
diffopt->flags.ignore_submodule_set = 1;
diff: make struct diff_flags members lowercase Now that the flags stored in struct diff_flags are being accessed directly and not through macros, change all struct members from being uppercase to lowercase. This conversion is done using the following semantic patch: @@ expression E; @@ - E.RECURSIVE + E.recursive @@ expression E; @@ - E.TREE_IN_RECURSIVE + E.tree_in_recursive @@ expression E; @@ - E.BINARY + E.binary @@ expression E; @@ - E.TEXT + E.text @@ expression E; @@ - E.FULL_INDEX + E.full_index @@ expression E; @@ - E.SILENT_ON_REMOVE + E.silent_on_remove @@ expression E; @@ - E.FIND_COPIES_HARDER + E.find_copies_harder @@ expression E; @@ - E.FOLLOW_RENAMES + E.follow_renames @@ expression E; @@ - E.RENAME_EMPTY + E.rename_empty @@ expression E; @@ - E.HAS_CHANGES + E.has_changes @@ expression E; @@ - E.QUICK + E.quick @@ expression E; @@ - E.NO_INDEX + E.no_index @@ expression E; @@ - E.ALLOW_EXTERNAL + E.allow_external @@ expression E; @@ - E.EXIT_WITH_STATUS + E.exit_with_status @@ expression E; @@ - E.REVERSE_DIFF + E.reverse_diff @@ expression E; @@ - E.CHECK_FAILED + E.check_failed @@ expression E; @@ - E.RELATIVE_NAME + E.relative_name @@ expression E; @@ - E.IGNORE_SUBMODULES + E.ignore_submodules @@ expression E; @@ - E.DIRSTAT_CUMULATIVE + E.dirstat_cumulative @@ expression E; @@ - E.DIRSTAT_BY_FILE + E.dirstat_by_file @@ expression E; @@ - E.ALLOW_TEXTCONV + E.allow_textconv @@ expression E; @@ - E.TEXTCONV_SET_VIA_CMDLINE + E.textconv_set_via_cmdline @@ expression E; @@ - E.DIFF_FROM_CONTENTS + E.diff_from_contents @@ expression E; @@ - E.DIRTY_SUBMODULES + E.dirty_submodules @@ expression E; @@ - E.IGNORE_UNTRACKED_IN_SUBMODULES + E.ignore_untracked_in_submodules @@ expression E; @@ - E.IGNORE_DIRTY_SUBMODULES + E.ignore_dirty_submodules @@ expression E; @@ - E.OVERRIDE_SUBMODULE_CONFIG + E.override_submodule_config @@ expression E; @@ - E.DIRSTAT_BY_LINE + E.dirstat_by_line @@ expression E; @@ - E.FUNCCONTEXT + E.funccontext @@ expression E; @@ - E.PICKAXE_IGNORE_CASE + E.pickaxe_ignore_case @@ expression E; @@ - E.DEFAULT_FOLLOW_RENAMES + E.default_follow_renames Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-31 19:19:11 +01:00
diffopt->flags.ignore_submodules = 0;
diffopt->flags.ignore_untracked_in_submodules = 0;
diffopt->flags.ignore_dirty_submodules = 0;
Add the option "--ignore-submodules" to "git status" In some use cases it is not desirable that "git status" considers submodules that only contain untracked content as dirty. This may happen e.g. when the submodule is not under the developers control and not all build generated files have been added to .gitignore by the upstream developers. Using the "untracked" parameter for the "--ignore-submodules" option disables checking for untracked content and lets git diff report them as changed only when they have new commits or modified content. Sometimes it is not wanted to have submodules show up as changed when they just contain changes to their work tree (this was the behavior before 1.7.0). An example for that are scripts which just want to check for submodule commits while ignoring any changes to the work tree. Also users having large submodules known not to change might want to use this option, as the - sometimes substantial - time it takes to scan the submodule work tree(s) is saved when using the "dirty" parameter. And if you want to ignore any changes to submodules, you can now do that by using this option without parameters or with "all" (when the config option status.submodulesummary is set, using "all" will also suppress the output of the submodule summary). A new function handle_ignore_submodules_arg() is introduced to parse this option new to "git status" in a single location, as "git diff" already knew it. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-06-25 16:56:47 +02:00
if (!strcmp(arg, "all"))
diff: make struct diff_flags members lowercase Now that the flags stored in struct diff_flags are being accessed directly and not through macros, change all struct members from being uppercase to lowercase. This conversion is done using the following semantic patch: @@ expression E; @@ - E.RECURSIVE + E.recursive @@ expression E; @@ - E.TREE_IN_RECURSIVE + E.tree_in_recursive @@ expression E; @@ - E.BINARY + E.binary @@ expression E; @@ - E.TEXT + E.text @@ expression E; @@ - E.FULL_INDEX + E.full_index @@ expression E; @@ - E.SILENT_ON_REMOVE + E.silent_on_remove @@ expression E; @@ - E.FIND_COPIES_HARDER + E.find_copies_harder @@ expression E; @@ - E.FOLLOW_RENAMES + E.follow_renames @@ expression E; @@ - E.RENAME_EMPTY + E.rename_empty @@ expression E; @@ - E.HAS_CHANGES + E.has_changes @@ expression E; @@ - E.QUICK + E.quick @@ expression E; @@ - E.NO_INDEX + E.no_index @@ expression E; @@ - E.ALLOW_EXTERNAL + E.allow_external @@ expression E; @@ - E.EXIT_WITH_STATUS + E.exit_with_status @@ expression E; @@ - E.REVERSE_DIFF + E.reverse_diff @@ expression E; @@ - E.CHECK_FAILED + E.check_failed @@ expression E; @@ - E.RELATIVE_NAME + E.relative_name @@ expression E; @@ - E.IGNORE_SUBMODULES + E.ignore_submodules @@ expression E; @@ - E.DIRSTAT_CUMULATIVE + E.dirstat_cumulative @@ expression E; @@ - E.DIRSTAT_BY_FILE + E.dirstat_by_file @@ expression E; @@ - E.ALLOW_TEXTCONV + E.allow_textconv @@ expression E; @@ - E.TEXTCONV_SET_VIA_CMDLINE + E.textconv_set_via_cmdline @@ expression E; @@ - E.DIFF_FROM_CONTENTS + E.diff_from_contents @@ expression E; @@ - E.DIRTY_SUBMODULES + E.dirty_submodules @@ expression E; @@ - E.IGNORE_UNTRACKED_IN_SUBMODULES + E.ignore_untracked_in_submodules @@ expression E; @@ - E.IGNORE_DIRTY_SUBMODULES + E.ignore_dirty_submodules @@ expression E; @@ - E.OVERRIDE_SUBMODULE_CONFIG + E.override_submodule_config @@ expression E; @@ - E.DIRSTAT_BY_LINE + E.dirstat_by_line @@ expression E; @@ - E.FUNCCONTEXT + E.funccontext @@ expression E; @@ - E.PICKAXE_IGNORE_CASE + E.pickaxe_ignore_case @@ expression E; @@ - E.DEFAULT_FOLLOW_RENAMES + E.default_follow_renames Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-31 19:19:11 +01:00
diffopt->flags.ignore_submodules = 1;
Add the option "--ignore-submodules" to "git status" In some use cases it is not desirable that "git status" considers submodules that only contain untracked content as dirty. This may happen e.g. when the submodule is not under the developers control and not all build generated files have been added to .gitignore by the upstream developers. Using the "untracked" parameter for the "--ignore-submodules" option disables checking for untracked content and lets git diff report them as changed only when they have new commits or modified content. Sometimes it is not wanted to have submodules show up as changed when they just contain changes to their work tree (this was the behavior before 1.7.0). An example for that are scripts which just want to check for submodule commits while ignoring any changes to the work tree. Also users having large submodules known not to change might want to use this option, as the - sometimes substantial - time it takes to scan the submodule work tree(s) is saved when using the "dirty" parameter. And if you want to ignore any changes to submodules, you can now do that by using this option without parameters or with "all" (when the config option status.submodulesummary is set, using "all" will also suppress the output of the submodule summary). A new function handle_ignore_submodules_arg() is introduced to parse this option new to "git status" in a single location, as "git diff" already knew it. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-06-25 16:56:47 +02:00
else if (!strcmp(arg, "untracked"))
diff: make struct diff_flags members lowercase Now that the flags stored in struct diff_flags are being accessed directly and not through macros, change all struct members from being uppercase to lowercase. This conversion is done using the following semantic patch: @@ expression E; @@ - E.RECURSIVE + E.recursive @@ expression E; @@ - E.TREE_IN_RECURSIVE + E.tree_in_recursive @@ expression E; @@ - E.BINARY + E.binary @@ expression E; @@ - E.TEXT + E.text @@ expression E; @@ - E.FULL_INDEX + E.full_index @@ expression E; @@ - E.SILENT_ON_REMOVE + E.silent_on_remove @@ expression E; @@ - E.FIND_COPIES_HARDER + E.find_copies_harder @@ expression E; @@ - E.FOLLOW_RENAMES + E.follow_renames @@ expression E; @@ - E.RENAME_EMPTY + E.rename_empty @@ expression E; @@ - E.HAS_CHANGES + E.has_changes @@ expression E; @@ - E.QUICK + E.quick @@ expression E; @@ - E.NO_INDEX + E.no_index @@ expression E; @@ - E.ALLOW_EXTERNAL + E.allow_external @@ expression E; @@ - E.EXIT_WITH_STATUS + E.exit_with_status @@ expression E; @@ - E.REVERSE_DIFF + E.reverse_diff @@ expression E; @@ - E.CHECK_FAILED + E.check_failed @@ expression E; @@ - E.RELATIVE_NAME + E.relative_name @@ expression E; @@ - E.IGNORE_SUBMODULES + E.ignore_submodules @@ expression E; @@ - E.DIRSTAT_CUMULATIVE + E.dirstat_cumulative @@ expression E; @@ - E.DIRSTAT_BY_FILE + E.dirstat_by_file @@ expression E; @@ - E.ALLOW_TEXTCONV + E.allow_textconv @@ expression E; @@ - E.TEXTCONV_SET_VIA_CMDLINE + E.textconv_set_via_cmdline @@ expression E; @@ - E.DIFF_FROM_CONTENTS + E.diff_from_contents @@ expression E; @@ - E.DIRTY_SUBMODULES + E.dirty_submodules @@ expression E; @@ - E.IGNORE_UNTRACKED_IN_SUBMODULES + E.ignore_untracked_in_submodules @@ expression E; @@ - E.IGNORE_DIRTY_SUBMODULES + E.ignore_dirty_submodules @@ expression E; @@ - E.OVERRIDE_SUBMODULE_CONFIG + E.override_submodule_config @@ expression E; @@ - E.DIRSTAT_BY_LINE + E.dirstat_by_line @@ expression E; @@ - E.FUNCCONTEXT + E.funccontext @@ expression E; @@ - E.PICKAXE_IGNORE_CASE + E.pickaxe_ignore_case @@ expression E; @@ - E.DEFAULT_FOLLOW_RENAMES + E.default_follow_renames Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-31 19:19:11 +01:00
diffopt->flags.ignore_untracked_in_submodules = 1;
Add the option "--ignore-submodules" to "git status" In some use cases it is not desirable that "git status" considers submodules that only contain untracked content as dirty. This may happen e.g. when the submodule is not under the developers control and not all build generated files have been added to .gitignore by the upstream developers. Using the "untracked" parameter for the "--ignore-submodules" option disables checking for untracked content and lets git diff report them as changed only when they have new commits or modified content. Sometimes it is not wanted to have submodules show up as changed when they just contain changes to their work tree (this was the behavior before 1.7.0). An example for that are scripts which just want to check for submodule commits while ignoring any changes to the work tree. Also users having large submodules known not to change might want to use this option, as the - sometimes substantial - time it takes to scan the submodule work tree(s) is saved when using the "dirty" parameter. And if you want to ignore any changes to submodules, you can now do that by using this option without parameters or with "all" (when the config option status.submodulesummary is set, using "all" will also suppress the output of the submodule summary). A new function handle_ignore_submodules_arg() is introduced to parse this option new to "git status" in a single location, as "git diff" already knew it. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-06-25 16:56:47 +02:00
else if (!strcmp(arg, "dirty"))
diff: make struct diff_flags members lowercase Now that the flags stored in struct diff_flags are being accessed directly and not through macros, change all struct members from being uppercase to lowercase. This conversion is done using the following semantic patch: @@ expression E; @@ - E.RECURSIVE + E.recursive @@ expression E; @@ - E.TREE_IN_RECURSIVE + E.tree_in_recursive @@ expression E; @@ - E.BINARY + E.binary @@ expression E; @@ - E.TEXT + E.text @@ expression E; @@ - E.FULL_INDEX + E.full_index @@ expression E; @@ - E.SILENT_ON_REMOVE + E.silent_on_remove @@ expression E; @@ - E.FIND_COPIES_HARDER + E.find_copies_harder @@ expression E; @@ - E.FOLLOW_RENAMES + E.follow_renames @@ expression E; @@ - E.RENAME_EMPTY + E.rename_empty @@ expression E; @@ - E.HAS_CHANGES + E.has_changes @@ expression E; @@ - E.QUICK + E.quick @@ expression E; @@ - E.NO_INDEX + E.no_index @@ expression E; @@ - E.ALLOW_EXTERNAL + E.allow_external @@ expression E; @@ - E.EXIT_WITH_STATUS + E.exit_with_status @@ expression E; @@ - E.REVERSE_DIFF + E.reverse_diff @@ expression E; @@ - E.CHECK_FAILED + E.check_failed @@ expression E; @@ - E.RELATIVE_NAME + E.relative_name @@ expression E; @@ - E.IGNORE_SUBMODULES + E.ignore_submodules @@ expression E; @@ - E.DIRSTAT_CUMULATIVE + E.dirstat_cumulative @@ expression E; @@ - E.DIRSTAT_BY_FILE + E.dirstat_by_file @@ expression E; @@ - E.ALLOW_TEXTCONV + E.allow_textconv @@ expression E; @@ - E.TEXTCONV_SET_VIA_CMDLINE + E.textconv_set_via_cmdline @@ expression E; @@ - E.DIFF_FROM_CONTENTS + E.diff_from_contents @@ expression E; @@ - E.DIRTY_SUBMODULES + E.dirty_submodules @@ expression E; @@ - E.IGNORE_UNTRACKED_IN_SUBMODULES + E.ignore_untracked_in_submodules @@ expression E; @@ - E.IGNORE_DIRTY_SUBMODULES + E.ignore_dirty_submodules @@ expression E; @@ - E.OVERRIDE_SUBMODULE_CONFIG + E.override_submodule_config @@ expression E; @@ - E.DIRSTAT_BY_LINE + E.dirstat_by_line @@ expression E; @@ - E.FUNCCONTEXT + E.funccontext @@ expression E; @@ - E.PICKAXE_IGNORE_CASE + E.pickaxe_ignore_case @@ expression E; @@ - E.DEFAULT_FOLLOW_RENAMES + E.default_follow_renames Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-31 19:19:11 +01:00
diffopt->flags.ignore_dirty_submodules = 1;
Submodules: Add the new "ignore" config option for diff and status The new "ignore" config option controls the default behavior for "git status" and the diff family. It specifies under what circumstances they consider submodules as modified and can be set separately for each submodule. The command line option "--ignore-submodules=" has been extended to accept the new parameter "none" for both status and diff. Users that chose submodules to get rid of long work tree scanning times might want to set the "dirty" option for those submodules. This brings back the pre 1.7.0 behavior, where submodule work trees were never scanned for modifications. By using "--ignore-submodules=none" on the command line the status and diff commands can be told to do a full scan. This option can be set to the following values (which have the same name and meaning as for the "--ignore-submodules" option of status and diff): "all": All changes to the submodule will be ignored. "dirty": Only differences of the commit recorded in the superproject and the submodules HEAD will be considered modifications, all changes to the work tree of the submodule will be ignored. When using this value, the submodule will not be scanned for work tree changes at all, leading to a performance benefit on large submodules. "untracked": Only untracked files in the submodules work tree are ignored, a changed HEAD and/or modified files in the submodule will mark it as modified. "none" (which is the default): Either untracked or modified files in a submodules work tree or a difference between the subdmodules HEAD and the commit recorded in the superproject will make it show up as changed. This value is added as a new parameter for the "--ignore-submodules" option of the diff family and "git status" so the user can override the settings in the configuration. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-08-06 00:39:25 +02:00
else if (strcmp(arg, "none"))
die(_("bad --ignore-submodules argument: %s"), arg);
/*
* Please update _git_status() in git-completion.bash when you
* add new options
*/
Add the option "--ignore-submodules" to "git status" In some use cases it is not desirable that "git status" considers submodules that only contain untracked content as dirty. This may happen e.g. when the submodule is not under the developers control and not all build generated files have been added to .gitignore by the upstream developers. Using the "untracked" parameter for the "--ignore-submodules" option disables checking for untracked content and lets git diff report them as changed only when they have new commits or modified content. Sometimes it is not wanted to have submodules show up as changed when they just contain changes to their work tree (this was the behavior before 1.7.0). An example for that are scripts which just want to check for submodule commits while ignoring any changes to the work tree. Also users having large submodules known not to change might want to use this option, as the - sometimes substantial - time it takes to scan the submodule work tree(s) is saved when using the "dirty" parameter. And if you want to ignore any changes to submodules, you can now do that by using this option without parameters or with "all" (when the config option status.submodulesummary is set, using "all" will also suppress the output of the submodule summary). A new function handle_ignore_submodules_arg() is introduced to parse this option new to "git status" in a single location, as "git diff" already knew it. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-06-25 16:56:47 +02:00
}
static int prepare_submodule_diff_summary(struct repository *r, struct rev_info *rev,
const char *path,
struct commit *left, struct commit *right,
struct commit_list *merge_bases)
{
struct commit_list *list;
repo_init_revisions(r, rev, NULL);
setup_revisions(0, NULL, rev, NULL);
rev->left_right = 1;
rev->first_parent_only = 1;
left->object.flags |= SYMMETRIC_LEFT;
add_pending_object(rev, &left->object, path);
add_pending_object(rev, &right->object, path);
for (list = merge_bases; list; list = list->next) {
list->item->object.flags |= UNINTERESTING;
add_pending_object(rev, &list->item->object,
oid_to_hex(&list->item->object.oid));
}
return prepare_revision_walk(rev);
}
static void print_submodule_diff_summary(struct repository *r, struct rev_info *rev, struct diff_options *o)
{
static const char format[] = " %m %s";
struct strbuf sb = STRBUF_INIT;
struct commit *commit;
while ((commit = get_revision(rev))) {
struct pretty_print_context ctx = {0};
ctx.date_mode = rev->date_mode;
ctx.output_encoding = get_log_output_encoding();
strbuf_setlen(&sb, 0);
repo_format_commit_message(r, commit, format, &sb,
&ctx);
strbuf_addch(&sb, '\n');
if (commit->object.flags & SYMMETRIC_LEFT)
diff_emit_submodule_del(o, sb.buf);
else
diff_emit_submodule_add(o, sb.buf);
}
strbuf_release(&sb);
}
void prepare_submodule_repo_env(struct strvec *out)
{
prepare_other_repo_env(out, DEFAULT_GIT_DIR_ENVIRONMENT);
}
static void prepare_submodule_repo_env_in_gitdir(struct strvec *out)
{
prepare_other_repo_env(out, ".");
}
/*
* Initialize a repository struct for a submodule based on the provided 'path'.
*
* Unlike repo_submodule_init, this tolerates submodules not present
* in .gitmodules. This function exists only to preserve historical behavior,
*
* Returns the repository struct on success,
* NULL when the submodule is not present.
*/
static struct repository *open_submodule(const char *path)
{
struct strbuf sb = STRBUF_INIT;
struct repository *out = xmalloc(sizeof(*out));
if (submodule_to_gitdir(&sb, path) || repo_init(out, sb.buf, NULL)) {
strbuf_release(&sb);
free(out);
return NULL;
}
/* Mark it as a submodule */
out->submodule_prefix = xstrdup(path);
strbuf_release(&sb);
return out;
}
/*
* Helper function to display the submodule header line prior to the full
* summary output.
*
* If it can locate the submodule git directory it will create a repository
* handle for the submodule and lookup both the left and right commits and
* put them into the left and right pointers.
*/
static void show_submodule_header(struct diff_options *o,
const char *path,
struct object_id *one, struct object_id *two,
unsigned dirty_submodule,
struct repository *sub,
struct commit **left, struct commit **right,
struct commit_list **merge_bases)
{
const char *message = NULL;
struct strbuf sb = STRBUF_INIT;
int fast_forward = 0, fast_backward = 0;
if (dirty_submodule & DIRTY_SUBMODULE_UNTRACKED)
diff_emit_submodule_untracked(o, path);
if (dirty_submodule & DIRTY_SUBMODULE_MODIFIED)
diff_emit_submodule_modified(o, path);
if (is_null_oid(one))
message = "(new submodule)";
else if (is_null_oid(two))
message = "(submodule deleted)";
if (!sub) {
if (!message)
message = "(commits not present)";
goto output_header;
}
/*
* Attempt to lookup the commit references, and determine if this is
* a fast forward or fast backwards update.
*/
*left = lookup_commit_reference(sub, one);
*right = lookup_commit_reference(sub, two);
/*
* Warn about missing commits in the submodule project, but only if
* they aren't null.
*/
if ((!is_null_oid(one) && !*left) ||
(!is_null_oid(two) && !*right))
message = "(commits not present)";
*merge_bases = repo_get_merge_bases(sub, *left, *right);
if (*merge_bases) {
if ((*merge_bases)->item == *left)
fast_forward = 1;
else if ((*merge_bases)->item == *right)
fast_backward = 1;
}
if (oideq(one, two)) {
strbuf_release(&sb);
return;
}
output_header:
strbuf_addf(&sb, "Submodule %s ", path);
strbuf_add_unique_abbrev(&sb, one, DEFAULT_ABBREV);
strbuf_addstr(&sb, (fast_backward || fast_forward) ? ".." : "...");
strbuf_add_unique_abbrev(&sb, two, DEFAULT_ABBREV);
if (message)
strbuf_addf(&sb, " %s\n", message);
else
strbuf_addf(&sb, "%s:\n", fast_backward ? " (rewind)" : "");
diff_emit_submodule_header(o, sb.buf);
strbuf_release(&sb);
}
void show_submodule_diff_summary(struct diff_options *o, const char *path,
struct object_id *one, struct object_id *two,
unsigned dirty_submodule)
{
struct rev_info rev;
struct commit *left = NULL, *right = NULL;
struct commit_list *merge_bases = NULL;
struct repository *sub;
sub = open_submodule(path);
show_submodule_header(o, path, one, two, dirty_submodule,
sub, &left, &right, &merge_bases);
/*
* If we don't have both a left and a right pointer, there is no
* reason to try and display a summary. The header line should contain
* all the information the user needs.
*/
if (!left || !right || !sub)
goto out;
/* Treat revision walker failure the same as missing commits */
if (prepare_submodule_diff_summary(sub, &rev, path, left, right, merge_bases)) {
diff_emit_submodule_error(o, "(revision walker failed)\n");
goto out;
}
print_submodule_diff_summary(sub, &rev, o);
out:
if (merge_bases)
free_commit_list(merge_bases);
clear_commit_marks(left, ~0);
clear_commit_marks(right, ~0);
if (sub) {
repo_clear(sub);
free(sub);
}
}
void show_submodule_inline_diff(struct diff_options *o, const char *path,
struct object_id *one, struct object_id *two,
unsigned dirty_submodule)
{
const struct object_id *old_oid = the_hash_algo->empty_tree, *new_oid = the_hash_algo->empty_tree;
struct commit *left = NULL, *right = NULL;
struct commit_list *merge_bases = NULL;
struct child_process cp = CHILD_PROCESS_INIT;
struct strbuf sb = STRBUF_INIT;
struct repository *sub;
sub = open_submodule(path);
show_submodule_header(o, path, one, two, dirty_submodule,
sub, &left, &right, &merge_bases);
/* We need a valid left and right commit to display a difference */
if (!(left || is_null_oid(one)) ||
!(right || is_null_oid(two)))
goto done;
if (left)
old_oid = one;
if (right)
new_oid = two;
cp.git_cmd = 1;
cp.dir = path;
cp.out = -1;
cp.no_stdin = 1;
/* TODO: other options may need to be passed here. */
strvec_pushl(&cp.args, "diff", "--submodule=diff", NULL);
strvec_pushf(&cp.args, "--color=%s", want_color(o->use_color) ?
"always" : "never");
diff: make struct diff_flags members lowercase Now that the flags stored in struct diff_flags are being accessed directly and not through macros, change all struct members from being uppercase to lowercase. This conversion is done using the following semantic patch: @@ expression E; @@ - E.RECURSIVE + E.recursive @@ expression E; @@ - E.TREE_IN_RECURSIVE + E.tree_in_recursive @@ expression E; @@ - E.BINARY + E.binary @@ expression E; @@ - E.TEXT + E.text @@ expression E; @@ - E.FULL_INDEX + E.full_index @@ expression E; @@ - E.SILENT_ON_REMOVE + E.silent_on_remove @@ expression E; @@ - E.FIND_COPIES_HARDER + E.find_copies_harder @@ expression E; @@ - E.FOLLOW_RENAMES + E.follow_renames @@ expression E; @@ - E.RENAME_EMPTY + E.rename_empty @@ expression E; @@ - E.HAS_CHANGES + E.has_changes @@ expression E; @@ - E.QUICK + E.quick @@ expression E; @@ - E.NO_INDEX + E.no_index @@ expression E; @@ - E.ALLOW_EXTERNAL + E.allow_external @@ expression E; @@ - E.EXIT_WITH_STATUS + E.exit_with_status @@ expression E; @@ - E.REVERSE_DIFF + E.reverse_diff @@ expression E; @@ - E.CHECK_FAILED + E.check_failed @@ expression E; @@ - E.RELATIVE_NAME + E.relative_name @@ expression E; @@ - E.IGNORE_SUBMODULES + E.ignore_submodules @@ expression E; @@ - E.DIRSTAT_CUMULATIVE + E.dirstat_cumulative @@ expression E; @@ - E.DIRSTAT_BY_FILE + E.dirstat_by_file @@ expression E; @@ - E.ALLOW_TEXTCONV + E.allow_textconv @@ expression E; @@ - E.TEXTCONV_SET_VIA_CMDLINE + E.textconv_set_via_cmdline @@ expression E; @@ - E.DIFF_FROM_CONTENTS + E.diff_from_contents @@ expression E; @@ - E.DIRTY_SUBMODULES + E.dirty_submodules @@ expression E; @@ - E.IGNORE_UNTRACKED_IN_SUBMODULES + E.ignore_untracked_in_submodules @@ expression E; @@ - E.IGNORE_DIRTY_SUBMODULES + E.ignore_dirty_submodules @@ expression E; @@ - E.OVERRIDE_SUBMODULE_CONFIG + E.override_submodule_config @@ expression E; @@ - E.DIRSTAT_BY_LINE + E.dirstat_by_line @@ expression E; @@ - E.FUNCCONTEXT + E.funccontext @@ expression E; @@ - E.PICKAXE_IGNORE_CASE + E.pickaxe_ignore_case @@ expression E; @@ - E.DEFAULT_FOLLOW_RENAMES + E.default_follow_renames Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-31 19:19:11 +01:00
if (o->flags.reverse_diff) {
strvec_pushf(&cp.args, "--src-prefix=%s%s/",
o->b_prefix, path);
strvec_pushf(&cp.args, "--dst-prefix=%s%s/",
o->a_prefix, path);
} else {
strvec_pushf(&cp.args, "--src-prefix=%s%s/",
o->a_prefix, path);
strvec_pushf(&cp.args, "--dst-prefix=%s%s/",
o->b_prefix, path);
}
strvec_push(&cp.args, oid_to_hex(old_oid));
/*
* If the submodule has modified content, we will diff against the
* work tree, under the assumption that the user has asked for the
* diff format and wishes to actually see all differences even if they
* haven't yet been committed to the submodule yet.
*/
if (!(dirty_submodule & DIRTY_SUBMODULE_MODIFIED))
strvec_push(&cp.args, oid_to_hex(new_oid));
prepare_submodule_repo_env(&cp.env_array);
if (!is_directory(path)) {
/* fall back to absorbed git dir, if any */
if (!sub)
goto done;
cp.dir = sub->gitdir;
strvec_push(&cp.env_array, GIT_DIR_ENVIRONMENT "=.");
strvec_push(&cp.env_array, GIT_WORK_TREE_ENVIRONMENT "=.");
}
if (start_command(&cp)) {
diff_emit_submodule_error(o, "(diff failed)\n");
goto done;
}
while (strbuf_getwholeline_fd(&sb, cp.out, '\n') != EOF)
diff_emit_submodule_pipethrough(o, sb.buf, sb.len);
if (finish_command(&cp))
diff_emit_submodule_error(o, "(diff failed)\n");
done:
strbuf_release(&sb);
if (merge_bases)
free_commit_list(merge_bases);
if (left)
clear_commit_marks(left, ~0);
if (right)
clear_commit_marks(right, ~0);
if (sub) {
repo_clear(sub);
free(sub);
}
}
int should_update_submodules(void)
{
return config_update_recurse_submodules == RECURSE_SUBMODULES_ON;
}
const struct submodule *submodule_from_ce(const struct cache_entry *ce)
{
if (!S_ISGITLINK(ce->ce_mode))
return NULL;
if (!should_update_submodules())
return NULL;
return submodule_from_path(the_repository, null_oid(), ce->name);
}
static struct oid_array *submodule_commits(struct string_list *submodules,
const char *name)
{
struct string_list_item *item;
item = string_list_insert(submodules, name);
if (item->util)
return (struct oid_array *) item->util;
/* NEEDSWORK: should we have oid_array_init()? */
item->util = xcalloc(1, sizeof(struct oid_array));
return (struct oid_array *) item->util;
}
struct collect_changed_submodules_cb_data {
struct repository *repo;
struct string_list *changed;
const struct object_id *commit_oid;
};
/*
* this would normally be two functions: default_name_from_path() and
* path_from_default_name(). Since the default name is the same as
* the submodule path we can get away with just one function which only
* checks whether there is a submodule in the working directory at that
* location.
*/
static const char *default_name_or_path(const char *path_or_name)
{
int error_code;
if (!is_submodule_populated_gently(path_or_name, &error_code))
return NULL;
return path_or_name;
}
static void collect_changed_submodules_cb(struct diff_queue_struct *q,
struct diff_options *options,
void *data)
{
struct collect_changed_submodules_cb_data *me = data;
struct string_list *changed = me->changed;
const struct object_id *commit_oid = me->commit_oid;
int i;
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
struct oid_array *commits;
const struct submodule *submodule;
const char *name;
if (!S_ISGITLINK(p->two->mode))
continue;
submodule = submodule_from_path(me->repo,
commit_oid, p->two->path);
if (submodule)
name = submodule->name;
else {
name = default_name_or_path(p->two->path);
/* make sure name does not collide with existing one */
2018-06-14 19:31:07 +02:00
if (name)
submodule = submodule_from_name(me->repo,
2018-06-14 19:31:07 +02:00
commit_oid, name);
if (submodule) {
warning(_("Submodule in commit %s at path: "
"'%s' collides with a submodule named "
"the same. Skipping it."),
2018-06-14 19:31:07 +02:00
oid_to_hex(commit_oid), p->two->path);
name = NULL;
}
}
if (!name)
continue;
commits = submodule_commits(changed, name);
oid_array_append(commits, &p->two->oid);
}
}
/*
* Collect the paths of submodules in 'changed' which have changed based on
* the revisions as specified in 'argv'. Each entry in 'changed' will also
* have a corresponding 'struct oid_array' (in the 'util' field) which lists
* what the submodule pointers were updated to during the change.
*/
static void collect_changed_submodules(struct repository *r,
struct string_list *changed,
struct strvec *argv)
{
struct rev_info rev;
const struct commit *commit;
int save_warning;
struct setup_revision_opt s_r_opt = {
.assume_dashdash = 1,
};
save_warning = warn_on_object_refname_ambiguity;
warn_on_object_refname_ambiguity = 0;
repo_init_revisions(r, &rev, NULL);
setup_revisions(argv->nr, argv->v, &rev, &s_r_opt);
warn_on_object_refname_ambiguity = save_warning;
if (prepare_revision_walk(&rev))
die(_("revision walk setup failed"));
while ((commit = get_revision(&rev))) {
struct rev_info diff_rev;
struct collect_changed_submodules_cb_data data;
data.repo = r;
data.changed = changed;
data.commit_oid = &commit->object.oid;
repo_init_revisions(r, &diff_rev, NULL);
diff_rev.diffopt.output_format |= DIFF_FORMAT_CALLBACK;
diff_rev.diffopt.format_callback = collect_changed_submodules_cb;
diff_rev.diffopt.format_callback_data = &data;
diff_rev.dense_combined_merges = 1;
diff_tree_combined_merge(commit, &diff_rev);
}
reset_revision_walk();
}
static void free_submodules_oids(struct string_list *submodules)
{
struct string_list_item *item;
for_each_string_list_item(item, submodules)
oid_array_clear((struct oid_array *) item->util);
string_list_clear(submodules, 1);
}
static int has_remote(const char *refname, const struct object_id *oid,
int flags, void *cb_data)
{
return 1;
}
static int append_oid_to_argv(const struct object_id *oid, void *data)
{
struct strvec *argv = data;
strvec_push(argv, oid_to_hex(oid));
return 0;
}
struct has_commit_data {
struct repository *repo;
int result;
const char *path;
};
static int check_has_commit(const struct object_id *oid, void *data)
{
struct has_commit_data *cb = data;
enum object_type type = oid_object_info(cb->repo, oid, NULL);
switch (type) {
case OBJ_COMMIT:
return 0;
case OBJ_BAD:
/*
* Object is missing or invalid. If invalid, an error message
* has already been printed.
*/
cb->result = 0;
return 0;
default:
die(_("submodule entry '%s' (%s) is a %s, not a commit"),
cb->path, oid_to_hex(oid), type_name(type));
}
}
static int submodule_has_commits(struct repository *r,
const char *path,
struct oid_array *commits)
{
struct has_commit_data has_commit = { r, 1, path };
/*
* Perform a cheap, but incorrect check for the existence of 'commits'.
* This is done by adding the submodule's object store to the in-core
* object store, and then querying for each commit's existence. If we
* do not have the commit object anywhere, there is no chance we have
* it in the object store of the correct submodule and have it
* reachable from a ref, so we can fail early without spawning rev-list
* which is expensive.
*/
if (add_submodule_odb(path))
return 0;
oid_array_for_each_unique(commits, check_has_commit, &has_commit);
if (has_commit.result) {
/*
* Even if the submodule is checked out and the commit is
* present, make sure it exists in the submodule's object store
* and that it is reachable from a ref.
*/
struct child_process cp = CHILD_PROCESS_INIT;
struct strbuf out = STRBUF_INIT;
strvec_pushl(&cp.args, "rev-list", "-n", "1", NULL);
oid_array_for_each_unique(commits, append_oid_to_argv, &cp.args);
strvec_pushl(&cp.args, "--not", "--all", NULL);
prepare_submodule_repo_env(&cp.env_array);
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.dir = path;
if (capture_command(&cp, &out, GIT_MAX_HEXSZ + 1) || out.len)
has_commit.result = 0;
strbuf_release(&out);
}
return has_commit.result;
}
static int submodule_needs_pushing(struct repository *r,
const char *path,
struct oid_array *commits)
{
if (!submodule_has_commits(r, path, commits))
/*
* NOTE: We do consider it safe to return "no" here. The
* correct answer would be "We do not know" instead of
* "No push needed", but it is quite hard to change
* the submodule pointer without having the submodule
* around. If a user did however change the submodules
* without having the submodule around, this indicates
* an expert who knows what they are doing or a
* maintainer integrating work from other people. In
* both cases it should be safe to skip this check.
*/
return 0;
if (for_each_remote_ref_submodule(path, has_remote, NULL) > 0) {
struct child_process cp = CHILD_PROCESS_INIT;
struct strbuf buf = STRBUF_INIT;
int needs_pushing = 0;
strvec_push(&cp.args, "rev-list");
oid_array_for_each_unique(commits, append_oid_to_argv, &cp.args);
strvec_pushl(&cp.args, "--not", "--remotes", "-n", "1" , NULL);
prepare_submodule_repo_env(&cp.env_array);
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.out = -1;
cp.dir = path;
if (start_command(&cp))
die(_("Could not run 'git rev-list <commits> --not --remotes -n 1' command in submodule %s"),
path);
if (strbuf_read(&buf, cp.out, the_hash_algo->hexsz + 1))
needs_pushing = 1;
finish_command(&cp);
close(cp.out);
strbuf_release(&buf);
return needs_pushing;
}
return 0;
}
int find_unpushed_submodules(struct repository *r,
struct oid_array *commits,
const char *remotes_name,
struct string_list *needs_pushing)
{
struct string_list submodules = STRING_LIST_INIT_DUP;
struct string_list_item *name;
struct strvec argv = STRVEC_INIT;
/* argv.v[0] will be ignored by setup_revisions */
strvec_push(&argv, "find_unpushed_submodules");
oid_array_for_each_unique(commits, append_oid_to_argv, &argv);
strvec_push(&argv, "--not");
strvec_pushf(&argv, "--remotes=%s", remotes_name);
collect_changed_submodules(r, &submodules, &argv);
for_each_string_list_item(name, &submodules) {
struct oid_array *commits = name->util;
const struct submodule *submodule;
const char *path = NULL;
submodule = submodule_from_name(r, null_oid(), name->string);
if (submodule)
path = submodule->path;
else
path = default_name_or_path(name->string);
if (!path)
continue;
if (submodule_needs_pushing(r, path, commits))
string_list_insert(needs_pushing, path);
}
free_submodules_oids(&submodules);
strvec_clear(&argv);
return needs_pushing->nr;
}
static int push_submodule(const char *path,
const struct remote *remote,
const struct refspec *rs,
const struct string_list *push_options,
int dry_run)
{
if (for_each_remote_ref_submodule(path, has_remote, NULL) > 0) {
struct child_process cp = CHILD_PROCESS_INIT;
strvec_push(&cp.args, "push");
if (dry_run)
strvec_push(&cp.args, "--dry-run");
if (push_options && push_options->nr) {
const struct string_list_item *item;
for_each_string_list_item(item, push_options)
strvec_pushf(&cp.args, "--push-option=%s",
item->string);
}
if (remote->origin != REMOTE_UNCONFIGURED) {
int i;
strvec_push(&cp.args, remote->name);
for (i = 0; i < rs->raw_nr; i++)
strvec_push(&cp.args, rs->raw[i]);
}
prepare_submodule_repo_env(&cp.env_array);
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.dir = path;
if (run_command(&cp))
return 0;
close(cp.out);
}
return 1;
}
/*
* Perform a check in the submodule to see if the remote and refspec work.
* Die if the submodule can't be pushed.
*/
static void submodule_push_check(const char *path, const char *head,
const struct remote *remote,
const struct refspec *rs)
{
struct child_process cp = CHILD_PROCESS_INIT;
int i;
strvec_push(&cp.args, "submodule--helper");
strvec_push(&cp.args, "push-check");
strvec_push(&cp.args, head);
strvec_push(&cp.args, remote->name);
for (i = 0; i < rs->raw_nr; i++)
strvec_push(&cp.args, rs->raw[i]);
prepare_submodule_repo_env(&cp.env_array);
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.no_stdout = 1;
cp.dir = path;
/*
* Simply indicate if 'submodule--helper push-check' failed.
* More detailed error information will be provided by the
* child process.
*/
if (run_command(&cp))
die(_("process for submodule '%s' failed"), path);
}
int push_unpushed_submodules(struct repository *r,
struct oid_array *commits,
const struct remote *remote,
const struct refspec *rs,
const struct string_list *push_options,
int dry_run)
{
int i, ret = 1;
struct string_list needs_pushing = STRING_LIST_INIT_DUP;
if (!find_unpushed_submodules(r, commits,
remote->name, &needs_pushing))
return 1;
/*
* Verify that the remote and refspec can be propagated to all
* submodules. This check can be skipped if the remote and refspec
* won't be propagated due to the remote being unconfigured (e.g. a URL
* instead of a remote name).
*/
if (remote->origin != REMOTE_UNCONFIGURED) {
char *head;
struct object_id head_oid;
head = resolve_refdup("HEAD", 0, &head_oid, NULL);
if (!head)
die(_("Failed to resolve HEAD as a valid ref."));
for (i = 0; i < needs_pushing.nr; i++)
submodule_push_check(needs_pushing.items[i].string,
head, remote, rs);
free(head);
}
/* Actually push the submodules */
for (i = 0; i < needs_pushing.nr; i++) {
const char *path = needs_pushing.items[i].string;
fprintf(stderr, _("Pushing submodule '%s'\n"), path);
if (!push_submodule(path, remote, rs,
push_options, dry_run)) {
fprintf(stderr, _("Unable to push submodule '%s'\n"), path);
ret = 0;
}
}
string_list_clear(&needs_pushing, 0);
return ret;
}
static int append_oid_to_array(const char *ref, const struct object_id *oid,
int flags, void *data)
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-09-12 21:56:52 +02:00
{
struct oid_array *array = data;
oid_array_append(array, oid);
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-09-12 21:56:52 +02:00
return 0;
}
void check_for_new_submodule_commits(struct object_id *oid)
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-09-12 21:56:52 +02:00
{
if (!initialized_fetch_ref_tips) {
for_each_ref(append_oid_to_array, &ref_tips_before_fetch);
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-09-12 21:56:52 +02:00
initialized_fetch_ref_tips = 1;
}
oid_array_append(&ref_tips_after_fetch, oid);
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-09-12 21:56:52 +02:00
}
static void calculate_changed_submodule_paths(struct repository *r,
struct string_list *changed_submodule_names)
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-06 23:10:46 +01:00
{
struct strvec argv = STRVEC_INIT;
struct string_list_item *name;
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-06 23:10:46 +01:00
/* No need to check if there are no submodules configured */
if (!submodule_from_path(r, NULL, NULL))
return;
strvec_push(&argv, "--"); /* argv[0] program name */
oid_array_for_each_unique(&ref_tips_after_fetch,
append_oid_to_argv, &argv);
strvec_push(&argv, "--not");
oid_array_for_each_unique(&ref_tips_before_fetch,
append_oid_to_argv, &argv);
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-06 23:10:46 +01:00
/*
* Collect all submodules (whether checked out or not) for which new
* commits have been recorded upstream in "changed_submodule_names".
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-06 23:10:46 +01:00
*/
collect_changed_submodules(r, changed_submodule_names, &argv);
for_each_string_list_item(name, changed_submodule_names) {
struct oid_array *commits = name->util;
const struct submodule *submodule;
const char *path = NULL;
submodule = submodule_from_name(r, null_oid(), name->string);
if (submodule)
path = submodule->path;
else
path = default_name_or_path(name->string);
if (!path)
continue;
if (submodule_has_commits(r, path, commits)) {
oid_array_clear(commits);
*name->string = '\0';
}
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-06 23:10:46 +01:00
}
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-09-12 21:56:52 +02:00
string_list_remove_empty_items(changed_submodule_names, 1);
strvec_clear(&argv);
oid_array_clear(&ref_tips_before_fetch);
oid_array_clear(&ref_tips_after_fetch);
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-09-12 21:56:52 +02:00
initialized_fetch_ref_tips = 0;
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-06 23:10:46 +01:00
}
int submodule_touches_in_range(struct repository *r,
struct object_id *excl_oid,
pull: optionally rebase submodules (remote submodule changes only) Teach pull to optionally update submodules when '--recurse-submodules' is provided. This will teach pull to run 'submodule update --rebase' when the '--recurse-submodules' and '--rebase' flags are given under specific circumstances. On a rebase workflow: ===================== 1. Both sides change the submodule ------------------------------ Let's assume the following history in a submodule: H---I---J---K---L local branch \ M---N---O---P remote branch and the following in the superproject (recorded submodule in parens): A(H)---B(I)---F(K)---G(L) local branch \ C(N)---D(N)---E(P) remote branch In an ideal world this would rebase the submodule and rewrite the submodule pointers that the superproject points at such that the superproject looks like A(H)---B(I) F(K')---G(L') rebased branch \ / C(N)---D(N)---E(P) remote branch and the submodule as: J---K---L (old dangeling tip) / H---I J'---K'---L' rebased branch \ / M---N---O---P remote branch And if a conflict arises in the submodule the superproject rebase would stop at that commit at which the submodule conflict occurs. Currently a "pull --rebase" in the superproject produces a merge conflict as the submodule pointer changes are conflicting and cannot be resolved. 2. Local submodule changes only ----------------------- Assuming histories as above, except that the remote branch would not contain submodule changes, then a result as A(H)---B(I) F(K)---G(L) rebased branch \ / C(I)---D(I)---E(I) remote branch is desire-able. This is what currently happens in rebase. If the recursive flag is given, the ideal git would produce a superproject as: A(H)---B(I) F(K')---G(L') rebased branch (incl. sub rebase!) \ / C(I)---D(I)---E(I) remote branch and the submodule as: J---K---L (old dangeling tip) / H---I J'---K'---L' locally rebased branch \ / M---N---O---P advanced branch This patch doesn't address this issue, however a test is added that this fails up front. 3. Remote submodule changes only ---------------------- Assuming histories as in (1) except that the local superproject branch would not have touched the submodule the rebase already works out in the superproject with no conflicts: A(H)---B(I) F(P)---G(P) rebased branch (no sub changes) \ / C(N)---D(N)---E(P) remote branch The recurse flag as presented in this patch would additionally update the submodule as: H---I J'---K'---L' rebased branch \ / M---N---O---P remote branch As neither J, K, L nor J', K', L' are referred to from the superproject, no rewriting of the superproject commits is required. Conclusion for 'pull --rebase --recursive' ----------------------------------------- If there are no local superproject changes it is sufficient to call "submodule update --rebase" as this produces the desired results. In case of conflicts, the behavior is the same as in 'submodule update --recursive' which is assumed to be sane. This patch implements (3) only. On a merge workflow: ==================== We'll start off with the same underlying DAG as in (1) in the rebase workflow. So in an ideal world a 'pull --merge --recursive' would produce this: H---I---J---K---L----X \ / M---N---O---P with X as the new merge-commit in the submodule and the superproject as: A(H)---B(I)---F(K)---G(L)---Y(X) \ / C(N)---D(N)---E(P) However modifying the submodules on the fly is not supported in git-merge such that Y(X) is not easy to produce in a single patch. In fact git-merge doesn't know about submodules at all. However when at least one side does not contain commits touching the submodule at all, then we do not need to perform the merge for the submodule but a fast-forward can be done via checking out either L or P in the submodule. This strategy is implemented in 68d03e4a6e (Implement automatic fast-forward merge for submodules, 2010-07-07) already, so to align with the rebase behavior we need to also update the worktree of the submodule. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-23 21:13:02 +02:00
struct object_id *incl_oid)
{
struct string_list subs = STRING_LIST_INIT_DUP;
struct strvec args = STRVEC_INIT;
pull: optionally rebase submodules (remote submodule changes only) Teach pull to optionally update submodules when '--recurse-submodules' is provided. This will teach pull to run 'submodule update --rebase' when the '--recurse-submodules' and '--rebase' flags are given under specific circumstances. On a rebase workflow: ===================== 1. Both sides change the submodule ------------------------------ Let's assume the following history in a submodule: H---I---J---K---L local branch \ M---N---O---P remote branch and the following in the superproject (recorded submodule in parens): A(H)---B(I)---F(K)---G(L) local branch \ C(N)---D(N)---E(P) remote branch In an ideal world this would rebase the submodule and rewrite the submodule pointers that the superproject points at such that the superproject looks like A(H)---B(I) F(K')---G(L') rebased branch \ / C(N)---D(N)---E(P) remote branch and the submodule as: J---K---L (old dangeling tip) / H---I J'---K'---L' rebased branch \ / M---N---O---P remote branch And if a conflict arises in the submodule the superproject rebase would stop at that commit at which the submodule conflict occurs. Currently a "pull --rebase" in the superproject produces a merge conflict as the submodule pointer changes are conflicting and cannot be resolved. 2. Local submodule changes only ----------------------- Assuming histories as above, except that the remote branch would not contain submodule changes, then a result as A(H)---B(I) F(K)---G(L) rebased branch \ / C(I)---D(I)---E(I) remote branch is desire-able. This is what currently happens in rebase. If the recursive flag is given, the ideal git would produce a superproject as: A(H)---B(I) F(K')---G(L') rebased branch (incl. sub rebase!) \ / C(I)---D(I)---E(I) remote branch and the submodule as: J---K---L (old dangeling tip) / H---I J'---K'---L' locally rebased branch \ / M---N---O---P advanced branch This patch doesn't address this issue, however a test is added that this fails up front. 3. Remote submodule changes only ---------------------- Assuming histories as in (1) except that the local superproject branch would not have touched the submodule the rebase already works out in the superproject with no conflicts: A(H)---B(I) F(P)---G(P) rebased branch (no sub changes) \ / C(N)---D(N)---E(P) remote branch The recurse flag as presented in this patch would additionally update the submodule as: H---I J'---K'---L' rebased branch \ / M---N---O---P remote branch As neither J, K, L nor J', K', L' are referred to from the superproject, no rewriting of the superproject commits is required. Conclusion for 'pull --rebase --recursive' ----------------------------------------- If there are no local superproject changes it is sufficient to call "submodule update --rebase" as this produces the desired results. In case of conflicts, the behavior is the same as in 'submodule update --recursive' which is assumed to be sane. This patch implements (3) only. On a merge workflow: ==================== We'll start off with the same underlying DAG as in (1) in the rebase workflow. So in an ideal world a 'pull --merge --recursive' would produce this: H---I---J---K---L----X \ / M---N---O---P with X as the new merge-commit in the submodule and the superproject as: A(H)---B(I)---F(K)---G(L)---Y(X) \ / C(N)---D(N)---E(P) However modifying the submodules on the fly is not supported in git-merge such that Y(X) is not easy to produce in a single patch. In fact git-merge doesn't know about submodules at all. However when at least one side does not contain commits touching the submodule at all, then we do not need to perform the merge for the submodule but a fast-forward can be done via checking out either L or P in the submodule. This strategy is implemented in 68d03e4a6e (Implement automatic fast-forward merge for submodules, 2010-07-07) already, so to align with the rebase behavior we need to also update the worktree of the submodule. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-23 21:13:02 +02:00
int ret;
/* No need to check if there are no submodules configured */
if (!submodule_from_path(r, NULL, NULL))
pull: optionally rebase submodules (remote submodule changes only) Teach pull to optionally update submodules when '--recurse-submodules' is provided. This will teach pull to run 'submodule update --rebase' when the '--recurse-submodules' and '--rebase' flags are given under specific circumstances. On a rebase workflow: ===================== 1. Both sides change the submodule ------------------------------ Let's assume the following history in a submodule: H---I---J---K---L local branch \ M---N---O---P remote branch and the following in the superproject (recorded submodule in parens): A(H)---B(I)---F(K)---G(L) local branch \ C(N)---D(N)---E(P) remote branch In an ideal world this would rebase the submodule and rewrite the submodule pointers that the superproject points at such that the superproject looks like A(H)---B(I) F(K')---G(L') rebased branch \ / C(N)---D(N)---E(P) remote branch and the submodule as: J---K---L (old dangeling tip) / H---I J'---K'---L' rebased branch \ / M---N---O---P remote branch And if a conflict arises in the submodule the superproject rebase would stop at that commit at which the submodule conflict occurs. Currently a "pull --rebase" in the superproject produces a merge conflict as the submodule pointer changes are conflicting and cannot be resolved. 2. Local submodule changes only ----------------------- Assuming histories as above, except that the remote branch would not contain submodule changes, then a result as A(H)---B(I) F(K)---G(L) rebased branch \ / C(I)---D(I)---E(I) remote branch is desire-able. This is what currently happens in rebase. If the recursive flag is given, the ideal git would produce a superproject as: A(H)---B(I) F(K')---G(L') rebased branch (incl. sub rebase!) \ / C(I)---D(I)---E(I) remote branch and the submodule as: J---K---L (old dangeling tip) / H---I J'---K'---L' locally rebased branch \ / M---N---O---P advanced branch This patch doesn't address this issue, however a test is added that this fails up front. 3. Remote submodule changes only ---------------------- Assuming histories as in (1) except that the local superproject branch would not have touched the submodule the rebase already works out in the superproject with no conflicts: A(H)---B(I) F(P)---G(P) rebased branch (no sub changes) \ / C(N)---D(N)---E(P) remote branch The recurse flag as presented in this patch would additionally update the submodule as: H---I J'---K'---L' rebased branch \ / M---N---O---P remote branch As neither J, K, L nor J', K', L' are referred to from the superproject, no rewriting of the superproject commits is required. Conclusion for 'pull --rebase --recursive' ----------------------------------------- If there are no local superproject changes it is sufficient to call "submodule update --rebase" as this produces the desired results. In case of conflicts, the behavior is the same as in 'submodule update --recursive' which is assumed to be sane. This patch implements (3) only. On a merge workflow: ==================== We'll start off with the same underlying DAG as in (1) in the rebase workflow. So in an ideal world a 'pull --merge --recursive' would produce this: H---I---J---K---L----X \ / M---N---O---P with X as the new merge-commit in the submodule and the superproject as: A(H)---B(I)---F(K)---G(L)---Y(X) \ / C(N)---D(N)---E(P) However modifying the submodules on the fly is not supported in git-merge such that Y(X) is not easy to produce in a single patch. In fact git-merge doesn't know about submodules at all. However when at least one side does not contain commits touching the submodule at all, then we do not need to perform the merge for the submodule but a fast-forward can be done via checking out either L or P in the submodule. This strategy is implemented in 68d03e4a6e (Implement automatic fast-forward merge for submodules, 2010-07-07) already, so to align with the rebase behavior we need to also update the worktree of the submodule. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-23 21:13:02 +02:00
return 0;
strvec_push(&args, "--"); /* args[0] program name */
strvec_push(&args, oid_to_hex(incl_oid));
if (!is_null_oid(excl_oid)) {
strvec_push(&args, "--not");
strvec_push(&args, oid_to_hex(excl_oid));
}
pull: optionally rebase submodules (remote submodule changes only) Teach pull to optionally update submodules when '--recurse-submodules' is provided. This will teach pull to run 'submodule update --rebase' when the '--recurse-submodules' and '--rebase' flags are given under specific circumstances. On a rebase workflow: ===================== 1. Both sides change the submodule ------------------------------ Let's assume the following history in a submodule: H---I---J---K---L local branch \ M---N---O---P remote branch and the following in the superproject (recorded submodule in parens): A(H)---B(I)---F(K)---G(L) local branch \ C(N)---D(N)---E(P) remote branch In an ideal world this would rebase the submodule and rewrite the submodule pointers that the superproject points at such that the superproject looks like A(H)---B(I) F(K')---G(L') rebased branch \ / C(N)---D(N)---E(P) remote branch and the submodule as: J---K---L (old dangeling tip) / H---I J'---K'---L' rebased branch \ / M---N---O---P remote branch And if a conflict arises in the submodule the superproject rebase would stop at that commit at which the submodule conflict occurs. Currently a "pull --rebase" in the superproject produces a merge conflict as the submodule pointer changes are conflicting and cannot be resolved. 2. Local submodule changes only ----------------------- Assuming histories as above, except that the remote branch would not contain submodule changes, then a result as A(H)---B(I) F(K)---G(L) rebased branch \ / C(I)---D(I)---E(I) remote branch is desire-able. This is what currently happens in rebase. If the recursive flag is given, the ideal git would produce a superproject as: A(H)---B(I) F(K')---G(L') rebased branch (incl. sub rebase!) \ / C(I)---D(I)---E(I) remote branch and the submodule as: J---K---L (old dangeling tip) / H---I J'---K'---L' locally rebased branch \ / M---N---O---P advanced branch This patch doesn't address this issue, however a test is added that this fails up front. 3. Remote submodule changes only ---------------------- Assuming histories as in (1) except that the local superproject branch would not have touched the submodule the rebase already works out in the superproject with no conflicts: A(H)---B(I) F(P)---G(P) rebased branch (no sub changes) \ / C(N)---D(N)---E(P) remote branch The recurse flag as presented in this patch would additionally update the submodule as: H---I J'---K'---L' rebased branch \ / M---N---O---P remote branch As neither J, K, L nor J', K', L' are referred to from the superproject, no rewriting of the superproject commits is required. Conclusion for 'pull --rebase --recursive' ----------------------------------------- If there are no local superproject changes it is sufficient to call "submodule update --rebase" as this produces the desired results. In case of conflicts, the behavior is the same as in 'submodule update --recursive' which is assumed to be sane. This patch implements (3) only. On a merge workflow: ==================== We'll start off with the same underlying DAG as in (1) in the rebase workflow. So in an ideal world a 'pull --merge --recursive' would produce this: H---I---J---K---L----X \ / M---N---O---P with X as the new merge-commit in the submodule and the superproject as: A(H)---B(I)---F(K)---G(L)---Y(X) \ / C(N)---D(N)---E(P) However modifying the submodules on the fly is not supported in git-merge such that Y(X) is not easy to produce in a single patch. In fact git-merge doesn't know about submodules at all. However when at least one side does not contain commits touching the submodule at all, then we do not need to perform the merge for the submodule but a fast-forward can be done via checking out either L or P in the submodule. This strategy is implemented in 68d03e4a6e (Implement automatic fast-forward merge for submodules, 2010-07-07) already, so to align with the rebase behavior we need to also update the worktree of the submodule. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-23 21:13:02 +02:00
collect_changed_submodules(r, &subs, &args);
pull: optionally rebase submodules (remote submodule changes only) Teach pull to optionally update submodules when '--recurse-submodules' is provided. This will teach pull to run 'submodule update --rebase' when the '--recurse-submodules' and '--rebase' flags are given under specific circumstances. On a rebase workflow: ===================== 1. Both sides change the submodule ------------------------------ Let's assume the following history in a submodule: H---I---J---K---L local branch \ M---N---O---P remote branch and the following in the superproject (recorded submodule in parens): A(H)---B(I)---F(K)---G(L) local branch \ C(N)---D(N)---E(P) remote branch In an ideal world this would rebase the submodule and rewrite the submodule pointers that the superproject points at such that the superproject looks like A(H)---B(I) F(K')---G(L') rebased branch \ / C(N)---D(N)---E(P) remote branch and the submodule as: J---K---L (old dangeling tip) / H---I J'---K'---L' rebased branch \ / M---N---O---P remote branch And if a conflict arises in the submodule the superproject rebase would stop at that commit at which the submodule conflict occurs. Currently a "pull --rebase" in the superproject produces a merge conflict as the submodule pointer changes are conflicting and cannot be resolved. 2. Local submodule changes only ----------------------- Assuming histories as above, except that the remote branch would not contain submodule changes, then a result as A(H)---B(I) F(K)---G(L) rebased branch \ / C(I)---D(I)---E(I) remote branch is desire-able. This is what currently happens in rebase. If the recursive flag is given, the ideal git would produce a superproject as: A(H)---B(I) F(K')---G(L') rebased branch (incl. sub rebase!) \ / C(I)---D(I)---E(I) remote branch and the submodule as: J---K---L (old dangeling tip) / H---I J'---K'---L' locally rebased branch \ / M---N---O---P advanced branch This patch doesn't address this issue, however a test is added that this fails up front. 3. Remote submodule changes only ---------------------- Assuming histories as in (1) except that the local superproject branch would not have touched the submodule the rebase already works out in the superproject with no conflicts: A(H)---B(I) F(P)---G(P) rebased branch (no sub changes) \ / C(N)---D(N)---E(P) remote branch The recurse flag as presented in this patch would additionally update the submodule as: H---I J'---K'---L' rebased branch \ / M---N---O---P remote branch As neither J, K, L nor J', K', L' are referred to from the superproject, no rewriting of the superproject commits is required. Conclusion for 'pull --rebase --recursive' ----------------------------------------- If there are no local superproject changes it is sufficient to call "submodule update --rebase" as this produces the desired results. In case of conflicts, the behavior is the same as in 'submodule update --recursive' which is assumed to be sane. This patch implements (3) only. On a merge workflow: ==================== We'll start off with the same underlying DAG as in (1) in the rebase workflow. So in an ideal world a 'pull --merge --recursive' would produce this: H---I---J---K---L----X \ / M---N---O---P with X as the new merge-commit in the submodule and the superproject as: A(H)---B(I)---F(K)---G(L)---Y(X) \ / C(N)---D(N)---E(P) However modifying the submodules on the fly is not supported in git-merge such that Y(X) is not easy to produce in a single patch. In fact git-merge doesn't know about submodules at all. However when at least one side does not contain commits touching the submodule at all, then we do not need to perform the merge for the submodule but a fast-forward can be done via checking out either L or P in the submodule. This strategy is implemented in 68d03e4a6e (Implement automatic fast-forward merge for submodules, 2010-07-07) already, so to align with the rebase behavior we need to also update the worktree of the submodule. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-23 21:13:02 +02:00
ret = subs.nr;
strvec_clear(&args);
pull: optionally rebase submodules (remote submodule changes only) Teach pull to optionally update submodules when '--recurse-submodules' is provided. This will teach pull to run 'submodule update --rebase' when the '--recurse-submodules' and '--rebase' flags are given under specific circumstances. On a rebase workflow: ===================== 1. Both sides change the submodule ------------------------------ Let's assume the following history in a submodule: H---I---J---K---L local branch \ M---N---O---P remote branch and the following in the superproject (recorded submodule in parens): A(H)---B(I)---F(K)---G(L) local branch \ C(N)---D(N)---E(P) remote branch In an ideal world this would rebase the submodule and rewrite the submodule pointers that the superproject points at such that the superproject looks like A(H)---B(I) F(K')---G(L') rebased branch \ / C(N)---D(N)---E(P) remote branch and the submodule as: J---K---L (old dangeling tip) / H---I J'---K'---L' rebased branch \ / M---N---O---P remote branch And if a conflict arises in the submodule the superproject rebase would stop at that commit at which the submodule conflict occurs. Currently a "pull --rebase" in the superproject produces a merge conflict as the submodule pointer changes are conflicting and cannot be resolved. 2. Local submodule changes only ----------------------- Assuming histories as above, except that the remote branch would not contain submodule changes, then a result as A(H)---B(I) F(K)---G(L) rebased branch \ / C(I)---D(I)---E(I) remote branch is desire-able. This is what currently happens in rebase. If the recursive flag is given, the ideal git would produce a superproject as: A(H)---B(I) F(K')---G(L') rebased branch (incl. sub rebase!) \ / C(I)---D(I)---E(I) remote branch and the submodule as: J---K---L (old dangeling tip) / H---I J'---K'---L' locally rebased branch \ / M---N---O---P advanced branch This patch doesn't address this issue, however a test is added that this fails up front. 3. Remote submodule changes only ---------------------- Assuming histories as in (1) except that the local superproject branch would not have touched the submodule the rebase already works out in the superproject with no conflicts: A(H)---B(I) F(P)---G(P) rebased branch (no sub changes) \ / C(N)---D(N)---E(P) remote branch The recurse flag as presented in this patch would additionally update the submodule as: H---I J'---K'---L' rebased branch \ / M---N---O---P remote branch As neither J, K, L nor J', K', L' are referred to from the superproject, no rewriting of the superproject commits is required. Conclusion for 'pull --rebase --recursive' ----------------------------------------- If there are no local superproject changes it is sufficient to call "submodule update --rebase" as this produces the desired results. In case of conflicts, the behavior is the same as in 'submodule update --recursive' which is assumed to be sane. This patch implements (3) only. On a merge workflow: ==================== We'll start off with the same underlying DAG as in (1) in the rebase workflow. So in an ideal world a 'pull --merge --recursive' would produce this: H---I---J---K---L----X \ / M---N---O---P with X as the new merge-commit in the submodule and the superproject as: A(H)---B(I)---F(K)---G(L)---Y(X) \ / C(N)---D(N)---E(P) However modifying the submodules on the fly is not supported in git-merge such that Y(X) is not easy to produce in a single patch. In fact git-merge doesn't know about submodules at all. However when at least one side does not contain commits touching the submodule at all, then we do not need to perform the merge for the submodule but a fast-forward can be done via checking out either L or P in the submodule. This strategy is implemented in 68d03e4a6e (Implement automatic fast-forward merge for submodules, 2010-07-07) already, so to align with the rebase behavior we need to also update the worktree of the submodule. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-06-23 21:13:02 +02:00
free_submodules_oids(&subs);
return ret;
}
struct submodule_parallel_fetch {
int count;
struct strvec args;
struct repository *r;
const char *prefix;
int command_line_option;
int default_option;
int quiet;
int result;
struct string_list changed_submodule_names;
/* Pending fetches by OIDs */
struct fetch_task **oid_fetch_tasks;
int oid_fetch_tasks_nr, oid_fetch_tasks_alloc;
struct strbuf submodules_with_errors;
};
#define SPF_INIT {0, STRVEC_INIT, NULL, NULL, 0, 0, 0, 0, \
STRING_LIST_INIT_DUP, \
NULL, 0, 0, STRBUF_INIT}
static int get_fetch_recurse_config(const struct submodule *submodule,
struct submodule_parallel_fetch *spf)
{
if (spf->command_line_option != RECURSE_SUBMODULES_DEFAULT)
return spf->command_line_option;
if (submodule) {
char *key;
const char *value;
int fetch_recurse = submodule->fetch_recurse;
key = xstrfmt("submodule.%s.fetchRecurseSubmodules", submodule->name);
config: fix leaks from git_config_get_string_const() There are two functions to get a single config string: - git_config_get_string() - git_config_get_string_const() One might naively think that the first one allocates a new string and the second one just points us to the internal configset storage. But in fact they both allocate a new copy; the second one exists only to avoid having to cast when using it with a const global which we never intend to free. The documentation for the function explains that clearly, but it seems I'm not alone in being surprised by this. Of 17 calls to the function, 13 of them leak the resulting value. We could obviously fix these by adding the appropriate free(). But it would be simpler still if we actually had a non-allocating way to get the string. There's git_config_get_value() but that doesn't quite do what we want. If the config key is present but is a boolean with no value (e.g., "[foo]bar" in the file), then we'll get NULL (whereas the string versions will print an error and die). So let's introduce a new variant, git_config_get_string_tmp(), that behaves as these callers expect. We need a new name because we have new semantics but the same function signature (so even if we converted the four remaining callers, topics in flight might be surprised). The "tmp" is because this value should only be held onto for a short time. In practice it's rare for us to clear and refresh the configset, invalidating the pointer, but hopefully the "tmp" makes callers think about the lifetime. In each of the converted cases here the value only needs to last within the local function or its immediate caller. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-08-14 18:17:36 +02:00
if (!repo_config_get_string_tmp(spf->r, key, &value)) {
fetch_recurse = parse_fetch_recurse_submodules_arg(key, value);
}
free(key);
if (fetch_recurse != RECURSE_SUBMODULES_NONE)
/* local config overrules everything except commandline */
return fetch_recurse;
}
return spf->default_option;
}
/*
* Fetch in progress (if callback data) or
* pending (if in oid_fetch_tasks in struct submodule_parallel_fetch)
*/
struct fetch_task {
struct repository *repo;
const struct submodule *sub;
unsigned free_sub : 1; /* Do we need to free the submodule? */
struct oid_array *commits; /* Ensure these commits are fetched */
};
/**
* When a submodule is not defined in .gitmodules, we cannot access it
* via the regular submodule-config. Create a fake submodule, which we can
* work on.
*/
static const struct submodule *get_non_gitmodules_submodule(const char *path)
{
struct submodule *ret = NULL;
const char *name = default_name_or_path(path);
if (!name)
return NULL;
ret = xmalloc(sizeof(*ret));
memset(ret, 0, sizeof(*ret));
ret->path = name;
ret->name = name;
return (const struct submodule *) ret;
}
static struct fetch_task *fetch_task_create(struct repository *r,
const char *path)
{
struct fetch_task *task = xmalloc(sizeof(*task));
memset(task, 0, sizeof(*task));
task->sub = submodule_from_path(r, null_oid(), path);
if (!task->sub) {
/*
* No entry in .gitmodules? Technically not a submodule,
* but historically we supported repositories that happen to be
* in-place where a gitlink is. Keep supporting them.
*/
task->sub = get_non_gitmodules_submodule(path);
if (!task->sub) {
free(task);
return NULL;
}
task->free_sub = 1;
}
return task;
}
static void fetch_task_release(struct fetch_task *p)
{
if (p->free_sub)
free((void*)p->sub);
p->free_sub = 0;
p->sub = NULL;
if (p->repo)
repo_clear(p->repo);
FREE_AND_NULL(p->repo);
}
static struct repository *get_submodule_repo_for(struct repository *r,
const struct submodule *sub)
{
struct repository *ret = xmalloc(sizeof(*ret));
if (repo_submodule_init(ret, r, sub)) {
/*
* No entry in .gitmodules? Technically not a submodule,
* but historically we supported repositories that happen to be
* in-place where a gitlink is. Keep supporting them.
*/
struct strbuf gitdir = STRBUF_INIT;
strbuf_repo_worktree_path(&gitdir, r, "%s/.git", sub->path);
if (repo_init(ret, gitdir.buf, NULL)) {
strbuf_release(&gitdir);
free(ret);
return NULL;
}
strbuf_release(&gitdir);
}
return ret;
}
static int get_next_submodule(struct child_process *cp,
struct strbuf *err, void *data, void **task_cb)
{
struct submodule_parallel_fetch *spf = data;
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-09-12 21:56:52 +02:00
for (; spf->count < spf->r->index->cache_nr; spf->count++) {
const struct cache_entry *ce = spf->r->index->cache[spf->count];
const char *default_argv;
struct fetch_task *task;
if (!S_ISGITLINK(ce->ce_mode))
continue;
task = fetch_task_create(spf->r, ce->name);
if (!task)
continue;
switch (get_fetch_recurse_config(task->sub, spf))
{
default:
case RECURSE_SUBMODULES_DEFAULT:
case RECURSE_SUBMODULES_ON_DEMAND:
if (!task->sub ||
!string_list_lookup(
&spf->changed_submodule_names,
task->sub->name))
continue;
default_argv = "on-demand";
break;
case RECURSE_SUBMODULES_ON:
default_argv = "yes";
break;
case RECURSE_SUBMODULES_OFF:
continue;
}
task->repo = get_submodule_repo_for(spf->r, task->sub);
if (task->repo) {
struct strbuf submodule_prefix = STRBUF_INIT;
child_process_init(cp);
cp->dir = task->repo->gitdir;
prepare_submodule_repo_env_in_gitdir(&cp->env_array);
cp->git_cmd = 1;
if (!spf->quiet)
strbuf_addf(err, _("Fetching submodule %s%s\n"),
spf->prefix, ce->name);
strvec_init(&cp->args);
strvec_pushv(&cp->args, spf->args.v);
strvec_push(&cp->args, default_argv);
strvec_push(&cp->args, "--submodule-prefix");
strbuf_addf(&submodule_prefix, "%s%s/",
spf->prefix,
task->sub->path);
strvec_push(&cp->args, submodule_prefix.buf);
spf->count++;
*task_cb = task;
strbuf_release(&submodule_prefix);
return 1;
} else {
submodules: fix of regression on fetching of non-init subsub-repo A regression has been introduced by a62387b (submodule.c: fetch in submodules git directory instead of in worktree, 2018-11-28). The scenario in which it triggers is when one has a repository with a submodule inside a submodule like this: superproject/middle_repo/inner_repo Person A and B have both a clone of it, while Person B is not working with the inner_repo and thus does not have it initialized in his working copy. Now person A introduces a change to the inner_repo and propagates it through the middle_repo and the superproject. Once person A pushed the changes and person B wants to fetch them using "git fetch" at the superproject level, B's git call will return with error saying: Could not access submodule 'inner_repo' Errors during submodule fetch: middle_repo Expectation is that in this case the inner submodule will be recognized as uninitialized submodule and skipped by the git fetch command. This used to work correctly before 'a62387b (submodule.c: fetch in submodules git directory instead of in worktree, 2018-11-28)'. Starting with a62387b the code wants to evaluate "is_empty_dir()" inside .git/modules for a directory only existing in the worktree, delivering then of course wrong return value. This patch ensures is_empty_dir() is getting the correct path of the uninitialized submodule by concatenation of the actual worktree and the name of the uninitialized submodule. The first attempt to fix this regression, in 1b7ac4e6d4 (submodules: fix of regression on fetching of non-init subsub-repo, 2020-11-12), by simply reverting a62387b, resulted in an infinite loop of submodule fetches in the simpler case of a recursive fetch of a superproject with uninitialized submodules, and so this commit was reverted in 7091499bc0 (Revert "submodules: fix of regression on fetching of non-init subsub-repo", 2020-12-02). To prevent future breakages, also add a regression test for this scenario. Signed-off-by: Peter Kaestle <peter.kaestle@nokia.com> CC: Junio C Hamano <gitster@pobox.com> CC: Philippe Blain <levraiphilippeblain@gmail.com> CC: Ralf Thielow <ralf.thielow@gmail.com> CC: Eric Sunshine <sunshine@sunshineco.us> Reviewed-by: Philippe Blain <levraiphilippeblain@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-12-09 11:58:44 +01:00
struct strbuf empty_submodule_path = STRBUF_INIT;
fetch_task_release(task);
free(task);
/*
* An empty directory is normal,
* the submodule is not initialized
*/
submodules: fix of regression on fetching of non-init subsub-repo A regression has been introduced by a62387b (submodule.c: fetch in submodules git directory instead of in worktree, 2018-11-28). The scenario in which it triggers is when one has a repository with a submodule inside a submodule like this: superproject/middle_repo/inner_repo Person A and B have both a clone of it, while Person B is not working with the inner_repo and thus does not have it initialized in his working copy. Now person A introduces a change to the inner_repo and propagates it through the middle_repo and the superproject. Once person A pushed the changes and person B wants to fetch them using "git fetch" at the superproject level, B's git call will return with error saying: Could not access submodule 'inner_repo' Errors during submodule fetch: middle_repo Expectation is that in this case the inner submodule will be recognized as uninitialized submodule and skipped by the git fetch command. This used to work correctly before 'a62387b (submodule.c: fetch in submodules git directory instead of in worktree, 2018-11-28)'. Starting with a62387b the code wants to evaluate "is_empty_dir()" inside .git/modules for a directory only existing in the worktree, delivering then of course wrong return value. This patch ensures is_empty_dir() is getting the correct path of the uninitialized submodule by concatenation of the actual worktree and the name of the uninitialized submodule. The first attempt to fix this regression, in 1b7ac4e6d4 (submodules: fix of regression on fetching of non-init subsub-repo, 2020-11-12), by simply reverting a62387b, resulted in an infinite loop of submodule fetches in the simpler case of a recursive fetch of a superproject with uninitialized submodules, and so this commit was reverted in 7091499bc0 (Revert "submodules: fix of regression on fetching of non-init subsub-repo", 2020-12-02). To prevent future breakages, also add a regression test for this scenario. Signed-off-by: Peter Kaestle <peter.kaestle@nokia.com> CC: Junio C Hamano <gitster@pobox.com> CC: Philippe Blain <levraiphilippeblain@gmail.com> CC: Ralf Thielow <ralf.thielow@gmail.com> CC: Eric Sunshine <sunshine@sunshineco.us> Reviewed-by: Philippe Blain <levraiphilippeblain@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-12-09 11:58:44 +01:00
strbuf_addf(&empty_submodule_path, "%s/%s/",
spf->r->worktree,
ce->name);
if (S_ISGITLINK(ce->ce_mode) &&
submodules: fix of regression on fetching of non-init subsub-repo A regression has been introduced by a62387b (submodule.c: fetch in submodules git directory instead of in worktree, 2018-11-28). The scenario in which it triggers is when one has a repository with a submodule inside a submodule like this: superproject/middle_repo/inner_repo Person A and B have both a clone of it, while Person B is not working with the inner_repo and thus does not have it initialized in his working copy. Now person A introduces a change to the inner_repo and propagates it through the middle_repo and the superproject. Once person A pushed the changes and person B wants to fetch them using "git fetch" at the superproject level, B's git call will return with error saying: Could not access submodule 'inner_repo' Errors during submodule fetch: middle_repo Expectation is that in this case the inner submodule will be recognized as uninitialized submodule and skipped by the git fetch command. This used to work correctly before 'a62387b (submodule.c: fetch in submodules git directory instead of in worktree, 2018-11-28)'. Starting with a62387b the code wants to evaluate "is_empty_dir()" inside .git/modules for a directory only existing in the worktree, delivering then of course wrong return value. This patch ensures is_empty_dir() is getting the correct path of the uninitialized submodule by concatenation of the actual worktree and the name of the uninitialized submodule. The first attempt to fix this regression, in 1b7ac4e6d4 (submodules: fix of regression on fetching of non-init subsub-repo, 2020-11-12), by simply reverting a62387b, resulted in an infinite loop of submodule fetches in the simpler case of a recursive fetch of a superproject with uninitialized submodules, and so this commit was reverted in 7091499bc0 (Revert "submodules: fix of regression on fetching of non-init subsub-repo", 2020-12-02). To prevent future breakages, also add a regression test for this scenario. Signed-off-by: Peter Kaestle <peter.kaestle@nokia.com> CC: Junio C Hamano <gitster@pobox.com> CC: Philippe Blain <levraiphilippeblain@gmail.com> CC: Ralf Thielow <ralf.thielow@gmail.com> CC: Eric Sunshine <sunshine@sunshineco.us> Reviewed-by: Philippe Blain <levraiphilippeblain@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-12-09 11:58:44 +01:00
!is_empty_dir(empty_submodule_path.buf)) {
spf->result = 1;
strbuf_addf(err,
_("Could not access submodule '%s'\n"),
ce->name);
}
submodules: fix of regression on fetching of non-init subsub-repo A regression has been introduced by a62387b (submodule.c: fetch in submodules git directory instead of in worktree, 2018-11-28). The scenario in which it triggers is when one has a repository with a submodule inside a submodule like this: superproject/middle_repo/inner_repo Person A and B have both a clone of it, while Person B is not working with the inner_repo and thus does not have it initialized in his working copy. Now person A introduces a change to the inner_repo and propagates it through the middle_repo and the superproject. Once person A pushed the changes and person B wants to fetch them using "git fetch" at the superproject level, B's git call will return with error saying: Could not access submodule 'inner_repo' Errors during submodule fetch: middle_repo Expectation is that in this case the inner submodule will be recognized as uninitialized submodule and skipped by the git fetch command. This used to work correctly before 'a62387b (submodule.c: fetch in submodules git directory instead of in worktree, 2018-11-28)'. Starting with a62387b the code wants to evaluate "is_empty_dir()" inside .git/modules for a directory only existing in the worktree, delivering then of course wrong return value. This patch ensures is_empty_dir() is getting the correct path of the uninitialized submodule by concatenation of the actual worktree and the name of the uninitialized submodule. The first attempt to fix this regression, in 1b7ac4e6d4 (submodules: fix of regression on fetching of non-init subsub-repo, 2020-11-12), by simply reverting a62387b, resulted in an infinite loop of submodule fetches in the simpler case of a recursive fetch of a superproject with uninitialized submodules, and so this commit was reverted in 7091499bc0 (Revert "submodules: fix of regression on fetching of non-init subsub-repo", 2020-12-02). To prevent future breakages, also add a regression test for this scenario. Signed-off-by: Peter Kaestle <peter.kaestle@nokia.com> CC: Junio C Hamano <gitster@pobox.com> CC: Philippe Blain <levraiphilippeblain@gmail.com> CC: Ralf Thielow <ralf.thielow@gmail.com> CC: Eric Sunshine <sunshine@sunshineco.us> Reviewed-by: Philippe Blain <levraiphilippeblain@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-12-09 11:58:44 +01:00
strbuf_release(&empty_submodule_path);
}
}
if (spf->oid_fetch_tasks_nr) {
struct fetch_task *task =
spf->oid_fetch_tasks[spf->oid_fetch_tasks_nr - 1];
struct strbuf submodule_prefix = STRBUF_INIT;
spf->oid_fetch_tasks_nr--;
strbuf_addf(&submodule_prefix, "%s%s/",
spf->prefix, task->sub->path);
child_process_init(cp);
prepare_submodule_repo_env_in_gitdir(&cp->env_array);
cp->git_cmd = 1;
cp->dir = task->repo->gitdir;
strvec_init(&cp->args);
strvec_pushv(&cp->args, spf->args.v);
strvec_push(&cp->args, "on-demand");
strvec_push(&cp->args, "--submodule-prefix");
strvec_push(&cp->args, submodule_prefix.buf);
/* NEEDSWORK: have get_default_remote from submodule--helper */
strvec_push(&cp->args, "origin");
oid_array_for_each_unique(task->commits,
append_oid_to_argv, &cp->args);
*task_cb = task;
strbuf_release(&submodule_prefix);
return 1;
}
return 0;
}
static int fetch_start_failure(struct strbuf *err,
void *cb, void *task_cb)
{
struct submodule_parallel_fetch *spf = cb;
struct fetch_task *task = task_cb;
spf->result = 1;
fetch_task_release(task);
return 0;
}
static int commit_missing_in_sub(const struct object_id *oid, void *data)
{
struct repository *subrepo = data;
enum object_type type = oid_object_info(subrepo, oid, NULL);
return type != OBJ_COMMIT;
}
static int fetch_finish(int retvalue, struct strbuf *err,
void *cb, void *task_cb)
{
struct submodule_parallel_fetch *spf = cb;
struct fetch_task *task = task_cb;
struct string_list_item *it;
struct oid_array *commits;
if (!task || !task->sub)
BUG("callback cookie bogus");
if (retvalue) {
submodule: explain first attempt failure clearly When cloning with --recurse-submodules a superproject with at least one submodule with HEAD pointing to an unborn branch, the clone goes something like this: Cloning into 'test'... <messages about cloning of superproject> Submodule '<name>' (<uri>) registered for path '<submodule path>' Cloning into '<submodule path>'... fatal: Couldn't find remote ref HEAD Unable to fetch in submodule path '<submodule path>' <messages about fetching with SHA-1> From <uri> * branch <hash> -> FETCH_HEAD Submodule path '<submodule path>': checked out '<hash>' In other words, first, a fetch is done with no hash arguments (that is, a fetch of HEAD) resulting in a "Couldn't find remote ref HEAD" error; then, a fetch is done given a hash, which succeeds. The fetch given a hash was added in fb43e31f2b ("submodule: try harder to fetch needed sha1 by direct fetching sha1", 2016-02-24), and the "Unable to fetch..." message was downgraded from a fatal error to a notice in e30d833671 ("git-submodule.sh: try harder to fetch a submodule", 2018-05-16). This commit improves the notice to be clearer that we are retrying the fetch, and that the previous messages (in particular, the fatal errors from fetch) do not necessarily indicate that the whole command fails. In other words: - If the HEAD-fetch succeeds and we then have the commit we want, git-submodule prints no explanation. - If the HEAD-fetch succeeds and we do not have the commit we want, but the hash-fetch succeeds, git-submodule prints no explanation. - If the HEAD-fetch succeeds and we do not have the commit we want, but the hash-fetch fails, git-submodule prints a fatal error. - If the HEAD-fetch fails, fetch prints a fatal error, and git-submodule informs the user that it will retry by fetching specific commits by hash. - If the hash-fetch then succeeds, git-submodule prints no explanation (besides the ones already printed). - If the HEAD-fetch then fails, git-submodule prints a fatal error. It could be said that we should just eliminate the HEAD-fetch altogether, but that changes some behavior (in particular, some refs that were opportunistically updated would no longer be), so I have left that alone for now. There is an analogous situation with the fetching code in fetch_finish() and surrounding functions. For now, I have added a NEEDSWORK. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-03-13 18:57:38 +01:00
/*
* NEEDSWORK: This indicates that the overall fetch
* failed, even though there may be a subsequent fetch
* by commit hash that might work. It may be a good
* idea to not indicate failure in this case, and only
* indicate failure if the subsequent fetch fails.
*/
spf->result = 1;
strbuf_addf(&spf->submodules_with_errors, "\t%s\n",
task->sub->name);
}
/* Is this the second time we process this submodule? */
if (task->commits)
goto out;
it = string_list_lookup(&spf->changed_submodule_names, task->sub->name);
if (!it)
/* Could be an unchanged submodule, not contained in the list */
goto out;
commits = it->util;
oid_array_filter(commits,
commit_missing_in_sub,
task->repo);
/* Are there commits we want, but do not exist? */
if (commits->nr) {
task->commits = commits;
ALLOC_GROW(spf->oid_fetch_tasks,
spf->oid_fetch_tasks_nr + 1,
spf->oid_fetch_tasks_alloc);
spf->oid_fetch_tasks[spf->oid_fetch_tasks_nr] = task;
spf->oid_fetch_tasks_nr++;
return 0;
}
out:
fetch_task_release(task);
return 0;
}
int fetch_populated_submodules(struct repository *r,
const struct strvec *options,
const char *prefix, int command_line_option,
int default_option,
int quiet, int max_parallel_jobs)
{
int i;
struct submodule_parallel_fetch spf = SPF_INIT;
spf.r = r;
spf.command_line_option = command_line_option;
spf.default_option = default_option;
spf.quiet = quiet;
spf.prefix = prefix;
if (!r->worktree)
goto out;
if (repo_read_index(r) < 0)
die(_("index file corrupt"));
strvec_push(&spf.args, "fetch");
for (i = 0; i < options->nr; i++)
strvec_push(&spf.args, options->v[i]);
strvec_push(&spf.args, "--recurse-submodules-default");
/* default value, "--submodule-prefix" and its value are added later */
calculate_changed_submodule_paths(r, &spf.changed_submodule_names);
string_list_sort(&spf.changed_submodule_names);
run_processes_parallel_tr2(max_parallel_jobs,
get_next_submodule,
fetch_start_failure,
fetch_finish,
&spf,
"submodule", "parallel/fetch");
if (spf.submodules_with_errors.len > 0)
fprintf(stderr, _("Errors during submodule fetch:\n%s"),
spf.submodules_with_errors.buf);
strvec_clear(&spf.args);
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-06 23:10:46 +01:00
out:
free_submodules_oids(&spf.changed_submodule_names);
return spf.result;
}
unsigned is_submodule_modified(const char *path, int ignore_untracked)
{
struct child_process cp = CHILD_PROCESS_INIT;
struct strbuf buf = STRBUF_INIT;
FILE *fp;
unsigned dirty_submodule = 0;
const char *git_dir;
int ignore_cp_exit_code = 0;
strbuf_addf(&buf, "%s/.git", path);
git_dir = read_gitfile(buf.buf);
if (!git_dir)
git_dir = buf.buf;
if (!is_git_directory(git_dir)) {
if (is_directory(git_dir))
die(_("'%s' not recognized as a git repository"), git_dir);
strbuf_release(&buf);
/* The submodule is not checked out, so it is not modified */
return 0;
}
strbuf_reset(&buf);
strvec_pushl(&cp.args, "status", "--porcelain=2", NULL);
if (ignore_untracked)
strvec_push(&cp.args, "-uno");
prepare_submodule_repo_env(&cp.env_array);
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.out = -1;
cp.dir = path;
if (start_command(&cp))
die(_("Could not run 'git status --porcelain=2' in submodule %s"), path);
fp = xfdopen(cp.out, "r");
while (strbuf_getwholeline(&buf, fp, '\n') != EOF) {
/* regular untracked files */
if (buf.buf[0] == '?')
dirty_submodule |= DIRTY_SUBMODULE_UNTRACKED;
if (buf.buf[0] == 'u' ||
buf.buf[0] == '1' ||
buf.buf[0] == '2') {
/* T = line type, XY = status, SSSS = submodule state */
if (buf.len < strlen("T XY SSSS"))
BUG("invalid status --porcelain=2 line %s",
buf.buf);
if (buf.buf[5] == 'S' && buf.buf[8] == 'U')
/* nested untracked file */
dirty_submodule |= DIRTY_SUBMODULE_UNTRACKED;
if (buf.buf[0] == 'u' ||
buf.buf[0] == '2' ||
memcmp(buf.buf + 5, "S..U", 4))
/* other change */
dirty_submodule |= DIRTY_SUBMODULE_MODIFIED;
}
if ((dirty_submodule & DIRTY_SUBMODULE_MODIFIED) &&
((dirty_submodule & DIRTY_SUBMODULE_UNTRACKED) ||
ignore_untracked)) {
/*
* We're not interested in any further information from
* the child any more, neither output nor its exit code.
*/
ignore_cp_exit_code = 1;
break;
}
}
fclose(fp);
if (finish_command(&cp) && !ignore_cp_exit_code)
die(_("'git status --porcelain=2' failed in submodule %s"), path);
strbuf_release(&buf);
return dirty_submodule;
}
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
int submodule_uses_gitfile(const char *path)
{
struct child_process cp = CHILD_PROCESS_INIT;
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
struct strbuf buf = STRBUF_INIT;
const char *git_dir;
strbuf_addf(&buf, "%s/.git", path);
git_dir = read_gitfile(buf.buf);
if (!git_dir) {
strbuf_release(&buf);
return 0;
}
strbuf_release(&buf);
/* Now test that all nested submodules use a gitfile too */
strvec_pushl(&cp.args,
"submodule", "foreach", "--quiet", "--recursive",
"test -f .git", NULL);
prepare_submodule_repo_env(&cp.env_array);
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.no_stderr = 1;
cp.no_stdout = 1;
cp.dir = path;
if (run_command(&cp))
return 0;
return 1;
}
/*
* Check if it is a bad idea to remove a submodule, i.e. if we'd lose data
* when doing so.
*
* Return 1 if we'd lose data, return 0 if the removal is fine,
* and negative values for errors.
*/
int bad_to_remove_submodule(const char *path, unsigned flags)
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
{
ssize_t len;
struct child_process cp = CHILD_PROCESS_INIT;
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
struct strbuf buf = STRBUF_INIT;
int ret = 0;
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
if (!file_exists(path) || is_empty_dir(path))
return 0;
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
if (!submodule_uses_gitfile(path))
return 1;
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
strvec_pushl(&cp.args, "status", "--porcelain",
"--ignore-submodules=none", NULL);
if (flags & SUBMODULE_REMOVAL_IGNORE_UNTRACKED)
strvec_push(&cp.args, "-uno");
else
strvec_push(&cp.args, "-uall");
if (!(flags & SUBMODULE_REMOVAL_IGNORE_IGNORED_UNTRACKED))
strvec_push(&cp.args, "--ignored");
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
prepare_submodule_repo_env(&cp.env_array);
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.out = -1;
cp.dir = path;
if (start_command(&cp)) {
if (flags & SUBMODULE_REMOVAL_DIE_ON_ERROR)
die(_("could not start 'git status' in submodule '%s'"),
path);
ret = -1;
goto out;
}
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
len = strbuf_read(&buf, cp.out, 1024);
if (len > 2)
ret = 1;
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
close(cp.out);
if (finish_command(&cp)) {
if (flags & SUBMODULE_REMOVAL_DIE_ON_ERROR)
die(_("could not run 'git status' in submodule '%s'"),
path);
ret = -1;
}
out:
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
strbuf_release(&buf);
return ret;
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
}
submodule: unset core.worktree if no working tree is present When a submodules work tree is removed, we should unset its core.worktree setting as the worktree is no longer present. This is not just in line with the conceptual view of submodules, but it fixes an inconvenience for looking at submodules that are not checked out: git clone --recurse-submodules git://github.com/git/git && cd git && git checkout --recurse-submodules v2.13.0 git -C .git/modules/sha1collisiondetection log fatal: cannot chdir to '../../../sha1collisiondetection': \ No such file or directory With this patch applied, the final call to git log works instead of dying in its setup, as the checkout will unset the core.worktree setting such that following log will be run in a bare repository. This patch covers all commands that are in the unpack machinery, i.e. checkout, read-tree, reset. A follow up patch will address "git submodule deinit", which will also make use of the new function submodule_unset_core_worktree(), which is why we expose it in this patch. This patch was authored as 4fa4f90ccd (submodule: unset core.worktree if no working tree is present, 2018-06-12), which was reverted as part of f178c13fda (Revert "Merge branch 'sb/submodule-core-worktree'", 2018-09-07). The revert was needed as the nearby commit e98317508c (submodule: ensure core.worktree is set after update, 2018-06-18) is faulty and at the time of 7e25437d35 (Merge branch 'sb/submodule-core-worktree', 2018-07-18) we could not revert the faulty commit only, as they were depending on each other: If core.worktree is unset, we have to have ways to ensure that it is set again once the working tree reappears again. Now that 4d6d6ef1fc (Merge branch 'sb/submodule-update-in-c', 2018-09-17), specifically 74d4731da1 (submodule--helper: replace connect-gitdir-workingtree by ensure-core-worktree, 2018-08-13) is present, we already check and ensure core.worktree is set when populating a new work tree, such that we can re-introduce the commits that unset core.worktree when removing the worktree. Signed-off-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-12-15 00:59:43 +01:00
void submodule_unset_core_worktree(const struct submodule *sub)
{
char *config_path = xstrfmt("%s/modules/%s/config",
submodule.c: use get_git_dir() instead of get_git_common_dir() Ever since df56607dff (git-common-dir: make "modules/" per-working-directory directory, 2014-11-30), submodules in linked worktrees are cloned to $GIT_DIR/modules, i.e. $GIT_COMMON_DIR/worktrees/<name>/modules. However, this convention was not followed when the worktree updater commands checkout, reset and read-tree learned to recurse into submodules. Specifically, submodule.c::submodule_move_head, introduced in 6e3c1595c6 (update submodules: add submodule_move_head, 2017-03-14) and submodule.c::submodule_unset_core_worktree, (re)introduced in 898c2e65b7 (submodule: unset core.worktree if no working tree is present, 2018-12-14) use get_git_common_dir() instead of get_git_dir() to get the path of the submodule repository. This means that, for example, 'git checkout --recurse-submodules <branch>' in a linked worktree will correctly checkout <branch>, detach the submodule's HEAD at the commit recorded in <branch> and update the submodule working tree, but the submodule HEAD that will be moved is the one in $GIT_COMMON_DIR/modules/<name>/, i.e. the submodule repository of the main superproject working tree. It will also rewrite the gitfile in the submodule working tree of the linked worktree to point to $GIT_COMMON_DIR/modules/<name>/. This leads to an incorrect (and confusing!) state in the submodule working tree of the main superproject worktree. Additionally, if switching to a commit where the submodule is not present, submodule_unset_core_worktree will be called and will incorrectly remove 'core.wortree' from the config file of the submodule in the main superproject worktree, $GIT_COMMON_DIR/modules/<name>/config. Fix this by constructing the path to the submodule repository using get_git_dir() in both submodule_move_head and submodule_unset_core_worktree. Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-01-21 16:01:17 +01:00
get_git_dir(), sub->name);
submodule: unset core.worktree if no working tree is present When a submodules work tree is removed, we should unset its core.worktree setting as the worktree is no longer present. This is not just in line with the conceptual view of submodules, but it fixes an inconvenience for looking at submodules that are not checked out: git clone --recurse-submodules git://github.com/git/git && cd git && git checkout --recurse-submodules v2.13.0 git -C .git/modules/sha1collisiondetection log fatal: cannot chdir to '../../../sha1collisiondetection': \ No such file or directory With this patch applied, the final call to git log works instead of dying in its setup, as the checkout will unset the core.worktree setting such that following log will be run in a bare repository. This patch covers all commands that are in the unpack machinery, i.e. checkout, read-tree, reset. A follow up patch will address "git submodule deinit", which will also make use of the new function submodule_unset_core_worktree(), which is why we expose it in this patch. This patch was authored as 4fa4f90ccd (submodule: unset core.worktree if no working tree is present, 2018-06-12), which was reverted as part of f178c13fda (Revert "Merge branch 'sb/submodule-core-worktree'", 2018-09-07). The revert was needed as the nearby commit e98317508c (submodule: ensure core.worktree is set after update, 2018-06-18) is faulty and at the time of 7e25437d35 (Merge branch 'sb/submodule-core-worktree', 2018-07-18) we could not revert the faulty commit only, as they were depending on each other: If core.worktree is unset, we have to have ways to ensure that it is set again once the working tree reappears again. Now that 4d6d6ef1fc (Merge branch 'sb/submodule-update-in-c', 2018-09-17), specifically 74d4731da1 (submodule--helper: replace connect-gitdir-workingtree by ensure-core-worktree, 2018-08-13) is present, we already check and ensure core.worktree is set when populating a new work tree, such that we can re-introduce the commits that unset core.worktree when removing the worktree. Signed-off-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-12-15 00:59:43 +01:00
if (git_config_set_in_file_gently(config_path, "core.worktree", NULL))
warning(_("Could not unset core.worktree setting in submodule '%s'"),
sub->path);
free(config_path);
}
static const char *get_super_prefix_or_empty(void)
{
const char *s = get_super_prefix();
if (!s)
s = "";
return s;
}
static int submodule_has_dirty_index(const struct submodule *sub)
{
struct child_process cp = CHILD_PROCESS_INIT;
prepare_submodule_repo_env(&cp.env_array);
cp.git_cmd = 1;
strvec_pushl(&cp.args, "diff-index", "--quiet",
"--cached", "HEAD", NULL);
cp.no_stdin = 1;
cp.no_stdout = 1;
cp.dir = sub->path;
if (start_command(&cp))
die(_("could not recurse into submodule '%s'"), sub->path);
return finish_command(&cp);
}
static void submodule_reset_index(const char *path)
{
struct child_process cp = CHILD_PROCESS_INIT;
prepare_submodule_repo_env(&cp.env_array);
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.dir = path;
strvec_pushf(&cp.args, "--super-prefix=%s%s/",
get_super_prefix_or_empty(), path);
strvec_pushl(&cp.args, "read-tree", "-u", "--reset", NULL);
strvec_push(&cp.args, empty_tree_oid_hex());
if (run_command(&cp))
die(_("could not reset submodule index"));
}
/**
* Moves a submodule at a given path from a given head to another new head.
* For edge cases (a submodule coming into existence or removing a submodule)
* pass NULL for old or new respectively.
*/
int submodule_move_head(const char *path,
const char *old_head,
const char *new_head,
unsigned flags)
{
int ret = 0;
struct child_process cp = CHILD_PROCESS_INIT;
const struct submodule *sub;
int *error_code_ptr, error_code;
if (!is_submodule_active(the_repository, path))
return 0;
if (flags & SUBMODULE_MOVE_HEAD_FORCE)
/*
* Pass non NULL pointer to is_submodule_populated_gently
* to prevent die()-ing. We'll use connect_work_tree_and_git_dir
* to fixup the submodule in the force case later.
*/
error_code_ptr = &error_code;
else
error_code_ptr = NULL;
if (old_head && !is_submodule_populated_gently(path, error_code_ptr))
return 0;
sub = submodule_from_path(the_repository, null_oid(), path);
if (!sub)
BUG("could not get submodule information for '%s'", path);
if (old_head && !(flags & SUBMODULE_MOVE_HEAD_FORCE)) {
/* Check if the submodule has a dirty index. */
if (submodule_has_dirty_index(sub))
return error(_("submodule '%s' has dirty index"), path);
}
if (!(flags & SUBMODULE_MOVE_HEAD_DRY_RUN)) {
if (old_head) {
if (!submodule_uses_gitfile(path))
absorb_git_dir_into_superproject(path,
ABSORB_GITDIR_RECURSE_SUBMODULES);
} else {
char *gitdir = xstrfmt("%s/modules/%s",
submodule.c: use get_git_dir() instead of get_git_common_dir() Ever since df56607dff (git-common-dir: make "modules/" per-working-directory directory, 2014-11-30), submodules in linked worktrees are cloned to $GIT_DIR/modules, i.e. $GIT_COMMON_DIR/worktrees/<name>/modules. However, this convention was not followed when the worktree updater commands checkout, reset and read-tree learned to recurse into submodules. Specifically, submodule.c::submodule_move_head, introduced in 6e3c1595c6 (update submodules: add submodule_move_head, 2017-03-14) and submodule.c::submodule_unset_core_worktree, (re)introduced in 898c2e65b7 (submodule: unset core.worktree if no working tree is present, 2018-12-14) use get_git_common_dir() instead of get_git_dir() to get the path of the submodule repository. This means that, for example, 'git checkout --recurse-submodules <branch>' in a linked worktree will correctly checkout <branch>, detach the submodule's HEAD at the commit recorded in <branch> and update the submodule working tree, but the submodule HEAD that will be moved is the one in $GIT_COMMON_DIR/modules/<name>/, i.e. the submodule repository of the main superproject working tree. It will also rewrite the gitfile in the submodule working tree of the linked worktree to point to $GIT_COMMON_DIR/modules/<name>/. This leads to an incorrect (and confusing!) state in the submodule working tree of the main superproject worktree. Additionally, if switching to a commit where the submodule is not present, submodule_unset_core_worktree will be called and will incorrectly remove 'core.wortree' from the config file of the submodule in the main superproject worktree, $GIT_COMMON_DIR/modules/<name>/config. Fix this by constructing the path to the submodule repository using get_git_dir() in both submodule_move_head and submodule_unset_core_worktree. Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-01-21 16:01:17 +01:00
get_git_dir(), sub->name);
connect_work_tree_and_git_dir(path, gitdir, 0);
free(gitdir);
/* make sure the index is clean as well */
submodule_reset_index(path);
}
if (old_head && (flags & SUBMODULE_MOVE_HEAD_FORCE)) {
char *gitdir = xstrfmt("%s/modules/%s",
submodule.c: use get_git_dir() instead of get_git_common_dir() Ever since df56607dff (git-common-dir: make "modules/" per-working-directory directory, 2014-11-30), submodules in linked worktrees are cloned to $GIT_DIR/modules, i.e. $GIT_COMMON_DIR/worktrees/<name>/modules. However, this convention was not followed when the worktree updater commands checkout, reset and read-tree learned to recurse into submodules. Specifically, submodule.c::submodule_move_head, introduced in 6e3c1595c6 (update submodules: add submodule_move_head, 2017-03-14) and submodule.c::submodule_unset_core_worktree, (re)introduced in 898c2e65b7 (submodule: unset core.worktree if no working tree is present, 2018-12-14) use get_git_common_dir() instead of get_git_dir() to get the path of the submodule repository. This means that, for example, 'git checkout --recurse-submodules <branch>' in a linked worktree will correctly checkout <branch>, detach the submodule's HEAD at the commit recorded in <branch> and update the submodule working tree, but the submodule HEAD that will be moved is the one in $GIT_COMMON_DIR/modules/<name>/, i.e. the submodule repository of the main superproject working tree. It will also rewrite the gitfile in the submodule working tree of the linked worktree to point to $GIT_COMMON_DIR/modules/<name>/. This leads to an incorrect (and confusing!) state in the submodule working tree of the main superproject worktree. Additionally, if switching to a commit where the submodule is not present, submodule_unset_core_worktree will be called and will incorrectly remove 'core.wortree' from the config file of the submodule in the main superproject worktree, $GIT_COMMON_DIR/modules/<name>/config. Fix this by constructing the path to the submodule repository using get_git_dir() in both submodule_move_head and submodule_unset_core_worktree. Signed-off-by: Philippe Blain <levraiphilippeblain@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-01-21 16:01:17 +01:00
get_git_dir(), sub->name);
connect_work_tree_and_git_dir(path, gitdir, 1);
free(gitdir);
}
}
prepare_submodule_repo_env(&cp.env_array);
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.dir = path;
strvec_pushf(&cp.args, "--super-prefix=%s%s/",
get_super_prefix_or_empty(), path);
strvec_pushl(&cp.args, "read-tree", "--recurse-submodules", NULL);
if (flags & SUBMODULE_MOVE_HEAD_DRY_RUN)
strvec_push(&cp.args, "-n");
else
strvec_push(&cp.args, "-u");
if (flags & SUBMODULE_MOVE_HEAD_FORCE)
strvec_push(&cp.args, "--reset");
else
strvec_push(&cp.args, "-m");
if (!(flags & SUBMODULE_MOVE_HEAD_FORCE))
strvec_push(&cp.args, old_head ? old_head : empty_tree_oid_hex());
strvec_push(&cp.args, new_head ? new_head : empty_tree_oid_hex());
if (run_command(&cp)) {
ret = error(_("Submodule '%s' could not be updated."), path);
goto out;
}
if (!(flags & SUBMODULE_MOVE_HEAD_DRY_RUN)) {
if (new_head) {
child_process_init(&cp);
/* also set the HEAD accordingly */
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.dir = path;
prepare_submodule_repo_env(&cp.env_array);
strvec_pushl(&cp.args, "update-ref", "HEAD",
"--no-deref", new_head, NULL);
if (run_command(&cp)) {
ret = -1;
goto out;
}
} else {
struct strbuf sb = STRBUF_INIT;
strbuf_addf(&sb, "%s/.git", path);
unlink_or_warn(sb.buf);
strbuf_release(&sb);
if (is_empty_dir(path))
rmdir_or_warn(path);
submodule: unset core.worktree if no working tree is present When a submodules work tree is removed, we should unset its core.worktree setting as the worktree is no longer present. This is not just in line with the conceptual view of submodules, but it fixes an inconvenience for looking at submodules that are not checked out: git clone --recurse-submodules git://github.com/git/git && cd git && git checkout --recurse-submodules v2.13.0 git -C .git/modules/sha1collisiondetection log fatal: cannot chdir to '../../../sha1collisiondetection': \ No such file or directory With this patch applied, the final call to git log works instead of dying in its setup, as the checkout will unset the core.worktree setting such that following log will be run in a bare repository. This patch covers all commands that are in the unpack machinery, i.e. checkout, read-tree, reset. A follow up patch will address "git submodule deinit", which will also make use of the new function submodule_unset_core_worktree(), which is why we expose it in this patch. This patch was authored as 4fa4f90ccd (submodule: unset core.worktree if no working tree is present, 2018-06-12), which was reverted as part of f178c13fda (Revert "Merge branch 'sb/submodule-core-worktree'", 2018-09-07). The revert was needed as the nearby commit e98317508c (submodule: ensure core.worktree is set after update, 2018-06-18) is faulty and at the time of 7e25437d35 (Merge branch 'sb/submodule-core-worktree', 2018-07-18) we could not revert the faulty commit only, as they were depending on each other: If core.worktree is unset, we have to have ways to ensure that it is set again once the working tree reappears again. Now that 4d6d6ef1fc (Merge branch 'sb/submodule-update-in-c', 2018-09-17), specifically 74d4731da1 (submodule--helper: replace connect-gitdir-workingtree by ensure-core-worktree, 2018-08-13) is present, we already check and ensure core.worktree is set when populating a new work tree, such that we can re-introduce the commits that unset core.worktree when removing the worktree. Signed-off-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-12-15 00:59:43 +01:00
submodule_unset_core_worktree(sub);
}
}
out:
return ret;
}
int validate_submodule_git_dir(char *git_dir, const char *submodule_name)
{
size_t len = strlen(git_dir), suffix_len = strlen(submodule_name);
char *p;
int ret = 0;
if (len <= suffix_len || (p = git_dir + len - suffix_len)[-1] != '/' ||
strcmp(p, submodule_name))
BUG("submodule name '%s' not a suffix of git dir '%s'",
submodule_name, git_dir);
/*
* We prevent the contents of sibling submodules' git directories to
* clash.
*
* Example: having a submodule named `hippo` and another one named
* `hippo/hooks` would result in the git directories
* `.git/modules/hippo/` and `.git/modules/hippo/hooks/`, respectively,
* but the latter directory is already designated to contain the hooks
* of the former.
*/
for (; *p; p++) {
if (is_dir_sep(*p)) {
char c = *p;
*p = '\0';
if (is_git_directory(git_dir))
ret = -1;
*p = c;
if (ret < 0)
return error(_("submodule git dir '%s' is "
"inside git dir '%.*s'"),
git_dir,
(int)(p - git_dir), git_dir);
}
}
return 0;
}
/*
* Embeds a single submodules git directory into the superprojects git dir,
* non recursively.
*/
static void relocate_single_git_dir_into_superproject(const char *path)
{
char *old_git_dir = NULL, *real_old_git_dir = NULL, *real_new_git_dir = NULL;
char *new_git_dir;
const struct submodule *sub;
if (submodule_uses_worktrees(path))
die(_("relocate_gitdir for submodule '%s' with "
"more than one worktree not supported"), path);
old_git_dir = xstrfmt("%s/.git", path);
if (read_gitfile(old_git_dir))
/* If it is an actual gitfile, it doesn't need migration. */
return;
real_old_git_dir = real_pathdup(old_git_dir, 1);
sub = submodule_from_path(the_repository, null_oid(), path);
if (!sub)
die(_("could not lookup name for submodule '%s'"), path);
new_git_dir = git_pathdup("modules/%s", sub->name);
if (validate_submodule_git_dir(new_git_dir, sub->name) < 0)
die(_("refusing to move '%s' into an existing git dir"),
real_old_git_dir);
if (safe_create_leading_directories_const(new_git_dir) < 0)
die(_("could not create directory '%s'"), new_git_dir);
real_new_git_dir = real_pathdup(new_git_dir, 1);
free(new_git_dir);
fprintf(stderr, _("Migrating git directory of '%s%s' from\n'%s' to\n'%s'\n"),
get_super_prefix_or_empty(), path,
real_old_git_dir, real_new_git_dir);
relocate_gitdir(path, real_old_git_dir, real_new_git_dir);
free(old_git_dir);
free(real_old_git_dir);
free(real_new_git_dir);
}
/*
* Migrate the git directory of the submodule given by path from
* having its git directory within the working tree to the git dir nested
* in its superprojects git dir under modules/.
*/
void absorb_git_dir_into_superproject(const char *path,
unsigned flags)
{
int err_code;
const char *sub_git_dir;
struct strbuf gitdir = STRBUF_INIT;
strbuf_addf(&gitdir, "%s/.git", path);
sub_git_dir = resolve_gitdir_gently(gitdir.buf, &err_code);
/* Not populated? */
if (!sub_git_dir) {
const struct submodule *sub;
if (err_code == READ_GITFILE_ERR_STAT_FAILED) {
/* unpopulated as expected */
strbuf_release(&gitdir);
return;
}
if (err_code != READ_GITFILE_ERR_NOT_A_REPO)
/* We don't know what broke here. */
read_gitfile_error_die(err_code, path, NULL);
/*
* Maybe populated, but no git directory was found?
* This can happen if the superproject is a submodule
* itself and was just absorbed. The absorption of the
* superproject did not rewrite the git file links yet,
* fix it now.
*/
sub = submodule_from_path(the_repository, null_oid(), path);
if (!sub)
die(_("could not lookup name for submodule '%s'"), path);
connect_work_tree_and_git_dir(path,
git_path("modules/%s", sub->name), 0);
} else {
/* Is it already absorbed into the superprojects git dir? */
char *real_sub_git_dir = real_pathdup(sub_git_dir, 1);
char *real_common_git_dir = real_pathdup(get_git_common_dir(), 1);
if (!starts_with(real_sub_git_dir, real_common_git_dir))
relocate_single_git_dir_into_superproject(path);
free(real_sub_git_dir);
free(real_common_git_dir);
}
strbuf_release(&gitdir);
if (flags & ABSORB_GITDIR_RECURSE_SUBMODULES) {
struct child_process cp = CHILD_PROCESS_INIT;
struct strbuf sb = STRBUF_INIT;
if (flags & ~ABSORB_GITDIR_RECURSE_SUBMODULES)
BUG("we don't know how to pass the flags down?");
strbuf_addstr(&sb, get_super_prefix_or_empty());
strbuf_addstr(&sb, path);
strbuf_addch(&sb, '/');
cp.dir = path;
cp.git_cmd = 1;
cp.no_stdin = 1;
strvec_pushl(&cp.args, "--super-prefix", sb.buf,
"submodule--helper",
"absorb-git-dirs", NULL);
prepare_submodule_repo_env(&cp.env_array);
if (run_command(&cp))
die(_("could not recurse into submodule '%s'"), path);
strbuf_release(&sb);
}
}
int get_superproject_working_tree(struct strbuf *buf)
{
struct child_process cp = CHILD_PROCESS_INIT;
struct strbuf sb = STRBUF_INIT;
struct strbuf one_up = STRBUF_INIT;
const char *cwd = xgetcwd();
int ret = 0;
const char *subpath;
int code;
ssize_t len;
if (!is_inside_work_tree())
/*
* FIXME:
* We might have a superproject, but it is harder
* to determine.
*/
return 0;
if (!strbuf_realpath(&one_up, "../", 0))
return 0;
subpath = relative_path(cwd, one_up.buf, &sb);
strbuf_release(&one_up);
prepare_submodule_repo_env(&cp.env_array);
strvec_pop(&cp.env_array);
strvec_pushl(&cp.args, "--literal-pathspecs", "-C", "..",
"ls-files", "-z", "--stage", "--full-name", "--",
subpath, NULL);
strbuf_reset(&sb);
cp.no_stdin = 1;
cp.no_stderr = 1;
cp.out = -1;
cp.git_cmd = 1;
if (start_command(&cp))
die(_("could not start ls-files in .."));
len = strbuf_read(&sb, cp.out, PATH_MAX);
close(cp.out);
if (starts_with(sb.buf, "160000")) {
int super_sub_len;
int cwd_len = strlen(cwd);
char *super_sub, *super_wt;
/*
* There is a superproject having this repo as a submodule.
* The format is <mode> SP <hash> SP <stage> TAB <full name> \0,
* We're only interested in the name after the tab.
*/
super_sub = strchr(sb.buf, '\t') + 1;
super_sub_len = strlen(super_sub);
if (super_sub_len > cwd_len ||
strcmp(&cwd[cwd_len - super_sub_len], super_sub))
BUG("returned path string doesn't match cwd?");
super_wt = xstrdup(cwd);
super_wt[cwd_len - super_sub_len] = '\0';
strbuf_realpath(buf, super_wt, 1);
ret = 1;
free(super_wt);
}
strbuf_release(&sb);
code = finish_command(&cp);
if (code == 128)
/* '../' is not a git repository */
return 0;
if (code == 0 && len == 0)
/* There is an unrelated git repository at '../' */
return 0;
if (code)
die(_("ls-tree returned unexpected return code %d"), code);
return ret;
}
/*
* Put the gitdir for a submodule (given relative to the main
* repository worktree) into `buf`, or return -1 on error.
*/
int submodule_to_gitdir(struct strbuf *buf, const char *submodule)
{
const struct submodule *sub;
const char *git_dir;
int ret = 0;
strbuf_reset(buf);
strbuf_addstr(buf, submodule);
strbuf_complete(buf, '/');
strbuf_addstr(buf, ".git");
git_dir = read_gitfile(buf->buf);
if (git_dir) {
strbuf_reset(buf);
strbuf_addstr(buf, git_dir);
}
if (!is_git_directory(buf->buf)) {
sub = submodule_from_path(the_repository, null_oid(),
submodule);
if (!sub) {
ret = -1;
goto cleanup;
}
strbuf_reset(buf);
strbuf_git_path(buf, "%s/%s", "modules", sub->name);
}
cleanup:
return ret;
}