git-commit-vandalism/submodule.c

2019 lines
54 KiB
C
Raw Normal View History

#include "cache.h"
#include "config.h"
#include "submodule-config.h"
#include "submodule.h"
#include "dir.h"
#include "diff.h"
#include "commit.h"
#include "revision.h"
#include "run-command.h"
#include "diffcore.h"
#include "refs.h"
Submodules: Add the new "ignore" config option for diff and status The new "ignore" config option controls the default behavior for "git status" and the diff family. It specifies under what circumstances they consider submodules as modified and can be set separately for each submodule. The command line option "--ignore-submodules=" has been extended to accept the new parameter "none" for both status and diff. Users that chose submodules to get rid of long work tree scanning times might want to set the "dirty" option for those submodules. This brings back the pre 1.7.0 behavior, where submodule work trees were never scanned for modifications. By using "--ignore-submodules=none" on the command line the status and diff commands can be told to do a full scan. This option can be set to the following values (which have the same name and meaning as for the "--ignore-submodules" option of status and diff): "all": All changes to the submodule will be ignored. "dirty": Only differences of the commit recorded in the superproject and the submodules HEAD will be considered modifications, all changes to the work tree of the submodule will be ignored. When using this value, the submodule will not be scanned for work tree changes at all, leading to a performance benefit on large submodules. "untracked": Only untracked files in the submodules work tree are ignored, a changed HEAD and/or modified files in the submodule will mark it as modified. "none" (which is the default): Either untracked or modified files in a submodules work tree or a difference between the subdmodules HEAD and the commit recorded in the superproject will make it show up as changed. This value is added as a new parameter for the "--ignore-submodules" option of the diff family and "git status" so the user can override the settings in the configuration. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-08-06 00:39:25 +02:00
#include "string-list.h"
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-09-12 21:56:52 +02:00
#include "sha1-array.h"
#include "argv-array.h"
#include "blob.h"
#include "thread-utils.h"
#include "quote.h"
#include "remote.h"
#include "worktree.h"
#include "parse-options.h"
Submodules: Add the new "ignore" config option for diff and status The new "ignore" config option controls the default behavior for "git status" and the diff family. It specifies under what circumstances they consider submodules as modified and can be set separately for each submodule. The command line option "--ignore-submodules=" has been extended to accept the new parameter "none" for both status and diff. Users that chose submodules to get rid of long work tree scanning times might want to set the "dirty" option for those submodules. This brings back the pre 1.7.0 behavior, where submodule work trees were never scanned for modifications. By using "--ignore-submodules=none" on the command line the status and diff commands can be told to do a full scan. This option can be set to the following values (which have the same name and meaning as for the "--ignore-submodules" option of status and diff): "all": All changes to the submodule will be ignored. "dirty": Only differences of the commit recorded in the superproject and the submodules HEAD will be considered modifications, all changes to the work tree of the submodule will be ignored. When using this value, the submodule will not be scanned for work tree changes at all, leading to a performance benefit on large submodules. "untracked": Only untracked files in the submodules work tree are ignored, a changed HEAD and/or modified files in the submodule will mark it as modified. "none" (which is the default): Either untracked or modified files in a submodules work tree or a difference between the subdmodules HEAD and the commit recorded in the superproject will make it show up as changed. This value is added as a new parameter for the "--ignore-submodules" option of the diff family and "git status" so the user can override the settings in the configuration. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-08-06 00:39:25 +02:00
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-06 23:10:46 +01:00
static int config_fetch_recurse_submodules = RECURSE_SUBMODULES_ON_DEMAND;
static int config_update_recurse_submodules = RECURSE_SUBMODULES_OFF;
static int parallel_jobs = 1;
static struct string_list changed_submodule_paths = STRING_LIST_INIT_DUP;
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-09-12 21:56:52 +02:00
static int initialized_fetch_ref_tips;
static struct oid_array ref_tips_before_fetch;
static struct oid_array ref_tips_after_fetch;
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-09-12 21:56:52 +02:00
/*
* The following flag is set if the .gitmodules file is unmerged. We then
* disable recursion for all submodules where .git/config doesn't have a
* matching config entry because we can't guess what might be configured in
* .gitmodules unless the user resolves the conflict. When a command line
* option is given (which always overrides configuration) this flag will be
* ignored.
*/
static int gitmodules_is_unmerged;
/*
* This flag is set if the .gitmodules file had unstaged modifications on
* startup. This must be checked before allowing modifications to the
* .gitmodules file with the intention to stage them later, because when
* continuing we would stage the modifications the user didn't stage herself
* too. That might change in a future version when we learn to stage the
* changes we do ourselves without staging any previous modifications.
*/
static int gitmodules_is_modified;
int is_staging_gitmodules_ok(void)
{
return !gitmodules_is_modified;
}
/*
* Try to update the "path" entry in the "submodule.<name>" section of the
* .gitmodules file. Return 0 only if a .gitmodules file was found, a section
* with the correct path=<oldpath> setting was found and we could update it.
*/
int update_path_in_gitmodules(const char *oldpath, const char *newpath)
{
struct strbuf entry = STRBUF_INIT;
const struct submodule *submodule;
if (!file_exists(".gitmodules")) /* Do nothing without .gitmodules */
return -1;
if (gitmodules_is_unmerged)
die(_("Cannot change unmerged .gitmodules, resolve merge conflicts first"));
submodule = submodule_from_path(null_sha1, oldpath);
if (!submodule || !submodule->name) {
warning(_("Could not find section in .gitmodules where path=%s"), oldpath);
return -1;
}
strbuf_addstr(&entry, "submodule.");
strbuf_addstr(&entry, submodule->name);
strbuf_addstr(&entry, ".path");
if (git_config_set_in_file_gently(".gitmodules", entry.buf, newpath) < 0) {
/* Maybe the user already did that, don't error out here */
warning(_("Could not update .gitmodules entry %s"), entry.buf);
strbuf_release(&entry);
return -1;
}
strbuf_release(&entry);
return 0;
}
rm: delete .gitmodules entry of submodules removed from the work tree Currently using "git rm" on a submodule removes the submodule's work tree from that of the superproject and the gitlink from the index. But the submodule's section in .gitmodules is left untouched, which is a leftover of the now removed submodule and might irritate users (as opposed to the setting in .git/config, this must stay as a reminder that the user showed interest in this submodule so it will be repopulated later when an older commit is checked out). Let "git rm" help the user by not only removing the submodule from the work tree but by also removing the "submodule.<submodule name>" section from the .gitmodules file and stage both. This doesn't happen when the "--cached" option is used, as it would modify the work tree. This also silently does nothing when no .gitmodules file is found and only issues a warning when it doesn't have a section for this submodule. This is because the user might just use plain gitlinks without the .gitmodules file or has already removed the section by hand before issuing the "git rm" command (in which case the warning reminds him that rm would have done that for him). Only when .gitmodules is found and contains merge conflicts the rm command will fail and tell the user to resolve the conflict before trying again. Also extend the man page to inform the user about this new feature. While at it promote the submodule sub-section to a chapter as it made not much sense under "REMOVING FILES THAT HAVE DISAPPEARED FROM THE FILESYSTEM". In t7610 three uses of "git rm submod" had to be replaced with "git rm --cached submod" because that test expects .gitmodules and the work tree to stay untouched. Also in t7400 the tests for the remaining settings in the .gitmodules file had to be changed to assert that these settings are missing. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-06 21:15:25 +02:00
/*
* Try to remove the "submodule.<name>" section from .gitmodules where the given
* path is configured. Return 0 only if a .gitmodules file was found, a section
* with the correct path=<path> setting was found and we could remove it.
*/
int remove_path_from_gitmodules(const char *path)
{
struct strbuf sect = STRBUF_INIT;
const struct submodule *submodule;
rm: delete .gitmodules entry of submodules removed from the work tree Currently using "git rm" on a submodule removes the submodule's work tree from that of the superproject and the gitlink from the index. But the submodule's section in .gitmodules is left untouched, which is a leftover of the now removed submodule and might irritate users (as opposed to the setting in .git/config, this must stay as a reminder that the user showed interest in this submodule so it will be repopulated later when an older commit is checked out). Let "git rm" help the user by not only removing the submodule from the work tree but by also removing the "submodule.<submodule name>" section from the .gitmodules file and stage both. This doesn't happen when the "--cached" option is used, as it would modify the work tree. This also silently does nothing when no .gitmodules file is found and only issues a warning when it doesn't have a section for this submodule. This is because the user might just use plain gitlinks without the .gitmodules file or has already removed the section by hand before issuing the "git rm" command (in which case the warning reminds him that rm would have done that for him). Only when .gitmodules is found and contains merge conflicts the rm command will fail and tell the user to resolve the conflict before trying again. Also extend the man page to inform the user about this new feature. While at it promote the submodule sub-section to a chapter as it made not much sense under "REMOVING FILES THAT HAVE DISAPPEARED FROM THE FILESYSTEM". In t7610 three uses of "git rm submod" had to be replaced with "git rm --cached submod" because that test expects .gitmodules and the work tree to stay untouched. Also in t7400 the tests for the remaining settings in the .gitmodules file had to be changed to assert that these settings are missing. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-06 21:15:25 +02:00
if (!file_exists(".gitmodules")) /* Do nothing without .gitmodules */
return -1;
if (gitmodules_is_unmerged)
die(_("Cannot change unmerged .gitmodules, resolve merge conflicts first"));
submodule = submodule_from_path(null_sha1, path);
if (!submodule || !submodule->name) {
rm: delete .gitmodules entry of submodules removed from the work tree Currently using "git rm" on a submodule removes the submodule's work tree from that of the superproject and the gitlink from the index. But the submodule's section in .gitmodules is left untouched, which is a leftover of the now removed submodule and might irritate users (as opposed to the setting in .git/config, this must stay as a reminder that the user showed interest in this submodule so it will be repopulated later when an older commit is checked out). Let "git rm" help the user by not only removing the submodule from the work tree but by also removing the "submodule.<submodule name>" section from the .gitmodules file and stage both. This doesn't happen when the "--cached" option is used, as it would modify the work tree. This also silently does nothing when no .gitmodules file is found and only issues a warning when it doesn't have a section for this submodule. This is because the user might just use plain gitlinks without the .gitmodules file or has already removed the section by hand before issuing the "git rm" command (in which case the warning reminds him that rm would have done that for him). Only when .gitmodules is found and contains merge conflicts the rm command will fail and tell the user to resolve the conflict before trying again. Also extend the man page to inform the user about this new feature. While at it promote the submodule sub-section to a chapter as it made not much sense under "REMOVING FILES THAT HAVE DISAPPEARED FROM THE FILESYSTEM". In t7610 three uses of "git rm submod" had to be replaced with "git rm --cached submod" because that test expects .gitmodules and the work tree to stay untouched. Also in t7400 the tests for the remaining settings in the .gitmodules file had to be changed to assert that these settings are missing. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-06 21:15:25 +02:00
warning(_("Could not find section in .gitmodules where path=%s"), path);
return -1;
}
strbuf_addstr(&sect, "submodule.");
strbuf_addstr(&sect, submodule->name);
rm: delete .gitmodules entry of submodules removed from the work tree Currently using "git rm" on a submodule removes the submodule's work tree from that of the superproject and the gitlink from the index. But the submodule's section in .gitmodules is left untouched, which is a leftover of the now removed submodule and might irritate users (as opposed to the setting in .git/config, this must stay as a reminder that the user showed interest in this submodule so it will be repopulated later when an older commit is checked out). Let "git rm" help the user by not only removing the submodule from the work tree but by also removing the "submodule.<submodule name>" section from the .gitmodules file and stage both. This doesn't happen when the "--cached" option is used, as it would modify the work tree. This also silently does nothing when no .gitmodules file is found and only issues a warning when it doesn't have a section for this submodule. This is because the user might just use plain gitlinks without the .gitmodules file or has already removed the section by hand before issuing the "git rm" command (in which case the warning reminds him that rm would have done that for him). Only when .gitmodules is found and contains merge conflicts the rm command will fail and tell the user to resolve the conflict before trying again. Also extend the man page to inform the user about this new feature. While at it promote the submodule sub-section to a chapter as it made not much sense under "REMOVING FILES THAT HAVE DISAPPEARED FROM THE FILESYSTEM". In t7610 three uses of "git rm submod" had to be replaced with "git rm --cached submod" because that test expects .gitmodules and the work tree to stay untouched. Also in t7400 the tests for the remaining settings in the .gitmodules file had to be changed to assert that these settings are missing. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-06 21:15:25 +02:00
if (git_config_rename_section_in_file(".gitmodules", sect.buf, NULL) < 0) {
/* Maybe the user already did that, don't error out here */
warning(_("Could not remove .gitmodules entry for %s"), path);
strbuf_release(&sect);
return -1;
}
strbuf_release(&sect);
return 0;
}
void stage_updated_gitmodules(void)
{
if (add_file_to_cache(".gitmodules", 0))
die(_("staging updated .gitmodules failed"));
}
static int add_submodule_odb(const char *path)
{
struct strbuf objects_directory = STRBUF_INIT;
int ret = 0;
allow do_submodule_path to work even if submodule isn't checked out Currently, do_submodule_path will attempt locating the .git directory by using read_gitfile on <path>/.git. If this fails it just assumes the <path>/.git is actually a git directory. This is good because it allows for handling submodules which were cloned in a regular manner first before being added to the superproject. Unfortunately this fails if the <path> is not actually checked out any longer, such as by removing the directory. Fix this by checking if the directory we found is actually a gitdir. In the case it is not, attempt to lookup the submodule configuration and find the name of where it is stored in the .git/modules/ directory of the superproject. If we can't locate the submodule configuration, this might occur because for example a submodule gitlink was added but the corresponding .gitmodules file was not properly updated. A die() here would not be pleasant to the users of submodule diff formats, so instead, modify do_submodule_path() to return an error code: - git_pathdup_submodule() returns NULL when we fail to find a path. - strbuf_git_path_submodule() propagates the error code to the caller. Modify the callers of these functions to check the error code and fail properly. This ensures we don't attempt to use a bad path that doesn't match the corresponding submodule. Because this change fixes add_submodule_odb() to work even if the submodule is not checked out, update the wording of the submodule log diff format to correctly display that the submodule is "not initialized" instead of "not checked out" Add tests to ensure this change works as expected. Signed-off-by: Jacob Keller <jacob.keller@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-01 01:27:22 +02:00
ret = strbuf_git_path_submodule(&objects_directory, path, "objects/");
if (ret)
goto done;
if (!is_directory(objects_directory.buf)) {
ret = -1;
goto done;
}
add_to_alternates_memory(objects_directory.buf);
done:
strbuf_release(&objects_directory);
return ret;
}
Submodules: Add the new "ignore" config option for diff and status The new "ignore" config option controls the default behavior for "git status" and the diff family. It specifies under what circumstances they consider submodules as modified and can be set separately for each submodule. The command line option "--ignore-submodules=" has been extended to accept the new parameter "none" for both status and diff. Users that chose submodules to get rid of long work tree scanning times might want to set the "dirty" option for those submodules. This brings back the pre 1.7.0 behavior, where submodule work trees were never scanned for modifications. By using "--ignore-submodules=none" on the command line the status and diff commands can be told to do a full scan. This option can be set to the following values (which have the same name and meaning as for the "--ignore-submodules" option of status and diff): "all": All changes to the submodule will be ignored. "dirty": Only differences of the commit recorded in the superproject and the submodules HEAD will be considered modifications, all changes to the work tree of the submodule will be ignored. When using this value, the submodule will not be scanned for work tree changes at all, leading to a performance benefit on large submodules. "untracked": Only untracked files in the submodules work tree are ignored, a changed HEAD and/or modified files in the submodule will mark it as modified. "none" (which is the default): Either untracked or modified files in a submodules work tree or a difference between the subdmodules HEAD and the commit recorded in the superproject will make it show up as changed. This value is added as a new parameter for the "--ignore-submodules" option of the diff family and "git status" so the user can override the settings in the configuration. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-08-06 00:39:25 +02:00
void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt,
const char *path)
{
const struct submodule *submodule = submodule_from_path(null_sha1, path);
if (submodule) {
if (submodule->ignore)
handle_ignore_submodules_arg(diffopt, submodule->ignore);
else if (gitmodules_is_unmerged)
DIFF_OPT_SET(diffopt, IGNORE_SUBMODULES);
Submodules: Add the new "ignore" config option for diff and status The new "ignore" config option controls the default behavior for "git status" and the diff family. It specifies under what circumstances they consider submodules as modified and can be set separately for each submodule. The command line option "--ignore-submodules=" has been extended to accept the new parameter "none" for both status and diff. Users that chose submodules to get rid of long work tree scanning times might want to set the "dirty" option for those submodules. This brings back the pre 1.7.0 behavior, where submodule work trees were never scanned for modifications. By using "--ignore-submodules=none" on the command line the status and diff commands can be told to do a full scan. This option can be set to the following values (which have the same name and meaning as for the "--ignore-submodules" option of status and diff): "all": All changes to the submodule will be ignored. "dirty": Only differences of the commit recorded in the superproject and the submodules HEAD will be considered modifications, all changes to the work tree of the submodule will be ignored. When using this value, the submodule will not be scanned for work tree changes at all, leading to a performance benefit on large submodules. "untracked": Only untracked files in the submodules work tree are ignored, a changed HEAD and/or modified files in the submodule will mark it as modified. "none" (which is the default): Either untracked or modified files in a submodules work tree or a difference between the subdmodules HEAD and the commit recorded in the superproject will make it show up as changed. This value is added as a new parameter for the "--ignore-submodules" option of the diff family and "git status" so the user can override the settings in the configuration. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-08-06 00:39:25 +02:00
}
}
submodule loading: separate code path for .gitmodules and config overlay The .gitmodules file is not supposed to have all the options available, that are available in the configuration so separate it out. A configuration option such as the hypothetical submodule.color.diff that determines in which color a submodule change is printed, is a very user specific thing, that the .gitmodules file should not tamper with. The .gitmodules file should only be used for settings that required to setup the project in which the .gitmodules file is tracked. As the minimum this would only include the name<->path mapping of the submodule and its URL and branch. Any further setting (such as 'fetch.recursesubmodules' or 'submodule.<name>.{update, ignore, shallow}') is not specific to the project setup requirements, but rather is a distribution of suggested developer configurations. In other areas of Git a suggested developer configuration is not transported in-tree but via other means. In an organisation this could be done by deploying an opinionated system wide config (/etc/gitconfig) or by putting the settings in the users home directory when they start at the organisation. In open source projects this is often accomplished via extensive READMEs (cf. our SubmittingPatches/CodingGuidlines). As a later patch in this series wants to introduce a generic submodule recursion option, we want to make sure that switch is not exposed via the gitmodules file. Signed-off-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-26 21:10:13 +02:00
/* For loading from the .gitmodules file. */
static int git_modules_config(const char *var, const char *value, void *cb)
{
if (!strcmp(var, "submodule.fetchjobs")) {
parallel_jobs = git_config_int(var, value);
if (parallel_jobs < 0)
die(_("negative values not allowed for submodule.fetchJobs"));
return 0;
} else if (starts_with(var, "submodule."))
return parse_submodule_config_option(var, value);
else if (!strcmp(var, "fetch.recursesubmodules")) {
config_fetch_recurse_submodules = parse_fetch_recurse_submodules_arg(var, value);
return 0;
}
return 0;
}
/* Loads all submodule settings from the config. */
submodule loading: separate code path for .gitmodules and config overlay The .gitmodules file is not supposed to have all the options available, that are available in the configuration so separate it out. A configuration option such as the hypothetical submodule.color.diff that determines in which color a submodule change is printed, is a very user specific thing, that the .gitmodules file should not tamper with. The .gitmodules file should only be used for settings that required to setup the project in which the .gitmodules file is tracked. As the minimum this would only include the name<->path mapping of the submodule and its URL and branch. Any further setting (such as 'fetch.recursesubmodules' or 'submodule.<name>.{update, ignore, shallow}') is not specific to the project setup requirements, but rather is a distribution of suggested developer configurations. In other areas of Git a suggested developer configuration is not transported in-tree but via other means. In an organisation this could be done by deploying an opinionated system wide config (/etc/gitconfig) or by putting the settings in the users home directory when they start at the organisation. In open source projects this is often accomplished via extensive READMEs (cf. our SubmittingPatches/CodingGuidlines). As a later patch in this series wants to introduce a generic submodule recursion option, we want to make sure that switch is not exposed via the gitmodules file. Signed-off-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-26 21:10:13 +02:00
int submodule_config(const char *var, const char *value, void *cb)
{
if (!strcmp(var, "submodule.recurse")) {
int v = git_config_bool(var, value) ?
RECURSE_SUBMODULES_ON : RECURSE_SUBMODULES_OFF;
config_update_recurse_submodules = v;
return 0;
} else {
return git_modules_config(var, value, cb);
}
}
/* Cheap function that only determines if we're interested in submodules at all */
int git_default_submodule_config(const char *var, const char *value, void *cb)
{
if (!strcmp(var, "submodule.recurse")) {
int v = git_config_bool(var, value) ?
RECURSE_SUBMODULES_ON : RECURSE_SUBMODULES_OFF;
config_update_recurse_submodules = v;
}
return 0;
submodule loading: separate code path for .gitmodules and config overlay The .gitmodules file is not supposed to have all the options available, that are available in the configuration so separate it out. A configuration option such as the hypothetical submodule.color.diff that determines in which color a submodule change is printed, is a very user specific thing, that the .gitmodules file should not tamper with. The .gitmodules file should only be used for settings that required to setup the project in which the .gitmodules file is tracked. As the minimum this would only include the name<->path mapping of the submodule and its URL and branch. Any further setting (such as 'fetch.recursesubmodules' or 'submodule.<name>.{update, ignore, shallow}') is not specific to the project setup requirements, but rather is a distribution of suggested developer configurations. In other areas of Git a suggested developer configuration is not transported in-tree but via other means. In an organisation this could be done by deploying an opinionated system wide config (/etc/gitconfig) or by putting the settings in the users home directory when they start at the organisation. In open source projects this is often accomplished via extensive READMEs (cf. our SubmittingPatches/CodingGuidlines). As a later patch in this series wants to introduce a generic submodule recursion option, we want to make sure that switch is not exposed via the gitmodules file. Signed-off-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-26 21:10:13 +02:00
}
int option_parse_recurse_submodules_worktree_updater(const struct option *opt,
const char *arg, int unset)
{
if (unset) {
config_update_recurse_submodules = RECURSE_SUBMODULES_OFF;
return 0;
}
if (arg)
config_update_recurse_submodules =
parse_update_recurse_submodules_arg(opt->long_name,
arg);
else
config_update_recurse_submodules = RECURSE_SUBMODULES_ON;
return 0;
}
void load_submodule_cache(void)
{
if (config_update_recurse_submodules == RECURSE_SUBMODULES_OFF)
return;
gitmodules_config();
git_config(submodule_config, NULL);
}
void gitmodules_config(void)
{
const char *work_tree = get_git_work_tree();
if (work_tree) {
struct strbuf gitmodules_path = STRBUF_INIT;
int pos;
strbuf_addstr(&gitmodules_path, work_tree);
strbuf_addstr(&gitmodules_path, "/.gitmodules");
if (read_cache() < 0)
die("index file corrupt");
pos = cache_name_pos(".gitmodules", 11);
if (pos < 0) { /* .gitmodules not found or isn't merged */
pos = -1 - pos;
if (active_nr > pos) { /* there is a .gitmodules */
const struct cache_entry *ce = active_cache[pos];
if (ce_namelen(ce) == 11 &&
!memcmp(ce->name, ".gitmodules", 11))
gitmodules_is_unmerged = 1;
}
} else if (pos < active_nr) {
struct stat st;
if (lstat(".gitmodules", &st) == 0 &&
ce_match_stat(active_cache[pos], &st, 0) & DATA_CHANGED)
gitmodules_is_modified = 1;
}
if (!gitmodules_is_unmerged)
submodule loading: separate code path for .gitmodules and config overlay The .gitmodules file is not supposed to have all the options available, that are available in the configuration so separate it out. A configuration option such as the hypothetical submodule.color.diff that determines in which color a submodule change is printed, is a very user specific thing, that the .gitmodules file should not tamper with. The .gitmodules file should only be used for settings that required to setup the project in which the .gitmodules file is tracked. As the minimum this would only include the name<->path mapping of the submodule and its URL and branch. Any further setting (such as 'fetch.recursesubmodules' or 'submodule.<name>.{update, ignore, shallow}') is not specific to the project setup requirements, but rather is a distribution of suggested developer configurations. In other areas of Git a suggested developer configuration is not transported in-tree but via other means. In an organisation this could be done by deploying an opinionated system wide config (/etc/gitconfig) or by putting the settings in the users home directory when they start at the organisation. In open source projects this is often accomplished via extensive READMEs (cf. our SubmittingPatches/CodingGuidlines). As a later patch in this series wants to introduce a generic submodule recursion option, we want to make sure that switch is not exposed via the gitmodules file. Signed-off-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-26 21:10:13 +02:00
git_config_from_file(git_modules_config,
gitmodules_path.buf, NULL);
strbuf_release(&gitmodules_path);
}
}
void gitmodules_config_sha1(const unsigned char *commit_sha1)
{
struct strbuf rev = STRBUF_INIT;
unsigned char sha1[20];
if (gitmodule_sha1_from_commit(commit_sha1, sha1, &rev)) {
submodule loading: separate code path for .gitmodules and config overlay The .gitmodules file is not supposed to have all the options available, that are available in the configuration so separate it out. A configuration option such as the hypothetical submodule.color.diff that determines in which color a submodule change is printed, is a very user specific thing, that the .gitmodules file should not tamper with. The .gitmodules file should only be used for settings that required to setup the project in which the .gitmodules file is tracked. As the minimum this would only include the name<->path mapping of the submodule and its URL and branch. Any further setting (such as 'fetch.recursesubmodules' or 'submodule.<name>.{update, ignore, shallow}') is not specific to the project setup requirements, but rather is a distribution of suggested developer configurations. In other areas of Git a suggested developer configuration is not transported in-tree but via other means. In an organisation this could be done by deploying an opinionated system wide config (/etc/gitconfig) or by putting the settings in the users home directory when they start at the organisation. In open source projects this is often accomplished via extensive READMEs (cf. our SubmittingPatches/CodingGuidlines). As a later patch in this series wants to introduce a generic submodule recursion option, we want to make sure that switch is not exposed via the gitmodules file. Signed-off-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-26 21:10:13 +02:00
git_config_from_blob_sha1(git_modules_config, rev.buf,
sha1, NULL);
}
strbuf_release(&rev);
}
/*
submodule: decouple url and submodule interest Currently the submodule.<name>.url config option is used to determine if a given submodule is of interest to the user. This ends up being cumbersome in a world where we want to have different submodules checked out in different worktrees or a more generalized mechanism to select which submodules are of interest. In a future with worktree support for submodules, there will be multiple working trees, each of which may only need a subset of the submodules checked out. The URL (which is where the submodule repository can be obtained) should not differ between different working trees. It may also be convenient for users to more easily specify groups of submodules they are interested in as opposed to running "git submodule init <path>" on each submodule they want checked out in their working tree. To this end two config options are introduced, submodule.active and submodule.<name>.active. The submodule.active config holds a pathspec that specifies which submodules should exist in the working tree. The submodule.<name>.active config is a boolean flag used to indicate if that particular submodule should exist in the working tree. Its important to note that submodule.active functions differently than the other configuration options since it takes a pathspec. This allows users to adopt at least two new workflows: 1. Submodules can be grouped with a leading directory, such that a pathspec e.g. 'lib/' would cover all library-ish modules to allow those who are interested in library-ish modules to set "submodule.active = lib/" just once to say any and all modules in 'lib/' are interesting. 2. Once the pathspec-attribute feature is invented, users can label submodules with attributes to group them, so that a broad pathspec with attribute requirements, e.g. ':(attr:lib)', can be used to say any and all modules with the 'lib' attribute are interesting. Since the .gitattributes file, just like the .gitmodules file, is tracked by the superproject, when a submodule moves in the superproject tree, the project can adjust which path gets the attribute in .gitattributes, just like it can adjust which path has the submodule in .gitmodules. Neither of these two additional configuration options solve the problem of wanting different submodules checked out in different worktrees because multiple worktrees share .git/config. Only once per-worktree configurations become a reality can this be solved, but this is a necessary preparatory step for that future. Given these multiple ways to check if a submodule is of interest, the more fine-grained submodule.<name>.active option has the highest order of precedence followed by the pathspec check against submodule.active. To ensure backwards compatibility, if neither of these options are set, git falls back to checking the submodule.<name>.url option to determine if a submodule is interesting. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-17 23:38:01 +01:00
* NEEDSWORK: With the addition of different configuration options to determine
* if a submodule is of interests, the validity of this function's name comes
* into question. Once the dust has settled and more concrete terminology is
* decided upon, come up with a more proper name for this function. One
* potential candidate could be 'is_submodule_active()'.
*
* Determine if a submodule has been initialized at a given 'path'
*/
int is_submodule_initialized(const char *path)
{
int ret = 0;
submodule: decouple url and submodule interest Currently the submodule.<name>.url config option is used to determine if a given submodule is of interest to the user. This ends up being cumbersome in a world where we want to have different submodules checked out in different worktrees or a more generalized mechanism to select which submodules are of interest. In a future with worktree support for submodules, there will be multiple working trees, each of which may only need a subset of the submodules checked out. The URL (which is where the submodule repository can be obtained) should not differ between different working trees. It may also be convenient for users to more easily specify groups of submodules they are interested in as opposed to running "git submodule init <path>" on each submodule they want checked out in their working tree. To this end two config options are introduced, submodule.active and submodule.<name>.active. The submodule.active config holds a pathspec that specifies which submodules should exist in the working tree. The submodule.<name>.active config is a boolean flag used to indicate if that particular submodule should exist in the working tree. Its important to note that submodule.active functions differently than the other configuration options since it takes a pathspec. This allows users to adopt at least two new workflows: 1. Submodules can be grouped with a leading directory, such that a pathspec e.g. 'lib/' would cover all library-ish modules to allow those who are interested in library-ish modules to set "submodule.active = lib/" just once to say any and all modules in 'lib/' are interesting. 2. Once the pathspec-attribute feature is invented, users can label submodules with attributes to group them, so that a broad pathspec with attribute requirements, e.g. ':(attr:lib)', can be used to say any and all modules with the 'lib' attribute are interesting. Since the .gitattributes file, just like the .gitmodules file, is tracked by the superproject, when a submodule moves in the superproject tree, the project can adjust which path gets the attribute in .gitattributes, just like it can adjust which path has the submodule in .gitmodules. Neither of these two additional configuration options solve the problem of wanting different submodules checked out in different worktrees because multiple worktrees share .git/config. Only once per-worktree configurations become a reality can this be solved, but this is a necessary preparatory step for that future. Given these multiple ways to check if a submodule is of interest, the more fine-grained submodule.<name>.active option has the highest order of precedence followed by the pathspec check against submodule.active. To ensure backwards compatibility, if neither of these options are set, git falls back to checking the submodule.<name>.url option to determine if a submodule is interesting. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-17 23:38:01 +01:00
char *key = NULL;
char *value = NULL;
const struct string_list *sl;
const struct submodule *module = submodule_from_path(null_sha1, path);
submodule: decouple url and submodule interest Currently the submodule.<name>.url config option is used to determine if a given submodule is of interest to the user. This ends up being cumbersome in a world where we want to have different submodules checked out in different worktrees or a more generalized mechanism to select which submodules are of interest. In a future with worktree support for submodules, there will be multiple working trees, each of which may only need a subset of the submodules checked out. The URL (which is where the submodule repository can be obtained) should not differ between different working trees. It may also be convenient for users to more easily specify groups of submodules they are interested in as opposed to running "git submodule init <path>" on each submodule they want checked out in their working tree. To this end two config options are introduced, submodule.active and submodule.<name>.active. The submodule.active config holds a pathspec that specifies which submodules should exist in the working tree. The submodule.<name>.active config is a boolean flag used to indicate if that particular submodule should exist in the working tree. Its important to note that submodule.active functions differently than the other configuration options since it takes a pathspec. This allows users to adopt at least two new workflows: 1. Submodules can be grouped with a leading directory, such that a pathspec e.g. 'lib/' would cover all library-ish modules to allow those who are interested in library-ish modules to set "submodule.active = lib/" just once to say any and all modules in 'lib/' are interesting. 2. Once the pathspec-attribute feature is invented, users can label submodules with attributes to group them, so that a broad pathspec with attribute requirements, e.g. ':(attr:lib)', can be used to say any and all modules with the 'lib' attribute are interesting. Since the .gitattributes file, just like the .gitmodules file, is tracked by the superproject, when a submodule moves in the superproject tree, the project can adjust which path gets the attribute in .gitattributes, just like it can adjust which path has the submodule in .gitmodules. Neither of these two additional configuration options solve the problem of wanting different submodules checked out in different worktrees because multiple worktrees share .git/config. Only once per-worktree configurations become a reality can this be solved, but this is a necessary preparatory step for that future. Given these multiple ways to check if a submodule is of interest, the more fine-grained submodule.<name>.active option has the highest order of precedence followed by the pathspec check against submodule.active. To ensure backwards compatibility, if neither of these options are set, git falls back to checking the submodule.<name>.url option to determine if a submodule is interesting. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-17 23:38:01 +01:00
/* early return if there isn't a path->module mapping */
if (!module)
return 0;
submodule: decouple url and submodule interest Currently the submodule.<name>.url config option is used to determine if a given submodule is of interest to the user. This ends up being cumbersome in a world where we want to have different submodules checked out in different worktrees or a more generalized mechanism to select which submodules are of interest. In a future with worktree support for submodules, there will be multiple working trees, each of which may only need a subset of the submodules checked out. The URL (which is where the submodule repository can be obtained) should not differ between different working trees. It may also be convenient for users to more easily specify groups of submodules they are interested in as opposed to running "git submodule init <path>" on each submodule they want checked out in their working tree. To this end two config options are introduced, submodule.active and submodule.<name>.active. The submodule.active config holds a pathspec that specifies which submodules should exist in the working tree. The submodule.<name>.active config is a boolean flag used to indicate if that particular submodule should exist in the working tree. Its important to note that submodule.active functions differently than the other configuration options since it takes a pathspec. This allows users to adopt at least two new workflows: 1. Submodules can be grouped with a leading directory, such that a pathspec e.g. 'lib/' would cover all library-ish modules to allow those who are interested in library-ish modules to set "submodule.active = lib/" just once to say any and all modules in 'lib/' are interesting. 2. Once the pathspec-attribute feature is invented, users can label submodules with attributes to group them, so that a broad pathspec with attribute requirements, e.g. ':(attr:lib)', can be used to say any and all modules with the 'lib' attribute are interesting. Since the .gitattributes file, just like the .gitmodules file, is tracked by the superproject, when a submodule moves in the superproject tree, the project can adjust which path gets the attribute in .gitattributes, just like it can adjust which path has the submodule in .gitmodules. Neither of these two additional configuration options solve the problem of wanting different submodules checked out in different worktrees because multiple worktrees share .git/config. Only once per-worktree configurations become a reality can this be solved, but this is a necessary preparatory step for that future. Given these multiple ways to check if a submodule is of interest, the more fine-grained submodule.<name>.active option has the highest order of precedence followed by the pathspec check against submodule.active. To ensure backwards compatibility, if neither of these options are set, git falls back to checking the submodule.<name>.url option to determine if a submodule is interesting. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-17 23:38:01 +01:00
/* submodule.<name>.active is set */
key = xstrfmt("submodule.%s.active", module->name);
if (!git_config_get_bool(key, &ret)) {
free(key);
return ret;
}
free(key);
submodule: decouple url and submodule interest Currently the submodule.<name>.url config option is used to determine if a given submodule is of interest to the user. This ends up being cumbersome in a world where we want to have different submodules checked out in different worktrees or a more generalized mechanism to select which submodules are of interest. In a future with worktree support for submodules, there will be multiple working trees, each of which may only need a subset of the submodules checked out. The URL (which is where the submodule repository can be obtained) should not differ between different working trees. It may also be convenient for users to more easily specify groups of submodules they are interested in as opposed to running "git submodule init <path>" on each submodule they want checked out in their working tree. To this end two config options are introduced, submodule.active and submodule.<name>.active. The submodule.active config holds a pathspec that specifies which submodules should exist in the working tree. The submodule.<name>.active config is a boolean flag used to indicate if that particular submodule should exist in the working tree. Its important to note that submodule.active functions differently than the other configuration options since it takes a pathspec. This allows users to adopt at least two new workflows: 1. Submodules can be grouped with a leading directory, such that a pathspec e.g. 'lib/' would cover all library-ish modules to allow those who are interested in library-ish modules to set "submodule.active = lib/" just once to say any and all modules in 'lib/' are interesting. 2. Once the pathspec-attribute feature is invented, users can label submodules with attributes to group them, so that a broad pathspec with attribute requirements, e.g. ':(attr:lib)', can be used to say any and all modules with the 'lib' attribute are interesting. Since the .gitattributes file, just like the .gitmodules file, is tracked by the superproject, when a submodule moves in the superproject tree, the project can adjust which path gets the attribute in .gitattributes, just like it can adjust which path has the submodule in .gitmodules. Neither of these two additional configuration options solve the problem of wanting different submodules checked out in different worktrees because multiple worktrees share .git/config. Only once per-worktree configurations become a reality can this be solved, but this is a necessary preparatory step for that future. Given these multiple ways to check if a submodule is of interest, the more fine-grained submodule.<name>.active option has the highest order of precedence followed by the pathspec check against submodule.active. To ensure backwards compatibility, if neither of these options are set, git falls back to checking the submodule.<name>.url option to determine if a submodule is interesting. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-17 23:38:01 +01:00
/* submodule.active is set */
sl = git_config_get_value_multi("submodule.active");
if (sl) {
struct pathspec ps;
struct argv_array args = ARGV_ARRAY_INIT;
const struct string_list_item *item;
submodule: decouple url and submodule interest Currently the submodule.<name>.url config option is used to determine if a given submodule is of interest to the user. This ends up being cumbersome in a world where we want to have different submodules checked out in different worktrees or a more generalized mechanism to select which submodules are of interest. In a future with worktree support for submodules, there will be multiple working trees, each of which may only need a subset of the submodules checked out. The URL (which is where the submodule repository can be obtained) should not differ between different working trees. It may also be convenient for users to more easily specify groups of submodules they are interested in as opposed to running "git submodule init <path>" on each submodule they want checked out in their working tree. To this end two config options are introduced, submodule.active and submodule.<name>.active. The submodule.active config holds a pathspec that specifies which submodules should exist in the working tree. The submodule.<name>.active config is a boolean flag used to indicate if that particular submodule should exist in the working tree. Its important to note that submodule.active functions differently than the other configuration options since it takes a pathspec. This allows users to adopt at least two new workflows: 1. Submodules can be grouped with a leading directory, such that a pathspec e.g. 'lib/' would cover all library-ish modules to allow those who are interested in library-ish modules to set "submodule.active = lib/" just once to say any and all modules in 'lib/' are interesting. 2. Once the pathspec-attribute feature is invented, users can label submodules with attributes to group them, so that a broad pathspec with attribute requirements, e.g. ':(attr:lib)', can be used to say any and all modules with the 'lib' attribute are interesting. Since the .gitattributes file, just like the .gitmodules file, is tracked by the superproject, when a submodule moves in the superproject tree, the project can adjust which path gets the attribute in .gitattributes, just like it can adjust which path has the submodule in .gitmodules. Neither of these two additional configuration options solve the problem of wanting different submodules checked out in different worktrees because multiple worktrees share .git/config. Only once per-worktree configurations become a reality can this be solved, but this is a necessary preparatory step for that future. Given these multiple ways to check if a submodule is of interest, the more fine-grained submodule.<name>.active option has the highest order of precedence followed by the pathspec check against submodule.active. To ensure backwards compatibility, if neither of these options are set, git falls back to checking the submodule.<name>.url option to determine if a submodule is interesting. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-17 23:38:01 +01:00
for_each_string_list_item(item, sl) {
argv_array_push(&args, item->string);
}
parse_pathspec(&ps, 0, 0, NULL, args.argv);
ret = match_pathspec(&ps, path, strlen(path), 0, NULL, 1);
argv_array_clear(&args);
clear_pathspec(&ps);
return ret;
}
submodule: decouple url and submodule interest Currently the submodule.<name>.url config option is used to determine if a given submodule is of interest to the user. This ends up being cumbersome in a world where we want to have different submodules checked out in different worktrees or a more generalized mechanism to select which submodules are of interest. In a future with worktree support for submodules, there will be multiple working trees, each of which may only need a subset of the submodules checked out. The URL (which is where the submodule repository can be obtained) should not differ between different working trees. It may also be convenient for users to more easily specify groups of submodules they are interested in as opposed to running "git submodule init <path>" on each submodule they want checked out in their working tree. To this end two config options are introduced, submodule.active and submodule.<name>.active. The submodule.active config holds a pathspec that specifies which submodules should exist in the working tree. The submodule.<name>.active config is a boolean flag used to indicate if that particular submodule should exist in the working tree. Its important to note that submodule.active functions differently than the other configuration options since it takes a pathspec. This allows users to adopt at least two new workflows: 1. Submodules can be grouped with a leading directory, such that a pathspec e.g. 'lib/' would cover all library-ish modules to allow those who are interested in library-ish modules to set "submodule.active = lib/" just once to say any and all modules in 'lib/' are interesting. 2. Once the pathspec-attribute feature is invented, users can label submodules with attributes to group them, so that a broad pathspec with attribute requirements, e.g. ':(attr:lib)', can be used to say any and all modules with the 'lib' attribute are interesting. Since the .gitattributes file, just like the .gitmodules file, is tracked by the superproject, when a submodule moves in the superproject tree, the project can adjust which path gets the attribute in .gitattributes, just like it can adjust which path has the submodule in .gitmodules. Neither of these two additional configuration options solve the problem of wanting different submodules checked out in different worktrees because multiple worktrees share .git/config. Only once per-worktree configurations become a reality can this be solved, but this is a necessary preparatory step for that future. Given these multiple ways to check if a submodule is of interest, the more fine-grained submodule.<name>.active option has the highest order of precedence followed by the pathspec check against submodule.active. To ensure backwards compatibility, if neither of these options are set, git falls back to checking the submodule.<name>.url option to determine if a submodule is interesting. Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-03-17 23:38:01 +01:00
/* fallback to checking if the URL is set */
key = xstrfmt("submodule.%s.url", module->name);
ret = !git_config_get_string(key, &value);
free(value);
free(key);
return ret;
}
int is_submodule_populated_gently(const char *path, int *return_error_code)
{
int ret = 0;
char *gitdir = xstrfmt("%s/.git", path);
if (resolve_gitdir_gently(gitdir, return_error_code))
ret = 1;
free(gitdir);
return ret;
}
/*
* Dies if the provided 'prefix' corresponds to an unpopulated submodule
*/
void die_in_unpopulated_submodule(const struct index_state *istate,
const char *prefix)
{
int i, prefixlen;
if (!prefix)
return;
prefixlen = strlen(prefix);
for (i = 0; i < istate->cache_nr; i++) {
struct cache_entry *ce = istate->cache[i];
int ce_len = ce_namelen(ce);
if (!S_ISGITLINK(ce->ce_mode))
continue;
if (prefixlen <= ce_len)
continue;
if (strncmp(ce->name, prefix, ce_len))
continue;
if (prefix[ce_len] != '/')
continue;
die(_("in unpopulated submodule '%s'"), ce->name);
}
}
/*
* Dies if any paths in the provided pathspec descends into a submodule
*/
void die_path_inside_submodule(const struct index_state *istate,
const struct pathspec *ps)
{
int i, j;
for (i = 0; i < istate->cache_nr; i++) {
struct cache_entry *ce = istate->cache[i];
int ce_len = ce_namelen(ce);
if (!S_ISGITLINK(ce->ce_mode))
continue;
for (j = 0; j < ps->nr ; j++) {
const struct pathspec_item *item = &ps->items[j];
if (item->len <= ce_len)
continue;
if (item->match[ce_len] != '/')
continue;
if (strncmp(ce->name, item->match, ce_len))
continue;
if (item->len == ce_len + 1)
continue;
die(_("Pathspec '%s' is in submodule '%.*s'"),
item->original, ce_len, ce->name);
}
}
}
int parse_submodule_update_strategy(const char *value,
struct submodule_update_strategy *dst)
{
free((void*)dst->command);
dst->command = NULL;
if (!strcmp(value, "none"))
dst->type = SM_UPDATE_NONE;
else if (!strcmp(value, "checkout"))
dst->type = SM_UPDATE_CHECKOUT;
else if (!strcmp(value, "rebase"))
dst->type = SM_UPDATE_REBASE;
else if (!strcmp(value, "merge"))
dst->type = SM_UPDATE_MERGE;
else if (skip_prefix(value, "!", &value)) {
dst->type = SM_UPDATE_COMMAND;
dst->command = xstrdup(value);
} else
return -1;
return 0;
}
submodule: port init from shell to C By having the `submodule init` functionality in C, we can reference it easier from other parts in the code in later patches. The code is split up to have one function to initialize one submodule and a calling function that takes care of the rest, such as argument handling and translating the arguments to the paths of the submodules. This is the first submodule subcommand that is fully converted to C except for the usage string, so this is actually removing a call to the `submodule--helper list` function, which is supposed to be used in this transition. Instead we'll make a direct call to `module_list_compute`. An explanation why we need to edit the prefixes in cmd_update in git-submodule.sh in this patch: By having no processing in the shell part, we need to convey the notion of wt_prefix and prefix to the C parts, which former patches punted on and did the processing of displaying path in the shell. `wt_prefix` used to hold the path from the repository root to the current directory, e.g. wt_prefix would be t/ if the user invoked the `git submodule` command in ~/repo/t and ~repo is the GIT_DIR. `prefix` used to hold the relative path from the repository root to the operation, e.g. if you have recursive submodules, the shell script would modify the `prefix` in each recursive step by adding the submodule path. We will pass `wt_prefix` into the C helper via `git -C <dir>` as that will setup git in the directory the user actually called git-submodule.sh from. The `prefix` will be passed in via the `--prefix` option. Having `prefix` and `wt_prefix` relative to the GIT_DIR of the calling superproject is unfortunate with this patch as the C code doesn't know about a possible recursion from a superproject via `submodule update --init --recursive`. To fix this, we change the meaning of `wt_prefix` to point to the current project instead of the superproject and `prefix` to include any relative paths issues in the superproject. That way `prefix` will become the leading part for displaying paths and `wt_prefix` will be empty in recursive calls for now. The new notion of `wt_prefix` and `prefix` still allows us to reconstruct the calling directory in the superproject by just traveling reverse of `prefix`. Signed-off-by: Stefan Beller <sbeller@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-04-16 02:50:13 +02:00
const char *submodule_strategy_to_string(const struct submodule_update_strategy *s)
{
struct strbuf sb = STRBUF_INIT;
switch (s->type) {
case SM_UPDATE_CHECKOUT:
return "checkout";
case SM_UPDATE_MERGE:
return "merge";
case SM_UPDATE_REBASE:
return "rebase";
case SM_UPDATE_NONE:
return "none";
case SM_UPDATE_UNSPECIFIED:
return NULL;
case SM_UPDATE_COMMAND:
strbuf_addf(&sb, "!%s", s->command);
return strbuf_detach(&sb, NULL);
}
return NULL;
}
Add the option "--ignore-submodules" to "git status" In some use cases it is not desirable that "git status" considers submodules that only contain untracked content as dirty. This may happen e.g. when the submodule is not under the developers control and not all build generated files have been added to .gitignore by the upstream developers. Using the "untracked" parameter for the "--ignore-submodules" option disables checking for untracked content and lets git diff report them as changed only when they have new commits or modified content. Sometimes it is not wanted to have submodules show up as changed when they just contain changes to their work tree (this was the behavior before 1.7.0). An example for that are scripts which just want to check for submodule commits while ignoring any changes to the work tree. Also users having large submodules known not to change might want to use this option, as the - sometimes substantial - time it takes to scan the submodule work tree(s) is saved when using the "dirty" parameter. And if you want to ignore any changes to submodules, you can now do that by using this option without parameters or with "all" (when the config option status.submodulesummary is set, using "all" will also suppress the output of the submodule summary). A new function handle_ignore_submodules_arg() is introduced to parse this option new to "git status" in a single location, as "git diff" already knew it. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-06-25 16:56:47 +02:00
void handle_ignore_submodules_arg(struct diff_options *diffopt,
const char *arg)
{
DIFF_OPT_CLR(diffopt, IGNORE_SUBMODULES);
DIFF_OPT_CLR(diffopt, IGNORE_UNTRACKED_IN_SUBMODULES);
DIFF_OPT_CLR(diffopt, IGNORE_DIRTY_SUBMODULES);
Add the option "--ignore-submodules" to "git status" In some use cases it is not desirable that "git status" considers submodules that only contain untracked content as dirty. This may happen e.g. when the submodule is not under the developers control and not all build generated files have been added to .gitignore by the upstream developers. Using the "untracked" parameter for the "--ignore-submodules" option disables checking for untracked content and lets git diff report them as changed only when they have new commits or modified content. Sometimes it is not wanted to have submodules show up as changed when they just contain changes to their work tree (this was the behavior before 1.7.0). An example for that are scripts which just want to check for submodule commits while ignoring any changes to the work tree. Also users having large submodules known not to change might want to use this option, as the - sometimes substantial - time it takes to scan the submodule work tree(s) is saved when using the "dirty" parameter. And if you want to ignore any changes to submodules, you can now do that by using this option without parameters or with "all" (when the config option status.submodulesummary is set, using "all" will also suppress the output of the submodule summary). A new function handle_ignore_submodules_arg() is introduced to parse this option new to "git status" in a single location, as "git diff" already knew it. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-06-25 16:56:47 +02:00
if (!strcmp(arg, "all"))
DIFF_OPT_SET(diffopt, IGNORE_SUBMODULES);
else if (!strcmp(arg, "untracked"))
DIFF_OPT_SET(diffopt, IGNORE_UNTRACKED_IN_SUBMODULES);
else if (!strcmp(arg, "dirty"))
DIFF_OPT_SET(diffopt, IGNORE_DIRTY_SUBMODULES);
Submodules: Add the new "ignore" config option for diff and status The new "ignore" config option controls the default behavior for "git status" and the diff family. It specifies under what circumstances they consider submodules as modified and can be set separately for each submodule. The command line option "--ignore-submodules=" has been extended to accept the new parameter "none" for both status and diff. Users that chose submodules to get rid of long work tree scanning times might want to set the "dirty" option for those submodules. This brings back the pre 1.7.0 behavior, where submodule work trees were never scanned for modifications. By using "--ignore-submodules=none" on the command line the status and diff commands can be told to do a full scan. This option can be set to the following values (which have the same name and meaning as for the "--ignore-submodules" option of status and diff): "all": All changes to the submodule will be ignored. "dirty": Only differences of the commit recorded in the superproject and the submodules HEAD will be considered modifications, all changes to the work tree of the submodule will be ignored. When using this value, the submodule will not be scanned for work tree changes at all, leading to a performance benefit on large submodules. "untracked": Only untracked files in the submodules work tree are ignored, a changed HEAD and/or modified files in the submodule will mark it as modified. "none" (which is the default): Either untracked or modified files in a submodules work tree or a difference between the subdmodules HEAD and the commit recorded in the superproject will make it show up as changed. This value is added as a new parameter for the "--ignore-submodules" option of the diff family and "git status" so the user can override the settings in the configuration. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-08-06 00:39:25 +02:00
else if (strcmp(arg, "none"))
Add the option "--ignore-submodules" to "git status" In some use cases it is not desirable that "git status" considers submodules that only contain untracked content as dirty. This may happen e.g. when the submodule is not under the developers control and not all build generated files have been added to .gitignore by the upstream developers. Using the "untracked" parameter for the "--ignore-submodules" option disables checking for untracked content and lets git diff report them as changed only when they have new commits or modified content. Sometimes it is not wanted to have submodules show up as changed when they just contain changes to their work tree (this was the behavior before 1.7.0). An example for that are scripts which just want to check for submodule commits while ignoring any changes to the work tree. Also users having large submodules known not to change might want to use this option, as the - sometimes substantial - time it takes to scan the submodule work tree(s) is saved when using the "dirty" parameter. And if you want to ignore any changes to submodules, you can now do that by using this option without parameters or with "all" (when the config option status.submodulesummary is set, using "all" will also suppress the output of the submodule summary). A new function handle_ignore_submodules_arg() is introduced to parse this option new to "git status" in a single location, as "git diff" already knew it. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-06-25 16:56:47 +02:00
die("bad --ignore-submodules argument: %s", arg);
}
static int prepare_submodule_summary(struct rev_info *rev, const char *path,
struct commit *left, struct commit *right,
struct commit_list *merge_bases)
{
struct commit_list *list;
init_revisions(rev, NULL);
setup_revisions(0, NULL, rev, NULL);
rev->left_right = 1;
rev->first_parent_only = 1;
left->object.flags |= SYMMETRIC_LEFT;
add_pending_object(rev, &left->object, path);
add_pending_object(rev, &right->object, path);
for (list = merge_bases; list; list = list->next) {
list->item->object.flags |= UNINTERESTING;
add_pending_object(rev, &list->item->object,
oid_to_hex(&list->item->object.oid));
}
return prepare_revision_walk(rev);
}
static void print_submodule_summary(struct rev_info *rev, struct diff_options *o)
{
static const char format[] = " %m %s";
struct strbuf sb = STRBUF_INIT;
struct commit *commit;
while ((commit = get_revision(rev))) {
struct pretty_print_context ctx = {0};
ctx.date_mode = rev->date_mode;
ctx.output_encoding = get_log_output_encoding();
strbuf_setlen(&sb, 0);
format_commit_message(commit, format, &sb, &ctx);
strbuf_addch(&sb, '\n');
if (commit->object.flags & SYMMETRIC_LEFT)
diff_emit_submodule_del(o, sb.buf);
else
diff_emit_submodule_add(o, sb.buf);
}
strbuf_release(&sb);
}
static void prepare_submodule_repo_env_no_git_dir(struct argv_array *out)
{
const char * const *var;
for (var = local_repo_env; *var; var++) {
if (strcmp(*var, CONFIG_DATA_ENVIRONMENT))
argv_array_push(out, *var);
}
}
void prepare_submodule_repo_env(struct argv_array *out)
{
prepare_submodule_repo_env_no_git_dir(out);
argv_array_pushf(out, "%s=%s", GIT_DIR_ENVIRONMENT,
DEFAULT_GIT_DIR_ENVIRONMENT);
}
/* Helper function to display the submodule header line prior to the full
* summary output. If it can locate the submodule objects directory it will
* attempt to lookup both the left and right commits and put them into the
* left and right pointers.
*/
static void show_submodule_header(struct diff_options *o, const char *path,
struct object_id *one, struct object_id *two,
unsigned dirty_submodule,
struct commit **left, struct commit **right,
struct commit_list **merge_bases)
{
const char *message = NULL;
struct strbuf sb = STRBUF_INIT;
int fast_forward = 0, fast_backward = 0;
if (dirty_submodule & DIRTY_SUBMODULE_UNTRACKED)
diff_emit_submodule_untracked(o, path);
if (dirty_submodule & DIRTY_SUBMODULE_MODIFIED)
diff_emit_submodule_modified(o, path);
if (is_null_oid(one))
message = "(new submodule)";
else if (is_null_oid(two))
message = "(submodule deleted)";
if (add_submodule_odb(path)) {
if (!message)
message = "(not initialized)";
goto output_header;
}
/*
* Attempt to lookup the commit references, and determine if this is
* a fast forward or fast backwards update.
*/
Convert lookup_commit* to struct object_id Convert lookup_commit, lookup_commit_or_die, lookup_commit_reference, and lookup_commit_reference_gently to take struct object_id arguments. Introduce a temporary in parse_object buffer in order to convert this function. This is required since in order to convert parse_object and parse_object_buffer, lookup_commit_reference_gently and lookup_commit_or_die would need to be converted. Not introducing a temporary would therefore require that lookup_commit_or_die take a struct object_id *, but lookup_commit would take unsigned char *, leaving a confusing and hard-to-use interface. parse_object_buffer will lose this temporary in a later patch. This commit was created with manual changes to commit.c, commit.h, and object.c, plus the following semantic patch: @@ expression E1, E2; @@ - lookup_commit_reference_gently(E1.hash, E2) + lookup_commit_reference_gently(&E1, E2) @@ expression E1, E2; @@ - lookup_commit_reference_gently(E1->hash, E2) + lookup_commit_reference_gently(E1, E2) @@ expression E1; @@ - lookup_commit_reference(E1.hash) + lookup_commit_reference(&E1) @@ expression E1; @@ - lookup_commit_reference(E1->hash) + lookup_commit_reference(E1) @@ expression E1; @@ - lookup_commit(E1.hash) + lookup_commit(&E1) @@ expression E1; @@ - lookup_commit(E1->hash) + lookup_commit(E1) @@ expression E1, E2; @@ - lookup_commit_or_die(E1.hash, E2) + lookup_commit_or_die(&E1, E2) @@ expression E1, E2; @@ - lookup_commit_or_die(E1->hash, E2) + lookup_commit_or_die(E1, E2) Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-07 00:10:10 +02:00
*left = lookup_commit_reference(one);
*right = lookup_commit_reference(two);
/*
* Warn about missing commits in the submodule project, but only if
* they aren't null.
*/
if ((!is_null_oid(one) && !*left) ||
(!is_null_oid(two) && !*right))
message = "(commits not present)";
*merge_bases = get_merge_bases(*left, *right);
if (*merge_bases) {
if ((*merge_bases)->item == *left)
fast_forward = 1;
else if ((*merge_bases)->item == *right)
fast_backward = 1;
}
if (!oidcmp(one, two)) {
strbuf_release(&sb);
return;
}
output_header:
strbuf_addf(&sb, "Submodule %s ", path);
strbuf_add_unique_abbrev(&sb, one->hash, DEFAULT_ABBREV);
strbuf_addstr(&sb, (fast_backward || fast_forward) ? ".." : "...");
strbuf_add_unique_abbrev(&sb, two->hash, DEFAULT_ABBREV);
if (message)
strbuf_addf(&sb, " %s\n", message);
else
strbuf_addf(&sb, "%s:\n", fast_backward ? " (rewind)" : "");
diff_emit_submodule_header(o, sb.buf);
strbuf_release(&sb);
}
void show_submodule_summary(struct diff_options *o, const char *path,
struct object_id *one, struct object_id *two,
unsigned dirty_submodule)
{
struct rev_info rev;
struct commit *left = NULL, *right = NULL;
struct commit_list *merge_bases = NULL;
show_submodule_header(o, path, one, two, dirty_submodule,
&left, &right, &merge_bases);
/*
* If we don't have both a left and a right pointer, there is no
* reason to try and display a summary. The header line should contain
* all the information the user needs.
*/
if (!left || !right)
goto out;
/* Treat revision walker failure the same as missing commits */
if (prepare_submodule_summary(&rev, path, left, right, merge_bases)) {
diff_emit_submodule_error(o, "(revision walker failed)\n");
goto out;
}
print_submodule_summary(&rev, o);
out:
if (merge_bases)
free_commit_list(merge_bases);
clear_commit_marks(left, ~0);
clear_commit_marks(right, ~0);
}
void show_submodule_inline_diff(struct diff_options *o, const char *path,
struct object_id *one, struct object_id *two,
unsigned dirty_submodule)
{
const struct object_id *old = &empty_tree_oid, *new = &empty_tree_oid;
struct commit *left = NULL, *right = NULL;
struct commit_list *merge_bases = NULL;
struct child_process cp = CHILD_PROCESS_INIT;
struct strbuf sb = STRBUF_INIT;
show_submodule_header(o, path, one, two, dirty_submodule,
&left, &right, &merge_bases);
/* We need a valid left and right commit to display a difference */
if (!(left || is_null_oid(one)) ||
!(right || is_null_oid(two)))
goto done;
if (left)
old = one;
if (right)
new = two;
cp.git_cmd = 1;
cp.dir = path;
cp.out = -1;
cp.no_stdin = 1;
/* TODO: other options may need to be passed here. */
argv_array_pushl(&cp.args, "diff", "--submodule=diff", NULL);
argv_array_pushf(&cp.args, "--color=%s", want_color(o->use_color) ?
"always" : "never");
if (DIFF_OPT_TST(o, REVERSE_DIFF)) {
argv_array_pushf(&cp.args, "--src-prefix=%s%s/",
o->b_prefix, path);
argv_array_pushf(&cp.args, "--dst-prefix=%s%s/",
o->a_prefix, path);
} else {
argv_array_pushf(&cp.args, "--src-prefix=%s%s/",
o->a_prefix, path);
argv_array_pushf(&cp.args, "--dst-prefix=%s%s/",
o->b_prefix, path);
}
argv_array_push(&cp.args, oid_to_hex(old));
/*
* If the submodule has modified content, we will diff against the
* work tree, under the assumption that the user has asked for the
* diff format and wishes to actually see all differences even if they
* haven't yet been committed to the submodule yet.
*/
if (!(dirty_submodule & DIRTY_SUBMODULE_MODIFIED))
argv_array_push(&cp.args, oid_to_hex(new));
prepare_submodule_repo_env(&cp.env_array);
if (start_command(&cp))
diff_emit_submodule_error(o, "(diff failed)\n");
while (strbuf_getwholeline_fd(&sb, cp.out, '\n') != EOF)
diff_emit_submodule_pipethrough(o, sb.buf, sb.len);
if (finish_command(&cp))
diff_emit_submodule_error(o, "(diff failed)\n");
done:
strbuf_release(&sb);
if (merge_bases)
free_commit_list(merge_bases);
if (left)
clear_commit_marks(left, ~0);
if (right)
clear_commit_marks(right, ~0);
}
void set_config_fetch_recurse_submodules(int value)
{
config_fetch_recurse_submodules = value;
}
int should_update_submodules(void)
{
return config_update_recurse_submodules == RECURSE_SUBMODULES_ON;
}
const struct submodule *submodule_from_ce(const struct cache_entry *ce)
{
if (!S_ISGITLINK(ce->ce_mode))
return NULL;
if (!should_update_submodules())
return NULL;
return submodule_from_path(null_sha1, ce->name);
}
static struct oid_array *submodule_commits(struct string_list *submodules,
const char *path)
{
struct string_list_item *item;
item = string_list_insert(submodules, path);
if (item->util)
return (struct oid_array *) item->util;
/* NEEDSWORK: should we have oid_array_init()? */
item->util = xcalloc(1, sizeof(struct oid_array));
return (struct oid_array *) item->util;
}
static void collect_changed_submodules_cb(struct diff_queue_struct *q,
struct diff_options *options,
void *data)
{
int i;
struct string_list *changed = data;
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
struct oid_array *commits;
if (!S_ISGITLINK(p->two->mode))
continue;
if (S_ISGITLINK(p->one->mode)) {
/*
* NEEDSWORK: We should honor the name configured in
* the .gitmodules file of the commit we are examining
* here to be able to correctly follow submodules
* being moved around.
*/
commits = submodule_commits(changed, p->two->path);
oid_array_append(commits, &p->two->oid);
} else {
/* Submodule is new or was moved here */
/*
* NEEDSWORK: When the .git directories of submodules
* live inside the superprojects .git directory some
* day we should fetch new submodules directly into
* that location too when config or options request
* that so they can be checked out from there.
*/
continue;
}
}
}
/*
* Collect the paths of submodules in 'changed' which have changed based on
* the revisions as specified in 'argv'. Each entry in 'changed' will also
* have a corresponding 'struct oid_array' (in the 'util' field) which lists
* what the submodule pointers were updated to during the change.
*/
static void collect_changed_submodules(struct string_list *changed,
struct argv_array *argv)
{
struct rev_info rev;
const struct commit *commit;
init_revisions(&rev, NULL);
setup_revisions(argv->argc, argv->argv, &rev, NULL);
if (prepare_revision_walk(&rev))
die("revision walk setup failed");
while ((commit = get_revision(&rev))) {
struct rev_info diff_rev;
init_revisions(&diff_rev, NULL);
diff_rev.diffopt.output_format |= DIFF_FORMAT_CALLBACK;
diff_rev.diffopt.format_callback = collect_changed_submodules_cb;
diff_rev.diffopt.format_callback_data = changed;
diff_tree_combined_merge(commit, 1, &diff_rev);
}
reset_revision_walk();
}
static void free_submodules_oids(struct string_list *submodules)
{
struct string_list_item *item;
for_each_string_list_item(item, submodules)
oid_array_clear((struct oid_array *) item->util);
string_list_clear(submodules, 1);
}
static int has_remote(const char *refname, const struct object_id *oid,
int flags, void *cb_data)
{
return 1;
}
static int append_oid_to_argv(const struct object_id *oid, void *data)
{
struct argv_array *argv = data;
argv_array_push(argv, oid_to_hex(oid));
return 0;
}
static int check_has_commit(const struct object_id *oid, void *data)
{
int *has_commit = data;
Convert lookup_commit* to struct object_id Convert lookup_commit, lookup_commit_or_die, lookup_commit_reference, and lookup_commit_reference_gently to take struct object_id arguments. Introduce a temporary in parse_object buffer in order to convert this function. This is required since in order to convert parse_object and parse_object_buffer, lookup_commit_reference_gently and lookup_commit_or_die would need to be converted. Not introducing a temporary would therefore require that lookup_commit_or_die take a struct object_id *, but lookup_commit would take unsigned char *, leaving a confusing and hard-to-use interface. parse_object_buffer will lose this temporary in a later patch. This commit was created with manual changes to commit.c, commit.h, and object.c, plus the following semantic patch: @@ expression E1, E2; @@ - lookup_commit_reference_gently(E1.hash, E2) + lookup_commit_reference_gently(&E1, E2) @@ expression E1, E2; @@ - lookup_commit_reference_gently(E1->hash, E2) + lookup_commit_reference_gently(E1, E2) @@ expression E1; @@ - lookup_commit_reference(E1.hash) + lookup_commit_reference(&E1) @@ expression E1; @@ - lookup_commit_reference(E1->hash) + lookup_commit_reference(E1) @@ expression E1; @@ - lookup_commit(E1.hash) + lookup_commit(&E1) @@ expression E1; @@ - lookup_commit(E1->hash) + lookup_commit(E1) @@ expression E1, E2; @@ - lookup_commit_or_die(E1.hash, E2) + lookup_commit_or_die(&E1, E2) @@ expression E1, E2; @@ - lookup_commit_or_die(E1->hash, E2) + lookup_commit_or_die(E1, E2) Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-07 00:10:10 +02:00
if (!lookup_commit_reference(oid))
*has_commit = 0;
return 0;
}
static int submodule_has_commits(const char *path, struct oid_array *commits)
{
int has_commit = 1;
/*
* Perform a cheap, but incorrect check for the existance of 'commits'.
* This is done by adding the submodule's object store to the in-core
* object store, and then querying for each commit's existance. If we
* do not have the commit object anywhere, there is no chance we have
* it in the object store of the correct submodule and have it
* reachable from a ref, so we can fail early without spawning rev-list
* which is expensive.
*/
if (add_submodule_odb(path))
return 0;
oid_array_for_each_unique(commits, check_has_commit, &has_commit);
if (has_commit) {
/*
* Even if the submodule is checked out and the commit is
* present, make sure it exists in the submodule's object store
* and that it is reachable from a ref.
*/
struct child_process cp = CHILD_PROCESS_INIT;
struct strbuf out = STRBUF_INIT;
argv_array_pushl(&cp.args, "rev-list", "-n", "1", NULL);
oid_array_for_each_unique(commits, append_oid_to_argv, &cp.args);
argv_array_pushl(&cp.args, "--not", "--all", NULL);
prepare_submodule_repo_env(&cp.env_array);
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.dir = path;
if (capture_command(&cp, &out, GIT_MAX_HEXSZ + 1) || out.len)
has_commit = 0;
strbuf_release(&out);
}
return has_commit;
}
static int submodule_needs_pushing(const char *path, struct oid_array *commits)
{
if (!submodule_has_commits(path, commits))
/*
* NOTE: We do consider it safe to return "no" here. The
* correct answer would be "We do not know" instead of
* "No push needed", but it is quite hard to change
* the submodule pointer without having the submodule
* around. If a user did however change the submodules
* without having the submodule around, this indicates
* an expert who knows what they are doing or a
* maintainer integrating work from other people. In
* both cases it should be safe to skip this check.
*/
return 0;
if (for_each_remote_ref_submodule(path, has_remote, NULL) > 0) {
struct child_process cp = CHILD_PROCESS_INIT;
struct strbuf buf = STRBUF_INIT;
int needs_pushing = 0;
argv_array_push(&cp.args, "rev-list");
oid_array_for_each_unique(commits, append_oid_to_argv, &cp.args);
argv_array_pushl(&cp.args, "--not", "--remotes", "-n", "1" , NULL);
prepare_submodule_repo_env(&cp.env_array);
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.out = -1;
cp.dir = path;
if (start_command(&cp))
die("Could not run 'git rev-list <commits> --not --remotes -n 1' command in submodule %s",
path);
if (strbuf_read(&buf, cp.out, 41))
needs_pushing = 1;
finish_command(&cp);
close(cp.out);
strbuf_release(&buf);
return needs_pushing;
}
return 0;
}
int find_unpushed_submodules(struct oid_array *commits,
const char *remotes_name, struct string_list *needs_pushing)
{
struct string_list submodules = STRING_LIST_INIT_DUP;
struct string_list_item *submodule;
struct argv_array argv = ARGV_ARRAY_INIT;
/* argv.argv[0] will be ignored by setup_revisions */
argv_array_push(&argv, "find_unpushed_submodules");
oid_array_for_each_unique(commits, append_oid_to_argv, &argv);
argv_array_push(&argv, "--not");
argv_array_pushf(&argv, "--remotes=%s", remotes_name);
collect_changed_submodules(&submodules, &argv);
for_each_string_list_item(submodule, &submodules) {
struct oid_array *commits = submodule->util;
const char *path = submodule->string;
if (submodule_needs_pushing(path, commits))
string_list_insert(needs_pushing, path);
}
free_submodules_oids(&submodules);
argv_array_clear(&argv);
return needs_pushing->nr;
}
static int push_submodule(const char *path,
const struct remote *remote,
const char **refspec, int refspec_nr,
const struct string_list *push_options,
int dry_run)
{
if (add_submodule_odb(path))
return 1;
if (for_each_remote_ref_submodule(path, has_remote, NULL) > 0) {
struct child_process cp = CHILD_PROCESS_INIT;
argv_array_push(&cp.args, "push");
if (dry_run)
argv_array_push(&cp.args, "--dry-run");
if (push_options && push_options->nr) {
const struct string_list_item *item;
for_each_string_list_item(item, push_options)
argv_array_pushf(&cp.args, "--push-option=%s",
item->string);
}
if (remote->origin != REMOTE_UNCONFIGURED) {
int i;
argv_array_push(&cp.args, remote->name);
for (i = 0; i < refspec_nr; i++)
argv_array_push(&cp.args, refspec[i]);
}
prepare_submodule_repo_env(&cp.env_array);
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.dir = path;
if (run_command(&cp))
return 0;
close(cp.out);
}
return 1;
}
/*
* Perform a check in the submodule to see if the remote and refspec work.
* Die if the submodule can't be pushed.
*/
static void submodule_push_check(const char *path, const struct remote *remote,
const char **refspec, int refspec_nr)
{
struct child_process cp = CHILD_PROCESS_INIT;
int i;
argv_array_push(&cp.args, "submodule--helper");
argv_array_push(&cp.args, "push-check");
argv_array_push(&cp.args, remote->name);
for (i = 0; i < refspec_nr; i++)
argv_array_push(&cp.args, refspec[i]);
prepare_submodule_repo_env(&cp.env_array);
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.no_stdout = 1;
cp.dir = path;
/*
* Simply indicate if 'submodule--helper push-check' failed.
* More detailed error information will be provided by the
* child process.
*/
if (run_command(&cp))
die("process for submodule '%s' failed", path);
}
int push_unpushed_submodules(struct oid_array *commits,
const struct remote *remote,
const char **refspec, int refspec_nr,
const struct string_list *push_options,
int dry_run)
{
int i, ret = 1;
struct string_list needs_pushing = STRING_LIST_INIT_DUP;
if (!find_unpushed_submodules(commits, remote->name, &needs_pushing))
return 1;
/*
* Verify that the remote and refspec can be propagated to all
* submodules. This check can be skipped if the remote and refspec
* won't be propagated due to the remote being unconfigured (e.g. a URL
* instead of a remote name).
*/
if (remote->origin != REMOTE_UNCONFIGURED)
for (i = 0; i < needs_pushing.nr; i++)
submodule_push_check(needs_pushing.items[i].string,
remote, refspec, refspec_nr);
/* Actually push the submodules */
for (i = 0; i < needs_pushing.nr; i++) {
const char *path = needs_pushing.items[i].string;
fprintf(stderr, "Pushing submodule '%s'\n", path);
if (!push_submodule(path, remote, refspec, refspec_nr,
push_options, dry_run)) {
fprintf(stderr, "Unable to push submodule '%s'\n", path);
ret = 0;
}
}
string_list_clear(&needs_pushing, 0);
return ret;
}
static int append_oid_to_array(const char *ref, const struct object_id *oid,
int flags, void *data)
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-09-12 21:56:52 +02:00
{
struct oid_array *array = data;
oid_array_append(array, oid);
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-09-12 21:56:52 +02:00
return 0;
}
void check_for_new_submodule_commits(struct object_id *oid)
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-09-12 21:56:52 +02:00
{
if (!initialized_fetch_ref_tips) {
for_each_ref(append_oid_to_array, &ref_tips_before_fetch);
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-09-12 21:56:52 +02:00
initialized_fetch_ref_tips = 1;
}
oid_array_append(&ref_tips_after_fetch, oid);
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-09-12 21:56:52 +02:00
}
static void calculate_changed_submodule_paths(void)
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-06 23:10:46 +01:00
{
struct argv_array argv = ARGV_ARRAY_INIT;
struct string_list changed_submodules = STRING_LIST_INIT_DUP;
const struct string_list_item *item;
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-06 23:10:46 +01:00
/* No need to check if there are no submodules configured */
if (!submodule_from_path(NULL, NULL))
return;
argv_array_push(&argv, "--"); /* argv[0] program name */
oid_array_for_each_unique(&ref_tips_after_fetch,
append_oid_to_argv, &argv);
argv_array_push(&argv, "--not");
oid_array_for_each_unique(&ref_tips_before_fetch,
append_oid_to_argv, &argv);
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-06 23:10:46 +01:00
/*
* Collect all submodules (whether checked out or not) for which new
* commits have been recorded upstream in "changed_submodule_paths".
*/
collect_changed_submodules(&changed_submodules, &argv);
for_each_string_list_item(item, &changed_submodules) {
struct oid_array *commits = item->util;
const char *path = item->string;
if (!submodule_has_commits(path, commits))
string_list_append(&changed_submodule_paths, path);
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-06 23:10:46 +01:00
}
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-09-12 21:56:52 +02:00
free_submodules_oids(&changed_submodules);
argv_array_clear(&argv);
oid_array_clear(&ref_tips_before_fetch);
oid_array_clear(&ref_tips_after_fetch);
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-09-12 21:56:52 +02:00
initialized_fetch_ref_tips = 0;
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-06 23:10:46 +01:00
}
struct submodule_parallel_fetch {
int count;
struct argv_array args;
const char *work_tree;
const char *prefix;
int command_line_option;
int quiet;
int result;
};
#define SPF_INIT {0, ARGV_ARRAY_INIT, NULL, NULL, 0, 0, 0}
static int get_next_submodule(struct child_process *cp,
struct strbuf *err, void *data, void **task_cb)
{
int ret = 0;
struct submodule_parallel_fetch *spf = data;
fetch: avoid quadratic loop checking for updated submodules Recent versions of git can be slow to fetch repositories with a large number of refs (or when they already have a large number of refs). For example, GitHub makes pull-requests available as refs, which can lead to a large number of available refs. This slowness goes away when submodule recursion is turned off: $ git ls-remote git://github.com/rails/rails.git | wc -l 3034 [this takes ~10 seconds of CPU time to complete] git fetch --recurse-submodules=no \ git://github.com/rails/rails.git "refs/*:refs/*" [this still isn't done after 10 _minutes_ of pegging the CPU] git fetch \ git://github.com/rails/rails.git "refs/*:refs/*" You can produce a quicker and simpler test case like this: doit() { head=`git rev-parse HEAD` for i in `seq 1 $1`; do echo $head refs/heads/ref$i done >.git/packed-refs echo "==> $1" rm -rf dest git init -q --bare dest && (cd dest && time git.compile fetch -q .. refs/*:refs/*) } rm -rf repo git init -q repo && cd repo && >file && git add file && git commit -q -m one doit 100 doit 200 doit 400 doit 800 doit 1600 doit 3200 Which yields timings like: # refs seconds of CPU 100 0.06 200 0.24 400 0.95 800 3.39 1600 13.66 3200 54.09 Notice that although the number of refs doubles in each trial, the CPU time spent quadruples. The problem is that the submodule recursion code works something like: - for each ref we fetch - for each commit in git rev-list $new_sha1 --not --all - add modified submodules to list - fetch any newly referenced submodules But that means if we fetch N refs, we start N revision walks. Worse, because we use "--all", the number of refs we must process that constitute "--all" keeps growing, too. And you end up doing O(N^2) ref resolutions. Instead, this patch structures the code like this: - for each sha1 we already have - add $old_sha1 to list $old - for each ref we fetch - add $new_sha1 to list $new - for each commit in git rev-list $new --not $old - add modified submodules to list - fetch any newly referenced submodules This yields timings like: # refs seconds of CPU 100 0.00 200 0.04 400 0.04 800 0.10 1600 0.21 3200 0.39 Note that the amount of effort doubles as the number of refs doubles. Similarly, the fetch of rails.git takes about as much time as it does with --recurse-submodules=no. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-09-12 21:56:52 +02:00
for (; spf->count < active_nr; spf->count++) {
struct strbuf submodule_path = STRBUF_INIT;
struct strbuf submodule_git_dir = STRBUF_INIT;
struct strbuf submodule_prefix = STRBUF_INIT;
const struct cache_entry *ce = active_cache[spf->count];
const char *git_dir, *default_argv;
const struct submodule *submodule;
if (!S_ISGITLINK(ce->ce_mode))
continue;
submodule = submodule_from_path(null_sha1, ce->name);
if (!submodule)
submodule = submodule_from_name(null_sha1, ce->name);
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-06 23:10:46 +01:00
default_argv = "yes";
if (spf->command_line_option == RECURSE_SUBMODULES_DEFAULT) {
if (submodule &&
submodule->fetch_recurse !=
RECURSE_SUBMODULES_NONE) {
if (submodule->fetch_recurse ==
RECURSE_SUBMODULES_OFF)
continue;
if (submodule->fetch_recurse ==
RECURSE_SUBMODULES_ON_DEMAND) {
if (!unsorted_string_list_lookup(&changed_submodule_paths, ce->name))
continue;
default_argv = "on-demand";
}
} else {
if ((config_fetch_recurse_submodules == RECURSE_SUBMODULES_OFF) ||
gitmodules_is_unmerged)
continue;
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-06 23:10:46 +01:00
if (config_fetch_recurse_submodules == RECURSE_SUBMODULES_ON_DEMAND) {
if (!unsorted_string_list_lookup(&changed_submodule_paths, ce->name))
continue;
default_argv = "on-demand";
}
}
} else if (spf->command_line_option == RECURSE_SUBMODULES_ON_DEMAND) {
if (!unsorted_string_list_lookup(&changed_submodule_paths, ce->name))
continue;
default_argv = "on-demand";
}
strbuf_addf(&submodule_path, "%s/%s", spf->work_tree, ce->name);
strbuf_addf(&submodule_git_dir, "%s/.git", submodule_path.buf);
strbuf_addf(&submodule_prefix, "%s%s/", spf->prefix, ce->name);
git_dir = read_gitfile(submodule_git_dir.buf);
if (!git_dir)
git_dir = submodule_git_dir.buf;
if (is_directory(git_dir)) {
child_process_init(cp);
cp->dir = strbuf_detach(&submodule_path, NULL);
prepare_submodule_repo_env(&cp->env_array);
cp->git_cmd = 1;
if (!spf->quiet)
strbuf_addf(err, "Fetching submodule %s%s\n",
spf->prefix, ce->name);
argv_array_init(&cp->args);
argv_array_pushv(&cp->args, spf->args.argv);
argv_array_push(&cp->args, default_argv);
argv_array_push(&cp->args, "--submodule-prefix");
argv_array_push(&cp->args, submodule_prefix.buf);
ret = 1;
}
strbuf_release(&submodule_path);
strbuf_release(&submodule_git_dir);
strbuf_release(&submodule_prefix);
if (ret) {
spf->count++;
return 1;
}
}
return 0;
}
static int fetch_start_failure(struct strbuf *err,
void *cb, void *task_cb)
{
struct submodule_parallel_fetch *spf = cb;
spf->result = 1;
return 0;
}
static int fetch_finish(int retvalue, struct strbuf *err,
void *cb, void *task_cb)
{
struct submodule_parallel_fetch *spf = cb;
if (retvalue)
spf->result = 1;
return 0;
}
int fetch_populated_submodules(const struct argv_array *options,
const char *prefix, int command_line_option,
int quiet, int max_parallel_jobs)
{
int i;
struct submodule_parallel_fetch spf = SPF_INIT;
spf.work_tree = get_git_work_tree();
spf.command_line_option = command_line_option;
spf.quiet = quiet;
spf.prefix = prefix;
if (!spf.work_tree)
goto out;
if (read_cache() < 0)
die("index file corrupt");
argv_array_push(&spf.args, "fetch");
for (i = 0; i < options->argc; i++)
argv_array_push(&spf.args, options->argv[i]);
argv_array_push(&spf.args, "--recurse-submodules-default");
/* default value, "--submodule-prefix" and its value are added later */
if (max_parallel_jobs < 0)
max_parallel_jobs = parallel_jobs;
calculate_changed_submodule_paths();
run_processes_parallel(max_parallel_jobs,
get_next_submodule,
fetch_start_failure,
fetch_finish,
&spf);
argv_array_clear(&spf.args);
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-06 23:10:46 +01:00
out:
string_list_clear(&changed_submodule_paths, 1);
return spf.result;
}
unsigned is_submodule_modified(const char *path, int ignore_untracked)
{
struct child_process cp = CHILD_PROCESS_INIT;
struct strbuf buf = STRBUF_INIT;
FILE *fp;
unsigned dirty_submodule = 0;
const char *git_dir;
int ignore_cp_exit_code = 0;
strbuf_addf(&buf, "%s/.git", path);
git_dir = read_gitfile(buf.buf);
if (!git_dir)
git_dir = buf.buf;
if (!is_git_directory(git_dir)) {
if (is_directory(git_dir))
die(_("'%s' not recognized as a git repository"), git_dir);
strbuf_release(&buf);
/* The submodule is not checked out, so it is not modified */
return 0;
}
strbuf_reset(&buf);
argv_array_pushl(&cp.args, "status", "--porcelain=2", NULL);
if (ignore_untracked)
argv_array_push(&cp.args, "-uno");
prepare_submodule_repo_env(&cp.env_array);
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.out = -1;
cp.dir = path;
if (start_command(&cp))
die("Could not run 'git status --porcelain=2' in submodule %s", path);
fp = xfdopen(cp.out, "r");
while (strbuf_getwholeline(&buf, fp, '\n') != EOF) {
/* regular untracked files */
if (buf.buf[0] == '?')
dirty_submodule |= DIRTY_SUBMODULE_UNTRACKED;
if (buf.buf[0] == 'u' ||
buf.buf[0] == '1' ||
buf.buf[0] == '2') {
/* T = line type, XY = status, SSSS = submodule state */
if (buf.len < strlen("T XY SSSS"))
die("BUG: invalid status --porcelain=2 line %s",
buf.buf);
if (buf.buf[5] == 'S' && buf.buf[8] == 'U')
/* nested untracked file */
dirty_submodule |= DIRTY_SUBMODULE_UNTRACKED;
if (buf.buf[0] == 'u' ||
buf.buf[0] == '2' ||
memcmp(buf.buf + 5, "S..U", 4))
/* other change */
dirty_submodule |= DIRTY_SUBMODULE_MODIFIED;
}
if ((dirty_submodule & DIRTY_SUBMODULE_MODIFIED) &&
((dirty_submodule & DIRTY_SUBMODULE_UNTRACKED) ||
ignore_untracked)) {
/*
* We're not interested in any further information from
* the child any more, neither output nor its exit code.
*/
ignore_cp_exit_code = 1;
break;
}
}
fclose(fp);
if (finish_command(&cp) && !ignore_cp_exit_code)
die("'git status --porcelain=2' failed in submodule %s", path);
strbuf_release(&buf);
return dirty_submodule;
}
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
int submodule_uses_gitfile(const char *path)
{
struct child_process cp = CHILD_PROCESS_INIT;
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
const char *argv[] = {
"submodule",
"foreach",
"--quiet",
"--recursive",
"test -f .git",
NULL,
};
struct strbuf buf = STRBUF_INIT;
const char *git_dir;
strbuf_addf(&buf, "%s/.git", path);
git_dir = read_gitfile(buf.buf);
if (!git_dir) {
strbuf_release(&buf);
return 0;
}
strbuf_release(&buf);
/* Now test that all nested submodules use a gitfile too */
cp.argv = argv;
prepare_submodule_repo_env(&cp.env_array);
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.no_stderr = 1;
cp.no_stdout = 1;
cp.dir = path;
if (run_command(&cp))
return 0;
return 1;
}
/*
* Check if it is a bad idea to remove a submodule, i.e. if we'd lose data
* when doing so.
*
* Return 1 if we'd lose data, return 0 if the removal is fine,
* and negative values for errors.
*/
int bad_to_remove_submodule(const char *path, unsigned flags)
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
{
ssize_t len;
struct child_process cp = CHILD_PROCESS_INIT;
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
struct strbuf buf = STRBUF_INIT;
int ret = 0;
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
if (!file_exists(path) || is_empty_dir(path))
return 0;
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
if (!submodule_uses_gitfile(path))
return 1;
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
argv_array_pushl(&cp.args, "status", "--porcelain",
"--ignore-submodules=none", NULL);
if (flags & SUBMODULE_REMOVAL_IGNORE_UNTRACKED)
argv_array_push(&cp.args, "-uno");
else
argv_array_push(&cp.args, "-uall");
if (!(flags & SUBMODULE_REMOVAL_IGNORE_IGNORED_UNTRACKED))
argv_array_push(&cp.args, "--ignored");
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
prepare_submodule_repo_env(&cp.env_array);
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.out = -1;
cp.dir = path;
if (start_command(&cp)) {
if (flags & SUBMODULE_REMOVAL_DIE_ON_ERROR)
die(_("could not start 'git status' in submodule '%s'"),
path);
ret = -1;
goto out;
}
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
len = strbuf_read(&buf, cp.out, 1024);
if (len > 2)
ret = 1;
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
close(cp.out);
if (finish_command(&cp)) {
if (flags & SUBMODULE_REMOVAL_DIE_ON_ERROR)
die(_("could not run 'git status' in submodule '%s'"),
path);
ret = -1;
}
out:
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
strbuf_release(&buf);
return ret;
submodule: teach rm to remove submodules unless they contain a git directory Currently using "git rm" on a submodule - populated or not - fails with this error: fatal: git rm: '<submodule path>': Is a directory This made sense in the past as there was no way to remove a submodule without possibly removing unpushed parts of the submodule's history contained in its .git directory too, so erroring out here protected the user from possible loss of data. But submodules cloned with a recent git version do not contain the .git directory anymore, they use a gitfile to point to their git directory which is safely stored inside the superproject's .git directory. The work tree of these submodules can safely be removed without losing history, so let's teach git to do so. Using rm on an unpopulated submodule now removes the empty directory from the work tree and the gitlink from the index. If the submodule's directory is missing from the work tree, it will still be removed from the index. Using rm on a populated submodule using a gitfile will apply the usual checks for work tree modification adapted to submodules (unless forced). For a submodule that means that the HEAD is the same as recorded in the index, no tracked files are modified and no untracked files that aren't ignored are present in the submodules work tree (ignored files are deemed expendable and won't stop a submodule's work tree from being removed). That logic has to be applied in all nested submodules too. Using rm on a submodule which has its .git directory inside the work trees top level directory will just error out like it did before to protect the repository, even when forced. In the future git could either provide a message informing the user to convert the submodule to use a gitfile or even attempt to do the conversion itself, but that is not part of this change. Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-09-26 20:21:13 +02:00
}
static const char *get_super_prefix_or_empty(void)
{
const char *s = get_super_prefix();
if (!s)
s = "";
return s;
}
static int submodule_has_dirty_index(const struct submodule *sub)
{
struct child_process cp = CHILD_PROCESS_INIT;
prepare_submodule_repo_env(&cp.env_array);
cp.git_cmd = 1;
argv_array_pushl(&cp.args, "diff-index", "--quiet",
"--cached", "HEAD", NULL);
cp.no_stdin = 1;
cp.no_stdout = 1;
cp.dir = sub->path;
if (start_command(&cp))
die("could not recurse into submodule '%s'", sub->path);
return finish_command(&cp);
}
static void submodule_reset_index(const char *path)
{
struct child_process cp = CHILD_PROCESS_INIT;
prepare_submodule_repo_env(&cp.env_array);
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.dir = path;
argv_array_pushf(&cp.args, "--super-prefix=%s%s/",
get_super_prefix_or_empty(), path);
argv_array_pushl(&cp.args, "read-tree", "-u", "--reset", NULL);
argv_array_push(&cp.args, EMPTY_TREE_SHA1_HEX);
if (run_command(&cp))
die("could not reset submodule index");
}
/**
* Moves a submodule at a given path from a given head to another new head.
* For edge cases (a submodule coming into existence or removing a submodule)
* pass NULL for old or new respectively.
*/
int submodule_move_head(const char *path,
const char *old,
const char *new,
unsigned flags)
{
int ret = 0;
struct child_process cp = CHILD_PROCESS_INIT;
const struct submodule *sub;
int *error_code_ptr, error_code;
if (!is_submodule_initialized(path))
return 0;
if (flags & SUBMODULE_MOVE_HEAD_FORCE)
/*
* Pass non NULL pointer to is_submodule_populated_gently
* to prevent die()-ing. We'll use connect_work_tree_and_git_dir
* to fixup the submodule in the force case later.
*/
error_code_ptr = &error_code;
else
error_code_ptr = NULL;
if (old && !is_submodule_populated_gently(path, error_code_ptr))
return 0;
sub = submodule_from_path(null_sha1, path);
if (!sub)
die("BUG: could not get submodule information for '%s'", path);
if (old && !(flags & SUBMODULE_MOVE_HEAD_FORCE)) {
/* Check if the submodule has a dirty index. */
if (submodule_has_dirty_index(sub))
return error(_("submodule '%s' has dirty index"), path);
}
if (!(flags & SUBMODULE_MOVE_HEAD_DRY_RUN)) {
if (old) {
if (!submodule_uses_gitfile(path))
absorb_git_dir_into_superproject("", path,
ABSORB_GITDIR_RECURSE_SUBMODULES);
} else {
char *gitdir = xstrfmt("%s/modules/%s",
get_git_common_dir(), sub->name);
connect_work_tree_and_git_dir(path, gitdir);
free(gitdir);
/* make sure the index is clean as well */
submodule_reset_index(path);
}
if (old && (flags & SUBMODULE_MOVE_HEAD_FORCE)) {
char *gitdir = xstrfmt("%s/modules/%s",
get_git_common_dir(), sub->name);
connect_work_tree_and_git_dir(path, gitdir);
free(gitdir);
}
}
prepare_submodule_repo_env(&cp.env_array);
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.dir = path;
argv_array_pushf(&cp.args, "--super-prefix=%s%s/",
get_super_prefix_or_empty(), path);
argv_array_pushl(&cp.args, "read-tree", "--recurse-submodules", NULL);
if (flags & SUBMODULE_MOVE_HEAD_DRY_RUN)
argv_array_push(&cp.args, "-n");
else
argv_array_push(&cp.args, "-u");
if (flags & SUBMODULE_MOVE_HEAD_FORCE)
argv_array_push(&cp.args, "--reset");
else
argv_array_push(&cp.args, "-m");
argv_array_push(&cp.args, old ? old : EMPTY_TREE_SHA1_HEX);
argv_array_push(&cp.args, new ? new : EMPTY_TREE_SHA1_HEX);
if (run_command(&cp)) {
ret = -1;
goto out;
}
if (!(flags & SUBMODULE_MOVE_HEAD_DRY_RUN)) {
if (new) {
child_process_init(&cp);
/* also set the HEAD accordingly */
cp.git_cmd = 1;
cp.no_stdin = 1;
cp.dir = path;
prepare_submodule_repo_env(&cp.env_array);
argv_array_pushl(&cp.args, "update-ref", "HEAD", new, NULL);
if (run_command(&cp)) {
ret = -1;
goto out;
}
} else {
struct strbuf sb = STRBUF_INIT;
strbuf_addf(&sb, "%s/.git", path);
unlink_or_warn(sb.buf);
strbuf_release(&sb);
if (is_empty_dir(path))
rmdir_or_warn(path);
}
}
out:
return ret;
}
static int find_first_merges(struct object_array *result, const char *path,
struct commit *a, struct commit *b)
{
int i, j;
struct object_array merges = OBJECT_ARRAY_INIT;
struct commit *commit;
int contains_another;
char merged_revision[42];
const char *rev_args[] = { "rev-list", "--merges", "--ancestry-path",
"--all", merged_revision, NULL };
struct rev_info revs;
struct setup_revision_opt rev_opts;
memset(result, 0, sizeof(struct object_array));
memset(&rev_opts, 0, sizeof(rev_opts));
/* get all revisions that merge commit a */
xsnprintf(merged_revision, sizeof(merged_revision), "^%s",
oid_to_hex(&a->object.oid));
init_revisions(&revs, NULL);
rev_opts.submodule = path;
setup_revisions(ARRAY_SIZE(rev_args)-1, rev_args, &revs, &rev_opts);
/* save all revisions from the above list that contain b */
if (prepare_revision_walk(&revs))
die("revision walk setup failed");
while ((commit = get_revision(&revs)) != NULL) {
struct object *o = &(commit->object);
if (in_merge_bases(b, commit))
add_object_array(o, NULL, &merges);
}
reset_revision_walk();
/* Now we've got all merges that contain a and b. Prune all
* merges that contain another found merge and save them in
* result.
*/
for (i = 0; i < merges.nr; i++) {
struct commit *m1 = (struct commit *) merges.objects[i].item;
contains_another = 0;
for (j = 0; j < merges.nr; j++) {
struct commit *m2 = (struct commit *) merges.objects[j].item;
if (i != j && in_merge_bases(m2, m1)) {
contains_another = 1;
break;
}
}
if (!contains_another)
add_object_array(merges.objects[i].item, NULL, result);
}
free(merges.objects);
return result->nr;
}
static void print_commit(struct commit *commit)
{
struct strbuf sb = STRBUF_INIT;
struct pretty_print_context ctx = {0};
convert "enum date_mode" into a struct In preparation for adding date modes that may carry extra information beyond the mode itself, this patch converts the date_mode enum into a struct. Most of the conversion is fairly straightforward; we pass the struct as a pointer and dereference the type field where necessary. Locations that declare a date_mode can use a "{}" constructor. However, the tricky case is where we use the enum labels as constants, like: show_date(t, tz, DATE_NORMAL); Ideally we could say: show_date(t, tz, &{ DATE_NORMAL }); but of course C does not allow that. Likewise, we cannot cast the constant to a struct, because we need to pass an actual address. Our options are basically: 1. Manually add a "struct date_mode d = { DATE_NORMAL }" definition to each caller, and pass "&d". This makes the callers uglier, because they sometimes do not even have their own scope (e.g., they are inside a switch statement). 2. Provide a pre-made global "date_normal" struct that can be passed by address. We'd also need "date_rfc2822", "date_iso8601", and so forth. But at least the ugliness is defined in one place. 3. Provide a wrapper that generates the correct struct on the fly. The big downside is that we end up pointing to a single global, which makes our wrapper non-reentrant. But show_date is already not reentrant, so it does not matter. This patch implements 3, along with a minor macro to keep the size of the callers sane. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-06-25 18:55:02 +02:00
ctx.date_mode.type = DATE_NORMAL;
format_commit_message(commit, " %h: %m %s", &sb, &ctx);
fprintf(stderr, "%s\n", sb.buf);
strbuf_release(&sb);
}
#define MERGE_WARNING(path, msg) \
warning("Failed to merge submodule %s (%s)", path, msg);
int merge_submodule(struct object_id *result, const char *path,
const struct object_id *base, const struct object_id *a,
const struct object_id *b, int search)
{
struct commit *commit_base, *commit_a, *commit_b;
int parent_count;
struct object_array merges;
int i;
/* store a in result in case we fail */
oidcpy(result, a);
/* we can not handle deletion conflicts */
if (is_null_oid(base))
return 0;
if (is_null_oid(a))
return 0;
if (is_null_oid(b))
return 0;
if (add_submodule_odb(path)) {
MERGE_WARNING(path, "not checked out");
return 0;
}
if (!(commit_base = lookup_commit_reference(base)) ||
!(commit_a = lookup_commit_reference(a)) ||
!(commit_b = lookup_commit_reference(b))) {
MERGE_WARNING(path, "commits not present");
return 0;
}
/* check whether both changes are forward */
if (!in_merge_bases(commit_base, commit_a) ||
!in_merge_bases(commit_base, commit_b)) {
MERGE_WARNING(path, "commits don't follow merge-base");
return 0;
}
/* Case #1: a is contained in b or vice versa */
if (in_merge_bases(commit_a, commit_b)) {
oidcpy(result, b);
return 1;
}
if (in_merge_bases(commit_b, commit_a)) {
oidcpy(result, a);
return 1;
}
/*
* Case #2: There are one or more merges that contain a and b in
* the submodule. If there is only one, then present it as a
* suggestion to the user, but leave it marked unmerged so the
* user needs to confirm the resolution.
*/
/* Skip the search if makes no sense to the calling context. */
if (!search)
return 0;
/* find commit which merges them */
parent_count = find_first_merges(&merges, path, commit_a, commit_b);
switch (parent_count) {
case 0:
MERGE_WARNING(path, "merge following commits not found");
break;
case 1:
MERGE_WARNING(path, "not fast-forward");
fprintf(stderr, "Found a possible merge resolution "
"for the submodule:\n");
print_commit((struct commit *) merges.objects[0].item);
fprintf(stderr,
"If this is correct simply add it to the index "
"for example\n"
"by using:\n\n"
" git update-index --cacheinfo 160000 %s \"%s\"\n\n"
"which will accept this suggestion.\n",
oid_to_hex(&merges.objects[0].item->oid), path);
break;
default:
MERGE_WARNING(path, "multiple merges found");
for (i = 0; i < merges.nr; i++)
print_commit((struct commit *) merges.objects[i].item);
}
free(merges.objects);
return 0;
}
int parallel_submodules(void)
{
return parallel_jobs;
}
/*
* Embeds a single submodules git directory into the superprojects git dir,
* non recursively.
*/
static void relocate_single_git_dir_into_superproject(const char *prefix,
const char *path)
{
char *old_git_dir = NULL, *real_old_git_dir = NULL, *real_new_git_dir = NULL;
const char *new_git_dir;
const struct submodule *sub;
if (submodule_uses_worktrees(path))
die(_("relocate_gitdir for submodule '%s' with "
"more than one worktree not supported"), path);
old_git_dir = xstrfmt("%s/.git", path);
if (read_gitfile(old_git_dir))
/* If it is an actual gitfile, it doesn't need migration. */
return;
real_old_git_dir = real_pathdup(old_git_dir, 1);
sub = submodule_from_path(null_sha1, path);
if (!sub)
die(_("could not lookup name for submodule '%s'"), path);
new_git_dir = git_path("modules/%s", sub->name);
if (safe_create_leading_directories_const(new_git_dir) < 0)
die(_("could not create directory '%s'"), new_git_dir);
real_new_git_dir = real_pathdup(new_git_dir, 1);
fprintf(stderr, _("Migrating git directory of '%s%s' from\n'%s' to\n'%s'\n"),
get_super_prefix_or_empty(), path,
real_old_git_dir, real_new_git_dir);
relocate_gitdir(path, real_old_git_dir, real_new_git_dir);
free(old_git_dir);
free(real_old_git_dir);
free(real_new_git_dir);
}
/*
* Migrate the git directory of the submodule given by path from
* having its git directory within the working tree to the git dir nested
* in its superprojects git dir under modules/.
*/
void absorb_git_dir_into_superproject(const char *prefix,
const char *path,
unsigned flags)
{
int err_code;
const char *sub_git_dir;
struct strbuf gitdir = STRBUF_INIT;
strbuf_addf(&gitdir, "%s/.git", path);
sub_git_dir = resolve_gitdir_gently(gitdir.buf, &err_code);
/* Not populated? */
if (!sub_git_dir) {
const struct submodule *sub;
if (err_code == READ_GITFILE_ERR_STAT_FAILED) {
/* unpopulated as expected */
strbuf_release(&gitdir);
return;
}
if (err_code != READ_GITFILE_ERR_NOT_A_REPO)
/* We don't know what broke here. */
read_gitfile_error_die(err_code, path, NULL);
/*
* Maybe populated, but no git directory was found?
* This can happen if the superproject is a submodule
* itself and was just absorbed. The absorption of the
* superproject did not rewrite the git file links yet,
* fix it now.
*/
sub = submodule_from_path(null_sha1, path);
if (!sub)
die(_("could not lookup name for submodule '%s'"), path);
connect_work_tree_and_git_dir(path,
git_path("modules/%s", sub->name));
} else {
/* Is it already absorbed into the superprojects git dir? */
char *real_sub_git_dir = real_pathdup(sub_git_dir, 1);
char *real_common_git_dir = real_pathdup(get_git_common_dir(), 1);
if (!starts_with(real_sub_git_dir, real_common_git_dir))
relocate_single_git_dir_into_superproject(prefix, path);
free(real_sub_git_dir);
free(real_common_git_dir);
}
strbuf_release(&gitdir);
if (flags & ABSORB_GITDIR_RECURSE_SUBMODULES) {
struct child_process cp = CHILD_PROCESS_INIT;
struct strbuf sb = STRBUF_INIT;
if (flags & ~ABSORB_GITDIR_RECURSE_SUBMODULES)
die("BUG: we don't know how to pass the flags down?");
strbuf_addstr(&sb, get_super_prefix_or_empty());
strbuf_addstr(&sb, path);
strbuf_addch(&sb, '/');
cp.dir = path;
cp.git_cmd = 1;
cp.no_stdin = 1;
argv_array_pushl(&cp.args, "--super-prefix", sb.buf,
"submodule--helper",
"absorb-git-dirs", NULL);
prepare_submodule_repo_env(&cp.env_array);
if (run_command(&cp))
die(_("could not recurse into submodule '%s'"), path);
strbuf_release(&sb);
}
}
const char *get_superproject_working_tree(void)
{
struct child_process cp = CHILD_PROCESS_INIT;
struct strbuf sb = STRBUF_INIT;
const char *one_up = real_path_if_valid("../");
const char *cwd = xgetcwd();
const char *ret = NULL;
const char *subpath;
int code;
ssize_t len;
if (!is_inside_work_tree())
/*
* FIXME:
* We might have a superproject, but it is harder
* to determine.
*/
return NULL;
if (!one_up)
return NULL;
subpath = relative_path(cwd, one_up, &sb);
prepare_submodule_repo_env(&cp.env_array);
argv_array_pop(&cp.env_array);
argv_array_pushl(&cp.args, "--literal-pathspecs", "-C", "..",
"ls-files", "-z", "--stage", "--full-name", "--",
subpath, NULL);
strbuf_reset(&sb);
cp.no_stdin = 1;
cp.no_stderr = 1;
cp.out = -1;
cp.git_cmd = 1;
if (start_command(&cp))
die(_("could not start ls-files in .."));
len = strbuf_read(&sb, cp.out, PATH_MAX);
close(cp.out);
if (starts_with(sb.buf, "160000")) {
int super_sub_len;
int cwd_len = strlen(cwd);
char *super_sub, *super_wt;
/*
* There is a superproject having this repo as a submodule.
* The format is <mode> SP <hash> SP <stage> TAB <full name> \0,
* We're only interested in the name after the tab.
*/
super_sub = strchr(sb.buf, '\t') + 1;
super_sub_len = sb.buf + sb.len - super_sub - 1;
if (super_sub_len > cwd_len ||
strcmp(&cwd[cwd_len - super_sub_len], super_sub))
die (_("BUG: returned path string doesn't match cwd?"));
super_wt = xstrdup(cwd);
super_wt[cwd_len - super_sub_len] = '\0';
ret = real_path(super_wt);
free(super_wt);
}
strbuf_release(&sb);
code = finish_command(&cp);
if (code == 128)
/* '../' is not a git repository */
return NULL;
if (code == 0 && len == 0)
/* There is an unrelated git repository at '../' */
return NULL;
if (code)
die(_("ls-tree returned unexpected return code %d"), code);
return ret;
}
int submodule_to_gitdir(struct strbuf *buf, const char *submodule)
{
const struct submodule *sub;
const char *git_dir;
int ret = 0;
strbuf_reset(buf);
strbuf_addstr(buf, submodule);
strbuf_complete(buf, '/');
strbuf_addstr(buf, ".git");
git_dir = read_gitfile(buf->buf);
if (git_dir) {
strbuf_reset(buf);
strbuf_addstr(buf, git_dir);
}
if (!is_git_directory(buf->buf)) {
gitmodules_config();
sub = submodule_from_path(null_sha1, submodule);
if (!sub) {
ret = -1;
goto cleanup;
}
strbuf_reset(buf);
strbuf_git_path(buf, "%s/%s", "modules", sub->name);
}
cleanup:
return ret;
}