git-commit-vandalism/builtin/fetch.c

1417 lines
39 KiB
C
Raw Normal View History

/*
* "git fetch"
*/
#include "cache.h"
#include "config.h"
#include "refs.h"
#include "commit.h"
#include "builtin.h"
#include "string-list.h"
#include "remote.h"
#include "transport.h"
git-fetch: avoid local fetching from alternate (again) Back in e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Junio taught git-fetch to avoid copying objects when we are fetching from a repository that is already registered as an alternate object database. In such a case there is no reason to copy any objects as we can already obtain them through the alternate. However we need to ensure the objects are all reachable, so we run `git rev-list --objects $theirs --not --all` to verify this. If any object is missing or unreadable then we need to fetch/copy the objects from the remote. When a missing object is detected the git-rev-list process will exit with a non-zero exit status, making this condition quite easy to detect. Although git-fetch is currently a builtin (and so is rev-list) we cannot invoke the traverse_objects() API at this point in the transport code. The object walker within traverse_objects() calls die() as soon as it finds an object it cannot read. If that happens we want to resume the fetch process by calling do_fetch_pack(). To get around this we spawn git-rev-list into a background process to prevent a die() from killing the foreground fetch process, thus allowing the fetch process to resume into do_fetch_pack() if copying is necessary. We aren't interested in the output of rev-list (a list of SHA-1 object names that are reachable) or its errors (a "spurious" error about an object not being found as we need to copy it) so we redirect both stdout and stderr to /dev/null. We run this git-rev-list based check before any fetch as we may already have the necessary objects local from a prior fetch. If we don't then its very likely the first $theirs object listed on the command line won't exist locally and git-rev-list will die very quickly, allowing us to start the network transfer. This test even on remote URLs may save bandwidth if someone runs `git pull origin`, sees a merge conflict, resets out, then redoes the same pull just a short time later. If the remote hasn't changed between the two pulls and the local repository hasn't had git-gc run in it then there is probably no need to perform network transfer as all of the objects are local. Documentation for the new quickfetch function was suggested and written by Junio, based on his original comment in git-fetch.sh. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-11-11 08:29:47 +01:00
#include "run-command.h"
#include "parse-options.h"
#include "sigchain.h"
#include "submodule-config.h"
#include "submodule.h"
#include "connected.h"
#include "argv-array.h"
#include "utf8.h"
static const char * const builtin_fetch_usage[] = {
N_("git fetch [<options>] [<repository> [<refspec>...]]"),
N_("git fetch [<options>] <group>"),
N_("git fetch --multiple [<options>] [(<repository> | <group>)...]"),
N_("git fetch --all [<options>]"),
NULL
};
enum {
TAGS_UNSET = 0,
TAGS_DEFAULT = 1,
TAGS_SET = 2
};
static int fetch_prune_config = -1; /* unspecified */
static int prune = -1; /* unspecified */
#define PRUNE_BY_DEFAULT 0 /* do we prune by default? */
fetch, upload-pack: --deepen=N extends shallow boundary by N commits In git-fetch, --depth argument is always relative with the latest remote refs. This makes it a bit difficult to cover this use case, where the user wants to make the shallow history, say 3 levels deeper. It would work if remote refs have not moved yet, but nobody can guarantee that, especially when that use case is performed a couple months after the last clone or "git fetch --depth". Also, modifying shallow boundary using --depth does not work well with clones created by --since or --not. This patch fixes that. A new argument --deepen=<N> will add <N> more (*) parent commits to the current history regardless of where remote refs are. Have/Want negotiation is still respected. So if remote refs move, the server will send two chunks: one between "have" and "want" and another to extend shallow history. In theory, the client could send no "want"s in order to get the second chunk only. But the protocol does not allow that. Either you send no want lines, which means ls-remote; or you have to send at least one want line that carries deep-relative to the server.. The main work was done by Dongcan Jiang. I fixed it up here and there. And of course all the bugs belong to me. (*) We could even support --deepen=<N> where <N> is negative. In that case we can cut some history from the shallow clone. This operation (and --depth=<shorter depth>) does not require interaction with remote side (and more complicated to implement as a result). Helped-by: Duy Nguyen <pclouds@gmail.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 12:54:09 +02:00
static int all, append, dry_run, force, keep, multiple, update_head_ok, verbosity, deepen_relative;
static int progress = -1, recurse_submodules = RECURSE_SUBMODULES_DEFAULT;
static int tags = TAGS_DEFAULT, unshallow, update_shallow, deepen;
static int max_children = -1;
static enum transport_family family;
git-fetch: avoid local fetching from alternate (again) Back in e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Junio taught git-fetch to avoid copying objects when we are fetching from a repository that is already registered as an alternate object database. In such a case there is no reason to copy any objects as we can already obtain them through the alternate. However we need to ensure the objects are all reachable, so we run `git rev-list --objects $theirs --not --all` to verify this. If any object is missing or unreadable then we need to fetch/copy the objects from the remote. When a missing object is detected the git-rev-list process will exit with a non-zero exit status, making this condition quite easy to detect. Although git-fetch is currently a builtin (and so is rev-list) we cannot invoke the traverse_objects() API at this point in the transport code. The object walker within traverse_objects() calls die() as soon as it finds an object it cannot read. If that happens we want to resume the fetch process by calling do_fetch_pack(). To get around this we spawn git-rev-list into a background process to prevent a die() from killing the foreground fetch process, thus allowing the fetch process to resume into do_fetch_pack() if copying is necessary. We aren't interested in the output of rev-list (a list of SHA-1 object names that are reachable) or its errors (a "spurious" error about an object not being found as we need to copy it) so we redirect both stdout and stderr to /dev/null. We run this git-rev-list based check before any fetch as we may already have the necessary objects local from a prior fetch. If we don't then its very likely the first $theirs object listed on the command line won't exist locally and git-rev-list will die very quickly, allowing us to start the network transfer. This test even on remote URLs may save bandwidth if someone runs `git pull origin`, sees a merge conflict, resets out, then redoes the same pull just a short time later. If the remote hasn't changed between the two pulls and the local repository hasn't had git-gc run in it then there is probably no need to perform network transfer as all of the objects are local. Documentation for the new quickfetch function was suggested and written by Junio, based on his original comment in git-fetch.sh. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-11-11 08:29:47 +01:00
static const char *depth;
static const char *deepen_since;
static const char *upload_pack;
static struct string_list deepen_not = STRING_LIST_INIT_NODUP;
static struct strbuf default_rla = STRBUF_INIT;
static struct transport *gtransport;
fetch: work around "transport-take-over" hack A Git-aware "connect" transport allows the "transport_take_over" to redirect generic transport requests like fetch(), push_refs() and get_refs_list() to the native Git transport handling methods. The take-over process replaces transport->data with a fake data that these method implementations understand. While this hack works OK for a single request, it breaks when the transport needs to make more than one requests. transport->data that used to hold necessary information for the specific helper to work correctly is destroyed during the take-over process. One codepath that this matters is "git fetch" in auto-follow mode; when it does not get all the tags that ought to point at the history it got (which can be determined by looking at the peeled tags in the initial advertisement) from the primary transfer, it internally makes a second request to complete the fetch. Because "take-over" hack has already destroyed the data necessary to talk to the transport helper by the time this happens, the second request cannot make a request to the helper to make another connection to fetch these additional tags. Mark such a transport as "cannot_reuse", and use a separate transport to perform the backfill fetch in order to work around this breakage. Note that this problem does not manifest itself when running t5802, because our upload-pack gives you all the necessary auto-followed tags during the primary transfer. You would need to step through "git fetch" in a debugger, stop immediately after the primary transfer finishes and writes these auto-followed tags, remove the tag references and repack/prune the repository to convince the "find-non-local-tags" procedure that the primary transfer failed to give us all the necessary tags, and then let it continue, in order to trigger the bug in the secondary transfer this patch fixes. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-08 00:47:18 +02:00
static struct transport *gsecondary;
static const char *submodule_prefix = "";
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-06 23:10:46 +01:00
static const char *recurse_submodules_default;
static int shown_url = 0;
static int refmap_alloc, refmap_nr;
static const char **refmap_array;
static int option_parse_recurse_submodules(const struct option *opt,
const char *arg, int unset)
{
if (unset) {
recurse_submodules = RECURSE_SUBMODULES_OFF;
} else {
if (arg)
recurse_submodules = parse_fetch_recurse_submodules_arg(opt->long_name, arg);
else
recurse_submodules = RECURSE_SUBMODULES_ON;
}
return 0;
}
static int git_fetch_config(const char *k, const char *v, void *cb)
{
if (!strcmp(k, "fetch.prune")) {
fetch_prune_config = git_config_bool(k, v);
return 0;
}
if (!strcmp(k, "submodule.recurse")) {
int r = git_config_bool(k, v) ?
RECURSE_SUBMODULES_ON : RECURSE_SUBMODULES_OFF;
recurse_submodules = r;
}
return git_default_config(k, v, cb);
}
static int parse_refmap_arg(const struct option *opt, const char *arg, int unset)
{
ALLOC_GROW(refmap_array, refmap_nr + 1, refmap_alloc);
/*
* "git fetch --refmap='' origin foo"
* can be used to tell the command not to store anywhere
*/
if (*arg)
refmap_array[refmap_nr++] = arg;
return 0;
}
static struct option builtin_fetch_options[] = {
OPT__VERBOSITY(&verbosity),
OPT_BOOL(0, "all", &all,
N_("fetch from all remotes")),
OPT_BOOL('a', "append", &append,
N_("append to .git/FETCH_HEAD instead of overwriting")),
OPT_STRING(0, "upload-pack", &upload_pack, N_("path"),
N_("path to upload pack on remote end")),
OPT__FORCE(&force, N_("force overwrite of local branch")),
OPT_BOOL('m', "multiple", &multiple,
N_("fetch from multiple remotes")),
OPT_SET_INT('t', "tags", &tags,
N_("fetch all tags and associated objects"), TAGS_SET),
OPT_SET_INT('n', NULL, &tags,
N_("do not fetch all tags (--no-tags)"), TAGS_UNSET),
OPT_INTEGER('j', "jobs", &max_children,
N_("number of submodules fetched in parallel")),
OPT_BOOL('p', "prune", &prune,
N_("prune remote-tracking branches no longer on remote")),
{ OPTION_CALLBACK, 0, "recurse-submodules", NULL, N_("on-demand"),
N_("control recursive fetching of submodules"),
PARSE_OPT_OPTARG, option_parse_recurse_submodules },
OPT_BOOL(0, "dry-run", &dry_run,
N_("dry run")),
OPT_BOOL('k', "keep", &keep, N_("keep downloaded pack")),
OPT_BOOL('u', "update-head-ok", &update_head_ok,
N_("allow updating of HEAD ref")),
OPT_BOOL(0, "progress", &progress, N_("force progress reporting")),
OPT_STRING(0, "depth", &depth, N_("depth"),
N_("deepen history of shallow clone")),
OPT_STRING(0, "shallow-since", &deepen_since, N_("time"),
N_("deepen history of shallow repository based on time")),
OPT_STRING_LIST(0, "shallow-exclude", &deepen_not, N_("revision"),
N_("deepen history of shallow clone, excluding rev")),
fetch, upload-pack: --deepen=N extends shallow boundary by N commits In git-fetch, --depth argument is always relative with the latest remote refs. This makes it a bit difficult to cover this use case, where the user wants to make the shallow history, say 3 levels deeper. It would work if remote refs have not moved yet, but nobody can guarantee that, especially when that use case is performed a couple months after the last clone or "git fetch --depth". Also, modifying shallow boundary using --depth does not work well with clones created by --since or --not. This patch fixes that. A new argument --deepen=<N> will add <N> more (*) parent commits to the current history regardless of where remote refs are. Have/Want negotiation is still respected. So if remote refs move, the server will send two chunks: one between "have" and "want" and another to extend shallow history. In theory, the client could send no "want"s in order to get the second chunk only. But the protocol does not allow that. Either you send no want lines, which means ls-remote; or you have to send at least one want line that carries deep-relative to the server.. The main work was done by Dongcan Jiang. I fixed it up here and there. And of course all the bugs belong to me. (*) We could even support --deepen=<N> where <N> is negative. In that case we can cut some history from the shallow clone. This operation (and --depth=<shorter depth>) does not require interaction with remote side (and more complicated to implement as a result). Helped-by: Duy Nguyen <pclouds@gmail.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 12:54:09 +02:00
OPT_INTEGER(0, "deepen", &deepen_relative,
N_("deepen history of shallow clone")),
{ OPTION_SET_INT, 0, "unshallow", &unshallow, NULL,
N_("convert to a complete repository"),
PARSE_OPT_NONEG | PARSE_OPT_NOARG, NULL, 1 },
{ OPTION_STRING, 0, "submodule-prefix", &submodule_prefix, N_("dir"),
N_("prepend this to submodule path output"), PARSE_OPT_HIDDEN },
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-06 23:10:46 +01:00
{ OPTION_STRING, 0, "recurse-submodules-default",
&recurse_submodules_default, NULL,
N_("default mode for recursion"), PARSE_OPT_HIDDEN },
OPT_BOOL(0, "update-shallow", &update_shallow,
N_("accept refs that update .git/shallow")),
{ OPTION_CALLBACK, 0, "refmap", NULL, N_("refmap"),
N_("specify fetch refmap"), PARSE_OPT_NONEG, parse_refmap_arg },
OPT_SET_INT('4', "ipv4", &family, N_("use IPv4 addresses only"),
TRANSPORT_FAMILY_IPV4),
OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"),
TRANSPORT_FAMILY_IPV6),
OPT_END()
};
static void unlock_pack(void)
{
if (gtransport)
transport_unlock_pack(gtransport);
fetch: work around "transport-take-over" hack A Git-aware "connect" transport allows the "transport_take_over" to redirect generic transport requests like fetch(), push_refs() and get_refs_list() to the native Git transport handling methods. The take-over process replaces transport->data with a fake data that these method implementations understand. While this hack works OK for a single request, it breaks when the transport needs to make more than one requests. transport->data that used to hold necessary information for the specific helper to work correctly is destroyed during the take-over process. One codepath that this matters is "git fetch" in auto-follow mode; when it does not get all the tags that ought to point at the history it got (which can be determined by looking at the peeled tags in the initial advertisement) from the primary transfer, it internally makes a second request to complete the fetch. Because "take-over" hack has already destroyed the data necessary to talk to the transport helper by the time this happens, the second request cannot make a request to the helper to make another connection to fetch these additional tags. Mark such a transport as "cannot_reuse", and use a separate transport to perform the backfill fetch in order to work around this breakage. Note that this problem does not manifest itself when running t5802, because our upload-pack gives you all the necessary auto-followed tags during the primary transfer. You would need to step through "git fetch" in a debugger, stop immediately after the primary transfer finishes and writes these auto-followed tags, remove the tag references and repack/prune the repository to convince the "find-non-local-tags" procedure that the primary transfer failed to give us all the necessary tags, and then let it continue, in order to trigger the bug in the secondary transfer this patch fixes. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-08 00:47:18 +02:00
if (gsecondary)
transport_unlock_pack(gsecondary);
}
static void unlock_pack_on_signal(int signo)
{
unlock_pack();
sigchain_pop(signo);
raise(signo);
}
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
static void add_merge_config(struct ref **head,
const struct ref *remote_refs,
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
struct branch *branch,
struct ref ***tail)
{
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
int i;
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
for (i = 0; i < branch->merge_nr; i++) {
struct ref *rm, **old_tail = *tail;
struct refspec refspec;
for (rm = *head; rm; rm = rm->next) {
if (branch_merge_matches(branch, i, rm->name)) {
rm->fetch_head_status = FETCH_HEAD_MERGE;
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
break;
}
}
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
if (rm)
continue;
/*
* Not fetched to a remote-tracking branch? We need to fetch
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
* it anyway to allow this branch's "branch.$name.merge"
* to be honored by 'git pull', but we do not have to
* fail if branch.$name.merge is misconfigured to point
* at a nonexisting branch. If we were indeed called by
* 'git pull', it will notice the misconfiguration because
* there is no entry in the resulting FETCH_HEAD marked
* for merging.
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
*/
memset(&refspec, 0, sizeof(refspec));
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
refspec.src = branch->merge[i]->src;
get_fetch_map(remote_refs, &refspec, tail, 1);
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
for (rm = *old_tail; rm; rm = rm->next)
rm->fetch_head_status = FETCH_HEAD_MERGE;
}
}
static int add_existing(const char *refname, const struct object_id *oid,
int flag, void *cbdata)
{
struct string_list *list = (struct string_list *)cbdata;
struct string_list_item *item = string_list_insert(list, refname);
struct object_id *old_oid = xmalloc(sizeof(*old_oid));
oidcpy(old_oid, oid);
item->util = old_oid;
return 0;
}
static int will_fetch(struct ref **head, const unsigned char *sha1)
{
struct ref *rm = *head;
while (rm) {
if (!hashcmp(rm->old_oid.hash, sha1))
return 1;
rm = rm->next;
}
return 0;
}
static void find_non_local_tags(struct transport *transport,
struct ref **head,
struct ref ***tail)
{
struct string_list existing_refs = STRING_LIST_INIT_DUP;
struct string_list remote_refs = STRING_LIST_INIT_NODUP;
const struct ref *ref;
struct string_list_item *item = NULL;
for_each_ref(add_existing, &existing_refs);
for (ref = transport_get_remote_refs(transport); ref; ref = ref->next) {
if (!starts_with(ref->name, "refs/tags/"))
continue;
/*
* The peeled ref always follows the matching base
* ref, so if we see a peeled ref that we don't want
* to fetch then we can mark the ref entry in the list
* as one to ignore by setting util to NULL.
*/
if (ends_with(ref->name, "^{}")) {
fetch: use "quick" has_sha1_file for tag following When we auto-follow tags in a fetch, we look at all of the tags advertised by the remote and fetch ones where we don't already have the tag, but we do have the object it peels to. This involves a lot of calls to has_sha1_file(), some of which we can reasonably expect to fail. Since 45e8a74 (has_sha1_file: re-check pack directory before giving up, 2013-08-30), this may cause many calls to reprepare_packed_git(), which is potentially expensive. This has gone unnoticed for several years because it requires a fairly unique setup to matter: 1. You need to have a lot of packs on the client side to make reprepare_packed_git() expensive (the most expensive part is finding duplicates in an unsorted list, which is currently quadratic). 2. You need a large number of tag refs on the server side that are candidates for auto-following (i.e., that the client doesn't have). Each one triggers a re-read of the pack directory. 3. Under normal circumstances, the client would auto-follow those tags and after one large fetch, (2) would no longer be true. But if those tags point to history which is disconnected from what the client otherwise fetches, then it will never auto-follow, and those candidates will impact it on every fetch. So when all three are true, each fetch pays an extra O(nr_tags * nr_packs^2) cost, mostly in string comparisons on the pack names. This was exacerbated by 47bf4b0 (prepare_packed_git_one: refactor duplicate-pack check, 2014-06-30) which uses a slightly more expensive string check, under the assumption that the duplicate check doesn't happen very often (and it shouldn't; the real problem here is how often we are calling reprepare_packed_git()). This patch teaches fetch to use HAS_SHA1_QUICK to sacrifice accuracy for speed, in cases where we might be racy with a simultaneous repack. This is similar to the fix in 0eeb077 (index-pack: avoid excessive re-reading of pack directory, 2015-06-09). As with that case, it's OK for has_sha1_file() occasionally say "no I don't have it" when we do, because the worst case is not a corruption, but simply that we may fail to auto-follow a tag that points to it. Here are results from the included perf script, which sets up a situation similar to the one described above: Test HEAD^ HEAD ---------------------------------------------------------- 5550.4: fetch 11.21(10.42+0.78) 0.08(0.04+0.02) -99.3% Reported-by: Vegard Nossum <vegard.nossum@oracle.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-10-13 18:53:44 +02:00
if (item &&
!has_object_file_with_flags(&ref->old_oid,
OBJECT_INFO_QUICK) &&
!will_fetch(head, ref->old_oid.hash) &&
!has_sha1_file_with_flags(item->util,
OBJECT_INFO_QUICK) &&
!will_fetch(head, item->util))
item->util = NULL;
item = NULL;
continue;
}
/*
* If item is non-NULL here, then we previously saw a
* ref not followed by a peeled reference, so we need
* to check if it is a lightweight tag that we want to
* fetch.
*/
fetch: use "quick" has_sha1_file for tag following When we auto-follow tags in a fetch, we look at all of the tags advertised by the remote and fetch ones where we don't already have the tag, but we do have the object it peels to. This involves a lot of calls to has_sha1_file(), some of which we can reasonably expect to fail. Since 45e8a74 (has_sha1_file: re-check pack directory before giving up, 2013-08-30), this may cause many calls to reprepare_packed_git(), which is potentially expensive. This has gone unnoticed for several years because it requires a fairly unique setup to matter: 1. You need to have a lot of packs on the client side to make reprepare_packed_git() expensive (the most expensive part is finding duplicates in an unsorted list, which is currently quadratic). 2. You need a large number of tag refs on the server side that are candidates for auto-following (i.e., that the client doesn't have). Each one triggers a re-read of the pack directory. 3. Under normal circumstances, the client would auto-follow those tags and after one large fetch, (2) would no longer be true. But if those tags point to history which is disconnected from what the client otherwise fetches, then it will never auto-follow, and those candidates will impact it on every fetch. So when all three are true, each fetch pays an extra O(nr_tags * nr_packs^2) cost, mostly in string comparisons on the pack names. This was exacerbated by 47bf4b0 (prepare_packed_git_one: refactor duplicate-pack check, 2014-06-30) which uses a slightly more expensive string check, under the assumption that the duplicate check doesn't happen very often (and it shouldn't; the real problem here is how often we are calling reprepare_packed_git()). This patch teaches fetch to use HAS_SHA1_QUICK to sacrifice accuracy for speed, in cases where we might be racy with a simultaneous repack. This is similar to the fix in 0eeb077 (index-pack: avoid excessive re-reading of pack directory, 2015-06-09). As with that case, it's OK for has_sha1_file() occasionally say "no I don't have it" when we do, because the worst case is not a corruption, but simply that we may fail to auto-follow a tag that points to it. Here are results from the included perf script, which sets up a situation similar to the one described above: Test HEAD^ HEAD ---------------------------------------------------------- 5550.4: fetch 11.21(10.42+0.78) 0.08(0.04+0.02) -99.3% Reported-by: Vegard Nossum <vegard.nossum@oracle.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-10-13 18:53:44 +02:00
if (item &&
!has_sha1_file_with_flags(item->util, OBJECT_INFO_QUICK) &&
!will_fetch(head, item->util))
item->util = NULL;
item = NULL;
/* skip duplicates and refs that we already have */
if (string_list_has_string(&remote_refs, ref->name) ||
string_list_has_string(&existing_refs, ref->name))
continue;
item = string_list_insert(&remote_refs, ref->name);
item->util = (void *)&ref->old_oid;
}
string_list_clear(&existing_refs, 1);
/*
* We may have a final lightweight tag that needs to be
* checked to see if it needs fetching.
*/
fetch: use "quick" has_sha1_file for tag following When we auto-follow tags in a fetch, we look at all of the tags advertised by the remote and fetch ones where we don't already have the tag, but we do have the object it peels to. This involves a lot of calls to has_sha1_file(), some of which we can reasonably expect to fail. Since 45e8a74 (has_sha1_file: re-check pack directory before giving up, 2013-08-30), this may cause many calls to reprepare_packed_git(), which is potentially expensive. This has gone unnoticed for several years because it requires a fairly unique setup to matter: 1. You need to have a lot of packs on the client side to make reprepare_packed_git() expensive (the most expensive part is finding duplicates in an unsorted list, which is currently quadratic). 2. You need a large number of tag refs on the server side that are candidates for auto-following (i.e., that the client doesn't have). Each one triggers a re-read of the pack directory. 3. Under normal circumstances, the client would auto-follow those tags and after one large fetch, (2) would no longer be true. But if those tags point to history which is disconnected from what the client otherwise fetches, then it will never auto-follow, and those candidates will impact it on every fetch. So when all three are true, each fetch pays an extra O(nr_tags * nr_packs^2) cost, mostly in string comparisons on the pack names. This was exacerbated by 47bf4b0 (prepare_packed_git_one: refactor duplicate-pack check, 2014-06-30) which uses a slightly more expensive string check, under the assumption that the duplicate check doesn't happen very often (and it shouldn't; the real problem here is how often we are calling reprepare_packed_git()). This patch teaches fetch to use HAS_SHA1_QUICK to sacrifice accuracy for speed, in cases where we might be racy with a simultaneous repack. This is similar to the fix in 0eeb077 (index-pack: avoid excessive re-reading of pack directory, 2015-06-09). As with that case, it's OK for has_sha1_file() occasionally say "no I don't have it" when we do, because the worst case is not a corruption, but simply that we may fail to auto-follow a tag that points to it. Here are results from the included perf script, which sets up a situation similar to the one described above: Test HEAD^ HEAD ---------------------------------------------------------- 5550.4: fetch 11.21(10.42+0.78) 0.08(0.04+0.02) -99.3% Reported-by: Vegard Nossum <vegard.nossum@oracle.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-10-13 18:53:44 +02:00
if (item &&
!has_sha1_file_with_flags(item->util, OBJECT_INFO_QUICK) &&
!will_fetch(head, item->util))
item->util = NULL;
/*
* For all the tags in the remote_refs string list,
* add them to the list of refs to be fetched
*/
for_each_string_list_item(item, &remote_refs) {
/* Unless we have already decided to ignore this item... */
if (item->util)
{
struct ref *rm = alloc_ref(item->string);
rm->peer_ref = alloc_ref(item->string);
oidcpy(&rm->old_oid, item->util);
**tail = rm;
*tail = &rm->next;
}
}
string_list_clear(&remote_refs, 0);
}
static struct ref *get_ref_map(struct transport *transport,
struct refspec *refspecs, int refspec_count,
int tags, int *autotags)
{
int i;
struct ref *rm;
struct ref *ref_map = NULL;
struct ref **tail = &ref_map;
/* opportunistically-updated references: */
struct ref *orefs = NULL, **oref_tail = &orefs;
const struct ref *remote_refs = transport_get_remote_refs(transport);
if (refspec_count) {
struct refspec *fetch_refspec;
int fetch_refspec_nr;
for (i = 0; i < refspec_count; i++) {
get_fetch_map(remote_refs, &refspecs[i], &tail, 0);
if (refspecs[i].dst && refspecs[i].dst[0])
*autotags = 1;
}
/* Merge everything on the command line (but not --tags) */
for (rm = ref_map; rm; rm = rm->next)
rm->fetch_head_status = FETCH_HEAD_MERGE;
/*
* For any refs that we happen to be fetching via
* command-line arguments, the destination ref might
* have been missing or have been different than the
* remote-tracking ref that would be derived from the
* configured refspec. In these cases, we want to
* take the opportunity to update their configured
* remote-tracking reference. However, we do not want
* to mention these entries in FETCH_HEAD at all, as
* they would simply be duplicates of existing
* entries, so we set them FETCH_HEAD_IGNORE below.
*
* We compute these entries now, based only on the
* refspecs specified on the command line. But we add
* them to the list following the refspecs resulting
* from the tags option so that one of the latter,
* which has FETCH_HEAD_NOT_FOR_MERGE, is not removed
* by ref_remove_duplicates() in favor of one of these
* opportunistic entries with FETCH_HEAD_IGNORE.
*/
if (refmap_array) {
fetch_refspec = parse_fetch_refspec(refmap_nr, refmap_array);
fetch_refspec_nr = refmap_nr;
} else {
fetch_refspec = transport->remote->fetch;
fetch_refspec_nr = transport->remote->fetch_refspec_nr;
}
for (i = 0; i < fetch_refspec_nr; i++)
get_fetch_map(ref_map, &fetch_refspec[i], &oref_tail, 1);
} else if (refmap_array) {
die("--refmap option is only meaningful with command-line refspec(s).");
} else {
/* Use the defaults */
struct remote *remote = transport->remote;
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
struct branch *branch = branch_get(NULL);
int has_merge = branch_has_merge_config(branch);
builtin/fetch.c: ignore merge config when not fetching from branch's remote When 'git fetch' is supplied a single argument, it tries to match it against a configured remote and then fetch the refs specified by the named remote's fetchspec. Additionally, or alternatively, if the current branch has a merge ref configured, and if the name of the remote supplied to fetch matches the one in the branch's configuration, then git also adds the merge ref to the list of refs to update. If the argument to fetch does not specify a named remote, or if the name supplied does not match the remote configured for the current branch, then the current branch's merge configuration should not be considered. git currently mishandles the case when the argument to fetch specifies a GIT URL(i.e. not a named remote) and the current branch has a configured merge ref. In this case, fetch should ignore the branch's merge ref and attempt to fetch from the remote repository's HEAD branch. But, since fetch only checks _whether_ the current branch has a merge ref configured, and does _not_ check whether the branch's configured remote matches the command line argument (until later), it will mistakenly enter the wrong branch of an 'if' statement and will not fall back to fetch the HEAD branch. The fetch ends up doing nothing and returns with a successful zero status. Fix this by comparing the remote repository's name to the branch's remote name, in addition to whether it has a configured merge ref, sooner, so that fetch can correctly decide whether the branch's configuration is interesting or not, and fall back to fetching from the remote's HEAD branch when appropriate. This fixes the test in t5510. Signed-off-by: Brandon Casey <casey@nrlssc.navy.mil> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-08-25 19:52:56 +02:00
if (remote &&
(remote->fetch_refspec_nr ||
/* Note: has_merge implies non-NULL branch->remote_name */
builtin/fetch.c: ignore merge config when not fetching from branch's remote When 'git fetch' is supplied a single argument, it tries to match it against a configured remote and then fetch the refs specified by the named remote's fetchspec. Additionally, or alternatively, if the current branch has a merge ref configured, and if the name of the remote supplied to fetch matches the one in the branch's configuration, then git also adds the merge ref to the list of refs to update. If the argument to fetch does not specify a named remote, or if the name supplied does not match the remote configured for the current branch, then the current branch's merge configuration should not be considered. git currently mishandles the case when the argument to fetch specifies a GIT URL(i.e. not a named remote) and the current branch has a configured merge ref. In this case, fetch should ignore the branch's merge ref and attempt to fetch from the remote repository's HEAD branch. But, since fetch only checks _whether_ the current branch has a merge ref configured, and does _not_ check whether the branch's configured remote matches the command line argument (until later), it will mistakenly enter the wrong branch of an 'if' statement and will not fall back to fetch the HEAD branch. The fetch ends up doing nothing and returns with a successful zero status. Fix this by comparing the remote repository's name to the branch's remote name, in addition to whether it has a configured merge ref, sooner, so that fetch can correctly decide whether the branch's configuration is interesting or not, and fall back to fetching from the remote's HEAD branch when appropriate. This fixes the test in t5510. Signed-off-by: Brandon Casey <casey@nrlssc.navy.mil> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-08-25 19:52:56 +02:00
(has_merge && !strcmp(branch->remote_name, remote->name)))) {
for (i = 0; i < remote->fetch_refspec_nr; i++) {
get_fetch_map(remote_refs, &remote->fetch[i], &tail, 0);
if (remote->fetch[i].dst &&
remote->fetch[i].dst[0])
*autotags = 1;
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
if (!i && !has_merge && ref_map &&
!remote->fetch[0].pattern)
ref_map->fetch_head_status = FETCH_HEAD_MERGE;
}
/*
* if the remote we're fetching from is the same
* as given in branch.<name>.remote, we add the
* ref given in branch.<name>.merge, too.
*
* Note: has_merge implies non-NULL branch->remote_name
*/
if (has_merge &&
!strcmp(branch->remote_name, remote->name))
Correct handling of branch.$name.merge in builtin-fetch My prior bug fix for git-push titled "Don't configure remote "." to fetch everything to itself" actually broke t5520 as we were unable to evaluate a branch configuration of: [branch "copy"] remote = . merge = refs/heads/master as remote "." did not have a "remote...fetch" configuration entry to offer up refs/heads/master as a possible candidate available to be fetched and merged. In shell script git-fetch and prior to the above mentioned commit this was hardcoded for a url of "." to be the set of local branches. Chasing down this bug led me to the conclusion that our prior behavior with regards to branch.$name.merge was incorrect. In the shell script based git-fetch implementation we only fetched and merged a branch if it appeared both in branch.$name.merge *and* in remote.$r.fetch, where $r = branch.$name.remote. In other words in the following config file: [remote "origin"] url = git://git.kernel.org/pub/scm/git/git.git fetch = refs/heads/master:refs/remotes/origin/master [branch "master"] remote = origin merge = refs/heads/master [branch "pu"] remote = origin merge = refs/heads/pu Attempting to run `git pull` while on branch "pu" would always give the user "Already up-to-date" as git-fetch did not fetch pu and thus did not mark it for merge in .git/FETCH_HEAD. The configured merge would always be ignored and the user would be left scratching her confused head wondering why merge did not work on "pu" but worked fine on "master". If we are using the "default fetch" specification for the current branch and the current branch has a branch.$name.merge configured we now union it with the list of refs in remote.$r.fetch. This way the above configuration does what the user expects it to do, which is to fetch only "master" by default but when on "pu" to fetch both "master" and "pu". This uncovered some breakage in the test suite where old-style Cogito branches (.git/branches/$r) did not fetch the branches listed in .git/config for merging and thus did not actually merge them if the user tried to use `git pull` on that branch. Junio and I discussed it on list and felt that the union approach here makes more sense to DWIM for the end-user than silently ignoring their configured request so the test vectors for t5515 have been updated to include for-merge lines in .git/FETCH_HEAD where they have been configured for-merge in .git/config. Since we are now performing a union of the fetch specification and the merge specification and we cannot allow a branch to be listed twice (otherwise it comes out twice in .git/FETCH_HEAD) we need to perform a double loop here over all of the branch.$name.merge lines and try to set their merge flag if we have already schedule that branch for fetching by remote.$r.fetch. If no match is found then we must add new specifications to fetch the branch but not store it as no local tracking branch has been designated. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-09-18 10:54:53 +02:00
add_merge_config(&ref_map, remote_refs, branch, &tail);
} else {
ref_map = get_remote_ref(remote_refs, "HEAD");
if (!ref_map)
die(_("Couldn't find remote ref HEAD"));
ref_map->fetch_head_status = FETCH_HEAD_MERGE;
tail = &ref_map->next;
}
}
if (tags == TAGS_SET)
/* also fetch all tags */
get_fetch_map(remote_refs, tag_refspec, &tail, 0);
else if (tags == TAGS_DEFAULT && *autotags)
find_non_local_tags(transport, &ref_map, &tail);
/* Now append any refs to be updated opportunistically: */
*tail = orefs;
for (rm = orefs; rm; rm = rm->next) {
rm->fetch_head_status = FETCH_HEAD_IGNORE;
tail = &rm->next;
}
return ref_remove_duplicates(ref_map);
}
#define STORE_REF_ERROR_OTHER 1
#define STORE_REF_ERROR_DF_CONFLICT 2
static int s_update_ref(const char *action,
struct ref *ref,
int check_old)
{
char *msg;
char *rla = getenv("GIT_REFLOG_ACTION");
struct ref_transaction *transaction;
struct strbuf err = STRBUF_INIT;
int ret, df_conflict = 0;
if (dry_run)
return 0;
if (!rla)
rla = default_rla.buf;
msg = xstrfmt("%s: %s", rla, action);
transaction = ref_transaction_begin(&err);
if (!transaction ||
ref_transaction_update(transaction, ref->name,
ref->new_oid.hash,
check_old ? ref->old_oid.hash : NULL,
0, msg, &err))
goto fail;
ret = ref_transaction_commit(transaction, &err);
if (ret) {
df_conflict = (ret == TRANSACTION_NAME_CONFLICT);
goto fail;
}
ref_transaction_free(transaction);
strbuf_release(&err);
free(msg);
return 0;
fail:
ref_transaction_free(transaction);
error("%s", err.buf);
strbuf_release(&err);
free(msg);
return df_conflict ? STORE_REF_ERROR_DF_CONFLICT
: STORE_REF_ERROR_OTHER;
}
static int refcol_width = 10;
static int compact_format;
static void adjust_refcol_width(const struct ref *ref)
{
int max, rlen, llen, len;
/* uptodate lines are only shown on high verbosity level */
if (!verbosity && !oidcmp(&ref->peer_ref->old_oid, &ref->old_oid))
return;
max = term_columns();
rlen = utf8_strwidth(prettify_refname(ref->name));
llen = utf8_strwidth(prettify_refname(ref->peer_ref->name));
/*
* rough estimation to see if the output line is too long and
* should not be counted (we can't do precise calculation
* anyway because we don't know if the error explanation part
* will be printed in update_local_ref)
*/
if (compact_format) {
llen = 0;
max = max * 2 / 3;
}
len = 21 /* flag and summary */ + rlen + 4 /* -> */ + llen;
if (len >= max)
return;
/*
* Not precise calculation for compact mode because '*' can
* appear on the left hand side of '->' and shrink the column
* back.
*/
if (refcol_width < rlen)
refcol_width = rlen;
}
static void prepare_format_display(struct ref *ref_map)
{
struct ref *rm;
const char *format = "full";
git_config_get_string_const("fetch.output", &format);
if (!strcasecmp(format, "full"))
compact_format = 0;
else if (!strcasecmp(format, "compact"))
compact_format = 1;
else
die(_("configuration fetch.output contains invalid value %s"),
format);
for (rm = ref_map; rm; rm = rm->next) {
if (rm->status == REF_STATUS_REJECT_SHALLOW ||
!rm->peer_ref ||
!strcmp(rm->name, "HEAD"))
continue;
adjust_refcol_width(rm);
}
}
static void print_remote_to_local(struct strbuf *display,
const char *remote, const char *local)
{
strbuf_addf(display, "%-*s -> %s", refcol_width, remote, local);
}
static int find_and_replace(struct strbuf *haystack,
const char *needle,
const char *placeholder)
{
const char *p = strstr(haystack->buf, needle);
int plen, nlen;
if (!p)
return 0;
if (p > haystack->buf && p[-1] != '/')
return 0;
plen = strlen(p);
nlen = strlen(needle);
if (plen > nlen && p[nlen] != '/')
return 0;
strbuf_splice(haystack, p - haystack->buf, nlen,
placeholder, strlen(placeholder));
return 1;
}
static void print_compact(struct strbuf *display,
const char *remote, const char *local)
{
struct strbuf r = STRBUF_INIT;
struct strbuf l = STRBUF_INIT;
if (!strcmp(remote, local)) {
strbuf_addf(display, "%-*s -> *", refcol_width, remote);
return;
}
strbuf_addstr(&r, remote);
strbuf_addstr(&l, local);
if (!find_and_replace(&r, local, "*"))
find_and_replace(&l, remote, "*");
print_remote_to_local(display, r.buf, l.buf);
strbuf_release(&r);
strbuf_release(&l);
}
static void format_display(struct strbuf *display, char code,
const char *summary, const char *error,
const char *remote, const char *local,
int summary_width)
{
int width = (summary_width + strlen(summary) - gettext_width(summary));
strbuf_addf(display, "%c %-*s ", code, width, summary);
if (!compact_format)
print_remote_to_local(display, remote, local);
else
print_compact(display, remote, local);
if (error)
strbuf_addf(display, " (%s)", error);
}
static int update_local_ref(struct ref *ref,
const char *remote,
const struct ref *remote_ref,
struct strbuf *display,
int summary_width)
{
struct commit *current = NULL, *updated;
enum object_type type;
struct branch *current_branch = branch_get(NULL);
const char *pretty_ref = prettify_refname(ref->name);
type = sha1_object_info(ref->new_oid.hash, NULL);
if (type < 0)
die(_("object %s not found"), oid_to_hex(&ref->new_oid));
if (!oidcmp(&ref->old_oid, &ref->new_oid)) {
if (verbosity > 0)
format_display(display, '=', _("[up to date]"), NULL,
remote, pretty_ref, summary_width);
return 0;
}
if (current_branch &&
!strcmp(ref->name, current_branch->name) &&
!(update_head_ok || is_bare_repository()) &&
!is_null_oid(&ref->old_oid)) {
/*
* If this is the head, and it's not okay to update
* the head, and the old value of the head isn't empty...
*/
format_display(display, '!', _("[rejected]"),
_("can't fetch in current branch"),
remote, pretty_ref, summary_width);
return 1;
}
if (!is_null_oid(&ref->old_oid) &&
starts_with(ref->name, "refs/tags/")) {
int r;
r = s_update_ref("updating tag", ref, 0);
format_display(display, r ? '!' : 't', _("[tag update]"),
r ? _("unable to update local ref") : NULL,
remote, pretty_ref, summary_width);
return r;
}
Convert lookup_commit* to struct object_id Convert lookup_commit, lookup_commit_or_die, lookup_commit_reference, and lookup_commit_reference_gently to take struct object_id arguments. Introduce a temporary in parse_object buffer in order to convert this function. This is required since in order to convert parse_object and parse_object_buffer, lookup_commit_reference_gently and lookup_commit_or_die would need to be converted. Not introducing a temporary would therefore require that lookup_commit_or_die take a struct object_id *, but lookup_commit would take unsigned char *, leaving a confusing and hard-to-use interface. parse_object_buffer will lose this temporary in a later patch. This commit was created with manual changes to commit.c, commit.h, and object.c, plus the following semantic patch: @@ expression E1, E2; @@ - lookup_commit_reference_gently(E1.hash, E2) + lookup_commit_reference_gently(&E1, E2) @@ expression E1, E2; @@ - lookup_commit_reference_gently(E1->hash, E2) + lookup_commit_reference_gently(E1, E2) @@ expression E1; @@ - lookup_commit_reference(E1.hash) + lookup_commit_reference(&E1) @@ expression E1; @@ - lookup_commit_reference(E1->hash) + lookup_commit_reference(E1) @@ expression E1; @@ - lookup_commit(E1.hash) + lookup_commit(&E1) @@ expression E1; @@ - lookup_commit(E1->hash) + lookup_commit(E1) @@ expression E1, E2; @@ - lookup_commit_or_die(E1.hash, E2) + lookup_commit_or_die(&E1, E2) @@ expression E1, E2; @@ - lookup_commit_or_die(E1->hash, E2) + lookup_commit_or_die(E1, E2) Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-07 00:10:10 +02:00
current = lookup_commit_reference_gently(&ref->old_oid, 1);
updated = lookup_commit_reference_gently(&ref->new_oid, 1);
if (!current || !updated) {
const char *msg;
const char *what;
int r;
/*
* Nicely describe the new ref we're fetching.
* Base this on the remote's ref name, as it's
* more likely to follow a standard layout.
*/
const char *name = remote_ref ? remote_ref->name : "";
if (starts_with(name, "refs/tags/")) {
msg = "storing tag";
what = _("[new tag]");
} else if (starts_with(name, "refs/heads/")) {
msg = "storing head";
what = _("[new branch]");
} else {
msg = "storing ref";
what = _("[new ref]");
}
if ((recurse_submodules != RECURSE_SUBMODULES_OFF) &&
(recurse_submodules != RECURSE_SUBMODULES_ON))
check_for_new_submodule_commits(&ref->new_oid);
r = s_update_ref(msg, ref, 0);
format_display(display, r ? '!' : '*', what,
r ? _("unable to update local ref") : NULL,
remote, pretty_ref, summary_width);
return r;
}
if (in_merge_bases(current, updated)) {
struct strbuf quickref = STRBUF_INIT;
int r;
strbuf_add_unique_abbrev(&quickref, current->object.oid.hash, DEFAULT_ABBREV);
strbuf_addstr(&quickref, "..");
strbuf_add_unique_abbrev(&quickref, ref->new_oid.hash, DEFAULT_ABBREV);
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-06 23:10:46 +01:00
if ((recurse_submodules != RECURSE_SUBMODULES_OFF) &&
(recurse_submodules != RECURSE_SUBMODULES_ON))
check_for_new_submodule_commits(&ref->new_oid);
r = s_update_ref("fast-forward", ref, 1);
format_display(display, r ? '!' : ' ', quickref.buf,
r ? _("unable to update local ref") : NULL,
remote, pretty_ref, summary_width);
strbuf_release(&quickref);
return r;
} else if (force || ref->force) {
struct strbuf quickref = STRBUF_INIT;
int r;
strbuf_add_unique_abbrev(&quickref, current->object.oid.hash, DEFAULT_ABBREV);
strbuf_addstr(&quickref, "...");
strbuf_add_unique_abbrev(&quickref, ref->new_oid.hash, DEFAULT_ABBREV);
fetch/pull: recurse into submodules when necessary To be able to access all commits of populated submodules referenced by the superproject it is sufficient to only then let "git fetch" recurse into a submodule when the new commits fetched in the superproject record new commits for it. Having these commits present is extremely useful when using the "--submodule" option to "git diff" (which is what "git gui" and "gitk" do since 1.6.6), as all submodule commits needed for creating a descriptive output can be accessed. Also merging submodule commits (added in 1.7.3) depends on the submodule commits in question being present to work. Last but not least this enables disconnected operation when using submodules, as all commits necessary for a successful "git submodule update -N" will have been fetched automatically. So we choose this mode as the default for fetch and pull. Before a new or changed ref from upstream is updated in update_local_ref() "git rev-list <new-sha1> --not --branches --remotes" is used to determine all newly fetched commits. These are then walked and diffed against their parent(s) to see if a submodule has been changed. If that is the case, its path is stored to be fetched after the superproject fetch is completed. Using the "--recurse-submodules" or the "--no-recurse-submodules" option disables the examination of the fetched refs because the result will be ignored anyway. There is currently no infrastructure for storing deleted and new submodules in the .git directory of the superproject. That's why fetch and pull for now only fetch submodules that are already checked out and are not renamed. In t7403 the "--no-recurse-submodules" argument had to be added to "git pull" to avoid failure because of the moved upstream submodule repo. Thanks-to: Jonathan Nieder <jrnieder@gmail.com> Thanks-to: Heiko Voigt <hvoigt@hvoigt.net> Signed-off-by: Jens Lehmann <Jens.Lehmann@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-06 23:10:46 +01:00
if ((recurse_submodules != RECURSE_SUBMODULES_OFF) &&
(recurse_submodules != RECURSE_SUBMODULES_ON))
check_for_new_submodule_commits(&ref->new_oid);
r = s_update_ref("forced-update", ref, 1);
format_display(display, r ? '!' : '+', quickref.buf,
r ? _("unable to update local ref") : _("forced update"),
remote, pretty_ref, summary_width);
strbuf_release(&quickref);
return r;
} else {
format_display(display, '!', _("[rejected]"), _("non-fast-forward"),
remote, pretty_ref, summary_width);
return 1;
}
}
static int iterate_ref_map(void *cb_data, unsigned char sha1[20])
{
struct ref **rm = cb_data;
struct ref *ref = *rm;
while (ref && ref->status == REF_STATUS_REJECT_SHALLOW)
ref = ref->next;
if (!ref)
return -1; /* end of the list */
*rm = ref->next;
hashcpy(sha1, ref->old_oid.hash);
return 0;
}
static int store_updated_refs(const char *raw_url, const char *remote_name,
struct ref *ref_map)
{
FILE *fp;
struct commit *commit;
int url_len, i, rc = 0;
struct strbuf note = STRBUF_INIT;
const char *what, *kind;
struct ref *rm;
char *url;
memoize common git-path "constant" files One of the most common uses of git_path() is to pass a constant, like git_path("MERGE_MSG"). This has two drawbacks: 1. The return value is a static buffer, and the lifetime is dependent on other calls to git_path, etc. 2. There's no compile-time checking of the pathname. This is OK for a one-off (after all, we have to spell it correctly at least once), but many of these constant strings appear throughout the code. This patch introduces a series of functions to "memoize" these strings, which are essentially globals for the lifetime of the program. We compute the value once, take ownership of the buffer, and return the cached value for subsequent calls. cache.h provides a helper macro for defining these functions as one-liners, and defines a few common ones for global use. Using a macro is a little bit gross, but it does nicely document the purpose of the functions. If we need to touch them all later (e.g., because we learned how to change the git_dir variable at runtime, and need to invalidate all of the stored values), it will be much easier to have the complete list. Note that the shared-global functions have separate, manual declarations. We could do something clever with the macros (e.g., expand it to a declaration in some places, and a declaration _and_ a definition in path.c). But there aren't that many, and it's probably better to stay away from too-magical macros. Likewise, if we abandon the C preprocessor in favor of generating these with a script, we could get much fancier. E.g., normalizing "FOO/BAR-BAZ" into "git_path_foo_bar_baz". But the small amount of saved typing is probably not worth the resulting confusion to readers who want to grep for the function's definition. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-08-10 11:38:57 +02:00
const char *filename = dry_run ? "/dev/null" : git_path_fetch_head();
int want_status;
int summary_width = transport_summary_width(ref_map);
fp = fopen(filename, "a");
if (!fp)
return error_errno(_("cannot open %s"), filename);
if (raw_url)
url = transport_anonymize_url(raw_url);
else
url = xstrdup("foreign");
rm = ref_map;
check_everything_connected: use a struct with named options The number of variants of check_everything_connected has grown over the years, so that the "real" function takes several possibly-zero, possibly-NULL arguments. We hid the complexity behind some wrapper functions, but this doesn't scale well when we want to add new options. If we add more wrapper variants to handle the new options, then we can get a combinatorial explosion when those options might be used together (right now nobody wants to use both "shallow" and "transport" together, so we get by with just a few wrappers). If instead we add new parameters to each function, each of which can have a default value, then callers who want the defaults end up with confusing invocations like: check_everything_connected(fn, 0, data, -1, 0, NULL); where it is unclear which parameter is which (and every caller needs updated when we add new options). Instead, let's add a struct to hold all of the optional parameters. This is a little more verbose for the callers (who have to declare the struct and fill it in), but it makes their code much easier to follow, because every option is named as it is set (and unused options do not have to be mentioned at all). Note that we could also stick the iteration function and its callback data into the option struct, too. But since those are required for each call, by avoiding doing so, we can let very simple callers just pass "NULL" for the options and not worry about the struct at all. While we're touching each site, let's also rename the function to check_connected(). The existing name was quite long, and not all of the wrappers even used the full name. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-15 12:30:40 +02:00
if (check_connected(iterate_ref_map, &rm, NULL)) {
rc = error(_("%s did not send all necessary objects\n"), url);
goto abort;
}
prepare_format_display(ref_map);
/*
* We do a pass for each fetch_head_status type in their enum order, so
* merged entries are written before not-for-merge. That lets readers
* use FETCH_HEAD as a refname to refer to the ref to be merged.
*/
for (want_status = FETCH_HEAD_MERGE;
want_status <= FETCH_HEAD_IGNORE;
want_status++) {
for (rm = ref_map; rm; rm = rm->next) {
struct ref *ref = NULL;
const char *merge_status_marker = "";
if (rm->status == REF_STATUS_REJECT_SHALLOW) {
if (want_status == FETCH_HEAD_MERGE)
warning(_("reject %s because shallow roots are not allowed to be updated"),
rm->peer_ref ? rm->peer_ref->name : rm->name);
continue;
}
Convert lookup_commit* to struct object_id Convert lookup_commit, lookup_commit_or_die, lookup_commit_reference, and lookup_commit_reference_gently to take struct object_id arguments. Introduce a temporary in parse_object buffer in order to convert this function. This is required since in order to convert parse_object and parse_object_buffer, lookup_commit_reference_gently and lookup_commit_or_die would need to be converted. Not introducing a temporary would therefore require that lookup_commit_or_die take a struct object_id *, but lookup_commit would take unsigned char *, leaving a confusing and hard-to-use interface. parse_object_buffer will lose this temporary in a later patch. This commit was created with manual changes to commit.c, commit.h, and object.c, plus the following semantic patch: @@ expression E1, E2; @@ - lookup_commit_reference_gently(E1.hash, E2) + lookup_commit_reference_gently(&E1, E2) @@ expression E1, E2; @@ - lookup_commit_reference_gently(E1->hash, E2) + lookup_commit_reference_gently(E1, E2) @@ expression E1; @@ - lookup_commit_reference(E1.hash) + lookup_commit_reference(&E1) @@ expression E1; @@ - lookup_commit_reference(E1->hash) + lookup_commit_reference(E1) @@ expression E1; @@ - lookup_commit(E1.hash) + lookup_commit(&E1) @@ expression E1; @@ - lookup_commit(E1->hash) + lookup_commit(E1) @@ expression E1, E2; @@ - lookup_commit_or_die(E1.hash, E2) + lookup_commit_or_die(&E1, E2) @@ expression E1, E2; @@ - lookup_commit_or_die(E1->hash, E2) + lookup_commit_or_die(E1, E2) Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-07 00:10:10 +02:00
commit = lookup_commit_reference_gently(&rm->old_oid,
1);
if (!commit)
rm->fetch_head_status = FETCH_HEAD_NOT_FOR_MERGE;
if (rm->fetch_head_status != want_status)
continue;
if (rm->peer_ref) {
ref = alloc_ref(rm->peer_ref->name);
oidcpy(&ref->old_oid, &rm->peer_ref->old_oid);
oidcpy(&ref->new_oid, &rm->old_oid);
ref->force = rm->peer_ref->force;
}
if (!strcmp(rm->name, "HEAD")) {
kind = "";
what = "";
}
else if (starts_with(rm->name, "refs/heads/")) {
kind = "branch";
what = rm->name + 11;
}
else if (starts_with(rm->name, "refs/tags/")) {
kind = "tag";
what = rm->name + 10;
}
else if (starts_with(rm->name, "refs/remotes/")) {
kind = "remote-tracking branch";
what = rm->name + 13;
}
else {
kind = "";
what = rm->name;
}
url_len = strlen(url);
for (i = url_len - 1; url[i] == '/' && 0 <= i; i--)
;
url_len = i + 1;
if (4 < i && !strncmp(".git", url + i - 3, 4))
url_len = i - 3;
strbuf_reset(&note);
if (*what) {
if (*kind)
strbuf_addf(&note, "%s ", kind);
strbuf_addf(&note, "'%s' of ", what);
}
switch (rm->fetch_head_status) {
case FETCH_HEAD_NOT_FOR_MERGE:
merge_status_marker = "not-for-merge";
/* fall-through */
case FETCH_HEAD_MERGE:
fprintf(fp, "%s\t%s\t%s",
oid_to_hex(&rm->old_oid),
merge_status_marker,
note.buf);
for (i = 0; i < url_len; ++i)
if ('\n' == url[i])
fputs("\\n", fp);
else
fputc(url[i], fp);
fputc('\n', fp);
break;
default:
/* do not write anything to FETCH_HEAD */
break;
}
strbuf_reset(&note);
if (ref) {
rc |= update_local_ref(ref, what, rm, &note,
summary_width);
free(ref);
} else
format_display(&note, '*',
*kind ? kind : "branch", NULL,
*what ? what : "HEAD",
"FETCH_HEAD", summary_width);
if (note.len) {
if (verbosity >= 0 && !shown_url) {
fprintf(stderr, _("From %.*s\n"),
url_len, url);
shown_url = 1;
}
if (verbosity >= 0)
fprintf(stderr, " %s\n", note.buf);
}
}
}
if (rc & STORE_REF_ERROR_DF_CONFLICT)
error(_("some local refs could not be updated; try running\n"
" 'git remote prune %s' to remove any old, conflicting "
"branches"), remote_name);
abort:
strbuf_release(&note);
free(url);
fclose(fp);
return rc;
}
git-fetch: avoid local fetching from alternate (again) Back in e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Junio taught git-fetch to avoid copying objects when we are fetching from a repository that is already registered as an alternate object database. In such a case there is no reason to copy any objects as we can already obtain them through the alternate. However we need to ensure the objects are all reachable, so we run `git rev-list --objects $theirs --not --all` to verify this. If any object is missing or unreadable then we need to fetch/copy the objects from the remote. When a missing object is detected the git-rev-list process will exit with a non-zero exit status, making this condition quite easy to detect. Although git-fetch is currently a builtin (and so is rev-list) we cannot invoke the traverse_objects() API at this point in the transport code. The object walker within traverse_objects() calls die() as soon as it finds an object it cannot read. If that happens we want to resume the fetch process by calling do_fetch_pack(). To get around this we spawn git-rev-list into a background process to prevent a die() from killing the foreground fetch process, thus allowing the fetch process to resume into do_fetch_pack() if copying is necessary. We aren't interested in the output of rev-list (a list of SHA-1 object names that are reachable) or its errors (a "spurious" error about an object not being found as we need to copy it) so we redirect both stdout and stderr to /dev/null. We run this git-rev-list based check before any fetch as we may already have the necessary objects local from a prior fetch. If we don't then its very likely the first $theirs object listed on the command line won't exist locally and git-rev-list will die very quickly, allowing us to start the network transfer. This test even on remote URLs may save bandwidth if someone runs `git pull origin`, sees a merge conflict, resets out, then redoes the same pull just a short time later. If the remote hasn't changed between the two pulls and the local repository hasn't had git-gc run in it then there is probably no need to perform network transfer as all of the objects are local. Documentation for the new quickfetch function was suggested and written by Junio, based on his original comment in git-fetch.sh. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-11-11 08:29:47 +01:00
/*
* We would want to bypass the object transfer altogether if
* everything we are going to fetch already exists and is connected
git-fetch: avoid local fetching from alternate (again) Back in e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Junio taught git-fetch to avoid copying objects when we are fetching from a repository that is already registered as an alternate object database. In such a case there is no reason to copy any objects as we can already obtain them through the alternate. However we need to ensure the objects are all reachable, so we run `git rev-list --objects $theirs --not --all` to verify this. If any object is missing or unreadable then we need to fetch/copy the objects from the remote. When a missing object is detected the git-rev-list process will exit with a non-zero exit status, making this condition quite easy to detect. Although git-fetch is currently a builtin (and so is rev-list) we cannot invoke the traverse_objects() API at this point in the transport code. The object walker within traverse_objects() calls die() as soon as it finds an object it cannot read. If that happens we want to resume the fetch process by calling do_fetch_pack(). To get around this we spawn git-rev-list into a background process to prevent a die() from killing the foreground fetch process, thus allowing the fetch process to resume into do_fetch_pack() if copying is necessary. We aren't interested in the output of rev-list (a list of SHA-1 object names that are reachable) or its errors (a "spurious" error about an object not being found as we need to copy it) so we redirect both stdout and stderr to /dev/null. We run this git-rev-list based check before any fetch as we may already have the necessary objects local from a prior fetch. If we don't then its very likely the first $theirs object listed on the command line won't exist locally and git-rev-list will die very quickly, allowing us to start the network transfer. This test even on remote URLs may save bandwidth if someone runs `git pull origin`, sees a merge conflict, resets out, then redoes the same pull just a short time later. If the remote hasn't changed between the two pulls and the local repository hasn't had git-gc run in it then there is probably no need to perform network transfer as all of the objects are local. Documentation for the new quickfetch function was suggested and written by Junio, based on his original comment in git-fetch.sh. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-11-11 08:29:47 +01:00
* locally.
*/
static int quickfetch(struct ref *ref_map)
{
struct ref *rm = ref_map;
check_everything_connected: use a struct with named options The number of variants of check_everything_connected has grown over the years, so that the "real" function takes several possibly-zero, possibly-NULL arguments. We hid the complexity behind some wrapper functions, but this doesn't scale well when we want to add new options. If we add more wrapper variants to handle the new options, then we can get a combinatorial explosion when those options might be used together (right now nobody wants to use both "shallow" and "transport" together, so we get by with just a few wrappers). If instead we add new parameters to each function, each of which can have a default value, then callers who want the defaults end up with confusing invocations like: check_everything_connected(fn, 0, data, -1, 0, NULL); where it is unclear which parameter is which (and every caller needs updated when we add new options). Instead, let's add a struct to hold all of the optional parameters. This is a little more verbose for the callers (who have to declare the struct and fill it in), but it makes their code much easier to follow, because every option is named as it is set (and unused options do not have to be mentioned at all). Note that we could also stick the iteration function and its callback data into the option struct, too. But since those are required for each call, by avoiding doing so, we can let very simple callers just pass "NULL" for the options and not worry about the struct at all. While we're touching each site, let's also rename the function to check_connected(). The existing name was quite long, and not all of the wrappers even used the full name. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-15 12:30:40 +02:00
struct check_connected_options opt = CHECK_CONNECTED_INIT;
git-fetch: avoid local fetching from alternate (again) Back in e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Junio taught git-fetch to avoid copying objects when we are fetching from a repository that is already registered as an alternate object database. In such a case there is no reason to copy any objects as we can already obtain them through the alternate. However we need to ensure the objects are all reachable, so we run `git rev-list --objects $theirs --not --all` to verify this. If any object is missing or unreadable then we need to fetch/copy the objects from the remote. When a missing object is detected the git-rev-list process will exit with a non-zero exit status, making this condition quite easy to detect. Although git-fetch is currently a builtin (and so is rev-list) we cannot invoke the traverse_objects() API at this point in the transport code. The object walker within traverse_objects() calls die() as soon as it finds an object it cannot read. If that happens we want to resume the fetch process by calling do_fetch_pack(). To get around this we spawn git-rev-list into a background process to prevent a die() from killing the foreground fetch process, thus allowing the fetch process to resume into do_fetch_pack() if copying is necessary. We aren't interested in the output of rev-list (a list of SHA-1 object names that are reachable) or its errors (a "spurious" error about an object not being found as we need to copy it) so we redirect both stdout and stderr to /dev/null. We run this git-rev-list based check before any fetch as we may already have the necessary objects local from a prior fetch. If we don't then its very likely the first $theirs object listed on the command line won't exist locally and git-rev-list will die very quickly, allowing us to start the network transfer. This test even on remote URLs may save bandwidth if someone runs `git pull origin`, sees a merge conflict, resets out, then redoes the same pull just a short time later. If the remote hasn't changed between the two pulls and the local repository hasn't had git-gc run in it then there is probably no need to perform network transfer as all of the objects are local. Documentation for the new quickfetch function was suggested and written by Junio, based on his original comment in git-fetch.sh. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-11-11 08:29:47 +01:00
/*
* If we are deepening a shallow clone we already have these
* objects reachable. Running rev-list here will return with
* a good (0) exit status and we'll bypass the fetch that we
* really need to perform. Claiming failure now will ensure
* we perform the network exchange to deepen our history.
*/
if (deepen)
git-fetch: avoid local fetching from alternate (again) Back in e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Junio taught git-fetch to avoid copying objects when we are fetching from a repository that is already registered as an alternate object database. In such a case there is no reason to copy any objects as we can already obtain them through the alternate. However we need to ensure the objects are all reachable, so we run `git rev-list --objects $theirs --not --all` to verify this. If any object is missing or unreadable then we need to fetch/copy the objects from the remote. When a missing object is detected the git-rev-list process will exit with a non-zero exit status, making this condition quite easy to detect. Although git-fetch is currently a builtin (and so is rev-list) we cannot invoke the traverse_objects() API at this point in the transport code. The object walker within traverse_objects() calls die() as soon as it finds an object it cannot read. If that happens we want to resume the fetch process by calling do_fetch_pack(). To get around this we spawn git-rev-list into a background process to prevent a die() from killing the foreground fetch process, thus allowing the fetch process to resume into do_fetch_pack() if copying is necessary. We aren't interested in the output of rev-list (a list of SHA-1 object names that are reachable) or its errors (a "spurious" error about an object not being found as we need to copy it) so we redirect both stdout and stderr to /dev/null. We run this git-rev-list based check before any fetch as we may already have the necessary objects local from a prior fetch. If we don't then its very likely the first $theirs object listed on the command line won't exist locally and git-rev-list will die very quickly, allowing us to start the network transfer. This test even on remote URLs may save bandwidth if someone runs `git pull origin`, sees a merge conflict, resets out, then redoes the same pull just a short time later. If the remote hasn't changed between the two pulls and the local repository hasn't had git-gc run in it then there is probably no need to perform network transfer as all of the objects are local. Documentation for the new quickfetch function was suggested and written by Junio, based on his original comment in git-fetch.sh. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-11-11 08:29:47 +01:00
return -1;
check_everything_connected: use a struct with named options The number of variants of check_everything_connected has grown over the years, so that the "real" function takes several possibly-zero, possibly-NULL arguments. We hid the complexity behind some wrapper functions, but this doesn't scale well when we want to add new options. If we add more wrapper variants to handle the new options, then we can get a combinatorial explosion when those options might be used together (right now nobody wants to use both "shallow" and "transport" together, so we get by with just a few wrappers). If instead we add new parameters to each function, each of which can have a default value, then callers who want the defaults end up with confusing invocations like: check_everything_connected(fn, 0, data, -1, 0, NULL); where it is unclear which parameter is which (and every caller needs updated when we add new options). Instead, let's add a struct to hold all of the optional parameters. This is a little more verbose for the callers (who have to declare the struct and fill it in), but it makes their code much easier to follow, because every option is named as it is set (and unused options do not have to be mentioned at all). Note that we could also stick the iteration function and its callback data into the option struct, too. But since those are required for each call, by avoiding doing so, we can let very simple callers just pass "NULL" for the options and not worry about the struct at all. While we're touching each site, let's also rename the function to check_connected(). The existing name was quite long, and not all of the wrappers even used the full name. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-15 12:30:40 +02:00
opt.quiet = 1;
return check_connected(iterate_ref_map, &rm, &opt);
git-fetch: avoid local fetching from alternate (again) Back in e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Junio taught git-fetch to avoid copying objects when we are fetching from a repository that is already registered as an alternate object database. In such a case there is no reason to copy any objects as we can already obtain them through the alternate. However we need to ensure the objects are all reachable, so we run `git rev-list --objects $theirs --not --all` to verify this. If any object is missing or unreadable then we need to fetch/copy the objects from the remote. When a missing object is detected the git-rev-list process will exit with a non-zero exit status, making this condition quite easy to detect. Although git-fetch is currently a builtin (and so is rev-list) we cannot invoke the traverse_objects() API at this point in the transport code. The object walker within traverse_objects() calls die() as soon as it finds an object it cannot read. If that happens we want to resume the fetch process by calling do_fetch_pack(). To get around this we spawn git-rev-list into a background process to prevent a die() from killing the foreground fetch process, thus allowing the fetch process to resume into do_fetch_pack() if copying is necessary. We aren't interested in the output of rev-list (a list of SHA-1 object names that are reachable) or its errors (a "spurious" error about an object not being found as we need to copy it) so we redirect both stdout and stderr to /dev/null. We run this git-rev-list based check before any fetch as we may already have the necessary objects local from a prior fetch. If we don't then its very likely the first $theirs object listed on the command line won't exist locally and git-rev-list will die very quickly, allowing us to start the network transfer. This test even on remote URLs may save bandwidth if someone runs `git pull origin`, sees a merge conflict, resets out, then redoes the same pull just a short time later. If the remote hasn't changed between the two pulls and the local repository hasn't had git-gc run in it then there is probably no need to perform network transfer as all of the objects are local. Documentation for the new quickfetch function was suggested and written by Junio, based on his original comment in git-fetch.sh. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-11-11 08:29:47 +01:00
}
static int fetch_refs(struct transport *transport, struct ref *ref_map)
{
git-fetch: avoid local fetching from alternate (again) Back in e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Junio taught git-fetch to avoid copying objects when we are fetching from a repository that is already registered as an alternate object database. In such a case there is no reason to copy any objects as we can already obtain them through the alternate. However we need to ensure the objects are all reachable, so we run `git rev-list --objects $theirs --not --all` to verify this. If any object is missing or unreadable then we need to fetch/copy the objects from the remote. When a missing object is detected the git-rev-list process will exit with a non-zero exit status, making this condition quite easy to detect. Although git-fetch is currently a builtin (and so is rev-list) we cannot invoke the traverse_objects() API at this point in the transport code. The object walker within traverse_objects() calls die() as soon as it finds an object it cannot read. If that happens we want to resume the fetch process by calling do_fetch_pack(). To get around this we spawn git-rev-list into a background process to prevent a die() from killing the foreground fetch process, thus allowing the fetch process to resume into do_fetch_pack() if copying is necessary. We aren't interested in the output of rev-list (a list of SHA-1 object names that are reachable) or its errors (a "spurious" error about an object not being found as we need to copy it) so we redirect both stdout and stderr to /dev/null. We run this git-rev-list based check before any fetch as we may already have the necessary objects local from a prior fetch. If we don't then its very likely the first $theirs object listed on the command line won't exist locally and git-rev-list will die very quickly, allowing us to start the network transfer. This test even on remote URLs may save bandwidth if someone runs `git pull origin`, sees a merge conflict, resets out, then redoes the same pull just a short time later. If the remote hasn't changed between the two pulls and the local repository hasn't had git-gc run in it then there is probably no need to perform network transfer as all of the objects are local. Documentation for the new quickfetch function was suggested and written by Junio, based on his original comment in git-fetch.sh. Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2007-11-11 08:29:47 +01:00
int ret = quickfetch(ref_map);
if (ret)
ret = transport_fetch_refs(transport, ref_map);
if (!ret)
ret |= store_updated_refs(transport->url,
transport->remote->name,
ref_map);
transport_unlock_pack(transport);
return ret;
}
static int prune_refs(struct refspec *refs, int ref_count, struct ref *ref_map,
const char *raw_url)
{
int url_len, i, result = 0;
struct ref *ref, *stale_refs = get_stale_heads(refs, ref_count, ref_map);
char *url;
int summary_width = transport_summary_width(stale_refs);
const char *dangling_msg = dry_run
? _(" (%s will become dangling)")
: _(" (%s has become dangling)");
if (raw_url)
url = transport_anonymize_url(raw_url);
else
url = xstrdup("foreign");
url_len = strlen(url);
for (i = url_len - 1; url[i] == '/' && 0 <= i; i--)
;
url_len = i + 1;
if (4 < i && !strncmp(".git", url + i - 3, 4))
url_len = i - 3;
if (!dry_run) {
struct string_list refnames = STRING_LIST_INIT_NODUP;
for (ref = stale_refs; ref; ref = ref->next)
string_list_append(&refnames, ref->name);
result = delete_refs("fetch: prune", &refnames, 0);
string_list_clear(&refnames, 0);
}
if (verbosity >= 0) {
for (ref = stale_refs; ref; ref = ref->next) {
struct strbuf sb = STRBUF_INIT;
if (!shown_url) {
fprintf(stderr, _("From %.*s\n"), url_len, url);
shown_url = 1;
}
format_display(&sb, '-', _("[deleted]"), NULL,
_("(none)"), prettify_refname(ref->name),
summary_width);
fprintf(stderr, " %s\n",sb.buf);
strbuf_release(&sb);
warn_dangling_symref(stderr, dangling_msg, ref->name);
}
}
free(url);
free_refs(stale_refs);
return result;
}
static void check_not_current_branch(struct ref *ref_map)
{
struct branch *current_branch = branch_get(NULL);
if (is_bare_repository() || !current_branch)
return;
for (; ref_map; ref_map = ref_map->next)
if (ref_map->peer_ref && !strcmp(current_branch->refname,
ref_map->peer_ref->name))
die(_("Refusing to fetch into current branch %s "
"of non-bare repository"), current_branch->refname);
}
static int truncate_fetch_head(void)
{
memoize common git-path "constant" files One of the most common uses of git_path() is to pass a constant, like git_path("MERGE_MSG"). This has two drawbacks: 1. The return value is a static buffer, and the lifetime is dependent on other calls to git_path, etc. 2. There's no compile-time checking of the pathname. This is OK for a one-off (after all, we have to spell it correctly at least once), but many of these constant strings appear throughout the code. This patch introduces a series of functions to "memoize" these strings, which are essentially globals for the lifetime of the program. We compute the value once, take ownership of the buffer, and return the cached value for subsequent calls. cache.h provides a helper macro for defining these functions as one-liners, and defines a few common ones for global use. Using a macro is a little bit gross, but it does nicely document the purpose of the functions. If we need to touch them all later (e.g., because we learned how to change the git_dir variable at runtime, and need to invalidate all of the stored values), it will be much easier to have the complete list. Note that the shared-global functions have separate, manual declarations. We could do something clever with the macros (e.g., expand it to a declaration in some places, and a declaration _and_ a definition in path.c). But there aren't that many, and it's probably better to stay away from too-magical macros. Likewise, if we abandon the C preprocessor in favor of generating these with a script, we could get much fancier. E.g., normalizing "FOO/BAR-BAZ" into "git_path_foo_bar_baz". But the small amount of saved typing is probably not worth the resulting confusion to readers who want to grep for the function's definition. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-08-10 11:38:57 +02:00
const char *filename = git_path_fetch_head();
Handle more file writes correctly in shared repos In shared repositories, we have to be careful when writing files whose permissions do not allow users other than the owner to write them. In particular, we force the marks file of fast-export and the FETCH_HEAD when fetching to be rewritten from scratch. This commit does not touch other calls to fopen() that want to write files: - commands that write to working tree files (core.sharedRepository does not affect permission bits of working tree files), e.g. .rej file created by "apply --reject", result of applying a previous conflict resolution by "rerere", "git merge-file". - git am, when splitting mails (git-am correctly cleans up its directory after finishing, so there is no need to share those files between users) - git submodule clone, when writing the .git file, because the file will not be overwritten - git_terminal_prompt() in compat/terminal.c, because it is not writing to a file at all - git diff --output, because the output file is clearly not intended to be shared between the users of the current repository - git fast-import, when writing a crash report, because the reports' file names are unique due to an embedded process ID - mailinfo() in mailinfo.c, because the output is clearly not intended to be shared between the users of the current repository - check_or_regenerate_marks() in remote-testsvn.c, because this is only used for Git's internal testing - git fsck, when writing lost&found blobs (this should probably be changed, but left as a low-hanging fruit for future contributors). Note that this patch does not touch callers of write_file() and write_file_gently(), which would benefit from the same scrutiny as to usage in shared repositories. Most notable users are branch, daemon, submodule & worktree, and a worrisome call in transport.c when updating one ref (which ignores the shared flag). Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-01-11 19:35:54 +01:00
FILE *fp = fopen_for_writing(filename);
if (!fp)
return error_errno(_("cannot open %s"), filename);
fclose(fp);
return 0;
}
static void set_option(struct transport *transport, const char *name, const char *value)
{
int r = transport_set_option(transport, name, value);
if (r < 0)
die(_("Option \"%s\" value \"%s\" is not valid for %s"),
name, value, transport->url);
if (r > 0)
warning(_("Option \"%s\" is ignored for %s\n"),
name, transport->url);
}
static struct transport *prepare_transport(struct remote *remote, int deepen)
{
struct transport *transport;
transport = transport_get(remote, NULL);
transport_set_verbosity(transport, verbosity, progress);
transport->family = family;
if (upload_pack)
set_option(transport, TRANS_OPT_UPLOADPACK, upload_pack);
if (keep)
set_option(transport, TRANS_OPT_KEEP, "yes");
if (depth)
set_option(transport, TRANS_OPT_DEPTH, depth);
if (deepen && deepen_since)
set_option(transport, TRANS_OPT_DEEPEN_SINCE, deepen_since);
if (deepen && deepen_not.nr)
set_option(transport, TRANS_OPT_DEEPEN_NOT,
(const char *)&deepen_not);
fetch, upload-pack: --deepen=N extends shallow boundary by N commits In git-fetch, --depth argument is always relative with the latest remote refs. This makes it a bit difficult to cover this use case, where the user wants to make the shallow history, say 3 levels deeper. It would work if remote refs have not moved yet, but nobody can guarantee that, especially when that use case is performed a couple months after the last clone or "git fetch --depth". Also, modifying shallow boundary using --depth does not work well with clones created by --since or --not. This patch fixes that. A new argument --deepen=<N> will add <N> more (*) parent commits to the current history regardless of where remote refs are. Have/Want negotiation is still respected. So if remote refs move, the server will send two chunks: one between "have" and "want" and another to extend shallow history. In theory, the client could send no "want"s in order to get the second chunk only. But the protocol does not allow that. Either you send no want lines, which means ls-remote; or you have to send at least one want line that carries deep-relative to the server.. The main work was done by Dongcan Jiang. I fixed it up here and there. And of course all the bugs belong to me. (*) We could even support --deepen=<N> where <N> is negative. In that case we can cut some history from the shallow clone. This operation (and --depth=<shorter depth>) does not require interaction with remote side (and more complicated to implement as a result). Helped-by: Duy Nguyen <pclouds@gmail.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 12:54:09 +02:00
if (deepen_relative)
set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, "yes");
if (update_shallow)
set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes");
return transport;
}
static void backfill_tags(struct transport *transport, struct ref *ref_map)
{
int cannot_reuse;
/*
* Once we have set TRANS_OPT_DEEPEN_SINCE, we can't unset it
* when remote helper is used (setting it to an empty string
* is not unsetting). We could extend the remote helper
* protocol for that, but for now, just force a new connection
* without deepen-since. Similar story for deepen-not.
*/
cannot_reuse = transport->cannot_reuse ||
deepen_since || deepen_not.nr;
if (cannot_reuse) {
gsecondary = prepare_transport(transport->remote, 0);
fetch: work around "transport-take-over" hack A Git-aware "connect" transport allows the "transport_take_over" to redirect generic transport requests like fetch(), push_refs() and get_refs_list() to the native Git transport handling methods. The take-over process replaces transport->data with a fake data that these method implementations understand. While this hack works OK for a single request, it breaks when the transport needs to make more than one requests. transport->data that used to hold necessary information for the specific helper to work correctly is destroyed during the take-over process. One codepath that this matters is "git fetch" in auto-follow mode; when it does not get all the tags that ought to point at the history it got (which can be determined by looking at the peeled tags in the initial advertisement) from the primary transfer, it internally makes a second request to complete the fetch. Because "take-over" hack has already destroyed the data necessary to talk to the transport helper by the time this happens, the second request cannot make a request to the helper to make another connection to fetch these additional tags. Mark such a transport as "cannot_reuse", and use a separate transport to perform the backfill fetch in order to work around this breakage. Note that this problem does not manifest itself when running t5802, because our upload-pack gives you all the necessary auto-followed tags during the primary transfer. You would need to step through "git fetch" in a debugger, stop immediately after the primary transfer finishes and writes these auto-followed tags, remove the tag references and repack/prune the repository to convince the "find-non-local-tags" procedure that the primary transfer failed to give us all the necessary tags, and then let it continue, in order to trigger the bug in the secondary transfer this patch fixes. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-08 00:47:18 +02:00
transport = gsecondary;
}
transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, NULL);
transport_set_option(transport, TRANS_OPT_DEPTH, "0");
fetch, upload-pack: --deepen=N extends shallow boundary by N commits In git-fetch, --depth argument is always relative with the latest remote refs. This makes it a bit difficult to cover this use case, where the user wants to make the shallow history, say 3 levels deeper. It would work if remote refs have not moved yet, but nobody can guarantee that, especially when that use case is performed a couple months after the last clone or "git fetch --depth". Also, modifying shallow boundary using --depth does not work well with clones created by --since or --not. This patch fixes that. A new argument --deepen=<N> will add <N> more (*) parent commits to the current history regardless of where remote refs are. Have/Want negotiation is still respected. So if remote refs move, the server will send two chunks: one between "have" and "want" and another to extend shallow history. In theory, the client could send no "want"s in order to get the second chunk only. But the protocol does not allow that. Either you send no want lines, which means ls-remote; or you have to send at least one want line that carries deep-relative to the server.. The main work was done by Dongcan Jiang. I fixed it up here and there. And of course all the bugs belong to me. (*) We could even support --deepen=<N> where <N> is negative. In that case we can cut some history from the shallow clone. This operation (and --depth=<shorter depth>) does not require interaction with remote side (and more complicated to implement as a result). Helped-by: Duy Nguyen <pclouds@gmail.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 12:54:09 +02:00
transport_set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, NULL);
fetch_refs(transport, ref_map);
fetch: work around "transport-take-over" hack A Git-aware "connect" transport allows the "transport_take_over" to redirect generic transport requests like fetch(), push_refs() and get_refs_list() to the native Git transport handling methods. The take-over process replaces transport->data with a fake data that these method implementations understand. While this hack works OK for a single request, it breaks when the transport needs to make more than one requests. transport->data that used to hold necessary information for the specific helper to work correctly is destroyed during the take-over process. One codepath that this matters is "git fetch" in auto-follow mode; when it does not get all the tags that ought to point at the history it got (which can be determined by looking at the peeled tags in the initial advertisement) from the primary transfer, it internally makes a second request to complete the fetch. Because "take-over" hack has already destroyed the data necessary to talk to the transport helper by the time this happens, the second request cannot make a request to the helper to make another connection to fetch these additional tags. Mark such a transport as "cannot_reuse", and use a separate transport to perform the backfill fetch in order to work around this breakage. Note that this problem does not manifest itself when running t5802, because our upload-pack gives you all the necessary auto-followed tags during the primary transfer. You would need to step through "git fetch" in a debugger, stop immediately after the primary transfer finishes and writes these auto-followed tags, remove the tag references and repack/prune the repository to convince the "find-non-local-tags" procedure that the primary transfer failed to give us all the necessary tags, and then let it continue, in order to trigger the bug in the secondary transfer this patch fixes. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-08 00:47:18 +02:00
if (gsecondary) {
transport_disconnect(gsecondary);
gsecondary = NULL;
}
}
static int do_fetch(struct transport *transport,
struct refspec *refs, int ref_count)
{
struct string_list existing_refs = STRING_LIST_INIT_DUP;
struct ref *ref_map;
struct ref *rm;
int autotags = (transport->remote->fetch_tags == 1);
int retcode = 0;
for_each_ref(add_existing, &existing_refs);
if (tags == TAGS_DEFAULT) {
if (transport->remote->fetch_tags == 2)
tags = TAGS_SET;
if (transport->remote->fetch_tags == -1)
tags = TAGS_UNSET;
}
if (!transport->get_refs_list || !transport->fetch)
die(_("Don't know how to fetch from %s"), transport->url);
/* if not appending, truncate FETCH_HEAD */
if (!append && !dry_run) {
retcode = truncate_fetch_head();
if (retcode)
goto cleanup;
}
ref_map = get_ref_map(transport, refs, ref_count, tags, &autotags);
if (!update_head_ok)
check_not_current_branch(ref_map);
for (rm = ref_map; rm; rm = rm->next) {
if (rm->peer_ref) {
struct string_list_item *peer_item =
string_list_lookup(&existing_refs,
rm->peer_ref->name);
if (peer_item) {
struct object_id *old_oid = peer_item->util;
oidcpy(&rm->peer_ref->old_oid, old_oid);
}
}
}
if (tags == TAGS_DEFAULT && autotags)
transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, "1");
if (prune) {
fetch --prune: prune only based on explicit refspecs The old behavior of "fetch --prune" was to prune whatever was being fetched. In particular, "fetch --prune --tags" caused tags not only to be fetched, but also to be pruned. This is inappropriate because there is only one tags namespace that is shared among the local repository and all remotes. Therefore, if the user defines a local tag and then runs "git fetch --prune --tags", then the local tag is deleted. Moreover, "--prune" and "--tags" can also be configured via fetch.prune / remote.<name>.prune and remote.<name>.tagopt, making it even less obvious that an invocation of "git fetch" could result in tag lossage. Since the command "git remote update" invokes "git fetch", it had the same problem. The command "git remote prune", on the other hand, disregarded the setting of remote.<name>.tagopt, and so its behavior was inconsistent with that of the other commands. So the old behavior made it too easy to lose tags. To fix this problem, change "fetch --prune" to prune references based only on refspecs specified explicitly by the user, either on the command line or via remote.<name>.fetch. Thus, tags are no longer made subject to pruning by the --tags option or the remote.<name>.tagopt setting. However, tags *are* still subject to pruning if they are fetched as part of a refspec, and that is good. For example: * On the command line, git fetch --prune 'refs/tags/*:refs/tags/*' causes tags, and only tags, to be fetched and pruned, and is therefore a simple way for the user to get the equivalent of the old behavior of "--prune --tag". * For a remote that was configured with the "--mirror" option, the configuration is set to include [remote "name"] fetch = +refs/*:refs/* , which causes tags to be subject to pruning along with all other references. This is the behavior that will typically be desired for a mirror. Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-10-30 06:33:00 +01:00
/*
* We only prune based on refspecs specified
* explicitly (via command line or configuration); we
* don't care whether --tags was specified.
*/
if (ref_count) {
prune_refs(refs, ref_count, ref_map, transport->url);
} else {
fetch --prune: prune only based on explicit refspecs The old behavior of "fetch --prune" was to prune whatever was being fetched. In particular, "fetch --prune --tags" caused tags not only to be fetched, but also to be pruned. This is inappropriate because there is only one tags namespace that is shared among the local repository and all remotes. Therefore, if the user defines a local tag and then runs "git fetch --prune --tags", then the local tag is deleted. Moreover, "--prune" and "--tags" can also be configured via fetch.prune / remote.<name>.prune and remote.<name>.tagopt, making it even less obvious that an invocation of "git fetch" could result in tag lossage. Since the command "git remote update" invokes "git fetch", it had the same problem. The command "git remote prune", on the other hand, disregarded the setting of remote.<name>.tagopt, and so its behavior was inconsistent with that of the other commands. So the old behavior made it too easy to lose tags. To fix this problem, change "fetch --prune" to prune references based only on refspecs specified explicitly by the user, either on the command line or via remote.<name>.fetch. Thus, tags are no longer made subject to pruning by the --tags option or the remote.<name>.tagopt setting. However, tags *are* still subject to pruning if they are fetched as part of a refspec, and that is good. For example: * On the command line, git fetch --prune 'refs/tags/*:refs/tags/*' causes tags, and only tags, to be fetched and pruned, and is therefore a simple way for the user to get the equivalent of the old behavior of "--prune --tag". * For a remote that was configured with the "--mirror" option, the configuration is set to include [remote "name"] fetch = +refs/*:refs/* , which causes tags to be subject to pruning along with all other references. This is the behavior that will typically be desired for a mirror. Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-10-30 06:33:00 +01:00
prune_refs(transport->remote->fetch,
transport->remote->fetch_refspec_nr,
ref_map,
transport->url);
}
}
if (fetch_refs(transport, ref_map)) {
free_refs(ref_map);
retcode = 1;
goto cleanup;
}
free_refs(ref_map);
/* if neither --no-tags nor --tags was specified, do automated tag
* following ... */
if (tags == TAGS_DEFAULT && autotags) {
struct ref **tail = &ref_map;
ref_map = NULL;
find_non_local_tags(transport, &ref_map, &tail);
if (ref_map)
backfill_tags(transport, ref_map);
free_refs(ref_map);
}
cleanup:
string_list_clear(&existing_refs, 1);
return retcode;
}
static int get_one_remote_for_fetch(struct remote *remote, void *priv)
{
struct string_list *list = priv;
if (!remote->skip_default_update)
string_list_append(list, remote->name);
return 0;
}
struct remote_group_data {
const char *name;
struct string_list *list;
};
static int get_remote_group(const char *key, const char *value, void *priv)
{
struct remote_group_data *g = priv;
if (skip_prefix(key, "remotes.", &key) && !strcmp(key, g->name)) {
/* split list by white space */
while (*value) {
size_t wordlen = strcspn(value, " \t\n");
if (wordlen >= 1)
string_list_append_nodup(g->list,
xstrndup(value, wordlen));
value += wordlen + (value[wordlen] != '\0');
}
}
return 0;
}
static int add_remote_or_group(const char *name, struct string_list *list)
{
int prev_nr = list->nr;
struct remote_group_data g;
g.name = name; g.list = list;
git_config(get_remote_group, &g);
if (list->nr == prev_nr) {
struct remote *remote = remote_get(name);
remote rename: more carefully determine whether a remote is configured One of the really nice features of the ~/.gitconfig file is that users can override defaults by their own preferred settings for all of their repositories. One such default that some users like to override is whether the "origin" remote gets auto-pruned or not. The user would simply call git config --global remote.origin.prune true and from now on all "origin" remotes would be pruned automatically when fetching into the local repository. There is just one catch: now Git thinks that the "origin" remote is configured, even if the repository config has no [remote "origin"] section at all, as it does not realize that the "prune" setting was configured globally and that there really is no "origin" remote configured in this repository. That is a problem e.g. when renaming a remote to a new name, when Git may be fooled into thinking that there is already a remote of that new name. Let's fix this by paying more attention to *where* the remote settings came from: if they are configured in the local repository config, we must not overwrite them. If they were configured elsewhere, we cannot overwrite them to begin with, as we only write the repository config. There is only one caller of remote_is_configured() (in `git fetch`) that may want to take remotes into account even if they were configured outside the repository config; all other callers essentially try to prevent the Git command from overwriting settings in the repository config. To accommodate that fact, the remote_is_configured() function now requires a parameter that states whether the caller is interested in all remotes, or only in those that were configured in the repository config. Many thanks to Jeff King whose tireless review helped with settling for nothing less than the current strategy. This fixes https://github.com/git-for-windows/git/issues/888 Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-01-19 22:20:02 +01:00
if (!remote_is_configured(remote, 0))
return 0;
string_list_append(list, remote->name);
}
return 1;
}
static void add_options_to_argv(struct argv_array *argv)
{
if (dry_run)
argv_array_push(argv, "--dry-run");
if (prune != -1)
argv_array_push(argv, prune ? "--prune" : "--no-prune");
if (update_head_ok)
argv_array_push(argv, "--update-head-ok");
if (force)
argv_array_push(argv, "--force");
if (keep)
argv_array_push(argv, "--keep");
if (recurse_submodules == RECURSE_SUBMODULES_ON)
argv_array_push(argv, "--recurse-submodules");
else if (recurse_submodules == RECURSE_SUBMODULES_ON_DEMAND)
argv_array_push(argv, "--recurse-submodules=on-demand");
if (tags == TAGS_SET)
argv_array_push(argv, "--tags");
else if (tags == TAGS_UNSET)
argv_array_push(argv, "--no-tags");
if (verbosity >= 2)
argv_array_push(argv, "-v");
if (verbosity >= 1)
argv_array_push(argv, "-v");
else if (verbosity < 0)
argv_array_push(argv, "-q");
}
static int fetch_multiple(struct string_list *list)
{
int i, result = 0;
struct argv_array argv = ARGV_ARRAY_INIT;
if (!append && !dry_run) {
int errcode = truncate_fetch_head();
if (errcode)
return errcode;
}
argv_array_pushl(&argv, "fetch", "--append", NULL);
add_options_to_argv(&argv);
for (i = 0; i < list->nr; i++) {
const char *name = list->items[i].string;
argv_array_push(&argv, name);
if (verbosity >= 0)
printf(_("Fetching %s\n"), name);
if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) {
error(_("Could not fetch %s"), name);
result = 1;
}
argv_array_pop(&argv);
}
argv_array_clear(&argv);
return result;
}
static int fetch_one(struct remote *remote, int argc, const char **argv)
{
static const char **refs = NULL;
struct refspec *refspec;
int ref_nr = 0;
int exit_code;
if (!remote)
die(_("No remote repository specified. Please, specify either a URL or a\n"
"remote name from which new revisions should be fetched."));
gtransport = prepare_transport(remote, 1);
if (prune < 0) {
/* no command line request */
if (0 <= gtransport->remote->prune)
prune = gtransport->remote->prune;
else if (0 <= fetch_prune_config)
prune = fetch_prune_config;
else
prune = PRUNE_BY_DEFAULT;
}
if (argc > 0) {
int j = 0;
int i;
refs = xcalloc(st_add(argc, 1), sizeof(const char *));
for (i = 0; i < argc; i++) {
if (!strcmp(argv[i], "tag")) {
i++;
if (i >= argc)
die(_("You need to specify a tag name."));
refs[j++] = xstrfmt("refs/tags/%s:refs/tags/%s",
argv[i], argv[i]);
} else
refs[j++] = argv[i];
}
refs[j] = NULL;
ref_nr = j;
}
sigchain_push_common(unlock_pack_on_signal);
atexit(unlock_pack);
refspec = parse_fetch_refspec(ref_nr, refs);
exit_code = do_fetch(gtransport, refspec, ref_nr);
free_refspec(ref_nr, refspec);
transport_disconnect(gtransport);
gtransport = NULL;
return exit_code;
}
int cmd_fetch(int argc, const char **argv, const char *prefix)
{
int i;
struct string_list list = STRING_LIST_INIT_DUP;
struct remote *remote;
int result = 0;
struct argv_array argv_gc_auto = ARGV_ARRAY_INIT;
packet_trace_identity("fetch");
/* Record the command line for the reflog */
strbuf_addstr(&default_rla, "fetch");
for (i = 1; i < argc; i++)
strbuf_addf(&default_rla, " %s", argv[i]);
git_config(git_fetch_config, NULL);
argc = parse_options(argc, argv, prefix,
builtin_fetch_options, builtin_fetch_usage, 0);
fetch, upload-pack: --deepen=N extends shallow boundary by N commits In git-fetch, --depth argument is always relative with the latest remote refs. This makes it a bit difficult to cover this use case, where the user wants to make the shallow history, say 3 levels deeper. It would work if remote refs have not moved yet, but nobody can guarantee that, especially when that use case is performed a couple months after the last clone or "git fetch --depth". Also, modifying shallow boundary using --depth does not work well with clones created by --since or --not. This patch fixes that. A new argument --deepen=<N> will add <N> more (*) parent commits to the current history regardless of where remote refs are. Have/Want negotiation is still respected. So if remote refs move, the server will send two chunks: one between "have" and "want" and another to extend shallow history. In theory, the client could send no "want"s in order to get the second chunk only. But the protocol does not allow that. Either you send no want lines, which means ls-remote; or you have to send at least one want line that carries deep-relative to the server.. The main work was done by Dongcan Jiang. I fixed it up here and there. And of course all the bugs belong to me. (*) We could even support --deepen=<N> where <N> is negative. In that case we can cut some history from the shallow clone. This operation (and --depth=<shorter depth>) does not require interaction with remote side (and more complicated to implement as a result). Helped-by: Duy Nguyen <pclouds@gmail.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 12:54:09 +02:00
if (deepen_relative) {
if (deepen_relative < 0)
die(_("Negative depth in --deepen is not supported"));
if (depth)
die(_("--deepen and --depth are mutually exclusive"));
depth = xstrfmt("%d", deepen_relative);
}
if (unshallow) {
if (depth)
die(_("--depth and --unshallow cannot be used together"));
else if (!is_repository_shallow())
die(_("--unshallow on a complete repository does not make sense"));
else
depth = xstrfmt("%d", INFINITE_DEPTH);
}
/* no need to be strict, transport_set_option() will validate it again */
if (depth && atoi(depth) < 1)
die(_("depth %s is not a positive number"), depth);
if (depth || deepen_since || deepen_not.nr)
deepen = 1;
if (recurse_submodules != RECURSE_SUBMODULES_OFF) {
if (recurse_submodules_default) {
int arg = parse_fetch_recurse_submodules_arg("--recurse-submodules-default", recurse_submodules_default);
set_config_fetch_recurse_submodules(arg);
}
gitmodules_config();
git_config(submodule_config, NULL);
}
if (all) {
if (argc == 1)
die(_("fetch --all does not take a repository argument"));
else if (argc > 1)
die(_("fetch --all does not make sense with refspecs"));
(void) for_each_remote(get_one_remote_for_fetch, &list);
result = fetch_multiple(&list);
} else if (argc == 0) {
/* No arguments -- use default remote */
remote = remote_get(NULL);
result = fetch_one(remote, argc, argv);
} else if (multiple) {
/* All arguments are assumed to be remotes or groups */
for (i = 0; i < argc; i++)
if (!add_remote_or_group(argv[i], &list))
die(_("No such remote or remote group: %s"), argv[i]);
result = fetch_multiple(&list);
} else {
/* Single remote or group */
(void) add_remote_or_group(argv[0], &list);
if (list.nr > 1) {
/* More than one remote */
if (argc > 1)
die(_("Fetching a group and specifying refspecs does not make sense"));
result = fetch_multiple(&list);
} else {
/* Zero or one remotes */
remote = remote_get(argv[0]);
result = fetch_one(remote, argc-1, argv+1);
}
}
if (!result && (recurse_submodules != RECURSE_SUBMODULES_OFF)) {
struct argv_array options = ARGV_ARRAY_INIT;
add_options_to_argv(&options);
result = fetch_populated_submodules(&options,
submodule_prefix,
recurse_submodules,
verbosity < 0,
max_children);
argv_array_clear(&options);
}
string_list_clear(&list, 0);
close_all_packs();
argv_array_pushl(&argv_gc_auto, "gc", "--auto", NULL);
if (verbosity < 0)
argv_array_push(&argv_gc_auto, "--quiet");
run_command_v_opt(argv_gc_auto.argv, RUN_GIT_CMD);
argv_array_clear(&argv_gc_auto);
return result;
}