git-commit-vandalism/builtin/clone.c

988 lines
26 KiB
C
Raw Normal View History

/*
* Builtin "git clone"
*
* Copyright (c) 2007 Kristian Høgsberg <krh@redhat.com>,
* 2008 Daniel Barkalow <barkalow@iabervon.org>
* Based on git-commit.sh by Junio C Hamano and Linus Torvalds
*
* Clone a repository into a different directory that does not yet exist.
*/
Fix sparse warnings Fix warnings from 'make check'. - These files don't include 'builtin.h' causing sparse to complain that cmd_* isn't declared: builtin/clone.c:364, builtin/fetch-pack.c:797, builtin/fmt-merge-msg.c:34, builtin/hash-object.c:78, builtin/merge-index.c:69, builtin/merge-recursive.c:22 builtin/merge-tree.c:341, builtin/mktag.c:156, builtin/notes.c:426 builtin/notes.c:822, builtin/pack-redundant.c:596, builtin/pack-refs.c:10, builtin/patch-id.c:60, builtin/patch-id.c:149, builtin/remote.c:1512, builtin/remote-ext.c:240, builtin/remote-fd.c:53, builtin/reset.c:236, builtin/send-pack.c:384, builtin/unpack-file.c:25, builtin/var.c:75 - These files have symbols which should be marked static since they're only file scope: submodule.c:12, diff.c:631, replace_object.c:92, submodule.c:13, submodule.c:14, trace.c:78, transport.c:195, transport-helper.c:79, unpack-trees.c:19, url.c:3, url.c:18, url.c:104, url.c:117, url.c:123, url.c:129, url.c:136, thread-utils.c:21, thread-utils.c:48 - These files redeclare symbols to be different types: builtin/index-pack.c:210, parse-options.c:564, parse-options.c:571, usage.c:49, usage.c:58, usage.c:63, usage.c:72 - These files use a literal integer 0 when they really should use a NULL pointer: daemon.c:663, fast-import.c:2942, imap-send.c:1072, notes-merge.c:362 While we're in the area, clean up some unused #includes in builtin files (mostly exec_cmd.h). Signed-off-by: Stephen Boyd <bebarino@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-22 08:51:05 +01:00
#include "builtin.h"
#include "parse-options.h"
#include "fetch-pack.h"
#include "refs.h"
#include "tree.h"
#include "tree-walk.h"
#include "unpack-trees.h"
#include "transport.h"
#include "strbuf.h"
#include "dir.h"
#include "sigchain.h"
#include "branch.h"
#include "remote.h"
#include "run-command.h"
#include "connected.h"
/*
* Overall FIXMEs:
* - respect DB_ENVIRONMENT for .git/objects.
*
* Implementation notes:
* - dropping use-separate-remote and no-separate-remote compatibility
*
*/
/* Usage synopsis for "git clone"; the array is NULL-terminated. */
static const char * const builtin_clone_usage[] = {
N_("git clone [options] [--] <repo> [<dir>]"),
NULL
};
/*
 * Command-line state.  Each variable is the target of one (or more)
 * entries in builtin_clone_options[].
 */

/* Plain flags, off unless given. */
static int option_no_checkout;
static int option_bare;
static int option_mirror;
static int option_no_hardlinks;
static int option_shared;
static int option_recursive;

/* Tri-state flags: -1 means "not given on the command line". */
static int option_single_branch = -1;
static int option_local = -1;
static int option_progress = -1;

static int option_verbosity;

/* String-valued options. */
static char *option_template;
static char *option_depth;
static char *option_origin;
static char *option_branch;
static const char *real_git_dir;
static char *option_upload_pack = "git-upload-pack";

/* Options that may be given several times. */
static struct string_list option_config;
static struct string_list option_reference;
/*
 * Option callback for --reference: append the argument to the string
 * list that opt->value points at.
 *
 * Returns 0 on success, -1 when no argument was supplied.
 */
static int opt_parse_reference(const struct option *opt, const char *arg, int unset)
{
	struct string_list *list = opt->value;

	if (arg) {
		string_list_append(list, arg);
		return 0;
	}
	return -1;
}
/*
 * Option table for "git clone".  Each entry stores its result in one of
 * the option_* variables (or real_git_dir) declared above.
 *
 * Some variables are reachable through more than one spelling:
 *  - "--bare" and the hidden "--naked" both write option_bare;
 *  - "--recursive" and "--recurse-submodules" both write option_recursive.
 */
static struct option builtin_clone_options[] = {
OPT__VERBOSITY(&option_verbosity),
OPT_BOOL(0, "progress", &option_progress,
N_("force progress reporting")),
OPT_BOOLEAN('n', "no-checkout", &option_no_checkout,
N_("don't create a checkout")),
OPT_BOOLEAN(0, "bare", &option_bare, N_("create a bare repository")),
/* Hidden alias of --bare; takes no argument. */
{ OPTION_BOOLEAN, 0, "naked", &option_bare, NULL,
N_("create a bare repository"),
PARSE_OPT_NOARG | PARSE_OPT_HIDDEN },
OPT_BOOLEAN(0, "mirror", &option_mirror,
N_("create a mirror repository (implies bare)")),
OPT_BOOL('l', "local", &option_local,
N_("to clone from a local repository")),
OPT_BOOLEAN(0, "no-hardlinks", &option_no_hardlinks,
N_("don't use local hardlinks, always copy")),
OPT_BOOLEAN('s', "shared", &option_shared,
N_("setup as shared repository")),
OPT_BOOLEAN(0, "recursive", &option_recursive,
N_("initialize submodules in the clone")),
OPT_BOOLEAN(0, "recurse-submodules", &option_recursive,
N_("initialize submodules in the clone")),
OPT_STRING(0, "template", &option_template, N_("template-directory"),
N_("directory from which templates will be used")),
/* May be repeated; every value is collected by opt_parse_reference(). */
OPT_CALLBACK(0 , "reference", &option_reference, N_("repo"),
N_("reference repository"), &opt_parse_reference),
OPT_STRING('o', "origin", &option_origin, N_("name"),
N_("use <name> instead of 'origin' to track upstream")),
OPT_STRING('b', "branch", &option_branch, N_("branch"),
N_("checkout <branch> instead of the remote's HEAD")),
OPT_STRING('u', "upload-pack", &option_upload_pack, N_("path"),
N_("path to git-upload-pack on the remote")),
OPT_STRING(0, "depth", &option_depth, N_("depth"),
N_("create a shallow clone of that depth")),
OPT_BOOL(0, "single-branch", &option_single_branch,
N_("clone only one branch, HEAD or --branch")),
OPT_STRING(0, "separate-git-dir", &real_git_dir, N_("gitdir"),
N_("separate git dir from working tree")),
OPT_STRING_LIST('c', "config", &option_config, N_("key=value"),
N_("set config inside the new repository")),
OPT_END()
};
/*
 * Argument vector handed to run_command_v_opt() with RUN_GIT_CMD by
 * checkout() when option_recursive is set.
 */
static const char *argv_submodule[] = {
"submodule", "update", "--init", "--recursive", NULL
};
/*
 * Try to resolve "repo" to a repository or bundle on the local
 * filesystem.
 *
 * Repository candidates are tried in this order, and the first one that
 * looks like a git repository wins:
 *
 *   <repo>/.git, <repo>, <repo>.git/.git, <repo>.git
 *
 * This ordering prefers a working tree "foo" over a bare "foo", and a
 * bare "foo" over "foo.git".  A directory only qualifies if
 * is_git_directory() accepts it, so a directory that merely exists is
 * not selected.  A regular file longer than 8 bytes that starts with
 * "gitdir: " is treated as a gitfile and followed via read_gitfile().
 *
 * If no repository matches, "<repo>.bundle" and then "<repo>" are tried
 * as regular files and reported as bundles.
 *
 * On success *is_bundle is set to 0 or 1 and an xstrdup()'d absolute
 * path is returned.  Returns NULL (leaving *is_bundle untouched) when
 * nothing matches.
 */
static char *get_repo_path(const char *repo, int *is_bundle)
{
	static char *suffix[] = { "/.git", "", ".git/.git", ".git" };
	static char *bundle_suffix[] = { ".bundle", "" };
	struct stat st;
	int i;

	for (i = 0; i < ARRAY_SIZE(suffix); i++) {
		const char *path;

		path = mkpath("%s%s", repo, suffix[i]);
		if (stat(path, &st))
			continue;
		if (S_ISDIR(st.st_mode) && is_git_directory(path)) {
			*is_bundle = 0;
			return xstrdup(absolute_path(path));
		} else if (S_ISREG(st.st_mode) && st.st_size > 8) {
			/* Is it a "gitfile"? */
			char signature[8];
			int len, fd = open(path, O_RDONLY);

			if (fd < 0)
				continue;
			len = read_in_full(fd, signature, 8);
			close(fd);
			if (len != 8 || strncmp(signature, "gitdir: ", 8))
				continue;
			path = read_gitfile(path);
			if (path) {
				*is_bundle = 0;
				return xstrdup(absolute_path(path));
			}
		}
	}

	for (i = 0; i < ARRAY_SIZE(bundle_suffix); i++) {
		const char *path;

		path = mkpath("%s%s", repo, bundle_suffix[i]);
		if (!stat(path, &st) && S_ISREG(st.st_mode)) {
			*is_bundle = 1;
			return xstrdup(absolute_path(path));
		}
	}

	return NULL;
}
static char *guess_dir_name(const char *repo, int is_bundle, int is_bare)
{
const char *end = repo + strlen(repo), *start;
char *dir;
/*
* Strip trailing spaces, slashes and /.git
*/
while (repo < end && (is_dir_sep(end[-1]) || isspace(end[-1])))
end--;
if (end - repo > 5 && is_dir_sep(end[-5]) &&
!strncmp(end - 4, ".git", 4)) {
end -= 5;
while (repo < end && is_dir_sep(end[-1]))
end--;
}
/*
* Find last component, but be prepared that repo could have
* the form "remote.example.com:foo.git", i.e. no slash
* in the directory part.
*/
start = end;
while (repo < start && !is_dir_sep(start[-1]) && start[-1] != ':')
start--;
/*
* Strip .{bundle,git}.
*/
if (is_bundle) {
if (end - start > 7 && !strncmp(end - 7, ".bundle", 7))
end -= 7;
} else {
if (end - start > 4 && !strncmp(end - 4, ".git", 4))
end -= 4;
}
if (is_bare) {
struct strbuf result = STRBUF_INIT;
strbuf_addf(&result, "%.*s.git", (int)(end - start), start);
dir = strbuf_detach(&result, NULL);
} else
dir = xstrndup(start, end - start);
/*
* Replace sequences of 'control' characters and whitespace
* with one ascii space, remove leading and trailing spaces.
*/
if (*dir) {
char *out = dir;
int prev_space = 1 /* strip leading whitespace */;
for (end = dir; *end; ++end) {
char ch = *end;
if ((unsigned char)ch < '\x20')
ch = '\x20';
if (isspace(ch)) {
if (prev_space)
continue;
prev_space = 1;
} else
prev_space = 0;
*out++ = ch;
}
*out = '\0';
if (out > dir && prev_space)
out[-1] = '\0';
}
return dir;
}
/*
 * Remove directory separators from the end of "dir", in place.  The
 * first character is never removed, so a lone "/" stays as it is.
 */
static void strip_trailing_slashes(char *dir)
{
	size_t len = strlen(dir);

	while (len > 1 && is_dir_sep(dir[len - 1]))
		len--;
	dir[len] = '\0';
}
/*
 * for_each_string_list() callback: register one --reference repository
 * as an alternate object store.
 *
 * item->string is resolved to a repository directory (following a
 * gitfile, or descending into ".git" when that holds the objects), and
 * "<dir>/objects" is passed to add_to_alternates_file().  Dies when no
 * "objects" directory can be found.  cb_data is unused.
 *
 * Always returns 0.
 */
static int add_one_reference(struct string_list_item *item, void *cb_data)
{
char *ref_git;
const char *repo;
struct strbuf alternate = STRBUF_INIT;
/* Beware: read_gitfile(), real_path() and mkpath() return static buffer */
ref_git = xstrdup(real_path(item->string));
/* The path itself may be a gitfile, or a work tree whose .git is one. */
repo = read_gitfile(ref_git);
if (!repo)
repo = read_gitfile(mkpath("%s/.git", ref_git));
if (repo) {
/* Copy the gitfile target before the static buffer is reused. */
free(ref_git);
ref_git = xstrdup(repo);
}
if (!repo && is_directory(mkpath("%s/.git/objects", ref_git))) {
/* Work tree with a real .git directory: use that directory. */
char *ref_git_git = mkpathdup("%s/.git", ref_git);
free(ref_git);
ref_git = ref_git_git;
} else if (!is_directory(mkpath("%s/objects", ref_git)))
die(_("reference repository '%s' is not a local repository."),
item->string);
strbuf_addf(&alternate, "%s/objects", ref_git);
add_to_alternates_file(alternate.buf);
strbuf_release(&alternate);
free(ref_git);
return 0;
}
/* Run add_one_reference() on every entry collected from --reference. */
static void setup_reference(void)
{
for_each_string_list(&option_reference, add_one_reference, NULL);
}
/*
 * Carry the source repository's alternates over to the new repository.
 *
 * src:      path of the source "objects/info/alternates" file
 * dst:      path of the corresponding destination file (not used here;
 *           entries are written through add_to_alternates_file())
 * src_repo: source repository directory, used to resolve relative entries
 *
 * Dies if the source file cannot be opened.
 */
static void copy_alternates(struct strbuf *src, struct strbuf *dst,
			    const char *src_repo)
{
	/*
	 * Read from the source objects/info/alternates file
	 * and copy the entries to corresponding file in the
	 * destination repository with add_to_alternates_file().
	 * Both src and dst have "$path/objects/info/alternates".
	 *
	 * Instead of copying bit-for-bit from the original,
	 * we need to append to existing one so that the already
	 * created entry via "clone -s" is not lost, and also
	 * to turn entries with paths relative to the original
	 * absolute, so that they can be used in the new repository.
	 */
	FILE *in = fopen(src->buf, "r");
	struct strbuf line = STRBUF_INIT;

	/* The file may have become unreadable since it was stat()ed. */
	if (!in)
		die_errno(_("failed to open '%s'"), src->buf);

	while (strbuf_getline(&line, in, '\n') != EOF) {
		char *abs_path, abs_buf[PATH_MAX];

		/* Skip blank lines and comments. */
		if (!line.len || line.buf[0] == '#')
			continue;
		if (is_absolute_path(line.buf)) {
			add_to_alternates_file(line.buf);
			continue;
		}
		/* mkpath() returns a static buffer; normalize into our own. */
		abs_path = mkpath("%s/objects/%s", src_repo, line.buf);
		if (normalize_path_copy(abs_buf, abs_path)) {
			/* Do not record a path that could not be normalized. */
			warning("skipping invalid relative alternate: %s/%s",
				src_repo, line.buf);
			continue;
		}
		add_to_alternates_file(abs_buf);
	}
	strbuf_release(&line);
	fclose(in);
}
/*
 * Recursively replicate the directory src->buf as dest->buf.
 *
 * Regular entries are hard-linked unless option_no_hardlinks is set.
 * When a link attempt fails, the function dies if option_local > 0 and
 * otherwise switches option_no_hardlinks on and copies from then on.
 * Sub-directories whose name starts with '.' are skipped.  The entry
 * that sits at "/info/alternates" relative to src_baselen is handed to
 * copy_alternates() instead of being copied.
 *
 * Both strbufs are modified while walking (a '/' and the entry name are
 * appended); src_baselen is the length of the top-level source path.
 */
static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
				   const char *src_repo, int src_baselen)
{
	struct dirent *entry;
	struct stat st;
	int src_prefix_len, dest_prefix_len;
	DIR *dh = opendir(src->buf);

	if (!dh)
		die_errno(_("failed to open '%s'"), src->buf);

	/* An already existing destination is fine if it is a directory. */
	if (mkdir(dest->buf, 0777)) {
		if (errno != EEXIST)
			die_errno(_("failed to create directory '%s'"), dest->buf);
		if (stat(dest->buf, &st))
			die_errno(_("failed to stat '%s'"), dest->buf);
		if (!S_ISDIR(st.st_mode))
			die(_("%s exists and is not a directory"), dest->buf);
	}

	strbuf_addch(src, '/');
	src_prefix_len = src->len;
	strbuf_addch(dest, '/');
	dest_prefix_len = dest->len;

	for (;;) {
		entry = readdir(dh);
		if (!entry)
			break;

		/* Rebuild "<dir>/<entry>" for both sides. */
		strbuf_setlen(src, src_prefix_len);
		strbuf_addstr(src, entry->d_name);
		strbuf_setlen(dest, dest_prefix_len);
		strbuf_addstr(dest, entry->d_name);

		if (stat(src->buf, &st)) {
			warning (_("failed to stat %s\n"), src->buf);
			continue;
		}

		if (S_ISDIR(st.st_mode)) {
			if (entry->d_name[0] != '.')
				copy_or_link_directory(src, dest,
						       src_repo, src_baselen);
			continue;
		}

		/* Files that cannot be copied bit-for-bit... */
		if (!strcmp(src->buf + src_baselen, "/info/alternates")) {
			copy_alternates(src, dest, src_repo);
			continue;
		}

		if (unlink(dest->buf) && errno != ENOENT)
			die_errno(_("failed to unlink '%s'"), dest->buf);

		if (!option_no_hardlinks) {
			if (!link(src->buf, dest->buf))
				continue;
			if (option_local > 0)
				die_errno(_("failed to create link '%s'"), dest->buf);
			/* Linking does not work here; copy from now on. */
			option_no_hardlinks = 1;
		}

		if (copy_file_with_time(dest->buf, src->buf, 0666))
			die_errno(_("failed to copy file to '%s'"), dest->buf);
	}

	closedir(dh);
}
static void clone_local(const char *src_repo, const char *dest_repo)
{
if (option_shared) {
struct strbuf alt = STRBUF_INIT;
strbuf_addf(&alt, "%s/objects", src_repo);
add_to_alternates_file(alt.buf);
strbuf_release(&alt);
} else {
struct strbuf src = STRBUF_INIT;
struct strbuf dest = STRBUF_INIT;
strbuf_addf(&src, "%s/objects", src_repo);
strbuf_addf(&dest, "%s/objects", dest_repo);
copy_or_link_directory(&src, &dest, src_repo, src.len);
strbuf_release(&src);
strbuf_release(&dest);
}
if (0 <= option_verbosity)
fprintf(stderr, _("done.\n"));
}
/*
 * State consulted by remove_junk() when cleaning up after a failed clone.
 */
/* Directories that remove_junk() deletes recursively when non-NULL. */
static const char *junk_work_tree;
static const char *junk_git_dir;
/* remove_junk() only acts when getpid() equals this value. */
static pid_t junk_pid;
/* How much remove_junk() should leave behind. */
static enum {
JUNK_LEAVE_NONE, /* remove junk_git_dir and junk_work_tree */
JUNK_LEAVE_REPO, /* remove nothing, but print junk_leave_repo_msg */
JUNK_LEAVE_ALL /* remove nothing, silently */
} junk_mode = JUNK_LEAVE_NONE;
/* Warning printed by remove_junk() in JUNK_LEAVE_REPO mode. */
static const char junk_leave_repo_msg[] =
N_("Clone succeeded, but checkout failed.\n"
"You can inspect what was checked out with 'git status'\n"
"and retry the checkout with 'git checkout -f HEAD'\n");
static void remove_junk(void)
{
struct strbuf sb = STRBUF_INIT;
switch (junk_mode) {
case JUNK_LEAVE_REPO:
warning("%s", _(junk_leave_repo_msg));
/* fall-through */
case JUNK_LEAVE_ALL:
return;
default:
/* proceed to removal */
break;
}
if (getpid() != junk_pid)
return;
if (junk_git_dir) {
strbuf_addstr(&sb, junk_git_dir);
remove_dir_recursively(&sb, 0);
strbuf_reset(&sb);
}
if (junk_work_tree) {
strbuf_addstr(&sb, junk_work_tree);
remove_dir_recursively(&sb, 0);
strbuf_reset(&sb);
}
}
/*
 * Signal handler: run remove_junk(), then pop this handler off the
 * sigchain for signo and raise the signal again.
 */
static void remove_junk_on_signal(int signo)
{
remove_junk();
sigchain_pop(signo);
raise(signo);
}
static struct ref *find_remote_branch(const struct ref *refs, const char *branch)
{
struct ref *ref;
struct strbuf head = STRBUF_INIT;
strbuf_addstr(&head, "refs/heads/");
strbuf_addstr(&head, branch);
ref = find_ref_by_name(refs, head.buf);
strbuf_release(&head);
if (ref)
return ref;
strbuf_addstr(&head, "refs/tags/");
strbuf_addstr(&head, branch);
ref = find_ref_by_name(refs, head.buf);
strbuf_release(&head);
return ref;
}
/*
 * Build the list of refs to fetch, mapped through "refspec".
 *
 * Without --single-branch every advertised ref is mapped, plus all tags
 * unless --mirror was given; a copy of the remote HEAD, if advertised,
 * leads the list.  With --single-branch only the branch (or tag) named
 * by --branch, or else the guessed remote HEAD, is mapped; a missing
 * --branch target only produces a warning here.
 */
static struct ref *wanted_peer_refs(const struct ref *refs,
		struct refspec *refspec)
{
	struct ref *head = copy_ref(find_ref_by_name(refs, "HEAD"));
	struct ref *local_refs = head;
	struct ref **tail = &local_refs;
	struct ref *remote_head;

	if (head)
		tail = &head->next;

	if (!option_single_branch) {
		get_fetch_map(refs, refspec, &tail, 0);
		if (!option_mirror)
			get_fetch_map(refs, tag_refspec, &tail, 0);
		return local_refs;
	}

	if (option_branch) {
		/* An explicit --branch replaces the HEAD entry entirely. */
		local_refs = NULL;
		tail = &local_refs;
		remote_head = copy_ref(find_remote_branch(refs, option_branch));
	} else {
		remote_head = guess_remote_head(head, refs, 0);
	}

	if (remote_head || !option_branch) {
		get_fetch_map(remote_head, refspec, &tail, 0);
		/* if --branch=tag, pull the requested tag explicitly */
		get_fetch_map(remote_head, tag_refspec, &tail, 0);
	} else {
		warning(_("Could not find remote branch %s to clone."),
			option_branch);
	}

	return local_refs;
}
/*
 * Record every ref in local_refs that has a peer_ref as a packed ref
 * named after the peer and pointing at old_sha1.  Dies if the
 * packed-refs file cannot be locked or committed.
 */
static void write_remote_refs(const struct ref *local_refs)
{
	const struct ref *cur = local_refs;

	lock_packed_refs(LOCK_DIE_ON_ERROR);

	while (cur) {
		if (cur->peer_ref)
			add_packed_ref(cur->peer_ref->name, cur->old_sha1);
		cur = cur->next;
	}

	if (commit_packed_refs())
		die_errno("unable to overwrite old ref-pack file");
}
/*
 * Create a local ref for every advertised tag whose object is already
 * present.  Entries outside "refs/tags/", entries ending in "^{}", and
 * tags whose object is missing are skipped.
 */
static void write_followtags(const struct ref *refs, const char *msg)
{
	const struct ref *cur;

	for (cur = refs; cur; cur = cur->next) {
		int is_tag = !prefixcmp(cur->name, "refs/tags/");

		if (!is_tag ||
		    !suffixcmp(cur->name, "^{}") ||
		    !has_sha1_file(cur->old_sha1))
			continue;

		update_ref(msg, cur->name, cur->old_sha1,
			   NULL, 0, DIE_ON_ERR);
	}
}
/*
 * Iterator over a ref list: cb_data points at the cursor
 * (a struct ref *).
 *
 * Copies the old_sha1 of the next entry that has a peer_ref into
 * "sha1", advances the cursor past it and returns 0.  Returns -1,
 * leaving the cursor alone, once no such entry remains.
 */
static int iterate_ref_map(void *cb_data, unsigned char sha1[20])
{
	struct ref **cursor = cb_data;
	struct ref *cur;

	/*
	 * Entries without a peer_ref will not get a ref written for
	 * them, so step over those.
	 */
	for (cur = *cursor; cur; cur = cur->next) {
		if (cur->peer_ref)
			break;
	}

	/* -1 tells the caller that the list is exhausted. */
	if (!cur)
		return -1;

	hashcpy(sha1, cur->old_sha1);
	*cursor = cur->next;
	return 0;
}
static void update_remote_refs(const struct ref *refs,
const struct ref *mapped_refs,
const struct ref *remote_head_points_at,
const char *branch_top,
clone: open a shortcut for connectivity check In order to make sure the cloned repository is good, we run "rev-list --objects --not --all $new_refs" on the repository. This is expensive on large repositories. This patch attempts to mitigate the impact in this special case. In the "good" clone case, we only have one pack. If all of the following are met, we can be sure that all objects reachable from the new refs exist, which is the intention of running "rev-list ...": - all refs point to an object in the pack - there are no dangling pointers in any object in the pack - no objects in the pack point to objects outside the pack The second and third checks can be done with the help of index-pack as a slight variation of --strict check (which introduces a new condition for the shortcut: pack transfer must be used and the number of objects large enough to call index-pack). The first is checked in check_everything_connected after we get an "ok" from index-pack. "index-pack + new checks" is still faster than the current "index-pack + rev-list", which is the whole point of this patch. If any of the conditions fail, we fall back to the good old but expensive "rev-list ..". In that case it's even more expensive because we have to pay for the new checks in index-pack. But that should only happen when the other side is either buggy or malicious. Cloning linux-2.6 over file:// before after real 3m25.693s 2m53.050s user 5m2.037s 4m42.396s sys 0m13.750s 0m16.574s A more realistic test with ssh:// over wireless before after real 11m26.629s 10m4.213s user 5m43.196s 5m19.444s sys 0m35.812s 0m37.630s This shortcut is not applied to shallow clones, partly because shallow clones should have no more objects than a usual fetch and the cost of rev-list is acceptable, partly to avoid dealing with corner cases when grafting is involved. This shortcut does not apply to unpack-objects code path either because the number of objects must be small in order to trigger that code path. 
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-26 03:16:17 +02:00
const char *msg,
struct transport *transport,
clone: drop connectivity check for local clones Commit 0433ad1 (clone: run check_everything_connected, 2013-03-25) added the same connectivity check to clone that we use for fetching. The intent was to provide enough safety checks that "git clone git://..." could be counted on to detect bit errors and other repo corruption, and not silently propagate them to the clone. For local clones, this turns out to be a bad idea, for two reasons: 1. Local clones use hard linking (or even shared object stores), and so complete far more quickly. The time spent on the connectivity check is therefore proportionally much more painful. 2. Local clones do not actually meet our safety guarantee anyway. The connectivity check makes sure we have all of the objects we claim to, but it does not check for bit errors. We will notice bit errors in commits and trees, but we do not load blob objects at all. Whereas over the pack transport, we actually recompute the sha1 of each object in the incoming packfile; bit errors change the sha1 of the object, which is then caught by the connectivity check. This patch drops the connectivity check in the local case. Note that we have to revert the changes from 0433ad1 to t5710, as we no longer notice the corruption during clone. We could go a step further and provide a "verify even local clones" option, but it is probably not worthwhile. You can already spell that as "cd foo.git && git fsck && git clone ." or as "git clone --no-local foo.git". Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-08 09:30:41 +02:00
int check_connectivity)
{
const struct ref *rm = mapped_refs;
clone: drop connectivity check for local clones Commit 0433ad1 (clone: run check_everything_connected, 2013-03-25) added the same connectivity check to clone that we use for fetching. The intent was to provide enough safety checks that "git clone git://..." could be counted on to detect bit errors and other repo corruption, and not silently propagate them to the clone. For local clones, this turns out to be a bad idea, for two reasons: 1. Local clones use hard linking (or even shared object stores), and so complete far more quickly. The time spent on the connectivity check is therefore proportionally much more painful. 2. Local clones do not actually meet our safety guarantee anyway. The connectivity check makes sure we have all of the objects we claim to, but it does not check for bit errors. We will notice bit errors in commits and trees, but we do not load blob objects at all. Whereas over the pack transport, we actually recompute the sha1 of each object in the incoming packfile; bit errors change the sha1 of the object, which is then caught by the connectivity check. This patch drops the connectivity check in the local case. Note that we have to revert the changes from 0433ad1 to t5710, as we no longer notice the corruption during clone. We could go a step further and provide a "verify even local clones" option, but it is probably not worthwhile. You can already spell that as "cd foo.git && git fsck && git clone ." or as "git clone --no-local foo.git". Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-08 09:30:41 +02:00
if (check_connectivity) {
if (transport->progress)
fprintf(stderr, _("Checking connectivity... "));
if (check_everything_connected_with_transport(iterate_ref_map,
0, &rm, transport))
clone: drop connectivity check for local clones Commit 0433ad1 (clone: run check_everything_connected, 2013-03-25) added the same connectivity check to clone that we use for fetching. The intent was to provide enough safety checks that "git clone git://..." could be counted on to detect bit errors and other repo corruption, and not silently propagate them to the clone. For local clones, this turns out to be a bad idea, for two reasons: 1. Local clones use hard linking (or even shared object stores), and so complete far more quickly. The time spent on the connectivity check is therefore proportionally much more painful. 2. Local clones do not actually meet our safety guarantee anyway. The connectivity check makes sure we have all of the objects we claim to, but it does not check for bit errors. We will notice bit errors in commits and trees, but we do not load blob objects at all. Whereas over the pack transport, we actually recompute the sha1 of each object in the incoming packfile; bit errors change the sha1 of the object, which is then caught by the connectivity check. This patch drops the connectivity check in the local case. Note that we have to revert the changes from 0433ad1 to t5710, as we no longer notice the corruption during clone. We could go a step further and provide a "verify even local clones" option, but it is probably not worthwhile. You can already spell that as "cd foo.git && git fsck && git clone ." or as "git clone --no-local foo.git". Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-08 09:30:41 +02:00
die(_("remote did not send all necessary objects"));
if (transport->progress)
fprintf(stderr, _("done\n"));
clone: drop connectivity check for local clones Commit 0433ad1 (clone: run check_everything_connected, 2013-03-25) added the same connectivity check to clone that we use for fetching. The intent was to provide enough safety checks that "git clone git://..." could be counted on to detect bit errors and other repo corruption, and not silently propagate them to the clone. For local clones, this turns out to be a bad idea, for two reasons: 1. Local clones use hard linking (or even shared object stores), and so complete far more quickly. The time spent on the connectivity check is therefore proportionally much more painful. 2. Local clones do not actually meet our safety guarantee anyway. The connectivity check makes sure we have all of the objects we claim to, but it does not check for bit errors. We will notice bit errors in commits and trees, but we do not load blob objects at all. Whereas over the pack transport, we actually recompute the sha1 of each object in the incoming packfile; bit errors change the sha1 of the object, which is then caught by the connectivity check. This patch drops the connectivity check in the local case. Note that we have to revert the changes from 0433ad1 to t5710, as we no longer notice the corruption during clone. We could go a step further and provide a "verify even local clones" option, but it is probably not worthwhile. You can already spell that as "cd foo.git && git fsck && git clone ." or as "git clone --no-local foo.git". Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-08 09:30:41 +02:00
}
if (refs) {
write_remote_refs(mapped_refs);
if (option_single_branch)
write_followtags(refs, msg);
}
if (remote_head_points_at && !option_bare) {
struct strbuf head_ref = STRBUF_INIT;
strbuf_addstr(&head_ref, branch_top);
strbuf_addstr(&head_ref, "HEAD");
create_symref(head_ref.buf,
remote_head_points_at->peer_ref->name,
msg);
}
}
/*
 * Set up HEAD in the new repository.
 *
 *  - "our" is a branch (refs/heads/...): HEAD becomes a symref to it;
 *    unless --bare, HEAD is also updated to our->old_sha1 and branch
 *    configuration is installed against option_origin.
 *  - "our" is anything else (e.g. a tag given with --branch): HEAD is
 *    detached at the commit it refers to.  Dies if it does not refer
 *    to a commit.
 *  - "our" is NULL but "remote" is not: HEAD is detached at
 *    remote->old_sha1.
 *  - both NULL: nothing is written.
 *
 * msg is the reflog message.
 */
static void update_head(const struct ref *our, const struct ref *remote,
			const char *msg)
{
	if (our && !prefixcmp(our->name, "refs/heads/")) {
		/* Local default branch link */
		create_symref("HEAD", our->name, NULL);
		if (!option_bare) {
			const char *head = skip_prefix(our->name, "refs/heads/");
			update_ref(msg, "HEAD", our->old_sha1, NULL, 0, DIE_ON_ERR);
			install_branch_config(0, head, option_origin, our->name);
		}
	} else if (our) {
		struct commit *c = lookup_commit_reference(our->old_sha1);

		/*
		 * A tag may point at a tree or blob; there is no commit to
		 * detach HEAD at in that case.
		 */
		if (!c)
			die(_("'%s' does not point to a commit"), our->name);
		/* --branch specifies a non-branch (i.e. tags), detach HEAD */
		update_ref(msg, "HEAD", c->object.sha1,
			   NULL, REF_NODEREF, DIE_ON_ERR);
	} else if (remote) {
		/*
		 * We know remote HEAD points to a non-branch, or
		 * HEAD points to a branch but we don't know which one.
		 * Detach HEAD in all these cases.
		 */
		update_ref(msg, "HEAD", remote->old_sha1,
			   NULL, REF_NODEREF, DIE_ON_ERR);
	}
}
/*
 * Populate the index and the work tree from HEAD.
 *
 * Does nothing when --no-checkout was given or HEAD cannot be resolved
 * (the latter with a warning).  Otherwise the tree of HEAD is unpacked
 * with a one-way merge, the index is written, the post-checkout hook is
 * run and, with --recursive, "git submodule update --init --recursive"
 * is executed if the hook succeeded.
 *
 * Dies on unrecoverable errors.  Returns 0 on success, otherwise the
 * non-zero result of the hook or of the submodule command.
 */
static int checkout(void)
{
	unsigned char sha1[20];
	char *head;
	struct lock_file *lock_file;
	struct unpack_trees_options opts;
	struct tree *tree;
	struct tree_desc t;
	int err = 0, fd;

	if (option_no_checkout)
		return 0;

	head = resolve_refdup("HEAD", sha1, 1, NULL);
	if (!head) {
		warning(_("remote HEAD refers to nonexistent ref, "
			  "unable to checkout.\n"));
		return 0;
	}
	if (!strcmp(head, "HEAD")) {
		if (advice_detached_head)
			detach_advice(sha1_to_hex(sha1));
	} else {
		if (prefixcmp(head, "refs/heads/"))
			die(_("HEAD not found below refs/heads!"));
	}
	free(head);

	/* We need to be in the new work tree for the checkout */
	setup_work_tree();

	lock_file = xcalloc(1, sizeof(struct lock_file));
	fd = hold_locked_index(lock_file, 1);

	memset(&opts, 0, sizeof opts);
	opts.update = 1;
	opts.merge = 1;
	opts.fn = oneway_merge;
	opts.verbose_update = (option_verbosity >= 0);
	opts.src_index = &the_index;
	opts.dst_index = &the_index;

	/*
	 * HEAD may name an object that is missing, corrupt, or not
	 * tree-ish; stop here instead of dereferencing a NULL or
	 * unparsed tree below.
	 */
	tree = parse_tree_indirect(sha1);
	if (!tree || parse_tree(tree) < 0)
		die(_("unable to parse commit %s"), sha1_to_hex(sha1));
	init_tree_desc(&t, tree->buffer, tree->size);
	if (unpack_trees(1, &t, &opts) < 0)
		die(_("unable to checkout working tree"));

	if (write_cache(fd, active_cache, active_nr) ||
	    commit_locked_index(lock_file))
		die(_("unable to write new index file"));

	err |= run_hook(NULL, "post-checkout", sha1_to_hex(null_sha1),
			sha1_to_hex(sha1), "1", NULL);

	if (!err && option_recursive)
		err = run_command_v_opt(argv_submodule, RUN_GIT_CMD);

	return err;
}
/*
 * Callback for git_config_parse_parameter(): store one key/value pair
 * via git_config_set_multivar().  A key given without a value is
 * written as "true".  "data" is unused.
 */
static int write_one_config(const char *key, const char *value, void *data)
{
	if (!value)
		value = "true";
	return git_config_set_multivar(key, value, "^$", 0);
}
/*
 * Parse every "key=value" string in "config" and write it through
 * write_one_config().  Dies on the first entry that fails.
 */
static void write_config(struct string_list *config)
{
	int idx = 0;

	while (idx < config->nr) {
		const char *param = config->items[idx].string;

		if (git_config_parse_parameter(param, write_one_config, NULL) < 0)
			die("unable to write parameters to config file");
		idx++;
	}
}
/*
 * Write "remote.<origin>.fetch" (and "remote.<origin>.mirror" for
 * --mirror) into the new repository's configuration.
 *
 * src_ref_prefix:        remote-side prefix for the default refspec
 * our_head_points_at:    ref selected by --branch; read only when
 *                        --single-branch and --branch are both in effect
 * remote_head_points_at: ref the remote HEAD resolves to, or NULL
 * branch_top:            local prefix for remote-tracking refs
 *
 * Nothing is written for a bare, non-mirror clone, nor when no refspec
 * could be determined.
 */
static void write_refspec_config(const char* src_ref_prefix,
		const struct ref* our_head_points_at,
		const struct ref* remote_head_points_at, struct strbuf* branch_top)
{
	struct strbuf key = STRBUF_INIT;
	struct strbuf value = STRBUF_INIT;

	if (option_mirror || !option_bare) {
		if (option_single_branch && !option_mirror) {
			if (option_branch) {
				/*
				 * A tag is fetched onto itself.  Only a leading
				 * "refs/tags/" makes a ref a tag; a branch that
				 * merely contains that text in its name must
				 * still map below branch_top.
				 */
				if (!prefixcmp(our_head_points_at->name, "refs/tags/"))
					strbuf_addf(&value, "+%s:%s", our_head_points_at->name,
						our_head_points_at->name);
				else
					strbuf_addf(&value, "+%s:%s%s", our_head_points_at->name,
						branch_top->buf, option_branch);
			} else if (remote_head_points_at) {
				strbuf_addf(&value, "+%s:%s%s", remote_head_points_at->name,
						branch_top->buf,
						skip_prefix(remote_head_points_at->name, "refs/heads/"));
			}
			/*
			 * otherwise, the next "git fetch" will
			 * simply fetch from HEAD without updating
			 * any remote-tracking branch, which is what
			 * we want.
			 */
		} else {
			strbuf_addf(&value, "+%s*:%s*", src_ref_prefix, branch_top->buf);
		}
		/* Configure the remote */
		if (value.len) {
			strbuf_addf(&key, "remote.%s.fetch", option_origin);
			git_config_set_multivar(key.buf, value.buf, "^$", 0);
			strbuf_reset(&key);

			if (option_mirror) {
				strbuf_addf(&key, "remote.%s.mirror", option_origin);
				git_config_set(key.buf, "true");
				strbuf_reset(&key);
			}
		}
	}

	strbuf_release(&key);
	strbuf_release(&value);
}
int cmd_clone(int argc, const char **argv, const char *prefix)
{
int is_bundle = 0, is_local;
struct stat buf;
const char *repo_name, *repo, *work_tree, *git_dir;
char *path, *dir;
int dest_exists;
const struct ref *refs, *remote_head;
const struct ref *remote_head_points_at;
const struct ref *our_head_points_at;
struct ref *mapped_refs;
clone: fix up delay cloning conditions 6f48d39 (clone: delay cloning until after remote HEAD checking - 2012-01-16) allows us to perform some checks on remote refs before the actual cloning happens. But not all transport types support this. Remote helper with "import" capability will not return complete ref information until fetch is performed and therefore the clone cannot be delayed. foreign_vcs field in struct remote was used to detect this kind of transport and save the result. This is a mistake because foreign_vcs is designed to override url-based transport detection. As a result, if the same "struct transport *" object is used on many different urls and one of them attached remote transport, the following urls will be mistakenly attached to the same transport. This fault is worked around by dad0b3d (push: do not let configured foreign-vcs permanently clobbered - 2012-01-23) To fix this, detect incomplete refs from transport_get_remote_refs() by SHA-1. Incomplete ones must have null SHA-1 (*). Then revert changes related to foreign_cvs field in 6f48d39 and dad0b3d. A good thing from this change is that cloning smart http transport can also be delayed. Earlier it falls into the same category "remote transport, no delay". (*) Theoretically if one of the remote refs happens to have null SHA-1, it will trigger false alarm and the clone will not be delayed. But that chance may be too small for us to pay attention to. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-01-24 12:10:38 +01:00
const struct ref *ref;
struct strbuf key = STRBUF_INIT, value = STRBUF_INIT;
struct strbuf branch_top = STRBUF_INIT, reflog_msg = STRBUF_INIT;
struct transport *transport = NULL;
const char *src_ref_prefix = "refs/heads/";
struct remote *remote;
clone: fix up delay cloning conditions 6f48d39 (clone: delay cloning until after remote HEAD checking - 2012-01-16) allows us to perform some checks on remote refs before the actual cloning happens. But not all transport types support this. Remote helper with "import" capability will not return complete ref information until fetch is performed and therefore the clone cannot be delayed. foreign_vcs field in struct remote was used to detect this kind of transport and save the result. This is a mistake because foreign_vcs is designed to override url-based transport detection. As a result, if the same "struct transport *" object is used on many different urls and one of them attached remote transport, the following urls will be mistakenly attached to the same transport. This fault is worked around by dad0b3d (push: do not let configured foreign-vcs permanently clobbered - 2012-01-23) To fix this, detect incomplete refs from transport_get_remote_refs() by SHA-1. Incomplete ones must have null SHA-1 (*). Then revert changes related to foreign_cvs field in 6f48d39 and dad0b3d. A good thing from this change is that cloning smart http transport can also be delayed. Earlier it falls into the same category "remote transport, no delay". (*) Theoretically if one of the remote refs happens to have null SHA-1, it will trigger false alarm and the clone will not be delayed. But that chance may be too small for us to pay attention to. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-01-24 12:10:38 +01:00
int err = 0, complete_refs_before_fetch = 1;
struct refspec *refspec;
const char *fetch_pattern;
junk_pid = getpid();
packet_trace_identity("clone");
argc = parse_options(argc, argv, prefix, builtin_clone_options,
builtin_clone_usage, 0);
if (argc > 2)
usage_msg_opt(_("Too many arguments."),
builtin_clone_usage, builtin_clone_options);
if (argc == 0)
usage_msg_opt(_("You must specify a repository to clone."),
builtin_clone_usage, builtin_clone_options);
if (option_single_branch == -1)
option_single_branch = option_depth ? 1 : 0;
if (option_mirror)
option_bare = 1;
if (option_bare) {
if (option_origin)
die(_("--bare and --origin %s options are incompatible."),
option_origin);
if (real_git_dir)
die(_("--bare and --separate-git-dir are incompatible."));
option_no_checkout = 1;
}
if (!option_origin)
option_origin = "origin";
repo_name = argv[0];
path = get_repo_path(repo_name, &is_bundle);
if (path)
repo = xstrdup(absolute_path(repo_name));
else if (!strchr(repo_name, ':'))
die(_("repository '%s' does not exist"), repo_name);
else
repo = repo_name;
is_local = option_local != 0 && path && !is_bundle;
if (is_local && option_depth)
warning(_("--depth is ignored in local clones; use file:// instead."));
if (option_local > 0 && !is_local)
warning(_("--local is ignored"));
if (argc == 2)
dir = xstrdup(argv[1]);
else
dir = guess_dir_name(repo_name, is_bundle, option_bare);
strip_trailing_slashes(dir);
dest_exists = !stat(dir, &buf);
if (dest_exists && !is_empty_dir(dir))
die(_("destination path '%s' already exists and is not "
"an empty directory."), dir);
strbuf_addf(&reflog_msg, "clone: from %s", repo);
if (option_bare)
work_tree = NULL;
else {
work_tree = getenv("GIT_WORK_TREE");
if (work_tree && !stat(work_tree, &buf))
die(_("working tree '%s' already exists."), work_tree);
}
if (option_bare || work_tree)
git_dir = xstrdup(dir);
else {
work_tree = dir;
git_dir = mkpathdup("%s/.git", dir);
}
if (!option_bare) {
junk_work_tree = work_tree;
if (safe_create_leading_directories_const(work_tree) < 0)
die_errno(_("could not create leading directories of '%s'"),
work_tree);
if (!dest_exists && mkdir(work_tree, 0777))
die_errno(_("could not create work tree dir '%s'."),
work_tree);
set_git_work_tree(work_tree);
}
junk_git_dir = git_dir;
atexit(remove_junk);
sigchain_push_common(remove_junk_on_signal);
if (safe_create_leading_directories_const(git_dir) < 0)
die(_("could not create leading directories of '%s'"), git_dir);
set_git_dir_init(git_dir, real_git_dir, 0);
if (real_git_dir) {
git_dir = real_git_dir;
junk_git_dir = real_git_dir;
}
if (0 <= option_verbosity) {
if (option_bare)
fprintf(stderr, _("Cloning into bare repository '%s'...\n"), dir);
else
fprintf(stderr, _("Cloning into '%s'...\n"), dir);
}
init_db(option_template, INIT_DB_QUIET);
write_config(&option_config);
git_config(git_default_config, NULL);
if (option_bare) {
if (option_mirror)
src_ref_prefix = "refs/";
strbuf_addstr(&branch_top, src_ref_prefix);
git_config_set("core.bare", "true");
} else {
strbuf_addf(&branch_top, "refs/remotes/%s/", option_origin);
}
strbuf_addf(&value, "+%s*:%s*", src_ref_prefix, branch_top.buf);
strbuf_addf(&key, "remote.%s.url", option_origin);
git_config_set(key.buf, repo);
strbuf_reset(&key);
if (option_reference.nr)
setup_reference();
fetch_pattern = value.buf;
refspec = parse_fetch_refspec(1, &fetch_pattern);
strbuf_reset(&value);
remote = remote_get(option_origin);
transport = transport_get(remote, remote->url[0]);
if (!transport->get_refs_list || (!is_local && !transport->fetch))
die(_("Don't know how to clone %s"), transport->url);
transport_set_option(transport, TRANS_OPT_KEEP, "yes");
if (option_depth)
transport_set_option(transport, TRANS_OPT_DEPTH,
option_depth);
if (option_single_branch)
transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, "1");
transport_set_verbosity(transport, option_verbosity, option_progress);
if (option_upload_pack)
transport_set_option(transport, TRANS_OPT_UPLOADPACK,
option_upload_pack);
clone: open a shortcut for connectivity check In order to make sure the cloned repository is good, we run "rev-list --objects --not --all $new_refs" on the repository. This is expensive on large repositories. This patch attempts to mitigate the impact in this special case. In the "good" clone case, we only have one pack. If all of the following are met, we can be sure that all objects reachable from the new refs exist, which is the intention of running "rev-list ...": - all refs point to an object in the pack - there are no dangling pointers in any object in the pack - no objects in the pack point to objects outside the pack The second and third checks can be done with the help of index-pack as a slight variation of --strict check (which introduces a new condition for the shortcut: pack transfer must be used and the number of objects large enough to call index-pack). The first is checked in check_everything_connected after we get an "ok" from index-pack. "index-pack + new checks" is still faster than the current "index-pack + rev-list", which is the whole point of this patch. If any of the conditions fail, we fall back to the good old but expensive "rev-list ..". In that case it's even more expensive because we have to pay for the new checks in index-pack. But that should only happen when the other side is either buggy or malicious. Cloning linux-2.6 over file:// before after real 3m25.693s 2m53.050s user 5m2.037s 4m42.396s sys 0m13.750s 0m16.574s A more realistic test with ssh:// over wireless before after real 11m26.629s 10m4.213s user 5m43.196s 5m19.444s sys 0m35.812s 0m37.630s This shortcut is not applied to shallow clones, partly because shallow clones should have no more objects than a usual fetch and the cost of rev-list is acceptable, partly to avoid dealing with corner cases when grafting is involved. This shortcut does not apply to unpack-objects code path either because the number of objects must be small in order to trigger that code path. 
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-26 03:16:17 +02:00
if (transport->smart_options && !option_depth)
transport->smart_options->check_self_contained_and_connected = 1;
refs = transport_get_remote_refs(transport);
if (refs) {
mapped_refs = wanted_peer_refs(refs, refspec);
/*
* transport_get_remote_refs() may return refs with null sha-1
* in mapped_refs (see struct transport->get_refs_list
* comment). In that case we need fetch it early because
* remote_head code below relies on it.
*
* for normal clones, transport_get_remote_refs() should
* return reliable ref set, we can delay cloning until after
* remote HEAD check.
*/
for (ref = refs; ref; ref = ref->next)
if (is_null_sha1(ref->old_sha1)) {
complete_refs_before_fetch = 0;
break;
}
clone: fix up delay cloning conditions 6f48d39 (clone: delay cloning until after remote HEAD checking - 2012-01-16) allows us to perform some checks on remote refs before the actual cloning happens. But not all transport types support this. Remote helper with "import" capability will not return complete ref information until fetch is performed and therefore the clone cannot be delayed. foreign_vcs field in struct remote was used to detect this kind of transport and save the result. This is a mistake because foreign_vcs is designed to override url-based transport detection. As a result, if the same "struct transport *" object is used on many different urls and one of them attached remote transport, the following urls will be mistakenly attached to the same transport. This fault is worked around by dad0b3d (push: do not let configured foreign-vcs permanently clobbered - 2012-01-23) To fix this, detect incomplete refs from transport_get_remote_refs() by SHA-1. Incomplete ones must have null SHA-1 (*). Then revert changes related to foreign_cvs field in 6f48d39 and dad0b3d. A good thing from this change is that cloning smart http transport can also be delayed. Earlier it falls into the same category "remote transport, no delay". (*) Theoretically if one of the remote refs happens to have null SHA-1, it will trigger false alarm and the clone will not be delayed. But that chance may be too small for us to pay attention to. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-01-24 12:10:38 +01:00
if (!is_local && !complete_refs_before_fetch)
transport_fetch_refs(transport, mapped_refs);
remote_head = find_ref_by_name(refs, "HEAD");
remote_head_points_at =
guess_remote_head(remote_head, mapped_refs, 0);
if (option_branch) {
our_head_points_at =
find_remote_branch(mapped_refs, option_branch);
if (!our_head_points_at)
die(_("Remote branch %s not found in upstream %s"),
option_branch, option_origin);
}
else
our_head_points_at = remote_head_points_at;
}
else {
if (option_branch)
die(_("Remote branch %s not found in upstream %s"),
option_branch, option_origin);
warning(_("You appear to have cloned an empty repository."));
mapped_refs = NULL;
our_head_points_at = NULL;
remote_head_points_at = NULL;
remote_head = NULL;
option_no_checkout = 1;
if (!option_bare)
install_branch_config(0, "master", option_origin,
"refs/heads/master");
}
write_refspec_config(src_ref_prefix, our_head_points_at,
remote_head_points_at, &branch_top);
if (is_local)
clone_local(path, git_dir);
clone: fix up delay cloning conditions 6f48d39 (clone: delay cloning until after remote HEAD checking - 2012-01-16) allows us to perform some checks on remote refs before the actual cloning happens. But not all transport types support this. Remote helper with "import" capability will not return complete ref information until fetch is performed and therefore the clone cannot be delayed. foreign_vcs field in struct remote was used to detect this kind of transport and save the result. This is a mistake because foreign_vcs is designed to override url-based transport detection. As a result, if the same "struct transport *" object is used on many different urls and one of them attached remote transport, the following urls will be mistakenly attached to the same transport. This fault is worked around by dad0b3d (push: do not let configured foreign-vcs permanently clobbered - 2012-01-23) To fix this, detect incomplete refs from transport_get_remote_refs() by SHA-1. Incomplete ones must have null SHA-1 (*). Then revert changes related to foreign_cvs field in 6f48d39 and dad0b3d. A good thing from this change is that cloning smart http transport can also be delayed. Earlier it falls into the same category "remote transport, no delay". (*) Theoretically if one of the remote refs happens to have null SHA-1, it will trigger false alarm and the clone will not be delayed. But that chance may be too small for us to pay attention to. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-01-24 12:10:38 +01:00
else if (refs && complete_refs_before_fetch)
transport_fetch_refs(transport, mapped_refs);
update_remote_refs(refs, mapped_refs, remote_head_points_at,
branch_top.buf, reflog_msg.buf, transport, !is_local);
update_head(our_head_points_at, remote_head, reflog_msg.buf);
transport_unlock_pack(transport);
transport_disconnect(transport);
junk_mode = JUNK_LEAVE_REPO;
err = checkout();
strbuf_release(&reflog_msg);
strbuf_release(&branch_top);
strbuf_release(&key);
strbuf_release(&value);
junk_mode = JUNK_LEAVE_ALL;
return err;
}