From a3b0079c6a2e6336b061465623b8f2db308a6978 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sun, 11 Nov 2007 02:29:30 -0500 Subject: [PATCH 1/4] git-fetch: Always fetch tags if the object they reference exists Previously git-fetch.sh used `git cat-file -t` to determine if an object referenced by a tag exists, and if so fetch that tag locally. This was subtly broken during the port to C based builtin-fetch as lookup_object() only works to locate an object if it was previously accessed by the transport. Not all transports will access all objects in this way, so tags were not always being fetched. The rsync transport never loads objects into the internal object table so automated tag following didn't work if rsync was used. Automated tag following also didn't work on the native transport if the new tag was behind the common point(s) negotiated between the two ends of the connection as the tag's referrant would not be loaded into the internal object table. Further the automated tag following was broken with the HTTP commit walker if the new tag's referrant was behind an existing ref, as the walker would stop before loading the tag's referrant into the object table. Switching to has_sha1_file() restores the original behavior from the shell script by checking if the object exists in the ODB, without relying on the state left behind by a transport. Signed-off-by: Shawn O. Pearce --- builtin-fetch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin-fetch.c b/builtin-fetch.c index 5f5b59bfdb..1cb30c52c1 100644 --- a/builtin-fetch.c +++ b/builtin-fetch.c @@ -384,7 +384,7 @@ static struct ref *find_non_local_tags(struct transport *transport, if (!path_list_has_path(&existing_refs, ref_name) && !path_list_has_path(&new_refs, ref_name) && - lookup_object(ref->old_sha1)) { + has_sha1_file(ref->old_sha1)) { path_list_insert(ref_name, &new_refs); rm = alloc_ref(strlen(ref_name) + 1); From b73a4397590df9582dd1c994cac30e55e26b0b1e Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sun, 11 Nov 2007 02:29:37 -0500 Subject: [PATCH 2/4] run-command: Support sending stderr to /dev/null Some callers may wish to redirect stderr to /dev/null in some contexts, such as if they are executing a command only to get the exit status and don't want users to see whatever output it may produce as a side-effect of computing that exit status. Signed-off-by: Shawn O. Pearce --- run-command.c | 6 ++++-- run-command.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/run-command.c b/run-command.c index d99a6c4ea7..476d00c218 100644 --- a/run-command.c +++ b/run-command.c @@ -41,7 +41,7 @@ int start_command(struct child_process *cmd) cmd->close_out = 1; } - need_err = cmd->err < 0; + need_err = !cmd->no_stderr && cmd->err < 0; if (need_err) { if (pipe(fderr) < 0) { if (need_in) @@ -87,7 +87,9 @@ int start_command(struct child_process *cmd) close(cmd->out); } - if (need_err) { + if (cmd->no_stderr) + dup_devnull(2); + else if (need_err) { dup2(fderr[1], 2); close_pair(fderr); } diff --git a/run-command.h b/run-command.h index 94e1e9d516..1fc781d766 100644 --- a/run-command.h +++ b/run-command.h @@ -23,6 +23,7 @@ struct child_process { unsigned close_out:1; unsigned no_stdin:1; unsigned no_stdout:1; + unsigned no_stderr:1; unsigned git_cmd:1; /* if this is to be git sub-command */ unsigned stdout_to_stderr:1; }; From 27350891de59608d4db689cf0851f7e49158a6e3 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sun, 11 Nov 2007 02:29:41 -0500 Subject: [PATCH 3/4] rev-list: Introduce --quiet to avoid /dev/null redirects Some uses of git-rev-list are to run it with --objects to see if a range of objects between two or more commits is fully connected or not. In such a case the caller doesn't care about the actual object names or hash hints so formatting this data only for it to be dumped to /dev/null by a redirect is a waste of CPU time. If all the caller needs is the exit status then --quiet can be used to bypass the commit and object formatting. Signed-off-by: Shawn O. Pearce --- Documentation/git-rev-list.txt | 9 +++++++++ builtin-rev-list.c | 26 ++++++++++++++++++++++---- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/Documentation/git-rev-list.txt b/Documentation/git-rev-list.txt index 485280423e..989fbf3562 100644 --- a/Documentation/git-rev-list.txt +++ b/Documentation/git-rev-list.txt @@ -20,6 +20,7 @@ SYNOPSIS [ \--not ] [ \--all ] [ \--stdin ] + [ \--quiet ] [ \--topo-order ] [ \--parents ] [ \--timestamp ] @@ -270,6 +271,14 @@ limiting may be applied. In addition to the '' listed on the command line, read them from the standard input. +--quiet:: + + Don't print anything to standard output. This form of + git-rev-list is primarly meant to allow the caller to + test the exit status to see if a range of objects is fully + connected (or not). It is faster than redirecting stdout + to /dev/null as the output does not have to be formatted. + --cherry-pick:: Omit any commit that introduces the same change as diff --git a/builtin-rev-list.c b/builtin-rev-list.c index 697046723f..f5149e59b7 100644 --- a/builtin-rev-list.c +++ b/builtin-rev-list.c @@ -26,6 +26,7 @@ static const char rev_list_usage[] = " --remove-empty\n" " --all\n" " --stdin\n" +" --quiet\n" " ordering output:\n" " --topo-order\n" " --date-order\n" @@ -50,6 +51,7 @@ static int show_timestamp; static int hdr_termination; static const char *header_prefix; +static void finish_commit(struct commit *commit); static void show_commit(struct commit *commit) { if (show_timestamp) @@ -93,6 +95,11 @@ static void show_commit(struct commit *commit) strbuf_release(&buf); } maybe_flush_or_die(stdout, "stdout"); + finish_commit(commit); +} + +static void finish_commit(struct commit *commit) +{ if (commit->parents) { free_commit_list(commit->parents); commit->parents = NULL; @@ -101,6 +108,12 @@ static void show_commit(struct commit *commit) commit->buffer = NULL; } +static void finish_object(struct object_array_entry *p) +{ + if (p->item->type == OBJ_BLOB && !has_sha1_file(p->item->sha1)) + die("missing blob object '%s'", sha1_to_hex(p->item->sha1)); +} + static void show_object(struct object_array_entry *p) { /* An object with name "foo\n0000000..." can be used to @@ -108,9 +121,7 @@ static void show_object(struct object_array_entry *p) */ const char *ep = strchr(p->name, '\n'); - if (p->item->type == OBJ_BLOB && !has_sha1_file(p->item->sha1)) - die("missing blob object '%s'", sha1_to_hex(p->item->sha1)); - + finish_object(p); if (ep) { printf("%s %.*s\n", sha1_to_hex(p->item->sha1), (int) (ep - p->name), @@ -527,6 +538,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) int read_from_stdin = 0; int bisect_show_vars = 0; int bisect_find_all = 0; + int quiet = 0; git_config(git_default_config); init_revisions(&revs, prefix); @@ -565,6 +577,10 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) read_revisions_from_stdin(&revs); continue; } + if (!strcmp(arg, "--quiet")) { + quiet = 1; + continue; + } usage(rev_list_usage); } @@ -640,7 +656,9 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) } } - traverse_commit_list(&revs, show_commit, show_object); + traverse_commit_list(&revs, + quiet ? finish_commit : show_commit, + quiet ? finish_object : show_object); return 0; } From 4191c35671f6392173221bea3994f8b305f4f3a8 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Sun, 11 Nov 2007 02:29:47 -0500 Subject: [PATCH 4/4] git-fetch: avoid local fetching from alternate (again) Back in e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Junio taught git-fetch to avoid copying objects when we are fetching from a repository that is already registered as an alternate object database. In such a case there is no reason to copy any objects as we can already obtain them through the alternate. However we need to ensure the objects are all reachable, so we run `git rev-list --objects $theirs --not --all` to verify this. If any object is missing or unreadable then we need to fetch/copy the objects from the remote. When a missing object is detected the git-rev-list process will exit with a non-zero exit status, making this condition quite easy to detect. Although git-fetch is currently a builtin (and so is rev-list) we cannot invoke the traverse_objects() API at this point in the transport code. The object walker within traverse_objects() calls die() as soon as it finds an object it cannot read. If that happens we want to resume the fetch process by calling do_fetch_pack(). To get around this we spawn git-rev-list into a background process to prevent a die() from killing the foreground fetch process, thus allowing the fetch process to resume into do_fetch_pack() if copying is necessary. We aren't interested in the output of rev-list (a list of SHA-1 object names that are reachable) or its errors (a "spurious" error about an object not being found as we need to copy it) so we redirect both stdout and stderr to /dev/null. We run this git-rev-list based check before any fetch as we may already have the necessary objects local from a prior fetch. If we don't then its very likely the first $theirs object listed on the command line won't exist locally and git-rev-list will die very quickly, allowing us to start the network transfer. This test even on remote URLs may save bandwidth if someone runs `git pull origin`, sees a merge conflict, resets out, then redoes the same pull just a short time later. If the remote hasn't changed between the two pulls and the local repository hasn't had git-gc run in it then there is probably no need to perform network transfer as all of the objects are local. Documentation for the new quickfetch function was suggested and written by Junio, based on his original comment in git-fetch.sh. Signed-off-by: Shawn O. Pearce --- builtin-fetch.c | 69 +++++++++++++++++++++++++++++++++++++++++-- t/t5502-quickfetch.sh | 33 +++++++++++++++++++++ 2 files changed, 100 insertions(+), 2 deletions(-) diff --git a/builtin-fetch.c b/builtin-fetch.c index 1cb30c52c1..c1930aaa0e 100644 --- a/builtin-fetch.c +++ b/builtin-fetch.c @@ -8,10 +8,12 @@ #include "path-list.h" #include "remote.h" #include "transport.h" +#include "run-command.h" static const char fetch_usage[] = "git-fetch [-a | --append] [--upload-pack ] [-f | --force] [--no-tags] [-t | --tags] [-k | --keep] [-u | --update-head-ok] [--depth ] [-v | --verbose] [ ...]"; static int append, force, tags, no_tags, update_head_ok, verbose, quiet; +static const char *depth; static char *default_rla = NULL; static struct transport *transport; @@ -330,9 +332,72 @@ static void store_updated_refs(const char *url, struct ref *ref_map) fclose(fp); } +/* + * We would want to bypass the object transfer altogether if + * everything we are going to fetch already exists and connected + * locally. + * + * The refs we are going to fetch are in to_fetch (nr_heads in + * total). If running + * + * $ git-rev-list --objects to_fetch[0] to_fetch[1] ... --not --all + * + * does not error out, that means everything reachable from the + * refs we are going to fetch exists and is connected to some of + * our existing refs. + */ +static int quickfetch(struct ref *ref_map) +{ + struct child_process revlist; + struct ref *ref; + char **argv; + int i, err; + + /* + * If we are deepening a shallow clone we already have these + * objects reachable. Running rev-list here will return with + * a good (0) exit status and we'll bypass the fetch that we + * really need to perform. Claiming failure now will ensure + * we perform the network exchange to deepen our history. + */ + if (depth) + return -1; + + for (i = 0, ref = ref_map; ref; ref = ref->next) + i++; + if (!i) + return 0; + + argv = xmalloc(sizeof(*argv) * (i + 6)); + i = 0; + argv[i++] = xstrdup("rev-list"); + argv[i++] = xstrdup("--quiet"); + argv[i++] = xstrdup("--objects"); + for (ref = ref_map; ref; ref = ref->next) + argv[i++] = xstrdup(sha1_to_hex(ref->old_sha1)); + argv[i++] = xstrdup("--not"); + argv[i++] = xstrdup("--all"); + argv[i++] = NULL; + + memset(&revlist, 0, sizeof(revlist)); + revlist.argv = (const char**)argv; + revlist.git_cmd = 1; + revlist.no_stdin = 1; + revlist.no_stdout = 1; + revlist.no_stderr = 1; + err = run_command(&revlist); + + for (i = 0; argv[i]; i++) + free(argv[i]); + free(argv); + return err; +} + static int fetch_refs(struct transport *transport, struct ref *ref_map) { - int ret = transport_fetch_refs(transport, ref_map); + int ret = quickfetch(ref_map); + if (ret) + ret = transport_fetch_refs(transport, ref_map); if (!ret) store_updated_refs(transport->url, ref_map); transport_unlock_pack(transport); @@ -468,7 +533,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) static const char **refs = NULL; int ref_nr = 0; int cmd_len = 0; - const char *depth = NULL, *upload_pack = NULL; + const char *upload_pack = NULL; int keep = 0; for (i = 1; i < argc; i++) { diff --git a/t/t5502-quickfetch.sh b/t/t5502-quickfetch.sh index b4760f2dc0..16eadd6b68 100755 --- a/t/t5502-quickfetch.sh +++ b/t/t5502-quickfetch.sh @@ -86,4 +86,37 @@ test_expect_success 'quickfetch should not leave a corrupted repository' ' ' +test_expect_success 'quickfetch should not copy from alternate' ' + + ( + mkdir quickclone && + cd quickclone && + git init-db && + (cd ../.git/objects && pwd) >.git/objects/info/alternates && + git remote add origin .. && + git fetch -k -k + ) && + obj_cnt=$( ( + cd quickclone && + git count-objects | sed -e "s/ *objects,.*//" + ) ) && + pck_cnt=$( ( + cd quickclone && + git count-objects -v | sed -n -e "/packs:/{ + s/packs:// + p + q + }" + ) ) && + origin_master=$( ( + cd quickclone && + git rev-parse origin/master + ) ) && + echo "loose objects: $obj_cnt, packfiles: $pck_cnt" && + test $obj_cnt -eq 0 && + test $pck_cnt -eq 0 && + test z$origin_master = z$(git rev-parse master) + +' + test_done