diff --git a/Documentation/git-clone.txt b/Documentation/git-clone.txt index 632bd1348e..d032d971dd 100644 --- a/Documentation/git-clone.txt +++ b/Documentation/git-clone.txt @@ -323,6 +323,13 @@ or `--mirror` is given) for `host.xz:foo/.git`). Cloning into an existing directory is only allowed if the directory is empty. +--bundle-uri=:: + Before fetching from the remote, fetch a bundle from the given + `` and unbundle the data into the local repository. The refs + in the bundle will be stored under the hidden `refs/bundle/*` + namespace. This option is incompatible with `--depth`, + `--shallow-since`, and `--shallow-exclude`. + :git-clone: 1 include::urls.txt[] diff --git a/Documentation/gitremote-helpers.txt b/Documentation/gitremote-helpers.txt index 6f1e269ae4..ed8da428c9 100644 --- a/Documentation/gitremote-helpers.txt +++ b/Documentation/gitremote-helpers.txt @@ -168,6 +168,9 @@ Supported commands: 'list', 'import'. Can guarantee that when a clone is requested, the received pack is self contained and is connected. +'get':: + Can use the 'get' command to download a file from a given URI. + If a helper advertises 'connect', Git will use it if possible and fall back to another capability if the helper requests so when connecting (see the 'connect' command under COMMANDS). @@ -418,6 +421,12 @@ Supported if the helper has the "connect" capability. + Supported if the helper has the "stateless-connect" capability. +'get' :: + Downloads the file from the given `` to the given ``. If + `.temp` exists, then Git assumes that the `.temp` file is a + partial download from a previous attempt and will resume the + download from that position. + If a fatal error occurs, the program writes the error message to stderr and exits. The caller should expect that a suitable error message has been printed if the child closes the connection without diff --git a/Makefile b/Makefile index eac30126e2..924b864ae8 100644 --- a/Makefile +++ b/Makefile @@ -907,6 +907,7 @@ LIB_OBJS += blob.o LIB_OBJS += bloom.o LIB_OBJS += branch.o LIB_OBJS += bulk-checkin.o +LIB_OBJS += bundle-uri.o LIB_OBJS += bundle.o LIB_OBJS += cache-tree.o LIB_OBJS += cbtree.o diff --git a/builtin/clone.c b/builtin/clone.c index c4ff4643ec..e21d42dfee 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -34,6 +34,7 @@ #include "list-objects-filter-options.h" #include "hook.h" #include "bundle.h" +#include "bundle-uri.h" /* * Overall FIXMEs: @@ -77,6 +78,7 @@ static int option_filter_submodules = -1; /* unspecified */ static int config_filter_submodules = -1; /* unspecified */ static struct string_list server_options = STRING_LIST_INIT_NODUP; static int option_remote_submodules; +static const char *bundle_uri; static int recurse_submodules_cb(const struct option *opt, const char *arg, int unset) @@ -160,6 +162,8 @@ static struct option builtin_clone_options[] = { N_("any cloned submodules will use their remote-tracking branch")), OPT_BOOL(0, "sparse", &option_sparse_checkout, N_("initialize sparse-checkout file to include only files at root")), + OPT_STRING(0, "bundle-uri", &bundle_uri, + N_("uri"), N_("a URI for downloading bundles before fetching from origin remote")), OPT_END() }; @@ -933,6 +937,9 @@ int cmd_clone(int argc, const char **argv, const char *prefix) option_no_checkout = 1; } + if (bundle_uri && deepen) + die(_("--bundle-uri is incompatible with --depth, --shallow-since, and --shallow-exclude")); + repo_name = argv[0]; path = get_repo_path(repo_name, &is_bundle); @@ -1232,6 +1239,18 @@ int cmd_clone(int argc, const char **argv, const char *prefix) if (transport->smart_options && !deepen && !filter_options.choice) transport->smart_options->check_self_contained_and_connected = 1; + /* + * Before fetching from the remote, download and install bundle + * data from the --bundle-uri option. + */ + if (bundle_uri) { + /* At this point, we need the_repository to match the cloned repo. */ + if (repo_init(the_repository, git_dir, work_tree)) + warning(_("failed to initialize the repo, skipping bundle URI")); + else if (fetch_bundle_uri(the_repository, bundle_uri)) + warning(_("failed to fetch objects from bundle URI '%s'"), + bundle_uri); + } strvec_push(&transport_ls_refs_options.ref_prefixes, "HEAD"); refspec_ref_prefixes(&remote->fetch, diff --git a/bundle-uri.c b/bundle-uri.c new file mode 100644 index 0000000000..4a8cc74ed0 --- /dev/null +++ b/bundle-uri.c @@ -0,0 +1,168 @@ +#include "cache.h" +#include "bundle-uri.h" +#include "bundle.h" +#include "object-store.h" +#include "refs.h" +#include "run-command.h" + +static int find_temp_filename(struct strbuf *name) +{ + int fd; + /* + * Find a temporary filename that is available. This is briefly + * racy, but unlikely to collide. + */ + fd = odb_mkstemp(name, "bundles/tmp_uri_XXXXXX"); + if (fd < 0) { + warning(_("failed to create temporary file")); + return -1; + } + + close(fd); + unlink(name->buf); + return 0; +} + +static int download_https_uri_to_file(const char *file, const char *uri) +{ + int result = 0; + struct child_process cp = CHILD_PROCESS_INIT; + FILE *child_in = NULL, *child_out = NULL; + struct strbuf line = STRBUF_INIT; + int found_get = 0; + + strvec_pushl(&cp.args, "git-remote-https", uri, NULL); + cp.in = -1; + cp.out = -1; + + if (start_command(&cp)) + return 1; + + child_in = fdopen(cp.in, "w"); + if (!child_in) { + result = 1; + goto cleanup; + } + + child_out = fdopen(cp.out, "r"); + if (!child_out) { + result = 1; + goto cleanup; + } + + fprintf(child_in, "capabilities\n"); + fflush(child_in); + + while (!strbuf_getline(&line, child_out)) { + if (!line.len) + break; + if (!strcmp(line.buf, "get")) + found_get = 1; + } + strbuf_release(&line); + + if (!found_get) { + result = error(_("insufficient capabilities")); + goto cleanup; + } + + fprintf(child_in, "get %s %s\n\n", uri, file); + +cleanup: + if (child_in) + fclose(child_in); + if (finish_command(&cp)) + return 1; + if (child_out) + fclose(child_out); + return result; +} + +static int copy_uri_to_file(const char *filename, const char *uri) +{ + const char *out; + + if (starts_with(uri, "https:") || + starts_with(uri, "http:")) + return download_https_uri_to_file(filename, uri); + + if (skip_prefix(uri, "file://", &out)) + uri = out; + + /* Copy as a file */ + return copy_file(filename, uri, 0); +} + +static int unbundle_from_file(struct repository *r, const char *file) +{ + int result = 0; + int bundle_fd; + struct bundle_header header = BUNDLE_HEADER_INIT; + struct string_list_item *refname; + struct strbuf bundle_ref = STRBUF_INIT; + size_t bundle_prefix_len; + + if ((bundle_fd = read_bundle_header(file, &header)) < 0) + return 1; + + if ((result = unbundle(r, &header, bundle_fd, NULL))) + return 1; + + /* + * Convert all refs/heads/ from the bundle into refs/bundles/ + * in the local repository. + */ + strbuf_addstr(&bundle_ref, "refs/bundles/"); + bundle_prefix_len = bundle_ref.len; + + for_each_string_list_item(refname, &header.references) { + struct object_id *oid = refname->util; + struct object_id old_oid; + const char *branch_name; + int has_old; + + if (!skip_prefix(refname->string, "refs/heads/", &branch_name)) + continue; + + strbuf_setlen(&bundle_ref, bundle_prefix_len); + strbuf_addstr(&bundle_ref, branch_name); + + has_old = !read_ref(bundle_ref.buf, &old_oid); + update_ref("fetched bundle", bundle_ref.buf, oid, + has_old ? &old_oid : NULL, + REF_SKIP_OID_VERIFICATION, + UPDATE_REFS_MSG_ON_ERR); + } + + bundle_header_release(&header); + return result; +} + +int fetch_bundle_uri(struct repository *r, const char *uri) +{ + int result = 0; + struct strbuf filename = STRBUF_INIT; + + if ((result = find_temp_filename(&filename))) + goto cleanup; + + if ((result = copy_uri_to_file(filename.buf, uri))) { + warning(_("failed to download bundle from URI '%s'"), uri); + goto cleanup; + } + + if ((result = !is_bundle(filename.buf, 0))) { + warning(_("file at URI '%s' is not a bundle"), uri); + goto cleanup; + } + + if ((result = unbundle_from_file(r, filename.buf))) { + warning(_("failed to unbundle bundle from URI '%s'"), uri); + goto cleanup; + } + +cleanup: + unlink(filename.buf); + strbuf_release(&filename); + return result; +} diff --git a/bundle-uri.h b/bundle-uri.h new file mode 100644 index 0000000000..8a152f1ef1 --- /dev/null +++ b/bundle-uri.h @@ -0,0 +1,14 @@ +#ifndef BUNDLE_URI_H +#define BUNDLE_URI_H + +struct repository; + +/** + * Fetch data from the given 'uri' and unbundle the bundle data found + * based on that information. + * + * Returns non-zero if no bundle information is found at the given 'uri'. + */ +int fetch_bundle_uri(struct repository *r, const char *uri); + +#endif diff --git a/remote-curl.c b/remote-curl.c index b8758757ec..72dfb8fb86 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -1286,6 +1286,29 @@ static void parse_fetch(struct strbuf *buf) strbuf_reset(buf); } +static void parse_get(const char *arg) +{ + struct strbuf url = STRBUF_INIT; + struct strbuf path = STRBUF_INIT; + const char *space; + + space = strchr(arg, ' '); + + if (!space) + die(_("protocol error: expected ' ', missing space")); + + strbuf_add(&url, arg, space - arg); + strbuf_addstr(&path, space + 1); + + if (http_get_file(url.buf, path.buf, NULL)) + die(_("failed to download file at URL '%s'"), url.buf); + + strbuf_release(&url); + strbuf_release(&path); + printf("\n"); + fflush(stdout); +} + static int push_dav(int nr_spec, const char **specs) { struct child_process child = CHILD_PROCESS_INIT; @@ -1564,9 +1587,14 @@ int cmd_main(int argc, const char **argv) printf("unsupported\n"); fflush(stdout); + } else if (skip_prefix(buf.buf, "get ", &arg)) { + parse_get(arg); + fflush(stdout); + } else if (!strcmp(buf.buf, "capabilities")) { printf("stateless-connect\n"); printf("fetch\n"); + printf("get\n"); printf("option\n"); printf("push\n"); printf("check-connectivity\n"); diff --git a/t/t5557-http-get.sh b/t/t5557-http-get.sh new file mode 100755 index 0000000000..76a4bbd16a --- /dev/null +++ b/t/t5557-http-get.sh @@ -0,0 +1,39 @@ +#!/bin/sh + +test_description='test downloading a file by URL' + +TEST_PASSES_SANITIZE_LEAK=true + +. ./test-lib.sh + +. "$TEST_DIRECTORY"/lib-httpd.sh +start_httpd + +test_expect_success 'get by URL: 404' ' + test_when_finished "rm -f file.temp" && + url="$HTTPD_URL/none.txt" && + cat >input <<-EOF && + capabilities + get $url file1 + EOF + + test_must_fail git remote-http $url err && + test_path_is_missing file1 && + grep "failed to download file at URL" err +' + +test_expect_success 'get by URL: 200' ' + echo data >"$HTTPD_DOCUMENT_ROOT_PATH/exists.txt" && + + url="$HTTPD_URL/exists.txt" && + cat >input <<-EOF && + capabilities + get $url file2 + + EOF + + git remote-http $url err && + grep "failed to download bundle from URI" err +' + +test_expect_success 'fail to clone from non-bundle file' ' + test_when_finished rm -rf test && + echo bogus >bogus && + git clone --bundle-uri="$(pwd)/bogus" . test 2>err && + grep "is not a bundle" err +' + +test_expect_success 'create bundle' ' + git init clone-from && + git -C clone-from checkout -b topic && + test_commit -C clone-from A && + test_commit -C clone-from B && + git -C clone-from bundle create B.bundle topic +' + +test_expect_success 'clone with path bundle' ' + git clone --bundle-uri="clone-from/B.bundle" \ + clone-from clone-path && + git -C clone-path rev-parse refs/bundles/topic >actual && + git -C clone-from rev-parse topic >expect && + test_cmp expect actual +' + +test_expect_success 'clone with file:// bundle' ' + git clone --bundle-uri="file://$(pwd)/clone-from/B.bundle" \ + clone-from clone-file && + git -C clone-file rev-parse refs/bundles/topic >actual && + git -C clone-from rev-parse topic >expect && + test_cmp expect actual +' + +######################################################################### +# HTTP tests begin here + +. "$TEST_DIRECTORY"/lib-httpd.sh +start_httpd + +test_expect_success 'fail to fetch from non-existent HTTP URL' ' + test_when_finished rm -rf test && + git clone --bundle-uri="$HTTPD_URL/does-not-exist" . test 2>err && + grep "failed to download bundle from URI" err +' + +test_expect_success 'fail to fetch from non-bundle HTTP URL' ' + test_when_finished rm -rf test && + echo bogus >"$HTTPD_DOCUMENT_ROOT_PATH/bogus" && + git clone --bundle-uri="$HTTPD_URL/bogus" . test 2>err && + grep "is not a bundle" err +' + +test_expect_success 'clone HTTP bundle' ' + cp clone-from/B.bundle "$HTTPD_DOCUMENT_ROOT_PATH/B.bundle" && + + git clone --no-local --mirror clone-from \ + "$HTTPD_DOCUMENT_ROOT_PATH/fetch.git" && + + git clone --bundle-uri="$HTTPD_URL/B.bundle" \ + "$HTTPD_URL/smart/fetch.git" clone-http && + git -C clone-http rev-parse refs/bundles/topic >actual && + git -C clone-from rev-parse topic >expect && + test_cmp expect actual && + + test_config -C clone-http log.excludedecoration refs/bundle/ +' + +# Do not add tests here unless they use the HTTP server, as they will +# not run unless the HTTP dependencies exist. + +test_done diff --git a/t/t5606-clone-options.sh b/t/t5606-clone-options.sh index 8f676d6b0c..f6bb02ab94 100755 --- a/t/t5606-clone-options.sh +++ b/t/t5606-clone-options.sh @@ -58,6 +58,14 @@ test_expect_success 'disallows --bare with --separate-git-dir' ' ' +test_expect_success 'disallows --bundle-uri with shallow options' ' + for option in --depth=1 --shallow-since=01-01-2000 --shallow-exclude=HEAD + do + test_must_fail git clone --bundle-uri=bundle $option from to 2>err && + grep "bundle-uri is incompatible" err || return 1 + done +' + test_expect_success 'reject cloning shallow repository' ' test_when_finished "rm -rf repo" && test_must_fail git clone --reject-shallow shallow-repo out 2>err &&