Merge branch 'ds/bundle-uri-5'

The bundle-URI subsystem adds support for creation-token heuristics
to help incremental fetches.

* ds/bundle-uri-5:
  bundle-uri: test missing bundles with heuristic
  bundle-uri: store fetch.bundleCreationToken
  fetch: fetch from an external bundle URI
  bundle-uri: drop bundle.flag from design doc
  clone: set fetch.bundleURI if appropriate
  bundle-uri: download in creationToken order
  bundle-uri: parse bundle.<id>.creationToken values
  bundle-uri: parse bundle.heuristic=creationToken
  t5558: add tests for creationToken heuristic
  bundle: verify using check_connected()
  bundle: test unbundling with incomplete history
This commit is contained in:
Junio C Hamano 2023-02-15 17:11:52 -08:00
commit 4f59836451
13 changed files with 1149 additions and 57 deletions

View File

@ -15,6 +15,13 @@ bundle.mode::
complete understanding of the bundled information (`all`) or if any one
of the listed bundle URIs is sufficient (`any`).
bundle.heuristic::
If this string-valued key exists, then the bundle list is designed to
work well with incremental `git fetch` commands. The heuristic signals
that there are additional keys available for each bundle that help
determine which subset of bundles the client should download. The
only value currently understood is `creationToken`.
bundle.<id>.*::
The `bundle.<id>.*` keys are used to describe a single item in the
bundle list, grouped under `<id>` for identification purposes.

View File

@ -96,3 +96,27 @@ fetch.writeCommitGraph::
merge and the write may take longer. Having an updated commit-graph
file helps performance of many Git commands, including `git merge-base`,
`git push -f`, and `git log --graph`. Defaults to false.
fetch.bundleURI::
This value stores a URI for downloading Git object data from a bundle
URI before performing an incremental fetch from the origin Git server.
This is similar to how the `--bundle-uri` option behaves in
linkgit:git-clone[1]. `git clone --bundle-uri` will set the
`fetch.bundleURI` value if the supplied bundle URI contains a bundle
list that is organized for incremental fetches.
+
If you modify this value and your repository has a `fetch.bundleCreationToken`
value, then remove that `fetch.bundleCreationToken` value before fetching from
the new bundle URI.
fetch.bundleCreationToken::
When using `fetch.bundleURI` to fetch incrementally from a bundle
list that uses the "creationToken" heuristic, this config value
stores the maximum `creationToken` value of the downloaded bundles.
This value is used to prevent downloading bundles in the future
if the advertised `creationToken` is not strictly larger than this
value.
+
The creation token values are chosen by the provider serving the specific
bundle URI. If you modify the URI at `fetch.bundleURI`, then be sure to
remove the value for the `fetch.bundleCreationToken` value before fetching.

View File

@ -479,14 +479,14 @@ outline for submitting these features:
(This choice is an opt-in via a config option and a command-line
option.)
4. Allow the client to understand the `bundle.flag=forFetch` configuration
4. Allow the client to understand the `bundle.heuristic` configuration key
and the `bundle.<id>.creationToken` heuristic. When `git clone`
discovers a bundle URI with `bundle.flag=forFetch`, it configures the
client repository to check that bundle URI during later `git fetch <remote>`
discovers a bundle URI with `bundle.heuristic`, it configures the client
repository to check that bundle URI during later `git fetch <remote>`
commands.
5. Allow clients to discover bundle URIs during `git fetch` and configure
a bundle URI for later fetches if `bundle.flag=forFetch`.
a bundle URI for later fetches if `bundle.heuristic` is set.
6. Implement the "inspect headers" heuristic to reduce data downloads when
the `bundle.<id>.creationToken` heuristic is not available.

View File

@ -1248,12 +1248,16 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
* data from the --bundle-uri option.
*/
if (bundle_uri) {
int has_heuristic = 0;
/* At this point, we need the_repository to match the cloned repo. */
if (repo_init(the_repository, git_dir, work_tree))
warning(_("failed to initialize the repo, skipping bundle URI"));
else if (fetch_bundle_uri(the_repository, bundle_uri))
else if (fetch_bundle_uri(the_repository, bundle_uri, &has_heuristic))
warning(_("failed to fetch objects from bundle URI '%s'"),
bundle_uri);
else if (has_heuristic)
git_config_set_gently("fetch.bundleuri", bundle_uri);
}
strvec_push(&transport_ls_refs_options.ref_prefixes, "HEAD");

View File

@ -29,6 +29,7 @@
#include "commit-graph.h"
#include "shallow.h"
#include "worktree.h"
#include "bundle-uri.h"
#define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
@ -2109,6 +2110,7 @@ static int fetch_one(struct remote *remote, int argc, const char **argv,
int cmd_fetch(int argc, const char **argv, const char *prefix)
{
int i;
const char *bundle_uri;
struct string_list list = STRING_LIST_INIT_DUP;
struct remote *remote = NULL;
int result = 0;
@ -2194,6 +2196,10 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
if (dry_run)
write_fetch_head = 0;
if (!git_config_get_string_tmp("fetch.bundleuri", &bundle_uri) &&
fetch_bundle_uri(the_repository, bundle_uri, NULL))
warning(_("failed to fetch bundles from '%s'"), bundle_uri);
if (all) {
if (argc == 1)
die(_("fetch --all does not take a repository argument"));

View File

@ -9,6 +9,14 @@
#include "config.h"
#include "remote.h"
static struct {
enum bundle_list_heuristic heuristic;
const char *name;
} heuristics[BUNDLE_HEURISTIC__COUNT] = {
{ BUNDLE_HEURISTIC_NONE, ""},
{ BUNDLE_HEURISTIC_CREATIONTOKEN, "creationToken" },
};
static int compare_bundles(const void *hashmap_cmp_fn_data,
const struct hashmap_entry *he1,
const struct hashmap_entry *he2,
@ -75,6 +83,9 @@ static int summarize_bundle(struct remote_bundle_info *info, void *data)
FILE *fp = data;
fprintf(fp, "[bundle \"%s\"]\n", info->id);
fprintf(fp, "\turi = %s\n", info->uri);
if (info->creationToken)
fprintf(fp, "\tcreationToken = %"PRIu64"\n", info->creationToken);
return 0;
}
@ -100,6 +111,17 @@ void print_bundle_list(FILE *fp, struct bundle_list *list)
fprintf(fp, "\tversion = %d\n", list->version);
fprintf(fp, "\tmode = %s\n", mode);
if (list->heuristic) {
int i;
for (i = 0; i < BUNDLE_HEURISTIC__COUNT; i++) {
if (heuristics[i].heuristic == list->heuristic) {
printf("\theuristic = %s\n",
heuristics[list->heuristic].name);
break;
}
}
}
for_all_bundles_in_list(list, summarize_bundle, fp);
}
@ -142,6 +164,21 @@ static int bundle_list_update(const char *key, const char *value,
return 0;
}
if (!strcmp(subkey, "heuristic")) {
int i;
for (i = 0; i < BUNDLE_HEURISTIC__COUNT; i++) {
if (heuristics[i].heuristic &&
heuristics[i].name &&
!strcmp(value, heuristics[i].name)) {
list->heuristic = heuristics[i].heuristic;
return 0;
}
}
/* Ignore unknown heuristics. */
return 0;
}
/* Ignore other unknown global keys. */
return 0;
}
@ -169,6 +206,13 @@ static int bundle_list_update(const char *key, const char *value,
return 0;
}
if (!strcmp(subkey, "creationtoken")) {
if (sscanf(value, "%"PRIu64, &bundle->creationToken) != 1)
warning(_("could not parse bundle list key %s with value '%s'"),
"creationToken", value);
return 0;
}
/*
* At this point, we ignore any information that we don't
* understand, assuming it to be hints for a heuristic the client
@ -403,6 +447,183 @@ static int download_bundle_to_file(struct remote_bundle_info *bundle, void *data
return 0;
}
struct bundles_for_sorting {
struct remote_bundle_info **items;
size_t alloc;
size_t nr;
};
static int append_bundle(struct remote_bundle_info *bundle, void *data)
{
struct bundles_for_sorting *list = data;
list->items[list->nr++] = bundle;
return 0;
}
/**
* For use in QSORT() to get a list sorted by creationToken
* in decreasing order.
*/
static int compare_creation_token_decreasing(const void *va, const void *vb)
{
const struct remote_bundle_info * const *a = va;
const struct remote_bundle_info * const *b = vb;
if ((*a)->creationToken > (*b)->creationToken)
return -1;
if ((*a)->creationToken < (*b)->creationToken)
return 1;
return 0;
}
static int fetch_bundles_by_token(struct repository *r,
struct bundle_list *list)
{
int cur;
int move_direction = 0;
const char *creationTokenStr;
uint64_t maxCreationToken = 0, newMaxCreationToken = 0;
struct bundle_list_context ctx = {
.r = r,
.list = list,
.mode = list->mode,
};
struct bundles_for_sorting bundles = {
.alloc = hashmap_get_size(&list->bundles),
};
ALLOC_ARRAY(bundles.items, bundles.alloc);
for_all_bundles_in_list(list, append_bundle, &bundles);
if (!bundles.nr) {
free(bundles.items);
return 0;
}
QSORT(bundles.items, bundles.nr, compare_creation_token_decreasing);
/*
* If fetch.bundleCreationToken exists, parses to a uint64t, and
* is not strictly smaller than the maximum creation token in the
* bundle list, then do not download any bundles.
*/
if (!repo_config_get_value(r,
"fetch.bundlecreationtoken",
&creationTokenStr) &&
sscanf(creationTokenStr, "%"PRIu64, &maxCreationToken) == 1 &&
bundles.items[0]->creationToken <= maxCreationToken) {
free(bundles.items);
return 0;
}
/*
* Attempt to download and unbundle the minimum number of bundles by
* creationToken in decreasing order. If we fail to unbundle (after
* a successful download) then move to the next non-downloaded bundle
* and attempt downloading. Once we succeed in applying a bundle,
* move to the previous unapplied bundle and attempt to unbundle it
* again.
*
* In the case of a fresh clone, we will likely download all of the
* bundles before successfully unbundling the oldest one, then the
* rest of the bundles unbundle successfully in increasing order
* of creationToken.
*
* If there are existing objects, then this process may terminate
* early when all required commits from "new" bundles exist in the
* repo's object store.
*/
cur = 0;
while (cur >= 0 && cur < bundles.nr) {
struct remote_bundle_info *bundle = bundles.items[cur];
/*
* If we need to dig into bundles below the previous
* creation token value, then likely we are in an erroneous
* state due to missing or invalid bundles. Halt the process
* instead of continuing to download extra data.
*/
if (bundle->creationToken <= maxCreationToken)
break;
if (!bundle->file) {
/*
* Not downloaded yet. Try downloading.
*
* Note that bundle->file is non-NULL if a download
* was attempted, even if it failed to download.
*/
if (fetch_bundle_uri_internal(ctx.r, bundle, ctx.depth + 1, ctx.list)) {
/* Mark as unbundled so we do not retry. */
bundle->unbundled = 1;
/* Try looking deeper in the list. */
move_direction = 1;
goto move;
}
/* We expect bundles when using creationTokens. */
if (!is_bundle(bundle->file, 1)) {
warning(_("file downloaded from '%s' is not a bundle"),
bundle->uri);
break;
}
}
if (bundle->file && !bundle->unbundled) {
/*
* This was downloaded, but not successfully
* unbundled. Try unbundling again.
*/
if (unbundle_from_file(ctx.r, bundle->file)) {
/* Try looking deeper in the list. */
move_direction = 1;
} else {
/*
* Succeeded in unbundle. Retry bundles
* that previously failed to unbundle.
*/
move_direction = -1;
bundle->unbundled = 1;
if (bundle->creationToken > newMaxCreationToken)
newMaxCreationToken = bundle->creationToken;
}
}
/*
* Else case: downloaded and unbundled successfully.
* Skip this by moving in the same direction as the
* previous step.
*/
move:
/* Move in the specified direction and repeat. */
cur += move_direction;
}
/*
* We succeed if the loop terminates because 'cur' drops below
* zero. The other case is that we terminate because 'cur'
* reaches the end of the list, so we have a failure no matter
* which bundles we apply from the list.
*/
if (cur < 0) {
struct strbuf value = STRBUF_INIT;
strbuf_addf(&value, "%"PRIu64"", newMaxCreationToken);
if (repo_config_set_multivar_gently(ctx.r,
"fetch.bundleCreationToken",
value.buf, NULL, 0))
warning(_("failed to store maximum creation token"));
strbuf_release(&value);
}
free(bundles.items);
return cur >= 0;
}
static int download_bundle_list(struct repository *r,
struct bundle_list *local_list,
struct bundle_list *global_list,
@ -440,7 +661,15 @@ static int fetch_bundle_list_in_config_format(struct repository *r,
goto cleanup;
}
if ((result = download_bundle_list(r, &list_from_bundle,
/*
* If this list uses the creationToken heuristic, then the URIs
* it advertises are expected to be bundles, not nested lists.
* We can drop 'global_list' and 'depth'.
*/
if (list_from_bundle.heuristic == BUNDLE_HEURISTIC_CREATIONTOKEN) {
result = fetch_bundles_by_token(r, &list_from_bundle);
global_list->heuristic = BUNDLE_HEURISTIC_CREATIONTOKEN;
} else if ((result = download_bundle_list(r, &list_from_bundle,
global_list, depth)))
goto cleanup;
@ -551,7 +780,8 @@ static int unlink_bundle(struct remote_bundle_info *info, void *data)
return 0;
}
int fetch_bundle_uri(struct repository *r, const char *uri)
int fetch_bundle_uri(struct repository *r, const char *uri,
int *has_heuristic)
{
int result;
struct bundle_list list;
@ -571,6 +801,8 @@ int fetch_bundle_uri(struct repository *r, const char *uri)
result = unbundle_all_bundles(r, &list);
cleanup:
if (has_heuristic)
*has_heuristic = (list.heuristic != BUNDLE_HEURISTIC_NONE);
for_all_bundles_in_list(&list, unlink_bundle, NULL);
clear_bundle_list(&list);
clear_remote_bundle_info(&bundle, NULL);
@ -582,6 +814,14 @@ int fetch_bundle_list(struct repository *r, struct bundle_list *list)
int result;
struct bundle_list global_list;
/*
* If the creationToken heuristic is used, then the URIs
* advertised by 'list' are not nested lists and instead
* direct bundles. We do not need to use global_list.
*/
if (list->heuristic == BUNDLE_HEURISTIC_CREATIONTOKEN)
return fetch_bundles_by_token(r, list);
init_bundle_list(&global_list);
/* If a bundle is added to this global list, then it is required. */
@ -590,7 +830,10 @@ int fetch_bundle_list(struct repository *r, struct bundle_list *list)
if ((result = download_bundle_list(r, list, &global_list, 0)))
goto cleanup;
result = unbundle_all_bundles(r, &global_list);
if (list->heuristic == BUNDLE_HEURISTIC_CREATIONTOKEN)
result = fetch_bundles_by_token(r, list);
else
result = unbundle_all_bundles(r, &global_list);
cleanup:
for_all_bundles_in_list(&global_list, unlink_bundle, NULL);

View File

@ -42,6 +42,12 @@ struct remote_bundle_info {
* this boolean is true.
*/
unsigned unbundled:1;
/**
* If the bundle is part of a list with the creationToken
* heuristic, then we use this member for sorting the bundles.
*/
uint64_t creationToken;
};
#define REMOTE_BUNDLE_INFO_INIT { 0 }
@ -52,6 +58,14 @@ enum bundle_list_mode {
BUNDLE_MODE_ANY
};
enum bundle_list_heuristic {
BUNDLE_HEURISTIC_NONE = 0,
BUNDLE_HEURISTIC_CREATIONTOKEN,
/* Must be last. */
BUNDLE_HEURISTIC__COUNT
};
/**
* A bundle_list contains an unordered set of remote_bundle_info structs,
* as well as information about the bundle listing, such as version and
@ -75,6 +89,12 @@ struct bundle_list {
* advertised by the bundle list at that location.
*/
char *baseURI;
/**
* A list can have a heuristic, which helps reduce the number of
* downloaded bundles.
*/
enum bundle_list_heuristic heuristic;
};
void init_bundle_list(struct bundle_list *list);
@ -104,8 +124,14 @@ int bundle_uri_parse_config_format(const char *uri,
* based on that information.
*
* Returns non-zero if no bundle information is found at the given 'uri'.
*
* If the pointer 'has_heuristic' is non-NULL, then the value it points to
* will be set to be non-zero if and only if the fetched list has a
* heuristic value. Such a value indicates that the list was designed for
* incremental fetches.
*/
int fetch_bundle_uri(struct repository *r, const char *uri);
int fetch_bundle_uri(struct repository *r, const char *uri,
int *has_heuristic);
/**
* Given a bundle list that was already advertised (likely by the

View File

@ -12,6 +12,7 @@
#include "refs.h"
#include "strvec.h"
#include "list-objects-filter-options.h"
#include "connected.h"
static const char v2_bundle_signature[] = "# v2 git bundle\n";
static const char v3_bundle_signature[] = "# v3 git bundle\n";
@ -187,6 +188,21 @@ static int list_refs(struct string_list *r, int argc, const char **argv)
/* Remember to update object flag allocation in object.h */
#define PREREQ_MARK (1u<<16)
struct string_list_iterator {
struct string_list *list;
size_t cur;
};
static const struct object_id *iterate_ref_map(void *cb_data)
{
struct string_list_iterator *iter = cb_data;
if (iter->cur >= iter->list->nr)
return NULL;
return iter->list->items[iter->cur++].util;
}
int verify_bundle(struct repository *r,
struct bundle_header *header,
enum verify_bundle_flags flags)
@ -196,26 +212,25 @@ int verify_bundle(struct repository *r,
* to be verbose about the errors
*/
struct string_list *p = &header->prerequisites;
struct rev_info revs = REV_INFO_INIT;
const char *argv[] = {NULL, "--all", NULL};
struct commit *commit;
int i, ret = 0, req_nr;
int i, ret = 0;
const char *message = _("Repository lacks these prerequisite commits:");
struct string_list_iterator iter = {
.list = p,
};
struct check_connected_options opts = {
.quiet = 1,
};
if (!r || !r->objects || !r->objects->odb)
return error(_("need a repository to verify a bundle"));
repo_init_revisions(r, &revs, NULL);
for (i = 0; i < p->nr; i++) {
struct string_list_item *e = p->items + i;
const char *name = e->string;
struct object_id *oid = e->util;
struct object *o = parse_object(r, oid);
if (o) {
o->flags |= PREREQ_MARK;
add_pending_object(&revs, o, name);
if (o)
continue;
}
ret++;
if (flags & VERIFY_BUNDLE_QUIET)
continue;
@ -223,37 +238,14 @@ int verify_bundle(struct repository *r,
error("%s", message);
error("%s %s", oid_to_hex(oid), name);
}
if (revs.pending.nr != p->nr)
if (ret)
goto cleanup;
req_nr = revs.pending.nr;
setup_revisions(2, argv, &revs, NULL);
list_objects_filter_copy(&revs.filter, &header->filter);
if (prepare_revision_walk(&revs))
die(_("revision walk setup failed"));
i = req_nr;
while (i && (commit = get_revision(&revs)))
if (commit->object.flags & PREREQ_MARK)
i--;
for (i = 0; i < p->nr; i++) {
struct string_list_item *e = p->items + i;
const char *name = e->string;
const struct object_id *oid = e->util;
struct object *o = parse_object(r, oid);
assert(o); /* otherwise we'd have returned early */
if (o->flags & SHOWN)
continue;
ret++;
if (flags & VERIFY_BUNDLE_QUIET)
continue;
if (ret == 1)
error("%s", message);
error("%s %s", oid_to_hex(oid), name);
}
if ((ret = check_connected(iterate_ref_map, &iter, &opts)))
error(_("some prerequisite commits exist in the object store, "
"but are not connected to the repository's history"));
/* TODO: preserve this verbose language. */
if (flags & VERIFY_BUNDLE_VERBOSE) {
struct string_list *r;
@ -282,15 +274,6 @@ int verify_bundle(struct repository *r,
list_objects_filter_spec(&header->filter));
}
cleanup:
/* Clean up objects used, as they will be reused. */
for (i = 0; i < p->nr; i++) {
struct string_list_item *e = p->items + i;
struct object_id *oid = e->util;
commit = lookup_commit_reference_gently(r, oid, 1);
if (commit)
clear_commit_marks(commit, ALL_REV_FLAGS | PREREQ_MARK);
}
release_revisions(&revs);
return ret;
}

View File

@ -285,6 +285,8 @@ test_expect_success 'clone HTTP bundle' '
'
test_expect_success 'clone bundle list (HTTP, no heuristic)' '
test_when_finished rm -f trace*.txt &&
cp clone-from/bundle-*.bundle "$HTTPD_DOCUMENT_ROOT_PATH/" &&
cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF &&
[bundle]
@ -304,12 +306,26 @@ test_expect_success 'clone bundle list (HTTP, no heuristic)' '
uri = $HTTPD_URL/bundle-4.bundle
EOF
git clone --bundle-uri="$HTTPD_URL/bundle-list" \
GIT_TRACE2_EVENT="$(pwd)/trace-clone.txt" \
git clone --bundle-uri="$HTTPD_URL/bundle-list" \
clone-from clone-list-http 2>err &&
! grep "Repository lacks these prerequisite commits" err &&
git -C clone-from for-each-ref --format="%(objectname)" >oids &&
git -C clone-list-http cat-file --batch-check <oids
git -C clone-list-http cat-file --batch-check <oids &&
cat >expect <<-EOF &&
$HTTPD_URL/bundle-1.bundle
$HTTPD_URL/bundle-2.bundle
$HTTPD_URL/bundle-3.bundle
$HTTPD_URL/bundle-4.bundle
$HTTPD_URL/bundle-list
EOF
# Sort the list, since the order is not well-defined
# without a heuristic.
test_remote_https_urls <trace-clone.txt | sort >actual &&
test_cmp expect actual
'
test_expect_success 'clone bundle list (HTTP, any mode)' '
@ -350,6 +366,658 @@ test_expect_success 'clone bundle list (HTTP, any mode)' '
test_cmp expect actual
'
test_expect_success 'clone bundle list (http, creationToken)' '
test_when_finished rm -f trace*.txt &&
cp clone-from/bundle-*.bundle "$HTTPD_DOCUMENT_ROOT_PATH/" &&
cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF &&
[bundle]
version = 1
mode = all
heuristic = creationToken
[bundle "bundle-1"]
uri = bundle-1.bundle
creationToken = 1
[bundle "bundle-2"]
uri = bundle-2.bundle
creationToken = 2
[bundle "bundle-3"]
uri = bundle-3.bundle
creationToken = 3
[bundle "bundle-4"]
uri = bundle-4.bundle
creationToken = 4
EOF
GIT_TRACE2_EVENT="$(pwd)/trace-clone.txt" git \
clone --bundle-uri="$HTTPD_URL/bundle-list" \
"$HTTPD_URL/smart/fetch.git" clone-list-http-2 &&
git -C clone-from for-each-ref --format="%(objectname)" >oids &&
git -C clone-list-http-2 cat-file --batch-check <oids &&
cat >expect <<-EOF &&
$HTTPD_URL/bundle-list
$HTTPD_URL/bundle-4.bundle
$HTTPD_URL/bundle-3.bundle
$HTTPD_URL/bundle-2.bundle
$HTTPD_URL/bundle-1.bundle
EOF
test_remote_https_urls <trace-clone.txt >actual &&
test_cmp expect actual
'
test_expect_success 'clone incomplete bundle list (http, creationToken)' '
test_when_finished rm -f trace*.txt &&
cp clone-from/bundle-*.bundle "$HTTPD_DOCUMENT_ROOT_PATH/" &&
cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF &&
[bundle]
version = 1
mode = all
heuristic = creationToken
[bundle "bundle-1"]
uri = bundle-1.bundle
creationToken = 1
EOF
GIT_TRACE2_EVENT=$(pwd)/trace-clone.txt \
git clone --bundle-uri="$HTTPD_URL/bundle-list" \
--single-branch --branch=base --no-tags \
"$HTTPD_URL/smart/fetch.git" clone-token-http &&
test_cmp_config -C clone-token-http "$HTTPD_URL/bundle-list" fetch.bundleuri &&
test_cmp_config -C clone-token-http 1 fetch.bundlecreationtoken &&
cat >expect <<-EOF &&
$HTTPD_URL/bundle-list
$HTTPD_URL/bundle-1.bundle
EOF
test_remote_https_urls <trace-clone.txt >actual &&
test_cmp expect actual &&
# We now have only one bundle ref.
git -C clone-token-http for-each-ref --format="%(refname)" "refs/bundles/*" >refs &&
cat >expect <<-\EOF &&
refs/bundles/base
EOF
test_cmp expect refs &&
# Add remaining bundles, exercising the "deepening" strategy
# for downloading via the creationToken heurisitc.
cat >>"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF &&
[bundle "bundle-2"]
uri = bundle-2.bundle
creationToken = 2
[bundle "bundle-3"]
uri = bundle-3.bundle
creationToken = 3
[bundle "bundle-4"]
uri = bundle-4.bundle
creationToken = 4
EOF
GIT_TRACE2_EVENT="$(pwd)/trace1.txt" \
git -C clone-token-http fetch origin --no-tags \
refs/heads/merge:refs/heads/merge &&
test_cmp_config -C clone-token-http 4 fetch.bundlecreationtoken &&
cat >expect <<-EOF &&
$HTTPD_URL/bundle-list
$HTTPD_URL/bundle-4.bundle
$HTTPD_URL/bundle-3.bundle
$HTTPD_URL/bundle-2.bundle
EOF
test_remote_https_urls <trace1.txt >actual &&
test_cmp expect actual &&
# We now have all bundle refs.
git -C clone-token-http for-each-ref --format="%(refname)" "refs/bundles/*" >refs &&
cat >expect <<-\EOF &&
refs/bundles/base
refs/bundles/left
refs/bundles/merge
refs/bundles/right
EOF
test_cmp expect refs
'
test_expect_success 'http clone with bundle.heuristic creates fetch.bundleURI' '
test_when_finished rm -rf fetch-http-4 trace*.txt &&
cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF &&
[bundle]
version = 1
mode = all
heuristic = creationToken
[bundle "bundle-1"]
uri = bundle-1.bundle
creationToken = 1
EOF
GIT_TRACE2_EVENT="$(pwd)/trace-clone.txt" \
git clone --single-branch --branch=base \
--bundle-uri="$HTTPD_URL/bundle-list" \
"$HTTPD_URL/smart/fetch.git" fetch-http-4 &&
test_cmp_config -C fetch-http-4 "$HTTPD_URL/bundle-list" fetch.bundleuri &&
test_cmp_config -C fetch-http-4 1 fetch.bundlecreationtoken &&
cat >expect <<-EOF &&
$HTTPD_URL/bundle-list
$HTTPD_URL/bundle-1.bundle
EOF
test_remote_https_urls <trace-clone.txt >actual &&
test_cmp expect actual &&
# only received base ref from bundle-1
git -C fetch-http-4 for-each-ref --format="%(refname)" "refs/bundles/*" >refs &&
cat >expect <<-\EOF &&
refs/bundles/base
EOF
test_cmp expect refs &&
cat >>"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF &&
[bundle "bundle-2"]
uri = bundle-2.bundle
creationToken = 2
EOF
# Fetch the objects for bundle-2 _and_ bundle-3.
GIT_TRACE2_EVENT="$(pwd)/trace1.txt" \
git -C fetch-http-4 fetch origin --no-tags \
refs/heads/left:refs/heads/left \
refs/heads/right:refs/heads/right &&
test_cmp_config -C fetch-http-4 2 fetch.bundlecreationtoken &&
cat >expect <<-EOF &&
$HTTPD_URL/bundle-list
$HTTPD_URL/bundle-2.bundle
EOF
test_remote_https_urls <trace1.txt >actual &&
test_cmp expect actual &&
# received left from bundle-2
git -C fetch-http-4 for-each-ref --format="%(refname)" "refs/bundles/*" >refs &&
cat >expect <<-\EOF &&
refs/bundles/base
refs/bundles/left
EOF
test_cmp expect refs &&
# No-op fetch
GIT_TRACE2_EVENT="$(pwd)/trace1b.txt" \
git -C fetch-http-4 fetch origin --no-tags \
refs/heads/left:refs/heads/left \
refs/heads/right:refs/heads/right &&
cat >expect <<-EOF &&
$HTTPD_URL/bundle-list
EOF
test_remote_https_urls <trace1b.txt >actual &&
test_cmp expect actual &&
cat >>"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF &&
[bundle "bundle-3"]
uri = bundle-3.bundle
creationToken = 3
[bundle "bundle-4"]
uri = bundle-4.bundle
creationToken = 4
EOF
# This fetch should skip bundle-3.bundle, since its objects are
# already local (we have the requisite commits for bundle-4.bundle).
GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
git -C fetch-http-4 fetch origin --no-tags \
refs/heads/merge:refs/heads/merge &&
test_cmp_config -C fetch-http-4 4 fetch.bundlecreationtoken &&
cat >expect <<-EOF &&
$HTTPD_URL/bundle-list
$HTTPD_URL/bundle-4.bundle
EOF
test_remote_https_urls <trace2.txt >actual &&
test_cmp expect actual &&
# received merge ref from bundle-4, but right is missing
# because we did not download bundle-3.
git -C fetch-http-4 for-each-ref --format="%(refname)" "refs/bundles/*" >refs &&
cat >expect <<-\EOF &&
refs/bundles/base
refs/bundles/left
refs/bundles/merge
EOF
test_cmp expect refs &&
# No-op fetch
GIT_TRACE2_EVENT="$(pwd)/trace2b.txt" \
git -C fetch-http-4 fetch origin &&
cat >expect <<-EOF &&
$HTTPD_URL/bundle-list
EOF
test_remote_https_urls <trace2b.txt >actual &&
test_cmp expect actual
'
test_expect_success 'creationToken heuristic with failed downloads (clone)' '
test_when_finished rm -rf download-* trace*.txt &&
# Case 1: base bundle does not exist, nothing can unbundle
cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF &&
[bundle]
version = 1
mode = all
heuristic = creationToken
[bundle "bundle-1"]
uri = fake.bundle
creationToken = 1
[bundle "bundle-2"]
uri = bundle-2.bundle
creationToken = 2
[bundle "bundle-3"]
uri = bundle-3.bundle
creationToken = 3
[bundle "bundle-4"]
uri = bundle-4.bundle
creationToken = 4
EOF
GIT_TRACE2_EVENT="$(pwd)/trace-clone-1.txt" \
git clone --single-branch --branch=base \
--bundle-uri="$HTTPD_URL/bundle-list" \
"$HTTPD_URL/smart/fetch.git" download-1 &&
# Bundle failure does not set these configs.
test_must_fail git -C download-1 config fetch.bundleuri &&
test_must_fail git -C download-1 config fetch.bundlecreationtoken &&
cat >expect <<-EOF &&
$HTTPD_URL/bundle-list
$HTTPD_URL/bundle-4.bundle
$HTTPD_URL/bundle-3.bundle
$HTTPD_URL/bundle-2.bundle
$HTTPD_URL/fake.bundle
EOF
test_remote_https_urls <trace-clone-1.txt >actual &&
test_cmp expect actual &&
# All bundles failed to unbundle
git -C download-1 for-each-ref --format="%(refname)" "refs/bundles/*" >refs &&
test_must_be_empty refs &&
# Case 2: middle bundle does not exist, only two bundles can unbundle
cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF &&
[bundle]
version = 1
mode = all
heuristic = creationToken
[bundle "bundle-1"]
uri = bundle-1.bundle
creationToken = 1
[bundle "bundle-2"]
uri = fake.bundle
creationToken = 2
[bundle "bundle-3"]
uri = bundle-3.bundle
creationToken = 3
[bundle "bundle-4"]
uri = bundle-4.bundle
creationToken = 4
EOF
GIT_TRACE2_EVENT="$(pwd)/trace-clone-2.txt" \
git clone --single-branch --branch=base \
--bundle-uri="$HTTPD_URL/bundle-list" \
"$HTTPD_URL/smart/fetch.git" download-2 &&
# Bundle failure does not set these configs.
test_must_fail git -C download-2 config fetch.bundleuri &&
test_must_fail git -C download-2 config fetch.bundlecreationtoken &&
cat >expect <<-EOF &&
$HTTPD_URL/bundle-list
$HTTPD_URL/bundle-4.bundle
$HTTPD_URL/bundle-3.bundle
$HTTPD_URL/fake.bundle
$HTTPD_URL/bundle-1.bundle
EOF
test_remote_https_urls <trace-clone-2.txt >actual &&
test_cmp expect actual &&
# bundle-1 and bundle-3 could unbundle, but bundle-4 could not
git -C download-2 for-each-ref --format="%(refname)" "refs/bundles/*" >refs &&
cat >expect <<-EOF &&
refs/bundles/base
refs/bundles/right
EOF
test_cmp expect refs &&
# Case 3: top bundle does not exist, rest unbundle fine.
cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF &&
[bundle]
version = 1
mode = all
heuristic = creationToken
[bundle "bundle-1"]
uri = bundle-1.bundle
creationToken = 1
[bundle "bundle-2"]
uri = bundle-2.bundle
creationToken = 2
[bundle "bundle-3"]
uri = bundle-3.bundle
creationToken = 3
[bundle "bundle-4"]
uri = fake.bundle
creationToken = 4
EOF
GIT_TRACE2_EVENT="$(pwd)/trace-clone-3.txt" \
git clone --single-branch --branch=base \
--bundle-uri="$HTTPD_URL/bundle-list" \
"$HTTPD_URL/smart/fetch.git" download-3 &&
# As long as we have continguous successful downloads,
# we _do_ set these configs.
test_cmp_config -C download-3 "$HTTPD_URL/bundle-list" fetch.bundleuri &&
test_cmp_config -C download-3 3 fetch.bundlecreationtoken &&
cat >expect <<-EOF &&
$HTTPD_URL/bundle-list
$HTTPD_URL/fake.bundle
$HTTPD_URL/bundle-3.bundle
$HTTPD_URL/bundle-2.bundle
$HTTPD_URL/bundle-1.bundle
EOF
test_remote_https_urls <trace-clone-3.txt >actual &&
test_cmp expect actual &&
# fake.bundle did not unbundle, but the others did.
git -C download-3 for-each-ref --format="%(refname)" "refs/bundles/*" >refs &&
cat >expect <<-EOF &&
refs/bundles/base
refs/bundles/left
refs/bundles/right
EOF
test_cmp expect refs
'
# Expand the bundle list to include other interesting shapes, specifically
# interesting for use when fetching from a previous state.
#
# ---------------- bundle-7
# 7
# _/|\_
# ---/--|--\------ bundle-6
# 5 | 6
# --|---|---|----- bundle-4
# | 4 |
# | / \ /
# --|-|---|/------ bundle-3 (the client will be caught up to this point.)
# \ | 3
# ---\|---|------- bundle-2
# 2 |
# ----|---|------- bundle-1
# \ /
# 1
# |
# (previous commits)
test_expect_success 'expand incremental bundle list' '
(
cd clone-from &&
git checkout -b lefter left &&
test_commit 5 &&
git checkout -b righter right &&
test_commit 6 &&
git checkout -b top lefter &&
git merge -m "7" merge righter &&
git bundle create bundle-6.bundle lefter righter --not left right &&
git bundle create bundle-7.bundle top --not lefter merge righter &&
cp bundle-*.bundle "$HTTPD_DOCUMENT_ROOT_PATH/"
) &&
git -C "$HTTPD_DOCUMENT_ROOT_PATH/fetch.git" fetch origin +refs/heads/*:refs/heads/*
'
test_expect_success 'creationToken heuristic with failed downloads (fetch)' '
test_when_finished rm -rf download-* trace*.txt &&
cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF &&
[bundle]
version = 1
mode = all
heuristic = creationToken
[bundle "bundle-1"]
uri = bundle-1.bundle
creationToken = 1
[bundle "bundle-2"]
uri = bundle-2.bundle
creationToken = 2
[bundle "bundle-3"]
uri = bundle-3.bundle
creationToken = 3
EOF
git clone --single-branch --branch=left \
--bundle-uri="$HTTPD_URL/bundle-list" \
"$HTTPD_URL/smart/fetch.git" fetch-base &&
test_cmp_config -C fetch-base "$HTTPD_URL/bundle-list" fetch.bundleURI &&
test_cmp_config -C fetch-base 3 fetch.bundleCreationToken &&
# Case 1: all bundles exist: successful unbundling of all bundles
cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF &&
[bundle]
version = 1
mode = all
heuristic = creationToken
[bundle "bundle-1"]
uri = bundle-1.bundle
creationToken = 1
[bundle "bundle-2"]
uri = bundle-2.bundle
creationToken = 2
[bundle "bundle-3"]
uri = bundle-3.bundle
creationToken = 3
[bundle "bundle-4"]
uri = bundle-4.bundle
creationToken = 4
[bundle "bundle-6"]
uri = bundle-6.bundle
creationToken = 6
[bundle "bundle-7"]
uri = bundle-7.bundle
creationToken = 7
EOF
cp -r fetch-base fetch-1 &&
GIT_TRACE2_EVENT="$(pwd)/trace-fetch-1.txt" \
git -C fetch-1 fetch origin &&
test_cmp_config -C fetch-1 7 fetch.bundlecreationtoken &&
cat >expect <<-EOF &&
$HTTPD_URL/bundle-list
$HTTPD_URL/bundle-7.bundle
$HTTPD_URL/bundle-6.bundle
$HTTPD_URL/bundle-4.bundle
EOF
test_remote_https_urls <trace-fetch-1.txt >actual &&
test_cmp expect actual &&
# Check which bundles have unbundled by refs
git -C fetch-1 for-each-ref --format="%(refname)" "refs/bundles/*" >refs &&
cat >expect <<-EOF &&
refs/bundles/base
refs/bundles/left
refs/bundles/lefter
refs/bundles/merge
refs/bundles/right
refs/bundles/righter
refs/bundles/top
EOF
test_cmp expect refs &&
# Case 2: middle bundle does not exist, only bundle-4 can unbundle
cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF &&
[bundle]
version = 1
mode = all
heuristic = creationToken
[bundle "bundle-1"]
uri = bundle-1.bundle
creationToken = 1
[bundle "bundle-2"]
uri = bundle-2.bundle
creationToken = 2
[bundle "bundle-3"]
uri = bundle-3.bundle
creationToken = 3
[bundle "bundle-4"]
uri = bundle-4.bundle
creationToken = 4
[bundle "bundle-6"]
uri = fake.bundle
creationToken = 6
[bundle "bundle-7"]
uri = bundle-7.bundle
creationToken = 7
EOF
cp -r fetch-base fetch-2 &&
GIT_TRACE2_EVENT="$(pwd)/trace-fetch-2.txt" \
git -C fetch-2 fetch origin &&
# Since bundle-7 fails to unbundle, do not update creation token.
test_cmp_config -C fetch-2 3 fetch.bundlecreationtoken &&
cat >expect <<-EOF &&
$HTTPD_URL/bundle-list
$HTTPD_URL/bundle-7.bundle
$HTTPD_URL/fake.bundle
$HTTPD_URL/bundle-4.bundle
EOF
test_remote_https_urls <trace-fetch-2.txt >actual &&
test_cmp expect actual &&
# Check which bundles have unbundled by refs
git -C fetch-2 for-each-ref --format="%(refname)" "refs/bundles/*" >refs &&
cat >expect <<-EOF &&
refs/bundles/base
refs/bundles/left
refs/bundles/merge
refs/bundles/right
EOF
test_cmp expect refs &&
# Case 3: top bundle does not exist, rest unbundle fine.
cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF &&
[bundle]
version = 1
mode = all
heuristic = creationToken
[bundle "bundle-1"]
uri = bundle-1.bundle
creationToken = 1
[bundle "bundle-2"]
uri = bundle-2.bundle
creationToken = 2
[bundle "bundle-3"]
uri = bundle-3.bundle
creationToken = 3
[bundle "bundle-4"]
uri = bundle-4.bundle
creationToken = 4
[bundle "bundle-6"]
uri = bundle-6.bundle
creationToken = 6
[bundle "bundle-7"]
uri = fake.bundle
creationToken = 7
EOF
cp -r fetch-base fetch-3 &&
GIT_TRACE2_EVENT="$(pwd)/trace-fetch-3.txt" \
git -C fetch-3 fetch origin &&
# As long as we have continguous successful downloads,
# we _do_ set the maximum creation token.
test_cmp_config -C fetch-3 6 fetch.bundlecreationtoken &&
# NOTE: the fetch skips bundle-4 since bundle-6 successfully
# unbundles itself and bundle-7 failed to download.
cat >expect <<-EOF &&
$HTTPD_URL/bundle-list
$HTTPD_URL/fake.bundle
$HTTPD_URL/bundle-6.bundle
EOF
test_remote_https_urls <trace-fetch-3.txt >actual &&
test_cmp expect actual &&
# Check which bundles have unbundled by refs
git -C fetch-3 for-each-ref --format="%(refname)" "refs/bundles/*" >refs &&
cat >expect <<-EOF &&
refs/bundles/base
refs/bundles/left
refs/bundles/lefter
refs/bundles/right
refs/bundles/righter
EOF
test_cmp expect refs
'
# Do not add tests here unless they use the HTTP server, as they will
# not run unless the HTTP dependencies exist.

View File

@ -831,6 +831,52 @@ test_expect_success 'auto-discover multiple bundles from HTTP clone' '
grep -f pattern trace.txt
'
test_expect_success 'auto-discover multiple bundles from HTTP clone: creationToken heuristic' '
test_when_finished rm -rf "$HTTPD_DOCUMENT_ROOT_PATH/repo4.git" &&
test_when_finished rm -rf clone-heuristic trace*.txt &&
test_commit -C src newest &&
git -C src bundle create "$HTTPD_DOCUMENT_ROOT_PATH/newest.bundle" HEAD~1..HEAD &&
git clone --bare --no-local src "$HTTPD_DOCUMENT_ROOT_PATH/repo4.git" &&
cat >>"$HTTPD_DOCUMENT_ROOT_PATH/repo4.git/config" <<-EOF &&
[uploadPack]
advertiseBundleURIs = true
[bundle]
version = 1
mode = all
heuristic = creationToken
[bundle "everything"]
uri = $HTTPD_URL/everything.bundle
creationtoken = 1
[bundle "new"]
uri = $HTTPD_URL/new.bundle
creationtoken = 2
[bundle "newest"]
uri = $HTTPD_URL/newest.bundle
creationtoken = 3
EOF
GIT_TRACE2_EVENT="$(pwd)/trace-clone.txt" \
git -c protocol.version=2 \
-c transfer.bundleURI=true clone \
"$HTTPD_URL/smart/repo4.git" clone-heuristic &&
cat >expect <<-EOF &&
$HTTPD_URL/newest.bundle
$HTTPD_URL/new.bundle
$HTTPD_URL/everything.bundle
EOF
# We should fetch all bundles in the expected order.
test_remote_https_urls <trace-clone.txt >actual &&
test_cmp expect actual
'
# DO NOT add non-httpd-specific tests here, because the last part of this
# test script is only executed when httpd is available and enabled.

View File

@ -250,4 +250,41 @@ test_expect_success 'parse config format edge cases: empty key or value' '
test_cmp_config_output expect actual
'
test_expect_success 'parse config format: creationToken heuristic' '
cat >expect <<-\EOF &&
[bundle]
version = 1
mode = all
heuristic = creationToken
[bundle "one"]
uri = http://example.com/bundle.bdl
creationToken = 123456
[bundle "two"]
uri = https://example.com/bundle.bdl
creationToken = 12345678901234567890
[bundle "three"]
uri = file:///usr/share/git/bundle.bdl
creationToken = 1
EOF
test-tool bundle-uri parse-config expect >actual 2>err &&
test_must_be_empty err &&
test_cmp_config_output expect actual
'
test_expect_success 'parse config format edge cases: creationToken heuristic' '
cat >expect <<-\EOF &&
[bundle]
version = 1
mode = all
heuristic = creationToken
[bundle "one"]
uri = http://example.com/bundle.bdl
creationToken = bogus
EOF
test-tool bundle-uri parse-config expect >actual 2>err &&
grep "could not parse bundle list key creationToken with value '\''bogus'\''" err
'
test_done

View File

@ -566,4 +566,44 @@ test_expect_success 'cloning from filtered bundle has useful error' '
grep "cannot clone from filtered bundle" err
'
test_expect_success 'verify catches unreachable, broken prerequisites' '
test_when_finished rm -rf clone-from clone-to &&
git init clone-from &&
(
cd clone-from &&
git checkout -b base &&
test_commit A &&
git checkout -b tip &&
git commit --allow-empty -m "will drop by shallow" &&
git commit --allow-empty -m "will keep by shallow" &&
git commit --allow-empty -m "for bundle, not clone" &&
git bundle create tip.bundle tip~1..tip &&
git reset --hard HEAD~1 &&
git checkout base
) &&
BAD_OID=$(git -C clone-from rev-parse tip~1) &&
TIP_OID=$(git -C clone-from rev-parse tip) &&
git clone --depth=1 --no-single-branch \
"file://$(pwd)/clone-from" clone-to &&
(
cd clone-to &&
# Set up broken history by removing shallow markers
git update-ref -d refs/remotes/origin/tip &&
rm .git/shallow &&
# Verify should fail
test_must_fail git bundle verify \
../clone-from/tip.bundle 2>err &&
grep "some prerequisite commits .* are not connected" err &&
test_line_count = 1 err &&
# Unbundling should fail
test_must_fail git bundle unbundle \
../clone-from/tip.bundle 2>err &&
grep "some prerequisite commits .* are not connected" err &&
test_line_count = 1 err
)
'
test_done

View File

@ -1767,6 +1767,14 @@ test_region () {
return 0
}
# Given a GIT_TRACE2_EVENT log over stdin, writes to stdout a list of URLs
# sent to git-remote-https child processes.
test_remote_https_urls() {
grep -e '"event":"child_start".*"argv":\["git-remote-https",".*"\]' |
sed -e 's/{"event":"child_start".*"argv":\["git-remote-https","//g' \
-e 's/"\]}//g'
}
# Print the destination of symlink(s) provided as arguments. Basically
# the same as the readlink command, but it's not available everywhere.
test_readlink () {