git-commit-vandalism/bundle-uri.c
Junio C Hamano 89833fc249 Merge branch 'ds/fetch-bundle-uri-with-all'
"git fetch --all" does not have to download and handle the same
bundleURI over and over, which has been corrected.

* ds/fetch-bundle-uri-with-all:
  fetch: download bundles once, even with --all
2023-04-06 13:38:32 -07:00

930 lines
22 KiB
C

#include "cache.h"
#include "bundle-uri.h"
#include "bundle.h"
#include "environment.h"
#include "gettext.h"
#include "object-store.h"
#include "refs.h"
#include "run-command.h"
#include "hashmap.h"
#include "pkt-line.h"
#include "config.h"
#include "remote.h"
/*
 * Table pairing each bundle list heuristic with the name that is
 * matched against (and printed for) the "heuristic" key of a list.
 */
static struct {
	enum bundle_list_heuristic heuristic;
	const char *name;
} heuristics[BUNDLE_HEURISTIC__COUNT] = {
	{ .heuristic = BUNDLE_HEURISTIC_NONE, .name = "" },
	{ .heuristic = BUNDLE_HEURISTIC_CREATIONTOKEN, .name = "creationToken" },
};
/*
 * Hashmap comparison callback for 'struct remote_bundle_info' entries.
 *
 * The 'id' of the first entry is compared against the explicit key 'id'
 * when one is supplied, and against the 'id' of the second entry
 * otherwise. The strcmp() result is returned, so zero means "equal".
 */
static int compare_bundles(const void *hashmap_cmp_fn_data,
			   const struct hashmap_entry *he1,
			   const struct hashmap_entry *he2,
			   const void *id)
{
	const struct remote_bundle_info *lhs =
		container_of(he1, const struct remote_bundle_info, ent);
	const struct remote_bundle_info *rhs =
		container_of(he2, const struct remote_bundle_info, ent);
	const char *other_id = id ? (const char *)id : rhs->id;

	return strcmp(lhs->id, other_id);
}
/*
 * Reset 'list' to an empty bundle list: all fields zeroed, an empty
 * bundle hashmap keyed through compare_bundles(), and the implied
 * defaults of mode "all" and version 1.
 */
void init_bundle_list(struct bundle_list *list)
{
	memset(list, 0, sizeof(*list));
	hashmap_init(&list->bundles, compare_bundles, NULL, 0);

	/* Implied defaults. */
	list->version = 1;
	list->mode = BUNDLE_MODE_ALL;
}
/*
 * Bundle iterator that frees the strings owned by 'bundle' (id, uri and
 * file), leaves those pointers NULL, and resets its 'unbundled' flag.
 * 'data' is not used. Always returns 0 so iteration continues.
 */
static int clear_remote_bundle_info(struct remote_bundle_info *bundle,
				    void *data)
{
	bundle->unbundled = 0;
	FREE_AND_NULL(bundle->file);
	FREE_AND_NULL(bundle->uri);
	FREE_AND_NULL(bundle->id);
	return 0;
}
/*
 * Release everything owned by 'list': the strings of every bundle, the
 * hashmap entries themselves, and the base URI. A NULL 'list' is a no-op.
 *
 * The base URI pointer is reset after it is freed so that a cleared
 * list can be cleared again, or handed back to
 * bundle_uri_parse_config_format() (which tests 'baseURI' for NULL),
 * without touching freed memory.
 */
void clear_bundle_list(struct bundle_list *list)
{
	if (!list)
		return;

	for_all_bundles_in_list(list, clear_remote_bundle_info, NULL);
	hashmap_clear_and_free(&list->bundles, struct remote_bundle_info, ent);

	free(list->baseURI);
	list->baseURI = NULL;
}
/*
 * Invoke 'iter' with 'data' on every bundle stored in 'list'.
 *
 * Iteration stops at the first nonzero value returned by 'iter', and
 * that value is returned. Returns 0 when every call returned 0 (or the
 * list is empty).
 */
int for_all_bundles_in_list(struct bundle_list *list,
			    bundle_iterator iter,
			    void *data)
{
	struct hashmap_iter cursor;
	struct remote_bundle_info *entry;
	int status = 0;

	hashmap_for_each_entry(&list->bundles, &cursor, entry, ent) {
		status = iter(entry, data);
		if (status)
			break;
	}
	return status;
}
/*
 * Bundle iterator that writes one bundle as a config-style section to
 * the FILE passed through 'data': the section header with the bundle
 * id, its uri, and its creationToken when that is nonzero.
 * Always returns 0.
 */
static int summarize_bundle(struct remote_bundle_info *info, void *data)
{
	FILE *out = data;

	fprintf(out, "[bundle \"%s\"]\n", info->id);
	fprintf(out, "\turi = %s\n", info->uri);

	if (info->creationToken != 0)
		fprintf(out, "\tcreationToken = %"PRIu64"\n", info->creationToken);

	return 0;
}
/*
 * Write 'list' to 'fp' in config file format: a "[bundle]" section with
 * the version, mode and (when one is set) heuristic, followed by one
 * section per bundle.
 *
 * A mode other than "all" or "any" is printed as "<unknown>".
 */
void print_bundle_list(FILE *fp, struct bundle_list *list)
{
	const char *mode;

	switch (list->mode) {
	case BUNDLE_MODE_ALL:
		mode = "all";
		break;
	case BUNDLE_MODE_ANY:
		mode = "any";
		break;
	case BUNDLE_MODE_NONE:
	default:
		mode = "<unknown>";
		break;
	}

	fprintf(fp, "[bundle]\n");
	fprintf(fp, "\tversion = %d\n", list->version);
	fprintf(fp, "\tmode = %s\n", mode);

	if (list->heuristic) {
		int i;

		for (i = 0; i < BUNDLE_HEURISTIC__COUNT; i++) {
			if (heuristics[i].heuristic == list->heuristic) {
				/*
				 * Write to 'fp' like every other line, and
				 * use the slot that actually matched rather
				 * than assuming the enum value is the index.
				 */
				fprintf(fp, "\theuristic = %s\n",
					heuristics[i].name);
				break;
			}
		}
	}

	for_all_bundles_in_list(list, summarize_bundle, fp);
}
/**
 * Given a key-value pair, update the state of the given bundle list.
 * Returns 0 if the key-value pair is understood. Returns -1 if the key
 * is not understood or the value is malformed.
 *
 * Keys directly under "bundle." ("version", "mode", "heuristic") update
 * the list itself; keys under "bundle.<id>." ("uri", "creationtoken")
 * update the bundle with that <id>, creating it on first mention.
 * Unrecognized keys in either place are ignored and yield 0.
 *
 * 'value' may be NULL when the pair comes from a config file line that
 * has a key but no value. A recognized key without a value is treated
 * as malformed (-1) instead of being passed on to the string functions.
 */
static int bundle_list_update(const char *key, const char *value,
			      struct bundle_list *list)
{
	struct strbuf id = STRBUF_INIT;
	struct remote_bundle_info lookup = REMOTE_BUNDLE_INFO_INIT;
	struct remote_bundle_info *bundle;
	const char *subsection, *subkey;
	size_t subsection_len;

	if (parse_config_key(key, "bundle", &subsection, &subsection_len, &subkey))
		return -1;

	if (!subsection_len) {
		if (!strcmp(subkey, "version")) {
			int version;

			if (!value || !git_parse_int(value, &version))
				return -1;
			/* Only version 1 is accepted. */
			if (version != 1)
				return -1;

			list->version = version;
			return 0;
		}

		if (!strcmp(subkey, "mode")) {
			if (!value)
				return -1;

			if (!strcmp(value, "all"))
				list->mode = BUNDLE_MODE_ALL;
			else if (!strcmp(value, "any"))
				list->mode = BUNDLE_MODE_ANY;
			else
				return -1;
			return 0;
		}

		if (!strcmp(subkey, "heuristic")) {
			int i;

			if (!value)
				return -1;

			for (i = 0; i < BUNDLE_HEURISTIC__COUNT; i++) {
				if (heuristics[i].heuristic &&
				    heuristics[i].name &&
				    !strcmp(value, heuristics[i].name)) {
					list->heuristic = heuristics[i].heuristic;
					return 0;
				}
			}

			/* Ignore unknown heuristics. */
			return 0;
		}

		/* Ignore other unknown global keys. */
		return 0;
	}

	strbuf_add(&id, subsection, subsection_len);

	/*
	 * Check for an existing bundle with this <id>, or create one
	 * if necessary.
	 */
	lookup.id = id.buf;
	hashmap_entry_init(&lookup.ent, strhash(lookup.id));
	if (!(bundle = hashmap_get_entry(&list->bundles, &lookup, ent, NULL))) {
		CALLOC_ARRAY(bundle, 1);
		/* The new entry takes ownership of the id string. */
		bundle->id = strbuf_detach(&id, NULL);
		hashmap_entry_init(&bundle->ent, strhash(bundle->id));
		hashmap_add(&list->bundles, &bundle->ent);
	}
	strbuf_release(&id);

	if (!strcmp(subkey, "uri")) {
		/* A missing value or a second uri for the same id is an error. */
		if (!value || bundle->uri)
			return -1;
		bundle->uri = relative_url(list->baseURI, value, NULL);
		return 0;
	}

	if (!strcmp(subkey, "creationtoken")) {
		if (!value)
			return -1;
		/* An unparseable token is reported but does not fail the list. */
		if (sscanf(value, "%"PRIu64, &bundle->creationToken) != 1)
			warning(_("could not parse bundle list key %s with value '%s'"),
				"creationToken", value);
		return 0;
	}

	/*
	 * At this point, we ignore any information that we don't
	 * understand, assuming it to be hints for a heuristic the client
	 * does not currently understand.
	 */
	return 0;
}
/*
 * Config callback adapter: 'data' is the 'struct bundle_list' to update
 * with the given key/value pair. Returns what bundle_list_update()
 * returns.
 */
static int config_to_bundle_list(const char *key, const char *value, void *data)
{
	return bundle_list_update(key, value, data);
}
int bundle_uri_parse_config_format(const char *uri,
const char *filename,
struct bundle_list *list)
{
int result;
struct config_options opts = {
.error_action = CONFIG_ERROR_ERROR,
};
if (!list->baseURI) {
struct strbuf baseURI = STRBUF_INIT;
strbuf_addstr(&baseURI, uri);
/*
* If the URI does not end with a trailing slash, then
* remove the filename portion of the path. This is
* important for relative URIs.
*/
strbuf_strip_file_from_path(&baseURI);
list->baseURI = strbuf_detach(&baseURI, NULL);
}
result = git_config_from_file_with_options(config_to_bundle_list,
filename, list,
&opts);
if (!result && list->mode == BUNDLE_MODE_NONE) {
warning(_("bundle list at '%s' has no mode"), uri);
result = 1;
}
return result;
}
/*
 * Pick a currently unused temporary file name from the
 * "bundles/tmp_uri_XXXXXX" template.
 *
 * The file is created by odb_mkstemp() and then immediately closed and
 * removed, so only the name is handed back. Returns a newly allocated
 * string owned by the caller, or NULL (after a warning) if the
 * temporary file could not be created.
 */
static char *find_temp_filename(void)
{
	int fd;
	struct strbuf name = STRBUF_INIT;

	/*
	 * Find a temporary filename that is available. This is briefly
	 * racy, but unlikely to collide.
	 */
	fd = odb_mkstemp(&name, "bundles/tmp_uri_XXXXXX");
	if (fd < 0) {
		warning(_("failed to create temporary file"));
		/* Do not leak whatever was written into the buffer. */
		strbuf_release(&name);
		return NULL;
	}

	close(fd);
	unlink(name.buf);
	return strbuf_detach(&name, NULL);
}
/*
 * Download 'uri' into 'file' by driving a "git-remote-https" child
 * process: request its capabilities, require that "get" is among them,
 * then send a "get <uri> <file>" command.
 *
 * Returns 0 on success. Returns 1 when the child cannot be started,
 * when one of its pipes cannot be wrapped in a FILE, or when the child
 * does not finish successfully; returns the value of error() when the
 * "get" capability is missing.
 *
 * NOTE(review): stderr of the child is redirected to a pipe (cp.err = -1)
 * that is never read or closed here -- confirm this is intended.
 */
static int download_https_uri_to_file(const char *file, const char *uri)
{
	int result = 0;
	struct child_process cp = CHILD_PROCESS_INIT;
	FILE *child_in = NULL, *child_out = NULL;
	struct strbuf line = STRBUF_INIT;
	int found_get = 0;

	strvec_pushl(&cp.args, "git-remote-https", uri, NULL);
	cp.err = -1;
	cp.in = -1;
	cp.out = -1;

	if (start_command(&cp))
		return 1;

	child_in = fdopen(cp.in, "w");
	if (!child_in) {
		/*
		 * No FILE owns the descriptors yet, so close them by
		 * hand; otherwise they leak and the child never sees
		 * end-of-file on its input.
		 */
		close(cp.in);
		close(cp.out);
		result = 1;
		goto cleanup;
	}

	child_out = fdopen(cp.out, "r");
	if (!child_out) {
		close(cp.out);
		result = 1;
		goto cleanup;
	}

	fprintf(child_in, "capabilities\n");
	fflush(child_in);

	/* The capability list ends at the first empty line. */
	while (!strbuf_getline(&line, child_out)) {
		if (!line.len)
			break;
		if (!strcmp(line.buf, "get"))
			found_get = 1;
	}
	strbuf_release(&line);

	if (!found_get) {
		result = error(_("insufficient capabilities"));
		goto cleanup;
	}

	fprintf(child_in, "get %s %s\n\n", uri, file);

cleanup:
	if (child_in)
		fclose(child_in);
	/*
	 * Record a failing child instead of returning right away, so
	 * that the output stream below is always closed.
	 */
	if (finish_command(&cp))
		result = 1;
	if (child_out)
		fclose(child_out);
	return result;
}
/*
 * Fetch the contents at 'uri' into 'filename'.
 *
 * URIs starting with "http:" or "https:" are downloaded through
 * download_https_uri_to_file(). Anything else is treated as a local
 * path (with an optional "file://" prefix removed) and copied with
 * copy_file(). Returns the result of whichever helper was used.
 */
static int copy_uri_to_file(const char *filename, const char *uri)
{
	const char *local_path;

	if (starts_with(uri, "https:") || starts_with(uri, "http:"))
		return download_https_uri_to_file(filename, uri);

	if (!skip_prefix(uri, "file://", &local_path))
		local_path = uri;

	/* Copy as a file */
	return copy_file(filename, local_path, 0);
}
static int unbundle_from_file(struct repository *r, const char *file)
{
int result = 0;
int bundle_fd;
struct bundle_header header = BUNDLE_HEADER_INIT;
struct string_list_item *refname;
struct strbuf bundle_ref = STRBUF_INIT;
size_t bundle_prefix_len;
if ((bundle_fd = read_bundle_header(file, &header)) < 0)
return 1;
/*
* Skip the reachability walk here, since we will be adding
* a reachable ref pointing to the new tips, which will reach
* the prerequisite commits.
*/
if ((result = unbundle(r, &header, bundle_fd, NULL,
VERIFY_BUNDLE_QUIET)))
return 1;
/*
* Convert all refs/heads/ from the bundle into refs/bundles/
* in the local repository.
*/
strbuf_addstr(&bundle_ref, "refs/bundles/");
bundle_prefix_len = bundle_ref.len;
for_each_string_list_item(refname, &header.references) {
struct object_id *oid = refname->util;
struct object_id old_oid;
const char *branch_name;
int has_old;
if (!skip_prefix(refname->string, "refs/heads/", &branch_name))
continue;
strbuf_setlen(&bundle_ref, bundle_prefix_len);
strbuf_addstr(&bundle_ref, branch_name);
has_old = !read_ref(bundle_ref.buf, &old_oid);
update_ref("fetched bundle", bundle_ref.buf, oid,
has_old ? &old_oid : NULL,
REF_SKIP_OID_VERIFICATION,
UPDATE_REFS_MSG_ON_ERR);
}
bundle_header_release(&header);
return result;
}
/*
 * State handed to the iterators that download the bundles of a list.
 */
struct bundle_list_context {
	/* Repository passed on to fetch_bundle_uri_internal(). */
	struct repository *r;
	/* List passed on to fetch_bundle_uri_internal(). */
	struct bundle_list *list;
	/* Mode of the list being iterated (checked against "any"). */
	enum bundle_list_mode mode;
	/* Number of downloads that succeeded so far. */
	int count;
	/* Current recursion depth; each download is made one level deeper. */
	int depth;
};
/*
* This forward declaration is necessary because we use indirect
* recursion:
*
* While iterating through a bundle list that was downloaded as part
* of fetch_bundle_uri_internal(), the iterator callbacks eventually call
* it again, but with depth + 1.
*/
static int fetch_bundle_uri_internal(struct repository *r,
struct remote_bundle_info *bundle,
int depth,
struct bundle_list *list);
/*
 * Bundle iterator that downloads 'bundle' using the
 * 'struct bundle_list_context' passed through 'data'.
 *
 * In "any" mode nothing more is downloaded once one download has
 * succeeded. Always returns 0, so a failed download never stops the
 * iteration.
 */
static int download_bundle_to_file(struct remote_bundle_info *bundle, void *data)
{
	struct bundle_list_context *ctx = data;

	if (ctx->count && ctx->mode == BUNDLE_MODE_ANY)
		return 0;

	/*
	 * Only increment count if the download succeeded. If our mode is
	 * BUNDLE_MODE_ANY, then we will want to try other URIs in the
	 * list in case they work instead.
	 */
	if (!fetch_bundle_uri_internal(ctx->r, bundle, ctx->depth + 1, ctx->list))
		ctx->count++;

	/*
	 * To be opportunistic as possible, we continue iterating and
	 * download as many bundles as we can, so we can apply the ones
	 * that work, even in BUNDLE_MODE_ALL mode.
	 */
	return 0;
}
/*
 * Flat array of bundle pointers, used to sort the bundles of a list.
 */
struct bundles_for_sorting {
	/* The collected bundle pointers. */
	struct remote_bundle_info **items;
	/* Number of slots allocated in 'items'. */
	size_t alloc;
	/* Number of slots of 'items' in use. */
	size_t nr;
};
/*
 * Bundle iterator that stores 'bundle' in the next free slot of the
 * 'struct bundles_for_sorting' passed through 'data'. The array is not
 * grown here; the caller must have allocated enough slots.
 * Always returns 0.
 */
static int append_bundle(struct remote_bundle_info *bundle, void *data)
{
	struct bundles_for_sorting *sorted = data;
	size_t slot = sorted->nr;

	sorted->items[slot] = bundle;
	sorted->nr = slot + 1;
	return 0;
}
/**
 * For use in QSORT() to get a list sorted by creationToken
 * in decreasing order.
 *
 * 'va' and 'vb' point at array elements, i.e. at pointers to
 * 'struct remote_bundle_info'. The bundle with the larger token
 * sorts first.
 */
static int compare_creation_token_decreasing(const void *va, const void *vb)
{
	const struct remote_bundle_info *lhs =
		*(const struct remote_bundle_info * const *)va;
	const struct remote_bundle_info *rhs =
		*(const struct remote_bundle_info * const *)vb;

	if (lhs->creationToken == rhs->creationToken)
		return 0;
	return lhs->creationToken > rhs->creationToken ? -1 : 1;
}
/*
* Download and apply the bundles of 'list' using the creationToken
* heuristic.
*
* The bundles are collected into an array sorted by decreasing
* creationToken and then walked as described in the comment above the
* loop below.
*
* Returns 0 when the list is empty, when the stored
* fetch.bundleCreationToken is already at least the largest token in
* the list, or when the walk ends by stepping before the first array
* entry; in that last case the largest token that was applied is
* written to fetch.bundleCreationToken. Returns 1 when the walk ends
* any other way.
*/
static int fetch_bundles_by_token(struct repository *r,
struct bundle_list *list)
{
int cur;
int move_direction = 0;
const char *creationTokenStr;
uint64_t maxCreationToken = 0, newMaxCreationToken = 0;
struct bundle_list_context ctx = {
.r = r,
.list = list,
.mode = list->mode,
};
struct bundles_for_sorting bundles = {
.alloc = hashmap_get_size(&list->bundles),
};
ALLOC_ARRAY(bundles.items, bundles.alloc);
for_all_bundles_in_list(list, append_bundle, &bundles);
if (!bundles.nr) {
free(bundles.items);
return 0;
}
QSORT(bundles.items, bundles.nr, compare_creation_token_decreasing);
/*
* If fetch.bundleCreationToken exists, parses to a uint64_t, and
* is not strictly smaller than the maximum creation token in the
* bundle list, then do not download any bundles.
*/
if (!repo_config_get_value(r,
"fetch.bundlecreationtoken",
&creationTokenStr) &&
sscanf(creationTokenStr, "%"PRIu64, &maxCreationToken) == 1 &&
bundles.items[0]->creationToken <= maxCreationToken) {
free(bundles.items);
return 0;
}
/*
* Attempt to download and unbundle the minimum number of bundles by
* creationToken in decreasing order. If we fail to unbundle (after
* a successful download) then move to the next non-downloaded bundle
* and attempt downloading. Once we succeed in applying a bundle,
* move to the previous unapplied bundle and attempt to unbundle it
* again.
*
* In the case of a fresh clone, we will likely download all of the
* bundles before successfully unbundling the oldest one, then the
* rest of the bundles unbundle successfully in increasing order
* of creationToken.
*
* If there are existing objects, then this process may terminate
* early when all required commits from "new" bundles exist in the
* repo's object store.
*/
cur = 0;
while (cur >= 0 && cur < bundles.nr) {
struct remote_bundle_info *bundle = bundles.items[cur];
/*
* If we need to dig into bundles below the previous
* creation token value, then likely we are in an erroneous
* state due to missing or invalid bundles. Halt the process
* instead of continuing to download extra data.
*/
if (bundle->creationToken <= maxCreationToken)
break;
if (!bundle->file) {
/*
* Not downloaded yet. Try downloading.
*
* Note that bundle->file is non-NULL if a download
* was attempted, even if it failed to download.
*/
if (fetch_bundle_uri_internal(ctx.r, bundle, ctx.depth + 1, ctx.list)) {
/* Mark as unbundled so we do not retry. */
bundle->unbundled = 1;
/* Try looking deeper in the list. */
move_direction = 1;
goto move;
}
/* We expect bundles when using creationTokens. */
if (!is_bundle(bundle->file, 1)) {
warning(_("file downloaded from '%s' is not a bundle"),
bundle->uri);
break;
}
}
if (bundle->file && !bundle->unbundled) {
/*
* This was downloaded, but not successfully
* unbundled. Try unbundling again.
*/
if (unbundle_from_file(ctx.r, bundle->file)) {
/* Try looking deeper in the list. */
move_direction = 1;
} else {
/*
* Succeeded in unbundle. Retry bundles
* that previously failed to unbundle.
*/
move_direction = -1;
bundle->unbundled = 1;
/* Track the largest token applied so far. */
if (bundle->creationToken > newMaxCreationToken)
newMaxCreationToken = bundle->creationToken;
}
}
/*
* Else case: downloaded and unbundled successfully.
* Skip this by moving in the same direction as the
* previous step.
*/
move:
/* Move in the specified direction and repeat. */
cur += move_direction;
}
/*
* We succeed if the loop terminates because 'cur' drops below
* zero. The other case is that we terminate because 'cur'
* reaches the end of the list, so we have a failure no matter
* which bundles we apply from the list.
*/
if (cur < 0) {
struct strbuf value = STRBUF_INIT;
strbuf_addf(&value, "%"PRIu64"", newMaxCreationToken);
/* Failing to store the token is only worth a warning. */
if (repo_config_set_multivar_gently(ctx.r,
"fetch.bundleCreationToken",
value.buf, NULL, 0))
warning(_("failed to store maximum creation token"));
strbuf_release(&value);
}
free(bundles.items);
/* 0 (success) only when the walk stepped before the first entry. */
return cur >= 0;
}
/*
 * Download every bundle advertised by 'local_list', honoring the mode
 * of 'local_list'. 'global_list' is the list handed on to each
 * download, and each download runs one level below 'depth'.
 *
 * Returns the result of iterating over 'local_list'.
 */
static int download_bundle_list(struct repository *r,
				struct bundle_list *local_list,
				struct bundle_list *global_list,
				int depth)
{
	struct bundle_list_context ctx = {
		.r = r,
		.list = global_list,
		.mode = local_list->mode,
		.depth = depth + 1,
	};

	return for_all_bundles_in_list(local_list, download_bundle_to_file, &ctx);
}
static int fetch_bundle_list_in_config_format(struct repository *r,
struct bundle_list *global_list,
struct remote_bundle_info *bundle,
int depth)
{
int result;
struct bundle_list list_from_bundle;
init_bundle_list(&list_from_bundle);
if ((result = bundle_uri_parse_config_format(bundle->uri,
bundle->file,
&list_from_bundle)))
goto cleanup;
if (list_from_bundle.mode == BUNDLE_MODE_NONE) {
warning(_("unrecognized bundle mode from URI '%s'"),
bundle->uri);
result = -1;
goto cleanup;
}
/*
* If this list uses the creationToken heuristic, then the URIs
* it advertises are expected to be bundles, not nested lists.
* We can drop 'global_list' and 'depth'.
*/
if (list_from_bundle.heuristic == BUNDLE_HEURISTIC_CREATIONTOKEN) {
result = fetch_bundles_by_token(r, &list_from_bundle);
global_list->heuristic = BUNDLE_HEURISTIC_CREATIONTOKEN;
} else if ((result = download_bundle_list(r, &list_from_bundle,
global_list, depth)))
goto cleanup;
cleanup:
clear_bundle_list(&list_from_bundle);
return result;
}
/**
* This limits the recursion on fetch_bundle_uri_internal() when following
* bundle lists: a call whose depth has reached this value warns and
* fails instead of downloading anything.
*/
static int max_bundle_uri_depth = 4;
/**
 * Recursively download all bundles advertised at the given URI
 * to files. If the file is a bundle, then add it to the given
 * 'list'. Otherwise, expect a bundle list and recurse on the
 * URIs in that list according to the list mode (ANY or ALL).
 *
 * Returns 0 on success and nonzero on failure. On failure the
 * downloaded file is removed, but 'bundle->file' keeps its name.
 */
static int fetch_bundle_uri_internal(struct repository *r,
				     struct remote_bundle_info *bundle,
				     int depth,
				     struct bundle_list *list)
{
	int status;

	if (depth >= max_bundle_uri_depth) {
		warning(_("exceeded bundle URI recursion limit (%d)"),
			max_bundle_uri_depth);
		return -1;
	}

	if (!bundle->file) {
		bundle->file = find_temp_filename();
		/* Nothing was created, so there is nothing to clean up. */
		if (!bundle->file)
			return -1;
	}

	status = copy_uri_to_file(bundle->file, bundle->uri);
	if (status) {
		warning(_("failed to download bundle from URI '%s'"), bundle->uri);
	} else if (is_bundle(bundle->file, 1)) {
		struct remote_bundle_info *entry;

		/* Copy the bundle and insert it into the global list. */
		CALLOC_ARRAY(entry, 1);
		entry->id = xstrdup(bundle->id);
		entry->file = xstrdup(bundle->file);
		hashmap_entry_init(&entry->ent, strhash(entry->id));
		hashmap_add(&list->bundles, &entry->ent);
	} else {
		/* Not a bundle: it has to be a bundle list then. */
		status = fetch_bundle_list_in_config_format(r, list, bundle,
							    depth);
		if (status)
			warning(_("file at URI '%s' is not a bundle or bundle list"),
				bundle->uri);
	}

	if (status && bundle->file)
		unlink(bundle->file);
	return status;
}
/**
 * This loop iterator breaks the loop with nonzero return code on the
 * first successful unbundling of a bundle.
 *
 * Bundles without a file, and bundles already marked as unbundled,
 * are skipped. 'data' is the repository to unbundle into.
 */
static int attempt_unbundle(struct remote_bundle_info *info, void *data)
{
	struct repository *r = data;

	if (info->unbundled || !info->file)
		return 0;

	if (unbundle_from_file(r, info->file))
		return 0;

	info->unbundled = 1;
	return 1;
}
/*
 * Apply as many bundles of 'list' to repository 'r' as possible.
 * Always returns 0.
 */
static int unbundle_all_bundles(struct repository *r,
				struct bundle_list *list)
{
	int applied_one;

	/*
	 * Iterate through all bundles looking for ones that can
	 * successfully unbundle. If any succeed, then perhaps another
	 * will succeed in the next attempt.
	 *
	 * Keep in mind that a non-zero result for the loop here means
	 * the loop terminated early on a successful unbundling, which
	 * signals that we can try again.
	 */
	do {
		applied_one = for_all_bundles_in_list(list, attempt_unbundle, r);
	} while (applied_one);

	return 0;
}
/*
 * Bundle iterator that removes the file recorded for 'info', if any,
 * through unlink_or_warn(). 'data' is not used. Always returns 0.
 */
static int unlink_bundle(struct remote_bundle_info *info, void *data)
{
	if (!info->file)
		return 0;

	unlink_or_warn(info->file);
	return 0;
}
/*
 * Fetch the bundle, or the bundle list, advertised at 'uri' into
 * repository 'r' and apply the bundles that were downloaded.
 *
 * A NULL or empty 'uri' is not an error: nothing is fetched and 0 is
 * returned. When 'has_heuristic' is non-NULL, it is set to whether the
 * list that was found uses a heuristic (0 when nothing was fetched).
 *
 * Returns 0 on success and the nonzero result of the download step on
 * failure. Downloaded bundle files are removed before returning.
 */
int fetch_bundle_uri(struct repository *r, const char *uri,
		     int *has_heuristic)
{
	int result;
	struct bundle_list list;
	struct remote_bundle_info bundle = { .uri = NULL };

	init_bundle_list(&list);

	/*
	 * Do not fetch a NULL or empty bundle URI. An empty bundle URI
	 * could signal that a configured bundle URI has been disabled.
	 *
	 * This check comes before 'uri' is duplicated, so that a NULL
	 * 'uri' is never handed to xstrdup().
	 */
	if (!uri || !*uri) {
		result = 0;
		goto cleanup;
	}

	bundle.uri = xstrdup(uri);
	bundle.id = xstrdup("");

	/* If a bundle is added to this global list, then it is required. */
	list.mode = BUNDLE_MODE_ALL;

	if ((result = fetch_bundle_uri_internal(r, &bundle, 0, &list)))
		goto cleanup;

	result = unbundle_all_bundles(r, &list);

cleanup:
	if (has_heuristic)
		*has_heuristic = (list.heuristic != BUNDLE_HEURISTIC_NONE);
	for_all_bundles_in_list(&list, unlink_bundle, NULL);
	clear_bundle_list(&list);
	clear_remote_bundle_info(&bundle, NULL);
	return result;
}
/*
 * Fetch and apply the bundles advertised by the already parsed 'list'.
 *
 * Returns the result of fetch_bundles_by_token() for lists that use the
 * creationToken heuristic. Otherwise returns the nonzero result of the
 * download step on failure, or the result of applying the downloaded
 * bundles. Downloaded bundle files are removed before returning.
 */
int fetch_bundle_list(struct repository *r, struct bundle_list *list)
{
	struct bundle_list global_list;
	int status;

	/*
	 * If the creationToken heuristic is used, then the URIs
	 * advertised by 'list' are not nested lists and instead
	 * direct bundles. We do not need to use global_list.
	 */
	if (list->heuristic == BUNDLE_HEURISTIC_CREATIONTOKEN)
		return fetch_bundles_by_token(r, list);

	init_bundle_list(&global_list);

	/* If a bundle is added to this global list, then it is required. */
	global_list.mode = BUNDLE_MODE_ALL;

	status = download_bundle_list(r, list, &global_list, 0);
	if (!status) {
		/*
		 * NOTE(review): the heuristic was already tested above,
		 * so the first branch looks unreachable unless the
		 * download step changes 'list' -- confirm.
		 */
		if (list->heuristic == BUNDLE_HEURISTIC_CREATIONTOKEN)
			status = fetch_bundles_by_token(r, list);
		else
			status = unbundle_all_bundles(r, &global_list);
	}

	for_all_bundles_in_list(&global_list, unlink_bundle, NULL);
	clear_bundle_list(&global_list);
	return status;
}
/**
 * API for serve.c.
 */

/*
 * Report whether bundle URIs should be advertised, as configured by
 * "uploadpack.advertisebundleuris" (off when unset). The config is
 * looked up on the first call and the answer is kept in a static
 * variable for later calls.
 */
int bundle_uri_advertise(struct repository *r, struct strbuf *value UNUSED)
{
	static int advertise_bundle_uri = -1;

	if (advertise_bundle_uri == -1) {
		advertise_bundle_uri = 0;
		repo_config_get_maybe_bool(r, "uploadpack.advertisebundleuris",
					   &advertise_bundle_uri);
	}

	return advertise_bundle_uri;
}
/*
 * Config callback that sends every "bundle.*" entry as a "key=value"
 * packet line. Other keys are skipped. Always returns 0.
 *
 * 'data' is the 'struct packet_writer' set up by bundle_uri_command(),
 * so it is used as such instead of being reinterpreted as a
 * 'struct packet_reader'.
 *
 * NOTE(review): 'value' may be NULL for a valueless config key; confirm
 * whether such entries should be skipped here.
 */
static int config_to_packet_line(const char *key, const char *value, void *data)
{
	struct packet_writer *writer = data;

	if (starts_with(key, "bundle."))
		packet_writer_write(writer, "%s=%s", key, value);

	return 0;
}
/*
 * Serve the "bundle-uri" command: after checking that the request has
 * no arguments and ends with a flush packet, send all "bundle.*" config
 * entries of 'r' followed by a flush packet.
 *
 * Dies on a malformed request. Returns 0 otherwise.
 */
int bundle_uri_command(struct repository *r,
		       struct packet_reader *request)
{
	struct packet_writer writer;

	/* NOTE(review): the 1 is presumably the stdout descriptor -- confirm. */
	packet_writer_init(&writer, 1);

	/* The command takes no arguments, so any normal packet is fatal. */
	while (packet_reader_read(request) == PACKET_READ_NORMAL) {
		die(_("bundle-uri: unexpected argument: '%s'"), request->line);
	}

	if (request->status != PACKET_READ_FLUSH) {
		die(_("bundle-uri: expected flush after arguments"));
	}

	/*
	 * Read all "bundle.*" config lines to the client as key=value
	 * packet lines.
	 */
	repo_config(r, config_to_packet_line, &writer);

	packet_writer_flush(&writer);
	return 0;
}
/**
 * General API for {transport,connect}.c etc.
 */

/*
 * Parse one "key=value" line and apply it to 'list'.
 *
 * Returns the result of error() when the line is empty, has no '=',
 * or has an empty key or an empty value. Otherwise returns the result
 * of bundle_list_update() for the pair.
 */
int bundle_uri_parse_line(struct bundle_list *list, const char *line)
{
	struct strbuf key = STRBUF_INIT;
	const char *sep, *val;
	int status;

	if (!*line)
		return error(_("bundle-uri: got an empty line"));

	sep = strchr(line, '=');
	if (!sep)
		return error(_("bundle-uri: line is not of the form 'key=value'"));

	val = sep + 1;
	if (sep == line || !*val)
		return error(_("bundle-uri: line has empty key or value"));

	/* The key is everything before the first '='. */
	strbuf_add(&key, line, sep - line);
	status = bundle_list_update(key.buf, val, list);
	strbuf_release(&key);

	return status;
}