39b44ba771
When we receive a remote ref update to sha1 "X", we want to check that we have all of the objects needed by "X". We can assume that our repository is not currently corrupted, and therefore if we have a ref pointing at "Y", we have all of its objects. So we can stop our traversal from "X" as soon as we hit "Y".

If we make the same non-corruption assumption about any repositories we use to store alternates, then we can also use their ref tips to shorten the traversal. This is especially useful when cloning with "--reference", as we otherwise do not have any local refs to check against, and have to traverse the whole history, even though the other side may have sent us few or no objects.

Here are results for the included perf test (which shows off more or less the maximal savings, getting one new commit and sharing the whole history):

  Test                        HEAD^               HEAD
  --------------------------------------------------------------------
  [on git.git]
  5600.3: clone --reference   2.94(2.86+0.08)     0.09(0.08+0.01) -96.9%
  [on linux.git]
  5600.3: clone --reference   45.74(45.34+0.41)   0.36(0.30+0.08) -99.2%

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
130 lines
3.8 KiB
C
130 lines
3.8 KiB
C
#include "cache.h"
|
|
#include "object-store.h"
|
|
#include "run-command.h"
|
|
#include "sigchain.h"
|
|
#include "connected.h"
|
|
#include "transport.h"
|
|
#include "packfile.h"
|
|
|
|
/*
|
|
* If we feed all the commits we want to verify to this command
|
|
*
|
|
* $ git rev-list --objects --stdin --not --all
|
|
*
|
|
* and if it does not error out, that means everything reachable from
|
|
* these commits locally exists and is connected to our existing refs.
|
|
* Note that this does _not_ validate the individual objects.
|
|
*
|
|
* Returns 0 if everything is connected, non-zero otherwise.
|
|
*/
|
|
int check_connected(oid_iterate_fn fn, void *cb_data,
|
|
struct check_connected_options *opt)
|
|
{
|
|
struct child_process rev_list = CHILD_PROCESS_INIT;
|
|
struct check_connected_options defaults = CHECK_CONNECTED_INIT;
|
|
char commit[GIT_MAX_HEXSZ + 1];
|
|
struct object_id oid;
|
|
int err = 0;
|
|
struct packed_git *new_pack = NULL;
|
|
struct transport *transport;
|
|
size_t base_len;
|
|
|
|
if (!opt)
|
|
opt = &defaults;
|
|
transport = opt->transport;
|
|
|
|
if (fn(cb_data, &oid)) {
|
|
if (opt->err_fd)
|
|
close(opt->err_fd);
|
|
return err;
|
|
}
|
|
|
|
if (transport && transport->smart_options &&
|
|
transport->smart_options->self_contained_and_connected &&
|
|
transport->pack_lockfile &&
|
|
strip_suffix(transport->pack_lockfile, ".keep", &base_len)) {
|
|
struct strbuf idx_file = STRBUF_INIT;
|
|
strbuf_add(&idx_file, transport->pack_lockfile, base_len);
|
|
strbuf_addstr(&idx_file, ".idx");
|
|
new_pack = add_packed_git(idx_file.buf, idx_file.len, 1);
|
|
strbuf_release(&idx_file);
|
|
}
|
|
|
|
if (opt->check_refs_only) {
|
|
/*
|
|
* For partial clones, we don't want to have to do a regular
|
|
* connectivity check because we have to enumerate and exclude
|
|
* all promisor objects (slow), and then the connectivity check
|
|
* itself becomes a no-op because in a partial clone every
|
|
* object is a promisor object. Instead, just make sure we
|
|
* received the objects pointed to by each wanted ref.
|
|
*/
|
|
do {
|
|
if (!repo_has_object_file(the_repository, &oid))
|
|
return 1;
|
|
} while (!fn(cb_data, &oid));
|
|
return 0;
|
|
}
|
|
|
|
if (opt->shallow_file) {
|
|
argv_array_push(&rev_list.args, "--shallow-file");
|
|
argv_array_push(&rev_list.args, opt->shallow_file);
|
|
}
|
|
argv_array_push(&rev_list.args,"rev-list");
|
|
argv_array_push(&rev_list.args, "--objects");
|
|
argv_array_push(&rev_list.args, "--stdin");
|
|
if (repository_format_partial_clone)
|
|
argv_array_push(&rev_list.args, "--exclude-promisor-objects");
|
|
if (!opt->is_deepening_fetch) {
|
|
argv_array_push(&rev_list.args, "--not");
|
|
argv_array_push(&rev_list.args, "--all");
|
|
}
|
|
argv_array_push(&rev_list.args, "--quiet");
|
|
argv_array_push(&rev_list.args, "--alternate-refs");
|
|
if (opt->progress)
|
|
argv_array_pushf(&rev_list.args, "--progress=%s",
|
|
_("Checking connectivity"));
|
|
|
|
rev_list.git_cmd = 1;
|
|
rev_list.env = opt->env;
|
|
rev_list.in = -1;
|
|
rev_list.no_stdout = 1;
|
|
if (opt->err_fd)
|
|
rev_list.err = opt->err_fd;
|
|
else
|
|
rev_list.no_stderr = opt->quiet;
|
|
|
|
if (start_command(&rev_list))
|
|
return error(_("Could not run 'git rev-list'"));
|
|
|
|
sigchain_push(SIGPIPE, SIG_IGN);
|
|
|
|
commit[GIT_SHA1_HEXSZ] = '\n';
|
|
do {
|
|
/*
|
|
* If index-pack already checked that:
|
|
* - there are no dangling pointers in the new pack
|
|
* - the pack is self contained
|
|
* Then if the updated ref is in the new pack, then we
|
|
* are sure the ref is good and not sending it to
|
|
* rev-list for verification.
|
|
*/
|
|
if (new_pack && find_pack_entry_one(oid.hash, new_pack))
|
|
continue;
|
|
|
|
memcpy(commit, oid_to_hex(&oid), GIT_SHA1_HEXSZ);
|
|
if (write_in_full(rev_list.in, commit, GIT_SHA1_HEXSZ + 1) < 0) {
|
|
if (errno != EPIPE && errno != EINVAL)
|
|
error_errno(_("failed write to rev-list"));
|
|
err = -1;
|
|
break;
|
|
}
|
|
} while (!fn(cb_data, &oid));
|
|
|
|
if (close(rev_list.in))
|
|
err = error_errno(_("failed to close rev-list's stdin"));
|
|
|
|
sigchain_pop(SIGPIPE);
|
|
return finish_command(&rev_list) || err;
|
|
}
|