Merge branch 'ps/fetch-optim'

Optimize code that handles a large number of refs in the "git fetch"
code path.

* ps/fetch-optim:
  fetch: avoid second connectivity check if we already have all objects
  fetch: merge fetching and consuming refs
  fetch: refactor fetch refs to be more extendable
  fetch-pack: optimize loading of refs via commit graph
  connected: refactor iterator to return next object ID directly
  fetch: avoid unpacking headers in object existence check
  fetch: speed up lookup of want refs via commit-graph
This commit is contained in:
Junio C Hamano 2021-09-20 15:20:39 -07:00
commit deec8aa2d0
6 changed files with 67 additions and 61 deletions

View File

@ -657,7 +657,7 @@ static void write_followtags(const struct ref *refs, const char *msg)
}
}
static int iterate_ref_map(void *cb_data, struct object_id *oid)
static const struct object_id *iterate_ref_map(void *cb_data)
{
struct ref **rm = cb_data;
struct ref *ref = *rm;
@ -668,13 +668,11 @@ static int iterate_ref_map(void *cb_data, struct object_id *oid)
*/
while (ref && !ref->peer_ref)
ref = ref->next;
/* Returning -1 notes "end of list" to the caller. */
if (!ref)
return -1;
return NULL;
oidcpy(oid, &ref->old_oid);
*rm = ref->next;
return 0;
return &ref->old_oid;
}
static void update_remote_refs(const struct ref *refs,

View File

@ -854,13 +854,11 @@ static int update_local_ref(struct ref *ref,
int summary_width)
{
struct commit *current = NULL, *updated;
enum object_type type;
struct branch *current_branch = branch_get(NULL);
const char *pretty_ref = prettify_refname(ref->name);
int fast_forward = 0;
type = oid_object_info(the_repository, &ref->new_oid, NULL);
if (type < 0)
if (!repo_has_object_file(the_repository, &ref->new_oid))
die(_("object %s not found"), oid_to_hex(&ref->new_oid));
if (oideq(&ref->old_oid, &ref->new_oid)) {
@ -972,7 +970,7 @@ static int update_local_ref(struct ref *ref,
}
}
static int iterate_ref_map(void *cb_data, struct object_id *oid)
static const struct object_id *iterate_ref_map(void *cb_data)
{
struct ref **rm = cb_data;
struct ref *ref = *rm;
@ -980,10 +978,9 @@ static int iterate_ref_map(void *cb_data, struct object_id *oid)
while (ref && ref->status == REF_STATUS_REJECT_SHALLOW)
ref = ref->next;
if (!ref)
return -1; /* end of the list */
return NULL;
*rm = ref->next;
oidcpy(oid, &ref->old_oid);
return 0;
return &ref->old_oid;
}
struct fetch_head {
@ -1082,7 +1079,6 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
int connectivity_checked, struct ref *ref_map)
{
struct fetch_head fetch_head;
struct commit *commit;
int url_len, i, rc = 0;
struct strbuf note = STRBUF_INIT, err = STRBUF_INIT;
struct ref_transaction *transaction = NULL;
@ -1130,6 +1126,7 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
want_status <= FETCH_HEAD_IGNORE;
want_status++) {
for (rm = ref_map; rm; rm = rm->next) {
struct commit *commit = NULL;
struct ref *ref = NULL;
if (rm->status == REF_STATUS_REJECT_SHALLOW) {
@ -1139,11 +1136,23 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
continue;
}
commit = lookup_commit_reference_gently(the_repository,
&rm->old_oid,
1);
if (!commit)
rm->fetch_head_status = FETCH_HEAD_NOT_FOR_MERGE;
/*
* References in "refs/tags/" are often going to point
* to annotated tags, which are not part of the
* commit-graph. We thus only try to look up refs in
* the graph which are not in that namespace to not
* regress performance in repositories with many
* annotated tags.
*/
if (!starts_with(rm->name, "refs/tags/"))
commit = lookup_commit_in_graph(the_repository, &rm->old_oid);
if (!commit) {
commit = lookup_commit_reference_gently(the_repository,
&rm->old_oid,
1);
if (!commit)
rm->fetch_head_status = FETCH_HEAD_NOT_FOR_MERGE;
}
if (rm->fetch_head_status != want_status)
continue;
@ -1289,37 +1298,35 @@ static int check_exist_and_connected(struct ref *ref_map)
return check_connected(iterate_ref_map, &rm, &opt);
}
static int fetch_refs(struct transport *transport, struct ref *ref_map)
static int fetch_and_consume_refs(struct transport *transport, struct ref *ref_map)
{
int ret = check_exist_and_connected(ref_map);
int connectivity_checked = 1;
int ret;
/*
* We don't need to perform a fetch in case we can already satisfy all
* refs.
*/
ret = check_exist_and_connected(ref_map);
if (ret) {
trace2_region_enter("fetch", "fetch_refs", the_repository);
ret = transport_fetch_refs(transport, ref_map);
trace2_region_leave("fetch", "fetch_refs", the_repository);
if (ret)
goto out;
connectivity_checked = transport->smart_options ?
transport->smart_options->connectivity_checked : 0;
}
if (!ret)
/*
* Keep the new pack's ".keep" file around to allow the caller
* time to update refs to reference the new objects.
*/
return 0;
transport_unlock_pack(transport);
return ret;
}
/* Update local refs based on the ref values fetched from a remote */
static int consume_refs(struct transport *transport, struct ref *ref_map)
{
int connectivity_checked = transport->smart_options
? transport->smart_options->connectivity_checked : 0;
int ret;
trace2_region_enter("fetch", "consume_refs", the_repository);
ret = store_updated_refs(transport->url,
transport->remote->name,
connectivity_checked,
ref_map);
transport_unlock_pack(transport);
trace2_region_leave("fetch", "consume_refs", the_repository);
out:
transport_unlock_pack(transport);
return ret;
}
@ -1508,8 +1515,7 @@ static void backfill_tags(struct transport *transport, struct ref *ref_map)
transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, NULL);
transport_set_option(transport, TRANS_OPT_DEPTH, "0");
transport_set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, NULL);
if (!fetch_refs(transport, ref_map))
consume_refs(transport, ref_map);
fetch_and_consume_refs(transport, ref_map);
if (gsecondary) {
transport_disconnect(gsecondary);
@ -1600,7 +1606,7 @@ static int do_fetch(struct transport *transport,
transport->url);
}
}
if (fetch_refs(transport, ref_map) || consume_refs(transport, ref_map)) {
if (fetch_and_consume_refs(transport, ref_map)) {
free_refs(ref_map);
retcode = 1;
goto cleanup;

View File

@ -1306,7 +1306,7 @@ static void refuse_unconfigured_deny_delete_current(void)
rp_error("%s", _(refuse_unconfigured_deny_delete_current_msg));
}
static int command_singleton_iterator(void *cb_data, struct object_id *oid);
static const struct object_id *command_singleton_iterator(void *cb_data);
static int update_shallow_ref(struct command *cmd, struct shallow_info *si)
{
struct shallow_lock shallow_lock = SHALLOW_LOCK_INIT;
@ -1731,16 +1731,15 @@ static void check_aliased_updates(struct command *commands)
string_list_clear(&ref_list, 0);
}
static int command_singleton_iterator(void *cb_data, struct object_id *oid)
static const struct object_id *command_singleton_iterator(void *cb_data)
{
struct command **cmd_list = cb_data;
struct command *cmd = *cmd_list;
if (!cmd || is_null_oid(&cmd->new_oid))
return -1; /* end of list */
return NULL;
*cmd_list = NULL; /* this returns only one */
oidcpy(oid, &cmd->new_oid);
return 0;
return &cmd->new_oid;
}
static void set_connectivity_errors(struct command *commands,
@ -1770,7 +1769,7 @@ struct iterate_data {
struct shallow_info *si;
};
static int iterate_receive_command_list(void *cb_data, struct object_id *oid)
static const struct object_id *iterate_receive_command_list(void *cb_data)
{
struct iterate_data *data = cb_data;
struct command **cmd_list = &data->cmds;
@ -1781,13 +1780,11 @@ static int iterate_receive_command_list(void *cb_data, struct object_id *oid)
/* to be checked in update_shallow_ref() */
continue;
if (!is_null_oid(&cmd->new_oid) && !cmd->skip_update) {
oidcpy(oid, &cmd->new_oid);
*cmd_list = cmd->next;
return 0;
return &cmd->new_oid;
}
}
*cmd_list = NULL;
return -1; /* end of list */
return NULL;
}
static void reject_updates_to_hidden(struct command *commands)

View File

@ -24,7 +24,7 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
struct child_process rev_list = CHILD_PROCESS_INIT;
FILE *rev_list_in;
struct check_connected_options defaults = CHECK_CONNECTED_INIT;
struct object_id oid;
const struct object_id *oid;
int err = 0;
struct packed_git *new_pack = NULL;
struct transport *transport;
@ -34,7 +34,8 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
opt = &defaults;
transport = opt->transport;
if (fn(cb_data, &oid)) {
oid = fn(cb_data);
if (!oid) {
if (opt->err_fd)
close(opt->err_fd);
return err;
@ -73,7 +74,7 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
for (p = get_all_packs(the_repository); p; p = p->next) {
if (!p->pack_promisor)
continue;
if (find_pack_entry_one(oid.hash, p))
if (find_pack_entry_one(oid->hash, p))
goto promisor_pack_found;
}
/*
@ -83,7 +84,7 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
goto no_promisor_pack_found;
promisor_pack_found:
;
} while (!fn(cb_data, &oid));
} while ((oid = fn(cb_data)) != NULL);
return 0;
}
@ -133,12 +134,12 @@ no_promisor_pack_found:
* are sure the ref is good and not sending it to
* rev-list for verification.
*/
if (new_pack && find_pack_entry_one(oid.hash, new_pack))
if (new_pack && find_pack_entry_one(oid->hash, new_pack))
continue;
if (fprintf(rev_list_in, "%s\n", oid_to_hex(&oid)) < 0)
if (fprintf(rev_list_in, "%s\n", oid_to_hex(oid)) < 0)
break;
} while (!fn(cb_data, &oid));
} while ((oid = fn(cb_data)) != NULL);
if (ferror(rev_list_in) || fflush(rev_list_in)) {
if (errno != EPIPE && errno != EINVAL)

View File

@ -9,7 +9,7 @@ struct transport;
* When called after returning the name for the last object, return
* NULL to signal EOF, otherwise return a pointer to the next object ID.
*/
typedef int (*oid_iterate_fn)(void *, struct object_id *oid);
typedef const struct object_id *(*oid_iterate_fn)(void *);
/*
* Named-arguments struct for check_connected. All arguments are

View File

@ -119,6 +119,11 @@ static struct commit *deref_without_lazy_fetch(const struct object_id *oid,
{
enum object_type type;
struct object_info info = { .typep = &type };
struct commit *commit;
commit = lookup_commit_in_graph(the_repository, oid);
if (commit)
return commit;
while (1) {
if (oid_object_info_extended(the_repository, oid, &info,
@ -1912,16 +1917,15 @@ static void update_shallow(struct fetch_pack_args *args,
oid_array_clear(&ref);
}
static int iterate_ref_map(void *cb_data, struct object_id *oid)
static const struct object_id *iterate_ref_map(void *cb_data)
{
struct ref **rm = cb_data;
struct ref *ref = *rm;
if (!ref)
return -1; /* end of the list */
return NULL;
*rm = ref->next;
oidcpy(oid, &ref->old_oid);
return 0;
return &ref->old_oid;
}
struct ref *fetch_pack(struct fetch_pack_args *args,