connected: do not sort input revisions

In order to compute whether objects reachable from a set of tips are all
connected, we do a revision walk with these tips as positive references
and `--not --all`. `--not --all` will cause the revision walk to load
all preexisting references as uninteresting, which can be very expensive
in repositories with many references.

Benchmarking the git-rev-list(1) command highlights that by far the most
expensive single phase is initial sorting of the input revisions: after
all references have been loaded, we first sort commits by author date.
In a real-world repository with about 2.2 million references, it makes
up about 40% of the total runtime of git-rev-list(1).

Ultimately, the connectivity check shouldn't really bother about the
order of input revisions at all. We only care whether we can actually
walk all objects until we hit the cut-off point. So sorting the input is
a complete waste of time.

Introduce a new "--unsorted-input" flag to git-rev-list(1) which will
cause it to not sort the commits and adjust the connectivity check to
always pass the flag. This results in the following speedups, executed
in a clone of gitlab-org/gitlab [1]:

    Benchmark #1: git rev-list  --objects --quiet --not --all --not $(cat newrev)
      Time (mean ± σ):      7.639 s ±  0.065 s    [User: 7.304 s, System: 0.335 s]
      Range (min … max):    7.543 s …  7.742 s    10 runs

    Benchmark #2: git rev-list --unsorted-input --objects --quiet --not --all --not $newrev
      Time (mean ± σ):      4.995 s ±  0.044 s    [User: 4.657 s, System: 0.337 s]
      Range (min … max):    4.909 s …  5.048 s    10 runs

    Summary
      'git rev-list --unsorted-input --objects --quiet --not --all --not $(cat newrev)' ran
        1.53 ± 0.02 times faster than 'git rev-list  --objects --quiet --not --all --not $newrev'

[1]: https://gitlab.com/gitlab-org/gitlab.git. Note that not all refs
     are visible to clients.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Patrick Steinhardt 2021-08-09 10:11:50 +02:00 committed by Junio C Hamano
parent 29ef1f27fe
commit f45022dc2f
4 changed files with 48 additions and 1 deletions

View File

@ -968,6 +968,11 @@ list of the missing objects. Object IDs are prefixed with a ``?'' character.
objects. objects.
endif::git-rev-list[] endif::git-rev-list[]
--unsorted-input::
Show commits in the order they were given on the command line instead
of sorting them in reverse chronological order by commit time. Cannot
be combined with `--no-walk` or `--no-walk=sorted`.
--no-walk[=(sorted|unsorted)]:: --no-walk[=(sorted|unsorted)]::
Only show the given commits, but do not traverse their ancestors. Only show the given commits, but do not traverse their ancestors.
This has no effect if a range is specified. If the argument This has no effect if a range is specified. If the argument
@ -975,7 +980,8 @@ endif::git-rev-list[]
given on the command line. Otherwise (if `sorted` or no argument given on the command line. Otherwise (if `sorted` or no argument
was given), the commits are shown in reverse chronological order was given), the commits are shown in reverse chronological order
by commit time. by commit time.
Cannot be combined with `--graph`. Cannot be combined with `--graph`. Cannot be combined with
`--unsorted-input` if `sorted` or no argument was given.
--do-walk:: --do-walk::
Overrides a previous `--no-walk`. Overrides a previous `--no-walk`.

View File

@ -106,6 +106,7 @@ no_promisor_pack_found:
if (opt->progress) if (opt->progress)
strvec_pushf(&rev_list.args, "--progress=%s", strvec_pushf(&rev_list.args, "--progress=%s",
_("Checking connectivity")); _("Checking connectivity"));
strvec_push(&rev_list.args, "--unsorted-input");
rev_list.git_cmd = 1; rev_list.git_cmd = 1;
rev_list.env = opt->env; rev_list.env = opt->env;

View File

@ -2256,6 +2256,10 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg
} else if (!strcmp(arg, "--author-date-order")) { } else if (!strcmp(arg, "--author-date-order")) {
revs->sort_order = REV_SORT_BY_AUTHOR_DATE; revs->sort_order = REV_SORT_BY_AUTHOR_DATE;
revs->topo_order = 1; revs->topo_order = 1;
} else if (!strcmp(arg, "--unsorted-input")) {
if (revs->no_walk)
die(_("--unsorted-input is incompatible with --no-walk"));
revs->unsorted_input = 1;
} else if (!strcmp(arg, "--early-output")) { } else if (!strcmp(arg, "--early-output")) {
revs->early_output = 100; revs->early_output = 100;
revs->topo_order = 1; revs->topo_order = 1;
@ -2651,8 +2655,13 @@ static int handle_revision_pseudo_opt(const char *submodule,
} else if (!strcmp(arg, "--not")) { } else if (!strcmp(arg, "--not")) {
*flags ^= UNINTERESTING | BOTTOM; *flags ^= UNINTERESTING | BOTTOM;
} else if (!strcmp(arg, "--no-walk")) { } else if (!strcmp(arg, "--no-walk")) {
if (!revs->no_walk && revs->unsorted_input)
die(_("--no-walk is incompatible with --unsorted-input"));
revs->no_walk = 1; revs->no_walk = 1;
} else if (skip_prefix(arg, "--no-walk=", &optarg)) { } else if (skip_prefix(arg, "--no-walk=", &optarg)) {
if (!revs->no_walk && revs->unsorted_input)
die(_("--no-walk is incompatible with --unsorted-input"));
/* /*
* Detached form ("--no-walk X" as opposed to "--no-walk=X") * Detached form ("--no-walk X" as opposed to "--no-walk=X")
* not allowed, since the argument is optional. * not allowed, since the argument is optional.

View File

@ -169,4 +169,35 @@ test_expect_success 'rev-list --count --objects' '
test_line_count = $count actual test_line_count = $count actual
' '
test_expect_success 'rev-list --unsorted-input results in different sorting' '
git rev-list --unsorted-input HEAD HEAD~ >first &&
git rev-list --unsorted-input HEAD~ HEAD >second &&
! test_cmp first second &&
sort first >first.sorted &&
sort second >second.sorted &&
test_cmp first.sorted second.sorted
'
test_expect_success 'rev-list --unsorted-input incompatible with --no-walk' '
cat >expect <<-EOF &&
fatal: --no-walk is incompatible with --unsorted-input
EOF
test_must_fail git rev-list --unsorted-input --no-walk HEAD 2>error &&
test_cmp expect error &&
test_must_fail git rev-list --unsorted-input --no-walk=sorted HEAD 2>error &&
test_cmp expect error &&
test_must_fail git rev-list --unsorted-input --no-walk=unsorted HEAD 2>error &&
test_cmp expect error &&
cat >expect <<-EOF &&
fatal: --unsorted-input is incompatible with --no-walk
EOF
test_must_fail git rev-list --no-walk --unsorted-input HEAD 2>error &&
test_cmp expect error &&
test_must_fail git rev-list --no-walk=sorted --unsorted-input HEAD 2>error &&
test_cmp expect error &&
test_must_fail git rev-list --no-walk=unsorted --unsorted-input HEAD 2>error &&
test_cmp expect error
'
test_done test_done