2005-04-24 04:04:40 +02:00
|
|
|
#include "cache.h"
|
2005-10-05 23:49:54 +02:00
|
|
|
#include "refs.h"
|
2005-06-29 20:30:24 +02:00
|
|
|
#include "tag.h"
|
2005-04-24 04:04:40 +02:00
|
|
|
#include "commit.h"
|
2005-06-25 07:56:58 +02:00
|
|
|
#include "tree.h"
|
|
|
|
#include "blob.h"
|
2006-03-30 08:55:43 +02:00
|
|
|
#include "tree-walk.h"
|
2006-04-11 03:14:54 +02:00
|
|
|
#include "diff.h"
|
2006-02-26 01:19:46 +01:00
|
|
|
#include "revision.h"
|
2006-09-05 06:50:12 +02:00
|
|
|
#include "list-objects.h"
|
2006-05-18 23:19:20 +02:00
|
|
|
#include "builtin.h"
|
2006-02-26 01:19:46 +01:00
|
|
|
|
2006-04-17 03:12:49 +02:00
|
|
|
/* bits #0-15 in revision.h */
|
2005-04-24 04:04:40 +02:00
|
|
|
|
2006-04-17 03:12:49 +02:00
|
|
|
#define COUNTED (1u<<16)
|
2005-05-31 03:46:32 +02:00
|
|
|
|
2005-05-26 03:29:09 +02:00
|
|
|
static const char rev_list_usage[] =
|
2005-10-30 10:03:45 +01:00
|
|
|
"git-rev-list [OPTION] <commit-id>... [ -- paths... ]\n"
|
|
|
|
" limiting output:\n"
|
|
|
|
" --max-count=nr\n"
|
|
|
|
" --max-age=epoch\n"
|
|
|
|
" --min-age=epoch\n"
|
|
|
|
" --sparse\n"
|
|
|
|
" --no-merges\n"
|
2006-01-27 10:39:24 +01:00
|
|
|
" --remove-empty\n"
|
2005-10-30 10:03:45 +01:00
|
|
|
" --all\n"
|
2006-09-06 06:39:02 +02:00
|
|
|
" --stdin\n"
|
2005-10-30 10:03:45 +01:00
|
|
|
" ordering output:\n"
|
|
|
|
" --topo-order\n"
|
2006-02-16 07:05:33 +01:00
|
|
|
" --date-order\n"
|
2005-10-30 10:03:45 +01:00
|
|
|
" formatting output:\n"
|
|
|
|
" --parents\n"
|
2006-02-19 12:32:31 +01:00
|
|
|
" --objects | --objects-edge\n"
|
2005-10-30 10:03:45 +01:00
|
|
|
" --unpacked\n"
|
|
|
|
" --header | --pretty\n"
|
2006-02-10 20:56:42 +01:00
|
|
|
" --abbrev=nr | --no-abbrev\n"
|
2006-04-07 06:32:36 +02:00
|
|
|
" --abbrev-commit\n"
|
2007-04-05 16:53:07 +02:00
|
|
|
" --left-right\n"
|
2005-10-30 10:03:45 +01:00
|
|
|
" special purpose:\n"
|
2007-03-22 06:15:54 +01:00
|
|
|
" --bisect\n"
|
|
|
|
" --bisect-vars"
|
2005-10-30 10:03:45 +01:00
|
|
|
;
|
2005-05-26 03:29:09 +02:00
|
|
|
|
2006-05-18 23:19:20 +02:00
|
|
|
static struct rev_info revs;
|
2006-02-26 01:19:46 +01:00
|
|
|
|
2006-08-15 19:23:48 +02:00
|
|
|
static int bisect_list;
|
|
|
|
static int show_timestamp;
|
|
|
|
static int hdr_termination;
|
Log message printout cleanups
On Sun, 16 Apr 2006, Junio C Hamano wrote:
>
> In the mid-term, I am hoping we can drop the generate_header()
> callchain _and_ the custom code that formats commit log in-core,
> found in cmd_log_wc().
Ok, this was nastier than expected, just because the dependencies between
the different log-printing stuff were absolutely _everywhere_, but here's
a patch that does exactly that.
The patch is not very easy to read, and the "--patch-with-stat" thing is
still broken (it does not call the "show_log()" thing properly for
merges). That's not a new bug. In the new world order it _should_ do
something like
if (rev->logopt)
show_log(rev, rev->logopt, "---\n");
but it doesn't. I haven't looked at the --with-stat logic, so I left it
alone.
That said, this patch removes more lines than it adds, and in particular,
the "cmd_log_wc()" loop is now a very clean:
while ((commit = get_revision(rev)) != NULL) {
log_tree_commit(rev, commit);
free(commit->buffer);
commit->buffer = NULL;
}
so it doesn't get much prettier than this. All the complexity is entirely
hidden in log-tree.c, and any code that needs to flush the log literally
just needs to do the "if (rev->logopt) show_log(...)" incantation.
I had to make the combined_diff() logic take a "struct rev_info" instead
of just a "struct diff_options", but that part is pretty clean.
This does change "git whatchanged" from using "diff-tree" as the commit
descriptor to "commit", and I changed one of the tests to reflect that new
reality. Otherwise everything still passes, and my other tests look fine
too.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-17 20:59:32 +02:00
|
|
|
static const char *header_prefix;
|
2006-02-23 07:10:24 +01:00
|
|
|
|
2005-06-02 18:19:53 +02:00
|
|
|
static void show_commit(struct commit *commit)
|
|
|
|
{
|
2006-03-22 09:22:00 +01:00
|
|
|
if (show_timestamp)
|
|
|
|
printf("%lu ", commit->date);
|
Log message printout cleanups
On Sun, 16 Apr 2006, Junio C Hamano wrote:
>
> In the mid-term, I am hoping we can drop the generate_header()
> callchain _and_ the custom code that formats commit log in-core,
> found in cmd_log_wc().
Ok, this was nastier than expected, just because the dependencies between
the different log-printing stuff were absolutely _everywhere_, but here's
a patch that does exactly that.
The patch is not very easy to read, and the "--patch-with-stat" thing is
still broken (it does not call the "show_log()" thing properly for
merges). That's not a new bug. In the new world order it _should_ do
something like
if (rev->logopt)
show_log(rev, rev->logopt, "---\n");
but it doesn't. I haven't looked at the --with-stat logic, so I left it
alone.
That said, this patch removes more lines than it adds, and in particular,
the "cmd_log_wc()" loop is now a very clean:
while ((commit = get_revision(rev)) != NULL) {
log_tree_commit(rev, commit);
free(commit->buffer);
commit->buffer = NULL;
}
so it doesn't get much prettier than this. All the complexity is entirely
hidden in log-tree.c, and any code that needs to flush the log literally
just needs to do the "if (rev->logopt) show_log(...)" incantation.
I had to make the combined_diff() logic take a "struct rev_info" instead
of just a "struct diff_options", but that part is pretty clean.
This does change "git whatchanged" from using "diff-tree" as the commit
descriptor to "commit", and I changed one of the tests to reflect that new
reality. Otherwise everything still passes, and my other tests look fine
too.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-17 20:59:32 +02:00
|
|
|
if (header_prefix)
|
|
|
|
fputs(header_prefix, stdout);
|
2006-03-28 09:58:34 +02:00
|
|
|
if (commit->object.flags & BOUNDARY)
|
|
|
|
putchar('-');
|
2006-12-17 00:31:25 +01:00
|
|
|
else if (revs.left_right) {
|
2006-10-23 02:32:47 +02:00
|
|
|
if (commit->object.flags & SYMMETRIC_LEFT)
|
|
|
|
putchar('<');
|
|
|
|
else
|
|
|
|
putchar('>');
|
|
|
|
}
|
2006-04-16 08:48:27 +02:00
|
|
|
if (revs.abbrev_commit && revs.abbrev)
|
|
|
|
fputs(find_unique_abbrev(commit->object.sha1, revs.abbrev),
|
|
|
|
stdout);
|
2006-04-07 06:32:36 +02:00
|
|
|
else
|
|
|
|
fputs(sha1_to_hex(commit->object.sha1), stdout);
|
2006-03-31 02:52:42 +02:00
|
|
|
if (revs.parents) {
|
2005-06-02 18:19:53 +02:00
|
|
|
struct commit_list *parents = commit->parents;
|
|
|
|
while (parents) {
|
2007-07-09 04:05:31 +02:00
|
|
|
printf(" %s", sha1_to_hex(parents->item->object.sha1));
|
2005-06-02 18:19:53 +02:00
|
|
|
parents = parents->next;
|
|
|
|
}
|
|
|
|
}
|
2006-04-16 08:48:27 +02:00
|
|
|
if (revs.commit_format == CMIT_FMT_ONELINE)
|
2005-08-09 07:15:40 +02:00
|
|
|
putchar(' ');
|
|
|
|
else
|
|
|
|
putchar('\n');
|
|
|
|
|
2006-04-16 08:48:27 +02:00
|
|
|
if (revs.verbose_header) {
|
2007-09-10 12:35:06 +02:00
|
|
|
struct strbuf buf;
|
|
|
|
strbuf_init(&buf, 0);
|
|
|
|
pretty_print_commit(revs.commit_format, commit,
|
|
|
|
&buf, revs.abbrev, NULL, NULL, revs.date_mode);
|
|
|
|
printf("%s%c", buf.buf, hdr_termination);
|
|
|
|
strbuf_release(&buf);
|
2005-07-05 01:36:48 +02:00
|
|
|
}
|
2007-06-29 19:40:46 +02:00
|
|
|
maybe_flush_or_die(stdout, "stdout");
|
2006-06-18 03:47:58 +02:00
|
|
|
if (commit->parents) {
|
|
|
|
free_commit_list(commit->parents);
|
|
|
|
commit->parents = NULL;
|
|
|
|
}
|
2006-08-28 06:19:39 +02:00
|
|
|
free(commit->buffer);
|
|
|
|
commit->buffer = NULL;
|
2005-06-06 17:39:40 +02:00
|
|
|
}
|
|
|
|
|
2006-09-05 06:50:12 +02:00
|
|
|
static void show_object(struct object_array_entry *p)
|
2005-06-25 07:56:58 +02:00
|
|
|
{
|
2006-09-05 06:50:12 +02:00
|
|
|
/* An object with name "foo\n0000000..." can be used to
|
|
|
|
* confuse downstream git-pack-objects very badly.
|
|
|
|
*/
|
|
|
|
const char *ep = strchr(p->name, '\n');
|
Make sure quickfetch is not fooled with a previous, incomplete fetch.
This updates git-rev-list --objects to be a bit more careful
when listing a blob object to make sure the blob actually
exists, and uses it to make sure the quick-fetch optimization we
introduced earlier is not fooled by a previous incomplete fetch.
The quick-fetch optimization works by running this command:
git rev-list --objects <<commit-list>> --not --all
where <<commit-list>> is a list of commits that we are going to
fetch from the other side. If there is any object missing to
complete the <<commit-list>>, the rev-list would fail and die
(say, the commit was in our repository, but its tree wasn't --
then it will barf while trying to list the blobs the tree
contains because it cannot read that tree).
Usually we do not have the objects (otherwise why would we
fetching?), but in one important special case we do: when the
remote repository is used as an alternate object store
(i.e. pointed by .git/objects/info/alternates). We could check
.git/objects/info/alternates to see if the remote we are
interacting with is one of them (or is used as an alternate,
recursively, by one of them), but that check is more cumbersome
than it is worth.
The above check however did not catch missing blob, because
object listing code did not read nor check blob objects, knowing
that blobs do not contain any further references to other
objects. This commit fixes it with practically unmeasurable
overhead.
I've benched this with
git rev-list --objects --all >/dev/null
in the kernel repository, with three different implementations
of the "check-blob".
- Checking with has_sha1_file() has negligible (unmeasurable)
performance penalty.
- Checking with sha1_object_info() makes it somewhat slower,
perhaps by 5%.
- Checking with read_sha1_file() to cause a fully re-validation
is prohibitively expensive (about 4 times as much runtime).
In my original patch, I had this as a command line option, but
the overhead is small enough that it is not really worth it.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-04-16 09:42:29 +02:00
|
|
|
|
|
|
|
if (p->item->type == OBJ_BLOB && !has_sha1_file(p->item->sha1))
|
|
|
|
die("missing blob object '%s'", sha1_to_hex(p->item->sha1));
|
|
|
|
|
2006-09-05 06:50:12 +02:00
|
|
|
if (ep) {
|
|
|
|
printf("%s %.*s\n", sha1_to_hex(p->item->sha1),
|
|
|
|
(int) (ep - p->name),
|
|
|
|
p->name);
|
2005-06-25 07:56:58 +02:00
|
|
|
}
|
2006-09-05 06:50:12 +02:00
|
|
|
else
|
|
|
|
printf("%s %s\n", sha1_to_hex(p->item->sha1), p->name);
|
2005-06-25 07:56:58 +02:00
|
|
|
}
|
|
|
|
|
2006-09-06 10:42:23 +02:00
|
|
|
static void show_edge(struct commit *commit)
|
|
|
|
{
|
|
|
|
printf("-%s\n", sha1_to_hex(commit->object.sha1));
|
|
|
|
}
|
|
|
|
|
2005-06-18 07:54:50 +02:00
|
|
|
/*
|
|
|
|
* This is a truly stupid algorithm, but it's only
|
|
|
|
* used for bisection, and we just don't care enough.
|
|
|
|
*
|
|
|
|
* We care just barely enough to avoid recursing for
|
|
|
|
* non-merge entries.
|
|
|
|
*/
|
|
|
|
static int count_distance(struct commit_list *entry)
|
|
|
|
{
|
|
|
|
int nr = 0;
|
|
|
|
|
|
|
|
while (entry) {
|
|
|
|
struct commit *commit = entry->item;
|
|
|
|
struct commit_list *p;
|
|
|
|
|
|
|
|
if (commit->object.flags & (UNINTERESTING | COUNTED))
|
|
|
|
break;
|
2006-03-10 10:21:39 +01:00
|
|
|
if (!revs.prune_fn || (commit->object.flags & TREECHANGE))
|
2005-11-27 20:32:03 +01:00
|
|
|
nr++;
|
2005-06-18 07:54:50 +02:00
|
|
|
commit->object.flags |= COUNTED;
|
|
|
|
p = commit->parents;
|
|
|
|
entry = p;
|
|
|
|
if (p) {
|
|
|
|
p = p->next;
|
|
|
|
while (p) {
|
|
|
|
nr += count_distance(p);
|
|
|
|
p = p->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2005-11-27 20:32:03 +01:00
|
|
|
|
2005-06-18 07:54:50 +02:00
|
|
|
return nr;
|
|
|
|
}
|
|
|
|
|
2005-06-19 05:02:49 +02:00
|
|
|
static void clear_distance(struct commit_list *list)
|
2005-06-18 07:54:50 +02:00
|
|
|
{
|
|
|
|
while (list) {
|
|
|
|
struct commit *commit = list->item;
|
|
|
|
commit->object.flags &= ~COUNTED;
|
|
|
|
list = list->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-03-24 01:54:03 +01:00
|
|
|
#define DEBUG_BISECT 0
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parent.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
|
|
|
|
static inline int weight(struct commit_list *elem)
|
|
|
|
{
|
|
|
|
return *((int*)(elem->item->util));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void weight_set(struct commit_list *elem, int weight)
|
|
|
|
{
|
|
|
|
*((int*)(elem->item->util)) = weight;
|
|
|
|
}
|
|
|
|
|
2007-03-24 01:54:03 +01:00
|
|
|
static int count_interesting_parents(struct commit *commit)
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
{
|
2007-03-24 01:54:03 +01:00
|
|
|
struct commit_list *p;
|
|
|
|
int count;
|
|
|
|
|
|
|
|
for (count = 0, p = commit->parents; p; p = p->next) {
|
|
|
|
if (p->item->object.flags & UNINTERESTING)
|
|
|
|
continue;
|
|
|
|
count++;
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
}
|
2007-03-24 01:54:03 +01:00
|
|
|
return count;
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
}
|
|
|
|
|
2007-09-17 05:28:36 +02:00
|
|
|
static inline int halfway(struct commit_list *p, int nr)
|
2007-03-23 08:40:54 +01:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Don't short-cut something we are not going to return!
|
|
|
|
*/
|
|
|
|
if (revs.prune_fn && !(p->item->object.flags & TREECHANGE))
|
|
|
|
return 0;
|
2007-03-24 01:54:03 +01:00
|
|
|
if (DEBUG_BISECT)
|
|
|
|
return 0;
|
2007-03-23 08:40:54 +01:00
|
|
|
/*
|
|
|
|
* 2 and 3 are halfway of 5.
|
|
|
|
* 3 is halfway of 6 but 2 and 4 are not.
|
|
|
|
*/
|
2007-09-17 05:28:36 +02:00
|
|
|
switch (2 * weight(p) - nr) {
|
2007-03-23 08:40:54 +01:00
|
|
|
case -1: case 0: case 1:
|
|
|
|
return 1;
|
|
|
|
default:
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-03-24 01:54:03 +01:00
|
|
|
#if !DEBUG_BISECT
#define show_list(a,b,c,d) do { ; } while (0)
#else
/*
 * Debugging aid, compiled only when DEBUG_BISECT is non-zero.
 *
 * Writes a heading "<debug> (<counted>/<nr>)" to stderr, followed by
 * one line per commit on the list: the T/U/C flag letters
 * (TREECHANGE, UNINTERESTING, COUNTED), the weight (or "---" when
 * the commit has no util pointer yet), the first 8 hex digits of the
 * commit and of each parent, and the first line of the text that
 * follows the first blank line of the object.
 */
static void show_list(const char *debug, int counted, int nr,
		      struct commit_list *list)
{
	struct commit_list *p;

	fprintf(stderr, "%s (%d/%d)\n", debug, counted, nr);

	for (p = list; p; p = p->next) {
		struct commit_list *pp;
		struct commit *commit = p->item;
		unsigned flags = commit->object.flags;
		enum object_type type;
		unsigned long size;
		char *buf = read_sha1_file(commit->object.sha1, &type, &size);
		char *ep, *sp;

		fprintf(stderr, "%c%c%c ",
			(flags & TREECHANGE) ? 'T' : ' ',
			(flags & UNINTERESTING) ? 'U' : ' ',
			(flags & COUNTED) ? 'C' : ' ');
		if (commit->util)
			fprintf(stderr, "%3d", weight(p));
		else
			fprintf(stderr, "---");
		fprintf(stderr, " %.*s", 8, sha1_to_hex(commit->object.sha1));
		for (pp = commit->parents; pp; pp = pp->next)
			fprintf(stderr, " %.*s", 8,
				sha1_to_hex(pp->item->object.sha1));

		/*
		 * The object may be unreadable; do not hand a NULL
		 * buffer to strstr(), just omit the subject then.
		 */
		sp = buf ? strstr(buf, "\n\n") : NULL;
		if (sp) {
			sp += 2;
			for (ep = sp; *ep && *ep != '\n'; ep++)
				;
			fprintf(stderr, " %.*s", (int)(ep - sp), sp);
		}
		fprintf(stderr, "\n");

		/*
		 * NOTE(review): assumes read_sha1_file() returns a
		 * buffer owned by the caller; without this the buffer
		 * leaks once per listed commit.
		 */
		free(buf);
	}
}
#endif /* DEBUG_BISECT */
|
|
|
|
|
2007-09-17 05:28:29 +02:00
|
|
|
static struct commit_list *best_bisection(struct commit_list *list, int nr)
|
|
|
|
{
|
|
|
|
struct commit_list *p, *best;
|
|
|
|
int best_distance = -1;
|
|
|
|
|
|
|
|
best = list;
|
|
|
|
for (p = list; p; p = p->next) {
|
|
|
|
int distance;
|
|
|
|
unsigned flags = p->item->object.flags;
|
|
|
|
|
|
|
|
if (revs.prune_fn && !(flags & TREECHANGE))
|
|
|
|
continue;
|
|
|
|
distance = weight(p);
|
|
|
|
if (nr - distance < distance)
|
|
|
|
distance = nr - distance;
|
|
|
|
if (distance > best_distance) {
|
|
|
|
best = p;
|
|
|
|
best_distance = distance;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return best;
|
|
|
|
}
|
|
|
|
|
2007-03-24 01:54:03 +01:00
|
|
|
/*
|
|
|
|
* zero or positive weight is the number of interesting commits it can
|
|
|
|
* reach, including itself. Especially, weight = 0 means it does not
|
|
|
|
* reach any tree-changing commits (e.g. just above uninteresting one
|
|
|
|
* but traversal is with pathspec).
|
|
|
|
*
|
|
|
|
* weight = -1 means it has one parent and its distance is yet to
|
|
|
|
* be computed.
|
|
|
|
*
|
|
|
|
* weight = -2 means it has more than one parent and its distance is
|
|
|
|
* unknown. After running count_distance() first, they will get zero
|
|
|
|
* or positive distance.
|
|
|
|
*/
|
2007-09-17 05:28:20 +02:00
|
|
|
static struct commit_list *do_find_bisection(struct commit_list *list,
|
|
|
|
int nr, int *weights)
|
2007-03-24 01:54:03 +01:00
|
|
|
{
|
2007-09-17 05:28:36 +02:00
|
|
|
int n, counted;
|
2007-09-17 05:28:29 +02:00
|
|
|
struct commit_list *p;
|
2007-03-24 01:54:03 +01:00
|
|
|
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
counted = 0;
|
|
|
|
|
|
|
|
for (n = 0, p = list; p; p = p->next) {
|
2007-03-24 01:54:03 +01:00
|
|
|
struct commit *commit = p->item;
|
|
|
|
unsigned flags = commit->object.flags;
|
|
|
|
|
|
|
|
p->item->util = &weights[n++];
|
|
|
|
switch (count_interesting_parents(commit)) {
|
|
|
|
case 0:
|
|
|
|
if (!revs.prune_fn || (flags & TREECHANGE)) {
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
weight_set(p, 1);
|
|
|
|
counted++;
|
2007-03-24 01:54:03 +01:00
|
|
|
show_list("bisection 2 count one",
|
|
|
|
counted, nr, list);
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
}
|
2007-03-24 01:54:03 +01:00
|
|
|
/*
|
|
|
|
* otherwise, it is known not to reach any
|
|
|
|
* tree-changing commit and gets weight 0.
|
|
|
|
*/
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
weight_set(p, -1);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
weight_set(p, -2);
|
|
|
|
break;
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-03-24 01:54:03 +01:00
|
|
|
show_list("bisection 2 initialize", counted, nr, list);
|
|
|
|
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
/*
|
|
|
|
* If you have only one parent in the resulting set
|
|
|
|
* then you can reach one commit more than that parent
|
|
|
|
* can reach. So we do not have to run the expensive
|
|
|
|
* count_distance() for single strand of pearls.
|
|
|
|
*
|
|
|
|
* However, if you have more than one parents, you cannot
|
|
|
|
* just add their distance and one for yourself, since
|
|
|
|
* they usually reach the same ancestor and you would
|
|
|
|
* end up counting them twice that way.
|
|
|
|
*
|
|
|
|
* So we will first count distance of merges the usual
|
|
|
|
* way, and then fill the blanks using cheaper algorithm.
|
|
|
|
*/
|
|
|
|
for (p = list; p; p = p->next) {
|
2007-03-24 01:54:03 +01:00
|
|
|
if (p->item->object.flags & UNINTERESTING)
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
continue;
|
2007-09-17 05:28:36 +02:00
|
|
|
if (weight(p) != -2)
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
continue;
|
2007-09-17 05:28:36 +02:00
|
|
|
weight_set(p, count_distance(p));
|
2007-03-24 01:54:03 +01:00
|
|
|
clear_distance(list);
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
|
|
|
|
/* Does it happen to be at exactly half-way? */
|
2007-09-17 05:28:36 +02:00
|
|
|
if (halfway(p, nr))
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
return p;
|
|
|
|
counted++;
|
|
|
|
}
|
|
|
|
|
2007-03-24 01:54:03 +01:00
|
|
|
show_list("bisection 2 count_distance", counted, nr, list);
|
|
|
|
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
while (counted < nr) {
|
|
|
|
for (p = list; p; p = p->next) {
|
|
|
|
struct commit_list *q;
|
2007-03-24 01:54:03 +01:00
|
|
|
unsigned flags = p->item->object.flags;
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
|
2007-03-24 01:54:03 +01:00
|
|
|
if (0 <= weight(p))
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
continue;
|
2007-03-24 01:54:03 +01:00
|
|
|
for (q = p->item->parents; q; q = q->next) {
|
|
|
|
if (q->item->object.flags & UNINTERESTING)
|
|
|
|
continue;
|
|
|
|
if (0 <= weight(q))
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
break;
|
2007-03-24 01:54:03 +01:00
|
|
|
}
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
if (!q)
|
|
|
|
continue;
|
2007-03-24 01:54:03 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* weight for p is unknown but q is known.
|
|
|
|
* add one for p itself if p is to be counted,
|
|
|
|
* otherwise inherit it from q directly.
|
|
|
|
*/
|
|
|
|
if (!revs.prune_fn || (flags & TREECHANGE)) {
|
|
|
|
weight_set(p, weight(q)+1);
|
|
|
|
counted++;
|
|
|
|
show_list("bisection 2 count one",
|
|
|
|
counted, nr, list);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
weight_set(p, weight(q));
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
|
|
|
|
/* Does it happen to be at exactly half-way? */
|
2007-09-17 05:28:36 +02:00
|
|
|
if (halfway(p, nr))
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
return p;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-03-24 01:54:03 +01:00
|
|
|
show_list("bisection 2 counted all", counted, nr, list);
|
|
|
|
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
/* Then find the best one */
|
2007-09-17 05:28:29 +02:00
|
|
|
return best_bisection(list, nr);
|
2007-09-17 05:28:20 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static struct commit_list *find_bisection(struct commit_list *list,
|
|
|
|
int *reaches, int *all)
|
|
|
|
{
|
|
|
|
int nr, on_list;
|
|
|
|
struct commit_list *p, *best, *next, *last;
|
|
|
|
int *weights;
|
|
|
|
|
|
|
|
show_list("bisection 2 entry", 0, 0, list);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Count the number of total and tree-changing items on the
|
|
|
|
* list, while reversing the list.
|
|
|
|
*/
|
|
|
|
for (nr = on_list = 0, last = NULL, p = list;
|
|
|
|
p;
|
|
|
|
p = next) {
|
|
|
|
unsigned flags = p->item->object.flags;
|
|
|
|
|
|
|
|
next = p->next;
|
|
|
|
if (flags & UNINTERESTING)
|
|
|
|
continue;
|
|
|
|
p->next = last;
|
|
|
|
last = p;
|
|
|
|
if (!revs.prune_fn || (flags & TREECHANGE))
|
|
|
|
nr++;
|
|
|
|
on_list++;
|
|
|
|
}
|
|
|
|
list = last;
|
|
|
|
show_list("bisection 2 sorted", 0, nr, list);
|
|
|
|
|
|
|
|
*all = nr;
|
|
|
|
weights = xcalloc(on_list, sizeof(*weights));
|
|
|
|
|
|
|
|
/* Do the real work of finding bisection commit. */
|
|
|
|
best = do_find_bisection(list, nr, weights);
|
|
|
|
|
2007-09-20 07:23:01 +02:00
|
|
|
if (best) {
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
best->next = NULL;
|
2007-09-20 07:23:01 +02:00
|
|
|
*reaches = weight(best);
|
|
|
|
}
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parents.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
The performance to bisect between v1.0.0 and v1.5.0 in git.git
repository was improved by saying good and bad in turns from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
free(weights);
|
2007-09-17 05:28:20 +02:00
|
|
|
|
git-rev-list --bisect: optimization
This improves the performance of revision bisection.
The idea is to avoid rather expensive count_distance() function,
which counts the number of commits that are reachable from any
given commit (including itself) in the set. When a commit has
only one relevant parent commit, the number of commits the
commit can reach is exactly the number of commits that the
parent can reach plus one; instead of running count_distance()
on commits that are on straight single strand of pearls, we can
just add one to the parents' count.
On the other hand, for a merge commit, because the commits
reachable from one parent can be reachable from another parent,
you cannot just add the parents' counts up plus one for the
commit itself; that would overcount ancestors that are reachable
from more than one parent.
The algorithm used in the patch runs count_distance() on merge
commits, and uses the util field of commit objects to remember
them. After that, the number of commits reachable from each of
the remaining commits is counted by finding a commit whose count
is not yet known but the count for its (sole) parent is known,
and adding one to the parent's count, until we assign numbers to
everybody.
Another small optimization is whenever we find a half-way commit
(that is, a commit that can reach exactly half of the commits),
we stop giving counts to remaining commits, as we will not find
any better commit than we just found.
Bisecting between v1.0.0 and v1.5.0 in the git.git repository,
answering good and bad in turns, was sped up from
3.68 seconds down to 1.26 seconds. Bisecting the kernel between
v2.6.18 and v2.6.20 was sped up from 21.84 seconds down to 4.22
seconds.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-22 06:16:24 +01:00
|
|
|
return best;
|
|
|
|
}
|
|
|
|
|
2006-09-06 06:39:02 +02:00
|
|
|
/*
 * Read revision arguments from standard input, one per line, and pass
 * each one to handle_revision_arg() together with "revs".
 *
 * Reading stops at end of input or at the first empty line.  A line
 * that starts with '-' is fatal (options are not accepted in --stdin
 * mode), as is a line for which handle_revision_arg() returns non-zero.
 */
static void read_revisions_from_stdin(struct rev_info *revs)
{
	char line[1000];

	while (fgets(line, sizeof(line), stdin) != NULL) {
		int len = strlen(line);

		/*
		 * strlen() can be 0 here if the input line begins with a
		 * NUL byte; check len first so we never look at line[-1].
		 */
		if (len && line[len - 1] == '\n')
			line[--len] = 0;
		if (!len)
			break;
		if (line[0] == '-')
			die("options not supported in --stdin mode");
		if (handle_revision_arg(line, revs, 0, 1))
			die("bad revision '%s'", line);
	}
}
|
|
|
|
|
2006-07-29 07:44:25 +02:00
|
|
|
int cmd_rev_list(int argc, const char **argv, const char *prefix)
|
2005-04-24 04:04:40 +02:00
|
|
|
{
|
2006-02-26 01:19:46 +01:00
|
|
|
struct commit_list *list;
|
2006-02-27 17:54:36 +01:00
|
|
|
int i;
|
2006-09-06 06:39:02 +02:00
|
|
|
int read_from_stdin = 0;
|
2007-03-22 06:15:54 +01:00
|
|
|
int bisect_show_vars = 0;
|
2005-04-24 04:04:40 +02:00
|
|
|
|
2007-02-18 10:36:22 +01:00
|
|
|
git_config(git_default_config);
|
2006-07-29 07:44:25 +02:00
|
|
|
init_revisions(&revs, prefix);
|
2006-04-16 08:48:27 +02:00
|
|
|
revs.abbrev = 0;
|
|
|
|
revs.commit_format = CMIT_FMT_UNSPECIFIED;
|
2006-02-28 20:24:00 +01:00
|
|
|
argc = setup_revisions(argc, argv, &revs, NULL);
|
2006-02-26 01:19:46 +01:00
|
|
|
|
2005-05-06 10:00:11 +02:00
|
|
|
for (i = 1 ; i < argc; i++) {
|
2005-10-21 06:25:09 +02:00
|
|
|
const char *arg = argv[i];
|
2005-05-06 10:00:11 +02:00
|
|
|
|
2005-05-26 03:29:09 +02:00
|
|
|
if (!strcmp(arg, "--header")) {
|
2006-04-16 08:48:27 +02:00
|
|
|
revs.verbose_header = 1;
|
2005-06-01 17:42:22 +02:00
|
|
|
continue;
|
|
|
|
}
|
2006-03-22 09:22:00 +01:00
|
|
|
if (!strcmp(arg, "--timestamp")) {
|
|
|
|
show_timestamp = 1;
|
|
|
|
continue;
|
|
|
|
}
|
2005-06-18 07:54:50 +02:00
|
|
|
if (!strcmp(arg, "--bisect")) {
|
|
|
|
bisect_list = 1;
|
|
|
|
continue;
|
|
|
|
}
|
2007-03-22 06:15:54 +01:00
|
|
|
if (!strcmp(arg, "--bisect-vars")) {
|
|
|
|
bisect_list = 1;
|
|
|
|
bisect_show_vars = 1;
|
|
|
|
continue;
|
|
|
|
}
|
2006-09-06 06:39:02 +02:00
|
|
|
if (!strcmp(arg, "--stdin")) {
|
|
|
|
if (read_from_stdin++)
|
|
|
|
die("--stdin given twice?");
|
|
|
|
read_revisions_from_stdin(&revs);
|
|
|
|
continue;
|
|
|
|
}
|
2006-02-26 01:19:46 +01:00
|
|
|
usage(rev_list_usage);
|
2005-05-26 03:29:09 +02:00
|
|
|
|
2005-05-06 10:00:11 +02:00
|
|
|
}
|
2006-04-16 08:48:27 +02:00
|
|
|
if (revs.commit_format != CMIT_FMT_UNSPECIFIED) {
|
|
|
|
/* The command line has a --pretty */
|
|
|
|
hdr_termination = '\n';
|
|
|
|
if (revs.commit_format == CMIT_FMT_ONELINE)
|
Log message printout cleanups
On Sun, 16 Apr 2006, Junio C Hamano wrote:
>
> In the mid-term, I am hoping we can drop the generate_header()
> callchain _and_ the custom code that formats commit log in-core,
> found in cmd_log_wc().
Ok, this was nastier than expected, just because the dependencies between
the different log-printing stuff were absolutely _everywhere_, but here's
a patch that does exactly that.
The patch is not very easy to read, and the "--patch-with-stat" thing is
still broken (it does not call the "show_log()" thing properly for
merges). That's not a new bug. In the new world order it _should_ do
something like
if (rev->logopt)
show_log(rev, rev->logopt, "---\n");
but it doesn't. I haven't looked at the --with-stat logic, so I left it
alone.
That said, this patch removes more lines than it adds, and in particular,
the "cmd_log_wc()" loop is now a very clean:
while ((commit = get_revision(rev)) != NULL) {
log_tree_commit(rev, commit);
free(commit->buffer);
commit->buffer = NULL;
}
so it doesn't get much prettier than this. All the complexity is entirely
hidden in log-tree.c, and any code that needs to flush the log literally
just needs to do the "if (rev->logopt) show_log(...)" incantation.
I had to make the combined_diff() logic take a "struct rev_info" instead
of just a "struct diff_options", but that part is pretty clean.
This does change "git whatchanged" from using "diff-tree" as the commit
descriptor to "commit", and I changed one of the tests to reflect that new
reality. Otherwise everything still passes, and my other tests look fine
too.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-17 20:59:32 +02:00
|
|
|
header_prefix = "";
|
2006-04-16 08:48:27 +02:00
|
|
|
else
|
Log message printout cleanups
On Sun, 16 Apr 2006, Junio C Hamano wrote:
>
> In the mid-term, I am hoping we can drop the generate_header()
> callchain _and_ the custom code that formats commit log in-core,
> found in cmd_log_wc().
Ok, this was nastier than expected, just because the dependencies between
the different log-printing stuff were absolutely _everywhere_, but here's
a patch that does exactly that.
The patch is not very easy to read, and the "--patch-with-stat" thing is
still broken (it does not call the "show_log()" thing properly for
merges). That's not a new bug. In the new world order it _should_ do
something like
if (rev->logopt)
show_log(rev, rev->logopt, "---\n");
but it doesn't. I haven't looked at the --with-stat logic, so I left it
alone.
That said, this patch removes more lines than it adds, and in particular,
the "cmd_log_wc()" loop is now a very clean:
while ((commit = get_revision(rev)) != NULL) {
log_tree_commit(rev, commit);
free(commit->buffer);
commit->buffer = NULL;
}
so it doesn't get much prettier than this. All the complexity is entirely
hidden in log-tree.c, and any code that needs to flush the log literally
just needs to do the "if (rev->logopt) show_log(...)" incantation.
I had to make the combined_diff() logic take a "struct rev_info" instead
of just a "struct diff_options", but that part is pretty clean.
This does change "git whatchanged" from using "diff-tree" as the commit
descriptor to "commit", and I changed one of the tests to reflect that new
reality. Otherwise everything still passes, and my other tests look fine
too.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-17 20:59:32 +02:00
|
|
|
header_prefix = "commit ";
|
2006-04-16 08:48:27 +02:00
|
|
|
}
|
2006-04-17 21:42:36 +02:00
|
|
|
else if (revs.verbose_header)
|
|
|
|
/* Only --header was specified */
|
|
|
|
revs.commit_format = CMIT_FMT_RAW;
|
2005-05-06 10:00:11 +02:00
|
|
|
|
2006-02-26 01:19:46 +01:00
|
|
|
list = revs.commits;
|
|
|
|
|
2006-04-15 07:43:34 +02:00
|
|
|
if ((!list &&
|
|
|
|
(!(revs.tag_objects||revs.tree_objects||revs.blob_objects) &&
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
|
|
|
!revs.pending.nr)) ||
|
2006-04-15 07:43:34 +02:00
|
|
|
revs.diff)
|
2005-10-26 00:24:55 +02:00
|
|
|
usage(rev_list_usage);
|
|
|
|
|
2006-09-20 22:21:56 +02:00
|
|
|
save_commit_buffer = revs.verbose_header || revs.grep_filter;
|
2006-03-29 03:28:04 +02:00
|
|
|
track_object_refs = 0;
|
rev-list --bisect: limit list before bisecting.
I noticed bisect does not work well without both good and bad.
Running this script in git.git repository would give you quite
different results:
#!/bin/sh
initial=e83c5163316f89bfbde7d9ab23ca2e25604af290
mid0=`git rev-list --bisect ^$initial --all`
git rev-list $mid0 | wc -l
git rev-list ^$mid0 --all | wc -l
mid1=`git rev-list --bisect --all`
git rev-list $mid1 | wc -l
git rev-list ^$mid1 --all | wc -l
The $initial commit is the very first commit you made. The
first midpoint bisects things evenly as designed, but the latter
does not.
The reason I got interested in this was because I was wondering
if something like the following would help people converting a
huge repository from foreign SCM, or preparing a repository to
be fetched over plain dumb HTTP only:
#!/bin/sh
N=4
P=.git/objects/pack
bottom=
while test 0 \< $N
do
N=$((N-1))
if test -z "$bottom"
then
newbottom=`git rev-list --bisect --all`
else
newbottom=`git rev-list --bisect ^$bottom --all`
fi
if test -z "$bottom"
then
rev_list="$newbottom"
elif test 0 = $N
then
rev_list="^$bottom --all"
else
rev_list="^$bottom $newbottom"
fi
p=$(git rev-list --unpacked --objects $rev_list |
git pack-objects $P/pack)
git show-index <$P/pack-$p.idx | wc -l
bottom=$newbottom
done
The idea is to pack older half of the history to one pack, then
older half of the remaining history to another, to continue a
few times, using finer granularity as we get closer to the tip.
This may not matter, since for a truly huge history, running
bisect number of times could be quite time consuming, and we
might be better off running "git rev-list --all" once into a
temporary file, and manually pick cut-off points from the
resulting list of commits. After all we are talking about
"approximately half" for such an usage, and older history does
not matter much.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-15 00:57:32 +02:00
|
|
|
if (bisect_list)
|
|
|
|
revs.limited = 1;
|
2006-03-29 03:28:04 +02:00
|
|
|
|
2006-02-28 20:24:00 +01:00
|
|
|
prepare_revision_walk(&revs);
|
|
|
|
if (revs.tree_objects)
|
2006-09-06 10:42:23 +02:00
|
|
|
mark_edges_uninteresting(revs.commits, &revs, show_edge);
|
2006-02-28 20:24:00 +01:00
|
|
|
|
2007-03-22 06:15:54 +01:00
|
|
|
if (bisect_list) {
|
|
|
|
int reaches = reaches, all = all;
|
|
|
|
|
2007-03-24 01:54:03 +01:00
|
|
|
revs.commits = find_bisection(revs.commits, &reaches, &all);
|
2007-03-22 06:15:54 +01:00
|
|
|
if (bisect_show_vars) {
|
|
|
|
int cnt;
|
|
|
|
if (!revs.commits)
|
|
|
|
return 1;
|
|
|
|
/*
|
|
|
|
* revs.commits can reach "reaches" commits among
|
|
|
|
* "all" commits. If it is good, then there are
|
|
|
|
* (all-reaches) commits left to be bisected.
|
|
|
|
* On the other hand, if it is bad, then the set
|
|
|
|
* to bisect is "reaches".
|
|
|
|
* A bisect set of size N has (N-1) commits further
|
|
|
|
* to test, as we already know one bad one.
|
|
|
|
*/
|
|
|
|
cnt = all-reaches;
|
|
|
|
if (cnt < reaches)
|
|
|
|
cnt = reaches;
|
|
|
|
printf("bisect_rev=%s\n"
|
|
|
|
"bisect_nr=%d\n"
|
|
|
|
"bisect_good=%d\n"
|
|
|
|
"bisect_bad=%d\n"
|
|
|
|
"bisect_all=%d\n",
|
|
|
|
sha1_to_hex(revs.commits->item->object.sha1),
|
|
|
|
cnt - 1,
|
|
|
|
all - reaches - 1,
|
|
|
|
reaches - 1,
|
|
|
|
all);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
2005-10-26 00:24:55 +02:00
|
|
|
|
2006-09-05 06:50:12 +02:00
|
|
|
traverse_commit_list(&revs, show_commit, show_object);
|
2005-05-31 03:46:32 +02:00
|
|
|
|
2005-04-24 04:04:40 +02:00
|
|
|
return 0;
|
|
|
|
}
|