git-commit-vandalism/builtin/rev-list.c

417 lines
10 KiB
C
Raw Normal View History

#include "cache.h"
#include "commit.h"
#include "diff.h"
#include "revision.h"
#include "list-objects.h"
rev-list: add bitmap mode to speed up object lists The bitmap reachability index used to speed up the counting objects phase during `pack-objects` can also be used to optimize a normal rev-list if the only thing required are the SHA1s of the objects during the list (i.e., not the path names at which trees and blobs were found). Calling `git rev-list --objects --use-bitmap-index [committish]` will perform an object iteration based on a bitmap result instead of actually walking the object graph. These are some example timings for `torvalds/linux` (warm cache, best-of-five): $ time git rev-list --objects master > /dev/null real 0m34.191s user 0m33.904s sys 0m0.268s $ time git rev-list --objects --use-bitmap-index master > /dev/null real 0m1.041s user 0m0.976s sys 0m0.064s Likewise, using `git rev-list --count --use-bitmap-index` will speed up the counting operation by building the resulting bitmap and performing a fast popcount (number of bits set on the bitmap) on the result. Here are some sample timings of different ways to count commits in `torvalds/linux`: $ time git rev-list master | wc -l 399882 real 0m6.524s user 0m6.060s sys 0m3.284s $ time git rev-list --count master 399882 real 0m4.318s user 0m4.236s sys 0m0.076s $ time git rev-list --use-bitmap-index --count master 399882 real 0m0.217s user 0m0.176s sys 0m0.040s This also respects negative refs, so you can use it to count a slice of history: $ time git rev-list --count v3.0..master 144843 real 0m1.971s user 0m1.932s sys 0m0.036s $ time git rev-list --use-bitmap-index --count v3.0..master real 0m0.280s user 0m0.220s sys 0m0.056s Though note that the closer the endpoints, the less it helps. In the traversal case, we have fewer commits to cross, so we take less time. But the bitmap time is dominated by generating the pack revindex, which is constant with respect to the refs given. Note that you cannot yet get a fast --left-right count of a symmetric difference (e.g., "--count --left-right master...topic"). The slow part of that walk actually happens during the merge-base determination when we parse "master...topic". Even though a count does not actually need to know the real merge base (it only needs to take the symmetric difference of the bitmaps), the revision code would require some refactoring to handle this case. Additionally, a `--test-bitmap` flag has been added that will perform the same rev-list manually (i.e. using a normal revwalk) and using bitmaps, and verify that the results are the same. This can be used to exercise the bitmap code, and also to verify that the contents of the .bitmap file are sane. Signed-off-by: Vicent Marti <tanoku@gmail.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-21 15:00:12 +01:00
#include "pack.h"
#include "pack-bitmap.h"
#include "builtin.h"
#include "log-tree.h"
#include "graph.h"
#include "bisect.h"
#include "progress.h"
static const char rev_list_usage[] =
"git rev-list [OPTION] <commit-id>... [ -- paths... ]\n"
" limiting output:\n"
" --max-count=<n>\n"
" --max-age=<epoch>\n"
" --min-age=<epoch>\n"
" --sparse\n"
" --no-merges\n"
" --min-parents=<n>\n"
" --no-min-parents\n"
" --max-parents=<n>\n"
" --no-max-parents\n"
" --remove-empty\n"
" --all\n"
" --branches\n"
" --tags\n"
" --remotes\n"
" --stdin\n"
" --quiet\n"
" ordering output:\n"
" --topo-order\n"
" --date-order\n"
" --reverse\n"
" formatting output:\n"
" --parents\n"
" --children\n"
" --objects | --objects-edge\n"
" --unpacked\n"
" --header | --pretty\n"
" --abbrev=<n> | --no-abbrev\n"
" --abbrev-commit\n"
" --left-right\n"
" --count\n"
" special purpose:\n"
" --bisect\n"
" --bisect-vars\n"
" --bisect-all"
;
static struct progress *progress;
static unsigned progress_counter;
static void finish_commit(struct commit *commit, void *data);
static void show_commit(struct commit *commit, void *data)
{
struct rev_list_info *info = data;
struct rev_info *revs = info->revs;
display_progress(progress, ++progress_counter);
if (info->flags & REV_LIST_QUIET) {
finish_commit(commit, data);
return;
}
graph_show_commit(revs->graph);
if (revs->count) {
if (commit->object.flags & PATCHSAME)
revs->count_same++;
else if (commit->object.flags & SYMMETRIC_LEFT)
revs->count_left++;
else
revs->count_right++;
finish_commit(commit, data);
return;
}
if (info->show_timestamp)
printf("%lu ", commit->date);
if (info->header_prefix)
fputs(info->header_prefix, stdout);
if (!revs->graph)
fputs(get_revision_mark(revs, commit), stdout);
if (revs->abbrev_commit && revs->abbrev)
fputs(find_unique_abbrev(commit->object.oid.hash, revs->abbrev),
stdout);
else
fputs(oid_to_hex(&commit->object.oid), stdout);
if (revs->print_parents) {
struct commit_list *parents = commit->parents;
while (parents) {
printf(" %s", oid_to_hex(&parents->item->object.oid));
parents = parents->next;
}
}
if (revs->children.name) {
struct commit_list *children;
children = lookup_decoration(&revs->children, &commit->object);
while (children) {
printf(" %s", oid_to_hex(&children->item->object.oid));
children = children->next;
}
}
show_decorations(revs, commit);
if (revs->commit_format == CMIT_FMT_ONELINE)
putchar(' ');
else
putchar('\n');
if (revs->verbose_header && get_cached_commit_buffer(commit, NULL)) {
struct strbuf buf = STRBUF_INIT;
struct pretty_print_context ctx = {0};
ctx.abbrev = revs->abbrev;
ctx.date_mode = revs->date_mode;
ctx.date_mode_explicit = revs->date_mode_explicit;
ctx.fmt = revs->commit_format;
ctx.output_encoding = get_log_output_encoding();
pretty_print_commit(&ctx, commit, &buf);
if (buf.len) {
if (revs->commit_format != CMIT_FMT_ONELINE)
graph_show_oneline(revs->graph);
graph_show_commit_msg(revs->graph, stdout, &buf);
/*
* Add a newline after the commit message.
*
* Usually, this newline produces a blank
* padding line between entries, in which case
* we need to add graph padding on this line.
*
* However, the commit message may not end in a
* newline. In this case the newline simply
* ends the last line of the commit message,
* and we don't need any graph output. (This
* always happens with CMIT_FMT_ONELINE, and it
* happens with CMIT_FMT_USERFORMAT when the
* format doesn't explicitly end in a newline.)
*/
if (buf.len && buf.buf[buf.len - 1] == '\n')
graph_show_padding(revs->graph);
putchar(info->hdr_termination);
} else {
/*
* If the message buffer is empty, just show
* the rest of the graph output for this
* commit.
*/
if (graph_show_remainder(revs->graph))
putchar('\n');
if (revs->commit_format == CMIT_FMT_ONELINE)
putchar('\n');
}
strbuf_release(&buf);
} else {
if (graph_show_remainder(revs->graph))
putchar('\n');
}
maybe_flush_or_die(stdout, "stdout");
finish_commit(commit, data);
}
static void finish_commit(struct commit *commit, void *data)
{
if (commit->parents) {
free_commit_list(commit->parents);
commit->parents = NULL;
}
free_commit_buffer(commit);
[PATCH] Modify git-rev-list to linearise the commit history in merge order. This patch linearises the GIT commit history graph into merge order which is defined by invariants specified in Documentation/git-rev-list.txt. The linearisation produced by this patch is superior in an objective sense to that produced by the existing git-rev-list implementation in that the linearisation produced is guaranteed to have the minimum number of discontinuities, where a discontinuity is defined as an adjacent pair of commits in the output list which are not related in a direct child-parent relationship. With this patch a graph like this: a4 --- | \ \ | b4 | |/ | | a3 | | | | | a2 | | | | c3 | | | | | c2 | b3 | | | /| | b2 | | | c1 | | / | b1 a1 | | | a0 | | / root Sorts like this: = a4 | c3 | c2 | c1 ^ b4 | b3 | b2 | b1 ^ a3 | a2 | a1 | a0 = root Instead of this: = a4 | c3 ^ b4 | a3 ^ c2 ^ b3 ^ a2 ^ b2 ^ c1 ^ a1 ^ b1 ^ a0 = root A test script, t/t6000-rev-list.sh, includes a test which demonstrates that the linearisation produced by --merge-order has less discontinuities than the linearisation produced by git-rev-list without the --merge-order flag specified. To see this, do the following: cd t ./t6000-rev-list.sh cd trash cat actual-default-order cat actual-merge-order The existing behaviour of git-rev-list is preserved, by default. To obtain the modified behaviour, specify --merge-order or --merge-order --show-breaks on the command line. This version of the patch has been tested on the git repository and also on the linux-2.6 repository and has reasonable performance on both - ~50-100% slower than the original algorithm. This version of the patch has incorporated a functional equivalent of the Linus' output limiting algorithm into the merge-order algorithm itself. This operates per the notes associated with Linus' commit 337cb3fb8da45f10fe9a0c3cf571600f55ead2ce. This version has incorporated Linus' feedback regarding proposed changes to rev-list.c. (see: [PATCH] Factor out filtering in rev-list.c) This version has improved the way sort_first_epoch marks commits as uninteresting. For more details about this change, refer to Documentation/git-rev-list.txt and http://blackcubes.dyndns.org/epoch/. Signed-off-by: Jon Seymour <jon.seymour@gmail.com> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-06 17:39:40 +02:00
}
static void finish_object(struct object *obj, const char *name, void *cb_data)
{
struct rev_list_info *info = cb_data;
if (obj->type == OBJ_BLOB && !has_object_file(&obj->oid))
die("missing blob object '%s'", oid_to_hex(&obj->oid));
if (info->revs->verify_objects && !obj->parsed && obj->type != OBJ_COMMIT)
parse_object(obj->oid.hash);
}
static void show_object(struct object *obj, const char *name, void *cb_data)
{
struct rev_list_info *info = cb_data;
finish_object(obj, name, cb_data);
display_progress(progress, ++progress_counter);
if (info->flags & REV_LIST_QUIET)
return;
show_object_with_name(stdout, obj, name);
}
static void show_edge(struct commit *commit)
{
printf("-%s\n", oid_to_hex(&commit->object.oid));
}
static void print_var_str(const char *var, const char *val)
{
printf("%s='%s'\n", var, val);
}
static void print_var_int(const char *var, int val)
{
printf("%s=%d\n", var, val);
}
static int show_bisect_vars(struct rev_list_info *info, int reaches, int all)
{
int cnt, flags = info->flags;
char hex[GIT_MAX_HEXSZ + 1] = "";
struct commit_list *tried;
struct rev_info *revs = info->revs;
if (!revs->commits)
return 1;
revs->commits = filter_skipped(revs->commits, &tried,
flags & BISECT_SHOW_ALL,
NULL, NULL);
/*
* revs->commits can reach "reaches" commits among
* "all" commits. If it is good, then there are
* (all-reaches) commits left to be bisected.
* On the other hand, if it is bad, then the set
* to bisect is "reaches".
* A bisect set of size N has (N-1) commits further
* to test, as we already know one bad one.
*/
cnt = all - reaches;
if (cnt < reaches)
cnt = reaches;
if (revs->commits)
oid_to_hex_r(hex, &revs->commits->item->object.oid);
if (flags & BISECT_SHOW_ALL) {
traverse_commit_list(revs, show_commit, show_object, info);
printf("------\n");
}
print_var_str("bisect_rev", hex);
print_var_int("bisect_nr", cnt - 1);
print_var_int("bisect_good", all - reaches - 1);
print_var_int("bisect_bad", reaches - 1);
print_var_int("bisect_all", all);
print_var_int("bisect_steps", estimate_bisect_steps(all));
return 0;
}
rev-list: add bitmap mode to speed up object lists The bitmap reachability index used to speed up the counting objects phase during `pack-objects` can also be used to optimize a normal rev-list if the only thing required are the SHA1s of the objects during the list (i.e., not the path names at which trees and blobs were found). Calling `git rev-list --objects --use-bitmap-index [committish]` will perform an object iteration based on a bitmap result instead of actually walking the object graph. These are some example timings for `torvalds/linux` (warm cache, best-of-five): $ time git rev-list --objects master > /dev/null real 0m34.191s user 0m33.904s sys 0m0.268s $ time git rev-list --objects --use-bitmap-index master > /dev/null real 0m1.041s user 0m0.976s sys 0m0.064s Likewise, using `git rev-list --count --use-bitmap-index` will speed up the counting operation by building the resulting bitmap and performing a fast popcount (number of bits set on the bitmap) on the result. Here are some sample timings of different ways to count commits in `torvalds/linux`: $ time git rev-list master | wc -l 399882 real 0m6.524s user 0m6.060s sys 0m3.284s $ time git rev-list --count master 399882 real 0m4.318s user 0m4.236s sys 0m0.076s $ time git rev-list --use-bitmap-index --count master 399882 real 0m0.217s user 0m0.176s sys 0m0.040s This also respects negative refs, so you can use it to count a slice of history: $ time git rev-list --count v3.0..master 144843 real 0m1.971s user 0m1.932s sys 0m0.036s $ time git rev-list --use-bitmap-index --count v3.0..master real 0m0.280s user 0m0.220s sys 0m0.056s Though note that the closer the endpoints, the less it helps. In the traversal case, we have fewer commits to cross, so we take less time. But the bitmap time is dominated by generating the pack revindex, which is constant with respect to the refs given. Note that you cannot yet get a fast --left-right count of a symmetric difference (e.g., "--count --left-right master...topic"). The slow part of that walk actually happens during the merge-base determination when we parse "master...topic". Even though a count does not actually need to know the real merge base (it only needs to take the symmetric difference of the bitmaps), the revision code would require some refactoring to handle this case. Additionally, a `--test-bitmap` flag has been added that will perform the same rev-list manually (i.e. using a normal revwalk) and using bitmaps, and verify that the results are the same. This can be used to exercise the bitmap code, and also to verify that the contents of the .bitmap file are sane. Signed-off-by: Vicent Marti <tanoku@gmail.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-21 15:00:12 +01:00
static int show_object_fast(
const unsigned char *sha1,
enum object_type type,
int exclude,
uint32_t name_hash,
struct packed_git *found_pack,
off_t found_offset)
{
fprintf(stdout, "%s\n", sha1_to_hex(sha1));
return 1;
}
int cmd_rev_list(int argc, const char **argv, const char *prefix)
{
struct rev_info revs;
struct rev_list_info info;
int i;
int bisect_list = 0;
int bisect_show_vars = 0;
int bisect_find_all = 0;
rev-list: add bitmap mode to speed up object lists The bitmap reachability index used to speed up the counting objects phase during `pack-objects` can also be used to optimize a normal rev-list if the only thing required are the SHA1s of the objects during the list (i.e., not the path names at which trees and blobs were found). Calling `git rev-list --objects --use-bitmap-index [committish]` will perform an object iteration based on a bitmap result instead of actually walking the object graph. These are some example timings for `torvalds/linux` (warm cache, best-of-five): $ time git rev-list --objects master > /dev/null real 0m34.191s user 0m33.904s sys 0m0.268s $ time git rev-list --objects --use-bitmap-index master > /dev/null real 0m1.041s user 0m0.976s sys 0m0.064s Likewise, using `git rev-list --count --use-bitmap-index` will speed up the counting operation by building the resulting bitmap and performing a fast popcount (number of bits set on the bitmap) on the result. Here are some sample timings of different ways to count commits in `torvalds/linux`: $ time git rev-list master | wc -l 399882 real 0m6.524s user 0m6.060s sys 0m3.284s $ time git rev-list --count master 399882 real 0m4.318s user 0m4.236s sys 0m0.076s $ time git rev-list --use-bitmap-index --count master 399882 real 0m0.217s user 0m0.176s sys 0m0.040s This also respects negative refs, so you can use it to count a slice of history: $ time git rev-list --count v3.0..master 144843 real 0m1.971s user 0m1.932s sys 0m0.036s $ time git rev-list --use-bitmap-index --count v3.0..master real 0m0.280s user 0m0.220s sys 0m0.056s Though note that the closer the endpoints, the less it helps. In the traversal case, we have fewer commits to cross, so we take less time. But the bitmap time is dominated by generating the pack revindex, which is constant with respect to the refs given. Note that you cannot yet get a fast --left-right count of a symmetric difference (e.g., "--count --left-right master...topic"). The slow part of that walk actually happens during the merge-base determination when we parse "master...topic". Even though a count does not actually need to know the real merge base (it only needs to take the symmetric difference of the bitmaps), the revision code would require some refactoring to handle this case. Additionally, a `--test-bitmap` flag has been added that will perform the same rev-list manually (i.e. using a normal revwalk) and using bitmaps, and verify that the results are the same. This can be used to exercise the bitmap code, and also to verify that the contents of the .bitmap file are sane. Signed-off-by: Vicent Marti <tanoku@gmail.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-21 15:00:12 +01:00
int use_bitmap_index = 0;
const char *show_progress = NULL;
git_config(git_default_config, NULL);
init_revisions(&revs, prefix);
revs.abbrev = DEFAULT_ABBREV;
revs.commit_format = CMIT_FMT_UNSPECIFIED;
argc = setup_revisions(argc, argv, &revs, NULL);
memset(&info, 0, sizeof(info));
info.revs = &revs;
if (revs.bisect)
bisect_list = 1;
if (DIFF_OPT_TST(&revs.diffopt, QUICK))
info.flags |= REV_LIST_QUIET;
for (i = 1 ; i < argc; i++) {
2005-10-21 06:25:09 +02:00
const char *arg = argv[i];
if (!strcmp(arg, "--header")) {
revs.verbose_header = 1;
continue;
}
if (!strcmp(arg, "--timestamp")) {
info.show_timestamp = 1;
continue;
}
if (!strcmp(arg, "--bisect")) {
bisect_list = 1;
continue;
}
if (!strcmp(arg, "--bisect-all")) {
bisect_list = 1;
bisect_find_all = 1;
info.flags |= BISECT_SHOW_ALL;
revs.show_decorations = 1;
continue;
}
if (!strcmp(arg, "--bisect-vars")) {
bisect_list = 1;
bisect_show_vars = 1;
continue;
}
rev-list: add bitmap mode to speed up object lists The bitmap reachability index used to speed up the counting objects phase during `pack-objects` can also be used to optimize a normal rev-list if the only thing required are the SHA1s of the objects during the list (i.e., not the path names at which trees and blobs were found). Calling `git rev-list --objects --use-bitmap-index [committish]` will perform an object iteration based on a bitmap result instead of actually walking the object graph. These are some example timings for `torvalds/linux` (warm cache, best-of-five): $ time git rev-list --objects master > /dev/null real 0m34.191s user 0m33.904s sys 0m0.268s $ time git rev-list --objects --use-bitmap-index master > /dev/null real 0m1.041s user 0m0.976s sys 0m0.064s Likewise, using `git rev-list --count --use-bitmap-index` will speed up the counting operation by building the resulting bitmap and performing a fast popcount (number of bits set on the bitmap) on the result. Here are some sample timings of different ways to count commits in `torvalds/linux`: $ time git rev-list master | wc -l 399882 real 0m6.524s user 0m6.060s sys 0m3.284s $ time git rev-list --count master 399882 real 0m4.318s user 0m4.236s sys 0m0.076s $ time git rev-list --use-bitmap-index --count master 399882 real 0m0.217s user 0m0.176s sys 0m0.040s This also respects negative refs, so you can use it to count a slice of history: $ time git rev-list --count v3.0..master 144843 real 0m1.971s user 0m1.932s sys 0m0.036s $ time git rev-list --use-bitmap-index --count v3.0..master real 0m0.280s user 0m0.220s sys 0m0.056s Though note that the closer the endpoints, the less it helps. In the traversal case, we have fewer commits to cross, so we take less time. But the bitmap time is dominated by generating the pack revindex, which is constant with respect to the refs given. Note that you cannot yet get a fast --left-right count of a symmetric difference (e.g., "--count --left-right master...topic"). The slow part of that walk actually happens during the merge-base determination when we parse "master...topic". Even though a count does not actually need to know the real merge base (it only needs to take the symmetric difference of the bitmaps), the revision code would require some refactoring to handle this case. Additionally, a `--test-bitmap` flag has been added that will perform the same rev-list manually (i.e. using a normal revwalk) and using bitmaps, and verify that the results are the same. This can be used to exercise the bitmap code, and also to verify that the contents of the .bitmap file are sane. Signed-off-by: Vicent Marti <tanoku@gmail.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-21 15:00:12 +01:00
if (!strcmp(arg, "--use-bitmap-index")) {
use_bitmap_index = 1;
continue;
}
if (!strcmp(arg, "--test-bitmap")) {
test_bitmap_walk(&revs);
return 0;
}
if (skip_prefix(arg, "--progress=", &arg)) {
show_progress = arg;
continue;
}
usage(rev_list_usage);
}
if (revs.commit_format != CMIT_FMT_UNSPECIFIED) {
/* The command line has a --pretty */
info.hdr_termination = '\n';
if (revs.commit_format == CMIT_FMT_ONELINE)
info.header_prefix = "";
else
info.header_prefix = "commit ";
}
else if (revs.verbose_header)
/* Only --header was specified */
revs.commit_format = CMIT_FMT_RAW;
if ((!revs.commits &&
(!(revs.tag_objects || revs.tree_objects || revs.blob_objects) &&
Add "named object array" concept We've had this notion of a "object_list" for a long time, which eventually grew a "name" member because some users (notably git-rev-list) wanted to name each object as it is generated. That object_list is great for some things, but it isn't all that wonderful for others, and the "name" member is generally not used by everybody. This patch splits the users of the object_list array up into two: the traditional list users, who want the list-like format, and who don't actually use or want the name. And another class of users that really used the list as an extensible array, and generally wanted to name the objects. The patch is fairly straightforward, but it's also biggish. Most of it really just cleans things up: switching the revision parsing and listing over to the array makes things like the builtin-diff usage much simpler (we now see exactly how many members the array has, and we don't get the objects reversed from the order they were on the command line). One of the main reasons for doing this at all is that the malloc overhead of the simple object list was actually pretty high, and the array is just a lot denser. So this patch brings down memory usage by git-rev-list by just under 3% (on top of all the other memory use optimizations) on the mozilla archive. It does add more lines than it removes, and more importantly, it adds a whole new infrastructure for maintaining lists of objects, but on the other hand, the new dynamic array code is pretty obvious. The change to builtin-diff-tree.c shows a fairly good example of why an array interface is sometimes more natural, and just much simpler for everybody. Signed-off-by: Linus Torvalds <torvalds@osdl.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
!revs.pending.nr)) ||
revs.diff)
usage(rev_list_usage);
if (revs.show_notes)
die(_("rev-list does not support display of notes"));
save_commit_buffer = (revs.verbose_header ||
revs.grep_filter.pattern_list ||
revs.grep_filter.header_list);
rev-list --bisect: limit list before bisecting. I noticed bisect does not work well without both good and bad. Running this script in git.git repository would give you quite different results: #!/bin/sh initial=e83c5163316f89bfbde7d9ab23ca2e25604af290 mid0=`git rev-list --bisect ^$initial --all` git rev-list $mid0 | wc -l git rev-list ^$mid0 --all | wc -l mid1=`git rev-list --bisect --all` git rev-list $mid1 | wc -l git rev-list ^$mid1 --all | wc -l The $initial commit is the very first commit you made. The first midpoint bisects things evenly as designed, but the latter does not. The reason I got interested in this was because I was wondering if something like the following would help people converting a huge repository from foreign SCM, or preparing a repository to be fetched over plain dumb HTTP only: #!/bin/sh N=4 P=.git/objects/pack bottom= while test 0 \< $N do N=$((N-1)) if test -z "$bottom" then newbottom=`git rev-list --bisect --all` else newbottom=`git rev-list --bisect ^$bottom --all` fi if test -z "$bottom" then rev_list="$newbottom" elif test 0 = $N then rev_list="^$bottom --all" else rev_list="^$bottom $newbottom" fi p=$(git rev-list --unpacked --objects $rev_list | git pack-objects $P/pack) git show-index <$P/pack-$p.idx | wc -l bottom=$newbottom done The idea is to pack older half of the history to one pack, then older half of the remaining history to another, to continue a few times, using finer granularity as we get closer to the tip. This may not matter, since for a truly huge history, running bisect number of times could be quite time consuming, and we might be better off running "git rev-list --all" once into a temporary file, and manually pick cut-off points from the resulting list of commits. After all we are talking about "approximately half" for such an usage, and older history does not matter much. Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-15 00:57:32 +02:00
if (bisect_list)
revs.limited = 1;
if (show_progress)
progress = start_progress_delay(show_progress, 0, 0, 2);
if (use_bitmap_index && !revs.prune) {
rev-list: add bitmap mode to speed up object lists The bitmap reachability index used to speed up the counting objects phase during `pack-objects` can also be used to optimize a normal rev-list if the only thing required are the SHA1s of the objects during the list (i.e., not the path names at which trees and blobs were found). Calling `git rev-list --objects --use-bitmap-index [committish]` will perform an object iteration based on a bitmap result instead of actually walking the object graph. These are some example timings for `torvalds/linux` (warm cache, best-of-five): $ time git rev-list --objects master > /dev/null real 0m34.191s user 0m33.904s sys 0m0.268s $ time git rev-list --objects --use-bitmap-index master > /dev/null real 0m1.041s user 0m0.976s sys 0m0.064s Likewise, using `git rev-list --count --use-bitmap-index` will speed up the counting operation by building the resulting bitmap and performing a fast popcount (number of bits set on the bitmap) on the result. Here are some sample timings of different ways to count commits in `torvalds/linux`: $ time git rev-list master | wc -l 399882 real 0m6.524s user 0m6.060s sys 0m3.284s $ time git rev-list --count master 399882 real 0m4.318s user 0m4.236s sys 0m0.076s $ time git rev-list --use-bitmap-index --count master 399882 real 0m0.217s user 0m0.176s sys 0m0.040s This also respects negative refs, so you can use it to count a slice of history: $ time git rev-list --count v3.0..master 144843 real 0m1.971s user 0m1.932s sys 0m0.036s $ time git rev-list --use-bitmap-index --count v3.0..master real 0m0.280s user 0m0.220s sys 0m0.056s Though note that the closer the endpoints, the less it helps. In the traversal case, we have fewer commits to cross, so we take less time. But the bitmap time is dominated by generating the pack revindex, which is constant with respect to the refs given. Note that you cannot yet get a fast --left-right count of a symmetric difference (e.g., "--count --left-right master...topic"). The slow part of that walk actually happens during the merge-base determination when we parse "master...topic". Even though a count does not actually need to know the real merge base (it only needs to take the symmetric difference of the bitmaps), the revision code would require some refactoring to handle this case. Additionally, a `--test-bitmap` flag has been added that will perform the same rev-list manually (i.e. using a normal revwalk) and using bitmaps, and verify that the results are the same. This can be used to exercise the bitmap code, and also to verify that the contents of the .bitmap file are sane. Signed-off-by: Vicent Marti <tanoku@gmail.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-21 15:00:12 +01:00
if (revs.count && !revs.left_right && !revs.cherry_mark) {
uint32_t commit_count;
rev-list: "adjust" results of "--count --use-bitmap-index -n" If you ask rev-list for: git rev-list --count --use-bitmap-index HEAD we optimize out the actual traversal and just give you the number of bits set in the commit bitmap. This is faster, which is good. But if you ask to limit the size of the traversal, like: git rev-list --count --use-bitmap-index -n 100 HEAD we'll still output the full bitmapped number we found. On the surface, that might even seem OK. You explicitly asked to use the bitmap index, and it was cheap to compute the real answer, so we gave it to you. But there's something much more complicated going on under the hood. If we don't have a bitmap directly for HEAD, then we have to actually traverse backwards, looking for a bitmapped commit. And _that_ traversal is bounded by our `-n` count. This is a good thing, because it bounds the work we have to do, which is probably what the user wanted by asking for `-n`. But now it makes the output quite confusing. You might get many values: - your `-n` value, if we walked back and never found a bitmap (or fewer if there weren't that many commits) - the actual full count, if we found a bitmap root for every path of our traversal with in the `-n` limit - any number in between! We might have walked back and found _some_ bitmaps, but then cut off the traversal early with some commits not accounted for in the result. So you cannot even see a value higher than your `-n` and say "OK, bitmaps kicked in, this must be the real full count". The only sane thing is for git to just clamp the value to a maximum of the `-n` value, which means we should output the exact same results whether bitmaps are in use or not. The test in t5310 demonstrates this by using `-n 1`. Without this patch we fail in the full-bitmap case (where we do not have to traverse at all) but _not_ in the partial-bitmap case (where we have to walk down to find an actual bitmap). With this patch, both cases just work. I didn't implement the crazy in-between case, just because it's complicated to set up, and is really a subset of the full-count case, which we do cover. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-03 09:07:34 +02:00
int max_count = revs.max_count;
rev-list: add bitmap mode to speed up object lists The bitmap reachability index used to speed up the counting objects phase during `pack-objects` can also be used to optimize a normal rev-list if the only thing required are the SHA1s of the objects during the list (i.e., not the path names at which trees and blobs were found). Calling `git rev-list --objects --use-bitmap-index [committish]` will perform an object iteration based on a bitmap result instead of actually walking the object graph. These are some example timings for `torvalds/linux` (warm cache, best-of-five): $ time git rev-list --objects master > /dev/null real 0m34.191s user 0m33.904s sys 0m0.268s $ time git rev-list --objects --use-bitmap-index master > /dev/null real 0m1.041s user 0m0.976s sys 0m0.064s Likewise, using `git rev-list --count --use-bitmap-index` will speed up the counting operation by building the resulting bitmap and performing a fast popcount (number of bits set on the bitmap) on the result. Here are some sample timings of different ways to count commits in `torvalds/linux`: $ time git rev-list master | wc -l 399882 real 0m6.524s user 0m6.060s sys 0m3.284s $ time git rev-list --count master 399882 real 0m4.318s user 0m4.236s sys 0m0.076s $ time git rev-list --use-bitmap-index --count master 399882 real 0m0.217s user 0m0.176s sys 0m0.040s This also respects negative refs, so you can use it to count a slice of history: $ time git rev-list --count v3.0..master 144843 real 0m1.971s user 0m1.932s sys 0m0.036s $ time git rev-list --use-bitmap-index --count v3.0..master real 0m0.280s user 0m0.220s sys 0m0.056s Though note that the closer the endpoints, the less it helps. In the traversal case, we have fewer commits to cross, so we take less time. But the bitmap time is dominated by generating the pack revindex, which is constant with respect to the refs given. Note that you cannot yet get a fast --left-right count of a symmetric difference (e.g., "--count --left-right master...topic"). The slow part of that walk actually happens during the merge-base determination when we parse "master...topic". Even though a count does not actually need to know the real merge base (it only needs to take the symmetric difference of the bitmaps), the revision code would require some refactoring to handle this case. Additionally, a `--test-bitmap` flag has been added that will perform the same rev-list manually (i.e. using a normal revwalk) and using bitmaps, and verify that the results are the same. This can be used to exercise the bitmap code, and also to verify that the contents of the .bitmap file are sane. Signed-off-by: Vicent Marti <tanoku@gmail.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-21 15:00:12 +01:00
if (!prepare_bitmap_walk(&revs)) {
count_bitmap_commit_list(&commit_count, NULL, NULL, NULL);
rev-list: "adjust" results of "--count --use-bitmap-index -n" If you ask rev-list for: git rev-list --count --use-bitmap-index HEAD we optimize out the actual traversal and just give you the number of bits set in the commit bitmap. This is faster, which is good. But if you ask to limit the size of the traversal, like: git rev-list --count --use-bitmap-index -n 100 HEAD we'll still output the full bitmapped number we found. On the surface, that might even seem OK. You explicitly asked to use the bitmap index, and it was cheap to compute the real answer, so we gave it to you. But there's something much more complicated going on under the hood. If we don't have a bitmap directly for HEAD, then we have to actually traverse backwards, looking for a bitmapped commit. And _that_ traversal is bounded by our `-n` count. This is a good thing, because it bounds the work we have to do, which is probably what the user wanted by asking for `-n`. But now it makes the output quite confusing. You might get many values: - your `-n` value, if we walked back and never found a bitmap (or fewer if there weren't that many commits) - the actual full count, if we found a bitmap root for every path of our traversal with in the `-n` limit - any number in between! We might have walked back and found _some_ bitmaps, but then cut off the traversal early with some commits not accounted for in the result. So you cannot even see a value higher than your `-n` and say "OK, bitmaps kicked in, this must be the real full count". The only sane thing is for git to just clamp the value to a maximum of the `-n` value, which means we should output the exact same results whether bitmaps are in use or not. The test in t5310 demonstrates this by using `-n 1`. Without this patch we fail in the full-bitmap case (where we do not have to traverse at all) but _not_ in the partial-bitmap case (where we have to walk down to find an actual bitmap). With this patch, both cases just work. I didn't implement the crazy in-between case, just because it's complicated to set up, and is really a subset of the full-count case, which we do cover. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-03 09:07:34 +02:00
if (max_count >= 0 && max_count < commit_count)
commit_count = max_count;
rev-list: add bitmap mode to speed up object lists The bitmap reachability index used to speed up the counting objects phase during `pack-objects` can also be used to optimize a normal rev-list if the only thing required are the SHA1s of the objects during the list (i.e., not the path names at which trees and blobs were found). Calling `git rev-list --objects --use-bitmap-index [committish]` will perform an object iteration based on a bitmap result instead of actually walking the object graph. These are some example timings for `torvalds/linux` (warm cache, best-of-five): $ time git rev-list --objects master > /dev/null real 0m34.191s user 0m33.904s sys 0m0.268s $ time git rev-list --objects --use-bitmap-index master > /dev/null real 0m1.041s user 0m0.976s sys 0m0.064s Likewise, using `git rev-list --count --use-bitmap-index` will speed up the counting operation by building the resulting bitmap and performing a fast popcount (number of bits set on the bitmap) on the result. Here are some sample timings of different ways to count commits in `torvalds/linux`: $ time git rev-list master | wc -l 399882 real 0m6.524s user 0m6.060s sys 0m3.284s $ time git rev-list --count master 399882 real 0m4.318s user 0m4.236s sys 0m0.076s $ time git rev-list --use-bitmap-index --count master 399882 real 0m0.217s user 0m0.176s sys 0m0.040s This also respects negative refs, so you can use it to count a slice of history: $ time git rev-list --count v3.0..master 144843 real 0m1.971s user 0m1.932s sys 0m0.036s $ time git rev-list --use-bitmap-index --count v3.0..master real 0m0.280s user 0m0.220s sys 0m0.056s Though note that the closer the endpoints, the less it helps. In the traversal case, we have fewer commits to cross, so we take less time. But the bitmap time is dominated by generating the pack revindex, which is constant with respect to the refs given. Note that you cannot yet get a fast --left-right count of a symmetric difference (e.g., "--count --left-right master...topic"). The slow part of that walk actually happens during the merge-base determination when we parse "master...topic". Even though a count does not actually need to know the real merge base (it only needs to take the symmetric difference of the bitmaps), the revision code would require some refactoring to handle this case. Additionally, a `--test-bitmap` flag has been added that will perform the same rev-list manually (i.e. using a normal revwalk) and using bitmaps, and verify that the results are the same. This can be used to exercise the bitmap code, and also to verify that the contents of the .bitmap file are sane. Signed-off-by: Vicent Marti <tanoku@gmail.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-21 15:00:12 +01:00
printf("%d\n", commit_count);
return 0;
}
rev-list: disable bitmaps when "-n" is used with listing objects You can ask rev-list to use bitmaps to speed up an --objects traversal, which should generally give you your answers much faster. Likewise, you can ask rev-list to limit such a traversal with `-n`, in which case we'll show only a limited set of commits (and only the tree and commit objects directly reachable from those commits). But if you do both together, the results are nonsensical. We end up limiting any fallback traversal we do to _find_ the bitmaps, but the actual set of objects we output will be picked arbitrarily from the union of any bitmaps we do find, and will involve the objects of many more commits. It's possible that somebody might want this as a "show me what you can, but limit the amount of work you do" flag. But as with the prior commit clamping "--count", the results are basically non-deterministic; you'll get the values from some commits between `n` and the total number, and you can't tell which. And unlike the `--count` case, we can't easily generate the "real" value from the bitmap values (you can't just walk back `-n` commits and subtract out the reachable objects from the boundary commits; the bitmaps for `X` record its total reachability, so you don't know which objects are directly from `X` itself, which from `X^`, and so on). So let's just fallback to the non-bitmap code path in this case, so we always give a sane answer. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-03 09:08:05 +02:00
} else if (revs.max_count < 0 &&
revs.tag_objects && revs.tree_objects && revs.blob_objects) {
rev-list: add bitmap mode to speed up object lists The bitmap reachability index used to speed up the counting objects phase during `pack-objects` can also be used to optimize a normal rev-list if the only thing required are the SHA1s of the objects during the list (i.e., not the path names at which trees and blobs were found). Calling `git rev-list --objects --use-bitmap-index [committish]` will perform an object iteration based on a bitmap result instead of actually walking the object graph. These are some example timings for `torvalds/linux` (warm cache, best-of-five): $ time git rev-list --objects master > /dev/null real 0m34.191s user 0m33.904s sys 0m0.268s $ time git rev-list --objects --use-bitmap-index master > /dev/null real 0m1.041s user 0m0.976s sys 0m0.064s Likewise, using `git rev-list --count --use-bitmap-index` will speed up the counting operation by building the resulting bitmap and performing a fast popcount (number of bits set on the bitmap) on the result. Here are some sample timings of different ways to count commits in `torvalds/linux`: $ time git rev-list master | wc -l 399882 real 0m6.524s user 0m6.060s sys 0m3.284s $ time git rev-list --count master 399882 real 0m4.318s user 0m4.236s sys 0m0.076s $ time git rev-list --use-bitmap-index --count master 399882 real 0m0.217s user 0m0.176s sys 0m0.040s This also respects negative refs, so you can use it to count a slice of history: $ time git rev-list --count v3.0..master 144843 real 0m1.971s user 0m1.932s sys 0m0.036s $ time git rev-list --use-bitmap-index --count v3.0..master real 0m0.280s user 0m0.220s sys 0m0.056s Though note that the closer the endpoints, the less it helps. In the traversal case, we have fewer commits to cross, so we take less time. But the bitmap time is dominated by generating the pack revindex, which is constant with respect to the refs given. Note that you cannot yet get a fast --left-right count of a symmetric difference (e.g., "--count --left-right master...topic"). The slow part of that walk actually happens during the merge-base determination when we parse "master...topic". Even though a count does not actually need to know the real merge base (it only needs to take the symmetric difference of the bitmaps), the revision code would require some refactoring to handle this case. Additionally, a `--test-bitmap` flag has been added that will perform the same rev-list manually (i.e. using a normal revwalk) and using bitmaps, and verify that the results are the same. This can be used to exercise the bitmap code, and also to verify that the contents of the .bitmap file are sane. Signed-off-by: Vicent Marti <tanoku@gmail.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-21 15:00:12 +01:00
if (!prepare_bitmap_walk(&revs)) {
traverse_bitmap_commit_list(&show_object_fast);
return 0;
}
}
}
if (prepare_revision_walk(&revs))
die("revision walk setup failed");
if (revs.tree_objects)
mark_edges_uninteresting(&revs, show_edge);
if (bisect_list) {
int reaches = reaches, all = all;
revs.commits = find_bisection(revs.commits, &reaches, &all,
bisect_find_all);
if (bisect_show_vars)
return show_bisect_vars(&info, reaches, all);
}
traverse_commit_list(&revs, show_commit, show_object, &info);
stop_progress(&progress);
if (revs.count) {
if (revs.left_right && revs.cherry_mark)
printf("%d\t%d\t%d\n", revs.count_left, revs.count_right, revs.count_same);
else if (revs.left_right)
printf("%d\t%d\n", revs.count_left, revs.count_right);
else if (revs.cherry_mark)
printf("%d\t%d\n", revs.count_left + revs.count_right, revs.count_same);
else
printf("%d\n", revs.count_left + revs.count_right);
}
return 0;
}