git-commit-vandalism/builtin/rev-list.c

414 lines
10 KiB
C
Raw Normal View History

#include "cache.h"
#include "commit.h"
#include "diff.h"
#include "revision.h"
#include "list-objects.h"
#include "builtin.h"
#include "log-tree.h"
#include "graph.h"
#include "bisect.h"
static const char rev_list_usage[] =
"git rev-list [OPTION] <commit-id>... [ -- paths... ]\n"
" limiting output:\n"
" --max-count=<n>\n"
" --max-age=<epoch>\n"
" --min-age=<epoch>\n"
" --sparse\n"
" --no-merges\n"
" --min-parents=<n>\n"
" --no-min-parents\n"
" --max-parents=<n>\n"
" --no-max-parents\n"
" --remove-empty\n"
" --all\n"
" --branches\n"
" --tags\n"
" --remotes\n"
" --stdin\n"
" --quiet\n"
" ordering output:\n"
" --topo-order\n"
" --date-order\n"
" --reverse\n"
" formatting output:\n"
" --parents\n"
" --children\n"
" --objects | --objects-edge\n"
" --unpacked\n"
" --header | --pretty\n"
" --abbrev=<n> | --no-abbrev\n"
" --abbrev-commit\n"
" --left-right\n"
" special purpose:\n"
" --bisect\n"
" --bisect-vars\n"
" --bisect-all"
;
static void finish_commit(struct commit *commit, void *data);
static void show_commit(struct commit *commit, void *data)
{
struct rev_list_info *info = data;
struct rev_info *revs = info->revs;
if (info->flags & REV_LIST_QUIET) {
finish_commit(commit, data);
return;
}
graph_show_commit(revs->graph);
if (revs->count) {
if (commit->object.flags & PATCHSAME)
revs->count_same++;
else if (commit->object.flags & SYMMETRIC_LEFT)
revs->count_left++;
else
revs->count_right++;
finish_commit(commit, data);
return;
}
if (info->show_timestamp)
printf("%lu ", commit->date);
if (info->header_prefix)
fputs(info->header_prefix, stdout);
if (!revs->graph)
fputs(get_revision_mark(revs, commit), stdout);
if (revs->abbrev_commit && revs->abbrev)
fputs(find_unique_abbrev(commit->object.sha1, revs->abbrev),
stdout);
else
fputs(sha1_to_hex(commit->object.sha1), stdout);
if (revs->print_parents) {
struct commit_list *parents = commit->parents;
while (parents) {
printf(" %s", sha1_to_hex(parents->item->object.sha1));
parents = parents->next;
}
}
if (revs->children.name) {
struct commit_list *children;
children = lookup_decoration(&revs->children, &commit->object);
while (children) {
printf(" %s", sha1_to_hex(children->item->object.sha1));
children = children->next;
}
}
show_decorations(revs, commit);
if (revs->commit_format == CMIT_FMT_ONELINE)
putchar(' ');
else
putchar('\n');
if (revs->verbose_header && commit->buffer) {
struct strbuf buf = STRBUF_INIT;
struct pretty_print_context ctx = {0};
ctx.abbrev = revs->abbrev;
ctx.date_mode = revs->date_mode;
ctx.date_mode_explicit = revs->date_mode_explicit;
ctx.fmt = revs->commit_format;
pretty_print_commit(&ctx, commit, &buf);
if (revs->graph) {
if (buf.len) {
if (revs->commit_format != CMIT_FMT_ONELINE)
graph_show_oneline(revs->graph);
graph_show_commit_msg(revs->graph, &buf);
/*
* Add a newline after the commit message.
*
* Usually, this newline produces a blank
* padding line between entries, in which case
* we need to add graph padding on this line.
*
* However, the commit message may not end in a
* newline. In this case the newline simply
* ends the last line of the commit message,
* and we don't need any graph output. (This
* always happens with CMIT_FMT_ONELINE, and it
* happens with CMIT_FMT_USERFORMAT when the
* format doesn't explicitly end in a newline.)
*/
if (buf.len && buf.buf[buf.len - 1] == '\n')
graph_show_padding(revs->graph);
putchar('\n');
} else {
/*
* If the message buffer is empty, just show
* the rest of the graph output for this
* commit.
*/
if (graph_show_remainder(revs->graph))
putchar('\n');
if (revs->commit_format == CMIT_FMT_ONELINE)
putchar('\n');
}
} else {
if (revs->commit_format != CMIT_FMT_USERFORMAT ||
buf.len) {
fwrite(buf.buf, 1, buf.len, stdout);
putchar(info->hdr_termination);
}
}
strbuf_release(&buf);
} else {
if (graph_show_remainder(revs->graph))
putchar('\n');
}
maybe_flush_or_die(stdout, "stdout");
finish_commit(commit, data);
}
static void finish_commit(struct commit *commit, void *data)
{
if (commit->parents) {
free_commit_list(commit->parents);
commit->parents = NULL;
}
free(commit->buffer);
commit->buffer = NULL;
[PATCH] Modify git-rev-list to linearise the commit history in merge order. This patch linearises the GIT commit history graph into merge order which is defined by invariants specified in Documentation/git-rev-list.txt. The linearisation produced by this patch is superior in an objective sense to that produced by the existing git-rev-list implementation in that the linearisation produced is guaranteed to have the minimum number of discontinuities, where a discontinuity is defined as an adjacent pair of commits in the output list which are not related in a direct child-parent relationship. With this patch a graph like this: a4 --- | \ \ | b4 | |/ | | a3 | | | | | a2 | | | | c3 | | | | | c2 | b3 | | | /| | b2 | | | c1 | | / | b1 a1 | | | a0 | | / root Sorts like this: = a4 | c3 | c2 | c1 ^ b4 | b3 | b2 | b1 ^ a3 | a2 | a1 | a0 = root Instead of this: = a4 | c3 ^ b4 | a3 ^ c2 ^ b3 ^ a2 ^ b2 ^ c1 ^ a1 ^ b1 ^ a0 = root A test script, t/t6000-rev-list.sh, includes a test which demonstrates that the linearisation produced by --merge-order has less discontinuities than the linearisation produced by git-rev-list without the --merge-order flag specified. To see this, do the following: cd t ./t6000-rev-list.sh cd trash cat actual-default-order cat actual-merge-order The existing behaviour of git-rev-list is preserved, by default. To obtain the modified behaviour, specify --merge-order or --merge-order --show-breaks on the command line. This version of the patch has been tested on the git repository and also on the linux-2.6 repository and has reasonable performance on both - ~50-100% slower than the original algorithm. This version of the patch has incorporated a functional equivalent of the Linus' output limiting algorithm into the merge-order algorithm itself. This operates per the notes associated with Linus' commit 337cb3fb8da45f10fe9a0c3cf571600f55ead2ce. This version has incorporated Linus' feedback regarding proposed changes to rev-list.c. (see: [PATCH] Factor out filtering in rev-list.c) This version has improved the way sort_first_epoch marks commits as uninteresting. For more details about this change, refer to Documentation/git-rev-list.txt and http://blackcubes.dyndns.org/epoch/. Signed-off-by: Jon Seymour <jon.seymour@gmail.com> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-06 17:39:40 +02:00
}
static void finish_object(struct object *obj,
const struct name_path *path, const char *name,
void *cb_data)
{
struct rev_list_info *info = cb_data;
process_{tree,blob}: show objects without buffering Here's a less trivial thing, and slightly more dubious one. I was looking at that "struct object_array objects", and wondering why we do that. I have honestly totally forgotten. Why not just call the "show()" function as we encounter the objects? Rather than add the objects to the object_array, and then at the very end going through the array and doing a 'show' on all, just do things more incrementally. Now, there are possible downsides to this: - the "buffer using object_array" _can_ in theory result in at least better I-cache usage (two tight loops rather than one more spread out one). I don't think this is a real issue, but in theory.. - this _does_ change the order of the objects printed. Instead of doing a "process_tree(revs, commit->tree, &objects, NULL, "");" in the loop over the commits (which puts all the root trees _first_ in the object list, this patch just adds them to the list of pending objects, and then we'll traverse them in that order (and thus show each root tree object together with the objects we discover under it) I _think_ the new ordering actually makes more sense, but the object ordering is actually a subtle thing when it comes to packing efficiency, so any change in order is going to have implications for packing. Good or bad, I dunno. - There may be some reason why we did it that odd way with the object array, that I have simply forgotten. Anyway, now that we don't buffer up the objects before showing them that may actually result in lower memory usage during that whole traverse_commit_list() phase. This is seriously not very deeply tested. It makes sense to me, it seems to pass all the tests, it looks ok, but... Does anybody remember why we did that "object_array" thing? It used to be an "object_list" a long long time ago, but got changed into the array due to better memory usage patterns (those linked lists of obejcts are horrible from a memory allocation standpoint). But I wonder why we didn't do this back then. Maybe there's a reason for it. Or maybe there _used_ to be a reason, and no longer is. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-04-11 02:27:58 +02:00
if (obj->type == OBJ_BLOB && !has_sha1_file(obj->sha1))
die("missing blob object '%s'", sha1_to_hex(obj->sha1));
if (info->revs->verify_objects && !obj->parsed && obj->type != OBJ_COMMIT)
parse_object(obj->sha1);
}
static void show_object(struct object *obj,
const struct name_path *path, const char *component,
void *cb_data)
{
struct rev_list_info *info = cb_data;
finish_object(obj, path, component, cb_data);
if (info->flags & REV_LIST_QUIET)
return;
show_object_with_name(stdout, obj, path, component);
}
static void show_edge(struct commit *commit)
{
printf("-%s\n", sha1_to_hex(commit->object.sha1));
}
rev-list: estimate number of bisection step left This patch teaches "git rev-list --bisect-vars" to output an estimate of the number of bisection step left _after the current one_ along with the other variables it already outputs. This patch also makes "git-bisect.sh" display this number of steps left _after the current one_, along with the estimate of the number of revisions left to test (after the current one). Here is a table to help analyse what should be the best estimate for the number of bisect steps left. N : linear case --> probabilities --> best ------------------------------------------------------------- 1 : G-B --> 0 --> 0 2 : G-U1-B --> 0 --> 0 3 : G-U1-U2-B --> 0(1/3) 1(2/3) --> 1 4 : G-U1-U2-U3-B --> 1 --> 1 5 : G-U1-U2-U3-U4-B --> 1(3/5) 2(2/5) --> 1 6 : G-U1-U2-U3-U4-U5-B --> 1(2/6) 2(4/6) --> 2 7 : G-U1-U2-U3-U4-U5-U6-B --> 1(1/7) 2(6/7) --> 2 8 : G-U1-U2-U3-U4-U5-U6-U7-B --> 2 --> 2 9 : G-U1-U2-U3-U4-U5-U6-U7-U8-B --> 2(7/9) 3(2/9) --> 2 10: G-U1-U2-U3-U4-U5-U6-U7-U8-U9-B --> 2(6/10)3(4/10)--> 2 In the column "N", there is the number of revisions that could _now_ be the first bad commit we are looking for. The "linear case" column describes the linear history corresponding to the number in column N. G means good, B means bad, and Ux means unknown. Note that the first bad revision we are looking for can be any Ux or B. In the "probabilities" column, there are the different outcomes in number of steps with the odds of each outcome in parenthesis corresponding to the linear case. The "best" column gives the most accurate estimate among the different outcomes in the "probabilities" column. We have the following: best(2^n) == n - 1 and for any x between 0 included and 2^n excluded, the probability for n - 1 steps left looks like: P(2^n + x) == (2^n - x) / (2^n + x) and P(2^n + x) < 0.5 means 2^n < 3x So the algorithm used in this patch calculates 2^n and x, and then choose between returning n - 1 and n. Signed-off-by: Christian Couder <chriscool@tuxfamily.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-02-21 09:26:01 +01:00
static inline int log2i(int n)
{
int log2 = 0;
for (; n > 1; n >>= 1)
log2++;
return log2;
}
static inline int exp2i(int n)
{
return 1 << n;
}
/*
* Estimate the number of bisect steps left (after the current step)
*
* For any x between 0 included and 2^n excluded, the probability for
* n - 1 steps left looks like:
*
* P(2^n + x) == (2^n - x) / (2^n + x)
*
* and P(2^n + x) < 0.5 means 2^n < 3x
*/
int estimate_bisect_steps(int all)
rev-list: estimate number of bisection step left This patch teaches "git rev-list --bisect-vars" to output an estimate of the number of bisection step left _after the current one_ along with the other variables it already outputs. This patch also makes "git-bisect.sh" display this number of steps left _after the current one_, along with the estimate of the number of revisions left to test (after the current one). Here is a table to help analyse what should be the best estimate for the number of bisect steps left. N : linear case --> probabilities --> best ------------------------------------------------------------- 1 : G-B --> 0 --> 0 2 : G-U1-B --> 0 --> 0 3 : G-U1-U2-B --> 0(1/3) 1(2/3) --> 1 4 : G-U1-U2-U3-B --> 1 --> 1 5 : G-U1-U2-U3-U4-B --> 1(3/5) 2(2/5) --> 1 6 : G-U1-U2-U3-U4-U5-B --> 1(2/6) 2(4/6) --> 2 7 : G-U1-U2-U3-U4-U5-U6-B --> 1(1/7) 2(6/7) --> 2 8 : G-U1-U2-U3-U4-U5-U6-U7-B --> 2 --> 2 9 : G-U1-U2-U3-U4-U5-U6-U7-U8-B --> 2(7/9) 3(2/9) --> 2 10: G-U1-U2-U3-U4-U5-U6-U7-U8-U9-B --> 2(6/10)3(4/10)--> 2 In the column "N", there is the number of revisions that could _now_ be the first bad commit we are looking for. The "linear case" column describes the linear history corresponding to the number in column N. G means good, B means bad, and Ux means unknown. Note that the first bad revision we are looking for can be any Ux or B. In the "probabilities" column, there are the different outcomes in number of steps with the odds of each outcome in parenthesis corresponding to the linear case. The "best" column gives the most accurate estimate among the different outcomes in the "probabilities" column. We have the following: best(2^n) == n - 1 and for any x between 0 included and 2^n excluded, the probability for n - 1 steps left looks like: P(2^n + x) == (2^n - x) / (2^n + x) and P(2^n + x) < 0.5 means 2^n < 3x So the algorithm used in this patch calculates 2^n and x, and then choose between returning n - 1 and n. Signed-off-by: Christian Couder <chriscool@tuxfamily.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-02-21 09:26:01 +01:00
{
int n, x, e;
if (all < 3)
return 0;
n = log2i(all);
e = exp2i(n);
x = all - e;
return (e < 3 * x) ? n : n - 1;
}
void print_commit_list(struct commit_list *list,
const char *format_cur,
const char *format_last)
{
for ( ; list; list = list->next) {
const char *format = list->next ? format_cur : format_last;
printf(format, sha1_to_hex(list->item->object.sha1));
}
}
static void print_var_str(const char *var, const char *val)
{
printf("%s='%s'\n", var, val);
}
static void print_var_int(const char *var, int val)
{
printf("%s=%d\n", var, val);
}
static int show_bisect_vars(struct rev_list_info *info, int reaches, int all)
{
int cnt, flags = info->flags;
char hex[41] = "";
struct commit_list *tried;
struct rev_info *revs = info->revs;
if (!revs->commits)
return 1;
revs->commits = filter_skipped(revs->commits, &tried,
flags & BISECT_SHOW_ALL,
NULL, NULL);
/*
* revs->commits can reach "reaches" commits among
* "all" commits. If it is good, then there are
* (all-reaches) commits left to be bisected.
* On the other hand, if it is bad, then the set
* to bisect is "reaches".
* A bisect set of size N has (N-1) commits further
* to test, as we already know one bad one.
*/
cnt = all - reaches;
if (cnt < reaches)
cnt = reaches;
if (revs->commits)
strcpy(hex, sha1_to_hex(revs->commits->item->object.sha1));
if (flags & BISECT_SHOW_ALL) {
traverse_commit_list(revs, show_commit, show_object, info);
printf("------\n");
}
print_var_str("bisect_rev", hex);
print_var_int("bisect_nr", cnt - 1);
print_var_int("bisect_good", all - reaches - 1);
print_var_int("bisect_bad", reaches - 1);
print_var_int("bisect_all", all);
print_var_int("bisect_steps", estimate_bisect_steps(all));
return 0;
}
int cmd_rev_list(int argc, const char **argv, const char *prefix)
{
struct rev_info revs;
struct rev_list_info info;
int i;
int bisect_list = 0;
int bisect_show_vars = 0;
int bisect_find_all = 0;
git_config(git_default_config, NULL);
init_revisions(&revs, prefix);
revs.abbrev = DEFAULT_ABBREV;
revs.commit_format = CMIT_FMT_UNSPECIFIED;
argc = setup_revisions(argc, argv, &revs, NULL);
memset(&info, 0, sizeof(info));
info.revs = &revs;
if (revs.bisect)
bisect_list = 1;
if (DIFF_OPT_TST(&revs.diffopt, QUICK))
info.flags |= REV_LIST_QUIET;
for (i = 1 ; i < argc; i++) {
2005-10-21 06:25:09 +02:00
const char *arg = argv[i];
if (!strcmp(arg, "--header")) {
revs.verbose_header = 1;
continue;
}
if (!strcmp(arg, "--timestamp")) {
info.show_timestamp = 1;
continue;
}
if (!strcmp(arg, "--bisect")) {
bisect_list = 1;
continue;
}
if (!strcmp(arg, "--bisect-all")) {
bisect_list = 1;
bisect_find_all = 1;
info.flags |= BISECT_SHOW_ALL;
revs.show_decorations = 1;
continue;
}
if (!strcmp(arg, "--bisect-vars")) {
bisect_list = 1;
bisect_show_vars = 1;
continue;
}
usage(rev_list_usage);
}
if (revs.commit_format != CMIT_FMT_UNSPECIFIED) {
/* The command line has a --pretty */
info.hdr_termination = '\n';
if (revs.commit_format == CMIT_FMT_ONELINE)
info.header_prefix = "";
else
info.header_prefix = "commit ";
}
else if (revs.verbose_header)
/* Only --header was specified */
revs.commit_format = CMIT_FMT_RAW;
if ((!revs.commits &&
(!(revs.tag_objects||revs.tree_objects||revs.blob_objects) &&
Add "named object array" concept We've had this notion of a "object_list" for a long time, which eventually grew a "name" member because some users (notably git-rev-list) wanted to name each object as it is generated. That object_list is great for some things, but it isn't all that wonderful for others, and the "name" member is generally not used by everybody. This patch splits the users of the object_list array up into two: the traditional list users, who want the list-like format, and who don't actually use or want the name. And another class of users that really used the list as an extensible array, and generally wanted to name the objects. The patch is fairly straightforward, but it's also biggish. Most of it really just cleans things up: switching the revision parsing and listing over to the array makes things like the builtin-diff usage much simpler (we now see exactly how many members the array has, and we don't get the objects reversed from the order they were on the command line). One of the main reasons for doing this at all is that the malloc overhead of the simple object list was actually pretty high, and the array is just a lot denser. So this patch brings down memory usage by git-rev-list by just under 3% (on top of all the other memory use optimizations) on the mozilla archive. It does add more lines than it removes, and more importantly, it adds a whole new infrastructure for maintaining lists of objects, but on the other hand, the new dynamic array code is pretty obvious. The change to builtin-diff-tree.c shows a fairly good example of why an array interface is sometimes more natural, and just much simpler for everybody. Signed-off-by: Linus Torvalds <torvalds@osdl.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
!revs.pending.nr)) ||
revs.diff)
usage(rev_list_usage);
save_commit_buffer = (revs.verbose_header ||
revs.grep_filter.pattern_list ||
revs.grep_filter.header_list);
rev-list --bisect: limit list before bisecting. I noticed bisect does not work well without both good and bad. Running this script in git.git repository would give you quite different results: #!/bin/sh initial=e83c5163316f89bfbde7d9ab23ca2e25604af290 mid0=`git rev-list --bisect ^$initial --all` git rev-list $mid0 | wc -l git rev-list ^$mid0 --all | wc -l mid1=`git rev-list --bisect --all` git rev-list $mid1 | wc -l git rev-list ^$mid1 --all | wc -l The $initial commit is the very first commit you made. The first midpoint bisects things evenly as designed, but the latter does not. The reason I got interested in this was because I was wondering if something like the following would help people converting a huge repository from foreign SCM, or preparing a repository to be fetched over plain dumb HTTP only: #!/bin/sh N=4 P=.git/objects/pack bottom= while test 0 \< $N do N=$((N-1)) if test -z "$bottom" then newbottom=`git rev-list --bisect --all` else newbottom=`git rev-list --bisect ^$bottom --all` fi if test -z "$bottom" then rev_list="$newbottom" elif test 0 = $N then rev_list="^$bottom --all" else rev_list="^$bottom $newbottom" fi p=$(git rev-list --unpacked --objects $rev_list | git pack-objects $P/pack) git show-index <$P/pack-$p.idx | wc -l bottom=$newbottom done The idea is to pack older half of the history to one pack, then older half of the remaining history to another, to continue a few times, using finer granularity as we get closer to the tip. This may not matter, since for a truly huge history, running bisect number of times could be quite time consuming, and we might be better off running "git rev-list --all" once into a temporary file, and manually pick cut-off points from the resulting list of commits. After all we are talking about "approximately half" for such an usage, and older history does not matter much. Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-15 00:57:32 +02:00
if (bisect_list)
revs.limited = 1;
if (prepare_revision_walk(&revs))
die("revision walk setup failed");
if (revs.tree_objects)
mark_edges_uninteresting(revs.commits, &revs, show_edge);
if (bisect_list) {
int reaches = reaches, all = all;
revs.commits = find_bisection(revs.commits, &reaches, &all,
bisect_find_all);
if (bisect_show_vars)
return show_bisect_vars(&info, reaches, all);
}
traverse_commit_list(&revs, show_commit, show_object, &info);
if (revs.count) {
if (revs.left_right && revs.cherry_mark)
printf("%d\t%d\t%d\n", revs.count_left, revs.count_right, revs.count_same);
else if (revs.left_right)
printf("%d\t%d\n", revs.count_left, revs.count_right);
else if (revs.cherry_mark)
printf("%d\t%d\n", revs.count_left + revs.count_right, revs.count_same);
else
printf("%d\n", revs.count_left + revs.count_right);
}
return 0;
}