2006-02-26 01:19:46 +01:00
|
|
|
#include "cache.h"
|
|
|
|
#include "tag.h"
|
|
|
|
#include "blob.h"
|
|
|
|
#include "tree.h"
|
|
|
|
#include "commit.h"
|
2006-02-28 20:24:00 +01:00
|
|
|
#include "diff.h"
|
2006-02-26 01:19:46 +01:00
|
|
|
#include "refs.h"
|
|
|
|
#include "revision.h"
|
2008-05-04 12:36:54 +02:00
|
|
|
#include "graph.h"
|
2006-09-18 00:43:40 +02:00
|
|
|
#include "grep.h"
|
2007-01-11 11:47:48 +01:00
|
|
|
#include "reflog-walk.h"
|
2007-04-09 12:40:38 +02:00
|
|
|
#include "patch-ids.h"
|
2008-04-03 11:12:06 +02:00
|
|
|
#include "decorate.h"
|
2008-11-03 20:25:46 +01:00
|
|
|
#include "log-tree.h"
|
2010-03-12 18:04:26 +01:00
|
|
|
#include "string-list.h"
|
Implement line-history search (git log -L)
This is a rewrite of much of Bo's work, mainly in an effort to split
it into smaller, easier to understand routines.
The algorithm is built around the struct range_set, which encodes a
series of line ranges as intervals [a,b). This is used in two
contexts:
* A set of lines we are tracking (which will change as we dig through
history).
* To encode diffs, as pairs of ranges.
The main routine is range_set_map_across_diff(). It processes the
diff between a commit C and some parent P. It determines which diff
hunks are relevant to the ranges tracked in C, and computes the new
ranges for P.
The algorithm is then simply to process history in topological order
from newest to oldest, computing ranges and (partial) diffs. At
branch points, we need to merge the ranges we are watching. We will
find that many commits do not affect the chosen ranges, and mark them
TREESAME (in addition to those already filtered by pathspec limiting).
Another pass of history simplification then gets rid of such commits.
This is wired as an extra filtering pass in the log machinery. This
currently only reduces code duplication, but should allow for other
simplifications and options to be used.
Finally, we hook a diff printer into the output chain. Ideally we
would wire directly into the diff logic, to optionally use features
like word diff. However, that will require some major reworking of
the diff chain, so we completely replace the output with our own diff
for now.
As this was a GSoC project, and has quite some history by now, many
people have helped. In no particular order, thanks go to
Jakub Narebski <jnareb@gmail.com>
Jens Lehmann <Jens.Lehmann@web.de>
Jonathan Nieder <jrnieder@gmail.com>
Junio C Hamano <gitster@pobox.com>
Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Will Palmer <wmpalmer@gmail.com>
Apologies to everyone I forgot.
Signed-off-by: Bo Yang <struggleyb.nku@gmail.com>
Signed-off-by: Thomas Rast <trast@student.ethz.ch>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-28 17:47:32 +01:00
|
|
|
#include "line-log.h"
|
2013-01-05 22:26:45 +01:00
|
|
|
#include "mailmap.h"
|
log: use true parents for diff even when rewriting
When using pathspec filtering in combination with diff-based log
output, parent simplification happens before the diff is computed.
The diff is therefore against the *simplified* parents.
This works okay, arguably by accident, in the normal case:
simplification reduces to one parent as long as the commit is TREESAME
to it. So the simplified parent of any given commit must have the
same tree contents on the filtered paths as its true (unfiltered)
parent.
However, --full-diff breaks this guarantee, and indeed gives pretty
spectacular results when comparing the output of
git log --graph --stat ...
git log --graph --full-diff --stat ...
(--graph internally kicks in parent simplification, much like
--parents).
To fix it, store a copy of the parent list before simplification (in a
slab) whenever --full-diff is in effect. Then use the stored parents
instead of the simplified ones in the commit display code paths. The
latter do not actually check for --full-diff to avoid duplicated code;
they just grab the original parents if save_parents() has not been
called for this revision walk.
For ordinary commits it should be obvious that this is the right thing
to do.
Merge commits are a bit subtle. Observe that with default
simplification, merge simplification is an all-or-nothing decision:
either the merge is TREESAME to one parent and disappears, or it is
different from all parents and the parent list remains intact.
Redundant parents are not pruned, so the existing code also shows them
as a merge.
So if we do show a merge commit, the parent list just consists of the
rewrite result on each parent. Running, e.g., --cc on this in
--full-diff mode is not very useful: if any commits were skipped, some
hunks will disagree with all sides of the merge (with one side,
because commits were skipped; with the others, because they didn't
have those changes in the first place). This triggers --cc showing
these hunks spuriously.
Therefore I believe that even for merge commits it is better to show
the diffs wrt. the original parents.
Reported-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Thomas Rast <trast@inf.ethz.ch>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-31 22:13:20 +02:00
|
|
|
#include "commit-slab.h"
|
2014-01-24 14:40:28 +01:00
|
|
|
#include "dir.h"
|
2014-10-17 02:44:23 +02:00
|
|
|
#include "cache-tree.h"
|
2015-06-29 17:40:30 +02:00
|
|
|
#include "bisect.h"
|
2006-02-26 01:19:46 +01:00
|
|
|
|
2007-11-03 19:11:10 +01:00
|
|
|
volatile show_early_output_fn_t show_early_output;
|
|
|
|
|
2015-06-29 17:40:30 +02:00
|
|
|
static const char *term_bad;
|
|
|
|
static const char *term_good;
|
|
|
|
|
show_object(): push path_name() call further down
In particular, pushing the "path_name()" call _into_ the show() function
would seem to allow
- more clarity into who "owns" the name (ie now when we free the name in
the show_object callback, it's because we generated it ourselves by
calling path_name())
- not calling path_name() at all, either because we don't care about the
name in the first place, or because we are actually happy walking the
linked list of "struct name_path *" and the last component.
Now, I didn't do that latter optimization, because it would require some
more coding, but especially looking at "builtin-pack-objects.c", we really
don't even want the whole pathname, we really would be better off with the
list of path components.
Why? We use that name for two things:
- add_preferred_base_object(), which actually _wants_ to traverse the
path, and now does it by looking for '/' characters!
- for 'name_hash()', which only cares about the last 16 characters of a
name, so again, generating the full name seems to be just unnecessary
work.
Anyway, so I didn't look any closer at those things, but it did convince
me that the "show_object()" calling convention was crazy, and we're
actually better off doing _less_ in list-objects.c, and giving people
access to the internal data structures so that they can decide whether
they want to generate a path-name or not.
This patch does that, and then for people who did use the name (even if
they might do something more clever in the future), it just does the
straightforward "name = path_name(path, component); .. free(name);" thing.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-04-11 03:15:26 +02:00
|
|
|
char *path_name(const struct name_path *path, const char *name)
|
2006-02-26 01:19:46 +01:00
|
|
|
{
|
show_object(): push path_name() call further down
In particular, pushing the "path_name()" call _into_ the show() function
would seem to allow
- more clarity into who "owns" the name (ie now when we free the name in
the show_object callback, it's because we generated it ourselves by
calling path_name())
- not calling path_name() at all, either because we don't care about the
name in the first place, or because we are actually happy walking the
linked list of "struct name_path *" and the last component.
Now, I didn't do that latter optimization, because it would require some
more coding, but especially looking at "builtin-pack-objects.c", we really
don't even want the whole pathname, we really would be better off with the
list of path components.
Why? We use that name for two things:
- add_preferred_base_object(), which actually _wants_ to traverse the
path, and now does it by looking for '/' characters!
- for 'name_hash()', which only cares about the last 16 characters of a
name, so again, generating the full name seems to be just unnecessary
work.
Anyway, so I didn't look any closer at those things, but it did convince
me that the "show_object()" calling convention was crazy, and we're
actually better off doing _less_ in list-objects.c, and giving people
access to the internal data structures so that they can decide whether
they want to generate a path-name or not.
This patch does that, and then for people who did use the name (even if
they might do something more clever in the future), it just does the
straightforward "name = path_name(path, component); .. free(name);" thing.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-04-11 03:15:26 +02:00
|
|
|
const struct name_path *p;
|
2006-02-26 01:19:46 +01:00
|
|
|
char *n, *m;
|
|
|
|
int nlen = strlen(name);
|
|
|
|
int len = nlen + 1;
|
|
|
|
|
|
|
|
for (p = path; p; p = p->up) {
|
|
|
|
if (p->elem_len)
|
|
|
|
len += p->elem_len + 1;
|
|
|
|
}
|
|
|
|
n = xmalloc(len);
|
|
|
|
m = n + len - (nlen + 1);
|
2015-09-24 23:08:19 +02:00
|
|
|
memcpy(m, name, nlen + 1);
|
2006-02-26 01:19:46 +01:00
|
|
|
for (p = path; p; p = p->up) {
|
|
|
|
if (p->elem_len) {
|
|
|
|
m -= p->elem_len + 1;
|
|
|
|
memcpy(m, p->elem, p->elem_len);
|
|
|
|
m[p->elem_len] = '/';
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
2011-08-17 23:30:35 +02:00
|
|
|
static int show_path_component_truncated(FILE *out, const char *name, int len)
|
|
|
|
{
|
|
|
|
int cnt;
|
|
|
|
for (cnt = 0; cnt < len; cnt++) {
|
|
|
|
int ch = name[cnt];
|
|
|
|
if (!ch || ch == '\n')
|
|
|
|
return -1;
|
|
|
|
fputc(ch, out);
|
|
|
|
}
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int show_path_truncated(FILE *out, const struct name_path *path)
|
|
|
|
{
|
|
|
|
int emitted, ours;
|
|
|
|
|
|
|
|
if (!path)
|
|
|
|
return 0;
|
|
|
|
emitted = show_path_truncated(out, path->up);
|
|
|
|
if (emitted < 0)
|
|
|
|
return emitted;
|
|
|
|
if (emitted)
|
|
|
|
fputc('/', out);
|
|
|
|
ours = show_path_component_truncated(out, path->elem, path->elem_len);
|
|
|
|
if (ours < 0)
|
|
|
|
return ours;
|
|
|
|
return ours || emitted;
|
|
|
|
}
|
|
|
|
|
2013-05-25 11:08:07 +02:00
|
|
|
void show_object_with_name(FILE *out, struct object *obj,
|
|
|
|
const struct name_path *path, const char *component)
|
2011-08-17 23:30:34 +02:00
|
|
|
{
|
2011-08-17 23:30:35 +02:00
|
|
|
struct name_path leaf;
|
|
|
|
leaf.up = (struct name_path *)path;
|
|
|
|
leaf.elem = component;
|
|
|
|
leaf.elem_len = strlen(component);
|
2011-08-17 23:30:34 +02:00
|
|
|
|
2015-11-10 03:22:28 +01:00
|
|
|
fprintf(out, "%s ", oid_to_hex(&obj->oid));
|
2011-08-17 23:30:35 +02:00
|
|
|
show_path_truncated(out, &leaf);
|
|
|
|
fputc('\n', out);
|
2011-08-17 23:30:34 +02:00
|
|
|
}
|
|
|
|
|
2006-02-26 01:19:46 +01:00
|
|
|
static void mark_blob_uninteresting(struct blob *blob)
|
|
|
|
{
|
2008-02-18 21:47:54 +01:00
|
|
|
if (!blob)
|
|
|
|
return;
|
2006-02-26 01:19:46 +01:00
|
|
|
if (blob->object.flags & UNINTERESTING)
|
|
|
|
return;
|
|
|
|
blob->object.flags |= UNINTERESTING;
|
|
|
|
}
|
|
|
|
|
2014-01-16 00:38:01 +01:00
|
|
|
static void mark_tree_contents_uninteresting(struct tree *tree)
|
2006-02-26 01:19:46 +01:00
|
|
|
{
|
2006-05-29 21:20:14 +02:00
|
|
|
struct tree_desc desc;
|
tree_entry(): new tree-walking helper function
This adds a "tree_entry()" function that combines the common operation of
doing a "tree_entry_extract()" + "update_tree_entry()".
It also has a simplified calling convention, designed for simple loops
that traverse over a whole tree: the arguments are pointers to the tree
descriptor and a name_entry structure to fill in, and it returns a boolean
"true" if there was an entry left to be gotten in the tree.
This allows tree traversal with
struct tree_desc desc;
struct name_entry entry;
desc.buf = tree->buffer;
desc.size = tree->size;
while (tree_entry(&desc, &entry) {
... use "entry.{path, sha1, mode, pathlen}" ...
}
which is not only shorter than writing it out in full, it's hopefully less
error prone too.
[ It's actually a tad faster too - we don't need to recalculate the entry
pathlength in both extract and update, but need to do it only once.
Also, some callers can avoid doing a "strlen()" on the result, since
it's returned as part of the name_entry structure.
However, by now we're talking just 1% speedup on "git-rev-list --objects
--all", and we're definitely at the point where tree walking is no
longer the issue any more. ]
NOTE! Not everybody wants to use this new helper function, since some of
the tree walkers very much on purpose do the descriptor update separately
from the entry extraction. So the "extract + update" sequence still
remains as the core sequence, this is just a simplified interface.
We should probably add a silly two-line inline helper function for
initializing the descriptor from the "struct tree" too, just to cut down
on the noise from that common "desc" initializer.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-05-30 18:45:45 +02:00
|
|
|
struct name_entry entry;
|
2006-02-26 01:19:46 +01:00
|
|
|
struct object *obj = &tree->object;
|
|
|
|
|
2015-11-10 03:22:28 +01:00
|
|
|
if (!has_object_file(&obj->oid))
|
2006-02-26 01:19:46 +01:00
|
|
|
return;
|
|
|
|
if (parse_tree(tree) < 0)
|
2015-11-10 03:22:28 +01:00
|
|
|
die("bad tree %s", oid_to_hex(&obj->oid));
|
2006-05-29 21:20:14 +02:00
|
|
|
|
2007-03-21 18:08:25 +01:00
|
|
|
init_tree_desc(&desc, tree->buffer, tree->size);
|
tree_entry(): new tree-walking helper function
This adds a "tree_entry()" function that combines the common operation of
doing a "tree_entry_extract()" + "update_tree_entry()".
It also has a simplified calling convention, designed for simple loops
that traverse over a whole tree: the arguments are pointers to the tree
descriptor and a name_entry structure to fill in, and it returns a boolean
"true" if there was an entry left to be gotten in the tree.
This allows tree traversal with
struct tree_desc desc;
struct name_entry entry;
desc.buf = tree->buffer;
desc.size = tree->size;
while (tree_entry(&desc, &entry) {
... use "entry.{path, sha1, mode, pathlen}" ...
}
which is not only shorter than writing it out in full, it's hopefully less
error prone too.
[ It's actually a tad faster too - we don't need to recalculate the entry
pathlength in both extract and update, but need to do it only once.
Also, some callers can avoid doing a "strlen()" on the result, since
it's returned as part of the name_entry structure.
However, by now we're talking just 1% speedup on "git-rev-list --objects
--all", and we're definitely at the point where tree walking is no
longer the issue any more. ]
NOTE! Not everybody wants to use this new helper function, since some of
the tree walkers very much on purpose do the descriptor update separately
from the entry extraction. So the "extract + update" sequence still
remains as the core sequence, this is just a simplified interface.
We should probably add a silly two-line inline helper function for
initializing the descriptor from the "struct tree" too, just to cut down
on the noise from that common "desc" initializer.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-05-30 18:45:45 +02:00
|
|
|
while (tree_entry(&desc, &entry)) {
|
2007-11-12 00:35:23 +01:00
|
|
|
switch (object_type(entry.mode)) {
|
|
|
|
case OBJ_TREE:
|
tree_entry(): new tree-walking helper function
This adds a "tree_entry()" function that combines the common operation of
doing a "tree_entry_extract()" + "update_tree_entry()".
It also has a simplified calling convention, designed for simple loops
that traverse over a whole tree: the arguments are pointers to the tree
descriptor and a name_entry structure to fill in, and it returns a boolean
"true" if there was an entry left to be gotten in the tree.
This allows tree traversal with
struct tree_desc desc;
struct name_entry entry;
desc.buf = tree->buffer;
desc.size = tree->size;
while (tree_entry(&desc, &entry) {
... use "entry.{path, sha1, mode, pathlen}" ...
}
which is not only shorter than writing it out in full, it's hopefully less
error prone too.
[ It's actually a tad faster too - we don't need to recalculate the entry
pathlength in both extract and update, but need to do it only once.
Also, some callers can avoid doing a "strlen()" on the result, since
it's returned as part of the name_entry structure.
However, by now we're talking just 1% speedup on "git-rev-list --objects
--all", and we're definitely at the point where tree walking is no
longer the issue any more. ]
NOTE! Not everybody wants to use this new helper function, since some of
the tree walkers very much on purpose do the descriptor update separately
from the entry extraction. So the "extract + update" sequence still
remains as the core sequence, this is just a simplified interface.
We should probably add a silly two-line inline helper function for
initializing the descriptor from the "struct tree" too, just to cut down
on the noise from that common "desc" initializer.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-05-30 18:45:45 +02:00
|
|
|
mark_tree_uninteresting(lookup_tree(entry.sha1));
|
2007-11-12 00:35:23 +01:00
|
|
|
break;
|
|
|
|
case OBJ_BLOB:
|
tree_entry(): new tree-walking helper function
This adds a "tree_entry()" function that combines the common operation of
doing a "tree_entry_extract()" + "update_tree_entry()".
It also has a simplified calling convention, designed for simple loops
that traverse over a whole tree: the arguments are pointers to the tree
descriptor and a name_entry structure to fill in, and it returns a boolean
"true" if there was an entry left to be gotten in the tree.
This allows tree traversal with
struct tree_desc desc;
struct name_entry entry;
desc.buf = tree->buffer;
desc.size = tree->size;
while (tree_entry(&desc, &entry) {
... use "entry.{path, sha1, mode, pathlen}" ...
}
which is not only shorter than writing it out in full, it's hopefully less
error prone too.
[ It's actually a tad faster too - we don't need to recalculate the entry
pathlength in both extract and update, but need to do it only once.
Also, some callers can avoid doing a "strlen()" on the result, since
it's returned as part of the name_entry structure.
However, by now we're talking just 1% speedup on "git-rev-list --objects
--all", and we're definitely at the point where tree walking is no
longer the issue any more. ]
NOTE! Not everybody wants to use this new helper function, since some of
the tree walkers very much on purpose do the descriptor update separately
from the entry extraction. So the "extract + update" sequence still
remains as the core sequence, this is just a simplified interface.
We should probably add a silly two-line inline helper function for
initializing the descriptor from the "struct tree" too, just to cut down
on the noise from that common "desc" initializer.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-05-30 18:45:45 +02:00
|
|
|
mark_blob_uninteresting(lookup_blob(entry.sha1));
|
2007-11-12 00:35:23 +01:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
/* Subproject commit - not in this repository */
|
|
|
|
break;
|
|
|
|
}
|
2006-02-26 01:19:46 +01:00
|
|
|
}
|
2006-05-29 21:20:14 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* We don't care about the tree any more
|
|
|
|
* after it has been marked uninteresting.
|
|
|
|
*/
|
2013-06-06 00:37:39 +02:00
|
|
|
free_tree_buffer(tree);
|
2006-02-26 01:19:46 +01:00
|
|
|
}
|
|
|
|
|
2014-01-16 00:38:01 +01:00
|
|
|
void mark_tree_uninteresting(struct tree *tree)
|
|
|
|
{
|
|
|
|
struct object *obj = &tree->object;
|
|
|
|
|
|
|
|
if (!tree)
|
|
|
|
return;
|
|
|
|
if (obj->flags & UNINTERESTING)
|
|
|
|
return;
|
|
|
|
obj->flags |= UNINTERESTING;
|
|
|
|
mark_tree_contents_uninteresting(tree);
|
2006-02-26 01:19:46 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void mark_parents_uninteresting(struct commit *commit)
|
|
|
|
{
|
2012-01-14 13:19:53 +01:00
|
|
|
struct commit_list *parents = NULL, *l;
|
|
|
|
|
|
|
|
for (l = commit->parents; l; l = l->next)
|
|
|
|
commit_list_insert(l->item, &parents);
|
2006-02-26 01:19:46 +01:00
|
|
|
|
|
|
|
while (parents) {
|
2015-10-24 18:21:31 +02:00
|
|
|
struct commit *commit = pop_commit(&parents);
|
2012-01-14 13:19:53 +01:00
|
|
|
|
|
|
|
while (commit) {
|
|
|
|
/*
|
|
|
|
* A missing commit is ok iff its parent is marked
|
|
|
|
* uninteresting.
|
|
|
|
*
|
|
|
|
* We just mark such a thing parsed, so that when
|
|
|
|
* it is popped next time around, we won't be trying
|
|
|
|
* to parse it and get an error.
|
|
|
|
*/
|
2015-11-10 03:22:28 +01:00
|
|
|
if (!has_object_file(&commit->object.oid))
|
2012-01-14 13:19:53 +01:00
|
|
|
commit->object.parsed = 1;
|
|
|
|
|
|
|
|
if (commit->object.flags & UNINTERESTING)
|
|
|
|
break;
|
|
|
|
|
2006-03-09 05:04:36 +01:00
|
|
|
commit->object.flags |= UNINTERESTING;
|
2006-02-26 01:19:46 +01:00
|
|
|
|
2006-03-09 05:04:36 +01:00
|
|
|
/*
|
|
|
|
* Normally we haven't parsed the parent
|
|
|
|
* yet, so we won't have a parent of a parent
|
|
|
|
* here. However, it may turn out that we've
|
|
|
|
* reached this commit some other way (where it
|
|
|
|
* wasn't uninteresting), in which case we need
|
|
|
|
* to mark its parents recursively too..
|
|
|
|
*/
|
2012-01-14 13:19:53 +01:00
|
|
|
if (!commit->parents)
|
|
|
|
break;
|
2006-02-26 01:19:46 +01:00
|
|
|
|
2012-01-14 13:19:53 +01:00
|
|
|
for (l = commit->parents->next; l; l = l->next)
|
|
|
|
commit_list_insert(l->item, &parents);
|
|
|
|
commit = commit->parents->item;
|
|
|
|
}
|
2006-02-26 01:19:46 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
traverse_commit_list: support pending blobs/trees with paths
When we call traverse_commit_list, we may have trees and
blobs in the pending array. As we process these, we pass the
"name" field from the pending entry as the path of the
object within the tree (which then becomes the root path if
we recurse into subtrees).
When we set up the traversal in prepare_revision_walk,
though, the "name" field of any pending trees and blobs is
likely to be the ref at which we found the object. We would
not want to make this part of the path (e.g., doing so would
make "git rev-list --objects v2.6.11-tree" in linux.git show
paths like "v2.6.11-tree/Makefile", which is nonsensical).
Therefore prepare_revision_walk sets the name field of each
pending tree and blobs to the empty string.
However, this leaves no room for a caller who does know the
correct path of a pending object to propagate that
information to the revision walker. We can fix this by
making two related changes:
1. Use the "path" field as the path instead of the "name"
field in traverse_commit_list. If the path is not set,
default to "" (which is what we always ended up with in
the current code, because of prepare_revision_walk).
2. In prepare_revision_walk, make a complete copy of the
entry. This makes the path field available to the
walker (if there is one), solving our problem.
Leaving the name field intact is now OK, as we do not
use it as a path due to point (1) above (and we can use
it to make more meaningful error messages if we want).
We also make the original "mode" field available to the
walker, though it does not actually use it.
Note that we still re-add the pending objects and free the
old ones (so we may strdup the path and name only to free
the old ones). This could be made more efficient by simply
copying the object_array entries that we are keeping.
However, that would require more restructuring of the code,
and is not done here.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-10-16 00:43:19 +02:00
|
|
|
static void add_pending_object_with_path(struct rev_info *revs,
|
2013-05-25 11:08:07 +02:00
|
|
|
struct object *obj,
|
traverse_commit_list: support pending blobs/trees with paths
When we call traverse_commit_list, we may have trees and
blobs in the pending array. As we process these, we pass the
"name" field from the pending entry as the path of the
object within the tree (which then becomes the root path if
we recurse into subtrees).
When we set up the traversal in prepare_revision_walk,
though, the "name" field of any pending trees and blobs is
likely to be the ref at which we found the object. We would
not want to make this part of the path (e.g., doing so would
make "git rev-list --objects v2.6.11-tree" in linux.git show
paths like "v2.6.11-tree/Makefile", which is nonsensical).
Therefore prepare_revision_walk sets the name field of each
pending tree and blobs to the empty string.
However, this leaves no room for a caller who does know the
correct path of a pending object to propagate that
information to the revision walker. We can fix this by
making two related changes:
1. Use the "path" field as the path instead of the "name"
field in traverse_commit_list. If the path is not set,
default to "" (which is what we always ended up with in
the current code, because of prepare_revision_walk).
2. In prepare_revision_walk, make a complete copy of the
entry. This makes the path field available to the
walker (if there is one), solving our problem.
Leaving the name field intact is now OK, as we do not
use it as a path due to point (1) above (and we can use
it to make more meaningful error messages if we want).
We also make the original "mode" field available to the
walker, though it does not actually use it.
Note that we still re-add the pending objects and free the
old ones (so we may strdup the path and name only to free
the old ones). This could be made more efficient by simply
copying the object_array entries that we are keeping.
However, that would require more restructuring of the code,
and is not done here.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-10-16 00:43:19 +02:00
|
|
|
const char *name, unsigned mode,
|
|
|
|
const char *path)
|
2006-02-26 01:19:46 +01:00
|
|
|
{
|
2011-05-19 03:08:09 +02:00
|
|
|
if (!obj)
|
|
|
|
return;
|
2007-02-28 01:22:52 +01:00
|
|
|
if (revs->no_walk && (obj->flags & UNINTERESTING))
|
Make 'git show' more useful
For some reason, I ended up doing
git show HEAD~5..
as an odd way of asking for a log. I realize I should just have used "git
log", but at the same time it does make perfect conceptual sense. After
all, you _could_ have done
git show HEAD HEAD~1 HEAD~2 HEAD~3 HEAD~4
and saying "git show HEAD~5.." is pretty natural. It's not like "git show"
only ever showed a single commit (or other object) before either! So
conceptually, giving a commit range is a very sensible operation, even
though you'd traditionally have used "git log" for that.
However, doing that currently results in an error
fatal: object ranges do not make sense when not walking revisions
which admittedly _also_ makes perfect sense - from an internal git
implementation standpoint in 'revision.c'.
However, I think that asking to show a range makes sense to a user, while
saying "object ranges no not make sense when not walking revisions" only
makes sense to a git developer.
So on the whole, of the two different "makes perfect sense" behaviors, I
think I originally picked the wrong one. And quite frankly, I don't really
see anybody actually _depending_ on that error case. So why not change it?
So rather than error out, just turn that non-walking error case into a
"silently turn on walking" instead.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-07-13 23:41:12 +02:00
|
|
|
revs->no_walk = 0;
|
2010-01-26 22:48:28 +01:00
|
|
|
if (revs->reflog_info && obj->type == OBJ_COMMIT) {
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
2013-09-02 08:34:29 +02:00
|
|
|
int len = interpret_branch_name(name, 0, &buf);
|
2010-01-26 22:48:28 +01:00
|
|
|
int st;
|
|
|
|
|
|
|
|
if (0 < len && name[len] && buf.len)
|
|
|
|
strbuf_addstr(&buf, name + len);
|
|
|
|
st = add_reflog_for_walk(revs->reflog_info,
|
|
|
|
(struct commit *)obj,
|
|
|
|
buf.buf[0] ? buf.buf: name);
|
|
|
|
strbuf_release(&buf);
|
|
|
|
if (st)
|
|
|
|
return;
|
|
|
|
}
|
traverse_commit_list: support pending blobs/trees with paths
When we call traverse_commit_list, we may have trees and
blobs in the pending array. As we process these, we pass the
"name" field from the pending entry as the path of the
object within the tree (which then becomes the root path if
we recurse into subtrees).
When we set up the traversal in prepare_revision_walk,
though, the "name" field of any pending trees and blobs is
likely to be the ref at which we found the object. We would
not want to make this part of the path (e.g., doing so would
make "git rev-list --objects v2.6.11-tree" in linux.git show
paths like "v2.6.11-tree/Makefile", which is nonsensical).
Therefore prepare_revision_walk sets the name field of each
pending tree and blobs to the empty string.
However, this leaves no room for a caller who does know the
correct path of a pending object to propagate that
information to the revision walker. We can fix this by
making two related changes:
1. Use the "path" field as the path instead of the "name"
field in traverse_commit_list. If the path is not set,
default to "" (which is what we always ended up with in
the current code, because of prepare_revision_walk).
2. In prepare_revision_walk, make a complete copy of the
entry. This makes the path field available to the
walker (if there is one), solving our problem.
Leaving the name field intact is now OK, as we do not
use it as a path due to point (1) above (and we can use
it to make more meaningful error messages if we want).
We also make the original "mode" field available to the
walker, though it does not actually use it.
Note that we still re-add the pending objects and free the
old ones (so we may strdup the path and name only to free
the old ones). This could be made more efficient by simply
copying the object_array entries that we are keeping.
However, that would require more restructuring of the code,
and is not done here.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-10-16 00:43:19 +02:00
|
|
|
add_object_array_with_path(obj, name, &revs->pending, mode, path);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void add_pending_object_with_mode(struct rev_info *revs,
|
|
|
|
struct object *obj,
|
|
|
|
const char *name, unsigned mode)
|
|
|
|
{
|
|
|
|
add_pending_object_with_path(revs, obj, name, mode, NULL);
|
2006-02-26 01:19:46 +01:00
|
|
|
}
|
|
|
|
|
2013-05-25 11:08:07 +02:00
|
|
|
void add_pending_object(struct rev_info *revs,
|
|
|
|
struct object *obj, const char *name)
|
2007-06-08 11:24:58 +02:00
|
|
|
{
|
|
|
|
add_pending_object_with_mode(revs, obj, name, S_IFINVALID);
|
|
|
|
}
|
|
|
|
|
2007-12-11 19:09:04 +01:00
|
|
|
void add_head_to_pending(struct rev_info *revs)
|
|
|
|
{
|
|
|
|
unsigned char sha1[20];
|
|
|
|
struct object *obj;
|
|
|
|
if (get_sha1("HEAD", sha1))
|
|
|
|
return;
|
|
|
|
obj = parse_object(sha1);
|
|
|
|
if (!obj)
|
|
|
|
return;
|
|
|
|
add_pending_object(revs, obj, "HEAD");
|
|
|
|
}
|
|
|
|
|
2013-05-25 11:08:07 +02:00
|
|
|
static struct object *get_reference(struct rev_info *revs, const char *name,
|
|
|
|
const unsigned char *sha1,
|
|
|
|
unsigned int flags)
|
2006-02-26 01:19:46 +01:00
|
|
|
{
|
|
|
|
struct object *object;
|
|
|
|
|
|
|
|
object = parse_object(sha1);
|
2011-05-19 03:08:09 +02:00
|
|
|
if (!object) {
|
|
|
|
if (revs->ignore_missing)
|
|
|
|
return object;
|
2006-02-26 01:19:46 +01:00
|
|
|
die("bad object %s", name);
|
2011-05-19 03:08:09 +02:00
|
|
|
}
|
Common option parsing for "git log --diff" and friends
This basically does a few things that are sadly somewhat interdependent,
and nontrivial to split out
- get rid of "struct log_tree_opt"
The fields in "log_tree_opt" are moved into "struct rev_info", and all
users of log_tree_opt are changed to use the rev_info struct instead.
- add the parsing for the log_tree_opt arguments to "setup_revision()"
- make setup_revision set a flag (revs->diff) if the diff-related
arguments were used. This allows "git log" to decide whether it wants
to show diffs or not.
- make setup_revision() also initialize the diffopt part of rev_info
(which we had from before, but we just didn't initialize it)
- make setup_revision() do all the "finishing touches" on it all (it will
do the proper flag combination logic, and call "diff_setup_done()")
Now, that was the easy and straightforward part.
The slightly more involved part is that some of the programs that want to
use the new-and-improved rev_info parsing don't actually want _commits_,
they may want tree'ish arguments instead. That meant that I had to change
setup_revision() to parse the arguments not into the "revs->commits" list,
but into the "revs->pending_objects" list.
Then, when we do "prepare_revision_walk()", we walk that list, and create
the sorted commit list from there.
This actually cleaned some stuff up, but it's the less obvious part of the
patch, and re-organized the "revision.c" logic somewhat. It actually paves
the way for splitting argument parsing _entirely_ out of "revision.c",
since now the argument parsing really is totally independent of the commit
walking: that didn't use to be true, since there was lots of overlap with
get_commit_reference() handling etc, now the _only_ overlap is the shared
(and trivial) "add_pending_object()" thing.
However, I didn't do that file split, just because I wanted the diff
itself to be smaller, and show the actual changes more clearly. If this
gets accepted, I'll do further cleanups then - that includes the file
split, but also using the new infrastructure to do a nicer "git diff" etc.
Even in this form, it actually ends up removing more lines than it adds.
It's nice to note how simple and straightforward this makes the built-in
"git log" command, even though it continues to support all the diff flags
too. It doesn't get much simpler that this.
I think this is worth merging soonish, because it does allow for future
cleanup and even more sharing of code. However, it obviously touches
"revision.c", which is subtle. I've tested that it passes all the tests we
have, and it passes my "looks sane" detector, but somebody else should
also give it a good look-over.
[jc: squashed the original and three "oops this too" updates, with
another fix-up.]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-15 01:52:13 +02:00
|
|
|
object->flags |= flags;
|
|
|
|
return object;
|
|
|
|
}
|
|
|
|
|
2011-10-01 17:43:52 +02:00
|
|
|
void add_pending_sha1(struct rev_info *revs, const char *name,
|
|
|
|
const unsigned char *sha1, unsigned int flags)
|
|
|
|
{
|
|
|
|
struct object *object = get_reference(revs, name, sha1, flags);
|
|
|
|
add_pending_object(revs, object, name);
|
|
|
|
}
|
|
|
|
|
2013-05-25 11:08:07 +02:00
|
|
|
static struct commit *handle_commit(struct rev_info *revs,
|
traverse_commit_list: support pending blobs/trees with paths
When we call traverse_commit_list, we may have trees and
blobs in the pending array. As we process these, we pass the
"name" field from the pending entry as the path of the
object within the tree (which then becomes the root path if
we recurse into subtrees).
When we set up the traversal in prepare_revision_walk,
though, the "name" field of any pending trees and blobs is
likely to be the ref at which we found the object. We would
not want to make this part of the path (e.g., doing so would
make "git rev-list --objects v2.6.11-tree" in linux.git show
paths like "v2.6.11-tree/Makefile", which is nonsensical).
Therefore prepare_revision_walk sets the name field of each
pending tree and blobs to the empty string.
However, this leaves no room for a caller who does know the
correct path of a pending object to propagate that
information to the revision walker. We can fix this by
making two related changes:
1. Use the "path" field as the path instead of the "name"
field in traverse_commit_list. If the path is not set,
default to "" (which is what we always ended up with in
the current code, because of prepare_revision_walk).
2. In prepare_revision_walk, make a complete copy of the
entry. This makes the path field available to the
walker (if there is one), solving our problem.
Leaving the name field intact is now OK, as we do not
use it as a path due to point (1) above (and we can use
it to make more meaningful error messages if we want).
We also make the original "mode" field available to the
walker, though it does not actually use it.
Note that we still re-add the pending objects and free the
old ones (so we may strdup the path and name only to free
the old ones). This could be made more efficient by simply
copying the object_array entries that we are keeping.
However, that would require more restructuring of the code,
and is not done here.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-10-16 00:43:19 +02:00
|
|
|
struct object_array_entry *entry)
|
Common option parsing for "git log --diff" and friends
This basically does a few things that are sadly somewhat interdependent,
and nontrivial to split out
- get rid of "struct log_tree_opt"
The fields in "log_tree_opt" are moved into "struct rev_info", and all
users of log_tree_opt are changed to use the rev_info struct instead.
- add the parsing for the log_tree_opt arguments to "setup_revision()"
- make setup_revision set a flag (revs->diff) if the diff-related
arguments were used. This allows "git log" to decide whether it wants
to show diffs or not.
- make setup_revision() also initialize the diffopt part of rev_info
(which we had from before, but we just didn't initialize it)
- make setup_revision() do all the "finishing touches" on it all (it will
do the proper flag combination logic, and call "diff_setup_done()")
Now, that was the easy and straightforward part.
The slightly more involved part is that some of the programs that want to
use the new-and-improved rev_info parsing don't actually want _commits_,
they may want tree'ish arguments instead. That meant that I had to change
setup_revision() to parse the arguments not into the "revs->commits" list,
but into the "revs->pending_objects" list.
Then, when we do "prepare_revision_walk()", we walk that list, and create
the sorted commit list from there.
This actually cleaned some stuff up, but it's the less obvious part of the
patch, and re-organized the "revision.c" logic somewhat. It actually paves
the way for splitting argument parsing _entirely_ out of "revision.c",
since now the argument parsing really is totally independent of the commit
walking: that didn't use to be true, since there was lots of overlap with
get_commit_reference() handling etc, now the _only_ overlap is the shared
(and trivial) "add_pending_object()" thing.
However, I didn't do that file split, just because I wanted the diff
itself to be smaller, and show the actual changes more clearly. If this
gets accepted, I'll do further cleanups then - that includes the file
split, but also using the new infrastructure to do a nicer "git diff" etc.
Even in this form, it actually ends up removing more lines than it adds.
It's nice to note how simple and straightforward this makes the built-in
"git log" command, even though it continues to support all the diff flags
too. It doesn't get much simpler that this.
I think this is worth merging soonish, because it does allow for future
cleanup and even more sharing of code. However, it obviously touches
"revision.c", which is subtle. I've tested that it passes all the tests we
have, and it passes my "looks sane" detector, but somebody else should
also give it a good look-over.
[jc: squashed the original and three "oops this too" updates, with
another fix-up.]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-15 01:52:13 +02:00
|
|
|
{
|
traverse_commit_list: support pending blobs/trees with paths
When we call traverse_commit_list, we may have trees and
blobs in the pending array. As we process these, we pass the
"name" field from the pending entry as the path of the
object within the tree (which then becomes the root path if
we recurse into subtrees).
When we set up the traversal in prepare_revision_walk,
though, the "name" field of any pending trees and blobs is
likely to be the ref at which we found the object. We would
not want to make this part of the path (e.g., doing so would
make "git rev-list --objects v2.6.11-tree" in linux.git show
paths like "v2.6.11-tree/Makefile", which is nonsensical).
Therefore prepare_revision_walk sets the name field of each
pending tree and blobs to the empty string.
However, this leaves no room for a caller who does know the
correct path of a pending object to propagate that
information to the revision walker. We can fix this by
making two related changes:
1. Use the "path" field as the path instead of the "name"
field in traverse_commit_list. If the path is not set,
default to "" (which is what we always ended up with in
the current code, because of prepare_revision_walk).
2. In prepare_revision_walk, make a complete copy of the
entry. This makes the path field available to the
walker (if there is one), solving our problem.
Leaving the name field intact is now OK, as we do not
use it as a path due to point (1) above (and we can use
it to make more meaningful error messages if we want).
We also make the original "mode" field available to the
walker, though it does not actually use it.
Note that we still re-add the pending objects and free the
old ones (so we may strdup the path and name only to free
the old ones). This could be made more efficient by simply
copying the object_array entries that we are keeping.
However, that would require more restructuring of the code,
and is not done here.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-10-16 00:43:19 +02:00
|
|
|
struct object *object = entry->item;
|
|
|
|
const char *name = entry->name;
|
|
|
|
const char *path = entry->path;
|
|
|
|
unsigned int mode = entry->mode;
|
Common option parsing for "git log --diff" and friends
This basically does a few things that are sadly somewhat interdependent,
and nontrivial to split out
- get rid of "struct log_tree_opt"
The fields in "log_tree_opt" are moved into "struct rev_info", and all
users of log_tree_opt are changed to use the rev_info struct instead.
- add the parsing for the log_tree_opt arguments to "setup_revision()"
- make setup_revision set a flag (revs->diff) if the diff-related
arguments were used. This allows "git log" to decide whether it wants
to show diffs or not.
- make setup_revision() also initialize the diffopt part of rev_info
(which we had from before, but we just didn't initialize it)
- make setup_revision() do all the "finishing touches" on it all (it will
do the proper flag combination logic, and call "diff_setup_done()")
Now, that was the easy and straightforward part.
The slightly more involved part is that some of the programs that want to
use the new-and-improved rev_info parsing don't actually want _commits_,
they may want tree'ish arguments instead. That meant that I had to change
setup_revision() to parse the arguments not into the "revs->commits" list,
but into the "revs->pending_objects" list.
Then, when we do "prepare_revision_walk()", we walk that list, and create
the sorted commit list from there.
This actually cleaned some stuff up, but it's the less obvious part of the
patch, and re-organized the "revision.c" logic somewhat. It actually paves
the way for splitting argument parsing _entirely_ out of "revision.c",
since now the argument parsing really is totally independent of the commit
walking: that didn't use to be true, since there was lots of overlap with
get_commit_reference() handling etc, now the _only_ overlap is the shared
(and trivial) "add_pending_object()" thing.
However, I didn't do that file split, just because I wanted the diff
itself to be smaller, and show the actual changes more clearly. If this
gets accepted, I'll do further cleanups then - that includes the file
split, but also using the new infrastructure to do a nicer "git diff" etc.
Even in this form, it actually ends up removing more lines than it adds.
It's nice to note how simple and straightforward this makes the built-in
"git log" command, even though it continues to support all the diff flags
too. It doesn't get much simpler that this.
I think this is worth merging soonish, because it does allow for future
cleanup and even more sharing of code. However, it obviously touches
"revision.c", which is subtle. I've tested that it passes all the tests we
have, and it passes my "looks sane" detector, but somebody else should
also give it a good look-over.
[jc: squashed the original and three "oops this too" updates, with
another fix-up.]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-15 01:52:13 +02:00
|
|
|
unsigned long flags = object->flags;
|
2006-02-26 01:19:46 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Tag object? Look what it points to..
|
|
|
|
*/
|
2006-07-12 05:45:31 +02:00
|
|
|
while (object->type == OBJ_TAG) {
|
2006-02-26 01:19:46 +01:00
|
|
|
struct tag *tag = (struct tag *) object;
|
Common option parsing for "git log --diff" and friends
This basically does a few things that are sadly somewhat interdependent,
and nontrivial to split out
- get rid of "struct log_tree_opt"
The fields in "log_tree_opt" are moved into "struct rev_info", and all
users of log_tree_opt are changed to use the rev_info struct instead.
- add the parsing for the log_tree_opt arguments to "setup_revision()"
- make setup_revision set a flag (revs->diff) if the diff-related
arguments were used. This allows "git log" to decide whether it wants
to show diffs or not.
- make setup_revision() also initialize the diffopt part of rev_info
(which we had from before, but we just didn't initialize it)
- make setup_revision() do all the "finishing touches" on it all (it will
do the proper flag combination logic, and call "diff_setup_done()")
Now, that was the easy and straightforward part.
The slightly more involved part is that some of the programs that want to
use the new-and-improved rev_info parsing don't actually want _commits_,
they may want tree'ish arguments instead. That meant that I had to change
setup_revision() to parse the arguments not into the "revs->commits" list,
but into the "revs->pending_objects" list.
Then, when we do "prepare_revision_walk()", we walk that list, and create
the sorted commit list from there.
This actually cleaned some stuff up, but it's the less obvious part of the
patch, and re-organized the "revision.c" logic somewhat. It actually paves
the way for splitting argument parsing _entirely_ out of "revision.c",
since now the argument parsing really is totally independent of the commit
walking: that didn't use to be true, since there was lots of overlap with
get_commit_reference() handling etc, now the _only_ overlap is the shared
(and trivial) "add_pending_object()" thing.
However, I didn't do that file split, just because I wanted the diff
itself to be smaller, and show the actual changes more clearly. If this
gets accepted, I'll do further cleanups then - that includes the file
split, but also using the new infrastructure to do a nicer "git diff" etc.
Even in this form, it actually ends up removing more lines than it adds.
It's nice to note how simple and straightforward this makes the built-in
"git log" command, even though it continues to support all the diff flags
too. It doesn't get much simpler that this.
I think this is worth merging soonish, because it does allow for future
cleanup and even more sharing of code. However, it obviously touches
"revision.c", which is subtle. I've tested that it passes all the tests we
have, and it passes my "looks sane" detector, but somebody else should
also give it a good look-over.
[jc: squashed the original and three "oops this too" updates, with
another fix-up.]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-15 01:52:13 +02:00
|
|
|
if (revs->tag_objects && !(flags & UNINTERESTING))
|
2006-02-26 01:19:46 +01:00
|
|
|
add_pending_object(revs, object, tag->tag);
|
2008-02-18 21:48:01 +01:00
|
|
|
if (!tag->tagged)
|
|
|
|
die("bad tag");
|
2015-11-10 03:22:29 +01:00
|
|
|
object = parse_object(tag->tagged->oid.hash);
|
2009-01-28 08:19:30 +01:00
|
|
|
if (!object) {
|
|
|
|
if (flags & UNINTERESTING)
|
|
|
|
return NULL;
|
2015-11-10 03:22:28 +01:00
|
|
|
die("bad object %s", oid_to_hex(&tag->tagged->oid));
|
2009-01-28 08:19:30 +01:00
|
|
|
}
|
2014-01-15 21:26:13 +01:00
|
|
|
object->flags |= flags;
|
traverse_commit_list: support pending blobs/trees with paths
When we call traverse_commit_list, we may have trees and
blobs in the pending array. As we process these, we pass the
"name" field from the pending entry as the path of the
object within the tree (which then becomes the root path if
we recurse into subtrees).
When we set up the traversal in prepare_revision_walk,
though, the "name" field of any pending trees and blobs is
likely to be the ref at which we found the object. We would
not want to make this part of the path (e.g., doing so would
make "git rev-list --objects v2.6.11-tree" in linux.git show
paths like "v2.6.11-tree/Makefile", which is nonsensical).
Therefore prepare_revision_walk sets the name field of each
pending tree and blobs to the empty string.
However, this leaves no room for a caller who does know the
correct path of a pending object to propagate that
information to the revision walker. We can fix this by
making two related changes:
1. Use the "path" field as the path instead of the "name"
field in traverse_commit_list. If the path is not set,
default to "" (which is what we always ended up with in
the current code, because of prepare_revision_walk).
2. In prepare_revision_walk, make a complete copy of the
entry. This makes the path field available to the
walker (if there is one), solving our problem.
Leaving the name field intact is now OK, as we do not
use it as a path due to point (1) above (and we can use
it to make more meaningful error messages if we want).
We also make the original "mode" field available to the
walker, though it does not actually use it.
Note that we still re-add the pending objects and free the
old ones (so we may strdup the path and name only to free
the old ones). This could be made more efficient by simply
copying the object_array entries that we are keeping.
However, that would require more restructuring of the code,
and is not done here.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-10-16 00:43:19 +02:00
|
|
|
/*
|
|
|
|
* We'll handle the tagged object by looping or dropping
|
|
|
|
* through to the non-tag handlers below. Do not
|
|
|
|
* propagate data from the tag's pending entry.
|
|
|
|
*/
|
|
|
|
name = "";
|
|
|
|
path = NULL;
|
|
|
|
mode = 0;
|
2006-02-26 01:19:46 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Commit object? Just return it, we'll do all the complex
|
|
|
|
* reachability crud.
|
|
|
|
*/
|
2006-07-12 05:45:31 +02:00
|
|
|
if (object->type == OBJ_COMMIT) {
|
2006-02-26 01:19:46 +01:00
|
|
|
struct commit *commit = (struct commit *)object;
|
|
|
|
if (parse_commit(commit) < 0)
|
|
|
|
die("unable to parse commit %s", name);
|
2006-02-27 17:54:36 +01:00
|
|
|
if (flags & UNINTERESTING) {
|
2006-02-26 01:19:46 +01:00
|
|
|
mark_parents_uninteresting(commit);
|
2006-02-27 17:54:36 +01:00
|
|
|
revs->limited = 1;
|
|
|
|
}
|
2008-10-27 20:51:59 +01:00
|
|
|
if (revs->show_source && !commit->util)
|
clean up name allocation in prepare_revision_walk
When we enter prepare_revision_walk, we have zero or more
entries in our "pending" array. We disconnect that array
from the rev_info, and then process each entry:
1. If the entry is a commit and the --source option is in
effect, we keep a pointer to the object name.
2. Otherwise, we re-add the item to the pending list with
a blank name.
We then throw away the old array by freeing the array
itself, but do not touch the "name" field of each entry. For
any items of type (2), we leak the memory associated with
the name. This commit fixes that by calling object_array_clear,
which handles the cleanup for us.
That breaks (1), though, because it depends on the memory
pointed to by the name to last forever. We can solve that by
making a copy of the name. This is slightly less efficient,
but it shouldn't matter in practice, as we do it only for
the tip commits of the traversal.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-10-16 00:35:12 +02:00
|
|
|
commit->util = xstrdup(name);
|
2006-02-26 01:19:46 +01:00
|
|
|
return commit;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2009-04-17 20:13:30 +02:00
|
|
|
* Tree object? Either mark it uninteresting, or add it
|
2006-02-26 01:19:46 +01:00
|
|
|
* to the list of objects to look at later..
|
|
|
|
*/
|
2006-07-12 05:45:31 +02:00
|
|
|
if (object->type == OBJ_TREE) {
|
2006-02-26 01:19:46 +01:00
|
|
|
struct tree *tree = (struct tree *)object;
|
|
|
|
if (!revs->tree_objects)
|
|
|
|
return NULL;
|
|
|
|
if (flags & UNINTERESTING) {
|
2014-01-16 00:38:01 +01:00
|
|
|
mark_tree_contents_uninteresting(tree);
|
2006-02-26 01:19:46 +01:00
|
|
|
return NULL;
|
|
|
|
}
|
traverse_commit_list: support pending blobs/trees with paths
When we call traverse_commit_list, we may have trees and
blobs in the pending array. As we process these, we pass the
"name" field from the pending entry as the path of the
object within the tree (which then becomes the root path if
we recurse into subtrees).
When we set up the traversal in prepare_revision_walk,
though, the "name" field of any pending trees and blobs is
likely to be the ref at which we found the object. We would
not want to make this part of the path (e.g., doing so would
make "git rev-list --objects v2.6.11-tree" in linux.git show
paths like "v2.6.11-tree/Makefile", which is nonsensical).
Therefore prepare_revision_walk sets the name field of each
pending tree and blobs to the empty string.
However, this leaves no room for a caller who does know the
correct path of a pending object to propagate that
information to the revision walker. We can fix this by
making two related changes:
1. Use the "path" field as the path instead of the "name"
field in traverse_commit_list. If the path is not set,
default to "" (which is what we always ended up with in
the current code, because of prepare_revision_walk).
2. In prepare_revision_walk, make a complete copy of the
entry. This makes the path field available to the
walker (if there is one), solving our problem.
Leaving the name field intact is now OK, as we do not
use it as a path due to point (1) above (and we can use
it to make more meaningful error messages if we want).
We also make the original "mode" field available to the
walker, though it does not actually use it.
Note that we still re-add the pending objects and free the
old ones (so we may strdup the path and name only to free
the old ones). This could be made more efficient by simply
copying the object_array entries that we are keeping.
However, that would require more restructuring of the code,
and is not done here.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-10-16 00:43:19 +02:00
|
|
|
add_pending_object_with_path(revs, object, name, mode, path);
|
2006-02-26 01:19:46 +01:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Blob object? You know the drill by now..
|
|
|
|
*/
|
2006-07-12 05:45:31 +02:00
|
|
|
if (object->type == OBJ_BLOB) {
|
2006-02-26 01:19:46 +01:00
|
|
|
if (!revs->blob_objects)
|
|
|
|
return NULL;
|
2014-01-15 21:26:13 +01:00
|
|
|
if (flags & UNINTERESTING)
|
2006-02-26 01:19:46 +01:00
|
|
|
return NULL;
|
traverse_commit_list: support pending blobs/trees with paths
When we call traverse_commit_list, we may have trees and
blobs in the pending array. As we process these, we pass the
"name" field from the pending entry as the path of the
object within the tree (which then becomes the root path if
we recurse into subtrees).
When we set up the traversal in prepare_revision_walk,
though, the "name" field of any pending trees and blobs is
likely to be the ref at which we found the object. We would
not want to make this part of the path (e.g., doing so would
make "git rev-list --objects v2.6.11-tree" in linux.git show
paths like "v2.6.11-tree/Makefile", which is nonsensical).
Therefore prepare_revision_walk sets the name field of each
pending tree and blobs to the empty string.
However, this leaves no room for a caller who does know the
correct path of a pending object to propagate that
information to the revision walker. We can fix this by
making two related changes:
1. Use the "path" field as the path instead of the "name"
field in traverse_commit_list. If the path is not set,
default to "" (which is what we always ended up with in
the current code, because of prepare_revision_walk).
2. In prepare_revision_walk, make a complete copy of the
entry. This makes the path field available to the
walker (if there is one), solving our problem.
Leaving the name field intact is now OK, as we do not
use it as a path due to point (1) above (and we can use
it to make more meaningful error messages if we want).
We also make the original "mode" field available to the
walker, though it does not actually use it.
Note that we still re-add the pending objects and free the
old ones (so we may strdup the path and name only to free
the old ones). This could be made more efficient by simply
copying the object_array entries that we are keeping.
However, that would require more restructuring of the code,
and is not done here.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-10-16 00:43:19 +02:00
|
|
|
add_pending_object_with_path(revs, object, name, mode, path);
|
2006-02-26 01:19:46 +01:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
die("%s is unknown object", name);
|
|
|
|
}
|
|
|
|
|
limit_list: avoid quadratic behavior from still_interesting
When we are limiting a rev-list traversal due to
UNINTERESTING refs, we have to walk down the tips (both
interesting and uninteresting) to find where they intersect.
We keep a queue of commits to examine, pop commits off
the queue one by one, and potentially add their parents. The
size of the queue will naturally fluctuate based on the
"width" of the history graph; i.e., the number of
simultaneous lines of development. But for the most part it
will stay in the same ballpark as the initial number of tips
we fed, shrinking over time (as we hit common ancestors of
the tips). So roughly speaking, if we start with `N` tips,
we'll spend much of the time with a queue around `N` items.
For each UNINTERESTING commit we pop, we call
still_interesting to check whether marking its parents as
UNINTERESTING has made the whole queue uninteresting (in
which case we can quit early). Because the queue is stored
as a linked list, this is `O(N)`, where `N` is the number of
items in the queue. So processing a queue with `N` commits
marked UNINTERESTING (and one or more interesting commits)
will take `O(N^2)`.
If you feed a lot of positive tips, this isn't a problem.
They aren't UNINTERESTING, so they don't incur the
still_interesting check. It also isn't a problem if you
traverse from an interesting tip to some UNINTERESTING
bases. We order the queue by recency, so the interesting
commits stay at the front of the queue as we walk down them.
The linear check can exit early as soon as it sees one
interesting commit left in the queue.
But if you want to know whether an older commit is reachable
from a set of newer tips, we end up processing in the
opposite direction: from the UNINTERESTING ones down to the
interesting one. This may happen when we call:
git rev-list $commits --not --all
in check_everything_connected after a fetch. If we fetched
something much older than most of our refs, and if we have a
large number of refs, the traversal cost is dominated by the
quadratic behavior.
These commands simulate the connectivity check of such a
fetch, when you have `$n` distinct refs in the receiver:
# positive ref is 100,000 commits deep
git rev-list --all | head -100000 | tail -1 >input
# huge number of more recent negative refs
git rev-list --all | head -$n | sed s/^/^/ >>input
time git rev-list --stdin <input
Here are timings for various `n` on the linux.git
repository. The `n=1` case provides a baseline for just
walking the commits, which lets us see the still_interesting
overhead. The times marked with `+` subtract that baseline
to show just the extra time growth due to the large number
of refs. The `x` numbers show the slowdown of the adjusted
time versus the prior trial.
n | before | after
--------------------------------------------------------
1 | 0.991s | 0.848s
10000 | 1.120s (+0.129s) | 0.885s (+0.037s)
20000 | 1.451s (+0.460s, 3.5x) | 0.923s (+0.075s, 2.0x)
40000 | 2.731s (+1.740s, 3.8x) | 0.994s (+0.146s, 1.9x)
80000 | 8.235s (+7.244s, 4.2x) | 1.123s (+0.275s, 1.9x)
Each trial doubles `n`, so you can see the quadratic (`4x`)
behavior before this patch. Afterwards, we have a roughly
linear relationship.
The implementation is fairly straightforward. Whenever we do
the linear search, we cache the interesting commit we find,
and next time check it before doing another linear search.
If that commit is removed from the list or becomes
UNINTERESTING itself, then we fall back to the linear
search. This is very similar to the trick used by fce87ae
(Fix quadratic performance in rewrite_one., 2008-07-12).
I considered and rejected several possible alternatives:
1. Keep a count of UNINTERESTING commits in the queue.
This requires managing the count not only when removing
an item from the queue, but also when marking an item
as UNINTERESTING. That requires touching the other
functions which mark commits, and would require knowing
quickly which commits are in the queue (lookup in the
queue is linear, so we would need an auxiliary
structure or to also maintain an IN_QUEUE flag in each
commit object).
2. Keep a separate list of interesting commits. Drop items
from it when they are dropped from the queue, or if
they become UNINTERESTING. This again suffers from
extra complexity to maintain the list, not to mention
CPU and memory.
3. Use a better data structure for the queue. This is
something that could help the fix in fce87ae, because
we order the queue by recency, and it is about
inserting quickly in recency order. So a normal
priority queue would help there. But here, we cannot
disturb the order of the queue, which makes things
harder. We really do need an auxiliary index to track
the flag we care about, which is basically option (2)
above.
The "cache" trick is simple, and the numbers above show that
it works well in practice. This is because the length of
time it takes to find an interesting commit is proportional
to the length of time it will remain cached (i.e., if we
have to walk a long way to find it, it also means we have to
pop a lot of elements in the queue until we get rid of it
and have to find another interesting commit).
The worst case is still quadratic, though. We could have `N`
uninteresting commits at the front of the queue, followed by
`N` interesting commits, where commit `i` has parent `i+N`.
When we pop commit `i`, we will notice that the parent of
the next commit, `i+1+N` is still interesting and cache it.
But then handling commit `i+1`, we will mark its parent
`i+1+N` uninteresting, and immediately invalidate our cache.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-04-18 00:11:04 +02:00
|
|
|
static int everybody_uninteresting(struct commit_list *orig,
|
|
|
|
struct commit **interesting_cache)
|
2006-02-28 20:24:00 +01:00
|
|
|
{
|
|
|
|
struct commit_list *list = orig;
|
limit_list: avoid quadratic behavior from still_interesting
When we are limiting a rev-list traversal due to
UNINTERESTING refs, we have to walk down the tips (both
interesting and uninteresting) to find where they intersect.
We keep a queue of commits to examine, pop commits off
the queue one by one, and potentially add their parents. The
size of the queue will naturally fluctuate based on the
"width" of the history graph; i.e., the number of
simultaneous lines of development. But for the most part it
will stay in the same ballpark as the initial number of tips
we fed, shrinking over time (as we hit common ancestors of
the tips). So roughly speaking, if we start with `N` tips,
we'll spend much of the time with a queue around `N` items.
For each UNINTERESTING commit we pop, we call
still_interesting to check whether marking its parents as
UNINTERESTING has made the whole queue uninteresting (in
which case we can quit early). Because the queue is stored
as a linked list, this is `O(N)`, where `N` is the number of
items in the queue. So processing a queue with `N` commits
marked UNINTERESTING (and one or more interesting commits)
will take `O(N^2)`.
If you feed a lot of positive tips, this isn't a problem.
They aren't UNINTERESTING, so they don't incur the
still_interesting check. It also isn't a problem if you
traverse from an interesting tip to some UNINTERESTING
bases. We order the queue by recency, so the interesting
commits stay at the front of the queue as we walk down them.
The linear check can exit early as soon as it sees one
interesting commit left in the queue.
But if you want to know whether an older commit is reachable
from a set of newer tips, we end up processing in the
opposite direction: from the UNINTERESTING ones down to the
interesting one. This may happen when we call:
git rev-list $commits --not --all
in check_everything_connected after a fetch. If we fetched
something much older than most of our refs, and if we have a
large number of refs, the traversal cost is dominated by the
quadratic behavior.
These commands simulate the connectivity check of such a
fetch, when you have `$n` distinct refs in the receiver:
# positive ref is 100,000 commits deep
git rev-list --all | head -100000 | tail -1 >input
# huge number of more recent negative refs
git rev-list --all | head -$n | sed s/^/^/ >>input
time git rev-list --stdin <input
Here are timings for various `n` on the linux.git
repository. The `n=1` case provides a baseline for just
walking the commits, which lets us see the still_interesting
overhead. The times marked with `+` subtract that baseline
to show just the extra time growth due to the large number
of refs. The `x` numbers show the slowdown of the adjusted
time versus the prior trial.
n | before | after
--------------------------------------------------------
1 | 0.991s | 0.848s
10000 | 1.120s (+0.129s) | 0.885s (+0.037s)
20000 | 1.451s (+0.460s, 3.5x) | 0.923s (+0.075s, 2.0x)
40000 | 2.731s (+1.740s, 3.8x) | 0.994s (+0.146s, 1.9x)
80000 | 8.235s (+7.244s, 4.2x) | 1.123s (+0.275s, 1.9x)
Each trial doubles `n`, so you can see the quadratic (`4x`)
behavior before this patch. Afterwards, we have a roughly
linear relationship.
The implementation is fairly straightforward. Whenever we do
the linear search, we cache the interesting commit we find,
and next time check it before doing another linear search.
If that commit is removed from the list or becomes
UNINTERESTING itself, then we fall back to the linear
search. This is very similar to the trick used by fce87ae
(Fix quadratic performance in rewrite_one., 2008-07-12).
I considered and rejected several possible alternatives:
1. Keep a count of UNINTERESTING commits in the queue.
This requires managing the count not only when removing
an item from the queue, but also when marking an item
as UNINTERESTING. That requires touching the other
functions which mark commits, and would require knowing
quickly which commits are in the queue (lookup in the
queue is linear, so we would need an auxiliary
structure or to also maintain an IN_QUEUE flag in each
commit object).
2. Keep a separate list of interesting commits. Drop items
from it when they are dropped from the queue, or if
they become UNINTERESTING. This again suffers from
extra complexity to maintain the list, not to mention
CPU and memory.
3. Use a better data structure for the queue. This is
something that could help the fix in fce87ae, because
we order the queue by recency, and it is about
inserting quickly in recency order. So a normal
priority queue would help there. But here, we cannot
disturb the order of the queue, which makes things
harder. We really do need an auxiliary index to track
the flag we care about, which is basically option (2)
above.
The "cache" trick is simple, and the numbers above show that
it works well in practice. This is because the length of
time it takes to find an interesting commit is proportional
to the length of time it will remain cached (i.e., if we
have to walk a long way to find it, it also means we have to
pop a lot of elements in the queue until we get rid of it
and have to find another interesting commit).
The worst case is still quadratic, though. We could have `N`
uninteresting commits at the front of the queue, followed by
`N` interesting commits, where commit `i` has parent `i+N`.
When we pop commit `i`, we will notice that the parent of
the next commit, `i+1+N` is still interesting and cache it.
But then handling commit `i+1`, we will mark its parent
`i+1+N` uninteresting, and immediately invalidate our cache.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-04-18 00:11:04 +02:00
|
|
|
|
|
|
|
if (*interesting_cache) {
|
|
|
|
struct commit *commit = *interesting_cache;
|
|
|
|
if (!(commit->object.flags & UNINTERESTING))
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2006-02-28 20:24:00 +01:00
|
|
|
while (list) {
|
|
|
|
struct commit *commit = list->item;
|
|
|
|
list = list->next;
|
|
|
|
if (commit->object.flags & UNINTERESTING)
|
|
|
|
continue;
|
2015-06-26 21:40:19 +02:00
|
|
|
|
|
|
|
*interesting_cache = commit;
|
2006-02-28 20:24:00 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2013-05-16 17:32:39 +02:00
|
|
|
/*
|
|
|
|
* A definition of "relevant" commit that we can use to simplify limited graphs
|
|
|
|
* by eliminating side branches.
|
|
|
|
*
|
|
|
|
* A "relevant" commit is one that is !UNINTERESTING (ie we are including it
|
|
|
|
* in our list), or that is a specified BOTTOM commit. Then after computing
|
|
|
|
* a limited list, during processing we can generally ignore boundary merges
|
|
|
|
* coming from outside the graph, (ie from irrelevant parents), and treat
|
|
|
|
* those merges as if they were single-parent. TREESAME is defined to consider
|
|
|
|
* only relevant parents, if any. If we are TREESAME to our on-graph parents,
|
|
|
|
* we don't care if we were !TREESAME to non-graph parents.
|
|
|
|
*
|
|
|
|
* Treating bottom commits as relevant ensures that a limited graph's
|
|
|
|
* connection to the actual bottom commit is not viewed as a side branch, but
|
|
|
|
* treated as part of the graph. For example:
|
|
|
|
*
|
|
|
|
* ....Z...A---X---o---o---B
|
|
|
|
* . /
|
|
|
|
* W---Y
|
|
|
|
*
|
|
|
|
* When computing "A..B", the A-X connection is at least as important as
|
|
|
|
* Y-X, despite A being flagged UNINTERESTING.
|
|
|
|
*
|
|
|
|
* And when computing --ancestry-path "A..B", the A-X connection is more
|
|
|
|
* important than Y-X, despite both A and Y being flagged UNINTERESTING.
|
|
|
|
*/
|
|
|
|
static inline int relevant_commit(struct commit *commit)
|
|
|
|
{
|
|
|
|
return (commit->object.flags & (UNINTERESTING | BOTTOM)) != UNINTERESTING;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Return a single relevant commit from a parent list. If we are a TREESAME
|
|
|
|
* commit, and this selects one of our parents, then we can safely simplify to
|
|
|
|
* that parent.
|
|
|
|
*/
|
|
|
|
static struct commit *one_relevant_parent(const struct rev_info *revs,
|
|
|
|
struct commit_list *orig)
|
|
|
|
{
|
|
|
|
struct commit_list *list = orig;
|
|
|
|
struct commit *relevant = NULL;
|
|
|
|
|
|
|
|
if (!orig)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For 1-parent commits, or if first-parent-only, then return that
|
|
|
|
* first parent (even if not "relevant" by the above definition).
|
|
|
|
* TREESAME will have been set purely on that parent.
|
|
|
|
*/
|
|
|
|
if (revs->first_parent_only || !orig->next)
|
|
|
|
return orig->item;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For multi-parent commits, identify a sole relevant parent, if any.
|
|
|
|
* If we have only one relevant parent, then TREESAME will be set purely
|
|
|
|
* with regard to that parent, and we can simplify accordingly.
|
|
|
|
*
|
|
|
|
* If we have more than one relevant parent, or no relevant parents
|
|
|
|
* (and multiple irrelevant ones), then we can't select a parent here
|
|
|
|
* and return NULL.
|
|
|
|
*/
|
|
|
|
while (list) {
|
|
|
|
struct commit *commit = list->item;
|
|
|
|
list = list->next;
|
|
|
|
if (relevant_commit(commit)) {
|
|
|
|
if (relevant)
|
|
|
|
return NULL;
|
|
|
|
relevant = commit;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return relevant;
|
|
|
|
}
|
|
|
|
|
2007-03-14 21:12:18 +01:00
|
|
|
/*
|
|
|
|
* The goal is to get REV_TREE_NEW as the result only if the
|
2009-06-03 03:34:01 +02:00
|
|
|
* diff consists of all '+' (and no other changes), REV_TREE_OLD
|
|
|
|
* if the whole diff is removal of old data, and otherwise
|
|
|
|
* REV_TREE_DIFFERENT (of course if the trees are the same we
|
|
|
|
* want REV_TREE_SAME).
|
|
|
|
* That means that once we get to REV_TREE_DIFFERENT, we do not
|
|
|
|
* have to look any further.
|
2007-03-14 21:12:18 +01:00
|
|
|
*/
|
2006-03-10 10:21:39 +01:00
|
|
|
static int tree_difference = REV_TREE_SAME;
|
2006-02-28 20:24:00 +01:00
|
|
|
|
|
|
|
static void file_add_remove(struct diff_options *options,
|
|
|
|
int addremove, unsigned mode,
|
|
|
|
const unsigned char *sha1,
|
diff: do not use null sha1 as a sentinel value
The diff code represents paths using the diff_filespec
struct. This struct has a sha1 to represent the sha1 of the
content at that path, as well as a sha1_valid member which
indicates whether its sha1 field is actually useful. If
sha1_valid is not true, then the filespec represents a
working tree file (e.g., for the no-index case, or for when
the index is not up-to-date).
The diff_filespec is only used internally, though. At the
interfaces to the diff subsystem, callers feed the sha1
directly, and we create a diff_filespec from it. It's at
that point that we look at the sha1 and decide whether it is
valid or not; callers may pass the null sha1 as a sentinel
value to indicate that it is not.
We should not typically see the null sha1 coming from any
other source (e.g., in the index itself, or from a tree).
However, a corrupt tree might have a null sha1, which would
cause "diff --patch" to accidentally diff the working tree
version of a file instead of treating it as a blob.
This patch extends the edges of the diff interface to accept
a "sha1_valid" flag whenever we accept a sha1, and to use
that flag when creating a filespec. In some cases, this
means passing the flag through several layers, making the
code change larger than would be desirable.
One alternative would be to simply die() upon seeing
corrupted trees with null sha1s. However, this fix more
directly addresses the problem (while bogus sha1s in a tree
are probably a bad thing, it is really the sentinel
confusion sending us down the wrong code path that is what
makes it devastating). And it means that git is more capable
of examining and debugging these corrupted trees. For
example, you can still "diff --raw" such a tree to find out
when the bogus entry was introduced; you just cannot do a
"--patch" diff (just as you could not with any other
corrupted tree, as we do not have any content to diff).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-07-28 17:03:01 +02:00
|
|
|
int sha1_valid,
|
2010-01-18 21:26:18 +01:00
|
|
|
const char *fullpath, unsigned dirty_submodule)
|
2006-02-28 20:24:00 +01:00
|
|
|
{
|
2009-06-03 03:34:01 +02:00
|
|
|
int diff = addremove == '+' ? REV_TREE_NEW : REV_TREE_OLD;
|
2006-02-28 20:24:00 +01:00
|
|
|
|
2009-06-03 03:34:01 +02:00
|
|
|
tree_difference |= diff;
|
2007-03-14 21:18:15 +01:00
|
|
|
if (tree_difference == REV_TREE_DIFFERENT)
|
2007-11-10 20:05:14 +01:00
|
|
|
DIFF_OPT_SET(options, HAS_CHANGES);
|
2006-02-28 20:24:00 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void file_change(struct diff_options *options,
|
|
|
|
unsigned old_mode, unsigned new_mode,
|
|
|
|
const unsigned char *old_sha1,
|
|
|
|
const unsigned char *new_sha1,
|
diff: do not use null sha1 as a sentinel value
The diff code represents paths using the diff_filespec
struct. This struct has a sha1 to represent the sha1 of the
content at that path, as well as a sha1_valid member which
indicates whether its sha1 field is actually useful. If
sha1_valid is not true, then the filespec represents a
working tree file (e.g., for the no-index case, or for when
the index is not up-to-date).
The diff_filespec is only used internally, though. At the
interfaces to the diff subsystem, callers feed the sha1
directly, and we create a diff_filespec from it. It's at
that point that we look at the sha1 and decide whether it is
valid or not; callers may pass the null sha1 as a sentinel
value to indicate that it is not.
We should not typically see the null sha1 coming from any
other source (e.g., in the index itself, or from a tree).
However, a corrupt tree might have a null sha1, which would
cause "diff --patch" to accidentally diff the working tree
version of a file instead of treating it as a blob.
This patch extends the edges of the diff interface to accept
a "sha1_valid" flag whenever we accept a sha1, and to use
that flag when creating a filespec. In some cases, this
means passing the flag through several layers, making the
code change larger than would be desirable.
One alternative would be to simply die() upon seeing
corrupted trees with null sha1s. However, this fix more
directly addresses the problem (while bogus sha1s in a tree
are probably a bad thing, it is really the sentinel
confusion sending us down the wrong code path that is what
makes it devastating). And it means that git is more capable
of examining and debugging these corrupted trees. For
example, you can still "diff --raw" such a tree to find out
when the bogus entry was introduced; you just cannot do a
"--patch" diff (just as you could not with any other
corrupted tree, as we do not have any content to diff).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-07-28 17:03:01 +02:00
|
|
|
int old_sha1_valid, int new_sha1_valid,
|
2010-01-18 21:26:18 +01:00
|
|
|
const char *fullpath,
|
|
|
|
unsigned old_dirty_submodule, unsigned new_dirty_submodule)
|
2006-02-28 20:24:00 +01:00
|
|
|
{
|
2006-03-10 10:21:39 +01:00
|
|
|
tree_difference = REV_TREE_DIFFERENT;
|
2007-11-10 20:05:14 +01:00
|
|
|
DIFF_OPT_SET(options, HAS_CHANGES);
|
2006-02-28 20:24:00 +01:00
|
|
|
}
|
|
|
|
|
2013-05-25 11:08:07 +02:00
|
|
|
static int rev_compare_tree(struct rev_info *revs,
|
|
|
|
struct commit *parent, struct commit *commit)
|
2006-02-28 20:24:00 +01:00
|
|
|
{
|
2008-11-03 19:45:41 +01:00
|
|
|
struct tree *t1 = parent->tree;
|
|
|
|
struct tree *t2 = commit->tree;
|
|
|
|
|
2006-02-28 20:24:00 +01:00
|
|
|
if (!t1)
|
2006-03-10 10:21:39 +01:00
|
|
|
return REV_TREE_NEW;
|
2009-06-03 03:34:01 +02:00
|
|
|
if (!t2)
|
|
|
|
return REV_TREE_OLD;
|
2008-11-03 20:25:46 +01:00
|
|
|
|
|
|
|
if (revs->simplify_by_decoration) {
|
|
|
|
/*
|
|
|
|
* If we are simplifying by decoration, then the commit
|
|
|
|
* is worth showing if it has a tag pointing at it.
|
|
|
|
*/
|
2014-08-26 12:23:54 +02:00
|
|
|
if (get_name_decoration(&commit->object))
|
2008-11-03 20:25:46 +01:00
|
|
|
return REV_TREE_DIFFERENT;
|
|
|
|
/*
|
|
|
|
* A commit that is not pointed by a tag is uninteresting
|
|
|
|
* if we are not limited by path. This means that you will
|
|
|
|
* see the usual "commits that touch the paths" plus any
|
|
|
|
* tagged commit by specifying both --simplify-by-decoration
|
|
|
|
* and pathspec.
|
|
|
|
*/
|
2010-12-17 13:43:06 +01:00
|
|
|
if (!revs->prune_data.nr)
|
2008-11-03 20:25:46 +01:00
|
|
|
return REV_TREE_SAME;
|
|
|
|
}
|
2009-06-03 03:34:01 +02:00
|
|
|
|
2006-03-10 10:21:39 +01:00
|
|
|
tree_difference = REV_TREE_SAME;
|
2007-11-10 20:05:14 +01:00
|
|
|
DIFF_OPT_CLR(&revs->pruning, HAS_CHANGES);
|
2015-11-10 03:22:29 +01:00
|
|
|
if (diff_tree_sha1(t1->object.oid.hash, t2->object.oid.hash, "",
|
Common option parsing for "git log --diff" and friends
This basically does a few things that are sadly somewhat interdependent,
and nontrivial to split out
- get rid of "struct log_tree_opt"
The fields in "log_tree_opt" are moved into "struct rev_info", and all
users of log_tree_opt are changed to use the rev_info struct instead.
- add the parsing for the log_tree_opt arguments to "setup_revision()"
- make setup_revision set a flag (revs->diff) if the diff-related
arguments were used. This allows "git log" to decide whether it wants
to show diffs or not.
- make setup_revision() also initialize the diffopt part of rev_info
(which we had from before, but we just didn't initialize it)
- make setup_revision() do all the "finishing touches" on it all (it will
do the proper flag combination logic, and call "diff_setup_done()")
Now, that was the easy and straightforward part.
The slightly more involved part is that some of the programs that want to
use the new-and-improved rev_info parsing don't actually want _commits_,
they may want tree'ish arguments instead. That meant that I had to change
setup_revision() to parse the arguments not into the "revs->commits" list,
but into the "revs->pending_objects" list.
Then, when we do "prepare_revision_walk()", we walk that list, and create
the sorted commit list from there.
This actually cleaned some stuff up, but it's the less obvious part of the
patch, and re-organized the "revision.c" logic somewhat. It actually paves
the way for splitting argument parsing _entirely_ out of "revision.c",
since now the argument parsing really is totally independent of the commit
walking: that didn't use to be true, since there was lots of overlap with
get_commit_reference() handling etc, now the _only_ overlap is the shared
(and trivial) "add_pending_object()" thing.
However, I didn't do that file split, just because I wanted the diff
itself to be smaller, and show the actual changes more clearly. If this
gets accepted, I'll do further cleanups then - that includes the file
split, but also using the new infrastructure to do a nicer "git diff" etc.
Even in this form, it actually ends up removing more lines than it adds.
It's nice to note how simple and straightforward this makes the built-in
"git log" command, even though it continues to support all the diff flags
too. It doesn't get much simpler that this.
I think this is worth merging soonish, because it does allow for future
cleanup and even more sharing of code. However, it obviously touches
"revision.c", which is subtle. I've tested that it passes all the tests we
have, and it passes my "looks sane" detector, but somebody else should
also give it a good look-over.
[jc: squashed the original and three "oops this too" updates, with
another fix-up.]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-15 01:52:13 +02:00
|
|
|
&revs->pruning) < 0)
|
2006-03-10 10:21:39 +01:00
|
|
|
return REV_TREE_DIFFERENT;
|
2006-02-28 20:24:00 +01:00
|
|
|
return tree_difference;
|
|
|
|
}
|
|
|
|
|
2008-11-03 19:45:41 +01:00
|
|
|
static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
|
2006-02-28 20:24:00 +01:00
|
|
|
{
|
|
|
|
int retval;
|
2008-11-03 19:45:41 +01:00
|
|
|
struct tree *t1 = commit->tree;
|
2006-02-28 20:24:00 +01:00
|
|
|
|
|
|
|
if (!t1)
|
|
|
|
return 0;
|
|
|
|
|
2007-03-14 21:12:18 +01:00
|
|
|
tree_difference = REV_TREE_SAME;
|
2007-11-10 20:05:14 +01:00
|
|
|
DIFF_OPT_CLR(&revs->pruning, HAS_CHANGES);
|
2015-11-10 03:22:29 +01:00
|
|
|
retval = diff_tree_sha1(NULL, t1->object.oid.hash, "", &revs->pruning);
|
2006-02-28 20:24:00 +01:00
|
|
|
|
2007-03-14 21:12:18 +01:00
|
|
|
return retval >= 0 && (tree_difference == REV_TREE_SAME);
|
2006-02-28 20:24:00 +01:00
|
|
|
}
|
|
|
|
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
struct treesame_state {
|
|
|
|
unsigned int nparents;
|
|
|
|
unsigned char treesame[FLEX_ARRAY];
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct treesame_state *initialise_treesame(struct rev_info *revs, struct commit *commit)
|
|
|
|
{
|
|
|
|
unsigned n = commit_list_count(commit->parents);
|
|
|
|
struct treesame_state *st = xcalloc(1, sizeof(*st) + n);
|
|
|
|
st->nparents = n;
|
|
|
|
add_decoration(&revs->treesame, &commit->object, st);
|
|
|
|
return st;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Must be called immediately after removing the nth_parent from a commit's
|
|
|
|
* parent list, if we are maintaining the per-parent treesame[] decoration.
|
|
|
|
* This does not recalculate the master TREESAME flag - update_treesame()
|
|
|
|
* should be called to update it after a sequence of treesame[] modifications
|
|
|
|
* that may have affected it.
|
|
|
|
*/
|
|
|
|
static int compact_treesame(struct rev_info *revs, struct commit *commit, unsigned nth_parent)
|
|
|
|
{
|
|
|
|
struct treesame_state *st;
|
|
|
|
int old_same;
|
|
|
|
|
|
|
|
if (!commit->parents) {
|
|
|
|
/*
|
|
|
|
* Have just removed the only parent from a non-merge.
|
|
|
|
* Different handling, as we lack decoration.
|
|
|
|
*/
|
|
|
|
if (nth_parent != 0)
|
|
|
|
die("compact_treesame %u", nth_parent);
|
|
|
|
old_same = !!(commit->object.flags & TREESAME);
|
|
|
|
if (rev_same_tree_as_empty(revs, commit))
|
|
|
|
commit->object.flags |= TREESAME;
|
|
|
|
else
|
|
|
|
commit->object.flags &= ~TREESAME;
|
|
|
|
return old_same;
|
|
|
|
}
|
|
|
|
|
|
|
|
st = lookup_decoration(&revs->treesame, &commit->object);
|
|
|
|
if (!st || nth_parent >= st->nparents)
|
|
|
|
die("compact_treesame %u", nth_parent);
|
|
|
|
|
|
|
|
old_same = st->treesame[nth_parent];
|
|
|
|
memmove(st->treesame + nth_parent,
|
|
|
|
st->treesame + nth_parent + 1,
|
|
|
|
st->nparents - nth_parent - 1);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we've just become a non-merge commit, update TREESAME
|
|
|
|
* immediately, and remove the no-longer-needed decoration.
|
|
|
|
* If still a merge, defer update until update_treesame().
|
|
|
|
*/
|
|
|
|
if (--st->nparents == 1) {
|
|
|
|
if (commit->parents->next)
|
|
|
|
die("compact_treesame parents mismatch");
|
|
|
|
if (st->treesame[0] && revs->dense)
|
|
|
|
commit->object.flags |= TREESAME;
|
|
|
|
else
|
|
|
|
commit->object.flags &= ~TREESAME;
|
|
|
|
free(add_decoration(&revs->treesame, &commit->object, NULL));
|
|
|
|
}
|
|
|
|
|
|
|
|
return old_same;
|
|
|
|
}
|
|
|
|
|
|
|
|
static unsigned update_treesame(struct rev_info *revs, struct commit *commit)
|
|
|
|
{
|
|
|
|
if (commit->parents && commit->parents->next) {
|
|
|
|
unsigned n;
|
|
|
|
struct treesame_state *st;
|
2013-05-16 17:32:39 +02:00
|
|
|
struct commit_list *p;
|
|
|
|
unsigned relevant_parents;
|
|
|
|
unsigned relevant_change, irrelevant_change;
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
|
|
|
|
st = lookup_decoration(&revs->treesame, &commit->object);
|
|
|
|
if (!st)
|
2015-11-10 03:22:28 +01:00
|
|
|
die("update_treesame %s", oid_to_hex(&commit->object.oid));
|
2013-05-16 17:32:39 +02:00
|
|
|
relevant_parents = 0;
|
|
|
|
relevant_change = irrelevant_change = 0;
|
|
|
|
for (p = commit->parents, n = 0; p; n++, p = p->next) {
|
|
|
|
if (relevant_commit(p->item)) {
|
|
|
|
relevant_change |= !st->treesame[n];
|
|
|
|
relevant_parents++;
|
|
|
|
} else
|
|
|
|
irrelevant_change |= !st->treesame[n];
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
}
|
2013-05-16 17:32:39 +02:00
|
|
|
if (relevant_parents ? relevant_change : irrelevant_change)
|
|
|
|
commit->object.flags &= ~TREESAME;
|
|
|
|
else
|
|
|
|
commit->object.flags |= TREESAME;
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return commit->object.flags & TREESAME;
|
|
|
|
}
|
|
|
|
|
2013-05-16 17:32:39 +02:00
|
|
|
static inline int limiting_can_increase_treesame(const struct rev_info *revs)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* TREESAME is irrelevant unless prune && dense;
|
|
|
|
* if simplify_history is set, we can't have a mixture of TREESAME and
|
|
|
|
* !TREESAME INTERESTING parents (and we don't have treesame[]
|
|
|
|
* decoration anyway);
|
|
|
|
* if first_parent_only is set, then the TREESAME flag is locked
|
|
|
|
* against the first parent (and again we lack treesame[] decoration).
|
|
|
|
*/
|
|
|
|
return revs->prune && revs->dense &&
|
|
|
|
!revs->simplify_history &&
|
|
|
|
!revs->first_parent_only;
|
|
|
|
}
|
|
|
|
|
2006-02-28 20:24:00 +01:00
|
|
|
static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
|
|
|
|
{
|
|
|
|
struct commit_list **pp, *parent;
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
struct treesame_state *ts = NULL;
|
2013-05-16 17:32:39 +02:00
|
|
|
int relevant_change = 0, irrelevant_change = 0;
|
|
|
|
int relevant_parents, nth_parent;
|
2006-02-28 20:24:00 +01:00
|
|
|
|
2007-11-05 22:22:34 +01:00
|
|
|
/*
|
|
|
|
* If we don't do pruning, everything is interesting
|
|
|
|
*/
|
2007-11-13 08:16:08 +01:00
|
|
|
if (!revs->prune)
|
2007-11-05 22:22:34 +01:00
|
|
|
return;
|
|
|
|
|
2006-02-28 20:24:00 +01:00
|
|
|
if (!commit->tree)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (!commit->parents) {
|
2008-11-03 19:45:41 +01:00
|
|
|
if (rev_same_tree_as_empty(revs, commit))
|
2007-11-13 08:16:08 +01:00
|
|
|
commit->object.flags |= TREESAME;
|
2006-02-28 20:24:00 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2007-11-05 22:22:34 +01:00
|
|
|
/*
|
|
|
|
* Normal non-merge commit? If we don't want to make the
|
|
|
|
* history dense, we consider it always to be a change..
|
|
|
|
*/
|
2007-11-13 08:16:08 +01:00
|
|
|
if (!revs->dense && !commit->parents->next)
|
2007-11-05 22:22:34 +01:00
|
|
|
return;
|
|
|
|
|
2013-05-16 17:32:39 +02:00
|
|
|
for (pp = &commit->parents, nth_parent = 0, relevant_parents = 0;
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
(parent = *pp) != NULL;
|
|
|
|
pp = &parent->next, nth_parent++) {
|
2006-02-28 20:24:00 +01:00
|
|
|
struct commit *p = parent->item;
|
2013-05-16 17:32:39 +02:00
|
|
|
if (relevant_commit(p))
|
|
|
|
relevant_parents++;
|
2006-02-28 20:24:00 +01:00
|
|
|
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
if (nth_parent == 1) {
|
|
|
|
/*
|
|
|
|
* This our second loop iteration - so we now know
|
|
|
|
* we're dealing with a merge.
|
|
|
|
*
|
|
|
|
* Do not compare with later parents when we care only about
|
|
|
|
* the first parent chain, in order to avoid derailing the
|
|
|
|
* traversal to follow a side branch that brought everything
|
|
|
|
* in the path we are limited to by the pathspec.
|
|
|
|
*/
|
|
|
|
if (revs->first_parent_only)
|
|
|
|
break;
|
|
|
|
/*
|
|
|
|
* If this will remain a potentially-simplifiable
|
|
|
|
* merge, remember per-parent treesame if needed.
|
|
|
|
* Initialise the array with the comparison from our
|
|
|
|
* first iteration.
|
|
|
|
*/
|
|
|
|
if (revs->treesame.name &&
|
|
|
|
!revs->simplify_history &&
|
|
|
|
!(commit->object.flags & UNINTERESTING)) {
|
|
|
|
ts = initialise_treesame(revs, commit);
|
2013-05-16 17:32:39 +02:00
|
|
|
if (!(irrelevant_change || relevant_change))
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
ts->treesame[0] = 1;
|
|
|
|
}
|
|
|
|
}
|
2007-05-04 23:54:57 +02:00
|
|
|
if (parse_commit(p) < 0)
|
|
|
|
die("cannot simplify commit %s (because of %s)",
|
2015-11-10 03:22:28 +01:00
|
|
|
oid_to_hex(&commit->object.oid),
|
|
|
|
oid_to_hex(&p->object.oid));
|
2008-11-03 19:45:41 +01:00
|
|
|
switch (rev_compare_tree(revs, p, commit)) {
|
2006-03-10 10:21:39 +01:00
|
|
|
case REV_TREE_SAME:
|
2013-05-16 17:32:41 +02:00
|
|
|
if (!revs->simplify_history || !relevant_commit(p)) {
|
2006-03-11 06:59:37 +01:00
|
|
|
/* Even if a merge with an uninteresting
|
|
|
|
* side branch brought the entire change
|
|
|
|
* we are interested in, we do not want
|
|
|
|
* to lose the other branches of this
|
|
|
|
* merge, so we just keep going.
|
|
|
|
*/
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
if (ts)
|
|
|
|
ts->treesame[nth_parent] = 1;
|
2006-03-11 06:59:37 +01:00
|
|
|
continue;
|
|
|
|
}
|
2006-02-28 20:24:00 +01:00
|
|
|
parent->next = NULL;
|
|
|
|
commit->parents = parent;
|
2007-11-13 08:16:08 +01:00
|
|
|
commit->object.flags |= TREESAME;
|
2006-02-28 20:24:00 +01:00
|
|
|
return;
|
|
|
|
|
2006-03-10 10:21:39 +01:00
|
|
|
case REV_TREE_NEW:
|
|
|
|
if (revs->remove_empty_trees &&
|
2008-11-03 19:45:41 +01:00
|
|
|
rev_same_tree_as_empty(revs, p)) {
|
2006-03-12 22:39:31 +01:00
|
|
|
/* We are adding all the specified
|
|
|
|
* paths from this parent, so the
|
|
|
|
* history beyond this parent is not
|
|
|
|
* interesting. Remove its parents
|
|
|
|
* (they are grandparents for us).
|
|
|
|
* IOW, we pretend this parent is a
|
|
|
|
* "root" commit.
|
2006-03-12 22:39:31 +01:00
|
|
|
*/
|
2007-05-04 23:54:57 +02:00
|
|
|
if (parse_commit(p) < 0)
|
|
|
|
die("cannot simplify commit %s (invalid %s)",
|
2015-11-10 03:22:28 +01:00
|
|
|
oid_to_hex(&commit->object.oid),
|
|
|
|
oid_to_hex(&p->object.oid));
|
2006-03-12 22:39:31 +01:00
|
|
|
p->parents = NULL;
|
2006-02-28 20:24:00 +01:00
|
|
|
}
|
|
|
|
/* fallthrough */
|
2009-06-03 03:34:01 +02:00
|
|
|
case REV_TREE_OLD:
|
2006-03-10 10:21:39 +01:00
|
|
|
case REV_TREE_DIFFERENT:
|
2013-05-16 17:32:39 +02:00
|
|
|
if (relevant_commit(p))
|
|
|
|
relevant_change = 1;
|
|
|
|
else
|
|
|
|
irrelevant_change = 1;
|
2006-02-28 20:24:00 +01:00
|
|
|
continue;
|
|
|
|
}
|
2015-11-10 03:22:28 +01:00
|
|
|
die("bad tree compare for commit %s", oid_to_hex(&commit->object.oid));
|
2006-02-28 20:24:00 +01:00
|
|
|
}
|
2013-05-16 17:32:39 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* TREESAME is straightforward for single-parent commits. For merge
|
|
|
|
* commits, it is most useful to define it so that "irrelevant"
|
|
|
|
* parents cannot make us !TREESAME - if we have any relevant
|
|
|
|
* parents, then we only consider TREESAMEness with respect to them,
|
|
|
|
* allowing irrelevant merges from uninteresting branches to be
|
|
|
|
* simplified away. Only if we have only irrelevant parents do we
|
|
|
|
* base TREESAME on them. Note that this logic is replicated in
|
|
|
|
* update_treesame, which should be kept in sync.
|
|
|
|
*/
|
|
|
|
if (relevant_parents ? !relevant_change : !irrelevant_change)
|
|
|
|
commit->object.flags |= TREESAME;
|
2006-02-28 20:24:00 +01:00
|
|
|
}
|
|
|
|
|
2010-11-27 02:58:14 +01:00
|
|
|
static void commit_list_insert_by_date_cached(struct commit *p, struct commit_list **head,
|
2008-07-12 20:00:57 +02:00
|
|
|
struct commit_list *cached_base, struct commit_list **cache)
|
|
|
|
{
|
|
|
|
struct commit_list *new_entry;
|
|
|
|
|
|
|
|
if (cached_base && p->date < cached_base->item->date)
|
2010-11-27 02:58:14 +01:00
|
|
|
new_entry = commit_list_insert_by_date(p, &cached_base->next);
|
2008-07-12 20:00:57 +02:00
|
|
|
else
|
2010-11-27 02:58:14 +01:00
|
|
|
new_entry = commit_list_insert_by_date(p, head);
|
2008-07-12 20:00:57 +02:00
|
|
|
|
|
|
|
if (cache && (!*cache || p->date < (*cache)->item->date))
|
|
|
|
*cache = new_entry;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int add_parents_to_list(struct rev_info *revs, struct commit *commit,
|
|
|
|
struct commit_list **list, struct commit_list **cache_ptr)
|
2006-02-28 20:24:00 +01:00
|
|
|
{
|
|
|
|
struct commit_list *parent = commit->parents;
|
2006-10-23 02:32:47 +02:00
|
|
|
unsigned left_flag;
|
2008-07-12 20:00:57 +02:00
|
|
|
struct commit_list *cached_base = cache_ptr ? *cache_ptr : NULL;
|
2006-02-28 20:24:00 +01:00
|
|
|
|
Make "--parents" logs also be incremental
The parent rewriting feature caused us to create the whole history in one
go, and then simplify it later, because of how rewrite_parents() had been
written. However, with a little tweaking, it's perfectly possible to do
even that one incrementally.
Right now, this doesn't really much matter, because every user of
"--parents" will probably generally _also_ use "--topo-order", which will
cause the old non-incremental behaviour anyway. However, I'm hopeful that
we could make even the topological sort incremental, or at least
_partially_ so (for example, make it incremental up to the first merge).
In the meantime, this at least moves things in the right direction, and
removes a strange special case.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-09 02:05:58 +02:00
|
|
|
if (commit->object.flags & ADDED)
|
2007-05-04 23:54:57 +02:00
|
|
|
return 0;
|
Make "--parents" logs also be incremental
The parent rewriting feature caused us to create the whole history in one
go, and then simplify it later, because of how rewrite_parents() had been
written. However, with a little tweaking, it's perfectly possible to do
even that one incrementally.
Right now, this doesn't really much matter, because every user of
"--parents" will probably generally _also_ use "--topo-order", which will
cause the old non-incremental behaviour anyway. However, I'm hopeful that
we could make even the topological sort incremental, or at least
_partially_ so (for example, make it incremental up to the first merge).
In the meantime, this at least moves things in the right direction, and
removes a strange special case.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-09 02:05:58 +02:00
|
|
|
commit->object.flags |= ADDED;
|
|
|
|
|
2013-10-24 20:01:41 +02:00
|
|
|
if (revs->include_check &&
|
|
|
|
!revs->include_check(commit, revs->include_check_data))
|
|
|
|
return 0;
|
|
|
|
|
2006-02-28 20:24:00 +01:00
|
|
|
/*
|
|
|
|
* If the commit is uninteresting, don't try to
|
|
|
|
* prune parents - we want the maximal uninteresting
|
|
|
|
* set.
|
|
|
|
*
|
|
|
|
* Normally we haven't parsed the parent
|
|
|
|
* yet, so we won't have a parent of a parent
|
|
|
|
* here. However, it may turn out that we've
|
|
|
|
* reached this commit some other way (where it
|
|
|
|
* wasn't uninteresting), in which case we need
|
|
|
|
* to mark its parents recursively too..
|
|
|
|
*/
|
|
|
|
if (commit->object.flags & UNINTERESTING) {
|
|
|
|
while (parent) {
|
|
|
|
struct commit *p = parent->item;
|
|
|
|
parent = parent->next;
|
2009-01-28 08:19:30 +01:00
|
|
|
if (p)
|
|
|
|
p->object.flags |= UNINTERESTING;
|
2015-06-01 11:56:40 +02:00
|
|
|
if (parse_commit_gently(p, 1) < 0)
|
2009-01-28 08:19:30 +01:00
|
|
|
continue;
|
2006-02-28 20:24:00 +01:00
|
|
|
if (p->parents)
|
|
|
|
mark_parents_uninteresting(p);
|
|
|
|
if (p->object.flags & SEEN)
|
|
|
|
continue;
|
|
|
|
p->object.flags |= SEEN;
|
2010-11-27 02:58:14 +01:00
|
|
|
commit_list_insert_by_date_cached(p, list, cached_base, cache_ptr);
|
2006-02-28 20:24:00 +01:00
|
|
|
}
|
2007-05-04 23:54:57 +02:00
|
|
|
return 0;
|
2006-02-28 20:24:00 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Ok, the commit wasn't uninteresting. Try to
|
|
|
|
* simplify the commit history and find the parent
|
|
|
|
* that has no differences in the path set if one exists.
|
|
|
|
*/
|
2007-11-05 22:22:34 +01:00
|
|
|
try_to_simplify_commit(revs, commit);
|
2006-02-28 20:24:00 +01:00
|
|
|
|
2006-04-15 21:09:56 +02:00
|
|
|
if (revs->no_walk)
|
2007-05-04 23:54:57 +02:00
|
|
|
return 0;
|
2006-04-15 21:09:56 +02:00
|
|
|
|
2006-10-23 02:32:47 +02:00
|
|
|
left_flag = (commit->object.flags & SYMMETRIC_LEFT);
|
2007-03-13 09:57:22 +01:00
|
|
|
|
2008-04-27 19:32:46 +02:00
|
|
|
for (parent = commit->parents; parent; parent = parent->next) {
|
2006-02-28 20:24:00 +01:00
|
|
|
struct commit *p = parent->item;
|
|
|
|
|
2015-06-01 11:56:37 +02:00
|
|
|
if (parse_commit_gently(p, revs->ignore_missing_links) < 0)
|
2007-05-04 23:54:57 +02:00
|
|
|
return -1;
|
2008-10-27 20:51:59 +01:00
|
|
|
if (revs->show_source && !p->util)
|
|
|
|
p->util = commit->util;
|
2006-10-23 02:32:47 +02:00
|
|
|
p->object.flags |= left_flag;
|
2008-05-12 17:12:36 +02:00
|
|
|
if (!(p->object.flags & SEEN)) {
|
|
|
|
p->object.flags |= SEEN;
|
2010-11-27 02:58:14 +01:00
|
|
|
commit_list_insert_by_date_cached(p, list, cached_base, cache_ptr);
|
2008-05-12 17:12:36 +02:00
|
|
|
}
|
2008-08-01 07:17:13 +02:00
|
|
|
if (revs->first_parent_only)
|
2008-04-27 19:32:46 +02:00
|
|
|
break;
|
2006-02-28 20:24:00 +01:00
|
|
|
}
|
2007-05-04 23:54:57 +02:00
|
|
|
return 0;
|
2006-02-28 20:24:00 +01:00
|
|
|
}
|
|
|
|
|
2007-07-10 15:50:49 +02:00
|
|
|
static void cherry_pick_list(struct commit_list *list, struct rev_info *revs)
|
2007-04-09 12:40:38 +02:00
|
|
|
{
|
|
|
|
struct commit_list *p;
|
|
|
|
int left_count = 0, right_count = 0;
|
|
|
|
int left_first;
|
|
|
|
struct patch_ids ids;
|
2011-03-07 13:31:40 +01:00
|
|
|
unsigned cherry_flag;
|
2007-04-09 12:40:38 +02:00
|
|
|
|
|
|
|
/* First count the commits on the left and on the right */
|
|
|
|
for (p = list; p; p = p->next) {
|
|
|
|
struct commit *commit = p->item;
|
|
|
|
unsigned flags = commit->object.flags;
|
|
|
|
if (flags & BOUNDARY)
|
|
|
|
;
|
|
|
|
else if (flags & SYMMETRIC_LEFT)
|
|
|
|
left_count++;
|
|
|
|
else
|
|
|
|
right_count++;
|
|
|
|
}
|
|
|
|
|
2010-02-20 12:42:04 +01:00
|
|
|
if (!left_count || !right_count)
|
|
|
|
return;
|
|
|
|
|
2007-04-09 12:40:38 +02:00
|
|
|
left_first = left_count < right_count;
|
|
|
|
init_patch_ids(&ids);
|
2010-12-15 16:02:38 +01:00
|
|
|
ids.diffopts.pathspec = revs->diffopt.pathspec;
|
2007-04-09 12:40:38 +02:00
|
|
|
|
|
|
|
/* Compute patch-ids for one side */
|
|
|
|
for (p = list; p; p = p->next) {
|
|
|
|
struct commit *commit = p->item;
|
|
|
|
unsigned flags = commit->object.flags;
|
|
|
|
|
|
|
|
if (flags & BOUNDARY)
|
|
|
|
continue;
|
|
|
|
/*
|
|
|
|
* If we have fewer left, left_first is set and we omit
|
|
|
|
* commits on the right branch in this loop. If we have
|
|
|
|
* fewer right, we skip the left ones.
|
|
|
|
*/
|
|
|
|
if (left_first != !!(flags & SYMMETRIC_LEFT))
|
|
|
|
continue;
|
|
|
|
commit->util = add_commit_patch_id(commit, &ids);
|
|
|
|
}
|
|
|
|
|
2011-03-07 13:31:40 +01:00
|
|
|
/* either cherry_mark or cherry_pick are true */
|
|
|
|
cherry_flag = revs->cherry_mark ? PATCHSAME : SHOWN;
|
|
|
|
|
2007-04-09 12:40:38 +02:00
|
|
|
/* Check the other side */
|
|
|
|
for (p = list; p; p = p->next) {
|
|
|
|
struct commit *commit = p->item;
|
|
|
|
struct patch_id *id;
|
|
|
|
unsigned flags = commit->object.flags;
|
|
|
|
|
|
|
|
if (flags & BOUNDARY)
|
|
|
|
continue;
|
|
|
|
/*
|
|
|
|
* If we have fewer left, left_first is set and we omit
|
|
|
|
* commits on the left branch in this loop.
|
|
|
|
*/
|
|
|
|
if (left_first == !!(flags & SYMMETRIC_LEFT))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Have we seen the same patch id?
|
|
|
|
*/
|
|
|
|
id = has_commit_patch_id(commit, &ids);
|
|
|
|
if (!id)
|
|
|
|
continue;
|
|
|
|
id->seen = 1;
|
2011-03-07 13:31:40 +01:00
|
|
|
commit->object.flags |= cherry_flag;
|
2007-04-09 12:40:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Now check the original side for seen ones */
|
|
|
|
for (p = list; p; p = p->next) {
|
|
|
|
struct commit *commit = p->item;
|
|
|
|
struct patch_id *ent;
|
|
|
|
|
|
|
|
ent = commit->util;
|
|
|
|
if (!ent)
|
|
|
|
continue;
|
|
|
|
if (ent->seen)
|
2011-03-07 13:31:40 +01:00
|
|
|
commit->object.flags |= cherry_flag;
|
2007-04-09 12:40:38 +02:00
|
|
|
commit->util = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
free_patch_ids(&ids);
|
|
|
|
}
|
|
|
|
|
2008-03-18 02:56:33 +01:00
|
|
|
/* How many extra uninteresting commits we want to see.. */
|
|
|
|
#define SLOP 5
|
|
|
|
|
limit_list: avoid quadratic behavior from still_interesting
When we are limiting a rev-list traversal due to
UNINTERESTING refs, we have to walk down the tips (both
interesting and uninteresting) to find where they intersect.
We keep a queue of commits to examine, pop commits off
the queue one by one, and potentially add their parents. The
size of the queue will naturally fluctuate based on the
"width" of the history graph; i.e., the number of
simultaneous lines of development. But for the most part it
will stay in the same ballpark as the initial number of tips
we fed, shrinking over time (as we hit common ancestors of
the tips). So roughly speaking, if we start with `N` tips,
we'll spend much of the time with a queue around `N` items.
For each UNINTERESTING commit we pop, we call
still_interesting to check whether marking its parents as
UNINTERESTING has made the whole queue uninteresting (in
which case we can quit early). Because the queue is stored
as a linked list, this is `O(N)`, where `N` is the number of
items in the queue. So processing a queue with `N` commits
marked UNINTERESTING (and one or more interesting commits)
will take `O(N^2)`.
If you feed a lot of positive tips, this isn't a problem.
They aren't UNINTERESTING, so they don't incur the
still_interesting check. It also isn't a problem if you
traverse from an interesting tip to some UNINTERESTING
bases. We order the queue by recency, so the interesting
commits stay at the front of the queue as we walk down them.
The linear check can exit early as soon as it sees one
interesting commit left in the queue.
But if you want to know whether an older commit is reachable
from a set of newer tips, we end up processing in the
opposite direction: from the UNINTERESTING ones down to the
interesting one. This may happen when we call:
git rev-list $commits --not --all
in check_everything_connected after a fetch. If we fetched
something much older than most of our refs, and if we have a
large number of refs, the traversal cost is dominated by the
quadratic behavior.
These commands simulate the connectivity check of such a
fetch, when you have `$n` distinct refs in the receiver:
# positive ref is 100,000 commits deep
git rev-list --all | head -100000 | tail -1 >input
# huge number of more recent negative refs
git rev-list --all | head -$n | sed s/^/^/ >>input
time git rev-list --stdin <input
Here are timings for various `n` on the linux.git
repository. The `n=1` case provides a baseline for just
walking the commits, which lets us see the still_interesting
overhead. The times marked with `+` subtract that baseline
to show just the extra time growth due to the large number
of refs. The `x` numbers show the slowdown of the adjusted
time versus the prior trial.
n | before | after
--------------------------------------------------------
1 | 0.991s | 0.848s
10000 | 1.120s (+0.129s) | 0.885s (+0.037s)
20000 | 1.451s (+0.460s, 3.5x) | 0.923s (+0.075s, 2.0x)
40000 | 2.731s (+1.740s, 3.8x) | 0.994s (+0.146s, 1.9x)
80000 | 8.235s (+7.244s, 4.2x) | 1.123s (+0.275s, 1.9x)
Each trial doubles `n`, so you can see the quadratic (`4x`)
behavior before this patch. Afterwards, we have a roughly
linear relationship.
The implementation is fairly straightforward. Whenever we do
the linear search, we cache the interesting commit we find,
and next time check it before doing another linear search.
If that commit is removed from the list or becomes
UNINTERESTING itself, then we fall back to the linear
search. This is very similar to the trick used by fce87ae
(Fix quadratic performance in rewrite_one., 2008-07-12).
I considered and rejected several possible alternatives:
1. Keep a count of UNINTERESTING commits in the queue.
This requires managing the count not only when removing
an item from the queue, but also when marking an item
as UNINTERESTING. That requires touching the other
functions which mark commits, and would require knowing
quickly which commits are in the queue (lookup in the
queue is linear, so we would need an auxiliary
structure or to also maintain an IN_QUEUE flag in each
commit object).
2. Keep a separate list of interesting commits. Drop items
from it when they are dropped from the queue, or if
they become UNINTERESTING. This again suffers from
extra complexity to maintain the list, not to mention
CPU and memory.
3. Use a better data structure for the queue. This is
something that could help the fix in fce87ae, because
we order the queue by recency, and it is about
inserting quickly in recency order. So a normal
priority queue would help there. But here, we cannot
disturb the order of the queue, which makes things
harder. We really do need an auxiliary index to track
the flag we care about, which is basically option (2)
above.
The "cache" trick is simple, and the numbers above show that
it works well in practice. This is because the length of
time it takes to find an interesting commit is proportional
to the length of time it will remain cached (i.e., if we
have to walk a long way to find it, it also means we have to
pop a lot of elements in the queue until we get rid of it
and have to find another interesting commit).
The worst case is still quadratic, though. We could have `N`
uninteresting commits at the front of the queue, followed by
`N` interesting commits, where commit `i` has parent `i+N`.
When we pop commit `i`, we will notice that the parent of
the next commit, `i+1+N` is still interesting and cache it.
But then handling commit `i+1`, we will mark its parent
`i+1+N` uninteresting, and immediately invalidate our cache.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-04-18 00:11:04 +02:00
|
|
|
static int still_interesting(struct commit_list *src, unsigned long date, int slop,
|
|
|
|
struct commit **interesting_cache)
|
Add "--show-all" revision walker flag for debugging
It's really not very easy to visualize the commit walker, because - on
purpose - it obvously doesn't show the uninteresting commits!
This adds a "--show-all" flag to the revision walker, which will make
it show uninteresting commits too, and they'll have a '^' in front of
them (it also fixes a logic error for !verbose_header for boundary
commits - we should show the '-' even if left_right isn't shown).
A separate patch to gitk to teach it the new '^' was sent
to paulus. With the change in place, it actually is interesting
even for the cases that git doesn't have any problems with, ie
for the kernel you can do:
gitk -d --show-all v2.6.24..
and you see just how far down it has to parse things to see it all. The
use of "-d" is a good idea, since the date-ordered toposort is much better
at showing why it goes deep down (ie the date of some of those commits
after 2.6.24 is much older, because they were merged from trees that
weren't rebased).
So I think this is a useful feature even for non-debugging - just to
visualize what git does internally more.
When it actually breaks out due to the "everybody_uninteresting()"
case, it adds the uninteresting commits (both the one it's looking at
now, and the list of pending ones) to the list
This way, we really list *all* the commits we've looked at.
Because we now end up listing commits we may not even have been parsed
at all "show_log" and "show_commit" need to protect against commits
that don't have a commit buffer entry.
That second part is debatable just how it should work. Maybe we shouldn't
show such entries at all (with this patch those entries do get shown, they
just don't get any message shown with them). But I think this is a useful
case.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-02-09 23:02:07 +01:00
|
|
|
{
|
2008-03-18 02:56:33 +01:00
|
|
|
/*
|
|
|
|
* No source list at all? We're definitely done..
|
|
|
|
*/
|
|
|
|
if (!src)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Does the destination list contain entries with a date
|
|
|
|
* before the source list? Definitely _not_ done.
|
|
|
|
*/
|
2013-03-22 19:38:19 +01:00
|
|
|
if (date <= src->item->date)
|
2008-03-18 02:56:33 +01:00
|
|
|
return SLOP;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Does the source list still have interesting commits in
|
|
|
|
* it? Definitely not done..
|
|
|
|
*/
|
limit_list: avoid quadratic behavior from still_interesting
When we are limiting a rev-list traversal due to
UNINTERESTING refs, we have to walk down the tips (both
interesting and uninteresting) to find where they intersect.
We keep a queue of commits to examine, pop commits off
the queue one by one, and potentially add their parents. The
size of the queue will naturally fluctuate based on the
"width" of the history graph; i.e., the number of
simultaneous lines of development. But for the most part it
will stay in the same ballpark as the initial number of tips
we fed, shrinking over time (as we hit common ancestors of
the tips). So roughly speaking, if we start with `N` tips,
we'll spend much of the time with a queue around `N` items.
For each UNINTERESTING commit we pop, we call
still_interesting to check whether marking its parents as
UNINTERESTING has made the whole queue uninteresting (in
which case we can quit early). Because the queue is stored
as a linked list, this is `O(N)`, where `N` is the number of
items in the queue. So processing a queue with `N` commits
marked UNINTERESTING (and one or more interesting commits)
will take `O(N^2)`.
If you feed a lot of positive tips, this isn't a problem.
They aren't UNINTERESTING, so they don't incur the
still_interesting check. It also isn't a problem if you
traverse from an interesting tip to some UNINTERESTING
bases. We order the queue by recency, so the interesting
commits stay at the front of the queue as we walk down them.
The linear check can exit early as soon as it sees one
interesting commit left in the queue.
But if you want to know whether an older commit is reachable
from a set of newer tips, we end up processing in the
opposite direction: from the UNINTERESTING ones down to the
interesting one. This may happen when we call:
git rev-list $commits --not --all
in check_everything_connected after a fetch. If we fetched
something much older than most of our refs, and if we have a
large number of refs, the traversal cost is dominated by the
quadratic behavior.
These commands simulate the connectivity check of such a
fetch, when you have `$n` distinct refs in the receiver:
# positive ref is 100,000 commits deep
git rev-list --all | head -100000 | tail -1 >input
# huge number of more recent negative refs
git rev-list --all | head -$n | sed s/^/^/ >>input
time git rev-list --stdin <input
Here are timings for various `n` on the linux.git
repository. The `n=1` case provides a baseline for just
walking the commits, which lets us see the still_interesting
overhead. The times marked with `+` subtract that baseline
to show just the extra time growth due to the large number
of refs. The `x` numbers show the slowdown of the adjusted
time versus the prior trial.
n | before | after
--------------------------------------------------------
1 | 0.991s | 0.848s
10000 | 1.120s (+0.129s) | 0.885s (+0.037s)
20000 | 1.451s (+0.460s, 3.5x) | 0.923s (+0.075s, 2.0x)
40000 | 2.731s (+1.740s, 3.8x) | 0.994s (+0.146s, 1.9x)
80000 | 8.235s (+7.244s, 4.2x) | 1.123s (+0.275s, 1.9x)
Each trial doubles `n`, so you can see the quadratic (`4x`)
behavior before this patch. Afterwards, we have a roughly
linear relationship.
The implementation is fairly straightforward. Whenever we do
the linear search, we cache the interesting commit we find,
and next time check it before doing another linear search.
If that commit is removed from the list or becomes
UNINTERESTING itself, then we fall back to the linear
search. This is very similar to the trick used by fce87ae
(Fix quadratic performance in rewrite_one., 2008-07-12).
I considered and rejected several possible alternatives:
1. Keep a count of UNINTERESTING commits in the queue.
This requires managing the count not only when removing
an item from the queue, but also when marking an item
as UNINTERESTING. That requires touching the other
functions which mark commits, and would require knowing
quickly which commits are in the queue (lookup in the
queue is linear, so we would need an auxiliary
structure or to also maintain an IN_QUEUE flag in each
commit object).
2. Keep a separate list of interesting commits. Drop items
from it when they are dropped from the queue, or if
they become UNINTERESTING. This again suffers from
extra complexity to maintain the list, not to mention
CPU and memory.
3. Use a better data structure for the queue. This is
something that could help the fix in fce87ae, because
we order the queue by recency, and it is about
inserting quickly in recency order. So a normal
priority queue would help there. But here, we cannot
disturb the order of the queue, which makes things
harder. We really do need an auxiliary index to track
the flag we care about, which is basically option (2)
above.
The "cache" trick is simple, and the numbers above show that
it works well in practice. This is because the length of
time it takes to find an interesting commit is proportional
to the length of time it will remain cached (i.e., if we
have to walk a long way to find it, it also means we have to
pop a lot of elements in the queue until we get rid of it
and have to find another interesting commit).
The worst case is still quadratic, though. We could have `N`
uninteresting commits at the front of the queue, followed by
`N` interesting commits, where commit `i` has parent `i+N`.
When we pop commit `i`, we will notice that the parent of
the next commit, `i+1+N` is still interesting and cache it.
But then handling commit `i+1`, we will mark its parent
`i+1+N` uninteresting, and immediately invalidate our cache.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-04-18 00:11:04 +02:00
|
|
|
if (!everybody_uninteresting(src, interesting_cache))
|
2008-03-18 02:56:33 +01:00
|
|
|
return SLOP;
|
|
|
|
|
|
|
|
/* Ok, we're closing in.. */
|
|
|
|
return slop-1;
|
Add "--show-all" revision walker flag for debugging
It's really not very easy to visualize the commit walker, because - on
purpose - it obvously doesn't show the uninteresting commits!
This adds a "--show-all" flag to the revision walker, which will make
it show uninteresting commits too, and they'll have a '^' in front of
them (it also fixes a logic error for !verbose_header for boundary
commits - we should show the '-' even if left_right isn't shown).
A separate patch to gitk to teach it the new '^' was sent
to paulus. With the change in place, it actually is interesting
even for the cases that git doesn't have any problems with, ie
for the kernel you can do:
gitk -d --show-all v2.6.24..
and you see just how far down it has to parse things to see it all. The
use of "-d" is a good idea, since the date-ordered toposort is much better
at showing why it goes deep down (ie the date of some of those commits
after 2.6.24 is much older, because they were merged from trees that
weren't rebased).
So I think this is a useful feature even for non-debugging - just to
visualize what git does internally more.
When it actually breaks out due to the "everybody_uninteresting()"
case, it adds the uninteresting commits (both the one it's looking at
now, and the list of pending ones) to the list
This way, we really list *all* the commits we've looked at.
Because we now end up listing commits we may not even have been parsed
at all "show_log" and "show_commit" need to protect against commits
that don't have a commit buffer entry.
That second part is debatable just how it should work. Maybe we shouldn't
show such entries at all (with this patch those entries do get shown, they
just don't get any message shown with them). But I think this is a useful
case.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-02-09 23:02:07 +01:00
|
|
|
}
|
|
|
|
|
revision: --ancestry-path
"rev-list A..H" computes the set of commits that are ancestors of H, but
excludes the ones that are ancestors of A. This is useful to see what
happened to the history leading to H since A, in the sense that "what does
H have that did not exist in A" (e.g. when you have a choice to update to
H from A).
x---x---A---B---C <-- topic
/ \
x---x---x---o---o---o---o---M---D---E---F---G <-- dev
/ \
x---o---o---o---o---o---o---o---o---o---o---o---N---H <-- master
The result in the above example would be the commits marked with caps
letters (except for A itself, of course), and the ones marked with 'o'.
When you want to find out what commits in H are contaminated with the bug
introduced by A and need fixing, however, you might want to view only the
subset of "A..B" that are actually descendants of A, i.e. excluding the
ones marked with 'o'. Introduce a new option --ancestry-path to compute
this set with "rev-list --ancestry-path A..B".
Note that in practice, you would build a fix immediately on top of A and
"git branch --contains A" will give the names of branches that you would
need to merge the fix into (i.e. topic, dev and master), so this may not
be worth paying the extra cost of postprocessing.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-04-20 22:48:39 +02:00
|
|
|
/*
|
|
|
|
* "rev-list --ancestry-path A..B" computes commits that are ancestors
|
|
|
|
* of B but not ancestors of A but further limits the result to those
|
|
|
|
* that are descendants of A. This takes the list of bottom commits and
|
|
|
|
* the result of "A..B" without --ancestry-path, and limits the latter
|
|
|
|
* further to the ones that can reach one of the commits in "bottom".
|
|
|
|
*/
|
|
|
|
static void limit_to_ancestry(struct commit_list *bottom, struct commit_list *list)
|
|
|
|
{
|
|
|
|
struct commit_list *p;
|
|
|
|
struct commit_list *rlist = NULL;
|
|
|
|
int made_progress;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Reverse the list so that it will be likely that we would
|
|
|
|
* process parents before children.
|
|
|
|
*/
|
|
|
|
for (p = list; p; p = p->next)
|
|
|
|
commit_list_insert(p->item, &rlist);
|
|
|
|
|
|
|
|
for (p = bottom; p; p = p->next)
|
|
|
|
p->item->object.flags |= TMP_MARK;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Mark the ones that can reach bottom commits in "list",
|
|
|
|
* in a bottom-up fashion.
|
|
|
|
*/
|
|
|
|
do {
|
|
|
|
made_progress = 0;
|
|
|
|
for (p = rlist; p; p = p->next) {
|
|
|
|
struct commit *c = p->item;
|
|
|
|
struct commit_list *parents;
|
|
|
|
if (c->object.flags & (TMP_MARK | UNINTERESTING))
|
|
|
|
continue;
|
|
|
|
for (parents = c->parents;
|
|
|
|
parents;
|
|
|
|
parents = parents->next) {
|
|
|
|
if (!(parents->item->object.flags & TMP_MARK))
|
|
|
|
continue;
|
|
|
|
c->object.flags |= TMP_MARK;
|
|
|
|
made_progress = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} while (made_progress);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* NEEDSWORK: decide if we want to remove parents that are
|
|
|
|
* not marked with TMP_MARK from commit->parents for commits
|
|
|
|
* in the resulting list. We may not want to do that, though.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The ones that are not marked with TMP_MARK are uninteresting
|
|
|
|
*/
|
|
|
|
for (p = list; p; p = p->next) {
|
|
|
|
struct commit *c = p->item;
|
|
|
|
if (c->object.flags & TMP_MARK)
|
|
|
|
continue;
|
|
|
|
c->object.flags |= UNINTERESTING;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We are done with the TMP_MARK */
|
|
|
|
for (p = list; p; p = p->next)
|
|
|
|
p->item->object.flags &= ~TMP_MARK;
|
|
|
|
for (p = bottom; p; p = p->next)
|
|
|
|
p->item->object.flags &= ~TMP_MARK;
|
|
|
|
free_commit_list(rlist);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Before walking the history, keep the set of "negative" refs the
|
|
|
|
* caller has asked to exclude.
|
|
|
|
*
|
|
|
|
* This is used to compute "rev-list --ancestry-path A..B", as we need
|
|
|
|
* to filter the result of "A..B" further to the ones that can actually
|
|
|
|
* reach A.
|
|
|
|
*/
|
2013-05-16 17:32:38 +02:00
|
|
|
static struct commit_list *collect_bottom_commits(struct commit_list *list)
|
revision: --ancestry-path
"rev-list A..H" computes the set of commits that are ancestors of H, but
excludes the ones that are ancestors of A. This is useful to see what
happened to the history leading to H since A, in the sense that "what does
H have that did not exist in A" (e.g. when you have a choice to update to
H from A).
x---x---A---B---C <-- topic
/ \
x---x---x---o---o---o---o---M---D---E---F---G <-- dev
/ \
x---o---o---o---o---o---o---o---o---o---o---o---N---H <-- master
The result in the above example would be the commits marked with caps
letters (except for A itself, of course), and the ones marked with 'o'.
When you want to find out what commits in H are contaminated with the bug
introduced by A and need fixing, however, you might want to view only the
subset of "A..B" that are actually descendants of A, i.e. excluding the
ones marked with 'o'. Introduce a new option --ancestry-path to compute
this set with "rev-list --ancestry-path A..B".
Note that in practice, you would build a fix immediately on top of A and
"git branch --contains A" will give the names of branches that you would
need to merge the fix into (i.e. topic, dev and master), so this may not
be worth paying the extra cost of postprocessing.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-04-20 22:48:39 +02:00
|
|
|
{
|
2013-05-16 17:32:38 +02:00
|
|
|
struct commit_list *elem, *bottom = NULL;
|
|
|
|
for (elem = list; elem; elem = elem->next)
|
|
|
|
if (elem->item->object.flags & BOTTOM)
|
|
|
|
commit_list_insert(elem->item, &bottom);
|
revision: --ancestry-path
"rev-list A..H" computes the set of commits that are ancestors of H, but
excludes the ones that are ancestors of A. This is useful to see what
happened to the history leading to H since A, in the sense that "what does
H have that did not exist in A" (e.g. when you have a choice to update to
H from A).
x---x---A---B---C <-- topic
/ \
x---x---x---o---o---o---o---M---D---E---F---G <-- dev
/ \
x---o---o---o---o---o---o---o---o---o---o---o---N---H <-- master
The result in the above example would be the commits marked with caps
letters (except for A itself, of course), and the ones marked with 'o'.
When you want to find out what commits in H are contaminated with the bug
introduced by A and need fixing, however, you might want to view only the
subset of "A..B" that are actually descendants of A, i.e. excluding the
ones marked with 'o'. Introduce a new option --ancestry-path to compute
this set with "rev-list --ancestry-path A..B".
Note that in practice, you would build a fix immediately on top of A and
"git branch --contains A" will give the names of branches that you would
need to merge the fix into (i.e. topic, dev and master), so this may not
be worth paying the extra cost of postprocessing.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-04-20 22:48:39 +02:00
|
|
|
return bottom;
|
|
|
|
}
|
|
|
|
|
2011-02-21 17:09:11 +01:00
|
|
|
/* Assumes either left_only or right_only is set */
|
|
|
|
static void limit_left_right(struct commit_list *list, struct rev_info *revs)
|
|
|
|
{
|
|
|
|
struct commit_list *p;
|
|
|
|
|
|
|
|
for (p = list; p; p = p->next) {
|
|
|
|
struct commit *commit = p->item;
|
|
|
|
|
|
|
|
if (revs->right_only) {
|
|
|
|
if (commit->object.flags & SYMMETRIC_LEFT)
|
|
|
|
commit->object.flags |= SHOWN;
|
|
|
|
} else /* revs->left_only is set */
|
|
|
|
if (!(commit->object.flags & SYMMETRIC_LEFT))
|
|
|
|
commit->object.flags |= SHOWN;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-05-04 23:54:57 +02:00
|
|
|
static int limit_list(struct rev_info *revs)
|
2006-02-28 20:24:00 +01:00
|
|
|
{
|
2008-03-18 02:56:33 +01:00
|
|
|
int slop = SLOP;
|
|
|
|
unsigned long date = ~0ul;
|
2006-02-28 20:24:00 +01:00
|
|
|
struct commit_list *list = revs->commits;
|
|
|
|
struct commit_list *newlist = NULL;
|
|
|
|
struct commit_list **p = &newlist;
|
revision: --ancestry-path
"rev-list A..H" computes the set of commits that are ancestors of H, but
excludes the ones that are ancestors of A. This is useful to see what
happened to the history leading to H since A, in the sense that "what does
H have that did not exist in A" (e.g. when you have a choice to update to
H from A).
x---x---A---B---C <-- topic
/ \
x---x---x---o---o---o---o---M---D---E---F---G <-- dev
/ \
x---o---o---o---o---o---o---o---o---o---o---o---N---H <-- master
The result in the above example would be the commits marked with caps
letters (except for A itself, of course), and the ones marked with 'o'.
When you want to find out what commits in H are contaminated with the bug
introduced by A and need fixing, however, you might want to view only the
subset of "A..B" that are actually descendants of A, i.e. excluding the
ones marked with 'o'. Introduce a new option --ancestry-path to compute
this set with "rev-list --ancestry-path A..B".
Note that in practice, you would build a fix immediately on top of A and
"git branch --contains A" will give the names of branches that you would
need to merge the fix into (i.e. topic, dev and master), so this may not
be worth paying the extra cost of postprocessing.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-04-20 22:48:39 +02:00
|
|
|
struct commit_list *bottom = NULL;
|
limit_list: avoid quadratic behavior from still_interesting
When we are limiting a rev-list traversal due to
UNINTERESTING refs, we have to walk down the tips (both
interesting and uninteresting) to find where they intersect.
We keep a queue of commits to examine, pop commits off
the queue one by one, and potentially add their parents. The
size of the queue will naturally fluctuate based on the
"width" of the history graph; i.e., the number of
simultaneous lines of development. But for the most part it
will stay in the same ballpark as the initial number of tips
we fed, shrinking over time (as we hit common ancestors of
the tips). So roughly speaking, if we start with `N` tips,
we'll spend much of the time with a queue around `N` items.
For each UNINTERESTING commit we pop, we call
still_interesting to check whether marking its parents as
UNINTERESTING has made the whole queue uninteresting (in
which case we can quit early). Because the queue is stored
as a linked list, this is `O(N)`, where `N` is the number of
items in the queue. So processing a queue with `N` commits
marked UNINTERESTING (and one or more interesting commits)
will take `O(N^2)`.
If you feed a lot of positive tips, this isn't a problem.
They aren't UNINTERESTING, so they don't incur the
still_interesting check. It also isn't a problem if you
traverse from an interesting tip to some UNINTERESTING
bases. We order the queue by recency, so the interesting
commits stay at the front of the queue as we walk down them.
The linear check can exit early as soon as it sees one
interesting commit left in the queue.
But if you want to know whether an older commit is reachable
from a set of newer tips, we end up processing in the
opposite direction: from the UNINTERESTING ones down to the
interesting one. This may happen when we call:
git rev-list $commits --not --all
in check_everything_connected after a fetch. If we fetched
something much older than most of our refs, and if we have a
large number of refs, the traversal cost is dominated by the
quadratic behavior.
These commands simulate the connectivity check of such a
fetch, when you have `$n` distinct refs in the receiver:
# positive ref is 100,000 commits deep
git rev-list --all | head -100000 | tail -1 >input
# huge number of more recent negative refs
git rev-list --all | head -$n | sed s/^/^/ >>input
time git rev-list --stdin <input
Here are timings for various `n` on the linux.git
repository. The `n=1` case provides a baseline for just
walking the commits, which lets us see the still_interesting
overhead. The times marked with `+` subtract that baseline
to show just the extra time growth due to the large number
of refs. The `x` numbers show the slowdown of the adjusted
time versus the prior trial.
n | before | after
--------------------------------------------------------
1 | 0.991s | 0.848s
10000 | 1.120s (+0.129s) | 0.885s (+0.037s)
20000 | 1.451s (+0.460s, 3.5x) | 0.923s (+0.075s, 2.0x)
40000 | 2.731s (+1.740s, 3.8x) | 0.994s (+0.146s, 1.9x)
80000 | 8.235s (+7.244s, 4.2x) | 1.123s (+0.275s, 1.9x)
Each trial doubles `n`, so you can see the quadratic (`4x`)
behavior before this patch. Afterwards, we have a roughly
linear relationship.
The implementation is fairly straightforward. Whenever we do
the linear search, we cache the interesting commit we find,
and next time check it before doing another linear search.
If that commit is removed from the list or becomes
UNINTERESTING itself, then we fall back to the linear
search. This is very similar to the trick used by fce87ae
(Fix quadratic performance in rewrite_one., 2008-07-12).
I considered and rejected several possible alternatives:
1. Keep a count of UNINTERESTING commits in the queue.
This requires managing the count not only when removing
an item from the queue, but also when marking an item
as UNINTERESTING. That requires touching the other
functions which mark commits, and would require knowing
quickly which commits are in the queue (lookup in the
queue is linear, so we would need an auxiliary
structure or to also maintain an IN_QUEUE flag in each
commit object).
2. Keep a separate list of interesting commits. Drop items
from it when they are dropped from the queue, or if
they become UNINTERESTING. This again suffers from
extra complexity to maintain the list, not to mention
CPU and memory.
3. Use a better data structure for the queue. This is
something that could help the fix in fce87ae, because
we order the queue by recency, and it is about
inserting quickly in recency order. So a normal
priority queue would help there. But here, we cannot
disturb the order of the queue, which makes things
harder. We really do need an auxiliary index to track
the flag we care about, which is basically option (2)
above.
The "cache" trick is simple, and the numbers above show that
it works well in practice. This is because the length of
time it takes to find an interesting commit is proportional
to the length of time it will remain cached (i.e., if we
have to walk a long way to find it, it also means we have to
pop a lot of elements in the queue until we get rid of it
and have to find another interesting commit).
The worst case is still quadratic, though. We could have `N`
uninteresting commits at the front of the queue, followed by
`N` interesting commits, where commit `i` has parent `i+N`.
When we pop commit `i`, we will notice that the parent of
the next commit, `i+1+N` is still interesting and cache it.
But then handling commit `i+1`, we will mark its parent
`i+1+N` uninteresting, and immediately invalidate our cache.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-04-18 00:11:04 +02:00
|
|
|
struct commit *interesting_cache = NULL;
|
revision: --ancestry-path
"rev-list A..H" computes the set of commits that are ancestors of H, but
excludes the ones that are ancestors of A. This is useful to see what
happened to the history leading to H since A, in the sense that "what does
H have that did not exist in A" (e.g. when you have a choice to update to
H from A).
x---x---A---B---C <-- topic
/ \
x---x---x---o---o---o---o---M---D---E---F---G <-- dev
/ \
x---o---o---o---o---o---o---o---o---o---o---o---N---H <-- master
The result in the above example would be the commits marked with caps
letters (except for A itself, of course), and the ones marked with 'o'.
When you want to find out what commits in H are contaminated with the bug
introduced by A and need fixing, however, you might want to view only the
subset of "A..B" that are actually descendants of A, i.e. excluding the
ones marked with 'o'. Introduce a new option --ancestry-path to compute
this set with "rev-list --ancestry-path A..B".
Note that in practice, you would build a fix immediately on top of A and
"git branch --contains A" will give the names of branches that you would
need to merge the fix into (i.e. topic, dev and master), so this may not
be worth paying the extra cost of postprocessing.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-04-20 22:48:39 +02:00
|
|
|
|
|
|
|
if (revs->ancestry_path) {
|
2013-05-16 17:32:38 +02:00
|
|
|
bottom = collect_bottom_commits(list);
|
revision: --ancestry-path
"rev-list A..H" computes the set of commits that are ancestors of H, but
excludes the ones that are ancestors of A. This is useful to see what
happened to the history leading to H since A, in the sense that "what does
H have that did not exist in A" (e.g. when you have a choice to update to
H from A).
x---x---A---B---C <-- topic
/ \
x---x---x---o---o---o---o---M---D---E---F---G <-- dev
/ \
x---o---o---o---o---o---o---o---o---o---o---o---N---H <-- master
The result in the above example would be the commits marked with caps
letters (except for A itself, of course), and the ones marked with 'o'.
When you want to find out what commits in H are contaminated with the bug
introduced by A and need fixing, however, you might want to view only the
subset of "A..B" that are actually descendants of A, i.e. excluding the
ones marked with 'o'. Introduce a new option --ancestry-path to compute
this set with "rev-list --ancestry-path A..B".
Note that in practice, you would build a fix immediately on top of A and
"git branch --contains A" will give the names of branches that you would
need to merge the fix into (i.e. topic, dev and master), so this may not
be worth paying the extra cost of postprocessing.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-04-20 22:48:39 +02:00
|
|
|
if (!bottom)
|
2010-06-04 01:17:36 +02:00
|
|
|
die("--ancestry-path given but there are no bottom commits");
|
revision: --ancestry-path
"rev-list A..H" computes the set of commits that are ancestors of H, but
excludes the ones that are ancestors of A. This is useful to see what
happened to the history leading to H since A, in the sense that "what does
H have that did not exist in A" (e.g. when you have a choice to update to
H from A).
x---x---A---B---C <-- topic
/ \
x---x---x---o---o---o---o---M---D---E---F---G <-- dev
/ \
x---o---o---o---o---o---o---o---o---o---o---o---N---H <-- master
The result in the above example would be the commits marked with caps
letters (except for A itself, of course), and the ones marked with 'o'.
When you want to find out what commits in H are contaminated with the bug
introduced by A and need fixing, however, you might want to view only the
subset of "A..B" that are actually descendants of A, i.e. excluding the
ones marked with 'o'. Introduce a new option --ancestry-path to compute
this set with "rev-list --ancestry-path A..B".
Note that in practice, you would build a fix immediately on top of A and
"git branch --contains A" will give the names of branches that you would
need to merge the fix into (i.e. topic, dev and master), so this may not
be worth paying the extra cost of postprocessing.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-04-20 22:48:39 +02:00
|
|
|
}
|
2006-02-28 20:24:00 +01:00
|
|
|
|
|
|
|
while (list) {
|
2015-10-24 18:21:31 +02:00
|
|
|
struct commit *commit = pop_commit(&list);
|
2006-02-28 20:24:00 +01:00
|
|
|
struct object *obj = &commit->object;
|
2007-11-03 19:11:10 +01:00
|
|
|
show_early_output_fn_t show;
|
2006-02-28 20:24:00 +01:00
|
|
|
|
limit_list: avoid quadratic behavior from still_interesting
When we are limiting a rev-list traversal due to
UNINTERESTING refs, we have to walk down the tips (both
interesting and uninteresting) to find where they intersect.
We keep a queue of commits to examine, pop commits off
the queue one by one, and potentially add their parents. The
size of the queue will naturally fluctuate based on the
"width" of the history graph; i.e., the number of
simultaneous lines of development. But for the most part it
will stay in the same ballpark as the initial number of tips
we fed, shrinking over time (as we hit common ancestors of
the tips). So roughly speaking, if we start with `N` tips,
we'll spend much of the time with a queue around `N` items.
For each UNINTERESTING commit we pop, we call
still_interesting to check whether marking its parents as
UNINTERESTING has made the whole queue uninteresting (in
which case we can quit early). Because the queue is stored
as a linked list, this is `O(N)`, where `N` is the number of
items in the queue. So processing a queue with `N` commits
marked UNINTERESTING (and one or more interesting commits)
will take `O(N^2)`.
If you feed a lot of positive tips, this isn't a problem.
They aren't UNINTERESTING, so they don't incur the
still_interesting check. It also isn't a problem if you
traverse from an interesting tip to some UNINTERESTING
bases. We order the queue by recency, so the interesting
commits stay at the front of the queue as we walk down them.
The linear check can exit early as soon as it sees one
interesting commit left in the queue.
But if you want to know whether an older commit is reachable
from a set of newer tips, we end up processing in the
opposite direction: from the UNINTERESTING ones down to the
interesting one. This may happen when we call:
git rev-list $commits --not --all
in check_everything_connected after a fetch. If we fetched
something much older than most of our refs, and if we have a
large number of refs, the traversal cost is dominated by the
quadratic behavior.
These commands simulate the connectivity check of such a
fetch, when you have `$n` distinct refs in the receiver:
# positive ref is 100,000 commits deep
git rev-list --all | head -100000 | tail -1 >input
# huge number of more recent negative refs
git rev-list --all | head -$n | sed s/^/^/ >>input
time git rev-list --stdin <input
Here are timings for various `n` on the linux.git
repository. The `n=1` case provides a baseline for just
walking the commits, which lets us see the still_interesting
overhead. The times marked with `+` subtract that baseline
to show just the extra time growth due to the large number
of refs. The `x` numbers show the slowdown of the adjusted
time versus the prior trial.
n | before | after
--------------------------------------------------------
1 | 0.991s | 0.848s
10000 | 1.120s (+0.129s) | 0.885s (+0.037s)
20000 | 1.451s (+0.460s, 3.5x) | 0.923s (+0.075s, 2.0x)
40000 | 2.731s (+1.740s, 3.8x) | 0.994s (+0.146s, 1.9x)
80000 | 8.235s (+7.244s, 4.2x) | 1.123s (+0.275s, 1.9x)
Each trial doubles `n`, so you can see the quadratic (`4x`)
behavior before this patch. Afterwards, we have a roughly
linear relationship.
The implementation is fairly straightforward. Whenever we do
the linear search, we cache the interesting commit we find,
and next time check it before doing another linear search.
If that commit is removed from the list or becomes
UNINTERESTING itself, then we fall back to the linear
search. This is very similar to the trick used by fce87ae
(Fix quadratic performance in rewrite_one., 2008-07-12).
I considered and rejected several possible alternatives:
1. Keep a count of UNINTERESTING commits in the queue.
This requires managing the count not only when removing
an item from the queue, but also when marking an item
as UNINTERESTING. That requires touching the other
functions which mark commits, and would require knowing
quickly which commits are in the queue (lookup in the
queue is linear, so we would need an auxiliary
structure or to also maintain an IN_QUEUE flag in each
commit object).
2. Keep a separate list of interesting commits. Drop items
from it when they are dropped from the queue, or if
they become UNINTERESTING. This again suffers from
extra complexity to maintain the list, not to mention
CPU and memory.
3. Use a better data structure for the queue. This is
something that could help the fix in fce87ae, because
we order the queue by recency, and it is about
inserting quickly in recency order. So a normal
priority queue would help there. But here, we cannot
disturb the order of the queue, which makes things
harder. We really do need an auxiliary index to track
the flag we care about, which is basically option (2)
above.
The "cache" trick is simple, and the numbers above show that
it works well in practice. This is because the length of
time it takes to find an interesting commit is proportional
to the length of time it will remain cached (i.e., if we
have to walk a long way to find it, it also means we have to
pop a lot of elements in the queue until we get rid of it
and have to find another interesting commit).
The worst case is still quadratic, though. We could have `N`
uninteresting commits at the front of the queue, followed by
`N` interesting commits, where commit `i` has parent `i+N`.
When we pop commit `i`, we will notice that the parent of
the next commit, `i+1+N` is still interesting and cache it.
But then handling commit `i+1`, we will mark its parent
`i+1+N` uninteresting, and immediately invalidate our cache.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-04-18 00:11:04 +02:00
|
|
|
if (commit == interesting_cache)
|
|
|
|
interesting_cache = NULL;
|
|
|
|
|
2006-02-28 20:24:00 +01:00
|
|
|
if (revs->max_age != -1 && (commit->date < revs->max_age))
|
|
|
|
obj->flags |= UNINTERESTING;
|
2008-07-12 20:00:57 +02:00
|
|
|
if (add_parents_to_list(revs, commit, &list, NULL) < 0)
|
2007-05-04 23:54:57 +02:00
|
|
|
return -1;
|
2006-02-28 20:24:00 +01:00
|
|
|
if (obj->flags & UNINTERESTING) {
|
|
|
|
mark_parents_uninteresting(commit);
|
2008-03-18 02:56:33 +01:00
|
|
|
if (revs->show_all)
|
|
|
|
p = &commit_list_insert(commit, p)->next;
|
limit_list: avoid quadratic behavior from still_interesting
When we are limiting a rev-list traversal due to
UNINTERESTING refs, we have to walk down the tips (both
interesting and uninteresting) to find where they intersect.
We keep a queue of commits to examine, pop commits off
the queue one by one, and potentially add their parents. The
size of the queue will naturally fluctuate based on the
"width" of the history graph; i.e., the number of
simultaneous lines of development. But for the most part it
will stay in the same ballpark as the initial number of tips
we fed, shrinking over time (as we hit common ancestors of
the tips). So roughly speaking, if we start with `N` tips,
we'll spend much of the time with a queue around `N` items.
For each UNINTERESTING commit we pop, we call
still_interesting to check whether marking its parents as
UNINTERESTING has made the whole queue uninteresting (in
which case we can quit early). Because the queue is stored
as a linked list, this is `O(N)`, where `N` is the number of
items in the queue. So processing a queue with `N` commits
marked UNINTERESTING (and one or more interesting commits)
will take `O(N^2)`.
If you feed a lot of positive tips, this isn't a problem.
They aren't UNINTERESTING, so they don't incur the
still_interesting check. It also isn't a problem if you
traverse from an interesting tip to some UNINTERESTING
bases. We order the queue by recency, so the interesting
commits stay at the front of the queue as we walk down them.
The linear check can exit early as soon as it sees one
interesting commit left in the queue.
But if you want to know whether an older commit is reachable
from a set of newer tips, we end up processing in the
opposite direction: from the UNINTERESTING ones down to the
interesting one. This may happen when we call:
git rev-list $commits --not --all
in check_everything_connected after a fetch. If we fetched
something much older than most of our refs, and if we have a
large number of refs, the traversal cost is dominated by the
quadratic behavior.
These commands simulate the connectivity check of such a
fetch, when you have `$n` distinct refs in the receiver:
# positive ref is 100,000 commits deep
git rev-list --all | head -100000 | tail -1 >input
# huge number of more recent negative refs
git rev-list --all | head -$n | sed s/^/^/ >>input
time git rev-list --stdin <input
Here are timings for various `n` on the linux.git
repository. The `n=1` case provides a baseline for just
walking the commits, which lets us see the still_interesting
overhead. The times marked with `+` subtract that baseline
to show just the extra time growth due to the large number
of refs. The `x` numbers show the slowdown of the adjusted
time versus the prior trial.
n | before | after
--------------------------------------------------------
1 | 0.991s | 0.848s
10000 | 1.120s (+0.129s) | 0.885s (+0.037s)
20000 | 1.451s (+0.460s, 3.5x) | 0.923s (+0.075s, 2.0x)
40000 | 2.731s (+1.740s, 3.8x) | 0.994s (+0.146s, 1.9x)
80000 | 8.235s (+7.244s, 4.2x) | 1.123s (+0.275s, 1.9x)
Each trial doubles `n`, so you can see the quadratic (`4x`)
behavior before this patch. Afterwards, we have a roughly
linear relationship.
The implementation is fairly straightforward. Whenever we do
the linear search, we cache the interesting commit we find,
and next time check it before doing another linear search.
If that commit is removed from the list or becomes
UNINTERESTING itself, then we fall back to the linear
search. This is very similar to the trick used by fce87ae
(Fix quadratic performance in rewrite_one., 2008-07-12).
I considered and rejected several possible alternatives:
1. Keep a count of UNINTERESTING commits in the queue.
This requires managing the count not only when removing
an item from the queue, but also when marking an item
as UNINTERESTING. That requires touching the other
functions which mark commits, and would require knowing
quickly which commits are in the queue (lookup in the
queue is linear, so we would need an auxiliary
structure or to also maintain an IN_QUEUE flag in each
commit object).
2. Keep a separate list of interesting commits. Drop items
from it when they are dropped from the queue, or if
they become UNINTERESTING. This again suffers from
extra complexity to maintain the list, not to mention
CPU and memory.
3. Use a better data structure for the queue. This is
something that could help the fix in fce87ae, because
we order the queue by recency, and it is about
inserting quickly in recency order. So a normal
priority queue would help there. But here, we cannot
disturb the order of the queue, which makes things
harder. We really do need an auxiliary index to track
the flag we care about, which is basically option (2)
above.
The "cache" trick is simple, and the numbers above show that
it works well in practice. This is because the length of
time it takes to find an interesting commit is proportional
to the length of time it will remain cached (i.e., if we
have to walk a long way to find it, it also means we have to
pop a lot of elements in the queue until we get rid of it
and have to find another interesting commit).
The worst case is still quadratic, though. We could have `N`
uninteresting commits at the front of the queue, followed by
`N` interesting commits, where commit `i` has parent `i+N`.
When we pop commit `i`, we will notice that the parent of
the next commit, `i+1+N` is still interesting and cache it.
But then handling commit `i+1`, we will mark its parent
`i+1+N` uninteresting, and immediately invalidate our cache.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-04-18 00:11:04 +02:00
|
|
|
slop = still_interesting(list, date, slop, &interesting_cache);
|
2008-03-18 02:56:33 +01:00
|
|
|
if (slop)
|
Add "--show-all" revision walker flag for debugging
It's really not very easy to visualize the commit walker, because - on
purpose - it obvously doesn't show the uninteresting commits!
This adds a "--show-all" flag to the revision walker, which will make
it show uninteresting commits too, and they'll have a '^' in front of
them (it also fixes a logic error for !verbose_header for boundary
commits - we should show the '-' even if left_right isn't shown).
A separate patch to gitk to teach it the new '^' was sent
to paulus. With the change in place, it actually is interesting
even for the cases that git doesn't have any problems with, ie
for the kernel you can do:
gitk -d --show-all v2.6.24..
and you see just how far down it has to parse things to see it all. The
use of "-d" is a good idea, since the date-ordered toposort is much better
at showing why it goes deep down (ie the date of some of those commits
after 2.6.24 is much older, because they were merged from trees that
weren't rebased).
So I think this is a useful feature even for non-debugging - just to
visualize what git does internally more.
When it actually breaks out due to the "everybody_uninteresting()"
case, it adds the uninteresting commits (both the one it's looking at
now, and the list of pending ones) to the list
This way, we really list *all* the commits we've looked at.
Because we now end up listing commits we may not even have been parsed
at all "show_log" and "show_commit" need to protect against commits
that don't have a commit buffer entry.
That second part is debatable just how it should work. Maybe we shouldn't
show such entries at all (with this patch those entries do get shown, they
just don't get any message shown with them). But I think this is a useful
case.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-02-09 23:02:07 +01:00
|
|
|
continue;
|
2008-03-18 02:56:33 +01:00
|
|
|
/* If showing all, add the whole pending list to the end */
|
|
|
|
if (revs->show_all)
|
|
|
|
*p = list;
|
|
|
|
break;
|
2006-02-28 20:24:00 +01:00
|
|
|
}
|
|
|
|
if (revs->min_age != -1 && (commit->date > revs->min_age))
|
|
|
|
continue;
|
2008-03-18 02:56:33 +01:00
|
|
|
date = commit->date;
|
2006-02-28 20:24:00 +01:00
|
|
|
p = &commit_list_insert(commit, p)->next;
|
2007-11-03 19:11:10 +01:00
|
|
|
|
|
|
|
show = show_early_output;
|
|
|
|
if (!show)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
show(revs, newlist);
|
|
|
|
show_early_output = NULL;
|
2006-02-28 20:24:00 +01:00
|
|
|
}
|
2011-03-07 13:31:40 +01:00
|
|
|
if (revs->cherry_pick || revs->cherry_mark)
|
2007-07-10 15:50:49 +02:00
|
|
|
cherry_pick_list(newlist, revs);
|
2007-04-09 12:40:38 +02:00
|
|
|
|
2011-02-21 17:09:11 +01:00
|
|
|
if (revs->left_only || revs->right_only)
|
|
|
|
limit_left_right(newlist, revs);
|
|
|
|
|
revision: --ancestry-path
"rev-list A..H" computes the set of commits that are ancestors of H, but
excludes the ones that are ancestors of A. This is useful to see what
happened to the history leading to H since A, in the sense that "what does
H have that did not exist in A" (e.g. when you have a choice to update to
H from A).
x---x---A---B---C <-- topic
/ \
x---x---x---o---o---o---o---M---D---E---F---G <-- dev
/ \
x---o---o---o---o---o---o---o---o---o---o---o---N---H <-- master
The result in the above example would be the commits marked with caps
letters (except for A itself, of course), and the ones marked with 'o'.
When you want to find out what commits in H are contaminated with the bug
introduced by A and need fixing, however, you might want to view only the
subset of "A..B" that are actually descendants of A, i.e. excluding the
ones marked with 'o'. Introduce a new option --ancestry-path to compute
this set with "rev-list --ancestry-path A..B".
Note that in practice, you would build a fix immediately on top of A and
"git branch --contains A" will give the names of branches that you would
need to merge the fix into (i.e. topic, dev and master), so this may not
be worth paying the extra cost of postprocessing.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-04-20 22:48:39 +02:00
|
|
|
if (bottom) {
|
|
|
|
limit_to_ancestry(bottom, newlist);
|
|
|
|
free_commit_list(bottom);
|
|
|
|
}
|
|
|
|
|
2013-05-16 17:32:39 +02:00
|
|
|
/*
|
|
|
|
* Check if any commits have become TREESAME by some of their parents
|
|
|
|
* becoming UNINTERESTING.
|
|
|
|
*/
|
|
|
|
if (limiting_can_increase_treesame(revs))
|
|
|
|
for (list = newlist; list; list = list->next) {
|
|
|
|
struct commit *c = list->item;
|
|
|
|
if (c->object.flags & (UNINTERESTING | TREESAME))
|
|
|
|
continue;
|
|
|
|
update_treesame(revs, c);
|
|
|
|
}
|
|
|
|
|
2006-02-28 20:24:00 +01:00
|
|
|
revs->commits = newlist;
|
2007-05-04 23:54:57 +02:00
|
|
|
return 0;
|
2006-02-28 20:24:00 +01:00
|
|
|
}
|
|
|
|
|
2013-05-25 11:08:02 +02:00
|
|
|
/*
|
|
|
|
* Add an entry to refs->cmdline with the specified information.
|
|
|
|
* *name is copied.
|
|
|
|
*/
|
2011-08-26 02:35:39 +02:00
|
|
|
static void add_rev_cmdline(struct rev_info *revs,
|
|
|
|
struct object *item,
|
|
|
|
const char *name,
|
|
|
|
int whence,
|
|
|
|
unsigned flags)
|
|
|
|
{
|
|
|
|
struct rev_cmdline_info *info = &revs->cmdline;
|
|
|
|
int nr = info->nr;
|
|
|
|
|
|
|
|
ALLOC_GROW(info->rev, nr + 1, info->alloc);
|
|
|
|
info->rev[nr].item = item;
|
2013-05-25 11:08:02 +02:00
|
|
|
info->rev[nr].name = xstrdup(name);
|
2011-08-26 02:35:39 +02:00
|
|
|
info->rev[nr].whence = whence;
|
|
|
|
info->rev[nr].flags = flags;
|
|
|
|
info->nr++;
|
|
|
|
}
|
|
|
|
|
2013-05-13 17:00:47 +02:00
|
|
|
static void add_rev_cmdline_list(struct rev_info *revs,
|
|
|
|
struct commit_list *commit_list,
|
|
|
|
int whence,
|
|
|
|
unsigned flags)
|
|
|
|
{
|
|
|
|
while (commit_list) {
|
|
|
|
struct object *object = &commit_list->item->object;
|
2015-11-10 03:22:28 +01:00
|
|
|
add_rev_cmdline(revs, object, oid_to_hex(&object->oid),
|
2013-05-13 17:00:47 +02:00
|
|
|
whence, flags);
|
|
|
|
commit_list = commit_list->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-12-19 02:25:28 +01:00
|
|
|
struct all_refs_cb {
|
|
|
|
int all_flags;
|
2006-12-22 01:49:06 +01:00
|
|
|
int warned_bad_reflog;
|
2006-12-19 02:25:28 +01:00
|
|
|
struct rev_info *all_revs;
|
|
|
|
const char *name_for_errormsg;
|
|
|
|
};
|
2006-02-26 01:19:46 +01:00
|
|
|
|
2013-11-01 20:02:45 +01:00
|
|
|
int ref_excluded(struct string_list *ref_excludes, const char *path)
|
revision: introduce --exclude=<glob> to tame wildcards
People often find "git log --branches" etc. that includes _all_
branches is cumbersome to use when they want to grab most but except
some. The same applies to --tags, --all and --glob.
Teach the revision machinery to remember patterns, and then upon the
next such a globbing option, exclude those that match the pattern.
With this, I can view only my integration branches (e.g. maint,
master, etc.) without topic branches, which are named after two
letters from primary authors' names, slash and topic name.
git rev-list --no-walk --exclude=??/* --branches |
git name-rev --refs refs/heads/* --stdin
This one shows things reachable from local and remote branches that
have not been merged to the integration branches.
git log --remotes --branches --not --exclude=??/* --branches
It may be a bit rough around the edges, in that the pattern to give
the exclude option depends on what globbing option follows. In
these examples, the pattern "??/*" is used, not "refs/heads/??/*",
because the globbing option that follows the -"-exclude=<pattern>"
is "--branches". As each use of globbing option resets previously
set "--exclude", this may not be such a bad thing, though.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-31 01:37:55 +02:00
|
|
|
{
|
|
|
|
struct string_list_item *item;
|
|
|
|
|
2013-11-01 20:02:45 +01:00
|
|
|
if (!ref_excludes)
|
revision: introduce --exclude=<glob> to tame wildcards
People often find "git log --branches" etc. that includes _all_
branches is cumbersome to use when they want to grab most but except
some. The same applies to --tags, --all and --glob.
Teach the revision machinery to remember patterns, and then upon the
next such a globbing option, exclude those that match the pattern.
With this, I can view only my integration branches (e.g. maint,
master, etc.) without topic branches, which are named after two
letters from primary authors' names, slash and topic name.
git rev-list --no-walk --exclude=??/* --branches |
git name-rev --refs refs/heads/* --stdin
This one shows things reachable from local and remote branches that
have not been merged to the integration branches.
git log --remotes --branches --not --exclude=??/* --branches
It may be a bit rough around the edges, in that the pattern to give
the exclude option depends on what globbing option follows. In
these examples, the pattern "??/*" is used, not "refs/heads/??/*",
because the globbing option that follows the -"-exclude=<pattern>"
is "--branches". As each use of globbing option resets previously
set "--exclude", this may not be such a bad thing, though.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-31 01:37:55 +02:00
|
|
|
return 0;
|
2013-11-01 20:02:45 +01:00
|
|
|
for_each_string_list_item(item, ref_excludes) {
|
2014-02-15 03:01:46 +01:00
|
|
|
if (!wildmatch(item->string, path, 0, NULL))
|
revision: introduce --exclude=<glob> to tame wildcards
People often find "git log --branches" etc. that includes _all_
branches is cumbersome to use when they want to grab most but except
some. The same applies to --tags, --all and --glob.
Teach the revision machinery to remember patterns, and then upon the
next such a globbing option, exclude those that match the pattern.
With this, I can view only my integration branches (e.g. maint,
master, etc.) without topic branches, which are named after two
letters from primary authors' names, slash and topic name.
git rev-list --no-walk --exclude=??/* --branches |
git name-rev --refs refs/heads/* --stdin
This one shows things reachable from local and remote branches that
have not been merged to the integration branches.
git log --remotes --branches --not --exclude=??/* --branches
It may be a bit rough around the edges, in that the pattern to give
the exclude option depends on what globbing option follows. In
these examples, the pattern "??/*" is used, not "refs/heads/??/*",
because the globbing option that follows the -"-exclude=<pattern>"
is "--branches". As each use of globbing option resets previously
set "--exclude", this may not be such a bad thing, though.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-31 01:37:55 +02:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-05-25 20:38:30 +02:00
|
|
|
static int handle_one_ref(const char *path, const struct object_id *oid,
|
|
|
|
int flag, void *cb_data)
|
2006-02-26 01:19:46 +01:00
|
|
|
{
|
2006-12-19 02:25:28 +01:00
|
|
|
struct all_refs_cb *cb = cb_data;
|
revision: introduce --exclude=<glob> to tame wildcards
People often find "git log --branches" etc. that includes _all_
branches is cumbersome to use when they want to grab most but except
some. The same applies to --tags, --all and --glob.
Teach the revision machinery to remember patterns, and then upon the
next such a globbing option, exclude those that match the pattern.
With this, I can view only my integration branches (e.g. maint,
master, etc.) without topic branches, which are named after two
letters from primary authors' names, slash and topic name.
git rev-list --no-walk --exclude=??/* --branches |
git name-rev --refs refs/heads/* --stdin
This one shows things reachable from local and remote branches that
have not been merged to the integration branches.
git log --remotes --branches --not --exclude=??/* --branches
It may be a bit rough around the edges, in that the pattern to give
the exclude option depends on what globbing option follows. In
these examples, the pattern "??/*" is used, not "refs/heads/??/*",
because the globbing option that follows the -"-exclude=<pattern>"
is "--branches". As each use of globbing option resets previously
set "--exclude", this may not be such a bad thing, though.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-31 01:37:55 +02:00
|
|
|
struct object *object;
|
|
|
|
|
2013-11-01 20:02:45 +01:00
|
|
|
if (ref_excluded(cb->all_revs->ref_excludes, path))
|
revision: introduce --exclude=<glob> to tame wildcards
People often find "git log --branches" etc. that includes _all_
branches is cumbersome to use when they want to grab most but except
some. The same applies to --tags, --all and --glob.
Teach the revision machinery to remember patterns, and then upon the
next such a globbing option, exclude those that match the pattern.
With this, I can view only my integration branches (e.g. maint,
master, etc.) without topic branches, which are named after two
letters from primary authors' names, slash and topic name.
git rev-list --no-walk --exclude=??/* --branches |
git name-rev --refs refs/heads/* --stdin
This one shows things reachable from local and remote branches that
have not been merged to the integration branches.
git log --remotes --branches --not --exclude=??/* --branches
It may be a bit rough around the edges, in that the pattern to give
the exclude option depends on what globbing option follows. In
these examples, the pattern "??/*" is used, not "refs/heads/??/*",
because the globbing option that follows the -"-exclude=<pattern>"
is "--branches". As each use of globbing option resets previously
set "--exclude", this may not be such a bad thing, though.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-31 01:37:55 +02:00
|
|
|
return 0;
|
|
|
|
|
2015-05-25 20:38:30 +02:00
|
|
|
object = get_reference(cb->all_revs, path, oid->hash, cb->all_flags);
|
2011-08-26 02:35:39 +02:00
|
|
|
add_rev_cmdline(cb->all_revs, object, path, REV_CMD_REF, cb->all_flags);
|
2015-05-25 20:38:30 +02:00
|
|
|
add_pending_sha1(cb->all_revs, path, oid->hash, cb->all_flags);
|
2006-02-26 01:19:46 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-01-20 10:48:25 +01:00
|
|
|
static void init_all_refs_cb(struct all_refs_cb *cb, struct rev_info *revs,
|
|
|
|
unsigned flags)
|
|
|
|
{
|
|
|
|
cb->all_revs = revs;
|
|
|
|
cb->all_flags = flags;
|
|
|
|
}
|
|
|
|
|
2013-11-01 20:02:45 +01:00
|
|
|
void clear_ref_exclusion(struct string_list **ref_excludes_p)
|
revision: introduce --exclude=<glob> to tame wildcards
People often find "git log --branches" etc. that includes _all_
branches is cumbersome to use when they want to grab most but except
some. The same applies to --tags, --all and --glob.
Teach the revision machinery to remember patterns, and then upon the
next such a globbing option, exclude those that match the pattern.
With this, I can view only my integration branches (e.g. maint,
master, etc.) without topic branches, which are named after two
letters from primary authors' names, slash and topic name.
git rev-list --no-walk --exclude=??/* --branches |
git name-rev --refs refs/heads/* --stdin
This one shows things reachable from local and remote branches that
have not been merged to the integration branches.
git log --remotes --branches --not --exclude=??/* --branches
It may be a bit rough around the edges, in that the pattern to give
the exclude option depends on what globbing option follows. In
these examples, the pattern "??/*" is used, not "refs/heads/??/*",
because the globbing option that follows the -"-exclude=<pattern>"
is "--branches". As each use of globbing option resets previously
set "--exclude", this may not be such a bad thing, though.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-31 01:37:55 +02:00
|
|
|
{
|
2013-11-01 20:02:45 +01:00
|
|
|
if (*ref_excludes_p) {
|
|
|
|
string_list_clear(*ref_excludes_p, 0);
|
|
|
|
free(*ref_excludes_p);
|
revision: introduce --exclude=<glob> to tame wildcards
People often find "git log --branches" etc. that includes _all_
branches is cumbersome to use when they want to grab most but except
some. The same applies to --tags, --all and --glob.
Teach the revision machinery to remember patterns, and then upon the
next such a globbing option, exclude those that match the pattern.
With this, I can view only my integration branches (e.g. maint,
master, etc.) without topic branches, which are named after two
letters from primary authors' names, slash and topic name.
git rev-list --no-walk --exclude=??/* --branches |
git name-rev --refs refs/heads/* --stdin
This one shows things reachable from local and remote branches that
have not been merged to the integration branches.
git log --remotes --branches --not --exclude=??/* --branches
It may be a bit rough around the edges, in that the pattern to give
the exclude option depends on what globbing option follows. In
these examples, the pattern "??/*" is used, not "refs/heads/??/*",
because the globbing option that follows the -"-exclude=<pattern>"
is "--branches". As each use of globbing option resets previously
set "--exclude", this may not be such a bad thing, though.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-31 01:37:55 +02:00
|
|
|
}
|
2013-11-01 20:02:45 +01:00
|
|
|
*ref_excludes_p = NULL;
|
revision: introduce --exclude=<glob> to tame wildcards
People often find "git log --branches" etc. that includes _all_
branches is cumbersome to use when they want to grab most but except
some. The same applies to --tags, --all and --glob.
Teach the revision machinery to remember patterns, and then upon the
next such a globbing option, exclude those that match the pattern.
With this, I can view only my integration branches (e.g. maint,
master, etc.) without topic branches, which are named after two
letters from primary authors' names, slash and topic name.
git rev-list --no-walk --exclude=??/* --branches |
git name-rev --refs refs/heads/* --stdin
This one shows things reachable from local and remote branches that
have not been merged to the integration branches.
git log --remotes --branches --not --exclude=??/* --branches
It may be a bit rough around the edges, in that the pattern to give
the exclude option depends on what globbing option follows. In
these examples, the pattern "??/*" is used, not "refs/heads/??/*",
because the globbing option that follows the -"-exclude=<pattern>"
is "--branches". As each use of globbing option resets previously
set "--exclude", this may not be such a bad thing, though.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-31 01:37:55 +02:00
|
|
|
}
|
|
|
|
|
2013-11-01 20:02:45 +01:00
|
|
|
void add_ref_exclusion(struct string_list **ref_excludes_p, const char *exclude)
|
revision: introduce --exclude=<glob> to tame wildcards
People often find "git log --branches" etc. that includes _all_
branches is cumbersome to use when they want to grab most but except
some. The same applies to --tags, --all and --glob.
Teach the revision machinery to remember patterns, and then upon the
next such a globbing option, exclude those that match the pattern.
With this, I can view only my integration branches (e.g. maint,
master, etc.) without topic branches, which are named after two
letters from primary authors' names, slash and topic name.
git rev-list --no-walk --exclude=??/* --branches |
git name-rev --refs refs/heads/* --stdin
This one shows things reachable from local and remote branches that
have not been merged to the integration branches.
git log --remotes --branches --not --exclude=??/* --branches
It may be a bit rough around the edges, in that the pattern to give
the exclude option depends on what globbing option follows. In
these examples, the pattern "??/*" is used, not "refs/heads/??/*",
because the globbing option that follows the -"-exclude=<pattern>"
is "--branches". As each use of globbing option resets previously
set "--exclude", this may not be such a bad thing, though.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-31 01:37:55 +02:00
|
|
|
{
|
2013-11-01 20:02:45 +01:00
|
|
|
if (!*ref_excludes_p) {
|
|
|
|
*ref_excludes_p = xcalloc(1, sizeof(**ref_excludes_p));
|
|
|
|
(*ref_excludes_p)->strdup_strings = 1;
|
revision: introduce --exclude=<glob> to tame wildcards
People often find "git log --branches" etc. that includes _all_
branches is cumbersome to use when they want to grab most but except
some. The same applies to --tags, --all and --glob.
Teach the revision machinery to remember patterns, and then upon the
next such a globbing option, exclude those that match the pattern.
With this, I can view only my integration branches (e.g. maint,
master, etc.) without topic branches, which are named after two
letters from primary authors' names, slash and topic name.
git rev-list --no-walk --exclude=??/* --branches |
git name-rev --refs refs/heads/* --stdin
This one shows things reachable from local and remote branches that
have not been merged to the integration branches.
git log --remotes --branches --not --exclude=??/* --branches
It may be a bit rough around the edges, in that the pattern to give
the exclude option depends on what globbing option follows. In
these examples, the pattern "??/*" is used, not "refs/heads/??/*",
because the globbing option that follows the -"-exclude=<pattern>"
is "--branches". As each use of globbing option resets previously
set "--exclude", this may not be such a bad thing, though.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-31 01:37:55 +02:00
|
|
|
}
|
2013-11-01 20:02:45 +01:00
|
|
|
string_list_append(*ref_excludes_p, exclude);
|
revision: introduce --exclude=<glob> to tame wildcards
People often find "git log --branches" etc. that includes _all_
branches is cumbersome to use when they want to grab most but except
some. The same applies to --tags, --all and --glob.
Teach the revision machinery to remember patterns, and then upon the
next such a globbing option, exclude those that match the pattern.
With this, I can view only my integration branches (e.g. maint,
master, etc.) without topic branches, which are named after two
letters from primary authors' names, slash and topic name.
git rev-list --no-walk --exclude=??/* --branches |
git name-rev --refs refs/heads/* --stdin
This one shows things reachable from local and remote branches that
have not been merged to the integration branches.
git log --remotes --branches --not --exclude=??/* --branches
It may be a bit rough around the edges, in that the pattern to give
the exclude option depends on what globbing option follows. In
these examples, the pattern "??/*" is used, not "refs/heads/??/*",
because the globbing option that follows the -"-exclude=<pattern>"
is "--branches". As each use of globbing option resets previously
set "--exclude", this may not be such a bad thing, though.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-31 01:37:55 +02:00
|
|
|
}
|
|
|
|
|
2010-07-07 15:39:12 +02:00
|
|
|
static void handle_refs(const char *submodule, struct rev_info *revs, unsigned flags,
|
|
|
|
int (*for_each)(const char *, each_ref_fn, void *))
|
2006-02-26 01:19:46 +01:00
|
|
|
{
|
2006-12-19 02:25:28 +01:00
|
|
|
struct all_refs_cb cb;
|
2010-01-20 10:48:25 +01:00
|
|
|
init_all_refs_cb(&cb, revs, flags);
|
2015-05-25 20:38:30 +02:00
|
|
|
for_each(submodule, handle_one_ref, &cb);
|
2006-12-19 02:25:28 +01:00
|
|
|
}
|
|
|
|
|
2006-12-22 01:49:06 +01:00
|
|
|
static void handle_one_reflog_commit(unsigned char *sha1, void *cb_data)
|
2006-12-19 02:25:28 +01:00
|
|
|
{
|
|
|
|
struct all_refs_cb *cb = cb_data;
|
2006-12-22 01:49:06 +01:00
|
|
|
if (!is_null_sha1(sha1)) {
|
|
|
|
struct object *o = parse_object(sha1);
|
|
|
|
if (o) {
|
|
|
|
o->flags |= cb->all_flags;
|
2011-08-26 02:35:39 +02:00
|
|
|
/* ??? CMDLINEFLAGS ??? */
|
2006-12-22 01:49:06 +01:00
|
|
|
add_pending_object(cb->all_revs, o, "");
|
|
|
|
}
|
|
|
|
else if (!cb->warned_bad_reflog) {
|
2007-03-31 01:07:05 +02:00
|
|
|
warning("reflog of '%s' references pruned commits",
|
2006-12-22 01:49:06 +01:00
|
|
|
cb->name_for_errormsg);
|
|
|
|
cb->warned_bad_reflog = 1;
|
|
|
|
}
|
2006-12-19 02:25:28 +01:00
|
|
|
}
|
2006-12-22 01:49:06 +01:00
|
|
|
}
|
|
|
|
|
2007-01-08 01:59:54 +01:00
|
|
|
static int handle_one_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
|
|
|
|
const char *email, unsigned long timestamp, int tz,
|
|
|
|
const char *message, void *cb_data)
|
2006-12-22 01:49:06 +01:00
|
|
|
{
|
|
|
|
handle_one_reflog_commit(osha1, cb_data);
|
|
|
|
handle_one_reflog_commit(nsha1, cb_data);
|
2006-12-19 02:25:28 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2015-05-25 20:39:03 +02:00
|
|
|
static int handle_one_reflog(const char *path, const struct object_id *oid,
|
|
|
|
int flag, void *cb_data)
|
2006-12-19 02:25:28 +01:00
|
|
|
{
|
|
|
|
struct all_refs_cb *cb = cb_data;
|
2006-12-22 01:49:06 +01:00
|
|
|
cb->warned_bad_reflog = 0;
|
2006-12-19 02:25:28 +01:00
|
|
|
cb->name_for_errormsg = path;
|
|
|
|
for_each_reflog_ent(path, handle_one_reflog_ent, cb_data);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-10-16 00:38:31 +02:00
|
|
|
void add_reflogs_to_pending(struct rev_info *revs, unsigned flags)
|
2006-12-19 02:25:28 +01:00
|
|
|
{
|
|
|
|
struct all_refs_cb cb;
|
2015-05-25 20:38:28 +02:00
|
|
|
|
2006-12-19 02:25:28 +01:00
|
|
|
cb.all_revs = revs;
|
|
|
|
cb.all_flags = flags;
|
2015-05-25 20:39:03 +02:00
|
|
|
for_each_reflog(handle_one_reflog, &cb);
|
2006-02-26 01:19:46 +01:00
|
|
|
}
|
|
|
|
|
2014-10-17 02:44:23 +02:00
|
|
|
static void add_cache_tree(struct cache_tree *it, struct rev_info *revs,
|
|
|
|
struct strbuf *path)
|
|
|
|
{
|
|
|
|
size_t baselen = path->len;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (it->entry_count >= 0) {
|
|
|
|
struct tree *tree = lookup_tree(it->sha1);
|
|
|
|
add_pending_object_with_path(revs, &tree->object, "",
|
|
|
|
040000, path->buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < it->subtree_nr; i++) {
|
|
|
|
struct cache_tree_sub *sub = it->down[i];
|
|
|
|
strbuf_addf(path, "%s%s", baselen ? "/" : "", sub->name);
|
|
|
|
add_cache_tree(sub->cache_tree, revs, path);
|
|
|
|
strbuf_setlen(path, baselen);
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
void add_index_objects_to_pending(struct rev_info *revs, unsigned flags)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
read_cache();
|
|
|
|
for (i = 0; i < active_nr; i++) {
|
|
|
|
struct cache_entry *ce = active_cache[i];
|
|
|
|
struct blob *blob;
|
|
|
|
|
|
|
|
if (S_ISGITLINK(ce->ce_mode))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
blob = lookup_blob(ce->sha1);
|
|
|
|
if (!blob)
|
|
|
|
die("unable to add index blob to traversal");
|
|
|
|
add_pending_object_with_path(revs, &blob->object, "",
|
|
|
|
ce->ce_mode, ce->name);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (active_cache_tree) {
|
|
|
|
struct strbuf path = STRBUF_INIT;
|
|
|
|
add_cache_tree(active_cache_tree, revs, &path);
|
|
|
|
strbuf_release(&path);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-08-26 02:35:39 +02:00
|
|
|
static int add_parents_only(struct rev_info *revs, const char *arg_, int flags)
|
2006-04-30 09:54:29 +02:00
|
|
|
{
|
|
|
|
unsigned char sha1[20];
|
|
|
|
struct object *it;
|
|
|
|
struct commit *commit;
|
|
|
|
struct commit_list *parents;
|
2011-08-26 02:35:39 +02:00
|
|
|
const char *arg = arg_;
|
2006-04-30 09:54:29 +02:00
|
|
|
|
|
|
|
if (*arg == '^') {
|
2013-05-16 17:32:38 +02:00
|
|
|
flags ^= UNINTERESTING | BOTTOM;
|
2006-04-30 09:54:29 +02:00
|
|
|
arg++;
|
|
|
|
}
|
2012-07-02 21:04:52 +02:00
|
|
|
if (get_sha1_committish(arg, sha1))
|
2006-04-30 09:54:29 +02:00
|
|
|
return 0;
|
|
|
|
while (1) {
|
|
|
|
it = get_reference(revs, arg, sha1, 0);
|
2011-05-19 03:08:09 +02:00
|
|
|
if (!it && revs->ignore_missing)
|
|
|
|
return 0;
|
2006-07-12 05:45:31 +02:00
|
|
|
if (it->type != OBJ_TAG)
|
2006-04-30 09:54:29 +02:00
|
|
|
break;
|
2008-02-18 21:48:01 +01:00
|
|
|
if (!((struct tag*)it)->tagged)
|
|
|
|
return 0;
|
2015-11-10 03:22:29 +01:00
|
|
|
hashcpy(sha1, ((struct tag*)it)->tagged->oid.hash);
|
2006-04-30 09:54:29 +02:00
|
|
|
}
|
2006-07-12 05:45:31 +02:00
|
|
|
if (it->type != OBJ_COMMIT)
|
2006-04-30 09:54:29 +02:00
|
|
|
return 0;
|
|
|
|
commit = (struct commit *)it;
|
|
|
|
for (parents = commit->parents; parents; parents = parents->next) {
|
|
|
|
it = &parents->item->object;
|
|
|
|
it->flags |= flags;
|
2011-08-26 02:35:39 +02:00
|
|
|
add_rev_cmdline(revs, it, arg_, REV_CMD_PARENTS_ONLY, flags);
|
2006-04-30 09:54:29 +02:00
|
|
|
add_pending_object(revs, it, arg);
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2006-07-29 06:21:48 +02:00
|
|
|
void init_revisions(struct rev_info *revs, const char *prefix)
|
2006-03-10 10:21:39 +01:00
|
|
|
{
|
|
|
|
memset(revs, 0, sizeof(*revs));
|
2006-04-15 07:19:38 +02:00
|
|
|
|
2006-04-16 08:46:36 +02:00
|
|
|
revs->abbrev = DEFAULT_ABBREV;
|
2006-04-15 07:19:38 +02:00
|
|
|
revs->ignore_merges = 1;
|
2006-06-11 19:57:35 +02:00
|
|
|
revs->simplify_history = 1;
|
2007-11-10 20:05:14 +01:00
|
|
|
DIFF_OPT_SET(&revs->pruning, RECURSIVE);
|
2009-05-23 10:15:35 +02:00
|
|
|
DIFF_OPT_SET(&revs->pruning, QUICK);
|
Common option parsing for "git log --diff" and friends
This basically does a few things that are sadly somewhat interdependent,
and nontrivial to split out
- get rid of "struct log_tree_opt"
The fields in "log_tree_opt" are moved into "struct rev_info", and all
users of log_tree_opt are changed to use the rev_info struct instead.
- add the parsing for the log_tree_opt arguments to "setup_revision()"
- make setup_revision set a flag (revs->diff) if the diff-related
arguments were used. This allows "git log" to decide whether it wants
to show diffs or not.
- make setup_revision() also initialize the diffopt part of rev_info
(which we had from before, but we just didn't initialize it)
- make setup_revision() do all the "finishing touches" on it all (it will
do the proper flag combination logic, and call "diff_setup_done()")
Now, that was the easy and straightforward part.
The slightly more involved part is that some of the programs that want to
use the new-and-improved rev_info parsing don't actually want _commits_,
they may want tree'ish arguments instead. That meant that I had to change
setup_revision() to parse the arguments not into the "revs->commits" list,
but into the "revs->pending_objects" list.
Then, when we do "prepare_revision_walk()", we walk that list, and create
the sorted commit list from there.
This actually cleaned some stuff up, but it's the less obvious part of the
patch, and re-organized the "revision.c" logic somewhat. It actually paves
the way for splitting argument parsing _entirely_ out of "revision.c",
since now the argument parsing really is totally independent of the commit
walking: that didn't use to be true, since there was lots of overlap with
get_commit_reference() handling etc, now the _only_ overlap is the shared
(and trivial) "add_pending_object()" thing.
However, I didn't do that file split, just because I wanted the diff
itself to be smaller, and show the actual changes more clearly. If this
gets accepted, I'll do further cleanups then - that includes the file
split, but also using the new infrastructure to do a nicer "git diff" etc.
Even in this form, it actually ends up removing more lines than it adds.
It's nice to note how simple and straightforward this makes the built-in
"git log" command, even though it continues to support all the diff flags
too. It doesn't get much simpler that this.
I think this is worth merging soonish, because it does allow for future
cleanup and even more sharing of code. However, it obviously touches
"revision.c", which is subtle. I've tested that it passes all the tests we
have, and it passes my "looks sane" detector, but somebody else should
also give it a good look-over.
[jc: squashed the original and three "oops this too" updates, with
another fix-up.]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-15 01:52:13 +02:00
|
|
|
revs->pruning.add_remove = file_add_remove;
|
|
|
|
revs->pruning.change = file_change;
|
toposort: rename "lifo" field
The primary invariant of sort_in_topological_order() is that a
parent commit is not emitted until all children of it are. When
traversing a forked history like this with "git log C E":
A----B----C
\
D----E
we ensure that A is emitted after all of B, C, D, and E are done, B
has to wait until C is done, and D has to wait until E is done.
In some applications, however, we would further want to control how
these child commits B, C, D and E on two parallel ancestry chains
are shown.
Most of the time, we would want to see C and B emitted together, and
then E and D, and finally A (i.e. the --topo-order output). The
"lifo" parameter of the sort_in_topological_order() function is used
to control this behaviour. We start the traversal by knowing two
commits, C and E. While keeping in mind that we also need to
inspect E later, we pick C first to inspect, and we notice and
record that B needs to be inspected. By structuring the "work to be
done" set as a LIFO stack, we ensure that B is inspected next,
before other in-flight commits we had known that we will need to
inspect, e.g. E.
When showing in --date-order, we would want to see commits ordered
by timestamps, i.e. show C, E, B and D in this order before showing
A, possibly mixing commits from two parallel histories together.
When "lifo" parameter is set to false, the function keeps the "work
to be done" set sorted in the date order to realize this semantics.
After inspecting C, we add B to the "work to be done" set, but the
next commit we inspect from the set is E which is newer than B.
The name "lifo", however, is too strongly tied to the way how the
function implements its behaviour, and does not describe what the
behaviour _means_.
Replace this field with an enum rev_sort_order, with two possible
values: REV_SORT_IN_GRAPH_ORDER and REV_SORT_BY_COMMIT_DATE, and
update the existing code. The mechanical replacement rule is:
"lifo == 0" is equivalent to "sort_order == REV_SORT_BY_COMMIT_DATE"
"lifo == 1" is equivalent to "sort_order == REV_SORT_IN_GRAPH_ORDER"
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-06-07 01:07:14 +02:00
|
|
|
revs->sort_order = REV_SORT_IN_GRAPH_ORDER;
|
2006-03-10 10:21:39 +01:00
|
|
|
revs->dense = 1;
|
2006-07-29 06:21:48 +02:00
|
|
|
revs->prefix = prefix;
|
2006-03-10 10:21:39 +01:00
|
|
|
revs->max_age = -1;
|
|
|
|
revs->min_age = -1;
|
2006-12-20 03:25:32 +01:00
|
|
|
revs->skip_count = -1;
|
2006-03-10 10:21:39 +01:00
|
|
|
revs->max_count = -1;
|
2011-03-21 11:14:06 +01:00
|
|
|
revs->max_parents = -1;
|
2006-03-10 10:21:39 +01:00
|
|
|
|
Common option parsing for "git log --diff" and friends
This basically does a few things that are sadly somewhat interdependent,
and nontrivial to split out
- get rid of "struct log_tree_opt"
The fields in "log_tree_opt" are moved into "struct rev_info", and all
users of log_tree_opt are changed to use the rev_info struct instead.
- add the parsing for the log_tree_opt arguments to "setup_revision()"
- make setup_revision set a flag (revs->diff) if the diff-related
arguments were used. This allows "git log" to decide whether it wants
to show diffs or not.
- make setup_revision() also initialize the diffopt part of rev_info
(which we had from before, but we just didn't initialize it)
- make setup_revision() do all the "finishing touches" on it all (it will
do the proper flag combination logic, and call "diff_setup_done()")
Now, that was the easy and straightforward part.
The slightly more involved part is that some of the programs that want to
use the new-and-improved rev_info parsing don't actually want _commits_,
they may want tree'ish arguments instead. That meant that I had to change
setup_revision() to parse the arguments not into the "revs->commits" list,
but into the "revs->pending_objects" list.
Then, when we do "prepare_revision_walk()", we walk that list, and create
the sorted commit list from there.
This actually cleaned some stuff up, but it's the less obvious part of the
patch, and re-organized the "revision.c" logic somewhat. It actually paves
the way for splitting argument parsing _entirely_ out of "revision.c",
since now the argument parsing really is totally independent of the commit
walking: that didn't use to be true, since there was lots of overlap with
get_commit_reference() handling etc, now the _only_ overlap is the shared
(and trivial) "add_pending_object()" thing.
However, I didn't do that file split, just because I wanted the diff
itself to be smaller, and show the actual changes more clearly. If this
gets accepted, I'll do further cleanups then - that includes the file
split, but also using the new infrastructure to do a nicer "git diff" etc.
Even in this form, it actually ends up removing more lines than it adds.
It's nice to note how simple and straightforward this makes the built-in
"git log" command, even though it continues to support all the diff flags
too. It doesn't get much simpler that this.
I think this is worth merging soonish, because it does allow for future
cleanup and even more sharing of code. However, it obviously touches
"revision.c", which is subtle. I've tested that it passes all the tests we
have, and it passes my "looks sane" detector, but somebody else should
also give it a good look-over.
[jc: squashed the original and three "oops this too" updates, with
another fix-up.]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-15 01:52:13 +02:00
|
|
|
revs->commit_format = CMIT_FMT_DEFAULT;
|
|
|
|
|
2012-10-10 01:40:03 +02:00
|
|
|
init_grep_defaults();
|
|
|
|
grep_init(&revs->grep_filter, prefix);
|
2008-08-25 08:15:05 +02:00
|
|
|
revs->grep_filter.status_only = 1;
|
|
|
|
revs->grep_filter.regflags = REG_NEWLINE;
|
|
|
|
|
Common option parsing for "git log --diff" and friends
This basically does a few things that are sadly somewhat interdependent,
and nontrivial to split out
- get rid of "struct log_tree_opt"
The fields in "log_tree_opt" are moved into "struct rev_info", and all
users of log_tree_opt are changed to use the rev_info struct instead.
- add the parsing for the log_tree_opt arguments to "setup_revision()"
- make setup_revision set a flag (revs->diff) if the diff-related
arguments were used. This allows "git log" to decide whether it wants
to show diffs or not.
- make setup_revision() also initialize the diffopt part of rev_info
(which we had from before, but we just didn't initialize it)
- make setup_revision() do all the "finishing touches" on it all (it will
do the proper flag combination logic, and call "diff_setup_done()")
Now, that was the easy and straightforward part.
The slightly more involved part is that some of the programs that want to
use the new-and-improved rev_info parsing don't actually want _commits_,
they may want tree'ish arguments instead. That meant that I had to change
setup_revision() to parse the arguments not into the "revs->commits" list,
but into the "revs->pending_objects" list.
Then, when we do "prepare_revision_walk()", we walk that list, and create
the sorted commit list from there.
This actually cleaned some stuff up, but it's the less obvious part of the
patch, and re-organized the "revision.c" logic somewhat. It actually paves
the way for splitting argument parsing _entirely_ out of "revision.c",
since now the argument parsing really is totally independent of the commit
walking: that didn't use to be true, since there was lots of overlap with
get_commit_reference() handling etc, now the _only_ overlap is the shared
(and trivial) "add_pending_object()" thing.
However, I didn't do that file split, just because I wanted the diff
itself to be smaller, and show the actual changes more clearly. If this
gets accepted, I'll do further cleanups then - that includes the file
split, but also using the new infrastructure to do a nicer "git diff" etc.
Even in this form, it actually ends up removing more lines than it adds.
It's nice to note how simple and straightforward this makes the built-in
"git log" command, even though it continues to support all the diff flags
too. It doesn't get much simpler that this.
I think this is worth merging soonish, because it does allow for future
cleanup and even more sharing of code. However, it obviously touches
"revision.c", which is subtle. I've tested that it passes all the tests we
have, and it passes my "looks sane" detector, but somebody else should
also give it a good look-over.
[jc: squashed the original and three "oops this too" updates, with
another fix-up.]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-15 01:52:13 +02:00
|
|
|
diff_setup(&revs->diffopt);
|
2008-02-13 09:34:39 +01:00
|
|
|
if (prefix && !revs->diffopt.prefix) {
|
diff --relative: output paths as relative to the current subdirectory
This adds --relative option to the diff family. When you start
from a subdirectory:
$ git diff --relative
shows only the diff that is inside your current subdirectory,
and without $prefix part. People who usually live in
subdirectories may like it.
There are a few things I should also mention about the change:
- This works not just with diff but also works with the log
family of commands, but the history pruning is not affected.
In other words, if you go to a subdirectory, you can say:
$ git log --relative -p
but it will show the log message even for commits that do not
touch the current directory. You can limit it by giving
pathspec yourself:
$ git log --relative -p .
This originally was not a conscious design choice, but we
have a way to affect diff pathspec and pruning pathspec
independently. IOW "git log --full-diff -p ." tells it to
prune history to commits that affect the current subdirectory
but show the changes with full context. I think it makes
more sense to leave pruning independent from --relative than
the obvious alternative of always pruning with the current
subdirectory, which would break the symmetry.
- Because this works also with the log family, you could
format-patch a single change, limiting the effect to your
subdirectory, like so:
$ cd gitk-git
$ git format-patch -1 --relative 911f1eb
But because that is a special purpose usage, this option will
never become the default, with or without repository or user
preference configuration. The risk of producing a partial
patch and sending it out by mistake is too great if we did
so.
- This is inherently incompatible with --no-index, which is a
bolted-on hack that does not have much to do with git
itself. I didn't bother checking and erroring out on the
combined use of the options, but probably I should.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-02-12 23:26:02 +01:00
|
|
|
revs->diffopt.prefix = prefix;
|
|
|
|
revs->diffopt.prefix_length = strlen(prefix);
|
|
|
|
}
|
2011-03-29 22:57:27 +02:00
|
|
|
|
|
|
|
revs->notes_opt.use_default_notes = -1;
|
2006-03-10 10:21:39 +01:00
|
|
|
}
|
|
|
|
|
2006-07-02 01:29:37 +02:00
|
|
|
static void add_pending_commit_list(struct rev_info *revs,
|
|
|
|
struct commit_list *commit_list,
|
|
|
|
unsigned int flags)
|
|
|
|
{
|
|
|
|
while (commit_list) {
|
|
|
|
struct object *object = &commit_list->item->object;
|
|
|
|
object->flags |= flags;
|
2015-11-10 03:22:28 +01:00
|
|
|
add_pending_object(revs, object, oid_to_hex(&object->oid));
|
2006-07-02 01:29:37 +02:00
|
|
|
commit_list = commit_list->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-07-03 11:59:32 +02:00
|
|
|
static void prepare_show_merge(struct rev_info *revs)
|
|
|
|
{
|
|
|
|
struct commit_list *bases;
|
|
|
|
struct commit *head, *other;
|
|
|
|
unsigned char sha1[20];
|
|
|
|
const char **prune = NULL;
|
|
|
|
int i, prune_num = 1; /* counting terminating NULL */
|
|
|
|
|
2011-09-17 13:57:45 +02:00
|
|
|
if (get_sha1("HEAD", sha1))
|
2006-07-03 11:59:32 +02:00
|
|
|
die("--merge without HEAD?");
|
2011-09-17 13:57:45 +02:00
|
|
|
head = lookup_commit_or_die(sha1, "HEAD");
|
|
|
|
if (get_sha1("MERGE_HEAD", sha1))
|
2006-07-03 11:59:32 +02:00
|
|
|
die("--merge without MERGE_HEAD?");
|
2011-09-17 13:57:45 +02:00
|
|
|
other = lookup_commit_or_die(sha1, "MERGE_HEAD");
|
2006-07-03 11:59:32 +02:00
|
|
|
add_pending_object(revs, &head->object, "HEAD");
|
|
|
|
add_pending_object(revs, &other->object, "MERGE_HEAD");
|
2014-10-30 20:20:44 +01:00
|
|
|
bases = get_merge_bases(head, other);
|
2013-05-16 17:32:38 +02:00
|
|
|
add_rev_cmdline_list(revs, bases, REV_CMD_MERGE_BASE, UNINTERESTING | BOTTOM);
|
|
|
|
add_pending_commit_list(revs, bases, UNINTERESTING | BOTTOM);
|
2008-02-27 08:18:38 +01:00
|
|
|
free_commit_list(bases);
|
|
|
|
head->object.flags |= SYMMETRIC_LEFT;
|
2006-07-03 11:59:32 +02:00
|
|
|
|
|
|
|
if (!active_nr)
|
|
|
|
read_cache();
|
|
|
|
for (i = 0; i < active_nr; i++) {
|
Convert "struct cache_entry *" to "const ..." wherever possible
I attempted to make index_state->cache[] a "const struct cache_entry **"
to find out how existing entries in index are modified and where. The
question I have is what do we do if we really need to keep track of on-disk
changes in the index. The result is
- diff-lib.c: setting CE_UPTODATE
- name-hash.c: setting CE_HASHED
- preload-index.c, read-cache.c, unpack-trees.c and
builtin/update-index: obvious
- entry.c: write_entry() may refresh the checked out entry via
fill_stat_cache_info(). This causes "non-const struct cache_entry
*" in builtin/apply.c, builtin/checkout-index.c and
builtin/checkout.c
- builtin/ls-files.c: --with-tree changes stagemask and may set
CE_UPDATE
Of these, write_entry() and its call sites are probably most
interesting because it modifies on-disk info. But this is stat info
and can be retrieved via refresh, at least for porcelain
commands. Other just uses ce_flags for local purposes.
So, keeping track of "dirty" entries is just a matter of setting a
flag in index modification functions exposed by read-cache.c. Except
unpack-trees, the rest of the code base does not do anything funny
behind read-cache's back.
The actual patch is less valueable than the summary above. But if
anyone wants to re-identify the above sites. Applying this patch, then
this:
diff --git a/cache.h b/cache.h
index 430d021..1692891 100644
--- a/cache.h
+++ b/cache.h
@@ -267,7 +267,7 @@ static inline unsigned int canon_mode(unsigned int mode)
#define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1)
struct index_state {
- struct cache_entry **cache;
+ const struct cache_entry **cache;
unsigned int version;
unsigned int cache_nr, cache_alloc, cache_changed;
struct string_list *resolve_undo;
will help quickly identify them without bogus warnings.
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-09 17:29:00 +02:00
|
|
|
const struct cache_entry *ce = active_cache[i];
|
2006-07-03 11:59:32 +02:00
|
|
|
if (!ce_stage(ce))
|
|
|
|
continue;
|
2014-01-24 14:40:28 +01:00
|
|
|
if (ce_path_match(ce, &revs->prune_data, NULL)) {
|
2006-07-03 11:59:32 +02:00
|
|
|
prune_num++;
|
2014-09-16 20:56:57 +02:00
|
|
|
REALLOC_ARRAY(prune, prune_num);
|
2006-07-03 11:59:32 +02:00
|
|
|
prune[prune_num-2] = ce->name;
|
|
|
|
prune[prune_num-1] = NULL;
|
|
|
|
}
|
|
|
|
while ((i+1 < active_nr) &&
|
|
|
|
ce_same_name(ce, active_cache[i+1]))
|
|
|
|
i++;
|
|
|
|
}
|
2010-12-17 13:43:06 +01:00
|
|
|
free_pathspec(&revs->prune_data);
|
2013-10-26 04:09:20 +02:00
|
|
|
parse_pathspec(&revs->prune_data, PATHSPEC_ALL_MAGIC & ~PATHSPEC_LITERAL,
|
2013-11-18 23:31:29 +01:00
|
|
|
PATHSPEC_PREFER_FULL | PATHSPEC_LITERAL_PATH, "", prune);
|
2008-02-27 08:18:38 +01:00
|
|
|
revs->limited = 1;
|
2006-07-03 11:59:32 +02:00
|
|
|
}
|
|
|
|
|
2012-07-02 21:33:52 +02:00
|
|
|
int handle_revision_arg(const char *arg_, struct rev_info *revs, int flags, unsigned revarg_opt)
|
2006-09-06 06:28:36 +02:00
|
|
|
{
|
2012-07-02 21:56:44 +02:00
|
|
|
struct object_context oc;
|
2006-09-06 06:28:36 +02:00
|
|
|
char *dotdot;
|
|
|
|
struct object *object;
|
|
|
|
unsigned char sha1[20];
|
|
|
|
int local_flags;
|
2011-08-26 02:35:39 +02:00
|
|
|
const char *arg = arg_;
|
2012-07-02 21:33:52 +02:00
|
|
|
int cant_be_filename = revarg_opt & REVARG_CANNOT_BE_FILENAME;
|
2012-07-02 21:43:05 +02:00
|
|
|
unsigned get_sha1_flags = 0;
|
2006-09-06 06:28:36 +02:00
|
|
|
|
2013-05-16 17:32:38 +02:00
|
|
|
flags = flags & UNINTERESTING ? flags | BOTTOM : flags & ~BOTTOM;
|
|
|
|
|
2006-09-06 06:28:36 +02:00
|
|
|
dotdot = strstr(arg, "..");
|
|
|
|
if (dotdot) {
|
|
|
|
unsigned char from_sha1[20];
|
|
|
|
const char *next = dotdot + 2;
|
|
|
|
const char *this = arg;
|
|
|
|
int symmetric = *next == '.';
|
2013-05-16 17:32:38 +02:00
|
|
|
unsigned int flags_exclude = flags ^ (UNINTERESTING | BOTTOM);
|
2011-05-02 22:39:16 +02:00
|
|
|
static const char head_by_default[] = "HEAD";
|
2011-08-26 02:35:39 +02:00
|
|
|
unsigned int a_flags;
|
2006-09-06 06:28:36 +02:00
|
|
|
|
|
|
|
*dotdot = 0;
|
|
|
|
next += symmetric;
|
|
|
|
|
|
|
|
if (!*next)
|
2011-05-02 22:39:16 +02:00
|
|
|
next = head_by_default;
|
2006-09-06 06:28:36 +02:00
|
|
|
if (dotdot == arg)
|
2011-05-02 22:39:16 +02:00
|
|
|
this = head_by_default;
|
|
|
|
if (this == head_by_default && next == head_by_default &&
|
|
|
|
!symmetric) {
|
|
|
|
/*
|
|
|
|
* Just ".."? That is not a range but the
|
|
|
|
* pathspec for the parent directory.
|
|
|
|
*/
|
|
|
|
if (!cant_be_filename) {
|
|
|
|
*dotdot = '.';
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
2012-07-02 21:04:52 +02:00
|
|
|
if (!get_sha1_committish(this, from_sha1) &&
|
|
|
|
!get_sha1_committish(next, sha1)) {
|
2013-09-19 23:20:34 +02:00
|
|
|
struct object *a_obj, *b_obj;
|
2006-09-06 06:28:36 +02:00
|
|
|
|
|
|
|
if (!cant_be_filename) {
|
|
|
|
*dotdot = '.';
|
|
|
|
verify_non_filename(revs->prefix, arg);
|
|
|
|
}
|
|
|
|
|
2013-09-19 23:20:34 +02:00
|
|
|
a_obj = parse_object(from_sha1);
|
|
|
|
b_obj = parse_object(sha1);
|
|
|
|
if (!a_obj || !b_obj) {
|
|
|
|
missing:
|
|
|
|
if (revs->ignore_missing)
|
|
|
|
return 0;
|
|
|
|
die(symmetric
|
|
|
|
? "Invalid symmetric difference expression %s"
|
|
|
|
: "Invalid revision range %s", arg);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!symmetric) {
|
|
|
|
/* just A..B */
|
|
|
|
a_flags = flags_exclude;
|
|
|
|
} else {
|
|
|
|
/* A...B -- find merge bases between the two */
|
|
|
|
struct commit *a, *b;
|
|
|
|
struct commit_list *exclude;
|
|
|
|
|
|
|
|
a = (a_obj->type == OBJ_COMMIT
|
|
|
|
? (struct commit *)a_obj
|
2015-11-10 03:22:29 +01:00
|
|
|
: lookup_commit_reference(a_obj->oid.hash));
|
2013-09-19 23:20:34 +02:00
|
|
|
b = (b_obj->type == OBJ_COMMIT
|
|
|
|
? (struct commit *)b_obj
|
2015-11-10 03:22:29 +01:00
|
|
|
: lookup_commit_reference(b_obj->oid.hash));
|
2013-09-19 23:20:34 +02:00
|
|
|
if (!a || !b)
|
|
|
|
goto missing;
|
2014-10-30 20:20:44 +01:00
|
|
|
exclude = get_merge_bases(a, b);
|
2013-05-13 17:00:47 +02:00
|
|
|
add_rev_cmdline_list(revs, exclude,
|
|
|
|
REV_CMD_MERGE_BASE,
|
|
|
|
flags_exclude);
|
2006-09-06 06:28:36 +02:00
|
|
|
add_pending_commit_list(revs, exclude,
|
|
|
|
flags_exclude);
|
|
|
|
free_commit_list(exclude);
|
2013-09-19 23:20:34 +02:00
|
|
|
|
2011-08-26 02:35:39 +02:00
|
|
|
a_flags = flags | SYMMETRIC_LEFT;
|
2013-09-19 23:20:34 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
a_obj->flags |= a_flags;
|
|
|
|
b_obj->flags |= flags;
|
|
|
|
add_rev_cmdline(revs, a_obj, this,
|
2011-08-26 02:35:39 +02:00
|
|
|
REV_CMD_LEFT, a_flags);
|
2013-09-19 23:20:34 +02:00
|
|
|
add_rev_cmdline(revs, b_obj, next,
|
2011-08-26 02:35:39 +02:00
|
|
|
REV_CMD_RIGHT, flags);
|
2013-09-19 23:20:34 +02:00
|
|
|
add_pending_object(revs, a_obj, this);
|
|
|
|
add_pending_object(revs, b_obj, next);
|
2006-09-06 06:28:36 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
*dotdot = '.';
|
|
|
|
}
|
|
|
|
dotdot = strstr(arg, "^@");
|
|
|
|
if (dotdot && !dotdot[2]) {
|
|
|
|
*dotdot = 0;
|
|
|
|
if (add_parents_only(revs, arg, flags))
|
|
|
|
return 0;
|
|
|
|
*dotdot = '^';
|
|
|
|
}
|
2006-10-31 23:22:34 +01:00
|
|
|
dotdot = strstr(arg, "^!");
|
|
|
|
if (dotdot && !dotdot[2]) {
|
|
|
|
*dotdot = 0;
|
2013-05-16 17:32:38 +02:00
|
|
|
if (!add_parents_only(revs, arg, flags ^ (UNINTERESTING | BOTTOM)))
|
2006-10-31 23:22:34 +01:00
|
|
|
*dotdot = '^';
|
|
|
|
}
|
|
|
|
|
2006-09-06 06:28:36 +02:00
|
|
|
local_flags = 0;
|
|
|
|
if (*arg == '^') {
|
2013-05-16 17:32:38 +02:00
|
|
|
local_flags = UNINTERESTING | BOTTOM;
|
2006-09-06 06:28:36 +02:00
|
|
|
arg++;
|
|
|
|
}
|
2012-07-02 21:43:05 +02:00
|
|
|
|
|
|
|
if (revarg_opt & REVARG_COMMITTISH)
|
|
|
|
get_sha1_flags = GET_SHA1_COMMITTISH;
|
|
|
|
|
|
|
|
if (get_sha1_with_context(arg, get_sha1_flags, sha1, &oc))
|
2011-05-19 03:08:09 +02:00
|
|
|
return revs->ignore_missing ? 0 : -1;
|
2006-09-06 06:28:36 +02:00
|
|
|
if (!cant_be_filename)
|
|
|
|
verify_non_filename(revs->prefix, arg);
|
|
|
|
object = get_reference(revs, arg, sha1, flags ^ local_flags);
|
2011-08-26 02:35:39 +02:00
|
|
|
add_rev_cmdline(revs, object, arg_, REV_CMD_REV, flags ^ local_flags);
|
2012-07-02 21:56:44 +02:00
|
|
|
add_pending_object_with_mode(revs, object, arg, oc.mode);
|
2006-09-06 06:28:36 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-05-11 23:01:19 +02:00
|
|
|
struct cmdline_pathspec {
|
|
|
|
int alloc;
|
|
|
|
int nr;
|
|
|
|
const char **path;
|
|
|
|
};
|
2008-07-05 23:26:39 +02:00
|
|
|
|
2011-05-11 23:01:19 +02:00
|
|
|
static void append_prune_data(struct cmdline_pathspec *prune, const char **av)
|
|
|
|
{
|
|
|
|
while (*av) {
|
2013-10-31 10:25:43 +01:00
|
|
|
ALLOC_GROW(prune->path, prune->nr + 1, prune->alloc);
|
2011-05-11 23:01:19 +02:00
|
|
|
prune->path[prune->nr++] = *(av++);
|
|
|
|
}
|
|
|
|
}
|
2009-11-20 11:50:21 +01:00
|
|
|
|
2011-05-11 23:01:19 +02:00
|
|
|
static void read_pathspec_from_stdin(struct rev_info *revs, struct strbuf *sb,
|
|
|
|
struct cmdline_pathspec *prune)
|
|
|
|
{
|
2009-11-20 11:50:21 +01:00
|
|
|
while (strbuf_getwholeline(sb, stdin, '\n') != EOF) {
|
|
|
|
int len = sb->len;
|
|
|
|
if (len && sb->buf[len - 1] == '\n')
|
|
|
|
sb->buf[--len] = '\0';
|
2013-10-31 10:25:43 +01:00
|
|
|
ALLOC_GROW(prune->path, prune->nr + 1, prune->alloc);
|
2011-05-11 23:01:19 +02:00
|
|
|
prune->path[prune->nr++] = xstrdup(sb->buf);
|
2009-11-20 11:50:21 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-05-11 23:01:19 +02:00
|
|
|
static void read_revisions_from_stdin(struct rev_info *revs,
|
|
|
|
struct cmdline_pathspec *prune)
|
2008-07-05 23:26:39 +02:00
|
|
|
{
|
2009-11-20 11:00:40 +01:00
|
|
|
struct strbuf sb;
|
2009-11-20 11:50:21 +01:00
|
|
|
int seen_dashdash = 0;
|
2014-03-12 21:06:17 +01:00
|
|
|
int save_warning;
|
|
|
|
|
|
|
|
save_warning = warn_on_object_refname_ambiguity;
|
|
|
|
warn_on_object_refname_ambiguity = 0;
|
2008-07-05 23:26:39 +02:00
|
|
|
|
2009-11-20 11:00:40 +01:00
|
|
|
strbuf_init(&sb, 1000);
|
|
|
|
while (strbuf_getwholeline(&sb, stdin, '\n') != EOF) {
|
|
|
|
int len = sb.len;
|
|
|
|
if (len && sb.buf[len - 1] == '\n')
|
|
|
|
sb.buf[--len] = '\0';
|
2008-07-05 23:26:39 +02:00
|
|
|
if (!len)
|
|
|
|
break;
|
2009-11-20 11:50:21 +01:00
|
|
|
if (sb.buf[0] == '-') {
|
|
|
|
if (len == 2 && sb.buf[1] == '-') {
|
|
|
|
seen_dashdash = 1;
|
|
|
|
break;
|
|
|
|
}
|
2008-07-05 23:26:39 +02:00
|
|
|
die("options not supported in --stdin mode");
|
2009-11-20 11:50:21 +01:00
|
|
|
}
|
object_array_entry: fix memory handling of the name field
Previously, the memory management of the object_array_entry::name
field was inconsistent and undocumented. object_array_entries are
ultimately created by a single function, add_object_array_with_mode(),
which has an argument "const char *name". This function used to
simply set the name field to reference the string pointed to by the
name parameter, and nobody on the object_array side ever freed the
memory. Thus, it assumed that the memory for the name field would be
managed by the caller, and that the lifetime of that string would be
at least as long as the lifetime of the object_array_entry. But
callers were inconsistent:
* Some passed pointers to constant strings or argv entries, which was
OK.
* Some passed pointers to newly-allocated memory, but didn't arrange
for the memory ever to be freed.
* Some passed the return value of sha1_to_hex(), which is a pointer to
a statically-allocated buffer that can be overwritten at any time.
* Some passed pointers to refnames that they received from a
for_each_ref()-type iteration, but the lifetimes of such refnames is
not guaranteed by the refs API.
Bring consistency to this mess by changing object_array to make its
own copy for the object_array_entry::name field and free this memory
when an object_array_entry is deleted from the array.
Many callers were passing the empty string as the name parameter, so
as a performance optimization, treat the empty string specially.
Instead of making a copy, store a pointer to a statically-allocated
empty string to object_array_entry::name. When deleting such an
entry, skip the free().
Change the callers that were already passing copies to
add_object_array_with_mode() to either skip the copy, or (if the
memory needed to be allocated anyway) freeing the memory itself.
A part of this commit effectively reverts
70d26c6e76 read_revisions_from_stdin: make copies for handle_revision_arg
because the copying introduced by that commit (which is still
necessary) is now done at a deeper level.
Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-25 11:08:14 +02:00
|
|
|
if (handle_revision_arg(sb.buf, revs, 0,
|
2013-04-16 11:57:45 +02:00
|
|
|
REVARG_CANNOT_BE_FILENAME))
|
2009-11-20 11:00:40 +01:00
|
|
|
die("bad revision '%s'", sb.buf);
|
2008-07-05 23:26:39 +02:00
|
|
|
}
|
2009-11-20 11:50:21 +01:00
|
|
|
if (seen_dashdash)
|
|
|
|
read_pathspec_from_stdin(revs, &sb, prune);
|
2014-03-12 21:06:17 +01:00
|
|
|
|
2009-11-20 11:00:40 +01:00
|
|
|
strbuf_release(&sb);
|
2014-03-12 21:06:17 +01:00
|
|
|
warn_on_object_refname_ambiguity = save_warning;
|
2008-07-05 23:26:39 +02:00
|
|
|
}
|
|
|
|
|
2006-09-20 22:21:56 +02:00
|
|
|
static void add_grep(struct rev_info *revs, const char *ptn, enum grep_pat_token what)
|
2006-09-18 02:23:20 +02:00
|
|
|
{
|
2008-08-25 08:15:05 +02:00
|
|
|
append_grep_pattern(&revs->grep_filter, ptn, "command line", 0, what);
|
2006-09-20 22:21:56 +02:00
|
|
|
}
|
|
|
|
|
log --author/--committer: really match only with name part
When we tried to find commits done by AUTHOR, the first implementation
tried to pattern match a line with "^author .*AUTHOR", which later was
enhanced to strip leading caret and look for "^author AUTHOR" when the
search pattern was anchored at the left end (i.e. --author="^AUTHOR").
This had a few problems:
* When looking for fixed strings (e.g. "git log -F --author=x --grep=y"),
the regexp internally used "^author .*x" would never match anything;
* To match at the end (e.g. "git log --author='google.com>$'"), the
generated regexp has to also match the trailing timestamp part the
commit header lines have. Also, in order to determine if the '$' at
the end means "match at the end of the line" or just a literal dollar
sign (probably backslash-quoted), we would need to parse the regexp
ourselves.
An earlier alternative tried to make sure that a line matches "^author "
(to limit by field name) and the user supplied pattern at the same time.
While it solved the -F problem by introducing a special override for
matching the "^author ", it did not solve the trailing timestamp nor tail
match problem. It also would have matched every commit if --author=author
was asked for, not because the author's email part had this string, but
because every commit header line that talks about the author begins with
that field name, regardleses of who wrote it.
Instead of piling more hacks on top of hacks, this rethinks the grep
machinery that is used to look for strings in the commit header, and makes
sure that (1) field name matches literally at the beginning of the line,
followed by a SP, and (2) the user supplied pattern is matched against the
remainder of the line, excluding the trailing timestamp data.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-09-05 07:15:02 +02:00
|
|
|
static void add_header_grep(struct rev_info *revs, enum grep_header_field field, const char *pattern)
|
2006-09-20 22:21:56 +02:00
|
|
|
{
|
log --author/--committer: really match only with name part
When we tried to find commits done by AUTHOR, the first implementation
tried to pattern match a line with "^author .*AUTHOR", which later was
enhanced to strip leading caret and look for "^author AUTHOR" when the
search pattern was anchored at the left end (i.e. --author="^AUTHOR").
This had a few problems:
* When looking for fixed strings (e.g. "git log -F --author=x --grep=y"),
the regexp internally used "^author .*x" would never match anything;
* To match at the end (e.g. "git log --author='google.com>$'"), the
generated regexp has to also match the trailing timestamp part the
commit header lines have. Also, in order to determine if the '$' at
the end means "match at the end of the line" or just a literal dollar
sign (probably backslash-quoted), we would need to parse the regexp
ourselves.
An earlier alternative tried to make sure that a line matches "^author "
(to limit by field name) and the user supplied pattern at the same time.
While it solved the -F problem by introducing a special override for
matching the "^author ", it did not solve the trailing timestamp nor tail
match problem. It also would have matched every commit if --author=author
was asked for, not because the author's email part had this string, but
because every commit header line that talks about the author begins with
that field name, regardleses of who wrote it.
Instead of piling more hacks on top of hacks, this rethinks the grep
machinery that is used to look for strings in the commit header, and makes
sure that (1) field name matches literally at the beginning of the line,
followed by a SP, and (2) the user supplied pattern is matched against the
remainder of the line, excluding the trailing timestamp data.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-09-05 07:15:02 +02:00
|
|
|
append_header_grep_pattern(&revs->grep_filter, field, pattern);
|
2006-09-18 02:23:20 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static void add_message_grep(struct rev_info *revs, const char *pattern)
|
|
|
|
{
|
2006-09-20 22:21:56 +02:00
|
|
|
add_grep(revs, pattern, GREP_PATTERN_BODY);
|
2006-09-18 02:23:20 +02:00
|
|
|
}
|
|
|
|
|
2008-07-09 23:38:34 +02:00
|
|
|
static int handle_revision_opt(struct rev_info *revs, int argc, const char **argv,
|
|
|
|
int *unkc, const char **unkv)
|
2008-07-08 15:19:33 +02:00
|
|
|
{
|
|
|
|
const char *arg = argv[0];
|
2010-08-05 10:22:55 +02:00
|
|
|
const char *optarg;
|
|
|
|
int argcount;
|
2008-07-08 15:19:33 +02:00
|
|
|
|
|
|
|
/* pseudo revision arguments */
|
|
|
|
if (!strcmp(arg, "--all") || !strcmp(arg, "--branches") ||
|
|
|
|
!strcmp(arg, "--tags") || !strcmp(arg, "--remotes") ||
|
|
|
|
!strcmp(arg, "--reflog") || !strcmp(arg, "--not") ||
|
2009-10-27 19:28:07 +01:00
|
|
|
!strcmp(arg, "--no-walk") || !strcmp(arg, "--do-walk") ||
|
2013-11-30 21:55:40 +01:00
|
|
|
!strcmp(arg, "--bisect") || starts_with(arg, "--glob=") ||
|
2014-10-17 02:44:23 +02:00
|
|
|
!strcmp(arg, "--indexed-objects") ||
|
2014-06-09 20:30:12 +02:00
|
|
|
starts_with(arg, "--exclude=") ||
|
2013-11-30 21:55:40 +01:00
|
|
|
starts_with(arg, "--branches=") || starts_with(arg, "--tags=") ||
|
|
|
|
starts_with(arg, "--remotes=") || starts_with(arg, "--no-walk="))
|
2008-07-08 15:19:33 +02:00
|
|
|
{
|
|
|
|
unkv[(*unkc)++] = arg;
|
Allow "non-option" revision options in parse_option-enabled commands
Commands which use parse_options() but also call setup_revisions()
must do their parsing in a two step process:
1. first, they parse all options. Anything unknown goes to
parse_revision_opt() (which calls handle_revision_opt), which
may claim the option or say "I don't recognize this"
2. the non-option remainder goes to setup_revisions() to
actually get turned into revisions
Some revision options are "non-options" in that they must be
parsed in order with their revision counterparts in
setup_revisions(). For example, "--all" functions as a
pseudo-option expanding to all refs, and "--no-walk" affects refs
after it on the command line, but not before. The revision option
parser in step 1 recognizes such options and sets them aside for
later parsing by setup_revisions().
However, the return value used from handle_revision_opt indicated
"I didn't recognize this", which was wrong. It did, and it took
appropriate action (even though that action was just deferring it
for later parsing). Thus it should return "yes, I recognized
this."
Previously, these pseudo-options generated an error when used with
parse_options parsers (currently just blame and shortlog). With
this patch, they should work fine, enabling things like "git
shortlog --all".
Signed-off-by: Jeff King <peff@peff.net>
Acked-By: Pierre Habouzit <madcoder@debian.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 12:22:23 +02:00
|
|
|
return 1;
|
2008-07-08 15:19:33 +02:00
|
|
|
}
|
|
|
|
|
2010-08-05 10:22:55 +02:00
|
|
|
if ((argcount = parse_long_opt("max-count", argv, &optarg))) {
|
|
|
|
revs->max_count = atoi(optarg);
|
2010-06-01 10:35:49 +02:00
|
|
|
revs->no_walk = 0;
|
2010-08-05 10:22:55 +02:00
|
|
|
return argcount;
|
|
|
|
} else if ((argcount = parse_long_opt("skip", argv, &optarg))) {
|
|
|
|
revs->skip_count = atoi(optarg);
|
|
|
|
return argcount;
|
2008-07-08 15:19:33 +02:00
|
|
|
} else if ((*arg == '-') && isdigit(arg[1])) {
|
2014-06-07 00:33:25 +02:00
|
|
|
/* accept -<digit>, like traditional "head" */
|
|
|
|
if (strtol_i(arg + 1, 10, &revs->max_count) < 0 ||
|
|
|
|
revs->max_count < 0)
|
|
|
|
die("'%s': not a non-negative integer", arg + 1);
|
2010-06-01 10:35:49 +02:00
|
|
|
revs->no_walk = 0;
|
2008-07-08 15:19:33 +02:00
|
|
|
} else if (!strcmp(arg, "-n")) {
|
|
|
|
if (argc <= 1)
|
|
|
|
return error("-n requires an argument");
|
|
|
|
revs->max_count = atoi(argv[1]);
|
2010-06-01 10:35:49 +02:00
|
|
|
revs->no_walk = 0;
|
2008-07-08 15:19:33 +02:00
|
|
|
return 2;
|
2013-11-30 21:55:40 +01:00
|
|
|
} else if (starts_with(arg, "-n")) {
|
2008-07-08 15:19:33 +02:00
|
|
|
revs->max_count = atoi(arg + 2);
|
2010-06-01 10:35:49 +02:00
|
|
|
revs->no_walk = 0;
|
2010-08-05 10:22:55 +02:00
|
|
|
} else if ((argcount = parse_long_opt("max-age", argv, &optarg))) {
|
|
|
|
revs->max_age = atoi(optarg);
|
|
|
|
return argcount;
|
|
|
|
} else if ((argcount = parse_long_opt("since", argv, &optarg))) {
|
|
|
|
revs->max_age = approxidate(optarg);
|
|
|
|
return argcount;
|
|
|
|
} else if ((argcount = parse_long_opt("after", argv, &optarg))) {
|
|
|
|
revs->max_age = approxidate(optarg);
|
|
|
|
return argcount;
|
|
|
|
} else if ((argcount = parse_long_opt("min-age", argv, &optarg))) {
|
|
|
|
revs->min_age = atoi(optarg);
|
|
|
|
return argcount;
|
|
|
|
} else if ((argcount = parse_long_opt("before", argv, &optarg))) {
|
|
|
|
revs->min_age = approxidate(optarg);
|
|
|
|
return argcount;
|
|
|
|
} else if ((argcount = parse_long_opt("until", argv, &optarg))) {
|
|
|
|
revs->min_age = approxidate(optarg);
|
|
|
|
return argcount;
|
2008-07-08 15:19:33 +02:00
|
|
|
} else if (!strcmp(arg, "--first-parent")) {
|
|
|
|
revs->first_parent_only = 1;
|
revision: --ancestry-path
"rev-list A..H" computes the set of commits that are ancestors of H, but
excludes the ones that are ancestors of A. This is useful to see what
happened to the history leading to H since A, in the sense that "what does
H have that did not exist in A" (e.g. when you have a choice to update to
H from A).
x---x---A---B---C <-- topic
/ \
x---x---x---o---o---o---o---M---D---E---F---G <-- dev
/ \
x---o---o---o---o---o---o---o---o---o---o---o---N---H <-- master
The result in the above example would be the commits marked with caps
letters (except for A itself, of course), and the ones marked with 'o'.
When you want to find out what commits in H are contaminated with the bug
introduced by A and need fixing, however, you might want to view only the
subset of "A..B" that are actually descendants of A, i.e. excluding the
ones marked with 'o'. Introduce a new option --ancestry-path to compute
this set with "rev-list --ancestry-path A..B".
Note that in practice, you would build a fix immediately on top of A and
"git branch --contains A" will give the names of branches that you would
need to merge the fix into (i.e. topic, dev and master), so this may not
be worth paying the extra cost of postprocessing.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-04-20 22:48:39 +02:00
|
|
|
} else if (!strcmp(arg, "--ancestry-path")) {
|
|
|
|
revs->ancestry_path = 1;
|
2010-06-04 01:17:37 +02:00
|
|
|
revs->simplify_history = 0;
|
revision: --ancestry-path
"rev-list A..H" computes the set of commits that are ancestors of H, but
excludes the ones that are ancestors of A. This is useful to see what
happened to the history leading to H since A, in the sense that "what does
H have that did not exist in A" (e.g. when you have a choice to update to
H from A).
x---x---A---B---C <-- topic
/ \
x---x---x---o---o---o---o---M---D---E---F---G <-- dev
/ \
x---o---o---o---o---o---o---o---o---o---o---o---N---H <-- master
The result in the above example would be the commits marked with caps
letters (except for A itself, of course), and the ones marked with 'o'.
When you want to find out what commits in H are contaminated with the bug
introduced by A and need fixing, however, you might want to view only the
subset of "A..B" that are actually descendants of A, i.e. excluding the
ones marked with 'o'. Introduce a new option --ancestry-path to compute
this set with "rev-list --ancestry-path A..B".
Note that in practice, you would build a fix immediately on top of A and
"git branch --contains A" will give the names of branches that you would
need to merge the fix into (i.e. topic, dev and master), so this may not
be worth paying the extra cost of postprocessing.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-04-20 22:48:39 +02:00
|
|
|
revs->limited = 1;
|
2008-07-08 15:19:33 +02:00
|
|
|
} else if (!strcmp(arg, "-g") || !strcmp(arg, "--walk-reflogs")) {
|
|
|
|
init_reflog_walk(&revs->reflog_info);
|
|
|
|
} else if (!strcmp(arg, "--default")) {
|
|
|
|
if (argc <= 1)
|
|
|
|
return error("bad --default argument");
|
|
|
|
revs->def = argv[1];
|
|
|
|
return 2;
|
|
|
|
} else if (!strcmp(arg, "--merge")) {
|
|
|
|
revs->show_merge = 1;
|
|
|
|
} else if (!strcmp(arg, "--topo-order")) {
|
toposort: rename "lifo" field
The primary invariant of sort_in_topological_order() is that a
parent commit is not emitted until all children of it are. When
traversing a forked history like this with "git log C E":
A----B----C
\
D----E
we ensure that A is emitted after all of B, C, D, and E are done, B
has to wait until C is done, and D has to wait until E is done.
In some applications, however, we would further want to control how
these child commits B, C, D and E on two parallel ancestry chains
are shown.
Most of the time, we would want to see C and B emitted together, and
then E and D, and finally A (i.e. the --topo-order output). The
"lifo" parameter of the sort_in_topological_order() function is used
to control this behaviour. We start the traversal by knowing two
commits, C and E. While keeping in mind that we also need to
inspect E later, we pick C first to inspect, and we notice and
record that B needs to be inspected. By structuring the "work to be
done" set as a LIFO stack, we ensure that B is inspected next,
before other in-flight commits we had known that we will need to
inspect, e.g. E.
When showing in --date-order, we would want to see commits ordered
by timestamps, i.e. show C, E, B and D in this order before showing
A, possibly mixing commits from two parallel histories together.
When "lifo" parameter is set to false, the function keeps the "work
to be done" set sorted in the date order to realize this semantics.
After inspecting C, we add B to the "work to be done" set, but the
next commit we inspect from the set is E which is newer than B.
The name "lifo", however, is too strongly tied to the way how the
function implements its behaviour, and does not describe what the
behaviour _means_.
Replace this field with an enum rev_sort_order, with two possible
values: REV_SORT_IN_GRAPH_ORDER and REV_SORT_BY_COMMIT_DATE, and
update the existing code. The mechanical replacement rule is:
"lifo == 0" is equivalent to "sort_order == REV_SORT_BY_COMMIT_DATE"
"lifo == 1" is equivalent to "sort_order == REV_SORT_IN_GRAPH_ORDER"
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-06-07 01:07:14 +02:00
|
|
|
revs->sort_order = REV_SORT_IN_GRAPH_ORDER;
|
2008-07-08 15:19:33 +02:00
|
|
|
revs->topo_order = 1;
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
} else if (!strcmp(arg, "--simplify-merges")) {
|
|
|
|
revs->simplify_merges = 1;
|
2012-06-08 23:47:08 +02:00
|
|
|
revs->topo_order = 1;
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
revs->rewrite_parents = 1;
|
|
|
|
revs->simplify_history = 0;
|
|
|
|
revs->limited = 1;
|
2008-11-03 20:25:46 +01:00
|
|
|
} else if (!strcmp(arg, "--simplify-by-decoration")) {
|
|
|
|
revs->simplify_merges = 1;
|
2012-06-08 23:47:08 +02:00
|
|
|
revs->topo_order = 1;
|
2008-11-03 20:25:46 +01:00
|
|
|
revs->rewrite_parents = 1;
|
|
|
|
revs->simplify_history = 0;
|
|
|
|
revs->simplify_by_decoration = 1;
|
|
|
|
revs->limited = 1;
|
|
|
|
revs->prune = 1;
|
2009-08-15 16:23:12 +02:00
|
|
|
load_ref_decorations(DECORATE_SHORT_REFS);
|
2008-07-08 15:19:33 +02:00
|
|
|
} else if (!strcmp(arg, "--date-order")) {
|
toposort: rename "lifo" field
The primary invariant of sort_in_topological_order() is that a
parent commit is not emitted until all children of it are. When
traversing a forked history like this with "git log C E":
A----B----C
\
D----E
we ensure that A is emitted after all of B, C, D, and E are done, B
has to wait until C is done, and D has to wait until E is done.
In some applications, however, we would further want to control how
these child commits B, C, D and E on two parallel ancestry chains
are shown.
Most of the time, we would want to see C and B emitted together, and
then E and D, and finally A (i.e. the --topo-order output). The
"lifo" parameter of the sort_in_topological_order() function is used
to control this behaviour. We start the traversal by knowing two
commits, C and E. While keeping in mind that we also need to
inspect E later, we pick C first to inspect, and we notice and
record that B needs to be inspected. By structuring the "work to be
done" set as a LIFO stack, we ensure that B is inspected next,
before other in-flight commits we had known that we will need to
inspect, e.g. E.
When showing in --date-order, we would want to see commits ordered
by timestamps, i.e. show C, E, B and D in this order before showing
A, possibly mixing commits from two parallel histories together.
When "lifo" parameter is set to false, the function keeps the "work
to be done" set sorted in the date order to realize this semantics.
After inspecting C, we add B to the "work to be done" set, but the
next commit we inspect from the set is E which is newer than B.
The name "lifo", however, is too strongly tied to the way how the
function implements its behaviour, and does not describe what the
behaviour _means_.
Replace this field with an enum rev_sort_order, with two possible
values: REV_SORT_IN_GRAPH_ORDER and REV_SORT_BY_COMMIT_DATE, and
update the existing code. The mechanical replacement rule is:
"lifo == 0" is equivalent to "sort_order == REV_SORT_BY_COMMIT_DATE"
"lifo == 1" is equivalent to "sort_order == REV_SORT_IN_GRAPH_ORDER"
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-06-07 01:07:14 +02:00
|
|
|
revs->sort_order = REV_SORT_BY_COMMIT_DATE;
|
2008-07-08 15:19:33 +02:00
|
|
|
revs->topo_order = 1;
|
2013-06-07 19:35:54 +02:00
|
|
|
} else if (!strcmp(arg, "--author-date-order")) {
|
|
|
|
revs->sort_order = REV_SORT_BY_AUTHOR_DATE;
|
2008-07-08 15:19:33 +02:00
|
|
|
revs->topo_order = 1;
|
2013-11-30 21:55:40 +01:00
|
|
|
} else if (starts_with(arg, "--early-output")) {
|
2008-07-08 15:19:33 +02:00
|
|
|
int count = 100;
|
|
|
|
switch (arg[14]) {
|
|
|
|
case '=':
|
|
|
|
count = atoi(arg+15);
|
|
|
|
/* Fallthrough */
|
|
|
|
case 0:
|
|
|
|
revs->topo_order = 1;
|
|
|
|
revs->early_output = count;
|
|
|
|
}
|
|
|
|
} else if (!strcmp(arg, "--parents")) {
|
|
|
|
revs->rewrite_parents = 1;
|
|
|
|
revs->print_parents = 1;
|
|
|
|
} else if (!strcmp(arg, "--dense")) {
|
|
|
|
revs->dense = 1;
|
|
|
|
} else if (!strcmp(arg, "--sparse")) {
|
|
|
|
revs->dense = 0;
|
|
|
|
} else if (!strcmp(arg, "--show-all")) {
|
|
|
|
revs->show_all = 1;
|
|
|
|
} else if (!strcmp(arg, "--remove-empty")) {
|
|
|
|
revs->remove_empty_trees = 1;
|
2009-06-29 19:28:25 +02:00
|
|
|
} else if (!strcmp(arg, "--merges")) {
|
2011-03-21 11:14:06 +01:00
|
|
|
revs->min_parents = 2;
|
2008-07-08 15:19:33 +02:00
|
|
|
} else if (!strcmp(arg, "--no-merges")) {
|
2011-03-21 11:14:06 +01:00
|
|
|
revs->max_parents = 1;
|
2013-11-30 21:55:40 +01:00
|
|
|
} else if (starts_with(arg, "--min-parents=")) {
|
2011-03-21 11:14:06 +01:00
|
|
|
revs->min_parents = atoi(arg+14);
|
2013-11-30 21:55:40 +01:00
|
|
|
} else if (starts_with(arg, "--no-min-parents")) {
|
2011-03-21 11:14:06 +01:00
|
|
|
revs->min_parents = 0;
|
2013-11-30 21:55:40 +01:00
|
|
|
} else if (starts_with(arg, "--max-parents=")) {
|
2011-03-21 11:14:06 +01:00
|
|
|
revs->max_parents = atoi(arg+14);
|
2013-11-30 21:55:40 +01:00
|
|
|
} else if (starts_with(arg, "--no-max-parents")) {
|
2011-03-21 11:14:06 +01:00
|
|
|
revs->max_parents = -1;
|
2008-07-08 15:19:33 +02:00
|
|
|
} else if (!strcmp(arg, "--boundary")) {
|
|
|
|
revs->boundary = 1;
|
|
|
|
} else if (!strcmp(arg, "--left-right")) {
|
|
|
|
revs->left_right = 1;
|
2011-02-21 17:09:11 +01:00
|
|
|
} else if (!strcmp(arg, "--left-only")) {
|
2011-02-22 01:58:37 +01:00
|
|
|
if (revs->right_only)
|
2011-03-07 13:31:42 +01:00
|
|
|
die("--left-only is incompatible with --right-only"
|
|
|
|
" or --cherry");
|
2011-02-21 17:09:11 +01:00
|
|
|
revs->left_only = 1;
|
|
|
|
} else if (!strcmp(arg, "--right-only")) {
|
2011-02-22 01:58:37 +01:00
|
|
|
if (revs->left_only)
|
|
|
|
die("--right-only is incompatible with --left-only");
|
2011-02-21 17:09:11 +01:00
|
|
|
revs->right_only = 1;
|
2011-03-07 13:31:42 +01:00
|
|
|
} else if (!strcmp(arg, "--cherry")) {
|
|
|
|
if (revs->left_only)
|
|
|
|
die("--cherry is incompatible with --left-only");
|
|
|
|
revs->cherry_mark = 1;
|
|
|
|
revs->right_only = 1;
|
2011-03-21 11:14:06 +01:00
|
|
|
revs->max_parents = 1;
|
2011-03-07 13:31:42 +01:00
|
|
|
revs->limited = 1;
|
2010-06-10 13:47:23 +02:00
|
|
|
} else if (!strcmp(arg, "--count")) {
|
|
|
|
revs->count = 1;
|
2011-03-07 13:31:40 +01:00
|
|
|
} else if (!strcmp(arg, "--cherry-mark")) {
|
|
|
|
if (revs->cherry_pick)
|
|
|
|
die("--cherry-mark is incompatible with --cherry-pick");
|
|
|
|
revs->cherry_mark = 1;
|
|
|
|
revs->limited = 1; /* needs limit_list() */
|
2008-07-08 15:19:33 +02:00
|
|
|
} else if (!strcmp(arg, "--cherry-pick")) {
|
2011-03-07 13:31:40 +01:00
|
|
|
if (revs->cherry_mark)
|
|
|
|
die("--cherry-pick is incompatible with --cherry-mark");
|
2008-07-08 15:19:33 +02:00
|
|
|
revs->cherry_pick = 1;
|
|
|
|
revs->limited = 1;
|
|
|
|
} else if (!strcmp(arg, "--objects")) {
|
|
|
|
revs->tag_objects = 1;
|
|
|
|
revs->tree_objects = 1;
|
|
|
|
revs->blob_objects = 1;
|
|
|
|
} else if (!strcmp(arg, "--objects-edge")) {
|
|
|
|
revs->tag_objects = 1;
|
|
|
|
revs->tree_objects = 1;
|
|
|
|
revs->blob_objects = 1;
|
|
|
|
revs->edge_hint = 1;
|
2014-12-25 00:05:39 +01:00
|
|
|
} else if (!strcmp(arg, "--objects-edge-aggressive")) {
|
|
|
|
revs->tag_objects = 1;
|
|
|
|
revs->tree_objects = 1;
|
|
|
|
revs->blob_objects = 1;
|
|
|
|
revs->edge_hint = 1;
|
|
|
|
revs->edge_hint_aggressive = 1;
|
2011-09-02 00:43:34 +02:00
|
|
|
} else if (!strcmp(arg, "--verify-objects")) {
|
|
|
|
revs->tag_objects = 1;
|
|
|
|
revs->tree_objects = 1;
|
|
|
|
revs->blob_objects = 1;
|
|
|
|
revs->verify_objects = 1;
|
2008-07-08 15:19:33 +02:00
|
|
|
} else if (!strcmp(arg, "--unpacked")) {
|
|
|
|
revs->unpacked = 1;
|
2013-11-30 21:55:40 +01:00
|
|
|
} else if (starts_with(arg, "--unpacked=")) {
|
2009-02-28 09:00:21 +01:00
|
|
|
die("--unpacked=<packfile> no longer supported.");
|
2008-07-08 15:19:33 +02:00
|
|
|
} else if (!strcmp(arg, "-r")) {
|
|
|
|
revs->diff = 1;
|
|
|
|
DIFF_OPT_SET(&revs->diffopt, RECURSIVE);
|
|
|
|
} else if (!strcmp(arg, "-t")) {
|
|
|
|
revs->diff = 1;
|
|
|
|
DIFF_OPT_SET(&revs->diffopt, RECURSIVE);
|
|
|
|
DIFF_OPT_SET(&revs->diffopt, TREE_IN_RECURSIVE);
|
|
|
|
} else if (!strcmp(arg, "-m")) {
|
|
|
|
revs->ignore_merges = 0;
|
|
|
|
} else if (!strcmp(arg, "-c")) {
|
|
|
|
revs->diff = 1;
|
|
|
|
revs->dense_combined_merges = 0;
|
|
|
|
revs->combine_merges = 1;
|
|
|
|
} else if (!strcmp(arg, "--cc")) {
|
|
|
|
revs->diff = 1;
|
|
|
|
revs->dense_combined_merges = 1;
|
|
|
|
revs->combine_merges = 1;
|
|
|
|
} else if (!strcmp(arg, "-v")) {
|
|
|
|
revs->verbose_header = 1;
|
|
|
|
} else if (!strcmp(arg, "--pretty")) {
|
|
|
|
revs->verbose_header = 1;
|
2010-01-20 22:59:36 +01:00
|
|
|
revs->pretty_given = 1;
|
2014-07-29 19:53:40 +02:00
|
|
|
get_commit_format(NULL, revs);
|
2013-11-30 21:55:40 +01:00
|
|
|
} else if (starts_with(arg, "--pretty=") || starts_with(arg, "--format=")) {
|
2010-08-05 10:22:55 +02:00
|
|
|
/*
|
|
|
|
* Detached form ("--pretty X" as opposed to "--pretty=X")
|
|
|
|
* not allowed, since the argument is optional.
|
|
|
|
*/
|
2008-07-08 15:19:33 +02:00
|
|
|
revs->verbose_header = 1;
|
2010-01-20 22:59:36 +01:00
|
|
|
revs->pretty_given = 1;
|
2008-07-08 15:19:33 +02:00
|
|
|
get_commit_format(arg+9, revs);
|
2011-03-29 22:57:47 +02:00
|
|
|
} else if (!strcmp(arg, "--show-notes") || !strcmp(arg, "--notes")) {
|
2010-01-20 22:59:36 +01:00
|
|
|
revs->show_notes = 1;
|
|
|
|
revs->show_notes_given = 1;
|
2011-03-29 22:57:27 +02:00
|
|
|
revs->notes_opt.use_default_notes = 1;
|
2011-10-19 00:53:23 +02:00
|
|
|
} else if (!strcmp(arg, "--show-signature")) {
|
|
|
|
revs->show_signature = 1;
|
2014-03-25 14:23:27 +01:00
|
|
|
} else if (!strcmp(arg, "--show-linear-break") ||
|
|
|
|
starts_with(arg, "--show-linear-break=")) {
|
|
|
|
if (starts_with(arg, "--show-linear-break="))
|
|
|
|
revs->break_bar = xstrdup(arg + 20);
|
|
|
|
else
|
|
|
|
revs->break_bar = " ..........";
|
|
|
|
revs->track_linear = 1;
|
|
|
|
revs->track_first_time = 1;
|
2013-11-30 21:55:40 +01:00
|
|
|
} else if (starts_with(arg, "--show-notes=") ||
|
|
|
|
starts_with(arg, "--notes=")) {
|
2010-03-12 18:04:26 +01:00
|
|
|
struct strbuf buf = STRBUF_INIT;
|
|
|
|
revs->show_notes = 1;
|
|
|
|
revs->show_notes_given = 1;
|
2013-11-30 21:55:40 +01:00
|
|
|
if (starts_with(arg, "--show-notes")) {
|
2011-03-29 22:57:47 +02:00
|
|
|
if (revs->notes_opt.use_default_notes < 0)
|
|
|
|
revs->notes_opt.use_default_notes = 1;
|
|
|
|
strbuf_addstr(&buf, arg+13);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
strbuf_addstr(&buf, arg+8);
|
2011-03-29 22:56:04 +02:00
|
|
|
expand_notes_ref(&buf);
|
2011-03-29 22:56:53 +02:00
|
|
|
string_list_append(&revs->notes_opt.extra_notes_refs,
|
2010-06-26 01:41:38 +02:00
|
|
|
strbuf_detach(&buf, NULL));
|
2010-01-20 22:59:36 +01:00
|
|
|
} else if (!strcmp(arg, "--no-notes")) {
|
|
|
|
revs->show_notes = 0;
|
|
|
|
revs->show_notes_given = 1;
|
2011-03-29 22:59:42 +02:00
|
|
|
revs->notes_opt.use_default_notes = -1;
|
|
|
|
/* we have been strdup'ing ourselves, so trick
|
|
|
|
* string_list into free()ing strings */
|
|
|
|
revs->notes_opt.extra_notes_refs.strdup_strings = 1;
|
|
|
|
string_list_clear(&revs->notes_opt.extra_notes_refs, 0);
|
|
|
|
revs->notes_opt.extra_notes_refs.strdup_strings = 0;
|
2010-03-12 18:04:26 +01:00
|
|
|
} else if (!strcmp(arg, "--standard-notes")) {
|
|
|
|
revs->show_notes_given = 1;
|
2011-03-29 22:57:27 +02:00
|
|
|
revs->notes_opt.use_default_notes = 1;
|
2010-03-12 18:04:26 +01:00
|
|
|
} else if (!strcmp(arg, "--no-standard-notes")) {
|
2011-03-29 22:57:27 +02:00
|
|
|
revs->notes_opt.use_default_notes = 0;
|
2009-02-24 10:59:16 +01:00
|
|
|
} else if (!strcmp(arg, "--oneline")) {
|
|
|
|
revs->verbose_header = 1;
|
|
|
|
get_commit_format("oneline", revs);
|
2010-01-21 23:57:41 +01:00
|
|
|
revs->pretty_given = 1;
|
2009-02-24 10:59:16 +01:00
|
|
|
revs->abbrev_commit = 1;
|
2008-07-08 15:19:33 +02:00
|
|
|
} else if (!strcmp(arg, "--graph")) {
|
|
|
|
revs->topo_order = 1;
|
|
|
|
revs->rewrite_parents = 1;
|
|
|
|
revs->graph = graph_init(revs);
|
|
|
|
} else if (!strcmp(arg, "--root")) {
|
|
|
|
revs->show_root_diff = 1;
|
|
|
|
} else if (!strcmp(arg, "--no-commit-id")) {
|
|
|
|
revs->no_commit_id = 1;
|
|
|
|
} else if (!strcmp(arg, "--always")) {
|
|
|
|
revs->always_show_header = 1;
|
|
|
|
} else if (!strcmp(arg, "--no-abbrev")) {
|
|
|
|
revs->abbrev = 0;
|
|
|
|
} else if (!strcmp(arg, "--abbrev")) {
|
|
|
|
revs->abbrev = DEFAULT_ABBREV;
|
2013-11-30 21:55:40 +01:00
|
|
|
} else if (starts_with(arg, "--abbrev=")) {
|
2008-07-08 15:19:33 +02:00
|
|
|
revs->abbrev = strtoul(arg + 9, NULL, 10);
|
|
|
|
if (revs->abbrev < MINIMUM_ABBREV)
|
|
|
|
revs->abbrev = MINIMUM_ABBREV;
|
|
|
|
else if (revs->abbrev > 40)
|
|
|
|
revs->abbrev = 40;
|
|
|
|
} else if (!strcmp(arg, "--abbrev-commit")) {
|
|
|
|
revs->abbrev_commit = 1;
|
2011-05-18 19:56:04 +02:00
|
|
|
revs->abbrev_commit_given = 1;
|
|
|
|
} else if (!strcmp(arg, "--no-abbrev-commit")) {
|
|
|
|
revs->abbrev_commit = 0;
|
2008-07-08 15:19:33 +02:00
|
|
|
} else if (!strcmp(arg, "--full-diff")) {
|
|
|
|
revs->diff = 1;
|
|
|
|
revs->full_diff = 1;
|
|
|
|
} else if (!strcmp(arg, "--full-history")) {
|
|
|
|
revs->simplify_history = 0;
|
|
|
|
} else if (!strcmp(arg, "--relative-date")) {
|
convert "enum date_mode" into a struct
In preparation for adding date modes that may carry extra
information beyond the mode itself, this patch converts the
date_mode enum into a struct.
Most of the conversion is fairly straightforward; we pass
the struct as a pointer and dereference the type field where
necessary. Locations that declare a date_mode can use a "{}"
constructor. However, the tricky case is where we use the
enum labels as constants, like:
show_date(t, tz, DATE_NORMAL);
Ideally we could say:
show_date(t, tz, &{ DATE_NORMAL });
but of course C does not allow that. Likewise, we cannot
cast the constant to a struct, because we need to pass an
actual address. Our options are basically:
1. Manually add a "struct date_mode d = { DATE_NORMAL }"
definition to each caller, and pass "&d". This makes
the callers uglier, because they sometimes do not even
have their own scope (e.g., they are inside a switch
statement).
2. Provide a pre-made global "date_normal" struct that can
be passed by address. We'd also need "date_rfc2822",
"date_iso8601", and so forth. But at least the ugliness
is defined in one place.
3. Provide a wrapper that generates the correct struct on
the fly. The big downside is that we end up pointing to
a single global, which makes our wrapper non-reentrant.
But show_date is already not reentrant, so it does not
matter.
This patch implements 3, along with a minor macro to keep
the size of the callers sane.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-06-25 18:55:02 +02:00
|
|
|
revs->date_mode.type = DATE_RELATIVE;
|
2009-09-24 10:28:15 +02:00
|
|
|
revs->date_mode_explicit = 1;
|
2010-08-05 10:22:55 +02:00
|
|
|
} else if ((argcount = parse_long_opt("date", argv, &optarg))) {
|
convert "enum date_mode" into a struct
In preparation for adding date modes that may carry extra
information beyond the mode itself, this patch converts the
date_mode enum into a struct.
Most of the conversion is fairly straightforward; we pass
the struct as a pointer and dereference the type field where
necessary. Locations that declare a date_mode can use a "{}"
constructor. However, the tricky case is where we use the
enum labels as constants, like:
show_date(t, tz, DATE_NORMAL);
Ideally we could say:
show_date(t, tz, &{ DATE_NORMAL });
but of course C does not allow that. Likewise, we cannot
cast the constant to a struct, because we need to pass an
actual address. Our options are basically:
1. Manually add a "struct date_mode d = { DATE_NORMAL }"
definition to each caller, and pass "&d". This makes
the callers uglier, because they sometimes do not even
have their own scope (e.g., they are inside a switch
statement).
2. Provide a pre-made global "date_normal" struct that can
be passed by address. We'd also need "date_rfc2822",
"date_iso8601", and so forth. But at least the ugliness
is defined in one place.
3. Provide a wrapper that generates the correct struct on
the fly. The big downside is that we end up pointing to
a single global, which makes our wrapper non-reentrant.
But show_date is already not reentrant, so it does not
matter.
This patch implements 3, along with a minor macro to keep
the size of the callers sane.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-06-25 18:55:02 +02:00
|
|
|
parse_date_format(optarg, &revs->date_mode);
|
2009-09-24 10:28:15 +02:00
|
|
|
revs->date_mode_explicit = 1;
|
2010-08-05 10:22:55 +02:00
|
|
|
return argcount;
|
2008-07-08 15:19:33 +02:00
|
|
|
} else if (!strcmp(arg, "--log-size")) {
|
|
|
|
revs->show_log_size = 1;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Grepping the commit log
|
|
|
|
*/
|
2010-08-05 10:22:55 +02:00
|
|
|
else if ((argcount = parse_long_opt("author", argv, &optarg))) {
|
|
|
|
add_header_grep(revs, GREP_HEADER_AUTHOR, optarg);
|
|
|
|
return argcount;
|
|
|
|
} else if ((argcount = parse_long_opt("committer", argv, &optarg))) {
|
|
|
|
add_header_grep(revs, GREP_HEADER_COMMITTER, optarg);
|
|
|
|
return argcount;
|
2012-09-29 06:41:28 +02:00
|
|
|
} else if ((argcount = parse_long_opt("grep-reflog", argv, &optarg))) {
|
|
|
|
add_header_grep(revs, GREP_HEADER_REFLOG, optarg);
|
|
|
|
return argcount;
|
2010-08-05 10:22:55 +02:00
|
|
|
} else if ((argcount = parse_long_opt("grep", argv, &optarg))) {
|
|
|
|
add_message_grep(revs, optarg);
|
|
|
|
return argcount;
|
2012-09-13 23:21:44 +02:00
|
|
|
} else if (!strcmp(arg, "--grep-debug")) {
|
|
|
|
revs->grep_filter.debug = 1;
|
2012-10-04 00:01:34 +02:00
|
|
|
} else if (!strcmp(arg, "--basic-regexp")) {
|
|
|
|
grep_set_pattern_type_option(GREP_PATTERN_TYPE_BRE, &revs->grep_filter);
|
2008-07-08 15:19:33 +02:00
|
|
|
} else if (!strcmp(arg, "--extended-regexp") || !strcmp(arg, "-E")) {
|
2012-10-03 23:50:51 +02:00
|
|
|
grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, &revs->grep_filter);
|
2008-07-08 15:19:33 +02:00
|
|
|
} else if (!strcmp(arg, "--regexp-ignore-case") || !strcmp(arg, "-i")) {
|
2008-08-25 08:15:05 +02:00
|
|
|
revs->grep_filter.regflags |= REG_ICASE;
|
2012-02-21 10:02:46 +01:00
|
|
|
DIFF_OPT_SET(&revs->diffopt, PICKAXE_IGNORE_CASE);
|
2008-07-08 15:19:33 +02:00
|
|
|
} else if (!strcmp(arg, "--fixed-strings") || !strcmp(arg, "-F")) {
|
2012-10-03 23:50:51 +02:00
|
|
|
grep_set_pattern_type_option(GREP_PATTERN_TYPE_FIXED, &revs->grep_filter);
|
2012-10-04 00:01:34 +02:00
|
|
|
} else if (!strcmp(arg, "--perl-regexp")) {
|
|
|
|
grep_set_pattern_type_option(GREP_PATTERN_TYPE_PCRE, &revs->grep_filter);
|
2008-07-08 15:19:33 +02:00
|
|
|
} else if (!strcmp(arg, "--all-match")) {
|
2008-08-25 08:15:05 +02:00
|
|
|
revs->grep_filter.all_match = 1;
|
log: teach --invert-grep option
"git log --grep=<string>" shows only commits with messages that
match the given string, but sometimes it is useful to be able to
show only commits that do *not* have certain messages (e.g. "show
me ones that are not FIXUP commits").
Originally, we had the invert-grep flag in grep_opt, but because
"git grep --invert-grep" does not make sense except in conjunction
with "--files-with-matches", which is already covered by
"--files-without-matches", it was moved it to revisions structure.
To have the flag there expresses the function to the feature better.
When the newly inserted two tests run, the history would have commits
with messages "initial", "second", "third", "fourth", "fifth", "sixth"
and "Second", committed in this order. The commits that does not match
either "th" or "Sec" is "second" and "initial". For the case insensitive
case only "initial" matches.
Signed-off-by: Christoph Junghans <ottxor@gentoo.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-01-13 02:33:32 +01:00
|
|
|
} else if (!strcmp(arg, "--invert-grep")) {
|
|
|
|
revs->invert_grep = 1;
|
2010-08-05 10:22:55 +02:00
|
|
|
} else if ((argcount = parse_long_opt("encoding", argv, &optarg))) {
|
|
|
|
if (strcmp(optarg, "none"))
|
|
|
|
git_log_output_encoding = xstrdup(optarg);
|
2008-07-08 15:19:33 +02:00
|
|
|
else
|
|
|
|
git_log_output_encoding = "";
|
2010-08-05 10:22:55 +02:00
|
|
|
return argcount;
|
2008-07-08 15:19:33 +02:00
|
|
|
} else if (!strcmp(arg, "--reverse")) {
|
|
|
|
revs->reverse ^= 1;
|
|
|
|
} else if (!strcmp(arg, "--children")) {
|
|
|
|
revs->children.name = "children";
|
|
|
|
revs->limited = 1;
|
2011-05-19 03:08:09 +02:00
|
|
|
} else if (!strcmp(arg, "--ignore-missing")) {
|
|
|
|
revs->ignore_missing = 1;
|
2008-07-08 15:19:33 +02:00
|
|
|
} else {
|
|
|
|
int opts = diff_opt_parse(&revs->diffopt, argv, argc);
|
|
|
|
if (!opts)
|
|
|
|
unkv[(*unkc)++] = arg;
|
|
|
|
return opts;
|
|
|
|
}
|
2014-03-25 14:23:27 +01:00
|
|
|
if (revs->graph && revs->track_linear)
|
|
|
|
die("--show-linear-break and --graph are incompatible");
|
2008-07-08 15:19:33 +02:00
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2008-07-09 23:38:34 +02:00
|
|
|
void parse_revision_opt(struct rev_info *revs, struct parse_opt_ctx_t *ctx,
|
|
|
|
const struct option *options,
|
|
|
|
const char * const usagestr[])
|
|
|
|
{
|
|
|
|
int n = handle_revision_opt(revs, ctx->argc, ctx->argv,
|
|
|
|
&ctx->cpidx, ctx->out);
|
|
|
|
if (n <= 0) {
|
|
|
|
error("unknown option `%s'", ctx->argv[0]);
|
|
|
|
usage_with_options(usagestr, options);
|
|
|
|
}
|
|
|
|
ctx->argv += n;
|
|
|
|
ctx->argc -= n;
|
|
|
|
}
|
|
|
|
|
2015-06-29 17:40:30 +02:00
|
|
|
static int for_each_bisect_ref(const char *submodule, each_ref_fn fn, void *cb_data, const char *term) {
|
|
|
|
struct strbuf bisect_refs = STRBUF_INIT;
|
|
|
|
int status;
|
|
|
|
strbuf_addf(&bisect_refs, "refs/bisect/%s", term);
|
|
|
|
status = for_each_ref_in_submodule(submodule, bisect_refs.buf, fn, cb_data);
|
|
|
|
strbuf_release(&bisect_refs);
|
|
|
|
return status;
|
|
|
|
}
|
|
|
|
|
2010-07-07 15:39:12 +02:00
|
|
|
static int for_each_bad_bisect_ref(const char *submodule, each_ref_fn fn, void *cb_data)
|
2009-10-27 19:28:07 +01:00
|
|
|
{
|
2015-06-29 17:40:30 +02:00
|
|
|
return for_each_bisect_ref(submodule, fn, cb_data, term_bad);
|
2009-10-27 19:28:07 +01:00
|
|
|
}
|
|
|
|
|
2010-07-07 15:39:12 +02:00
|
|
|
static int for_each_good_bisect_ref(const char *submodule, each_ref_fn fn, void *cb_data)
|
2009-10-27 19:28:07 +01:00
|
|
|
{
|
2015-06-29 17:40:30 +02:00
|
|
|
return for_each_bisect_ref(submodule, fn, cb_data, term_good);
|
2009-10-27 19:28:07 +01:00
|
|
|
}
|
|
|
|
|
2011-04-21 12:45:07 +02:00
|
|
|
static int handle_revision_pseudo_opt(const char *submodule,
|
|
|
|
struct rev_info *revs,
|
|
|
|
int argc, const char **argv, int *flags)
|
|
|
|
{
|
|
|
|
const char *arg = argv[0];
|
|
|
|
const char *optarg;
|
|
|
|
int argcount;
|
|
|
|
|
2011-04-21 12:48:24 +02:00
|
|
|
/*
|
|
|
|
* NOTE!
|
|
|
|
*
|
|
|
|
* Commands like "git shortlog" will not accept the options below
|
|
|
|
* unless parse_revision_opt queues them (as opposed to erroring
|
|
|
|
* out).
|
|
|
|
*
|
|
|
|
* When implementing your new pseudo-option, remember to
|
|
|
|
* register it in the list at the top of handle_revision_opt.
|
|
|
|
*/
|
2011-04-21 12:45:07 +02:00
|
|
|
if (!strcmp(arg, "--all")) {
|
|
|
|
handle_refs(submodule, revs, *flags, for_each_ref_submodule);
|
|
|
|
handle_refs(submodule, revs, *flags, head_ref_submodule);
|
2013-11-01 20:02:45 +01:00
|
|
|
clear_ref_exclusion(&revs->ref_excludes);
|
2011-04-21 12:45:07 +02:00
|
|
|
} else if (!strcmp(arg, "--branches")) {
|
|
|
|
handle_refs(submodule, revs, *flags, for_each_branch_ref_submodule);
|
2013-11-01 20:02:45 +01:00
|
|
|
clear_ref_exclusion(&revs->ref_excludes);
|
2011-04-21 12:45:07 +02:00
|
|
|
} else if (!strcmp(arg, "--bisect")) {
|
2015-06-29 17:40:30 +02:00
|
|
|
read_bisect_terms(&term_bad, &term_good);
|
2011-04-21 12:45:07 +02:00
|
|
|
handle_refs(submodule, revs, *flags, for_each_bad_bisect_ref);
|
2013-05-16 17:32:38 +02:00
|
|
|
handle_refs(submodule, revs, *flags ^ (UNINTERESTING | BOTTOM), for_each_good_bisect_ref);
|
2011-04-21 12:45:07 +02:00
|
|
|
revs->bisect = 1;
|
|
|
|
} else if (!strcmp(arg, "--tags")) {
|
|
|
|
handle_refs(submodule, revs, *flags, for_each_tag_ref_submodule);
|
2013-11-01 20:02:45 +01:00
|
|
|
clear_ref_exclusion(&revs->ref_excludes);
|
2011-04-21 12:45:07 +02:00
|
|
|
} else if (!strcmp(arg, "--remotes")) {
|
|
|
|
handle_refs(submodule, revs, *flags, for_each_remote_ref_submodule);
|
2013-11-01 20:02:45 +01:00
|
|
|
clear_ref_exclusion(&revs->ref_excludes);
|
2011-04-21 12:45:07 +02:00
|
|
|
} else if ((argcount = parse_long_opt("glob", argv, &optarg))) {
|
|
|
|
struct all_refs_cb cb;
|
|
|
|
init_all_refs_cb(&cb, revs, *flags);
|
2015-05-25 20:38:30 +02:00
|
|
|
for_each_glob_ref(handle_one_ref, optarg, &cb);
|
2013-11-01 20:02:45 +01:00
|
|
|
clear_ref_exclusion(&revs->ref_excludes);
|
revision: introduce --exclude=<glob> to tame wildcards
People often find "git log --branches" etc. that includes _all_
branches is cumbersome to use when they want to grab most but except
some. The same applies to --tags, --all and --glob.
Teach the revision machinery to remember patterns, and then upon the
next such a globbing option, exclude those that match the pattern.
With this, I can view only my integration branches (e.g. maint,
master, etc.) without topic branches, which are named after two
letters from primary authors' names, slash and topic name.
git rev-list --no-walk --exclude=??/* --branches |
git name-rev --refs refs/heads/* --stdin
This one shows things reachable from local and remote branches that
have not been merged to the integration branches.
git log --remotes --branches --not --exclude=??/* --branches
It may be a bit rough around the edges, in that the pattern to give
the exclude option depends on what globbing option follows. In
these examples, the pattern "??/*" is used, not "refs/heads/??/*",
because the globbing option that follows the -"-exclude=<pattern>"
is "--branches". As each use of globbing option resets previously
set "--exclude", this may not be such a bad thing, though.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-31 01:37:55 +02:00
|
|
|
return argcount;
|
|
|
|
} else if ((argcount = parse_long_opt("exclude", argv, &optarg))) {
|
2013-11-01 20:02:45 +01:00
|
|
|
add_ref_exclusion(&revs->ref_excludes, optarg);
|
2011-04-21 12:45:07 +02:00
|
|
|
return argcount;
|
2013-11-30 21:55:40 +01:00
|
|
|
} else if (starts_with(arg, "--branches=")) {
|
2011-04-21 12:45:07 +02:00
|
|
|
struct all_refs_cb cb;
|
|
|
|
init_all_refs_cb(&cb, revs, *flags);
|
2015-05-25 20:38:30 +02:00
|
|
|
for_each_glob_ref_in(handle_one_ref, arg + 11, "refs/heads/", &cb);
|
2013-11-01 20:02:45 +01:00
|
|
|
clear_ref_exclusion(&revs->ref_excludes);
|
2013-11-30 21:55:40 +01:00
|
|
|
} else if (starts_with(arg, "--tags=")) {
|
2011-04-21 12:45:07 +02:00
|
|
|
struct all_refs_cb cb;
|
|
|
|
init_all_refs_cb(&cb, revs, *flags);
|
2015-05-25 20:38:30 +02:00
|
|
|
for_each_glob_ref_in(handle_one_ref, arg + 7, "refs/tags/", &cb);
|
2013-11-01 20:02:45 +01:00
|
|
|
clear_ref_exclusion(&revs->ref_excludes);
|
2013-11-30 21:55:40 +01:00
|
|
|
} else if (starts_with(arg, "--remotes=")) {
|
2011-04-21 12:45:07 +02:00
|
|
|
struct all_refs_cb cb;
|
|
|
|
init_all_refs_cb(&cb, revs, *flags);
|
2015-05-25 20:38:30 +02:00
|
|
|
for_each_glob_ref_in(handle_one_ref, arg + 10, "refs/remotes/", &cb);
|
2013-11-01 20:02:45 +01:00
|
|
|
clear_ref_exclusion(&revs->ref_excludes);
|
2011-04-21 12:45:07 +02:00
|
|
|
} else if (!strcmp(arg, "--reflog")) {
|
2014-10-16 00:38:31 +02:00
|
|
|
add_reflogs_to_pending(revs, *flags);
|
2014-10-17 02:44:23 +02:00
|
|
|
} else if (!strcmp(arg, "--indexed-objects")) {
|
|
|
|
add_index_objects_to_pending(revs, *flags);
|
2011-04-21 12:45:07 +02:00
|
|
|
} else if (!strcmp(arg, "--not")) {
|
2013-05-16 17:32:38 +02:00
|
|
|
*flags ^= UNINTERESTING | BOTTOM;
|
2011-04-21 12:45:07 +02:00
|
|
|
} else if (!strcmp(arg, "--no-walk")) {
|
teach log --no-walk=unsorted, which avoids sorting
When 'git log' is passed the --no-walk option, no revision walk takes
place, naturally. Perhaps somewhat surprisingly, however, the provided
revisions still get sorted by commit date. So e.g 'git log --no-walk
HEAD HEAD~1' and 'git log --no-walk HEAD~1 HEAD' give the same result
(unless the two revisions share the commit date, in which case they
will retain the order given on the command line). As the commit that
introduced --no-walk (8e64006 (Teach revision machinery about
--no-walk, 2007-07-24)) points out, the sorting is intentional, to
allow things like
git log --abbrev-commit --pretty=oneline --decorate --all --no-walk
to show all refs in order by commit date.
But there are also other cases where the sorting is not wanted, such
as
<command producing revisions in order> |
git log --oneline --no-walk --stdin
To accomodate both cases, leave the decision of whether or not to sort
up to the caller, by allowing --no-walk={sorted,unsorted}, defaulting
to 'sorted' for backward-compatibility reasons.
Signed-off-by: Martin von Zweigbergk <martinvonz@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-08-29 08:15:54 +02:00
|
|
|
revs->no_walk = REVISION_WALK_NO_WALK_SORTED;
|
2013-11-30 21:55:40 +01:00
|
|
|
} else if (starts_with(arg, "--no-walk=")) {
|
teach log --no-walk=unsorted, which avoids sorting
When 'git log' is passed the --no-walk option, no revision walk takes
place, naturally. Perhaps somewhat surprisingly, however, the provided
revisions still get sorted by commit date. So e.g 'git log --no-walk
HEAD HEAD~1' and 'git log --no-walk HEAD~1 HEAD' give the same result
(unless the two revisions share the commit date, in which case they
will retain the order given on the command line). As the commit that
introduced --no-walk (8e64006 (Teach revision machinery about
--no-walk, 2007-07-24)) points out, the sorting is intentional, to
allow things like
git log --abbrev-commit --pretty=oneline --decorate --all --no-walk
to show all refs in order by commit date.
But there are also other cases where the sorting is not wanted, such
as
<command producing revisions in order> |
git log --oneline --no-walk --stdin
To accomodate both cases, leave the decision of whether or not to sort
up to the caller, by allowing --no-walk={sorted,unsorted}, defaulting
to 'sorted' for backward-compatibility reasons.
Signed-off-by: Martin von Zweigbergk <martinvonz@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-08-29 08:15:54 +02:00
|
|
|
/*
|
|
|
|
* Detached form ("--no-walk X" as opposed to "--no-walk=X")
|
|
|
|
* not allowed, since the argument is optional.
|
|
|
|
*/
|
|
|
|
if (!strcmp(arg + 10, "sorted"))
|
|
|
|
revs->no_walk = REVISION_WALK_NO_WALK_SORTED;
|
|
|
|
else if (!strcmp(arg + 10, "unsorted"))
|
|
|
|
revs->no_walk = REVISION_WALK_NO_WALK_UNSORTED;
|
|
|
|
else
|
|
|
|
return error("invalid argument to --no-walk");
|
2011-04-21 12:45:07 +02:00
|
|
|
} else if (!strcmp(arg, "--do-walk")) {
|
|
|
|
revs->no_walk = 0;
|
|
|
|
} else {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2015-08-29 07:04:18 +02:00
|
|
|
static void NORETURN diagnose_missing_default(const char *def)
|
|
|
|
{
|
|
|
|
unsigned char sha1[20];
|
|
|
|
int flags;
|
|
|
|
const char *refname;
|
|
|
|
|
|
|
|
refname = resolve_ref_unsafe(def, 0, sha1, &flags);
|
|
|
|
if (!refname || !(flags & REF_ISSYMREF) || (flags & REF_ISBROKEN))
|
|
|
|
die(_("your current branch appears to be broken"));
|
|
|
|
|
|
|
|
skip_prefix(refname, "refs/heads/", &refname);
|
|
|
|
die(_("your current branch '%s' does not have any commits yet"),
|
|
|
|
refname);
|
|
|
|
}
|
|
|
|
|
2006-02-26 01:19:46 +01:00
|
|
|
/*
|
|
|
|
* Parse revision information, filling in the "rev_info" structure,
|
|
|
|
* and removing the used arguments from the argument list.
|
|
|
|
*
|
2006-03-01 00:07:20 +01:00
|
|
|
* Returns the number of arguments left that weren't recognized
|
|
|
|
* (which are also moved to the head of the argument list)
|
2006-02-26 01:19:46 +01:00
|
|
|
*/
|
2010-03-09 07:58:09 +01:00
|
|
|
int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct setup_revision_opt *opt)
|
2006-02-26 01:19:46 +01:00
|
|
|
{
|
2012-07-02 21:33:52 +02:00
|
|
|
int i, flags, left, seen_dashdash, read_from_stdin, got_rev_arg = 0, revarg_opt;
|
2011-05-11 23:01:19 +02:00
|
|
|
struct cmdline_pathspec prune_data;
|
2010-07-07 15:39:12 +02:00
|
|
|
const char *submodule = NULL;
|
|
|
|
|
2011-05-11 23:01:19 +02:00
|
|
|
memset(&prune_data, 0, sizeof(prune_data));
|
2010-07-07 15:39:12 +02:00
|
|
|
if (opt)
|
|
|
|
submodule = opt->submodule;
|
2006-02-26 01:19:46 +01:00
|
|
|
|
|
|
|
/* First, search for "--" */
|
2012-04-14 21:04:48 +02:00
|
|
|
if (opt && opt->assume_dashdash) {
|
2006-02-26 01:19:46 +01:00
|
|
|
seen_dashdash = 1;
|
2012-04-14 21:04:48 +02:00
|
|
|
} else {
|
|
|
|
seen_dashdash = 0;
|
|
|
|
for (i = 1; i < argc; i++) {
|
|
|
|
const char *arg = argv[i];
|
|
|
|
if (strcmp(arg, "--"))
|
|
|
|
continue;
|
|
|
|
argv[i] = NULL;
|
|
|
|
argc = i;
|
|
|
|
if (argv[i + 1])
|
|
|
|
append_prune_data(&prune_data, argv + i + 1);
|
|
|
|
seen_dashdash = 1;
|
|
|
|
break;
|
|
|
|
}
|
2006-02-26 01:19:46 +01:00
|
|
|
}
|
|
|
|
|
2008-07-08 15:19:33 +02:00
|
|
|
/* Second, deal with arguments and options */
|
|
|
|
flags = 0;
|
2012-07-02 21:43:05 +02:00
|
|
|
revarg_opt = opt ? opt->revarg_opt : 0;
|
|
|
|
if (seen_dashdash)
|
|
|
|
revarg_opt |= REVARG_CANNOT_BE_FILENAME;
|
2009-11-03 15:59:18 +01:00
|
|
|
read_from_stdin = 0;
|
2008-07-08 15:19:33 +02:00
|
|
|
for (left = i = 1; i < argc; i++) {
|
2006-02-26 01:19:46 +01:00
|
|
|
const char *arg = argv[i];
|
|
|
|
if (*arg == '-') {
|
Common option parsing for "git log --diff" and friends
This basically does a few things that are sadly somewhat interdependent,
and nontrivial to split out
- get rid of "struct log_tree_opt"
The fields in "log_tree_opt" are moved into "struct rev_info", and all
users of log_tree_opt are changed to use the rev_info struct instead.
- add the parsing for the log_tree_opt arguments to "setup_revision()"
- make setup_revision set a flag (revs->diff) if the diff-related
arguments were used. This allows "git log" to decide whether it wants
to show diffs or not.
- make setup_revision() also initialize the diffopt part of rev_info
(which we had from before, but we just didn't initialize it)
- make setup_revision() do all the "finishing touches" on it all (it will
do the proper flag combination logic, and call "diff_setup_done()")
Now, that was the easy and straightforward part.
The slightly more involved part is that some of the programs that want to
use the new-and-improved rev_info parsing don't actually want _commits_,
they may want tree'ish arguments instead. That meant that I had to change
setup_revision() to parse the arguments not into the "revs->commits" list,
but into the "revs->pending_objects" list.
Then, when we do "prepare_revision_walk()", we walk that list, and create
the sorted commit list from there.
This actually cleaned some stuff up, but it's the less obvious part of the
patch, and re-organized the "revision.c" logic somewhat. It actually paves
the way for splitting argument parsing _entirely_ out of "revision.c",
since now the argument parsing really is totally independent of the commit
walking: that didn't use to be true, since there was lots of overlap with
get_commit_reference() handling etc, now the _only_ overlap is the shared
(and trivial) "add_pending_object()" thing.
However, I didn't do that file split, just because I wanted the diff
itself to be smaller, and show the actual changes more clearly. If this
gets accepted, I'll do further cleanups then - that includes the file
split, but also using the new infrastructure to do a nicer "git diff" etc.
Even in this form, it actually ends up removing more lines than it adds.
It's nice to note how simple and straightforward this makes the built-in
"git log" command, even though it continues to support all the diff flags
too. It doesn't get much simpler that this.
I think this is worth merging soonish, because it does allow for future
cleanup and even more sharing of code. However, it obviously touches
"revision.c", which is subtle. I've tested that it passes all the tests we
have, and it passes my "looks sane" detector, but somebody else should
also give it a good look-over.
[jc: squashed the original and three "oops this too" updates, with
another fix-up.]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-15 01:52:13 +02:00
|
|
|
int opts;
|
2008-07-08 15:19:33 +02:00
|
|
|
|
2011-04-21 12:45:07 +02:00
|
|
|
opts = handle_revision_pseudo_opt(submodule,
|
|
|
|
revs, argc - i, argv + i,
|
|
|
|
&flags);
|
|
|
|
if (opts > 0) {
|
|
|
|
i += opts - 1;
|
2007-07-24 01:38:40 +02:00
|
|
|
continue;
|
|
|
|
}
|
2011-04-21 12:45:07 +02:00
|
|
|
|
2009-11-03 15:59:18 +01:00
|
|
|
if (!strcmp(arg, "--stdin")) {
|
|
|
|
if (revs->disable_stdin) {
|
|
|
|
argv[left++] = arg;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (read_from_stdin++)
|
|
|
|
die("--stdin given twice?");
|
2009-11-20 11:50:21 +01:00
|
|
|
read_revisions_from_stdin(revs, &prune_data);
|
2009-11-03 15:59:18 +01:00
|
|
|
continue;
|
|
|
|
}
|
2006-09-20 22:21:56 +02:00
|
|
|
|
2008-07-08 15:19:33 +02:00
|
|
|
opts = handle_revision_opt(revs, argc - i, argv + i, &left, argv);
|
Common option parsing for "git log --diff" and friends
This basically does a few things that are sadly somewhat interdependent,
and nontrivial to split out
- get rid of "struct log_tree_opt"
The fields in "log_tree_opt" are moved into "struct rev_info", and all
users of log_tree_opt are changed to use the rev_info struct instead.
- add the parsing for the log_tree_opt arguments to "setup_revision()"
- make setup_revision set a flag (revs->diff) if the diff-related
arguments were used. This allows "git log" to decide whether it wants
to show diffs or not.
- make setup_revision() also initialize the diffopt part of rev_info
(which we had from before, but we just didn't initialize it)
- make setup_revision() do all the "finishing touches" on it all (it will
do the proper flag combination logic, and call "diff_setup_done()")
Now, that was the easy and straightforward part.
The slightly more involved part is that some of the programs that want to
use the new-and-improved rev_info parsing don't actually want _commits_,
they may want tree'ish arguments instead. That meant that I had to change
setup_revision() to parse the arguments not into the "revs->commits" list,
but into the "revs->pending_objects" list.
Then, when we do "prepare_revision_walk()", we walk that list, and create
the sorted commit list from there.
This actually cleaned some stuff up, but it's the less obvious part of the
patch, and re-organized the "revision.c" logic somewhat. It actually paves
the way for splitting argument parsing _entirely_ out of "revision.c",
since now the argument parsing really is totally independent of the commit
walking: that didn't use to be true, since there was lots of overlap with
get_commit_reference() handling etc, now the _only_ overlap is the shared
(and trivial) "add_pending_object()" thing.
However, I didn't do that file split, just because I wanted the diff
itself to be smaller, and show the actual changes more clearly. If this
gets accepted, I'll do further cleanups then - that includes the file
split, but also using the new infrastructure to do a nicer "git diff" etc.
Even in this form, it actually ends up removing more lines than it adds.
It's nice to note how simple and straightforward this makes the built-in
"git log" command, even though it continues to support all the diff flags
too. It doesn't get much simpler that this.
I think this is worth merging soonish, because it does allow for future
cleanup and even more sharing of code. However, it obviously touches
"revision.c", which is subtle. I've tested that it passes all the tests we
have, and it passes my "looks sane" detector, but somebody else should
also give it a good look-over.
[jc: squashed the original and three "oops this too" updates, with
another fix-up.]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-15 01:52:13 +02:00
|
|
|
if (opts > 0) {
|
|
|
|
i += opts - 1;
|
|
|
|
continue;
|
|
|
|
}
|
2008-07-08 15:19:33 +02:00
|
|
|
if (opts < 0)
|
|
|
|
exit(128);
|
2006-02-26 01:19:46 +01:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2012-07-02 21:33:52 +02:00
|
|
|
|
|
|
|
if (handle_revision_arg(arg, revs, flags, revarg_opt)) {
|
2006-09-06 06:28:36 +02:00
|
|
|
int j;
|
|
|
|
if (seen_dashdash || *arg == '^')
|
2006-02-26 01:19:46 +01:00
|
|
|
die("bad revision '%s'", arg);
|
|
|
|
|
2006-04-27 00:09:27 +02:00
|
|
|
/* If we didn't have a "--":
|
|
|
|
* (1) all filenames must exist;
|
|
|
|
* (2) all rev-args must not be interpretable
|
|
|
|
* as a valid filename.
|
|
|
|
* but the latter we have checked in the main loop.
|
|
|
|
*/
|
2006-04-26 19:15:54 +02:00
|
|
|
for (j = i; j < argc; j++)
|
2012-06-18 20:18:21 +02:00
|
|
|
verify_filename(revs->prefix, argv[j], j == i);
|
2006-04-26 19:15:54 +02:00
|
|
|
|
2009-11-20 11:50:21 +01:00
|
|
|
append_prune_data(&prune_data, argv + i);
|
2006-02-26 01:19:46 +01:00
|
|
|
break;
|
|
|
|
}
|
2010-03-13 23:47:05 +01:00
|
|
|
else
|
|
|
|
got_rev_arg = 1;
|
2006-02-26 01:19:46 +01:00
|
|
|
}
|
2006-09-06 06:28:36 +02:00
|
|
|
|
2011-05-11 23:01:19 +02:00
|
|
|
if (prune_data.nr) {
|
2011-05-12 00:23:25 +02:00
|
|
|
/*
|
|
|
|
* If we need to introduce the magic "a lone ':' means no
|
|
|
|
* pathspec whatsoever", here is the place to do so.
|
|
|
|
*
|
|
|
|
* if (prune_data.nr == 1 && !strcmp(prune_data[0], ":")) {
|
|
|
|
* prune_data.nr = 0;
|
|
|
|
* prune_data.alloc = 0;
|
|
|
|
* free(prune_data.path);
|
|
|
|
* prune_data.path = NULL;
|
|
|
|
* } else {
|
|
|
|
* terminate prune_data.alloc with NULL and
|
|
|
|
* call init_pathspec() to set revs->prune_data here.
|
|
|
|
* }
|
|
|
|
*/
|
2013-10-31 10:25:43 +01:00
|
|
|
ALLOC_GROW(prune_data.path, prune_data.nr + 1, prune_data.alloc);
|
2011-05-11 23:01:19 +02:00
|
|
|
prune_data.path[prune_data.nr++] = NULL;
|
2013-07-14 10:35:31 +02:00
|
|
|
parse_pathspec(&revs->prune_data, 0, 0,
|
|
|
|
revs->prefix, prune_data.path);
|
2011-05-11 23:01:19 +02:00
|
|
|
}
|
2009-11-20 11:33:28 +01:00
|
|
|
|
2008-07-08 15:19:33 +02:00
|
|
|
if (revs->def == NULL)
|
2010-03-09 07:58:09 +01:00
|
|
|
revs->def = opt ? opt->def : NULL;
|
2010-03-09 08:27:25 +01:00
|
|
|
if (opt && opt->tweak)
|
|
|
|
opt->tweak(revs, opt);
|
2008-07-08 15:19:33 +02:00
|
|
|
if (revs->show_merge)
|
2006-07-03 11:59:32 +02:00
|
|
|
prepare_show_merge(revs);
|
2010-03-13 23:47:05 +01:00
|
|
|
if (revs->def && !revs->pending.nr && !got_rev_arg) {
|
2006-02-26 01:19:46 +01:00
|
|
|
unsigned char sha1[20];
|
Common option parsing for "git log --diff" and friends
This basically does a few things that are sadly somewhat interdependent,
and nontrivial to split out
- get rid of "struct log_tree_opt"
The fields in "log_tree_opt" are moved into "struct rev_info", and all
users of log_tree_opt are changed to use the rev_info struct instead.
- add the parsing for the log_tree_opt arguments to "setup_revision()"
- make setup_revision set a flag (revs->diff) if the diff-related
arguments were used. This allows "git log" to decide whether it wants
to show diffs or not.
- make setup_revision() also initialize the diffopt part of rev_info
(which we had from before, but we just didn't initialize it)
- make setup_revision() do all the "finishing touches" on it all (it will
do the proper flag combination logic, and call "diff_setup_done()")
Now, that was the easy and straightforward part.
The slightly more involved part is that some of the programs that want to
use the new-and-improved rev_info parsing don't actually want _commits_,
they may want tree'ish arguments instead. That meant that I had to change
setup_revision() to parse the arguments not into the "revs->commits" list,
but into the "revs->pending_objects" list.
Then, when we do "prepare_revision_walk()", we walk that list, and create
the sorted commit list from there.
This actually cleaned some stuff up, but it's the less obvious part of the
patch, and re-organized the "revision.c" logic somewhat. It actually paves
the way for splitting argument parsing _entirely_ out of "revision.c",
since now the argument parsing really is totally independent of the commit
walking: that didn't use to be true, since there was lots of overlap with
get_commit_reference() handling etc, now the _only_ overlap is the shared
(and trivial) "add_pending_object()" thing.
However, I didn't do that file split, just because I wanted the diff
itself to be smaller, and show the actual changes more clearly. If this
gets accepted, I'll do further cleanups then - that includes the file
split, but also using the new infrastructure to do a nicer "git diff" etc.
Even in this form, it actually ends up removing more lines than it adds.
It's nice to note how simple and straightforward this makes the built-in
"git log" command, even though it continues to support all the diff flags
too. It doesn't get much simpler that this.
I think this is worth merging soonish, because it does allow for future
cleanup and even more sharing of code. However, it obviously touches
"revision.c", which is subtle. I've tested that it passes all the tests we
have, and it passes my "looks sane" detector, but somebody else should
also give it a good look-over.
[jc: squashed the original and three "oops this too" updates, with
another fix-up.]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-15 01:52:13 +02:00
|
|
|
struct object *object;
|
2012-07-02 21:56:44 +02:00
|
|
|
struct object_context oc;
|
2012-07-02 19:32:11 +02:00
|
|
|
if (get_sha1_with_context(revs->def, 0, sha1, &oc))
|
2015-08-29 07:04:18 +02:00
|
|
|
diagnose_missing_default(revs->def);
|
2008-07-08 15:19:33 +02:00
|
|
|
object = get_reference(revs, revs->def, sha1, 0);
|
2012-07-02 21:56:44 +02:00
|
|
|
add_pending_object_with_mode(revs, object, revs->def, oc.mode);
|
2006-02-26 01:19:46 +01:00
|
|
|
}
|
2006-03-10 10:21:39 +01:00
|
|
|
|
2007-09-29 18:50:39 +02:00
|
|
|
/* Did the user ask for any diff output? Run the diff! */
|
|
|
|
if (revs->diffopt.output_format & ~DIFF_FORMAT_NO_OUTPUT)
|
|
|
|
revs->diff = 1;
|
|
|
|
|
2007-12-25 12:06:47 +01:00
|
|
|
/* Pickaxe, diff-filter and rename following need diffs */
|
|
|
|
if (revs->diffopt.pickaxe ||
|
|
|
|
revs->diffopt.filter ||
|
|
|
|
DIFF_OPT_TST(&revs->diffopt, FOLLOW_RENAMES))
|
2007-09-29 18:50:39 +02:00
|
|
|
revs->diff = 1;
|
|
|
|
|
2006-10-31 03:58:03 +01:00
|
|
|
if (revs->topo_order)
|
2006-04-02 04:38:25 +02:00
|
|
|
revs->limited = 1;
|
|
|
|
|
2010-12-17 13:43:06 +01:00
|
|
|
if (revs->prune_data.nr) {
|
2013-07-14 10:35:58 +02:00
|
|
|
copy_pathspec(&revs->pruning.pathspec, &revs->prune_data);
|
Finally implement "git log --follow"
Ok, I've really held off doing this too damn long, because I'm lazy, and I
was always hoping that somebody else would do it.
But no, people keep asking for it, but nobody actually did anything, so I
decided I might as well bite the bullet, and instead of telling people
they could add a "--follow" flag to "git log" to do what they want to do,
I decided that it looks like I just have to do it for them..
The code wasn't actually that complicated, in that the diffstat for this
patch literally says "70 insertions(+), 1 deletions(-)", but I will have
to admit that in order to get to this fairly simple patch, you did have to
know and understand the internal git diff generation machinery pretty
well, and had to really be able to follow how commit generation interacts
with generating patches and generating the log.
So I suspect that while I was right that it wasn't that hard, I might have
been expecting too much of random people - this patch does seem to be
firmly in the core "Linus or Junio" territory.
To make a long story short: I'm sorry for it taking so long until I just
did it.
I'm not going to guarantee that this works for everybody, but you really
can just look at the patch, and after the appropriate appreciative noises
("Ooh, aah") over how clever I am, you can then just notice that the code
itself isn't really that complicated.
All the real new code is in the new "try_to_follow_renames()" function. It
really isn't rocket science: we notice that the pathname we were looking
at went away, so we start a full tree diff and try to see if we can
instead make that pathname be a rename or a copy from some other previous
pathname. And if we can, we just continue, except we show *that*
particular diff, and ever after we use the _previous_ pathname.
One thing to look out for: the "rename detection" is considered to be a
singular event in the _linear_ "git log" output! That's what people want
to do, but I just wanted to point out that this patch is *not* carrying
around a "commit,pathname" kind of pair and it's *not* going to be able to
notice the file coming from multiple *different* files in earlier history.
IOW, if you use "git log --follow", then you get the stupid CVS/SVN kind
of "files have single identities" kind of semantics, and git log will just
pick the identity based on the normal move/copy heuristics _as_if_ the
history could be linearized.
Put another way: I think the model is broken, but given the broken model,
I think this patch does just about as well as you can do. If you have
merges with the same "file" having different filenames over the two
branches, git will just end up picking _one_ of the pathnames at the point
where the newer one goes away. It never looks at multiple pathnames in
parallel.
And if you understood all that, you probably didn't need it explained, and
if you didn't understand the above blathering, it doesn't really mtter to
you. What matters to you is that you can now do
git log -p --follow builtin-rev-list.c
and it will find the point where the old "rev-list.c" got renamed to
"builtin-rev-list.c" and show it as such.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-06-19 23:22:46 +02:00
|
|
|
/* Can't prune commits with rename following: the paths change.. */
|
2007-11-10 20:05:14 +01:00
|
|
|
if (!DIFF_OPT_TST(&revs->diffopt, FOLLOW_RENAMES))
|
2007-11-05 22:22:34 +01:00
|
|
|
revs->prune = 1;
|
Common option parsing for "git log --diff" and friends
This basically does a few things that are sadly somewhat interdependent,
and nontrivial to split out
- get rid of "struct log_tree_opt"
The fields in "log_tree_opt" are moved into "struct rev_info", and all
users of log_tree_opt are changed to use the rev_info struct instead.
- add the parsing for the log_tree_opt arguments to "setup_revision()"
- make setup_revision set a flag (revs->diff) if the diff-related
arguments were used. This allows "git log" to decide whether it wants
to show diffs or not.
- make setup_revision() also initialize the diffopt part of rev_info
(which we had from before, but we just didn't initialize it)
- make setup_revision() do all the "finishing touches" on it all (it will
do the proper flag combination logic, and call "diff_setup_done()")
Now, that was the easy and straightforward part.
The slightly more involved part is that some of the programs that want to
use the new-and-improved rev_info parsing don't actually want _commits_,
they may want tree'ish arguments instead. That meant that I had to change
setup_revision() to parse the arguments not into the "revs->commits" list,
but into the "revs->pending_objects" list.
Then, when we do "prepare_revision_walk()", we walk that list, and create
the sorted commit list from there.
This actually cleaned some stuff up, but it's the less obvious part of the
patch, and re-organized the "revision.c" logic somewhat. It actually paves
the way for splitting argument parsing _entirely_ out of "revision.c",
since now the argument parsing really is totally independent of the commit
walking: that didn't use to be true, since there was lots of overlap with
get_commit_reference() handling etc, now the _only_ overlap is the shared
(and trivial) "add_pending_object()" thing.
However, I didn't do that file split, just because I wanted the diff
itself to be smaller, and show the actual changes more clearly. If this
gets accepted, I'll do further cleanups then - that includes the file
split, but also using the new infrastructure to do a nicer "git diff" etc.
Even in this form, it actually ends up removing more lines than it adds.
It's nice to note how simple and straightforward this makes the built-in
"git log" command, even though it continues to support all the diff flags
too. It doesn't get much simpler that this.
I think this is worth merging soonish, because it does allow for future
cleanup and even more sharing of code. However, it obviously touches
"revision.c", which is subtle. I've tested that it passes all the tests we
have, and it passes my "looks sane" detector, but somebody else should
also give it a good look-over.
[jc: squashed the original and three "oops this too" updates, with
another fix-up.]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-15 01:52:13 +02:00
|
|
|
if (!revs->full_diff)
|
2013-07-14 10:35:58 +02:00
|
|
|
copy_pathspec(&revs->diffopt.pathspec,
|
|
|
|
&revs->prune_data);
|
2006-03-10 10:21:39 +01:00
|
|
|
}
|
2010-03-09 08:27:25 +01:00
|
|
|
if (revs->combine_merges)
|
Common option parsing for "git log --diff" and friends
This basically does a few things that are sadly somewhat interdependent,
and nontrivial to split out
- get rid of "struct log_tree_opt"
The fields in "log_tree_opt" are moved into "struct rev_info", and all
users of log_tree_opt are changed to use the rev_info struct instead.
- add the parsing for the log_tree_opt arguments to "setup_revision()"
- make setup_revision set a flag (revs->diff) if the diff-related
arguments were used. This allows "git log" to decide whether it wants
to show diffs or not.
- make setup_revision() also initialize the diffopt part of rev_info
(which we had from before, but we just didn't initialize it)
- make setup_revision() do all the "finishing touches" on it all (it will
do the proper flag combination logic, and call "diff_setup_done()")
Now, that was the easy and straightforward part.
The slightly more involved part is that some of the programs that want to
use the new-and-improved rev_info parsing don't actually want _commits_,
they may want tree'ish arguments instead. That meant that I had to change
setup_revision() to parse the arguments not into the "revs->commits" list,
but into the "revs->pending_objects" list.
Then, when we do "prepare_revision_walk()", we walk that list, and create
the sorted commit list from there.
This actually cleaned some stuff up, but it's the less obvious part of the
patch, and re-organized the "revision.c" logic somewhat. It actually paves
the way for splitting argument parsing _entirely_ out of "revision.c",
since now the argument parsing really is totally independent of the commit
walking: that didn't use to be true, since there was lots of overlap with
get_commit_reference() handling etc, now the _only_ overlap is the shared
(and trivial) "add_pending_object()" thing.
However, I didn't do that file split, just because I wanted the diff
itself to be smaller, and show the actual changes more clearly. If this
gets accepted, I'll do further cleanups then - that includes the file
split, but also using the new infrastructure to do a nicer "git diff" etc.
Even in this form, it actually ends up removing more lines than it adds.
It's nice to note how simple and straightforward this makes the built-in
"git log" command, even though it continues to support all the diff flags
too. It doesn't get much simpler that this.
I think this is worth merging soonish, because it does allow for future
cleanup and even more sharing of code. However, it obviously touches
"revision.c", which is subtle. I've tested that it passes all the tests we
have, and it passes my "looks sane" detector, but somebody else should
also give it a good look-over.
[jc: squashed the original and three "oops this too" updates, with
another fix-up.]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-15 01:52:13 +02:00
|
|
|
revs->ignore_merges = 0;
|
|
|
|
revs->diffopt.abbrev = revs->abbrev;
|
Implement line-history search (git log -L)
This is a rewrite of much of Bo's work, mainly in an effort to split
it into smaller, easier to understand routines.
The algorithm is built around the struct range_set, which encodes a
series of line ranges as intervals [a,b). This is used in two
contexts:
* A set of lines we are tracking (which will change as we dig through
history).
* To encode diffs, as pairs of ranges.
The main routine is range_set_map_across_diff(). It processes the
diff between a commit C and some parent P. It determines which diff
hunks are relevant to the ranges tracked in C, and computes the new
ranges for P.
The algorithm is then simply to process history in topological order
from newest to oldest, computing ranges and (partial) diffs. At
branch points, we need to merge the ranges we are watching. We will
find that many commits do not affect the chosen ranges, and mark them
TREESAME (in addition to those already filtered by pathspec limiting).
Another pass of history simplification then gets rid of such commits.
This is wired as an extra filtering pass in the log machinery. This
currently only reduces code duplication, but should allow for other
simplifications and options to be used.
Finally, we hook a diff printer into the output chain. Ideally we
would wire directly into the diff logic, to optionally use features
like word diff. However, that will require some major reworking of
the diff chain, so we completely replace the output with our own diff
for now.
As this was a GSoC project, and has quite some history by now, many
people have helped. In no particular order, thanks go to
Jakub Narebski <jnareb@gmail.com>
Jens Lehmann <Jens.Lehmann@web.de>
Jonathan Nieder <jrnieder@gmail.com>
Junio C Hamano <gitster@pobox.com>
Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Will Palmer <wmpalmer@gmail.com>
Apologies to everyone I forgot.
Signed-off-by: Bo Yang <struggleyb.nku@gmail.com>
Signed-off-by: Thomas Rast <trast@student.ethz.ch>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-28 17:47:32 +01:00
|
|
|
|
|
|
|
if (revs->line_level_traverse) {
|
|
|
|
revs->limited = 1;
|
|
|
|
revs->topo_order = 1;
|
|
|
|
}
|
|
|
|
|
2012-08-03 14:16:24 +02:00
|
|
|
diff_setup_done(&revs->diffopt);
|
2006-03-10 10:21:39 +01:00
|
|
|
|
2012-10-10 01:40:03 +02:00
|
|
|
grep_commit_pattern_type(GREP_PATTERN_TYPE_UNSPECIFIED,
|
|
|
|
&revs->grep_filter);
|
2008-08-25 08:15:05 +02:00
|
|
|
compile_grep_patterns(&revs->grep_filter);
|
2006-09-18 02:23:20 +02:00
|
|
|
|
2007-08-20 04:33:43 +02:00
|
|
|
if (revs->reverse && revs->reflog_info)
|
|
|
|
die("cannot combine --reverse with --walk-reflogs");
|
2008-07-09 00:25:44 +02:00
|
|
|
if (revs->rewrite_parents && revs->children.name)
|
2008-04-03 11:12:06 +02:00
|
|
|
die("cannot combine --parents and --children");
|
2007-08-20 04:33:43 +02:00
|
|
|
|
2008-05-04 12:36:54 +02:00
|
|
|
/*
|
|
|
|
* Limitations on the graph functionality
|
|
|
|
*/
|
|
|
|
if (revs->reverse && revs->graph)
|
|
|
|
die("cannot combine --reverse with --graph");
|
|
|
|
|
|
|
|
if (revs->reflog_info && revs->graph)
|
|
|
|
die("cannot combine --walk-reflogs with --graph");
|
2015-03-11 03:13:02 +01:00
|
|
|
if (revs->no_walk && revs->graph)
|
|
|
|
die("cannot combine --no-walk with --graph");
|
2012-09-29 20:59:52 +02:00
|
|
|
if (!revs->reflog_info && revs->grep_filter.use_reflog_filter)
|
|
|
|
die("cannot use --grep-reflog without --walk-reflogs");
|
2008-05-04 12:36:54 +02:00
|
|
|
|
2015-03-19 23:14:08 +01:00
|
|
|
if (revs->first_parent_only && revs->bisect)
|
|
|
|
die(_("--first-parent is incompatible with --bisect"));
|
|
|
|
|
2006-02-26 01:19:46 +01:00
|
|
|
return left;
|
|
|
|
}
|
2006-02-28 20:24:00 +01:00
|
|
|
|
2008-04-03 11:12:06 +02:00
|
|
|
static void add_child(struct rev_info *revs, struct commit *parent, struct commit *child)
|
|
|
|
{
|
|
|
|
struct commit_list *l = xcalloc(1, sizeof(*l));
|
|
|
|
|
|
|
|
l->item = child;
|
|
|
|
l->next = add_decoration(&revs->children, &parent->object, l);
|
|
|
|
}
|
|
|
|
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
static int remove_duplicate_parents(struct rev_info *revs, struct commit *commit)
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
{
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
struct treesame_state *ts = lookup_decoration(&revs->treesame, &commit->object);
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
struct commit_list **pp, *p;
|
|
|
|
int surviving_parents;
|
|
|
|
|
|
|
|
/* Examine existing parents while marking ones we have seen... */
|
|
|
|
pp = &commit->parents;
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
surviving_parents = 0;
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
while ((p = *pp) != NULL) {
|
|
|
|
struct commit *parent = p->item;
|
|
|
|
if (parent->object.flags & TMP_MARK) {
|
|
|
|
*pp = p->next;
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
if (ts)
|
|
|
|
compact_treesame(revs, commit, surviving_parents);
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
parent->object.flags |= TMP_MARK;
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
surviving_parents++;
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
pp = &p->next;
|
|
|
|
}
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
/* clear the temporary mark */
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
for (p = commit->parents; p; p = p->next) {
|
|
|
|
p->item->object.flags &= ~TMP_MARK;
|
|
|
|
}
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
/* no update_treesame() - removing duplicates can't affect TREESAME */
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
return surviving_parents;
|
|
|
|
}
|
|
|
|
|
2008-08-14 19:59:44 +02:00
|
|
|
struct merge_simplify_state {
|
|
|
|
struct commit *simplified;
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct merge_simplify_state *locate_simplify_state(struct rev_info *revs, struct commit *commit)
|
|
|
|
{
|
|
|
|
struct merge_simplify_state *st;
|
|
|
|
|
|
|
|
st = lookup_decoration(&revs->merge_simplification, &commit->object);
|
|
|
|
if (!st) {
|
|
|
|
st = xcalloc(1, sizeof(*st));
|
|
|
|
add_decoration(&revs->merge_simplification, &commit->object, st);
|
|
|
|
}
|
|
|
|
return st;
|
|
|
|
}
|
|
|
|
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
static int mark_redundant_parents(struct rev_info *revs, struct commit *commit)
|
|
|
|
{
|
|
|
|
struct commit_list *h = reduce_heads(commit->parents);
|
|
|
|
int i = 0, marked = 0;
|
|
|
|
struct commit_list *po, *pn;
|
|
|
|
|
|
|
|
/* Want these for sanity-checking only */
|
|
|
|
int orig_cnt = commit_list_count(commit->parents);
|
|
|
|
int cnt = commit_list_count(h);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Not ready to remove items yet, just mark them for now, based
|
|
|
|
* on the output of reduce_heads(). reduce_heads outputs the reduced
|
|
|
|
* set in its original order, so this isn't too hard.
|
|
|
|
*/
|
|
|
|
po = commit->parents;
|
|
|
|
pn = h;
|
|
|
|
while (po) {
|
|
|
|
if (pn && po->item == pn->item) {
|
|
|
|
pn = pn->next;
|
|
|
|
i++;
|
|
|
|
} else {
|
|
|
|
po->item->object.flags |= TMP_MARK;
|
|
|
|
marked++;
|
|
|
|
}
|
|
|
|
po=po->next;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (i != cnt || cnt+marked != orig_cnt)
|
|
|
|
die("mark_redundant_parents %d %d %d %d", orig_cnt, cnt, i, marked);
|
|
|
|
|
|
|
|
free_commit_list(h);
|
|
|
|
|
|
|
|
return marked;
|
|
|
|
}
|
|
|
|
|
2013-05-16 17:32:37 +02:00
|
|
|
static int mark_treesame_root_parents(struct rev_info *revs, struct commit *commit)
|
|
|
|
{
|
|
|
|
struct commit_list *p;
|
|
|
|
int marked = 0;
|
|
|
|
|
|
|
|
for (p = commit->parents; p; p = p->next) {
|
|
|
|
struct commit *parent = p->item;
|
|
|
|
if (!parent->parents && (parent->object.flags & TREESAME)) {
|
|
|
|
parent->object.flags |= TMP_MARK;
|
|
|
|
marked++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return marked;
|
|
|
|
}
|
|
|
|
|
2013-05-16 17:32:36 +02:00
|
|
|
/*
|
|
|
|
* Awkward naming - this means one parent we are TREESAME to.
|
|
|
|
* cf mark_treesame_root_parents: root parents that are TREESAME (to an
|
|
|
|
* empty tree). Better name suggestions?
|
|
|
|
*/
|
|
|
|
static int leave_one_treesame_to_parent(struct rev_info *revs, struct commit *commit)
|
|
|
|
{
|
|
|
|
struct treesame_state *ts = lookup_decoration(&revs->treesame, &commit->object);
|
|
|
|
struct commit *unmarked = NULL, *marked = NULL;
|
|
|
|
struct commit_list *p;
|
|
|
|
unsigned n;
|
|
|
|
|
|
|
|
for (p = commit->parents, n = 0; p; p = p->next, n++) {
|
|
|
|
if (ts->treesame[n]) {
|
|
|
|
if (p->item->object.flags & TMP_MARK) {
|
|
|
|
if (!marked)
|
|
|
|
marked = p->item;
|
|
|
|
} else {
|
|
|
|
if (!unmarked) {
|
|
|
|
unmarked = p->item;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we are TREESAME to a marked-for-deletion parent, but not to any
|
|
|
|
* unmarked parents, unmark the first TREESAME parent. This is the
|
|
|
|
* parent that the default simplify_history==1 scan would have followed,
|
|
|
|
* and it doesn't make sense to omit that path when asking for a
|
|
|
|
* simplified full history. Retaining it improves the chances of
|
|
|
|
* understanding odd missed merges that took an old version of a file.
|
|
|
|
*
|
|
|
|
* Example:
|
|
|
|
*
|
|
|
|
* I--------*X A modified the file, but mainline merge X used
|
|
|
|
* \ / "-s ours", so took the version from I. X is
|
|
|
|
* `-*A--' TREESAME to I and !TREESAME to A.
|
|
|
|
*
|
|
|
|
* Default log from X would produce "I". Without this check,
|
|
|
|
* --full-history --simplify-merges would produce "I-A-X", showing
|
|
|
|
* the merge commit X and that it changed A, but not making clear that
|
|
|
|
* it had just taken the I version. With this check, the topology above
|
|
|
|
* is retained.
|
|
|
|
*
|
|
|
|
* Note that it is possible that the simplification chooses a different
|
|
|
|
* TREESAME parent from the default, in which case this test doesn't
|
|
|
|
* activate, and we _do_ drop the default parent. Example:
|
|
|
|
*
|
|
|
|
* I------X A modified the file, but it was reverted in B,
|
|
|
|
* \ / meaning mainline merge X is TREESAME to both
|
|
|
|
* *A-*B parents.
|
|
|
|
*
|
|
|
|
* Default log would produce "I" by following the first parent;
|
|
|
|
* --full-history --simplify-merges will produce "I-A-B". But this is a
|
|
|
|
* reasonable result - it presents a logical full history leading from
|
|
|
|
* I to X, and X is not an important merge.
|
|
|
|
*/
|
|
|
|
if (!unmarked && marked) {
|
|
|
|
marked->object.flags &= ~TMP_MARK;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
static int remove_marked_parents(struct rev_info *revs, struct commit *commit)
|
|
|
|
{
|
|
|
|
struct commit_list **pp, *p;
|
|
|
|
int nth_parent, removed = 0;
|
|
|
|
|
|
|
|
pp = &commit->parents;
|
|
|
|
nth_parent = 0;
|
|
|
|
while ((p = *pp) != NULL) {
|
|
|
|
struct commit *parent = p->item;
|
|
|
|
if (parent->object.flags & TMP_MARK) {
|
|
|
|
parent->object.flags &= ~TMP_MARK;
|
|
|
|
*pp = p->next;
|
|
|
|
free(p);
|
|
|
|
removed++;
|
|
|
|
compact_treesame(revs, commit, nth_parent);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
pp = &p->next;
|
|
|
|
nth_parent++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Removing parents can only increase TREESAMEness */
|
|
|
|
if (removed && !(commit->object.flags & TREESAME))
|
|
|
|
update_treesame(revs, commit);
|
|
|
|
|
|
|
|
return nth_parent;
|
|
|
|
}
|
|
|
|
|
2008-08-14 19:59:44 +02:00
|
|
|
static struct commit_list **simplify_one(struct rev_info *revs, struct commit *commit, struct commit_list **tail)
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
{
|
|
|
|
struct commit_list *p;
|
2013-05-16 17:32:39 +02:00
|
|
|
struct commit *parent;
|
2008-08-14 19:59:44 +02:00
|
|
|
struct merge_simplify_state *st, *pst;
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
int cnt;
|
|
|
|
|
2008-08-14 19:59:44 +02:00
|
|
|
st = locate_simplify_state(revs, commit);
|
|
|
|
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
/*
|
|
|
|
* Have we handled this one?
|
|
|
|
*/
|
2008-08-14 19:59:44 +02:00
|
|
|
if (st->simplified)
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
return tail;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* An UNINTERESTING commit simplifies to itself, so does a
|
|
|
|
* root commit. We do not rewrite parents of such commit
|
|
|
|
* anyway.
|
|
|
|
*/
|
|
|
|
if ((commit->object.flags & UNINTERESTING) || !commit->parents) {
|
2008-08-14 19:59:44 +02:00
|
|
|
st->simplified = commit;
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
return tail;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2012-06-08 23:56:03 +02:00
|
|
|
* Do we know what commit all of our parents that matter
|
|
|
|
* should be rewritten to? Otherwise we are not ready to
|
|
|
|
* rewrite this one yet.
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
*/
|
|
|
|
for (cnt = 0, p = commit->parents; p; p = p->next) {
|
2008-08-14 19:59:44 +02:00
|
|
|
pst = locate_simplify_state(revs, p->item);
|
|
|
|
if (!pst->simplified) {
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
tail = &commit_list_insert(p->item, tail)->next;
|
|
|
|
cnt++;
|
|
|
|
}
|
2012-06-08 23:56:03 +02:00
|
|
|
if (revs->first_parent_only)
|
|
|
|
break;
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
}
|
2008-08-18 09:37:34 +02:00
|
|
|
if (cnt) {
|
|
|
|
tail = &commit_list_insert(commit, tail)->next;
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
return tail;
|
2008-08-18 09:37:34 +02:00
|
|
|
}
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
|
|
|
|
/*
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
* Rewrite our list of parents. Note that this cannot
|
|
|
|
* affect our TREESAME flags in any way - a commit is
|
|
|
|
* always TREESAME to its simplification.
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
*/
|
2008-08-14 19:59:44 +02:00
|
|
|
for (p = commit->parents; p; p = p->next) {
|
|
|
|
pst = locate_simplify_state(revs, p->item);
|
|
|
|
p->item = pst->simplified;
|
2012-06-08 23:56:03 +02:00
|
|
|
if (revs->first_parent_only)
|
|
|
|
break;
|
2008-08-14 19:59:44 +02:00
|
|
|
}
|
2013-01-17 23:23:03 +01:00
|
|
|
|
2013-04-08 22:10:27 +02:00
|
|
|
if (revs->first_parent_only)
|
2012-06-08 23:56:03 +02:00
|
|
|
cnt = 1;
|
2013-04-08 22:10:27 +02:00
|
|
|
else
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
cnt = remove_duplicate_parents(revs, commit);
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* It is possible that we are a merge and one side branch
|
|
|
|
* does not have any commit that touches the given paths;
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
* in such a case, the immediate parent from that branch
|
|
|
|
* will be rewritten to be the merge base.
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
*
|
|
|
|
* o----X X: the commit we are looking at;
|
|
|
|
* / / o: a commit that touches the paths;
|
|
|
|
* ---o----'
|
|
|
|
*
|
2013-05-16 17:32:37 +02:00
|
|
|
* Further, a merge of an independent branch that doesn't
|
|
|
|
* touch the path will reduce to a treesame root parent:
|
|
|
|
*
|
|
|
|
* ----o----X X: the commit we are looking at;
|
|
|
|
* / o: a commit that touches the paths;
|
|
|
|
* r r: a root commit not touching the paths
|
|
|
|
*
|
|
|
|
* Detect and simplify both cases.
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
*/
|
|
|
|
if (1 < cnt) {
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
int marked = mark_redundant_parents(revs, commit);
|
2013-05-16 17:32:37 +02:00
|
|
|
marked += mark_treesame_root_parents(revs, commit);
|
2013-05-16 17:32:36 +02:00
|
|
|
if (marked)
|
|
|
|
marked -= leave_one_treesame_to_parent(revs, commit);
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
if (marked)
|
|
|
|
cnt = remove_marked_parents(revs, commit);
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* A commit simplifies to itself if it is a root, if it is
|
|
|
|
* UNINTERESTING, if it touches the given paths, or if it is a
|
2013-05-16 17:32:39 +02:00
|
|
|
* merge and its parents don't simplify to one relevant commit
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
* (the first two cases are already handled at the beginning of
|
|
|
|
* this function).
|
|
|
|
*
|
2013-05-16 17:32:39 +02:00
|
|
|
* Otherwise, it simplifies to what its sole relevant parent
|
|
|
|
* simplifies to.
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
*/
|
|
|
|
if (!cnt ||
|
|
|
|
(commit->object.flags & UNINTERESTING) ||
|
|
|
|
!(commit->object.flags & TREESAME) ||
|
2013-05-16 17:32:39 +02:00
|
|
|
(parent = one_relevant_parent(revs, commit->parents)) == NULL)
|
2008-08-14 19:59:44 +02:00
|
|
|
st->simplified = commit;
|
|
|
|
else {
|
2013-05-16 17:32:39 +02:00
|
|
|
pst = locate_simplify_state(revs, parent);
|
2008-08-14 19:59:44 +02:00
|
|
|
st->simplified = pst->simplified;
|
|
|
|
}
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
return tail;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void simplify_merges(struct rev_info *revs)
|
|
|
|
{
|
2012-06-08 23:50:22 +02:00
|
|
|
struct commit_list *list, *next;
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
struct commit_list *yet_to_do, **tail;
|
2012-06-08 23:50:22 +02:00
|
|
|
struct commit *commit;
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
|
2008-08-14 22:52:36 +02:00
|
|
|
if (!revs->prune)
|
|
|
|
return;
|
2008-08-04 02:47:16 +02:00
|
|
|
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
/* feed the list reversed */
|
|
|
|
yet_to_do = NULL;
|
2012-06-08 23:50:22 +02:00
|
|
|
for (list = revs->commits; list; list = next) {
|
|
|
|
commit = list->item;
|
|
|
|
next = list->next;
|
|
|
|
/*
|
|
|
|
* Do not free(list) here yet; the original list
|
|
|
|
* is used later in this function.
|
|
|
|
*/
|
|
|
|
commit_list_insert(commit, &yet_to_do);
|
|
|
|
}
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
while (yet_to_do) {
|
|
|
|
list = yet_to_do;
|
|
|
|
yet_to_do = NULL;
|
|
|
|
tail = &yet_to_do;
|
|
|
|
while (list) {
|
2015-10-24 18:21:31 +02:00
|
|
|
commit = pop_commit(&list);
|
2008-08-14 19:59:44 +02:00
|
|
|
tail = simplify_one(revs, commit, tail);
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* clean up the result, removing the simplified ones */
|
|
|
|
list = revs->commits;
|
|
|
|
revs->commits = NULL;
|
|
|
|
tail = &revs->commits;
|
|
|
|
while (list) {
|
2008-08-14 19:59:44 +02:00
|
|
|
struct merge_simplify_state *st;
|
2012-06-08 23:50:22 +02:00
|
|
|
|
2015-10-24 18:21:31 +02:00
|
|
|
commit = pop_commit(&list);
|
2008-08-14 19:59:44 +02:00
|
|
|
st = locate_simplify_state(revs, commit);
|
|
|
|
if (st->simplified == commit)
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
tail = &commit_list_insert(commit, tail)->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-04-03 11:12:06 +02:00
|
|
|
static void set_children(struct rev_info *revs)
|
|
|
|
{
|
|
|
|
struct commit_list *l;
|
|
|
|
for (l = revs->commits; l; l = l->next) {
|
|
|
|
struct commit *commit = l->item;
|
|
|
|
struct commit_list *p;
|
|
|
|
|
|
|
|
for (p = commit->parents; p; p = p->next)
|
|
|
|
add_child(revs, p->item, commit);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-03-29 09:21:21 +02:00
|
|
|
void reset_revision_walk(void)
|
|
|
|
{
|
|
|
|
clear_object_flags(SEEN | ADDED | SHOWN);
|
|
|
|
}
|
|
|
|
|
2007-05-04 23:54:57 +02:00
|
|
|
int prepare_revision_walk(struct rev_info *revs)
|
2006-02-28 20:24:00 +01:00
|
|
|
{
|
clean up name allocation in prepare_revision_walk
When we enter prepare_revision_walk, we have zero or more
entries in our "pending" array. We disconnect that array
from the rev_info, and then process each entry:
1. If the entry is a commit and the --source option is in
effect, we keep a pointer to the object name.
2. Otherwise, we re-add the item to the pending list with
a blank name.
We then throw away the old array by freeing the array
itself, but do not touch the "name" field of each entry. For
any items of type (2), we leak the memory associated with
the name. This commit fixes that by calling object_array_clear,
which handles the cleanup for us.
That breaks (1), though, because it depends on the memory
pointed to by the name to last forever. We can solve that by
making a copy of the name. This is slightly less efficient,
but it shouldn't matter in practice, as we do it only for
the tip commits of the traversal.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-10-16 00:35:12 +02:00
|
|
|
int i;
|
|
|
|
struct object_array old_pending;
|
2012-04-25 22:35:41 +02:00
|
|
|
struct commit_list **next = &revs->commits;
|
Common option parsing for "git log --diff" and friends
This basically does a few things that are sadly somewhat interdependent,
and nontrivial to split out
- get rid of "struct log_tree_opt"
The fields in "log_tree_opt" are moved into "struct rev_info", and all
users of log_tree_opt are changed to use the rev_info struct instead.
- add the parsing for the log_tree_opt arguments to "setup_revision()"
- make setup_revision set a flag (revs->diff) if the diff-related
arguments were used. This allows "git log" to decide whether it wants
to show diffs or not.
- make setup_revision() also initialize the diffopt part of rev_info
(which we had from before, but we just didn't initialize it)
- make setup_revision() do all the "finishing touches" on it all (it will
do the proper flag combination logic, and call "diff_setup_done()")
Now, that was the easy and straightforward part.
The slightly more involved part is that some of the programs that want to
use the new-and-improved rev_info parsing don't actually want _commits_,
they may want tree'ish arguments instead. That meant that I had to change
setup_revision() to parse the arguments not into the "revs->commits" list,
but into the "revs->pending_objects" list.
Then, when we do "prepare_revision_walk()", we walk that list, and create
the sorted commit list from there.
This actually cleaned some stuff up, but it's the less obvious part of the
patch, and re-organized the "revision.c" logic somewhat. It actually paves
the way for splitting argument parsing _entirely_ out of "revision.c",
since now the argument parsing really is totally independent of the commit
walking: that didn't use to be true, since there was lots of overlap with
get_commit_reference() handling etc, now the _only_ overlap is the shared
(and trivial) "add_pending_object()" thing.
However, I didn't do that file split, just because I wanted the diff
itself to be smaller, and show the actual changes more clearly. If this
gets accepted, I'll do further cleanups then - that includes the file
split, but also using the new infrastructure to do a nicer "git diff" etc.
Even in this form, it actually ends up removing more lines than it adds.
It's nice to note how simple and straightforward this makes the built-in
"git log" command, even though it continues to support all the diff flags
too. It doesn't get much simpler that this.
I think this is worth merging soonish, because it does allow for future
cleanup and even more sharing of code. However, it obviously touches
"revision.c", which is subtle. I've tested that it passes all the tests we
have, and it passes my "looks sane" detector, but somebody else should
also give it a good look-over.
[jc: squashed the original and three "oops this too" updates, with
another fix-up.]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-15 01:52:13 +02:00
|
|
|
|
clean up name allocation in prepare_revision_walk
When we enter prepare_revision_walk, we have zero or more
entries in our "pending" array. We disconnect that array
from the rev_info, and then process each entry:
1. If the entry is a commit and the --source option is in
effect, we keep a pointer to the object name.
2. Otherwise, we re-add the item to the pending list with
a blank name.
We then throw away the old array by freeing the array
itself, but do not touch the "name" field of each entry. For
any items of type (2), we leak the memory associated with
the name. This commit fixes that by calling object_array_clear,
which handles the cleanup for us.
That breaks (1), though, because it depends on the memory
pointed to by the name to last forever. We can solve that by
making a copy of the name. This is slightly less efficient,
but it shouldn't matter in practice, as we do it only for
the tip commits of the traversal.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-10-16 00:35:12 +02:00
|
|
|
memcpy(&old_pending, &revs->pending, sizeof(old_pending));
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 02:42:35 +02:00
|
|
|
revs->pending.nr = 0;
|
|
|
|
revs->pending.alloc = 0;
|
|
|
|
revs->pending.objects = NULL;
|
clean up name allocation in prepare_revision_walk
When we enter prepare_revision_walk, we have zero or more
entries in our "pending" array. We disconnect that array
from the rev_info, and then process each entry:
1. If the entry is a commit and the --source option is in
effect, we keep a pointer to the object name.
2. Otherwise, we re-add the item to the pending list with
a blank name.
We then throw away the old array by freeing the array
itself, but do not touch the "name" field of each entry. For
any items of type (2), we leak the memory associated with
the name. This commit fixes that by calling object_array_clear,
which handles the cleanup for us.
That breaks (1), though, because it depends on the memory
pointed to by the name to last forever. We can solve that by
making a copy of the name. This is slightly less efficient,
but it shouldn't matter in practice, as we do it only for
the tip commits of the traversal.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-10-16 00:35:12 +02:00
|
|
|
for (i = 0; i < old_pending.nr; i++) {
|
|
|
|
struct object_array_entry *e = old_pending.objects + i;
|
traverse_commit_list: support pending blobs/trees with paths
When we call traverse_commit_list, we may have trees and
blobs in the pending array. As we process these, we pass the
"name" field from the pending entry as the path of the
object within the tree (which then becomes the root path if
we recurse into subtrees).
When we set up the traversal in prepare_revision_walk,
though, the "name" field of any pending trees and blobs is
likely to be the ref at which we found the object. We would
not want to make this part of the path (e.g., doing so would
make "git rev-list --objects v2.6.11-tree" in linux.git show
paths like "v2.6.11-tree/Makefile", which is nonsensical).
Therefore prepare_revision_walk sets the name field of each
pending tree and blobs to the empty string.
However, this leaves no room for a caller who does know the
correct path of a pending object to propagate that
information to the revision walker. We can fix this by
making two related changes:
1. Use the "path" field as the path instead of the "name"
field in traverse_commit_list. If the path is not set,
default to "" (which is what we always ended up with in
the current code, because of prepare_revision_walk).
2. In prepare_revision_walk, make a complete copy of the
entry. This makes the path field available to the
walker (if there is one), solving our problem.
Leaving the name field intact is now OK, as we do not
use it as a path due to point (1) above (and we can use
it to make more meaningful error messages if we want).
We also make the original "mode" field available to the
walker, though it does not actually use it.
Note that we still re-add the pending objects and free the
old ones (so we may strdup the path and name only to free
the old ones). This could be made more efficient by simply
copying the object_array entries that we are keeping.
However, that would require more restructuring of the code,
and is not done here.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-10-16 00:43:19 +02:00
|
|
|
struct commit *commit = handle_commit(revs, e);
|
Common option parsing for "git log --diff" and friends
This basically does a few things that are sadly somewhat interdependent,
and nontrivial to split out
- get rid of "struct log_tree_opt"
The fields in "log_tree_opt" are moved into "struct rev_info", and all
users of log_tree_opt are changed to use the rev_info struct instead.
- add the parsing for the log_tree_opt arguments to "setup_revision()"
- make setup_revision set a flag (revs->diff) if the diff-related
arguments were used. This allows "git log" to decide whether it wants
to show diffs or not.
- make setup_revision() also initialize the diffopt part of rev_info
(which we had from before, but we just didn't initialize it)
- make setup_revision() do all the "finishing touches" on it all (it will
do the proper flag combination logic, and call "diff_setup_done()")
Now, that was the easy and straightforward part.
The slightly more involved part is that some of the programs that want to
use the new-and-improved rev_info parsing don't actually want _commits_,
they may want tree'ish arguments instead. That meant that I had to change
setup_revision() to parse the arguments not into the "revs->commits" list,
but into the "revs->pending_objects" list.
Then, when we do "prepare_revision_walk()", we walk that list, and create
the sorted commit list from there.
This actually cleaned some stuff up, but it's the less obvious part of the
patch, and re-organized the "revision.c" logic somewhat. It actually paves
the way for splitting argument parsing _entirely_ out of "revision.c",
since now the argument parsing really is totally independent of the commit
walking: that didn't use to be true, since there was lots of overlap with
get_commit_reference() handling etc, now the _only_ overlap is the shared
(and trivial) "add_pending_object()" thing.
However, I didn't do that file split, just because I wanted the diff
itself to be smaller, and show the actual changes more clearly. If this
gets accepted, I'll do further cleanups then - that includes the file
split, but also using the new infrastructure to do a nicer "git diff" etc.
Even in this form, it actually ends up removing more lines than it adds.
It's nice to note how simple and straightforward this makes the built-in
"git log" command, even though it continues to support all the diff flags
too. It doesn't get much simpler that this.
I think this is worth merging soonish, because it does allow for future
cleanup and even more sharing of code. However, it obviously touches
"revision.c", which is subtle. I've tested that it passes all the tests we
have, and it passes my "looks sane" detector, but somebody else should
also give it a good look-over.
[jc: squashed the original and three "oops this too" updates, with
another fix-up.]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-15 01:52:13 +02:00
|
|
|
if (commit) {
|
|
|
|
if (!(commit->object.flags & SEEN)) {
|
|
|
|
commit->object.flags |= SEEN;
|
2012-04-25 22:35:41 +02:00
|
|
|
next = commit_list_append(commit, next);
|
Common option parsing for "git log --diff" and friends
This basically does a few things that are sadly somewhat interdependent,
and nontrivial to split out
- get rid of "struct log_tree_opt"
The fields in "log_tree_opt" are moved into "struct rev_info", and all
users of log_tree_opt are changed to use the rev_info struct instead.
- add the parsing for the log_tree_opt arguments to "setup_revision()"
- make setup_revision set a flag (revs->diff) if the diff-related
arguments were used. This allows "git log" to decide whether it wants
to show diffs or not.
- make setup_revision() also initialize the diffopt part of rev_info
(which we had from before, but we just didn't initialize it)
- make setup_revision() do all the "finishing touches" on it all (it will
do the proper flag combination logic, and call "diff_setup_done()")
Now, that was the easy and straightforward part.
The slightly more involved part is that some of the programs that want to
use the new-and-improved rev_info parsing don't actually want _commits_,
they may want tree'ish arguments instead. That meant that I had to change
setup_revision() to parse the arguments not into the "revs->commits" list,
but into the "revs->pending_objects" list.
Then, when we do "prepare_revision_walk()", we walk that list, and create
the sorted commit list from there.
This actually cleaned some stuff up, but it's the less obvious part of the
patch, and re-organized the "revision.c" logic somewhat. It actually paves
the way for splitting argument parsing _entirely_ out of "revision.c",
since now the argument parsing really is totally independent of the commit
walking: that didn't use to be true, since there was lots of overlap with
get_commit_reference() handling etc, now the _only_ overlap is the shared
(and trivial) "add_pending_object()" thing.
However, I didn't do that file split, just because I wanted the diff
itself to be smaller, and show the actual changes more clearly. If this
gets accepted, I'll do further cleanups then - that includes the file
split, but also using the new infrastructure to do a nicer "git diff" etc.
Even in this form, it actually ends up removing more lines than it adds.
It's nice to note how simple and straightforward this makes the built-in
"git log" command, even though it continues to support all the diff flags
too. It doesn't get much simpler that this.
I think this is worth merging soonish, because it does allow for future
cleanup and even more sharing of code. However, it obviously touches
"revision.c", which is subtle. I've tested that it passes all the tests we
have, and it passes my "looks sane" detector, but somebody else should
also give it a good look-over.
[jc: squashed the original and three "oops this too" updates, with
another fix-up.]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-15 01:52:13 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2011-10-01 17:56:08 +02:00
|
|
|
if (!revs->leak_pending)
|
clean up name allocation in prepare_revision_walk
When we enter prepare_revision_walk, we have zero or more
entries in our "pending" array. We disconnect that array
from the rev_info, and then process each entry:
1. If the entry is a commit and the --source option is in
effect, we keep a pointer to the object name.
2. Otherwise, we re-add the item to the pending list with
a blank name.
We then throw away the old array by freeing the array
itself, but do not touch the "name" field of each entry. For
any items of type (2), we leak the memory associated with
the name. This commit fixes that by calling object_array_clear,
which handles the cleanup for us.
That breaks (1), though, because it depends on the memory
pointed to by the name to last forever. We can solve that by
making a copy of the name. This is slightly less efficient,
but it shouldn't matter in practice, as we do it only for
the tip commits of the traversal.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-10-16 00:35:12 +02:00
|
|
|
object_array_clear(&old_pending);
|
Common option parsing for "git log --diff" and friends
This basically does a few things that are sadly somewhat interdependent,
and nontrivial to split out
- get rid of "struct log_tree_opt"
The fields in "log_tree_opt" are moved into "struct rev_info", and all
users of log_tree_opt are changed to use the rev_info struct instead.
- add the parsing for the log_tree_opt arguments to "setup_revision()"
- make setup_revision set a flag (revs->diff) if the diff-related
arguments were used. This allows "git log" to decide whether it wants
to show diffs or not.
- make setup_revision() also initialize the diffopt part of rev_info
(which we had from before, but we just didn't initialize it)
- make setup_revision() do all the "finishing touches" on it all (it will
do the proper flag combination logic, and call "diff_setup_done()")
Now, that was the easy and straightforward part.
The slightly more involved part is that some of the programs that want to
use the new-and-improved rev_info parsing don't actually want _commits_,
they may want tree'ish arguments instead. That meant that I had to change
setup_revision() to parse the arguments not into the "revs->commits" list,
but into the "revs->pending_objects" list.
Then, when we do "prepare_revision_walk()", we walk that list, and create
the sorted commit list from there.
This actually cleaned some stuff up, but it's the less obvious part of the
patch, and re-organized the "revision.c" logic somewhat. It actually paves
the way for splitting argument parsing _entirely_ out of "revision.c",
since now the argument parsing really is totally independent of the commit
walking: that didn't use to be true, since there was lots of overlap with
get_commit_reference() handling etc, now the _only_ overlap is the shared
(and trivial) "add_pending_object()" thing.
However, I didn't do that file split, just because I wanted the diff
itself to be smaller, and show the actual changes more clearly. If this
gets accepted, I'll do further cleanups then - that includes the file
split, but also using the new infrastructure to do a nicer "git diff" etc.
Even in this form, it actually ends up removing more lines than it adds.
It's nice to note how simple and straightforward this makes the built-in
"git log" command, even though it continues to support all the diff flags
too. It doesn't get much simpler that this.
I think this is worth merging soonish, because it does allow for future
cleanup and even more sharing of code. However, it obviously touches
"revision.c", which is subtle. I've tested that it passes all the tests we
have, and it passes my "looks sane" detector, but somebody else should
also give it a good look-over.
[jc: squashed the original and three "oops this too" updates, with
another fix-up.]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-15 01:52:13 +02:00
|
|
|
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
/* Signal whether we need per-parent treesame decoration */
|
2013-05-16 17:32:39 +02:00
|
|
|
if (revs->simplify_merges ||
|
|
|
|
(revs->limited && limiting_can_increase_treesame(revs)))
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
revs->treesame.name = "treesame";
|
|
|
|
|
teach log --no-walk=unsorted, which avoids sorting
When 'git log' is passed the --no-walk option, no revision walk takes
place, naturally. Perhaps somewhat surprisingly, however, the provided
revisions still get sorted by commit date. So e.g 'git log --no-walk
HEAD HEAD~1' and 'git log --no-walk HEAD~1 HEAD' give the same result
(unless the two revisions share the commit date, in which case they
will retain the order given on the command line). As the commit that
introduced --no-walk (8e64006 (Teach revision machinery about
--no-walk, 2007-07-24)) points out, the sorting is intentional, to
allow things like
git log --abbrev-commit --pretty=oneline --decorate --all --no-walk
to show all refs in order by commit date.
But there are also other cases where the sorting is not wanted, such
as
<command producing revisions in order> |
git log --oneline --no-walk --stdin
To accomodate both cases, leave the decision of whether or not to sort
up to the caller, by allowing --no-walk={sorted,unsorted}, defaulting
to 'sorted' for backward-compatibility reasons.
Signed-off-by: Martin von Zweigbergk <martinvonz@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-08-29 08:15:54 +02:00
|
|
|
if (revs->no_walk != REVISION_WALK_NO_WALK_UNSORTED)
|
|
|
|
commit_list_sort_by_date(&revs->commits);
|
2006-04-15 21:09:56 +02:00
|
|
|
if (revs->no_walk)
|
2007-05-04 23:54:57 +02:00
|
|
|
return 0;
|
2006-02-28 20:24:00 +01:00
|
|
|
if (revs->limited)
|
2007-05-04 23:54:57 +02:00
|
|
|
if (limit_list(revs) < 0)
|
|
|
|
return -1;
|
2006-02-28 20:24:00 +01:00
|
|
|
if (revs->topo_order)
|
toposort: rename "lifo" field
The primary invariant of sort_in_topological_order() is that a
parent commit is not emitted until all children of it are. When
traversing a forked history like this with "git log C E":
A----B----C
\
D----E
we ensure that A is emitted after all of B, C, D, and E are done, B
has to wait until C is done, and D has to wait until E is done.
In some applications, however, we would further want to control how
these child commits B, C, D and E on two parallel ancestry chains
are shown.
Most of the time, we would want to see C and B emitted together, and
then E and D, and finally A (i.e. the --topo-order output). The
"lifo" parameter of the sort_in_topological_order() function is used
to control this behaviour. We start the traversal by knowing two
commits, C and E. While keeping in mind that we also need to
inspect E later, we pick C first to inspect, and we notice and
record that B needs to be inspected. By structuring the "work to be
done" set as a LIFO stack, we ensure that B is inspected next,
before other in-flight commits we had known that we will need to
inspect, e.g. E.
When showing in --date-order, we would want to see commits ordered
by timestamps, i.e. show C, E, B and D in this order before showing
A, possibly mixing commits from two parallel histories together.
When "lifo" parameter is set to false, the function keeps the "work
to be done" set sorted in the date order to realize this semantics.
After inspecting C, we add B to the "work to be done" set, but the
next commit we inspect from the set is E which is newer than B.
The name "lifo", however, is too strongly tied to the way how the
function implements its behaviour, and does not describe what the
behaviour _means_.
Replace this field with an enum rev_sort_order, with two possible
values: REV_SORT_IN_GRAPH_ORDER and REV_SORT_BY_COMMIT_DATE, and
update the existing code. The mechanical replacement rule is:
"lifo == 0" is equivalent to "sort_order == REV_SORT_BY_COMMIT_DATE"
"lifo == 1" is equivalent to "sort_order == REV_SORT_IN_GRAPH_ORDER"
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-06-07 01:07:14 +02:00
|
|
|
sort_in_topological_order(&revs->commits, revs->sort_order);
|
Implement line-history search (git log -L)
This is a rewrite of much of Bo's work, mainly in an effort to split
it into smaller, easier to understand routines.
The algorithm is built around the struct range_set, which encodes a
series of line ranges as intervals [a,b). This is used in two
contexts:
* A set of lines we are tracking (which will change as we dig through
history).
* To encode diffs, as pairs of ranges.
The main routine is range_set_map_across_diff(). It processes the
diff between a commit C and some parent P. It determines which diff
hunks are relevant to the ranges tracked in C, and computes the new
ranges for P.
The algorithm is then simply to process history in topological order
from newest to oldest, computing ranges and (partial) diffs. At
branch points, we need to merge the ranges we are watching. We will
find that many commits do not affect the chosen ranges, and mark them
TREESAME (in addition to those already filtered by pathspec limiting).
Another pass of history simplification then gets rid of such commits.
This is wired as an extra filtering pass in the log machinery. This
currently only reduces code duplication, but should allow for other
simplifications and options to be used.
Finally, we hook a diff printer into the output chain. Ideally we
would wire directly into the diff logic, to optionally use features
like word diff. However, that will require some major reworking of
the diff chain, so we completely replace the output with our own diff
for now.
As this was a GSoC project, and has quite some history by now, many
people have helped. In no particular order, thanks go to
Jakub Narebski <jnareb@gmail.com>
Jens Lehmann <Jens.Lehmann@web.de>
Jonathan Nieder <jrnieder@gmail.com>
Junio C Hamano <gitster@pobox.com>
Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Will Palmer <wmpalmer@gmail.com>
Apologies to everyone I forgot.
Signed-off-by: Bo Yang <struggleyb.nku@gmail.com>
Signed-off-by: Thomas Rast <trast@student.ethz.ch>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-28 17:47:32 +01:00
|
|
|
if (revs->line_level_traverse)
|
|
|
|
line_log_filter(revs);
|
revision traversal: show full history with merge simplification
The --full-history traversal keeps all merges in addition to non-merge
commits that touch paths in the given pathspec. This is useful to view
both sides of a merge in a topology like this:
A---M---o
/ /
---O---B
even when A and B makes identical change to the given paths. The revision
traversal without --full-history aims to come up with the simplest history
to explain the final state of the tree, and one of the side branches can
be pruned away.
The behaviour to keep all merges however is inconvenient if neither A nor
B touches the paths we are interested in. --full-history reduces the
topology to:
---O---M---o
in such a case, without removing M.
This adds a post processing phase on top of --full-history traversal to
remove needless merges from the resulting history.
The idea is to compute, for each commit in the "full history" result set,
the commit that should replace it in the simplified history. The commit
to replace it in the final history is determined as follows:
* In any case, we first figure out the replacement commits of parents of
the commit we are looking at. The commit we are looking at is
rewritten as if the replacement commits of its original parents are its
parents. While doing so, we reduce the redundant parents from the
rewritten parent list by not just removing the identical ones, but also
removing a parent that is an ancestor of another parent.
* After the above parent simplification, if the commit is a root commit,
an UNINTERESTING commit, a merge commit, or modifies the paths we are
interested in, then the replacement commit of the commit is itself. In
other words, such a commit is not dropped from the final result.
The first point above essentially means that the history is rewritten in
the bottom up direction. We can rewrite the parent list of a commit only
after we know how all of its parents are rewritten. This means that the
processing needs to happen on the full history (i.e. after limit_list()).
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-07-31 10:17:41 +02:00
|
|
|
if (revs->simplify_merges)
|
|
|
|
simplify_merges(revs);
|
2008-04-03 11:12:06 +02:00
|
|
|
if (revs->children.name)
|
|
|
|
set_children(revs);
|
2007-05-04 23:54:57 +02:00
|
|
|
return 0;
|
2006-02-28 20:24:00 +01:00
|
|
|
}
|
|
|
|
|
2007-05-04 23:54:57 +02:00
|
|
|
static enum rewrite_result rewrite_one(struct rev_info *revs, struct commit **pp)
|
2006-03-01 00:07:20 +01:00
|
|
|
{
|
2008-07-12 20:00:57 +02:00
|
|
|
struct commit_list *cache = NULL;
|
|
|
|
|
2006-03-01 00:07:20 +01:00
|
|
|
for (;;) {
|
|
|
|
struct commit *p = *pp;
|
Make "--parents" logs also be incremental
The parent rewriting feature caused us to create the whole history in one
go, and then simplify it later, because of how rewrite_parents() had been
written. However, with a little tweaking, it's perfectly possible to do
even that one incrementally.
Right now, this doesn't really much matter, because every user of
"--parents" will probably generally _also_ use "--topo-order", which will
cause the old non-incremental behaviour anyway. However, I'm hopeful that
we could make even the topological sort incremental, or at least
_partially_ so (for example, make it incremental up to the first merge).
In the meantime, this at least moves things in the right direction, and
removes a strange special case.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-04-09 02:05:58 +02:00
|
|
|
if (!revs->limited)
|
2008-07-12 20:00:57 +02:00
|
|
|
if (add_parents_to_list(revs, p, &revs->commits, &cache) < 0)
|
2007-05-04 23:54:57 +02:00
|
|
|
return rewrite_one_error;
|
2007-11-13 08:16:08 +01:00
|
|
|
if (p->object.flags & UNINTERESTING)
|
|
|
|
return rewrite_one_ok;
|
|
|
|
if (!(p->object.flags & TREESAME))
|
2007-05-04 23:54:57 +02:00
|
|
|
return rewrite_one_ok;
|
2006-03-01 00:07:20 +01:00
|
|
|
if (!p->parents)
|
2007-05-04 23:54:57 +02:00
|
|
|
return rewrite_one_noparents;
|
2013-05-16 17:32:39 +02:00
|
|
|
if ((p = one_relevant_parent(revs, p->parents)) == NULL)
|
|
|
|
return rewrite_one_ok;
|
|
|
|
*pp = p;
|
2006-03-01 00:07:20 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-03-28 17:47:31 +01:00
|
|
|
int rewrite_parents(struct rev_info *revs, struct commit *commit,
|
|
|
|
rewrite_parent_fn_t rewrite_parent)
|
2006-03-01 00:07:20 +01:00
|
|
|
{
|
|
|
|
struct commit_list **pp = &commit->parents;
|
|
|
|
while (*pp) {
|
|
|
|
struct commit_list *parent = *pp;
|
2013-03-28 17:47:31 +01:00
|
|
|
switch (rewrite_parent(revs, &parent->item)) {
|
2007-05-04 23:54:57 +02:00
|
|
|
case rewrite_one_ok:
|
|
|
|
break;
|
|
|
|
case rewrite_one_noparents:
|
2006-03-01 00:07:20 +01:00
|
|
|
*pp = parent->next;
|
|
|
|
continue;
|
2007-05-04 23:54:57 +02:00
|
|
|
case rewrite_one_error:
|
|
|
|
return -1;
|
2006-03-01 00:07:20 +01:00
|
|
|
}
|
|
|
|
pp = &parent->next;
|
|
|
|
}
|
revision.c: Make --full-history consider more merges
History simplification previously always treated merges as TREESAME
if they were TREESAME to any parent.
While this was consistent with the default behaviour, this could be
extremely unhelpful when searching detailed history, and could not be
overridden. For example, if a merge had ignored a change, as if by "-s
ours", then:
git log -m -p --full-history -Schange file
would successfully locate "change"'s addition but would not locate the
merge that resolved against it.
Futher, simplify_merges could drop the actual parent that a commit
was TREESAME to, leaving it as a normal commit marked TREESAME that
isn't actually TREESAME to its remaining parent.
Now redefine a commit's TREESAME flag to be true only if a commit is
TREESAME to _all_ of its parents. This doesn't affect either the default
simplify_history behaviour (because partially TREESAME merges are turned
into normal commits), or full-history with parent rewriting (because all
merges are output). But it does affect other modes. The clearest
difference is that --full-history will show more merges - sufficient to
ensure that -m -p --full-history log searches can really explain every
change to the file, including those changes' ultimate fate in merges.
Also modify simplify_merges to recalculate TREESAME after removing
a parent. This is achieved by storing per-parent TREESAME flags on the
initial scan, so the combined flag can be easily recomputed.
This fixes some t6111 failures, but creates a couple of new ones -
we are now showing some merges that don't need to be shown.
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-16 17:32:34 +02:00
|
|
|
remove_duplicate_parents(revs, commit);
|
2007-05-04 23:54:57 +02:00
|
|
|
return 0;
|
2006-03-01 00:07:20 +01:00
|
|
|
}
|
|
|
|
|
2013-01-05 22:26:45 +01:00
|
|
|
static int commit_rewrite_person(struct strbuf *buf, const char *what, struct string_list *mailmap)
|
|
|
|
{
|
|
|
|
char *person, *endp;
|
|
|
|
size_t len, namelen, maillen;
|
|
|
|
const char *name;
|
|
|
|
const char *mail;
|
|
|
|
struct ident_split ident;
|
|
|
|
|
|
|
|
person = strstr(buf->buf, what);
|
|
|
|
if (!person)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
person += strlen(what);
|
|
|
|
endp = strchr(person, '\n');
|
|
|
|
if (!endp)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
len = endp - person;
|
|
|
|
|
|
|
|
if (split_ident_line(&ident, person, len))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
mail = ident.mail_begin;
|
|
|
|
maillen = ident.mail_end - ident.mail_begin;
|
|
|
|
name = ident.name_begin;
|
|
|
|
namelen = ident.name_end - ident.name_begin;
|
|
|
|
|
|
|
|
if (map_user(mailmap, &mail, &maillen, &name, &namelen)) {
|
|
|
|
struct strbuf namemail = STRBUF_INIT;
|
|
|
|
|
|
|
|
strbuf_addf(&namemail, "%.*s <%.*s>",
|
|
|
|
(int)namelen, name, (int)maillen, mail);
|
|
|
|
|
|
|
|
strbuf_splice(buf, ident.name_begin - buf->buf,
|
|
|
|
ident.mail_end - ident.name_begin + 1,
|
|
|
|
namemail.buf, namemail.len);
|
|
|
|
|
|
|
|
strbuf_release(&namemail);
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2006-09-18 00:43:40 +02:00
|
|
|
static int commit_match(struct commit *commit, struct rev_info *opt)
|
|
|
|
{
|
2012-09-29 06:41:28 +02:00
|
|
|
int retval;
|
log: re-encode commit messages before grepping
If you run "git log --grep=foo", we will run your regex on
the literal bytes of the commit message. This can provide
confusing results if the commit message is not in the same
encoding as your grep expression (or worse, you have commits
in multiple encodings, in which case your regex would need
to be written to match either encoding). On top of this, we
might also be grepping in the commit's notes, which are
already re-encoded, potentially leading to grepping in a
buffer with mixed encodings concatenated. This is insanity,
but most people never noticed, because their terminal and
their commit encodings all match.
Instead, let's massage the to-be-grepped commit into a
standardized encoding. There is not much point in adding a
flag for "this is the encoding I expect my grep pattern to
match"; the only sane choice is for it to use the log output
encoding. That is presumably what the user's terminal is
using, and it means that the patterns found by the grep will
match the output produced by git.
As a bonus, this fixes a potential segfault in commit_match
when commit->buffer is NULL, as we now build on logmsg_reencode,
which handles reading the commit buffer from disk if
necessary. The segfault can be triggered with:
git commit -m 'text1' --allow-empty
git commit -m 'text2' --allow-empty
git log --graph --no-walk --grep 'text2'
which arguably does not make any sense (--graph inherently
wants a connected history, and by --no-walk the command line
is telling us to show discrete points in history without
connectivity), and we probably should forbid the
combination, but that is a separate issue.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-11 21:59:58 +01:00
|
|
|
const char *encoding;
|
2014-06-10 23:39:30 +02:00
|
|
|
const char *message;
|
2012-09-29 06:41:28 +02:00
|
|
|
struct strbuf buf = STRBUF_INIT;
|
log: re-encode commit messages before grepping
If you run "git log --grep=foo", we will run your regex on
the literal bytes of the commit message. This can provide
confusing results if the commit message is not in the same
encoding as your grep expression (or worse, you have commits
in multiple encodings, in which case your regex would need
to be written to match either encoding). On top of this, we
might also be grepping in the commit's notes, which are
already re-encoded, potentially leading to grepping in a
buffer with mixed encodings concatenated. This is insanity,
but most people never noticed, because their terminal and
their commit encodings all match.
Instead, let's massage the to-be-grepped commit into a
standardized encoding. There is not much point in adding a
flag for "this is the encoding I expect my grep pattern to
match"; the only sane choice is for it to use the log output
encoding. That is presumably what the user's terminal is
using, and it means that the patterns found by the grep will
match the output produced by git.
As a bonus, this fixes a potential segfault in commit_match
when commit->buffer is NULL, as we now build on logmsg_reencode,
which handles reading the commit buffer from disk if
necessary. The segfault can be triggered with:
git commit -m 'text1' --allow-empty
git commit -m 'text2' --allow-empty
git log --graph --no-walk --grep 'text2'
which arguably does not make any sense (--graph inherently
wants a connected history, and by --no-walk the command line
is telling us to show discrete points in history without
connectivity), and we probably should forbid the
combination, but that is a separate issue.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-11 21:59:58 +01:00
|
|
|
|
"log --author=me --grep=it" should find intersection, not union
Historically, any grep filter in "git log" family of commands were taken
as restricting to commits with any of the words in the commit log message.
However, the user almost always want to find commits "done by this person
on that topic". With "--all-match" option, a series of grep patterns can
be turned into a requirement that all of them must produce a match, but
that makes it impossible to ask for "done by me, on either this or that"
with:
log --author=me --committer=him --grep=this --grep=that
because it will require both "this" and "that" to appear.
Change the "header" parser of grep library to treat the headers specially,
and parse it as:
(all-match-OR (HEADER-AUTHOR me)
(HEADER-COMMITTER him)
(OR
(PATTERN this)
(PATTERN that) ) )
Even though the "log" command line parser doesn't give direct access to
the extended grep syntax to group terms with parentheses, this change will
cover the majority of the case the users would want.
This incidentally revealed that one test in t7002 was bogus. It ran:
log --author=Thor --grep=Thu --format='%s'
and expected (wrongly) "Thu" to match "Thursday" in the author/committer
date, but that would never match, as the timestamp in raw commit buffer
does not have the name of the day-of-the-week.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-18 05:09:06 +01:00
|
|
|
if (!opt->grep_filter.pattern_list && !opt->grep_filter.header_list)
|
2006-09-18 00:43:40 +02:00
|
|
|
return 1;
|
2012-09-29 20:59:52 +02:00
|
|
|
|
|
|
|
/* Prepend "fake" headers as needed */
|
|
|
|
if (opt->grep_filter.use_reflog_filter) {
|
2012-09-29 06:41:28 +02:00
|
|
|
strbuf_addstr(&buf, "reflog ");
|
|
|
|
get_reflog_message(&buf, opt->reflog_info);
|
|
|
|
strbuf_addch(&buf, '\n');
|
|
|
|
}
|
2012-09-29 20:59:52 +02:00
|
|
|
|
log: re-encode commit messages before grepping
If you run "git log --grep=foo", we will run your regex on
the literal bytes of the commit message. This can provide
confusing results if the commit message is not in the same
encoding as your grep expression (or worse, you have commits
in multiple encodings, in which case your regex would need
to be written to match either encoding). On top of this, we
might also be grepping in the commit's notes, which are
already re-encoded, potentially leading to grepping in a
buffer with mixed encodings concatenated. This is insanity,
but most people never noticed, because their terminal and
their commit encodings all match.
Instead, let's massage the to-be-grepped commit into a
standardized encoding. There is not much point in adding a
flag for "this is the encoding I expect my grep pattern to
match"; the only sane choice is for it to use the log output
encoding. That is presumably what the user's terminal is
using, and it means that the patterns found by the grep will
match the output produced by git.
As a bonus, this fixes a potential segfault in commit_match
when commit->buffer is NULL, as we now build on logmsg_reencode,
which handles reading the commit buffer from disk if
necessary. The segfault can be triggered with:
git commit -m 'text1' --allow-empty
git commit -m 'text2' --allow-empty
git log --graph --no-walk --grep 'text2'
which arguably does not make any sense (--graph inherently
wants a connected history, and by --no-walk the command line
is telling us to show discrete points in history without
connectivity), and we probably should forbid the
combination, but that is a separate issue.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-11 21:59:58 +01:00
|
|
|
/*
|
|
|
|
* We grep in the user's output encoding, under the assumption that it
|
|
|
|
* is the encoding they are most likely to write their grep pattern
|
|
|
|
* for. In addition, it means we will match the "notes" encoding below,
|
|
|
|
* so we will not end up with a buffer that has two different encodings
|
|
|
|
* in it.
|
|
|
|
*/
|
|
|
|
encoding = get_log_output_encoding();
|
2013-04-19 01:08:40 +02:00
|
|
|
message = logmsg_reencode(commit, NULL, encoding);
|
log: re-encode commit messages before grepping
If you run "git log --grep=foo", we will run your regex on
the literal bytes of the commit message. This can provide
confusing results if the commit message is not in the same
encoding as your grep expression (or worse, you have commits
in multiple encodings, in which case your regex would need
to be written to match either encoding). On top of this, we
might also be grepping in the commit's notes, which are
already re-encoded, potentially leading to grepping in a
buffer with mixed encodings concatenated. This is insanity,
but most people never noticed, because their terminal and
their commit encodings all match.
Instead, let's massage the to-be-grepped commit into a
standardized encoding. There is not much point in adding a
flag for "this is the encoding I expect my grep pattern to
match"; the only sane choice is for it to use the log output
encoding. That is presumably what the user's terminal is
using, and it means that the patterns found by the grep will
match the output produced by git.
As a bonus, this fixes a potential segfault in commit_match
when commit->buffer is NULL, as we now build on logmsg_reencode,
which handles reading the commit buffer from disk if
necessary. The segfault can be triggered with:
git commit -m 'text1' --allow-empty
git commit -m 'text2' --allow-empty
git log --graph --no-walk --grep 'text2'
which arguably does not make any sense (--graph inherently
wants a connected history, and by --no-walk the command line
is telling us to show discrete points in history without
connectivity), and we probably should forbid the
combination, but that is a separate issue.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-11 21:59:58 +01:00
|
|
|
|
2012-09-29 20:59:52 +02:00
|
|
|
/* Copy the commit to temporary if we are using "fake" headers */
|
|
|
|
if (buf.len)
|
log: re-encode commit messages before grepping
If you run "git log --grep=foo", we will run your regex on
the literal bytes of the commit message. This can provide
confusing results if the commit message is not in the same
encoding as your grep expression (or worse, you have commits
in multiple encodings, in which case your regex would need
to be written to match either encoding). On top of this, we
might also be grepping in the commit's notes, which are
already re-encoded, potentially leading to grepping in a
buffer with mixed encodings concatenated. This is insanity,
but most people never noticed, because their terminal and
their commit encodings all match.
Instead, let's massage the to-be-grepped commit into a
standardized encoding. There is not much point in adding a
flag for "this is the encoding I expect my grep pattern to
match"; the only sane choice is for it to use the log output
encoding. That is presumably what the user's terminal is
using, and it means that the patterns found by the grep will
match the output produced by git.
As a bonus, this fixes a potential segfault in commit_match
when commit->buffer is NULL, as we now build on logmsg_reencode,
which handles reading the commit buffer from disk if
necessary. The segfault can be triggered with:
git commit -m 'text1' --allow-empty
git commit -m 'text2' --allow-empty
git log --graph --no-walk --grep 'text2'
which arguably does not make any sense (--graph inherently
wants a connected history, and by --no-walk the command line
is telling us to show discrete points in history without
connectivity), and we probably should forbid the
combination, but that is a separate issue.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-11 21:59:58 +01:00
|
|
|
strbuf_addstr(&buf, message);
|
2012-09-29 20:59:52 +02:00
|
|
|
|
log --use-mailmap: optimize for cases without --author/--committer search
When we taught the commit_match() mechanism to pay attention to the
new --use-mailmap option, we started to unconditionally copy the
commit object to a temporary buffer, just in case we need the author
and committer lines updated via the mailmap mechanism, and rewrite
author and committer using the mailmap.
It turns out that this has a rather unpleasant performance
implications. In the linux kernel repository, running
$ git log --author='Junio C Hamano' --pretty=short >/dev/null
under /usr/bin/time, with and without --use-mailmap (the .mailmap
file is 118 entries long, the particular author does not appear in
it), cost (with warm cache):
[without --use-mailmap]
5.42user 0.26system 0:05.70elapsed 99%CPU (0avgtext+0avgdata 2005936maxresident)k
0inputs+0outputs (0major+137669minor)pagefaults 0swaps
[with --use-mailmap]
6.47user 0.30system 0:06.78elapsed 99%CPU (0avgtext+0avgdata 2006288maxresident)k
0inputs+0outputs (0major+137692minor)pagefaults 0swaps
which incurs about 20% overhead. The command is doing extra work,
so the extra cost may be justified.
But it is inexcusable to pay the cost when we do not need
author/committer match. In the same repository,
$ git log --grep='fix menuconfig on debian lenny' --pretty=short >/dev/null
shows very similar numbers as the above:
[without --use-mailmap]
5.32user 0.30system 0:05.63elapsed 99%CPU (0avgtext+0avgdata 2005984maxresident)k
0inputs+0outputs (0major+137672minor)pagefaults 0swaps
[with --use-mailmap]
6.64user 0.24system 0:06.89elapsed 99%CPU (0avgtext+0avgdata 2006320maxresident)k
0inputs+0outputs (0major+137694minor)pagefaults 0swaps
The latter case is an unnecessary performance regression. We may
want to _show_ the result with mailmap applied, but we do not have
to copy and rewrite the author/committer of all commits we try to
match if we do not query for these fields.
Trivially optimize this performace regression by limiting the
rewrites for only when we are matching with author/committer fields.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-01-08 09:02:49 +01:00
|
|
|
if (opt->grep_filter.header_list && opt->mailmap) {
|
2013-01-05 22:26:45 +01:00
|
|
|
if (!buf.len)
|
log: re-encode commit messages before grepping
If you run "git log --grep=foo", we will run your regex on
the literal bytes of the commit message. This can provide
confusing results if the commit message is not in the same
encoding as your grep expression (or worse, you have commits
in multiple encodings, in which case your regex would need
to be written to match either encoding). On top of this, we
might also be grepping in the commit's notes, which are
already re-encoded, potentially leading to grepping in a
buffer with mixed encodings concatenated. This is insanity,
but most people never noticed, because their terminal and
their commit encodings all match.
Instead, let's massage the to-be-grepped commit into a
standardized encoding. There is not much point in adding a
flag for "this is the encoding I expect my grep pattern to
match"; the only sane choice is for it to use the log output
encoding. That is presumably what the user's terminal is
using, and it means that the patterns found by the grep will
match the output produced by git.
As a bonus, this fixes a potential segfault in commit_match
when commit->buffer is NULL, as we now build on logmsg_reencode,
which handles reading the commit buffer from disk if
necessary. The segfault can be triggered with:
git commit -m 'text1' --allow-empty
git commit -m 'text2' --allow-empty
git log --graph --no-walk --grep 'text2'
which arguably does not make any sense (--graph inherently
wants a connected history, and by --no-walk the command line
is telling us to show discrete points in history without
connectivity), and we probably should forbid the
combination, but that is a separate issue.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-11 21:59:58 +01:00
|
|
|
strbuf_addstr(&buf, message);
|
2013-01-05 22:26:45 +01:00
|
|
|
|
|
|
|
commit_rewrite_person(&buf, "\nauthor ", opt->mailmap);
|
|
|
|
commit_rewrite_person(&buf, "\ncommitter ", opt->mailmap);
|
|
|
|
}
|
|
|
|
|
2012-09-29 06:41:29 +02:00
|
|
|
/* Append "fake" message parts as needed */
|
|
|
|
if (opt->show_notes) {
|
|
|
|
if (!buf.len)
|
log: re-encode commit messages before grepping
If you run "git log --grep=foo", we will run your regex on
the literal bytes of the commit message. This can provide
confusing results if the commit message is not in the same
encoding as your grep expression (or worse, you have commits
in multiple encodings, in which case your regex would need
to be written to match either encoding). On top of this, we
might also be grepping in the commit's notes, which are
already re-encoded, potentially leading to grepping in a
buffer with mixed encodings concatenated. This is insanity,
but most people never noticed, because their terminal and
their commit encodings all match.
Instead, let's massage the to-be-grepped commit into a
standardized encoding. There is not much point in adding a
flag for "this is the encoding I expect my grep pattern to
match"; the only sane choice is for it to use the log output
encoding. That is presumably what the user's terminal is
using, and it means that the patterns found by the grep will
match the output produced by git.
As a bonus, this fixes a potential segfault in commit_match
when commit->buffer is NULL, as we now build on logmsg_reencode,
which handles reading the commit buffer from disk if
necessary. The segfault can be triggered with:
git commit -m 'text1' --allow-empty
git commit -m 'text2' --allow-empty
git log --graph --no-walk --grep 'text2'
which arguably does not make any sense (--graph inherently
wants a connected history, and by --no-walk the command line
is telling us to show discrete points in history without
connectivity), and we probably should forbid the
combination, but that is a separate issue.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-11 21:59:58 +01:00
|
|
|
strbuf_addstr(&buf, message);
|
2015-11-10 03:22:29 +01:00
|
|
|
format_display_notes(commit->object.oid.hash, &buf, encoding, 1);
|
2012-09-29 06:41:29 +02:00
|
|
|
}
|
|
|
|
|
2014-06-10 23:39:30 +02:00
|
|
|
/*
|
|
|
|
* Find either in the original commit message, or in the temporary.
|
|
|
|
* Note that we cast away the constness of "message" here. It is
|
|
|
|
* const because it may come from the cached commit buffer. That's OK,
|
|
|
|
* because we know that it is modifiable heap memory, and that while
|
|
|
|
* grep_buffer may modify it for speed, it will restore any
|
|
|
|
* changes before returning.
|
|
|
|
*/
|
2012-09-29 06:41:28 +02:00
|
|
|
if (buf.len)
|
|
|
|
retval = grep_buffer(&opt->grep_filter, buf.buf, buf.len);
|
|
|
|
else
|
|
|
|
retval = grep_buffer(&opt->grep_filter,
|
2014-06-10 23:39:30 +02:00
|
|
|
(char *)message, strlen(message));
|
2012-09-29 06:41:28 +02:00
|
|
|
strbuf_release(&buf);
|
2014-06-10 23:41:39 +02:00
|
|
|
unuse_commit_buffer(commit, message);
|
log: teach --invert-grep option
"git log --grep=<string>" shows only commits with messages that
match the given string, but sometimes it is useful to be able to
show only commits that do *not* have certain messages (e.g. "show
me ones that are not FIXUP commits").
Originally, we had the invert-grep flag in grep_opt, but because
"git grep --invert-grep" does not make sense except in conjunction
with "--files-with-matches", which is already covered by
"--files-without-matches", it was moved it to revisions structure.
To have the flag there expresses the function to the feature better.
When the newly inserted two tests run, the history would have commits
with messages "initial", "second", "third", "fourth", "fifth", "sixth"
and "Second", committed in this order. The commits that does not match
either "th" or "Sec" is "second" and "initial". For the case insensitive
case only "initial" matches.
Signed-off-by: Christoph Junghans <ottxor@gentoo.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-01-13 02:33:32 +01:00
|
|
|
return opt->invert_grep ? !retval : retval;
|
2006-09-18 00:43:40 +02:00
|
|
|
}
|
|
|
|
|
log: use true parents for diff even when rewriting
When using pathspec filtering in combination with diff-based log
output, parent simplification happens before the diff is computed.
The diff is therefore against the *simplified* parents.
This works okay, arguably by accident, in the normal case:
simplification reduces to one parent as long as the commit is TREESAME
to it. So the simplified parent of any given commit must have the
same tree contents on the filtered paths as its true (unfiltered)
parent.
However, --full-diff breaks this guarantee, and indeed gives pretty
spectacular results when comparing the output of
git log --graph --stat ...
git log --graph --full-diff --stat ...
(--graph internally kicks in parent simplification, much like
--parents).
To fix it, store a copy of the parent list before simplification (in a
slab) whenever --full-diff is in effect. Then use the stored parents
instead of the simplified ones in the commit display code paths. The
latter do not actually check for --full-diff to avoid duplicated code;
they just grab the original parents if save_parents() has not been
called for this revision walk.
For ordinary commits it should be obvious that this is the right thing
to do.
Merge commits are a bit subtle. Observe that with default
simplification, merge simplification is an all-or-nothing decision:
either the merge is TREESAME to one parent and disappears, or it is
different from all parents and the parent list remains intact.
Redundant parents are not pruned, so the existing code also shows them
as a merge.
So if we do show a merge commit, the parent list just consists of the
rewrite result on each parent. Running, e.g., --cc on this in
--full-diff mode is not very useful: if any commits were skipped, some
hunks will disagree with all sides of the merge (with one side,
because commits were skipped; with the others, because they didn't
have those changes in the first place). This triggers --cc showing
these hunks spuriously.
Therefore I believe that even for merge commits it is better to show
the diffs wrt. the original parents.
Reported-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Thomas Rast <trast@inf.ethz.ch>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-31 22:13:20 +02:00
|
|
|
static inline int want_ancestry(const struct rev_info *revs)
|
2008-04-03 11:12:06 +02:00
|
|
|
{
|
2008-07-09 00:25:44 +02:00
|
|
|
return (revs->rewrite_parents || revs->children.name);
|
2008-04-03 11:12:06 +02:00
|
|
|
}
|
|
|
|
|
2009-08-19 04:34:33 +02:00
|
|
|
enum commit_action get_commit_action(struct rev_info *revs, struct commit *commit)
|
2007-11-04 21:12:05 +01:00
|
|
|
{
|
|
|
|
if (commit->object.flags & SHOWN)
|
|
|
|
return commit_ignore;
|
2015-11-10 03:22:29 +01:00
|
|
|
if (revs->unpacked && has_sha1_pack(commit->object.oid.hash))
|
2007-11-04 21:12:05 +01:00
|
|
|
return commit_ignore;
|
Add "--show-all" revision walker flag for debugging
It's really not very easy to visualize the commit walker, because - on
purpose - it obvously doesn't show the uninteresting commits!
This adds a "--show-all" flag to the revision walker, which will make
it show uninteresting commits too, and they'll have a '^' in front of
them (it also fixes a logic error for !verbose_header for boundary
commits - we should show the '-' even if left_right isn't shown).
A separate patch to gitk to teach it the new '^' was sent
to paulus. With the change in place, it actually is interesting
even for the cases that git doesn't have any problems with, ie
for the kernel you can do:
gitk -d --show-all v2.6.24..
and you see just how far down it has to parse things to see it all. The
use of "-d" is a good idea, since the date-ordered toposort is much better
at showing why it goes deep down (ie the date of some of those commits
after 2.6.24 is much older, because they were merged from trees that
weren't rebased).
So I think this is a useful feature even for non-debugging - just to
visualize what git does internally more.
When it actually breaks out due to the "everybody_uninteresting()"
case, it adds the uninteresting commits (both the one it's looking at
now, and the list of pending ones) to the list
This way, we really list *all* the commits we've looked at.
Because we now end up listing commits we may not even have been parsed
at all "show_log" and "show_commit" need to protect against commits
that don't have a commit buffer entry.
That second part is debatable just how it should work. Maybe we shouldn't
show such entries at all (with this patch those entries do get shown, they
just don't get any message shown with them). But I think this is a useful
case.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-02-09 23:02:07 +01:00
|
|
|
if (revs->show_all)
|
|
|
|
return commit_show;
|
2007-11-04 21:12:05 +01:00
|
|
|
if (commit->object.flags & UNINTERESTING)
|
|
|
|
return commit_ignore;
|
|
|
|
if (revs->min_age != -1 && (commit->date > revs->min_age))
|
|
|
|
return commit_ignore;
|
2011-03-21 11:14:06 +01:00
|
|
|
if (revs->min_parents || (revs->max_parents >= 0)) {
|
2013-05-16 17:32:40 +02:00
|
|
|
int n = commit_list_count(commit->parents);
|
2011-03-21 11:14:06 +01:00
|
|
|
if ((n < revs->min_parents) ||
|
|
|
|
((revs->max_parents >= 0) && (n > revs->max_parents)))
|
|
|
|
return commit_ignore;
|
|
|
|
}
|
2007-11-04 21:12:05 +01:00
|
|
|
if (!commit_match(commit, revs))
|
|
|
|
return commit_ignore;
|
2007-11-05 22:22:34 +01:00
|
|
|
if (revs->prune && revs->dense) {
|
2007-11-04 21:12:05 +01:00
|
|
|
/* Commit without changes? */
|
2007-11-13 08:16:08 +01:00
|
|
|
if (commit->object.flags & TREESAME) {
|
2013-05-16 17:32:40 +02:00
|
|
|
int n;
|
|
|
|
struct commit_list *p;
|
2007-11-04 21:12:05 +01:00
|
|
|
/* drop merges unless we want parenthood */
|
2008-04-03 11:12:06 +02:00
|
|
|
if (!want_ancestry(revs))
|
2007-11-04 21:12:05 +01:00
|
|
|
return commit_ignore;
|
2013-05-16 17:32:40 +02:00
|
|
|
/*
|
|
|
|
* If we want ancestry, then need to keep any merges
|
|
|
|
* between relevant commits to tie together topology.
|
|
|
|
* For consistency with TREESAME and simplification
|
|
|
|
* use "relevant" here rather than just INTERESTING,
|
|
|
|
* to treat bottom commit(s) as part of the topology.
|
|
|
|
*/
|
|
|
|
for (n = 0, p = commit->parents; p; p = p->next)
|
|
|
|
if (relevant_commit(p->item))
|
|
|
|
if (++n >= 2)
|
|
|
|
return commit_show;
|
|
|
|
return commit_ignore;
|
2007-11-04 21:12:05 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return commit_show;
|
|
|
|
}
|
|
|
|
|
2015-01-14 23:49:24 +01:00
|
|
|
define_commit_slab(saved_parents, struct commit_list *);
|
|
|
|
|
|
|
|
#define EMPTY_PARENT_LIST ((struct commit_list *)-1)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* You may only call save_parents() once per commit (this is checked
|
|
|
|
* for non-root commits).
|
|
|
|
*/
|
|
|
|
static void save_parents(struct rev_info *revs, struct commit *commit)
|
|
|
|
{
|
|
|
|
struct commit_list **pp;
|
|
|
|
|
|
|
|
if (!revs->saved_parents_slab) {
|
|
|
|
revs->saved_parents_slab = xmalloc(sizeof(struct saved_parents));
|
|
|
|
init_saved_parents(revs->saved_parents_slab);
|
|
|
|
}
|
|
|
|
|
|
|
|
pp = saved_parents_at(revs->saved_parents_slab, commit);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* When walking with reflogs, we may visit the same commit
|
|
|
|
* several times: once for each appearance in the reflog.
|
|
|
|
*
|
|
|
|
* In this case, save_parents() will be called multiple times.
|
|
|
|
* We want to keep only the first set of parents. We need to
|
|
|
|
* store a sentinel value for an empty (i.e., NULL) parent
|
|
|
|
* list to distinguish it from a not-yet-saved list, however.
|
|
|
|
*/
|
|
|
|
if (*pp)
|
|
|
|
return;
|
|
|
|
if (commit->parents)
|
|
|
|
*pp = copy_commit_list(commit->parents);
|
|
|
|
else
|
|
|
|
*pp = EMPTY_PARENT_LIST;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void free_saved_parents(struct rev_info *revs)
|
|
|
|
{
|
|
|
|
if (revs->saved_parents_slab)
|
|
|
|
clear_saved_parents(revs->saved_parents_slab);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct commit_list *get_saved_parents(struct rev_info *revs, const struct commit *commit)
|
|
|
|
{
|
|
|
|
struct commit_list *parents;
|
|
|
|
|
|
|
|
if (!revs->saved_parents_slab)
|
|
|
|
return commit->parents;
|
|
|
|
|
|
|
|
parents = *saved_parents_at(revs->saved_parents_slab, commit);
|
|
|
|
if (parents == EMPTY_PARENT_LIST)
|
|
|
|
return NULL;
|
|
|
|
return parents;
|
|
|
|
}
|
|
|
|
|
2009-08-19 04:34:33 +02:00
|
|
|
enum commit_action simplify_commit(struct rev_info *revs, struct commit *commit)
|
|
|
|
{
|
|
|
|
enum commit_action action = get_commit_action(revs, commit);
|
|
|
|
|
|
|
|
if (action == commit_show &&
|
|
|
|
!revs->show_all &&
|
|
|
|
revs->prune && revs->dense && want_ancestry(revs)) {
|
log: use true parents for diff even when rewriting
When using pathspec filtering in combination with diff-based log
output, parent simplification happens before the diff is computed.
The diff is therefore against the *simplified* parents.
This works okay, arguably by accident, in the normal case:
simplification reduces to one parent as long as the commit is TREESAME
to it. So the simplified parent of any given commit must have the
same tree contents on the filtered paths as its true (unfiltered)
parent.
However, --full-diff breaks this guarantee, and indeed gives pretty
spectacular results when comparing the output of
git log --graph --stat ...
git log --graph --full-diff --stat ...
(--graph internally kicks in parent simplification, much like
--parents).
To fix it, store a copy of the parent list before simplification (in a
slab) whenever --full-diff is in effect. Then use the stored parents
instead of the simplified ones in the commit display code paths. The
latter do not actually check for --full-diff to avoid duplicated code;
they just grab the original parents if save_parents() has not been
called for this revision walk.
For ordinary commits it should be obvious that this is the right thing
to do.
Merge commits are a bit subtle. Observe that with default
simplification, merge simplification is an all-or-nothing decision:
either the merge is TREESAME to one parent and disappears, or it is
different from all parents and the parent list remains intact.
Redundant parents are not pruned, so the existing code also shows them
as a merge.
So if we do show a merge commit, the parent list just consists of the
rewrite result on each parent. Running, e.g., --cc on this in
--full-diff mode is not very useful: if any commits were skipped, some
hunks will disagree with all sides of the merge (with one side,
because commits were skipped; with the others, because they didn't
have those changes in the first place). This triggers --cc showing
these hunks spuriously.
Therefore I believe that even for merge commits it is better to show
the diffs wrt. the original parents.
Reported-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Thomas Rast <trast@inf.ethz.ch>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-31 22:13:20 +02:00
|
|
|
/*
|
|
|
|
* --full-diff on simplified parents is no good: it
|
|
|
|
* will show spurious changes from the commits that
|
|
|
|
* were elided. So we save the parents on the side
|
|
|
|
* when --full-diff is in effect.
|
|
|
|
*/
|
|
|
|
if (revs->full_diff)
|
|
|
|
save_parents(revs, commit);
|
2013-03-28 17:47:31 +01:00
|
|
|
if (rewrite_parents(revs, commit, rewrite_one) < 0)
|
2009-08-19 04:34:33 +02:00
|
|
|
return commit_error;
|
|
|
|
}
|
|
|
|
return action;
|
|
|
|
}
|
|
|
|
|
2014-03-25 14:23:27 +01:00
|
|
|
static void track_linear(struct rev_info *revs, struct commit *commit)
|
|
|
|
{
|
|
|
|
if (revs->track_first_time) {
|
|
|
|
revs->linear = 1;
|
|
|
|
revs->track_first_time = 0;
|
|
|
|
} else {
|
|
|
|
struct commit_list *p;
|
|
|
|
for (p = revs->previous_parents; p; p = p->next)
|
|
|
|
if (p->item == NULL || /* first commit */
|
2015-11-10 03:22:28 +01:00
|
|
|
!oidcmp(&p->item->object.oid, &commit->object.oid))
|
2014-03-25 14:23:27 +01:00
|
|
|
break;
|
|
|
|
revs->linear = p != NULL;
|
|
|
|
}
|
|
|
|
if (revs->reverse) {
|
|
|
|
if (revs->linear)
|
|
|
|
commit->object.flags |= TRACK_LINEAR;
|
|
|
|
}
|
|
|
|
free_commit_list(revs->previous_parents);
|
|
|
|
revs->previous_parents = copy_commit_list(commit->parents);
|
|
|
|
}
|
|
|
|
|
2006-12-20 03:25:32 +01:00
|
|
|
static struct commit *get_revision_1(struct rev_info *revs)
|
2006-02-28 20:24:00 +01:00
|
|
|
{
|
2006-12-20 03:25:32 +01:00
|
|
|
if (!revs->commits)
|
2006-02-28 20:24:00 +01:00
|
|
|
return NULL;
|
|
|
|
|
2006-03-01 00:07:20 +01:00
|
|
|
do {
|
2015-10-24 18:21:31 +02:00
|
|
|
struct commit *commit = pop_commit(&revs->commits);
|
Make path-limiting be incremental when possible.
This makes git-rev-list able to do path-limiting without having to parse
all of history before it starts showing the results.
This makes things like "git log -- pathname" much more pleasant to use.
This is actually a pretty small patch, and the biggest part of it is
purely cleanups (turning the "goto next" statements into "continue"), but
it's conceptually a lot bigger than it looks.
What it does is that if you do a path-limited revision list, and you do
_not_ ask for pseudo-parenthood information, it won't do all the
path-limiting up-front, but instead do it incrementally in
"get_revision()".
This is an absolutely huge deal for anything like "git log -- <pathname>",
but also for some things that we don't do yet - like the "find where
things changed" logic I've described elsewhere, where we want to find the
previous revision that changed a file.
The reason I put "RFC" in the subject line is that while I've validated it
various ways, like doing
git-rev-list HEAD -- drivers/char/ | md5sum
before-and-after on the kernel archive, it's "git-rev-list" after all. In
other words, it's that really really subtle and complex central piece of
software. So while I think this is important and should go in asap, I also
think it should get lots of testing and eyeballs looking at the code.
Btw, don't even bother testing this with the git archive. git itself is so
small that parsing the whole revision history for it takes about a second
even with path limiting. The thing that _really_ shows this off is doing
git log drivers/
on the kernel archive, or even better, on the _historic_ kernel archive.
With this change, the response is instantaneous (although seeking to the
end of the result will obviously take as long as it ever did). Before this
change, the command would think about the result for tens of seconds - or
even minutes, in the case of the bigger old kernel archive - before
starting to output the results.
NOTE NOTE NOTE! Using path limiting with things like "gitk", which uses
the "--parents" flag to actually generate a pseudo-history of the
resulting commits won't actually see the improvement in interactivity,
since that forces git-rev-list to do the whole-history thing after all.
MAYBE we can fix that too at some point, but I won't promise anything.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-03-31 03:05:25 +02:00
|
|
|
|
reflogs: clear flags properly in corner case
The reflog-walking mechanism is based on the regular
revision traversal. We just rewrite the parents of each
commit in fake_reflog_parent to point to the commit in the
next reflog entry instead of the real parents.
However, the regular revision traversal tries not to show
the same commit twice, and so sets the SHOWN flag on each
commit it shows. In a reflog, however, we may want to see
the same commit more than once if it appears in the reflog
multiple times (which easily happens, for example, if you do
a reset to a prior state).
The fake_reflog_parent function takes care of this by
clearing flags, including SHOWN. Unfortunately, it does so
at the very end of the function, and it is possible to
return early from the function if there is no fake parent to
set up (e.g., because we are at the very first reflog entry
on the branch). In such a case the flag is not cleared, and
the entry is skipped by the revision traversal machinery as
already shown.
You can see this by walking the log of a ref which is set to
its very first commit more than once (the test below shows
such a situation). In this case the reflog walk will fail to
show the entry for the initial creation of the ref.
We don't want to simply move the flag-clearing to the top of
the function; we want to make sure flags set during the
fake-parent installation are also cleared. Instead, let's
hoist the flag-clearing out of the fake_reflog_parent
function entirely. It's not really about fake parents
anyway, and the only caller is the get_revision machinery.
Reported-by: Martin von Zweigbergk <martin.von.zweigbergk@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Acked-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-22 05:42:53 +01:00
|
|
|
if (revs->reflog_info) {
|
2013-08-03 12:36:15 +02:00
|
|
|
save_parents(revs, commit);
|
2007-01-11 11:47:48 +01:00
|
|
|
fake_reflog_parent(revs->reflog_info, commit);
|
reflogs: clear flags properly in corner case
The reflog-walking mechanism is based on the regular
revision traversal. We just rewrite the parents of each
commit in fake_reflog_parent to point to the commit in the
next reflog entry instead of the real parents.
However, the regular revision traversal tries not to show
the same commit twice, and so sets the SHOWN flag on each
commit it shows. In a reflog, however, we may want to see
the same commit more than once if it appears in the reflog
multiple times (which easily happens, for example, if you do
a reset to a prior state).
The fake_reflog_parent function takes care of this by
clearing flags, including SHOWN. Unfortunately, it does so
at the very end of the function, and it is possible to
return early from the function if there is no fake parent to
set up (e.g., because we are at the very first reflog entry
on the branch). In such a case the flag is not cleared, and
the entry is skipped by the revision traversal machinery as
already shown.
You can see this by walking the log of a ref which is set to
its very first commit more than once (the test below shows
such a situation). In this case the reflog walk will fail to
show the entry for the initial creation of the ref.
We don't want to simply move the flag-clearing to the top of
the function; we want to make sure flags set during the
fake-parent installation are also cleared. Instead, let's
hoist the flag-clearing out of the fake_reflog_parent
function entirely. It's not really about fake parents
anyway, and the only caller is the get_revision machinery.
Reported-by: Martin von Zweigbergk <martin.von.zweigbergk@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Acked-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-11-22 05:42:53 +01:00
|
|
|
commit->object.flags &= ~(ADDED | SEEN | SHOWN);
|
|
|
|
}
|
2007-01-11 11:47:48 +01:00
|
|
|
|
Make path-limiting be incremental when possible.
This makes git-rev-list able to do path-limiting without having to parse
all of history before it starts showing the results.
This makes things like "git log -- pathname" much more pleasant to use.
This is actually a pretty small patch, and the biggest part of it is
purely cleanups (turning the "goto next" statements into "continue"), but
it's conceptually a lot bigger than it looks.
What it does is that if you do a path-limited revision list, and you do
_not_ ask for pseudo-parenthood information, it won't do all the
path-limiting up-front, but instead do it incrementally in
"get_revision()".
This is an absolutely huge deal for anything like "git log -- <pathname>",
but also for some things that we don't do yet - like the "find where
things changed" logic I've described elsewhere, where we want to find the
previous revision that changed a file.
The reason I put "RFC" in the subject line is that while I've validated it
various ways, like doing
git-rev-list HEAD -- drivers/char/ | md5sum
before-and-after on the kernel archive, it's "git-rev-list" after all. In
other words, it's that really really subtle and complex central piece of
software. So while I think this is important and should go in asap, I also
think it should get lots of testing and eyeballs looking at the code.
Btw, don't even bother testing this with the git archive. git itself is so
small that parsing the whole revision history for it takes about a second
even with path limiting. The thing that _really_ shows this off is doing
git log drivers/
on the kernel archive, or even better, on the _historic_ kernel archive.
With this change, the response is instantaneous (although seeking to the
end of the result will obviously take as long as it ever did). Before this
change, the command would think about the result for tens of seconds - or
even minutes, in the case of the bigger old kernel archive - before
starting to output the results.
NOTE NOTE NOTE! Using path limiting with things like "gitk", which uses
the "--parents" flag to actually generate a pseudo-history of the
resulting commits won't actually see the improvement in interactivity,
since that forces git-rev-list to do the whole-history thing after all.
MAYBE we can fix that too at some point, but I won't promise anything.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-03-31 03:05:25 +02:00
|
|
|
/*
|
|
|
|
* If we haven't done the list limiting, we need to look at
|
2006-04-02 02:35:06 +02:00
|
|
|
* the parents here. We also need to do the date-based limiting
|
|
|
|
* that we'd otherwise have done in limit_list().
|
Make path-limiting be incremental when possible.
This makes git-rev-list able to do path-limiting without having to parse
all of history before it starts showing the results.
This makes things like "git log -- pathname" much more pleasant to use.
This is actually a pretty small patch, and the biggest part of it is
purely cleanups (turning the "goto next" statements into "continue"), but
it's conceptually a lot bigger than it looks.
What it does is that if you do a path-limited revision list, and you do
_not_ ask for pseudo-parenthood information, it won't do all the
path-limiting up-front, but instead do it incrementally in
"get_revision()".
This is an absolutely huge deal for anything like "git log -- <pathname>",
but also for some things that we don't do yet - like the "find where
things changed" logic I've described elsewhere, where we want to find the
previous revision that changed a file.
The reason I put "RFC" in the subject line is that while I've validated it
various ways, like doing
git-rev-list HEAD -- drivers/char/ | md5sum
before-and-after on the kernel archive, it's "git-rev-list" after all. In
other words, it's that really really subtle and complex central piece of
software. So while I think this is important and should go in asap, I also
think it should get lots of testing and eyeballs looking at the code.
Btw, don't even bother testing this with the git archive. git itself is so
small that parsing the whole revision history for it takes about a second
even with path limiting. The thing that _really_ shows this off is doing
git log drivers/
on the kernel archive, or even better, on the _historic_ kernel archive.
With this change, the response is instantaneous (although seeking to the
end of the result will obviously take as long as it ever did). Before this
change, the command would think about the result for tens of seconds - or
even minutes, in the case of the bigger old kernel archive - before
starting to output the results.
NOTE NOTE NOTE! Using path limiting with things like "gitk", which uses
the "--parents" flag to actually generate a pseudo-history of the
resulting commits won't actually see the improvement in interactivity,
since that forces git-rev-list to do the whole-history thing after all.
MAYBE we can fix that too at some point, but I won't promise anything.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-03-31 03:05:25 +02:00
|
|
|
*/
|
2006-04-02 02:35:06 +02:00
|
|
|
if (!revs->limited) {
|
2006-10-31 02:37:49 +01:00
|
|
|
if (revs->max_age != -1 &&
|
revision walker: Fix --boundary when limited
This cleans up the boundary processing in the commit walker. It
- rips out the boundary logic from the commit walker. Placing
"negative" commits in the revs->commits list was Ok if all we
cared about "boundary" was the UNINTERESTING limiting case,
but conceptually it was wrong.
- makes get_revision_1() function to walk the commits and return
the results as if there is no funny postprocessing flags such
as --reverse, --skip nor --max-count.
- makes get_revision() function the postprocessing phase:
If reverse is given, wait for get_revision_1() to give
everything that it would normally give, and then reverse it
before consuming.
If skip is given, skip that many before going further.
If max is given, stop when we gave out that many.
Now that we are about to return one positive commit, mark
the parents of that commit to be potential boundaries
before returning, iff we are doing the boundary processing.
Return the commit.
- After get_revision() finishes giving out all the positive
commits, if we are doing the boundary processing, we look at
the parents that we marked as potential boundaries earlier,
see if they are really boundaries, and give them out.
It loses more code than it adds, even when the new gc_boundary()
function, which is purely for early optimization, is counted.
Note that this patch is purely for eyeballing and discussion
only. It breaks git-bundle's verify logic because the logic
does not use BOUNDARY_SHOW flag for its internal computation
anymore. After we correct it not to attempt to affect the
boundary processing by setting the BOUNDARY_SHOW flag, we can
remove BOUNDARY_SHOW from revision.h and use that bit assignment
for the new CHILD_SHOWN flag.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-05 22:10:06 +01:00
|
|
|
(commit->date < revs->max_age))
|
|
|
|
continue;
|
add `ignore_missing_links` mode to revwalk
When pack-objects is computing the reachability bitmap to
serve a fetch request, it can erroneously die() if some of
the UNINTERESTING objects are not present. Upload-pack
throws away HAVE lines from the client for objects we do not
have, but we may have a tip object without all of its
ancestors (e.g., if the tip is no longer reachable and was
new enough to survive a `git prune`, but some of its
reachable objects did get pruned).
In the non-bitmap case, we do a revision walk with the HAVE
objects marked as UNINTERESTING. The revision walker
explicitly ignores errors in accessing UNINTERESTING commits
to handle this case (and we do not bother looking at
UNINTERESTING trees or blobs at all).
When we have bitmaps, however, the process is quite
different. The bitmap index for a pack-objects run is
calculated in two separate steps:
First, we perform an extensive walk from all the HAVEs to
find the full set of objects reachable from them. This walk
is usually optimized away because we are expected to hit an
object with a bitmap during the traversal, which allows us
to terminate early.
Secondly, we perform an extensive walk from all the WANTs,
which usually also terminates early because we hit a commit
with an existing bitmap.
Once we have the resulting bitmaps from the two walks, we
AND-NOT them together to obtain the resulting set of objects
we need to pack.
When we are walking the HAVE objects, the revision walker
does not know that we are walking it only to mark the
results as uninteresting. We strip out the UNINTERESTING flag,
because those objects _are_ interesting to us during the
first walk. We want to keep going to get a complete set of
reachable objects if we can.
We need some way to tell the revision walker that it's OK to
silently truncate the HAVE walk, just like it does for the
UNINTERESTING case. This patch introduces a new
`ignore_missing_links` flag to the `rev_info` struct, which
we set only for the HAVE walk.
It also adds tests to cover UNINTERESTING objects missing
from several positions: a missing blob, a missing tree, and
a missing parent commit. The missing blob already worked (as
we do not care about its contents at all), but the other two
cases caused us to die().
Note that there are a few cases we do not need to test:
1. We do not need to test a missing tree, with the blob
still present. Without the tree that refers to it, we
would not know that the blob is relevant to our walk.
2. We do not need to test a tip commit that is missing.
Upload-pack omits these for us (and in fact, we
complain even in the non-bitmap case if it fails to do
so).
Reported-by: Siddharth Agarwal <sid0@fb.com>
Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-03-28 11:00:43 +01:00
|
|
|
if (add_parents_to_list(revs, commit, &revs->commits, NULL) < 0) {
|
|
|
|
if (!revs->ignore_missing_links)
|
|
|
|
die("Failed to traverse parents of commit %s",
|
2015-11-10 03:22:28 +01:00
|
|
|
oid_to_hex(&commit->object.oid));
|
add `ignore_missing_links` mode to revwalk
When pack-objects is computing the reachability bitmap to
serve a fetch request, it can erroneously die() if some of
the UNINTERESTING objects are not present. Upload-pack
throws away HAVE lines from the client for objects we do not
have, but we may have a tip object without all of its
ancestors (e.g., if the tip is no longer reachable and was
new enough to survive a `git prune`, but some of its
reachable objects did get pruned).
In the non-bitmap case, we do a revision walk with the HAVE
objects marked as UNINTERESTING. The revision walker
explicitly ignores errors in accessing UNINTERESTING commits
to handle this case (and we do not bother looking at
UNINTERESTING trees or blobs at all).
When we have bitmaps, however, the process is quite
different. The bitmap index for a pack-objects run is
calculated in two separate steps:
First, we perform an extensive walk from all the HAVEs to
find the full set of objects reachable from them. This walk
is usually optimized away because we are expected to hit an
object with a bitmap during the traversal, which allows us
to terminate early.
Secondly, we perform an extensive walk from all the WANTs,
which usually also terminates early because we hit a commit
with an existing bitmap.
Once we have the resulting bitmaps from the two walks, we
AND-NOT them together to obtain the resulting set of objects
we need to pack.
When we are walking the HAVE objects, the revision walker
does not know that we are walking it only to mark the
results as uninteresting. We strip out the UNINTERESTING flag,
because those objects _are_ interesting to us during the
first walk. We want to keep going to get a complete set of
reachable objects if we can.
We need some way to tell the revision walker that it's OK to
silently truncate the HAVE walk, just like it does for the
UNINTERESTING case. This patch introduces a new
`ignore_missing_links` flag to the `rev_info` struct, which
we set only for the HAVE walk.
It also adds tests to cover UNINTERESTING objects missing
from several positions: a missing blob, a missing tree, and
a missing parent commit. The missing blob already worked (as
we do not care about its contents at all), but the other two
cases caused us to die().
Note that there are a few cases we do not need to test:
1. We do not need to test a missing tree, with the blob
still present. Without the tree that refers to it, we
would not know that the blob is relevant to our walk.
2. We do not need to test a tip commit that is missing.
Upload-pack omits these for us (and in fact, we
complain even in the non-bitmap case if it fails to do
so).
Reported-by: Siddharth Agarwal <sid0@fb.com>
Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-03-28 11:00:43 +01:00
|
|
|
}
|
2006-04-02 02:35:06 +02:00
|
|
|
}
|
2006-10-31 02:37:49 +01:00
|
|
|
|
2007-11-04 21:12:05 +01:00
|
|
|
switch (simplify_commit(revs, commit)) {
|
|
|
|
case commit_ignore:
|
2006-09-18 00:43:40 +02:00
|
|
|
continue;
|
2007-11-04 21:12:05 +01:00
|
|
|
case commit_error:
|
2009-02-11 10:27:43 +01:00
|
|
|
die("Failed to simplify parents of commit %s",
|
2015-11-10 03:22:28 +01:00
|
|
|
oid_to_hex(&commit->object.oid));
|
2007-11-04 21:12:05 +01:00
|
|
|
default:
|
2014-03-25 14:23:27 +01:00
|
|
|
if (revs->track_linear)
|
|
|
|
track_linear(revs, commit);
|
2007-11-04 21:12:05 +01:00
|
|
|
return commit;
|
2006-03-28 09:58:34 +02:00
|
|
|
}
|
2006-03-01 00:07:20 +01:00
|
|
|
} while (revs->commits);
|
|
|
|
return NULL;
|
|
|
|
}
|
2006-12-20 03:25:32 +01:00
|
|
|
|
2013-05-25 11:08:09 +02:00
|
|
|
/*
|
|
|
|
* Return true for entries that have not yet been shown. (This is an
|
|
|
|
* object_array_each_func_t.)
|
|
|
|
*/
|
|
|
|
static int entry_unshown(struct object_array_entry *entry, void *cb_data_unused)
|
revision walker: Fix --boundary when limited
This cleans up the boundary processing in the commit walker. It
- rips out the boundary logic from the commit walker. Placing
"negative" commits in the revs->commits list was Ok if all we
cared about "boundary" was the UNINTERESTING limiting case,
but conceptually it was wrong.
- makes get_revision_1() function to walk the commits and return
the results as if there is no funny postprocessing flags such
as --reverse, --skip nor --max-count.
- makes get_revision() function the postprocessing phase:
If reverse is given, wait for get_revision_1() to give
everything that it would normally give, and then reverse it
before consuming.
If skip is given, skip that many before going further.
If max is given, stop when we gave out that many.
Now that we are about to return one positive commit, mark
the parents of that commit to be potential boundaries
before returning, iff we are doing the boundary processing.
Return the commit.
- After get_revision() finishes giving out all the positive
commits, if we are doing the boundary processing, we look at
the parents that we marked as potential boundaries earlier,
see if they are really boundaries, and give them out.
It loses more code than it adds, even when the new gc_boundary()
function, which is purely for early optimization, is counted.
Note that this patch is purely for eyeballing and discussion
only. It breaks git-bundle's verify logic because the logic
does not use BOUNDARY_SHOW flag for its internal computation
anymore. After we correct it not to attempt to affect the
boundary processing by setting the BOUNDARY_SHOW flag, we can
remove BOUNDARY_SHOW from revision.h and use that bit assignment
for the new CHILD_SHOWN flag.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-05 22:10:06 +01:00
|
|
|
{
|
2013-05-25 11:08:09 +02:00
|
|
|
return !(entry->item->flags & SHOWN);
|
|
|
|
}
|
revision walker: Fix --boundary when limited
This cleans up the boundary processing in the commit walker. It
- rips out the boundary logic from the commit walker. Placing
"negative" commits in the revs->commits list was Ok if all we
cared about "boundary" was the UNINTERESTING limiting case,
but conceptually it was wrong.
- makes get_revision_1() function to walk the commits and return
the results as if there is no funny postprocessing flags such
as --reverse, --skip nor --max-count.
- makes get_revision() function the postprocessing phase:
If reverse is given, wait for get_revision_1() to give
everything that it would normally give, and then reverse it
before consuming.
If skip is given, skip that many before going further.
If max is given, stop when we gave out that many.
Now that we are about to return one positive commit, mark
the parents of that commit to be potential boundaries
before returning, iff we are doing the boundary processing.
Return the commit.
- After get_revision() finishes giving out all the positive
commits, if we are doing the boundary processing, we look at
the parents that we marked as potential boundaries earlier,
see if they are really boundaries, and give them out.
It loses more code than it adds, even when the new gc_boundary()
function, which is purely for early optimization, is counted.
Note that this patch is purely for eyeballing and discussion
only. It breaks git-bundle's verify logic because the logic
does not use BOUNDARY_SHOW flag for its internal computation
anymore. After we correct it not to attempt to affect the
boundary processing by setting the BOUNDARY_SHOW flag, we can
remove BOUNDARY_SHOW from revision.h and use that bit assignment
for the new CHILD_SHOWN flag.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-05 22:10:06 +01:00
|
|
|
|
2013-05-25 11:08:09 +02:00
|
|
|
/*
|
|
|
|
* If array is on the verge of a realloc, garbage-collect any entries
|
|
|
|
* that have already been shown to try to free up some space.
|
|
|
|
*/
|
|
|
|
static void gc_boundary(struct object_array *array)
|
|
|
|
{
|
|
|
|
if (array->nr == array->alloc)
|
|
|
|
object_array_filter(array, entry_unshown, NULL);
|
revision walker: Fix --boundary when limited
This cleans up the boundary processing in the commit walker. It
- rips out the boundary logic from the commit walker. Placing
"negative" commits in the revs->commits list was Ok if all we
cared about "boundary" was the UNINTERESTING limiting case,
but conceptually it was wrong.
- makes get_revision_1() function to walk the commits and return
the results as if there is no funny postprocessing flags such
as --reverse, --skip nor --max-count.
- makes get_revision() function the postprocessing phase:
If reverse is given, wait for get_revision_1() to give
everything that it would normally give, and then reverse it
before consuming.
If skip is given, skip that many before going further.
If max is given, stop when we gave out that many.
Now that we are about to return one positive commit, mark
the parents of that commit to be potential boundaries
before returning, iff we are doing the boundary processing.
Return the commit.
- After get_revision() finishes giving out all the positive
commits, if we are doing the boundary processing, we look at
the parents that we marked as potential boundaries earlier,
see if they are really boundaries, and give them out.
It loses more code than it adds, even when the new gc_boundary()
function, which is purely for early optimization, is counted.
Note that this patch is purely for eyeballing and discussion
only. It breaks git-bundle's verify logic because the logic
does not use BOUNDARY_SHOW flag for its internal computation
anymore. After we correct it not to attempt to affect the
boundary processing by setting the BOUNDARY_SHOW flag, we can
remove BOUNDARY_SHOW from revision.h and use that bit assignment
for the new CHILD_SHOWN flag.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-05 22:10:06 +01:00
|
|
|
}
|
|
|
|
|
2008-05-25 01:02:05 +02:00
|
|
|
static void create_boundary_commit_list(struct rev_info *revs)
|
|
|
|
{
|
|
|
|
unsigned i;
|
|
|
|
struct commit *c;
|
|
|
|
struct object_array *array = &revs->boundary_commits;
|
|
|
|
struct object_array_entry *objects = array->objects;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If revs->commits is non-NULL at this point, an error occurred in
|
|
|
|
* get_revision_1(). Ignore the error and continue printing the
|
|
|
|
* boundary commits anyway. (This is what the code has always
|
|
|
|
* done.)
|
|
|
|
*/
|
|
|
|
if (revs->commits) {
|
|
|
|
free_commit_list(revs->commits);
|
|
|
|
revs->commits = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Put all of the actual boundary commits from revs->boundary_commits
|
|
|
|
* into revs->commits
|
|
|
|
*/
|
|
|
|
for (i = 0; i < array->nr; i++) {
|
|
|
|
c = (struct commit *)(objects[i].item);
|
|
|
|
if (!c)
|
|
|
|
continue;
|
|
|
|
if (!(c->object.flags & CHILD_SHOWN))
|
|
|
|
continue;
|
|
|
|
if (c->object.flags & (SHOWN | BOUNDARY))
|
|
|
|
continue;
|
|
|
|
c->object.flags |= BOUNDARY;
|
|
|
|
commit_list_insert(c, &revs->commits);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If revs->topo_order is set, sort the boundary commits
|
|
|
|
* in topological order
|
|
|
|
*/
|
toposort: rename "lifo" field
The primary invariant of sort_in_topological_order() is that a
parent commit is not emitted until all children of it are. When
traversing a forked history like this with "git log C E":
A----B----C
\
D----E
we ensure that A is emitted after all of B, C, D, and E are done, B
has to wait until C is done, and D has to wait until E is done.
In some applications, however, we would further want to control how
these child commits B, C, D and E on two parallel ancestry chains
are shown.
Most of the time, we would want to see C and B emitted together, and
then E and D, and finally A (i.e. the --topo-order output). The
"lifo" parameter of the sort_in_topological_order() function is used
to control this behaviour. We start the traversal by knowing two
commits, C and E. While keeping in mind that we also need to
inspect E later, we pick C first to inspect, and we notice and
record that B needs to be inspected. By structuring the "work to be
done" set as a LIFO stack, we ensure that B is inspected next,
before other in-flight commits we had known that we will need to
inspect, e.g. E.
When showing in --date-order, we would want to see commits ordered
by timestamps, i.e. show C, E, B and D in this order before showing
A, possibly mixing commits from two parallel histories together.
When "lifo" parameter is set to false, the function keeps the "work
to be done" set sorted in the date order to realize this semantics.
After inspecting C, we add B to the "work to be done" set, but the
next commit we inspect from the set is E which is newer than B.
The name "lifo", however, is too strongly tied to the way how the
function implements its behaviour, and does not describe what the
behaviour _means_.
Replace this field with an enum rev_sort_order, with two possible
values: REV_SORT_IN_GRAPH_ORDER and REV_SORT_BY_COMMIT_DATE, and
update the existing code. The mechanical replacement rule is:
"lifo == 0" is equivalent to "sort_order == REV_SORT_BY_COMMIT_DATE"
"lifo == 1" is equivalent to "sort_order == REV_SORT_IN_GRAPH_ORDER"
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-06-07 01:07:14 +02:00
|
|
|
sort_in_topological_order(&revs->commits, revs->sort_order);
|
2008-05-25 01:02:05 +02:00
|
|
|
}
|
|
|
|
|
2008-05-04 12:36:54 +02:00
|
|
|
static struct commit *get_revision_internal(struct rev_info *revs)
|
2006-12-20 03:25:32 +01:00
|
|
|
{
|
|
|
|
struct commit *c = NULL;
|
revision walker: Fix --boundary when limited
This cleans up the boundary processing in the commit walker. It
- rips out the boundary logic from the commit walker. Placing
"negative" commits in the revs->commits list was Ok if all we
cared about "boundary" was the UNINTERESTING limiting case,
but conceptually it was wrong.
- makes get_revision_1() function to walk the commits and return
the results as if there is no funny postprocessing flags such
as --reverse, --skip nor --max-count.
- makes get_revision() function the postprocessing phase:
If reverse is given, wait for get_revision_1() to give
everything that it would normally give, and then reverse it
before consuming.
If skip is given, skip that many before going further.
If max is given, stop when we gave out that many.
Now that we are about to return one positive commit, mark
the parents of that commit to be potential boundaries
before returning, iff we are doing the boundary processing.
Return the commit.
- After get_revision() finishes giving out all the positive
commits, if we are doing the boundary processing, we look at
the parents that we marked as potential boundaries earlier,
see if they are really boundaries, and give them out.
It loses more code than it adds, even when the new gc_boundary()
function, which is purely for early optimization, is counted.
Note that this patch is purely for eyeballing and discussion
only. It breaks git-bundle's verify logic because the logic
does not use BOUNDARY_SHOW flag for its internal computation
anymore. After we correct it not to attempt to affect the
boundary processing by setting the BOUNDARY_SHOW flag, we can
remove BOUNDARY_SHOW from revision.h and use that bit assignment
for the new CHILD_SHOWN flag.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-05 22:10:06 +01:00
|
|
|
struct commit_list *l;
|
|
|
|
|
|
|
|
if (revs->boundary == 2) {
|
2008-05-25 01:02:05 +02:00
|
|
|
/*
|
|
|
|
* All of the normal commits have already been returned,
|
|
|
|
* and we are now returning boundary commits.
|
|
|
|
* create_boundary_commit_list() has populated
|
|
|
|
* revs->commits with the remaining commits to return.
|
|
|
|
*/
|
|
|
|
c = pop_commit(&revs->commits);
|
|
|
|
if (c)
|
|
|
|
c->object.flags |= SHOWN;
|
2007-01-20 23:04:02 +01:00
|
|
|
return c;
|
|
|
|
}
|
|
|
|
|
revision walker: Fix --boundary when limited
This cleans up the boundary processing in the commit walker. It
- rips out the boundary logic from the commit walker. Placing
"negative" commits in the revs->commits list was Ok if all we
cared about "boundary" was the UNINTERESTING limiting case,
but conceptually it was wrong.
- makes get_revision_1() function to walk the commits and return
the results as if there is no funny postprocessing flags such
as --reverse, --skip nor --max-count.
- makes get_revision() function the postprocessing phase:
If reverse is given, wait for get_revision_1() to give
everything that it would normally give, and then reverse it
before consuming.
If skip is given, skip that many before going further.
If max is given, stop when we gave out that many.
Now that we are about to return one positive commit, mark
the parents of that commit to be potential boundaries
before returning, iff we are doing the boundary processing.
Return the commit.
- After get_revision() finishes giving out all the positive
commits, if we are doing the boundary processing, we look at
the parents that we marked as potential boundaries earlier,
see if they are really boundaries, and give them out.
It loses more code than it adds, even when the new gc_boundary()
function, which is purely for early optimization, is counted.
Note that this patch is purely for eyeballing and discussion
only. It breaks git-bundle's verify logic because the logic
does not use BOUNDARY_SHOW flag for its internal computation
anymore. After we correct it not to attempt to affect the
boundary processing by setting the BOUNDARY_SHOW flag, we can
remove BOUNDARY_SHOW from revision.h and use that bit assignment
for the new CHILD_SHOWN flag.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-05 22:10:06 +01:00
|
|
|
/*
|
revision: avoid work after --max-count is reached
During a revision traversal in which --max-count has been
specified, we decrement a counter for each revision returned
by get_revision. When it hits 0, we typically return NULL
(the exception being if we still have boundary commits to
show).
However, before we check the counter, we call get_revision_1
to get the next commit. This might involve looking at a
large number of commits if we have restricted the traversal
(e.g., we might traverse until we find the next commit whose
diff actually matches a pathspec).
There's no need to make this get_revision_1 call when our
counter runs out. If we are not in --boundary mode, we will
just throw away the result and immediately return NULL. If
we are in --boundary mode, then we will still throw away the
result, and then start showing the boundary commits.
However, as git_revision_1 does not impact the boundary
list, it should not have an impact.
In most cases, avoiding this work will not be especially
noticeable. However, in some cases, it can make a big
difference:
[before]
$ time git rev-list -1 origin Documentation/RelNotes/1.7.11.2.txt
8d141a1d562abb31f27f599dbf6e10a6c06ed73e
real 0m0.301s
user 0m0.280s
sys 0m0.016s
[after]
$ time git rev-list -1 origin Documentation/RelNotes/1.7.11.2.txt
8d141a1d562abb31f27f599dbf6e10a6c06ed73e
real 0m0.010s
user 0m0.008s
sys 0m0.000s
Note that the output is produced almost instantaneously in
the first case, and then git uselessly spends a long time
looking for the next commit to touch that file (but there
isn't one, and we traverse all the way down to the roots).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-07-13 09:50:23 +02:00
|
|
|
* If our max_count counter has reached zero, then we are done. We
|
|
|
|
* don't simply return NULL because we still might need to show
|
|
|
|
* boundary commits. But we want to avoid calling get_revision_1, which
|
|
|
|
* might do a considerable amount of work finding the next commit only
|
|
|
|
* for us to throw it away.
|
|
|
|
*
|
|
|
|
* If it is non-zero, then either we don't have a max_count at all
|
|
|
|
* (-1), or it is still counting, in which case we decrement.
|
revision walker: Fix --boundary when limited
This cleans up the boundary processing in the commit walker. It
- rips out the boundary logic from the commit walker. Placing
"negative" commits in the revs->commits list was Ok if all we
cared about "boundary" was the UNINTERESTING limiting case,
but conceptually it was wrong.
- makes get_revision_1() function to walk the commits and return
the results as if there is no funny postprocessing flags such
as --reverse, --skip nor --max-count.
- makes get_revision() function the postprocessing phase:
If reverse is given, wait for get_revision_1() to give
everything that it would normally give, and then reverse it
before consuming.
If skip is given, skip that many before going further.
If max is given, stop when we gave out that many.
Now that we are about to return one positive commit, mark
the parents of that commit to be potential boundaries
before returning, iff we are doing the boundary processing.
Return the commit.
- After get_revision() finishes giving out all the positive
commits, if we are doing the boundary processing, we look at
the parents that we marked as potential boundaries earlier,
see if they are really boundaries, and give them out.
It loses more code than it adds, even when the new gc_boundary()
function, which is purely for early optimization, is counted.
Note that this patch is purely for eyeballing and discussion
only. It breaks git-bundle's verify logic because the logic
does not use BOUNDARY_SHOW flag for its internal computation
anymore. After we correct it not to attempt to affect the
boundary processing by setting the BOUNDARY_SHOW flag, we can
remove BOUNDARY_SHOW from revision.h and use that bit assignment
for the new CHILD_SHOWN flag.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-05 22:10:06 +01:00
|
|
|
*/
|
revision: avoid work after --max-count is reached
During a revision traversal in which --max-count has been
specified, we decrement a counter for each revision returned
by get_revision. When it hits 0, we typically return NULL
(the exception being if we still have boundary commits to
show).
However, before we check the counter, we call get_revision_1
to get the next commit. This might involve looking at a
large number of commits if we have restricted the traversal
(e.g., we might traverse until we find the next commit whose
diff actually matches a pathspec).
There's no need to make this get_revision_1 call when our
counter runs out. If we are not in --boundary mode, we will
just throw away the result and immediately return NULL. If
we are in --boundary mode, then we will still throw away the
result, and then start showing the boundary commits.
However, as git_revision_1 does not impact the boundary
list, it should not have an impact.
In most cases, avoiding this work will not be especially
noticeable. However, in some cases, it can make a big
difference:
[before]
$ time git rev-list -1 origin Documentation/RelNotes/1.7.11.2.txt
8d141a1d562abb31f27f599dbf6e10a6c06ed73e
real 0m0.301s
user 0m0.280s
sys 0m0.016s
[after]
$ time git rev-list -1 origin Documentation/RelNotes/1.7.11.2.txt
8d141a1d562abb31f27f599dbf6e10a6c06ed73e
real 0m0.010s
user 0m0.008s
sys 0m0.000s
Note that the output is produced almost instantaneously in
the first case, and then git uselessly spends a long time
looking for the next commit to touch that file (but there
isn't one, and we traverse all the way down to the roots).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-07-13 09:50:23 +02:00
|
|
|
if (revs->max_count) {
|
|
|
|
c = get_revision_1(revs);
|
|
|
|
if (c) {
|
2013-10-31 10:25:43 +01:00
|
|
|
while (revs->skip_count > 0) {
|
revision: avoid work after --max-count is reached
During a revision traversal in which --max-count has been
specified, we decrement a counter for each revision returned
by get_revision. When it hits 0, we typically return NULL
(the exception being if we still have boundary commits to
show).
However, before we check the counter, we call get_revision_1
to get the next commit. This might involve looking at a
large number of commits if we have restricted the traversal
(e.g., we might traverse until we find the next commit whose
diff actually matches a pathspec).
There's no need to make this get_revision_1 call when our
counter runs out. If we are not in --boundary mode, we will
just throw away the result and immediately return NULL. If
we are in --boundary mode, then we will still throw away the
result, and then start showing the boundary commits.
However, as git_revision_1 does not impact the boundary
list, it should not have an impact.
In most cases, avoiding this work will not be especially
noticeable. However, in some cases, it can make a big
difference:
[before]
$ time git rev-list -1 origin Documentation/RelNotes/1.7.11.2.txt
8d141a1d562abb31f27f599dbf6e10a6c06ed73e
real 0m0.301s
user 0m0.280s
sys 0m0.016s
[after]
$ time git rev-list -1 origin Documentation/RelNotes/1.7.11.2.txt
8d141a1d562abb31f27f599dbf6e10a6c06ed73e
real 0m0.010s
user 0m0.008s
sys 0m0.000s
Note that the output is produced almost instantaneously in
the first case, and then git uselessly spends a long time
looking for the next commit to touch that file (but there
isn't one, and we traverse all the way down to the roots).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-07-13 09:50:23 +02:00
|
|
|
revs->skip_count--;
|
|
|
|
c = get_revision_1(revs);
|
|
|
|
if (!c)
|
|
|
|
break;
|
|
|
|
}
|
2007-03-06 12:20:55 +01:00
|
|
|
}
|
revision walker: Fix --boundary when limited
This cleans up the boundary processing in the commit walker. It
- rips out the boundary logic from the commit walker. Placing
"negative" commits in the revs->commits list was Ok if all we
cared about "boundary" was the UNINTERESTING limiting case,
but conceptually it was wrong.
- makes get_revision_1() function to walk the commits and return
the results as if there is no funny postprocessing flags such
as --reverse, --skip nor --max-count.
- makes get_revision() function the postprocessing phase:
If reverse is given, wait for get_revision_1() to give
everything that it would normally give, and then reverse it
before consuming.
If skip is given, skip that many before going further.
If max is given, stop when we gave out that many.
Now that we are about to return one positive commit, mark
the parents of that commit to be potential boundaries
before returning, iff we are doing the boundary processing.
Return the commit.
- After get_revision() finishes giving out all the positive
commits, if we are doing the boundary processing, we look at
the parents that we marked as potential boundaries earlier,
see if they are really boundaries, and give them out.
It loses more code than it adds, even when the new gc_boundary()
function, which is purely for early optimization, is counted.
Note that this patch is purely for eyeballing and discussion
only. It breaks git-bundle's verify logic because the logic
does not use BOUNDARY_SHOW flag for its internal computation
anymore. After we correct it not to attempt to affect the
boundary processing by setting the BOUNDARY_SHOW flag, we can
remove BOUNDARY_SHOW from revision.h and use that bit assignment
for the new CHILD_SHOWN flag.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-05 22:10:06 +01:00
|
|
|
|
revision: avoid work after --max-count is reached
During a revision traversal in which --max-count has been
specified, we decrement a counter for each revision returned
by get_revision. When it hits 0, we typically return NULL
(the exception being if we still have boundary commits to
show).
However, before we check the counter, we call get_revision_1
to get the next commit. This might involve looking at a
large number of commits if we have restricted the traversal
(e.g., we might traverse until we find the next commit whose
diff actually matches a pathspec).
There's no need to make this get_revision_1 call when our
counter runs out. If we are not in --boundary mode, we will
just throw away the result and immediately return NULL. If
we are in --boundary mode, then we will still throw away the
result, and then start showing the boundary commits.
However, as git_revision_1 does not impact the boundary
list, it should not have an impact.
In most cases, avoiding this work will not be especially
noticeable. However, in some cases, it can make a big
difference:
[before]
$ time git rev-list -1 origin Documentation/RelNotes/1.7.11.2.txt
8d141a1d562abb31f27f599dbf6e10a6c06ed73e
real 0m0.301s
user 0m0.280s
sys 0m0.016s
[after]
$ time git rev-list -1 origin Documentation/RelNotes/1.7.11.2.txt
8d141a1d562abb31f27f599dbf6e10a6c06ed73e
real 0m0.010s
user 0m0.008s
sys 0m0.000s
Note that the output is produced almost instantaneously in
the first case, and then git uselessly spends a long time
looking for the next commit to touch that file (but there
isn't one, and we traverse all the way down to the roots).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-07-13 09:50:23 +02:00
|
|
|
if (revs->max_count > 0)
|
|
|
|
revs->max_count--;
|
2006-12-20 03:25:32 +01:00
|
|
|
}
|
revision walker: Fix --boundary when limited
This cleans up the boundary processing in the commit walker. It
- rips out the boundary logic from the commit walker. Placing
"negative" commits in the revs->commits list was Ok if all we
cared about "boundary" was the UNINTERESTING limiting case,
but conceptually it was wrong.
- makes get_revision_1() function to walk the commits and return
the results as if there is no funny postprocessing flags such
as --reverse, --skip nor --max-count.
- makes get_revision() function the postprocessing phase:
If reverse is given, wait for get_revision_1() to give
everything that it would normally give, and then reverse it
before consuming.
If skip is given, skip that many before going further.
If max is given, stop when we gave out that many.
Now that we are about to return one positive commit, mark
the parents of that commit to be potential boundaries
before returning, iff we are doing the boundary processing.
Return the commit.
- After get_revision() finishes giving out all the positive
commits, if we are doing the boundary processing, we look at
the parents that we marked as potential boundaries earlier,
see if they are really boundaries, and give them out.
It loses more code than it adds, even when the new gc_boundary()
function, which is purely for early optimization, is counted.
Note that this patch is purely for eyeballing and discussion
only. It breaks git-bundle's verify logic because the logic
does not use BOUNDARY_SHOW flag for its internal computation
anymore. After we correct it not to attempt to affect the
boundary processing by setting the BOUNDARY_SHOW flag, we can
remove BOUNDARY_SHOW from revision.h and use that bit assignment
for the new CHILD_SHOWN flag.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-05 22:10:06 +01:00
|
|
|
|
2007-03-06 03:23:57 +01:00
|
|
|
if (c)
|
|
|
|
c->object.flags |= SHOWN;
|
|
|
|
|
2013-10-31 10:25:43 +01:00
|
|
|
if (!revs->boundary)
|
2006-12-20 03:25:32 +01:00
|
|
|
return c;
|
revision walker: Fix --boundary when limited
This cleans up the boundary processing in the commit walker. It
- rips out the boundary logic from the commit walker. Placing
"negative" commits in the revs->commits list was Ok if all we
cared about "boundary" was the UNINTERESTING limiting case,
but conceptually it was wrong.
- makes get_revision_1() function to walk the commits and return
the results as if there is no funny postprocessing flags such
as --reverse, --skip nor --max-count.
- makes get_revision() function the postprocessing phase:
If reverse is given, wait for get_revision_1() to give
everything that it would normally give, and then reverse it
before consuming.
If skip is given, skip that many before going further.
If max is given, stop when we gave out that many.
Now that we are about to return one positive commit, mark
the parents of that commit to be potential boundaries
before returning, iff we are doing the boundary processing.
Return the commit.
- After get_revision() finishes giving out all the positive
commits, if we are doing the boundary processing, we look at
the parents that we marked as potential boundaries earlier,
see if they are really boundaries, and give them out.
It loses more code than it adds, even when the new gc_boundary()
function, which is purely for early optimization, is counted.
Note that this patch is purely for eyeballing and discussion
only. It breaks git-bundle's verify logic because the logic
does not use BOUNDARY_SHOW flag for its internal computation
anymore. After we correct it not to attempt to affect the
boundary processing by setting the BOUNDARY_SHOW flag, we can
remove BOUNDARY_SHOW from revision.h and use that bit assignment
for the new CHILD_SHOWN flag.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-05 22:10:06 +01:00
|
|
|
|
|
|
|
if (!c) {
|
|
|
|
/*
|
|
|
|
* get_revision_1() runs out the commits, and
|
|
|
|
* we are done computing the boundaries.
|
|
|
|
* switch to boundary commits output mode.
|
|
|
|
*/
|
|
|
|
revs->boundary = 2;
|
2008-05-25 01:02:05 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Update revs->commits to contain the list of
|
|
|
|
* boundary commits.
|
|
|
|
*/
|
|
|
|
create_boundary_commit_list(revs);
|
|
|
|
|
2008-05-25 01:02:04 +02:00
|
|
|
return get_revision_internal(revs);
|
revision walker: Fix --boundary when limited
This cleans up the boundary processing in the commit walker. It
- rips out the boundary logic from the commit walker. Placing
"negative" commits in the revs->commits list was Ok if all we
cared about "boundary" was the UNINTERESTING limiting case,
but conceptually it was wrong.
- makes get_revision_1() function to walk the commits and return
the results as if there is no funny postprocessing flags such
as --reverse, --skip nor --max-count.
- makes get_revision() function the postprocessing phase:
If reverse is given, wait for get_revision_1() to give
everything that it would normally give, and then reverse it
before consuming.
If skip is given, skip that many before going further.
If max is given, stop when we gave out that many.
Now that we are about to return one positive commit, mark
the parents of that commit to be potential boundaries
before returning, iff we are doing the boundary processing.
Return the commit.
- After get_revision() finishes giving out all the positive
commits, if we are doing the boundary processing, we look at
the parents that we marked as potential boundaries earlier,
see if they are really boundaries, and give them out.
It loses more code than it adds, even when the new gc_boundary()
function, which is purely for early optimization, is counted.
Note that this patch is purely for eyeballing and discussion
only. It breaks git-bundle's verify logic because the logic
does not use BOUNDARY_SHOW flag for its internal computation
anymore. After we correct it not to attempt to affect the
boundary processing by setting the BOUNDARY_SHOW flag, we can
remove BOUNDARY_SHOW from revision.h and use that bit assignment
for the new CHILD_SHOWN flag.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-05 22:10:06 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* boundary commits are the commits that are parents of the
|
|
|
|
* ones we got from get_revision_1() but they themselves are
|
|
|
|
* not returned from get_revision_1(). Before returning
|
|
|
|
* 'c', we need to mark its parents that they could be boundaries.
|
|
|
|
*/
|
|
|
|
|
|
|
|
for (l = c->parents; l; l = l->next) {
|
|
|
|
struct object *p;
|
|
|
|
p = &(l->item->object);
|
2007-03-06 03:23:57 +01:00
|
|
|
if (p->flags & (CHILD_SHOWN | SHOWN))
|
revision walker: Fix --boundary when limited
This cleans up the boundary processing in the commit walker. It
- rips out the boundary logic from the commit walker. Placing
"negative" commits in the revs->commits list was Ok if all we
cared about "boundary" was the UNINTERESTING limiting case,
but conceptually it was wrong.
- makes get_revision_1() function to walk the commits and return
the results as if there is no funny postprocessing flags such
as --reverse, --skip nor --max-count.
- makes get_revision() function the postprocessing phase:
If reverse is given, wait for get_revision_1() to give
everything that it would normally give, and then reverse it
before consuming.
If skip is given, skip that many before going further.
If max is given, stop when we gave out that many.
Now that we are about to return one positive commit, mark
the parents of that commit to be potential boundaries
before returning, iff we are doing the boundary processing.
Return the commit.
- After get_revision() finishes giving out all the positive
commits, if we are doing the boundary processing, we look at
the parents that we marked as potential boundaries earlier,
see if they are really boundaries, and give them out.
It loses more code than it adds, even when the new gc_boundary()
function, which is purely for early optimization, is counted.
Note that this patch is purely for eyeballing and discussion
only. It breaks git-bundle's verify logic because the logic
does not use BOUNDARY_SHOW flag for its internal computation
anymore. After we correct it not to attempt to affect the
boundary processing by setting the BOUNDARY_SHOW flag, we can
remove BOUNDARY_SHOW from revision.h and use that bit assignment
for the new CHILD_SHOWN flag.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-03-05 22:10:06 +01:00
|
|
|
continue;
|
|
|
|
p->flags |= CHILD_SHOWN;
|
|
|
|
gc_boundary(&revs->boundary_commits);
|
|
|
|
add_object_array(p, NULL, &revs->boundary_commits);
|
|
|
|
}
|
|
|
|
|
|
|
|
return c;
|
2006-12-20 03:25:32 +01:00
|
|
|
}
|
2008-05-04 12:36:54 +02:00
|
|
|
|
|
|
|
struct commit *get_revision(struct rev_info *revs)
|
|
|
|
{
|
2008-08-29 21:18:38 +02:00
|
|
|
struct commit *c;
|
|
|
|
struct commit_list *reversed;
|
|
|
|
|
|
|
|
if (revs->reverse) {
|
|
|
|
reversed = NULL;
|
2013-10-31 10:25:43 +01:00
|
|
|
while ((c = get_revision_internal(revs)))
|
2008-08-29 21:18:38 +02:00
|
|
|
commit_list_insert(c, &reversed);
|
|
|
|
revs->commits = reversed;
|
|
|
|
revs->reverse = 0;
|
|
|
|
revs->reverse_output_stage = 1;
|
|
|
|
}
|
|
|
|
|
2014-03-25 14:23:27 +01:00
|
|
|
if (revs->reverse_output_stage) {
|
|
|
|
c = pop_commit(&revs->commits);
|
|
|
|
if (revs->track_linear)
|
|
|
|
revs->linear = !!(c && c->object.flags & TRACK_LINEAR);
|
|
|
|
return c;
|
|
|
|
}
|
2008-08-29 21:18:38 +02:00
|
|
|
|
|
|
|
c = get_revision_internal(revs);
|
2008-05-04 12:36:54 +02:00
|
|
|
if (c && revs->graph)
|
|
|
|
graph_update(revs->graph, c);
|
2014-03-25 14:23:27 +01:00
|
|
|
if (!c) {
|
log: use true parents for diff even when rewriting
When using pathspec filtering in combination with diff-based log
output, parent simplification happens before the diff is computed.
The diff is therefore against the *simplified* parents.
This works okay, arguably by accident, in the normal case:
simplification reduces to one parent as long as the commit is TREESAME
to it. So the simplified parent of any given commit must have the
same tree contents on the filtered paths as its true (unfiltered)
parent.
However, --full-diff breaks this guarantee, and indeed gives pretty
spectacular results when comparing the output of
git log --graph --stat ...
git log --graph --full-diff --stat ...
(--graph internally kicks in parent simplification, much like
--parents).
To fix it, store a copy of the parent list before simplification (in a
slab) whenever --full-diff is in effect. Then use the stored parents
instead of the simplified ones in the commit display code paths. The
latter do not actually check for --full-diff to avoid duplicated code;
they just grab the original parents if save_parents() has not been
called for this revision walk.
For ordinary commits it should be obvious that this is the right thing
to do.
Merge commits are a bit subtle. Observe that with default
simplification, merge simplification is an all-or-nothing decision:
either the merge is TREESAME to one parent and disappears, or it is
different from all parents and the parent list remains intact.
Redundant parents are not pruned, so the existing code also shows them
as a merge.
So if we do show a merge commit, the parent list just consists of the
rewrite result on each parent. Running, e.g., --cc on this in
--full-diff mode is not very useful: if any commits were skipped, some
hunks will disagree with all sides of the merge (with one side,
because commits were skipped; with the others, because they didn't
have those changes in the first place). This triggers --cc showing
these hunks spuriously.
Therefore I believe that even for merge commits it is better to show
the diffs wrt. the original parents.
Reported-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Thomas Rast <trast@inf.ethz.ch>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-31 22:13:20 +02:00
|
|
|
free_saved_parents(revs);
|
2014-03-25 14:23:27 +01:00
|
|
|
if (revs->previous_parents) {
|
|
|
|
free_commit_list(revs->previous_parents);
|
|
|
|
revs->previous_parents = NULL;
|
|
|
|
}
|
|
|
|
}
|
2008-05-04 12:36:54 +02:00
|
|
|
return c;
|
|
|
|
}
|
2011-03-07 13:31:39 +01:00
|
|
|
|
|
|
|
char *get_revision_mark(const struct rev_info *revs, const struct commit *commit)
|
|
|
|
{
|
|
|
|
if (commit->object.flags & BOUNDARY)
|
|
|
|
return "-";
|
|
|
|
else if (commit->object.flags & UNINTERESTING)
|
|
|
|
return "^";
|
2011-03-07 13:31:40 +01:00
|
|
|
else if (commit->object.flags & PATCHSAME)
|
|
|
|
return "=";
|
2011-03-07 13:31:39 +01:00
|
|
|
else if (!revs || revs->left_right) {
|
|
|
|
if (commit->object.flags & SYMMETRIC_LEFT)
|
|
|
|
return "<";
|
|
|
|
else
|
|
|
|
return ">";
|
|
|
|
} else if (revs->graph)
|
|
|
|
return "*";
|
2011-03-07 13:31:40 +01:00
|
|
|
else if (revs->cherry_mark)
|
|
|
|
return "+";
|
2011-03-07 13:31:39 +01:00
|
|
|
return "";
|
|
|
|
}
|
2011-03-10 15:45:03 +01:00
|
|
|
|
|
|
|
void put_revision_mark(const struct rev_info *revs, const struct commit *commit)
|
|
|
|
{
|
|
|
|
char *mark = get_revision_mark(revs, commit);
|
|
|
|
if (!strlen(mark))
|
|
|
|
return;
|
|
|
|
fputs(mark, stdout);
|
|
|
|
putchar(' ');
|
|
|
|
}
|