From 750f7b668f33c9e8decbdd8141115328992d6fea Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 19 Jun 2007 14:22:46 -0700 Subject: [PATCH 1/2] Finally implement "git log --follow" Ok, I've really held off doing this too damn long, because I'm lazy, and I was always hoping that somebody else would do it. But no, people keep asking for it, but nobody actually did anything, so I decided I might as well bite the bullet, and instead of telling people they could add a "--follow" flag to "git log" to do what they want to do, I decided that it looks like I just have to do it for them.. The code wasn't actually that complicated, in that the diffstat for this patch literally says "70 insertions(+), 1 deletions(-)", but I will have to admit that in order to get to this fairly simple patch, you did have to know and understand the internal git diff generation machinery pretty well, and had to really be able to follow how commit generation interacts with generating patches and generating the log. So I suspect that while I was right that it wasn't that hard, I might have been expecting too much of random people - this patch does seem to be firmly in the core "Linus or Junio" territory. To make a long story short: I'm sorry for it taking so long until I just did it. I'm not going to guarantee that this works for everybody, but you really can just look at the patch, and after the appropriate appreciative noises ("Ooh, aah") over how clever I am, you can then just notice that the code itself isn't really that complicated. All the real new code is in the new "try_to_follow_renames()" function. It really isn't rocket science: we notice that the pathname we were looking at went away, so we start a full tree diff and try to see if we can instead make that pathname be a rename or a copy from some other previous pathname. And if we can, we just continue, except we show *that* particular diff, and ever after we use the _previous_ pathname. 
One thing to look out for: the "rename detection" is considered to be a singular event in the _linear_ "git log" output! That's what people want to do, but I just wanted to point out that this patch is *not* carrying around a "commit,pathname" kind of pair and it's *not* going to be able to notice the file coming from multiple *different* files in earlier history. IOW, if you use "git log --follow", then you get the stupid CVS/SVN kind of "files have single identities" kind of semantics, and git log will just pick the identity based on the normal move/copy heuristics _as_if_ the history could be linearized. Put another way: I think the model is broken, but given the broken model, I think this patch does just about as well as you can do. If you have merges with the same "file" having different filenames over the two branches, git will just end up picking _one_ of the pathnames at the point where the newer one goes away. It never looks at multiple pathnames in parallel. And if you understood all that, you probably didn't need it explained, and if you didn't understand the above blathering, it doesn't really matter to you. What matters to you is that you can now do git log -p --follow builtin-rev-list.c and it will find the point where the old "rev-list.c" got renamed to "builtin-rev-list.c" and show it as such. 
Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- builtin-log.c | 5 +++++ diff.c | 2 ++ diff.h | 1 + revision.c | 4 +++- tree-diff.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 70 insertions(+), 1 deletion(-) diff --git a/builtin-log.c b/builtin-log.c index b9035ab799..073a2a16a3 100644 --- a/builtin-log.c +++ b/builtin-log.c @@ -58,6 +58,11 @@ static void cmd_log_init(int argc, const char **argv, const char *prefix, argc = setup_revisions(argc, argv, rev, "HEAD"); if (rev->diffopt.pickaxe || rev->diffopt.filter) rev->always_show_header = 0; + if (rev->diffopt.follow_renames) { + rev->always_show_header = 0; + if (rev->diffopt.nr_paths != 1) + usage("git logs can only follow renames on one pathname at a time"); + } for (i = 1; i < argc; i++) { const char *arg = argv[i]; if (!strcmp(arg, "--decorate")) { diff --git a/diff.c b/diff.c index 4aa9bbc011..9938969fa5 100644 --- a/diff.c +++ b/diff.c @@ -2210,6 +2210,8 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) } else if (!strcmp(arg, "--find-copies-harder")) options->find_copies_harder = 1; + else if (!strcmp(arg, "--follow")) + options->follow_renames = 1; else if (!strcmp(arg, "--abbrev")) options->abbrev = DEFAULT_ABBREV; else if (!prefixcmp(arg, "--abbrev=")) { diff --git a/diff.h b/diff.h index a7ee6d8c87..9fd6d447d4 100644 --- a/diff.h +++ b/diff.h @@ -55,6 +55,7 @@ struct diff_options { full_index:1, silent_on_remove:1, find_copies_harder:1, + follow_renames:1, color_diff:1, color_diff_words:1, has_changes:1, diff --git a/revision.c b/revision.c index 1f4590b896..7834bb108e 100644 --- a/revision.c +++ b/revision.c @@ -1230,7 +1230,9 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, const ch if (revs->prune_data) { diff_tree_setup_paths(revs->prune_data, &revs->pruning); - revs->prune_fn = try_to_simplify_commit; + /* Can't prune commits with rename following: the paths change.. 
*/ + if (!revs->diffopt.follow_renames) + revs->prune_fn = try_to_simplify_commit; if (!revs->full_diff) diff_tree_setup_paths(revs->prune_data, &revs->diffopt); } diff --git a/tree-diff.c b/tree-diff.c index 852498eb49..42924e9b63 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -3,6 +3,7 @@ */ #include "cache.h" #include "diff.h" +#include "diffcore.h" #include "tree.h" static char *malloc_base(const char *base, int baselen, const char *path, int pathlen) @@ -290,6 +291,59 @@ int diff_tree(struct tree_desc *t1, struct tree_desc *t2, const char *base, stru return 0; } +/* + * Does it look like the resulting diff might be due to a rename? + * - single entry + * - not a valid previous file + */ +static inline int diff_might_be_rename(void) +{ + return diff_queued_diff.nr == 1 && + !DIFF_FILE_VALID(diff_queued_diff.queue[0]->one); +} + +static void try_to_follow_renames(struct tree_desc *t1, struct tree_desc *t2, const char *base, struct diff_options *opt) +{ + struct diff_options diff_opts; + const char *paths[2]; + int i; + + diff_setup(&diff_opts); + diff_opts.recursive = 1; + diff_opts.detect_rename = DIFF_DETECT_RENAME; + diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT; + diff_opts.single_follow = opt->paths[0]; + paths[0] = NULL; + diff_tree_setup_paths(paths, &diff_opts); + if (diff_setup_done(&diff_opts) < 0) + die("unable to set up diff options to follow renames"); + diff_tree(t1, t2, base, &diff_opts); + diffcore_std(&diff_opts); + + /* NOTE! Ignore the first diff! That was the old one! */ + for (i = 1; i < diff_queued_diff.nr; i++) { + struct diff_filepair *p = diff_queued_diff.queue[i]; + + /* + * Found a source? Not only do we use that for the new + * diff_queued_diff, we also use that as the path in + * the future! 
+ */ + if ((p->status == 'R' || p->status == 'C') && !strcmp(p->two->path, opt->paths[0])) { + diff_queued_diff.queue[0] = p; + opt->paths[0] = xstrdup(p->one->path); + diff_tree_setup_paths(opt->paths, opt); + break; + } + } + + /* + * Then, ignore any but the first entry! It might be the old one, + * or it might be the rename/copy we found + */ + diff_queued_diff.nr = 1; +} + int diff_tree_sha1(const unsigned char *old, const unsigned char *new, const char *base, struct diff_options *opt) { void *tree1, *tree2; @@ -306,6 +360,11 @@ int diff_tree_sha1(const unsigned char *old, const unsigned char *new, const cha init_tree_desc(&t1, tree1, size1); init_tree_desc(&t2, tree2, size2); retval = diff_tree(&t1, &t2, base, opt); + if (opt->follow_renames && diff_might_be_rename()) { + init_tree_desc(&t1, tree1, size1); + init_tree_desc(&t2, tree2, size2); + try_to_follow_renames(&t1, &t2, base, opt); + } free(tree1); free(tree2); return retval; From 9f38e1ef7e7992ca490b9b419f52fb4d11efb0e4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 21 Jun 2007 10:22:59 -0700 Subject: [PATCH 2/2] Fix up "git log --follow" a bit.. This fixes "git log --follow" to hopefully not leak memory any more, and also cleans it up a bit to look more like some of the other functions that use "diff_queued_diff" (by *not* using it directly as a global in the code, but by instead just taking a pointer to the diff queue and using that). As to "diff_queued_diff", I think it would be better off not as a global at all, but as being just an entry in the "struct diff_options" structure, but that's a separate issue, and there may be some subtle reason for why it's currently a global. Anyway, no real changes. Instead of having a magical first entry in the diff-queue, we now end up just keeping the diff-queue clean, and keeping our "preferred" file pairing in an internal "choice" variable. That makes it easy to switch the choice around when we find a better one. 
Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- tree-diff.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/tree-diff.c b/tree-diff.c index 42924e9b63..26bdbdd2bf 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -305,9 +305,15 @@ static inline int diff_might_be_rename(void) static void try_to_follow_renames(struct tree_desc *t1, struct tree_desc *t2, const char *base, struct diff_options *opt) { struct diff_options diff_opts; - const char *paths[2]; + struct diff_queue_struct *q = &diff_queued_diff; + struct diff_filepair *choice; + const char *paths[1]; int i; + /* Remove the file creation entry from the diff queue, and remember it */ + choice = q->queue[0]; + q->nr = 0; + diff_setup(&diff_opts); diff_opts.recursive = 1; diff_opts.detect_rename = DIFF_DETECT_RENAME; @@ -320,17 +326,21 @@ static void try_to_follow_renames(struct tree_desc *t1, struct tree_desc *t2, co diff_tree(t1, t2, base, &diff_opts); diffcore_std(&diff_opts); - /* NOTE! Ignore the first diff! That was the old one! */ - for (i = 1; i < diff_queued_diff.nr; i++) { - struct diff_filepair *p = diff_queued_diff.queue[i]; + /* Go through the new set of filepairing, and see if we find a more interesting one */ + for (i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; /* * Found a source? Not only do we use that for the new - * diff_queued_diff, we also use that as the path in + * diff_queued_diff, we will also use that as the path in * the future! */ if ((p->status == 'R' || p->status == 'C') && !strcmp(p->two->path, opt->paths[0])) { - diff_queued_diff.queue[0] = p; + /* Switch the file-pairs around */ + q->queue[i] = choice; + choice = p; + + /* Update the path we use from now on.. */ opt->paths[0] = xstrdup(p->one->path); diff_tree_setup_paths(opt->paths, opt); break; @@ -338,10 +348,19 @@ static void try_to_follow_renames(struct tree_desc *t1, struct tree_desc *t2, co } /* - * Then, ignore any but the first entry! 
It might be the old one, - * or it might be the rename/copy we found + * Then, discard all the non-relevant file pairs... */ - diff_queued_diff.nr = 1; + for (i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + diff_free_filepair(p); + } + + /* + * .. and re-instate the one we want (which might be either the + * original one, or the rename/copy we found) + */ + q->queue[0] = choice; + q->nr = 1; } int diff_tree_sha1(const unsigned char *old, const unsigned char *new, const char *base, struct diff_options *opt)