From 53d00b39ce00073ab6b450488acc3f532a223e8f Mon Sep 17 00:00:00 2001 From: Thomas Rast Date: Wed, 31 Jul 2013 22:13:20 +0200 Subject: [PATCH 1/2] log: use true parents for diff even when rewriting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using pathspec filtering in combination with diff-based log output, parent simplification happens before the diff is computed. The diff is therefore against the *simplified* parents. This works okay, arguably by accident, in the normal case: simplification reduces to one parent as long as the commit is TREESAME to it. So the simplified parent of any given commit must have the same tree contents on the filtered paths as its true (unfiltered) parent. However, --full-diff breaks this guarantee, and indeed gives pretty spectacular results when comparing the output of git log --graph --stat ... git log --graph --full-diff --stat ... (--graph internally kicks in parent simplification, much like --parents). To fix it, store a copy of the parent list before simplification (in a slab) whenever --full-diff is in effect. Then use the stored parents instead of the simplified ones in the commit display code paths. The latter do not actually check for --full-diff to avoid duplicated code; they just grab the original parents if save_parents() has not been called for this revision walk. For ordinary commits it should be obvious that this is the right thing to do. Merge commits are a bit subtle. Observe that with default simplification, merge simplification is an all-or-nothing decision: either the merge is TREESAME to one parent and disappears, or it is different from all parents and the parent list remains intact. Redundant parents are not pruned, so the existing code also shows them as a merge. So if we do show a merge commit, the parent list just consists of the rewrite result on each parent. Running, e.g., --cc on this in --full-diff mode is not very useful: if any commits were skipped, some hunks will disagree with all sides of the merge (with one side, because commits were skipped; with the others, because they didn't have those changes in the first place). This triggers --cc showing these hunks spuriously. Therefore I believe that even for merge commits it is better to show the diffs wrt. the original parents. Reported-by: Uwe Kleine-König Helped-by: Junio C Hamano Helped-by: Ramsay Jones Signed-off-by: Thomas Rast Signed-off-by: Junio C Hamano --- combine-diff.c | 3 ++- commit.c | 16 ++++++++++++++ commit.h | 3 +++ log-tree.c | 2 +- revision.c | 43 +++++++++++++++++++++++++++++++++++- revision.h | 20 +++++++++++++++++ t/t6012-rev-list-simplify.sh | 6 +++++ 7 files changed, 90 insertions(+), 3 deletions(-) diff --git a/combine-diff.c b/combine-diff.c index 6dc06093d3..3d2aaf34ab 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -10,6 +10,7 @@ #include "refs.h" #include "userdiff.h" #include "sha1-array.h" +#include "revision.h" static struct combine_diff_path *intersect_paths(struct combine_diff_path *curr, int n, int num_parent) { @@ -1383,7 +1384,7 @@ void diff_tree_combined(const unsigned char *sha1, void diff_tree_combined_merge(const struct commit *commit, int dense, struct rev_info *rev) { - struct commit_list *parent = commit->parents; + struct commit_list *parent = get_saved_parents(rev, commit); struct sha1_array parents = SHA1_ARRAY_INIT; while (parent) { diff --git a/commit.c b/commit.c index e5862f6d7c..5ecdb38b3e 100644 --- a/commit.c +++ b/commit.c @@ -377,6 +377,22 @@ unsigned commit_list_count(const struct commit_list *l) return c; } +struct commit_list *copy_commit_list(struct commit_list *list) +{ + struct commit_list *head = NULL; + struct commit_list **pp = &head; + while (list) { + struct commit_list *new; + new = xmalloc(sizeof(struct commit_list)); + new->item = list->item; + new->next = NULL; + *pp = new; + pp = &new->next; + list = list->next; + } + return head; +} + void free_commit_list(struct commit_list *list) { while (list) { diff --git a/commit.h b/commit.h index 35cc4e266b..ca766d2f4a 100644 --- a/commit.h +++ b/commit.h @@ -62,6 +62,9 @@ struct commit_list *commit_list_insert_by_date(struct commit *item, struct commit_list **list); void commit_list_sort_by_date(struct commit_list **list); +/* Shallow copy of the input list */ +struct commit_list *copy_commit_list(struct commit_list *list); + void free_commit_list(struct commit_list *list); /* Commit formats */ diff --git a/log-tree.c b/log-tree.c index a49d8e895d..8534d91826 100644 --- a/log-tree.c +++ b/log-tree.c @@ -738,7 +738,7 @@ static int log_tree_diff(struct rev_info *opt, struct commit *commit, struct log sha1 = commit->tree->object.sha1; /* Root commit? */ - parents = commit->parents; + parents = get_saved_parents(opt, commit); if (!parents) { if (opt->show_root_diff) { diff_root_tree_sha1(sha1, "", &opt->diffopt); diff --git a/revision.c b/revision.c index 84ccc0529b..e3ca9361b4 100644 --- a/revision.c +++ b/revision.c @@ -15,6 +15,7 @@ #include "string-list.h" #include "line-log.h" #include "mailmap.h" +#include "commit-slab.h" volatile show_early_output_fn_t show_early_output; @@ -2763,7 +2764,7 @@ static int commit_match(struct commit *commit, struct rev_info *opt) return retval; } -static inline int want_ancestry(struct rev_info *revs) +static inline int want_ancestry(const struct rev_info *revs) { return (revs->rewrite_parents || revs->children.name); } @@ -2820,6 +2821,14 @@ enum commit_action simplify_commit(struct rev_info *revs, struct commit *commit) if (action == commit_show && !revs->show_all && revs->prune && revs->dense && want_ancestry(revs)) { + /* + * --full-diff on simplified parents is no good: it + * will show spurious changes from the commits that + * were elided. So we save the parents on the side + * when --full-diff is in effect. + */ + if (revs->full_diff) + save_parents(revs, commit); if (rewrite_parents(revs, commit, rewrite_one) < 0) return commit_error; } @@ -3038,6 +3047,8 @@ struct commit *get_revision(struct rev_info *revs) c = get_revision_internal(revs); if (c && revs->graph) graph_update(revs->graph, c); + if (!c) + free_saved_parents(revs); return c; } @@ -3069,3 +3080,33 @@ void put_revision_mark(const struct rev_info *revs, const struct commit *commit) fputs(mark, stdout); putchar(' '); } + +define_commit_slab(saved_parents, struct commit_list *); + +void save_parents(struct rev_info *revs, struct commit *commit) +{ + struct commit_list **pp; + + if (!revs->saved_parents_slab) { + revs->saved_parents_slab = xmalloc(sizeof(struct saved_parents)); + init_saved_parents(revs->saved_parents_slab); + } + + pp = saved_parents_at(revs->saved_parents_slab, commit); + assert(*pp == NULL); + *pp = copy_commit_list(commit->parents); +} + +struct commit_list *get_saved_parents(struct rev_info *revs, const struct commit *commit) +{ + if (!revs->saved_parents_slab) + return commit->parents; + + return *saved_parents_at(revs->saved_parents_slab, commit); +} + +void free_saved_parents(struct rev_info *revs) +{ + if (revs->saved_parents_slab) + clear_saved_parents(revs->saved_parents_slab); +} diff --git a/revision.h b/revision.h index 95859ba119..e7f1d211bf 100644 --- a/revision.h +++ b/revision.h @@ -25,6 +25,7 @@ struct rev_info; struct log_info; struct string_list; +struct saved_parents; struct rev_cmdline_info { unsigned int nr; @@ -187,6 +188,9 @@ struct rev_info { /* line level range that we are chasing */ struct decoration line_log_data; + + /* copies of the parent lists, for --full-diff display */ + struct saved_parents *saved_parents_slab; }; #define REV_TREE_SAME 0 @@ -273,4 +277,20 @@ typedef enum rewrite_result (*rewrite_parent_fn_t)(struct rev_info *revs, struct extern int rewrite_parents(struct rev_info *revs, struct commit *commit, rewrite_parent_fn_t rewrite_parent); + +/* + * Save a copy of the parent list, and return the saved copy. This is + * used by the log machinery to retrieve the original parents when + * commit->parents has been modified by history simpification. + * + * You may only call save_parents() once per commit (this is checked + * for non-root commits). + * + * get_saved_parents() will transparently return commit->parents if + * history simplification is off. + */ +extern void save_parents(struct rev_info *revs, struct commit *commit); +extern struct commit_list *get_saved_parents(struct rev_info *revs, const struct commit *commit); +extern void free_saved_parents(struct rev_info *revs); + #endif diff --git a/t/t6012-rev-list-simplify.sh b/t/t6012-rev-list-simplify.sh index 57ce2395d6..fde5e712eb 100755 --- a/t/t6012-rev-list-simplify.sh +++ b/t/t6012-rev-list-simplify.sh @@ -127,4 +127,10 @@ test_expect_success 'full history simplification without parent' ' } ' +test_expect_success '--full-diff is not affected by --parents' ' + git log -p --pretty="%H" --full-diff -- file >expected && + git log -p --pretty="%H" --full-diff --parents -- file >actual && + test_cmp expected actual +' + test_done From 838f9a15667cfefa9e645c26627ce81ce7599915 Mon Sep 17 00:00:00 2001 From: Thomas Rast Date: Sat, 3 Aug 2013 12:36:15 +0200 Subject: [PATCH 2/2] log: use true parents for diff when walking reflogs The reflog walking logic (git log -g) replaces the true parent list with the preceding commit in the reflog. This results in bogus commit diffs when combined with options such as -p; the diff is against the reflog predecessor, not the parent of the commit. Save the true parents on the side, extending the functions from the previous commit. The diff logic picks them up and uses them to show the correct diffs. We do have to be somewhat careful about repeated calling of save_parents(), since the reflog may list a commit more than once. We now store (commit_list*)-1 to distinguish the "not saved yet" and "root commit" cases. This lets us preserve an empty parent list even if save_parents() is repeatedly called. Suggested-by: Jeff King Signed-off-by: Thomas Rast Signed-off-by: Junio C Hamano --- revision.c | 28 +++++++++++++++++++++++++--- t/t1411-reflog-show.sh | 22 ++++++++++++++++++++++ 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/revision.c b/revision.c index e3ca9361b4..ac20d1aaed 100644 --- a/revision.c +++ b/revision.c @@ -2848,6 +2848,7 @@ static struct commit *get_revision_1(struct rev_info *revs) free(entry); if (revs->reflog_info) { + save_parents(revs, commit); fake_reflog_parent(revs->reflog_info, commit); commit->object.flags &= ~(ADDED | SEEN | SHOWN); } @@ -3083,6 +3084,8 @@ void put_revision_mark(const struct rev_info *revs, const struct commit *commit) define_commit_slab(saved_parents, struct commit_list *); +#define EMPTY_PARENT_LIST ((struct commit_list *)-1) + void save_parents(struct rev_info *revs, struct commit *commit) { struct commit_list **pp; @@ -3093,16 +3096,35 @@ void save_parents(struct rev_info *revs, struct commit *commit) } pp = saved_parents_at(revs->saved_parents_slab, commit); - assert(*pp == NULL); - *pp = copy_commit_list(commit->parents); + + /* + * When walking with reflogs, we may visit the same commit + * several times: once for each appearance in the reflog. + * + * In this case, save_parents() will be called multiple times. + * We want to keep only the first set of parents. We need to + * store a sentinel value for an empty (i.e., NULL) parent + * list to distinguish it from a not-yet-saved list, however. + */ + if (*pp) + return; + if (commit->parents) + *pp = copy_commit_list(commit->parents); + else + *pp = EMPTY_PARENT_LIST; } struct commit_list *get_saved_parents(struct rev_info *revs, const struct commit *commit) { + struct commit_list *parents; + if (!revs->saved_parents_slab) return commit->parents; - return *saved_parents_at(revs->saved_parents_slab, commit); + parents = *saved_parents_at(revs->saved_parents_slab, commit); + if (parents == EMPTY_PARENT_LIST) + return NULL; + return parents; } void free_saved_parents(struct rev_info *revs) diff --git a/t/t1411-reflog-show.sh b/t/t1411-reflog-show.sh index 9a105fe21f..6f47c0dd0e 100755 --- a/t/t1411-reflog-show.sh +++ b/t/t1411-reflog-show.sh @@ -144,4 +144,26 @@ test_expect_success 'empty reflog file' ' test_cmp expect actual ' +# This guards against the alternative of showing the diffs vs. the +# reflog ancestor. The reflog used is designed to list the commits +# more than once, so as to exercise the corresponding logic. +test_expect_success 'git log -g -p shows diffs vs. parents' ' + test_commit two && + git branch flipflop && + git update-ref refs/heads/flipflop -m flip1 HEAD^ && + git update-ref refs/heads/flipflop -m flop1 HEAD && + git update-ref refs/heads/flipflop -m flip2 HEAD^ && + git log -g -p flipflop >reflog && + grep -v ^Reflog reflog >actual && + git log -1 -p HEAD^ >log.one && + git log -1 -p HEAD >log.two && + ( + cat log.one; echo + cat log.two; echo + cat log.one; echo + cat log.two + ) >expect && + test_cmp expect actual +' + test_done