[PATCH] Fix diff-pruning logic which was running prune too early.

For later stages to reorder patches, pruning logic and rename detection
logic should not decide which delete to discard (because another entry
said it will take over the file as a rename) until the very end.

Also fix some tests that were assuming the earlier "last one is rename
or keep everything else is copy" semantics of diff-raw format, which no
longer is true.

Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
Junio C Hamano 2005-05-23 18:14:03 -07:00 committed by Linus Torvalds
parent 9a4a100eb4
commit bceafe752c
8 changed files with 73 additions and 131 deletions

View File

@ -229,7 +229,6 @@ int main(int argc, const char **argv)
ret = diff_cache(active_cache, active_nr); ret = diff_cache(active_cache, active_nr);
if (detect_rename) if (detect_rename)
diffcore_rename(detect_rename, diff_score_opt); diffcore_rename(detect_rename, diff_score_opt);
diffcore_prune();
if (pickaxe) if (pickaxe)
diffcore_pickaxe(pickaxe); diffcore_pickaxe(pickaxe);
if (2 <= argc) if (2 <= argc)

View File

@ -115,7 +115,6 @@ int main(int argc, const char **argv)
} }
if (detect_rename) if (detect_rename)
diffcore_rename(detect_rename, diff_score_opt); diffcore_rename(detect_rename, diff_score_opt);
diffcore_prune();
if (pickaxe) if (pickaxe)
diffcore_pickaxe(pickaxe); diffcore_pickaxe(pickaxe);
if (1 < argc) if (1 < argc)

View File

@ -266,10 +266,8 @@ static int call_diff_flush(void)
{ {
if (detect_rename) if (detect_rename)
diffcore_rename(detect_rename, diff_score_opt); diffcore_rename(detect_rename, diff_score_opt);
diffcore_prune();
if (pickaxe) if (pickaxe)
diffcore_pickaxe(pickaxe); diffcore_pickaxe(pickaxe);
if (diff_queue_is_empty()) { if (diff_queue_is_empty()) {
diff_flush(DIFF_FORMAT_NO_OUTPUT, 0); diff_flush(DIFF_FORMAT_NO_OUTPUT, 0);
return 0; return 0;

164
diff.c
View File

@ -12,9 +12,6 @@ static const char *diff_opts = "-pu";
static unsigned char null_sha1[20] = { 0, }; static unsigned char null_sha1[20] = { 0, };
static int reverse_diff; static int reverse_diff;
static int generate_patch;
static int line_termination = '\n';
static int inter_name_termination = '\t';
static const char *external_diff(void) static const char *external_diff(void)
{ {
@ -502,7 +499,9 @@ struct diff_filepair *diff_queue(struct diff_queue_struct *queue,
return dp; return dp;
} }
static void diff_flush_raw(struct diff_filepair *p) static void diff_flush_raw(struct diff_filepair *p,
int line_termination,
int inter_name_termination)
{ {
int two_paths; int two_paths;
char status[10]; char status[10];
@ -566,10 +565,6 @@ static void diff_flush_patch(struct diff_filepair *p)
const char *name, *other; const char *name, *other;
char msg_[PATH_MAX*2+200], *msg; char msg_[PATH_MAX*2+200], *msg;
/* diffcore_prune() keeps "stay" entries for diff-raw
* copy/rename detection, but when we are generating
* patches we do not need them.
*/
if (diff_unmodified_pair(p)) if (diff_unmodified_pair(p))
return; return;
@ -585,7 +580,7 @@ static void diff_flush_patch(struct diff_filepair *p)
"similarity index %d%%\n" "similarity index %d%%\n"
"copy from %s\n" "copy from %s\n"
"copy to %s\n", "copy to %s\n",
(int)(0.5 + p->score * 100/MAX_SCORE), (int)(0.5 + p->score * 100.0/MAX_SCORE),
p->one->path, p->two->path); p->one->path, p->two->path);
msg = msg_; msg = msg_;
break; break;
@ -594,7 +589,7 @@ static void diff_flush_patch(struct diff_filepair *p)
"similarity index %d%%\n" "similarity index %d%%\n"
"rename old %s\n" "rename old %s\n"
"rename new %s\n", "rename new %s\n",
(int)(0.5 + p->score * 100/MAX_SCORE), (int)(0.5 + p->score * 100.0/MAX_SCORE),
p->one->path, p->two->path); p->one->path, p->two->path);
msg = msg_; msg = msg_;
break; break;
@ -630,105 +625,82 @@ int diff_needs_to_stay(struct diff_queue_struct *q, int i,
return 0; return 0;
} }
static int diff_used_as_source(struct diff_queue_struct *q, int lim,
struct diff_filespec *it)
{
int i;
for (i = 0; i < lim; i++) {
struct diff_filepair *p = q->queue[i++];
if (!strcmp(p->one->path, it->path))
return 1;
}
return 0;
}
void diffcore_prune(void)
{
/*
* Although rename/copy detection wants to have "no-change"
* entries fed into them, the downstream do not need to see
* them, unless we had rename/copy for the same path earlier.
* This function removes such entries.
*
* The applications that use rename/copy should:
*
* (1) feed change and "no-change" entries via diff_queue().
* (2) call diffcore_rename, and any other future diffcore_xxx
* that would benefit by still having "no-change" entries.
* (3) call diffcore_prune
* (4) call other diffcore_xxx that do not need to see
* "no-change" entries.
* (5) call diff_flush().
*/
struct diff_queue_struct *q = &diff_queued_diff;
struct diff_queue_struct outq;
int i;
outq.queue = NULL;
outq.nr = outq.alloc = 0;
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
if (!diff_unmodified_pair(p) ||
diff_used_as_source(q, i, p->one))
diff_q(&outq, p);
else
free(p);
}
free(q->queue);
*q = outq;
return;
}
int diff_queue_is_empty(void) int diff_queue_is_empty(void)
{ {
struct diff_queue_struct *q = &diff_queued_diff; struct diff_queue_struct *q = &diff_queued_diff;
return q->nr == 0; return q->nr == 0;
} }
static void diff_resolve_rename_copy(void)
{
int i;
struct diff_queue_struct *q = &diff_queued_diff;
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
p->status = 0;
if (DIFF_PAIR_UNMERGED(p))
p->status = 'U';
else if (!DIFF_FILE_VALID((p)->one))
p->status = 'N';
else if (!DIFF_FILE_VALID((p)->two)) {
/* maybe earlier one said 'R', meaning
* it will take it, in which case we do
* not need to keep 'D'.
*/
int j;
for (j = 0; j < i; j++) {
struct diff_filepair *pp = q->queue[j];
if (pp->status == 'R' &&
!strcmp(pp->one->path, p->one->path))
break;
}
if (j < i)
continue;
p->status = 'D';
}
else if (strcmp(p->one->path, p->two->path)) {
/* This is rename or copy. Which one is it? */
if (diff_needs_to_stay(q, i+1, p->one))
p->status = 'C';
else
p->status = 'R';
}
else if (memcmp(p->one->sha1, p->two->sha1, 20))
p->status = 'M';
else {
/* we do not need this one */
p->status = 0;
}
}
}
void diff_flush(int diff_output_style, int resolve_rename_copy) void diff_flush(int diff_output_style, int resolve_rename_copy)
{ {
struct diff_queue_struct *q = &diff_queued_diff; struct diff_queue_struct *q = &diff_queued_diff;
int i; int i;
int line_termination = '\n';
int inter_name_termination = '\t';
generate_patch = 0; if (diff_output_style == DIFF_FORMAT_MACHINE)
switch (diff_output_style) {
case DIFF_FORMAT_HUMAN:
line_termination = '\n';
inter_name_termination = '\t';
break;
case DIFF_FORMAT_MACHINE:
line_termination = inter_name_termination = 0; line_termination = inter_name_termination = 0;
break; if (resolve_rename_copy)
case DIFF_FORMAT_PATCH: diff_resolve_rename_copy();
generate_patch = 1;
break;
}
for (i = 0; i < q->nr; i++) { for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i]; struct diff_filepair *p = q->queue[i];
if (resolve_rename_copy) { if (p->status == 0)
if (DIFF_PAIR_UNMERGED(p)) continue;
p->status = 'U'; switch (diff_output_style) {
else if (!DIFF_FILE_VALID((p)->one)) case DIFF_FORMAT_PATCH:
p->status = 'N';
else if (!DIFF_FILE_VALID((p)->two))
p->status = 'D';
else if (strcmp(p->one->path, p->two->path)) {
/* This is rename or copy. Which one is it? */
if (diff_needs_to_stay(q, i+1, p->one))
p->status = 'C';
else
p->status = 'R';
}
else
p->status = 'M';
}
if (generate_patch)
diff_flush_patch(p); diff_flush_patch(p);
else break;
diff_flush_raw(p); case DIFF_FORMAT_HUMAN:
case DIFF_FORMAT_MACHINE:
diff_flush_raw(p, line_termination,
inter_name_termination);
break;
}
} }
for (i = 0; i < q->nr; i++) { for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i]; struct diff_filepair *p = q->queue[i];
diff_free_filespec_data(p->one); diff_free_filespec_data(p->one);
@ -755,9 +727,9 @@ void diff_addremove(int addremove, unsigned mode,
* with something like '=' or '*' (I haven't decided * with something like '=' or '*' (I haven't decided
* which but should not make any difference). * which but should not make any difference).
* Feeding the same new and old to diff_change() * Feeding the same new and old to diff_change()
* also has the same effect. diffcore_prune() should * also has the same effect.
* be used to filter uninteresting ones out before the * Before the final output happens, they are pruned after
* final output happens. * merged into rename/copy pairs as appropriate.
*/ */
if (reverse_diff) if (reverse_diff)
addremove = (addremove == '+' ? '-' : addremove = (addremove == '+' ? '-' :

2
diff.h
View File

@ -39,8 +39,6 @@ extern void diff_setup(int reverse);
extern void diffcore_rename(int rename_copy, int minimum_score); extern void diffcore_rename(int rename_copy, int minimum_score);
extern void diffcore_prune(void);
extern void diffcore_pickaxe(const char *needle); extern void diffcore_pickaxe(const char *needle);
extern void diffcore_pathspec(const char **pathspec); extern void diffcore_pathspec(const char **pathspec);

View File

@ -133,10 +133,7 @@ static void record_rename_pair(struct diff_queue_struct *outq,
* The downstream diffcore transformers are free to reorder * The downstream diffcore transformers are free to reorder
* the entries as long as they keep file pairs that has the * the entries as long as they keep file pairs that has the
* same p->one->path in earlier rename_rank to appear before * same p->one->path in earlier rename_rank to appear before
* later ones. This ordering is used by the diff_flush() * later ones.
* logic to tell renames from copies, and also used by the
* diffcore_prune() logic to omit unnecessary
* "no-modification" entries.
* *
* To the final output routine, and in the diff-raw format * To the final output routine, and in the diff-raw format
* output, a rename/copy that is based on a path that has a * output, a rename/copy that is based on a path that has a
@ -271,14 +268,8 @@ void diffcore_rename(int detect_rename, int minimum_score)
/* We really want to cull the candidates list early /* We really want to cull the candidates list early
* with cheap tests in order to avoid doing deltas. * with cheap tests in order to avoid doing deltas.
*
* With the current callers, we should not have already
* matched entries at this point, but it is nonetheless
* checked for sanity.
*/ */
for (i = 0; i < created.nr; i++) { for (i = 0; i < created.nr; i++) {
if (created.s[i]->xfrm_flags & RENAME_DST_MATCHED)
continue; /* we have matched exactly already */
for (h = 0; h < sizeof(srcs)/sizeof(srcs[0]); h++) { for (h = 0; h < sizeof(srcs)/sizeof(srcs[0]); h++) {
struct diff_rename_pool *p = srcs[h]; struct diff_rename_pool *p = srcs[h];
for (j = 0; j < p->nr; j++) { for (j = 0; j < p->nr; j++) {
@ -386,25 +377,13 @@ void diffcore_rename(int detect_rename, int minimum_score)
} }
else if (!DIFF_FILE_VALID(p->two)) { else if (!DIFF_FILE_VALID(p->two)) {
/* deleted */ /* deleted */
if (p->one->xfrm_flags & RENAME_SRC_GONE) diff_queue(q, p->one, p->two);
; /* rename/copy deleted it already */
else
diff_queue(q, p->one, p->two);
} }
else if (strcmp(p->one->path, p->two->path)) { else if (strcmp(p->one->path, p->two->path)) {
/* rename or copy */ /* rename or copy */
struct diff_filepair *dp = struct diff_filepair *dp =
diff_queue(q, p->one, p->two); diff_queue(q, p->one, p->two);
dp->score = p->score; dp->score = p->score;
/* if we have a later entry that is a rename/copy
* that depends on p->one, then we copy here.
* otherwise we rename it.
*/
if (!diff_needs_to_stay(&outq, i+1, p->one))
/* this is the last one, so mark it as gone.
*/
p->one->xfrm_flags |= RENAME_SRC_GONE;
} }
else else
/* otherwise it is a modified (or "stay") entry */ /* otherwise it is a modified (or "stay") entry */

View File

@ -12,8 +12,6 @@
#define DEFAULT_MINIMUM_SCORE 5000 #define DEFAULT_MINIMUM_SCORE 5000
#define RENAME_DST_MATCHED 01 #define RENAME_DST_MATCHED 01
#define RENAME_SRC_GONE 02
#define RENAME_SCORE_SHIFT 8
struct diff_filespec { struct diff_filespec {
unsigned char sha1[20]; unsigned char sha1[20];

View File

@ -147,14 +147,13 @@ test_expect_success \
################################################################ ################################################################
# tree has COPYING and rezrov. work tree has the same COPYING and # tree has COPYING and rezrov. work tree has the same COPYING and
# copy-edited COPYING.1, and unchanged rezrov. We should see # copy-edited COPYING.1, and unchanged rezrov. We should not say
# unmodified COPYING in the output, so that downstream diff-helper can # anything about rezrov nor COPYING, since the revised again diff-raw
# notice. We should not say anything about rezrov. # nows how to say Copy.
git-diff-cache -C $tree >current git-diff-cache -C $tree >current
cat >expected <<\EOF cat >expected <<\EOF
:100644 100644 6ff87c4664981e4397625791c8ea3bbb5f2279a3 0603b3238a076dc6c8022aedc6648fa523a17178 C1234 COPYING COPYING.1 :100644 100644 6ff87c4664981e4397625791c8ea3bbb5f2279a3 0603b3238a076dc6c8022aedc6648fa523a17178 C1234 COPYING COPYING.1
:100644 100644 6ff87c4664981e4397625791c8ea3bbb5f2279a3 6ff87c4664981e4397625791c8ea3bbb5f2279a3 M COPYING
EOF EOF
test_expect_success \ test_expect_success \