[PATCH] Tweak count-delta interface

Make count_delta() report the number of bytes copied from the source
and the number of literal bytes inserted as two separate values, so
that heuristics implemented later can use them more flexibly.

This does not change the heuristics implemented in either
diffcore-rename or diffcore-break in any way.

Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
Junio C Hamano 2005-06-03 01:36:03 -07:00 committed by Linus Torvalds
parent 5b86040679
commit 355e76a4a3
5 changed files with 40 additions and 25 deletions

View File

@ -29,15 +29,18 @@ static unsigned long get_hdr_size(const unsigned char **datap)
/* /*
* NOTE. We do not _interpret_ delta fully. As an approximation, we * NOTE. We do not _interpret_ delta fully. As an approximation, we
* just count the number of bytes that are copied from the source, and * just count the number of bytes that are copied from the source, and
* the number of literal data bytes that are inserted. Number of * the number of literal data bytes that are inserted.
* bytes that are _not_ copied from the source is deletion, and number *
* of inserted literal bytes are addition, so sum of them is what we * Number of bytes that are _not_ copied from the source is deletion,
* return. xdelta can express an edit that copies data inside of the * and number of inserted literal bytes are addition, so sum of them
* destination which originally came from the source. We do not count * is the extent of damage. xdelta can express an edit that copies
* that in the following routine, so we are undercounting the source * data inside of the destination which originally came from the
* material that remains in the final output that way. * source. We do not count that in the following routine, so we are
* undercounting the source material that remains in the final output
* that way.
*/ */
unsigned long count_delta(void *delta_buf, unsigned long delta_size) int count_delta(void *delta_buf, unsigned long delta_size,
unsigned long *src_copied, unsigned long *literal_added)
{ {
unsigned long copied_from_source, added_literal; unsigned long copied_from_source, added_literal;
const unsigned char *data, *top; const unsigned char *data, *top;
@ -46,7 +49,7 @@ unsigned long count_delta(void *delta_buf, unsigned long delta_size)
/* the smallest delta size possible is 6 bytes */ /* the smallest delta size possible is 6 bytes */
if (delta_size < 6) if (delta_size < 6)
return UINT_MAX; return -1;
data = delta_buf; data = delta_buf;
top = delta_buf + delta_size; top = delta_buf + delta_size;
@ -83,13 +86,12 @@ unsigned long count_delta(void *delta_buf, unsigned long delta_size)
/* sanity check */ /* sanity check */
if (data != top || out != dst_size) if (data != top || out != dst_size)
return UINT_MAX; return -1;
/* delete size is what was _not_ copied from source. /* delete size is what was _not_ copied from source.
* edit size is that and literal additions. * edit size is that and literal additions.
*/ */
if (src_size + added_literal < copied_from_source) *src_copied = copied_from_source;
/* we ended up overcounting and underflowed */ *literal_added = added_literal;
return 0; return 0;
return (src_size - copied_from_source) + added_literal;
} }

View File

@ -4,6 +4,7 @@
#ifndef COUNT_DELTA_H #ifndef COUNT_DELTA_H
#define COUNT_DELTA_H #define COUNT_DELTA_H
unsigned long count_delta(void *, unsigned long); int count_delta(void *, unsigned long,
unsigned long *src_copied, unsigned long *literal_added);
#endif #endif

View File

@ -23,7 +23,7 @@ static int very_different(struct diff_filespec *src,
* want to get the filepair broken. * want to get the filepair broken.
*/ */
void *delta; void *delta;
unsigned long delta_size, base_size; unsigned long delta_size, base_size, src_copied, literal_added;
if (!S_ISREG(src->mode) || !S_ISREG(dst->mode)) if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
return 0; /* leave symlink rename alone */ return 0; /* leave symlink rename alone */
@ -61,10 +61,17 @@ static int very_different(struct diff_filespec *src,
return MAX_SCORE; return MAX_SCORE;
/* Estimate the edit size by interpreting delta. */ /* Estimate the edit size by interpreting delta. */
delta_size = count_delta(delta, delta_size); if (count_delta(delta, delta_size, &src_copied, &literal_added)) {
free(delta);
return 0;
}
free(delta); free(delta);
if (delta_size == UINT_MAX)
return 0; /* error in delta computation */ /* Extent of damage */
if (src->size + literal_added < src_copied)
delta_size = 0;
else
delta_size = (src->size - src_copied) + literal_added;
if (base_size < delta_size) if (base_size < delta_size)
return MAX_SCORE; return MAX_SCORE;

View File

@ -135,7 +135,7 @@ static int estimate_similarity(struct diff_filespec *src,
* call into this function in that case. * call into this function in that case.
*/ */
void *delta; void *delta;
unsigned long delta_size, base_size; unsigned long delta_size, base_size, src_copied, literal_added;
int score; int score;
/* We deal only with regular files. Symlink renames are handled /* We deal only with regular files. Symlink renames are handled
@ -174,10 +174,17 @@ static int estimate_similarity(struct diff_filespec *src,
return 0; return 0;
/* Estimate the edit size by interpreting delta. */ /* Estimate the edit size by interpreting delta. */
delta_size = count_delta(delta, delta_size); if (count_delta(delta, delta_size, &src_copied, &literal_added)) {
free(delta); free(delta);
if (delta_size == UINT_MAX)
return 0; return 0;
}
free(delta);
/* Extent of damage */
if (src->size + literal_added < src_copied)
delta_size = 0;
else
delta_size = (src->size - src_copied) + literal_added;
/* /*
* Now we will give some score to it. 100% edit gets 0 points * Now we will give some score to it. 100% edit gets 0 points

View File

@ -12,8 +12,6 @@
#define DEFAULT_RENAME_SCORE 30000 /* rename/copy similarity minimum (50%) */ #define DEFAULT_RENAME_SCORE 30000 /* rename/copy similarity minimum (50%) */
#define DEFAULT_BREAK_SCORE 59400 /* minimum for break to happen (99%)*/ #define DEFAULT_BREAK_SCORE 59400 /* minimum for break to happen (99%)*/
#define RENAME_DST_MATCHED 01
struct diff_filespec { struct diff_filespec {
unsigned char sha1[20]; unsigned char sha1[20];
char *path; char *path;