[PATCH] Tweak count-delta interface
Make count_delta() return the number of bytes copied from the source and the number of literal bytes inserted as separate values, so that later implementations of heuristics can use them more flexibly. This does not change the heuristics implemented in diffcore-rename or diffcore-break in any way.
Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
5b86040679
commit
355e76a4a3
@ -29,15 +29,18 @@ static unsigned long get_hdr_size(const unsigned char **datap)
|
|||||||
/*
|
/*
|
||||||
* NOTE. We do not _interpret_ delta fully. As an approximation, we
|
* NOTE. We do not _interpret_ delta fully. As an approximation, we
|
||||||
* just count the number of bytes that are copied from the source, and
|
* just count the number of bytes that are copied from the source, and
|
||||||
* the number of literal data bytes that are inserted. Number of
|
* the number of literal data bytes that are inserted.
|
||||||
* bytes that are _not_ copied from the source is deletion, and number
|
*
|
||||||
* of inserted literal bytes are addition, so sum of them is what we
|
* Number of bytes that are _not_ copied from the source is deletion,
|
||||||
* return. xdelta can express an edit that copies data inside of the
|
* and number of inserted literal bytes are addition, so sum of them
|
||||||
* destination which originally came from the source. We do not count
|
* is the extent of damage. xdelta can express an edit that copies
|
||||||
* that in the following routine, so we are undercounting the source
|
* data inside of the destination which originally came from the
|
||||||
* material that remains in the final output that way.
|
* source. We do not count that in the following routine, so we are
|
||||||
|
* undercounting the source material that remains in the final output
|
||||||
|
* that way.
|
||||||
*/
|
*/
|
||||||
unsigned long count_delta(void *delta_buf, unsigned long delta_size)
|
int count_delta(void *delta_buf, unsigned long delta_size,
|
||||||
|
unsigned long *src_copied, unsigned long *literal_added)
|
||||||
{
|
{
|
||||||
unsigned long copied_from_source, added_literal;
|
unsigned long copied_from_source, added_literal;
|
||||||
const unsigned char *data, *top;
|
const unsigned char *data, *top;
|
||||||
@ -46,7 +49,7 @@ unsigned long count_delta(void *delta_buf, unsigned long delta_size)
|
|||||||
|
|
||||||
/* the smallest delta size possible is 6 bytes */
|
/* the smallest delta size possible is 6 bytes */
|
||||||
if (delta_size < 6)
|
if (delta_size < 6)
|
||||||
return UINT_MAX;
|
return -1;
|
||||||
|
|
||||||
data = delta_buf;
|
data = delta_buf;
|
||||||
top = delta_buf + delta_size;
|
top = delta_buf + delta_size;
|
||||||
@ -83,13 +86,12 @@ unsigned long count_delta(void *delta_buf, unsigned long delta_size)
|
|||||||
|
|
||||||
/* sanity check */
|
/* sanity check */
|
||||||
if (data != top || out != dst_size)
|
if (data != top || out != dst_size)
|
||||||
return UINT_MAX;
|
return -1;
|
||||||
|
|
||||||
/* delete size is what was _not_ copied from source.
|
/* delete size is what was _not_ copied from source.
|
||||||
* edit size is that and literal additions.
|
* edit size is that and literal additions.
|
||||||
*/
|
*/
|
||||||
if (src_size + added_literal < copied_from_source)
|
*src_copied = copied_from_source;
|
||||||
/* we ended up overcounting and underflowed */
|
*literal_added = added_literal;
|
||||||
return 0;
|
return 0;
|
||||||
return (src_size - copied_from_source) + added_literal;
|
|
||||||
}
|
}
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#ifndef COUNT_DELTA_H
|
#ifndef COUNT_DELTA_H
|
||||||
#define COUNT_DELTA_H
|
#define COUNT_DELTA_H
|
||||||
|
|
||||||
unsigned long count_delta(void *, unsigned long);
|
int count_delta(void *, unsigned long,
|
||||||
|
unsigned long *src_copied, unsigned long *literal_added);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -23,7 +23,7 @@ static int very_different(struct diff_filespec *src,
|
|||||||
* want to get the filepair broken.
|
* want to get the filepair broken.
|
||||||
*/
|
*/
|
||||||
void *delta;
|
void *delta;
|
||||||
unsigned long delta_size, base_size;
|
unsigned long delta_size, base_size, src_copied, literal_added;
|
||||||
|
|
||||||
if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
|
if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
|
||||||
return 0; /* leave symlink rename alone */
|
return 0; /* leave symlink rename alone */
|
||||||
@ -61,10 +61,17 @@ static int very_different(struct diff_filespec *src,
|
|||||||
return MAX_SCORE;
|
return MAX_SCORE;
|
||||||
|
|
||||||
/* Estimate the edit size by interpreting delta. */
|
/* Estimate the edit size by interpreting delta. */
|
||||||
delta_size = count_delta(delta, delta_size);
|
if (count_delta(delta, delta_size, &src_copied, &literal_added)) {
|
||||||
|
free(delta);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
free(delta);
|
free(delta);
|
||||||
if (delta_size == UINT_MAX)
|
|
||||||
return 0; /* error in delta computation */
|
/* Extent of damage */
|
||||||
|
if (src->size + literal_added < src_copied)
|
||||||
|
delta_size = 0;
|
||||||
|
else
|
||||||
|
delta_size = (src->size - src_copied) + literal_added;
|
||||||
|
|
||||||
if (base_size < delta_size)
|
if (base_size < delta_size)
|
||||||
return MAX_SCORE;
|
return MAX_SCORE;
|
||||||
|
@ -135,7 +135,7 @@ static int estimate_similarity(struct diff_filespec *src,
|
|||||||
* call into this function in that case.
|
* call into this function in that case.
|
||||||
*/
|
*/
|
||||||
void *delta;
|
void *delta;
|
||||||
unsigned long delta_size, base_size;
|
unsigned long delta_size, base_size, src_copied, literal_added;
|
||||||
int score;
|
int score;
|
||||||
|
|
||||||
/* We deal only with regular files. Symlink renames are handled
|
/* We deal only with regular files. Symlink renames are handled
|
||||||
@ -174,10 +174,17 @@ static int estimate_similarity(struct diff_filespec *src,
|
|||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
/* Estimate the edit size by interpreting delta. */
|
/* Estimate the edit size by interpreting delta. */
|
||||||
delta_size = count_delta(delta, delta_size);
|
if (count_delta(delta, delta_size, &src_copied, &literal_added)) {
|
||||||
free(delta);
|
free(delta);
|
||||||
if (delta_size == UINT_MAX)
|
|
||||||
return 0;
|
return 0;
|
||||||
|
}
|
||||||
|
free(delta);
|
||||||
|
|
||||||
|
/* Extent of damage */
|
||||||
|
if (src->size + literal_added < src_copied)
|
||||||
|
delta_size = 0;
|
||||||
|
else
|
||||||
|
delta_size = (src->size - src_copied) + literal_added;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now we will give some score to it. 100% edit gets 0 points
|
* Now we will give some score to it. 100% edit gets 0 points
|
||||||
|
@ -12,8 +12,6 @@
|
|||||||
#define DEFAULT_RENAME_SCORE 30000 /* rename/copy similarity minimum (50%) */
|
#define DEFAULT_RENAME_SCORE 30000 /* rename/copy similarity minimum (50%) */
|
||||||
#define DEFAULT_BREAK_SCORE 59400 /* minimum for break to happen (99%)*/
|
#define DEFAULT_BREAK_SCORE 59400 /* minimum for break to happen (99%)*/
|
||||||
|
|
||||||
#define RENAME_DST_MATCHED 01
|
|
||||||
|
|
||||||
struct diff_filespec {
|
struct diff_filespec {
|
||||||
unsigned char sha1[20];
|
unsigned char sha1[20];
|
||||||
char *path;
|
char *path;
|
||||||
|
Loading…
Reference in New Issue
Block a user