diffcore-rename.c: simplify finding exact renames

The find_exact_renames function currently only uses the hash table for
grouping, i.e.:

1. add sources
2. add destinations
3. iterate all buckets, per bucket:
4. split sources from destinations
5. iterate destinations, per destination:
6. iterate sources to find best match

This can be simplified by utilizing the lookup functionality of the hash
table, i.e.:

1. add sources
2. iterate destinations, per destination:
3. lookup sources matching the current destination
4. iterate sources to find best match

This saves several iterations and file_similarity allocations for the
destinations.

Signed-off-by: Karsten Blees <blees@dcon.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Karsten Blees 2013-11-14 20:19:34 +01:00 committed by Junio C Hamano
parent 48f6407ffe
commit 7c85f8acb2

View File

@ -243,7 +243,7 @@ static int score_compare(const void *a_, const void *b_)
}
struct file_similarity {
int src_dst, index;
int index;
struct diff_filespec *filespec;
struct file_similarity *next;
};
@ -260,25 +260,21 @@ static unsigned int hash_filespec(struct diff_filespec *filespec)
return hash;
}
static int find_identical_files(struct file_similarity *src,
struct file_similarity *dst,
static int find_identical_files(struct hash_table *srcs,
int dst_index,
struct diff_options *options)
{
int renames = 0;
/*
* Walk over all the destinations ...
*/
do {
struct diff_filespec *target = dst->filespec;
struct diff_filespec *target = rename_dst[dst_index].two;
struct file_similarity *p, *best;
int i = 100, best_score = -1;
/*
* .. to find the best source match
* Find the best source match for specified destination.
*/
best = NULL;
for (p = src; p; p = p->next) {
for (p = lookup_hash(hash_filespec(target), srcs); p; p = p->next) {
int score;
struct diff_filespec *source = p->filespec;
@ -307,61 +303,28 @@ static int find_identical_files(struct file_similarity *src,
break;
}
if (best) {
record_rename_pair(dst->index, best->index, MAX_SCORE);
record_rename_pair(dst_index, best->index, MAX_SCORE);
renames++;
}
} while ((dst = dst->next) != NULL);
return renames;
}
static void free_similarity_list(struct file_similarity *p)
static int free_similarity_list(void *p, void *unused)
{
while (p) {
struct file_similarity *entry = p;
p = p->next;
p = entry->next;
free(entry);
}
return 0;
}
static int find_same_files(void *ptr, void *data)
{
int ret;
struct file_similarity *p = ptr;
struct file_similarity *src = NULL, *dst = NULL;
struct diff_options *options = data;
/* Split the hash list up into sources and destinations */
do {
struct file_similarity *entry = p;
p = p->next;
if (entry->src_dst < 0) {
entry->next = src;
src = entry;
} else {
entry->next = dst;
dst = entry;
}
} while (p);
/*
* If we have both sources *and* destinations, see if
* we can match them up
*/
ret = (src && dst) ? find_identical_files(src, dst, options) : 0;
/* Free the hashes and return the number of renames found */
free_similarity_list(src);
free_similarity_list(dst);
return ret;
}
static void insert_file_table(struct hash_table *table, int src_dst, int index, struct diff_filespec *filespec)
static void insert_file_table(struct hash_table *table, int index, struct diff_filespec *filespec)
{
void **pos;
unsigned int hash;
struct file_similarity *entry = xmalloc(sizeof(*entry));
entry->src_dst = src_dst;
entry->index = index;
entry->filespec = filespec;
entry->next = NULL;
@ -385,24 +348,26 @@ static void insert_file_table(struct hash_table *table, int src_dst, int index,
*/
static int find_exact_renames(struct diff_options *options)
{
int i;
int i, renames = 0;
struct hash_table file_table;
/* Add all sources to the hash table */
init_hash(&file_table);
preallocate_hash(&file_table, rename_src_nr + rename_dst_nr);
preallocate_hash(&file_table, rename_src_nr);
for (i = 0; i < rename_src_nr; i++)
insert_file_table(&file_table, -1, i, rename_src[i].p->one);
insert_file_table(&file_table, i, rename_src[i].p->one);
/* Walk the destinations and find best source match */
for (i = 0; i < rename_dst_nr; i++)
insert_file_table(&file_table, 1, i, rename_dst[i].two);
renames += find_identical_files(&file_table, i, options);
/* Find the renames */
i = for_each_hash(&file_table, find_same_files, options);
/* Free source file_similarity chains */
for_each_hash(&file_table, free_similarity_list, options);
/* .. and free the hash data structure */
free_hash(&file_table);
return i;
return renames;
}
#define NUM_CANDIDATE_PER_DST 4