diffcore-rename.c: simplify finding exact renames

The find_exact_renames function currently uses the hash table only for
grouping, i.e.:

1. add sources
2. add destinations
3. iterate all buckets, per bucket:
4. split sources from destinations
5. iterate destinations, per destination:
6. iterate sources to find best match

This can be simplified by utilizing the lookup functionality of the hash
table, i.e.:

1. add sources
2. iterate destinations, per destination:
3. look up sources matching the current destination
4. iterate sources to find best match

This saves several iterations and avoids allocating file_similarity
entries for the destinations.

Signed-off-by: Karsten Blees <blees@dcon.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Karsten Blees 2013-11-14 20:19:34 +01:00 committed by Junio C Hamano
parent 48f6407ffe
commit 7c85f8acb2

View File

@ -243,7 +243,7 @@ static int score_compare(const void *a_, const void *b_)
} }
struct file_similarity { struct file_similarity {
int src_dst, index; int index;
struct diff_filespec *filespec; struct diff_filespec *filespec;
struct file_similarity *next; struct file_similarity *next;
}; };
@ -260,25 +260,21 @@ static unsigned int hash_filespec(struct diff_filespec *filespec)
return hash; return hash;
} }
static int find_identical_files(struct file_similarity *src, static int find_identical_files(struct hash_table *srcs,
struct file_similarity *dst, int dst_index,
struct diff_options *options) struct diff_options *options)
{ {
int renames = 0; int renames = 0;
/* struct diff_filespec *target = rename_dst[dst_index].two;
* Walk over all the destinations ...
*/
do {
struct diff_filespec *target = dst->filespec;
struct file_similarity *p, *best; struct file_similarity *p, *best;
int i = 100, best_score = -1; int i = 100, best_score = -1;
/* /*
* .. to find the best source match * Find the best source match for specified destination.
*/ */
best = NULL; best = NULL;
for (p = src; p; p = p->next) { for (p = lookup_hash(hash_filespec(target), srcs); p; p = p->next) {
int score; int score;
struct diff_filespec *source = p->filespec; struct diff_filespec *source = p->filespec;
@ -307,61 +303,28 @@ static int find_identical_files(struct file_similarity *src,
break; break;
} }
if (best) { if (best) {
record_rename_pair(dst->index, best->index, MAX_SCORE); record_rename_pair(dst_index, best->index, MAX_SCORE);
renames++; renames++;
} }
} while ((dst = dst->next) != NULL);
return renames; return renames;
} }
static void free_similarity_list(struct file_similarity *p) static int free_similarity_list(void *p, void *unused)
{ {
while (p) { while (p) {
struct file_similarity *entry = p; struct file_similarity *entry = p;
p = p->next; p = entry->next;
free(entry); free(entry);
} }
return 0;
} }
static int find_same_files(void *ptr, void *data) static void insert_file_table(struct hash_table *table, int index, struct diff_filespec *filespec)
{
int ret;
struct file_similarity *p = ptr;
struct file_similarity *src = NULL, *dst = NULL;
struct diff_options *options = data;
/* Split the hash list up into sources and destinations */
do {
struct file_similarity *entry = p;
p = p->next;
if (entry->src_dst < 0) {
entry->next = src;
src = entry;
} else {
entry->next = dst;
dst = entry;
}
} while (p);
/*
* If we have both sources *and* destinations, see if
* we can match them up
*/
ret = (src && dst) ? find_identical_files(src, dst, options) : 0;
/* Free the hashes and return the number of renames found */
free_similarity_list(src);
free_similarity_list(dst);
return ret;
}
static void insert_file_table(struct hash_table *table, int src_dst, int index, struct diff_filespec *filespec)
{ {
void **pos; void **pos;
unsigned int hash; unsigned int hash;
struct file_similarity *entry = xmalloc(sizeof(*entry)); struct file_similarity *entry = xmalloc(sizeof(*entry));
entry->src_dst = src_dst;
entry->index = index; entry->index = index;
entry->filespec = filespec; entry->filespec = filespec;
entry->next = NULL; entry->next = NULL;
@ -385,24 +348,26 @@ static void insert_file_table(struct hash_table *table, int src_dst, int index,
*/ */
static int find_exact_renames(struct diff_options *options) static int find_exact_renames(struct diff_options *options)
{ {
int i; int i, renames = 0;
struct hash_table file_table; struct hash_table file_table;
/* Add all sources to the hash table */
init_hash(&file_table); init_hash(&file_table);
preallocate_hash(&file_table, rename_src_nr + rename_dst_nr); preallocate_hash(&file_table, rename_src_nr);
for (i = 0; i < rename_src_nr; i++) for (i = 0; i < rename_src_nr; i++)
insert_file_table(&file_table, -1, i, rename_src[i].p->one); insert_file_table(&file_table, i, rename_src[i].p->one);
/* Walk the destinations and find best source match */
for (i = 0; i < rename_dst_nr; i++) for (i = 0; i < rename_dst_nr; i++)
insert_file_table(&file_table, 1, i, rename_dst[i].two); renames += find_identical_files(&file_table, i, options);
/* Find the renames */ /* Free source file_similarity chains */
i = for_each_hash(&file_table, find_same_files, options); for_each_hash(&file_table, free_similarity_list, options);
/* .. and free the hash data structure */ /* .. and free the hash data structure */
free_hash(&file_table); free_hash(&file_table);
return i; return renames;
} }
#define NUM_CANDIDATE_PER_DST 4 #define NUM_CANDIDATE_PER_DST 4