diffcore-rename.c: simplify finding exact renames
The find_exact_renames function currently only uses the hash table for
grouping, i.e.:

1. add sources
2. add destinations
3. iterate all buckets, per bucket:
4. split sources from destinations
5. iterate destinations, per destination:
6. iterate sources to find best match

This can be simplified by utilizing the lookup functionality of the hash
table, i.e.:

1. add sources
2. iterate destinations, per destination:
3. lookup sources matching the current destination
4. iterate sources to find best match

This saves several iterations and file_similarity allocations for the
destinations.

Signed-off-by: Karsten Blees <blees@dcon.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
parent
48f6407ffe
commit
7c85f8acb2
@@ -243,7 +243,7 @@ static int score_compare(const void *a_, const void *b_)
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct file_similarity {
|
struct file_similarity {
|
||||||
int src_dst, index;
|
int index;
|
||||||
struct diff_filespec *filespec;
|
struct diff_filespec *filespec;
|
||||||
struct file_similarity *next;
|
struct file_similarity *next;
|
||||||
};
|
};
|
||||||
@@ -260,25 +260,21 @@ static unsigned int hash_filespec(struct diff_filespec *filespec)
|
|||||||
return hash;
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int find_identical_files(struct file_similarity *src,
|
static int find_identical_files(struct hash_table *srcs,
|
||||||
struct file_similarity *dst,
|
int dst_index,
|
||||||
struct diff_options *options)
|
struct diff_options *options)
|
||||||
{
|
{
|
||||||
int renames = 0;
|
int renames = 0;
|
||||||
|
|
||||||
/*
|
struct diff_filespec *target = rename_dst[dst_index].two;
|
||||||
* Walk over all the destinations ...
|
|
||||||
*/
|
|
||||||
do {
|
|
||||||
struct diff_filespec *target = dst->filespec;
|
|
||||||
struct file_similarity *p, *best;
|
struct file_similarity *p, *best;
|
||||||
int i = 100, best_score = -1;
|
int i = 100, best_score = -1;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* .. to find the best source match
|
* Find the best source match for specified destination.
|
||||||
*/
|
*/
|
||||||
best = NULL;
|
best = NULL;
|
||||||
for (p = src; p; p = p->next) {
|
for (p = lookup_hash(hash_filespec(target), srcs); p; p = p->next) {
|
||||||
int score;
|
int score;
|
||||||
struct diff_filespec *source = p->filespec;
|
struct diff_filespec *source = p->filespec;
|
||||||
|
|
||||||
@@ -307,61 +303,28 @@ static int find_identical_files(struct file_similarity *src,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (best) {
|
if (best) {
|
||||||
record_rename_pair(dst->index, best->index, MAX_SCORE);
|
record_rename_pair(dst_index, best->index, MAX_SCORE);
|
||||||
renames++;
|
renames++;
|
||||||
}
|
}
|
||||||
} while ((dst = dst->next) != NULL);
|
|
||||||
return renames;
|
return renames;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void free_similarity_list(struct file_similarity *p)
|
static int free_similarity_list(void *p, void *unused)
|
||||||
{
|
{
|
||||||
while (p) {
|
while (p) {
|
||||||
struct file_similarity *entry = p;
|
struct file_similarity *entry = p;
|
||||||
p = p->next;
|
p = entry->next;
|
||||||
free(entry);
|
free(entry);
|
||||||
}
|
}
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int find_same_files(void *ptr, void *data)
|
static void insert_file_table(struct hash_table *table, int index, struct diff_filespec *filespec)
|
||||||
{
|
|
||||||
int ret;
|
|
||||||
struct file_similarity *p = ptr;
|
|
||||||
struct file_similarity *src = NULL, *dst = NULL;
|
|
||||||
struct diff_options *options = data;
|
|
||||||
|
|
||||||
/* Split the hash list up into sources and destinations */
|
|
||||||
do {
|
|
||||||
struct file_similarity *entry = p;
|
|
||||||
p = p->next;
|
|
||||||
if (entry->src_dst < 0) {
|
|
||||||
entry->next = src;
|
|
||||||
src = entry;
|
|
||||||
} else {
|
|
||||||
entry->next = dst;
|
|
||||||
dst = entry;
|
|
||||||
}
|
|
||||||
} while (p);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If we have both sources *and* destinations, see if
|
|
||||||
* we can match them up
|
|
||||||
*/
|
|
||||||
ret = (src && dst) ? find_identical_files(src, dst, options) : 0;
|
|
||||||
|
|
||||||
/* Free the hashes and return the number of renames found */
|
|
||||||
free_similarity_list(src);
|
|
||||||
free_similarity_list(dst);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void insert_file_table(struct hash_table *table, int src_dst, int index, struct diff_filespec *filespec)
|
|
||||||
{
|
{
|
||||||
void **pos;
|
void **pos;
|
||||||
unsigned int hash;
|
unsigned int hash;
|
||||||
struct file_similarity *entry = xmalloc(sizeof(*entry));
|
struct file_similarity *entry = xmalloc(sizeof(*entry));
|
||||||
|
|
||||||
entry->src_dst = src_dst;
|
|
||||||
entry->index = index;
|
entry->index = index;
|
||||||
entry->filespec = filespec;
|
entry->filespec = filespec;
|
||||||
entry->next = NULL;
|
entry->next = NULL;
|
||||||
@@ -385,24 +348,26 @@ static void insert_file_table(struct hash_table *table, int src_dst, int index,
|
|||||||
*/
|
*/
|
||||||
static int find_exact_renames(struct diff_options *options)
|
static int find_exact_renames(struct diff_options *options)
|
||||||
{
|
{
|
||||||
int i;
|
int i, renames = 0;
|
||||||
struct hash_table file_table;
|
struct hash_table file_table;
|
||||||
|
|
||||||
|
/* Add all sources to the hash table */
|
||||||
init_hash(&file_table);
|
init_hash(&file_table);
|
||||||
preallocate_hash(&file_table, rename_src_nr + rename_dst_nr);
|
preallocate_hash(&file_table, rename_src_nr);
|
||||||
for (i = 0; i < rename_src_nr; i++)
|
for (i = 0; i < rename_src_nr; i++)
|
||||||
insert_file_table(&file_table, -1, i, rename_src[i].p->one);
|
insert_file_table(&file_table, i, rename_src[i].p->one);
|
||||||
|
|
||||||
|
/* Walk the destinations and find best source match */
|
||||||
for (i = 0; i < rename_dst_nr; i++)
|
for (i = 0; i < rename_dst_nr; i++)
|
||||||
insert_file_table(&file_table, 1, i, rename_dst[i].two);
|
renames += find_identical_files(&file_table, i, options);
|
||||||
|
|
||||||
/* Find the renames */
|
/* Free source file_similarity chains */
|
||||||
i = for_each_hash(&file_table, find_same_files, options);
|
for_each_hash(&file_table, free_similarity_list, options);
|
||||||
|
|
||||||
/* .. and free the hash data structure */
|
/* .. and free the hash data structure */
|
||||||
free_hash(&file_table);
|
free_hash(&file_table);
|
||||||
|
|
||||||
return i;
|
return renames;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define NUM_CANDIDATE_PER_DST 4
|
#define NUM_CANDIDATE_PER_DST 4
|
||||||
|
Loading…
Reference in New Issue
Block a user