diffcore-rename: provide basic implementation of idx_possible_rename()
Add a new struct dir_rename_info with various values we need inside our idx_possible_rename() function introduced in the previous commit. Add a basic implementation for this function showing how we plan to use the variables, but which will just return early with a value of -1 (not found) when those variables are not set up. Future commits will do the work necessary to set up those other variables so that idx_possible_rename() does not always return -1. Reviewed-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
parent
37a2514364
commit
bde8b9f34c
@ -367,6 +367,19 @@ static int find_exact_renames(struct diff_options *options)
|
||||
return renames;
|
||||
}
|
||||
|
||||
struct dir_rename_info {
|
||||
struct strintmap idx_map;
|
||||
struct strmap dir_rename_guess;
|
||||
struct strmap *dir_rename_count;
|
||||
unsigned setup;
|
||||
};
|
||||
|
||||
/*
 * Return a copy of the directory part of filename: everything before
 * the last '/'.  A filename without any '/' yields an empty string.
 * The copy is made with xstrndup()/xstrdup(); idx_possible_rename()
 * free()s the result once it is done with it.
 */
static char *get_dirname(const char *filename)
{
	const char *last_slash = strrchr(filename, '/');

	if (!last_slash)
		return xstrdup("");

	return xstrndup(filename, last_slash - filename);
}
|
||||
|
||||
static const char *get_basename(const char *filename)
|
||||
{
|
||||
/*
|
||||
@ -379,14 +392,86 @@ static const char *get_basename(const char *filename)
|
||||
return base ? base + 1 : filename;
|
||||
}
|
||||
|
||||
static int idx_possible_rename(char *filename)
|
||||
static int idx_possible_rename(char *filename, struct dir_rename_info *info)
|
||||
{
|
||||
/* Unconditionally return -1, "not found", for now */
|
||||
return -1;
|
||||
/*
|
||||
* Our comparison of files with the same basename (see
|
||||
* find_basename_matches() below), is only helpful when after exact
|
||||
* rename detection we have exactly one file with a given basename
|
||||
* among the rename sources and also only exactly one file with
|
||||
* that basename among the rename destinations. When we have
|
||||
* multiple files with the same basename in either set, we do not
|
||||
* know which to compare against. However, there are some
|
||||
* filenames that occur in large numbers (particularly
|
||||
* build-related filenames such as 'Makefile', '.gitignore', or
|
||||
* 'build.gradle' that potentially exist within every single
|
||||
* subdirectory), and for performance we want to be able to quickly
|
||||
* find renames for these files too.
|
||||
*
|
||||
* The reason basename comparisons are a useful heuristic was that it
|
||||
* is common for people to move files across directories while keeping
|
||||
* their filename the same. If we had a way of determining or even
|
||||
* making a good educated guess about which directory these non-unique
|
||||
* basename files had moved the file to, we could check it.
|
||||
* Luckily...
|
||||
*
|
||||
* When an entire directory is in fact renamed, we have two factors
|
||||
* helping us out:
|
||||
* (a) the original directory disappeared giving us a hint
|
||||
* about when we can apply an extra heuristic.
|
||||
* (a) we often have several files within that directory and
|
||||
* subdirectories that are renamed without changes
|
||||
* So, rules for a heuristic:
|
||||
* (0) If there basename matches are non-unique (the condition under
|
||||
* which this function is called) AND
|
||||
* (1) the directory in which the file was found has disappeared
|
||||
* (i.e. dirs_removed is non-NULL and has a relevant entry) THEN
|
||||
* (2) use exact renames of files within the directory to determine
|
||||
* where the directory is likely to have been renamed to. IF
|
||||
* there is at least one exact rename from within that
|
||||
* directory, we can proceed.
|
||||
* (3) If there are multiple places the directory could have been
|
||||
* renamed to based on exact renames, ignore all but one of them.
|
||||
* Just use the destination with the most renames going to it.
|
||||
* (4) Check if applying that directory rename to the original file
|
||||
* would result in a destination filename that is in the
|
||||
* potential rename set. If so, return the index of the
|
||||
* destination file (the index within rename_dst).
|
||||
* (5) Compare the original file and returned destination for
|
||||
* similarity, and if they are sufficiently similar, record the
|
||||
* rename.
|
||||
*
|
||||
* This function, idx_possible_rename(), is only responsible for (4).
|
||||
* The conditions/steps in (1)-(3) will be handled via setting up
|
||||
* dir_rename_count and dir_rename_guess in a future
|
||||
* initialize_dir_rename_info() function. Steps (0) and (5) are
|
||||
* handled by the caller of this function.
|
||||
*/
|
||||
char *old_dir, *new_dir;
|
||||
struct strbuf new_path = STRBUF_INIT;
|
||||
int idx;
|
||||
|
||||
if (!info->setup)
|
||||
return -1;
|
||||
|
||||
old_dir = get_dirname(filename);
|
||||
new_dir = strmap_get(&info->dir_rename_guess, old_dir);
|
||||
free(old_dir);
|
||||
if (!new_dir)
|
||||
return -1;
|
||||
|
||||
strbuf_addstr(&new_path, new_dir);
|
||||
strbuf_addch(&new_path, '/');
|
||||
strbuf_addstr(&new_path, get_basename(filename));
|
||||
|
||||
idx = strintmap_get(&info->idx_map, new_path.buf);
|
||||
strbuf_release(&new_path);
|
||||
return idx;
|
||||
}
|
||||
|
||||
static int find_basename_matches(struct diff_options *options,
|
||||
int minimum_score)
|
||||
int minimum_score,
|
||||
struct dir_rename_info *info)
|
||||
{
|
||||
/*
|
||||
* When I checked in early 2020, over 76% of file renames in linux
|
||||
@ -494,7 +579,7 @@ static int find_basename_matches(struct diff_options *options,
|
||||
dst_index = strintmap_get(&dests, base);
|
||||
if (src_index == -1 || dst_index == -1) {
|
||||
src_index = i;
|
||||
dst_index = idx_possible_rename(filename);
|
||||
dst_index = idx_possible_rename(filename, info);
|
||||
}
|
||||
if (dst_index == -1)
|
||||
continue;
|
||||
@ -677,8 +762,10 @@ void diffcore_rename(struct diff_options *options)
|
||||
int num_destinations, dst_cnt;
|
||||
int num_sources, want_copies;
|
||||
struct progress *progress = NULL;
|
||||
struct dir_rename_info info;
|
||||
|
||||
trace2_region_enter("diff", "setup", options->repo);
|
||||
info.setup = 0;
|
||||
want_copies = (detect_rename == DIFF_DETECT_COPY);
|
||||
if (!minimum_score)
|
||||
minimum_score = DEFAULT_RENAME_SCORE;
|
||||
@ -774,7 +861,8 @@ void diffcore_rename(struct diff_options *options)
|
||||
/* Utilize file basenames to quickly find renames. */
|
||||
trace2_region_enter("diff", "basename matches", options->repo);
|
||||
rename_count += find_basename_matches(options,
|
||||
min_basename_score);
|
||||
min_basename_score,
|
||||
&info);
|
||||
trace2_region_leave("diff", "basename matches", options->repo);
|
||||
|
||||
/*
|
||||
|
Loading…
Reference in New Issue
Block a user