From c09376d55ff41128f3966609e1c4edf3e9fee840 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 7 Jan 2021 21:35:49 +0000 Subject: [PATCH 01/17] merge-ort: add new data structures for directory rename detection Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index d36a92b59b..652ff730af 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -49,14 +49,42 @@ enum merge_side { }; struct rename_info { + /* + * All variables that are arrays of size 3 correspond to data tracked + * for the sides in enum merge_side. Index 0 is almost always unused + * because we often only need to track information for MERGE_SIDE1 and + * MERGE_SIDE2 (MERGE_BASE can't have rename information since renames + * are determined relative to what changed since the MERGE_BASE). + */ + /* * pairs: pairing of filenames from diffcore_rename() - * - * Index 1 and 2 correspond to sides 1 & 2 as used in - * conflict_info.stages. Index 0 unused. */ struct diff_queue_struct pairs[3]; + /* + * dirs_removed: directories removed on a given side of history. + */ + struct strset dirs_removed[3]; + + /* + * dir_rename_count: tracking where parts of a directory were renamed to + * + * When files in a directory are renamed, they may not all go to the + * same location. Each strmap here tracks: + * old_dir => {new_dir => int} + * That is, dir_rename_count[side] is a strmap to a strintmap. + */ + struct strmap dir_rename_count[3]; + + /* + * dir_renames: computed directory renames + * + * This is a map of old_dir => new_dir and is derived in part from + * dir_rename_count. + */ + struct strmap dir_renames[3]; + /* * needed_limit: value needed for inexact rename detection to run * From f5d9fbc2e9fde9a9dc08de12ad5ed0c6d7e3fb0a Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 7 Jan 2021 21:35:50 +0000 Subject: [PATCH 02/17] merge-ort: initialize and free new directory rename data structures Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/merge-ort.c b/merge-ort.c index 652ff730af..2e6d41b0a0 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -308,8 +308,12 @@ static void free_strmap_strings(struct strmap *map) static void clear_or_reinit_internal_opts(struct merge_options_internal *opti, int reinitialize) { + struct rename_info *renames = &opti->renames; + int i; void (*strmap_func)(struct strmap *, int) = reinitialize ? strmap_partial_clear : strmap_clear; + void (*strset_func)(struct strset *) = + reinitialize ? strset_partial_clear : strset_clear; /* * We marked opti->paths with strdup_strings = 0, so that we @@ -339,6 +343,23 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti, string_list_clear(&opti->paths_to_free, 0); opti->paths_to_free.strdup_strings = 0; + /* Free memory used by various renames maps */ + for (i = MERGE_SIDE1; i <= MERGE_SIDE2; ++i) { + struct hashmap_iter iter; + struct strmap_entry *entry; + + strset_func(&renames->dirs_removed[i]); + + strmap_for_each_entry(&renames->dir_rename_count[i], + &iter, entry) { + struct strintmap *counts = entry->value; + strintmap_clear(counts); + } + strmap_func(&renames->dir_rename_count[i], 1); + + strmap_func(&renames->dir_renames[i], 0); + } + if (!reinitialize) { struct hashmap_iter iter; struct strmap_entry *e; @@ -1812,6 +1833,9 @@ static struct commit *make_virtual_commit(struct repository *repo, static void merge_start(struct merge_options *opt, struct merge_result *result) { + struct rename_info *renames; + int i; + /* Sanity checks on opt */ assert(opt->repo); @@ -1846,6 +1870,17 @@ static void merge_start(struct merge_options *opt, struct merge_result *result) /* Initialization of opt->priv, our internal merge data */ opt->priv = xcalloc(1, sizeof(*opt->priv)); + /* Initialization of various renames fields */ + renames = &opt->priv->renames; + for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) { + strset_init_with_options(&renames->dirs_removed[i], + NULL, 0); + strmap_init_with_options(&renames->dir_rename_count[i], + NULL, 1); + strmap_init_with_options(&renames->dir_renames[i], + NULL, 0); + } + /* * Although we initialize opt->priv->paths with strdup_strings=0, * that's just to avoid making yet another copy of an allocated From eb3e3e1ddffa1348532a6b8d741c235e4c73017c Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 7 Jan 2021 21:35:51 +0000 Subject: [PATCH 03/17] merge-ort: collect which directories are removed in dirs_removed Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/merge-ort.c b/merge-ort.c index 2e6d41b0a0..999a7c91c5 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -480,6 +480,27 @@ static void setup_path_info(struct merge_options *opt, result->util = mi; } +static void collect_rename_info(struct merge_options *opt, + struct name_entry *names, + const char *dirname, + const char *fullname, + unsigned filemask, + unsigned dirmask, + unsigned match_mask) +{ + struct rename_info *renames = &opt->priv->renames; + + /* Update dirs_removed, as needed */ + if (dirmask == 1 || dirmask == 3 || dirmask == 5) { + /* absent_mask = 0x07 - dirmask; sides = absent_mask/2 */ + unsigned sides = (0x07 - dirmask)/2; + if (sides & 1) + strset_add(&renames->dirs_removed[1], fullname); + if (sides & 2) + strset_add(&renames->dirs_removed[2], fullname); + } +} + static int collect_merge_info_callback(int n, unsigned long mask, unsigned long dirmask, @@ -580,6 +601,12 @@ static int collect_merge_info_callback(int n, return mask; } + /* + * Gather additional information used in rename detection. + */ + collect_rename_info(opt, names, dirname, fullpath, + filemask, dirmask, match_mask); + /* * Record information about the path so we can resolve later in * process_entries. From 112e11126b963923d84f90f12dae9b96271ccedb Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:40 +0000 Subject: [PATCH 04/17] merge-ort: add outline for computing directory renames Port some directory rename handling changes from merge-recursive.c's detect_and_process_renames() to the same-named function of merge-ort.c. This does not yet add any use or handling of directory renames, just the outline for where we start to compute them. Thus, a future patch will add port additional changes to merge-ort's detect_and_process_renames(). Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/merge-ort.c b/merge-ort.c index 999a7c91c5..b4c1fe28a5 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -721,6 +721,18 @@ static int handle_content_merge(struct merge_options *opt, /*** Function Grouping: functions related to directory rename detection ***/ +static void get_provisional_directory_renames(struct merge_options *opt, + unsigned side, + int *clean) +{ + die("Not yet implemented!"); +} + +static void handle_directory_level_conflicts(struct merge_options *opt) +{ + die("Not yet implemented!"); +} + /*** Function Grouping: functions related to regular rename detection ***/ static int process_renames(struct merge_options *opt, @@ -1086,13 +1098,24 @@ static int detect_and_process_renames(struct merge_options *opt, { struct diff_queue_struct combined; struct rename_info *renames = &opt->priv->renames; - int s, clean = 1; + int need_dir_renames, s, clean = 1; memset(&combined, 0, sizeof(combined)); detect_regular_renames(opt, merge_base, side1, MERGE_SIDE1); detect_regular_renames(opt, merge_base, side2, MERGE_SIDE2); + need_dir_renames = + !opt->priv->call_depth && + (opt->detect_directory_renames == MERGE_DIRECTORY_RENAMES_TRUE || + opt->detect_directory_renames == MERGE_DIRECTORY_RENAMES_CONFLICT); + + if (need_dir_renames) { + get_provisional_directory_renames(opt, MERGE_SIDE1, &clean); + get_provisional_directory_renames(opt, MERGE_SIDE2, &clean); + handle_directory_level_conflicts(opt); + } + ALLOC_GROW(combined.queue, renames->pairs[1].nr + renames->pairs[2].nr, combined.alloc); From 04264d4079a4423f32d85d7833d63faa5ace4e33 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:41 +0000 Subject: [PATCH 05/17] merge-ort: add outline of get_provisional_directory_renames() This function is based on merge-recursive.c's get_directory_renames(), except that the first half has been split out into a not-yet-implemented compute_rename_counts(). The primary difference here is our lack of the non_unique_new_dir boolean in our strmap. The lack of that field will at first cause us to fail testcase 2b of t6423; however, future optimizations will obviate the need for that ugly field so we have just left it out. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/merge-ort.c b/merge-ort.c index b4c1fe28a5..2a1c62c533 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -721,11 +721,66 @@ static int handle_content_merge(struct merge_options *opt, /*** Function Grouping: functions related to directory rename detection ***/ +static void compute_rename_counts(struct diff_queue_struct *pairs, + struct strmap *dir_rename_count, + struct strset *dirs_removed) +{ + die("Not yet implemented!"); +} + static void get_provisional_directory_renames(struct merge_options *opt, unsigned side, int *clean) { - die("Not yet implemented!"); + struct hashmap_iter iter; + struct strmap_entry *entry; + struct rename_info *renames = &opt->priv->renames; + + compute_rename_counts(&renames->pairs[side], + &renames->dir_rename_count[side], + &renames->dirs_removed[side]); + /* + * Collapse + * dir_rename_count: old_directory -> {new_directory -> count} + * down to + * dir_renames: old_directory -> best_new_directory + * where best_new_directory is the one with the unique highest count. + */ + strmap_for_each_entry(&renames->dir_rename_count[side], &iter, entry) { + const char *source_dir = entry->key; + struct strintmap *counts = entry->value; + struct hashmap_iter count_iter; + struct strmap_entry *count_entry; + int max = 0; + int bad_max = 0; + const char *best = NULL; + + strintmap_for_each_entry(counts, &count_iter, count_entry) { + const char *target_dir = count_entry->key; + intptr_t count = (intptr_t)count_entry->value; + + if (count == max) + bad_max = max; + else if (count > max) { + max = count; + best = target_dir; + } + } + + if (bad_max == max) { + path_msg(opt, source_dir, 0, + _("CONFLICT (directory rename split): " + "Unclear where to rename %s to; it was " + "renamed to multiple other directories, with " + "no destination getting a majority of the " + "files."), + source_dir); + *clean = 0; + } else { + strmap_put(&renames->dir_renames[side], + source_dir, (void*)best); + } + } } static void handle_directory_level_conflicts(struct merge_options *opt) From 9fe37e7bb9e251a3419d93f0090c17df05fafb4e Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:42 +0000 Subject: [PATCH 06/17] merge-ort: copy get_renamed_dir_portion() from merge-recursive.c Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/merge-ort.c b/merge-ort.c index 2a1c62c533..eb609ab006 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -721,6 +721,110 @@ static int handle_content_merge(struct merge_options *opt, /*** Function Grouping: functions related to directory rename detection ***/ +MAYBE_UNUSED +static void get_renamed_dir_portion(const char *old_path, const char *new_path, + char **old_dir, char **new_dir) +{ + char *end_of_old, *end_of_new; + + /* Default return values: NULL, meaning no rename */ + *old_dir = NULL; + *new_dir = NULL; + + /* + * For + * "a/b/c/d/e/foo.c" -> "a/b/some/thing/else/e/foo.c" + * the "e/foo.c" part is the same, we just want to know that + * "a/b/c/d" was renamed to "a/b/some/thing/else" + * so, for this example, this function returns "a/b/c/d" in + * *old_dir and "a/b/some/thing/else" in *new_dir. + */ + + /* + * If the basename of the file changed, we don't care. We want + * to know which portion of the directory, if any, changed. + */ + end_of_old = strrchr(old_path, '/'); + end_of_new = strrchr(new_path, '/'); + + /* + * If end_of_old is NULL, old_path wasn't in a directory, so there + * could not be a directory rename (our rule elsewhere that a + * directory which still exists is not considered to have been + * renamed means the root directory can never be renamed -- because + * the root directory always exists). + */ + if (end_of_old == NULL) + return; /* Note: *old_dir and *new_dir are still NULL */ + + /* + * If new_path contains no directory (end_of_new is NULL), then we + * have a rename of old_path's directory to the root directory. + */ + if (end_of_new == NULL) { + *old_dir = xstrndup(old_path, end_of_old - old_path); + *new_dir = xstrdup(""); + return; + } + + /* Find the first non-matching character traversing backwards */ + while (*--end_of_new == *--end_of_old && + end_of_old != old_path && + end_of_new != new_path) + ; /* Do nothing; all in the while loop */ + + /* + * If both got back to the beginning of their strings, then the + * directory didn't change at all, only the basename did. + */ + if (end_of_old == old_path && end_of_new == new_path && + *end_of_old == *end_of_new) + return; /* Note: *old_dir and *new_dir are still NULL */ + + /* + * If end_of_new got back to the beginning of its string, and + * end_of_old got back to the beginning of some subdirectory, then + * we have a rename/merge of a subdirectory into the root, which + * needs slightly special handling. + * + * Note: There is no need to consider the opposite case, with a + * rename/merge of the root directory into some subdirectory + * because as noted above the root directory always exists so it + * cannot be considered to be renamed. + */ + if (end_of_new == new_path && + end_of_old != old_path && end_of_old[-1] == '/') { + *old_dir = xstrndup(old_path, --end_of_old - old_path); + *new_dir = xstrdup(""); + return; + } + + /* + * We've found the first non-matching character in the directory + * paths. That means the current characters we were looking at + * were part of the first non-matching subdir name going back from + * the end of the strings. Get the whole name by advancing both + * end_of_old and end_of_new to the NEXT '/' character. That will + * represent the entire directory rename. + * + * The reason for the increment is cases like + * a/b/star/foo/whatever.c -> a/b/tar/foo/random.c + * After dropping the basename and going back to the first + * non-matching character, we're now comparing: + * a/b/s and a/b/ + * and we want to be comparing: + * a/b/star/ and a/b/tar/ + * but without the pre-increment, the one on the right would stay + * a/b/. + */ + end_of_old = strchr(++end_of_old, '/'); + end_of_new = strchr(++end_of_new, '/'); + + /* Copy the old and new directories into *old_dir and *new_dir. */ + *old_dir = xstrndup(old_path, end_of_old - old_path); + *new_dir = xstrndup(new_path, end_of_new - new_path); +} + static void compute_rename_counts(struct diff_queue_struct *pairs, struct strmap *dir_rename_count, struct strset *dirs_removed) From 2f620a4f1983efcb70dee55917240b08065f2c4e Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:43 +0000 Subject: [PATCH 07/17] merge-ort: implement compute_rename_counts() This function is based on the first half of get_directory_renames() from merge-recursive.c; as part of the implementation, factor out a routine, increment_count(), to update the bookkeeping to track the number of items renamed into new directories. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index eb609ab006..8aa415c542 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -721,7 +721,6 @@ static int handle_content_merge(struct merge_options *opt, /*** Function Grouping: functions related to directory rename detection ***/ -MAYBE_UNUSED static void get_renamed_dir_portion(const char *old_path, const char *new_path, char **old_dir, char **new_dir) { @@ -825,11 +824,62 @@ static void get_renamed_dir_portion(const char *old_path, const char *new_path, *new_dir = xstrndup(new_path, end_of_new - new_path); } +static void increment_count(struct strmap *dir_rename_count, + char *old_dir, + char *new_dir) +{ + struct strintmap *counts; + struct strmap_entry *e; + + /* Get the {new_dirs -> counts} mapping using old_dir */ + e = strmap_get_entry(dir_rename_count, old_dir); + if (e) { + counts = e->value; + } else { + counts = xmalloc(sizeof(*counts)); + strintmap_init_with_options(counts, 0, NULL, 1); + strmap_put(dir_rename_count, old_dir, counts); + } + + /* Increment the count for new_dir */ + strintmap_incr(counts, new_dir, 1); +} + static void compute_rename_counts(struct diff_queue_struct *pairs, struct strmap *dir_rename_count, struct strset *dirs_removed) { - die("Not yet implemented!"); + int i; + + for (i = 0; i < pairs->nr; ++i) { + char *old_dir, *new_dir; + struct diff_filepair *pair = pairs->queue[i]; + + /* File not part of directory rename if it wasn't renamed */ + if (pair->status != 'R') + continue; + + /* Get the old and new directory names */ + get_renamed_dir_portion(pair->one->path, pair->two->path, + &old_dir, &new_dir); + if (!old_dir) + /* Directory didn't change at all; ignore this one. */ + continue; + + /* + * Make dir_rename_count contain a map of a map: + * old_directory -> {new_directory -> count} + * In other words, for every pair look at the directories for + * the old filename and the new filename and count how many + * times that pairing occurs. + */ + if (strset_contains(dirs_removed, old_dir)) + increment_count(dir_rename_count, old_dir, new_dir); + + /* Free resources we don't need anymore */ + free(old_dir); + free(new_dir); + } } static void get_provisional_directory_renames(struct merge_options *opt, From 98d0d08128200e244a83d917b63567c30547c36d Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:44 +0000 Subject: [PATCH 08/17] merge-ort: implement handle_directory_level_conflicts() This is modelled on the version of handle_directory_level_conflicts() from merge-recursive.c, but is massively simplified due to the following factors: * strmap API provides simplifications over using direct hashmap * we have a dirs_removed field in struct rename_info that we have an easy way to populate from collect_merge_info(); this was already used in compute_rename_counts() and thus we do not need to check for condition #2. * The removal of condition #2 by handling it earlier in the code also obviates the need to check for condition #3 -- if both sides renamed a directory, meaning that the directory no longer exists on either side, then neither side could have added any new files to that directory, and thus there are no files whose locations we need to move due to such a directory rename. In fact, the same logic that makes condition #3 irrelevant means condition #1 is also irrelevant so we could drop this function. However, it is cheap to check if both sides rename the same directory, and doing so can save future computation. So, simply remove any directories that both sides renamed from the list of directory renames. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/merge-ort.c b/merge-ort.c index 8aa415c542..6dea4206dc 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -939,7 +939,24 @@ static void get_provisional_directory_renames(struct merge_options *opt, static void handle_directory_level_conflicts(struct merge_options *opt) { - die("Not yet implemented!"); + struct hashmap_iter iter; + struct strmap_entry *entry; + struct string_list duplicated = STRING_LIST_INIT_NODUP; + struct rename_info *renames = &opt->priv->renames; + struct strmap *side1_dir_renames = &renames->dir_renames[MERGE_SIDE1]; + struct strmap *side2_dir_renames = &renames->dir_renames[MERGE_SIDE2]; + int i; + + strmap_for_each_entry(side1_dir_renames, &iter, entry) { + if (strmap_contains(side2_dir_renames, entry->key)) + string_list_append(&duplicated, entry->key); + } + + for (i = 0; i < duplicated.nr; i++) { + strmap_remove(side1_dir_renames, duplicated.items[i].string, 0); + strmap_remove(side2_dir_renames, duplicated.items[i].string, 0); + } + string_list_clear(&duplicated, 0); } /*** Function Grouping: functions related to regular rename detection ***/ From fa5e06d6902003f5f115b7497030b5c66537726e Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:45 +0000 Subject: [PATCH 09/17] merge-ort: modify collect_renames() for directory rename handling collect_renames() is similar to merge-recursive.c's get_renames(), but lacks the directory rename handling found in the latter. Port that code structure over to merge-ort. This introduces three new die-not-yet-implemented functions that will be defined in future commits. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 74 insertions(+), 4 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index 6dea4206dc..c86ed85b09 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -721,6 +721,11 @@ static int handle_content_merge(struct merge_options *opt, /*** Function Grouping: functions related to directory rename detection ***/ +struct collision_info { + struct string_list source_files; + unsigned reported_already:1; +}; + static void get_renamed_dir_portion(const char *old_path, const char *new_path, char **old_dir, char **new_dir) { @@ -959,6 +964,31 @@ static void handle_directory_level_conflicts(struct merge_options *opt) string_list_clear(&duplicated, 0); } +static void compute_collisions(struct strmap *collisions, + struct strmap *dir_renames, + struct diff_queue_struct *pairs) +{ + die("Not yet implemented."); +} + +static char *check_for_directory_rename(struct merge_options *opt, + const char *path, + unsigned side_index, + struct strmap *dir_renames, + struct strmap *dir_rename_exclusions, + struct strmap *collisions, + int *clean_merge) +{ + die("Not yet implemented."); +} + +static void apply_directory_rename_modifications(struct merge_options *opt, + struct diff_filepair *pair, + char *new_path) +{ + die("Not yet implemented."); +} + /*** Function Grouping: functions related to regular rename detection ***/ static int process_renames(struct merge_options *opt, @@ -1284,22 +1314,44 @@ static void detect_regular_renames(struct merge_options *opt, */ static int collect_renames(struct merge_options *opt, struct diff_queue_struct *result, - unsigned side_index) + unsigned side_index, + struct strmap *dir_renames_for_side, + struct strmap *rename_exclusions) { int i, clean = 1; + struct strmap collisions; struct diff_queue_struct *side_pairs; + struct hashmap_iter iter; + struct strmap_entry *entry; struct rename_info *renames = &opt->priv->renames; side_pairs = &renames->pairs[side_index]; + compute_collisions(&collisions, dir_renames_for_side, side_pairs); for (i = 0; i < side_pairs->nr; ++i) { struct diff_filepair *p = side_pairs->queue[i]; + char *new_path; /* non-NULL only with directory renames */ - if (p->status != 'R') { + if (p->status != 'A' && p->status != 'R') { diff_free_filepair(p); continue; } + new_path = check_for_directory_rename(opt, p->two->path, + side_index, + dir_renames_for_side, + rename_exclusions, + &collisions, + &clean); + + if (p->status != 'R' && !new_path) { + diff_free_filepair(p); + continue; + } + + if (new_path) + apply_directory_rename_modifications(opt, p, new_path); + /* * p->score comes back from diffcore_rename_extended() with * the similarity of the renamed file. The similarity is @@ -1314,6 +1366,20 @@ static int collect_renames(struct merge_options *opt, result->queue[result->nr++] = p; } + /* Free each value in the collisions map */ + strmap_for_each_entry(&collisions, &iter, entry) { + struct collision_info *info = entry->value; + string_list_clear(&info->source_files, 0); + } + /* + * In compute_collisions(), we set collisions.strdup_strings to 0 + * so that we wouldn't have to make another copy of the new_path + * allocated by apply_dir_rename(). But now that we've used them + * and have no other references to these strings, it is time to + * deallocate them. + */ + free_strmap_strings(&collisions); + strmap_clear(&collisions, 1); return clean; } @@ -1345,8 +1411,12 @@ static int detect_and_process_renames(struct merge_options *opt, ALLOC_GROW(combined.queue, renames->pairs[1].nr + renames->pairs[2].nr, combined.alloc); - clean &= collect_renames(opt, &combined, MERGE_SIDE1); - clean &= collect_renames(opt, &combined, MERGE_SIDE2); + clean &= collect_renames(opt, &combined, MERGE_SIDE1, + &renames->dir_renames[2], + &renames->dir_renames[1]); + clean &= collect_renames(opt, &combined, MERGE_SIDE2, + &renames->dir_renames[1], + &renames->dir_renames[2]); QSORT(combined.queue, combined.nr, compare_pairs); clean &= process_renames(opt, &combined); From d9d015df4a3d794ec889dc70902d28f3ca87d817 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:46 +0000 Subject: [PATCH 10/17] merge-ort: implement compute_collisions() This is nearly a wholesale copy of compute_collisions() from merge-recursive.c, and the logic remains the same, but it has been tweaked slightly due to: * using strmap.h API (instead of direct hashmaps) * allocation/freeing of data structures were done separately in merge_start() and clear_or_reinit_internal_opts() in an earlier patch in this series * there is no non_unique_new_dir data field in merge-ort; that will be handled a different way It does depend on two new functions, apply_dir_rename() and check_dir_renamed() which were introduced with simple die-not-yet-implemented shells and will be implemented in subsequent patches. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/merge-ort.c b/merge-ort.c index c86ed85b09..22028d57f3 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -726,6 +726,19 @@ struct collision_info { unsigned reported_already:1; }; +/* + * Return a new string that replaces the beginning portion (which matches + * rename_info->key), with rename_info->util.new_dir. In perl-speak: + * new_path_name = (old_path =~ s/rename_info->key/rename_info->value/); + * NOTE: + * Caller must ensure that old_path starts with rename_info->key + '/'. + */ +static char *apply_dir_rename(struct strmap_entry *rename_info, + const char *old_path) +{ + die("Not yet implemented!"); +} + static void get_renamed_dir_portion(const char *old_path, const char *new_path, char **old_dir, char **new_dir) { @@ -964,11 +977,64 @@ static void handle_directory_level_conflicts(struct merge_options *opt) string_list_clear(&duplicated, 0); } +static struct strmap_entry *check_dir_renamed(const char *path, + struct strmap *dir_renames) +{ + die("Not yet implemented!"); +} + static void compute_collisions(struct strmap *collisions, struct strmap *dir_renames, struct diff_queue_struct *pairs) { - die("Not yet implemented."); + int i; + + strmap_init_with_options(collisions, NULL, 0); + if (strmap_empty(dir_renames)) + return; + + /* + * Multiple files can be mapped to the same path due to directory + * renames done by the other side of history. Since that other + * side of history could have merged multiple directories into one, + * if our side of history added the same file basename to each of + * those directories, then all N of them would get implicitly + * renamed by the directory rename detection into the same path, + * and we'd get an add/add/.../add conflict, and all those adds + * from *this* side of history. This is not representable in the + * index, and users aren't going to easily be able to make sense of + * it. So we need to provide a good warning about what's + * happening, and fall back to no-directory-rename detection + * behavior for those paths. + * + * See testcases 9e and all of section 5 from t6043 for examples. + */ + for (i = 0; i < pairs->nr; ++i) { + struct strmap_entry *rename_info; + struct collision_info *collision_info; + char *new_path; + struct diff_filepair *pair = pairs->queue[i]; + + if (pair->status != 'A' && pair->status != 'R') + continue; + rename_info = check_dir_renamed(pair->two->path, dir_renames); + if (!rename_info) + continue; + + new_path = apply_dir_rename(rename_info, pair->two->path); + assert(new_path); + collision_info = strmap_get(collisions, new_path); + if (collision_info) { + free(new_path); + } else { + collision_info = xcalloc(1, + sizeof(struct collision_info)); + string_list_init(&collision_info->source_files, 0); + strmap_put(collisions, new_path, collision_info); + } + string_list_insert(&collision_info->source_files, + pair->two->path); + } } static char *check_for_directory_rename(struct merge_options *opt, From fbcfc0cc17019da292c4aeef0fe43a8980207beb Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:47 +0000 Subject: [PATCH 11/17] merge-ort: implement apply_dir_rename() and check_dir_renamed() Both of these are copied from merge-recursive.c, with just minor tweaks due to using strmap API and not having a non_unique_new_dir field. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index 22028d57f3..db922272ed 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -736,7 +736,29 @@ struct collision_info { static char *apply_dir_rename(struct strmap_entry *rename_info, const char *old_path) { - die("Not yet implemented!"); + struct strbuf new_path = STRBUF_INIT; + const char *old_dir = rename_info->key; + const char *new_dir = rename_info->value; + int oldlen, newlen, new_dir_len; + + oldlen = strlen(old_dir); + if (*new_dir == '\0') + /* + * If someone renamed/merged a subdirectory into the root + * directory (e.g. 'some/subdir' -> ''), then we want to + * avoid returning + * '' + '/filename' + * as the rename; we need to make old_path + oldlen advance + * past the '/' character. + */ + oldlen++; + new_dir_len = strlen(new_dir); + newlen = new_dir_len + (strlen(old_path) - oldlen) + 1; + strbuf_grow(&new_path, newlen); + strbuf_add(&new_path, new_dir, new_dir_len); + strbuf_addstr(&new_path, &old_path[oldlen]); + + return strbuf_detach(&new_path, NULL); } static void get_renamed_dir_portion(const char *old_path, const char *new_path, @@ -980,7 +1002,18 @@ static void handle_directory_level_conflicts(struct merge_options *opt) static struct strmap_entry *check_dir_renamed(const char *path, struct strmap *dir_renames) { - die("Not yet implemented!"); + char *temp = xstrdup(path); + char *end; + struct strmap_entry *e = NULL; + + while ((end = strrchr(temp, '/'))) { + *end = '\0'; + e = strmap_get_entry(dir_renames, temp); + if (e) + break; + } + free(temp); + return e; } static void compute_collisions(struct strmap *collisions, From 47325e8533a35ee48e85f85543e4f054d21a36c1 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:48 +0000 Subject: [PATCH 12/17] merge-ort: implement check_for_directory_rename() This is copied from merge-recursive.c, with minor tweaks due to using strmap API and the fact that it can use opt->priv->paths to get all pathnames that exist instead of taking a tree object. This depends on a new function, handle_path_level_conflicts(), which just has a placeholder die-not-yet-implemented implementation for now; a subsequent patch will implement it. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/merge-ort.c b/merge-ort.c index db922272ed..86d2214006 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -864,6 +864,21 @@ static void get_renamed_dir_portion(const char *old_path, const char *new_path, *new_dir = xstrndup(new_path, end_of_new - new_path); } +/* + * See if there is a directory rename for path, and if there are any file + * level conflicts on the given side for the renamed location. If there is + * a rename and there are no conflicts, return the new name. Otherwise, + * return NULL. + */ +static char *handle_path_level_conflicts(struct merge_options *opt, + const char *path, + unsigned side_index, + struct strmap_entry *rename_info, + struct strmap *collisions) +{ + die("Not yet implemented"); +} + static void increment_count(struct strmap *dir_rename_count, char *old_dir, char *new_dir) @@ -1078,7 +1093,57 @@ static char *check_for_directory_rename(struct merge_options *opt, struct strmap *collisions, int *clean_merge) { - die("Not yet implemented."); + char *new_path = NULL; + struct strmap_entry *rename_info; + struct strmap_entry *otherinfo = NULL; + const char *new_dir; + + if (strmap_empty(dir_renames)) + return new_path; + rename_info = check_dir_renamed(path, dir_renames); + if (!rename_info) + return new_path; + /* old_dir = rename_info->key; */ + new_dir = rename_info->value; + + /* + * This next part is a little weird. We do not want to do an + * implicit rename into a directory we renamed on our side, because + * that will result in a spurious rename/rename(1to2) conflict. An + * example: + * Base commit: dumbdir/afile, otherdir/bfile + * Side 1: smrtdir/afile, otherdir/bfile + * Side 2: dumbdir/afile, dumbdir/bfile + * Here, while working on Side 1, we could notice that otherdir was + * renamed/merged to dumbdir, and change the diff_filepair for + * otherdir/bfile into a rename into dumbdir/bfile. However, Side + * 2 will notice the rename from dumbdir to smrtdir, and do the + * transitive rename to move it from dumbdir/bfile to + * smrtdir/bfile. That gives us bfile in dumbdir vs being in + * smrtdir, a rename/rename(1to2) conflict. We really just want + * the file to end up in smrtdir. And the way to achieve that is + * to not let Side1 do the rename to dumbdir, since we know that is + * the source of one of our directory renames. + * + * That's why otherinfo and dir_rename_exclusions is here. + * + * As it turns out, this also prevents N-way transient rename + * confusion; See testcases 9c and 9d of t6043. + */ + otherinfo = strmap_get_entry(dir_rename_exclusions, new_dir); + if (otherinfo) { + path_msg(opt, rename_info->key, 1, + _("WARNING: Avoiding applying %s -> %s rename " + "to %s, because %s itself was renamed."), + rename_info->key, new_dir, path, new_dir); + return NULL; + } + + new_path = handle_path_level_conflicts(opt, path, side_index, + rename_info, collisions); + *clean_merge &= (new_path != NULL); + + return new_path; } static void apply_directory_rename_modifications(struct merge_options *opt, From bea433655a79b7fbc04444508c55f13ece942a21 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:49 +0000 Subject: [PATCH 13/17] merge-ort: implement handle_path_level_conflicts() This is copied from merge-recursive.c, with minor tweaks due to: * using strmap API * merge-ort not using the non_unique_new_dir field, since it'll obviate its need entirely later with performance improvements * adding a new path_in_way() function that uses opt->priv->paths instead of doing an expensive tree_has_path() lookup to see if a tree has a given path. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/merge-ort.c b/merge-ort.c index 86d2214006..ad8ecb7e44 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -864,6 +864,16 @@ static void get_renamed_dir_portion(const char *old_path, const char *new_path, *new_dir = xstrndup(new_path, end_of_new - new_path); } +static int path_in_way(struct strmap *paths, const char *path, unsigned side_mask) +{ + struct merged_info *mi = strmap_get(paths, path); + struct conflict_info *ci; + if (!mi) + return 0; + INITIALIZE_CI(ci, mi); + return mi->clean || (side_mask & (ci->filemask | ci->dirmask)); +} + /* * See if there is a directory rename for path, and if there are any file * level conflicts on the given side for the renamed location. If there is @@ -876,7 +886,67 @@ static char *handle_path_level_conflicts(struct merge_options *opt, struct strmap_entry *rename_info, struct strmap *collisions) { - die("Not yet implemented"); + char *new_path = NULL; + struct collision_info *c_info; + int clean = 1; + struct strbuf collision_paths = STRBUF_INIT; + + /* + * entry has the mapping of old directory name to new directory name + * that we want to apply to path. + */ + new_path = apply_dir_rename(rename_info, path); + if (!new_path) + BUG("Failed to apply directory rename!"); + + /* + * The caller needs to have ensured that it has pre-populated + * collisions with all paths that map to new_path. Do a quick check + * to ensure that's the case. + */ + c_info = strmap_get(collisions, new_path); + if (c_info == NULL) + BUG("c_info is NULL"); + + /* + * Check for one-sided add/add/.../add conflicts, i.e. + * where implicit renames from the other side doing + * directory rename(s) can affect this side of history + * to put multiple paths into the same location. Warn + * and bail on directory renames for such paths. + */ + if (c_info->reported_already) { + clean = 0; + } else if (path_in_way(&opt->priv->paths, new_path, 1 << side_index)) { + c_info->reported_already = 1; + strbuf_add_separated_string_list(&collision_paths, ", ", + &c_info->source_files); + path_msg(opt, new_path, 0, + _("CONFLICT (implicit dir rename): Existing file/dir " + "at %s in the way of implicit directory rename(s) " + "putting the following path(s) there: %s."), + new_path, collision_paths.buf); + clean = 0; + } else if (c_info->source_files.nr > 1) { + c_info->reported_already = 1; + strbuf_add_separated_string_list(&collision_paths, ", ", + &c_info->source_files); + path_msg(opt, new_path, 0, + _("CONFLICT (implicit dir rename): Cannot map more " + "than one path to %s; implicit directory renames " + "tried to put these paths there: %s"), + new_path, collision_paths.buf); + clean = 0; + } + + /* Free memory we no longer need */ + strbuf_release(&collision_paths); + if (!clean && new_path) { + free(new_path); + return NULL; + } + + return new_path; } static void increment_count(struct strmap *dir_rename_count, From 05b85c6eeb1710ff83f11f71abefa2064e50e61b Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:50 +0000 Subject: [PATCH 14/17] merge-ort: add a new toplevel_dir field Due to the string-equality-iff-pointer-equality requirements placed on merged_info.directory_name, apply_directory_rename_modifications() will need to have access to the exact toplevel directory name string pointer and can't just use a new empty string. Store it in a field that we can use. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index ad8ecb7e44..5b3b56dc1c 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -168,12 +168,15 @@ struct merge_options_internal { struct rename_info renames; /* - * current_dir_name: temporary var used in collect_merge_info_callback() + * current_dir_name, toplevel_dir: temporary vars * - * Used to set merged_info.directory_name; see documentation for that - * variable and the requirements placed on that field. + * These are used in collect_merge_info_callback(), and will set the + * various merged_info.directory_name for the various paths we get; + * see documentation for that variable and the requirements placed on + * that field. */ const char *current_dir_name; + const char *toplevel_dir; /* call_depth: recursion level counter for merging merge bases */ int call_depth; @@ -682,10 +685,10 @@ static int collect_merge_info(struct merge_options *opt, int ret; struct tree_desc t[3]; struct traverse_info info; - const char *toplevel_dir_placeholder = ""; - opt->priv->current_dir_name = toplevel_dir_placeholder; - setup_traverse_info(&info, toplevel_dir_placeholder); + opt->priv->toplevel_dir = ""; + opt->priv->current_dir_name = opt->priv->toplevel_dir; + setup_traverse_info(&info, opt->priv->toplevel_dir); info.fn = collect_merge_info_callback; info.data = opt; info.show_all_errors = 1; From 089d82bc18514d78513a85e95de257f74da18205 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:51 +0000 Subject: [PATCH 15/17] merge-ort: implement apply_directory_rename_modifications() This function roughly follows the same outline as the function of the same name from merge-recursive.c, but the code diverges in multiple ways due to some special considerations: * merge-ort's version needs to update opt->priv->paths with any new paths (and opt->priv->paths points to struct conflict_infos which track quite a bit of metadata for each path); merge-recursive's version would directly update the index * merge-ort requires that opt->priv->paths has any leading directories of any relevant files also be included in the set of paths. And due to pointer equality requirements on merged_info.directory_name, we have to be careful how we compute and insert these. * due to the above requirements on opt->priv->paths, merge-ort's version starts with a long comment to explain all the special considerations that need to be handled * merge-ort can use the full data stored in opt->priv->paths to avoid making expensive get_tree_entry() calls to regather the necessary data. * due to messages being deferred automatically in merge-ort, this is the best place to handle conflict messages whereas in merge-recursive.c they are deferred manually so that processing of entries does all the printing Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 168 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 167 insertions(+), 1 deletion(-) diff --git a/merge-ort.c b/merge-ort.c index 5b3b56dc1c..1152d0ae21 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -1223,7 +1223,173 @@ static void apply_directory_rename_modifications(struct merge_options *opt, struct diff_filepair *pair, char *new_path) { - die("Not yet implemented."); + /* + * The basic idea is to get the conflict_info from opt->priv->paths + * at old path, and insert it into new_path; basically just this: + * ci = strmap_get(&opt->priv->paths, old_path); + * strmap_remove(&opt->priv->paths, old_path, 0); + * strmap_put(&opt->priv->paths, new_path, ci); + * However, there are some factors complicating this: + * - opt->priv->paths may already have an entry at new_path + * - Each ci tracks its containing directory, so we need to + * update that + * - If another ci has the same containing directory, then + * the two char*'s MUST point to the same location. See the + * comment in struct merged_info. strcmp equality is not + * enough; we need pointer equality. + * - opt->priv->paths must hold the parent directories of any + * entries that are added. So, if this directory rename + * causes entirely new directories, we must recursively add + * parent directories. + * - For each parent directory added to opt->priv->paths, we + * also need to get its parent directory stored in its + * conflict_info->merged.directory_name with all the same + * requirements about pointer equality. + */ + struct string_list dirs_to_insert = STRING_LIST_INIT_NODUP; + struct conflict_info *ci, *new_ci; + struct strmap_entry *entry; + const char *branch_with_new_path, *branch_with_dir_rename; + const char *old_path = pair->two->path; + const char *parent_name; + const char *cur_path; + int i, len; + + entry = strmap_get_entry(&opt->priv->paths, old_path); + old_path = entry->key; + ci = entry->value; + VERIFY_CI(ci); + + /* Find parent directories missing from opt->priv->paths */ + cur_path = new_path; + while (1) { + /* Find the parent directory of cur_path */ + char *last_slash = strrchr(cur_path, '/'); + if (last_slash) { + parent_name = xstrndup(cur_path, last_slash - cur_path); + } else { + parent_name = opt->priv->toplevel_dir; + break; + } + + /* Look it up in opt->priv->paths */ + entry = strmap_get_entry(&opt->priv->paths, parent_name); + if (entry) { + free((char*)parent_name); + parent_name = entry->key; /* reuse known pointer */ + break; + } + + /* Record this is one of the directories we need to insert */ + string_list_append(&dirs_to_insert, parent_name); + cur_path = parent_name; + } + + /* Traverse dirs_to_insert and insert them into opt->priv->paths */ + for (i = dirs_to_insert.nr-1; i >= 0; --i) { + struct conflict_info *dir_ci; + char *cur_dir = dirs_to_insert.items[i].string; + + dir_ci = xcalloc(1, sizeof(*dir_ci)); + + dir_ci->merged.directory_name = parent_name; + len = strlen(parent_name); + /* len+1 because of trailing '/' character */ + dir_ci->merged.basename_offset = (len > 0 ? len+1 : len); + dir_ci->dirmask = ci->filemask; + strmap_put(&opt->priv->paths, cur_dir, dir_ci); + + parent_name = cur_dir; + } + + /* + * We are removing old_path from opt->priv->paths. old_path also will + * eventually need to be freed, but it may still be used by e.g. + * ci->pathnames. So, store it in another string-list for now. + */ + string_list_append(&opt->priv->paths_to_free, old_path); + + assert(ci->filemask == 2 || ci->filemask == 4); + assert(ci->dirmask == 0); + strmap_remove(&opt->priv->paths, old_path, 0); + + branch_with_new_path = (ci->filemask == 2) ? opt->branch1 : opt->branch2; + branch_with_dir_rename = (ci->filemask == 2) ? opt->branch2 : opt->branch1; + + /* Now, finally update ci and stick it into opt->priv->paths */ + ci->merged.directory_name = parent_name; + len = strlen(parent_name); + ci->merged.basename_offset = (len > 0 ? len+1 : len); + new_ci = strmap_get(&opt->priv->paths, new_path); + if (!new_ci) { + /* Place ci back into opt->priv->paths, but at new_path */ + strmap_put(&opt->priv->paths, new_path, ci); + } else { + int index; + + /* A few sanity checks */ + VERIFY_CI(new_ci); + assert(ci->filemask == 2 || ci->filemask == 4); + assert((new_ci->filemask & ci->filemask) == 0); + assert(!new_ci->merged.clean); + + /* Copy stuff from ci into new_ci */ + new_ci->filemask |= ci->filemask; + if (new_ci->dirmask) + new_ci->df_conflict = 1; + index = (ci->filemask >> 1); + new_ci->pathnames[index] = ci->pathnames[index]; + new_ci->stages[index].mode = ci->stages[index].mode; + oidcpy(&new_ci->stages[index].oid, &ci->stages[index].oid); + + free(ci); + ci = new_ci; + } + + if (opt->detect_directory_renames == MERGE_DIRECTORY_RENAMES_TRUE) { + /* Notify user of updated path */ + if (pair->status == 'A') + path_msg(opt, new_path, 1, + _("Path updated: %s added in %s inside a " + "directory that was renamed in %s; moving " + "it to %s."), + old_path, branch_with_new_path, + branch_with_dir_rename, new_path); + else + path_msg(opt, new_path, 1, + _("Path updated: %s renamed to %s in %s, " + "inside a directory that was renamed in %s; " + "moving it to %s."), + pair->one->path, old_path, branch_with_new_path, + branch_with_dir_rename, new_path); + } else { + /* + * opt->detect_directory_renames has the value + * MERGE_DIRECTORY_RENAMES_CONFLICT, so mark these as conflicts. + */ + ci->path_conflict = 1; + if (pair->status == 'A') + path_msg(opt, new_path, 0, + _("CONFLICT (file location): %s added in %s " + "inside a directory that was renamed in %s, " + "suggesting it should perhaps be moved to " + "%s."), + old_path, branch_with_new_path, + branch_with_dir_rename, new_path); + else + path_msg(opt, new_path, 0, + _("CONFLICT (file location): %s renamed to %s " + "in %s, inside a directory that was renamed " + "in %s, suggesting it should perhaps be " + "moved to %s."), + pair->one->path, old_path, branch_with_new_path, + branch_with_dir_rename, new_path); + } + + /* + * Finally, record the new location. + */ + pair->two->path = new_path; } /*** Function Grouping: functions related to regular rename detection ***/ From 1b6b902d95a5b2c5677b6351c670202dae7cd230 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:52 +0000 Subject: [PATCH 16/17] merge-ort: process_renames() now needs more defensiveness Since directory rename detection adds new paths to opt->priv->paths and removes old ones, process_renames() needs to now check whether pair->one->path actually exists in opt->priv->paths instead of just assuming it does. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index 1152d0ae21..7314f9c69c 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -1410,12 +1410,28 @@ static int process_renames(struct merge_options *opt, const char *rename_branch = NULL, *delete_branch = NULL; old_ent = strmap_get_entry(&opt->priv->paths, pair->one->path); - oldpath = old_ent->key; - oldinfo = old_ent->value; - new_ent = strmap_get_entry(&opt->priv->paths, pair->two->path); - newpath = new_ent->key; - newinfo = new_ent->value; + if (old_ent) { + oldpath = old_ent->key; + oldinfo = old_ent->value; + } + newpath = pair->two->path; + if (new_ent) { + newpath = new_ent->key; + newinfo = new_ent->value; + } + + /* + * If pair->one->path isn't in opt->priv->paths, that means + * that either directory rename detection removed that + * path, or a parent directory of oldpath was resolved and + * we don't even need the rename; in either case, we can + * skip it. If oldinfo->merged.clean, then the other side + * of history had no changes to oldpath and we don't need + * the rename and can skip it. + */ + if (!oldinfo || oldinfo->merged.clean) + continue; /* * diff_filepairs have copies of pathnames, thus we have to From 203c872c4f29e08867d572eca4f18cd1f39f9e4b Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 19 Jan 2021 19:53:53 +0000 Subject: [PATCH 17/17] merge-ort: fix a directory rename detection bug As noted in commit 902c521a35 ("t6423: more involved directory rename test", 2020-10-15), when we have a case where * dir/subdir/ has several files * almost all files in dir/subdir/ are renamed to folder/subdir/ * one of the files in dir/subdir/ is renamed to folder/subdir/newsubdir/ * the other side of history (that doesn't do the renames) adds a new file to dir/subdir/ Then for the majority of the file renames, the directory rename of dir/subdir/ -> folder/subdir/ is actually not represented that way but as dir/ -> folder/ We also had one rename that was represented as dir/subdir/ -> folder/subdir/newsubdir/ Now, since there's a new file in dir/subdir/, where does it go? Well, there's only one rule for dir/subdir/, so the code previously noted that this rule had the "majority" of the one "relevant" rename and thus erroneously used it to place the file in folder/subdir/newsubdir/. We really want the heavy weight associated with dir/ -> folder/ to also be treated as dir/subdir/ -> folder/subdir/, so that we correctly place the file in folder/subdir/. Add a bunch of logic to make sure that we use all relevant renamings in directory rename detection. Note that testcase 12f of t6423 still fails after this, but it gets further than merge-recursive does. There are some performance related bits in that testcase (the region_enter messages) that do not yet succeed, but the rest of the testcase works after this patch. Subsequent patch series will fix up the performance side. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 198 +++++++++++++++++++++------------------------------- 1 file changed, 81 insertions(+), 117 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index 7314f9c69c..3192b27625 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -764,109 +764,6 @@ static char *apply_dir_rename(struct strmap_entry *rename_info, return strbuf_detach(&new_path, NULL); } -static void get_renamed_dir_portion(const char *old_path, const char *new_path, - char **old_dir, char **new_dir) -{ - char *end_of_old, *end_of_new; - - /* Default return values: NULL, meaning no rename */ - *old_dir = NULL; - *new_dir = NULL; - - /* - * For - * "a/b/c/d/e/foo.c" -> "a/b/some/thing/else/e/foo.c" - * the "e/foo.c" part is the same, we just want to know that - * "a/b/c/d" was renamed to "a/b/some/thing/else" - * so, for this example, this function returns "a/b/c/d" in - * *old_dir and "a/b/some/thing/else" in *new_dir. - */ - - /* - * If the basename of the file changed, we don't care. We want - * to know which portion of the directory, if any, changed. - */ - end_of_old = strrchr(old_path, '/'); - end_of_new = strrchr(new_path, '/'); - - /* - * If end_of_old is NULL, old_path wasn't in a directory, so there - * could not be a directory rename (our rule elsewhere that a - * directory which still exists is not considered to have been - * renamed means the root directory can never be renamed -- because - * the root directory always exists). - */ - if (end_of_old == NULL) - return; /* Note: *old_dir and *new_dir are still NULL */ - - /* - * If new_path contains no directory (end_of_new is NULL), then we - * have a rename of old_path's directory to the root directory. - */ - if (end_of_new == NULL) { - *old_dir = xstrndup(old_path, end_of_old - old_path); - *new_dir = xstrdup(""); - return; - } - - /* Find the first non-matching character traversing backwards */ - while (*--end_of_new == *--end_of_old && - end_of_old != old_path && - end_of_new != new_path) - ; /* Do nothing; all in the while loop */ - - /* - * If both got back to the beginning of their strings, then the - * directory didn't change at all, only the basename did. - */ - if (end_of_old == old_path && end_of_new == new_path && - *end_of_old == *end_of_new) - return; /* Note: *old_dir and *new_dir are still NULL */ - - /* - * If end_of_new got back to the beginning of its string, and - * end_of_old got back to the beginning of some subdirectory, then - * we have a rename/merge of a subdirectory into the root, which - * needs slightly special handling. - * - * Note: There is no need to consider the opposite case, with a - * rename/merge of the root directory into some subdirectory - * because as noted above the root directory always exists so it - * cannot be considered to be renamed. - */ - if (end_of_new == new_path && - end_of_old != old_path && end_of_old[-1] == '/') { - *old_dir = xstrndup(old_path, --end_of_old - old_path); - *new_dir = xstrdup(""); - return; - } - - /* - * We've found the first non-matching character in the directory - * paths. That means the current characters we were looking at - * were part of the first non-matching subdir name going back from - * the end of the strings. Get the whole name by advancing both - * end_of_old and end_of_new to the NEXT '/' character. That will - * represent the entire directory rename. - * - * The reason for the increment is cases like - * a/b/star/foo/whatever.c -> a/b/tar/foo/random.c - * After dropping the basename and going back to the first - * non-matching character, we're now comparing: - * a/b/s and a/b/ - * and we want to be comparing: - * a/b/star/ and a/b/tar/ - * but without the pre-increment, the one on the right would stay - * a/b/. - */ - end_of_old = strchr(++end_of_old, '/'); - end_of_new = strchr(++end_of_new, '/'); - - /* Copy the old and new directories into *old_dir and *new_dir. */ - *old_dir = xstrndup(old_path, end_of_old - old_path); - *new_dir = xstrndup(new_path, end_of_new - new_path); -} - static int path_in_way(struct strmap *paths, const char *path, unsigned side_mask) { struct merged_info *mi = strmap_get(paths, path); @@ -952,6 +849,14 @@ static char *handle_path_level_conflicts(struct merge_options *opt, return new_path; } +static void dirname_munge(char *filename) +{ + char *slash = strrchr(filename, '/'); + if (!slash) + slash = filename; + *slash = '\0'; +} + static void increment_count(struct strmap *dir_rename_count, char *old_dir, char *new_dir) @@ -973,6 +878,76 @@ static void increment_count(struct strmap *dir_rename_count, strintmap_incr(counts, new_dir, 1); } +static void update_dir_rename_counts(struct strmap *dir_rename_count, + struct strset *dirs_removed, + const char *oldname, + const char *newname) +{ + char *old_dir = xstrdup(oldname); + char *new_dir = xstrdup(newname); + char new_dir_first_char = new_dir[0]; + int first_time_in_loop = 1; + + while (1) { + dirname_munge(old_dir); + dirname_munge(new_dir); + + /* + * When renaming + * "a/b/c/d/e/foo.c" -> "a/b/some/thing/else/e/foo.c" + * then this suggests that both + * a/b/c/d/e/ => a/b/some/thing/else/e/ + * a/b/c/d/ => a/b/some/thing/else/ + * so we want to increment counters for both. We do NOT, + * however, also want to suggest that there was the following + * rename: + * a/b/c/ => a/b/some/thing/ + * so we need to quit at that point. + * + * Note the when first_time_in_loop, we only strip off the + * basename, and we don't care if that's different. + */ + if (!first_time_in_loop) { + char *old_sub_dir = strchr(old_dir, '\0')+1; + char *new_sub_dir = strchr(new_dir, '\0')+1; + if (!*new_dir) { + /* + * Special case when renaming to root directory, + * i.e. when new_dir == "". In this case, we had + * something like + * a/b/subdir => subdir + * and so dirname_munge() sets things up so that + * old_dir = "a/b\0subdir\0" + * new_dir = "\0ubdir\0" + * We didn't have a '/' to overwrite a '\0' onto + * in new_dir, so we have to compare differently. + */ + if (new_dir_first_char != old_sub_dir[0] || + strcmp(old_sub_dir+1, new_sub_dir)) + break; + } else { + if (strcmp(old_sub_dir, new_sub_dir)) + break; + } + } + + if (strset_contains(dirs_removed, old_dir)) + increment_count(dir_rename_count, old_dir, new_dir); + else + break; + + /* If we hit toplevel directory ("") for old or new dir, quit */ + if (!*old_dir || !*new_dir) + break; + + first_time_in_loop = 0; + } + + /* Free resources we don't need anymore */ + free(old_dir); + free(new_dir); +} + static void compute_rename_counts(struct diff_queue_struct *pairs, struct strmap *dir_rename_count, struct strset *dirs_removed) @@ -980,20 +955,12 @@ static void compute_rename_counts(struct diff_queue_struct *pairs, int i; for (i = 0; i < pairs->nr; ++i) { - char *old_dir, *new_dir; struct diff_filepair *pair = pairs->queue[i]; /* File not part of directory rename if it wasn't renamed */ if (pair->status != 'R') continue; - /* Get the old and new directory names */ - get_renamed_dir_portion(pair->one->path, pair->two->path, - &old_dir, &new_dir); - if (!old_dir) - /* Directory didn't change at all; ignore this one. */ - continue; - /* * Make dir_rename_count contain a map of a map: * old_directory -> {new_directory -> count} @@ -1001,12 +968,9 @@ static void compute_rename_counts(struct diff_queue_struct *pairs, * the old filename and the new filename and count how many * times that pairing occurs. */ - if (strset_contains(dirs_removed, old_dir)) - increment_count(dir_rename_count, old_dir, new_dir); - - /* Free resources we don't need anymore */ - free(old_dir); - free(new_dir); + update_dir_rename_counts(dir_rename_count, dirs_removed, + pair->one->path, + pair->two->path); } }