885f0063e9
Three-way merges, by their nature, are going to often have two or more trees match at a given subdirectory. We can avoid calling fill_tree_descriptor() on the same tree by checking when these trees match. Noting when various oids match will also be useful in other calculations and optimizations as well. Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
491 lines
15 KiB
C
491 lines
15 KiB
C
/*
|
|
* "Ostensibly Recursive's Twin" merge strategy, or "ort" for short. Meant
|
|
* as a drop-in replacement for the "recursive" merge strategy, allowing one
|
|
* to replace
|
|
*
|
|
* git merge [-s recursive]
|
|
*
|
|
* with
|
|
*
|
|
* git merge -s ort
|
|
*
|
|
* Note: git's parser allows the space between '-s' and its argument to be
|
|
* missing. (Should I have backronymed "ham", "alsa", "kip", "nap", "alvo",
|
|
* "cale", "peedy", or "ins" instead of "ort"?)
|
|
*/
|
|
|
|
#include "cache.h"
|
|
#include "merge-ort.h"
|
|
|
|
#include "diff.h"
|
|
#include "diffcore.h"
|
|
#include "strmap.h"
|
|
#include "tree.h"
|
|
#include "xdiff-interface.h"
|
|
|
|
/*
|
|
* We have many arrays of size 3. Whenever we have such an array, the
|
|
* indices refer to one of the sides of the three-way merge. This is so
|
|
* pervasive that the constants 0, 1, and 2 are used in many places in the
|
|
* code (especially in arithmetic operations to find the other side's index
|
|
* or to compute a relevant mask), but sometimes these enum names are used
|
|
* to aid code clarity.
|
|
*
|
|
* See also 'filemask' and 'dirmask' in struct conflict_info; the "ith side"
|
|
* referred to there is one of these three sides.
|
|
*/
|
|
/* Names for the indices 0, 1 and 2 used by the size-3 per-side arrays. */
enum merge_side {
	MERGE_BASE = 0,	/* the common ancestor */
	MERGE_SIDE1,	/* 1: first side being merged */
	MERGE_SIDE2	/* 2: second side being merged */
};
|
|
|
|
struct merge_options_internal {
|
|
/*
|
|
* paths: primary data structure in all of merge ort.
|
|
*
|
|
* The keys of paths:
|
|
* * are full relative paths from the toplevel of the repository
|
|
* (e.g. "drivers/firmware/raspberrypi.c").
|
|
* * store all relevant paths in the repo, both directories and
|
|
* files (e.g. drivers, drivers/firmware would also be included)
|
|
* * these keys serve to intern all the path strings, which allows
|
|
* us to do pointer comparison on directory names instead of
|
|
* strcmp; we just have to be careful to use the interned strings.
|
|
*
|
|
* The values of paths:
|
|
* * either a pointer to a merged_info, or a conflict_info struct
|
|
* * merged_info contains all relevant information for a
|
|
* non-conflicted entry.
|
|
* * conflict_info contains a merged_info, plus any additional
|
|
* information about a conflict such as the higher orders stages
|
|
* involved and the names of the paths those came from (handy
|
|
* once renames get involved).
|
|
* * a path may start "conflicted" (i.e. point to a conflict_info)
|
|
* and then a later step (e.g. three-way content merge) determines
|
|
* it can be cleanly merged, at which point it'll be marked clean
|
|
* and the algorithm will ignore any data outside the contained
|
|
* merged_info for that entry
|
|
* * If an entry remains conflicted, the merged_info portion of a
|
|
* conflict_info will later be filled with whatever version of
|
|
* the file should be placed in the working directory (e.g. an
|
|
* as-merged-as-possible variation that contains conflict markers).
|
|
*/
|
|
struct strmap paths;
|
|
|
|
/*
|
|
* conflicted: a subset of keys->values from "paths"
|
|
*
|
|
* conflicted is basically an optimization between process_entries()
|
|
* and record_conflicted_index_entries(); the latter could loop over
|
|
* ALL the entries in paths AGAIN and look for the ones that are
|
|
* still conflicted, but since process_entries() has to loop over
|
|
* all of them, it saves the ones it couldn't resolve in this strmap
|
|
* so that record_conflicted_index_entries() can iterate just the
|
|
* relevant entries.
|
|
*/
|
|
struct strmap conflicted;
|
|
|
|
/*
|
|
* current_dir_name: temporary var used in collect_merge_info_callback()
|
|
*
|
|
* Used to set merged_info.directory_name; see documentation for that
|
|
* variable and the requirements placed on that field.
|
|
*/
|
|
const char *current_dir_name;
|
|
|
|
/* call_depth: recursion level counter for merging merge bases */
|
|
int call_depth;
|
|
};
|
|
|
|
struct version_info {
|
|
struct object_id oid;
|
|
unsigned short mode;
|
|
};
|
|
|
|
struct merged_info {
|
|
/* if is_null, ignore result. otherwise result has oid & mode */
|
|
struct version_info result;
|
|
unsigned is_null:1;
|
|
|
|
/*
|
|
* clean: whether the path in question is cleanly merged.
|
|
*
|
|
* see conflict_info.merged for more details.
|
|
*/
|
|
unsigned clean:1;
|
|
|
|
/*
|
|
* basename_offset: offset of basename of path.
|
|
*
|
|
* perf optimization to avoid recomputing offset of final '/'
|
|
* character in pathname (0 if no '/' in pathname).
|
|
*/
|
|
size_t basename_offset;
|
|
|
|
/*
|
|
* directory_name: containing directory name.
|
|
*
|
|
* Note that we assume directory_name is constructed such that
|
|
* strcmp(dir1_name, dir2_name) == 0 iff dir1_name == dir2_name,
|
|
* i.e. string equality is equivalent to pointer equality. For this
|
|
* to hold, we have to be careful setting directory_name.
|
|
*/
|
|
const char *directory_name;
|
|
};
|
|
|
|
struct conflict_info {
|
|
/*
|
|
* merged: the version of the path that will be written to working tree
|
|
*
|
|
* WARNING: It is critical to check merged.clean and ensure it is 0
|
|
* before reading any conflict_info fields outside of merged.
|
|
* Allocated merge_info structs will always have clean set to 1.
|
|
* Allocated conflict_info structs will have merged.clean set to 0
|
|
* initially. The merged.clean field is how we know if it is safe
|
|
* to access other parts of conflict_info besides merged; if a
|
|
* conflict_info's merged.clean is changed to 1, the rest of the
|
|
* algorithm is not allowed to look at anything outside of the
|
|
* merged member anymore.
|
|
*/
|
|
struct merged_info merged;
|
|
|
|
/* oids & modes from each of the three trees for this path */
|
|
struct version_info stages[3];
|
|
|
|
/* pathnames for each stage; may differ due to rename detection */
|
|
const char *pathnames[3];
|
|
|
|
/* Whether this path is/was involved in a directory/file conflict */
|
|
unsigned df_conflict:1;
|
|
|
|
/*
|
|
* For filemask and dirmask, the ith bit corresponds to whether the
|
|
* ith entry is a file (filemask) or a directory (dirmask). Thus,
|
|
* filemask & dirmask is always zero, and filemask | dirmask is at
|
|
* most 7 but can be less when a path does not appear as either a
|
|
* file or a directory on at least one side of history.
|
|
*
|
|
* Note that these masks are related to enum merge_side, as the ith
|
|
* entry corresponds to side i.
|
|
*
|
|
* These values come from a traverse_trees() call; more info may be
|
|
* found looking at tree-walk.h's struct traverse_info,
|
|
* particularly the documentation above the "fn" member (note that
|
|
* filemask = mask & ~dirmask from that documentation).
|
|
*/
|
|
unsigned filemask:3;
|
|
unsigned dirmask:3;
|
|
|
|
/*
|
|
* Optimization to track which stages match, to avoid the need to
|
|
* recompute it in multiple steps. Either 0 or at least 2 bits are
|
|
* set; if at least 2 bits are set, their corresponding stages match.
|
|
*/
|
|
unsigned match_mask:3;
|
|
};
|
|
|
|
static int err(struct merge_options *opt, const char *err, ...)
|
|
{
|
|
va_list params;
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
strbuf_addstr(&sb, "error: ");
|
|
va_start(params, err);
|
|
strbuf_vaddf(&sb, err, params);
|
|
va_end(params);
|
|
|
|
error("%s", sb.buf);
|
|
strbuf_release(&sb);
|
|
|
|
return -1;
|
|
}
|
|
|
|
static int collect_merge_info_callback(int n,
|
|
unsigned long mask,
|
|
unsigned long dirmask,
|
|
struct name_entry *names,
|
|
struct traverse_info *info)
|
|
{
|
|
/*
|
|
* n is 3. Always.
|
|
* common ancestor (mbase) has mask 1, and stored in index 0 of names
|
|
* head of side 1 (side1) has mask 2, and stored in index 1 of names
|
|
* head of side 2 (side2) has mask 4, and stored in index 2 of names
|
|
*/
|
|
struct merge_options *opt = info->data;
|
|
struct merge_options_internal *opti = opt->priv;
|
|
struct conflict_info *ci;
|
|
struct name_entry *p;
|
|
size_t len;
|
|
char *fullpath;
|
|
unsigned filemask = mask & ~dirmask;
|
|
unsigned mbase_null = !(mask & 1);
|
|
unsigned side1_null = !(mask & 2);
|
|
unsigned side2_null = !(mask & 4);
|
|
unsigned side1_matches_mbase = (!side1_null && !mbase_null &&
|
|
names[0].mode == names[1].mode &&
|
|
oideq(&names[0].oid, &names[1].oid));
|
|
unsigned side2_matches_mbase = (!side2_null && !mbase_null &&
|
|
names[0].mode == names[2].mode &&
|
|
oideq(&names[0].oid, &names[2].oid));
|
|
unsigned sides_match = (!side1_null && !side2_null &&
|
|
names[1].mode == names[2].mode &&
|
|
oideq(&names[1].oid, &names[2].oid));
|
|
|
|
/* n = 3 is a fundamental assumption. */
|
|
if (n != 3)
|
|
BUG("Called collect_merge_info_callback wrong");
|
|
|
|
/*
|
|
* A bunch of sanity checks verifying that traverse_trees() calls
|
|
* us the way I expect. Could just remove these at some point,
|
|
* though maybe they are helpful to future code readers.
|
|
*/
|
|
assert(mbase_null == is_null_oid(&names[0].oid));
|
|
assert(side1_null == is_null_oid(&names[1].oid));
|
|
assert(side2_null == is_null_oid(&names[2].oid));
|
|
assert(!mbase_null || !side1_null || !side2_null);
|
|
assert(mask > 0 && mask < 8);
|
|
|
|
/*
|
|
* Get the name of the relevant filepath, which we'll pass to
|
|
* setup_path_info() for tracking.
|
|
*/
|
|
p = names;
|
|
while (!p->mode)
|
|
p++;
|
|
len = traverse_path_len(info, p->pathlen);
|
|
|
|
/* +1 in both of the following lines to include the NUL byte */
|
|
fullpath = xmalloc(len + 1);
|
|
make_traverse_path(fullpath, len + 1, info, p->path, p->pathlen);
|
|
|
|
/*
|
|
* TODO: record information about the path other than all zeros,
|
|
* so we can resolve later in process_entries.
|
|
*/
|
|
ci = xcalloc(1, sizeof(struct conflict_info));
|
|
strmap_put(&opti->paths, fullpath, ci);
|
|
|
|
/* If dirmask, recurse into subdirectories */
|
|
if (dirmask) {
|
|
struct traverse_info newinfo;
|
|
struct tree_desc t[3];
|
|
void *buf[3] = {NULL, NULL, NULL};
|
|
const char *original_dir_name;
|
|
int i, ret;
|
|
|
|
ci->match_mask &= filemask;
|
|
newinfo = *info;
|
|
newinfo.prev = info;
|
|
newinfo.name = p->path;
|
|
newinfo.namelen = p->pathlen;
|
|
newinfo.pathlen = st_add3(newinfo.pathlen, p->pathlen, 1);
|
|
|
|
for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
|
|
if (i == 1 && side1_matches_mbase)
|
|
t[1] = t[0];
|
|
else if (i == 2 && side2_matches_mbase)
|
|
t[2] = t[0];
|
|
else if (i == 2 && sides_match)
|
|
t[2] = t[1];
|
|
else {
|
|
const struct object_id *oid = NULL;
|
|
if (dirmask & 1)
|
|
oid = &names[i].oid;
|
|
buf[i] = fill_tree_descriptor(opt->repo,
|
|
t + i, oid);
|
|
}
|
|
dirmask >>= 1;
|
|
}
|
|
|
|
original_dir_name = opti->current_dir_name;
|
|
opti->current_dir_name = fullpath;
|
|
ret = traverse_trees(NULL, 3, t, &newinfo);
|
|
opti->current_dir_name = original_dir_name;
|
|
|
|
for (i = MERGE_BASE; i <= MERGE_SIDE2; i++)
|
|
free(buf[i]);
|
|
|
|
if (ret < 0)
|
|
return -1;
|
|
}
|
|
|
|
return mask;
|
|
}
|
|
|
|
static int collect_merge_info(struct merge_options *opt,
|
|
struct tree *merge_base,
|
|
struct tree *side1,
|
|
struct tree *side2)
|
|
{
|
|
int ret;
|
|
struct tree_desc t[3];
|
|
struct traverse_info info;
|
|
const char *toplevel_dir_placeholder = "";
|
|
|
|
opt->priv->current_dir_name = toplevel_dir_placeholder;
|
|
setup_traverse_info(&info, toplevel_dir_placeholder);
|
|
info.fn = collect_merge_info_callback;
|
|
info.data = opt;
|
|
info.show_all_errors = 1;
|
|
|
|
parse_tree(merge_base);
|
|
parse_tree(side1);
|
|
parse_tree(side2);
|
|
init_tree_desc(t + 0, merge_base->buffer, merge_base->size);
|
|
init_tree_desc(t + 1, side1->buffer, side1->size);
|
|
init_tree_desc(t + 2, side2->buffer, side2->size);
|
|
|
|
ret = traverse_trees(NULL, 3, t, &info);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Placeholder for rename handling.  Always returns 1 ("clean") and does
 * not look at any of its arguments.
 */
static int detect_and_process_renames(struct merge_options *opt,
				      struct tree *merge_base,
				      struct tree *side1,
				      struct tree *side2)
{
	/*
	 * Rename detection is about file similarity.  The scheme used
	 * here is the simplest one to implement: two files are similar
	 * if and only if their filenames are equal.  Under that scheme
	 * there are no renames, hence nothing to do.
	 *
	 * TODO: Actually implement a real rename detection scheme.
	 */
	return 1;
}
|
|
|
|
/*
 * Stub: unconditionally dies.  The caller passes result_oid expecting a
 * tree oid to be written there, but nothing is written yet.
 */
static void process_entries(struct merge_options *opt,
			    struct object_id *result_oid)
{
	die("Not yet implemented.");
}
|
|
|
|
/*
 * Stub: dies before doing any work.  The merge_finalize() call below
 * only runs if die() returns.
 */
void merge_switch_to_result(struct merge_options *opt,
			    struct tree *head,
			    struct merge_result *result,
			    int update_worktree_and_index,
			    int display_update_msgs)
{
	die("Not yet implemented");
	merge_finalize(opt, result);
}
|
|
|
|
/* Stub: unconditionally dies; neither argument is used yet. */
void merge_finalize(struct merge_options *opt,
		    struct merge_result *result)
{
	die("Not yet implemented");
}
|
|
|
|
static void merge_start(struct merge_options *opt, struct merge_result *result)
|
|
{
|
|
/* Sanity checks on opt */
|
|
assert(opt->repo);
|
|
|
|
assert(opt->branch1 && opt->branch2);
|
|
|
|
assert(opt->detect_directory_renames >= MERGE_DIRECTORY_RENAMES_NONE &&
|
|
opt->detect_directory_renames <= MERGE_DIRECTORY_RENAMES_TRUE);
|
|
assert(opt->rename_limit >= -1);
|
|
assert(opt->rename_score >= 0 && opt->rename_score <= MAX_SCORE);
|
|
assert(opt->show_rename_progress >= 0 && opt->show_rename_progress <= 1);
|
|
|
|
assert(opt->xdl_opts >= 0);
|
|
assert(opt->recursive_variant >= MERGE_VARIANT_NORMAL &&
|
|
opt->recursive_variant <= MERGE_VARIANT_THEIRS);
|
|
|
|
/*
|
|
* detect_renames, verbosity, buffer_output, and obuf are ignored
|
|
* fields that were used by "recursive" rather than "ort" -- but
|
|
* sanity check them anyway.
|
|
*/
|
|
assert(opt->detect_renames >= -1 &&
|
|
opt->detect_renames <= DIFF_DETECT_COPY);
|
|
assert(opt->verbosity >= 0 && opt->verbosity <= 5);
|
|
assert(opt->buffer_output <= 2);
|
|
assert(opt->obuf.len == 0);
|
|
|
|
assert(opt->priv == NULL);
|
|
|
|
/* Default to histogram diff. Actually, just hardcode it...for now. */
|
|
opt->xdl_opts = DIFF_WITH_ALG(opt, HISTOGRAM_DIFF);
|
|
|
|
/* Initialization of opt->priv, our internal merge data */
|
|
opt->priv = xcalloc(1, sizeof(*opt->priv));
|
|
|
|
/*
|
|
* Although we initialize opt->priv->paths with strdup_strings=0,
|
|
* that's just to avoid making yet another copy of an allocated
|
|
* string. Putting the entry into paths means we are taking
|
|
* ownership, so we will later free it.
|
|
*
|
|
* In contrast, conflicted just has a subset of keys from paths, so
|
|
* we don't want to free those (it'd be a duplicate free).
|
|
*/
|
|
strmap_init_with_options(&opt->priv->paths, NULL, 0);
|
|
strmap_init_with_options(&opt->priv->conflicted, NULL, 0);
|
|
}
|
|
|
|
/*
|
|
* Originally from merge_trees_internal(); heavily adapted, though.
|
|
*/
|
|
static void merge_ort_nonrecursive_internal(struct merge_options *opt,
|
|
struct tree *merge_base,
|
|
struct tree *side1,
|
|
struct tree *side2,
|
|
struct merge_result *result)
|
|
{
|
|
struct object_id working_tree_oid;
|
|
|
|
if (collect_merge_info(opt, merge_base, side1, side2) != 0) {
|
|
/*
|
|
* TRANSLATORS: The %s arguments are: 1) tree hash of a merge
|
|
* base, and 2-3) the trees for the two trees we're merging.
|
|
*/
|
|
err(opt, _("collecting merge info failed for trees %s, %s, %s"),
|
|
oid_to_hex(&merge_base->object.oid),
|
|
oid_to_hex(&side1->object.oid),
|
|
oid_to_hex(&side2->object.oid));
|
|
result->clean = -1;
|
|
return;
|
|
}
|
|
|
|
result->clean = detect_and_process_renames(opt, merge_base,
|
|
side1, side2);
|
|
process_entries(opt, &working_tree_oid);
|
|
|
|
/* Set return values */
|
|
result->tree = parse_tree_indirect(&working_tree_oid);
|
|
/* existence of conflicted entries implies unclean */
|
|
result->clean &= strmap_empty(&opt->priv->conflicted);
|
|
if (!opt->priv->call_depth) {
|
|
result->priv = opt->priv;
|
|
opt->priv = NULL;
|
|
}
|
|
}
|
|
|
|
void merge_incore_nonrecursive(struct merge_options *opt,
|
|
struct tree *merge_base,
|
|
struct tree *side1,
|
|
struct tree *side2,
|
|
struct merge_result *result)
|
|
{
|
|
assert(opt->ancestor != NULL);
|
|
merge_start(opt, result);
|
|
merge_ort_nonrecursive_internal(opt, merge_base, side1, side2, result);
|
|
}
|
|
|
|
/* Stub: unconditionally dies; none of the arguments are used yet. */
void merge_incore_recursive(struct merge_options *opt,
			    struct commit_list *merge_bases,
			    struct commit *side1,
			    struct commit *side2,
			    struct merge_result *result)
{
	die("Not yet implemented");
}
|