Merge branch 'en/ort-perf-batch-15'

Final batch for "merge -sort" optimization.

* en/ort-perf-batch-15:
  merge-ort: remove compile-time ability to turn off usage of memory pools
  merge-ort: reuse path strings in pool_alloc_filespec
  merge-ort: store filepairs and filespecs in our mem_pool
  diffcore-rename, merge-ort: add wrapper functions for filepair alloc/dealloc
  merge-ort: switch our strmaps over to using memory pools
  merge-ort: set up a memory pool
  merge-ort: add pool_alloc, pool_calloc, and pool_strndup wrappers
  diffcore-rename: use a mem_pool for exact rename detection's hashmap
  merge-ort: rename str{map,intmap,set}_func()
This commit is contained in:
Junio C Hamano 2021-08-24 15:32:39 -07:00
commit 08ac213965
3 changed files with 165 additions and 94 deletions

View File

@ -317,10 +317,11 @@ static int find_identical_files(struct hashmap *srcs,
}
static void insert_file_table(struct repository *r,
struct mem_pool *pool,
struct hashmap *table, int index,
struct diff_filespec *filespec)
{
struct file_similarity *entry = xmalloc(sizeof(*entry));
struct file_similarity *entry = mem_pool_alloc(pool, sizeof(*entry));
entry->index = index;
entry->filespec = filespec;
@ -336,7 +337,8 @@ static void insert_file_table(struct repository *r,
* and then during the second round we try to match
* cache-dirty entries as well.
*/
static int find_exact_renames(struct diff_options *options)
static int find_exact_renames(struct diff_options *options,
struct mem_pool *pool)
{
int i, renames = 0;
struct hashmap file_table;
@ -346,7 +348,7 @@ static int find_exact_renames(struct diff_options *options)
*/
hashmap_init(&file_table, NULL, NULL, rename_src_nr);
for (i = rename_src_nr-1; i >= 0; i--)
insert_file_table(options->repo,
insert_file_table(options->repo, pool,
&file_table, i,
rename_src[i].p->one);
@ -354,8 +356,8 @@ static int find_exact_renames(struct diff_options *options)
for (i = 0; i < rename_dst_nr; i++)
renames += find_identical_files(&file_table, i, options);
/* Free the hash data structure and entries */
hashmap_clear_and_free(&file_table, struct file_similarity, entry);
/* Free the hash data structure (entries will be freed with the pool) */
hashmap_clear(&file_table);
return renames;
}
@ -1330,7 +1332,47 @@ static void handle_early_known_dir_renames(struct dir_rename_info *info,
rename_src_nr = new_num_src;
}
/*
 * Drop one reference on spec.  When the reference count hits zero, hand
 * the spec to diff_free_filespec_data(); the spec struct itself is not
 * freed here.
 */
static void free_filespec_data(struct diff_filespec *spec)
{
	spec->count--;
	if (spec->count == 0)
		diff_free_filespec_data(spec);
}
/*
 * Release a filespec that may or may not live in a memory pool.
 *
 * With no pool (pool == NULL) this is plain free_filespec().  With a
 * pool, only the spec's data is released via free_filespec_data(); the
 * spec struct itself was allocated in the pool and must not be freed
 * individually.
 */
static void pool_free_filespec(struct mem_pool *pool,
			       struct diff_filespec *spec)
{
	if (pool)
		free_filespec_data(spec);
	else
		free_filespec(spec);
}
/*
 * Release a filepair that may or may not live in a memory pool.
 *
 * With no pool (pool == NULL) this is plain diff_free_filepair().  With
 * a pool, only the data of the two filespecs is released; the filespecs
 * and the filepair were allocated from the pool and are left for the
 * pool to reclaim.
 */
void pool_diff_free_filepair(struct mem_pool *pool,
			     struct diff_filepair *p)
{
	if (pool) {
		free_filespec_data(p->one);
		free_filespec_data(p->two);
	} else {
		diff_free_filepair(p);
	}
}
void diffcore_rename_extended(struct diff_options *options,
struct mem_pool *pool,
struct strintmap *relevant_sources,
struct strintmap *dirs_removed,
struct strmap *dir_rename_count,
@ -1345,6 +1387,7 @@ void diffcore_rename_extended(struct diff_options *options,
int num_destinations, dst_cnt;
int num_sources, want_copies;
struct progress *progress = NULL;
struct mem_pool local_pool;
struct dir_rename_info info;
struct diff_populate_filespec_options dpf_options = {
.check_binary = 0,
@ -1413,11 +1456,18 @@ void diffcore_rename_extended(struct diff_options *options,
goto cleanup; /* nothing to do */
trace2_region_enter("diff", "exact renames", options->repo);
mem_pool_init(&local_pool, 32*1024);
/*
* We really want to cull the candidates list early
* with cheap tests in order to avoid doing deltas.
*/
rename_count = find_exact_renames(options);
rename_count = find_exact_renames(options, &local_pool);
/*
* Discard local_pool immediately instead of at "cleanup:" in order
* to reduce maximum memory usage; inexact rename detection uses up
* a fair amount of memory, and mem_pools can too.
*/
mem_pool_discard(&local_pool, 0);
trace2_region_leave("diff", "exact renames", options->repo);
/* Did we only want exact renames? */
@ -1636,7 +1686,7 @@ void diffcore_rename_extended(struct diff_options *options,
pair_to_free = p;
if (pair_to_free)
diff_free_filepair(pair_to_free);
pool_diff_free_filepair(pool, pair_to_free);
}
diff_debug_queue("done copying original", &outq);
@ -1646,7 +1696,7 @@ void diffcore_rename_extended(struct diff_options *options,
for (i = 0; i < rename_dst_nr; i++)
if (rename_dst[i].filespec_to_free)
free_filespec(rename_dst[i].filespec_to_free);
pool_free_filespec(pool, rename_dst[i].filespec_to_free);
cleanup_dir_rename_info(&info, dirs_removed, dir_rename_count != NULL);
FREE_AND_NULL(rename_dst);
@ -1663,5 +1713,5 @@ void diffcore_rename_extended(struct diff_options *options,
void diffcore_rename(struct diff_options *options)
{
diffcore_rename_extended(options, NULL, NULL, NULL, NULL);
diffcore_rename_extended(options, NULL, NULL, NULL, NULL, NULL);
}

View File

@ -127,6 +127,8 @@ struct diff_filepair {
#define DIFF_PAIR_MODE_CHANGED(p) ((p)->one->mode != (p)->two->mode)
void diff_free_filepair(struct diff_filepair *);
void pool_diff_free_filepair(struct mem_pool *pool,
struct diff_filepair *p);
int diff_unmodified_pair(struct diff_filepair *);
@ -179,6 +181,7 @@ void partial_clear_dir_rename_count(struct strmap *dir_rename_count);
void diffcore_break(struct repository *, int);
void diffcore_rename(struct diff_options *);
void diffcore_rename_extended(struct diff_options *options,
struct mem_pool *pool,
struct strintmap *relevant_sources,
struct strintmap *dirs_removed,
struct strmap *dir_rename_count,

View File

@ -303,8 +303,6 @@ struct merge_options_internal {
* * these keys serve to intern all the path strings, which allows
* us to do pointer comparison on directory names instead of
* strcmp; we just have to be careful to use the interned strings.
* (Technically paths_to_free may track some strings that were
* removed from paths.)
*
* The values of paths:
* * either a pointer to a merged_info, or a conflict_info struct
@ -340,14 +338,14 @@ struct merge_options_internal {
struct strmap conflicted;
/*
* paths_to_free: additional list of strings to free
* pool: memory pool for fast allocation/deallocation
*
* If keys are removed from "paths", they are added to paths_to_free
* to ensure they are later freed. We avoid free'ing immediately since
* other places (e.g. conflict_info.pathnames[]) may still be
* referencing these paths.
* We allocate room for lots of filenames and auxiliary data
* structures in merge_options_internal, and it tends to all be
* freed together too. Using a memory pool for these provides a
* nice speedup.
*/
struct string_list paths_to_free;
struct mem_pool pool;
/*
* output: special messages and conflict notices for various paths
@ -519,64 +517,45 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
{
struct rename_info *renames = &opti->renames;
int i;
void (*strmap_func)(struct strmap *, int) =
void (*strmap_clear_func)(struct strmap *, int) =
reinitialize ? strmap_partial_clear : strmap_clear;
void (*strintmap_func)(struct strintmap *) =
void (*strintmap_clear_func)(struct strintmap *) =
reinitialize ? strintmap_partial_clear : strintmap_clear;
void (*strset_func)(struct strset *) =
void (*strset_clear_func)(struct strset *) =
reinitialize ? strset_partial_clear : strset_clear;
/*
* We marked opti->paths with strdup_strings = 0, so that we
* wouldn't have to make another copy of the fullpath created by
* make_traverse_path from setup_path_info(). But, now that we've
* used it and have no other references to these strings, it is time
* to deallocate them.
*/
free_strmap_strings(&opti->paths);
strmap_func(&opti->paths, 1);
strmap_clear_func(&opti->paths, 0);
/*
* All keys and values in opti->conflicted are a subset of those in
* opti->paths. We don't want to deallocate anything twice, so we
* don't free the keys and we pass 0 for free_values.
*/
strmap_func(&opti->conflicted, 0);
/*
* opti->paths_to_free is similar to opti->paths; we created it with
* strdup_strings = 0 to avoid making _another_ copy of the fullpath
* but now that we've used it and have no other references to these
* strings, it is time to deallocate them. We do so by temporarily
* setting strdup_strings to 1.
*/
opti->paths_to_free.strdup_strings = 1;
string_list_clear(&opti->paths_to_free, 0);
opti->paths_to_free.strdup_strings = 0;
strmap_clear_func(&opti->conflicted, 0);
if (opti->attr_index.cache_nr) /* true iff opt->renormalize */
discard_index(&opti->attr_index);
/* Free memory used by various renames maps */
for (i = MERGE_SIDE1; i <= MERGE_SIDE2; ++i) {
strintmap_func(&renames->dirs_removed[i]);
strmap_func(&renames->dir_renames[i], 0);
strintmap_func(&renames->relevant_sources[i]);
strintmap_clear_func(&renames->dirs_removed[i]);
strmap_clear_func(&renames->dir_renames[i], 0);
strintmap_clear_func(&renames->relevant_sources[i]);
if (!reinitialize)
assert(renames->cached_pairs_valid_side == 0);
if (i != renames->cached_pairs_valid_side &&
-1 != renames->cached_pairs_valid_side) {
strset_func(&renames->cached_target_names[i]);
strmap_func(&renames->cached_pairs[i], 1);
strset_func(&renames->cached_irrelevant[i]);
strset_clear_func(&renames->cached_target_names[i]);
strmap_clear_func(&renames->cached_pairs[i], 1);
strset_clear_func(&renames->cached_irrelevant[i]);
partial_clear_dir_rename_count(&renames->dir_rename_count[i]);
if (!reinitialize)
strmap_clear(&renames->dir_rename_count[i], 1);
}
}
for (i = MERGE_SIDE1; i <= MERGE_SIDE2; ++i) {
strintmap_func(&renames->deferred[i].possible_trivial_merges);
strset_func(&renames->deferred[i].target_dirs);
strintmap_clear_func(&renames->deferred[i].possible_trivial_merges);
strset_clear_func(&renames->deferred[i].target_dirs);
renames->deferred[i].trivial_merges_okay = 1; /* 1 == maybe */
}
renames->cached_pairs_valid_side = 0;
@ -603,6 +582,8 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
strmap_clear(&opti->output, 0);
}
mem_pool_discard(&opti->pool, 0);
/* Clean out callback_data as well. */
FREE_AND_NULL(renames->callback_data);
renames->callback_data_nr = renames->callback_data_alloc = 0;
@ -665,6 +646,36 @@ static void path_msg(struct merge_options *opt,
strbuf_addch(sb, '\n');
}
/*
 * Pool-backed counterpart of alloc_filespec(): the spec comes from
 * pool, and the given path pointer is stored as-is rather than copied,
 * so the string must stay valid for as long as the spec is in use.
 */
static struct diff_filespec *pool_alloc_filespec(struct mem_pool *pool,
						 const char *path)
{
	struct diff_filespec *fs = mem_pool_calloc(pool, 1, sizeof(*fs));

	fs->count = 1;
	fs->is_binary = -1;
	fs->path = (char*)path; /* spec won't modify it */
	return fs;
}
static struct diff_filepair *pool_diff_queue(struct mem_pool *pool,
struct diff_queue_struct *queue,
struct diff_filespec *one,
struct diff_filespec *two)
{
/* Same code as diff_queue(), except allocate from pool */
struct diff_filepair *dp;
dp = mem_pool_calloc(pool, 1, sizeof(*dp));
dp->one = one;
dp->two = two;
if (queue)
diff_q(queue, dp);
return dp;
}
/* add a string to a strbuf, but converting "/" to "_" */
static void add_flattened_path(struct strbuf *out, const char *s)
{
@ -793,8 +804,9 @@ static void setup_path_info(struct merge_options *opt,
assert(!df_conflict || !resolved); /* df_conflict implies !resolved */
assert(resolved == (merged_version != NULL));
mi = xcalloc(1, resolved ? sizeof(struct merged_info) :
sizeof(struct conflict_info));
mi = mem_pool_calloc(&opt->priv->pool, 1,
resolved ? sizeof(struct merged_info) :
sizeof(struct conflict_info));
mi->directory_name = current_dir_name;
mi->basename_offset = current_dir_name_len;
mi->clean = !!resolved;
@ -891,11 +903,11 @@ static void add_pair(struct merge_options *opt,
return;
}
one = alloc_filespec(pathname);
two = alloc_filespec(pathname);
one = pool_alloc_filespec(&opt->priv->pool, pathname);
two = pool_alloc_filespec(&opt->priv->pool, pathname);
fill_filespec(is_add ? two : one,
&names[names_idx].oid, 1, names[names_idx].mode);
diff_queue(&renames->pairs[side], one, two);
pool_diff_queue(&opt->priv->pool, &renames->pairs[side], one, two);
}
static void collect_rename_info(struct merge_options *opt,
@ -1086,7 +1098,7 @@ static int collect_merge_info_callback(int n,
len = traverse_path_len(info, p->pathlen);
/* +1 in both of the following lines to include the NUL byte */
fullpath = xmalloc(len + 1);
fullpath = mem_pool_alloc(&opt->priv->pool, len + 1);
make_traverse_path(fullpath, len + 1, info, p->path, p->pathlen);
/*
@ -1341,7 +1353,7 @@ static int handle_deferred_entries(struct merge_options *opt,
copy = renames->deferred[side].possible_trivial_merges;
strintmap_init_with_options(&renames->deferred[side].possible_trivial_merges,
0,
NULL,
&opt->priv->pool,
0);
strintmap_for_each_entry(&copy, &iter, entry) {
const char *path = entry->key;
@ -2293,12 +2305,17 @@ static void apply_directory_rename_modifications(struct merge_options *opt,
VERIFY_CI(ci);
/* Find parent directories missing from opt->priv->paths */
cur_path = new_path;
cur_path = mem_pool_strdup(&opt->priv->pool, new_path);
free((char*)new_path);
new_path = (char *)cur_path;
while (1) {
/* Find the parent directory of cur_path */
char *last_slash = strrchr(cur_path, '/');
if (last_slash) {
parent_name = xstrndup(cur_path, last_slash - cur_path);
parent_name = mem_pool_strndup(&opt->priv->pool,
cur_path,
last_slash - cur_path);
} else {
parent_name = opt->priv->toplevel_dir;
break;
@ -2307,7 +2324,6 @@ static void apply_directory_rename_modifications(struct merge_options *opt,
/* Look it up in opt->priv->paths */
entry = strmap_get_entry(&opt->priv->paths, parent_name);
if (entry) {
free((char*)parent_name);
parent_name = entry->key; /* reuse known pointer */
break;
}
@ -2334,13 +2350,6 @@ static void apply_directory_rename_modifications(struct merge_options *opt,
parent_name = cur_dir;
}
/*
* We are removing old_path from opt->priv->paths. old_path also will
* eventually need to be freed, but it may still be used by e.g.
* ci->pathnames. So, store it in another string-list for now.
*/
string_list_append(&opt->priv->paths_to_free, old_path);
assert(ci->filemask == 2 || ci->filemask == 4);
assert(ci->dirmask == 0);
strmap_remove(&opt->priv->paths, old_path, 0);
@ -2374,7 +2383,6 @@ static void apply_directory_rename_modifications(struct merge_options *opt,
new_ci->stages[index].mode = ci->stages[index].mode;
oidcpy(&new_ci->stages[index].oid, &ci->stages[index].oid);
free(ci);
ci = new_ci;
}
@ -2802,10 +2810,23 @@ static void use_cached_pairs(struct merge_options *opt,
if (!new_name)
new_name = old_name;
/*
* cached_pairs has *copies* of old_name and new_name,
* because it has to persist across merges. Since
* pool_alloc_filespec() will just re-use the existing
* filenames, which will also get re-used by
* opt->priv->paths if they become renames, and then
* get freed at the end of the merge, that would leave
* the copy in cached_pairs dangling. Avoid this by
* making a copy here.
*/
old_name = mem_pool_strdup(&opt->priv->pool, old_name);
new_name = mem_pool_strdup(&opt->priv->pool, new_name);
/* We don't care about oid/mode, only filenames and status */
one = alloc_filespec(old_name);
two = alloc_filespec(new_name);
diff_queue(pairs, one, two);
one = pool_alloc_filespec(&opt->priv->pool, old_name);
two = pool_alloc_filespec(&opt->priv->pool, new_name);
pool_diff_queue(&opt->priv->pool, pairs, one, two);
pairs->queue[pairs->nr-1]->status = entry->value ? 'R' : 'D';
}
}
@ -2913,6 +2934,7 @@ static int detect_regular_renames(struct merge_options *opt,
diff_queued_diff = renames->pairs[side_index];
trace2_region_enter("diff", "diffcore_rename", opt->repo);
diffcore_rename_extended(&diff_opts,
&opt->priv->pool,
&renames->relevant_sources[side_index],
&renames->dirs_removed[side_index],
&renames->dir_rename_count[side_index],
@ -2963,7 +2985,7 @@ static int collect_renames(struct merge_options *opt,
if (p->status != 'A' && p->status != 'R') {
possibly_cache_new_pair(renames, p, side_index, NULL);
diff_free_filepair(p);
pool_diff_free_filepair(&opt->priv->pool, p);
continue;
}
@ -2976,7 +2998,7 @@ static int collect_renames(struct merge_options *opt,
possibly_cache_new_pair(renames, p, side_index, new_path);
if (p->status != 'R' && !new_path) {
diff_free_filepair(p);
pool_diff_free_filepair(&opt->priv->pool, p);
continue;
}
@ -3094,7 +3116,7 @@ cleanup:
side_pairs = &renames->pairs[s];
for (i = 0; i < side_pairs->nr; ++i) {
struct diff_filepair *p = side_pairs->queue[i];
diff_free_filepair(p);
pool_diff_free_filepair(&opt->priv->pool, p);
}
}
@ -3107,7 +3129,8 @@ simple_cleanup:
if (combined.nr) {
int i;
for (i = 0; i < combined.nr; i++)
diff_free_filepair(combined.queue[i]);
pool_diff_free_filepair(&opt->priv->pool,
combined.queue[i]);
free(combined.queue);
}
@ -3581,7 +3604,8 @@ static void process_entry(struct merge_options *opt,
* the directory to remain here, so we need to move this
* path to some new location.
*/
CALLOC_ARRAY(new_ci, 1);
new_ci = mem_pool_calloc(&opt->priv->pool, 1, sizeof(*new_ci));
/* We don't really want new_ci->merged.result copied, but it'll
* be overwritten below so it doesn't matter. We also don't
* want any directory mode/oid values copied, but we'll zero
@ -3673,7 +3697,8 @@ static void process_entry(struct merge_options *opt,
const char *a_path = NULL, *b_path = NULL;
int rename_a = 0, rename_b = 0;
new_ci = xmalloc(sizeof(*new_ci));
new_ci = mem_pool_alloc(&opt->priv->pool,
sizeof(*new_ci));
if (S_ISREG(a_mode))
rename_a = 1;
@ -3742,17 +3767,8 @@ static void process_entry(struct merge_options *opt,
b_path = path;
strmap_put(&opt->priv->paths, b_path, new_ci);
if (rename_a && rename_b) {
if (rename_a && rename_b)
strmap_remove(&opt->priv->paths, path, 0);
/*
* We removed path from opt->priv->paths. path
* will also eventually need to be freed, but
* it may still be used by e.g. ci->pathnames.
* So, store it in another string-list for now.
*/
string_list_append(&opt->priv->paths_to_free,
path);
}
/*
* Do special handling for b_path since process_entry()
@ -4293,6 +4309,7 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
{
struct rename_info *renames;
int i;
struct mem_pool *pool = NULL;
/* Sanity checks on opt */
trace2_region_enter("merge", "sanity checks", opt->repo);
@ -4358,9 +4375,11 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
/* Initialization of various renames fields */
renames = &opt->priv->renames;
mem_pool_init(&opt->priv->pool, 0);
pool = &opt->priv->pool;
for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) {
strintmap_init_with_options(&renames->dirs_removed[i],
NOT_RELEVANT, NULL, 0);
NOT_RELEVANT, pool, 0);
strmap_init_with_options(&renames->dir_rename_count[i],
NULL, 1);
strmap_init_with_options(&renames->dir_renames[i],
@ -4374,7 +4393,7 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
*/
strintmap_init_with_options(&renames->relevant_sources[i],
-1 /* explicitly invalid */,
NULL, 0);
pool, 0);
strmap_init_with_options(&renames->cached_pairs[i],
NULL, 1);
strset_init_with_options(&renames->cached_irrelevant[i],
@ -4384,9 +4403,9 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
}
for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) {
strintmap_init_with_options(&renames->deferred[i].possible_trivial_merges,
0, NULL, 0);
0, pool, 0);
strset_init_with_options(&renames->deferred[i].target_dirs,
NULL, 1);
pool, 1);
renames->deferred[i].trivial_merges_okay = 1; /* 1 == maybe */
}
@ -4394,14 +4413,13 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
* Although we initialize opt->priv->paths with strdup_strings=0,
* that's just to avoid making yet another copy of an allocated
* string. Putting the entry into paths means we are taking
* ownership, so we will later free it. paths_to_free is similar.
* ownership, so we will later free it.
*
* In contrast, conflicted just has a subset of keys from paths, so
* we don't want to free those (it'd be a duplicate free).
*/
strmap_init_with_options(&opt->priv->paths, NULL, 0);
strmap_init_with_options(&opt->priv->conflicted, NULL, 0);
string_list_init_nodup(&opt->priv->paths_to_free);
strmap_init_with_options(&opt->priv->paths, pool, 0);
strmap_init_with_options(&opt->priv->conflicted, pool, 0);
/*
* keys & strbufs in output will sometimes need to outlive "paths",