merge-recursive: change current file dir string_lists to hashmap
The code was using two string_lists, one for the directories and one for the files. The code never checks the lists independently so we should be able to only use one list. The string_list also is a O(log n) for lookup and insertion. Switching this to use a hashmap will give O(1) which will save some time when there are millions of paths that will be checked. Signed-off-by: Kevin Willford <kewillf@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
parent
ef9c4dc3b6
commit
fc65b00da7
@ -24,6 +24,31 @@
|
||||
#include "dir.h"
|
||||
#include "submodule.h"
|
||||
|
||||
struct path_hashmap_entry {
|
||||
struct hashmap_entry e;
|
||||
char path[FLEX_ARRAY];
|
||||
};
|
||||
|
||||
static int path_hashmap_cmp(const void *cmp_data,
|
||||
const void *entry,
|
||||
const void *entry_or_key,
|
||||
const void *keydata)
|
||||
{
|
||||
const struct path_hashmap_entry *a = entry;
|
||||
const struct path_hashmap_entry *b = entry_or_key;
|
||||
const char *key = keydata;
|
||||
|
||||
if (ignore_case)
|
||||
return strcasecmp(a->path, key ? key : b->path);
|
||||
else
|
||||
return strcmp(a->path, key ? key : b->path);
|
||||
}
|
||||
|
||||
static unsigned int path_hash(const char *path)
|
||||
{
|
||||
return ignore_case ? strihash(path) : strhash(path);
|
||||
}
|
||||
|
||||
static void flush_output(struct merge_options *o)
|
||||
{
|
||||
if (o->buffer_output < 2 && o->obuf.len) {
|
||||
@ -314,15 +339,15 @@ static int save_files_dirs(const unsigned char *sha1,
|
||||
struct strbuf *base, const char *path,
|
||||
unsigned int mode, int stage, void *context)
|
||||
{
|
||||
struct path_hashmap_entry *entry;
|
||||
int baselen = base->len;
|
||||
struct merge_options *o = context;
|
||||
|
||||
strbuf_addstr(base, path);
|
||||
|
||||
if (S_ISDIR(mode))
|
||||
string_list_insert(&o->current_directory_set, base->buf);
|
||||
else
|
||||
string_list_insert(&o->current_file_set, base->buf);
|
||||
FLEX_ALLOC_MEM(entry, path, base->buf, base->len);
|
||||
hashmap_entry_init(entry, path_hash(entry->path));
|
||||
hashmap_add(&o->current_file_dir_set, entry);
|
||||
|
||||
strbuf_setlen(base, baselen);
|
||||
return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
|
||||
@ -642,6 +667,7 @@ static void add_flattened_path(struct strbuf *out, const char *s)
|
||||
|
||||
static char *unique_path(struct merge_options *o, const char *path, const char *branch)
|
||||
{
|
||||
struct path_hashmap_entry *entry;
|
||||
struct strbuf newpath = STRBUF_INIT;
|
||||
int suffix = 0;
|
||||
size_t base_len;
|
||||
@ -650,14 +676,16 @@ static char *unique_path(struct merge_options *o, const char *path, const char *
|
||||
add_flattened_path(&newpath, branch);
|
||||
|
||||
base_len = newpath.len;
|
||||
while (string_list_has_string(&o->current_file_set, newpath.buf) ||
|
||||
string_list_has_string(&o->current_directory_set, newpath.buf) ||
|
||||
while (hashmap_get_from_hash(&o->current_file_dir_set,
|
||||
path_hash(newpath.buf), newpath.buf) ||
|
||||
(!o->call_depth && file_exists(newpath.buf))) {
|
||||
strbuf_setlen(&newpath, base_len);
|
||||
strbuf_addf(&newpath, "_%d", suffix++);
|
||||
}
|
||||
|
||||
string_list_insert(&o->current_file_set, newpath.buf);
|
||||
FLEX_ALLOC_MEM(entry, path, newpath.buf, newpath.len);
|
||||
hashmap_entry_init(entry, path_hash(entry->path));
|
||||
hashmap_add(&o->current_file_dir_set, entry);
|
||||
return strbuf_detach(&newpath, NULL);
|
||||
}
|
||||
|
||||
@ -1941,8 +1969,14 @@ int merge_trees(struct merge_options *o,
|
||||
if (unmerged_cache()) {
|
||||
struct string_list *entries, *re_head, *re_merge;
|
||||
int i;
|
||||
string_list_clear(&o->current_file_set, 1);
|
||||
string_list_clear(&o->current_directory_set, 1);
|
||||
/*
|
||||
* Only need the hashmap while processing entries, so
|
||||
* initialize it here and free it when we are done running
|
||||
* through the entries. Keeping it in the merge_options as
|
||||
* opposed to decaring a local hashmap is for convenience
|
||||
* so that we don't have to pass it to around.
|
||||
*/
|
||||
hashmap_init(&o->current_file_dir_set, path_hashmap_cmp, NULL, 512);
|
||||
get_files_dirs(o, head);
|
||||
get_files_dirs(o, merge);
|
||||
|
||||
@ -1978,6 +2012,8 @@ cleanup:
|
||||
string_list_clear(re_head, 0);
|
||||
string_list_clear(entries, 1);
|
||||
|
||||
hashmap_free(&o->current_file_dir_set, 1);
|
||||
|
||||
free(re_merge);
|
||||
free(re_head);
|
||||
free(entries);
|
||||
@ -2179,8 +2215,6 @@ void init_merge_options(struct merge_options *o)
|
||||
if (o->verbosity >= 5)
|
||||
o->buffer_output = 0;
|
||||
strbuf_init(&o->obuf, 0);
|
||||
string_list_init(&o->current_file_set, 1);
|
||||
string_list_init(&o->current_directory_set, 1);
|
||||
string_list_init(&o->df_conflict_file_set, 1);
|
||||
}
|
||||
|
||||
|
@ -25,8 +25,7 @@ struct merge_options {
|
||||
int show_rename_progress;
|
||||
int call_depth;
|
||||
struct strbuf obuf;
|
||||
struct string_list current_file_set;
|
||||
struct string_list current_directory_set;
|
||||
struct hashmap current_file_dir_set;
|
||||
struct string_list df_conflict_file_set;
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user