Merge branch 'jh/memihash-opt'
The name-hash used for detecting paths that differ only in case (which
matters on case-insensitive filesystems) has been optimized to take
advantage of multi-threading when it makes sense.

* jh/memihash-opt:
  name-hash: add test-lazy-init-name-hash to .gitignore
  name-hash: add perf test for lazy_init_name_hash
  name-hash: add test-lazy-init-name-hash
  name-hash: perf improvement for lazy_init_name_hash
  hashmap: document memihash_cont, hashmap_disallow_rehash api
  hashmap: add disallow_rehash setting
  hashmap: allow memihash computation to be continued
  name-hash: specify initial size for istate.dir_hash table
commit 0330344e0f
Documentation/technical/api-hashmap.txt

@@ -21,6 +21,9 @@ that the hashmap is initialized. It may also be useful for statistical purposes
`cmpfn` stores the comparison function specified in `hashmap_init()`. In
advanced scenarios, it may be useful to change this, e.g. to switch between
case-sensitive and case-insensitive lookup.
+
When `disallow_rehash` is set, automatic rehashes are prevented during inserts
and deletes.

`struct hashmap_entry`::

@@ -57,6 +60,7 @@ Functions
`unsigned int strihash(const char *buf)`::
`unsigned int memhash(const void *buf, size_t len)`::
`unsigned int memihash(const void *buf, size_t len)`::
`unsigned int memihash_cont(unsigned int hash_seed, const void *buf, size_t len)`::

Ready-to-use hash functions for strings, using the FNV-1 algorithm (see
http://www.isthe.com/chongo/tech/comp/fnv).

@@ -65,6 +69,9 @@ Functions
`memihash` operate on arbitrary-length memory.
+
`strihash` and `memihash` are case insensitive versions.
+
`memihash_cont` is a variant of `memihash` that allows a computation to be
continued with another chunk of data.

`unsigned int sha1hash(const unsigned char *sha1)`::
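The key property of `memihash_cont` is that it picks up exactly where a
previous `memihash` left off, so a long path can be hashed one component at
a time (which is what name-hash.c below relies on). A minimal illustrative
check, assuming it is compiled inside the Git tree; `check_split_hash` is a
hypothetical helper, not part of the API:

	#include "cache.h"
	#include "hashmap.h"

	static void check_split_hash(void)
	{
		const char *path = "foo/bar";

		/* hash the whole buffer in one call ... */
		unsigned int whole = memihash(path, 7);

		/* ... or hash "foo/" and continue with "bar" */
		unsigned int split = memihash_cont(memihash(path, 4),
						   path + 4, 3);

		if (whole != split)
			die("memihash_cont does not continue memihash");
	}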
@@ -184,6 +191,21 @@ passed to `hashmap_cmp_fn` to decide whether the entry matches the key.
+
Returns the removed entry, or NULL if not found.

`void hashmap_disallow_rehash(struct hashmap *map, unsigned value)`::

Disallow/allow automatic rehashing of the hashmap during inserts
and deletes.
+
This is useful if the caller knows that the hashmap will be accessed
by multiple threads.
+
The caller is still responsible for any necessary locking; this simply
prevents unexpected rehashing. The caller is also responsible for properly
sizing the initial hashmap to ensure good performance.
+
A call to allow rehashing does not force a rehash; that might happen
with the next insert or delete.

`void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter)`::
`void *hashmap_iter_next(struct hashmap_iter *iter)`::
`void *hashmap_iter_first(struct hashmap *map, struct hashmap_iter *iter)`::
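A sketch of the calling pattern this documents: size the table up front,
turn rehashing off, and serialize access with a caller-owned lock (the
names here are hypothetical; name-hash.c below stripes this over several
mutexes instead of using a single one):

	#include <pthread.h>
	#include "hashmap.h"

	static struct hashmap map;
	static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;

	static void setup(size_t expected_entries)
	{
		/* pre-size so no growth is needed ... */
		hashmap_init(&map, NULL, expected_entries);
		/* ... and pin the table in place */
		hashmap_disallow_rehash(&map, 1);
	}

	static void threadsafe_add(struct hashmap_entry *e)
	{
		pthread_mutex_lock(&map_lock);
		hashmap_add(&map, e); /* cannot trigger a rehash now */
		pthread_mutex_unlock(&map_lock);
	}

	static void teardown(void)
	{
		/* does not rehash now; the next insert/delete may */
		hashmap_disallow_rehash(&map, 0);
	}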
Makefile
@@ -621,6 +621,7 @@ TEST_PROGRAMS_NEED_X += test-fake-ssh
TEST_PROGRAMS_NEED_X += test-genrandom
TEST_PROGRAMS_NEED_X += test-hashmap
TEST_PROGRAMS_NEED_X += test-index-version
TEST_PROGRAMS_NEED_X += test-lazy-init-name-hash
TEST_PROGRAMS_NEED_X += test-line-buffer
TEST_PROGRAMS_NEED_X += test-match-trees
TEST_PROGRAMS_NEED_X += test-mergesort
cache.h
@@ -343,6 +343,7 @@ struct index_state {
extern struct index_state the_index;

/* Name hashing */
extern int test_lazy_init_name_hash(struct index_state *istate, int try_threaded);
extern void add_name_hash(struct index_state *istate, struct cache_entry *ce);
extern void remove_name_hash(struct index_state *istate, struct cache_entry *ce);
extern void free_name_hash(struct index_state *istate);
hashmap.c
@@ -50,6 +50,23 @@ unsigned int memihash(const void *buf, size_t len)
	return hash;
}

/*
 * Incorporate another chunk of data into a memihash
 * computation.
 */
unsigned int memihash_cont(unsigned int hash_seed, const void *buf, size_t len)
{
	unsigned int hash = hash_seed;
	unsigned char *ucbuf = (unsigned char *) buf;
	while (len--) {
		unsigned int c = *ucbuf++;
		if (c >= 'a' && c <= 'z')
			c -= 'a' - 'A';
		hash = (hash * FNV32_PRIME) ^ c;
	}
	return hash;
}

#define HASHMAP_INITIAL_SIZE 64
/* grow / shrink by 2^2 */
#define HASHMAP_RESIZE_BITS 2

@@ -87,11 +104,19 @@ static inline unsigned int bucket(const struct hashmap *map,
	return key->hash & (map->tablesize - 1);
}

int hashmap_bucket(const struct hashmap *map, unsigned int hash)
{
	return hash & (map->tablesize - 1);
}

static void rehash(struct hashmap *map, unsigned int newsize)
{
	unsigned int i, oldsize = map->tablesize;
	struct hashmap_entry **oldtable = map->table;

	if (map->disallow_rehash)
		return;

	alloc_table(map, newsize);
	for (i = 0; i < oldsize; i++) {
		struct hashmap_entry *e = oldtable[i];
@@ -124,7 +149,9 @@ void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function,
		size_t initial_size)
{
	unsigned int size = HASHMAP_INITIAL_SIZE;
-	map->size = 0;
+
+	memset(map, 0, sizeof(*map));
+
	map->cmpfn = equals_function ? equals_function : always_equal;

	/* calculate initial table size and allocate the table */
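The new `hashmap_bucket` above works because `tablesize` is always a power
of two, so the mask is an exact substitute for a modulo; that is what makes
exposing a stable bucket index this cheap. A standalone illustration
(values arbitrary):

	#include <assert.h>

	/* mirrors hashmap_bucket(): valid only for power-of-two sizes */
	static unsigned int bucket_of(unsigned int hash, unsigned int tablesize)
	{
		return hash & (tablesize - 1);
	}

	int main(void)
	{
		unsigned int ts = 64; /* HASHMAP_INITIAL_SIZE */
		unsigned int h = 0x9e3779b9u;

		assert(bucket_of(h, ts) == h % ts);
		return 0;
	}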
hashmap.h
@@ -12,6 +12,7 @@ extern unsigned int strhash(const char *buf);
extern unsigned int strihash(const char *buf);
extern unsigned int memhash(const void *buf, size_t len);
extern unsigned int memihash(const void *buf, size_t len);
extern unsigned int memihash_cont(unsigned int hash_seed, const void *buf, size_t len);

static inline unsigned int sha1hash(const unsigned char *sha1)
{

@@ -38,6 +39,7 @@ struct hashmap {
	struct hashmap_entry **table;
	hashmap_cmp_fn cmpfn;
	unsigned int size, tablesize, grow_at, shrink_at;
	unsigned disallow_rehash : 1;
};

struct hashmap_iter {

@@ -76,6 +78,29 @@ static inline void *hashmap_get_from_hash(const struct hashmap *map,
	return hashmap_get(map, &key, keydata);
}

int hashmap_bucket(const struct hashmap *map, unsigned int hash);

/*
 * Disallow/allow rehashing of the hashmap.
 * This is useful if the caller knows that the hashmap
 * needs multi-threaded access.  The caller is still
 * required to guard/lock searches and inserts in a
 * manner appropriate to their usage.  This simply
 * prevents the table from being unexpectedly re-mapped.
 *
 * It is up to the caller to ensure that the hashmap is
 * initialized to a reasonable size to prevent poor
 * performance.
 *
 * When value=1, prevent future rehashes on adds and deletes.
 * When value=0, allow future rehashes.  This DOES NOT force
 * a rehash now.
 */
static inline void hashmap_disallow_rehash(struct hashmap *map, unsigned value)
{
	map->disallow_rehash = value;
}

/* hashmap_iter functions */

extern void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter);
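`hashmap_bucket` and `hashmap_disallow_rehash` are designed to be used
together: once rehashing is disallowed, an entry's bucket index is stable,
so a caller can stripe its own locks per bucket group. A sketch of that
pattern (names hypothetical; name-hash.c below does essentially this with
LAZY_MAX_MUTEX partitions):

	#include <pthread.h>
	#include "hashmap.h"

	#define NR_LOCKS 32

	static pthread_mutex_t stripe[NR_LOCKS]; /* initialized at startup */

	static pthread_mutex_t *lock_for(const struct hashmap *map,
					 unsigned int hash)
	{
		return &stripe[hashmap_bucket(map, hash) % NR_LOCKS];
	}

	/* find-or-insert under the stripe lock */
	static void *find_or_add(struct hashmap *map,
				 struct hashmap_entry *e,
				 const void *keydata)
	{
		pthread_mutex_t *m = lock_for(map, e->hash);
		void *found;

		pthread_mutex_lock(m);
		found = hashmap_get(map, e, keydata);
		if (!found)
			hashmap_add(map, e); /* safe: no rehash possible */
		pthread_mutex_unlock(m);
		return found ? found : e;
	}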
name-hash.c
@@ -23,13 +23,19 @@ static int dir_entry_cmp(const struct dir_entry *e1,
		name ? name : e2->name, e1->namelen);
}

static struct dir_entry *find_dir_entry__hash(struct index_state *istate,
		const char *name, unsigned int namelen, unsigned int hash)
{
	struct dir_entry key;
	hashmap_entry_init(&key, hash);
	key.namelen = namelen;
	return hashmap_get(&istate->dir_hash, &key, name);
}

static struct dir_entry *find_dir_entry(struct index_state *istate,
		const char *name, unsigned int namelen)
{
-	struct dir_entry key;
-	hashmap_entry_init(&key, memihash(name, namelen));
-	key.namelen = namelen;
-	return hashmap_get(&istate->dir_hash, &key, name);
+	return find_dir_entry__hash(istate, name, namelen, memihash(name, namelen));
}

static struct dir_entry *hash_dir_entry(struct index_state *istate,
@@ -112,20 +118,493 @@ static int cache_entry_cmp(const struct cache_entry *ce1,
	return remove ? !(ce1 == ce2) : 0;
}

static int lazy_try_threaded = 1;
static int lazy_nr_dir_threads;

#ifdef NO_PTHREADS

static inline int lookup_lazy_params(struct index_state *istate)
{
	return 0;
}

static inline void threaded_lazy_init_name_hash(
	struct index_state *istate)
{
}

#else

#include "thread-utils.h"

/*
 * Set a minimum number of cache_entries that we will handle per
 * thread and use that to decide how many threads to run (up to
 * the number on the system).
 *
 * For guidance setting the lower per-thread bound, see:
 *     t/helper/test-lazy-init-name-hash --analyze
 */
#define LAZY_THREAD_COST (2000)

/*
 * We use n mutexes to guard n partitions of the "istate->dir_hash"
 * hashtable.  Since "find" and "insert" operations will hash to a
 * particular bucket and modify/search a single chain, we can say
 * that "all chains mod n" are guarded by the same mutex -- rather
 * than having a single mutex to guard the entire table.  (This does
 * require that we disable "rehashing" on the hashtable.)
 *
 * So, a larger value here decreases the probability of a collision
 * and the time that each thread must wait for the mutex.
 */
#define LAZY_MAX_MUTEX (32)

static pthread_mutex_t *lazy_dir_mutex_array;

/*
 * An array of lazy_entry items is used by the n threads in
 * the directory parse (first) phase to (lock-free) store the
 * intermediate results.  These values are then referenced by
 * the 2 threads in the second phase.
 */
struct lazy_entry {
	struct dir_entry *dir;
	unsigned int hash_dir;
	unsigned int hash_name;
};

/*
 * Decide if we want to use threads (if available) to load
 * the hash tables.  We set "lazy_nr_dir_threads" to zero when
 * it is not worth it.
 */
static int lookup_lazy_params(struct index_state *istate)
{
	int nr_cpus;

	lazy_nr_dir_threads = 0;

	if (!lazy_try_threaded)
		return 0;

	/*
	 * If we are respecting case, just use the original
	 * code to build the "istate->name_hash".  We don't
	 * need the complexity here.
	 */
	if (!ignore_case)
		return 0;

	nr_cpus = online_cpus();
	if (nr_cpus < 2)
		return 0;

	if (istate->cache_nr < 2 * LAZY_THREAD_COST)
		return 0;

	if (istate->cache_nr < nr_cpus * LAZY_THREAD_COST)
		nr_cpus = istate->cache_nr / LAZY_THREAD_COST;
	lazy_nr_dir_threads = nr_cpus;
	return lazy_nr_dir_threads;
}
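Concretely: with LAZY_THREAD_COST at 2000, a 100,000-entry index on an
8-core machine gets 8 threads, a 6,000-entry index is capped at 3, and a
3,000-entry index falls back to the single-threaded path. A standalone
restatement of that sizing logic (illustrative only, not the function
above):

	#include <assert.h>

	#define LAZY_THREAD_COST 2000

	static int nr_dir_threads(int cache_nr, int nr_cpus)
	{
		if (nr_cpus < 2 || cache_nr < 2 * LAZY_THREAD_COST)
			return 0; /* not worth threading */
		if (cache_nr < nr_cpus * LAZY_THREAD_COST)
			nr_cpus = cache_nr / LAZY_THREAD_COST;
		return nr_cpus;
	}

	int main(void)
	{
		assert(nr_dir_threads(100000, 8) == 8);
		assert(nr_dir_threads(6000, 8) == 3);
		assert(nr_dir_threads(3000, 8) == 0);
		return 0;
	}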
/*
 * Initialize n mutexes for use when searching and inserting
 * into "istate->dir_hash".  All "dir" threads are trying
 * to insert partial pathnames into the hash as they iterate
 * over their portions of the index, so lock contention is
 * high.
 *
 * However, the hashmap is going to put items into bucket
 * chains based on their hash values.  Use that to create n
 * mutexes and lock on mutex[bucket(hash) % n].  This will
 * (hopefully) decrease the collision rate by a factor of n.
 */
static void init_dir_mutex(void)
{
	int j;

	lazy_dir_mutex_array = xcalloc(LAZY_MAX_MUTEX, sizeof(pthread_mutex_t));

	for (j = 0; j < LAZY_MAX_MUTEX; j++)
		init_recursive_mutex(&lazy_dir_mutex_array[j]);
}

static void cleanup_dir_mutex(void)
{
	int j;

	for (j = 0; j < LAZY_MAX_MUTEX; j++)
		pthread_mutex_destroy(&lazy_dir_mutex_array[j]);

	free(lazy_dir_mutex_array);
}

static void lock_dir_mutex(int j)
{
	pthread_mutex_lock(&lazy_dir_mutex_array[j]);
}

static void unlock_dir_mutex(int j)
{
	pthread_mutex_unlock(&lazy_dir_mutex_array[j]);
}

static inline int compute_dir_lock_nr(
	const struct hashmap *map,
	unsigned int hash)
{
	return hashmap_bucket(map, hash) % LAZY_MAX_MUTEX;
}

static struct dir_entry *hash_dir_entry_with_parent_and_prefix(
	struct index_state *istate,
	struct dir_entry *parent,
	struct strbuf *prefix)
{
	struct dir_entry *dir;
	unsigned int hash;
	int lock_nr;

	/*
	 * Either we have a parent directory and path with slash(es)
	 * or the directory is an immediate child of the root directory.
	 */
	assert((parent != NULL) ^ (strchr(prefix->buf, '/') == NULL));

	if (parent)
		hash = memihash_cont(parent->ent.hash,
			prefix->buf + parent->namelen,
			prefix->len - parent->namelen);
	else
		hash = memihash(prefix->buf, prefix->len);

	lock_nr = compute_dir_lock_nr(&istate->dir_hash, hash);
	lock_dir_mutex(lock_nr);

	dir = find_dir_entry__hash(istate, prefix->buf, prefix->len, hash);
	if (!dir) {
		FLEX_ALLOC_MEM(dir, name, prefix->buf, prefix->len);
		hashmap_entry_init(dir, hash);
		dir->namelen = prefix->len;
		dir->parent = parent;
		hashmap_add(&istate->dir_hash, dir);

		if (parent) {
			unlock_dir_mutex(lock_nr);

			/* All I really need here is an InterlockedIncrement(&(parent->nr)) */
			lock_nr = compute_dir_lock_nr(&istate->dir_hash, parent->ent.hash);
			lock_dir_mutex(lock_nr);
			parent->nr++;
		}
	}

	unlock_dir_mutex(lock_nr);

	return dir;
}
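The `InterlockedIncrement` comment above points at a simpler shape: if the
ref-count were atomic, the second lock acquisition would disappear
entirely. A hypothetical sketch with C11 atomics (not what this patch
does; the codebase does not assume <stdatomic.h> here):

	#include <stdatomic.h>

	/* hypothetical dir_entry variant with an atomic ref-count */
	struct dir_entry_atomic {
		atomic_int nr;
		/* ... remaining fields as in dir_entry ... */
	};

	static void ref_parent(struct dir_entry_atomic *parent)
	{
		/* one atomic add replaces lock + increment + unlock */
		atomic_fetch_add_explicit(&parent->nr, 1,
					  memory_order_relaxed);
	}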
/*
 * handle_range_1() and handle_range_dir() are derived from
 * clear_ce_flags_1() and clear_ce_flags_dir() in unpack-trees.c
 * and handle the iteration over the entire array of index entries.
 * They use recursion for adjacent entries in the same parent
 * directory.
 */
static int handle_range_1(
	struct index_state *istate,
	int k_start,
	int k_end,
	struct dir_entry *parent,
	struct strbuf *prefix,
	struct lazy_entry *lazy_entries);

static int handle_range_dir(
	struct index_state *istate,
	int k_start,
	int k_end,
	struct dir_entry *parent,
	struct strbuf *prefix,
	struct lazy_entry *lazy_entries,
	struct dir_entry **dir_new_out)
{
	int rc, k;
	int input_prefix_len = prefix->len;
	struct dir_entry *dir_new;

	dir_new = hash_dir_entry_with_parent_and_prefix(istate, parent, prefix);

	strbuf_addch(prefix, '/');

	/*
	 * Scan forward in the index array for index entries having the same
	 * path prefix (that are also in this directory).
	 */
	if (strncmp(istate->cache[k_start + 1]->name, prefix->buf, prefix->len) > 0)
		k = k_start + 1;
	else if (strncmp(istate->cache[k_end - 1]->name, prefix->buf, prefix->len) == 0)
		k = k_end;
	else {
		int begin = k_start;
		int end = k_end;
		while (begin < end) {
			int mid = (begin + end) >> 1;
			int cmp = strncmp(istate->cache[mid]->name, prefix->buf, prefix->len);
			if (cmp == 0) /* mid has same prefix; look in second part */
				begin = mid + 1;
			else if (cmp > 0) /* mid is past group; look in first part */
				end = mid;
			else
				die("cache entry out of order");
		}
		k = begin;
	}

	/*
	 * Recurse and process what we can of this subset [k_start, k).
	 */
	rc = handle_range_1(istate, k_start, k, dir_new, prefix, lazy_entries);

	strbuf_setlen(prefix, input_prefix_len);

	*dir_new_out = dir_new;
	return rc;
}

static int handle_range_1(
	struct index_state *istate,
	int k_start,
	int k_end,
	struct dir_entry *parent,
	struct strbuf *prefix,
	struct lazy_entry *lazy_entries)
{
	int input_prefix_len = prefix->len;
	int k = k_start;

	while (k < k_end) {
		struct cache_entry *ce_k = istate->cache[k];
		const char *name, *slash;

		if (prefix->len && strncmp(ce_k->name, prefix->buf, prefix->len))
			break;

		name = ce_k->name + prefix->len;
		slash = strchr(name, '/');

		if (slash) {
			int len = slash - name;
			int processed;
			struct dir_entry *dir_new;

			strbuf_add(prefix, name, len);
			processed = handle_range_dir(istate, k, k_end, parent, prefix, lazy_entries, &dir_new);
			if (processed) {
				k += processed;
				strbuf_setlen(prefix, input_prefix_len);
				continue;
			}

			strbuf_addch(prefix, '/');
			processed = handle_range_1(istate, k, k_end, dir_new, prefix, lazy_entries);
			k += processed;
			strbuf_setlen(prefix, input_prefix_len);
			continue;
		}

		/*
		 * It is too expensive to take a lock to insert "ce_k"
		 * into "istate->name_hash" and increment the ref-count
		 * on the "parent" dir.  So we defer actually updating
		 * permanent data structures until phase 2 (where we
		 * can change the locking requirements) and simply
		 * accumulate our current results into the lazy_entries
		 * data array.
		 *
		 * We do not need to lock the lazy_entries array because
		 * we have exclusive access to the cells in the range
		 * [k_start,k_end) that this thread was given.
		 */
		lazy_entries[k].dir = parent;
		if (parent) {
			lazy_entries[k].hash_name = memihash_cont(
				parent->ent.hash,
				ce_k->name + parent->namelen,
				ce_namelen(ce_k) - parent->namelen);
			lazy_entries[k].hash_dir = parent->ent.hash;
		} else {
			lazy_entries[k].hash_name = memihash(ce_k->name, ce_namelen(ce_k));
		}

		k++;
	}

	return k - k_start;
}
struct lazy_dir_thread_data {
	pthread_t pthread;
	struct index_state *istate;
	struct lazy_entry *lazy_entries;
	int k_start;
	int k_end;
};

static void *lazy_dir_thread_proc(void *_data)
{
	struct lazy_dir_thread_data *d = _data;
	struct strbuf prefix = STRBUF_INIT;
	handle_range_1(d->istate, d->k_start, d->k_end, NULL, &prefix, d->lazy_entries);
	strbuf_release(&prefix);
	return NULL;
}

struct lazy_name_thread_data {
	pthread_t pthread;
	struct index_state *istate;
	struct lazy_entry *lazy_entries;
};

static void *lazy_name_thread_proc(void *_data)
{
	struct lazy_name_thread_data *d = _data;
	int k;

	for (k = 0; k < d->istate->cache_nr; k++) {
		struct cache_entry *ce_k = d->istate->cache[k];
		ce_k->ce_flags |= CE_HASHED;
		hashmap_entry_init(ce_k, d->lazy_entries[k].hash_name);
		hashmap_add(&d->istate->name_hash, ce_k);
	}

	return NULL;
}

static inline void lazy_update_dir_ref_counts(
	struct index_state *istate,
	struct lazy_entry *lazy_entries)
{
	int k;

	for (k = 0; k < istate->cache_nr; k++) {
		if (lazy_entries[k].dir)
			lazy_entries[k].dir->nr++;
	}
}

static void threaded_lazy_init_name_hash(
	struct index_state *istate)
{
	int nr_each;
	int k_start;
	int t;
	struct lazy_entry *lazy_entries;
	struct lazy_dir_thread_data *td_dir;
	struct lazy_name_thread_data *td_name;

	k_start = 0;
	nr_each = DIV_ROUND_UP(istate->cache_nr, lazy_nr_dir_threads);

	lazy_entries = xcalloc(istate->cache_nr, sizeof(struct lazy_entry));
	td_dir = xcalloc(lazy_nr_dir_threads, sizeof(struct lazy_dir_thread_data));
	td_name = xcalloc(1, sizeof(struct lazy_name_thread_data));

	init_dir_mutex();

	/*
	 * Phase 1:
	 * Build "istate->dir_hash" using n "dir" threads (and a read-only index).
	 */
	for (t = 0; t < lazy_nr_dir_threads; t++) {
		struct lazy_dir_thread_data *td_dir_t = td_dir + t;
		td_dir_t->istate = istate;
		td_dir_t->lazy_entries = lazy_entries;
		td_dir_t->k_start = k_start;
		k_start += nr_each;
		if (k_start > istate->cache_nr)
			k_start = istate->cache_nr;
		td_dir_t->k_end = k_start;
		if (pthread_create(&td_dir_t->pthread, NULL, lazy_dir_thread_proc, td_dir_t))
			die("unable to create lazy_dir_thread");
	}
	for (t = 0; t < lazy_nr_dir_threads; t++) {
		struct lazy_dir_thread_data *td_dir_t = td_dir + t;
		if (pthread_join(td_dir_t->pthread, NULL))
			die("unable to join lazy_dir_thread");
	}

	/*
	 * Phase 2:
	 * Iterate over all index entries and add them to the "istate->name_hash"
	 * using a single "name" background thread.
	 * (Testing showed it wasn't worth running more than 1 thread for this.)
	 *
	 * Meanwhile, finish updating the parent directory ref-counts for each
	 * index entry using the current thread.  (This step is very fast and
	 * doesn't need threading.)
	 */
	td_name->istate = istate;
	td_name->lazy_entries = lazy_entries;
	if (pthread_create(&td_name->pthread, NULL, lazy_name_thread_proc, td_name))
		die("unable to create lazy_name_thread");

	lazy_update_dir_ref_counts(istate, lazy_entries);

	if (pthread_join(td_name->pthread, NULL))
		die("unable to join lazy_name_thread");

	cleanup_dir_mutex();

	free(td_name);
	free(td_dir);
	free(lazy_entries);
}

#endif

static void lazy_init_name_hash(struct index_state *istate)
{
-	int nr;
-
	if (istate->name_hash_initialized)
		return;
	hashmap_init(&istate->name_hash, (hashmap_cmp_fn) cache_entry_cmp,
			istate->cache_nr);
-	hashmap_init(&istate->dir_hash, (hashmap_cmp_fn) dir_entry_cmp, 0);
-	for (nr = 0; nr < istate->cache_nr; nr++)
-		hash_index_entry(istate, istate->cache[nr]);
+	hashmap_init(&istate->dir_hash, (hashmap_cmp_fn) dir_entry_cmp,
+			istate->cache_nr);
+
+	if (lookup_lazy_params(istate)) {
+		hashmap_disallow_rehash(&istate->dir_hash, 1);
+		threaded_lazy_init_name_hash(istate);
+		hashmap_disallow_rehash(&istate->dir_hash, 0);
+	} else {
+		int nr;
+		for (nr = 0; nr < istate->cache_nr; nr++)
+			hash_index_entry(istate, istate->cache[nr]);
+	}
+
	istate->name_hash_initialized = 1;
}

/*
 * A test routine for t/helper/ sources.
 *
 * Returns the number of threads used or 0 when
 * the non-threaded code path was used.
 *
 * Requesting threading WILL NOT override guards
 * in lookup_lazy_params().
 */
int test_lazy_init_name_hash(struct index_state *istate, int try_threaded)
{
	lazy_nr_dir_threads = 0;
	lazy_try_threaded = try_threaded;

	lazy_init_name_hash(istate);

	return lazy_nr_dir_threads;
}

void add_name_hash(struct index_state *istate, struct cache_entry *ce)
{
	if (istate->name_hash_initialized)
t/helper/.gitignore
@@ -11,6 +11,7 @@
/test-genrandom
/test-hashmap
/test-index-version
/test-lazy-init-name-hash
/test-line-buffer
/test-match-trees
/test-mergesort
t/helper/test-lazy-init-name-hash.c (new file)
@@ -0,0 +1,264 @@
#include "cache.h"
#include "parse-options.h"

static int single;
static int multi;
static int count = 1;
static int dump;
static int perf;
static int analyze;
static int analyze_step;

/*
 * Dump the contents of the "dir" and "name" hash tables to stdout.
 * If you sort the result, you can compare it with the other mode
 * and verify that both single and multi produce the same set.
 */
static void dump_run(void)
{
	struct hashmap_iter iter_dir;
	struct hashmap_iter iter_cache;

	/* Stolen from name-hash.c */
	struct dir_entry {
		struct hashmap_entry ent;
		struct dir_entry *parent;
		int nr;
		unsigned int namelen;
		char name[FLEX_ARRAY];
	};

	struct dir_entry *dir;
	struct cache_entry *ce;

	read_cache();
	if (single) {
		test_lazy_init_name_hash(&the_index, 0);
	} else {
		int nr_threads_used = test_lazy_init_name_hash(&the_index, 1);
		if (!nr_threads_used)
			die("non-threaded code path used");
	}

	dir = hashmap_iter_first(&the_index.dir_hash, &iter_dir);
	while (dir) {
		printf("dir %08x %7d %s\n", dir->ent.hash, dir->nr, dir->name);
		dir = hashmap_iter_next(&iter_dir);
	}

	ce = hashmap_iter_first(&the_index.name_hash, &iter_cache);
	while (ce) {
		printf("name %08x %s\n", ce->ent.hash, ce->name);
		ce = hashmap_iter_next(&iter_cache);
	}

	discard_cache();
}

/*
 * Run the single or multi threaded version "count" times and
 * report on the time taken.
 */
static uint64_t time_runs(int try_threaded)
{
	uint64_t t0, t1, t2;
	uint64_t sum = 0;
	uint64_t avg;
	int nr_threads_used;
	int i;

	for (i = 0; i < count; i++) {
		t0 = getnanotime();
		read_cache();
		t1 = getnanotime();
		nr_threads_used = test_lazy_init_name_hash(&the_index, try_threaded);
		t2 = getnanotime();

		sum += (t2 - t1);

		if (try_threaded && !nr_threads_used)
			die("non-threaded code path used");

		if (nr_threads_used)
			printf("%f %f %d multi %d\n",
				((double)(t1 - t0))/1000000000,
				((double)(t2 - t1))/1000000000,
				the_index.cache_nr,
				nr_threads_used);
		else
			printf("%f %f %d single\n",
				((double)(t1 - t0))/1000000000,
				((double)(t2 - t1))/1000000000,
				the_index.cache_nr);
		fflush(stdout);

		discard_cache();
	}

	avg = sum / count;
	if (count > 1)
		printf("avg %f %s\n",
			(double)avg/1000000000,
			(try_threaded) ? "multi" : "single");

	return avg;
}

/*
 * Try a series of runs varying the "istate->cache_nr" and
 * try to find a good value for the multi-threaded criteria.
 */
static void analyze_run(void)
{
	uint64_t t1s, t1m, t2s, t2m;
	int cache_nr_limit;
	int nr_threads_used;
	int i;
	int nr;

	read_cache();
	cache_nr_limit = the_index.cache_nr;
	discard_cache();

	nr = analyze;
	while (1) {
		uint64_t sum_single = 0;
		uint64_t sum_multi = 0;
		uint64_t avg_single;
		uint64_t avg_multi;

		if (nr > cache_nr_limit)
			nr = cache_nr_limit;

		for (i = 0; i < count; i++) {
			read_cache();
			the_index.cache_nr = nr; /* cheap truncate of index */
			t1s = getnanotime();
			test_lazy_init_name_hash(&the_index, 0);
			t2s = getnanotime();
			sum_single += (t2s - t1s);
			the_index.cache_nr = cache_nr_limit;
			discard_cache();

			read_cache();
			the_index.cache_nr = nr; /* cheap truncate of index */
			t1m = getnanotime();
			nr_threads_used = test_lazy_init_name_hash(&the_index, 1);
			t2m = getnanotime();
			sum_multi += (t2m - t1m);
			the_index.cache_nr = cache_nr_limit;
			discard_cache();

			if (!nr_threads_used)
				printf("    [size %8d] [single %f] non-threaded code path used\n",
					nr, ((double)(t2s - t1s))/1000000000);
			else
				printf("    [size %8d] [single %f] %c [multi %f %d]\n",
					nr,
					((double)(t2s - t1s))/1000000000,
					(((t2s - t1s) < (t2m - t1m)) ? '<' : '>'),
					((double)(t2m - t1m))/1000000000,
					nr_threads_used);
			fflush(stdout);
		}
		if (count > 1) {
			avg_single = sum_single / count;
			avg_multi = sum_multi / count;
			if (!nr_threads_used)
				printf("avg [size %8d] [single %f]\n",
					nr,
					(double)avg_single/1000000000);
			else
				printf("avg [size %8d] [single %f] %c [multi %f %d]\n",
					nr,
					(double)avg_single/1000000000,
					(avg_single < avg_multi ? '<' : '>'),
					(double)avg_multi/1000000000,
					nr_threads_used);
			fflush(stdout);
		}

		if (nr >= cache_nr_limit)
			return;
		nr += analyze_step;
	}
}

int cmd_main(int argc, const char **argv)
{
	const char *usage[] = {
		"test-lazy-init-name-hash -d (-s | -m)",
		"test-lazy-init-name-hash -p [-c c]",
		"test-lazy-init-name-hash -a a [--step s] [-c c]",
		"test-lazy-init-name-hash (-s | -m) [-c c]",
		"test-lazy-init-name-hash -s -m [-c c]",
		NULL
	};
	struct option options[] = {
		OPT_BOOL('s', "single", &single, "run single-threaded code"),
		OPT_BOOL('m', "multi", &multi, "run multi-threaded code"),
		OPT_INTEGER('c', "count", &count, "number of passes"),
		OPT_BOOL('d', "dump", &dump, "dump hash tables"),
		OPT_BOOL('p', "perf", &perf, "compare single vs multi"),
		OPT_INTEGER('a', "analyze", &analyze, "analyze different multi sizes"),
		OPT_INTEGER(0, "step", &analyze_step, "analyze step factor"),
		OPT_END(),
	};
	const char *prefix;
	uint64_t avg_single, avg_multi;

	prefix = setup_git_directory();

	argc = parse_options(argc, argv, prefix, options, usage, 0);

	/*
	 * istate->dir_hash is only created when ignore_case is set.
	 */
	ignore_case = 1;

	if (dump) {
		if (perf || analyze > 0)
			die("cannot combine dump, perf, or analyze");
		if (count > 1)
			die("count not valid with dump");
		if (single && multi)
			die("cannot use both single and multi with dump");
		if (!single && !multi)
			die("dump requires either single or multi");
		dump_run();
		return 0;
	}

	if (perf) {
		if (analyze > 0)
			die("cannot combine dump, perf, or analyze");
		if (single || multi)
			die("cannot use single or multi with perf");
		avg_single = time_runs(0);
		avg_multi = time_runs(1);
		if (avg_multi > avg_single)
			die("multi is slower");
		return 0;
	}

	if (analyze) {
		if (analyze < 500)
			die("analyze must be at least 500");
		if (!analyze_step)
			analyze_step = analyze;
		if (single || multi)
			die("cannot use single or multi with analyze");
		analyze_run();
		return 0;
	}

	if (!single && !multi)
		die("require either -s or -m or both");

	if (single)
		time_runs(0);
	if (multi)
		time_runs(1);

	return 0;
}
t/perf/p0004-lazy-init-name-hash.sh (new file)
@@ -0,0 +1,19 @@
#!/bin/sh

test_description='Tests multi-threaded lazy_init_name_hash'
. ./perf-lib.sh

test_perf_large_repo
test_checkout_worktree

test_expect_success 'verify both methods build the same hashmaps' '
	$GIT_BUILD_DIR/t/helper/test-lazy-init-name-hash$X --dump --single | sort >out.single &&
	$GIT_BUILD_DIR/t/helper/test-lazy-init-name-hash$X --dump --multi | sort >out.multi &&
	test_cmp out.single out.multi
'

test_expect_success 'multithreaded should be faster' '
	$GIT_BUILD_DIR/t/helper/test-lazy-init-name-hash$X --perf >out.perf
'

test_done