Merge branch 'jh/memihash-opt'

The name-hash used for detecting paths that differ only in case
(which matters on case-insensitive filesystems) has been optimized
to take advantage of multi-threading when it makes sense.

* jh/memihash-opt:
  name-hash: add test-lazy-init-name-hash to .gitignore
  name-hash: add perf test for lazy_init_name_hash
  name-hash: add test-lazy-init-name-hash
  name-hash: perf improvement for lazy_init_name_hash
  hashmap: document memihash_cont, hashmap_disallow_rehash api
  hashmap: add disallow_rehash setting
  hashmap: allow memihash computation to be continued
  name-hash: specify initial size for istate.dir_hash table
Junio C Hamano 2017-03-28 14:06:00 -07:00
commit 0330344e0f
9 changed files with 849 additions and 10 deletions


@@ -21,6 +21,9 @@ that the hashmap is initialized. It may also be useful for statistical purposes
`cmpfn` stores the comparison function specified in `hashmap_init()`. In
advanced scenarios, it may be useful to change this, e.g. to switch between
case-sensitive and case-insensitive lookup.
+
When `disallow_rehash` is set, automatic rehashes are prevented during inserts
and deletes.
`struct hashmap_entry`::
@@ -57,6 +60,7 @@ Functions
`unsigned int strihash(const char *buf)`::
`unsigned int memhash(const void *buf, size_t len)`::
`unsigned int memihash(const void *buf, size_t len)`::
`unsigned int memihash_cont(unsigned int hash_seed, const void *buf, size_t len)`::
Ready-to-use hash functions for strings, using the FNV-1 algorithm (see
http://www.isthe.com/chongo/tech/comp/fnv).
@@ -65,6 +69,9 @@ Functions
`memihash` operate on arbitrary-length memory.
+
`strihash` and `memihash` are case insensitive versions.
+
`memihash_cont` is a variant of `memihash` that allows a computation to be
continued with another chunk of data.
`unsigned int sha1hash(const unsigned char *sha1)`::
@@ -184,6 +191,21 @@ passed to `hashmap_cmp_fn` to decide whether the entry matches the key.
+
Returns the removed entry, or NULL if not found.
`void hashmap_disallow_rehash(struct hashmap *map, unsigned value)`::
Disallow/allow automatic rehashing of the hashmap during inserts
and deletes.
+
This is useful if the caller knows that the hashmap will be accessed
by multiple threads.
+
The caller is still responsible for any necessary locking; this simply
prevents unexpected rehashing. The caller is also responsible for properly
sizing the initial hashmap to ensure good performance.
+
A call to allow rehashing does not force a rehash; that might happen
with the next insert or delete.
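+
A minimal sketch of the intended calling pattern (illustrative only; `my_cmp`
and `expected_nr_entries` are stand-ins, not part of this API):
+
------------
struct hashmap map;

/* size the table up front so it never needs to rehash while threaded */
hashmap_init(&map, (hashmap_cmp_fn) my_cmp, expected_nr_entries);
hashmap_disallow_rehash(&map, 1);

/* ... worker threads search/insert entries under their own locking ... */

hashmap_disallow_rehash(&map, 0);
------------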
`void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter)`::
`void *hashmap_iter_next(struct hashmap_iter *iter)`::
`void *hashmap_iter_first(struct hashmap *map, struct hashmap_iter *iter)`::


@@ -621,6 +621,7 @@ TEST_PROGRAMS_NEED_X += test-fake-ssh
TEST_PROGRAMS_NEED_X += test-genrandom
TEST_PROGRAMS_NEED_X += test-hashmap
TEST_PROGRAMS_NEED_X += test-index-version
TEST_PROGRAMS_NEED_X += test-lazy-init-name-hash
TEST_PROGRAMS_NEED_X += test-line-buffer
TEST_PROGRAMS_NEED_X += test-match-trees
TEST_PROGRAMS_NEED_X += test-mergesort


@@ -343,6 +343,7 @@ struct index_state {
extern struct index_state the_index;
/* Name hashing */
extern int test_lazy_init_name_hash(struct index_state *istate, int try_threaded);
extern void add_name_hash(struct index_state *istate, struct cache_entry *ce);
extern void remove_name_hash(struct index_state *istate, struct cache_entry *ce);
extern void free_name_hash(struct index_state *istate);


@@ -50,6 +50,23 @@ unsigned int memihash(const void *buf, size_t len)
return hash;
}
/*
* Incorporate another chunk of data into a memihash
* computation.
*/
unsigned int memihash_cont(unsigned int hash_seed, const void *buf, size_t len)
{
unsigned int hash = hash_seed;
unsigned char *ucbuf = (unsigned char *) buf;
while (len--) {
unsigned int c = *ucbuf++;
if (c >= 'a' && c <= 'z')
c -= 'a' - 'A';
hash = (hash * FNV32_PRIME) ^ c;
}
return hash;
}
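/*
 * Illustrative note (an added sketch, not part of the change itself):
 * splitting the input does not change the result, so for a made-up
 * buffer "dir/file":
 *
 *	memihash("dir/file", 8) == memihash_cont(memihash("dir/", 4), "file", 4)
 *
 * This property is what lets name-hash.c reuse a parent directory's hash
 * when hashing the paths below it.
 */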
#define HASHMAP_INITIAL_SIZE 64
/* grow / shrink by 2^2 */
#define HASHMAP_RESIZE_BITS 2
@@ -87,11 +104,19 @@ static inline unsigned int bucket(const struct hashmap *map,
return key->hash & (map->tablesize - 1);
}
int hashmap_bucket(const struct hashmap *map, unsigned int hash)
{
return hash & (map->tablesize - 1);
}
static void rehash(struct hashmap *map, unsigned int newsize)
{
unsigned int i, oldsize = map->tablesize;
struct hashmap_entry **oldtable = map->table;
if (map->disallow_rehash)
return;
alloc_table(map, newsize);
for (i = 0; i < oldsize; i++) {
struct hashmap_entry *e = oldtable[i];
@@ -124,7 +149,9 @@ void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function,
size_t initial_size)
{
unsigned int size = HASHMAP_INITIAL_SIZE;
- map->size = 0;
memset(map, 0, sizeof(*map));
map->cmpfn = equals_function ? equals_function : always_equal;
/* calculate initial table size and allocate the table */


@@ -12,6 +12,7 @@ extern unsigned int strhash(const char *buf);
extern unsigned int strihash(const char *buf);
extern unsigned int memhash(const void *buf, size_t len);
extern unsigned int memihash(const void *buf, size_t len);
extern unsigned int memihash_cont(unsigned int hash_seed, const void *buf, size_t len);
static inline unsigned int sha1hash(const unsigned char *sha1)
{
@@ -38,6 +39,7 @@ struct hashmap {
struct hashmap_entry **table;
hashmap_cmp_fn cmpfn;
unsigned int size, tablesize, grow_at, shrink_at;
unsigned disallow_rehash : 1;
};
struct hashmap_iter {
@@ -76,6 +78,29 @@ static inline void *hashmap_get_from_hash(const struct hashmap *map,
return hashmap_get(map, &key, keydata);
}
int hashmap_bucket(const struct hashmap *map, unsigned int hash);
/*
* Disallow/allow rehashing of the hashmap.
* This is useful if the caller knows that the hashmap
* needs multi-threaded access. The caller is still
* required to guard/lock searches and inserts in a
* manner appropriate to their usage. This simply
* prevents the table from being unexpectedly re-mapped.
*
* It is up to the caller to ensure that the hashmap is
* initialized to a reasonable size to prevent poor
* performance.
*
* When value=1, prevent future rehashes on adds and deletes.
* When value=0, allow future rehashes. This DOES NOT force
* a rehash now.
*/
static inline void hashmap_disallow_rehash(struct hashmap *map, unsigned value)
{
map->disallow_rehash = value;
}
/* hashmap_iter functions */
extern void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter);


@@ -23,13 +23,19 @@ static int dir_entry_cmp(const struct dir_entry *e1,
name ? name : e2->name, e1->namelen);
}
static struct dir_entry *find_dir_entry__hash(struct index_state *istate,
const char *name, unsigned int namelen, unsigned int hash)
{
struct dir_entry key;
hashmap_entry_init(&key, hash);
key.namelen = namelen;
return hashmap_get(&istate->dir_hash, &key, name);
}
static struct dir_entry *find_dir_entry(struct index_state *istate,
const char *name, unsigned int namelen)
{
- struct dir_entry key;
- hashmap_entry_init(&key, memihash(name, namelen));
- key.namelen = namelen;
- return hashmap_get(&istate->dir_hash, &key, name);
return find_dir_entry__hash(istate, name, namelen, memihash(name, namelen));
}
static struct dir_entry *hash_dir_entry(struct index_state *istate,
@@ -112,20 +118,493 @@ static int cache_entry_cmp(const struct cache_entry *ce1,
return remove ? !(ce1 == ce2) : 0;
}
static int lazy_try_threaded = 1;
static int lazy_nr_dir_threads;
#ifdef NO_PTHREADS
static inline int lookup_lazy_params(struct index_state *istate)
{
return 0;
}
static inline void threaded_lazy_init_name_hash(
struct index_state *istate)
{
}
#else
#include "thread-utils.h"
/*
* Set a minimum number of cache_entries that we will handle per
* thread and use that to decide how many threads to run (up to
* the number of CPUs on the system).
*
* For guidance setting the lower per-thread bound, see:
* t/helper/test-lazy-init-name-hash --analyze
*/
#define LAZY_THREAD_COST (2000)
/*
* We use n mutexes to guard n partitions of the "istate->dir_hash"
* hashtable. Since "find" and "insert" operations will hash to a
* particular bucket and modify/search a single chain, we can say
* that "all chains mod n" are guarded by the same mutex -- rather
* than having a single mutex to guard the entire table. (This does
* require that we disable "rehashing" on the hashtable.)
*
* So, a larger value here decreases the probability of a collision
* and the time that each thread must wait for the mutex.
*/
#define LAZY_MAX_MUTEX (32)
static pthread_mutex_t *lazy_dir_mutex_array;
/*
* An array of lazy_entry items is used by the n threads in
* the directory parse (first) phase to (lock-free) store the
* intermediate results. These values are then referenced by
* the 2 threads in the second phase.
*/
struct lazy_entry {
struct dir_entry *dir;
unsigned int hash_dir;
unsigned int hash_name;
};
/*
* Decide if we want to use threads (if available) to load
* the hash tables. We set "lazy_nr_dir_threads" to zero when
* it is not worth it.
*/
static int lookup_lazy_params(struct index_state *istate)
{
int nr_cpus;
lazy_nr_dir_threads = 0;
if (!lazy_try_threaded)
return 0;
/*
* If we are respecting case, just use the original
* code to build the "istate->name_hash". We don't
* need the complexity here.
*/
if (!ignore_case)
return 0;
nr_cpus = online_cpus();
if (nr_cpus < 2)
return 0;
if (istate->cache_nr < 2 * LAZY_THREAD_COST)
return 0;
if (istate->cache_nr < nr_cpus * LAZY_THREAD_COST)
nr_cpus = istate->cache_nr / LAZY_THREAD_COST;
lazy_nr_dir_threads = nr_cpus;
return lazy_nr_dir_threads;
}
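/*
 * Worked example (illustrative numbers, added here for clarity): with
 * 8 online CPUs and cache_nr == 7000, threading is worth it because
 * 7000 >= 2 * LAZY_THREAD_COST, but 7000 < 8 * LAZY_THREAD_COST, so
 * nr_cpus is reduced to 7000 / LAZY_THREAD_COST = 3 and three "dir"
 * threads are used.
 */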
/*
* Initialize n mutexes for use when searching and inserting
* into "istate->dir_hash". All "dir" threads are trying
* to insert partial pathnames into the hash as they iterate
* over their portions of the index, so lock contention is
* high.
*
* However, the hashmap is going to put items into bucket
* chains based on their hash values. Use that to create n
* mutexes and lock on mutex[bucket(hash) % n]. This will
* decrease the collision rate (hopefully) by a factor of n.
*/
static void init_dir_mutex(void)
{
int j;
lazy_dir_mutex_array = xcalloc(LAZY_MAX_MUTEX, sizeof(pthread_mutex_t));
for (j = 0; j < LAZY_MAX_MUTEX; j++)
init_recursive_mutex(&lazy_dir_mutex_array[j]);
}
static void cleanup_dir_mutex(void)
{
int j;
for (j = 0; j < LAZY_MAX_MUTEX; j++)
pthread_mutex_destroy(&lazy_dir_mutex_array[j]);
free(lazy_dir_mutex_array);
}
static void lock_dir_mutex(int j)
{
pthread_mutex_lock(&lazy_dir_mutex_array[j]);
}
static void unlock_dir_mutex(int j)
{
pthread_mutex_unlock(&lazy_dir_mutex_array[j]);
}
static inline int compute_dir_lock_nr(
const struct hashmap *map,
unsigned int hash)
{
return hashmap_bucket(map, hash) % LAZY_MAX_MUTEX;
}
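/*
 * Illustrative example (made-up numbers): with tablesize == 4096, hashes
 * 0x1005 and 0x3025 land in buckets 5 and 37; modulo LAZY_MAX_MUTEX (32)
 * both map to lock 5. Different chains may therefore share a mutex, but
 * any given chain is always guarded by the same one.
 */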
static struct dir_entry *hash_dir_entry_with_parent_and_prefix(
struct index_state *istate,
struct dir_entry *parent,
struct strbuf *prefix)
{
struct dir_entry *dir;
unsigned int hash;
int lock_nr;
/*
* Either we have a parent directory and path with slash(es)
* or the directory is an immediate child of the root directory.
*/
assert((parent != NULL) ^ (strchr(prefix->buf, '/') == NULL));
if (parent)
hash = memihash_cont(parent->ent.hash,
prefix->buf + parent->namelen,
prefix->len - parent->namelen);
else
hash = memihash(prefix->buf, prefix->len);
lock_nr = compute_dir_lock_nr(&istate->dir_hash, hash);
lock_dir_mutex(lock_nr);
dir = find_dir_entry__hash(istate, prefix->buf, prefix->len, hash);
if (!dir) {
FLEX_ALLOC_MEM(dir, name, prefix->buf, prefix->len);
hashmap_entry_init(dir, hash);
dir->namelen = prefix->len;
dir->parent = parent;
hashmap_add(&istate->dir_hash, dir);
if (parent) {
unlock_dir_mutex(lock_nr);
/* All I really need here is an InterlockedIncrement(&(parent->nr)) */
lock_nr = compute_dir_lock_nr(&istate->dir_hash, parent->ent.hash);
lock_dir_mutex(lock_nr);
parent->nr++;
}
}
unlock_dir_mutex(lock_nr);
return dir;
}
/*
* handle_range_1() and handle_range_dir() are derived from
* clear_ce_flags_1() and clear_ce_flags_dir() in unpack-trees.c
* and handle the iteration over the entire array of index entries.
* They use recursion for adjacent entries in the same parent
* directory.
*/
static int handle_range_1(
struct index_state *istate,
int k_start,
int k_end,
struct dir_entry *parent,
struct strbuf *prefix,
struct lazy_entry *lazy_entries);
static int handle_range_dir(
struct index_state *istate,
int k_start,
int k_end,
struct dir_entry *parent,
struct strbuf *prefix,
struct lazy_entry *lazy_entries,
struct dir_entry **dir_new_out)
{
int rc, k;
int input_prefix_len = prefix->len;
struct dir_entry *dir_new;
dir_new = hash_dir_entry_with_parent_and_prefix(istate, parent, prefix);
strbuf_addch(prefix, '/');
/*
* Scan forward in the index array for index entries having the same
* path prefix (that are also in this directory).
*/
if (strncmp(istate->cache[k_start + 1]->name, prefix->buf, prefix->len) > 0)
k = k_start + 1;
else if (strncmp(istate->cache[k_end - 1]->name, prefix->buf, prefix->len) == 0)
k = k_end;
else {
int begin = k_start;
int end = k_end;
while (begin < end) {
int mid = (begin + end) >> 1;
int cmp = strncmp(istate->cache[mid]->name, prefix->buf, prefix->len);
if (cmp == 0) /* mid has same prefix; look in second part */
begin = mid + 1;
else if (cmp > 0) /* mid is past group; look in first part */
end = mid;
else
die("cache entry out of order");
}
k = begin;
}
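/*
 * Worked example (made-up entries, added for clarity): with a sorted
 * cache of { "dir/a", "dir/b", "dirk", "e" }, k_start == 0, k_end == 4
 * and prefix "dir/", the search above settles on k == 2, the first
 * entry past the "dir/" group, so the recursion below covers exactly
 * the entries inside "dir/".
 */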
/*
* Recurse and process what we can of this subset [k_start, k).
*/
rc = handle_range_1(istate, k_start, k, dir_new, prefix, lazy_entries);
strbuf_setlen(prefix, input_prefix_len);
*dir_new_out = dir_new;
return rc;
}
static int handle_range_1(
struct index_state *istate,
int k_start,
int k_end,
struct dir_entry *parent,
struct strbuf *prefix,
struct lazy_entry *lazy_entries)
{
int input_prefix_len = prefix->len;
int k = k_start;
while (k < k_end) {
struct cache_entry *ce_k = istate->cache[k];
const char *name, *slash;
if (prefix->len && strncmp(ce_k->name, prefix->buf, prefix->len))
break;
name = ce_k->name + prefix->len;
slash = strchr(name, '/');
if (slash) {
int len = slash - name;
int processed;
struct dir_entry *dir_new;
strbuf_add(prefix, name, len);
processed = handle_range_dir(istate, k, k_end, parent, prefix, lazy_entries, &dir_new);
if (processed) {
k += processed;
strbuf_setlen(prefix, input_prefix_len);
continue;
}
strbuf_addch(prefix, '/');
processed = handle_range_1(istate, k, k_end, dir_new, prefix, lazy_entries);
k += processed;
strbuf_setlen(prefix, input_prefix_len);
continue;
}
/*
* It is too expensive to take a lock to insert "ce_k"
* into "istate->name_hash" and increment the ref-count
* on the "parent" dir. So we defer actually updating
* permanent data structures until phase 2 (where we
* can change the locking requirements) and simply
* accumulate our current results into the lazy_entries
* data array.
*
* We do not need to lock the lazy_entries array because
* we have exclusive access to the cells in the range
* [k_start,k_end) that this thread was given.
*/
lazy_entries[k].dir = parent;
if (parent) {
lazy_entries[k].hash_name = memihash_cont(
parent->ent.hash,
ce_k->name + parent->namelen,
ce_namelen(ce_k) - parent->namelen);
lazy_entries[k].hash_dir = parent->ent.hash;
} else {
lazy_entries[k].hash_name = memihash(ce_k->name, ce_namelen(ce_k));
}
k++;
}
return k - k_start;
}
struct lazy_dir_thread_data {
pthread_t pthread;
struct index_state *istate;
struct lazy_entry *lazy_entries;
int k_start;
int k_end;
};
static void *lazy_dir_thread_proc(void *_data)
{
struct lazy_dir_thread_data *d = _data;
struct strbuf prefix = STRBUF_INIT;
handle_range_1(d->istate, d->k_start, d->k_end, NULL, &prefix, d->lazy_entries);
strbuf_release(&prefix);
return NULL;
}
struct lazy_name_thread_data {
pthread_t pthread;
struct index_state *istate;
struct lazy_entry *lazy_entries;
};
static void *lazy_name_thread_proc(void *_data)
{
struct lazy_name_thread_data *d = _data;
int k;
for (k = 0; k < d->istate->cache_nr; k++) {
struct cache_entry *ce_k = d->istate->cache[k];
ce_k->ce_flags |= CE_HASHED;
hashmap_entry_init(ce_k, d->lazy_entries[k].hash_name);
hashmap_add(&d->istate->name_hash, ce_k);
}
return NULL;
}
static inline void lazy_update_dir_ref_counts(
struct index_state *istate,
struct lazy_entry *lazy_entries)
{
int k;
for (k = 0; k < istate->cache_nr; k++) {
if (lazy_entries[k].dir)
lazy_entries[k].dir->nr++;
}
}
static void threaded_lazy_init_name_hash(
struct index_state *istate)
{
int nr_each;
int k_start;
int t;
struct lazy_entry *lazy_entries;
struct lazy_dir_thread_data *td_dir;
struct lazy_name_thread_data *td_name;
k_start = 0;
nr_each = DIV_ROUND_UP(istate->cache_nr, lazy_nr_dir_threads);
lazy_entries = xcalloc(istate->cache_nr, sizeof(struct lazy_entry));
td_dir = xcalloc(lazy_nr_dir_threads, sizeof(struct lazy_dir_thread_data));
td_name = xcalloc(1, sizeof(struct lazy_name_thread_data));
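/*
 * Illustrative split (made-up numbers): cache_nr == 10000 with 3 dir
 * threads gives nr_each == 3334, so the threads receive the index
 * ranges [0,3334), [3334,6668) and [6668,10000).
 */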
init_dir_mutex();
/*
* Phase 1:
* Build "istate->dir_hash" using n "dir" threads (and a read-only index).
*/
for (t = 0; t < lazy_nr_dir_threads; t++) {
struct lazy_dir_thread_data *td_dir_t = td_dir + t;
td_dir_t->istate = istate;
td_dir_t->lazy_entries = lazy_entries;
td_dir_t->k_start = k_start;
k_start += nr_each;
if (k_start > istate->cache_nr)
k_start = istate->cache_nr;
td_dir_t->k_end = k_start;
if (pthread_create(&td_dir_t->pthread, NULL, lazy_dir_thread_proc, td_dir_t))
die("unable to create lazy_dir_thread");
}
for (t = 0; t < lazy_nr_dir_threads; t++) {
struct lazy_dir_thread_data *td_dir_t = td_dir + t;
if (pthread_join(td_dir_t->pthread, NULL))
die("unable to join lazy_dir_thread");
}
/*
* Phase 2:
* Iterate over all index entries and add them to the "istate->name_hash"
* using a single "name" background thread.
* (Testing showed it wasn't worth running more than 1 thread for this.)
*
* Meanwhile, finish updating the parent directory ref-counts for each
* index entry using the current thread. (This step is very fast and
* doesn't need threading.)
*/
td_name->istate = istate;
td_name->lazy_entries = lazy_entries;
if (pthread_create(&td_name->pthread, NULL, lazy_name_thread_proc, td_name))
die("unable to create lazy_name_thread");
lazy_update_dir_ref_counts(istate, lazy_entries);
if (pthread_join(td_name->pthread, NULL))
die("unable to join lazy_name_thread");
cleanup_dir_mutex();
free(td_name);
free(td_dir);
free(lazy_entries);
}
#endif
static void lazy_init_name_hash(struct index_state *istate)
{
- int nr;
if (istate->name_hash_initialized)
return;
hashmap_init(&istate->name_hash, (hashmap_cmp_fn) cache_entry_cmp,
istate->cache_nr);
- hashmap_init(&istate->dir_hash, (hashmap_cmp_fn) dir_entry_cmp, 0);
- for (nr = 0; nr < istate->cache_nr; nr++)
- hash_index_entry(istate, istate->cache[nr]);
hashmap_init(&istate->dir_hash, (hashmap_cmp_fn) dir_entry_cmp,
istate->cache_nr);
if (lookup_lazy_params(istate)) {
hashmap_disallow_rehash(&istate->dir_hash, 1);
threaded_lazy_init_name_hash(istate);
hashmap_disallow_rehash(&istate->dir_hash, 0);
} else {
int nr;
for (nr = 0; nr < istate->cache_nr; nr++)
hash_index_entry(istate, istate->cache[nr]);
}
istate->name_hash_initialized = 1;
}
/*
* A test routine for t/helper/ sources.
*
* Returns the number of threads used or 0 when
* the non-threaded code path was used.
*
* Requesting threading WILL NOT override guards
* in lookup_lazy_params().
*/
int test_lazy_init_name_hash(struct index_state *istate, int try_threaded)
{
lazy_nr_dir_threads = 0;
lazy_try_threaded = try_threaded;
lazy_init_name_hash(istate);
return lazy_nr_dir_threads;
}
void add_name_hash(struct index_state *istate, struct cache_entry *ce)
{
if (istate->name_hash_initialized)

t/helper/.gitignore

@@ -11,6 +11,7 @@
/test-genrandom
/test-hashmap
/test-index-version
/test-lazy-init-name-hash
/test-line-buffer
/test-match-trees
/test-mergesort


@@ -0,0 +1,264 @@
#include "cache.h"
#include "parse-options.h"
static int single;
static int multi;
static int count = 1;
static int dump;
static int perf;
static int analyze;
static int analyze_step;
/*
* Dump the contents of the "dir" and "name" hash tables to stdout.
* If you sort the result, you can compare it with the output of the
* other mode and verify that both single and multi produce the same set.
*/
static void dump_run(void)
{
struct hashmap_iter iter_dir;
struct hashmap_iter iter_cache;
/* Stolen from name-hash.c */
struct dir_entry {
struct hashmap_entry ent;
struct dir_entry *parent;
int nr;
unsigned int namelen;
char name[FLEX_ARRAY];
};
struct dir_entry *dir;
struct cache_entry *ce;
read_cache();
if (single) {
test_lazy_init_name_hash(&the_index, 0);
} else {
int nr_threads_used = test_lazy_init_name_hash(&the_index, 1);
if (!nr_threads_used)
die("non-threaded code path used");
}
dir = hashmap_iter_first(&the_index.dir_hash, &iter_dir);
while (dir) {
printf("dir %08x %7d %s\n", dir->ent.hash, dir->nr, dir->name);
dir = hashmap_iter_next(&iter_dir);
}
ce = hashmap_iter_first(&the_index.name_hash, &iter_cache);
while (ce) {
printf("name %08x %s\n", ce->ent.hash, ce->name);
ce = hashmap_iter_next(&iter_cache);
}
discard_cache();
}
/*
* Run the single or multi threaded version "count" times and
* report on the time taken.
*/
static uint64_t time_runs(int try_threaded)
{
uint64_t t0, t1, t2;
uint64_t sum = 0;
uint64_t avg;
int nr_threads_used;
int i;
for (i = 0; i < count; i++) {
t0 = getnanotime();
read_cache();
t1 = getnanotime();
nr_threads_used = test_lazy_init_name_hash(&the_index, try_threaded);
t2 = getnanotime();
sum += (t2 - t1);
if (try_threaded && !nr_threads_used)
die("non-threaded code path used");
if (nr_threads_used)
printf("%f %f %d multi %d\n",
((double)(t1 - t0))/1000000000,
((double)(t2 - t1))/1000000000,
the_index.cache_nr,
nr_threads_used);
else
printf("%f %f %d single\n",
((double)(t1 - t0))/1000000000,
((double)(t2 - t1))/1000000000,
the_index.cache_nr);
fflush(stdout);
discard_cache();
}
avg = sum / count;
if (count > 1)
printf("avg %f %s\n",
(double)avg/1000000000,
(try_threaded) ? "multi" : "single");
return avg;
}
/*
* Try a series of runs varying the "istate->cache_nr" and
* try to find a good value for the multi-threaded criteria.
*/
static void analyze_run(void)
{
uint64_t t1s, t1m, t2s, t2m;
int cache_nr_limit;
int nr_threads_used;
int i;
int nr;
read_cache();
cache_nr_limit = the_index.cache_nr;
discard_cache();
nr = analyze;
while (1) {
uint64_t sum_single = 0;
uint64_t sum_multi = 0;
uint64_t avg_single;
uint64_t avg_multi;
if (nr > cache_nr_limit)
nr = cache_nr_limit;
for (i = 0; i < count; i++) {
read_cache();
the_index.cache_nr = nr; /* cheap truncate of index */
t1s = getnanotime();
test_lazy_init_name_hash(&the_index, 0);
t2s = getnanotime();
sum_single += (t2s - t1s);
the_index.cache_nr = cache_nr_limit;
discard_cache();
read_cache();
the_index.cache_nr = nr; /* cheap truncate of index */
t1m = getnanotime();
nr_threads_used = test_lazy_init_name_hash(&the_index, 1);
t2m = getnanotime();
sum_multi += (t2m - t1m);
the_index.cache_nr = cache_nr_limit;
discard_cache();
if (!nr_threads_used)
printf(" [size %8d] [single %f] non-threaded code path used\n",
nr, ((double)(t2s - t1s))/1000000000);
else
printf(" [size %8d] [single %f] %c [multi %f %d]\n",
nr,
((double)(t2s - t1s))/1000000000,
(((t2s - t1s) < (t2m - t1m)) ? '<' : '>'),
((double)(t2m - t1m))/1000000000,
nr_threads_used);
fflush(stdout);
}
if (count > 1) {
avg_single = sum_single / count;
avg_multi = sum_multi / count;
if (!nr_threads_used)
printf("avg [size %8d] [single %f]\n",
nr,
(double)avg_single/1000000000);
else
printf("avg [size %8d] [single %f] %c [multi %f %d]\n",
nr,
(double)avg_single/1000000000,
(avg_single < avg_multi ? '<' : '>'),
(double)avg_multi/1000000000,
nr_threads_used);
fflush(stdout);
}
if (nr >= cache_nr_limit)
return;
nr += analyze_step;
}
}
int cmd_main(int argc, const char **argv)
{
const char *usage[] = {
"test-lazy-init-name-hash -d (-s | -m)",
"test-lazy-init-name-hash -p [-c c]",
"test-lazy-init-name-hash -a a [--step s] [-c c]",
"test-lazy-init-name-hash (-s | -m) [-c c]",
"test-lazy-init-name-hash -s -m [-c c]",
NULL
};
struct option options[] = {
OPT_BOOL('s', "single", &single, "run single-threaded code"),
OPT_BOOL('m', "multi", &multi, "run multi-threaded code"),
OPT_INTEGER('c', "count", &count, "number of passes"),
OPT_BOOL('d', "dump", &dump, "dump hash tables"),
OPT_BOOL('p', "perf", &perf, "compare single vs multi"),
OPT_INTEGER('a', "analyze", &analyze, "analyze different multi sizes"),
OPT_INTEGER(0, "step", &analyze_step, "analyze step factor"),
OPT_END(),
};
const char *prefix;
uint64_t avg_single, avg_multi;
prefix = setup_git_directory();
argc = parse_options(argc, argv, prefix, options, usage, 0);
/*
* istate->dir_hash is only created when ignore_case is set.
*/
ignore_case = 1;
if (dump) {
if (perf || analyze > 0)
die("cannot combine dump, perf, or analyze");
if (count > 1)
die("count not valid with dump");
if (single && multi)
die("cannot use both single and multi with dump");
if (!single && !multi)
die("dump requires either single or multi");
dump_run();
return 0;
}
if (perf) {
if (analyze > 0)
die("cannot combine dump, perf, or analyze");
if (single || multi)
die("cannot use single or multi with perf");
avg_single = time_runs(0);
avg_multi = time_runs(1);
if (avg_multi > avg_single)
die("multi is slower");
return 0;
}
if (analyze) {
if (analyze < 500)
die("analyze must be at least 500");
if (!analyze_step)
analyze_step = analyze;
if (single || multi)
die("cannot use single or multi with analyze");
analyze_run();
return 0;
}
if (!single && !multi)
die("require either -s or -m or both");
if (single)
time_runs(0);
if (multi)
time_runs(1);
return 0;
}


@@ -0,0 +1,19 @@
#!/bin/sh
test_description='Tests multi-threaded lazy_init_name_hash'
. ./perf-lib.sh
test_perf_large_repo
test_checkout_worktree
test_expect_success 'verify both methods build the same hashmaps' '
$GIT_BUILD_DIR/t/helper/test-lazy-init-name-hash$X --dump --single | sort >out.single &&
$GIT_BUILD_DIR/t/helper/test-lazy-init-name-hash$X --dump --multi | sort >out.multi &&
test_cmp out.single out.multi
'
test_expect_success 'multithreaded should be faster' '
$GIT_BUILD_DIR/t/helper/test-lazy-init-name-hash$X --perf >out.perf
'
test_done