Merge branch 'kb/fast-hashmap'
Improvements to our hash table to get it to meet the needs of the msysgit fscache project, with some nice performance improvements. * kb/fast-hashmap: name-hash: retire unused index_name_exists() hashmap.h: use 'unsigned int' for hash-codes everywhere test-hashmap.c: drop unnecessary #includes .gitignore: test-hashmap is a generated file read-cache.c: fix memory leaks caused by removed cache entries builtin/update-index.c: cleanup update_one fix 'git update-index --verbose --again' output remove old hash.[ch] implementation name-hash.c: remove cache entries instead of marking them CE_UNHASHED name-hash.c: use new hash map implementation for cache entries name-hash.c: remove unreferenced directory entries name-hash.c: use new hash map implementation for directories diffcore-rename.c: use new hash map implementation diffcore-rename.c: simplify finding exact renames diffcore-rename.c: move code around to prepare for the next patch buitin/describe.c: use new hash map implementation add a hashtable implementation that supports O(1) removal submodule: don't access the .gitmodules cache entry after removing it
This commit is contained in:
commit
d637d1b9a8
1
.gitignore
vendored
1
.gitignore
vendored
@ -182,6 +182,7 @@
|
||||
/test-dump-cache-tree
|
||||
/test-scrap-cache-tree
|
||||
/test-genrandom
|
||||
/test-hashmap
|
||||
/test-index-version
|
||||
/test-line-buffer
|
||||
/test-match-trees
|
||||
|
@ -1,52 +0,0 @@
|
||||
hash API
|
||||
========
|
||||
|
||||
The hash API is a collection of simple hash table functions. Users are expected
|
||||
to implement their own hashing.
|
||||
|
||||
Data Structures
|
||||
---------------
|
||||
|
||||
`struct hash_table`::
|
||||
|
||||
The hash table structure. The `array` member points to the hash table
|
||||
entries. The `size` member counts the total number of valid and invalid
|
||||
entries in the table. The `nr` member keeps track of the number of
|
||||
valid entries.
|
||||
|
||||
`struct hash_table_entry`::
|
||||
|
||||
An opaque structure representing an entry in the hash table. The `hash`
|
||||
member is the entry's hash key and the `ptr` member is the entry's
|
||||
value.
|
||||
|
||||
Functions
|
||||
---------
|
||||
|
||||
`init_hash`::
|
||||
|
||||
Initialize the hash table.
|
||||
|
||||
`free_hash`::
|
||||
|
||||
Release memory associated with the hash table.
|
||||
|
||||
`insert_hash`::
|
||||
|
||||
Insert a pointer into the hash table. If an entry with that hash
|
||||
already exists, a pointer to the existing entry's value is returned.
|
||||
Otherwise NULL is returned. This allows callers to implement
|
||||
chaining, etc.
|
||||
|
||||
`lookup_hash`::
|
||||
|
||||
Lookup an entry in the hash table. If an entry with that hash exists
|
||||
the entry's value is returned. Otherwise NULL is returned.
|
||||
|
||||
`for_each_hash`::
|
||||
|
||||
Call a function for each entry in the hash table. The function is
|
||||
expected to take the entry's value as its only argument and return an
|
||||
int. If the function returns a negative int the loop is aborted
|
||||
immediately. Otherwise, the return value is accumulated and the sum
|
||||
returned upon completion of the loop.
|
235
Documentation/technical/api-hashmap.txt
Normal file
235
Documentation/technical/api-hashmap.txt
Normal file
@ -0,0 +1,235 @@
|
||||
hashmap API
|
||||
===========
|
||||
|
||||
The hashmap API is a generic implementation of hash-based key-value mappings.
|
||||
|
||||
Data Structures
|
||||
---------------
|
||||
|
||||
`struct hashmap`::
|
||||
|
||||
The hash table structure.
|
||||
+
|
||||
The `size` member keeps track of the total number of entries. The `cmpfn`
|
||||
member is a function used to compare two entries for equality. The `table` and
|
||||
`tablesize` members store the hash table and its size, respectively.
|
||||
|
||||
`struct hashmap_entry`::
|
||||
|
||||
An opaque structure representing an entry in the hash table, which must
|
||||
be used as first member of user data structures. Ideally it should be
|
||||
followed by an int-sized member to prevent unused memory on 64-bit
|
||||
systems due to alignment.
|
||||
+
|
||||
The `hash` member is the entry's hash code and the `next` member points to the
|
||||
next entry in case of collisions (i.e. if multiple entries map to the same
|
||||
bucket).
|
||||
|
||||
`struct hashmap_iter`::
|
||||
|
||||
An iterator structure, to be used with hashmap_iter_* functions.
|
||||
|
||||
Types
|
||||
-----
|
||||
|
||||
`int (*hashmap_cmp_fn)(const void *entry, const void *entry_or_key, const void *keydata)`::
|
||||
|
||||
User-supplied function to test two hashmap entries for equality. Shall
|
||||
return 0 if the entries are equal.
|
||||
+
|
||||
This function is always called with non-NULL `entry` / `entry_or_key`
|
||||
parameters that have the same hash code. When looking up an entry, the `key`
|
||||
and `keydata` parameters to hashmap_get and hashmap_remove are always passed
|
||||
as second and third argument, respectively. Otherwise, `keydata` is NULL.
|
||||
|
||||
Functions
|
||||
---------
|
||||
|
||||
`unsigned int strhash(const char *buf)`::
|
||||
`unsigned int strihash(const char *buf)`::
|
||||
`unsigned int memhash(const void *buf, size_t len)`::
|
||||
`unsigned int memihash(const void *buf, size_t len)`::
|
||||
|
||||
Ready-to-use hash functions for strings, using the FNV-1 algorithm (see
|
||||
http://www.isthe.com/chongo/tech/comp/fnv).
|
||||
+
|
||||
`strhash` and `strihash` take 0-terminated strings, while `memhash` and
|
||||
`memihash` operate on arbitrary-length memory.
|
||||
+
|
||||
`strihash` and `memihash` are case insensitive versions.
|
||||
|
||||
`void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function, size_t initial_size)`::
|
||||
|
||||
Initializes a hashmap structure.
|
||||
+
|
||||
`map` is the hashmap to initialize.
|
||||
+
|
||||
The `equals_function` can be specified to compare two entries for equality.
|
||||
If NULL, entries are considered equal if their hash codes are equal.
|
||||
+
|
||||
If the total number of entries is known in advance, the `initial_size`
|
||||
parameter may be used to preallocate a sufficiently large table and thus
|
||||
prevent expensive resizing. If 0, the table is dynamically resized.
|
||||
|
||||
`void hashmap_free(struct hashmap *map, int free_entries)`::
|
||||
|
||||
Frees a hashmap structure and allocated memory.
|
||||
+
|
||||
`map` is the hashmap to free.
|
||||
+
|
||||
If `free_entries` is true, each hashmap_entry in the map is freed as well
|
||||
(using stdlib's free()).
|
||||
|
||||
`void hashmap_entry_init(void *entry, unsigned int hash)`::
|
||||
|
||||
Initializes a hashmap_entry structure.
|
||||
+
|
||||
`entry` points to the entry to initialize.
|
||||
+
|
||||
`hash` is the hash code of the entry.
|
||||
|
||||
`void *hashmap_get(const struct hashmap *map, const void *key, const void *keydata)`::
|
||||
|
||||
Returns the hashmap entry for the specified key, or NULL if not found.
|
||||
+
|
||||
`map` is the hashmap structure.
|
||||
+
|
||||
`key` is a hashmap_entry structure (or user data structure that starts with
|
||||
hashmap_entry) that has at least been initialized with the proper hash code
|
||||
(via `hashmap_entry_init`).
|
||||
+
|
||||
If an entry with matching hash code is found, `key` and `keydata` are passed
|
||||
to `hashmap_cmp_fn` to decide whether the entry matches the key.
|
||||
|
||||
`void *hashmap_get_next(const struct hashmap *map, const void *entry)`::
|
||||
|
||||
Returns the next equal hashmap entry, or NULL if not found. This can be
|
||||
used to iterate over duplicate entries (see `hashmap_add`).
|
||||
+
|
||||
`map` is the hashmap structure.
|
||||
+
|
||||
`entry` is the hashmap_entry to start the search from, obtained via a previous
|
||||
call to `hashmap_get` or `hashmap_get_next`.
|
||||
|
||||
`void hashmap_add(struct hashmap *map, void *entry)`::
|
||||
|
||||
Adds a hashmap entry. This allows to add duplicate entries (i.e.
|
||||
separate values with the same key according to hashmap_cmp_fn).
|
||||
+
|
||||
`map` is the hashmap structure.
|
||||
+
|
||||
`entry` is the entry to add.
|
||||
|
||||
`void *hashmap_put(struct hashmap *map, void *entry)`::
|
||||
|
||||
Adds or replaces a hashmap entry. If the hashmap contains duplicate
|
||||
entries equal to the specified entry, only one of them will be replaced.
|
||||
+
|
||||
`map` is the hashmap structure.
|
||||
+
|
||||
`entry` is the entry to add or replace.
|
||||
+
|
||||
Returns the replaced entry, or NULL if not found (i.e. the entry was added).
|
||||
|
||||
`void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata)`::
|
||||
|
||||
Removes a hashmap entry matching the specified key. If the hashmap
|
||||
contains duplicate entries equal to the specified key, only one of
|
||||
them will be removed.
|
||||
+
|
||||
`map` is the hashmap structure.
|
||||
+
|
||||
`key` is a hashmap_entry structure (or user data structure that starts with
|
||||
hashmap_entry) that has at least been initialized with the proper hash code
|
||||
(via `hashmap_entry_init`).
|
||||
+
|
||||
If an entry with matching hash code is found, `key` and `keydata` are
|
||||
passed to `hashmap_cmp_fn` to decide whether the entry matches the key.
|
||||
+
|
||||
Returns the removed entry, or NULL if not found.
|
||||
|
||||
`void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter)`::
|
||||
`void *hashmap_iter_next(struct hashmap_iter *iter)`::
|
||||
`void *hashmap_iter_first(struct hashmap *map, struct hashmap_iter *iter)`::
|
||||
|
||||
Used to iterate over all entries of a hashmap.
|
||||
+
|
||||
`hashmap_iter_init` initializes a `hashmap_iter` structure.
|
||||
+
|
||||
`hashmap_iter_next` returns the next hashmap_entry, or NULL if there are no
|
||||
more entries.
|
||||
+
|
||||
`hashmap_iter_first` is a combination of both (i.e. initializes the iterator
|
||||
and returns the first entry, if any).
|
||||
|
||||
Usage example
|
||||
-------------
|
||||
|
||||
Here's a simple usage example that maps long keys to double values.
|
||||
[source,c]
|
||||
------------
|
||||
struct hashmap map;
|
||||
|
||||
struct long2double {
|
||||
struct hashmap_entry ent; /* must be the first member! */
|
||||
long key;
|
||||
double value;
|
||||
};
|
||||
|
||||
static int long2double_cmp(const struct long2double *e1, const struct long2double *e2, const void *unused)
|
||||
{
|
||||
return !(e1->key == e2->key);
|
||||
}
|
||||
|
||||
void long2double_init(void)
|
||||
{
|
||||
hashmap_init(&map, (hashmap_cmp_fn) long2double_cmp, 0);
|
||||
}
|
||||
|
||||
void long2double_free(void)
|
||||
{
|
||||
hashmap_free(&map, 1);
|
||||
}
|
||||
|
||||
static struct long2double *find_entry(long key)
|
||||
{
|
||||
struct long2double k;
|
||||
hashmap_entry_init(&k, memhash(&key, sizeof(long)));
|
||||
k.key = key;
|
||||
return hashmap_get(&map, &k, NULL);
|
||||
}
|
||||
|
||||
double get_value(long key)
|
||||
{
|
||||
struct long2double *e = find_entry(key);
|
||||
return e ? e->value : 0;
|
||||
}
|
||||
|
||||
void set_value(long key, double value)
|
||||
{
|
||||
struct long2double *e = find_entry(key);
|
||||
if (!e) {
|
||||
e = malloc(sizeof(struct long2double));
|
||||
hashmap_entry_init(e, memhash(&key, sizeof(long)));
|
||||
e->key = key;
|
||||
hashmap_add(&map, e);
|
||||
}
|
||||
e->value = value;
|
||||
}
|
||||
------------
|
||||
|
||||
Using variable-sized keys
|
||||
-------------------------
|
||||
|
||||
The `hashmap_entry_get` and `hashmap_entry_remove` functions expect an ordinary
|
||||
`hashmap_entry` structure as key to find the correct entry. If the key data is
|
||||
variable-sized (e.g. a FLEX_ARRAY string) or quite large, it is undesirable
|
||||
to create a full-fledged entry structure on the heap and copy all the key data
|
||||
into the structure.
|
||||
|
||||
In this case, the `keydata` parameter can be used to pass
|
||||
variable-sized key data directly to the comparison function, and the `key`
|
||||
parameter can be a stripped-down, fixed size entry structure allocated on the
|
||||
stack.
|
||||
|
||||
See test-hashmap.c for an example using arbitrary-length strings as keys.
|
5
Makefile
5
Makefile
@ -555,6 +555,7 @@ TEST_PROGRAMS_NEED_X += test-date
|
||||
TEST_PROGRAMS_NEED_X += test-delta
|
||||
TEST_PROGRAMS_NEED_X += test-dump-cache-tree
|
||||
TEST_PROGRAMS_NEED_X += test-genrandom
|
||||
TEST_PROGRAMS_NEED_X += test-hashmap
|
||||
TEST_PROGRAMS_NEED_X += test-index-version
|
||||
TEST_PROGRAMS_NEED_X += test-line-buffer
|
||||
TEST_PROGRAMS_NEED_X += test-match-trees
|
||||
@ -671,7 +672,7 @@ LIB_H += git-compat-util.h
|
||||
LIB_H += gpg-interface.h
|
||||
LIB_H += graph.h
|
||||
LIB_H += grep.h
|
||||
LIB_H += hash.h
|
||||
LIB_H += hashmap.h
|
||||
LIB_H += help.h
|
||||
LIB_H += http.h
|
||||
LIB_H += kwset.h
|
||||
@ -802,7 +803,7 @@ LIB_OBJS += gettext.o
|
||||
LIB_OBJS += gpg-interface.o
|
||||
LIB_OBJS += graph.o
|
||||
LIB_OBJS += grep.o
|
||||
LIB_OBJS += hash.o
|
||||
LIB_OBJS += hashmap.o
|
||||
LIB_OBJS += help.o
|
||||
LIB_OBJS += hex.o
|
||||
LIB_OBJS += ident.o
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include "exec_cmd.h"
|
||||
#include "parse-options.h"
|
||||
#include "diff.h"
|
||||
#include "hash.h"
|
||||
#include "hashmap.h"
|
||||
#include "argv-array.h"
|
||||
|
||||
#define SEEN (1u << 0)
|
||||
@ -25,7 +25,7 @@ static int longformat;
|
||||
static int first_parent;
|
||||
static int abbrev = -1; /* unspecified */
|
||||
static int max_candidates = 10;
|
||||
static struct hash_table names;
|
||||
static struct hashmap names;
|
||||
static int have_util;
|
||||
static const char *pattern;
|
||||
static int always;
|
||||
@ -37,7 +37,7 @@ static const char *diff_index_args[] = {
|
||||
};
|
||||
|
||||
struct commit_name {
|
||||
struct commit_name *next;
|
||||
struct hashmap_entry entry;
|
||||
unsigned char peeled[20];
|
||||
struct tag *tag;
|
||||
unsigned prio:2; /* annotated tag = 2, tag = 1, head = 0 */
|
||||
@ -50,6 +50,12 @@ static const char *prio_names[] = {
|
||||
"head", "lightweight", "annotated",
|
||||
};
|
||||
|
||||
static int commit_name_cmp(const struct commit_name *cn1,
|
||||
const struct commit_name *cn2, const void *peeled)
|
||||
{
|
||||
return hashcmp(cn1->peeled, peeled ? peeled : cn2->peeled);
|
||||
}
|
||||
|
||||
static inline unsigned int hash_sha1(const unsigned char *sha1)
|
||||
{
|
||||
unsigned int hash;
|
||||
@ -59,21 +65,9 @@ static inline unsigned int hash_sha1(const unsigned char *sha1)
|
||||
|
||||
static inline struct commit_name *find_commit_name(const unsigned char *peeled)
|
||||
{
|
||||
struct commit_name *n = lookup_hash(hash_sha1(peeled), &names);
|
||||
while (n && !!hashcmp(peeled, n->peeled))
|
||||
n = n->next;
|
||||
return n;
|
||||
}
|
||||
|
||||
static int set_util(void *chain, void *data)
|
||||
{
|
||||
struct commit_name *n;
|
||||
for (n = chain; n; n = n->next) {
|
||||
struct commit *c = lookup_commit_reference_gently(n->peeled, 1);
|
||||
if (c)
|
||||
c->util = n;
|
||||
}
|
||||
return 0;
|
||||
struct commit_name key;
|
||||
hashmap_entry_init(&key, hash_sha1(peeled));
|
||||
return hashmap_get(&names, &key, peeled);
|
||||
}
|
||||
|
||||
static int replace_name(struct commit_name *e,
|
||||
@ -118,16 +112,10 @@ static void add_to_known_names(const char *path,
|
||||
struct tag *tag = NULL;
|
||||
if (replace_name(e, prio, sha1, &tag)) {
|
||||
if (!e) {
|
||||
void **pos;
|
||||
e = xmalloc(sizeof(struct commit_name));
|
||||
hashcpy(e->peeled, peeled);
|
||||
pos = insert_hash(hash_sha1(peeled), e, &names);
|
||||
if (pos) {
|
||||
e->next = *pos;
|
||||
*pos = e;
|
||||
} else {
|
||||
e->next = NULL;
|
||||
}
|
||||
hashmap_entry_init(e, hash_sha1(peeled));
|
||||
hashmap_add(&names, e);
|
||||
e->path = NULL;
|
||||
}
|
||||
e->tag = tag;
|
||||
@ -292,7 +280,14 @@ static void describe(const char *arg, int last_one)
|
||||
fprintf(stderr, _("searching to describe %s\n"), arg);
|
||||
|
||||
if (!have_util) {
|
||||
for_each_hash(&names, set_util, NULL);
|
||||
struct hashmap_iter iter;
|
||||
struct commit *c;
|
||||
struct commit_name *n = hashmap_iter_first(&names, &iter);
|
||||
for (; n; n = hashmap_iter_next(&iter)) {
|
||||
c = lookup_commit_reference_gently(n->peeled, 1);
|
||||
if (c)
|
||||
c->util = n;
|
||||
}
|
||||
have_util = 1;
|
||||
}
|
||||
|
||||
@ -463,9 +458,9 @@ int cmd_describe(int argc, const char **argv, const char *prefix)
|
||||
return cmd_name_rev(args.argc, args.argv, prefix);
|
||||
}
|
||||
|
||||
init_hash(&names);
|
||||
hashmap_init(&names, (hashmap_cmp_fn) commit_name_cmp, 0);
|
||||
for_each_rawref(get_name, NULL);
|
||||
if (!names.nr && !always)
|
||||
if (!names.size && !always)
|
||||
die(_("No names found, cannot describe anything."));
|
||||
|
||||
if (argc == 0) {
|
||||
|
@ -311,7 +311,7 @@ int cmd_rm(int argc, const char **argv, const char *prefix)
|
||||
if (!match_pathspec_depth(&pathspec, ce->name, ce_namelen(ce), 0, seen))
|
||||
continue;
|
||||
ALLOC_GROW(list.entry, list.nr + 1, list.alloc);
|
||||
list.entry[list.nr].name = ce->name;
|
||||
list.entry[list.nr].name = xstrdup(ce->name);
|
||||
list.entry[list.nr].is_submodule = S_ISGITLINK(ce->ce_mode);
|
||||
if (list.entry[list.nr++].is_submodule &&
|
||||
!is_staging_gitmodules_ok())
|
||||
|
@ -274,36 +274,32 @@ static void chmod_path(int flip, const char *path)
|
||||
die("git update-index: cannot chmod %cx '%s'", flip, path);
|
||||
}
|
||||
|
||||
static void update_one(const char *path, const char *prefix, int prefix_length)
|
||||
static void update_one(const char *path)
|
||||
{
|
||||
const char *p = prefix_path(prefix, prefix_length, path);
|
||||
if (!verify_path(p)) {
|
||||
if (!verify_path(path)) {
|
||||
fprintf(stderr, "Ignoring path %s\n", path);
|
||||
goto free_return;
|
||||
return;
|
||||
}
|
||||
if (mark_valid_only) {
|
||||
if (mark_ce_flags(p, CE_VALID, mark_valid_only == MARK_FLAG))
|
||||
if (mark_ce_flags(path, CE_VALID, mark_valid_only == MARK_FLAG))
|
||||
die("Unable to mark file %s", path);
|
||||
goto free_return;
|
||||
return;
|
||||
}
|
||||
if (mark_skip_worktree_only) {
|
||||
if (mark_ce_flags(p, CE_SKIP_WORKTREE, mark_skip_worktree_only == MARK_FLAG))
|
||||
if (mark_ce_flags(path, CE_SKIP_WORKTREE, mark_skip_worktree_only == MARK_FLAG))
|
||||
die("Unable to mark file %s", path);
|
||||
goto free_return;
|
||||
return;
|
||||
}
|
||||
|
||||
if (force_remove) {
|
||||
if (remove_file_from_cache(p))
|
||||
if (remove_file_from_cache(path))
|
||||
die("git update-index: unable to remove %s", path);
|
||||
report("remove '%s'", path);
|
||||
goto free_return;
|
||||
return;
|
||||
}
|
||||
if (process_path(p))
|
||||
if (process_path(path))
|
||||
die("Unable to process path %s", path);
|
||||
report("add '%s'", path);
|
||||
free_return:
|
||||
if (p < path || p > path + strlen(path))
|
||||
free((char *)p);
|
||||
}
|
||||
|
||||
static void read_index_info(int line_termination)
|
||||
@ -563,6 +559,7 @@ static int do_reupdate(int ac, const char **av,
|
||||
const struct cache_entry *ce = active_cache[pos];
|
||||
struct cache_entry *old = NULL;
|
||||
int save_nr;
|
||||
char *path;
|
||||
|
||||
if (ce_stage(ce) || !ce_path_match(ce, &pathspec))
|
||||
continue;
|
||||
@ -579,7 +576,9 @@ static int do_reupdate(int ac, const char **av,
|
||||
* or worse yet 'allow_replace', active_nr may decrease.
|
||||
*/
|
||||
save_nr = active_nr;
|
||||
update_one(ce->name + prefix_length, prefix, prefix_length);
|
||||
path = xstrdup(ce->name);
|
||||
update_one(path);
|
||||
free(path);
|
||||
if (save_nr != active_nr)
|
||||
goto redo;
|
||||
}
|
||||
@ -836,10 +835,9 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
|
||||
|
||||
setup_work_tree();
|
||||
p = prefix_path(prefix, prefix_length, path);
|
||||
update_one(p, NULL, 0);
|
||||
update_one(p);
|
||||
if (set_executable_bit)
|
||||
chmod_path(set_executable_bit, p);
|
||||
if (p < path || p > path + strlen(path))
|
||||
free((char *)p);
|
||||
ctx.argc--;
|
||||
ctx.argv++;
|
||||
@ -879,10 +877,9 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
|
||||
strbuf_swap(&buf, &nbuf);
|
||||
}
|
||||
p = prefix_path(prefix, prefix_length, buf.buf);
|
||||
update_one(p, NULL, 0);
|
||||
update_one(p);
|
||||
if (set_executable_bit)
|
||||
chmod_path(set_executable_bit, p);
|
||||
if (p < buf.buf || p > buf.buf + buf.len)
|
||||
free((char *)p);
|
||||
}
|
||||
strbuf_release(&nbuf);
|
||||
|
20
cache.h
20
cache.h
@ -3,7 +3,7 @@
|
||||
|
||||
#include "git-compat-util.h"
|
||||
#include "strbuf.h"
|
||||
#include "hash.h"
|
||||
#include "hashmap.h"
|
||||
#include "advice.h"
|
||||
#include "gettext.h"
|
||||
#include "convert.h"
|
||||
@ -130,12 +130,12 @@ struct stat_data {
|
||||
};
|
||||
|
||||
struct cache_entry {
|
||||
struct hashmap_entry ent;
|
||||
struct stat_data ce_stat_data;
|
||||
unsigned int ce_mode;
|
||||
unsigned int ce_flags;
|
||||
unsigned int ce_namelen;
|
||||
unsigned char sha1[20];
|
||||
struct cache_entry *next;
|
||||
char name[FLEX_ARRAY]; /* more */
|
||||
};
|
||||
|
||||
@ -159,7 +159,6 @@ struct cache_entry {
|
||||
#define CE_ADDED (1 << 19)
|
||||
|
||||
#define CE_HASHED (1 << 20)
|
||||
#define CE_UNHASHED (1 << 21)
|
||||
#define CE_WT_REMOVE (1 << 22) /* remove in work directory */
|
||||
#define CE_CONFLICTED (1 << 23)
|
||||
|
||||
@ -195,17 +194,18 @@ struct pathspec;
|
||||
* Copy the sha1 and stat state of a cache entry from one to
|
||||
* another. But we never change the name, or the hash state!
|
||||
*/
|
||||
#define CE_STATE_MASK (CE_HASHED | CE_UNHASHED)
|
||||
static inline void copy_cache_entry(struct cache_entry *dst,
|
||||
const struct cache_entry *src)
|
||||
{
|
||||
unsigned int state = dst->ce_flags & CE_STATE_MASK;
|
||||
unsigned int state = dst->ce_flags & CE_HASHED;
|
||||
|
||||
/* Don't copy hash chain and name */
|
||||
memcpy(dst, src, offsetof(struct cache_entry, next));
|
||||
memcpy(&dst->ce_stat_data, &src->ce_stat_data,
|
||||
offsetof(struct cache_entry, name) -
|
||||
offsetof(struct cache_entry, ce_stat_data));
|
||||
|
||||
/* Restore the hash state */
|
||||
dst->ce_flags = (dst->ce_flags & ~CE_STATE_MASK) | state;
|
||||
dst->ce_flags = (dst->ce_flags & ~CE_HASHED) | state;
|
||||
}
|
||||
|
||||
static inline unsigned create_ce_flags(unsigned stage)
|
||||
@ -277,8 +277,8 @@ struct index_state {
|
||||
struct cache_time timestamp;
|
||||
unsigned name_hash_initialized : 1,
|
||||
initialized : 1;
|
||||
struct hash_table name_hash;
|
||||
struct hash_table dir_hash;
|
||||
struct hashmap name_hash;
|
||||
struct hashmap dir_hash;
|
||||
};
|
||||
|
||||
extern struct index_state the_index;
|
||||
@ -316,7 +316,6 @@ extern void free_name_hash(struct index_state *istate);
|
||||
#define ce_modified(ce, st, options) ie_modified(&the_index, (ce), (st), (options))
|
||||
#define cache_dir_exists(name, namelen) index_dir_exists(&the_index, (name), (namelen))
|
||||
#define cache_file_exists(name, namelen, igncase) index_file_exists(&the_index, (name), (namelen), (igncase))
|
||||
#define cache_name_exists(name, namelen, igncase) index_name_exists(&the_index, (name), (namelen), (igncase))
|
||||
#define cache_name_is_other(name, namelen) index_name_is_other(&the_index, (name), (namelen))
|
||||
#define resolve_undo_clear() resolve_undo_clear_index(&the_index)
|
||||
#define unmerge_cache_entry_at(at) unmerge_index_entry_at(&the_index, at)
|
||||
@ -467,7 +466,6 @@ extern int unmerged_index(const struct index_state *);
|
||||
extern int verify_path(const char *path);
|
||||
extern struct cache_entry *index_dir_exists(struct index_state *istate, const char *name, int namelen);
|
||||
extern struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int igncase);
|
||||
extern struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int igncase);
|
||||
extern int index_name_pos(const struct index_state *, const char *name, int namelen);
|
||||
#define ADD_CACHE_OK_TO_ADD 1 /* Ok to add */
|
||||
#define ADD_CACHE_OK_TO_REPLACE 2 /* Ok to replace file/directory */
|
||||
|
@ -4,7 +4,7 @@
|
||||
#include "cache.h"
|
||||
#include "diff.h"
|
||||
#include "diffcore.h"
|
||||
#include "hash.h"
|
||||
#include "hashmap.h"
|
||||
#include "progress.h"
|
||||
|
||||
/* Table of rename/copy destinations */
|
||||
@ -243,30 +243,39 @@ static int score_compare(const void *a_, const void *b_)
|
||||
}
|
||||
|
||||
struct file_similarity {
|
||||
int src_dst, index;
|
||||
struct hashmap_entry entry;
|
||||
int index;
|
||||
struct diff_filespec *filespec;
|
||||
struct file_similarity *next;
|
||||
};
|
||||
|
||||
static int find_identical_files(struct file_similarity *src,
|
||||
struct file_similarity *dst,
|
||||
static unsigned int hash_filespec(struct diff_filespec *filespec)
|
||||
{
|
||||
unsigned int hash;
|
||||
if (!filespec->sha1_valid) {
|
||||
if (diff_populate_filespec(filespec, 0))
|
||||
return 0;
|
||||
hash_sha1_file(filespec->data, filespec->size, "blob", filespec->sha1);
|
||||
}
|
||||
memcpy(&hash, filespec->sha1, sizeof(hash));
|
||||
return hash;
|
||||
}
|
||||
|
||||
static int find_identical_files(struct hashmap *srcs,
|
||||
int dst_index,
|
||||
struct diff_options *options)
|
||||
{
|
||||
int renames = 0;
|
||||
|
||||
/*
|
||||
* Walk over all the destinations ...
|
||||
*/
|
||||
do {
|
||||
struct diff_filespec *target = dst->filespec;
|
||||
struct file_similarity *p, *best;
|
||||
struct diff_filespec *target = rename_dst[dst_index].two;
|
||||
struct file_similarity *p, *best, dst;
|
||||
int i = 100, best_score = -1;
|
||||
|
||||
/*
|
||||
* .. to find the best source match
|
||||
* Find the best source match for specified destination.
|
||||
*/
|
||||
best = NULL;
|
||||
for (p = src; p; p = p->next) {
|
||||
hashmap_entry_init(&dst, hash_filespec(target));
|
||||
for (p = hashmap_get(srcs, &dst, NULL); p; p = hashmap_get_next(srcs, p)) {
|
||||
int score;
|
||||
struct diff_filespec *source = p->filespec;
|
||||
|
||||
@ -295,85 +304,21 @@ static int find_identical_files(struct file_similarity *src,
|
||||
break;
|
||||
}
|
||||
if (best) {
|
||||
record_rename_pair(dst->index, best->index, MAX_SCORE);
|
||||
record_rename_pair(dst_index, best->index, MAX_SCORE);
|
||||
renames++;
|
||||
}
|
||||
} while ((dst = dst->next) != NULL);
|
||||
return renames;
|
||||
}
|
||||
|
||||
static void free_similarity_list(struct file_similarity *p)
|
||||
static void insert_file_table(struct hashmap *table, int index, struct diff_filespec *filespec)
|
||||
{
|
||||
while (p) {
|
||||
struct file_similarity *entry = p;
|
||||
p = p->next;
|
||||
free(entry);
|
||||
}
|
||||
}
|
||||
|
||||
static int find_same_files(void *ptr, void *data)
|
||||
{
|
||||
int ret;
|
||||
struct file_similarity *p = ptr;
|
||||
struct file_similarity *src = NULL, *dst = NULL;
|
||||
struct diff_options *options = data;
|
||||
|
||||
/* Split the hash list up into sources and destinations */
|
||||
do {
|
||||
struct file_similarity *entry = p;
|
||||
p = p->next;
|
||||
if (entry->src_dst < 0) {
|
||||
entry->next = src;
|
||||
src = entry;
|
||||
} else {
|
||||
entry->next = dst;
|
||||
dst = entry;
|
||||
}
|
||||
} while (p);
|
||||
|
||||
/*
|
||||
* If we have both sources *and* destinations, see if
|
||||
* we can match them up
|
||||
*/
|
||||
ret = (src && dst) ? find_identical_files(src, dst, options) : 0;
|
||||
|
||||
/* Free the hashes and return the number of renames found */
|
||||
free_similarity_list(src);
|
||||
free_similarity_list(dst);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static unsigned int hash_filespec(struct diff_filespec *filespec)
|
||||
{
|
||||
unsigned int hash;
|
||||
if (!filespec->sha1_valid) {
|
||||
if (diff_populate_filespec(filespec, 0))
|
||||
return 0;
|
||||
hash_sha1_file(filespec->data, filespec->size, "blob", filespec->sha1);
|
||||
}
|
||||
memcpy(&hash, filespec->sha1, sizeof(hash));
|
||||
return hash;
|
||||
}
|
||||
|
||||
static void insert_file_table(struct hash_table *table, int src_dst, int index, struct diff_filespec *filespec)
|
||||
{
|
||||
void **pos;
|
||||
unsigned int hash;
|
||||
struct file_similarity *entry = xmalloc(sizeof(*entry));
|
||||
|
||||
entry->src_dst = src_dst;
|
||||
entry->index = index;
|
||||
entry->filespec = filespec;
|
||||
entry->next = NULL;
|
||||
|
||||
hash = hash_filespec(filespec);
|
||||
pos = insert_hash(hash, entry, table);
|
||||
|
||||
/* We already had an entry there? */
|
||||
if (pos) {
|
||||
entry->next = *pos;
|
||||
*pos = entry;
|
||||
}
|
||||
hashmap_entry_init(entry, hash_filespec(filespec));
|
||||
hashmap_add(table, entry);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -385,24 +330,22 @@ static void insert_file_table(struct hash_table *table, int src_dst, int index,
|
||||
*/
|
||||
static int find_exact_renames(struct diff_options *options)
|
||||
{
|
||||
int i;
|
||||
struct hash_table file_table;
|
||||
int i, renames = 0;
|
||||
struct hashmap file_table;
|
||||
|
||||
init_hash(&file_table);
|
||||
preallocate_hash(&file_table, rename_src_nr + rename_dst_nr);
|
||||
/* Add all sources to the hash table */
|
||||
hashmap_init(&file_table, NULL, rename_src_nr);
|
||||
for (i = 0; i < rename_src_nr; i++)
|
||||
insert_file_table(&file_table, -1, i, rename_src[i].p->one);
|
||||
insert_file_table(&file_table, i, rename_src[i].p->one);
|
||||
|
||||
/* Walk the destinations and find best source match */
|
||||
for (i = 0; i < rename_dst_nr; i++)
|
||||
insert_file_table(&file_table, 1, i, rename_dst[i].two);
|
||||
renames += find_identical_files(&file_table, i, options);
|
||||
|
||||
/* Find the renames */
|
||||
i = for_each_hash(&file_table, find_same_files, options);
|
||||
/* Free the hash data structure and entries */
|
||||
hashmap_free(&file_table, 1);
|
||||
|
||||
/* .. and free the hash data structure */
|
||||
free_hash(&file_table);
|
||||
|
||||
return i;
|
||||
return renames;
|
||||
}
|
||||
|
||||
#define NUM_CANDIDATE_PER_DST 4
|
||||
|
110
hash.c
110
hash.c
@ -1,110 +0,0 @@
|
||||
/*
|
||||
* Some generic hashing helpers.
|
||||
*/
|
||||
#include "cache.h"
|
||||
#include "hash.h"
|
||||
|
||||
/*
|
||||
* Look up a hash entry in the hash table. Return the pointer to
|
||||
* the existing entry, or the empty slot if none existed. The caller
|
||||
* can then look at the (*ptr) to see whether it existed or not.
|
||||
*/
|
||||
static struct hash_table_entry *lookup_hash_entry(unsigned int hash, const struct hash_table *table)
|
||||
{
|
||||
unsigned int size = table->size, nr = hash % size;
|
||||
struct hash_table_entry *array = table->array;
|
||||
|
||||
while (array[nr].ptr) {
|
||||
if (array[nr].hash == hash)
|
||||
break;
|
||||
nr++;
|
||||
if (nr >= size)
|
||||
nr = 0;
|
||||
}
|
||||
return array + nr;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Insert a new hash entry pointer into the table.
|
||||
*
|
||||
* If that hash entry already existed, return the pointer to
|
||||
* the existing entry (and the caller can create a list of the
|
||||
* pointers or do anything else). If it didn't exist, return
|
||||
* NULL (and the caller knows the pointer has been inserted).
|
||||
*/
|
||||
static void **insert_hash_entry(unsigned int hash, void *ptr, struct hash_table *table)
|
||||
{
|
||||
struct hash_table_entry *entry = lookup_hash_entry(hash, table);
|
||||
|
||||
if (!entry->ptr) {
|
||||
entry->ptr = ptr;
|
||||
entry->hash = hash;
|
||||
table->nr++;
|
||||
return NULL;
|
||||
}
|
||||
return &entry->ptr;
|
||||
}
|
||||
|
||||
static void grow_hash_table(struct hash_table *table)
|
||||
{
|
||||
unsigned int i;
|
||||
unsigned int old_size = table->size, new_size;
|
||||
struct hash_table_entry *old_array = table->array, *new_array;
|
||||
|
||||
new_size = alloc_nr(old_size);
|
||||
new_array = xcalloc(sizeof(struct hash_table_entry), new_size);
|
||||
table->size = new_size;
|
||||
table->array = new_array;
|
||||
table->nr = 0;
|
||||
for (i = 0; i < old_size; i++) {
|
||||
unsigned int hash = old_array[i].hash;
|
||||
void *ptr = old_array[i].ptr;
|
||||
if (ptr)
|
||||
insert_hash_entry(hash, ptr, table);
|
||||
}
|
||||
free(old_array);
|
||||
}
|
||||
|
||||
void *lookup_hash(unsigned int hash, const struct hash_table *table)
|
||||
{
|
||||
if (!table->array)
|
||||
return NULL;
|
||||
return lookup_hash_entry(hash, table)->ptr;
|
||||
}
|
||||
|
||||
void **insert_hash(unsigned int hash, void *ptr, struct hash_table *table)
|
||||
{
|
||||
unsigned int nr = table->nr;
|
||||
if (nr >= table->size/2)
|
||||
grow_hash_table(table);
|
||||
return insert_hash_entry(hash, ptr, table);
|
||||
}
|
||||
|
||||
int for_each_hash(const struct hash_table *table, int (*fn)(void *, void *), void *data)
|
||||
{
|
||||
int sum = 0;
|
||||
unsigned int i;
|
||||
unsigned int size = table->size;
|
||||
struct hash_table_entry *array = table->array;
|
||||
|
||||
for (i = 0; i < size; i++) {
|
||||
void *ptr = array->ptr;
|
||||
array++;
|
||||
if (ptr) {
|
||||
int val = fn(ptr, data);
|
||||
if (val < 0)
|
||||
return val;
|
||||
sum += val;
|
||||
}
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
void free_hash(struct hash_table *table)
|
||||
{
|
||||
free(table->array);
|
||||
table->array = NULL;
|
||||
table->size = 0;
|
||||
table->nr = 0;
|
||||
}
|
50
hash.h
50
hash.h
@ -1,50 +0,0 @@
|
||||
#ifndef HASH_H
|
||||
#define HASH_H
|
||||
|
||||
/*
|
||||
* These are some simple generic hash table helper functions.
|
||||
* Not necessarily suitable for all users, but good for things
|
||||
* where you want to just keep track of a list of things, and
|
||||
* have a good hash to use on them.
|
||||
*
|
||||
* It keeps the hash table at roughly 50-75% free, so the memory
|
||||
* cost of the hash table itself is roughly
|
||||
*
|
||||
* 3 * 2*sizeof(void *) * nr_of_objects
|
||||
*
|
||||
* bytes.
|
||||
*
|
||||
* FIXME: on 64-bit architectures, we waste memory. It would be
|
||||
* good to have just 32-bit pointers, requiring a special allocator
|
||||
* for hashed entries or something.
|
||||
*/
|
||||
struct hash_table_entry {
|
||||
unsigned int hash;
|
||||
void *ptr;
|
||||
};
|
||||
|
||||
struct hash_table {
|
||||
unsigned int size, nr;
|
||||
struct hash_table_entry *array;
|
||||
};
|
||||
|
||||
extern void *lookup_hash(unsigned int hash, const struct hash_table *table);
|
||||
extern void **insert_hash(unsigned int hash, void *ptr, struct hash_table *table);
|
||||
extern int for_each_hash(const struct hash_table *table, int (*fn)(void *, void *), void *data);
|
||||
extern void free_hash(struct hash_table *table);
|
||||
|
||||
static inline void init_hash(struct hash_table *table)
|
||||
{
|
||||
table->size = 0;
|
||||
table->nr = 0;
|
||||
table->array = NULL;
|
||||
}
|
||||
|
||||
static inline void preallocate_hash(struct hash_table *table, unsigned int elts)
|
||||
{
|
||||
assert(table->size == 0 && table->nr == 0 && table->array == NULL);
|
||||
table->size = elts * 2;
|
||||
table->array = xcalloc(sizeof(struct hash_table_entry), table->size);
|
||||
}
|
||||
|
||||
#endif
|
228
hashmap.c
Normal file
228
hashmap.c
Normal file
@ -0,0 +1,228 @@
|
||||
/*
|
||||
* Generic implementation of hash-based key value mappings.
|
||||
*/
|
||||
#include "cache.h"
|
||||
#include "hashmap.h"
|
||||
|
||||
#define FNV32_BASE ((unsigned int) 0x811c9dc5)
|
||||
#define FNV32_PRIME ((unsigned int) 0x01000193)
|
||||
|
||||
unsigned int strhash(const char *str)
|
||||
{
|
||||
unsigned int c, hash = FNV32_BASE;
|
||||
while ((c = (unsigned char) *str++))
|
||||
hash = (hash * FNV32_PRIME) ^ c;
|
||||
return hash;
|
||||
}
|
||||
|
||||
unsigned int strihash(const char *str)
|
||||
{
|
||||
unsigned int c, hash = FNV32_BASE;
|
||||
while ((c = (unsigned char) *str++)) {
|
||||
if (c >= 'a' && c <= 'z')
|
||||
c -= 'a' - 'A';
|
||||
hash = (hash * FNV32_PRIME) ^ c;
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
|
||||
unsigned int memhash(const void *buf, size_t len)
|
||||
{
|
||||
unsigned int hash = FNV32_BASE;
|
||||
unsigned char *ucbuf = (unsigned char *) buf;
|
||||
while (len--) {
|
||||
unsigned int c = *ucbuf++;
|
||||
hash = (hash * FNV32_PRIME) ^ c;
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
|
||||
unsigned int memihash(const void *buf, size_t len)
|
||||
{
|
||||
unsigned int hash = FNV32_BASE;
|
||||
unsigned char *ucbuf = (unsigned char *) buf;
|
||||
while (len--) {
|
||||
unsigned int c = *ucbuf++;
|
||||
if (c >= 'a' && c <= 'z')
|
||||
c -= 'a' - 'A';
|
||||
hash = (hash * FNV32_PRIME) ^ c;
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
|
||||
#define HASHMAP_INITIAL_SIZE 64
|
||||
/* grow / shrink by 2^2 */
|
||||
#define HASHMAP_RESIZE_BITS 2
|
||||
/* load factor in percent */
|
||||
#define HASHMAP_LOAD_FACTOR 80
|
||||
|
||||
static void alloc_table(struct hashmap *map, unsigned int size)
|
||||
{
|
||||
map->tablesize = size;
|
||||
map->table = xcalloc(size, sizeof(struct hashmap_entry *));
|
||||
|
||||
/* calculate resize thresholds for new size */
|
||||
map->grow_at = (unsigned int) ((uint64_t) size * HASHMAP_LOAD_FACTOR / 100);
|
||||
if (size <= HASHMAP_INITIAL_SIZE)
|
||||
map->shrink_at = 0;
|
||||
else
|
||||
/*
|
||||
* The shrink-threshold must be slightly smaller than
|
||||
* (grow-threshold / resize-factor) to prevent erratic resizing,
|
||||
* thus we divide by (resize-factor + 1).
|
||||
*/
|
||||
map->shrink_at = map->grow_at / ((1 << HASHMAP_RESIZE_BITS) + 1);
|
||||
}
|
||||
|
||||
static inline int entry_equals(const struct hashmap *map,
|
||||
const struct hashmap_entry *e1, const struct hashmap_entry *e2,
|
||||
const void *keydata)
|
||||
{
|
||||
return (e1 == e2) || (e1->hash == e2->hash && !map->cmpfn(e1, e2, keydata));
|
||||
}
|
||||
|
||||
static inline unsigned int bucket(const struct hashmap *map,
|
||||
const struct hashmap_entry *key)
|
||||
{
|
||||
return key->hash & (map->tablesize - 1);
|
||||
}
|
||||
|
||||
static void rehash(struct hashmap *map, unsigned int newsize)
|
||||
{
|
||||
unsigned int i, oldsize = map->tablesize;
|
||||
struct hashmap_entry **oldtable = map->table;
|
||||
|
||||
alloc_table(map, newsize);
|
||||
for (i = 0; i < oldsize; i++) {
|
||||
struct hashmap_entry *e = oldtable[i];
|
||||
while (e) {
|
||||
struct hashmap_entry *next = e->next;
|
||||
unsigned int b = bucket(map, e);
|
||||
e->next = map->table[b];
|
||||
map->table[b] = e;
|
||||
e = next;
|
||||
}
|
||||
}
|
||||
free(oldtable);
|
||||
}
|
||||
|
||||
static inline struct hashmap_entry **find_entry_ptr(const struct hashmap *map,
|
||||
const struct hashmap_entry *key, const void *keydata)
|
||||
{
|
||||
struct hashmap_entry **e = &map->table[bucket(map, key)];
|
||||
while (*e && !entry_equals(map, *e, key, keydata))
|
||||
e = &(*e)->next;
|
||||
return e;
|
||||
}
|
||||
|
||||
static int always_equal(const void *unused1, const void *unused2, const void *unused3)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function,
|
||||
size_t initial_size)
|
||||
{
|
||||
unsigned int size = HASHMAP_INITIAL_SIZE;
|
||||
map->size = 0;
|
||||
map->cmpfn = equals_function ? equals_function : always_equal;
|
||||
|
||||
/* calculate initial table size and allocate the table */
|
||||
initial_size = (unsigned int) ((uint64_t) initial_size * 100
|
||||
/ HASHMAP_LOAD_FACTOR);
|
||||
while (initial_size > size)
|
||||
size <<= HASHMAP_RESIZE_BITS;
|
||||
alloc_table(map, size);
|
||||
}
|
||||
|
||||
void hashmap_free(struct hashmap *map, int free_entries)
|
||||
{
|
||||
if (!map || !map->table)
|
||||
return;
|
||||
if (free_entries) {
|
||||
struct hashmap_iter iter;
|
||||
struct hashmap_entry *e;
|
||||
hashmap_iter_init(map, &iter);
|
||||
while ((e = hashmap_iter_next(&iter)))
|
||||
free(e);
|
||||
}
|
||||
free(map->table);
|
||||
memset(map, 0, sizeof(*map));
|
||||
}
|
||||
|
||||
void *hashmap_get(const struct hashmap *map, const void *key, const void *keydata)
|
||||
{
|
||||
return *find_entry_ptr(map, key, keydata);
|
||||
}
|
||||
|
||||
void *hashmap_get_next(const struct hashmap *map, const void *entry)
|
||||
{
|
||||
struct hashmap_entry *e = ((struct hashmap_entry *) entry)->next;
|
||||
for (; e; e = e->next)
|
||||
if (entry_equals(map, entry, e, NULL))
|
||||
return e;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void hashmap_add(struct hashmap *map, void *entry)
|
||||
{
|
||||
unsigned int b = bucket(map, entry);
|
||||
|
||||
/* add entry */
|
||||
((struct hashmap_entry *) entry)->next = map->table[b];
|
||||
map->table[b] = entry;
|
||||
|
||||
/* fix size and rehash if appropriate */
|
||||
map->size++;
|
||||
if (map->size > map->grow_at)
|
||||
rehash(map, map->tablesize << HASHMAP_RESIZE_BITS);
|
||||
}
|
||||
|
||||
void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata)
|
||||
{
|
||||
struct hashmap_entry *old;
|
||||
struct hashmap_entry **e = find_entry_ptr(map, key, keydata);
|
||||
if (!*e)
|
||||
return NULL;
|
||||
|
||||
/* remove existing entry */
|
||||
old = *e;
|
||||
*e = old->next;
|
||||
old->next = NULL;
|
||||
|
||||
/* fix size and rehash if appropriate */
|
||||
map->size--;
|
||||
if (map->size < map->shrink_at)
|
||||
rehash(map, map->tablesize >> HASHMAP_RESIZE_BITS);
|
||||
return old;
|
||||
}
|
||||
|
||||
void *hashmap_put(struct hashmap *map, void *entry)
|
||||
{
|
||||
struct hashmap_entry *old = hashmap_remove(map, entry, NULL);
|
||||
hashmap_add(map, entry);
|
||||
return old;
|
||||
}
|
||||
|
||||
void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter)
|
||||
{
|
||||
iter->map = map;
|
||||
iter->tablepos = 0;
|
||||
iter->next = NULL;
|
||||
}
|
||||
|
||||
void *hashmap_iter_next(struct hashmap_iter *iter)
|
||||
{
|
||||
struct hashmap_entry *current = iter->next;
|
||||
for (;;) {
|
||||
if (current) {
|
||||
iter->next = current->next;
|
||||
return current;
|
||||
}
|
||||
|
||||
if (iter->tablepos >= iter->map->tablesize)
|
||||
return NULL;
|
||||
|
||||
current = iter->map->table[iter->tablepos++];
|
||||
}
|
||||
}
|
71
hashmap.h
Normal file
71
hashmap.h
Normal file
@ -0,0 +1,71 @@
|
||||
#ifndef HASHMAP_H
|
||||
#define HASHMAP_H
|
||||
|
||||
/*
|
||||
* Generic implementation of hash-based key-value mappings.
|
||||
* See Documentation/technical/api-hashmap.txt.
|
||||
*/
|
||||
|
||||
/* FNV-1 functions */
|
||||
|
||||
extern unsigned int strhash(const char *buf);
|
||||
extern unsigned int strihash(const char *buf);
|
||||
extern unsigned int memhash(const void *buf, size_t len);
|
||||
extern unsigned int memihash(const void *buf, size_t len);
|
||||
|
||||
/* data structures */
|
||||
|
||||
struct hashmap_entry {
|
||||
struct hashmap_entry *next;
|
||||
unsigned int hash;
|
||||
};
|
||||
|
||||
typedef int (*hashmap_cmp_fn)(const void *entry, const void *entry_or_key,
|
||||
const void *keydata);
|
||||
|
||||
struct hashmap {
|
||||
struct hashmap_entry **table;
|
||||
hashmap_cmp_fn cmpfn;
|
||||
unsigned int size, tablesize, grow_at, shrink_at;
|
||||
};
|
||||
|
||||
struct hashmap_iter {
|
||||
struct hashmap *map;
|
||||
struct hashmap_entry *next;
|
||||
unsigned int tablepos;
|
||||
};
|
||||
|
||||
/* hashmap functions */
|
||||
|
||||
extern void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function,
|
||||
size_t initial_size);
|
||||
extern void hashmap_free(struct hashmap *map, int free_entries);
|
||||
|
||||
/* hashmap_entry functions */
|
||||
|
||||
static inline void hashmap_entry_init(void *entry, unsigned int hash)
|
||||
{
|
||||
struct hashmap_entry *e = entry;
|
||||
e->hash = hash;
|
||||
e->next = NULL;
|
||||
}
|
||||
extern void *hashmap_get(const struct hashmap *map, const void *key,
|
||||
const void *keydata);
|
||||
extern void *hashmap_get_next(const struct hashmap *map, const void *entry);
|
||||
extern void hashmap_add(struct hashmap *map, void *entry);
|
||||
extern void *hashmap_put(struct hashmap *map, void *entry);
|
||||
extern void *hashmap_remove(struct hashmap *map, const void *key,
|
||||
const void *keydata);
|
||||
|
||||
/* hashmap_iter functions */
|
||||
|
||||
extern void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter);
|
||||
extern void *hashmap_iter_next(struct hashmap_iter *iter);
|
||||
static inline void *hashmap_iter_first(struct hashmap *map,
|
||||
struct hashmap_iter *iter)
|
||||
{
|
||||
hashmap_iter_init(map, iter);
|
||||
return hashmap_iter_next(iter);
|
||||
}
|
||||
|
||||
#endif
|
159
name-hash.c
159
name-hash.c
@ -8,49 +8,28 @@
|
||||
#define NO_THE_INDEX_COMPATIBILITY_MACROS
|
||||
#include "cache.h"
|
||||
|
||||
/*
|
||||
* This removes bit 5 if bit 6 is set.
|
||||
*
|
||||
* That will make US-ASCII characters hash to their upper-case
|
||||
* equivalent. We could easily do this one whole word at a time,
|
||||
* but that's for future worries.
|
||||
*/
|
||||
static inline unsigned char icase_hash(unsigned char c)
|
||||
{
|
||||
return c & ~((c & 0x40) >> 1);
|
||||
}
|
||||
|
||||
static unsigned int hash_name(const char *name, int namelen)
|
||||
{
|
||||
unsigned int hash = 0x123;
|
||||
|
||||
while (namelen--) {
|
||||
unsigned char c = *name++;
|
||||
c = icase_hash(c);
|
||||
hash = hash*101 + c;
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
|
||||
struct dir_entry {
|
||||
struct dir_entry *next;
|
||||
struct hashmap_entry ent;
|
||||
struct dir_entry *parent;
|
||||
struct cache_entry *ce;
|
||||
int nr;
|
||||
unsigned int namelen;
|
||||
};
|
||||
|
||||
static int dir_entry_cmp(const struct dir_entry *e1,
|
||||
const struct dir_entry *e2, const char *name)
|
||||
{
|
||||
return e1->namelen != e2->namelen || strncasecmp(e1->ce->name,
|
||||
name ? name : e2->ce->name, e1->namelen);
|
||||
}
|
||||
|
||||
static struct dir_entry *find_dir_entry(struct index_state *istate,
|
||||
const char *name, unsigned int namelen)
|
||||
{
|
||||
unsigned int hash = hash_name(name, namelen);
|
||||
struct dir_entry *dir;
|
||||
|
||||
for (dir = lookup_hash(hash, &istate->dir_hash); dir; dir = dir->next)
|
||||
if (dir->namelen == namelen &&
|
||||
!strncasecmp(dir->ce->name, name, namelen))
|
||||
return dir;
|
||||
return NULL;
|
||||
struct dir_entry key;
|
||||
hashmap_entry_init(&key, memihash(name, namelen));
|
||||
key.namelen = namelen;
|
||||
return hashmap_get(&istate->dir_hash, &key, name);
|
||||
}
|
||||
|
||||
static struct dir_entry *hash_dir_entry(struct index_state *istate,
|
||||
@ -84,18 +63,11 @@ static struct dir_entry *hash_dir_entry(struct index_state *istate,
|
||||
dir = find_dir_entry(istate, ce->name, namelen);
|
||||
if (!dir) {
|
||||
/* not found, create it and add to hash table */
|
||||
void **pdir;
|
||||
unsigned int hash = hash_name(ce->name, namelen);
|
||||
|
||||
dir = xcalloc(1, sizeof(struct dir_entry));
|
||||
hashmap_entry_init(dir, memihash(ce->name, namelen));
|
||||
dir->namelen = namelen;
|
||||
dir->ce = ce;
|
||||
|
||||
pdir = insert_hash(hash, dir, &istate->dir_hash);
|
||||
if (pdir) {
|
||||
dir->next = *pdir;
|
||||
*pdir = dir;
|
||||
}
|
||||
hashmap_add(&istate->dir_hash, dir);
|
||||
|
||||
/* recursively add missing parent directories */
|
||||
dir->parent = hash_dir_entry(istate, ce, namelen);
|
||||
@ -114,45 +86,50 @@ static void add_dir_entry(struct index_state *istate, struct cache_entry *ce)
|
||||
static void remove_dir_entry(struct index_state *istate, struct cache_entry *ce)
|
||||
{
|
||||
/*
|
||||
* Release reference to the directory entry (and parents if 0).
|
||||
*
|
||||
* Note: we do not remove / free the entry because there's no
|
||||
* hash.[ch]::remove_hash and dir->next may point to other entries
|
||||
* that are still valid, so we must not free the memory.
|
||||
* Release reference to the directory entry. If 0, remove and continue
|
||||
* with parent directory.
|
||||
*/
|
||||
struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce));
|
||||
while (dir && dir->nr && !(--dir->nr))
|
||||
dir = dir->parent;
|
||||
while (dir && !(--dir->nr)) {
|
||||
struct dir_entry *parent = dir->parent;
|
||||
hashmap_remove(&istate->dir_hash, dir, NULL);
|
||||
free(dir);
|
||||
dir = parent;
|
||||
}
|
||||
}
|
||||
|
||||
static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
|
||||
{
|
||||
void **pos;
|
||||
unsigned int hash;
|
||||
|
||||
if (ce->ce_flags & CE_HASHED)
|
||||
return;
|
||||
ce->ce_flags |= CE_HASHED;
|
||||
ce->next = NULL;
|
||||
hash = hash_name(ce->name, ce_namelen(ce));
|
||||
pos = insert_hash(hash, ce, &istate->name_hash);
|
||||
if (pos) {
|
||||
ce->next = *pos;
|
||||
*pos = ce;
|
||||
}
|
||||
hashmap_entry_init(ce, memihash(ce->name, ce_namelen(ce)));
|
||||
hashmap_add(&istate->name_hash, ce);
|
||||
|
||||
if (ignore_case && !(ce->ce_flags & CE_UNHASHED))
|
||||
if (ignore_case)
|
||||
add_dir_entry(istate, ce);
|
||||
}
|
||||
|
||||
static int cache_entry_cmp(const struct cache_entry *ce1,
|
||||
const struct cache_entry *ce2, const void *remove)
|
||||
{
|
||||
/*
|
||||
* For remove_name_hash, find the exact entry (pointer equality); for
|
||||
* index_file_exists, find all entries with matching hash code and
|
||||
* decide whether the entry matches in same_name.
|
||||
*/
|
||||
return remove ? !(ce1 == ce2) : 0;
|
||||
}
|
||||
|
||||
static void lazy_init_name_hash(struct index_state *istate)
|
||||
{
|
||||
int nr;
|
||||
|
||||
if (istate->name_hash_initialized)
|
||||
return;
|
||||
if (istate->cache_nr)
|
||||
preallocate_hash(&istate->name_hash, istate->cache_nr);
|
||||
hashmap_init(&istate->name_hash, (hashmap_cmp_fn) cache_entry_cmp,
|
||||
istate->cache_nr);
|
||||
hashmap_init(&istate->dir_hash, (hashmap_cmp_fn) dir_entry_cmp, 0);
|
||||
for (nr = 0; nr < istate->cache_nr; nr++)
|
||||
hash_index_entry(istate, istate->cache[nr]);
|
||||
istate->name_hash_initialized = 1;
|
||||
@ -160,31 +137,19 @@ static void lazy_init_name_hash(struct index_state *istate)
|
||||
|
||||
void add_name_hash(struct index_state *istate, struct cache_entry *ce)
|
||||
{
|
||||
/* if already hashed, add reference to directory entries */
|
||||
if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_STATE_MASK)
|
||||
add_dir_entry(istate, ce);
|
||||
|
||||
ce->ce_flags &= ~CE_UNHASHED;
|
||||
if (istate->name_hash_initialized)
|
||||
hash_index_entry(istate, ce);
|
||||
}
|
||||
|
||||
/*
|
||||
* We don't actually *remove* it, we can just mark it invalid so that
|
||||
* we won't find it in lookups.
|
||||
*
|
||||
* Not only would we have to search the lists (simple enough), but
|
||||
* we'd also have to rehash other hash buckets in case this makes the
|
||||
* hash bucket empty (common). So it's much better to just mark
|
||||
* it.
|
||||
*/
|
||||
void remove_name_hash(struct index_state *istate, struct cache_entry *ce)
|
||||
{
|
||||
/* if already hashed, release reference to directory entries */
|
||||
if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_HASHED)
|
||||
remove_dir_entry(istate, ce);
|
||||
if (!istate->name_hash_initialized || !(ce->ce_flags & CE_HASHED))
|
||||
return;
|
||||
ce->ce_flags &= ~CE_HASHED;
|
||||
hashmap_remove(&istate->name_hash, ce, ce);
|
||||
|
||||
ce->ce_flags |= CE_UNHASHED;
|
||||
if (ignore_case)
|
||||
remove_dir_entry(istate, ce);
|
||||
}
|
||||
|
||||
static int slow_same_name(const char *name1, int len1, const char *name2, int len2)
|
||||
@ -247,49 +212,27 @@ struct cache_entry *index_dir_exists(struct index_state *istate, const char *nam
|
||||
|
||||
struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int icase)
|
||||
{
|
||||
unsigned int hash = hash_name(name, namelen);
|
||||
struct cache_entry *ce;
|
||||
struct hashmap_entry key;
|
||||
|
||||
lazy_init_name_hash(istate);
|
||||
ce = lookup_hash(hash, &istate->name_hash);
|
||||
|
||||
hashmap_entry_init(&key, memihash(name, namelen));
|
||||
ce = hashmap_get(&istate->name_hash, &key, NULL);
|
||||
while (ce) {
|
||||
if (!(ce->ce_flags & CE_UNHASHED)) {
|
||||
if (same_name(ce, name, namelen, icase))
|
||||
return ce;
|
||||
}
|
||||
ce = ce->next;
|
||||
ce = hashmap_get_next(&istate->name_hash, ce);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase)
|
||||
{
|
||||
if (namelen > 0 && name[namelen - 1] == '/')
|
||||
return index_dir_exists(istate, name, namelen - 1);
|
||||
return index_file_exists(istate, name, namelen, icase);
|
||||
}
|
||||
|
||||
static int free_dir_entry(void *entry, void *unused)
|
||||
{
|
||||
struct dir_entry *dir = entry;
|
||||
while (dir) {
|
||||
struct dir_entry *next = dir->next;
|
||||
free(dir);
|
||||
dir = next;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void free_name_hash(struct index_state *istate)
|
||||
{
|
||||
if (!istate->name_hash_initialized)
|
||||
return;
|
||||
istate->name_hash_initialized = 0;
|
||||
if (ignore_case)
|
||||
/* free directory entries */
|
||||
for_each_hash(&istate->dir_hash, free_dir_entry, NULL);
|
||||
|
||||
free_hash(&istate->name_hash);
|
||||
free_hash(&istate->dir_hash);
|
||||
hashmap_free(&istate->name_hash, 0);
|
||||
hashmap_free(&istate->dir_hash, 1);
|
||||
}
|
||||
|
10
read-cache.c
10
read-cache.c
@ -47,6 +47,7 @@ static void replace_index_entry(struct index_state *istate, int nr, struct cache
|
||||
struct cache_entry *old = istate->cache[nr];
|
||||
|
||||
remove_name_hash(istate, old);
|
||||
free(old);
|
||||
set_index_entry(istate, nr, ce);
|
||||
istate->cache_changed = 1;
|
||||
}
|
||||
@ -58,7 +59,7 @@ void rename_index_entry_at(struct index_state *istate, int nr, const char *new_n
|
||||
|
||||
new = xmalloc(cache_entry_size(namelen));
|
||||
copy_cache_entry(new, old);
|
||||
new->ce_flags &= ~CE_STATE_MASK;
|
||||
new->ce_flags &= ~CE_HASHED;
|
||||
new->ce_namelen = namelen;
|
||||
memcpy(new->name, new_name, namelen + 1);
|
||||
|
||||
@ -478,6 +479,7 @@ int remove_index_entry_at(struct index_state *istate, int pos)
|
||||
|
||||
record_resolve_undo(istate, ce);
|
||||
remove_name_hash(istate, ce);
|
||||
free(ce);
|
||||
istate->cache_changed = 1;
|
||||
istate->cache_nr--;
|
||||
if (pos >= istate->cache_nr)
|
||||
@ -499,8 +501,10 @@ void remove_marked_cache_entries(struct index_state *istate)
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = j = 0; i < istate->cache_nr; i++) {
|
||||
if (ce_array[i]->ce_flags & CE_REMOVE)
|
||||
if (ce_array[i]->ce_flags & CE_REMOVE) {
|
||||
remove_name_hash(istate, ce_array[i]);
|
||||
free(ce_array[i]);
|
||||
}
|
||||
else
|
||||
ce_array[j++] = ce_array[i];
|
||||
}
|
||||
@ -1894,7 +1898,7 @@ int read_index_unmerged(struct index_state *istate)
|
||||
new_ce->ce_mode = ce->ce_mode;
|
||||
if (add_index_entry(istate, new_ce, 0))
|
||||
return error("%s: cannot drop to stage #0",
|
||||
ce->name);
|
||||
new_ce->name);
|
||||
i = index_name_pos(istate, new_ce->name, len);
|
||||
}
|
||||
return unmerged;
|
||||
|
@ -119,6 +119,7 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
|
||||
struct string_list_item *item;
|
||||
struct resolve_undo_info *ru;
|
||||
int i, err = 0, matched;
|
||||
char *name;
|
||||
|
||||
if (!istate->resolve_undo)
|
||||
return pos;
|
||||
@ -138,20 +139,22 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
|
||||
if (!ru)
|
||||
return pos;
|
||||
matched = ce->ce_flags & CE_MATCHED;
|
||||
name = xstrdup(ce->name);
|
||||
remove_index_entry_at(istate, pos);
|
||||
for (i = 0; i < 3; i++) {
|
||||
struct cache_entry *nce;
|
||||
if (!ru->mode[i])
|
||||
continue;
|
||||
nce = make_cache_entry(ru->mode[i], ru->sha1[i],
|
||||
ce->name, i + 1, 0);
|
||||
name, i + 1, 0);
|
||||
if (matched)
|
||||
nce->ce_flags |= CE_MATCHED;
|
||||
if (add_index_entry(istate, nce, ADD_CACHE_OK_TO_ADD)) {
|
||||
err = 1;
|
||||
error("cannot unmerge '%s'", ce->name);
|
||||
error("cannot unmerge '%s'", name);
|
||||
}
|
||||
}
|
||||
free(name);
|
||||
if (err)
|
||||
return pos;
|
||||
free(ru);
|
||||
|
25
submodule.c
25
submodule.c
@ -116,30 +116,7 @@ int remove_path_from_gitmodules(const char *path)
|
||||
|
||||
void stage_updated_gitmodules(void)
|
||||
{
|
||||
struct strbuf buf = STRBUF_INIT;
|
||||
struct stat st;
|
||||
int pos;
|
||||
struct cache_entry *ce;
|
||||
int namelen = strlen(".gitmodules");
|
||||
|
||||
pos = cache_name_pos(".gitmodules", namelen);
|
||||
if (pos < 0) {
|
||||
warning(_("could not find .gitmodules in index"));
|
||||
return;
|
||||
}
|
||||
ce = active_cache[pos];
|
||||
ce->ce_flags = namelen;
|
||||
if (strbuf_read_file(&buf, ".gitmodules", 0) < 0)
|
||||
die(_("reading updated .gitmodules failed"));
|
||||
if (lstat(".gitmodules", &st) < 0)
|
||||
die_errno(_("unable to stat updated .gitmodules"));
|
||||
fill_stat_cache_info(ce, &st);
|
||||
ce->ce_mode = ce_mode_from_stat(ce, st.st_mode);
|
||||
if (remove_cache_entry_at(pos) < 0)
|
||||
die(_("unable to remove .gitmodules from index"));
|
||||
if (write_sha1_file(buf.buf, buf.len, blob_type, ce->sha1))
|
||||
die(_("adding updated .gitmodules failed"));
|
||||
if (add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE))
|
||||
if (add_file_to_cache(".gitmodules", 0))
|
||||
die(_("staging updated .gitmodules failed"));
|
||||
}
|
||||
|
||||
|
240
t/t0011-hashmap.sh
Executable file
240
t/t0011-hashmap.sh
Executable file
@ -0,0 +1,240 @@
|
||||
#!/bin/sh
|
||||
|
||||
test_description='test hashmap and string hash functions'
|
||||
. ./test-lib.sh
|
||||
|
||||
test_hashmap() {
|
||||
echo "$1" | test-hashmap $3 > actual &&
|
||||
echo "$2" > expect &&
|
||||
test_cmp expect actual
|
||||
}
|
||||
|
||||
test_expect_success 'hash functions' '
|
||||
|
||||
test_hashmap "hash key1" "2215982743 2215982743 116372151 116372151" &&
|
||||
test_hashmap "hash key2" "2215982740 2215982740 116372148 116372148" &&
|
||||
test_hashmap "hash fooBarFrotz" "1383912807 1383912807 3189766727 3189766727" &&
|
||||
test_hashmap "hash foobarfrotz" "2862305959 2862305959 3189766727 3189766727"
|
||||
|
||||
'
|
||||
|
||||
test_expect_success 'put' '
|
||||
|
||||
test_hashmap "put key1 value1
|
||||
put key2 value2
|
||||
put fooBarFrotz value3
|
||||
put foobarfrotz value4
|
||||
size" "NULL
|
||||
NULL
|
||||
NULL
|
||||
NULL
|
||||
64 4"
|
||||
|
||||
'
|
||||
|
||||
test_expect_success 'put (case insensitive)' '
|
||||
|
||||
test_hashmap "put key1 value1
|
||||
put key2 value2
|
||||
put fooBarFrotz value3
|
||||
size" "NULL
|
||||
NULL
|
||||
NULL
|
||||
64 3" ignorecase
|
||||
|
||||
'
|
||||
|
||||
test_expect_success 'replace' '
|
||||
|
||||
test_hashmap "put key1 value1
|
||||
put key1 value2
|
||||
put fooBarFrotz value3
|
||||
put fooBarFrotz value4
|
||||
size" "NULL
|
||||
value1
|
||||
NULL
|
||||
value3
|
||||
64 2"
|
||||
|
||||
'
|
||||
|
||||
test_expect_success 'replace (case insensitive)' '
|
||||
|
||||
test_hashmap "put key1 value1
|
||||
put Key1 value2
|
||||
put fooBarFrotz value3
|
||||
put foobarfrotz value4
|
||||
size" "NULL
|
||||
value1
|
||||
NULL
|
||||
value3
|
||||
64 2" ignorecase
|
||||
|
||||
'
|
||||
|
||||
test_expect_success 'get' '
|
||||
|
||||
test_hashmap "put key1 value1
|
||||
put key2 value2
|
||||
put fooBarFrotz value3
|
||||
put foobarfrotz value4
|
||||
get key1
|
||||
get key2
|
||||
get fooBarFrotz
|
||||
get notInMap" "NULL
|
||||
NULL
|
||||
NULL
|
||||
NULL
|
||||
value1
|
||||
value2
|
||||
value3
|
||||
NULL"
|
||||
|
||||
'
|
||||
|
||||
test_expect_success 'get (case insensitive)' '
|
||||
|
||||
test_hashmap "put key1 value1
|
||||
put key2 value2
|
||||
put fooBarFrotz value3
|
||||
get Key1
|
||||
get keY2
|
||||
get foobarfrotz
|
||||
get notInMap" "NULL
|
||||
NULL
|
||||
NULL
|
||||
value1
|
||||
value2
|
||||
value3
|
||||
NULL" ignorecase
|
||||
|
||||
'
|
||||
|
||||
test_expect_success 'add' '
|
||||
|
||||
test_hashmap "add key1 value1
|
||||
add key1 value2
|
||||
add fooBarFrotz value3
|
||||
add fooBarFrotz value4
|
||||
get key1
|
||||
get fooBarFrotz
|
||||
get notInMap" "value2
|
||||
value1
|
||||
value4
|
||||
value3
|
||||
NULL"
|
||||
|
||||
'
|
||||
|
||||
test_expect_success 'add (case insensitive)' '
|
||||
|
||||
test_hashmap "add key1 value1
|
||||
add Key1 value2
|
||||
add fooBarFrotz value3
|
||||
add foobarfrotz value4
|
||||
get key1
|
||||
get Foobarfrotz
|
||||
get notInMap" "value2
|
||||
value1
|
||||
value4
|
||||
value3
|
||||
NULL" ignorecase
|
||||
|
||||
'
|
||||
|
||||
test_expect_success 'remove' '
|
||||
|
||||
test_hashmap "put key1 value1
|
||||
put key2 value2
|
||||
put fooBarFrotz value3
|
||||
remove key1
|
||||
remove key2
|
||||
remove notInMap
|
||||
size" "NULL
|
||||
NULL
|
||||
NULL
|
||||
value1
|
||||
value2
|
||||
NULL
|
||||
64 1"
|
||||
|
||||
'
|
||||
|
||||
test_expect_success 'remove (case insensitive)' '
|
||||
|
||||
test_hashmap "put key1 value1
|
||||
put key2 value2
|
||||
put fooBarFrotz value3
|
||||
remove Key1
|
||||
remove keY2
|
||||
remove notInMap
|
||||
size" "NULL
|
||||
NULL
|
||||
NULL
|
||||
value1
|
||||
value2
|
||||
NULL
|
||||
64 1" ignorecase
|
||||
|
||||
'
|
||||
|
||||
test_expect_success 'iterate' '
|
||||
|
||||
test_hashmap "put key1 value1
|
||||
put key2 value2
|
||||
put fooBarFrotz value3
|
||||
iterate" "NULL
|
||||
NULL
|
||||
NULL
|
||||
key2 value2
|
||||
key1 value1
|
||||
fooBarFrotz value3"
|
||||
|
||||
'
|
||||
|
||||
test_expect_success 'iterate (case insensitive)' '
|
||||
|
||||
test_hashmap "put key1 value1
|
||||
put key2 value2
|
||||
put fooBarFrotz value3
|
||||
iterate" "NULL
|
||||
NULL
|
||||
NULL
|
||||
fooBarFrotz value3
|
||||
key2 value2
|
||||
key1 value1" ignorecase
|
||||
|
||||
'
|
||||
|
||||
test_expect_success 'grow / shrink' '
|
||||
|
||||
rm -f in &&
|
||||
rm -f expect &&
|
||||
for n in $(test_seq 51)
|
||||
do
|
||||
echo put key$n value$n >> in &&
|
||||
echo NULL >> expect
|
||||
done &&
|
||||
echo size >> in &&
|
||||
echo 64 51 >> expect &&
|
||||
echo put key52 value52 >> in &&
|
||||
echo NULL >> expect
|
||||
echo size >> in &&
|
||||
echo 256 52 >> expect &&
|
||||
for n in $(test_seq 12)
|
||||
do
|
||||
echo remove key$n >> in &&
|
||||
echo value$n >> expect
|
||||
done &&
|
||||
echo size >> in &&
|
||||
echo 256 40 >> expect &&
|
||||
echo remove key40 >> in &&
|
||||
echo value40 >> expect &&
|
||||
echo size >> in &&
|
||||
echo 64 39 >> expect &&
|
||||
cat in | test-hashmap > out &&
|
||||
test_cmp expect out
|
||||
|
||||
'
|
||||
|
||||
test_done
|
255
test-hashmap.c
Normal file
255
test-hashmap.c
Normal file
@ -0,0 +1,255 @@
|
||||
#include "git-compat-util.h"
|
||||
#include "hashmap.h"
|
||||
|
||||
struct test_entry
|
||||
{
|
||||
struct hashmap_entry ent;
|
||||
/* key and value as two \0-terminated strings */
|
||||
char key[FLEX_ARRAY];
|
||||
};
|
||||
|
||||
static const char *get_value(const struct test_entry *e)
|
||||
{
|
||||
return e->key + strlen(e->key) + 1;
|
||||
}
|
||||
|
||||
static int test_entry_cmp(const struct test_entry *e1,
|
||||
const struct test_entry *e2, const char* key)
|
||||
{
|
||||
return strcmp(e1->key, key ? key : e2->key);
|
||||
}
|
||||
|
||||
static int test_entry_cmp_icase(const struct test_entry *e1,
|
||||
const struct test_entry *e2, const char* key)
|
||||
{
|
||||
return strcasecmp(e1->key, key ? key : e2->key);
|
||||
}
|
||||
|
||||
static struct test_entry *alloc_test_entry(int hash, char *key, int klen,
|
||||
char *value, int vlen)
|
||||
{
|
||||
struct test_entry *entry = malloc(sizeof(struct test_entry) + klen
|
||||
+ vlen + 2);
|
||||
hashmap_entry_init(entry, hash);
|
||||
memcpy(entry->key, key, klen + 1);
|
||||
memcpy(entry->key + klen + 1, value, vlen + 1);
|
||||
return entry;
|
||||
}
|
||||
|
||||
#define HASH_METHOD_FNV 0
|
||||
#define HASH_METHOD_I 1
|
||||
#define HASH_METHOD_IDIV10 2
|
||||
#define HASH_METHOD_0 3
|
||||
#define HASH_METHOD_X2 4
|
||||
#define TEST_SPARSE 8
|
||||
#define TEST_ADD 16
|
||||
#define TEST_SIZE 100000
|
||||
|
||||
static unsigned int hash(unsigned int method, unsigned int i, const char *key)
|
||||
{
|
||||
unsigned int hash;
|
||||
switch (method & 3)
|
||||
{
|
||||
case HASH_METHOD_FNV:
|
||||
hash = strhash(key);
|
||||
break;
|
||||
case HASH_METHOD_I:
|
||||
hash = i;
|
||||
break;
|
||||
case HASH_METHOD_IDIV10:
|
||||
hash = i / 10;
|
||||
break;
|
||||
case HASH_METHOD_0:
|
||||
hash = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
if (method & HASH_METHOD_X2)
|
||||
hash = 2 * hash;
|
||||
return hash;
|
||||
}
|
||||
|
||||
/*
|
||||
* Test performance of hashmap.[ch]
|
||||
* Usage: time echo "perfhashmap method rounds" | test-hashmap
|
||||
*/
|
||||
static void perf_hashmap(unsigned int method, unsigned int rounds)
|
||||
{
|
||||
struct hashmap map;
|
||||
char buf[16];
|
||||
struct test_entry **entries;
|
||||
unsigned int *hashes;
|
||||
unsigned int i, j;
|
||||
|
||||
entries = malloc(TEST_SIZE * sizeof(struct test_entry *));
|
||||
hashes = malloc(TEST_SIZE * sizeof(int));
|
||||
for (i = 0; i < TEST_SIZE; i++) {
|
||||
snprintf(buf, sizeof(buf), "%i", i);
|
||||
entries[i] = alloc_test_entry(0, buf, strlen(buf), "", 0);
|
||||
hashes[i] = hash(method, i, entries[i]->key);
|
||||
}
|
||||
|
||||
if (method & TEST_ADD) {
|
||||
/* test adding to the map */
|
||||
for (j = 0; j < rounds; j++) {
|
||||
hashmap_init(&map, (hashmap_cmp_fn) test_entry_cmp, 0);
|
||||
|
||||
/* add entries */
|
||||
for (i = 0; i < TEST_SIZE; i++) {
|
||||
hashmap_entry_init(entries[i], hashes[i]);
|
||||
hashmap_add(&map, entries[i]);
|
||||
}
|
||||
|
||||
hashmap_free(&map, 0);
|
||||
}
|
||||
} else {
|
||||
/* test map lookups */
|
||||
hashmap_init(&map, (hashmap_cmp_fn) test_entry_cmp, 0);
|
||||
|
||||
/* fill the map (sparsely if specified) */
|
||||
j = (method & TEST_SPARSE) ? TEST_SIZE / 10 : TEST_SIZE;
|
||||
for (i = 0; i < j; i++) {
|
||||
hashmap_entry_init(entries[i], hashes[i]);
|
||||
hashmap_add(&map, entries[i]);
|
||||
}
|
||||
|
||||
for (j = 0; j < rounds; j++) {
|
||||
for (i = 0; i < TEST_SIZE; i++) {
|
||||
struct hashmap_entry key;
|
||||
hashmap_entry_init(&key, hashes[i]);
|
||||
hashmap_get(&map, &key, entries[i]->key);
|
||||
}
|
||||
}
|
||||
|
||||
hashmap_free(&map, 0);
|
||||
}
|
||||
}
|
||||
|
||||
#define DELIM " \t\r\n"
|
||||
|
||||
/*
|
||||
* Read stdin line by line and print result of commands to stdout:
|
||||
*
|
||||
* hash key -> strhash(key) memhash(key) strihash(key) memihash(key)
|
||||
* put key value -> NULL / old value
|
||||
* get key -> NULL / value
|
||||
* remove key -> NULL / old value
|
||||
* iterate -> key1 value1\nkey2 value2\n...
|
||||
* size -> tablesize numentries
|
||||
*
|
||||
* perfhashmap method rounds -> test hashmap.[ch] performance
|
||||
*/
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
char line[1024];
|
||||
struct hashmap map;
|
||||
int icase;
|
||||
|
||||
/* init hash map */
|
||||
icase = argc > 1 && !strcmp("ignorecase", argv[1]);
|
||||
hashmap_init(&map, (hashmap_cmp_fn) (icase ? test_entry_cmp_icase
|
||||
: test_entry_cmp), 0);
|
||||
|
||||
/* process commands from stdin */
|
||||
while (fgets(line, sizeof(line), stdin)) {
|
||||
char *cmd, *p1 = NULL, *p2 = NULL;
|
||||
int l1 = 0, l2 = 0, hash = 0;
|
||||
struct test_entry *entry;
|
||||
|
||||
/* break line into command and up to two parameters */
|
||||
cmd = strtok(line, DELIM);
|
||||
/* ignore empty lines */
|
||||
if (!cmd || *cmd == '#')
|
||||
continue;
|
||||
|
||||
p1 = strtok(NULL, DELIM);
|
||||
if (p1) {
|
||||
l1 = strlen(p1);
|
||||
hash = icase ? strihash(p1) : strhash(p1);
|
||||
p2 = strtok(NULL, DELIM);
|
||||
if (p2)
|
||||
l2 = strlen(p2);
|
||||
}
|
||||
|
||||
if (!strcmp("hash", cmd) && l1) {
|
||||
|
||||
/* print results of different hash functions */
|
||||
printf("%u %u %u %u\n", strhash(p1), memhash(p1, l1),
|
||||
strihash(p1), memihash(p1, l1));
|
||||
|
||||
} else if (!strcmp("add", cmd) && l1 && l2) {
|
||||
|
||||
/* create entry with key = p1, value = p2 */
|
||||
entry = alloc_test_entry(hash, p1, l1, p2, l2);
|
||||
|
||||
/* add to hashmap */
|
||||
hashmap_add(&map, entry);
|
||||
|
||||
} else if (!strcmp("put", cmd) && l1 && l2) {
|
||||
|
||||
/* create entry with key = p1, value = p2 */
|
||||
entry = alloc_test_entry(hash, p1, l1, p2, l2);
|
||||
|
||||
/* add / replace entry */
|
||||
entry = hashmap_put(&map, entry);
|
||||
|
||||
/* print and free replaced entry, if any */
|
||||
puts(entry ? get_value(entry) : "NULL");
|
||||
free(entry);
|
||||
|
||||
} else if (!strcmp("get", cmd) && l1) {
|
||||
|
||||
/* setup static key */
|
||||
struct hashmap_entry key;
|
||||
hashmap_entry_init(&key, hash);
|
||||
|
||||
/* lookup entry in hashmap */
|
||||
entry = hashmap_get(&map, &key, p1);
|
||||
|
||||
/* print result */
|
||||
if (!entry)
|
||||
puts("NULL");
|
||||
while (entry) {
|
||||
puts(get_value(entry));
|
||||
entry = hashmap_get_next(&map, entry);
|
||||
}
|
||||
|
||||
} else if (!strcmp("remove", cmd) && l1) {
|
||||
|
||||
/* setup static key */
|
||||
struct hashmap_entry key;
|
||||
hashmap_entry_init(&key, hash);
|
||||
|
||||
/* remove entry from hashmap */
|
||||
entry = hashmap_remove(&map, &key, p1);
|
||||
|
||||
/* print result and free entry*/
|
||||
puts(entry ? get_value(entry) : "NULL");
|
||||
free(entry);
|
||||
|
||||
} else if (!strcmp("iterate", cmd)) {
|
||||
|
||||
struct hashmap_iter iter;
|
||||
hashmap_iter_init(&map, &iter);
|
||||
while ((entry = hashmap_iter_next(&iter)))
|
||||
printf("%s %s\n", entry->key, get_value(entry));
|
||||
|
||||
} else if (!strcmp("size", cmd)) {
|
||||
|
||||
/* print table sizes */
|
||||
printf("%u %u\n", map.tablesize, map.size);
|
||||
|
||||
} else if (!strcmp("perfhashmap", cmd) && l1 && l2) {
|
||||
|
||||
perf_hashmap(atoi(p1), atoi(p2));
|
||||
|
||||
} else {
|
||||
|
||||
printf("Unknown command %s\n", cmd);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
hashmap_free(&map, 1);
|
||||
return 0;
|
||||
}
|
@ -105,12 +105,11 @@ void setup_unpack_trees_porcelain(struct unpack_trees_options *opts,
|
||||
static void do_add_entry(struct unpack_trees_options *o, struct cache_entry *ce,
|
||||
unsigned int set, unsigned int clear)
|
||||
{
|
||||
clear |= CE_HASHED | CE_UNHASHED;
|
||||
clear |= CE_HASHED;
|
||||
|
||||
if (set & CE_REMOVE)
|
||||
set |= CE_WT_REMOVE;
|
||||
|
||||
ce->next = NULL;
|
||||
ce->ce_flags = (ce->ce_flags & ~clear) | set;
|
||||
add_index_entry(&o->result, ce,
|
||||
ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE);
|
||||
|
Loading…
Reference in New Issue
Block a user