From aef6cf1e5093f3739e39cfdcfb73f3b862cf842c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 14 Feb 2018 13:05:46 -0500 Subject: [PATCH 1/6] test-hashmap: use ALLOC_ARRAY rather than bare malloc These two array allocations have several minor flaws: - they use bare malloc, rather than our error-checking xmalloc - they do a bare multiplication to determine the total size (which in theory can overflow, though in this case the sizes are all constants) - they use sizeof(type), but the type in the second one doesn't match the actual array (though it's "int" versus "unsigned int", which are guaranteed by C99 to have the same size) None of these are likely to be problems in practice, and this is just a test helper. But since people often look at test helpers as reference code, we should do our best to model the recommended techniques. Switching to ALLOC_ARRAY fixes all three. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/helper/test-hashmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/helper/test-hashmap.c b/t/helper/test-hashmap.c index 1145d51671..b36886bf35 100644 --- a/t/helper/test-hashmap.c +++ b/t/helper/test-hashmap.c @@ -85,8 +85,8 @@ static void perf_hashmap(unsigned int method, unsigned int rounds) unsigned int *hashes; unsigned int i, j; - entries = malloc(TEST_SIZE * sizeof(struct test_entry *)); - hashes = malloc(TEST_SIZE * sizeof(int)); + ALLOC_ARRAY(entries, TEST_SIZE); + ALLOC_ARRAY(hashes, TEST_SIZE); for (i = 0; i < TEST_SIZE; i++) { snprintf(buf, sizeof(buf), "%i", i); entries[i] = alloc_test_entry(0, buf, strlen(buf), "", 0); From b6c4380d6e3170e21e6670248b7f332c57cb077c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 14 Feb 2018 13:06:34 -0500 Subject: [PATCH 2/6] test-hashmap: check allocation computation for overflow When we allocate the test_entry flex-struct, we have to add up all of the elements that go into the flex array. If these were to overflow a size_t, this would allocate a too-small buffer, which we would then overflow in our memcpy steps. Since this is just a test-helper, it probably doesn't matter in practice, but we should model the correct technique by using the st_add() macros. Unfortunately, we cannot use the FLEX_ALLOC() macros here, because we are stuffing two different buffers into a single flex array. While we're here, let's also swap out "malloc" for our error-checking "xmalloc", and use the preferred "sizeof(*var)" instead of "sizeof(type)". Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/helper/test-hashmap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/t/helper/test-hashmap.c b/t/helper/test-hashmap.c index b36886bf35..2100877c2b 100644 --- a/t/helper/test-hashmap.c +++ b/t/helper/test-hashmap.c @@ -32,8 +32,7 @@ static int test_entry_cmp(const void *cmp_data, static struct test_entry *alloc_test_entry(int hash, char *key, int klen, char *value, int vlen) { - struct test_entry *entry = malloc(sizeof(struct test_entry) + klen - + vlen + 2); + struct test_entry *entry = xmalloc(st_add4(sizeof(*entry), klen, vlen, 2)); hashmap_entry_init(entry, hash); memcpy(entry->key, key, klen + 1); memcpy(entry->key + klen + 1, value, vlen + 1); From cbadf0ee37def5cea81fb7702941af8234dd094d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 14 Feb 2018 13:06:57 -0500 Subject: [PATCH 3/6] test-hashmap: use xsnprintf rather than snprintf In general, using a bare snprintf can truncate the resulting buffer, leading to confusing results. In this case we know that our buffer is sized large enough to accommodate our loop, so there's no bug. However, we should use xsnprintf() to document (and check) that assumption, and to model good practice to people reading the code. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/helper/test-hashmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/helper/test-hashmap.c b/t/helper/test-hashmap.c index 2100877c2b..28b913fbd6 100644 --- a/t/helper/test-hashmap.c +++ b/t/helper/test-hashmap.c @@ -87,7 +87,7 @@ static void perf_hashmap(unsigned int method, unsigned int rounds) ALLOC_ARRAY(entries, TEST_SIZE); ALLOC_ARRAY(hashes, TEST_SIZE); for (i = 0; i < TEST_SIZE; i++) { - snprintf(buf, sizeof(buf), "%i", i); + xsnprintf(buf, sizeof(buf), "%i", i); entries[i] = alloc_test_entry(0, buf, strlen(buf), "", 0); hashes[i] = hash(method, i, entries[i]->key); } From 7e8089c986790fd8ef9d89bf71c9a91901d7f884 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 14 Feb 2018 13:07:19 -0500 Subject: [PATCH 4/6] test-hashmap: use strbuf_getline rather than fgets Using fgets() with a fixed-size buffer can lead to lines being accidentally split across two calls if they are larger than the buffer size. As this is just a test helper, this is unlikely to be a problem in practice. But since people may look at test helpers as reference code, it's a good idea for them to model the preferred behavior. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/helper/test-hashmap.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/t/helper/test-hashmap.c b/t/helper/test-hashmap.c index 28b913fbd6..15fc4e372f 100644 --- a/t/helper/test-hashmap.c +++ b/t/helper/test-hashmap.c @@ -1,5 +1,6 @@ #include "git-compat-util.h" #include "hashmap.h" +#include "strbuf.h" struct test_entry { @@ -143,7 +144,7 @@ static void perf_hashmap(unsigned int method, unsigned int rounds) */ int cmd_main(int argc, const char **argv) { - char line[1024]; + struct strbuf line = STRBUF_INIT; struct hashmap map; int icase; @@ -152,13 +153,13 @@ int cmd_main(int argc, const char **argv) hashmap_init(&map, test_entry_cmp, &icase, 0); /* process commands from stdin */ - while (fgets(line, sizeof(line), stdin)) { + while (strbuf_getline(&line, stdin) != EOF) { char *cmd, *p1 = NULL, *p2 = NULL; int l1 = 0, l2 = 0, hash = 0; struct test_entry *entry; /* break line into command and up to two parameters */ - cmd = strtok(line, DELIM); + cmd = strtok(line.buf, DELIM); /* ignore empty lines */ if (!cmd || *cmd == '#') continue; @@ -262,6 +263,7 @@ int cmd_main(int argc, const char **argv) } } + strbuf_release(&line); hashmap_free(&map, 1); return 0; } From 7daa825d677dcbd40724cb146f3949b7d574e8b3 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 14 Feb 2018 13:08:20 -0500 Subject: [PATCH 5/6] test-hashmap: simplify alloc_test_entry This function takes two ptr/len pairs, which implies that they can be arbitrary buffers. But internally, it assumes that each "ptr" is NUL-terminated at "len" (because we memcpy an extra byte to pick up the NUL terminator). In practice this works because each caller only ever passes strlen(ptr) as the length. But let's drop the "len" parameters to make our expectations clear. Note that we can get rid of the "l1" and "l2" variables from cmd_main() as a further cleanup, since they are now mostly used to check whether the p1 and p2 arguments are present (technically the length parameters conflated NULL with the empty string, which we no longer do, but I think that is actually an improvement). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/helper/test-hashmap.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/t/helper/test-hashmap.c b/t/helper/test-hashmap.c index 15fc4e372f..56efff36e8 100644 --- a/t/helper/test-hashmap.c +++ b/t/helper/test-hashmap.c @@ -30,9 +30,10 @@ static int test_entry_cmp(const void *cmp_data, return strcmp(e1->key, key ? key : e2->key); } -static struct test_entry *alloc_test_entry(int hash, char *key, int klen, - char *value, int vlen) +static struct test_entry *alloc_test_entry(int hash, char *key, char *value) { + size_t klen = strlen(key); + size_t vlen = strlen(value); struct test_entry *entry = xmalloc(st_add4(sizeof(*entry), klen, vlen, 2)); hashmap_entry_init(entry, hash); memcpy(entry->key, key, klen + 1); @@ -89,7 +90,7 @@ static void perf_hashmap(unsigned int method, unsigned int rounds) ALLOC_ARRAY(hashes, TEST_SIZE); for (i = 0; i < TEST_SIZE; i++) { xsnprintf(buf, sizeof(buf), "%i", i); - entries[i] = alloc_test_entry(0, buf, strlen(buf), "", 0); + entries[i] = alloc_test_entry(0, buf, ""); hashes[i] = hash(method, i, entries[i]->key); } @@ -155,7 +156,7 @@ int cmd_main(int argc, const char **argv) /* process commands from stdin */ while (strbuf_getline(&line, stdin) != EOF) { char *cmd, *p1 = NULL, *p2 = NULL; - int l1 = 0, l2 = 0, hash = 0; + int hash = 0; struct test_entry *entry; /* break line into command and up to two parameters */ @@ -166,31 +167,29 @@ int cmd_main(int argc, const char **argv) p1 = strtok(NULL, DELIM); if (p1) { - l1 = strlen(p1); hash = icase ? strihash(p1) : strhash(p1); p2 = strtok(NULL, DELIM); - if (p2) - l2 = strlen(p2); } - if (!strcmp("hash", cmd) && l1) { + if (!strcmp("hash", cmd) && p1) { /* print results of different hash functions */ - printf("%u %u %u %u\n", strhash(p1), memhash(p1, l1), - strihash(p1), memihash(p1, l1)); + printf("%u %u %u %u\n", + strhash(p1), memhash(p1, strlen(p1)), + strihash(p1), memihash(p1, strlen(p1))); - } else if (!strcmp("add", cmd) && l1 && l2) { + } else if (!strcmp("add", cmd) && p1 && p2) { /* create entry with key = p1, value = p2 */ - entry = alloc_test_entry(hash, p1, l1, p2, l2); + entry = alloc_test_entry(hash, p1, p2); /* add to hashmap */ hashmap_add(&map, entry); - } else if (!strcmp("put", cmd) && l1 && l2) { + } else if (!strcmp("put", cmd) && p1 && p2) { /* create entry with key = p1, value = p2 */ - entry = alloc_test_entry(hash, p1, l1, p2, l2); + entry = alloc_test_entry(hash, p1, p2); /* add / replace entry */ entry = hashmap_put(&map, entry); @@ -199,7 +198,7 @@ int cmd_main(int argc, const char **argv) puts(entry ? get_value(entry) : "NULL"); free(entry); - } else if (!strcmp("get", cmd) && l1) { + } else if (!strcmp("get", cmd) && p1) { /* lookup entry in hashmap */ entry = hashmap_get_from_hash(&map, hash, p1); @@ -212,7 +211,7 @@ int cmd_main(int argc, const char **argv) entry = hashmap_get_next(&map, entry); } - } else if (!strcmp("remove", cmd) && l1) { + } else if (!strcmp("remove", cmd) && p1) { /* setup static key */ struct hashmap_entry key; @@ -238,7 +237,7 @@ int cmd_main(int argc, const char **argv) printf("%u %u\n", map.tablesize, hashmap_get_size(&map)); - } else if (!strcmp("intern", cmd) && l1) { + } else if (!strcmp("intern", cmd) && p1) { /* test that strintern works */ const char *i1 = strintern(p1); @@ -252,7 +251,7 @@ int cmd_main(int argc, const char **argv) else printf("%s\n", i1); - } else if (!strcmp("perfhashmap", cmd) && l1 && l2) { + } else if (!strcmp("perfhashmap", cmd) && p1 && p2) { perf_hashmap(atoi(p1), atoi(p2)); From a6119f82b118c7adea9ede0b3813810b06e37668 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 14 Feb 2018 13:08:57 -0500 Subject: [PATCH 6/6] test-hashmap: use "unsigned int" for hash storage The hashmap API always use an unsigned value for storing and comparing hashes. Whereas this test code uses "int". This works out in practice since one can typically round-trip between "int" and "unsigned int". But since this is essentially reference code for the hashmap API, we should model using the correct types. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/helper/test-hashmap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/t/helper/test-hashmap.c b/t/helper/test-hashmap.c index 56efff36e8..9ae9281c07 100644 --- a/t/helper/test-hashmap.c +++ b/t/helper/test-hashmap.c @@ -30,7 +30,8 @@ static int test_entry_cmp(const void *cmp_data, return strcmp(e1->key, key ? key : e2->key); } -static struct test_entry *alloc_test_entry(int hash, char *key, char *value) +static struct test_entry *alloc_test_entry(unsigned int hash, + char *key, char *value) { size_t klen = strlen(key); size_t vlen = strlen(value); @@ -156,7 +157,7 @@ int cmd_main(int argc, const char **argv) /* process commands from stdin */ while (strbuf_getline(&line, stdin) != EOF) { char *cmd, *p1 = NULL, *p2 = NULL; - int hash = 0; + unsigned int hash = 0; struct test_entry *entry; /* break line into command and up to two parameters */