fast-export: tighten anonymize_mem() interface to handle only strings

While the anonymize_mem() interface _can_ store arbitrary byte
sequences, none of the callers uses this feature (as of the previous
commit). We'd like to keep it that way, as we'll be exposing the
string-like nature of the anonymization routines to the user. So let's
tighten up the interface a bit:

  - don't treat "len" as an out-parameter from anonymize_mem(); this
    ensures callers treat the pointer result as a NUL-terminated string

  - likewise, don't treat "len" as an out-parameter from generator
    functions

  - swap out "void *" for "char *" as appropriate to signal that we
    don't handle arbitrary memory

  - rename the function to anonymize_str()

This will also open up some optimization opportunities in a future
patch.

Note that we can't drop the "len" parameter entirely. Some callers do
pass in partial strings (e.g., "foo/bar" with len=3, meaning just the
leading "foo" component) to avoid copying, and we still need to handle
those.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Jeff King 2020-06-23 11:24:54 -04:00 committed by Junio C Hamano
parent 750bb32589
commit 7f40759496

View File

@@ -145,31 +145,30 @@ static int anonymized_entry_cmp(const void *unused_cmp_data,
* the same anonymized string with another. The actual generation * the same anonymized string with another. The actual generation
* is farmed out to the generate function. * is farmed out to the generate function.
*/ */
static const void *anonymize_mem(struct hashmap *map, static const char *anonymize_str(struct hashmap *map,
void *(*generate)(const void *, size_t *), char *(*generate)(const char *, size_t),
const void *orig, size_t *len) const char *orig, size_t len)
{ {
struct anonymized_entry key, *ret; struct anonymized_entry key, *ret;
if (!map->cmpfn) if (!map->cmpfn)
hashmap_init(map, anonymized_entry_cmp, NULL, 0); hashmap_init(map, anonymized_entry_cmp, NULL, 0);
hashmap_entry_init(&key.hash, memhash(orig, *len)); hashmap_entry_init(&key.hash, memhash(orig, len));
key.orig = orig; key.orig = orig;
key.orig_len = *len; key.orig_len = len;
ret = hashmap_get_entry(map, &key, hash, NULL); ret = hashmap_get_entry(map, &key, hash, NULL);
if (!ret) { if (!ret) {
ret = xmalloc(sizeof(*ret)); ret = xmalloc(sizeof(*ret));
hashmap_entry_init(&ret->hash, key.hash.hash); hashmap_entry_init(&ret->hash, key.hash.hash);
ret->orig = xmemdupz(orig, *len); ret->orig = xmemdupz(orig, len);
ret->orig_len = *len; ret->orig_len = len;
ret->anon = generate(orig, len); ret->anon = generate(orig, len);
ret->anon_len = *len; ret->anon_len = strlen(ret->anon);
hashmap_put(map, &ret->hash); hashmap_put(map, &ret->hash);
} }
*len = ret->anon_len;
return ret->anon; return ret->anon;
} }
@@ -181,13 +180,13 @@ static const void *anonymize_mem(struct hashmap *map,
*/ */
static void anonymize_path(struct strbuf *out, const char *path, static void anonymize_path(struct strbuf *out, const char *path,
struct hashmap *map, struct hashmap *map,
void *(*generate)(const void *, size_t *)) char *(*generate)(const char *, size_t))
{ {
while (*path) { while (*path) {
const char *end_of_component = strchrnul(path, '/'); const char *end_of_component = strchrnul(path, '/');
size_t len = end_of_component - path; size_t len = end_of_component - path;
const char *c = anonymize_mem(map, generate, path, &len); const char *c = anonymize_str(map, generate, path, len);
strbuf_add(out, c, len); strbuf_addstr(out, c);
path = end_of_component; path = end_of_component;
if (*path) if (*path)
strbuf_addch(out, *path++); strbuf_addch(out, *path++);
@@ -361,12 +360,12 @@ static void print_path_1(const char *path)
printf("%s", path); printf("%s", path);
} }
static void *anonymize_path_component(const void *path, size_t *len) static char *anonymize_path_component(const char *path, size_t len)
{ {
static int counter; static int counter;
struct strbuf out = STRBUF_INIT; struct strbuf out = STRBUF_INIT;
strbuf_addf(&out, "path%d", counter++); strbuf_addf(&out, "path%d", counter++);
return strbuf_detach(&out, len); return strbuf_detach(&out, NULL);
} }
static void print_path(const char *path) static void print_path(const char *path)
@@ -383,7 +382,7 @@ static void print_path(const char *path)
} }
} }
static void *generate_fake_oid(const void *old, size_t *len) static char *generate_fake_oid(const char *old, size_t len)
{ {
static uint32_t counter = 1; /* avoid null oid */ static uint32_t counter = 1; /* avoid null oid */
const unsigned hashsz = the_hash_algo->rawsz; const unsigned hashsz = the_hash_algo->rawsz;
@@ -399,7 +398,7 @@ static const char *anonymize_oid(const char *oid_hex)
{ {
static struct hashmap objs; static struct hashmap objs;
size_t len = strlen(oid_hex); size_t len = strlen(oid_hex);
return anonymize_mem(&objs, generate_fake_oid, oid_hex, &len); return anonymize_str(&objs, generate_fake_oid, oid_hex, len);
} }
static void show_filemodify(struct diff_queue_struct *q, static void show_filemodify(struct diff_queue_struct *q,
@@ -496,12 +495,12 @@ static const char *find_encoding(const char *begin, const char *end)
return bol; return bol;
} }
static void *anonymize_ref_component(const void *old, size_t *len) static char *anonymize_ref_component(const char *old, size_t len)
{ {
static int counter; static int counter;
struct strbuf out = STRBUF_INIT; struct strbuf out = STRBUF_INIT;
strbuf_addf(&out, "ref%d", counter++); strbuf_addf(&out, "ref%d", counter++);
return strbuf_detach(&out, len); return strbuf_detach(&out, NULL);
} }
static const char *anonymize_refname(const char *refname) static const char *anonymize_refname(const char *refname)
@@ -550,13 +549,13 @@ static char *anonymize_commit_message(const char *old)
} }
static struct hashmap idents; static struct hashmap idents;
static void *anonymize_ident(const void *old, size_t *len) static char *anonymize_ident(const char *old, size_t len)
{ {
static int counter; static int counter;
struct strbuf out = STRBUF_INIT; struct strbuf out = STRBUF_INIT;
strbuf_addf(&out, "User %d <user%d@example.com>", counter, counter); strbuf_addf(&out, "User %d <user%d@example.com>", counter, counter);
counter++; counter++;
return strbuf_detach(&out, len); return strbuf_detach(&out, NULL);
} }
/* /*
@@ -591,9 +590,9 @@ static void anonymize_ident_line(const char **beg, const char **end)
size_t len; size_t len;
len = split.mail_end - split.name_begin; len = split.mail_end - split.name_begin;
ident = anonymize_mem(&idents, anonymize_ident, ident = anonymize_str(&idents, anonymize_ident,
split.name_begin, &len); split.name_begin, len);
strbuf_add(out, ident, len); strbuf_addstr(out, ident);
strbuf_addch(out, ' '); strbuf_addch(out, ' ');
strbuf_add(out, split.date_begin, split.tz_end - split.date_begin); strbuf_add(out, split.date_begin, split.tz_end - split.date_begin);
} else { } else {
@@ -733,12 +732,12 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
show_progress(); show_progress();
} }
static void *anonymize_tag(const void *old, size_t *len) static char *anonymize_tag(const char *old, size_t len)
{ {
static int counter; static int counter;
struct strbuf out = STRBUF_INIT; struct strbuf out = STRBUF_INIT;
strbuf_addf(&out, "tag message %d", counter++); strbuf_addf(&out, "tag message %d", counter++);
return strbuf_detach(&out, len); return strbuf_detach(&out, NULL);
} }
static void handle_tail(struct object_array *commits, struct rev_info *revs, static void handle_tail(struct object_array *commits, struct rev_info *revs,
@@ -808,8 +807,8 @@ static void handle_tag(const char *name, struct tag *tag)
name = anonymize_refname(name); name = anonymize_refname(name);
if (message) { if (message) {
static struct hashmap tags; static struct hashmap tags;
message = anonymize_mem(&tags, anonymize_tag, message = anonymize_str(&tags, anonymize_tag,
message, &message_size); message, message_size);
} }
} }