git-commit-vandalism/mailmap.c

363 lines
8.9 KiB
C
Raw Normal View History

#include "cache.h"
#include "string-list.h"
#include "mailmap.h"
#define DEBUG_MAILMAP 0
#if DEBUG_MAILMAP
#define debug_mm(...) fprintf(stderr, __VA_ARGS__)
#define debug_str(X) ((X) ? (X) : "(none)")
#else
static inline void debug_mm(const char *format, ...) {}
static inline const char *debug_str(const char *s) { return s; }
#endif
const char *git_mailmap_file;
const char *git_mailmap_blob;
struct mailmap_info {
char *name;
char *email;
};
struct mailmap_entry {
/* name and email for the simple mail-only case */
char *name;
char *email;
/* name and email for the complex mail and name matching case */
struct string_list namemap;
};
static void free_mailmap_info(void *p, const char *s)
{
struct mailmap_info *mi = (struct mailmap_info *)p;
debug_mm("mailmap: -- complex: '%s' -> '%s' <%s>\n",
s, debug_str(mi->name), debug_str(mi->email));
free(mi->name);
free(mi->email);
}
static void free_mailmap_entry(void *p, const char *s)
{
struct mailmap_entry *me = (struct mailmap_entry *)p;
debug_mm("mailmap: removing entries for <%s>, with %d sub-entries\n",
s, me->namemap.nr);
debug_mm("mailmap: - simple: '%s' <%s>\n",
debug_str(me->name), debug_str(me->email));
free(me->name);
free(me->email);
me->namemap.strdup_strings = 1;
string_list_clear_func(&me->namemap, free_mailmap_info);
}
/*
* On some systems (e.g. MinGW 4.0), string.h has _only_ inline
* definition of strcasecmp and no non-inline implementation is
* supplied anywhere, which is, eh, "unusual"; we cannot take an
* address of such a function to store it in namemap.cmp. This is
* here as a workaround---do not assign strcasecmp directly to
* namemap.cmp until we know no systems that matter have such an
* "unusual" string.h.
*/
static int namemap_cmp(const char *a, const char *b)
{
return strcasecmp(a, b);
}
static void add_mapping(struct string_list *map,
char *new_name, char *new_email,
char *old_name, char *old_email)
{
struct mailmap_entry *me;
struct string_list_item *item;
if (old_email == NULL) {
old_email = new_email;
new_email = NULL;
}
item = string_list_insert(map, old_email);
if (item->util) {
me = (struct mailmap_entry *)item->util;
} else {
me = xcalloc(1, sizeof(struct mailmap_entry));
me->namemap.strdup_strings = 1;
me->namemap.cmp = namemap_cmp;
item->util = me;
}
if (old_name == NULL) {
debug_mm("mailmap: adding (simple) entry for '%s'\n", old_email);
/* Replace current name and new email for simple entry */
if (new_name) {
free(me->name);
me->name = xstrdup(new_name);
}
if (new_email) {
free(me->email);
me->email = xstrdup(new_email);
}
} else {
struct mailmap_info *mi = xcalloc(1, sizeof(struct mailmap_info));
debug_mm("mailmap: adding (complex) entry for '%s'\n", old_email);
mi->name = xstrdup_or_null(new_name);
mi->email = xstrdup_or_null(new_email);
string_list_insert(&me->namemap, old_name)->util = mi;
}
debug_mm("mailmap: '%s' <%s> -> '%s' <%s>\n",
debug_str(old_name), old_email,
debug_str(new_name), debug_str(new_email));
}
static char *parse_name_and_email(char *buffer, char **name,
char **email, int allow_empty_email)
{
char *left, *right, *nstart, *nend;
*name = *email = NULL;
if ((left = strchr(buffer, '<')) == NULL)
return NULL;
if ((right = strchr(left+1, '>')) == NULL)
return NULL;
if (!allow_empty_email && (left+1 == right))
return NULL;
/* remove whitespace from beginning and end of name */
nstart = buffer;
while (isspace(*nstart) && nstart < left)
++nstart;
nend = left-1;
while (nend > nstart && isspace(*nend))
--nend;
*name = (nstart <= nend ? nstart : NULL);
*email = left+1;
*(nend+1) = '\0';
*right++ = '\0';
return (*right == '\0' ? NULL : right);
}
static void read_mailmap_line(struct string_list *map, char *buffer,
char **repo_abbrev)
{
char *name1 = NULL, *email1 = NULL, *name2 = NULL, *email2 = NULL;
if (buffer[0] == '#') {
static const char abbrev[] = "# repo-abbrev:";
int abblen = sizeof(abbrev) - 1;
int len = strlen(buffer);
if (!repo_abbrev)
return;
if (len && buffer[len - 1] == '\n')
buffer[--len] = 0;
if (!strncmp(buffer, abbrev, abblen)) {
char *cp;
free(*repo_abbrev);
for (cp = buffer + abblen; isspace(*cp); cp++)
; /* nothing */
*repo_abbrev = xstrdup(cp);
}
return;
}
if ((name2 = parse_name_and_email(buffer, &name1, &email1, 0)) != NULL)
parse_name_and_email(name2, &name2, &email2, 1);
if (email1)
add_mapping(map, name1, email1, name2, email2);
}
static int read_mailmap_file(struct string_list *map, const char *filename,
char **repo_abbrev)
{
char buffer[1024];
FILE *f;
if (!filename)
return 0;
f = fopen(filename, "r");
if (!f) {
if (errno == ENOENT)
return 0;
return error_errno("unable to open mailmap at %s", filename);
}
while (fgets(buffer, sizeof(buffer), f) != NULL)
read_mailmap_line(map, buffer, repo_abbrev);
fclose(f);
return 0;
}
mailmap: handle mailmap blobs without trailing newlines The read_mailmap_buf function reads each line of the mailmap using strchrnul, like: const char *end = strchrnul(buf, '\n'); unsigned long linelen = end - buf + 1; But that's off-by-one when we actually hit the NUL byte; our line does not have a terminator, and so is only "end - buf" bytes long. As a result, when we subtract the linelen from the total len, we end up with (unsigned long)-1 bytes left in the buffer, and we start reading random junk from memory. We could fix it with: unsigned long linelen = end - buf + !!*end; but let's take a step back for a moment. It's questionable in the first place for a function that takes a buffer and length to be using strchrnul. But it works because we only have one caller (and are only likely to ever have this one), which is handing us data from read_sha1_file. Which means that it's always NUL-terminated. Instead of tightening the assumptions to make the buffer/length pair work for a caller that doesn't actually exist, let's let loosen the assumptions to what the real caller has: a modifiable, NUL-terminated string. This makes the code simpler and shorter (because we don't have to correlate strchrnul with the length calculation), correct (because the code with the off-by-one just goes away), and more efficient (we can drop the extra allocation we needed to create NUL-terminated strings for each line, and just terminate in place). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-28 03:41:39 +02:00
static void read_mailmap_string(struct string_list *map, char *buf,
char **repo_abbrev)
{
mailmap: handle mailmap blobs without trailing newlines The read_mailmap_buf function reads each line of the mailmap using strchrnul, like: const char *end = strchrnul(buf, '\n'); unsigned long linelen = end - buf + 1; But that's off-by-one when we actually hit the NUL byte; our line does not have a terminator, and so is only "end - buf" bytes long. As a result, when we subtract the linelen from the total len, we end up with (unsigned long)-1 bytes left in the buffer, and we start reading random junk from memory. We could fix it with: unsigned long linelen = end - buf + !!*end; but let's take a step back for a moment. It's questionable in the first place for a function that takes a buffer and length to be using strchrnul. But it works because we only have one caller (and are only likely to ever have this one), which is handing us data from read_sha1_file. Which means that it's always NUL-terminated. Instead of tightening the assumptions to make the buffer/length pair work for a caller that doesn't actually exist, let's let loosen the assumptions to what the real caller has: a modifiable, NUL-terminated string. This makes the code simpler and shorter (because we don't have to correlate strchrnul with the length calculation), correct (because the code with the off-by-one just goes away), and more efficient (we can drop the extra allocation we needed to create NUL-terminated strings for each line, and just terminate in place). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-28 03:41:39 +02:00
while (*buf) {
char *end = strchrnul(buf, '\n');
mailmap: handle mailmap blobs without trailing newlines The read_mailmap_buf function reads each line of the mailmap using strchrnul, like: const char *end = strchrnul(buf, '\n'); unsigned long linelen = end - buf + 1; But that's off-by-one when we actually hit the NUL byte; our line does not have a terminator, and so is only "end - buf" bytes long. As a result, when we subtract the linelen from the total len, we end up with (unsigned long)-1 bytes left in the buffer, and we start reading random junk from memory. We could fix it with: unsigned long linelen = end - buf + !!*end; but let's take a step back for a moment. It's questionable in the first place for a function that takes a buffer and length to be using strchrnul. But it works because we only have one caller (and are only likely to ever have this one), which is handing us data from read_sha1_file. Which means that it's always NUL-terminated. Instead of tightening the assumptions to make the buffer/length pair work for a caller that doesn't actually exist, let's let loosen the assumptions to what the real caller has: a modifiable, NUL-terminated string. This makes the code simpler and shorter (because we don't have to correlate strchrnul with the length calculation), correct (because the code with the off-by-one just goes away), and more efficient (we can drop the extra allocation we needed to create NUL-terminated strings for each line, and just terminate in place). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-28 03:41:39 +02:00
if (*end)
*end++ = '\0';
mailmap: handle mailmap blobs without trailing newlines The read_mailmap_buf function reads each line of the mailmap using strchrnul, like: const char *end = strchrnul(buf, '\n'); unsigned long linelen = end - buf + 1; But that's off-by-one when we actually hit the NUL byte; our line does not have a terminator, and so is only "end - buf" bytes long. As a result, when we subtract the linelen from the total len, we end up with (unsigned long)-1 bytes left in the buffer, and we start reading random junk from memory. We could fix it with: unsigned long linelen = end - buf + !!*end; but let's take a step back for a moment. It's questionable in the first place for a function that takes a buffer and length to be using strchrnul. But it works because we only have one caller (and are only likely to ever have this one), which is handing us data from read_sha1_file. Which means that it's always NUL-terminated. Instead of tightening the assumptions to make the buffer/length pair work for a caller that doesn't actually exist, let's let loosen the assumptions to what the real caller has: a modifiable, NUL-terminated string. This makes the code simpler and shorter (because we don't have to correlate strchrnul with the length calculation), correct (because the code with the off-by-one just goes away), and more efficient (we can drop the extra allocation we needed to create NUL-terminated strings for each line, and just terminate in place). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-28 03:41:39 +02:00
read_mailmap_line(map, buf, repo_abbrev);
buf = end;
}
}
static int read_mailmap_blob(struct string_list *map,
const char *name,
char **repo_abbrev)
{
struct object_id oid;
char *buf;
unsigned long size;
enum object_type type;
if (!name)
return 0;
if (get_oid(name, &oid) < 0)
return 0;
buf = read_object_file(&oid, &type, &size);
if (!buf)
return error("unable to read mailmap object at %s", name);
if (type != OBJ_BLOB)
return error("mailmap is not a blob: %s", name);
mailmap: handle mailmap blobs without trailing newlines The read_mailmap_buf function reads each line of the mailmap using strchrnul, like: const char *end = strchrnul(buf, '\n'); unsigned long linelen = end - buf + 1; But that's off-by-one when we actually hit the NUL byte; our line does not have a terminator, and so is only "end - buf" bytes long. As a result, when we subtract the linelen from the total len, we end up with (unsigned long)-1 bytes left in the buffer, and we start reading random junk from memory. We could fix it with: unsigned long linelen = end - buf + !!*end; but let's take a step back for a moment. It's questionable in the first place for a function that takes a buffer and length to be using strchrnul. But it works because we only have one caller (and are only likely to ever have this one), which is handing us data from read_sha1_file. Which means that it's always NUL-terminated. Instead of tightening the assumptions to make the buffer/length pair work for a caller that doesn't actually exist, let's let loosen the assumptions to what the real caller has: a modifiable, NUL-terminated string. This makes the code simpler and shorter (because we don't have to correlate strchrnul with the length calculation), correct (because the code with the off-by-one just goes away), and more efficient (we can drop the extra allocation we needed to create NUL-terminated strings for each line, and just terminate in place). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-28 03:41:39 +02:00
read_mailmap_string(map, buf, repo_abbrev);
free(buf);
return 0;
}
int read_mailmap(struct string_list *map, char **repo_abbrev)
{
int err = 0;
map->strdup_strings = 1;
map->cmp = namemap_cmp;
if (!git_mailmap_blob && is_bare_repository())
git_mailmap_blob = "HEAD:.mailmap";
err |= read_mailmap_file(map, ".mailmap", repo_abbrev);
if (startup_info->have_repository)
err |= read_mailmap_blob(map, git_mailmap_blob, repo_abbrev);
err |= read_mailmap_file(map, git_mailmap_file, repo_abbrev);
return err;
}
void clear_mailmap(struct string_list *map)
{
debug_mm("mailmap: clearing %d entries...\n", map->nr);
map->strdup_strings = 1;
string_list_clear_func(map, free_mailmap_entry);
debug_mm("mailmap: cleared\n");
}
mailmap: remove email copy and length limitation In map_user(), we have email pointer that points at the beginning of an e-mail address, but the buffer is not terminated with a NUL after the e-mail address. It typically has ">" after the address, and it could have even more if it comes from author/committer line in a commit object. Or it may not have ">" after it. We used to copy the e-mail address proper into a temporary buffer before asking the string-list API to find the e-mail address in the mailmap, because string_list_lookup() function only takes a NUL terminated full string. Introduce a helper function lookup_prefix that takes the email pointer and the length, and finds a matching entry in the string list used for the mailmap, by doing the following: - First ask string_list_find_insert_index() where in its sorted list the e-mail address we have (including the possible trailing junk ">...") would be inserted. - It could find an exact match (e.g. we had a clean e-mail address without any trailing junk). We can return the item in that case. - Or it could return the index of an item that sorts after the e-mail address we have. - If we did not find an exact match against a clean e-mail address, then the record we are looking for in the mailmap has to exist before the index returned by the function (i.e. "email>junk" always sorts later than "email"). Iterate, starting from that index, down the map->items[] array until we find the exact record we are looking for, or we see a record with a key that definitely sorts earlier than the e-mail we are looking for (i.e. when we are looking for "email" in "email>junk", a record in the mailmap that begins with "emaik" strictly sorts before "email", if such a key existed in the mailmap). This, together with the earlier enhancement to support case-insensitive sorting, allow us to remove an extra copy of email buffer to downcase it. A part of this is based on Antoine Pelisse's previous work. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-01-05 22:26:39 +01:00
/*
* Look for an entry in map that match string[0:len]; string[len]
* does not have to be NUL (but it could be).
*/
static struct string_list_item *lookup_prefix(struct string_list *map,
const char *string, size_t len)
{
int i = string_list_find_insert_index(map, string, 1);
if (i < 0) {
/* exact match */
i = -1 - i;
if (!string[len])
return &map->items[i];
/*
* that map entry matches exactly to the string, including
* the cruft at the end beyond "len". That is not a match
* with string[0:len] that we are looking for.
*/
} else if (!string[len]) {
/*
* asked with the whole string, and got nothing. No
* matching entry can exist in the map.
*/
return NULL;
}
/*
* i is at the exact match to an overlong key, or location the
* overlong key would be inserted, which must come after the
* real location of the key if one exists.
*/
while (0 <= --i && i < map->nr) {
int cmp = strncasecmp(map->items[i].string, string, len);
if (cmp < 0)
/*
* "i" points at a key definitely below the prefix;
* the map does not have string[0:len] in it.
*/
break;
else if (!cmp && !map->items[i].string[len])
/* found it */
return &map->items[i];
/*
* otherwise, the string at "i" may be string[0:len]
* followed by a string that sorts later than string[len:];
* keep trying.
*/
}
return NULL;
}
int map_user(struct string_list *map,
const char **email, size_t *emaillen,
const char **name, size_t *namelen)
{
struct string_list_item *item;
struct mailmap_entry *me;
mailmap: remove email copy and length limitation In map_user(), we have email pointer that points at the beginning of an e-mail address, but the buffer is not terminated with a NUL after the e-mail address. It typically has ">" after the address, and it could have even more if it comes from author/committer line in a commit object. Or it may not have ">" after it. We used to copy the e-mail address proper into a temporary buffer before asking the string-list API to find the e-mail address in the mailmap, because string_list_lookup() function only takes a NUL terminated full string. Introduce a helper function lookup_prefix that takes the email pointer and the length, and finds a matching entry in the string list used for the mailmap, by doing the following: - First ask string_list_find_insert_index() where in its sorted list the e-mail address we have (including the possible trailing junk ">...") would be inserted. - It could find an exact match (e.g. we had a clean e-mail address without any trailing junk). We can return the item in that case. - Or it could return the index of an item that sorts after the e-mail address we have. - If we did not find an exact match against a clean e-mail address, then the record we are looking for in the mailmap has to exist before the index returned by the function (i.e. "email>junk" always sorts later than "email"). Iterate, starting from that index, down the map->items[] array until we find the exact record we are looking for, or we see a record with a key that definitely sorts earlier than the e-mail we are looking for (i.e. when we are looking for "email" in "email>junk", a record in the mailmap that begins with "emaik" strictly sorts before "email", if such a key existed in the mailmap). This, together with the earlier enhancement to support case-insensitive sorting, allow us to remove an extra copy of email buffer to downcase it. A part of this is based on Antoine Pelisse's previous work. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-01-05 22:26:39 +01:00
debug_mm("map_user: map '%.*s' <%.*s>\n",
(int)*namelen, debug_str(*name),
(int)*emaillen, debug_str(*email));
mailmap: remove email copy and length limitation In map_user(), we have email pointer that points at the beginning of an e-mail address, but the buffer is not terminated with a NUL after the e-mail address. It typically has ">" after the address, and it could have even more if it comes from author/committer line in a commit object. Or it may not have ">" after it. We used to copy the e-mail address proper into a temporary buffer before asking the string-list API to find the e-mail address in the mailmap, because string_list_lookup() function only takes a NUL terminated full string. Introduce a helper function lookup_prefix that takes the email pointer and the length, and finds a matching entry in the string list used for the mailmap, by doing the following: - First ask string_list_find_insert_index() where in its sorted list the e-mail address we have (including the possible trailing junk ">...") would be inserted. - It could find an exact match (e.g. we had a clean e-mail address without any trailing junk). We can return the item in that case. - Or it could return the index of an item that sorts after the e-mail address we have. - If we did not find an exact match against a clean e-mail address, then the record we are looking for in the mailmap has to exist before the index returned by the function (i.e. "email>junk" always sorts later than "email"). Iterate, starting from that index, down the map->items[] array until we find the exact record we are looking for, or we see a record with a key that definitely sorts earlier than the e-mail we are looking for (i.e. when we are looking for "email" in "email>junk", a record in the mailmap that begins with "emaik" strictly sorts before "email", if such a key existed in the mailmap). This, together with the earlier enhancement to support case-insensitive sorting, allow us to remove an extra copy of email buffer to downcase it. A part of this is based on Antoine Pelisse's previous work. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-01-05 22:26:39 +01:00
item = lookup_prefix(map, *email, *emaillen);
if (item != NULL) {
me = (struct mailmap_entry *)item->util;
if (me->namemap.nr) {
/*
* The item has multiple items, so we'll look up on
* name too. If the name is not found, we choose the
* simple entry.
*/
struct string_list_item *subitem;
subitem = lookup_prefix(&me->namemap, *name, *namelen);
if (subitem)
item = subitem;
}
}
if (item != NULL) {
struct mailmap_info *mi = (struct mailmap_info *)item->util;
if (mi->name == NULL && mi->email == NULL) {
debug_mm("map_user: -- (no simple mapping)\n");
return 0;
}
if (mi->email) {
*email = mi->email;
*emaillen = strlen(*email);
}
if (mi->name) {
*name = mi->name;
*namelen = strlen(*name);
}
debug_mm("map_user: to '%.*s' <%.*s>\n",
(int)*namelen, debug_str(*name),
(int)*emaillen, debug_str(*email));
return 1;
}
debug_mm("map_user: --\n");
return 0;
}