From 8dd5afc926acb9829ebf56e9b78826a5242cd638 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 7 Jan 2013 12:24:55 -0800 Subject: [PATCH 01/11] string-list: allow case-insensitive string list Some string list needs to be searched case insensitively, and for that to work correctly, the string needs to be sorted case insensitively from the beginning. Allow a custom comparison function to be defined on a string list instance and use it throughout in place of strcmp(). Signed-off-by: Junio C Hamano --- string-list.c | 17 +++++++++++++---- string-list.h | 4 ++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/string-list.c b/string-list.c index 397e6cfa7d..6f3d8cf091 100644 --- a/string-list.c +++ b/string-list.c @@ -7,10 +7,11 @@ static int get_entry_index(const struct string_list *list, const char *string, int *exact_match) { int left = -1, right = list->nr; + compare_strings_fn cmp = list->cmp ? list->cmp : strcmp; while (left + 1 < right) { int middle = (left + right) / 2; - int compare = strcmp(string, list->items[middle].string); + int compare = cmp(string, list->items[middle].string); if (compare < 0) right = middle; else if (compare > 0) @@ -96,8 +97,9 @@ void string_list_remove_duplicates(struct string_list *list, int free_util) { if (list->nr > 1) { int src, dst; + compare_strings_fn cmp = list->cmp ? list->cmp : strcmp; for (src = dst = 1; src < list->nr; src++) { - if (!strcmp(list->items[dst - 1].string, list->items[src].string)) { + if (!cmp(list->items[dst - 1].string, list->items[src].string)) { if (list->strdup_strings) free(list->items[src].string); if (free_util) @@ -230,15 +232,20 @@ struct string_list_item *string_list_append(struct string_list *list, list->strdup_strings ? xstrdup(string) : (char *)string); } +/* Yuck */ +static compare_strings_fn compare_for_qsort; + +/* Only call this from inside sort_string_list! */ static int cmp_items(const void *a, const void *b) { const struct string_list_item *one = a; const struct string_list_item *two = b; - return strcmp(one->string, two->string); + return compare_for_qsort(one->string, two->string); } void sort_string_list(struct string_list *list) { + compare_for_qsort = list->cmp ? list->cmp : strcmp; qsort(list->items, list->nr, sizeof(*list->items), cmp_items); } @@ -246,8 +253,10 @@ struct string_list_item *unsorted_string_list_lookup(struct string_list *list, const char *string) { int i; + compare_strings_fn cmp = list->cmp ? list->cmp : strcmp; + for (i = 0; i < list->nr; i++) - if (!strcmp(string, list->items[i].string)) + if (!cmp(string, list->items[i].string)) return list->items + i; return NULL; } diff --git a/string-list.h b/string-list.h index c50b0d0dea..446e79e425 100644 --- a/string-list.h +++ b/string-list.h @@ -5,10 +5,14 @@ struct string_list_item { char *string; void *util; }; + +typedef int (*compare_strings_fn)(const char *, const char *); + struct string_list { struct string_list_item *items; unsigned int nr, alloc; unsigned int strdup_strings:1; + compare_strings_fn cmp; /* NULL uses strcmp() */ }; #define STRING_LIST_INIT_NODUP { NULL, 0, 0, 0 } From 3c020bd528d5dc320b82bd787670edfe6695f097 Mon Sep 17 00:00:00 2001 From: Antoine Pelisse Date: Sat, 5 Jan 2013 22:26:38 +0100 Subject: [PATCH 02/11] Use split_ident_line to parse author and committer Currently blame.c::get_acline(), pretty.c::pp_user_info() and shortlog.c::insert_one_record() are parsing author name, email, time and tz themselves. Use ident.c::split_ident_line() for better code reuse. Signed-off-by: Antoine Pelisse Signed-off-by: Junio C Hamano --- builtin/blame.c | 59 +++++++++++++++++----------------------------- builtin/shortlog.c | 34 +++++++------------------- pretty.c | 35 ++++++++++++++++----------- 3 files changed, 51 insertions(+), 77 deletions(-) diff --git a/builtin/blame.c b/builtin/blame.c index cfae569905..dd4aff91e0 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -1343,8 +1343,9 @@ static void get_ac_line(const char *inbuf, const char *what, int mail_len, char *mail, unsigned long *time, const char **tz) { - int len, tzlen, maillen; - char *tmp, *endp, *timepos, *mailpos; + struct ident_split ident; + int len, tzlen, maillen, namelen; + char *tmp, *endp, *mailpos; tmp = strstr(inbuf, what); if (!tmp) @@ -1355,7 +1356,10 @@ static void get_ac_line(const char *inbuf, const char *what, len = strlen(tmp); else len = endp - tmp; - if (person_len <= len) { + if (person_len <= len) + goto error_out; + + if (split_ident_line(&ident, tmp, len)) { error_out: /* Ugh */ *tz = "(unknown)"; @@ -1364,48 +1368,27 @@ static void get_ac_line(const char *inbuf, const char *what, *time = 0; return; } - memcpy(person, tmp, len); - tmp = person; - tmp += len; - *tmp = 0; - while (person < tmp && *tmp != ' ') - tmp--; - if (tmp <= person) - goto error_out; - *tz = tmp+1; - tzlen = (person+len)-(tmp+1); + namelen = ident.name_end - ident.name_begin; + memcpy(person, ident.name_begin, namelen); + person[namelen] = 0; - *tmp = 0; - while (person < tmp && *tmp != ' ') - tmp--; - if (tmp <= person) - goto error_out; - *time = strtoul(tmp, NULL, 10); - timepos = tmp; + maillen = ident.mail_end - ident.mail_begin + 2; + memcpy(mail, ident.mail_begin - 1, maillen); + mail[maillen] = 0; - *tmp = 0; - while (person < tmp && !(*tmp == ' ' && tmp[1] == '<')) - tmp--; - if (tmp <= person) - return; - mailpos = tmp + 1; - *tmp = 0; - maillen = timepos - tmp; - memcpy(mail, mailpos, maillen); + *time = strtoul(ident.date_begin, NULL, 10); + + tzlen = ident.tz_end - ident.tz_begin; + + /* Place tz at the end of person */ + *tz = tmp = person + person_len - (tzlen + 1); + memcpy(tmp, ident.tz_begin, tzlen); + tmp[tzlen] = 0; if (!mailmap.nr) return; - /* - * mailmap expansion may make the name longer. - * make room by pushing stuff down. - */ - tmp = person + person_len - (tzlen + 1); - memmove(tmp, *tz, tzlen); - tmp[tzlen] = 0; - *tz = tmp; - /* * Now, convert both name and e-mail using mailmap */ diff --git a/builtin/shortlog.c b/builtin/shortlog.c index b316cf37ca..03c6cd7e86 100644 --- a/builtin/shortlog.c +++ b/builtin/shortlog.c @@ -40,40 +40,24 @@ static void insert_one_record(struct shortlog *log, char emailbuf[1024]; size_t len; const char *eol; - const char *boemail, *eoemail; struct strbuf subject = STRBUF_INIT; + struct ident_split ident; - boemail = strchr(author, '<'); - if (!boemail) - return; - eoemail = strchr(boemail, '>'); - if (!eoemail) + if (split_ident_line(&ident, author, strlen(author))) return; /* copy author name to namebuf, to support matching on both name and email */ - memcpy(namebuf, author, boemail - author); - len = boemail - author; - while (len > 0 && isspace(namebuf[len-1])) - len--; + len = ident.name_end - ident.name_begin; + memcpy(namebuf, ident.name_begin, len); namebuf[len] = 0; /* copy email name to emailbuf, to allow email replacement as well */ - memcpy(emailbuf, boemail+1, eoemail - boemail); - emailbuf[eoemail - boemail - 1] = 0; + len = ident.mail_end - ident.mail_begin; + memcpy(emailbuf, ident.mail_begin, len); + emailbuf[len] = 0; - if (!map_user(&log->mailmap, emailbuf, sizeof(emailbuf), namebuf, sizeof(namebuf))) { - while (author < boemail && isspace(*author)) - author++; - for (len = 0; - len < sizeof(namebuf) - 1 && author + len < boemail; - len++) - namebuf[len] = author[len]; - while (0 < len && isspace(namebuf[len-1])) - len--; - namebuf[len] = '\0'; - } - else - len = strlen(namebuf); + map_user(&log->mailmap, emailbuf, sizeof(emailbuf), namebuf, sizeof(namebuf)); + len = strlen(namebuf); if (log->email) { size_t room = sizeof(namebuf) - len - 1; diff --git a/pretty.c b/pretty.c index 5bdc2e70bc..350d1df3fe 100644 --- a/pretty.c +++ b/pretty.c @@ -387,29 +387,36 @@ void pp_user_info(const struct pretty_print_context *pp, const char *what, struct strbuf *sb, const char *line, const char *encoding) { + struct ident_split ident; + int linelen, namelen; + char *line_end, *date; int max_length = 78; /* per rfc2822 */ - char *date; - int namelen; unsigned long time; int tz; if (pp->fmt == CMIT_FMT_ONELINE) return; - date = strchr(line, '>'); - if (!date) + + line_end = strchr(line, '\n'); + if (!line_end) { + line_end = strchr(line, '\0'); + if (!line_end) + return; + } + + linelen = ++line_end - line; + if (split_ident_line(&ident, line, linelen)) return; - namelen = ++date - line; - time = strtoul(date, &date, 10); + + namelen = ident.mail_end - ident.name_begin + 1; + time = strtoul(ident.date_begin, &date, 10); tz = strtol(date, NULL, 10); if (pp->fmt == CMIT_FMT_EMAIL) { - char *name_tail = strchr(line, '<'); int display_name_length; - if (!name_tail) - return; - while (line < name_tail && isspace(name_tail[-1])) - name_tail--; - display_name_length = name_tail - line; + + display_name_length = ident.name_end - ident.name_begin; + strbuf_addstr(sb, "From: "); if (needs_rfc2047_encoding(line, display_name_length, RFC2047_ADDRESS)) { add_rfc2047(sb, line, display_name_length, @@ -427,10 +434,10 @@ void pp_user_info(const struct pretty_print_context *pp, } if (namelen - display_name_length + last_line_length(sb) > max_length) { strbuf_addch(sb, '\n'); - if (!isspace(name_tail[0])) + if (!isspace(ident.name_end[0])) strbuf_addch(sb, ' '); } - strbuf_add(sb, name_tail, namelen - display_name_length); + strbuf_add(sb, ident.name_end, namelen - display_name_length); strbuf_addch(sb, '\n'); } else { strbuf_addf(sb, "%s: %.*s%.*s\n", what, From 388c7f8a275a40697cee2eec5fb124ae8457bf77 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 5 Jan 2013 22:26:39 +0100 Subject: [PATCH 03/11] mailmap: remove email copy and length limitation In map_user(), we have email pointer that points at the beginning of an e-mail address, but the buffer is not terminated with a NUL after the e-mail address. It typically has ">" after the address, and it could have even more if it comes from author/committer line in a commit object. Or it may not have ">" after it. We used to copy the e-mail address proper into a temporary buffer before asking the string-list API to find the e-mail address in the mailmap, because string_list_lookup() function only takes a NUL terminated full string. Introduce a helper function lookup_prefix that takes the email pointer and the length, and finds a matching entry in the string list used for the mailmap, by doing the following: - First ask string_list_find_insert_index() where in its sorted list the e-mail address we have (including the possible trailing junk ">...") would be inserted. - It could find an exact match (e.g. we had a clean e-mail address without any trailing junk). We can return the item in that case. - Or it could return the index of an item that sorts after the e-mail address we have. - If we did not find an exact match against a clean e-mail address, then the record we are looking for in the mailmap has to exist before the index returned by the function (i.e. "email>junk" always sorts later than "email"). Iterate, starting from that index, down the map->items[] array until we find the exact record we are looking for, or we see a record with a key that definitely sorts earlier than the e-mail we are looking for (i.e. when we are looking for "email" in "email>junk", a record in the mailmap that begins with "emaik" strictly sorts before "email", if such a key existed in the mailmap). This, together with the earlier enhancement to support case-insensitive sorting, allow us to remove an extra copy of email buffer to downcase it. A part of this is based on Antoine Pelisse's previous work. Signed-off-by: Junio C Hamano --- mailmap.c | 71 ++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 57 insertions(+), 14 deletions(-) diff --git a/mailmap.c b/mailmap.c index ea4b471ede..998846fdaa 100644 --- a/mailmap.c +++ b/mailmap.c @@ -174,6 +174,7 @@ static int read_single_mailmap(struct string_list *map, const char *filename, ch int read_mailmap(struct string_list *map, char **repo_abbrev) { map->strdup_strings = 1; + map->cmp = strcasecmp; /* each failure returns 1, so >1 means both calls failed */ return read_single_mailmap(map, ".mailmap", repo_abbrev) + read_single_mailmap(map, git_mailmap_file, repo_abbrev) > 1; @@ -187,14 +188,64 @@ void clear_mailmap(struct string_list *map) debug_mm("mailmap: cleared\n"); } +/* + * Look for an entry in map that match string[0:len]; string[len] + * does not have to be NUL (but it could be). + */ +static struct string_list_item *lookup_prefix(struct string_list *map, + const char *string, size_t len) +{ + int i = string_list_find_insert_index(map, string, 1); + if (i < 0) { + /* exact match */ + i = -1 - i; + if (!string[len]) + return &map->items[i]; + /* + * that map entry matches exactly to the string, including + * the cruft at the end beyond "len". That is not a match + * with string[0:len] that we are looking for. + */ + } else if (!string[len]) { + /* + * asked with the whole string, and got nothing. No + * matching entry can exist in the map. + */ + return NULL; + } + + /* + * i is at the exact match to an overlong key, or location the + * overlong key would be inserted, which must come after the + * real location of the key if one exists. + */ + while (0 <= --i && i < map->nr) { + int cmp = strncasecmp(map->items[i].string, string, len); + if (cmp < 0) + /* + * "i" points at a key definitely below the prefix; + * the map does not have string[0:len] in it. + */ + break; + else if (!cmp && !map->items[i].string[len]) + /* found it */ + return &map->items[i]; + /* + * otherwise, the string at "i" may be string[0:len] + * followed by a string that sorts later than string[len:]; + * keep trying. + */ + } + return NULL; +} + int map_user(struct string_list *map, char *email, int maxlen_email, char *name, int maxlen_name) { char *end_of_email; struct string_list_item *item; struct mailmap_entry *me; - char buf[1024], *mailbuf; - int i; + size_t maillen; /* figure out space requirement for email */ end_of_email = strchr(email, '>'); @@ -204,18 +255,12 @@ int map_user(struct string_list *map, if (!end_of_email) return 0; } - if (end_of_email - email + 1 < sizeof(buf)) - mailbuf = buf; - else - mailbuf = xmalloc(end_of_email - email + 1); - /* downcase the email address */ - for (i = 0; i < end_of_email - email; i++) - mailbuf[i] = tolower(email[i]); - mailbuf[i] = 0; + maillen = end_of_email - email; - debug_mm("map_user: map '%s' <%s>\n", name, mailbuf); - item = string_list_lookup(map, mailbuf); + debug_mm("map_user: map '%s' <%.*s>\n", name, maillen, email); + + item = lookup_prefix(map, email, maillen); if (item != NULL) { me = (struct mailmap_entry *)item->util; if (me->namemap.nr) { @@ -226,8 +271,6 @@ int map_user(struct string_list *map, item = subitem; } } - if (mailbuf != buf) - free(mailbuf); if (item != NULL) { struct mailmap_info *mi = (struct mailmap_info *)item->util; if (mi->name == NULL && (mi->email == NULL || maxlen_email == 0)) { From ea02ffa38571084007eb7c63f650d0011e44a3dd Mon Sep 17 00:00:00 2001 From: Antoine Pelisse Date: Sat, 5 Jan 2013 22:26:40 +0100 Subject: [PATCH 04/11] mailmap: simplify map_user() interface Simplify map_user(), mostly to avoid copies of string buffers. It also simplifies caller functions. map_user() directly receive pointers and length from the commit buffer as mail and name. If mapping of the user and mail can be done, the pointer is updated to a new location. Lengths are also updated if necessary. The caller of map_user() can then copy the new email and name if necessary. Signed-off-by: Antoine Pelisse Signed-off-by: Junio C Hamano --- builtin/blame.c | 156 +++++++++++++++++++++++---------------------- builtin/shortlog.c | 32 ++++------ mailmap.c | 43 ++++++------- mailmap.h | 4 +- pretty.c | 33 ++++------ 5 files changed, 125 insertions(+), 143 deletions(-) diff --git a/builtin/blame.c b/builtin/blame.c index dd4aff91e0..9d20e25552 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -1321,31 +1321,31 @@ static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt) * Information on commits, used for output. */ struct commit_info { - const char *author; - const char *author_mail; + struct strbuf author; + struct strbuf author_mail; unsigned long author_time; - const char *author_tz; + struct strbuf author_tz; /* filled only when asked for details */ - const char *committer; - const char *committer_mail; + struct strbuf committer; + struct strbuf committer_mail; unsigned long committer_time; - const char *committer_tz; + struct strbuf committer_tz; - const char *summary; + struct strbuf summary; }; /* * Parse author/committer line in the commit object buffer */ static void get_ac_line(const char *inbuf, const char *what, - int person_len, char *person, - int mail_len, char *mail, - unsigned long *time, const char **tz) + struct strbuf *name, struct strbuf *mail, + unsigned long *time, struct strbuf *tz) { struct ident_split ident; - int len, tzlen, maillen, namelen; - char *tmp, *endp, *mailpos; + size_t len, maillen, namelen; + char *tmp, *endp; + const char *namebuf, *mailbuf; tmp = strstr(inbuf, what); if (!tmp) @@ -1356,51 +1356,61 @@ static void get_ac_line(const char *inbuf, const char *what, len = strlen(tmp); else len = endp - tmp; - if (person_len <= len) - goto error_out; if (split_ident_line(&ident, tmp, len)) { error_out: /* Ugh */ - *tz = "(unknown)"; - strcpy(person, *tz); - strcpy(mail, *tz); + tmp = "(unknown)"; + strbuf_addstr(name, tmp); + strbuf_addstr(mail, tmp); + strbuf_addstr(tz, tmp); *time = 0; return; } namelen = ident.name_end - ident.name_begin; - memcpy(person, ident.name_begin, namelen); - person[namelen] = 0; + namebuf = ident.name_begin; - maillen = ident.mail_end - ident.mail_begin + 2; - memcpy(mail, ident.mail_begin - 1, maillen); - mail[maillen] = 0; + maillen = ident.mail_end - ident.mail_begin; + mailbuf = ident.mail_begin; *time = strtoul(ident.date_begin, NULL, 10); - tzlen = ident.tz_end - ident.tz_begin; - - /* Place tz at the end of person */ - *tz = tmp = person + person_len - (tzlen + 1); - memcpy(tmp, ident.tz_begin, tzlen); - tmp[tzlen] = 0; - - if (!mailmap.nr) - return; + len = ident.tz_end - ident.tz_begin; + strbuf_add(tz, ident.tz_begin, len); /* * Now, convert both name and e-mail using mailmap */ - if (map_user(&mailmap, mail+1, mail_len-1, person, tmp-person-1)) { - /* Add a trailing '>' to email, since map_user returns plain emails - Note: It already has '<', since we replace from mail+1 */ - mailpos = memchr(mail, '\0', mail_len); - if (mailpos && mailpos-mail < mail_len - 1) { - *mailpos = '>'; - *(mailpos+1) = '\0'; - } - } + map_user(&mailmap, &mailbuf, &maillen, + &namebuf, &namelen); + + strbuf_addf(mail, "<%.*s>", (int)maillen, mailbuf); + strbuf_add(name, namebuf, namelen); +} + +static void commit_info_init(struct commit_info *ci) +{ + + strbuf_init(&ci->author, 0); + strbuf_init(&ci->author_mail, 0); + strbuf_init(&ci->author_tz, 0); + strbuf_init(&ci->committer, 0); + strbuf_init(&ci->committer_mail, 0); + strbuf_init(&ci->committer_tz, 0); + strbuf_init(&ci->summary, 0); +} + +static void commit_info_destroy(struct commit_info *ci) +{ + + strbuf_release(&ci->author); + strbuf_release(&ci->author_mail); + strbuf_release(&ci->author_tz); + strbuf_release(&ci->committer); + strbuf_release(&ci->committer_mail); + strbuf_release(&ci->committer_tz); + strbuf_release(&ci->summary); } static void get_commit_info(struct commit *commit, @@ -1410,11 +1420,8 @@ static void get_commit_info(struct commit *commit, int len; const char *subject, *encoding; char *reencoded, *message; - static char author_name[1024]; - static char author_mail[1024]; - static char committer_name[1024]; - static char committer_mail[1024]; - static char summary_buf[1024]; + + commit_info_init(ret); /* * We've operated without save_commit_buffer, so @@ -1432,11 +1439,8 @@ static void get_commit_info(struct commit *commit, encoding = get_log_output_encoding(); reencoded = logmsg_reencode(commit, encoding); message = reencoded ? reencoded : commit->buffer; - ret->author = author_name; - ret->author_mail = author_mail; get_ac_line(message, "\nauthor ", - sizeof(author_name), author_name, - sizeof(author_mail), author_mail, + &ret->author, &ret->author_mail, &ret->author_time, &ret->author_tz); if (!detailed) { @@ -1444,21 +1448,16 @@ static void get_commit_info(struct commit *commit, return; } - ret->committer = committer_name; - ret->committer_mail = committer_mail; get_ac_line(message, "\ncommitter ", - sizeof(committer_name), committer_name, - sizeof(committer_mail), committer_mail, + &ret->committer, &ret->committer_mail, &ret->committer_time, &ret->committer_tz); - ret->summary = summary_buf; len = find_commit_subject(message, &subject); - if (len && len < sizeof(summary_buf)) { - memcpy(summary_buf, subject, len); - summary_buf[len] = 0; - } else { - sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1)); - } + if (len) + strbuf_add(&ret->summary, subject, len); + else + strbuf_addf(&ret->summary, "(%s)", sha1_to_hex(commit->object.sha1)); + free(reencoded); } @@ -1487,15 +1486,15 @@ static int emit_one_suspect_detail(struct origin *suspect, int repeat) suspect->commit->object.flags |= METAINFO_SHOWN; get_commit_info(suspect->commit, &ci, 1); - printf("author %s\n", ci.author); - printf("author-mail %s\n", ci.author_mail); + printf("author %s\n", ci.author.buf); + printf("author-mail %s\n", ci.author_mail.buf); printf("author-time %lu\n", ci.author_time); - printf("author-tz %s\n", ci.author_tz); - printf("committer %s\n", ci.committer); - printf("committer-mail %s\n", ci.committer_mail); + printf("author-tz %s\n", ci.author_tz.buf); + printf("committer %s\n", ci.committer.buf); + printf("committer-mail %s\n", ci.committer_mail.buf); printf("committer-time %lu\n", ci.committer_time); - printf("committer-tz %s\n", ci.committer_tz); - printf("summary %s\n", ci.summary); + printf("committer-tz %s\n", ci.committer_tz.buf); + printf("summary %s\n", ci.summary.buf); if (suspect->commit->object.flags & UNINTERESTING) printf("boundary\n"); if (suspect->previous) { @@ -1503,6 +1502,9 @@ static int emit_one_suspect_detail(struct origin *suspect, int repeat) printf("previous %s ", sha1_to_hex(prev->commit->object.sha1)); write_name_quoted(prev->path, stdout, '\n'); } + + commit_info_destroy(&ci); + return 1; } @@ -1689,11 +1691,11 @@ static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt) if (opt & OUTPUT_ANNOTATE_COMPAT) { const char *name; if (opt & OUTPUT_SHOW_EMAIL) - name = ci.author_mail; + name = ci.author_mail.buf; else - name = ci.author; + name = ci.author.buf; printf("\t(%10s\t%10s\t%d)", name, - format_time(ci.author_time, ci.author_tz, + format_time(ci.author_time, ci.author_tz.buf, show_raw_time), ent->lno + 1 + cnt); } else { @@ -1712,14 +1714,14 @@ static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt) const char *name; int pad; if (opt & OUTPUT_SHOW_EMAIL) - name = ci.author_mail; + name = ci.author_mail.buf; else - name = ci.author; + name = ci.author.buf; pad = longest_author - utf8_strwidth(name); printf(" (%s%*s %10s", name, pad, "", format_time(ci.author_time, - ci.author_tz, + ci.author_tz.buf, show_raw_time)); } printf(" %*d) ", @@ -1734,6 +1736,8 @@ static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt) if (sb->final_buf_size && cp[-1] != '\n') putchar('\n'); + + commit_info_destroy(&ci); } static void output(struct scoreboard *sb, int option) @@ -1858,9 +1862,9 @@ static void find_alignment(struct scoreboard *sb, int *option) suspect->commit->object.flags |= METAINFO_SHOWN; get_commit_info(suspect->commit, &ci, 1); if (*option & OUTPUT_SHOW_EMAIL) - num = utf8_strwidth(ci.author_mail); + num = utf8_strwidth(ci.author_mail.buf); else - num = utf8_strwidth(ci.author); + num = utf8_strwidth(ci.author.buf); if (longest_author < num) longest_author = num; } @@ -1872,6 +1876,8 @@ static void find_alignment(struct scoreboard *sb, int *option) longest_dst_lines = num; if (largest_score < ent_score(sb, e)) largest_score = ent_score(sb, e); + + commit_info_destroy(&ci); } max_orig_digits = decimal_width(longest_src_lines); max_digits = decimal_width(longest_dst_lines); diff --git a/builtin/shortlog.c b/builtin/shortlog.c index 03c6cd7e86..1eeed0ffd8 100644 --- a/builtin/shortlog.c +++ b/builtin/shortlog.c @@ -36,36 +36,28 @@ static void insert_one_record(struct shortlog *log, const char *dot3 = log->common_repo_prefix; char *buffer, *p; struct string_list_item *item; - char namebuf[1024]; - char emailbuf[1024]; - size_t len; + const char *mailbuf, *namebuf; + size_t namelen, maillen; const char *eol; struct strbuf subject = STRBUF_INIT; + struct strbuf namemailbuf = STRBUF_INIT; struct ident_split ident; if (split_ident_line(&ident, author, strlen(author))) return; - /* copy author name to namebuf, to support matching on both name and email */ - len = ident.name_end - ident.name_begin; - memcpy(namebuf, ident.name_begin, len); - namebuf[len] = 0; + namebuf = ident.name_begin; + mailbuf = ident.mail_begin; + namelen = ident.name_end - ident.name_begin; + maillen = ident.mail_end - ident.mail_begin; - /* copy email name to emailbuf, to allow email replacement as well */ - len = ident.mail_end - ident.mail_begin; - memcpy(emailbuf, ident.mail_begin, len); - emailbuf[len] = 0; + map_user(&log->mailmap, &mailbuf, &maillen, &namebuf, &namelen); + strbuf_add(&namemailbuf, namebuf, namelen); - map_user(&log->mailmap, emailbuf, sizeof(emailbuf), namebuf, sizeof(namebuf)); - len = strlen(namebuf); + if (log->email) + strbuf_addf(&namemailbuf, " <%.*s>", (int)maillen, mailbuf); - if (log->email) { - size_t room = sizeof(namebuf) - len - 1; - int maillen = strlen(emailbuf); - snprintf(namebuf + len, room, " <%.*s>", maillen, emailbuf); - } - - item = string_list_insert(&log->list, namebuf); + item = string_list_insert(&log->list, namemailbuf.buf); if (item->util == NULL) item->util = xcalloc(1, sizeof(struct string_list)); diff --git a/mailmap.c b/mailmap.c index 998846fdaa..743dbdae2f 100644 --- a/mailmap.c +++ b/mailmap.c @@ -240,50 +240,43 @@ static struct string_list_item *lookup_prefix(struct string_list *map, } int map_user(struct string_list *map, - char *email, int maxlen_email, char *name, int maxlen_name) + const char **email, size_t *emaillen, + const char **name, size_t *namelen) { - char *end_of_email; struct string_list_item *item; struct mailmap_entry *me; - size_t maillen; - /* figure out space requirement for email */ - end_of_email = strchr(email, '>'); - if (!end_of_email) { - /* email passed in might not be wrapped in <>, but end with a \0 */ - end_of_email = memchr(email, '\0', maxlen_email); - if (!end_of_email) - return 0; - } + debug_mm("map_user: map '%.*s' <%.*s>\n", + *name, *namelen, *emaillen, *email); - maillen = end_of_email - email; - - debug_mm("map_user: map '%s' <%.*s>\n", name, maillen, email); - - item = lookup_prefix(map, email, maillen); + item = lookup_prefix(map, *email, *emaillen); if (item != NULL) { me = (struct mailmap_entry *)item->util; if (me->namemap.nr) { /* The item has multiple items, so we'll look up on name too */ /* If the name is not found, we choose the simple entry */ - struct string_list_item *subitem = string_list_lookup(&me->namemap, name); + struct string_list_item *subitem; + subitem = lookup_prefix(&me->namemap, *name, *namelen); if (subitem) item = subitem; } } if (item != NULL) { struct mailmap_info *mi = (struct mailmap_info *)item->util; - if (mi->name == NULL && (mi->email == NULL || maxlen_email == 0)) { + if (mi->name == NULL && mi->email == NULL) { debug_mm("map_user: -- (no simple mapping)\n"); return 0; } - if (maxlen_email && mi->email) - strlcpy(email, mi->email, maxlen_email); - else - *end_of_email = '\0'; - if (maxlen_name && mi->name) - strlcpy(name, mi->name, maxlen_name); - debug_mm("map_user: to '%s' <%s>\n", name, mi->email ? mi->email : ""); + if (mi->email) { + *email = mi->email; + *emaillen = strlen(*email); + } + if (mi->name) { + *name = mi->name; + *namelen = strlen(*name); + } + debug_mm("map_user: to '%.*s' <.*%s>\n", *namelen, *name, + *emaillen, *email); return 1; } debug_mm("map_user: --\n"); diff --git a/mailmap.h b/mailmap.h index d5c3664322..ed7c93b05c 100644 --- a/mailmap.h +++ b/mailmap.h @@ -4,7 +4,7 @@ int read_mailmap(struct string_list *map, char **repo_abbrev); void clear_mailmap(struct string_list *map); -int map_user(struct string_list *mailmap, - char *email, int maxlen_email, char *name, int maxlen_name); +int map_user(struct string_list *map, + const char **email, size_t *emaillen, const char **name, size_t *namelen); #endif diff --git a/pretty.c b/pretty.c index 350d1df3fe..dffcadec9e 100644 --- a/pretty.c +++ b/pretty.c @@ -593,7 +593,8 @@ char *logmsg_reencode(const struct commit *commit, return out; } -static int mailmap_name(char *email, int email_len, char *name, int name_len) +static int mailmap_name(const char **email, size_t *email_len, + const char **name, size_t *name_len) { static struct string_list *mail_map; if (!mail_map) { @@ -610,36 +611,26 @@ static size_t format_person_part(struct strbuf *sb, char part, const int placeholder_len = 2; int tz; unsigned long date = 0; - char person_name[1024]; - char person_mail[1024]; struct ident_split s; - const char *name_start, *name_end, *mail_start, *mail_end; + const char *name, *mail; + size_t maillen, namelen; if (split_ident_line(&s, msg, len) < 0) goto skip; - name_start = s.name_begin; - name_end = s.name_end; - mail_start = s.mail_begin; - mail_end = s.mail_end; + name = s.name_begin; + namelen = s.name_end - s.name_begin; + mail = s.mail_begin; + maillen = s.mail_end - s.mail_begin; - if (part == 'N' || part == 'E') { /* mailmap lookup */ - snprintf(person_name, sizeof(person_name), "%.*s", - (int)(name_end - name_start), name_start); - snprintf(person_mail, sizeof(person_mail), "%.*s", - (int)(mail_end - mail_start), mail_start); - mailmap_name(person_mail, sizeof(person_mail), person_name, sizeof(person_name)); - name_start = person_name; - name_end = name_start + strlen(person_name); - mail_start = person_mail; - mail_end = mail_start + strlen(person_mail); - } + if (part == 'N' || part == 'E') /* mailmap lookup */ + mailmap_name(&mail, &maillen, &name, &namelen); if (part == 'n' || part == 'N') { /* name */ - strbuf_add(sb, name_start, name_end-name_start); + strbuf_add(sb, name, namelen); return placeholder_len; } if (part == 'e' || part == 'E') { /* email */ - strbuf_add(sb, mail_start, mail_end-mail_start); + strbuf_add(sb, mail, maillen); return placeholder_len; } From 0e2913b042825cb3ff6798245f9224f41183d159 Mon Sep 17 00:00:00 2001 From: Antoine Pelisse Date: Sat, 5 Jan 2013 22:26:41 +0100 Subject: [PATCH 05/11] mailmap: add mailmap structure to rev_info and pp Pass a mailmap from rev_info to pretty_print_context to so that the pretty printer can use rewritten name and email address when showing commits. Signed-off-by: Antoine Pelisse Signed-off-by: Junio C Hamano --- commit.h | 1 + log-tree.c | 1 + revision.h | 1 + 3 files changed, 3 insertions(+) diff --git a/commit.h b/commit.h index b6ad8f3f30..7f8f9878d8 100644 --- a/commit.h +++ b/commit.h @@ -89,6 +89,7 @@ struct pretty_print_context { char *notes_message; struct reflog_walk_info *reflog_info; const char *output_encoding; + struct string_list *mailmap; }; struct userformat_want { diff --git a/log-tree.c b/log-tree.c index 4f86defe32..92254fd493 100644 --- a/log-tree.c +++ b/log-tree.c @@ -671,6 +671,7 @@ void show_log(struct rev_info *opt) ctx.preserve_subject = opt->preserve_subject; ctx.reflog_info = opt->reflog_info; ctx.fmt = opt->commit_format; + ctx.mailmap = opt->mailmap; pretty_print_commit(&ctx, commit, &msgbuf); if (opt->add_signoff) diff --git a/revision.h b/revision.h index 059bfff812..83a79f5815 100644 --- a/revision.h +++ b/revision.h @@ -143,6 +143,7 @@ struct rev_info { const char *subject_prefix; int no_inline; int show_log_size; + struct string_list *mailmap; /* Filter by commit log message */ struct grep_opt grep_filter; From dffd325f379b825ca449e1592fa9fb31eddf1780 Mon Sep 17 00:00:00 2001 From: Antoine Pelisse Date: Sat, 5 Jan 2013 22:26:42 +0100 Subject: [PATCH 06/11] pretty: use mailmap to display username and email Use the mailmap information to display the rewritten username and email address in all log commands. Signed-off-by: Antoine Pelisse Signed-off-by: Junio C Hamano --- pretty.c | 58 ++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/pretty.c b/pretty.c index dffcadec9e..622275c70b 100644 --- a/pretty.c +++ b/pretty.c @@ -387,9 +387,13 @@ void pp_user_info(const struct pretty_print_context *pp, const char *what, struct strbuf *sb, const char *line, const char *encoding) { + struct strbuf name; + struct strbuf mail; struct ident_split ident; - int linelen, namelen; + int linelen; char *line_end, *date; + const char *mailbuf, *namebuf; + size_t namelen, maillen; int max_length = 78; /* per rfc2822 */ unsigned long time; int tz; @@ -408,42 +412,54 @@ void pp_user_info(const struct pretty_print_context *pp, if (split_ident_line(&ident, line, linelen)) return; - namelen = ident.mail_end - ident.name_begin + 1; + + mailbuf = ident.mail_begin; + maillen = ident.mail_end - ident.mail_begin; + namebuf = ident.name_begin; + namelen = ident.name_end - ident.name_begin; + + if (pp->mailmap) + map_user(pp->mailmap, &mailbuf, &maillen, &namebuf, &namelen); + + strbuf_init(&mail, 0); + strbuf_init(&name, 0); + + strbuf_add(&mail, mailbuf, maillen); + strbuf_add(&name, namebuf, namelen); + + namelen = name.len + mail.len + 3; /* ' ' + '<' + '>' */ time = strtoul(ident.date_begin, &date, 10); tz = strtol(date, NULL, 10); if (pp->fmt == CMIT_FMT_EMAIL) { - int display_name_length; - - display_name_length = ident.name_end - ident.name_begin; - strbuf_addstr(sb, "From: "); - if (needs_rfc2047_encoding(line, display_name_length, RFC2047_ADDRESS)) { - add_rfc2047(sb, line, display_name_length, - encoding, RFC2047_ADDRESS); + if (needs_rfc2047_encoding(name.buf, name.len, RFC2047_ADDRESS)) { + add_rfc2047(sb, name.buf, name.len, + encoding, RFC2047_ADDRESS); max_length = 76; /* per rfc2047 */ - } else if (needs_rfc822_quoting(line, display_name_length)) { + } else if (needs_rfc822_quoting(name.buf, name.len)) { struct strbuf quoted = STRBUF_INIT; - add_rfc822_quoted("ed, line, display_name_length); + add_rfc822_quoted("ed, name.buf, name.len); strbuf_add_wrapped_bytes(sb, quoted.buf, quoted.len, -6, 1, max_length); strbuf_release("ed); } else { - strbuf_add_wrapped_bytes(sb, line, display_name_length, - -6, 1, max_length); + strbuf_add_wrapped_bytes(sb, name.buf, name.len, + -6, 1, max_length); } - if (namelen - display_name_length + last_line_length(sb) > max_length) { + if (namelen - name.len + last_line_length(sb) > max_length) strbuf_addch(sb, '\n'); - if (!isspace(ident.name_end[0])) - strbuf_addch(sb, ' '); - } - strbuf_add(sb, ident.name_end, namelen - display_name_length); - strbuf_addch(sb, '\n'); + + strbuf_addf(sb, " <%s>\n", mail.buf); } else { - strbuf_addf(sb, "%s: %.*s%.*s\n", what, + strbuf_addf(sb, "%s: %.*s%s <%s>\n", what, (pp->fmt == CMIT_FMT_FULLER) ? 4 : 0, - " ", namelen, line); + " ", name.buf, mail.buf); } + + strbuf_release(&mail); + strbuf_release(&name); + switch (pp->fmt) { case CMIT_FMT_MEDIUM: strbuf_addf(sb, "Date: %s\n", show_date(time, tz, pp->date_mode)); From ea57bc0d41b03cc1528be5fafa5eb8bfd4f773cb Mon Sep 17 00:00:00 2001 From: Antoine Pelisse Date: Sat, 5 Jan 2013 22:26:43 +0100 Subject: [PATCH 07/11] log: add --use-mailmap option Add the --use-mailmap option to log commands. It allows to display names from mailmap file when displaying logs, whatever the format used. Signed-off-by: Antoine Pelisse Signed-off-by: Junio C Hamano --- Documentation/git-log.txt | 5 +++++ builtin/log.c | 9 ++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Documentation/git-log.txt b/Documentation/git-log.txt index 585dac40ba..a99be97335 100644 --- a/Documentation/git-log.txt +++ b/Documentation/git-log.txt @@ -47,6 +47,11 @@ OPTIONS Print out the ref name given on the command line by which each commit was reached. +--use-mailmap:: + Use mailmap file to map author and committer names and email + to canonical real names and email addresses. See + linkgit:git-shortlog[1]. + --full-diff:: Without this flag, "git log -p ..." shows commits that touch the specified paths, and diffs about the same specified diff --git a/builtin/log.c b/builtin/log.c index e7b7db1cac..d2bd8cea78 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -22,6 +22,7 @@ #include "branch.h" #include "streaming.h" #include "version.h" +#include "mailmap.h" /* Set a default date-time format for git log ("log.date" config variable) */ static const char *default_date_mode = NULL; @@ -94,11 +95,12 @@ static void cmd_log_init_finish(int argc, const char **argv, const char *prefix, struct rev_info *rev, struct setup_revision_opt *opt) { struct userformat_want w; - int quiet = 0, source = 0; + int quiet = 0, source = 0, mailmap = 0; const struct option builtin_log_options[] = { OPT_BOOLEAN(0, "quiet", &quiet, N_("suppress diff output")), OPT_BOOLEAN(0, "source", &source, N_("show source")), + OPT_BOOLEAN(0, "use-mailmap", &mailmap, N_("Use mail map file")), { OPTION_CALLBACK, 0, "decorate", NULL, NULL, N_("decorate options"), PARSE_OPT_OPTARG, decorate_callback}, OPT_END() @@ -136,6 +138,11 @@ static void cmd_log_init_finish(int argc, const char **argv, const char *prefix, if (source) rev->show_source = 1; + if (mailmap) { + rev->mailmap = xcalloc(1, sizeof(struct string_list)); + read_mailmap(rev->mailmap, NULL); + } + if (rev->pretty_given && rev->commit_format == CMIT_FMT_RAW) { /* * "log --pretty=raw" is special; ignore UI oriented From d20743433e5889319a82c26beb5789e708c450a2 Mon Sep 17 00:00:00 2001 From: Antoine Pelisse Date: Sat, 5 Jan 2013 22:26:44 +0100 Subject: [PATCH 08/11] test: add test for --use-mailmap option The new option '--use-mailmap' can be used to make sure that mailmap file is used to convert name when running log commands. The test is simple and checks that the Author line is correctly replaced when running log. Signed-off-by: Antoine Pelisse Signed-off-by: Junio C Hamano --- t/t4203-mailmap.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/t/t4203-mailmap.sh b/t/t4203-mailmap.sh index 1f182f612c..db043dcec6 100755 --- a/t/t4203-mailmap.sh +++ b/t/t4203-mailmap.sh @@ -239,6 +239,20 @@ test_expect_success 'Log output (complex mapping)' ' test_cmp expect actual ' +cat >expect <<\EOF +Author: CTO +Author: Santa Claus +Author: Santa Claus +Author: Other Author +Author: Other Author +Author: Some Dude +Author: A U Thor +EOF +test_expect_success 'Log output with --use-mailmap' ' + git log --use-mailmap | grep Author >actual && + test_cmp expect actual +' + # git blame cat >expect <<\EOF ^OBJI (A U Thor DATE 1) one From d72fbe8111fbd3083d7063078c96eed27098490c Mon Sep 17 00:00:00 2001 From: Antoine Pelisse Date: Sat, 5 Jan 2013 22:26:45 +0100 Subject: [PATCH 09/11] log: grep author/committer using mailmap Currently you can use mailmap to display log authors and committers but you can't use the mailmap to find commits with mapped values. This commit allows you to run: git log --use-mailmap --author mapped_name_or_email git log --use-mailmap --committer mapped_name_or_email Of course it only works if the --use-mailmap option is used. The new name and email are copied only when necessary. Signed-off-by: Antoine Pelisse Signed-off-by: Junio C Hamano --- revision.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++ t/t4203-mailmap.sh | 18 ++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/revision.c b/revision.c index 95d21e6472..2cce85a8e0 100644 --- a/revision.c +++ b/revision.c @@ -13,6 +13,7 @@ #include "decorate.h" #include "log-tree.h" #include "string-list.h" +#include "mailmap.h" volatile show_early_output_fn_t show_early_output; @@ -2219,6 +2220,51 @@ static int rewrite_parents(struct rev_info *revs, struct commit *commit) return 0; } +static int commit_rewrite_person(struct strbuf *buf, const char *what, struct string_list *mailmap) +{ + char *person, *endp; + size_t len, namelen, maillen; + const char *name; + const char *mail; + struct ident_split ident; + + person = strstr(buf->buf, what); + if (!person) + return 0; + + person += strlen(what); + endp = strchr(person, '\n'); + if (!endp) + return 0; + + len = endp - person; + + if (split_ident_line(&ident, person, len)) + return 0; + + mail = ident.mail_begin; + maillen = ident.mail_end - ident.mail_begin; + name = ident.name_begin; + namelen = ident.name_end - ident.name_begin; + + if (map_user(mailmap, &mail, &maillen, &name, &namelen)) { + struct strbuf namemail = STRBUF_INIT; + + strbuf_addf(&namemail, "%.*s <%.*s>", + (int)namelen, name, (int)maillen, mail); + + strbuf_splice(buf, ident.name_begin - buf->buf, + ident.mail_end - ident.name_begin + 1, + namemail.buf, namemail.len); + + strbuf_release(&namemail); + + return 1; + } + + return 0; +} + static int commit_match(struct commit *commit, struct rev_info *opt) { int retval; @@ -2237,6 +2283,14 @@ static int commit_match(struct commit *commit, struct rev_info *opt) if (buf.len) strbuf_addstr(&buf, commit->buffer); + if (opt->mailmap) { + if (!buf.len) + strbuf_addstr(&buf, commit->buffer); + + commit_rewrite_person(&buf, "\nauthor ", opt->mailmap); + commit_rewrite_person(&buf, "\ncommitter ", opt->mailmap); + } + /* Append "fake" message parts as needed */ if (opt->show_notes) { if (!buf.len) diff --git a/t/t4203-mailmap.sh b/t/t4203-mailmap.sh index db043dcec6..e16187fd17 100755 --- a/t/t4203-mailmap.sh +++ b/t/t4203-mailmap.sh @@ -248,11 +248,29 @@ Author: Other Author Author: Some Dude Author: A U Thor EOF + test_expect_success 'Log output with --use-mailmap' ' git log --use-mailmap | grep Author >actual && test_cmp expect actual ' +cat >expect <<\EOF +Author: Santa Claus +Author: Santa Claus +EOF + +test_expect_success 'Grep author with --use-mailmap' ' + git log --use-mailmap --author Santa | grep Author >actual && + test_cmp expect actual +' + +>expect + +test_expect_success 'Only grep replaced author with --use-mailmap' ' + git log --use-mailmap --author "" >actual && + test_cmp expect actual +' + # git blame cat >expect <<\EOF ^OBJI (A U Thor DATE 1) one From e6bb5f78fb351c89a80826befda3c8f06f91f249 Mon Sep 17 00:00:00 2001 From: Antoine Pelisse Date: Sat, 5 Jan 2013 22:26:46 +0100 Subject: [PATCH 10/11] log: add log.mailmap configuration option Teach "log.mailmap" configuration variable to turn "--use-mailmap" option on to "git log", "git show" and "git whatchanged". The "--no-use-mailmap" option from the command line can countermand the setting. Signed-off-by: Antoine Pelisse Signed-off-by: Junio C Hamano --- Documentation/config.txt | 4 ++++ builtin/log.c | 7 +++++++ t/t4203-mailmap.sh | 24 ++++++++++++++++++++++++ 3 files changed, 35 insertions(+) diff --git a/Documentation/config.txt b/Documentation/config.txt index bf8f911e1f..226362a0ea 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -1509,6 +1509,10 @@ log.showroot:: Tools like linkgit:git-log[1] or linkgit:git-whatchanged[1], which normally hide the root commit will now show it. True by default. +log.mailmap:: + If true, makes linkgit:git-log[1], linkgit:git-show[1], and + linkgit:git-whatchanged[1] assume `--use-mailmap`. + mailmap.file:: The location of an augmenting mailmap file. The default mailmap, located in the root of the repository, is loaded diff --git a/builtin/log.c b/builtin/log.c index d2bd8cea78..16e6520a5d 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -31,6 +31,7 @@ static int default_abbrev_commit; static int default_show_root = 1; static int decoration_style; static int decoration_given; +static int use_mailmap_config; static const char *fmt_patch_subject_prefix = "PATCH"; static const char *fmt_pretty; @@ -106,6 +107,7 @@ static void cmd_log_init_finish(int argc, const char **argv, const char *prefix, OPT_END() }; + mailmap = use_mailmap_config; argc = parse_options(argc, argv, prefix, builtin_log_options, builtin_log_usage, PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN | @@ -358,6 +360,11 @@ static int git_log_config(const char *var, const char *value, void *cb) } if (!prefixcmp(var, "color.decorate.")) return parse_decorate_color_config(var, 15, value); + if (!strcmp(var, "log.mailmap")) { + use_mailmap_config = git_config_bool(var, value); + return 0; + } + if (grep_config(var, value, cb) < 0) return -1; return git_diff_ui_config(var, value, cb); diff --git a/t/t4203-mailmap.sh b/t/t4203-mailmap.sh index e16187fd17..7d4d31c830 100755 --- a/t/t4203-mailmap.sh +++ b/t/t4203-mailmap.sh @@ -254,6 +254,21 @@ test_expect_success 'Log output with --use-mailmap' ' test_cmp expect actual ' +cat >expect <<\EOF +Author: CTO +Author: Santa Claus +Author: Santa Claus +Author: Other Author +Author: Other Author +Author: Some Dude +Author: A U Thor +EOF + +test_expect_success 'Log output with log.mailmap' ' + git -c log.mailmap=True log | grep Author >actual && + test_cmp expect actual +' + cat >expect <<\EOF Author: Santa Claus Author: Santa Claus @@ -263,6 +278,15 @@ test_expect_success 'Grep author with --use-mailmap' ' git log --use-mailmap --author Santa | grep Author >actual && test_cmp expect actual ' +cat >expect <<\EOF +Author: Santa Claus +Author: Santa Claus +EOF + +test_expect_success 'Grep author with log.mailmap' ' + git -c log.mailmap=True log --author Santa | grep Author >actual && + test_cmp expect actual +' >expect From df874fa82ef06c8c438b9a19c56c0b5eb089c012 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 8 Jan 2013 00:02:49 -0800 Subject: [PATCH 11/11] log --use-mailmap: optimize for cases without --author/--committer search When we taught the commit_match() mechanism to pay attention to the new --use-mailmap option, we started to unconditionally copy the commit object to a temporary buffer, just in case we need the author and committer lines updated via the mailmap mechanism, and rewrite author and committer using the mailmap. It turns out that this has a rather unpleasant performance implications. In the linux kernel repository, running $ git log --author='Junio C Hamano' --pretty=short >/dev/null under /usr/bin/time, with and without --use-mailmap (the .mailmap file is 118 entries long, the particular author does not appear in it), cost (with warm cache): [without --use-mailmap] 5.42user 0.26system 0:05.70elapsed 99%CPU (0avgtext+0avgdata 2005936maxresident)k 0inputs+0outputs (0major+137669minor)pagefaults 0swaps [with --use-mailmap] 6.47user 0.30system 0:06.78elapsed 99%CPU (0avgtext+0avgdata 2006288maxresident)k 0inputs+0outputs (0major+137692minor)pagefaults 0swaps which incurs about 20% overhead. The command is doing extra work, so the extra cost may be justified. But it is inexcusable to pay the cost when we do not need author/committer match. In the same repository, $ git log --grep='fix menuconfig on debian lenny' --pretty=short >/dev/null shows very similar numbers as the above: [without --use-mailmap] 5.32user 0.30system 0:05.63elapsed 99%CPU (0avgtext+0avgdata 2005984maxresident)k 0inputs+0outputs (0major+137672minor)pagefaults 0swaps [with --use-mailmap] 6.64user 0.24system 0:06.89elapsed 99%CPU (0avgtext+0avgdata 2006320maxresident)k 0inputs+0outputs (0major+137694minor)pagefaults 0swaps The latter case is an unnecessary performance regression. We may want to _show_ the result with mailmap applied, but we do not have to copy and rewrite the author/committer of all commits we try to match if we do not query for these fields. Trivially optimize this performace regression by limiting the rewrites for only when we are matching with author/committer fields. Signed-off-by: Junio C Hamano --- revision.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/revision.c b/revision.c index 2cce85a8e0..d7562ee500 100644 --- a/revision.c +++ b/revision.c @@ -2283,7 +2283,7 @@ static int commit_match(struct commit *commit, struct rev_info *opt) if (buf.len) strbuf_addstr(&buf, commit->buffer); - if (opt->mailmap) { + if (opt->grep_filter.header_list && opt->mailmap) { if (!buf.len) strbuf_addstr(&buf, commit->buffer);