2006-10-22 13:23:31 +02:00
|
|
|
#include "builtin.h"
|
|
|
|
#include "cache.h"
|
2017-06-14 20:07:36 +02:00
|
|
|
#include "config.h"
|
2006-10-22 13:23:31 +02:00
|
|
|
#include "commit.h"
|
|
|
|
#include "diff.h"
|
2008-07-21 20:03:49 +02:00
|
|
|
#include "string-list.h"
|
2006-10-22 13:23:31 +02:00
|
|
|
#include "revision.h"
|
2006-12-22 22:15:59 +01:00
|
|
|
#include "utf8.h"
|
2007-04-27 09:41:15 +02:00
|
|
|
#include "mailmap.h"
|
2008-02-26 00:24:14 +01:00
|
|
|
#include "shortlog.h"
|
2008-07-09 23:38:33 +02:00
|
|
|
#include "parse-options.h"
|
2020-09-27 10:40:04 +02:00
|
|
|
#include "trailer.h"
|
2020-11-11 21:02:21 +01:00
|
|
|
#include "strmap.h"
|
2006-10-22 13:23:31 +02:00
|
|
|
|
2008-07-09 23:38:33 +02:00
|
|
|
static char const * const shortlog_usage[] = {
|
2018-03-10 12:52:11 +01:00
|
|
|
N_("git shortlog [<options>] [<revision-range>] [[--] <path>...]"),
|
|
|
|
N_("git log --pretty=short | git shortlog [<options>]"),
|
2008-07-09 23:38:33 +02:00
|
|
|
NULL
|
|
|
|
};
|
2006-10-22 13:23:31 +02:00
|
|
|
|
2016-01-18 21:02:59 +01:00
|
|
|
/*
 * Each string_list_item in our list uses its util pointer in one of two
 * ways, depending on the output mode:
 *
 *   - without --summary, util points to a string list holding the
 *     oneline subjects collected for that author
 *
 *   - with --summary, only the number of subjects matters, so no list is
 *     kept; the pointer slot itself is reused to hold an integer counter
 *
 * UTIL_TO_INT() reads the pointer slot as that integer counter, i.e. it
 * is only meaningful in the --summary case.
 */
#define UTIL_TO_INT(item) ((intptr_t)((item)->util))
|
|
|
|
|
|
|
|
/*
 * Comparison callback for items whose util slot holds an integer counter
 * (the --summary case, see UTIL_TO_INT): the item with the larger
 * counter sorts first.
 *
 * a1 and a2 point to struct string_list_item.  Returns a positive value
 * when a1's counter is smaller, a negative value when it is larger, and
 * 0 when both are equal.
 *
 * The counters are intptr_t, so they are compared explicitly rather than
 * subtracted: a difference narrowed to int could end up with the wrong
 * sign when the two values are far apart.
 */
static int compare_by_counter(const void *a1, const void *a2)
{
	const struct string_list_item *i1 = a1, *i2 = a2;
	intptr_t c1 = UTIL_TO_INT(i1);
	intptr_t c2 = UTIL_TO_INT(i2);

	if (c1 < c2)
		return 1;
	if (c1 > c2)
		return -1;
	return 0;
}
|
|
|
|
|
|
|
|
static int compare_by_list(const void *a1, const void *a2)
|
2006-10-22 13:23:31 +02:00
|
|
|
{
|
2008-07-21 20:03:49 +02:00
|
|
|
const struct string_list_item *i1 = a1, *i2 = a2;
|
|
|
|
const struct string_list *l1 = i1->util, *l2 = i2->util;
|
2006-10-22 13:23:31 +02:00
|
|
|
|
|
|
|
if (l1->nr < l2->nr)
|
2006-11-21 21:12:06 +01:00
|
|
|
return 1;
|
2006-10-22 13:23:31 +02:00
|
|
|
else if (l1->nr == l2->nr)
|
|
|
|
return 0;
|
|
|
|
else
|
2006-11-21 21:12:06 +01:00
|
|
|
return -1;
|
2006-10-22 13:23:31 +02:00
|
|
|
}
|
|
|
|
|
2008-02-26 00:24:14 +01:00
|
|
|
static void insert_one_record(struct shortlog *log,
|
2020-09-25 09:01:50 +02:00
|
|
|
const char *ident,
|
2007-12-08 02:07:41 +01:00
|
|
|
const char *oneline)
|
2006-10-22 13:23:31 +02:00
|
|
|
{
|
2008-07-21 20:03:49 +02:00
|
|
|
struct string_list_item *item;
|
2007-12-08 02:07:41 +01:00
|
|
|
|
2020-09-25 09:01:50 +02:00
|
|
|
item = string_list_insert(&log->list, ident);
|
2006-10-22 13:23:31 +02:00
|
|
|
|
2016-01-18 21:02:56 +01:00
|
|
|
if (log->summary)
|
2016-01-18 21:02:59 +01:00
|
|
|
item->util = (void *)(UTIL_TO_INT(item) + 1);
|
2016-01-18 21:02:56 +01:00
|
|
|
else {
|
|
|
|
const char *dot3 = log->common_repo_prefix;
|
|
|
|
char *buffer, *p;
|
|
|
|
struct strbuf subject = STRBUF_INIT;
|
|
|
|
const char *eol;
|
|
|
|
|
|
|
|
/* Skip any leading whitespace, including any blank lines. */
|
|
|
|
while (*oneline && isspace(*oneline))
|
|
|
|
oneline++;
|
|
|
|
eol = strchr(oneline, '\n');
|
|
|
|
if (!eol)
|
|
|
|
eol = oneline + strlen(oneline);
|
|
|
|
if (starts_with(oneline, "[PATCH")) {
|
|
|
|
char *eob = strchr(oneline, ']');
|
|
|
|
if (eob && (!eol || eob < eol))
|
|
|
|
oneline = eob + 1;
|
|
|
|
}
|
|
|
|
while (*oneline && isspace(*oneline) && *oneline != '\n')
|
|
|
|
oneline++;
|
|
|
|
format_subject(&subject, oneline, " ");
|
|
|
|
buffer = strbuf_detach(&subject, NULL);
|
2006-10-22 13:23:31 +02:00
|
|
|
|
2016-01-18 21:02:56 +01:00
|
|
|
if (dot3) {
|
|
|
|
int dot3len = strlen(dot3);
|
|
|
|
if (dot3len > 5) {
|
|
|
|
while ((p = strstr(buffer, dot3)) != NULL) {
|
|
|
|
int taillen = strlen(p) - dot3len;
|
|
|
|
memcpy(p, "/.../", 5);
|
|
|
|
memmove(p + 5, p + dot3len, taillen + 1);
|
|
|
|
}
|
2006-11-25 09:01:27 +01:00
|
|
|
}
|
|
|
|
}
|
2006-10-22 13:23:31 +02:00
|
|
|
|
2016-01-18 21:02:59 +01:00
|
|
|
if (item->util == NULL)
|
|
|
|
item->util = xcalloc(1, sizeof(struct string_list));
|
2016-01-18 21:02:56 +01:00
|
|
|
string_list_append(item->util, buffer);
|
|
|
|
}
|
2006-10-22 13:23:31 +02:00
|
|
|
}
|
|
|
|
|
2020-09-27 10:40:09 +02:00
|
|
|
static int parse_ident(struct shortlog *log,
|
|
|
|
struct strbuf *out, const char *in)
|
shortlog: skip format/parse roundtrip for internal traversal
The original git-shortlog command parsed the output of
git-log, and the logic went something like this:
1. Read stdin looking for "author" lines.
2. Parse the identity into its name/email bits.
3. Apply mailmap to the name/email.
4. Reformat the identity into a single buffer that is our
"key" for grouping entries (either a name by default,
or "name <email>" if --email was given).
The first part happens in read_from_stdin(), and the other
three steps are part of insert_one_record().
When we do an internal traversal, we just swap out the stdin
read in step 1 for reading the commit objects ourselves.
Prior to 2db6b83d18 (shortlog: replace hand-parsing of
author with pretty-printer, 2016-01-18), that made sense; we
still had to parse the ident in the commit message.
But after that commit, we use pretty.c's "%an <%ae>" to get
the author ident (for simplicity). Which means that the
pretty printer is doing a parse/format under the hood, and
then we parse the result, apply the mailmap, and format the
result again.
Instead, we can just ask pretty.c to do all of those steps
for us (including the mailmap via "%aN <%aE>", and not
formatting the address when --email is missing).
And then we can push steps 2-4 into read_from_stdin(). This
speeds up "git shortlog -ns" on linux.git by about 3%, and
eliminates a leak in insert_one_record() of the namemailbuf
strbuf.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 11:21:27 +02:00
|
|
|
{
|
|
|
|
const char *mailbuf, *namebuf;
|
|
|
|
size_t namelen, maillen;
|
|
|
|
struct ident_split ident;
|
|
|
|
|
|
|
|
if (split_ident_line(&ident, in, strlen(in)))
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
namebuf = ident.name_begin;
|
|
|
|
mailbuf = ident.mail_begin;
|
|
|
|
namelen = ident.name_end - ident.name_begin;
|
|
|
|
maillen = ident.mail_end - ident.mail_begin;
|
|
|
|
|
|
|
|
map_user(&log->mailmap, &mailbuf, &maillen, &namebuf, &namelen);
|
|
|
|
strbuf_add(out, namebuf, namelen);
|
|
|
|
if (log->email)
|
|
|
|
strbuf_addf(out, " <%.*s>", (int)maillen, mailbuf);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2008-02-26 00:24:14 +01:00
|
|
|
static void read_from_stdin(struct shortlog *log)
|
2006-10-22 13:23:31 +02:00
|
|
|
{
|
2020-09-25 09:01:50 +02:00
|
|
|
struct strbuf ident = STRBUF_INIT;
|
|
|
|
struct strbuf mapped_ident = STRBUF_INIT;
|
2016-01-18 21:02:44 +01:00
|
|
|
struct strbuf oneline = STRBUF_INIT;
|
2016-10-11 20:45:58 +02:00
|
|
|
static const char *author_match[2] = { "Author: ", "author " };
|
|
|
|
static const char *committer_match[2] = { "Commit: ", "committer " };
|
|
|
|
const char **match;
|
2007-12-08 02:07:41 +01:00
|
|
|
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
if (HAS_MULTI_BITS(log->groups))
|
|
|
|
die(_("using multiple --group options with stdin is not supported"));
|
|
|
|
|
|
|
|
switch (log->groups) {
|
2020-09-27 10:39:59 +02:00
|
|
|
case SHORTLOG_GROUP_AUTHOR:
|
|
|
|
match = author_match;
|
|
|
|
break;
|
|
|
|
case SHORTLOG_GROUP_COMMITTER:
|
|
|
|
match = committer_match;
|
|
|
|
break;
|
2020-09-27 10:40:04 +02:00
|
|
|
case SHORTLOG_GROUP_TRAILER:
|
|
|
|
die(_("using --group=trailer with stdin is not supported"));
|
2020-09-27 10:39:59 +02:00
|
|
|
default:
|
|
|
|
BUG("unhandled shortlog group");
|
|
|
|
}
|
|
|
|
|
2020-09-25 09:01:50 +02:00
|
|
|
while (strbuf_getline_lf(&ident, stdin) != EOF) {
|
2016-01-18 21:02:40 +01:00
|
|
|
const char *v;
|
2020-09-25 09:01:50 +02:00
|
|
|
if (!skip_prefix(ident.buf, match[0], &v) &&
|
|
|
|
!skip_prefix(ident.buf, match[1], &v))
|
2007-12-08 02:07:41 +01:00
|
|
|
continue;
|
2016-01-29 01:10:14 +01:00
|
|
|
while (strbuf_getline_lf(&oneline, stdin) != EOF &&
|
2016-01-18 21:02:44 +01:00
|
|
|
oneline.len)
|
2007-12-08 02:07:41 +01:00
|
|
|
; /* discard headers */
|
2016-01-29 01:10:14 +01:00
|
|
|
while (strbuf_getline_lf(&oneline, stdin) != EOF &&
|
2016-01-18 21:02:44 +01:00
|
|
|
!oneline.len)
|
2007-12-08 02:07:41 +01:00
|
|
|
; /* discard blanks */
|
shortlog: skip format/parse roundtrip for internal traversal
The original git-shortlog command parsed the output of
git-log, and the logic went something like this:
1. Read stdin looking for "author" lines.
2. Parse the identity into its name/email bits.
3. Apply mailmap to the name/email.
4. Reformat the identity into a single buffer that is our
"key" for grouping entries (either a name by default,
or "name <email>" if --email was given).
The first part happens in read_from_stdin(), and the other
three steps are part of insert_one_record().
When we do an internal traversal, we just swap out the stdin
read in step 1 for reading the commit objects ourselves.
Prior to 2db6b83d18 (shortlog: replace hand-parsing of
author with pretty-printer, 2016-01-18), that made sense; we
still had to parse the ident in the commit message.
But after that commit, we use pretty.c's "%an <%ae>" to get
the author ident (for simplicity). Which means that the
pretty printer is doing a parse/format under the hood, and
then we parse the result, apply the mailmap, and format the
result again.
Instead, we can just ask pretty.c to do all of those steps
for us (including the mailmap via "%aN <%aE>", and not
formatting the address when --email is missing).
And then we can push steps 2-4 into read_from_stdin(). This
speeds up "git shortlog -ns" on linux.git by about 3%, and
eliminates a leak in insert_one_record() of the namemailbuf
strbuf.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 11:21:27 +02:00
|
|
|
|
2020-09-25 09:01:50 +02:00
|
|
|
strbuf_reset(&mapped_ident);
|
2020-09-27 10:40:09 +02:00
|
|
|
if (parse_ident(log, &mapped_ident, v) < 0)
|
shortlog: skip format/parse roundtrip for internal traversal
The original git-shortlog command parsed the output of
git-log, and the logic went something like this:
1. Read stdin looking for "author" lines.
2. Parse the identity into its name/email bits.
3. Apply mailmap to the name/email.
4. Reformat the identity into a single buffer that is our
"key" for grouping entries (either a name by default,
or "name <email>" if --email was given).
The first part happens in read_from_stdin(), and the other
three steps are part of insert_one_record().
When we do an internal traversal, we just swap out the stdin
read in step 1 for reading the commit objects ourselves.
Prior to 2db6b83d18 (shortlog: replace hand-parsing of
author with pretty-printer, 2016-01-18), that made sense; we
still had to parse the ident in the commit message.
But after that commit, we use pretty.c's "%an <%ae>" to get
the author ident (for simplicity). Which means that the
pretty printer is doing a parse/format under the hood, and
then we parse the result, apply the mailmap, and format the
result again.
Instead, we can just ask pretty.c to do all of those steps
for us (including the mailmap via "%aN <%aE>", and not
formatting the address when --email is missing).
And then we can push steps 2-4 into read_from_stdin(). This
speeds up "git shortlog -ns" on linux.git by about 3%, and
eliminates a leak in insert_one_record() of the namemailbuf
strbuf.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 11:21:27 +02:00
|
|
|
continue;
|
|
|
|
|
2020-09-25 09:01:50 +02:00
|
|
|
insert_one_record(log, mapped_ident.buf, oneline.buf);
|
2006-10-22 13:23:31 +02:00
|
|
|
}
|
2020-09-25 09:01:50 +02:00
|
|
|
strbuf_release(&ident);
|
|
|
|
strbuf_release(&mapped_ident);
|
2016-01-18 21:02:44 +01:00
|
|
|
strbuf_release(&oneline);
|
2006-10-22 13:23:31 +02:00
|
|
|
}
|
|
|
|
|
2020-09-27 10:40:04 +02:00
|
|
|
static void insert_records_from_trailers(struct shortlog *log,
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
struct strset *dups,
|
2020-09-27 10:40:04 +02:00
|
|
|
struct commit *commit,
|
|
|
|
struct pretty_print_context *ctx,
|
|
|
|
const char *oneline)
|
|
|
|
{
|
|
|
|
struct trailer_iterator iter;
|
|
|
|
const char *commit_buffer, *body;
|
2020-09-27 10:40:11 +02:00
|
|
|
struct strbuf ident = STRBUF_INIT;
|
2020-09-27 10:40:04 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Using format_commit_message("%B") would be simpler here, but
|
|
|
|
* this saves us copying the message.
|
|
|
|
*/
|
|
|
|
commit_buffer = logmsg_reencode(commit, NULL, ctx->output_encoding);
|
|
|
|
body = strstr(commit_buffer, "\n\n");
|
|
|
|
if (!body)
|
|
|
|
return;
|
|
|
|
|
|
|
|
trailer_iterator_init(&iter, body);
|
|
|
|
while (trailer_iterator_advance(&iter)) {
|
|
|
|
const char *value = iter.val.buf;
|
|
|
|
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
if (!string_list_has_string(&log->trailers, iter.key.buf))
|
2020-09-27 10:40:04 +02:00
|
|
|
continue;
|
|
|
|
|
2020-09-27 10:40:11 +02:00
|
|
|
strbuf_reset(&ident);
|
|
|
|
if (!parse_ident(log, &ident, value))
|
|
|
|
value = ident.buf;
|
|
|
|
|
2020-11-11 21:02:21 +01:00
|
|
|
if (!strset_add(dups, value))
|
2020-09-27 10:40:07 +02:00
|
|
|
continue;
|
2020-09-27 10:40:04 +02:00
|
|
|
insert_one_record(log, value, oneline);
|
|
|
|
}
|
|
|
|
trailer_iterator_release(&iter);
|
|
|
|
|
2020-09-27 10:40:11 +02:00
|
|
|
strbuf_release(&ident);
|
2020-09-27 10:40:04 +02:00
|
|
|
unuse_commit_buffer(commit, commit_buffer);
|
|
|
|
}
|
|
|
|
|
2008-02-26 00:24:14 +01:00
|
|
|
void shortlog_add_commit(struct shortlog *log, struct commit *commit)
|
2006-10-22 13:23:31 +02:00
|
|
|
{
|
2020-09-25 09:01:50 +02:00
|
|
|
struct strbuf ident = STRBUF_INIT;
|
2016-01-18 21:02:48 +01:00
|
|
|
struct strbuf oneline = STRBUF_INIT;
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
struct strset dups = STRSET_INIT;
|
2016-01-18 21:02:48 +01:00
|
|
|
struct pretty_print_context ctx = {0};
|
2020-09-27 10:39:59 +02:00
|
|
|
const char *oneline_str;
|
2016-01-18 21:02:48 +01:00
|
|
|
|
|
|
|
ctx.fmt = CMIT_FMT_USERFORMAT;
|
|
|
|
ctx.abbrev = log->abbrev;
|
2017-03-01 12:37:07 +01:00
|
|
|
ctx.print_email_subject = 1;
|
2016-01-18 21:02:48 +01:00
|
|
|
ctx.date_mode.type = DATE_NORMAL;
|
|
|
|
ctx.output_encoding = get_log_output_encoding();
|
|
|
|
|
2016-01-18 21:02:52 +01:00
|
|
|
if (!log->summary) {
|
|
|
|
if (log->user_format)
|
|
|
|
pretty_print_commit(&ctx, commit, &oneline);
|
2008-02-26 00:24:14 +01:00
|
|
|
else
|
2016-01-18 21:02:52 +01:00
|
|
|
format_commit_message(commit, "%s", &oneline, &ctx);
|
shortlog: ignore commits with missing authors
Most of git's traversals are robust against minor breakages
in commit data. For example, "git log" will still output an
entry for a commit that has a broken encoding or missing
author, and will not abort the whole operation.
Shortlog, on the other hand, will die as soon as it sees a
commit without an author, meaning that a repository with
a broken commit cannot get any shortlog output at all.
Let's downgrade this fatal error to a warning, and continue
the operation.
We simply ignore the commit and do not count it in the total
(since we do not have any author under which to file it).
Alternatively, we could output some kind of "<empty>" record
to collect these bogus commits. It is probably not worth it,
though; we have already warned to stderr, so the user is
aware that such bogosities exist, and any placeholder we
came up with would either be syntactically invalid, or would
potentially conflict with real data.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-09-18 23:14:00 +02:00
|
|
|
}
|
2020-09-27 10:39:59 +02:00
|
|
|
oneline_str = oneline.len ? oneline.buf : "<none>";
|
|
|
|
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
if (log->groups & SHORTLOG_GROUP_AUTHOR) {
|
|
|
|
strbuf_reset(&ident);
|
2020-09-27 10:39:59 +02:00
|
|
|
format_commit_message(commit,
|
|
|
|
log->email ? "%aN <%aE>" : "%aN",
|
|
|
|
&ident, &ctx);
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
if (!HAS_MULTI_BITS(log->groups) ||
|
2020-11-11 21:02:21 +01:00
|
|
|
strset_add(&dups, ident.buf))
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
insert_one_record(log, ident.buf, oneline_str);
|
|
|
|
}
|
|
|
|
if (log->groups & SHORTLOG_GROUP_COMMITTER) {
|
|
|
|
strbuf_reset(&ident);
|
2020-09-27 10:39:59 +02:00
|
|
|
format_commit_message(commit,
|
|
|
|
log->email ? "%cN <%cE>" : "%cN",
|
|
|
|
&ident, &ctx);
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
if (!HAS_MULTI_BITS(log->groups) ||
|
2020-11-11 21:02:21 +01:00
|
|
|
strset_add(&dups, ident.buf))
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
insert_one_record(log, ident.buf, oneline_str);
|
|
|
|
}
|
|
|
|
if (log->groups & SHORTLOG_GROUP_TRAILER) {
|
|
|
|
insert_records_from_trailers(log, &dups, commit, &ctx, oneline_str);
|
2020-09-27 10:39:59 +02:00
|
|
|
}
|
2016-01-18 21:02:48 +01:00
|
|
|
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
strset_clear(&dups);
|
2020-09-25 09:01:50 +02:00
|
|
|
strbuf_release(&ident);
|
2016-01-18 21:02:48 +01:00
|
|
|
strbuf_release(&oneline);
|
2008-02-26 00:24:14 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Walk the revisions selected by "rev" and hand every commit the walk
 * yields to shortlog_add_commit(). Dies if the walk cannot be prepared.
 */
static void get_from_rev(struct rev_info *rev, struct shortlog *log)
{
	struct commit *c;

	if (prepare_revision_walk(rev) != 0)
		die(_("revision walk setup failed"));

	/* get_revision() returns NULL once the walk is exhausted. */
	for (c = get_revision(rev); c; c = get_revision(rev))
		shortlog_add_commit(log, c);
}
|
|
|
|
|
2008-07-09 23:38:33 +02:00
|
|
|
/*
 * Parse one unsigned decimal field from the string at *arg.
 *
 * The field must be terminated either by the end of the string or by the
 * character "comma". On success *arg is advanced past the field (and past
 * the terminating "comma" character, if there was one) and the parsed value
 * is returned; an empty field yields "defval".
 *
 * Returns -1, leaving *arg untouched, when the field is followed by an
 * unexpected character, carries a minus sign, or does not fit in an int.
 */
static int parse_uint(char const **arg, int comma, int defval)
{
	unsigned long ul;
	int ret;
	char *endp;
	const char *p;

	ul = strtoul(*arg, &endp, 10);
	if (*endp && *endp != comma)
		return -1;
	/*
	 * strtoul() accepts a leading '-' and returns the negated value
	 * converted to unsigned long, so a large enough negative number
	 * wraps around to a small positive one. The only place a '-' can
	 * appear in the prefix strtoul() consumed is as that sign, so
	 * reject it outright.
	 */
	for (p = *arg; p < endp; p++)
		if (*p == '-')
			return -1;
	if (ul > INT_MAX)
		return -1;
	/* No characters consumed means the field was left empty. */
	ret = *arg == endp ? defval : (int)ul;
	*arg = *endp ? endp + 1 : endp;
	return ret;
}
|
|
|
|
|
|
|
|
/* Message passed to error() when parse_wrap_args() rejects a -w argument. */
static const char wrap_arg_usage[] = "-w[<width>[,<indent1>[,<indent2>]]]";
/*
 * Values used for <width>, <indent1> and <indent2> respectively when -w is
 * given without them (or with a field left empty), and by shortlog_init().
 */
#define DEFAULT_WRAPLEN 76
#define DEFAULT_INDENT1 6
#define DEFAULT_INDENT2 9
|
|
|
|
|
2008-07-09 23:38:33 +02:00
|
|
|
static int parse_wrap_args(const struct option *opt, const char *arg, int unset)
|
2007-04-08 10:28:00 +02:00
|
|
|
{
|
2008-07-09 23:38:33 +02:00
|
|
|
struct shortlog *log = opt->value;
|
|
|
|
|
|
|
|
log->wrap_lines = !unset;
|
|
|
|
if (unset)
|
|
|
|
return 0;
|
|
|
|
if (!arg) {
|
|
|
|
log->wrap = DEFAULT_WRAPLEN;
|
|
|
|
log->in1 = DEFAULT_INDENT1;
|
|
|
|
log->in2 = DEFAULT_INDENT2;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
log->wrap = parse_uint(&arg, ',', DEFAULT_WRAPLEN);
|
|
|
|
log->in1 = parse_uint(&arg, ',', DEFAULT_INDENT1);
|
|
|
|
log->in2 = parse_uint(&arg, '\0', DEFAULT_INDENT2);
|
|
|
|
if (log->wrap < 0 || log->in1 < 0 || log->in2 < 0)
|
|
|
|
return error(wrap_arg_usage);
|
|
|
|
if (log->wrap &&
|
|
|
|
((log->in1 && log->wrap <= log->in1) ||
|
|
|
|
(log->in2 && log->wrap <= log->in2)))
|
|
|
|
return error(wrap_arg_usage);
|
|
|
|
return 0;
|
2007-04-08 10:28:00 +02:00
|
|
|
}
|
|
|
|
|
2020-09-27 10:39:59 +02:00
|
|
|
static int parse_group_option(const struct option *opt, const char *arg, int unset)
|
|
|
|
{
|
|
|
|
struct shortlog *log = opt->value;
|
2020-09-27 10:40:04 +02:00
|
|
|
const char *field;
|
2020-09-27 10:39:59 +02:00
|
|
|
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
if (unset) {
|
|
|
|
log->groups = 0;
|
|
|
|
string_list_clear(&log->trailers, 0);
|
|
|
|
} else if (!strcasecmp(arg, "author"))
|
|
|
|
log->groups |= SHORTLOG_GROUP_AUTHOR;
|
2020-09-27 10:39:59 +02:00
|
|
|
else if (!strcasecmp(arg, "committer"))
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
log->groups |= SHORTLOG_GROUP_COMMITTER;
|
2020-09-27 10:40:04 +02:00
|
|
|
else if (skip_prefix(arg, "trailer:", &field)) {
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
log->groups |= SHORTLOG_GROUP_TRAILER;
|
|
|
|
string_list_append(&log->trailers, field);
|
2020-09-27 10:40:04 +02:00
|
|
|
} else
|
2020-09-27 10:39:59 +02:00
|
|
|
return error(_("unknown group type: %s"), arg);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-02-26 00:24:14 +01:00
|
|
|
void shortlog_init(struct shortlog *log)
|
|
|
|
{
|
|
|
|
memset(log, 0, sizeof(*log));
|
|
|
|
|
2009-02-08 15:34:27 +01:00
|
|
|
read_mailmap(&log->mailmap, &log->common_repo_prefix);
|
2008-02-26 00:24:14 +01:00
|
|
|
|
2008-07-21 20:03:49 +02:00
|
|
|
log->list.strdup_strings = 1;
|
2008-02-26 00:24:14 +01:00
|
|
|
log->wrap = DEFAULT_WRAPLEN;
|
|
|
|
log->in1 = DEFAULT_INDENT1;
|
|
|
|
log->in2 = DEFAULT_INDENT2;
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
log->trailers.strdup_strings = 1;
|
|
|
|
log->trailers.cmp = strcasecmp;
|
2008-02-26 00:24:14 +01:00
|
|
|
}
|
|
|
|
|
2006-10-22 13:23:31 +02:00
|
|
|
int cmd_shortlog(int argc, const char **argv, const char *prefix)
|
|
|
|
{
|
2016-06-13 07:39:28 +02:00
|
|
|
struct shortlog log = { STRING_LIST_INIT_NODUP };
|
|
|
|
struct rev_info rev;
|
2010-08-06 05:01:37 +02:00
|
|
|
int nongit = !startup_info->have_repository;
|
2008-02-26 00:24:14 +01:00
|
|
|
|
2016-06-13 07:39:28 +02:00
|
|
|
const struct option options[] = {
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
OPT_BIT('c', "committer", &log.groups,
|
2021-01-06 15:44:03 +01:00
|
|
|
N_("group by committer rather than author"),
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
SHORTLOG_GROUP_COMMITTER),
|
2013-08-03 13:51:19 +02:00
|
|
|
OPT_BOOL('n', "numbered", &log.sort_by_number,
|
|
|
|
N_("sort output according to the number of commits per author")),
|
|
|
|
OPT_BOOL('s', "summary", &log.summary,
|
2021-01-06 15:44:03 +01:00
|
|
|
N_("suppress commit descriptions, only provides commit count")),
|
2013-08-03 13:51:19 +02:00
|
|
|
OPT_BOOL('e', "email", &log.email,
|
2021-01-06 15:44:03 +01:00
|
|
|
N_("show the email address of each author")),
|
Use OPT_CALLBACK and OPT_CALLBACK_F
In the codebase, there are many options which use OPTION_CALLBACK in a
plain ol' struct definition. However, we have the OPT_CALLBACK and
OPT_CALLBACK_F macros which are meant to abstract these plain struct
definitions away. These macros are useful as they semantically signal to
developers that these are just normal callback option with nothing fancy
happening.
Replace plain struct definitions of OPTION_CALLBACK with OPT_CALLBACK or
OPT_CALLBACK_F where applicable. The heavy lifting was done using the
following (disgusting) shell script:
#!/bin/sh
do_replacement () {
tr '\n' '\r' |
sed -e 's/{\s*OPTION_CALLBACK,\s*\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\s*0,\(\s*[^[:space:]}]*\)\s*}/OPT_CALLBACK(\1,\2,\3,\4,\5,\6)/g' |
sed -e 's/{\s*OPTION_CALLBACK,\s*\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\(\s*[^[:space:]}]*\)\s*}/OPT_CALLBACK_F(\1,\2,\3,\4,\5,\6,\7)/g' |
tr '\r' '\n'
}
for f in $(git ls-files \*.c)
do
do_replacement <"$f" >"$f.tmp"
mv "$f.tmp" "$f"
done
The result was manually inspected and then reformatted to match the
style of the surrounding code. Finally, using
`git grep OPTION_CALLBACK \*.c`, leftover results which were not handled
by the script were manually transformed.
Signed-off-by: Denton Liu <liu.denton@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-28 10:36:28 +02:00
|
|
|
OPT_CALLBACK_F('w', NULL, &log, N_("<w>[,<i1>[,<i2>]]"),
|
2021-01-06 15:44:03 +01:00
|
|
|
N_("linewrap output"), PARSE_OPT_OPTARG,
|
Use OPT_CALLBACK and OPT_CALLBACK_F
In the codebase, there are many options which use OPTION_CALLBACK in a
plain ol' struct definition. However, we have the OPT_CALLBACK and
OPT_CALLBACK_F macros which are meant to abstract these plain struct
definitions away. These macros are useful as they semantically signal to
developers that these are just normal callback option with nothing fancy
happening.
Replace plain struct definitions of OPTION_CALLBACK with OPT_CALLBACK or
OPT_CALLBACK_F where applicable. The heavy lifting was done using the
following (disgusting) shell script:
#!/bin/sh
do_replacement () {
tr '\n' '\r' |
sed -e 's/{\s*OPTION_CALLBACK,\s*\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\s*0,\(\s*[^[:space:]}]*\)\s*}/OPT_CALLBACK(\1,\2,\3,\4,\5,\6)/g' |
sed -e 's/{\s*OPTION_CALLBACK,\s*\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\(\s*[^[:space:]}]*\)\s*}/OPT_CALLBACK_F(\1,\2,\3,\4,\5,\6,\7)/g' |
tr '\r' '\n'
}
for f in $(git ls-files \*.c)
do
do_replacement <"$f" >"$f.tmp"
mv "$f.tmp" "$f"
done
The result was manually inspected and then reformatted to match the
style of the surrounding code. Finally, using
`git grep OPTION_CALLBACK \*.c`, leftover results which were not handled
by the script were manually transformed.
Signed-off-by: Denton Liu <liu.denton@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-28 10:36:28 +02:00
|
|
|
&parse_wrap_args),
|
2020-09-27 10:39:59 +02:00
|
|
|
OPT_CALLBACK(0, "group", &log, N_("field"),
|
2021-01-06 15:44:03 +01:00
|
|
|
N_("group by field"), parse_group_option),
|
2008-07-09 23:38:33 +02:00
|
|
|
OPT_END(),
|
|
|
|
};
|
|
|
|
|
|
|
|
struct parse_opt_ctx_t ctx;
|
|
|
|
|
2009-02-08 15:34:27 +01:00
|
|
|
git_config(git_default_config, NULL);
|
2008-02-26 00:24:14 +01:00
|
|
|
shortlog_init(&log);
|
2018-09-21 17:57:38 +02:00
|
|
|
repo_init_revisions(the_repository, &rev, prefix);
|
2010-12-06 08:57:42 +01:00
|
|
|
parse_options_start(&ctx, argc, argv, prefix, options,
|
|
|
|
PARSE_OPT_KEEP_DASHDASH | PARSE_OPT_KEEP_ARGV0);
|
2008-07-09 23:38:33 +02:00
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
switch (parse_options_step(&ctx, options, shortlog_usage)) {
|
|
|
|
case PARSE_OPT_HELP:
|
2018-03-22 19:43:51 +01:00
|
|
|
case PARSE_OPT_ERROR:
|
2008-07-09 23:38:33 +02:00
|
|
|
exit(129);
|
2018-12-11 16:35:01 +01:00
|
|
|
case PARSE_OPT_COMPLETE:
|
|
|
|
exit(0);
|
2008-07-09 23:38:33 +02:00
|
|
|
case PARSE_OPT_DONE:
|
|
|
|
goto parse_done;
|
2007-04-08 10:28:00 +02:00
|
|
|
}
|
2008-07-09 23:38:34 +02:00
|
|
|
parse_revision_opt(&rev, &ctx, options, shortlog_usage);
|
2008-07-09 23:38:33 +02:00
|
|
|
}
|
|
|
|
parse_done:
|
|
|
|
argc = parse_options_end(&ctx);
|
|
|
|
|
2018-03-14 22:34:19 +01:00
|
|
|
if (nongit && argc > 1) {
|
|
|
|
error(_("too many arguments given outside repository"));
|
|
|
|
usage_with_options(shortlog_usage, options);
|
|
|
|
}
|
|
|
|
|
2008-07-09 23:38:33 +02:00
|
|
|
if (setup_revisions(argc, argv, &rev, NULL) != 1) {
|
2011-02-23 00:42:32 +01:00
|
|
|
error(_("unrecognized argument: %s"), argv[1]);
|
2008-07-09 23:38:33 +02:00
|
|
|
usage_with_options(shortlog_usage, options);
|
2006-10-22 13:23:31 +02:00
|
|
|
}
|
|
|
|
|
2008-07-14 20:08:52 +02:00
|
|
|
log.user_format = rev.commit_format == CMIT_FMT_USERFORMAT;
|
2010-05-04 05:18:57 +02:00
|
|
|
log.abbrev = rev.abbrev;
|
2016-06-22 17:02:07 +02:00
|
|
|
log.file = rev.diffopt.file;
|
2008-07-14 20:08:52 +02:00
|
|
|
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
if (!log.groups)
|
|
|
|
log.groups = SHORTLOG_GROUP_AUTHOR;
|
|
|
|
string_list_sort(&log.trailers);
|
|
|
|
|
2007-12-11 19:09:04 +01:00
|
|
|
/* assume HEAD if from a tty */
|
2008-03-14 22:35:24 +01:00
|
|
|
if (!nongit && !rev.pending.nr && isatty(0))
|
2007-12-11 19:09:04 +01:00
|
|
|
add_head_to_pending(&rev);
|
2007-03-08 11:12:06 +01:00
|
|
|
if (rev.pending.nr == 0) {
|
2010-02-24 21:49:03 +01:00
|
|
|
if (isatty(0))
|
2011-02-23 00:42:32 +01:00
|
|
|
fprintf(stderr, _("(reading log message from standard input)\n"));
|
2008-02-26 00:24:14 +01:00
|
|
|
read_from_stdin(&log);
|
2007-03-08 11:12:06 +01:00
|
|
|
}
|
2006-10-22 13:23:31 +02:00
|
|
|
else
|
2008-02-26 00:24:14 +01:00
|
|
|
get_from_rev(&rev, &log);
|
2006-10-22 13:23:31 +02:00
|
|
|
|
2008-02-26 00:24:14 +01:00
|
|
|
shortlog_output(&log);
|
2016-06-22 17:02:07 +02:00
|
|
|
if (log.file != stdout)
|
|
|
|
fclose(log.file);
|
2008-02-26 00:24:14 +01:00
|
|
|
return 0;
|
|
|
|
}
|
2006-10-22 13:23:31 +02:00
|
|
|
|
2010-02-19 23:15:01 +01:00
|
|
|
static void add_wrapped_shortlog_msg(struct strbuf *sb, const char *s,
|
|
|
|
const struct shortlog *log)
|
|
|
|
{
|
2012-12-11 06:59:21 +01:00
|
|
|
strbuf_add_wrapped_text(sb, s, log->in1, log->in2, log->wrap);
|
|
|
|
strbuf_addch(sb, '\n');
|
2010-02-19 23:15:01 +01:00
|
|
|
}
|
|
|
|
|
2008-02-26 00:24:14 +01:00
|
|
|
void shortlog_output(struct shortlog *log)
|
|
|
|
{
|
|
|
|
int i, j;
|
2010-02-19 23:15:01 +01:00
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
|
2008-02-26 00:24:14 +01:00
|
|
|
if (log->sort_by_number)
|
2016-09-30 01:40:14 +02:00
|
|
|
QSORT(log->list.items, log->list.nr,
|
2016-01-18 21:02:59 +01:00
|
|
|
log->summary ? compare_by_counter : compare_by_list);
|
2008-02-26 00:24:14 +01:00
|
|
|
for (i = 0; i < log->list.nr; i++) {
|
2016-01-18 21:02:59 +01:00
|
|
|
const struct string_list_item *item = &log->list.items[i];
|
2008-02-26 00:24:14 +01:00
|
|
|
if (log->summary) {
|
2016-06-22 17:01:49 +02:00
|
|
|
fprintf(log->file, "%6d\t%s\n",
|
|
|
|
(int)UTIL_TO_INT(item), item->string);
|
2006-11-21 21:49:45 +01:00
|
|
|
} else {
|
2016-01-18 21:02:59 +01:00
|
|
|
struct string_list *onelines = item->util;
|
2016-06-22 17:01:49 +02:00
|
|
|
fprintf(log->file, "%s (%d):\n",
|
|
|
|
item->string, onelines->nr);
|
2006-12-22 22:15:59 +01:00
|
|
|
for (j = onelines->nr - 1; j >= 0; j--) {
|
2008-07-21 20:03:49 +02:00
|
|
|
const char *msg = onelines->items[j].string;
|
2007-04-08 10:28:00 +02:00
|
|
|
|
2008-02-26 00:24:14 +01:00
|
|
|
if (log->wrap_lines) {
|
2010-02-19 23:15:01 +01:00
|
|
|
strbuf_reset(&sb);
|
|
|
|
add_wrapped_shortlog_msg(&sb, msg, log);
|
2016-06-22 17:01:49 +02:00
|
|
|
fwrite(sb.buf, sb.len, 1, log->file);
|
2007-04-08 10:28:00 +02:00
|
|
|
}
|
|
|
|
else
|
2016-06-22 17:01:49 +02:00
|
|
|
fprintf(log->file, " %s\n", msg);
|
2006-12-22 22:15:59 +01:00
|
|
|
}
|
2016-06-22 17:01:49 +02:00
|
|
|
putc('\n', log->file);
|
2016-01-18 21:02:59 +01:00
|
|
|
onelines->strdup_strings = 1;
|
|
|
|
string_list_clear(onelines, 0);
|
|
|
|
free(onelines);
|
2006-10-22 13:23:31 +02:00
|
|
|
}
|
|
|
|
|
2008-02-26 00:24:14 +01:00
|
|
|
log->list.items[i].util = NULL;
|
2006-10-22 13:23:31 +02:00
|
|
|
}
|
|
|
|
|
2010-02-19 23:15:01 +01:00
|
|
|
strbuf_release(&sb);
|
2008-07-21 20:03:49 +02:00
|
|
|
log->list.strdup_strings = 1;
|
|
|
|
string_list_clear(&log->list, 1);
|
2009-02-08 15:34:30 +01:00
|
|
|
clear_mailmap(&log->mailmap);
|
2006-10-22 13:23:31 +02:00
|
|
|
}
|