2006-10-22 13:23:31 +02:00
|
|
|
#include "builtin.h"
|
|
|
|
#include "cache.h"
|
2017-06-14 20:07:36 +02:00
|
|
|
#include "config.h"
|
2006-10-22 13:23:31 +02:00
|
|
|
#include "commit.h"
|
|
|
|
#include "diff.h"
|
2008-07-21 20:03:49 +02:00
|
|
|
#include "string-list.h"
|
2006-10-22 13:23:31 +02:00
|
|
|
#include "revision.h"
|
2006-12-22 22:15:59 +01:00
|
|
|
#include "utf8.h"
|
2007-04-27 09:41:15 +02:00
|
|
|
#include "mailmap.h"
|
2008-02-26 00:24:14 +01:00
|
|
|
#include "shortlog.h"
|
2008-07-09 23:38:33 +02:00
|
|
|
#include "parse-options.h"
|
2020-09-27 10:40:04 +02:00
|
|
|
#include "trailer.h"
|
2020-11-11 21:02:21 +01:00
|
|
|
#include "strmap.h"
|
2006-10-22 13:23:31 +02:00
|
|
|
|
2008-07-09 23:38:33 +02:00
|
|
|
static char const * const shortlog_usage[] = {
|
2018-03-10 12:52:11 +01:00
|
|
|
N_("git shortlog [<options>] [<revision-range>] [[--] <path>...]"),
|
|
|
|
N_("git log --pretty=short | git shortlog [<options>]"),
|
2008-07-09 23:38:33 +02:00
|
|
|
NULL
|
|
|
|
};
|
2006-10-22 13:23:31 +02:00
|
|
|
|
2016-01-18 21:02:59 +01:00
|
|
|
/*
 * Each string_list_item in the shortlog list uses its util pointer in one
 * of two ways:
 *
 *   - without --summary, util points to a string list holding the oneline
 *     subjects filed under that author;
 *
 *   - with --summary, only the number of subjects matters, so the pointer
 *     slot itself is reused to hold an integer counter.
 *
 * UTIL_TO_INT() reads the slot in the second (counter) interpretation.
 */
#define UTIL_TO_INT(x) ((intptr_t)(x)->util)
|
|
|
|
|
|
|
|
/*
 * Comparison callback ordering two string_list_items by the integer counter
 * stored in their util slot, larger counters first.
 *
 * Returns 1 when the first item's counter is smaller, -1 when it is larger,
 * and 0 when both are equal.
 */
static int compare_by_counter(const void *a1, const void *a2)
{
	const struct string_list_item *i1 = a1, *i2 = a2;
	intptr_t c1 = UTIL_TO_INT(i1);
	intptr_t c2 = UTIL_TO_INT(i2);

	/*
	 * Compare explicitly instead of returning c2 - c1: the difference of
	 * two intptr_t values need not fit in an int, and a truncated result
	 * could report the wrong ordering.
	 */
	if (c1 < c2)
		return 1;
	if (c1 > c2)
		return -1;
	return 0;
}
|
|
|
|
|
|
|
|
static int compare_by_list(const void *a1, const void *a2)
|
2006-10-22 13:23:31 +02:00
|
|
|
{
|
2008-07-21 20:03:49 +02:00
|
|
|
const struct string_list_item *i1 = a1, *i2 = a2;
|
|
|
|
const struct string_list *l1 = i1->util, *l2 = i2->util;
|
2006-10-22 13:23:31 +02:00
|
|
|
|
|
|
|
if (l1->nr < l2->nr)
|
2006-11-21 21:12:06 +01:00
|
|
|
return 1;
|
2006-10-22 13:23:31 +02:00
|
|
|
else if (l1->nr == l2->nr)
|
|
|
|
return 0;
|
|
|
|
else
|
2006-11-21 21:12:06 +01:00
|
|
|
return -1;
|
2006-10-22 13:23:31 +02:00
|
|
|
}
|
|
|
|
|
2008-02-26 00:24:14 +01:00
|
|
|
static void insert_one_record(struct shortlog *log,
|
2020-09-25 09:01:50 +02:00
|
|
|
const char *ident,
|
2007-12-08 02:07:41 +01:00
|
|
|
const char *oneline)
|
2006-10-22 13:23:31 +02:00
|
|
|
{
|
2008-07-21 20:03:49 +02:00
|
|
|
struct string_list_item *item;
|
2007-12-08 02:07:41 +01:00
|
|
|
|
2020-09-25 09:01:50 +02:00
|
|
|
item = string_list_insert(&log->list, ident);
|
2006-10-22 13:23:31 +02:00
|
|
|
|
2016-01-18 21:02:56 +01:00
|
|
|
if (log->summary)
|
2016-01-18 21:02:59 +01:00
|
|
|
item->util = (void *)(UTIL_TO_INT(item) + 1);
|
2016-01-18 21:02:56 +01:00
|
|
|
else {
|
2021-01-12 21:18:06 +01:00
|
|
|
char *buffer;
|
2016-01-18 21:02:56 +01:00
|
|
|
struct strbuf subject = STRBUF_INIT;
|
|
|
|
const char *eol;
|
|
|
|
|
|
|
|
/* Skip any leading whitespace, including any blank lines. */
|
|
|
|
while (*oneline && isspace(*oneline))
|
|
|
|
oneline++;
|
|
|
|
eol = strchr(oneline, '\n');
|
|
|
|
if (!eol)
|
|
|
|
eol = oneline + strlen(oneline);
|
|
|
|
if (starts_with(oneline, "[PATCH")) {
|
|
|
|
char *eob = strchr(oneline, ']');
|
|
|
|
if (eob && (!eol || eob < eol))
|
|
|
|
oneline = eob + 1;
|
|
|
|
}
|
|
|
|
while (*oneline && isspace(*oneline) && *oneline != '\n')
|
|
|
|
oneline++;
|
|
|
|
format_subject(&subject, oneline, " ");
|
|
|
|
buffer = strbuf_detach(&subject, NULL);
|
2006-10-22 13:23:31 +02:00
|
|
|
|
2022-04-13 22:01:32 +02:00
|
|
|
if (!item->util) {
|
|
|
|
item->util = xmalloc(sizeof(struct string_list));
|
|
|
|
string_list_init_nodup(item->util);
|
|
|
|
}
|
2016-01-18 21:02:56 +01:00
|
|
|
string_list_append(item->util, buffer);
|
|
|
|
}
|
2006-10-22 13:23:31 +02:00
|
|
|
}
|
|
|
|
|
2020-09-27 10:40:09 +02:00
|
|
|
static int parse_ident(struct shortlog *log,
|
|
|
|
struct strbuf *out, const char *in)
|
shortlog: skip format/parse roundtrip for internal traversal
The original git-shortlog command parsed the output of
git-log, and the logic went something like this:
1. Read stdin looking for "author" lines.
2. Parse the identity into its name/email bits.
3. Apply mailmap to the name/email.
4. Reformat the identity into a single buffer that is our
"key" for grouping entries (either a name by default,
or "name <email>" if --email was given).
The first part happens in read_from_stdin(), and the other
three steps are part of insert_one_record().
When we do an internal traversal, we just swap out the stdin
read in step 1 for reading the commit objects ourselves.
Prior to 2db6b83d18 (shortlog: replace hand-parsing of
author with pretty-printer, 2016-01-18), that made sense; we
still had to parse the ident in the commit message.
But after that commit, we use pretty.c's "%an <%ae>" to get
the author ident (for simplicity). Which means that the
pretty printer is doing a parse/format under the hood, and
then we parse the result, apply the mailmap, and format the
result again.
Instead, we can just ask pretty.c to do all of those steps
for us (including the mailmap via "%aN <%aE>", and not
formatting the address when --email is missing).
And then we can push steps 2-4 into read_from_stdin(). This
speeds up "git shortlog -ns" on linux.git by about 3%, and
eliminates a leak in insert_one_record() of the namemailbuf
strbuf.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 11:21:27 +02:00
|
|
|
{
|
|
|
|
const char *mailbuf, *namebuf;
|
|
|
|
size_t namelen, maillen;
|
|
|
|
struct ident_split ident;
|
|
|
|
|
|
|
|
if (split_ident_line(&ident, in, strlen(in)))
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
namebuf = ident.name_begin;
|
|
|
|
mailbuf = ident.mail_begin;
|
|
|
|
namelen = ident.name_end - ident.name_begin;
|
|
|
|
maillen = ident.mail_end - ident.mail_begin;
|
|
|
|
|
|
|
|
map_user(&log->mailmap, &mailbuf, &maillen, &namebuf, &namelen);
|
|
|
|
strbuf_add(out, namebuf, namelen);
|
|
|
|
if (log->email)
|
|
|
|
strbuf_addf(out, " <%.*s>", (int)maillen, mailbuf);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2008-02-26 00:24:14 +01:00
|
|
|
static void read_from_stdin(struct shortlog *log)
|
2006-10-22 13:23:31 +02:00
|
|
|
{
|
2020-09-25 09:01:50 +02:00
|
|
|
struct strbuf ident = STRBUF_INIT;
|
|
|
|
struct strbuf mapped_ident = STRBUF_INIT;
|
2016-01-18 21:02:44 +01:00
|
|
|
struct strbuf oneline = STRBUF_INIT;
|
2016-10-11 20:45:58 +02:00
|
|
|
static const char *author_match[2] = { "Author: ", "author " };
|
|
|
|
static const char *committer_match[2] = { "Commit: ", "committer " };
|
|
|
|
const char **match;
|
2007-12-08 02:07:41 +01:00
|
|
|
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
if (HAS_MULTI_BITS(log->groups))
|
|
|
|
die(_("using multiple --group options with stdin is not supported"));
|
|
|
|
|
|
|
|
switch (log->groups) {
|
2020-09-27 10:39:59 +02:00
|
|
|
case SHORTLOG_GROUP_AUTHOR:
|
|
|
|
match = author_match;
|
|
|
|
break;
|
|
|
|
case SHORTLOG_GROUP_COMMITTER:
|
|
|
|
match = committer_match;
|
|
|
|
break;
|
2020-09-27 10:40:04 +02:00
|
|
|
case SHORTLOG_GROUP_TRAILER:
|
2022-10-24 20:55:36 +02:00
|
|
|
die(_("using %s with stdin is not supported"), "--group=trailer");
|
2022-10-24 20:55:39 +02:00
|
|
|
case SHORTLOG_GROUP_FORMAT:
|
|
|
|
die(_("using %s with stdin is not supported"), "--group=format");
|
2020-09-27 10:39:59 +02:00
|
|
|
default:
|
|
|
|
BUG("unhandled shortlog group");
|
|
|
|
}
|
|
|
|
|
2020-09-25 09:01:50 +02:00
|
|
|
while (strbuf_getline_lf(&ident, stdin) != EOF) {
|
2016-01-18 21:02:40 +01:00
|
|
|
const char *v;
|
2020-09-25 09:01:50 +02:00
|
|
|
if (!skip_prefix(ident.buf, match[0], &v) &&
|
|
|
|
!skip_prefix(ident.buf, match[1], &v))
|
2007-12-08 02:07:41 +01:00
|
|
|
continue;
|
2016-01-29 01:10:14 +01:00
|
|
|
while (strbuf_getline_lf(&oneline, stdin) != EOF &&
|
2016-01-18 21:02:44 +01:00
|
|
|
oneline.len)
|
2007-12-08 02:07:41 +01:00
|
|
|
; /* discard headers */
|
2016-01-29 01:10:14 +01:00
|
|
|
while (strbuf_getline_lf(&oneline, stdin) != EOF &&
|
2016-01-18 21:02:44 +01:00
|
|
|
!oneline.len)
|
2007-12-08 02:07:41 +01:00
|
|
|
; /* discard blanks */
|
shortlog: skip format/parse roundtrip for internal traversal
The original git-shortlog command parsed the output of
git-log, and the logic went something like this:
1. Read stdin looking for "author" lines.
2. Parse the identity into its name/email bits.
3. Apply mailmap to the name/email.
4. Reformat the identity into a single buffer that is our
"key" for grouping entries (either a name by default,
or "name <email>" if --email was given).
The first part happens in read_from_stdin(), and the other
three steps are part of insert_one_record().
When we do an internal traversal, we just swap out the stdin
read in step 1 for reading the commit objects ourselves.
Prior to 2db6b83d18 (shortlog: replace hand-parsing of
author with pretty-printer, 2016-01-18), that made sense; we
still had to parse the ident in the commit message.
But after that commit, we use pretty.c's "%an <%ae>" to get
the author ident (for simplicity). Which means that the
pretty printer is doing a parse/format under the hood, and
then we parse the result, apply the mailmap, and format the
result again.
Instead, we can just ask pretty.c to do all of those steps
for us (including the mailmap via "%aN <%aE>", and not
formatting the address when --email is missing).
And then we can push steps 2-4 into read_from_stdin(). This
speeds up "git shortlog -ns" on linux.git by about 3%, and
eliminates a leak in insert_one_record() of the namemailbuf
strbuf.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 11:21:27 +02:00
|
|
|
|
2020-09-25 09:01:50 +02:00
|
|
|
strbuf_reset(&mapped_ident);
|
2020-09-27 10:40:09 +02:00
|
|
|
if (parse_ident(log, &mapped_ident, v) < 0)
|
shortlog: skip format/parse roundtrip for internal traversal
The original git-shortlog command parsed the output of
git-log, and the logic went something like this:
1. Read stdin looking for "author" lines.
2. Parse the identity into its name/email bits.
3. Apply mailmap to the name/email.
4. Reformat the identity into a single buffer that is our
"key" for grouping entries (either a name by default,
or "name <email>" if --email was given).
The first part happens in read_from_stdin(), and the other
three steps are part of insert_one_record().
When we do an internal traversal, we just swap out the stdin
read in step 1 for reading the commit objects ourselves.
Prior to 2db6b83d18 (shortlog: replace hand-parsing of
author with pretty-printer, 2016-01-18), that made sense; we
still had to parse the ident in the commit message.
But after that commit, we use pretty.c's "%an <%ae>" to get
the author ident (for simplicity). Which means that the
pretty printer is doing a parse/format under the hood, and
then we parse the result, apply the mailmap, and format the
result again.
Instead, we can just ask pretty.c to do all of those steps
for us (including the mailmap via "%aN <%aE>", and not
formatting the address when --email is missing).
And then we can push steps 2-4 into read_from_stdin(). This
speeds up "git shortlog -ns" on linux.git by about 3%, and
eliminates a leak in insert_one_record() of the namemailbuf
strbuf.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-08 11:21:27 +02:00
|
|
|
continue;
|
|
|
|
|
2020-09-25 09:01:50 +02:00
|
|
|
insert_one_record(log, mapped_ident.buf, oneline.buf);
|
2006-10-22 13:23:31 +02:00
|
|
|
}
|
2020-09-25 09:01:50 +02:00
|
|
|
strbuf_release(&ident);
|
|
|
|
strbuf_release(&mapped_ident);
|
2016-01-18 21:02:44 +01:00
|
|
|
strbuf_release(&oneline);
|
2006-10-22 13:23:31 +02:00
|
|
|
}
|
|
|
|
|
2020-09-27 10:40:04 +02:00
|
|
|
static void insert_records_from_trailers(struct shortlog *log,
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
struct strset *dups,
|
2020-09-27 10:40:04 +02:00
|
|
|
struct commit *commit,
|
|
|
|
struct pretty_print_context *ctx,
|
|
|
|
const char *oneline)
|
|
|
|
{
|
|
|
|
struct trailer_iterator iter;
|
|
|
|
const char *commit_buffer, *body;
|
2020-09-27 10:40:11 +02:00
|
|
|
struct strbuf ident = STRBUF_INIT;
|
2020-09-27 10:40:04 +02:00
|
|
|
|
2022-10-24 20:55:33 +02:00
|
|
|
if (!log->trailers.nr)
|
|
|
|
return;
|
|
|
|
|
2020-09-27 10:40:04 +02:00
|
|
|
/*
|
|
|
|
* Using format_commit_message("%B") would be simpler here, but
|
|
|
|
* this saves us copying the message.
|
|
|
|
*/
|
|
|
|
commit_buffer = logmsg_reencode(commit, NULL, ctx->output_encoding);
|
|
|
|
body = strstr(commit_buffer, "\n\n");
|
|
|
|
if (!body)
|
|
|
|
return;
|
|
|
|
|
|
|
|
trailer_iterator_init(&iter, body);
|
|
|
|
while (trailer_iterator_advance(&iter)) {
|
|
|
|
const char *value = iter.val.buf;
|
|
|
|
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
if (!string_list_has_string(&log->trailers, iter.key.buf))
|
2020-09-27 10:40:04 +02:00
|
|
|
continue;
|
|
|
|
|
2020-09-27 10:40:11 +02:00
|
|
|
strbuf_reset(&ident);
|
|
|
|
if (!parse_ident(log, &ident, value))
|
|
|
|
value = ident.buf;
|
|
|
|
|
2020-11-11 21:02:21 +01:00
|
|
|
if (!strset_add(dups, value))
|
2020-09-27 10:40:07 +02:00
|
|
|
continue;
|
2020-09-27 10:40:04 +02:00
|
|
|
insert_one_record(log, value, oneline);
|
|
|
|
}
|
|
|
|
trailer_iterator_release(&iter);
|
|
|
|
|
2020-09-27 10:40:11 +02:00
|
|
|
strbuf_release(&ident);
|
2020-09-27 10:40:04 +02:00
|
|
|
unuse_commit_buffer(commit, commit_buffer);
|
|
|
|
}
|
|
|
|
|
2022-10-24 20:55:39 +02:00
|
|
|
static int shortlog_needs_dedup(const struct shortlog *log)
|
|
|
|
{
|
|
|
|
return HAS_MULTI_BITS(log->groups) || log->format.nr > 1 || log->trailers.nr;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void insert_records_from_format(struct shortlog *log,
|
|
|
|
struct strset *dups,
|
|
|
|
struct commit *commit,
|
|
|
|
struct pretty_print_context *ctx,
|
|
|
|
const char *oneline)
|
|
|
|
{
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
|
|
|
struct string_list_item *item;
|
|
|
|
|
|
|
|
for_each_string_list_item(item, &log->format) {
|
|
|
|
strbuf_reset(&buf);
|
|
|
|
|
|
|
|
format_commit_message(commit, item->string, &buf, ctx);
|
|
|
|
|
|
|
|
if (!shortlog_needs_dedup(log) || strset_add(dups, buf.buf))
|
|
|
|
insert_one_record(log, buf.buf, oneline);
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_release(&buf);
|
|
|
|
}
|
|
|
|
|
2008-02-26 00:24:14 +01:00
|
|
|
void shortlog_add_commit(struct shortlog *log, struct commit *commit)
|
2006-10-22 13:23:31 +02:00
|
|
|
{
|
2020-09-25 09:01:50 +02:00
|
|
|
struct strbuf ident = STRBUF_INIT;
|
2016-01-18 21:02:48 +01:00
|
|
|
struct strbuf oneline = STRBUF_INIT;
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
struct strset dups = STRSET_INIT;
|
2016-01-18 21:02:48 +01:00
|
|
|
struct pretty_print_context ctx = {0};
|
2020-09-27 10:39:59 +02:00
|
|
|
const char *oneline_str;
|
2016-01-18 21:02:48 +01:00
|
|
|
|
|
|
|
ctx.fmt = CMIT_FMT_USERFORMAT;
|
|
|
|
ctx.abbrev = log->abbrev;
|
2017-03-01 12:37:07 +01:00
|
|
|
ctx.print_email_subject = 1;
|
2022-10-24 20:55:30 +02:00
|
|
|
ctx.date_mode = log->date_mode;
|
2016-01-18 21:02:48 +01:00
|
|
|
ctx.output_encoding = get_log_output_encoding();
|
|
|
|
|
2016-01-18 21:02:52 +01:00
|
|
|
if (!log->summary) {
|
|
|
|
if (log->user_format)
|
|
|
|
pretty_print_commit(&ctx, commit, &oneline);
|
2008-02-26 00:24:14 +01:00
|
|
|
else
|
2016-01-18 21:02:52 +01:00
|
|
|
format_commit_message(commit, "%s", &oneline, &ctx);
|
shortlog: ignore commits with missing authors
Most of git's traversals are robust against minor breakages
in commit data. For example, "git log" will still output an
entry for a commit that has a broken encoding or missing
author, and will not abort the whole operation.
Shortlog, on the other hand, will die as soon as it sees a
commit without an author, meaning that a repository with
a broken commit cannot get any shortlog output at all.
Let's downgrade this fatal error to a warning, and continue
the operation.
We simply ignore the commit and do not count it in the total
(since we do not have any author under which to file it).
Alternatively, we could output some kind of "<empty>" record
to collect these bogus commits. It is probably not worth it,
though; we have already warned to stderr, so the user is
aware that such bogosities exist, and any placeholder we
came up with would either be syntactically invalid, or would
potentially conflict with real data.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-09-18 23:14:00 +02:00
|
|
|
}
|
2020-09-27 10:39:59 +02:00
|
|
|
oneline_str = oneline.len ? oneline.buf : "<none>";
|
|
|
|
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
if (log->groups & SHORTLOG_GROUP_COMMITTER) {
|
|
|
|
strbuf_reset(&ident);
|
2020-09-27 10:39:59 +02:00
|
|
|
format_commit_message(commit,
|
|
|
|
log->email ? "%cN <%cE>" : "%cN",
|
|
|
|
&ident, &ctx);
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
if (!HAS_MULTI_BITS(log->groups) ||
|
2020-11-11 21:02:21 +01:00
|
|
|
strset_add(&dups, ident.buf))
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
insert_one_record(log, ident.buf, oneline_str);
|
|
|
|
}
|
2022-10-24 20:55:33 +02:00
|
|
|
insert_records_from_trailers(log, &dups, commit, &ctx, oneline_str);
|
2022-10-24 20:55:39 +02:00
|
|
|
insert_records_from_format(log, &dups, commit, &ctx, oneline_str);
|
2016-01-18 21:02:48 +01:00
|
|
|
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
strset_clear(&dups);
|
2020-09-25 09:01:50 +02:00
|
|
|
strbuf_release(&ident);
|
2016-01-18 21:02:48 +01:00
|
|
|
strbuf_release(&oneline);
|
2008-02-26 00:24:14 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Walk the revisions described by `rev` and add every commit returned by
 * the walk to `log`.  Dies if the revision walk cannot be prepared.
 */
static void get_from_rev(struct rev_info *rev, struct shortlog *log)
{
	struct commit *c;

	if (prepare_revision_walk(rev))
		die(_("revision walk setup failed"));

	for (c = get_revision(rev); c; c = get_revision(rev))
		shortlog_add_commit(log, c);
}
|
|
|
|
|
2008-07-09 23:38:33 +02:00
|
|
|
/*
 * Parse one unsigned decimal field of a delimiter-separated option value.
 *
 * *arg points at the start of the field and `comma` is the delimiter that
 * may follow it (pass '\0' for the last field).  An empty field yields
 * `defval`.  On success *arg is advanced past the field and, if present,
 * past its delimiter.
 *
 * Returns the parsed value, or -1 when the field is followed by anything
 * other than the delimiter or the end of the string, when it carries a
 * minus sign, or when the value does not fit in an int.  *arg is left
 * untouched on failure.
 */
static int parse_uint(char const **arg, int comma, int defval)
{
	unsigned long ul;
	const char *p;
	char *endp;
	int ret;

	ul = strtoul(*arg, &endp, 10);
	if (*endp && *endp != comma)
		return -1;

	/*
	 * strtoul() accepts a leading '-' and returns the negated value as
	 * an unsigned long, so a large enough negative input wraps around
	 * to a small positive number.  Reject any minus sign it consumed.
	 */
	for (p = *arg; p < endp; p++)
		if (*p == '-')
			return -1;

	if (ul > INT_MAX)
		return -1;

	/* Nothing consumed means the field was empty: use the default. */
	ret = *arg == endp ? defval : (int)ul;
	*arg = *endp ? endp + 1 : endp;
	return ret;
}
|
|
|
|
|
|
|
|
/* Syntax of the -w option, reported when its argument is rejected. */
static const char wrap_arg_usage[] = "-w[<width>[,<indent1>[,<indent2>]]]";

/* Defaults for the three -w fields: line width, first and second indent. */
#define DEFAULT_WRAPLEN 76
#define DEFAULT_INDENT1 6
#define DEFAULT_INDENT2 9
|
|
|
|
|
2008-07-09 23:38:33 +02:00
|
|
|
static int parse_wrap_args(const struct option *opt, const char *arg, int unset)
|
2007-04-08 10:28:00 +02:00
|
|
|
{
|
2008-07-09 23:38:33 +02:00
|
|
|
struct shortlog *log = opt->value;
|
|
|
|
|
|
|
|
log->wrap_lines = !unset;
|
|
|
|
if (unset)
|
|
|
|
return 0;
|
|
|
|
if (!arg) {
|
|
|
|
log->wrap = DEFAULT_WRAPLEN;
|
|
|
|
log->in1 = DEFAULT_INDENT1;
|
|
|
|
log->in2 = DEFAULT_INDENT2;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
log->wrap = parse_uint(&arg, ',', DEFAULT_WRAPLEN);
|
|
|
|
log->in1 = parse_uint(&arg, ',', DEFAULT_INDENT1);
|
|
|
|
log->in2 = parse_uint(&arg, '\0', DEFAULT_INDENT2);
|
|
|
|
if (log->wrap < 0 || log->in1 < 0 || log->in2 < 0)
|
|
|
|
return error(wrap_arg_usage);
|
|
|
|
if (log->wrap &&
|
|
|
|
((log->in1 && log->wrap <= log->in1) ||
|
|
|
|
(log->in2 && log->wrap <= log->in2)))
|
|
|
|
return error(wrap_arg_usage);
|
|
|
|
return 0;
|
2007-04-08 10:28:00 +02:00
|
|
|
}
|
|
|
|
|
2020-09-27 10:39:59 +02:00
|
|
|
static int parse_group_option(const struct option *opt, const char *arg, int unset)
|
|
|
|
{
|
|
|
|
struct shortlog *log = opt->value;
|
2020-09-27 10:40:04 +02:00
|
|
|
const char *field;
|
2020-09-27 10:39:59 +02:00
|
|
|
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
if (unset) {
|
|
|
|
log->groups = 0;
|
|
|
|
string_list_clear(&log->trailers, 0);
|
2022-10-24 20:55:39 +02:00
|
|
|
string_list_clear(&log->format, 0);
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
} else if (!strcasecmp(arg, "author"))
|
|
|
|
log->groups |= SHORTLOG_GROUP_AUTHOR;
|
2020-09-27 10:39:59 +02:00
|
|
|
else if (!strcasecmp(arg, "committer"))
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
log->groups |= SHORTLOG_GROUP_COMMITTER;
|
2020-09-27 10:40:04 +02:00
|
|
|
else if (skip_prefix(arg, "trailer:", &field)) {
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
log->groups |= SHORTLOG_GROUP_TRAILER;
|
|
|
|
string_list_append(&log->trailers, field);
|
2022-10-24 20:55:39 +02:00
|
|
|
} else if (skip_prefix(arg, "format:", &field)) {
|
|
|
|
log->groups |= SHORTLOG_GROUP_FORMAT;
|
|
|
|
string_list_append(&log->format, field);
|
|
|
|
} else if (strchr(arg, '%')) {
|
|
|
|
log->groups |= SHORTLOG_GROUP_FORMAT;
|
|
|
|
string_list_append(&log->format, arg);
|
|
|
|
} else {
|
2020-09-27 10:39:59 +02:00
|
|
|
return error(_("unknown group type: %s"), arg);
|
2022-10-24 20:55:39 +02:00
|
|
|
}
|
2020-09-27 10:39:59 +02:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-02-26 00:24:14 +01:00
|
|
|
void shortlog_init(struct shortlog *log)
|
|
|
|
{
|
|
|
|
memset(log, 0, sizeof(*log));
|
|
|
|
|
2021-01-12 21:18:06 +01:00
|
|
|
read_mailmap(&log->mailmap);
|
2008-02-26 00:24:14 +01:00
|
|
|
|
2008-07-21 20:03:49 +02:00
|
|
|
log->list.strdup_strings = 1;
|
2008-02-26 00:24:14 +01:00
|
|
|
log->wrap = DEFAULT_WRAPLEN;
|
|
|
|
log->in1 = DEFAULT_INDENT1;
|
|
|
|
log->in2 = DEFAULT_INDENT2;
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
log->trailers.strdup_strings = 1;
|
|
|
|
log->trailers.cmp = strcasecmp;
|
2022-10-24 20:55:39 +02:00
|
|
|
log->format.strdup_strings = 1;
|
2008-02-26 00:24:14 +01:00
|
|
|
}
|
|
|
|
|
2022-10-24 20:55:41 +02:00
|
|
|
void shortlog_finish_setup(struct shortlog *log)
|
|
|
|
{
|
2022-10-24 20:55:44 +02:00
|
|
|
if (log->groups & SHORTLOG_GROUP_AUTHOR)
|
|
|
|
string_list_append(&log->format,
|
|
|
|
log->email ? "%aN <%aE>" : "%aN");
|
|
|
|
|
2022-10-24 20:55:41 +02:00
|
|
|
string_list_sort(&log->trailers);
|
|
|
|
}
|
|
|
|
|
2006-10-22 13:23:31 +02:00
|
|
|
int cmd_shortlog(int argc, const char **argv, const char *prefix)
|
|
|
|
{
|
2016-06-13 07:39:28 +02:00
|
|
|
struct shortlog log = { STRING_LIST_INIT_NODUP };
|
|
|
|
struct rev_info rev;
|
2010-08-06 05:01:37 +02:00
|
|
|
int nongit = !startup_info->have_repository;
|
2008-02-26 00:24:14 +01:00
|
|
|
|
2016-06-13 07:39:28 +02:00
|
|
|
const struct option options[] = {
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
OPT_BIT('c', "committer", &log.groups,
|
2021-01-06 15:44:03 +01:00
|
|
|
N_("group by committer rather than author"),
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
SHORTLOG_GROUP_COMMITTER),
|
2013-08-03 13:51:19 +02:00
|
|
|
OPT_BOOL('n', "numbered", &log.sort_by_number,
|
|
|
|
N_("sort output according to the number of commits per author")),
|
|
|
|
OPT_BOOL('s', "summary", &log.summary,
|
2021-01-06 15:44:03 +01:00
|
|
|
N_("suppress commit descriptions, only provides commit count")),
|
2013-08-03 13:51:19 +02:00
|
|
|
OPT_BOOL('e', "email", &log.email,
|
2021-01-06 15:44:03 +01:00
|
|
|
N_("show the email address of each author")),
|
Use OPT_CALLBACK and OPT_CALLBACK_F
In the codebase, there are many options which use OPTION_CALLBACK in a
plain ol' struct definition. However, we have the OPT_CALLBACK and
OPT_CALLBACK_F macros which are meant to abstract these plain struct
definitions away. These macros are useful as they semantically signal to
developers that these are just normal callback option with nothing fancy
happening.
Replace plain struct definitions of OPTION_CALLBACK with OPT_CALLBACK or
OPT_CALLBACK_F where applicable. The heavy lifting was done using the
following (disgusting) shell script:
#!/bin/sh
do_replacement () {
tr '\n' '\r' |
sed -e 's/{\s*OPTION_CALLBACK,\s*\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\s*0,\(\s*[^[:space:]}]*\)\s*}/OPT_CALLBACK(\1,\2,\3,\4,\5,\6)/g' |
sed -e 's/{\s*OPTION_CALLBACK,\s*\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\(\s*[^[:space:]}]*\)\s*}/OPT_CALLBACK_F(\1,\2,\3,\4,\5,\6,\7)/g' |
tr '\r' '\n'
}
for f in $(git ls-files \*.c)
do
do_replacement <"$f" >"$f.tmp"
mv "$f.tmp" "$f"
done
The result was manually inspected and then reformatted to match the
style of the surrounding code. Finally, using
`git grep OPTION_CALLBACK \*.c`, leftover results which were not handled
by the script were manually transformed.
Signed-off-by: Denton Liu <liu.denton@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-28 10:36:28 +02:00
|
|
|
OPT_CALLBACK_F('w', NULL, &log, N_("<w>[,<i1>[,<i2>]]"),
|
2021-01-06 15:44:03 +01:00
|
|
|
N_("linewrap output"), PARSE_OPT_OPTARG,
|
Use OPT_CALLBACK and OPT_CALLBACK_F
In the codebase, there are many options which use OPTION_CALLBACK in a
plain ol' struct definition. However, we have the OPT_CALLBACK and
OPT_CALLBACK_F macros which are meant to abstract these plain struct
definitions away. These macros are useful as they semantically signal to
developers that these are just normal callback option with nothing fancy
happening.
Replace plain struct definitions of OPTION_CALLBACK with OPT_CALLBACK or
OPT_CALLBACK_F where applicable. The heavy lifting was done using the
following (disgusting) shell script:
#!/bin/sh
do_replacement () {
tr '\n' '\r' |
sed -e 's/{\s*OPTION_CALLBACK,\s*\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\s*0,\(\s*[^[:space:]}]*\)\s*}/OPT_CALLBACK(\1,\2,\3,\4,\5,\6)/g' |
sed -e 's/{\s*OPTION_CALLBACK,\s*\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\([^,]*\),\(\s*[^[:space:]}]*\)\s*}/OPT_CALLBACK_F(\1,\2,\3,\4,\5,\6,\7)/g' |
tr '\r' '\n'
}
for f in $(git ls-files \*.c)
do
do_replacement <"$f" >"$f.tmp"
mv "$f.tmp" "$f"
done
The result was manually inspected and then reformatted to match the
style of the surrounding code. Finally, using
`git grep OPTION_CALLBACK \*.c`, leftover results which were not handled
by the script were manually transformed.
Signed-off-by: Denton Liu <liu.denton@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-04-28 10:36:28 +02:00
|
|
|
&parse_wrap_args),
|
2020-09-27 10:39:59 +02:00
|
|
|
OPT_CALLBACK(0, "group", &log, N_("field"),
|
2021-01-06 15:44:03 +01:00
|
|
|
N_("group by field"), parse_group_option),
|
2008-07-09 23:38:33 +02:00
|
|
|
OPT_END(),
|
|
|
|
};
|
|
|
|
|
|
|
|
struct parse_opt_ctx_t ctx;
|
|
|
|
|
2009-02-08 15:34:27 +01:00
|
|
|
git_config(git_default_config, NULL);
|
2008-02-26 00:24:14 +01:00
|
|
|
shortlog_init(&log);
|
2018-09-21 17:57:38 +02:00
|
|
|
repo_init_revisions(the_repository, &rev, prefix);
|
2010-12-06 08:57:42 +01:00
|
|
|
parse_options_start(&ctx, argc, argv, prefix, options,
|
|
|
|
PARSE_OPT_KEEP_DASHDASH | PARSE_OPT_KEEP_ARGV0);
|
2008-07-09 23:38:33 +02:00
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
switch (parse_options_step(&ctx, options, shortlog_usage)) {
|
2021-10-08 21:07:39 +02:00
|
|
|
case PARSE_OPT_NON_OPTION:
|
|
|
|
case PARSE_OPT_UNKNOWN:
|
|
|
|
break;
|
2008-07-09 23:38:33 +02:00
|
|
|
case PARSE_OPT_HELP:
|
2018-03-22 19:43:51 +01:00
|
|
|
case PARSE_OPT_ERROR:
|
parse-options: add support for parsing subcommands
Several Git commands have subcommands to implement mutually exclusive
"operation modes", and they usually parse their subcommand argument
with a bunch of if-else if statements.
Teach parse-options to handle subcommands as well, which will result
in shorter and simpler code with consistent error handling and error
messages on unknown or missing subcommand, and it will also make
possible for our Bash completion script to handle subcommands
programmatically.
The approach is guided by the following observations:
- Most subcommands [1] are implemented in dedicated functions, and
most of those functions [2] either have a signature matching the
'int cmd_foo(int argc, const char **argc, const char *prefix)'
signature of builtin commands or can be trivially converted to
that signature, because they miss only that last prefix parameter
or have no parameters at all.
- Subcommand arguments only have long form, and they have no double
dash prefix, no negated form, and no description, and they don't
take any arguments, and can't be abbreviated.
- There must be exactly one subcommand among the arguments, or zero
if the command has a default operation mode.
- All arguments following the subcommand are considered to be
arguments of the subcommand, and, conversely, arguments meant for
the subcommand may not preceed the subcommand.
So in the end subcommand declaration and parsing would look something
like this:
parse_opt_subcommand_fn *fn = NULL;
struct option builtin_commit_graph_options[] = {
OPT_STRING(0, "object-dir", &opts.obj_dir, N_("dir"),
N_("the object directory to store the graph")),
OPT_SUBCOMMAND("verify", &fn, graph_verify),
OPT_SUBCOMMAND("write", &fn, graph_write),
OPT_END(),
};
argc = parse_options(argc, argv, prefix, options,
builtin_commit_graph_usage, 0);
return fn(argc, argv, prefix);
Here each OPT_SUBCOMMAND specifies the name of the subcommand and the
function implementing it, and the address of the same 'fn' subcommand
function pointer. parse_options() then processes the arguments until
it finds the first argument matching one of the subcommands, sets 'fn'
to the function associated with that subcommand, and returns, leaving
the rest of the arguments unprocessed. If none of the listed
subcommands is found among the arguments, parse_options() will show
usage and abort.
If a command has a default operation mode, 'fn' should be initialized
to the function implementing that mode, and parse_options() should be
invoked with the PARSE_OPT_SUBCOMMAND_OPTIONAL flag. In this case
parse_options() won't error out when not finding any subcommands, but
will return leaving 'fn' unchanged. Note that if that default
operation mode has any --options, then the PARSE_OPT_KEEP_UNKNOWN_OPT
flag is necessary as well (otherwise parse_options() would error out
upon seeing the unknown option meant to the default operation mode).
Some thoughts about the implementation:
- The same pointer to 'fn' must be specified as 'value' for each
OPT_SUBCOMMAND, because there can be only one set of mutually
exclusive subcommands; parse_options() will BUG() otherwise.
There are other ways to tell parse_options() where to put the
function associated with the subcommand given on the command line,
but I didn't like them:
- Change parse_options()'s signature by adding a pointer to
subcommand function to be set to the function associated with
the given subcommand, affecting all callsites, even those that
don't have subcommands.
- Introduce a specific parse_options_and_subcommand() variant
with that extra funcion parameter.
- I decided against automatically calling the subcommand function
from within parse_options(), because:
- There are commands that have to perform additional actions
after option parsing but before calling the function
implementing the specified subcommand.
- The return code of the subcommand is usually the return code
of the git command, but preserving the return code of the
automatically called subcommand function would have made the
API awkward.
- Also add a OPT_SUBCOMMAND_F() variant to allow specifying an
option flag: we have two subcommands that are purposefully
excluded from completion ('git remote rm' and 'git stash save'),
so they'll have to be specified with the PARSE_OPT_NOCOMPLETE
flag.
- Some of the 'parse_opt_flags' don't make sense with subcommands,
and using them is probably just an oversight or misunderstanding.
Therefore parse_options() will BUG() when invoked with any of the
following flags while the options array contains at least one
OPT_SUBCOMMAND:
- PARSE_OPT_KEEP_DASHDASH: parse_options() stops parsing
arguments when encountering a "--" argument, so it doesn't
make sense to expect and keep one before a subcommand, because
it would prevent the parsing of the subcommand.
However, this flag is allowed in combination with the
PARSE_OPT_SUBCOMMAND_OPTIONAL flag, because the double dash
might be meaningful for the command's default operation mode,
e.g. to disambiguate refs and pathspecs.
- PARSE_OPT_STOP_AT_NON_OPTION: As its name suggests, this flag
tells parse_options() to stop as soon as it encouners a
non-option argument, but subcommands are by definition not
options... so how could they be parsed, then?!
- PARSE_OPT_KEEP_UNKNOWN: This flag can be used to collect any
unknown --options and then pass them to a different command or
subsystem. Surely if a command has subcommands, then this
functionality should rather be delegated to one of those
subcommands, and not performed by the command itself.
However, this flag is allowed in combination with the
PARSE_OPT_SUBCOMMAND_OPTIONAL flag, making possible to pass
--options to the default operation mode.
- If the command with subcommands has a default operation mode, then
all arguments to the command must preceed the arguments of the
subcommand.
AFAICT we don't have any commands where this makes a difference,
because in those commands either only the command accepts any
arguments ('notes' and 'remote'), or only the default subcommand
('reflog' and 'stash'), but never both.
- The 'argv' array passed to subcommand functions currently starts
with the name of the subcommand. Keep this behavior. AFAICT no
subcommand functions depend on the actual content of 'argv[0]',
but the parse_options() call handling their options expects that
the options start at argv[1].
- To support handling subcommands programmatically in our Bash
completion script, 'git cmd --git-completion-helper' will now list
both subcommands and regular --options, if any. This means that
the completion script will have to separate subcommands (i.e.
words without a double dash prefix) from --options on its own, but
that's rather easy to do, and it's not much work either, because
the number of subcommands a command might have is rather low, and
those commands accept only a single --option or none at all. An
alternative would be to introduce a separate option that lists
only subcommands, but then the completion script would need not
one but two git invocations and command substitutions for commands
with subcommands.
Note that this change doesn't affect the behavior of our Bash
completion script, because when completing the --option of a
command with subcommands, e.g. for 'git notes --<TAB>', then all
subcommands will be filtered out anyway, as none of them will
match the word to be completed starting with that double dash
prefix.
[1] Except 'git rerere', because many of its subcommands are
implemented in the bodies of the if-else if statements parsing the
command's subcommand argument.
[2] Except 'credential', 'credential-store' and 'fsmonitor--daemon',
because some of the functions implementing their subcommands take
special parameters.
Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-08-19 18:04:00 +02:00
|
|
|
case PARSE_OPT_SUBCOMMAND:
|
2008-07-09 23:38:33 +02:00
|
|
|
exit(129);
|
2018-12-11 16:35:01 +01:00
|
|
|
case PARSE_OPT_COMPLETE:
|
|
|
|
exit(0);
|
2008-07-09 23:38:33 +02:00
|
|
|
case PARSE_OPT_DONE:
|
|
|
|
goto parse_done;
|
2007-04-08 10:28:00 +02:00
|
|
|
}
|
2008-07-09 23:38:34 +02:00
|
|
|
parse_revision_opt(&rev, &ctx, options, shortlog_usage);
|
2008-07-09 23:38:33 +02:00
|
|
|
}
|
|
|
|
parse_done:
|
2022-02-11 17:36:25 +01:00
|
|
|
revision_opts_finish(&rev);
|
2008-07-09 23:38:33 +02:00
|
|
|
argc = parse_options_end(&ctx);
|
|
|
|
|
2018-03-14 22:34:19 +01:00
|
|
|
if (nongit && argc > 1) {
|
|
|
|
error(_("too many arguments given outside repository"));
|
|
|
|
usage_with_options(shortlog_usage, options);
|
|
|
|
}
|
|
|
|
|
2008-07-09 23:38:33 +02:00
|
|
|
if (setup_revisions(argc, argv, &rev, NULL) != 1) {
|
2011-02-23 00:42:32 +01:00
|
|
|
error(_("unrecognized argument: %s"), argv[1]);
|
2008-07-09 23:38:33 +02:00
|
|
|
usage_with_options(shortlog_usage, options);
|
2006-10-22 13:23:31 +02:00
|
|
|
}
|
|
|
|
|
2008-07-14 20:08:52 +02:00
|
|
|
log.user_format = rev.commit_format == CMIT_FMT_USERFORMAT;
|
2010-05-04 05:18:57 +02:00
|
|
|
log.abbrev = rev.abbrev;
|
2016-06-22 17:02:07 +02:00
|
|
|
log.file = rev.diffopt.file;
|
2022-10-24 20:55:30 +02:00
|
|
|
log.date_mode = rev.date_mode;
|
2008-07-14 20:08:52 +02:00
|
|
|
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
if (!log.groups)
|
|
|
|
log.groups = SHORTLOG_GROUP_AUTHOR;
|
2022-10-24 20:55:41 +02:00
|
|
|
shortlog_finish_setup(&log);
|
shortlog: allow multiple groups to be specified
Now that shortlog supports reading from trailers, it can be useful to
combine counts from multiple trailers, or between trailers and authors.
This can be done manually by post-processing the output from multiple
runs, but it's non-trivial to make sure that each name/commit pair is
counted only once.
This patch teaches shortlog to accept multiple --group options on the
command line, and pull data from all of them. That makes it possible to
run:
git shortlog -ns --group=author --group=trailer:co-authored-by
to get a shortlog that counts authors and co-authors equally.
The implementation is mostly straightforward. The "group" enum becomes a
bitfield, and the trailer key becomes a list. I didn't bother
implementing the multi-group semantics for reading from stdin. It would
be possible to do, but the existing matching code makes it awkward, and
I doubt anybody cares.
The duplicate suppression we used for trailers now covers authors and
committers as well (though in non-trailer single-group mode we can skip
the hash insertion and lookup, since we only see one value per commit).
There is one subtlety: we now care about the case when no group bit is
set (in which case we default to showing the author). The caller in
builtin/log.c needs to be adapted to ask explicitly for authors, rather
than relying on shortlog_init(). It would be possible with some
gymnastics to make this keep working as-is, but it's not worth it for a
single caller.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-27 10:40:15 +02:00
|
|
|
|
2007-12-11 19:09:04 +01:00
|
|
|
/* assume HEAD if from a tty */
|
2008-03-14 22:35:24 +01:00
|
|
|
if (!nongit && !rev.pending.nr && isatty(0))
|
2007-12-11 19:09:04 +01:00
|
|
|
add_head_to_pending(&rev);
|
2007-03-08 11:12:06 +01:00
|
|
|
if (rev.pending.nr == 0) {
|
2010-02-24 21:49:03 +01:00
|
|
|
if (isatty(0))
|
2011-02-23 00:42:32 +01:00
|
|
|
fprintf(stderr, _("(reading log message from standard input)\n"));
|
2008-02-26 00:24:14 +01:00
|
|
|
read_from_stdin(&log);
|
2007-03-08 11:12:06 +01:00
|
|
|
}
|
2006-10-22 13:23:31 +02:00
|
|
|
else
|
2008-02-26 00:24:14 +01:00
|
|
|
get_from_rev(&rev, &log);
|
2006-10-22 13:23:31 +02:00
|
|
|
|
2022-04-13 22:01:36 +02:00
|
|
|
release_revisions(&rev);
|
|
|
|
|
2008-02-26 00:24:14 +01:00
|
|
|
shortlog_output(&log);
|
2016-06-22 17:02:07 +02:00
|
|
|
if (log.file != stdout)
|
|
|
|
fclose(log.file);
|
2008-02-26 00:24:14 +01:00
|
|
|
return 0;
|
|
|
|
}
|
2006-10-22 13:23:31 +02:00
|
|
|
|
2010-02-19 23:15:01 +01:00
|
|
|
static void add_wrapped_shortlog_msg(struct strbuf *sb, const char *s,
|
|
|
|
const struct shortlog *log)
|
|
|
|
{
|
2012-12-11 06:59:21 +01:00
|
|
|
strbuf_add_wrapped_text(sb, s, log->in1, log->in2, log->wrap);
|
|
|
|
strbuf_addch(sb, '\n');
|
2010-02-19 23:15:01 +01:00
|
|
|
}
|
|
|
|
|
2008-02-26 00:24:14 +01:00
|
|
|
void shortlog_output(struct shortlog *log)
|
|
|
|
{
|
2022-03-07 16:27:08 +01:00
|
|
|
size_t i, j;
|
2010-02-19 23:15:01 +01:00
|
|
|
struct strbuf sb = STRBUF_INIT;
|
|
|
|
|
2008-02-26 00:24:14 +01:00
|
|
|
if (log->sort_by_number)
|
2022-07-14 17:43:49 +02:00
|
|
|
STABLE_QSORT(log->list.items, log->list.nr,
|
2016-01-18 21:02:59 +01:00
|
|
|
log->summary ? compare_by_counter : compare_by_list);
|
2008-02-26 00:24:14 +01:00
|
|
|
for (i = 0; i < log->list.nr; i++) {
|
2016-01-18 21:02:59 +01:00
|
|
|
const struct string_list_item *item = &log->list.items[i];
|
2008-02-26 00:24:14 +01:00
|
|
|
if (log->summary) {
|
2016-06-22 17:01:49 +02:00
|
|
|
fprintf(log->file, "%6d\t%s\n",
|
|
|
|
(int)UTIL_TO_INT(item), item->string);
|
2006-11-21 21:49:45 +01:00
|
|
|
} else {
|
2016-01-18 21:02:59 +01:00
|
|
|
struct string_list *onelines = item->util;
|
2022-03-07 16:27:08 +01:00
|
|
|
fprintf(log->file, "%s (%"PRIuMAX"):\n",
|
|
|
|
item->string, (uintmax_t)onelines->nr);
|
|
|
|
for (j = onelines->nr; j >= 1; j--) {
|
|
|
|
const char *msg = onelines->items[j - 1].string;
|
2007-04-08 10:28:00 +02:00
|
|
|
|
2008-02-26 00:24:14 +01:00
|
|
|
if (log->wrap_lines) {
|
2010-02-19 23:15:01 +01:00
|
|
|
strbuf_reset(&sb);
|
|
|
|
add_wrapped_shortlog_msg(&sb, msg, log);
|
2016-06-22 17:01:49 +02:00
|
|
|
fwrite(sb.buf, sb.len, 1, log->file);
|
2007-04-08 10:28:00 +02:00
|
|
|
}
|
|
|
|
else
|
2016-06-22 17:01:49 +02:00
|
|
|
fprintf(log->file, " %s\n", msg);
|
2006-12-22 22:15:59 +01:00
|
|
|
}
|
2016-06-22 17:01:49 +02:00
|
|
|
putc('\n', log->file);
|
2016-01-18 21:02:59 +01:00
|
|
|
onelines->strdup_strings = 1;
|
|
|
|
string_list_clear(onelines, 0);
|
|
|
|
free(onelines);
|
2006-10-22 13:23:31 +02:00
|
|
|
}
|
|
|
|
|
2008-02-26 00:24:14 +01:00
|
|
|
log->list.items[i].util = NULL;
|
2006-10-22 13:23:31 +02:00
|
|
|
}
|
|
|
|
|
2010-02-19 23:15:01 +01:00
|
|
|
strbuf_release(&sb);
|
2008-07-21 20:03:49 +02:00
|
|
|
log->list.strdup_strings = 1;
|
|
|
|
string_list_clear(&log->list, 1);
|
2009-02-08 15:34:30 +01:00
|
|
|
clear_mailmap(&log->mailmap);
|
2022-10-24 20:55:39 +02:00
|
|
|
string_list_clear(&log->format, 0);
|
2006-10-22 13:23:31 +02:00
|
|
|
}
|