Merge branch 'es/contacts'

A helper to read from a set of format-patch output files or a range
of commits and find those who may have insight into the code that
the changes touch by running a series of "git blame" commands.

* es/contacts:
  contrib: contacts: add documentation
  contrib: contacts: add mailmap support
  contrib: contacts: interpret committish akin to format-patch
  contrib: contacts: add ability to parse from committish
  contrib: add git-contacts helper
This commit is contained in:
Junio C Hamano 2013-07-24 19:22:57 -07:00
commit 4274cdf44a
2 changed files with 282 additions and 0 deletions

188
contrib/contacts/git-contacts Executable file
View File

@ -0,0 +1,188 @@
#!/usr/bin/perl
# List people who might be interested in a patch. Useful as the argument to
# git-send-email's --cc-cmd option, and in other situations.
#
# Usage: git contacts <file | rev-list option> ...
use strict;
use warnings;
use IPC::Open2;
# Value handed to "git blame --since" by get_blame().
my $since = '5-years-ago';
# A contact is printed only if it appears in at least this percentage of the
# blamed commits.
my $min_percent = 10;
# Labels (matched case-insensitively) of commit-message lines whose
# "Name <email>" value is recorded as a contact by parse_commit().
my $labels_rx = qr/Signed-off-by|Reviewed-by|Acked-by|Cc/i;
# Commit ids that get_blame() must not add to the set of blamed commits:
# the commits under review themselves plus those it has already recorded.
my %seen;
# Render one contact for output.
#   $name  - person's name; may be undef or empty when unknown
#   $email - e-mail address without the surrounding angle brackets
# Returns "Name <email>", or just "<email>" when no name is available, so
# that a missing name yields neither a leading space nor an "uninitialized"
# warning.
sub format_contact {
    my ($name, $email) = @_;
    return "<$email>" unless defined($name) && length($name);
    return "$name <$email>";
}
# Extract contacts from one raw commit object.
#   $commit - hashref whose {contacts} hashref receives the results
#   $data   - raw commit text (header lines, a blank line, then the message)
# The author named in the header and every person named on a message line
# whose label matches $labels_rx are stored as keys of $commit->{contacts}.
sub parse_commit {
    my ($commit, $data) = @_;
    my $found = $commit->{contacts};
    my $in_header = 1;
    foreach my $line (split(/^/m, $data)) {
        if ($in_header) {
            if ($line =~ /^author ([^<>]+) <(\S+)> .+$/) {
                $found->{format_contact($1, $2)} = 1;
                next;
            }
            # The first blank line separates the header from the message.
            $in_header = 0 if $line =~ /^$/;
            next;
        }
        next unless $line =~ /^$labels_rx:\s+([^<>]+)\s+<(\S+?)>$/o;
        $found->{format_contact($1, $2)} = 1;
    }
}
# Load every commit listed in %$commits through one "git cat-file --batch"
# process and hand its raw text to parse_commit().
#   $commits - hashref mapping commit id => commit record
# Does nothing for an empty hash. Replies that are not of type "commit" are
# skipped. Dies if git answers with a different id than the one requested or
# if the git process exits with a non-zero status.
sub import_commits {
    my ($commits) = @_;
    return unless %$commits;
    my ($from_git, $to_git);
    my $pid = open2($from_git, $to_git, qw(git cat-file --batch));
    foreach my $wanted (keys(%$commits)) {
        print $to_git "$wanted\n";
        my $header = <$from_git>;
        next unless $header =~ /^([0-9a-f]{40}) commit (\d+)/;
        my ($got, $size) = ($1, $2);
        die "expected $wanted but got $got\n" if $wanted ne $got;
        # The object is followed by one extra newline, so consume size+1
        # bytes to stay in sync with the next reply.
        my $payload;
        read($from_git, $payload, $size + 1);
        parse_commit($commits->{$wanted}, $payload);
    }
    close $from_git;
    close $to_git;
    waitpid($pid, 0);
    die "git-cat-file error: $?\n" if $?;
}
# Blame one hunk's worth of lines and record the commits responsible.
#   $commits - hashref receiving a fresh record for each newly found commit
#   $source  - path of the file to blame
#   $start   - first line of the range
#   $len     - number of lines in the range; undef means 1, 0 means no-op
#   $from    - commit id; blame is run against its first parent ("$from^")
# Commit ids already present in %seen are not added to $commits again.
sub get_blame {
    my ($commits, $source, $start, $len, $from) = @_;
    $len = 1 if !defined($len);
    return if $len == 0;
    my @cmd = (
        qw(git blame --porcelain -C),
        '-L', "$start,+$len",
        '--since', $since,
        "$from^", '--', $source,
    );
    open(my $blame, '-|', @cmd) or die;
    while (my $line = <$blame>) {
        # Only lines of the form "<sha> <n> <n> <n>" name a commit.
        next unless $line =~ /^([0-9a-f]{40}) \d+ \d+ \d+$/;
        my $sha = $1;
        if (!$seen{$sha}) {
            $commits->{$sha} = { id => $sha, contacts => {} };
        }
        $seen{$sha} = 1;
    }
    close $blame;
}
# Read patch text from a filehandle and blame every hunk found in it.
#   $commits - hashref of blamed commits, filled in through get_blame()
#   $id      - commit id the patch text belongs to; may be undef, in which
#              case it is taken from the next
#              "From <sha> Mon Sep 17 00:00:00 2001" line
#   $f       - filehandle to read the patch text from
# Lines seen before any commit id is known are ignored. Every commit id
# picked up from a "From " line is marked in %seen.
sub scan_patches {
    my ($commits, $id, $f) = @_;
    my $source;
    while (my $line = <$f>) {
        if ($line =~ /^From ([0-9a-f]{40}) Mon Sep 17 00:00:00 2001$/) {
            $id = $1;
            $seen{$id} = 1;
        }
        next unless $id;
        if ($line =~ m{^--- (?:a/(.+)|/dev/null)$}) {
            # $1 is undef for /dev/null (a newly added file), which leaves
            # nothing to blame for the hunks that follow.
            $source = $1;
        } elsif ($line =~ /^@@ -(\d+)(?:,(\d+))?/ && defined($source)) {
            # defined() rather than truthiness so that a file literally
            # named "0" is still blamed.
            get_blame($commits, $source, $1, $2, $id);
        }
        # Any other line starting with "--- " is ignored on purpose: a
        # removed line whose content begins with "-- " (SQL or Lua comment,
        # mail signature separator) shows up in a diff exactly like that,
        # as can text in a commit message, and must not abort the run.
    }
}
# Scan one patch file on disk.
#   $commits - hashref of blamed commits, passed through to scan_patches()
#   $file    - path of the patch file
# Dies with "read failure" if the file cannot be opened.
sub scan_patch_file {
    my ($commits, $file) = @_;
    my $fh;
    unless (open($fh, '<', $file)) {
        die "read failure: $file: $!\n";
    }
    # No commit id is known up front; scan_patches() learns it from the file.
    scan_patches($commits, undef, $fh);
    close $fh;
}
# Turn command-line revision arguments into arguments for "git rev-list".
# The arguments are first normalized by "git rev-parse --revs-only
# --default HEAD --symbolic". When that yields exactly one revision, it is
# paired with HEAD: "R" becomes ("^R", "HEAD"), while a revision that
# already starts with "-" is kept as is and followed by "HEAD". Any other
# number of revisions is returned unchanged.
sub parse_rev_args {
    my @args = @_;
    my @cmd = (qw(git rev-parse --revs-only --default HEAD --symbolic), @args);
    open(my $out, '-|', @cmd) or die;
    my @revs = <$out>;
    chomp(@revs);
    close $out;
    return @revs unless @revs == 1;
    my $only = $revs[0];
    return ($only =~ /^-/ ? $only : "^$only", 'HEAD');
}
# Scan the patches of every commit selected by the revision arguments.
#   $commits - hashref of blamed commits, passed through to scan_patches()
#   $args    - arrayref of revision arguments from the command line
# Each commit listed by "git rev-list --reverse" is marked in %seen and its
# "git show -C --oneline" output is fed to scan_patches().
sub scan_rev_args {
    my ($commits, $args) = @_;
    my @revs = parse_rev_args(@$args);
    open(my $rev_list, '-|', qw(git rev-list --reverse), @revs) or die;
    while (my $sha = <$rev_list>) {
        chomp $sha;
        $seen{$sha} = 1;
        open(my $show, '-|', qw(git show -C --oneline), $sha) or die;
        scan_patches($commits, $sha, $show);
        close $show;
    }
    close $rev_list;
}
# Canonicalize contacts through "git check-mailmap --stdin".
#   $contacts - hashref mapping contact string => count
# Returns a new hashref keyed by the canonical contact strings; the counts
# of contacts that map to the same canonical string are added together.
# Dies if the git process exits with a non-zero status.
sub mailmap_contacts {
    my ($contacts) = @_;
    my %mapped;
    my ($from_git, $to_git);
    my $pid = open2($from_git, $to_git, qw(git check-mailmap --stdin));
    foreach my $raw (keys(%$contacts)) {
        # One line is written per contact and one reply line is read back.
        print $to_git "$raw\n";
        my $canonical = <$from_git>;
        chomp $canonical;
        $mapped{$canonical} += $contacts->{$raw};
    }
    close $from_git;
    close $to_git;
    waitpid($pid, 0);
    die "git-check-mailmap error: $?\n" if $?;
    return \%mapped;
}
# Main program: split the arguments into patch files and revision
# arguments, blame everything, then print the contacts that matter.
die "No input revisions or patch files\n" unless @ARGV;

# An argument naming an existing path is a patch file; anything else is
# treated as a revision argument.
my (@files, @rev_args);
foreach my $arg (@ARGV) {
    if (-e $arg) {
        push @files, $arg;
    } else {
        push @rev_args, $arg;
    }
}

my %commits;
foreach my $file (@files) {
    scan_patch_file(\%commits, $file);
}
scan_rev_args(\%commits, \@rev_args) if @rev_args;
import_commits(\%commits);

# Count, for every contact, the number of blamed commits mentioning it.
my $contacts = {};
foreach my $commit (values %commits) {
    $contacts->{$_}++ for keys %{$commit->{contacts}};
}
$contacts = mailmap_contacts($contacts);

# Print only the contacts whose share of the blamed commits reaches
# $min_percent.
my $ncommits = scalar(keys %commits);
foreach my $contact (keys %$contacts) {
    my $percent = $contacts->{$contact} * 100 / $ncommits;
    print "$contact\n" unless $percent < $min_percent;
}

View File

@ -0,0 +1,94 @@
git-contacts(1)
===============
NAME
----
git-contacts - List people who might be interested in a set of changes
SYNOPSIS
--------
[verse]
'git contacts' (<patch>|<range>|<rev>)...
DESCRIPTION
-----------
Given a set of changes, specified as patch files or revisions, determine people
who might be interested in those changes. This is done by consulting the
history of each patch or revision hunk to find people mentioned by commits
which touched the lines of files under consideration.
Input consists of one or more patch files or revision arguments. A revision
argument can be a range or a single `<rev>` which is interpreted as
`<rev>..HEAD`, thus the same revision arguments are accepted as for
linkgit:git-format-patch[1]. Patch files and revision arguments can be combined
in the same invocation.
This command can be useful for determining the list of people with whom to
discuss proposed changes, or for finding the list of recipients to Cc: when
submitting a patch series via `git send-email`. For the latter case, `git
contacts` can be used as the argument to `git send-email`'s `--cc-cmd` option.
DISCUSSION
----------
`git blame` is invoked for each hunk in a patch file or revision. For each
commit mentioned by `git blame`, the commit message is consulted for people who
authored, reviewed, signed, acknowledged, or were Cc:'d. Once the list of
participants is known, each person's relevance is computed by considering how
many commits mentioned that person compared with the total number of commits
under consideration. The final output consists only of participants who meet or
exceed a minimum threshold of participation.
OUTPUT
------
For each person of interest, a single line is output, terminated by a newline.
If the person's name is known, ``Name $$<user@host>$$'' is printed; otherwise
only ``$$<user@host>$$'' is printed.
EXAMPLES
--------
* Consult patch files:
+
------------
$ git contacts feature/*.patch
------------
* Revision range:
+
------------
$ git contacts R1..R2
------------
* From a single revision to `HEAD`:
+
------------
$ git contacts origin
------------
* Helper for `git send-email`:
+
------------
$ git send-email --cc-cmd='git contacts' feature/*.patch
------------
LIMITATIONS
-----------
Several conditions controlling a person's significance are currently
hard-coded, such as minimum participation level (10%), blame date-limiting (5
years), and `-C` level for detecting moved and copied lines (a single `-C`). In
the future, these conditions may become configurable.
GIT
---
Part of the linkgit:git[1] suite