git-commit-vandalism/contrib/contacts/git-contacts
Eric Sunshine db8cae7e60 contacts: gather all blame sources prior to invoking git-blame
git-contacts invokes git-blame immediately upon encountering a patch
hunk. No attempt is made to consolidate invocations for multiple hunks
referencing the same file at the same revision. This can become
expensive quickly.

Any effort to reduce the number of times git-blame is run will need to
to know in advance which line ranges to blame per file per revision.
Make this information available by collecting all sources as a distinct
step from invoking git-blame.  A subsequent patch will utilize the
information to optimize git-blame invocations.

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-13 09:09:01 -07:00

203 lines
4.4 KiB
Perl
Executable File

#!/usr/bin/perl
# List people who might be interested in a patch. Useful as the argument to
# git-send-email --cc-cmd option, and in other situations.
#
# Usage: git contacts <file | rev-list option> ...
use strict;
use warnings;
use IPC::Open2;
my $since = '5-years-ago';
my $min_percent = 10;
my $labels_rx = qr/Signed-off-by|Reviewed-by|Acked-by|Cc/i;
my %seen;
sub format_contact {
my ($name, $email) = @_;
return "$name <$email>";
}
sub parse_commit {
my ($commit, $data) = @_;
my $contacts = $commit->{contacts};
my $inbody = 0;
for (split(/^/m, $data)) {
if (not $inbody) {
if (/^author ([^<>]+) <(\S+)> .+$/) {
$contacts->{format_contact($1, $2)} = 1;
} elsif (/^$/) {
$inbody = 1;
}
} elsif (/^$labels_rx:\s+([^<>]+)\s+<(\S+?)>$/o) {
$contacts->{format_contact($1, $2)} = 1;
}
}
}
sub import_commits {
my ($commits) = @_;
return unless %$commits;
my $pid = open2 my $reader, my $writer, qw(git cat-file --batch);
for my $id (keys(%$commits)) {
print $writer "$id\n";
my $line = <$reader>;
if ($line =~ /^([0-9a-f]{40}) commit (\d+)/) {
my ($cid, $len) = ($1, $2);
die "expected $id but got $cid\n" unless $id eq $cid;
my $data;
# cat-file emits newline after data, so read len+1
read $reader, $data, $len + 1;
parse_commit($commits->{$id}, $data);
}
}
close $reader;
close $writer;
waitpid($pid, 0);
die "git-cat-file error: $?\n" if $?;
}
sub get_blame {
my ($commits, $source, $start, $len, $from) = @_;
open my $f, '-|',
qw(git blame --porcelain -C), '-L', "$start,+$len",
'--since', $since, "$from^", '--', $source or die;
while (<$f>) {
if (/^([0-9a-f]{40}) \d+ \d+ \d+$/) {
my $id = $1;
$commits->{$id} = { id => $id, contacts => {} }
unless $seen{$id};
$seen{$id} = 1;
}
}
close $f;
}
sub blame_sources {
my ($sources, $commits) = @_;
for my $s (keys %$sources) {
for my $id (keys %{$sources->{$s}}) {
for my $range (@{$sources->{$s}{$id}}) {
get_blame($commits, $s,
$range->[0], $range->[1], $id);
}
}
}
}
sub scan_patches {
my ($sources, $id, $f) = @_;
my $source;
while (<$f>) {
if (/^From ([0-9a-f]{40}) Mon Sep 17 00:00:00 2001$/) {
$id = $1;
$seen{$id} = 1;
}
next unless $id;
if (m{^--- (?:a/(.+)|/dev/null)$}) {
$source = $1;
} elsif (/^--- /) {
die "Cannot parse hunk source: $_\n";
} elsif (/^@@ -(\d+)(?:,(\d+))?/ && $source) {
my $len = defined($2) ? $2 : 1;
push @{$sources->{$source}{$id}}, [$1, $len] if $len;
}
}
}
sub scan_patch_file {
my ($commits, $file) = @_;
open my $f, '<', $file or die "read failure: $file: $!\n";
scan_patches($commits, undef, $f);
close $f;
}
sub parse_rev_args {
my @args = @_;
open my $f, '-|',
qw(git rev-parse --revs-only --default HEAD --symbolic), @args
or die;
my @revs;
while (<$f>) {
chomp;
push @revs, $_;
}
close $f;
return @revs if scalar(@revs) != 1;
return "^$revs[0]", 'HEAD' unless $revs[0] =~ /^-/;
return $revs[0], 'HEAD';
}
sub scan_rev_args {
my ($commits, $args) = @_;
my @revs = parse_rev_args(@$args);
open my $f, '-|', qw(git rev-list --reverse), @revs or die;
while (<$f>) {
chomp;
my $id = $_;
$seen{$id} = 1;
open my $g, '-|', qw(git show -C --oneline), $id or die;
scan_patches($commits, $id, $g);
close $g;
}
close $f;
}
sub mailmap_contacts {
my ($contacts) = @_;
my %mapped;
my $pid = open2 my $reader, my $writer, qw(git check-mailmap --stdin);
for my $contact (keys(%$contacts)) {
print $writer "$contact\n";
my $canonical = <$reader>;
chomp $canonical;
$mapped{$canonical} += $contacts->{$contact};
}
close $reader;
close $writer;
waitpid($pid, 0);
die "git-check-mailmap error: $?\n" if $?;
return \%mapped;
}
if (!@ARGV) {
die "No input revisions or patch files\n";
}
my (@files, @rev_args);
for (@ARGV) {
if (-e) {
push @files, $_;
} else {
push @rev_args, $_;
}
}
my %sources;
for (@files) {
scan_patch_file(\%sources, $_);
}
if (@rev_args) {
scan_rev_args(\%sources, \@rev_args)
}
my %commits;
blame_sources(\%sources, \%commits);
import_commits(\%commits);
my $contacts = {};
for my $commit (values %commits) {
for my $contact (keys %{$commit->{contacts}}) {
$contacts->{$contact}++;
}
}
$contacts = mailmap_contacts($contacts);
my $ncommits = scalar(keys %commits);
for my $contact (keys %$contacts) {
my $percent = $contacts->{$contact} * 100 / $ncommits;
next if $percent < $min_percent;
print "$contact\n";
}