git-commit-vandalism/contrib/contacts/git-contacts
Eric Sunshine 4c70cfbfbc contacts: reduce git-blame invocations
git-contacts invokes git-blame once for each patch hunk it encounters.
No attempt is made to consolidate invocations for multiple hunks
referencing the same file at the same revision. This can become
expensive quickly.

Reduce the number of git-blame invocations by taking advantage of the
ability to specify multiple -L ranges for a single invocation.

Without this patch, on a randomly chosen range of commits:

  % time git-contacts 25fba78d36be6297^..23c339c0f262aad2 >/dev/null
  real  0m6.142s
  user  0m5.429s
  sys   0m0.356s

With this patch:

  % time git-contacts 25fba78d36be6297^..23c339c0f262aad2 >/dev/null
  real  0m2.285s
  user  0m2.093s
  sys   0m0.165s

Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-13 09:09:03 -07:00

202 lines
4.3 KiB
Perl
Executable File

#!/usr/bin/perl
# List people who might be interested in a patch. Useful as the argument to
# git-send-email --cc-cmd option, and in other situations.
#
# Usage: git contacts <file | rev-list option> ...
use strict;
use warnings;
use IPC::Open2;
my $since = '5-years-ago';
my $min_percent = 10;
my $labels_rx = qr/Signed-off-by|Reviewed-by|Acked-by|Cc/i;
my %seen;
sub format_contact {
my ($name, $email) = @_;
return "$name <$email>";
}
sub parse_commit {
my ($commit, $data) = @_;
my $contacts = $commit->{contacts};
my $inbody = 0;
for (split(/^/m, $data)) {
if (not $inbody) {
if (/^author ([^<>]+) <(\S+)> .+$/) {
$contacts->{format_contact($1, $2)} = 1;
} elsif (/^$/) {
$inbody = 1;
}
} elsif (/^$labels_rx:\s+([^<>]+)\s+<(\S+?)>$/o) {
$contacts->{format_contact($1, $2)} = 1;
}
}
}
sub import_commits {
my ($commits) = @_;
return unless %$commits;
my $pid = open2 my $reader, my $writer, qw(git cat-file --batch);
for my $id (keys(%$commits)) {
print $writer "$id\n";
my $line = <$reader>;
if ($line =~ /^([0-9a-f]{40}) commit (\d+)/) {
my ($cid, $len) = ($1, $2);
die "expected $id but got $cid\n" unless $id eq $cid;
my $data;
# cat-file emits newline after data, so read len+1
read $reader, $data, $len + 1;
parse_commit($commits->{$id}, $data);
}
}
close $reader;
close $writer;
waitpid($pid, 0);
die "git-cat-file error: $?\n" if $?;
}
sub get_blame {
my ($commits, $source, $from, $ranges) = @_;
return unless @$ranges;
open my $f, '-|',
qw(git blame --porcelain -C),
map({"-L$_->[0],+$_->[1]"} @$ranges),
'--since', $since, "$from^", '--', $source or die;
while (<$f>) {
if (/^([0-9a-f]{40}) \d+ \d+ \d+$/) {
my $id = $1;
$commits->{$id} = { id => $id, contacts => {} }
unless $seen{$id};
$seen{$id} = 1;
}
}
close $f;
}
sub blame_sources {
my ($sources, $commits) = @_;
for my $s (keys %$sources) {
for my $id (keys %{$sources->{$s}}) {
get_blame($commits, $s, $id, $sources->{$s}{$id});
}
}
}
sub scan_patches {
my ($sources, $id, $f) = @_;
my $source;
while (<$f>) {
if (/^From ([0-9a-f]{40}) Mon Sep 17 00:00:00 2001$/) {
$id = $1;
$seen{$id} = 1;
}
next unless $id;
if (m{^--- (?:a/(.+)|/dev/null)$}) {
$source = $1;
} elsif (/^--- /) {
die "Cannot parse hunk source: $_\n";
} elsif (/^@@ -(\d+)(?:,(\d+))?/ && $source) {
my $len = defined($2) ? $2 : 1;
push @{$sources->{$source}{$id}}, [$1, $len] if $len;
}
}
}
sub scan_patch_file {
my ($commits, $file) = @_;
open my $f, '<', $file or die "read failure: $file: $!\n";
scan_patches($commits, undef, $f);
close $f;
}
sub parse_rev_args {
my @args = @_;
open my $f, '-|',
qw(git rev-parse --revs-only --default HEAD --symbolic), @args
or die;
my @revs;
while (<$f>) {
chomp;
push @revs, $_;
}
close $f;
return @revs if scalar(@revs) != 1;
return "^$revs[0]", 'HEAD' unless $revs[0] =~ /^-/;
return $revs[0], 'HEAD';
}
sub scan_rev_args {
my ($commits, $args) = @_;
my @revs = parse_rev_args(@$args);
open my $f, '-|', qw(git rev-list --reverse), @revs or die;
while (<$f>) {
chomp;
my $id = $_;
$seen{$id} = 1;
open my $g, '-|', qw(git show -C --oneline), $id or die;
scan_patches($commits, $id, $g);
close $g;
}
close $f;
}
sub mailmap_contacts {
my ($contacts) = @_;
my %mapped;
my $pid = open2 my $reader, my $writer, qw(git check-mailmap --stdin);
for my $contact (keys(%$contacts)) {
print $writer "$contact\n";
my $canonical = <$reader>;
chomp $canonical;
$mapped{$canonical} += $contacts->{$contact};
}
close $reader;
close $writer;
waitpid($pid, 0);
die "git-check-mailmap error: $?\n" if $?;
return \%mapped;
}
if (!@ARGV) {
die "No input revisions or patch files\n";
}
my (@files, @rev_args);
for (@ARGV) {
if (-e) {
push @files, $_;
} else {
push @rev_args, $_;
}
}
my %sources;
for (@files) {
scan_patch_file(\%sources, $_);
}
if (@rev_args) {
scan_rev_args(\%sources, \@rev_args)
}
my %commits;
blame_sources(\%sources, \%commits);
import_commits(\%commits);
my $contacts = {};
for my $commit (values %commits) {
for my $contact (keys %{$commit->{contacts}}) {
$contacts->{$contact}++;
}
}
$contacts = mailmap_contacts($contacts);
my $ncommits = scalar(keys %commits);
for my $contact (keys %$contacts) {
my $percent = $contacts->{$contact} * 100 / $ncommits;
next if $percent < $min_percent;
print "$contact\n";
}