git-svn: only look at the new parts of svn:mergeinfo

In a Subversion repository where many feature branches are merged into a
trunk, the svn:mergeinfo property can grow very large. This severely
slows down git-svn's make_log_entry() because it is checking all
mergeinfo entries every time the property changes.

In most cases, the additions to svn:mergeinfo since the last commit are
pretty small, and there is nothing to gain by checking merges that were
already checked for the last commit in the branch.

Add a mergeinfo_changes() function which computes the set of interesting
changes to svn:mergeinfo since the last commit. Filter out merged
branches whose ranges haven't changed, and remove a common prefix of
ranges from other merged branches.

This speeds up "git svn fetch" by several orders of magnitude on a large
repository where thousands of feature branches have been merged.

Signed-off-by: Jakob Stoklund Olesen <stoklund@2pi.dk>
Signed-off-by: Eric Wong <normalperson@yhbt.net>
This commit is contained in:
Jakob Stoklund Olesen 2014-04-16 23:54:05 -07:00 committed by Eric Wong
parent fbecd99861
commit abfef3bbf5

View File

@ -1178,7 +1178,7 @@ sub find_parent_branch {
or die "SVN connection failed somewhere...\n"; or die "SVN connection failed somewhere...\n";
} }
print STDERR "Successfully followed parent\n" unless $::_q > 1; print STDERR "Successfully followed parent\n" unless $::_q > 1;
return $self->make_log_entry($rev, [$parent], $ed); return $self->make_log_entry($rev, [$parent], $ed, $r0, $branch_from);
} }
return undef; return undef;
} }
@ -1210,7 +1210,7 @@ sub do_fetch {
unless ($self->ra->gs_do_update($last_rev, $rev, $self, $ed)) { unless ($self->ra->gs_do_update($last_rev, $rev, $self, $ed)) {
die "SVN connection failed somewhere...\n"; die "SVN connection failed somewhere...\n";
} }
$self->make_log_entry($rev, \@parents, $ed); $self->make_log_entry($rev, \@parents, $ed, $last_rev);
} }
sub mkemptydirs { sub mkemptydirs {
@ -1478,9 +1478,9 @@ sub find_extra_svk_parents {
sub lookup_svn_merge { sub lookup_svn_merge {
my $uuid = shift; my $uuid = shift;
my $url = shift; my $url = shift;
my $merge = shift; my $source = shift;
my $revs = shift;
my ($source, $revs) = split ":", $merge;
my $path = $source; my $path = $source;
$path =~ s{^/}{}; $path =~ s{^/}{};
my $gs = Git::SVN->find_by_url($url.$source, $url, $path); my $gs = Git::SVN->find_by_url($url.$source, $url, $path);
@ -1702,6 +1702,62 @@ sub parents_exclude {
return @excluded; return @excluded;
} }
# Compute what's new in svn:mergeinfo.
#
# Arguments:
#   $old_path, $old_rev - path and revision the previous mergeinfo is
#                         read from (served from the per-object cache
#                         when the cached revision for $old_path matches)
#   $path, $rev         - path and revision $mergeinfo_prop belongs to;
#                         the parsed property is cached under these
#   $mergeinfo_prop     - raw svn:mergeinfo value, one "source:ranges"
#                         entry per line
#
# Returns a hash ref mapping each merge source whose range list differs
# from the old one to the part of its new range list that follows the
# ranges shared with the old list.  Sources whose range list is unchanged
# are omitted.  Returns an empty hash ref (after a warning) when
# $old_path is not a directory in r$old_rev.
sub mergeinfo_changes {
	my ($self, $old_path, $old_rev, $path, $rev, $mergeinfo_prop) = @_;

	# Parse a raw svn:mergeinfo value into { source => ranges }.
	# The entry is split at the LAST colon because the source path
	# itself may contain colons; lines without any colon are skipped
	# so that a malformed entry cannot shift the remaining pairs.
	my $parse_mergeinfo = sub {
		my ($prop) = @_;
		my %ranges_of;
		foreach my $line (split "\n", $prop) {
			my $colon = rindex $line, ":";
			next if $colon < 0;
			$ranges_of{substr $line, 0, $colon} =
				substr $line, $colon + 1;
		}
		return \%ranges_of;
	};

	my $minfo = $parse_mergeinfo->($mergeinfo_prop);
	my $old_minfo = {};

	# Initialize cache on the first call.
	unless (defined $self->{cached_mergeinfo_rev}) {
		$self->{cached_mergeinfo_rev} = {};
		$self->{cached_mergeinfo} = {};
	}

	my $cached_rev = $self->{cached_mergeinfo_rev}{$old_path};
	if (defined $cached_rev && $cached_rev == $old_rev) {
		$old_minfo = $self->{cached_mergeinfo}{$old_path};
	} else {
		my $ra = $self->ra;
		# Give up if $old_path isn't in the repo.
		# This is probably a merge on a subtree.
		if ($ra->check_path($old_path, $old_rev) != $SVN::Node::dir) {
			warn "W: ignoring svn:mergeinfo on $old_path, ",
				"directory didn't exist in r$old_rev\n";
			return {};
		}
		my (undef, undef, $props) = $ra->get_dir($old_path, $old_rev);
		if (defined $props->{"svn:mergeinfo"}) {
			$old_minfo =
				$parse_mergeinfo->($props->{"svn:mergeinfo"});
		}
		$self->{cached_mergeinfo}{$old_path} = $old_minfo;
		$self->{cached_mergeinfo_rev}{$old_path} = $old_rev;
	}

	# Cache the new mergeinfo.
	$self->{cached_mergeinfo}{$path} = $minfo;
	$self->{cached_mergeinfo_rev}{$path} = $rev;

	my %changes = ();
	foreach my $source (keys %$minfo) {
		# Named variables rather than $a/$b, which would shadow
		# the variables used by sort.
		my $old_ranges = $old_minfo->{$source};
		$old_ranges = "" unless defined $old_ranges;
		my $new_ranges = $minfo->{$source};

		# Omit merged branches whose range lists are unchanged.
		next if $old_ranges eq $new_ranges;

		# Remove any common range list prefix.  A string XOR gives
		# "\0" wherever both lists hold the same character, so the
		# run of leading "\0"s is the length of the shared prefix.
		# Cut at the last comma inside that prefix so that only
		# whole ranges are dropped; with no such comma rindex
		# returns -1 and the full new list is kept.
		($old_ranges ^ $new_ranges) =~ /^[\0]*/;
		my $shared_len = $+[0];
		my $last_comma = rindex $new_ranges, ",", $shared_len - 1;
		$changes{$source} = substr $new_ranges, $last_comma + 1;
	}
	print STDERR "Checking svn:mergeinfo changes since r$old_rev: ",
		scalar(keys %$minfo), " sources, ",
		scalar(keys %changes), " changed\n";

	return \%changes;
}
# note: this function should only be called if the various dirprops # note: this function should only be called if the various dirprops
# have actually changed # have actually changed
@ -1715,14 +1771,15 @@ sub find_extra_svn_parents {
# history. Then, we figure out which git revisions are in # history. Then, we figure out which git revisions are in
# that tip, but not this revision. If all of those revisions # that tip, but not this revision. If all of those revisions
# are now marked as merge, we can add the tip as a parent. # are now marked as merge, we can add the tip as a parent.
my @merges = split "\n", $mergeinfo; my @merges = sort keys %$mergeinfo;
my @merge_tips; my @merge_tips;
my $url = $self->url; my $url = $self->url;
my $uuid = $self->ra_uuid; my $uuid = $self->ra_uuid;
my @all_ranges; my @all_ranges;
for my $merge ( @merges ) { for my $merge ( @merges ) {
my ($tip_commit, @ranges) = my ($tip_commit, @ranges) =
lookup_svn_merge( $uuid, $url, $merge ); lookup_svn_merge( $uuid, $url,
$merge, $mergeinfo->{$merge} );
unless (!$tip_commit or unless (!$tip_commit or
grep { $_ eq $tip_commit } @$parents ) { grep { $_ eq $tip_commit } @$parents ) {
push @merge_tips, $tip_commit; push @merge_tips, $tip_commit;
@ -1738,8 +1795,9 @@ sub find_extra_svn_parents {
# check merge tips for new parents # check merge tips for new parents
my @new_parents; my @new_parents;
for my $merge_tip ( @merge_tips ) { for my $merge_tip ( @merge_tips ) {
my $spec = shift @merges; my $merge = shift @merges;
next unless $merge_tip and $excluded{$merge_tip}; next unless $merge_tip and $excluded{$merge_tip};
my $spec = "$merge:$mergeinfo->{$merge}";
# check out 'new' tips # check out 'new' tips
my $merge_base; my $merge_base;
@ -1770,7 +1828,7 @@ sub find_extra_svn_parents {
.@incomplete." commit(s) (eg $incomplete[0])\n"; .@incomplete." commit(s) (eg $incomplete[0])\n";
} else { } else {
warn warn
"Found merge parent (svn:mergeinfo prop): ", "Found merge parent ($spec): ",
$merge_tip, "\n"; $merge_tip, "\n";
push @new_parents, $merge_tip; push @new_parents, $merge_tip;
} }
@ -1797,7 +1855,7 @@ sub find_extra_svn_parents {
} }
sub make_log_entry { sub make_log_entry {
my ($self, $rev, $parents, $ed) = @_; my ($self, $rev, $parents, $ed, $parent_rev, $parent_path) = @_;
my $untracked = $self->get_untracked($ed); my $untracked = $self->get_untracked($ed);
my @parents = @$parents; my @parents = @$parents;
@ -1809,10 +1867,12 @@ sub make_log_entry {
($ed, $props->{"svk:merge"}, \@parents); ($ed, $props->{"svk:merge"}, \@parents);
} }
if ( $props->{"svn:mergeinfo"} ) { if ( $props->{"svn:mergeinfo"} ) {
my $mi_changes = $self->mergeinfo_changes
($parent_path || $path, $parent_rev,
$path, $rev,
$props->{"svn:mergeinfo"});
$self->find_extra_svn_parents $self->find_extra_svn_parents
($ed, ($ed, $mi_changes, \@parents);
$props->{"svn:mergeinfo"},
\@parents);
} }
} }