Diffstat (limited to 'perl/Git/SVN.pm')
 -rw-r--r--   perl/Git/SVN.pm   238
 1 file changed, 169 insertions(+), 69 deletions(-)
diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm
index a59564f..b2c14e2 100644
--- a/perl/Git/SVN.pm
+++ b/perl/Git/SVN.pm
@@ -9,11 +9,10 @@ use vars qw/$_no_metadata
$_use_log_author $_add_author_from $_localtime/;
use Carp qw/croak/;
use File::Path qw/mkpath/;
-use File::Copy qw/copy/;
use IPC::Open3;
use Memoize; # core since 5.8.0, Jul 2002
-use Memoize::Storable;
use POSIX qw(:signal_h);
+use Time::Local;
use Git qw(
command
@@ -32,11 +31,7 @@ use Git::SVN::Utils qw(
add_path_to_url
);
-my $can_use_yaml;
-BEGIN {
- $can_use_yaml = eval { require Git::SVN::Memoize::YAML; 1};
-}
-
+my $memo_backend;
our $_follow_parent = 1;
our $_minimize_url = 'unset';
our $default_repo_id = 'svn';
@@ -1178,7 +1173,7 @@ sub find_parent_branch {
or die "SVN connection failed somewhere...\n";
}
print STDERR "Successfully followed parent\n" unless $::_q > 1;
- return $self->make_log_entry($rev, [$parent], $ed);
+ return $self->make_log_entry($rev, [$parent], $ed, $r0, $branch_from);
}
return undef;
}
@@ -1210,26 +1205,93 @@ sub do_fetch {
unless ($self->ra->gs_do_update($last_rev, $rev, $self, $ed)) {
die "SVN connection failed somewhere...\n";
}
- $self->make_log_entry($rev, \@parents, $ed);
+ $self->make_log_entry($rev, \@parents, $ed, $last_rev, $self->path);
}
sub mkemptydirs {
my ($self, $r) = @_;
+ # add/remove/collect a paths table
+ #
+ # Paths are split into a tree of nodes, stored as a hash of hashes.
+ #
+ # Each node contains a 'path' entry for the path (if any) associated
+ # with that node and a 'children' entry for any nodes under that
+ # location.
+ #
+ # Removing a path requires a hash lookup for each component then
+ # dropping that node (and anything under it), which is substantially
+ # faster than a grep slice into a single hash of paths for large
+ # numbers of paths.
+ #
+ # For a large (200K) number of empty_dir directives this reduces
+ # scanning time to 3 seconds vs 10 minutes for grep+delete on a single
+ # hash of paths.
+ sub add_path {
+ my ($paths_table, $path) = @_;
+ my $node_ref;
+
+ foreach my $x (split('/', $path)) {
+ if (!exists($paths_table->{$x})) {
+ $paths_table->{$x} = { children => {} };
+ }
+
+ $node_ref = $paths_table->{$x};
+ $paths_table = $paths_table->{$x}->{children};
+ }
+
+ $node_ref->{path} = $path;
+ }
+
+ sub remove_path {
+ my ($paths_table, $path) = @_;
+ my $nodes_ref;
+ my $node_name;
+
+ foreach my $x (split('/', $path)) {
+ if (!exists($paths_table->{$x})) {
+ return;
+ }
+
+ $nodes_ref = $paths_table;
+ $node_name = $x;
+
+ $paths_table = $paths_table->{$x}->{children};
+ }
+
+ delete($nodes_ref->{$node_name});
+ }
+
+ sub collect_paths {
+ my ($paths_table, $paths_ref) = @_;
+
+ foreach my $v (values %$paths_table) {
+ my $p = $v->{path};
+ my $c = $v->{children};
+
+ collect_paths($c, $paths_ref);
+
+ if (defined($p)) {
+ push(@$paths_ref, $p);
+ }
+ }
+ }
+
sub scan {
- my ($r, $empty_dirs, $line) = @_;
+ my ($r, $paths_table, $line) = @_;
if (defined $r && $line =~ /^r(\d+)$/) {
return 0 if $1 > $r;
} elsif ($line =~ /^ \+empty_dir: (.+)$/) {
- $empty_dirs->{$1} = 1;
+ add_path($paths_table, $1);
} elsif ($line =~ /^ \-empty_dir: (.+)$/) {
- my @d = grep {m[^\Q$1\E(/|$)]} (keys %$empty_dirs);
- delete @$empty_dirs{@d};
+ remove_path($paths_table, $1);
}
1; # continue
};
- my %empty_dirs = ();
+ my @empty_dirs;
+ my %paths_table;
+
my $gz_file = "$self->{dir}/unhandled.log.gz";
if (-f $gz_file) {
if (!can_compress()) {
@@ -1240,7 +1302,7 @@ sub mkemptydirs {
die "Unable to open $gz_file: $!\n";
my $line;
while ($gz->gzreadline($line) > 0) {
- scan($r, \%empty_dirs, $line) or last;
+ scan($r, \%paths_table, $line) or last;
}
$gz->gzclose;
}
@@ -1249,13 +1311,14 @@ sub mkemptydirs {
if (open my $fh, '<', "$self->{dir}/unhandled.log") {
binmode $fh or croak "binmode: $!";
while (<$fh>) {
- scan($r, \%empty_dirs, $_) or last;
+ scan($r, \%paths_table, $_) or last;
}
close $fh;
}
+ collect_paths(\%paths_table, \@empty_dirs);
my $strip = qr/\A\Q@{[$self->path]}\E(?:\/|$)/;
- foreach my $d (sort keys %empty_dirs) {
+ foreach my $d (sort @empty_dirs) {
$d = uri_decode($d);
$d =~ s/$strip//;
next unless length($d);
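To make the shape of the new paths table concrete, here is a minimal usage sketch. It is not part of the patch: it assumes the add_path(), remove_path() and collect_paths() helpers from the hunks above are in scope (they are ordinary package subs even though they are written inside mkemptydirs), and the directory names are made up.

use strict;
use warnings;

my %paths_table;
my @empty_dirs;

# Each path component becomes a nested node of the form
# { children => {...}, path => ... }.
add_path(\%paths_table, "branches/v1/doc");
add_path(\%paths_table, "branches/v1/doc/img");
add_path(\%paths_table, "branches/v2");

# Dropping a path is a single hash delete on its final component, and
# everything beneath it ("branches/v1/doc/img" here) goes with it; this is
# what replaces the old grep-and-delete over a flat hash of paths.
remove_path(\%paths_table, "branches/v1/doc");

# Walk whatever survived back into a flat list for the sorted foreach above.
collect_paths(\%paths_table, \@empty_dirs);
print "$_\n" for sort @empty_dirs;   # prints "branches/v2"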
@@ -1321,7 +1384,7 @@ sub get_untracked {
sub parse_svn_date {
my $date = shift || return '+0000 1970-01-01 00:00:00';
my ($Y,$m,$d,$H,$M,$S) = ($date =~ /^(\d{4})\-(\d\d)\-(\d\d)T
- (\d\d)\:(\d\d)\:(\d\d)\.\d*Z$/x) or
+ (\d\d?)\:(\d\d)\:(\d\d)\.\d*Z$/x) or
croak "Unable to parse date: $date\n";
my $parsed_date; # Set next.
@@ -1332,7 +1395,7 @@ sub parse_svn_date {
$ENV{TZ} = 'UTC';
my $epoch_in_UTC =
- POSIX::strftime('%s', $S, $M, $H, $d, $m - 1, $Y - 1900);
+ Time::Local::timelocal($S, $M, $H, $d, $m - 1, $Y - 1900);
# Determine our local timezone (including DST) at the
# time of $epoch_in_UTC. $Git::SVN::Log::TZ stored the
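The hunk above swaps POSIX::strftime('%s', ...) for Time::Local::timelocal() while $ENV{TZ} is pinned to 'UTC', presumably because '%s' is a non-standard strftime extension that is not available on every platform. A small standalone sketch with made-up date components follows; with TZ forced to UTC, the timelocal() call behaves like timegm(), which states the intent directly.

use strict;
use warnings;
use Time::Local qw(timegm);

# Components as parse_svn_date() would pull them out of an SVN timestamp
# such as 2012-08-07T18:30:00.000000Z (values made up for illustration).
my ($Y, $m, $d, $H, $M, $S) = (2012, 8, 7, 18, 30, 0);

# Interpret the components as UTC and get the corresponding epoch.
my $epoch = timegm($S, $M, $H, $d, $m - 1, $Y - 1900);
print scalar gmtime($epoch), "\n";   # Tue Aug  7 18:30:00 2012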
@@ -1433,7 +1496,7 @@ sub check_author {
}
sub find_extra_svk_parents {
- my ($self, $ed, $tickets, $parents) = @_;
+ my ($self, $tickets, $parents) = @_;
# aha! svk:merge property changed...
my @tickets = split "\n", $tickets;
my @known_parents;
@@ -1478,9 +1541,9 @@ sub find_extra_svk_parents {
sub lookup_svn_merge {
my $uuid = shift;
my $url = shift;
- my $merge = shift;
+ my $source = shift;
+ my $revs = shift;
- my ($source, $revs) = split ":", $merge;
my $path = $source;
$path =~ s{^/}{};
my $gs = Git::SVN->find_by_url($url.$source, $url, $path);
@@ -1537,7 +1600,7 @@ sub _rev_list {
@rv;
}
-sub check_cherry_pick {
+sub check_cherry_pick2 {
my $base = shift;
my $tip = shift;
my $parents = shift;
@@ -1552,7 +1615,8 @@ sub check_cherry_pick {
delete $commits{$commit};
}
}
- return (keys %commits);
+ my @k = (keys %commits);
+ return (scalar @k, $k[0]);
}
sub has_no_changes {
@@ -1577,7 +1641,16 @@ sub tie_for_persistent_memoization {
my $hash = shift;
my $path = shift;
- if ($can_use_yaml) {
+ unless ($memo_backend) {
+ if (eval { require Git::SVN::Memoize::YAML; 1}) {
+ $memo_backend = 1;
+ } else {
+ require Memoize::Storable;
+ $memo_backend = -1;
+ }
+ }
+
+ if ($memo_backend > 0) {
tie %$hash => 'Git::SVN::Memoize::YAML', "$path.yaml";
} else {
tie %$hash => 'Memoize::Storable', "$path.db", 'nstore';
@@ -1597,9 +1670,8 @@ sub tie_for_persistent_memoization {
mkpath([$cache_path]) unless -d $cache_path;
my %lookup_svn_merge_cache;
- my %check_cherry_pick_cache;
+ my %check_cherry_pick2_cache;
my %has_no_changes_cache;
- my %_rev_list_cache;
tie_for_persistent_memoization(\%lookup_svn_merge_cache,
"$cache_path/lookup_svn_merge");
@@ -1608,11 +1680,11 @@ sub tie_for_persistent_memoization {
LIST_CACHE => ['HASH' => \%lookup_svn_merge_cache],
;
- tie_for_persistent_memoization(\%check_cherry_pick_cache,
- "$cache_path/check_cherry_pick");
- memoize 'check_cherry_pick',
+ tie_for_persistent_memoization(\%check_cherry_pick2_cache,
+ "$cache_path/check_cherry_pick2");
+ memoize 'check_cherry_pick2',
SCALAR_CACHE => 'FAULT',
- LIST_CACHE => ['HASH' => \%check_cherry_pick_cache],
+ LIST_CACHE => ['HASH' => \%check_cherry_pick2_cache],
;
tie_for_persistent_memoization(\%has_no_changes_cache,
@@ -1621,14 +1693,6 @@ sub tie_for_persistent_memoization {
SCALAR_CACHE => ['HASH' => \%has_no_changes_cache],
LIST_CACHE => 'FAULT',
;
-
- tie_for_persistent_memoization(\%_rev_list_cache,
- "$cache_path/_rev_list");
- memoize '_rev_list',
- SCALAR_CACHE => 'FAULT',
- LIST_CACHE => ['HASH' => \%_rev_list_cache],
- ;
-
}
sub unmemoize_svn_mergeinfo_functions {
@@ -1636,9 +1700,8 @@ sub tie_for_persistent_memoization {
$memoized = 0;
Memoize::unmemoize 'lookup_svn_merge';
- Memoize::unmemoize 'check_cherry_pick';
+ Memoize::unmemoize 'check_cherry_pick2';
Memoize::unmemoize 'has_no_changes';
- Memoize::unmemoize '_rev_list';
}
sub clear_memoized_mergeinfo_caches {
@@ -1648,7 +1711,8 @@ sub tie_for_persistent_memoization {
return unless -d $cache_path;
for my $cache_file (("$cache_path/lookup_svn_merge",
- "$cache_path/check_cherry_pick",
+ "$cache_path/check_cherry_pick", # old
+ "$cache_path/check_cherry_pick2",
"$cache_path/has_no_changes")) {
for my $suffix (qw(yaml db)) {
my $file = "$cache_file.$suffix";
@@ -1702,11 +1766,49 @@ sub parents_exclude {
return @excluded;
}
+# Compute what's new in svn:mergeinfo.
+sub mergeinfo_changes {
+ my ($self, $old_path, $old_rev, $path, $rev, $mergeinfo_prop) = @_;
+ my %minfo = map {split ":", $_ } split "\n", $mergeinfo_prop;
+ my $old_minfo = {};
+
+ my $ra = $self->ra;
+ # Give up if $old_path isn't in the repo.
+ # This is probably a merge on a subtree.
+ if ($ra->check_path($old_path, $old_rev) != $SVN::Node::dir) {
+ warn "W: ignoring svn:mergeinfo on $old_path, ",
+ "directory didn't exist in r$old_rev\n";
+ return {};
+ }
+ my (undef, undef, $props) = $ra->get_dir($old_path, $old_rev);
+ if (defined $props->{"svn:mergeinfo"}) {
+ my %omi = map {split ":", $_ } split "\n",
+ $props->{"svn:mergeinfo"};
+ $old_minfo = \%omi;
+ }
+
+ my %changes = ();
+ foreach my $p (keys %minfo) {
+ my $a = $old_minfo->{$p} || "";
+ my $b = $minfo{$p};
+ # Omit merged branches whose ranges lists are unchanged.
+ next if $a eq $b;
+ # Remove any common range list prefix.
+ ($a ^ $b) =~ /^[\0]*/;
+ my $common_prefix = rindex $b, ",", $+[0] - 1;
+ $changes{$p} = substr $b, $common_prefix + 1;
+ }
+ print STDERR "Checking svn:mergeinfo changes since r$old_rev: ",
+ scalar(keys %minfo), " sources, ",
+ scalar(keys %changes), " changed\n";
+
+ return \%changes;
+}
# note: this function should only be called if the various dirprops
# have actually changed
sub find_extra_svn_parents {
- my ($self, $ed, $mergeinfo, $parents) = @_;
+ my ($self, $mergeinfo, $parents) = @_;
# aha! svk:merge property changed...
memoize_svn_mergeinfo_functions();
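The "common range list prefix" step in mergeinfo_changes() above is terse, so a worked example may help. XOR-ing two strings produces NUL bytes wherever they agree, so the length of the leading NUL run (available as $+[0] after the match) is the length of the common prefix; rindex() then backs up to the last ',' before the first differing byte so that only whole ranges are kept. An illustrative sketch with made-up range lists, not part of the patch:

use strict;
use warnings;

my $old = "1-100,150-200";   # ranges recorded for a source at the parent revision
my $new = "1-100,300-400";   # ranges recorded at the current revision

# "$old ^ $new" is "\0" wherever the strings agree, so the leading NUL run
# measures the common prefix; $+[0] is its end offset after the match.
($old ^ $new) =~ /^[\0]*/;
# Cut at the last comma before the first differing byte so only complete
# ranges are handed on to the cherry-pick check.
my $common_prefix = rindex $new, ",", $+[0] - 1;
print substr($new, $common_prefix + 1), "\n";   # prints "300-400"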
@@ -1715,14 +1817,15 @@ sub find_extra_svn_parents {
# history. Then, we figure out which git revisions are in
# that tip, but not this revision. If all of those revisions
# are now marked as merge, we can add the tip as a parent.
- my @merges = split "\n", $mergeinfo;
+ my @merges = sort keys %$mergeinfo;
my @merge_tips;
my $url = $self->url;
my $uuid = $self->ra_uuid;
my @all_ranges;
for my $merge ( @merges ) {
my ($tip_commit, @ranges) =
- lookup_svn_merge( $uuid, $url, $merge );
+ lookup_svn_merge( $uuid, $url,
+ $merge, $mergeinfo->{$merge} );
unless (!$tip_commit or
grep { $_ eq $tip_commit } @$parents ) {
push @merge_tips, $tip_commit;
@@ -1738,8 +1841,9 @@ sub find_extra_svn_parents {
# check merge tips for new parents
my @new_parents;
for my $merge_tip ( @merge_tips ) {
- my $spec = shift @merges;
+ my $merge = shift @merges;
next unless $merge_tip and $excluded{$merge_tip};
+ my $spec = "$merge:$mergeinfo->{$merge}";
# check out 'new' tips
my $merge_base;
@@ -1759,19 +1863,17 @@ sub find_extra_svn_parents {
}
# double check that there are no missing non-merge commits
- my (@incomplete) = check_cherry_pick(
+ my ($ninc, $ifirst) = check_cherry_pick2(
$merge_base, $merge_tip,
$parents,
@all_ranges,
);
- if ( @incomplete ) {
- warn "W:svn cherry-pick ignored ($spec) - missing "
- .@incomplete." commit(s) (eg $incomplete[0])\n";
+ if ($ninc) {
+ warn "W: svn cherry-pick ignored ($spec) - missing " .
+ "$ninc commit(s) (eg $ifirst)\n";
} else {
- warn
- "Found merge parent (svn:mergeinfo prop): ",
- $merge_tip, "\n";
+ warn "Found merge parent ($spec): ", $merge_tip, "\n";
push @new_parents, $merge_tip;
}
}
@@ -1797,23 +1899,20 @@ sub find_extra_svn_parents {
}
sub make_log_entry {
- my ($self, $rev, $parents, $ed) = @_;
+ my ($self, $rev, $parents, $ed, $parent_rev, $parent_path) = @_;
my $untracked = $self->get_untracked($ed);
my @parents = @$parents;
- my $ps = $ed->{path_strip} || "";
- for my $path ( grep { m/$ps/ } %{$ed->{dir_prop}} ) {
- my $props = $ed->{dir_prop}{$path};
- if ( $props->{"svk:merge"} ) {
- $self->find_extra_svk_parents
- ($ed, $props->{"svk:merge"}, \@parents);
- }
- if ( $props->{"svn:mergeinfo"} ) {
- $self->find_extra_svn_parents
- ($ed,
- $props->{"svn:mergeinfo"},
- \@parents);
- }
+ my $props = $ed->{dir_prop}{$self->path};
+ if ( $props->{"svk:merge"} ) {
+ $self->find_extra_svk_parents($props->{"svk:merge"}, \@parents);
+ }
+ if ( $props->{"svn:mergeinfo"} ) {
+ my $mi_changes = $self->mergeinfo_changes
+ ($parent_path, $parent_rev,
+ $self->path, $rev,
+ $props->{"svn:mergeinfo"});
+ $self->find_extra_svn_parents($mi_changes, \@parents);
}
open my $un, '>>', "$self->{dir}/unhandled.log" or croak $!;
@@ -2161,8 +2260,9 @@ sub rev_map_set {
# both of these options make our .rev_db file very, very important
# and we can't afford to lose it because rebuild() won't work
if ($self->use_svm_props || $self->no_metadata) {
+ require File::Copy;
$sync = 1;
- copy($db, $db_lock) or die "rev_map_set(@_): ",
+ File::Copy::copy($db, $db_lock) or die "rev_map_set(@_): ",
"Failed to copy: ",
"$db => $db_lock ($!)\n";
} else {
@@ -2338,7 +2438,7 @@ sub _new {
# Older repos imported by us used $GIT_DIR/svn/foo instead of
# $GIT_DIR/svn/refs/remotes/foo when tracking refs/remotes/foo
- if ($ref_id =~ m{^refs/remotes/(.*)}) {
+ if ($ref_id =~ m{^refs/remotes/(.+)}) {
my $old_dir = "$ENV{GIT_DIR}/svn/$1";
if (-d $old_dir && ! -d $dir) {
$dir = $old_dir;