summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Eric Wong <normalperson@yhbt.net> 2006-03-26 02:52:31 (GMT)
committer: Junio C Hamano <junkio@cox.net> 2006-03-26 05:23:54 (GMT)
commit: 03823184247215fba1dd9c9c39659d08dea3bad7 (patch)
tree: 05b63f65bfa633155332720987821fd749424572
parent: c150462824008957f568ca7aa05a65b35d860eb9 (diff)
download: git-03823184247215fba1dd9c9c39659d08dea3bad7.zip
download: git-03823184247215fba1dd9c9c39659d08dea3bad7.tar.gz
download: git-03823184247215fba1dd9c9c39659d08dea3bad7.tar.bz2
contrib/git-svn: stabilize memory usage for big fetches
We should be safely able to import histories with thousands of revisions without hogging up lots of memory.

With this, we lose the ability to autocorrect mistakes when people specify revisions in reverse, but it's probably no longer a problem since we only have one method of log parsing nowadays.

I've added an extra check to ensure that revision numbers do increment.

Also, increment the version number to 0.11.0. I really should just call it 1.0 soon...

Signed-off-by: Eric Wong <normalperson@yhbt.net>
Signed-off-by: Junio C Hamano <junkio@cox.net>
-rwxr-xr-x contrib/git-svn/git-svn.perl 109
1 files changed, 63 insertions, 46 deletions
diff --git a/contrib/git-svn/git-svn.perl b/contrib/git-svn/git-svn.perl
index f3fc3ec..3e5733e 100755
--- a/contrib/git-svn/git-svn.perl
+++ b/contrib/git-svn/git-svn.perl
@@ -8,7 +8,7 @@ use vars qw/ $AUTHOR $VERSION
$GIT_SVN_INDEX $GIT_SVN
$GIT_DIR $REV_DIR/;
$AUTHOR = 'Eric Wong <normalperson@yhbt.net>';
-$VERSION = '0.10.0';
+$VERSION = '0.11.0';
$GIT_DIR = $ENV{GIT_DIR} || "$ENV{PWD}/.git";
# make sure the svn binary gives consistent output between locales and TZs:
$ENV{TZ} = 'UTC';
@@ -217,9 +217,8 @@ sub fetch {
push @log_args, '--stop-on-copy' unless $_no_stop_copy;
my $svn_log = svn_log_raw(@log_args);
- @$svn_log = sort { $a->{revision} <=> $b->{revision} } @$svn_log;
- my $base = shift @$svn_log or croak "No base revision!\n";
+ my $base = next_log_entry($svn_log) or croak "No base revision!\n";
my $last_commit = undef;
unless (-d $SVN_WC) {
svn_cmd_checkout($SVN_URL,$base->{revision},$SVN_WC);
@@ -234,18 +233,22 @@ sub fetch {
}
my @svn_up = qw(svn up);
push @svn_up, '--ignore-externals' unless $_no_ignore_ext;
- my $last_rev = $base->{revision};
- foreach my $log_msg (@$svn_log) {
- assert_svn_wc_clean($last_rev, $last_commit);
- $last_rev = $log_msg->{revision};
- sys(@svn_up,"-r$last_rev");
+ my $last = $base;
+ while (my $log_msg = next_log_entry($svn_log)) {
+ assert_svn_wc_clean($last->{revision}, $last_commit);
+ if ($last->{revision} >= $log_msg->{revision}) {
+ croak "Out of order: last >= current: ",
+ "$last->{revision} >= $log_msg->{revision}\n";
+ }
+ sys(@svn_up,"-r$log_msg->{revision}");
$last_commit = git_commit($log_msg, $last_commit, @parents);
+ $last = $log_msg;
}
- assert_svn_wc_clean($last_rev, $last_commit);
+ assert_svn_wc_clean($last->{revision}, $last_commit);
unless (-e "$GIT_DIR/refs/heads/master") {
sys(qw(git-update-ref refs/heads/master),$last_commit);
}
- return pop @$svn_log;
+ return $last;
}
sub commit {
@@ -708,49 +711,61 @@ sub svn_commit_tree {
return fetch("$rev_committed=$commit")->{revision};
}
+# read the entire log into a temporary file (which is removed ASAP)
+# and store the file handle + parser state
sub svn_log_raw {
my (@log_args) = @_;
- my $pid = open my $log_fh,'-|';
+ my $log_fh = IO::File->new_tmpfile or croak $!;
+ my $pid = fork;
defined $pid or croak $!;
-
- if ($pid == 0) {
+ if (!$pid) {
+ open STDOUT, '>&', $log_fh or croak $!;
exec (qw(svn log), @log_args) or croak $!
}
+ waitpid $pid, 0;
+ croak if $?;
+ seek $log_fh, 0, 0 or croak $!;
+ return { state => 'sep', fh => $log_fh };
+}
+
+sub next_log_entry {
+ my $log = shift; # retval of svn_log_raw()
+ my $ret = undef;
+ my $fh = $log->{fh};
- my @svn_log;
- my $state = 'sep';
- while (<$log_fh>) {
+ while (<$fh>) {
chomp;
if (/^\-{72}$/) {
- if ($state eq 'msg') {
- if ($svn_log[$#svn_log]->{lines}) {
- $svn_log[$#svn_log]->{msg} .= $_."\n";
- unless(--$svn_log[$#svn_log]->{lines}) {
- $state = 'sep';
+ if ($log->{state} eq 'msg') {
+ if ($ret->{lines}) {
+ $ret->{msg} .= $_."\n";
+ unless(--$ret->{lines}) {
+ $log->{state} = 'sep';
}
} else {
croak "Log parse error at: $_\n",
- $svn_log[$#svn_log]->{revision},
+ $ret->{revision},
"\n";
}
next;
}
- if ($state ne 'sep') {
+ if ($log->{state} ne 'sep') {
croak "Log parse error at: $_\n",
- "state: $state\n",
- $svn_log[$#svn_log]->{revision},
+ "state: $log->{state}\n",
+ $ret->{revision},
"\n";
}
- $state = 'rev';
+ $log->{state} = 'rev';
# if we have an empty log message, put something there:
- if (@svn_log) {
- $svn_log[$#svn_log]->{msg} ||= "\n";
- delete $svn_log[$#svn_log]->{lines};
+ if ($ret) {
+ $ret->{msg} ||= "\n";
+ delete $ret->{lines};
+ return $ret;
}
next;
}
- if ($state eq 'rev' && s/^r(\d+)\s*\|\s*//) {
+ if ($log->{state} eq 'rev' && s/^r(\d+)\s*\|\s*//) {
my $rev = $1;
my ($author, $date, $lines) = split(/\s*\|\s*/, $_, 3);
($lines) = ($lines =~ /(\d+)/);
@@ -758,36 +773,34 @@ sub svn_log_raw {
/(\d{4})\-(\d\d)\-(\d\d)\s
(\d\d)\:(\d\d)\:(\d\d)\s([\-\+]\d+)/x)
or croak "Failed to parse date: $date\n";
- my %log_msg = ( revision => $rev,
+ $ret = { revision => $rev,
date => "$tz $Y-$m-$d $H:$M:$S",
author => $author,
lines => $lines,
- msg => '' );
+ msg => '' };
if (defined $_authors && ! defined $users{$author}) {
die "Author: $author not defined in ",
"$_authors file\n";
}
- push @svn_log, \%log_msg;
- $state = 'msg_start';
+ $log->{state} = 'msg_start';
next;
}
# skip the first blank line of the message:
- if ($state eq 'msg_start' && /^$/) {
- $state = 'msg';
- } elsif ($state eq 'msg') {
- if ($svn_log[$#svn_log]->{lines}) {
- $svn_log[$#svn_log]->{msg} .= $_."\n";
- unless (--$svn_log[$#svn_log]->{lines}) {
- $state = 'sep';
+ if ($log->{state} eq 'msg_start' && /^$/) {
+ $log->{state} = 'msg';
+ } elsif ($log->{state} eq 'msg') {
+ if ($ret->{lines}) {
+ $ret->{msg} .= $_."\n";
+ unless (--$ret->{lines}) {
+ $log->{state} = 'sep';
}
} else {
croak "Log parse error at: $_\n",
- $svn_log[$#svn_log]->{revision},"\n";
+ $ret->{revision},"\n";
}
}
}
- close $log_fh or croak $?;
- return \@svn_log;
+ return $ret;
}
sub svn_info {
@@ -1114,9 +1127,13 @@ __END__
Data structures:
-@svn_log = array of log_msg hashes
+$svn_log hashref (as returned by svn_log_raw)
+{
+ fh => file handle of the log file,
+ state => state of the log file parser (sep/msg/rev/msg_start...)
+}
-$log_msg hash
+$log_msg hashref as returned by next_log_entry($svn_log)
{
msg => 'whitespace-formatted log entry
', # trailing newline is preserved