From 0c977dbc8180892af42d7ab9235fd3e51d6c4078 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 15 Jun 2017 12:30:55 -0400 Subject: diff-highlight: split code into module The diff-so-fancy project is also written in perl, and most of its users pipe diffs through both diff-highlight and diff-so-fancy. It would be nice if this could be done in a single script. So let's pull most of diff-highlight's code into its own module which can be used by diff-so-fancy. In addition, we'll abstract a few basic items like reading from stdio so that a script using the module can do more processing before or after diff-highlight handles the lines. See the README update for more details. One small downside is that the diff-highlight script must now be built using the Makefile. There are ways around this, but it quickly gets into perl arcana. Let's go with the simple solution. As a bonus, our Makefile now respects the PERL_PATH variable if it is set. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/contrib/diff-highlight/.gitignore b/contrib/diff-highlight/.gitignore new file mode 100644 index 0000000..c074548 --- /dev/null +++ b/contrib/diff-highlight/.gitignore @@ -0,0 +1,2 @@ +shebang.perl +diff-highlight diff --git a/contrib/diff-highlight/DiffHighlight.pm b/contrib/diff-highlight/DiffHighlight.pm new file mode 100644 index 0000000..663992e --- /dev/null +++ b/contrib/diff-highlight/DiffHighlight.pm @@ -0,0 +1,233 @@ +package DiffHighlight; + +use 5.008; +use warnings FATAL => 'all'; +use strict; + +# Highlight by reversing foreground and background. You could do +# other things like bold or underline if you prefer. 
+my @OLD_HIGHLIGHT = (  # (normal, highlight, reset) escape sequences for removed lines
+	color_config('color.diff-highlight.oldnormal'),
+	color_config('color.diff-highlight.oldhighlight', "\x1b[7m"),
+	color_config('color.diff-highlight.oldreset', "\x1b[27m")
+);
+my @NEW_HIGHLIGHT = (  # same three slots for added lines; each defaults to the "old" value
+	color_config('color.diff-highlight.newnormal', $OLD_HIGHLIGHT[0]),
+	color_config('color.diff-highlight.newhighlight', $OLD_HIGHLIGHT[1]),
+	color_config('color.diff-highlight.newreset', $OLD_HIGHLIGHT[2])
+);
+
+my $RESET = "\x1b[m";
+my $COLOR = qr/\x1b\[[0-9;]*m/;
+my $BORING = qr/$COLOR|\s/;
+
+# The patch portion of git log -p --graph should only ever have preceding | and
+# not / or \ as merge history only shows up on the commit line.
+my $GRAPH = qr/$COLOR?\|$COLOR?\s+/;
+
+my @removed;  # queued "-" lines of the hunk currently being read
+my @added;  # queued "+" lines of the hunk currently being read
+my $in_hunk;  # true while reading hunk content (set when a "@@ " header is seen)
+
+our $line_cb = sub { print @_ };  # output hook; may be called with several lines at once
+our $flush_cb = sub { local $| = 1 };  # flush hook; the default flushes stdout
+
+sub handle_line {  # feed one input line; finished lines are emitted through $line_cb
+	local $_ = shift;
+
+	if (!$in_hunk) {
+		$line_cb->($_);
+		$in_hunk = /^$GRAPH*$COLOR*\@\@ /;
+	}
+	elsif (/^$GRAPH*$COLOR*-/) {
+		push @removed, $_;
+	}
+	elsif (/^$GRAPH*$COLOR*\+/) {
+		push @added, $_;
+	}
+	else {
+		show_hunk(\@removed, \@added);
+		@removed = ();
+		@added = ();
+
+		$line_cb->($_);
+		$in_hunk = /^$GRAPH*$COLOR*[\@ ]/;
+	}
+
+	# Most of the time there is enough output to keep things streaming,
+	# but for something like "git log -Sfoo", you can get one early
+	# commit and then many seconds of nothing. We want to show
+	# that one commit as soon as possible.
+	#
+	# Since we can receive arbitrary input, there's no optimal
+	# place to flush. Flushing on a blank line is a heuristic that
+	# happens to match git-log output.
+	if (/^$/) {  # matches "" and "\n"; lines normally still carry their newline, so !length never fired
+		$flush_cb->();
+	}
+}
+
+sub flush {  # call once after the last handle_line()
+	# Flush any queued hunk (this can happen when there is no trailing
+	# context in the final diff of the input).
+	show_hunk(\@removed, \@added);
+}
+
+sub highlight_stdin {  # convenience driver: run every line of STDIN through handle_line(), then flush()
+	while (<STDIN>) {
+		handle_line($_);
+	}
+	flush();
+}
+
+# Ideally we would feed the default as a human-readable color to
+# git-config as the fallback value.
But diff-highlight does
+# not otherwise depend on git at all, and there are reports
+# of it being used in other settings. Let's handle our own
+# fallback, which means we will work even if git can't be run.
+sub color_config {  # ($key, $default): what "git config --get-color $key" prints, else $default
+	my ($key, $default) = @_;
+	my $s = `git config --get-color $key 2>/dev/null`;  # stderr is discarded
+	return length($s) ? $s : $default;  # empty output falls back to $default
+}
+
+sub show_hunk {  # ($a, $b): refs to the queued removed and added lines; output goes to $line_cb
+	my ($a, $b) = @_;
+
+	# If one side is empty, then there is nothing to compare or highlight.
+	if (!@$a || !@$b) {
+		$line_cb->(@$a, @$b);
+		return;
+	}
+
+	# If we have mismatched numbers of lines on each side, we could try to
+	# be clever and match up similar lines. But for now we are simple and
+	# stupid, and only handle multi-line hunks that remove and add the same
+	# number of lines.
+	if (@$a != @$b) {
+		$line_cb->(@$a, @$b);
+		return;
+	}
+
+	my @queue;  # highlighted added lines, held back until every removed line is out
+	for (my $i = 0; $i < @$a; $i++) {
+		my ($rm, $add) = highlight_pair($a->[$i], $b->[$i]);
+		$line_cb->($rm);
+		push @queue, $add;
+	}
+	$line_cb->(@queue);
+}
+
+sub highlight_pair {  # (removed line, added line): returns both, highlighted only if is_pair_interesting()
+	my @a = split_line(shift);
+	my @b = split_line(shift);
+
+	# Find common prefix, taking care to skip any ansi
+	# color codes.
+	my $seen_plusminus;  # the leading "-" vs "+" mismatch is tolerated exactly once
+	my ($pa, $pb) = (0, 0);  # end up at the first element past the common prefix
+	while ($pa < @a && $pb < @b) {
+		if ($a[$pa] =~ /$COLOR/) {
+			$pa++;
+		}
+		elsif ($b[$pb] =~ /$COLOR/) {
+			$pb++;
+		}
+		elsif ($a[$pa] eq $b[$pb]) {
+			$pa++;
+			$pb++;
+		}
+		elsif (!$seen_plusminus && $a[$pa] eq '-' && $b[$pb] eq '+') {
+			$seen_plusminus = 1;
+			$pa++;
+			$pb++;
+		}
+		else {
+			last;
+		}
+	}
+
+	# Find common suffix, ignoring colors.
+	my ($sa, $sb) = ($#a, $#b);  # end up at the last element before the common suffix
+	while ($sa >= $pa && $sb >= $pb) {
+		if ($a[$sa] =~ /$COLOR/) {
+			$sa--;
+		}
+		elsif ($b[$sb] =~ /$COLOR/) {
+			$sb--;
+		}
+		elsif ($a[$sa] eq $b[$sb]) {
+			$sa--;
+			$sb--;
+		}
+		else {
+			last;
+		}
+	}
+
+	if (is_pair_interesting(\@a, $pa, $sa, \@b, $pb, $sb)) {
+		return highlight_line(\@a, $pa, $sa, \@OLD_HIGHLIGHT),
+		       highlight_line(\@b, $pb, $sb, \@NEW_HIGHLIGHT);
+	}
+	else {
+		return join('', @a),  # not worth highlighting: hand both lines back unchanged
+		       join('', @b);
+	}
+}
+
+# we split either by $COLOR or by character. This has the side effect of
+# leaving in graph cruft. It works because the graph cruft does not contain "-"
+# or "+"
+sub split_line {  # one line -> list of elements: whole runs of color codes, and single characters
+	local $_ = shift;
+	return utf8::decode($_) ?  # when the decode succeeds, split per character and re-encode each one
+	       map { utf8::encode($_); $_ }
+	           map { /$COLOR/ ? $_ : (split //) }
+	           split /($COLOR+)/ :
+	       map { /$COLOR/ ? $_ : (split //) }
+	       split /($COLOR+)/;
+}
+
+sub highlight_line {  # ($line, $prefix, $suffix, $theme): join $line with elements $prefix..$suffix highlighted
+	my ($line, $prefix, $suffix, $theme) = @_;
+
+	my $start = join('', @{$line}[0..($prefix-1)]);
+	my $mid = join('', @{$line}[$prefix..$suffix]);
+	my $end = join('', @{$line}[($suffix+1)..$#$line]);
+
+	# If we have a "normal" color specified, then take over the whole line.
+	# Otherwise, we try to just manipulate the highlighted bits.
+	if (defined $theme->[0]) {
+		s/$COLOR//g for ($start, $mid, $end);
+		chomp $end;  # the newline is re-added after the final $RESET
+		return join('',
+			$theme->[0], $start, $RESET,
+			$theme->[1], $mid, $RESET,
+			$theme->[0], $end, $RESET,
+			"\n"
+		);
+	} else {
+		return join('',
+			$start,
+			$theme->[1], $mid, $theme->[2],
+			$end
+		);
+	}
+}
+
+# Pairs are interesting to highlight only if we are going to end up
+# highlighting a subset (i.e., not the whole line). Otherwise, the highlighting
+# is just useless noise. We can detect this by finding either a matching prefix
+# or suffix (disregarding boring bits like whitespace and colorization).
+sub is_pair_interesting {  # true when a common prefix or suffix holds more than marker, graph, color or whitespace
+	my ($a, $pa, $sa, $b, $pb, $sb) = @_;
+	my $prefix_a = join('', @$a[0..($pa-1)]);
+	my $prefix_b = join('', @$b[0..($pb-1)]);
+	my $suffix_a = join('', @$a[($sa+1)..$#$a]);
+	my $suffix_b = join('', @$b[($sb+1)..$#$b]);
+
+	return $prefix_a !~ /^$GRAPH*$COLOR*-$BORING*$/ ||
+	       $prefix_b !~ /^$GRAPH*$COLOR*\+$BORING*$/ ||
+	       $suffix_a !~ /^$BORING*$/ ||
+	       $suffix_b !~ /^$BORING*$/;
+}
diff --git a/contrib/diff-highlight/Makefile b/contrib/diff-highlight/Makefile index 9018724..fbf5c58 100644 --- a/contrib/diff-highlight/Makefile +++ b/contrib/diff-highlight/Makefile @@ -1,5 +1,20 @@ -# nothing to build -all: +all: diff-highlight -test: +PERL_PATH = /usr/bin/perl +-include ../../config.mak + +PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH)) + +diff-highlight: shebang.perl DiffHighlight.pm diff-highlight.perl + cat $^ >$@+ + chmod +x $@+ + mv $@+ $@ + +shebang.perl: FORCE + @echo '#!$(PERL_PATH_SQ)' >$@+ + @cmp $@+ $@ >/dev/null 2>/dev/null || mv $@+ $@ + +test: all $(MAKE) -C t + +.PHONY: FORCE diff --git a/contrib/diff-highlight/README b/contrib/diff-highlight/README index 836b97a..d4c2343 100644 --- a/contrib/diff-highlight/README +++ b/contrib/diff-highlight/README @@ -99,6 +99,36 @@ newHighlight = "black #aaffaa" --------------------------------------------- +Using diff-highlight as a module +-------------------------------- + +If you want to pre- or post- process the highlighted lines as part of +another perl script, you can use the DiffHighlight module. You can +either "require" it or just cat the module together with your script (to +avoid run-time dependencies). + +Your script may set up one or more of the following variables: + + - $DiffHighlight::line_cb - this should point to a function which is + called whenever DiffHighlight has lines (which may contain + highlights) to output. The default function prints each line to + stdout. Note that the function may be called with multiple lines. 
+ + - $DiffHighlight::flush_cb - this should point to a function which + flushes the output (because DiffHighlight believes it has completed + processing a logical chunk of input). The default function flushes + stdout. + +The script may then feed lines, one at a time, to DiffHighlight::handle_line(). +When lines are done processing, they will be fed to $line_cb. Note that +DiffHighlight may queue up many input lines (to analyze a whole hunk) +before calling $line_cb. After providing all lines, call +DiffHighlight::flush() to flush any unprocessed lines. + +If you just want to process stdin, DiffHighlight::highlight_stdin() +is a convenience helper which will loop and flush for you. + + Bugs ---- diff --git a/contrib/diff-highlight/diff-highlight b/contrib/diff-highlight/diff-highlight deleted file mode 100755 index 81bd804..0000000 --- a/contrib/diff-highlight/diff-highlight +++ /dev/null @@ -1,225 +0,0 @@ -#!/usr/bin/perl - -use 5.008; -use warnings FATAL => 'all'; -use strict; - -# Highlight by reversing foreground and background. You could do -# other things like bold or underline if you prefer. -my @OLD_HIGHLIGHT = ( - color_config('color.diff-highlight.oldnormal'), - color_config('color.diff-highlight.oldhighlight', "\x1b[7m"), - color_config('color.diff-highlight.oldreset', "\x1b[27m") -); -my @NEW_HIGHLIGHT = ( - color_config('color.diff-highlight.newnormal', $OLD_HIGHLIGHT[0]), - color_config('color.diff-highlight.newhighlight', $OLD_HIGHLIGHT[1]), - color_config('color.diff-highlight.newreset', $OLD_HIGHLIGHT[2]) -); - -my $RESET = "\x1b[m"; -my $COLOR = qr/\x1b\[[0-9;]*m/; -my $BORING = qr/$COLOR|\s/; - -# The patch portion of git log -p --graph should only ever have preceding | and -# not / or \ as merge history only shows up on the commit line. 
-my $GRAPH = qr/$COLOR?\|$COLOR?\s+/; - -my @removed; -my @added; -my $in_hunk; - -# Some scripts may not realize that SIGPIPE is being ignored when launching the -# pager--for instance scripts written in Python. -$SIG{PIPE} = 'DEFAULT'; - -while (<>) { - if (!$in_hunk) { - print; - $in_hunk = /^$GRAPH*$COLOR*\@\@ /; - } - elsif (/^$GRAPH*$COLOR*-/) { - push @removed, $_; - } - elsif (/^$GRAPH*$COLOR*\+/) { - push @added, $_; - } - else { - show_hunk(\@removed, \@added); - @removed = (); - @added = (); - - print; - $in_hunk = /^$GRAPH*$COLOR*[\@ ]/; - } - - # Most of the time there is enough output to keep things streaming, - # but for something like "git log -Sfoo", you can get one early - # commit and then many seconds of nothing. We want to show - # that one commit as soon as possible. - # - # Since we can receive arbitrary input, there's no optimal - # place to flush. Flushing on a blank line is a heuristic that - # happens to match git-log output. - if (!length) { - local $| = 1; - } -} - -# Flush any queued hunk (this can happen when there is no trailing context in -# the final diff of the input). -show_hunk(\@removed, \@added); - -exit 0; - -# Ideally we would feed the default as a human-readable color to -# git-config as the fallback value. But diff-highlight does -# not otherwise depend on git at all, and there are reports -# of it being used in other settings. Let's handle our own -# fallback, which means we will work even if git can't be run. -sub color_config { - my ($key, $default) = @_; - my $s = `git config --get-color $key 2>/dev/null`; - return length($s) ? $s : $default; -} - -sub show_hunk { - my ($a, $b) = @_; - - # If one side is empty, then there is nothing to compare or highlight. - if (!@$a || !@$b) { - print @$a, @$b; - return; - } - - # If we have mismatched numbers of lines on each side, we could try to - # be clever and match up similar lines. 
But for now we are simple and - # stupid, and only handle multi-line hunks that remove and add the same - # number of lines. - if (@$a != @$b) { - print @$a, @$b; - return; - } - - my @queue; - for (my $i = 0; $i < @$a; $i++) { - my ($rm, $add) = highlight_pair($a->[$i], $b->[$i]); - print $rm; - push @queue, $add; - } - print @queue; -} - -sub highlight_pair { - my @a = split_line(shift); - my @b = split_line(shift); - - # Find common prefix, taking care to skip any ansi - # color codes. - my $seen_plusminus; - my ($pa, $pb) = (0, 0); - while ($pa < @a && $pb < @b) { - if ($a[$pa] =~ /$COLOR/) { - $pa++; - } - elsif ($b[$pb] =~ /$COLOR/) { - $pb++; - } - elsif ($a[$pa] eq $b[$pb]) { - $pa++; - $pb++; - } - elsif (!$seen_plusminus && $a[$pa] eq '-' && $b[$pb] eq '+') { - $seen_plusminus = 1; - $pa++; - $pb++; - } - else { - last; - } - } - - # Find common suffix, ignoring colors. - my ($sa, $sb) = ($#a, $#b); - while ($sa >= $pa && $sb >= $pb) { - if ($a[$sa] =~ /$COLOR/) { - $sa--; - } - elsif ($b[$sb] =~ /$COLOR/) { - $sb--; - } - elsif ($a[$sa] eq $b[$sb]) { - $sa--; - $sb--; - } - else { - last; - } - } - - if (is_pair_interesting(\@a, $pa, $sa, \@b, $pb, $sb)) { - return highlight_line(\@a, $pa, $sa, \@OLD_HIGHLIGHT), - highlight_line(\@b, $pb, $sb, \@NEW_HIGHLIGHT); - } - else { - return join('', @a), - join('', @b); - } -} - -# we split either by $COLOR or by character. This has the side effect of -# leaving in graph cruft. It works because the graph cruft does not contain "-" -# or "+" -sub split_line { - local $_ = shift; - return utf8::decode($_) ? - map { utf8::encode($_); $_ } - map { /$COLOR/ ? $_ : (split //) } - split /($COLOR+)/ : - map { /$COLOR/ ? 
$_ : (split //) } - split /($COLOR+)/; -} - -sub highlight_line { - my ($line, $prefix, $suffix, $theme) = @_; - - my $start = join('', @{$line}[0..($prefix-1)]); - my $mid = join('', @{$line}[$prefix..$suffix]); - my $end = join('', @{$line}[($suffix+1)..$#$line]); - - # If we have a "normal" color specified, then take over the whole line. - # Otherwise, we try to just manipulate the highlighted bits. - if (defined $theme->[0]) { - s/$COLOR//g for ($start, $mid, $end); - chomp $end; - return join('', - $theme->[0], $start, $RESET, - $theme->[1], $mid, $RESET, - $theme->[0], $end, $RESET, - "\n" - ); - } else { - return join('', - $start, - $theme->[1], $mid, $theme->[2], - $end - ); - } -} - -# Pairs are interesting to highlight only if we are going to end up -# highlighting a subset (i.e., not the whole line). Otherwise, the highlighting -# is just useless noise. We can detect this by finding either a matching prefix -# or suffix (disregarding boring bits like whitespace and colorization). -sub is_pair_interesting { - my ($a, $pa, $sa, $b, $pb, $sb) = @_; - my $prefix_a = join('', @$a[0..($pa-1)]); - my $prefix_b = join('', @$b[0..($pb-1)]); - my $suffix_a = join('', @$a[($sa+1)..$#$a]); - my $suffix_b = join('', @$b[($sb+1)..$#$b]); - - return $prefix_a !~ /^$GRAPH*$COLOR*-$BORING*$/ || - $prefix_b !~ /^$GRAPH*$COLOR*\+$BORING*$/ || - $suffix_a !~ /^$BORING*$/ || - $suffix_b !~ /^$BORING*$/; -} diff --git a/contrib/diff-highlight/diff-highlight.perl b/contrib/diff-highlight/diff-highlight.perl new file mode 100644 index 0000000..9b3e9c1 --- /dev/null +++ b/contrib/diff-highlight/diff-highlight.perl @@ -0,0 +1,8 @@ +package main; + +# Some scripts may not realize that SIGPIPE is being ignored when launching the +# pager--for instance scripts written in Python. +$SIG{PIPE} = 'DEFAULT'; + +DiffHighlight::highlight_stdin(); +exit 0; -- cgit v0.10.2-6-g49f6