From b4f25b07c74fc294cab6c12d09faa2021c67f25a Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:39 +0000 Subject: t: add skeleton chainlint.pl Although chainlint.sed usefully identifies broken &&-chains in tests, it has several shortcomings which include: * only detects &&-chain breakage in subshells (one-level deep) * does not check for broken top-level &&-chains; that task is left to the "magic exit code 117" checker built into test-lib.sh, however, that detection does not extend to `{...}` blocks, `$(...)` expressions, or compound statements such as `if...fi`, `while...done`, `case...esac` * uses heuristics, which makes it (potentially) fallible and difficult to tweak to handle additional real-world cases * written in `sed` and employs advanced `sed` operators which are probably not well-known to many programmers, thus the pool of people who can maintain it is likely small * manually simulates recursion into subshells which makes it much more difficult to reason about than, say, a traditional top-down parser * checks each test as the test is run, which can get expensive for tests which are run repeatedly by functions or loops since their bodies will be checked over and over (tens or hundreds of times) unnecessarily To address these shortcomings, begin implementing a more functional and precise test linter which understands shell syntax and semantics rather than employing heuristics, thus is able to recognize structural problems with tests beyond broken &&-chains. The new linter is written in Perl, thus should be more accessible to a wider audience, and is structured as a traditional top-down parser which makes it much easier to reason about, and allows it to inspect compound statements within test bodies to any depth. 
Furthermore, it can check all test definitions in the entire project in a single invocation rather than having to be invoked once per test, and each test definition is checked only once no matter how many times the test is actually run. At this stage, the new linter is just a skeleton containing boilerplate which handles command-line options, collects and reports statistics, and feeds its arguments -- paths of test scripts -- to a (presently) do-nothing script parser for validation. Subsequent changes will flesh out the functionality. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano diff --git a/t/chainlint.pl b/t/chainlint.pl new file mode 100755 index 0000000..e8ab95c --- /dev/null +++ b/t/chainlint.pl @@ -0,0 +1,115 @@ +#!/usr/bin/env perl +# +# Copyright (c) 2021-2022 Eric Sunshine +# +# This tool scans shell scripts for test definitions and checks those tests for +# problems, such as broken &&-chains, which might hide bugs in the tests +# themselves or in behaviors being exercised by the tests. +# +# Input arguments are pathnames of shell scripts containing test definitions, +# or globs referencing a collection of scripts. For each problem discovered, +# the pathname of the script containing the test is printed along with the test +# name and the test body with a `?!FOO?!` annotation at the location of each +# detected problem, where "FOO" is a tag such as "AMP" which indicates a broken +# &&-chain. Returns zero if no problems are discovered, otherwise non-zero. + +use warnings; +use strict; +use File::Glob; +use Getopt::Long; + +my $show_stats; +my $emit_all; + +package ScriptParser; + +sub new { + my $class = shift @_; + my $self = bless {} => $class; + $self->{output} = []; + $self->{ntests} = 0; + return $self; +} + +sub parse_cmd { + return undef; +} + +# main contains high-level functionality for processing command-line switches, +# feeding input test scripts to ScriptParser, and reporting results. 
+package main; + +my $getnow = sub { return time(); }; +my $interval = sub { return time() - shift; }; +if (eval {require Time::HiRes; Time::HiRes->import(); 1;}) { + $getnow = sub { return [Time::HiRes::gettimeofday()]; }; + $interval = sub { return Time::HiRes::tv_interval(shift); }; +} + +sub show_stats { + my ($start_time, $stats) = @_; + my $walltime = $interval->($start_time); + my ($usertime) = times(); + my ($total_workers, $total_scripts, $total_tests, $total_errs) = (0, 0, 0, 0); + for (@$stats) { + my ($worker, $nscripts, $ntests, $nerrs) = @$_; + print(STDERR "worker $worker: $nscripts scripts, $ntests tests, $nerrs errors\n"); + $total_workers++; + $total_scripts += $nscripts; + $total_tests += $ntests; + $total_errs += $nerrs; + } + printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)\n", $total_workers, $total_scripts, $total_tests, $total_errs, $walltime, $usertime); +} + +sub check_script { + my ($id, $next_script, $emit) = @_; + my ($nscripts, $ntests, $nerrs) = (0, 0, 0); + while (my $path = $next_script->()) { + $nscripts++; + my $fh; + unless (open($fh, "<", $path)) { + $emit->("?!ERR?! $path: $!\n"); + next; + } + my $s = do { local $/; <$fh> }; + close($fh); + my $parser = ScriptParser->new(\$s); + 1 while $parser->parse_cmd(); + if (@{$parser->{output}}) { + my $s = join('', @{$parser->{output}}); + $emit->("# chainlint: $path\n" . $s); + $nerrs += () = $s =~ /\?![^?]+\?!/g; + } + $ntests += $parser->{ntests}; + } + return [$id, $nscripts, $ntests, $nerrs]; +} + +sub exit_code { + my $stats = shift @_; + for (@$stats) { + my ($worker, $nscripts, $ntests, $nerrs) = @$_; + return 1 if $nerrs; + } + return 0; +} + +Getopt::Long::Configure(qw{bundling}); +GetOptions( + "emit-all!" => \$emit_all, + "stats|show-stats!" 
=> \$show_stats) or die("option error\n"); + +my $start_time = $getnow->(); +my @stats; + +my @scripts; +push(@scripts, File::Glob::bsd_glob($_)) for (@ARGV); +unless (@scripts) { + show_stats($start_time, \@stats) if $show_stats; + exit; +} + +push(@stats, check_script(1, sub { shift(@scripts); }, sub { print(@_); })); +show_stats($start_time, \@stats) if $show_stats; +exit(exit_code(\@stats)); -- cgit v0.10.2-6-g49f6 From 7d4804731ed642b92b516908fb93397b08e986bf Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:40 +0000 Subject: chainlint.pl: add POSIX shell lexical analyzer Begin fleshing out chainlint.pl by adding a lexical analyzer for the POSIX shell command language. The sole entry point Lexer::scan_token() returns the next token from the input. It will be called by the upcoming shell language parser. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano diff --git a/t/chainlint.pl b/t/chainlint.pl index e8ab95c..81ffbf2 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -21,6 +21,183 @@ use Getopt::Long; my $show_stats; my $emit_all; +# Lexer tokenizes POSIX shell scripts. It is roughly modeled after section 2.3 +# "Token Recognition" of POSIX chapter 2 "Shell Command Language". Although +# similar to lexical analyzers for other languages, this one differs in a few +# substantial ways due to quirks of the shell command language. +# +# For instance, in many languages, newline is just whitespace like space or +# TAB, but in shell a newline is a command separator, thus a distinct lexical +# token. A newline is significant and returned as a distinct token even at the +# end of a shell comment. +# +# In other languages, `1+2` would typically be scanned as three tokens +# (`1`, `+`, and `2`), but in shell it is a single token. However, the similar +# `1 + 2`, which embeds whitespace, is scanned as three tokens in shell, as well. +# In shell, several characters with special meaning lose that meaning when not +# surrounded by whitespace. 
For instance, the negation operator `!` is special +# when standing alone surrounded by whitespace; whereas in `foo!uucp` it is +# just a plain character in the longer token "foo!uucp". In many other +# languages, `"string"/foo:'string'` might be scanned as five tokens ("string", +# `/`, `foo`, `:`, and 'string'), but in shell, it is just a single token. +# +# The lexical analyzer for the shell command language is also somewhat unusual +# in that it recursively invokes the parser to handle the body of `$(...)` +# expressions which can contain arbitrary shell code. Such expressions may be +# encountered both inside and outside of double-quoted strings. +# +# The lexical analyzer is responsible for consuming shell here-doc bodies which +# extend from the line following a `<<TAG` operator until a line consisting +# solely of `TAG`. Here-doc consumption begins when a newline is encountered. +# It is legal for multiple here-docs to be present on a single line, in which +# case their bodies must be present one following the next, and are consumed in +# the (left-to-right) order the `<<TAG` operators appear on the line. A special +# complication is that the bodies of all here-docs must be consumed when the +# newline is encountered even if the parse context depth has changed. For +# instance, in `cat <<A && x=$(cat <<B &&\n`, bodies of here-docs "A" and "B" +# must be consumed even though "A" was introduced outside the recursive parse +# context in which "B" was introduced and in which the newline is encountered. +package Lexer; + +sub new { + my ($class, $parser, $s) = @_; + bless { + parser => $parser, + buff => $s, + heretags => [] + } => $class; +} + +sub scan_heredoc_tag { + my $self = shift @_; + ${$self->{buff}} =~ /\G(-?)/gc; + my $indented = $1; + my $tag = $self->scan_token(); + $tag =~ s/['"\\]//g; + push(@{$self->{heretags}}, $indented ? "\t$tag" : "$tag"); + return "<<$indented$tag"; +} + +sub scan_op { + my ($self, $c) = @_; + my $b = $self->{buff}; + return $c unless $$b =~ /\G(.)/sgc; + my $cc = $c . $1; + return scan_heredoc_tag($self) if $cc eq '<<'; + return $cc if $cc =~ /^(?:&&|\|\||>>|;;|<&|>&|<>|>\|)$/; + pos($$b)--; + return $c; +} + +sub scan_sqstring { + my $self = shift @_; + ${$self->{buff}} =~ /\G([^']*'|.*\z)/sgc; + return "'" . $1; +} + +sub scan_dqstring { + my $self = shift @_; + my $b = $self->{buff}; + my $s = '"'; + while (1) { + # slurp up non-special characters + $s .= $1 if $$b =~ /\G([^"\$\\]+)/gc; + # handle special characters + last unless $$b =~ /\G(.)/sgc; + my $c = $1; + $s .= '"', last if $c eq '"'; + $s .= '$' . 
$self->scan_dollar(), next if $c eq '$'; + if ($c eq '\\') { + $s .= '\\', last unless $$b =~ /\G(.)/sgc; + $c = $1; + next if $c eq "\n"; # line splice + # backslash escapes only $, `, ", \ in dq-string + $s .= '\\' unless $c =~ /^[\$`"\\]$/; + $s .= $c; + next; + } + die("internal error scanning dq-string '$c'\n"); + } + return $s; +} + +sub scan_balanced { + my ($self, $c1, $c2) = @_; + my $b = $self->{buff}; + my $depth = 1; + my $s = $c1; + while ($$b =~ /\G([^\Q$c1$c2\E]*(?:[\Q$c1$c2\E]|\z))/gc) { + $s .= $1; + $depth++, next if $s =~ /\Q$c1\E$/; + $depth--; + last if $depth == 0; + } + return $s; +} + +sub scan_subst { + my $self = shift @_; + my @tokens = $self->{parser}->parse(qr/^\)$/); + $self->{parser}->next_token(); # closing ")" + return @tokens; +} + +sub scan_dollar { + my $self = shift @_; + my $b = $self->{buff}; + return $self->scan_balanced('(', ')') if $$b =~ /\G\((?=\()/gc; # $((...)) + return '(' . join(' ', $self->scan_subst()) . ')' if $$b =~ /\G\(/gc; # $(...) + return $self->scan_balanced('{', '}') if $$b =~ /\G\{/gc; # ${...} + return $1 if $$b =~ /\G(\w+)/gc; # $var + return $1 if $$b =~ /\G([@*#?$!0-9-])/gc; # $*, $1, $$, etc. + return ''; +} + +sub swallow_heredocs { + my $self = shift @_; + my $b = $self->{buff}; + my $tags = $self->{heretags}; + while (my $tag = shift @$tags) { + my $indent = $tag =~ s/^\t// ? 
'\\s*' : ''; + $$b =~ /(?:\G|\n)$indent\Q$tag\E(?:\n|\z)/gc; + } +} + +sub scan_token { + my $self = shift @_; + my $b = $self->{buff}; + my $token = ''; +RESTART: + $$b =~ /\G[ \t]+/gc; # skip whitespace (but not newline) + return "\n" if $$b =~ /\G#[^\n]*(?:\n|\z)/gc; # comment + while (1) { + # slurp up non-special characters + $token .= $1 if $$b =~ /\G([^\\;&|<>(){}'"\$\s]+)/gc; + # handle special characters + last unless $$b =~ /\G(.)/sgc; + my $c = $1; + last if $c =~ /^[ \t]$/; # whitespace ends token + pos($$b)--, last if length($token) && $c =~ /^[;&|<>(){}\n]$/; + $token .= $self->scan_sqstring(), next if $c eq "'"; + $token .= $self->scan_dqstring(), next if $c eq '"'; + $token .= $c . $self->scan_dollar(), next if $c eq '$'; + $self->swallow_heredocs(), $token = $c, last if $c eq "\n"; + $token = $self->scan_op($c), last if $c =~ /^[;&|<>]$/; + $token = $c, last if $c =~ /^[(){}]$/; + if ($c eq '\\') { + $token .= '\\', last unless $$b =~ /\G(.)/sgc; + $c = $1; + next if $c eq "\n" && length($token); # line splice + goto RESTART if $c eq "\n"; # line splice + $token .= '\\' . $c; + next; + } + die("internal error scanning character '$c'\n"); + } + return length($token) ? $token : undef; +} + package ScriptParser; sub new { -- cgit v0.10.2-6-g49f6 From 6594554119811a01888b44112a7daec6fa0312b2 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:41 +0000 Subject: chainlint.pl: add POSIX shell parser Continue fleshing out chainlint.pl by adding a general purpose recursive descent parser for the POSIX shell command language. Although never invoked directly, upcoming parser subclasses will extend its functionality for specific purposes, such as plucking test definitions from input scripts and applying domain-specific knowledge to perform test validation. 
Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano diff --git a/t/chainlint.pl b/t/chainlint.pl index 81ffbf2..cdf1368 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -198,6 +198,249 @@ RESTART: return length($token) ? $token : undef; } +# ShellParser parses POSIX shell scripts (with minor extensions for Bash). It +# is a recursive descent parser very roughly modeled after section 2.10 "Shell +# Grammar" of POSIX chapter 2 "Shell Command Language". +package ShellParser; + +sub new { + my ($class, $s) = @_; + my $self = bless { + buff => [], + stop => [], + output => [] + } => $class; + $self->{lexer} = Lexer->new($self, $s); + return $self; +} + +sub next_token { + my $self = shift @_; + return pop(@{$self->{buff}}) if @{$self->{buff}}; + return $self->{lexer}->scan_token(); +} + +sub untoken { + my $self = shift @_; + push(@{$self->{buff}}, @_); +} + +sub peek { + my $self = shift @_; + my $token = $self->next_token(); + return undef unless defined($token); + $self->untoken($token); + return $token; +} + +sub stop_at { + my ($self, $token) = @_; + return 1 unless defined($token); + my $stop = ${$self->{stop}}[-1] if @{$self->{stop}}; + return defined($stop) && $token =~ $stop; +} + +sub expect { + my ($self, $expect) = @_; + my $token = $self->next_token(); + return $token if defined($token) && $token eq $expect; + push(@{$self->{output}}, "?!ERR?! expected '$expect' but found '" . (defined($token) ? $token : "") . 
"'\n"); + $self->untoken($token) if defined($token); + return (); +} + +sub optional_newlines { + my $self = shift @_; + my @tokens; + while (my $token = $self->peek()) { + last unless $token eq "\n"; + push(@tokens, $self->next_token()); + } + return @tokens; +} + +sub parse_group { + my $self = shift @_; + return ($self->parse(qr/^}$/), + $self->expect('}')); +} + +sub parse_subshell { + my $self = shift @_; + return ($self->parse(qr/^\)$/), + $self->expect(')')); +} + +sub parse_case_pattern { + my $self = shift @_; + my @tokens; + while (defined(my $token = $self->next_token())) { + push(@tokens, $token); + last if $token eq ')'; + } + return @tokens; +} + +sub parse_case { + my $self = shift @_; + my @tokens; + push(@tokens, + $self->next_token(), # subject + $self->optional_newlines(), + $self->expect('in'), + $self->optional_newlines()); + while (1) { + my $token = $self->peek(); + last unless defined($token) && $token ne 'esac'; + push(@tokens, + $self->parse_case_pattern(), + $self->optional_newlines(), + $self->parse(qr/^(?:;;|esac)$/)); # item body + $token = $self->peek(); + last unless defined($token) && $token ne 'esac'; + push(@tokens, + $self->expect(';;'), + $self->optional_newlines()); + } + push(@tokens, $self->expect('esac')); + return @tokens; +} + +sub parse_for { + my $self = shift @_; + my @tokens; + push(@tokens, + $self->next_token(), # variable + $self->optional_newlines()); + my $token = $self->peek(); + if (defined($token) && $token eq 'in') { + push(@tokens, + $self->expect('in'), + $self->optional_newlines()); + } + push(@tokens, + $self->parse(qr/^do$/), # items + $self->expect('do'), + $self->optional_newlines(), + $self->parse_loop_body(), + $self->expect('done')); + return @tokens; +} + +sub parse_if { + my $self = shift @_; + my @tokens; + while (1) { + push(@tokens, + $self->parse(qr/^then$/), # if/elif condition + $self->expect('then'), + $self->optional_newlines(), + $self->parse(qr/^(?:elif|else|fi)$/)); # if/elif body + my 
$token = $self->peek(); + last unless defined($token) && $token eq 'elif'; + push(@tokens, $self->expect('elif')); + } + my $token = $self->peek(); + if (defined($token) && $token eq 'else') { + push(@tokens, + $self->expect('else'), + $self->optional_newlines(), + $self->parse(qr/^fi$/)); # else body + } + push(@tokens, $self->expect('fi')); + return @tokens; +} + +sub parse_loop_body { + my $self = shift @_; + return $self->parse(qr/^done$/); +} + +sub parse_loop { + my $self = shift @_; + return ($self->parse(qr/^do$/), # condition + $self->expect('do'), + $self->optional_newlines(), + $self->parse_loop_body(), + $self->expect('done')); +} + +sub parse_func { + my $self = shift @_; + return ($self->expect('('), + $self->expect(')'), + $self->optional_newlines(), + $self->parse_cmd()); # body +} + +sub parse_bash_array_assignment { + my $self = shift @_; + my @tokens = $self->expect('('); + while (defined(my $token = $self->next_token())) { + push(@tokens, $token); + last if $token eq ')'; + } + return @tokens; +} + +my %compound = ( + '{' => \&parse_group, + '(' => \&parse_subshell, + 'case' => \&parse_case, + 'for' => \&parse_for, + 'if' => \&parse_if, + 'until' => \&parse_loop, + 'while' => \&parse_loop); + +sub parse_cmd { + my $self = shift @_; + my $cmd = $self->next_token(); + return () unless defined($cmd); + return $cmd if $cmd eq "\n"; + + my $token; + my @tokens = $cmd; + if ($cmd eq '!') { + push(@tokens, $self->parse_cmd()); + return @tokens; + } elsif (my $f = $compound{$cmd}) { + push(@tokens, $self->$f()); + } elsif (defined($token = $self->peek()) && $token eq '(') { + if ($cmd !~ /\w=$/) { + push(@tokens, $self->parse_func()); + return @tokens; + } + $tokens[-1] .= join(' ', $self->parse_bash_array_assignment()); + } + + while (defined(my $token = $self->next_token())) { + $self->untoken($token), last if $self->stop_at($token); + push(@tokens, $token); + last if $token =~ /^(?:[;&\n|]|&&|\|\|)$/; + } + push(@tokens, $self->next_token()) if 
$tokens[-1] ne "\n" && defined($token = $self->peek()) && $token eq "\n"; + return @tokens; +} + +sub accumulate { + my ($self, $tokens, $cmd) = @_; + push(@$tokens, @$cmd); +} + +sub parse { + my ($self, $stop) = @_; + push(@{$self->{stop}}, $stop); + goto DONE if $self->stop_at($self->peek()); + my @tokens; + while (my @cmd = $self->parse_cmd()) { + $self->accumulate(\@tokens, \@cmd); + last if $self->stop_at($self->peek()); + } +DONE: + pop(@{$self->{stop}}); + return @tokens; +} + package ScriptParser; sub new { -- cgit v0.10.2-6-g49f6 From 6d932e92fcb49b59b780bc018fe550d867bb3d84 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:42 +0000 Subject: chainlint.pl: add parser to validate tests Continue fleshing out chainlint.pl by adding TestParser, a parser with special knowledge about how Git tests should be written; for instance, it knows that commands within a test body should be chained together with `&&`. An upcoming parser which plucks test definitions from test scripts will invoke TestParser for each test body it encounters. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano diff --git a/t/chainlint.pl b/t/chainlint.pl index cdf1368..ad25710 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -441,6 +441,52 @@ DONE: return @tokens; } +# TestParser is a subclass of ShellParser which, beyond parsing shell script +# code, is also imbued with semantic knowledge of test construction, and checks +# tests for common problems (such as broken &&-chains) which might hide bugs in +# the tests themselves or in behaviors being exercised by the tests. As such, +# TestParser is only called upon to parse test bodies, not the top-level +# scripts in which the tests are defined. 
+package TestParser; + +use base 'ShellParser'; + +sub find_non_nl { + my $tokens = shift @_; + my $n = shift @_; + $n = $#$tokens if !defined($n); + $n-- while $n >= 0 && $$tokens[$n] eq "\n"; + return $n; +} + +sub ends_with { + my ($tokens, $needles) = @_; + my $n = find_non_nl($tokens); + for my $needle (reverse(@$needles)) { + return undef if $n < 0; + $n = find_non_nl($tokens, $n), next if $needle eq "\n"; + return undef if $$tokens[$n] !~ $needle; + $n--; + } + return 1; +} + +sub accumulate { + my ($self, $tokens, $cmd) = @_; + goto DONE unless @$tokens; + goto DONE if @$cmd == 1 && $$cmd[0] eq "\n"; + + # did previous command end with "&&", "||", "|"? + goto DONE if ends_with($tokens, [qr/^(?:&&|\|\||\|)$/]); + + # flag missing "&&" at end of previous command + my $n = find_non_nl($tokens); + splice(@$tokens, $n + 1, 0, '?!AMP?!') unless $n < 0; + +DONE: + $self->SUPER::accumulate($tokens, $cmd); +} + package ScriptParser; sub new { -- cgit v0.10.2-6-g49f6 From d99ebd6d2e57baa3ec45b939d40cf939b85301a3 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:43 +0000 Subject: chainlint.pl: add parser to identify test definitions Finish fleshing out chainlint.pl by adding ScriptParser, a parser which scans shell scripts for tests defined by test_expect_success() and test_expect_failure(), plucks the test body from each definition, and passes it to TestParser for validation. It recognizes test definitions not only at the top-level of test scripts but also tests synthesized within compound commands such as loops and function. 
Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano diff --git a/t/chainlint.pl b/t/chainlint.pl index ad25710..d526723 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -487,18 +487,75 @@ DONE: $self->SUPER::accumulate($tokens, $cmd); } +# ScriptParser is a subclass of ShellParser which identifies individual test +# definitions within test scripts, and passes each test body through TestParser +# to identify possible problems. ScriptParser detects test definitions not only +# at the top-level of test scripts but also within compound commands such as +# loops and function definitions. package ScriptParser; +use base 'ShellParser'; + sub new { my $class = shift @_; - my $self = bless {} => $class; - $self->{output} = []; + my $self = $class->SUPER::new(@_); $self->{ntests} = 0; return $self; } +# extract the raw content of a token, which may be a single string or a +# composition of multiple strings and non-string character runs; for instance, +# `"test body"` unwraps to `test body`; `word"a b"42'c d'` to `worda b42c d` +sub unwrap { + my $token = @_ ? 
shift @_ : $_; + # simple case: 'sqstring' or "dqstring" + return $token if $token =~ s/^'([^']*)'$/$1/; + return $token if $token =~ s/^"([^"]*)"$/$1/; + + # composite case + my ($s, $q, $escaped); + while (1) { + # slurp up non-special characters + $s .= $1 if $token =~ /\G([^\\'"]*)/gc; + # handle special characters + last unless $token =~ /\G(.)/sgc; + my $c = $1; + $q = undef, next if defined($q) && $c eq $q; + $q = $c, next if !defined($q) && $c =~ /^['"]$/; + if ($c eq '\\') { + last unless $token =~ /\G(.)/sgc; + $c = $1; + $s .= '\\' if $c eq "\n"; # preserve line splice + } + $s .= $c; + } + return $s +} + +sub check_test { + my $self = shift @_; + my ($title, $body) = map(unwrap, @_); + $self->{ntests}++; + my $parser = TestParser->new(\$body); + my @tokens = $parser->parse(); + return unless $emit_all || grep(/\?![^?]+\?!/, @tokens); + my $checked = join(' ', @tokens); + $checked =~ s/^\n//; + $checked =~ s/^ //mg; + $checked =~ s/ $//mg; + $checked .= "\n" unless $checked =~ /\n$/; + push(@{$self->{output}}, "# chainlint: $title\n$checked"); +} + sub parse_cmd { - return undef; + my $self = shift @_; + my @tokens = $self->SUPER::parse_cmd(); + return @tokens unless @tokens && $tokens[0] =~ /^test_expect_(?:success|failure)$/; + my $n = $#tokens; + $n-- while $n >= 0 && $tokens[$n] =~ /^(?:[;&\n|]|&&|\|\|)$/; + $self->check_test($tokens[1], $tokens[2]) if $n == 2; # title body + $self->check_test($tokens[2], $tokens[3]) if $n > 2; # prereq title body + return @tokens; } # main contains high-level functionality for processing command-line switches, -- cgit v0.10.2-6-g49f6 From 29fb2ec384a867ca577335a12f4b45c184e7b642 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:44 +0000 Subject: chainlint.pl: validate test scripts in parallel Although chainlint.pl has undergone a good deal of optimization during its development -- increasing in speed significantly -- parsing and validating 1050+ scripts and 16500+ tests via Perl is not exactly 
instantaneous. However, perceived performance can be improved by taking advantage of the fact that there is no interdependence between test scripts or test definitions, thus parsing and validating can be done in parallel. The number of available cores is determined automatically but can be overridden via the --jobs option. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano diff --git a/t/chainlint.pl b/t/chainlint.pl index d526723..898573a 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -15,9 +15,11 @@ use warnings; use strict; +use Config; use File::Glob; use Getopt::Long; +my $jobs = -1; my $show_stats; my $emit_all; @@ -569,6 +571,16 @@ if (eval {require Time::HiRes; Time::HiRes->import(); 1;}) { $interval = sub { return Time::HiRes::tv_interval(shift); }; } +sub ncores { + # Windows + return $ENV{NUMBER_OF_PROCESSORS} if exists($ENV{NUMBER_OF_PROCESSORS}); + # Linux / MSYS2 / Cygwin / WSL + do { local @ARGV='/proc/cpuinfo'; return scalar(grep(/^processor\s*:/, <>)); } if -r '/proc/cpuinfo'; + # macOS & BSD + return qx/sysctl -n hw.ncpu/ if $^O =~ /(?:^darwin$|bsd)/; + return 1; +} + sub show_stats { my ($start_time, $stats) = @_; my $walltime = $interval->($start_time); @@ -621,7 +633,9 @@ sub exit_code { Getopt::Long::Configure(qw{bundling}); GetOptions( "emit-all!" => \$emit_all, + "jobs|j=i" => \$jobs, "stats|show-stats!" 
=> \$show_stats) or die("option error\n"); +$jobs = ncores() if $jobs < 1; my $start_time = $getnow->(); my @stats; @@ -633,6 +647,40 @@ unless (@scripts) { exit; } -push(@stats, check_script(1, sub { shift(@scripts); }, sub { print(@_); })); +unless ($Config{useithreads} && eval { + require threads; threads->import(); + require Thread::Queue; Thread::Queue->import(); + 1; + }) { + push(@stats, check_script(1, sub { shift(@scripts); }, sub { print(@_); })); + show_stats($start_time, \@stats) if $show_stats; + exit(exit_code(\@stats)); +} + +my $script_queue = Thread::Queue->new(); +my $output_queue = Thread::Queue->new(); + +sub next_script { return $script_queue->dequeue(); } +sub emit { $output_queue->enqueue(@_); } + +sub monitor { + while (my $s = $output_queue->dequeue()) { + print($s); + } +} + +my $mon = threads->create({'context' => 'void'}, \&monitor); +threads->create({'context' => 'list'}, \&check_script, $_, \&next_script, \&emit) for 1..$jobs; + +$script_queue->enqueue(@scripts); +$script_queue->end(); + +for (threads->list()) { + push(@stats, $_->join()) unless $_ == $mon; +} + +$output_queue->end(); +$mon->join(); + show_stats($start_time, \@stats) if $show_stats; exit(exit_code(\@stats)); -- cgit v0.10.2-6-g49f6 From 35ebb1e37b25b9d799d1064d36a2ce668ad20264 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:45 +0000 Subject: chainlint.pl: don't require `return|exit|continue` to end with `&&` In order to check for &&-chain breakage, each time TestParser encounters a new command, it checks whether the previous command ends with `&&`, and -- with a couple exceptions -- signals breakage if it does not. The first exception is that a command may validly end with `||`, which is commonly employed as `command || return 1` at the very end of a loop body to terminate the loop early. 
The second is that piping one command's output with `|` to another command does not constitute a &&-chain break (the exit status of the pipe is the exit status of the final command in the pipe). However, it turns out that there are a few additional cases found in the wild in which it is likely safe for `&&` to be missing even when other commands follow. For instance: while {condition-1} do test {condition-2} || return 1 # or `exit 1` within a subshell more-commands done while {condition-1} do test {condition-2} || continue more-commands done Such cases indicate deliberate thought about failure modes by the test author, thus flagging them as breaking the &&-chain is not helpful. Therefore, take these special cases into consideration when checking for &&-chain breakage. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano diff --git a/t/chainlint.pl b/t/chainlint.pl index 898573a..31c4440 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -473,13 +473,29 @@ sub ends_with { return 1; } +sub match_ending { + my ($tokens, $endings) = @_; + for my $needles (@$endings) { + next if @$tokens < scalar(grep {$_ ne "\n"} @$needles); + return 1 if ends_with($tokens, $needles); + } + return undef; +} + +my @safe_endings = ( + [qr/^(?:&&|\|\||\|)$/], + [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/], + [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/, qr/^;$/], + [qr/^(?:exit|return|continue)$/], + [qr/^(?:exit|return|continue)$/, qr/^;$/]); + sub accumulate { my ($self, $tokens, $cmd) = @_; goto DONE unless @$tokens; goto DONE if @$cmd == 1 && $$cmd[0] eq "\n"; - # did previous command end with "&&", "||", "|"? - goto DONE if ends_with($tokens, [qr/^(?:&&|\|\||\|)$/]); + # did previous command end with "&&", "|", "|| return" or similar? 
+ goto DONE if match_ending($tokens, \@safe_endings); # flag missing "&&" at end of previous command my $n = find_non_nl($tokens); diff --git a/t/chainlint/chain-break-continue.expect b/t/chainlint/chain-break-continue.expect new file mode 100644 index 0000000..47a3457 --- /dev/null +++ b/t/chainlint/chain-break-continue.expect @@ -0,0 +1,12 @@ +git ls-tree --name-only -r refs/notes/many_notes | +while read path +do + test "$path" = "foobar/non-note.txt" && continue + test "$path" = "deadbeef" && continue + test "$path" = "de/adbeef" && continue + + if test $(expr length "$path") -ne $hexsz + then + return 1 + fi +done diff --git a/t/chainlint/chain-break-continue.test b/t/chainlint/chain-break-continue.test new file mode 100644 index 0000000..f0af71d --- /dev/null +++ b/t/chainlint/chain-break-continue.test @@ -0,0 +1,13 @@ +git ls-tree --name-only -r refs/notes/many_notes | +while read path +do +# LINT: broken &&-chain okay if explicit "continue" + test "$path" = "foobar/non-note.txt" && continue + test "$path" = "deadbeef" && continue + test "$path" = "de/adbeef" && continue + + if test $(expr length "$path") -ne $hexsz + then + return 1 + fi +done diff --git a/t/chainlint/chain-break-return-exit.expect b/t/chainlint/chain-break-return-exit.expect new file mode 100644 index 0000000..dba292e --- /dev/null +++ b/t/chainlint/chain-break-return-exit.expect @@ -0,0 +1,4 @@ +for i in 1 2 3 4 ; do + git checkout main -b $i || return $? + test_commit $i $i $i tag$i || return $? +done diff --git a/t/chainlint/chain-break-return-exit.test b/t/chainlint/chain-break-return-exit.test new file mode 100644 index 0000000..e2b0599 --- /dev/null +++ b/t/chainlint/chain-break-return-exit.test @@ -0,0 +1,5 @@ +for i in 1 2 3 4 ; do +# LINT: broken &&-chain okay if explicit "return $?" signals failure + git checkout main -b $i || return $? + test_commit $i $i $i tag$i || return $? 
+done diff --git a/t/chainlint/return-loop.expect b/t/chainlint/return-loop.expect new file mode 100644 index 0000000..cfc0549 --- /dev/null +++ b/t/chainlint/return-loop.expect @@ -0,0 +1,5 @@ +while test $i -lt $((num - 5)) +do + git notes add -m "notes for commit$i" HEAD~$i || return 1 + i=$((i + 1)) +done diff --git a/t/chainlint/return-loop.test b/t/chainlint/return-loop.test new file mode 100644 index 0000000..f90b171 --- /dev/null +++ b/t/chainlint/return-loop.test @@ -0,0 +1,6 @@ +while test $i -lt $((num - 5)) +do +# LINT: "|| return {n}" valid loop escape outside subshell; no "&&" needed + git notes add -m "notes for commit$i" HEAD~$i || return 1 + i=$((i + 1)) +done -- cgit v0.10.2-6-g49f6 From d00113ec3474a1652a73c11695c7e7b5182d80a7 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:46 +0000 Subject: t/Makefile: apply chainlint.pl to existing self-tests Now that chainlint.pl is functional, take advantage of the existing chainlint self-tests to validate its operation. (While at it, stop validating chainlint.sed against the self-tests since it will soon be retired.) Due to chainlint.sed implementation limitations leaking into the self-test "expect" files, a few of them require minor adjustment to make them compatible with chainlint.pl which does not share those limitations. First, because `sed` does not provide any sort of real recursion, chainlint.sed only emulates recursion into subshells, and each level of recursion leads to a multiplicative increase in complexity of the `sed` rules. To avoid substantial complexity, chainlint.sed, therefore, only emulates subshell recursion one level deep. Any subshell deeper than that is passed through as-is, which means that &&-chains are not checked in deeper subshells. chainlint.pl, on the other hand, employs a proper recursive descent parser, thus checks subshells to any depth and correctly flags broken &&-chains in deep subshells. 
Second, due to sed's line-oriented nature, chainlint.sed, by necessity, folds multi-line quoted strings into a single line. chainlint.pl, on the other hand, employs a proper lexical analyzer which preserves quoted strings as-is, including embedded newlines. Furthermore, the outputs of chainlint.sed and chainlint.pl do not match precisely in terms of whitespace. However, since the purpose of the self-checks is to verify that the ?!AMP?! annotations are being correctly added, minor whitespace differences are immaterial. For this reason, rather than adjusting whitespace in all existing self-test "expect" files to match the new linter's output, the `check-chainlint` target ignores whitespace differences. Since `diff -w` is not POSIX, `check-chainlint` attempts to employ `git diff -w`, and only falls back to non-POSIX `diff -w` (and `-u`) if `git diff` is not available. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano diff --git a/t/Makefile b/t/Makefile index 1c80c0c..11f2767 100644 --- a/t/Makefile +++ b/t/Makefile @@ -38,7 +38,7 @@ T = $(sort $(wildcard t[0-9][0-9][0-9][0-9]-*.sh)) THELPERS = $(sort $(filter-out $(T),$(wildcard *.sh))) TPERF = $(sort $(wildcard perf/p[0-9][0-9][0-9][0-9]-*.sh)) CHAINLINTTESTS = $(sort $(patsubst chainlint/%.test,%,$(wildcard chainlint/*.test))) -CHAINLINT = sed -f chainlint.sed +CHAINLINT = '$(PERL_PATH_SQ)' chainlint.pl all: $(DEFAULT_TEST_TARGET) @@ -73,10 +73,29 @@ clean-chainlint: check-chainlint: @mkdir -p '$(CHAINLINTTMP_SQ)' && \ - sed -e '/^# LINT: /d' $(patsubst %,chainlint/%.test,$(CHAINLINTTESTS)) >'$(CHAINLINTTMP_SQ)'/tests && \ - sed -e '/^[ ]*$$/d' $(patsubst %,chainlint/%.expect,$(CHAINLINTTESTS)) >'$(CHAINLINTTMP_SQ)'/expect && \ - $(CHAINLINT) '$(CHAINLINTTMP_SQ)'/tests | grep -v '^[ ]*$$' >'$(CHAINLINTTMP_SQ)'/actual && \ - diff -u '$(CHAINLINTTMP_SQ)'/expect '$(CHAINLINTTMP_SQ)'/actual + for i in $(CHAINLINTTESTS); do \ + echo "test_expect_success '$$i' '" && \ + sed -e '/^# LINT: /d' chainlint/$$i.test && 
\ + echo "'"; \ + done >'$(CHAINLINTTMP_SQ)'/tests && \ + { \ + echo "# chainlint: $(CHAINLINTTMP_SQ)/tests" && \ + for i in $(CHAINLINTTESTS); do \ + echo "# chainlint: $$i" && \ + sed -e '/^[ ]*$$/d' chainlint/$$i.expect; \ + done \ + } >'$(CHAINLINTTMP_SQ)'/expect && \ + $(CHAINLINT) --emit-all '$(CHAINLINTTMP_SQ)'/tests | \ + grep -v '^[ ]*$$' >'$(CHAINLINTTMP_SQ)'/actual && \ + if test -f ../GIT-BUILD-OPTIONS; then \ + . ../GIT-BUILD-OPTIONS; \ + fi && \ + if test -x ../git$$X; then \ + DIFFW="../git$$X --no-pager diff -w --no-index"; \ + else \ + DIFFW="diff -w -u"; \ + fi && \ + $$DIFFW '$(CHAINLINTTMP_SQ)'/expect '$(CHAINLINTTMP_SQ)'/actual test-lint: test-lint-duplicates test-lint-executable test-lint-shell-syntax \ test-lint-filenames diff --git a/t/chainlint/block.expect b/t/chainlint/block.expect index da60257..37dbf7d 100644 --- a/t/chainlint/block.expect +++ b/t/chainlint/block.expect @@ -1,7 +1,7 @@ ( foo && { - echo a + echo a ?!AMP?! echo b } && bar && diff --git a/t/chainlint/here-doc-multi-line-string.expect b/t/chainlint/here-doc-multi-line-string.expect index 2578191..be64b26 100644 --- a/t/chainlint/here-doc-multi-line-string.expect +++ b/t/chainlint/here-doc-multi-line-string.expect @@ -1,4 +1,5 @@ ( - cat <<-TXT && echo "multi-line string" ?!AMP?! + cat <<-TXT && echo "multi-line + string" ?!AMP?! bap ) diff --git a/t/chainlint/multi-line-string.expect b/t/chainlint/multi-line-string.expect index ab0dadf..27ff952 100644 --- a/t/chainlint/multi-line-string.expect +++ b/t/chainlint/multi-line-string.expect @@ -1,9 +1,14 @@ ( - x="line 1 line 2 line 3" && - y="line 1 line2" ?!AMP?! + x="line 1 + line 2 + line 3" && + y="line 1 + line2" ?!AMP?! 
foobar ) && ( - echo "xyz" "abc def ghi" && + echo "xyz" "abc + def + ghi" && barfoo ) diff --git a/t/chainlint/nested-subshell.expect b/t/chainlint/nested-subshell.expect index 41a48ad..02e0a9f 100644 --- a/t/chainlint/nested-subshell.expect +++ b/t/chainlint/nested-subshell.expect @@ -6,7 +6,7 @@ ) >file && cd foo && ( - echo a + echo a ?!AMP?! echo b ) >file ) diff --git a/t/chainlint/t7900-subtree.expect b/t/chainlint/t7900-subtree.expect index 1cccc7b..69167da 100644 --- a/t/chainlint/t7900-subtree.expect +++ b/t/chainlint/t7900-subtree.expect @@ -1,10 +1,17 @@ ( - chks="sub1sub2sub3sub4" && + chks="sub1 +sub2 +sub3 +sub4" && chks_sub=$(cat < Date: Thu, 1 Sep 2022 00:29:47 +0000 Subject: chainlint.pl: don't require `&` background command to end with `&&` The exit status of the `&` asynchronous operator which starts a command in the background is unconditionally zero, and the few places in the test scripts which launch commands asynchronously are not interested in the exit status of the `&` operator (though they often capture the background command's PID). As such, there is little value in complaining about broken &&-chain for a command launched in the background, and doing so would only make busy-work for test authors. Therefore, take this special case into account when checking for &&-chain breakage. 
Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano diff --git a/t/chainlint.pl b/t/chainlint.pl index 31c4440..ba3fcb0 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -483,7 +483,7 @@ sub match_ending { } my @safe_endings = ( - [qr/^(?:&&|\|\||\|)$/], + [qr/^(?:&&|\|\||\||&)$/], [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/], [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/, qr/^;$/], [qr/^(?:exit|return|continue)$/], diff --git a/t/chainlint/chain-break-background.expect b/t/chainlint/chain-break-background.expect new file mode 100644 index 0000000..28f9114 --- /dev/null +++ b/t/chainlint/chain-break-background.expect @@ -0,0 +1,9 @@ +JGIT_DAEMON_PID= && +git init --bare empty.git && +> empty.git/git-daemon-export-ok && +mkfifo jgit_daemon_output && +{ + jgit daemon --port="$JGIT_DAEMON_PORT" . > jgit_daemon_output & + JGIT_DAEMON_PID=$! +} && +test_expect_code 2 git ls-remote --exit-code git://localhost:$JGIT_DAEMON_PORT/empty.git diff --git a/t/chainlint/chain-break-background.test b/t/chainlint/chain-break-background.test new file mode 100644 index 0000000..e10f656 --- /dev/null +++ b/t/chainlint/chain-break-background.test @@ -0,0 +1,10 @@ +JGIT_DAEMON_PID= && +git init --bare empty.git && +>empty.git/git-daemon-export-ok && +mkfifo jgit_daemon_output && +{ +# LINT: exit status of "&" is always 0 so &&-chaining immaterial + jgit daemon --port="$JGIT_DAEMON_PORT" . >jgit_daemon_output & + JGIT_DAEMON_PID=$! +} && +test_expect_code 2 git ls-remote --exit-code git://localhost:$JGIT_DAEMON_PORT/empty.git -- cgit v0.10.2-6-g49f6 From a8f30ee0502b89ecb660af36784f653a8c3fb20d Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:48 +0000 Subject: chainlint.pl: don't flag broken &&-chain if `$?` handled explicitly There are cases in which tests capture and check a command's exit code explicitly without employing test_expect_code(). They do so by intentionally breaking the &&-chain since it would be impossible to capture "$?" 
in the failing case if the `status=$?` assignment was part of the &&-chain. Since such constructs are manually checking the exit code, their &&-chain breakage is legitimate and safe, thus should not be flagged. Therefore, stop flagging &&-chain breakage in such cases. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano diff --git a/t/chainlint.pl b/t/chainlint.pl index ba3fcb0..14e1db3 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -497,6 +497,12 @@ sub accumulate { # did previous command end with "&&", "|", "|| return" or similar? goto DONE if match_ending($tokens, \@safe_endings); + # if this command handles "$?" specially, then okay for previous + # command to be missing "&&" + for my $token (@$cmd) { + goto DONE if $token =~ /\$\?/; + } + # flag missing "&&" at end of previous command my $n = find_non_nl($tokens); splice(@$tokens, $n + 1, 0, '?!AMP?!') unless $n < 0; diff --git a/t/chainlint/chain-break-status.expect b/t/chainlint/chain-break-status.expect new file mode 100644 index 0000000..f4bada9 --- /dev/null +++ b/t/chainlint/chain-break-status.expect @@ -0,0 +1,9 @@ +OUT=$(( ( large_git ; echo $? 1 >& 3 ) | : ) 3 >& 1) && +test_match_signal 13 "$OUT" && + +{ test-tool sigchain > actual ; ret=$? ; } && +{ + test_match_signal 15 "$ret" || + test "$ret" = 3 +} && +test_cmp expect actual diff --git a/t/chainlint/chain-break-status.test b/t/chainlint/chain-break-status.test new file mode 100644 index 0000000..a6602a7 --- /dev/null +++ b/t/chainlint/chain-break-status.test @@ -0,0 +1,11 @@ +# LINT: broken &&-chain okay if next command handles "$?" explicitly +OUT=$( ((large_git; echo $? 1>&3) | :) 3>&1 ) && +test_match_signal 13 "$OUT" && + +# LINT: broken &&-chain okay if next command handles "$?" 
explicitly +{ test-tool sigchain >actual; ret=$?; } && +{ + test_match_signal 15 "$ret" || + test "$ret" = 3 +} && +test_cmp expect actual -- cgit v0.10.2-6-g49f6 From 832c68b3c210267c93e1dcb2f2763372339ca36c Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:49 +0000 Subject: chainlint.pl: don't flag broken &&-chain if failure indicated explicitly There are quite a few tests which print an error message and then explicitly signal failure with `false`, `return 1`, or `exit 1` as the final command in an `if` branch. In these cases, the tests don't bother maintaining the &&-chain between `echo` and the explicit "test failed" indicator. Since such constructs are manually signaling failure, their &&-chain breakage is legitimate and safe -- both for the command immediately preceding `false`, `return`, or `exit`, as well as for all preceding commands in the `if` branch. Therefore, stop flagging &&-chain breakage in these sorts of cases. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano diff --git a/t/chainlint.pl b/t/chainlint.pl index 14e1db3..a76a09e 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -503,6 +503,14 @@ sub accumulate { goto DONE if $token =~ /\$\?/; } + # if this command is "false", "return 1", or "exit 1" (which signal + # failure explicitly), then okay for all preceding commands to be + # missing "&&" + if ($$cmd[0] =~ /^(?:false|return|exit)$/) { + @$tokens = grep(!/^\?!AMP\?!$/, @$tokens); + goto DONE; + } + # flag missing "&&" at end of previous command my $n = find_non_nl($tokens); splice(@$tokens, $n + 1, 0, '?!AMP?!') unless $n < 0; diff --git a/t/chainlint/chain-break-false.expect b/t/chainlint/chain-break-false.expect new file mode 100644 index 0000000..989766f --- /dev/null +++ b/t/chainlint/chain-break-false.expect @@ -0,0 +1,9 @@ +if condition not satisified +then + echo it did not work... + echo failed! + false +else + echo it went okay ?!AMP?! 
+ congratulate user +fi diff --git a/t/chainlint/chain-break-false.test b/t/chainlint/chain-break-false.test new file mode 100644 index 0000000..a5aaff8 --- /dev/null +++ b/t/chainlint/chain-break-false.test @@ -0,0 +1,10 @@ +# LINT: broken &&-chain okay if explicit "false" signals failure +if condition not satisified +then + echo it did not work... + echo failed! + false +else + echo it went okay + congratulate user +fi diff --git a/t/chainlint/chain-break-return-exit.expect b/t/chainlint/chain-break-return-exit.expect index dba292e..1732d22 100644 --- a/t/chainlint/chain-break-return-exit.expect +++ b/t/chainlint/chain-break-return-exit.expect @@ -1,3 +1,18 @@ +case "$(git ls-files)" in +one ) echo pass one ;; +* ) echo bad one ; return 1 ;; +esac && +( + case "$(git ls-files)" in + two ) echo pass two ;; + * ) echo bad two ; exit 1 ;; +esac +) && +case "$(git ls-files)" in +dir/two"$LF"one ) echo pass both ;; +* ) echo bad ; return 1 ;; +esac && + for i in 1 2 3 4 ; do git checkout main -b $i || return $? test_commit $i $i $i tag$i || return $? diff --git a/t/chainlint/chain-break-return-exit.test b/t/chainlint/chain-break-return-exit.test index e2b0599..46542ed 100644 --- a/t/chainlint/chain-break-return-exit.test +++ b/t/chainlint/chain-break-return-exit.test @@ -1,3 +1,21 @@ +case "$(git ls-files)" in +one) echo pass one ;; +# LINT: broken &&-chain okay if explicit "return 1" signals failuire +*) echo bad one; return 1 ;; +esac && +( + case "$(git ls-files)" in + two) echo pass two ;; +# LINT: broken &&-chain okay if explicit "exit 1" signals failuire + *) echo bad two; exit 1 ;; + esac +) && +case "$(git ls-files)" in +dir/two"$LF"one) echo pass both ;; +# LINT: broken &&-chain okay if explicit "return 1" signals failuire +*) echo bad; return 1 ;; +esac && + for i in 1 2 3 4 ; do # LINT: broken &&-chain okay if explicit "return $?" signals failure git checkout main -b $i || return $? 
diff --git a/t/chainlint/if-in-loop.expect b/t/chainlint/if-in-loop.expect index 03b82a3..d6514ae 100644 --- a/t/chainlint/if-in-loop.expect +++ b/t/chainlint/if-in-loop.expect @@ -3,7 +3,7 @@ do if false then - echo "err" ?!AMP?! + echo "err" exit 1 fi ?!AMP?! foo diff --git a/t/chainlint/if-in-loop.test b/t/chainlint/if-in-loop.test index f0cf19c..90c2397 100644 --- a/t/chainlint/if-in-loop.test +++ b/t/chainlint/if-in-loop.test @@ -3,7 +3,7 @@ do if false then -# LINT: missing "&&" on "echo" +# LINT: missing "&&" on "echo" okay since "exit 1" signals error explicitly echo "err" exit 1 # LINT: missing "&&" on "fi" -- cgit v0.10.2-6-g49f6 From fd4094c3cad7c62adb0b7080e0dca37f66bf0c6e Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:50 +0000 Subject: chainlint.pl: complain about loops lacking explicit failure handling Shell `for` and `while` loops do not terminate automatically just because a command fails within the loop body. Instead, the loop continues to iterate and eventually returns the exit status of the final command of the final iteration, which may not be the command which failed, thus it is possible for failures to go undetected. Consequently, it is important for test authors to explicitly handle failure within the loop body by terminating the loop manually upon failure. This can be done by returning a non-zero exit code from within the loop body (i.e. `|| return 1`) or exiting (i.e. `|| exit 1`) if the loop is within a subshell, or by manually checking `$?` and taking some appropriate action. Therefore, add logic to detect and complain about loops which lack explicit `return` or `exit`, or `$?` check. 
Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano diff --git a/t/chainlint.pl b/t/chainlint.pl index a76a09e..674b3dd 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -482,6 +482,17 @@ sub match_ending { return undef; } +sub parse_loop_body { + my $self = shift @_; + my @tokens = $self->SUPER::parse_loop_body(@_); + # did loop signal failure via "|| return" or "|| exit"? + return @tokens if !@tokens || grep(/^(?:return|exit|\$\?)$/, @tokens); + # flag missing "return/exit" handling explicit failure in loop body + my $n = find_non_nl(\@tokens); + splice(@tokens, $n + 1, 0, '?!LOOP?!'); + return @tokens; +} + my @safe_endings = ( [qr/^(?:&&|\|\||\||&)$/], [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/], diff --git a/t/chainlint/complex-if-in-cuddled-loop.expect b/t/chainlint/complex-if-in-cuddled-loop.expect index 2fca183..dac2d0f 100644 --- a/t/chainlint/complex-if-in-cuddled-loop.expect +++ b/t/chainlint/complex-if-in-cuddled-loop.expect @@ -4,6 +4,6 @@ : else echo >file - fi + fi ?!LOOP?! done) && test ! -f file diff --git a/t/chainlint/for-loop.expect b/t/chainlint/for-loop.expect index 6671b8c..a5810c9 100644 --- a/t/chainlint/for-loop.expect +++ b/t/chainlint/for-loop.expect @@ -2,10 +2,10 @@ for i in a b c do echo $i ?!AMP?! - cat <<-EOF + cat <<-EOF ?!LOOP?! done ?!AMP?! for i in a b c; do echo $i && - cat $i + cat $i ?!LOOP?! done ) diff --git a/t/chainlint/loop-detect-failure.expect b/t/chainlint/loop-detect-failure.expect new file mode 100644 index 0000000..a66025c --- /dev/null +++ b/t/chainlint/loop-detect-failure.expect @@ -0,0 +1,15 @@ +git init r1 && +for n in 1 2 3 4 5 +do + echo "This is file: $n" > r1/file.$n && + git -C r1 add file.$n && + git -C r1 commit -m "$n" || return 1 +done && + +git init r2 && +for n in 1000 10000 +do + printf "%"$n"s" X > r2/large.$n && + git -C r2 add large.$n && + git -C r2 commit -m "$n" ?!LOOP?! 
+done diff --git a/t/chainlint/loop-detect-failure.test b/t/chainlint/loop-detect-failure.test new file mode 100644 index 0000000..b9791cc --- /dev/null +++ b/t/chainlint/loop-detect-failure.test @@ -0,0 +1,17 @@ +git init r1 && +# LINT: loop handles failure explicitly with "|| return 1" +for n in 1 2 3 4 5 +do + echo "This is file: $n" > r1/file.$n && + git -C r1 add file.$n && + git -C r1 commit -m "$n" || return 1 +done && + +git init r2 && +# LINT: loop fails to handle failure explicitly with "|| return 1" +for n in 1000 10000 +do + printf "%"$n"s" X > r2/large.$n && + git -C r2 add large.$n && + git -C r2 commit -m "$n" +done diff --git a/t/chainlint/loop-detect-status.expect b/t/chainlint/loop-detect-status.expect new file mode 100644 index 0000000..0ad23bb --- /dev/null +++ b/t/chainlint/loop-detect-status.expect @@ -0,0 +1,18 @@ +( while test $i -le $blobcount +do + printf "Generating blob $i/$blobcount\r" >& 2 && + printf "blob\nmark :$i\ndata $blobsize\n" && + + printf "%-${blobsize}s" $i && + echo "M 100644 :$i $i" >> commit && + i=$(($i+1)) || + echo $? > exit-status +done && +echo "commit refs/heads/main" && +echo "author A U Thor 123456789 +0000" && +echo "committer C O Mitter 123456789 +0000" && +echo "data 5" && +echo ">2gb" && +cat commit ) | +git fast-import --big-file-threshold=2 && +test ! -f exit-status diff --git a/t/chainlint/loop-detect-status.test b/t/chainlint/loop-detect-status.test new file mode 100644 index 0000000..1c6c23c --- /dev/null +++ b/t/chainlint/loop-detect-status.test @@ -0,0 +1,19 @@ +# LINT: "$?" handled explicitly within loop body +(while test $i -le $blobcount + do + printf "Generating blob $i/$blobcount\r" >&2 && + printf "blob\nmark :$i\ndata $blobsize\n" && + #test-tool genrandom $i $blobsize && + printf "%-${blobsize}s" $i && + echo "M 100644 :$i $i" >> commit && + i=$(($i+1)) || + echo $? 
> exit-status + done && + echo "commit refs/heads/main" && + echo "author A U Thor 123456789 +0000" && + echo "committer C O Mitter 123456789 +0000" && + echo "data 5" && + echo ">2gb" && + cat commit) | +git fast-import --big-file-threshold=2 && +test ! -f exit-status diff --git a/t/chainlint/loop-in-if.expect b/t/chainlint/loop-in-if.expect index e1be423..6c5d6e5 100644 --- a/t/chainlint/loop-in-if.expect +++ b/t/chainlint/loop-in-if.expect @@ -4,7 +4,7 @@ while true do echo "pop" ?!AMP?! - echo "glup" + echo "glup" ?!LOOP?! done ?!AMP?! foo fi ?!AMP?! diff --git a/t/chainlint/nested-loop-detect-failure.expect b/t/chainlint/nested-loop-detect-failure.expect new file mode 100644 index 0000000..4793a0e --- /dev/null +++ b/t/chainlint/nested-loop-detect-failure.expect @@ -0,0 +1,31 @@ +for i in 0 1 2 3 4 5 6 7 8 9 ; +do + for j in 0 1 2 3 4 5 6 7 8 9 ; + do + echo "$i$j" > "path$i$j" ?!LOOP?! + done ?!LOOP?! +done && + +for i in 0 1 2 3 4 5 6 7 8 9 ; +do + for j in 0 1 2 3 4 5 6 7 8 9 ; + do + echo "$i$j" > "path$i$j" || return 1 + done +done && + +for i in 0 1 2 3 4 5 6 7 8 9 ; +do + for j in 0 1 2 3 4 5 6 7 8 9 ; + do + echo "$i$j" > "path$i$j" ?!LOOP?! 
+ done || return 1 +done && + +for i in 0 1 2 3 4 5 6 7 8 9 ; +do + for j in 0 1 2 3 4 5 6 7 8 9 ; + do + echo "$i$j" > "path$i$j" || return 1 + done || return 1 +done diff --git a/t/chainlint/nested-loop-detect-failure.test b/t/chainlint/nested-loop-detect-failure.test new file mode 100644 index 0000000..e6f0c1a --- /dev/null +++ b/t/chainlint/nested-loop-detect-failure.test @@ -0,0 +1,35 @@ +# LINT: neither loop handles failure explicitly with "|| return 1" +for i in 0 1 2 3 4 5 6 7 8 9; +do + for j in 0 1 2 3 4 5 6 7 8 9; + do + echo "$i$j" >"path$i$j" + done +done && + +# LINT: inner loop handles failure explicitly with "|| return 1" +for i in 0 1 2 3 4 5 6 7 8 9; +do + for j in 0 1 2 3 4 5 6 7 8 9; + do + echo "$i$j" >"path$i$j" || return 1 + done +done && + +# LINT: outer loop handles failure explicitly with "|| return 1" +for i in 0 1 2 3 4 5 6 7 8 9; +do + for j in 0 1 2 3 4 5 6 7 8 9; + do + echo "$i$j" >"path$i$j" + done || return 1 +done && + +# LINT: inner & outer loops handles failure explicitly with "|| return 1" +for i in 0 1 2 3 4 5 6 7 8 9; +do + for j in 0 1 2 3 4 5 6 7 8 9; + do + echo "$i$j" >"path$i$j" || return 1 + done || return 1 +done diff --git a/t/chainlint/semicolon.expect b/t/chainlint/semicolon.expect index ed0b370..3aa2259 100644 --- a/t/chainlint/semicolon.expect +++ b/t/chainlint/semicolon.expect @@ -15,5 +15,5 @@ ) && (cd foo && for i in a b c; do - echo; + echo; ?!LOOP?! done) diff --git a/t/chainlint/while-loop.expect b/t/chainlint/while-loop.expect index 0d3a9b3..f272aa2 100644 --- a/t/chainlint/while-loop.expect +++ b/t/chainlint/while-loop.expect @@ -2,10 +2,10 @@ while true do echo foo ?!AMP?! - cat <<-EOF + cat <<-EOF ?!LOOP?! done ?!AMP?! while true; do echo foo && - cat bar + cat bar ?!LOOP?! 
done ) -- cgit v0.10.2-6-g49f6 From ae0c55abf8217bb06422f9eafcd7a30b2c8f9e8b Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:51 +0000 Subject: chainlint.pl: allow `|| echo` to signal failure upstream of a pipe The use of `|| return` (or `|| exit`) to signal failure within a loop isn't effective when the loop is upstream of a pipe since the pipe swallows all upstream exit codes and returns only the exit code of the final command in the pipeline. To work around this limitation, tests may adopt an alternative strategy of signaling failure by emitting text which would never be emitted in the non-failing case. For instance: while condition do command1 && command2 || echo "impossible text" done | sort >actual && Such usage indicates deliberate thought about failure cases by the test author, thus flagging them as missing `|| return` (or `|| exit`) is not helpful. Therefore, take this case into consideration when checking for explicit loop termination. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano diff --git a/t/chainlint.pl b/t/chainlint.pl index 674b3dd..386999c 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -487,6 +487,9 @@ sub parse_loop_body { my @tokens = $self->SUPER::parse_loop_body(@_); # did loop signal failure via "|| return" or "|| exit"? return @tokens if !@tokens || grep(/^(?:return|exit|\$\?)$/, @tokens); + # did loop upstream of a pipe signal failure via "|| echo 'impossible + # text'" as the final command in the loop body? 
+ return @tokens if ends_with(\@tokens, [qr/^\|\|$/, "\n", qr/^echo$/, qr/^.+$/]); # flag missing "return/exit" handling explicit failure in loop body my $n = find_non_nl(\@tokens); splice(@tokens, $n + 1, 0, '?!LOOP?!'); diff --git a/t/chainlint/loop-upstream-pipe.expect b/t/chainlint/loop-upstream-pipe.expect new file mode 100644 index 0000000..0b82ecc --- /dev/null +++ b/t/chainlint/loop-upstream-pipe.expect @@ -0,0 +1,10 @@ +( + git rev-list --objects --no-object-names base..loose | + while read oid + do + path="$objdir/$(test_oid_to_path "$oid")" && + printf "%s %d\n" "$oid" "$(test-tool chmtime --get "$path")" || + echo "object list generation failed for $oid" + done | + sort -k1 +) >expect && diff --git a/t/chainlint/loop-upstream-pipe.test b/t/chainlint/loop-upstream-pipe.test new file mode 100644 index 0000000..efb77da --- /dev/null +++ b/t/chainlint/loop-upstream-pipe.test @@ -0,0 +1,11 @@ +( + git rev-list --objects --no-object-names base..loose | + while read oid + do +# LINT: "|| echo" signals failure in loop upstream of a pipe + path="$objdir/$(test_oid_to_path "$oid")" && + printf "%s %d\n" "$oid" "$(test-tool chmtime --get "$path")" || + echo "object list generation failed for $oid" + done | + sort -k1 +) >expect && -- cgit v0.10.2-6-g49f6 From 56066523ed3ebd16b455e99ce954ec19b6ac5ada Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:52 +0000 Subject: t/chainlint: add more chainlint.pl self-tests During the development of chainlint.pl, numerous new self-tests were created to verify correct functioning beyond the checks already represented by the existing self-tests. 
The new checks fall into several categories: * behavior of the lexical analyzer for complex cases, such as line splicing, token pasting, entering and exiting string contexts inside and outside of test script bodies; for instance: test_expect_success 'title' ' x=$(echo "something" | sed -e '\''s/\\/\\\\/g'\'' -e '\''s/[[/.*^$]/\\&/g'\'' ' * behavior of the parser for all compound grammatical constructs, such as `if...fi`, `case...esac`, `while...done`, `{...}`, etc., and for other legal shell grammatical constructs not covered by existing chainlint.sed self-tests, as well as complex cases, such as: OUT=$( ((large_git 1>&3) | :) 3>&1 ) && * detection of problems, such as &&-chain breakage, from top-level to any depth since the existing self-tests do not cover any top-level context and only cover subshells one level deep due to limitations of chainlint.sed * address blind spots in chainlint.sed (such as not detecting a broken &&-chain on a one-line for-loop in a subshell[1]) which chainlint.pl correctly detects * real-world cases which tripped up chainlint.pl during its development [1]: https://lore.kernel.org/git/dce35a47012fecc6edc11c68e91dbb485c5bc36f.1661663880.git.gitgitgadget@gmail.com/ Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano diff --git a/t/chainlint/blank-line-before-esac.expect b/t/chainlint/blank-line-before-esac.expect new file mode 100644 index 0000000..48ed4eb --- /dev/null +++ b/t/chainlint/blank-line-before-esac.expect @@ -0,0 +1,18 @@ +test_done ( ) { + case "$test_failure" in + 0 ) + test_at_end_hook_ + + exit 0 ;; + + * ) + if test $test_external_has_tap -eq 0 + then + say_color error "# failed $test_failure among $msg" + say "1..$test_count" + fi + + exit 1 ;; + + esac +} diff --git a/t/chainlint/blank-line-before-esac.test b/t/chainlint/blank-line-before-esac.test new file mode 100644 index 0000000..cecccad --- /dev/null +++ b/t/chainlint/blank-line-before-esac.test @@ -0,0 +1,19 @@ +# LINT: blank line before "esac" +test_done () 
{ + case "$test_failure" in + 0) + test_at_end_hook_ + + exit 0 ;; + + *) + if test $test_external_has_tap -eq 0 + then + say_color error "# failed $test_failure among $msg" + say "1..$test_count" + fi + + exit 1 ;; + + esac +} diff --git a/t/chainlint/block.expect b/t/chainlint/block.expect index 37dbf7d..a3bcea4 100644 --- a/t/chainlint/block.expect +++ b/t/chainlint/block.expect @@ -9,4 +9,15 @@ echo c } ?!AMP?! baz -) +) && + +{ + echo a ; ?!AMP?! echo b +} && +{ echo a ; ?!AMP?! echo b ; } && + +{ + echo "${var}9" && + echo "done" +} && +finis diff --git a/t/chainlint/block.test b/t/chainlint/block.test index 0a82fd5..4ab69a4 100644 --- a/t/chainlint/block.test +++ b/t/chainlint/block.test @@ -11,4 +11,17 @@ echo c } baz -) +) && + +# LINT: ";" not allowed in place of "&&" +{ + echo a; echo b +} && +{ echo a; echo b; } && + +# LINT: "}" inside string not mistaken as end of block +{ + echo "${var}9" && + echo "done" +} && +finis diff --git a/t/chainlint/chained-block.expect b/t/chainlint/chained-block.expect new file mode 100644 index 0000000..574cdce --- /dev/null +++ b/t/chainlint/chained-block.expect @@ -0,0 +1,9 @@ +echo nobody home && { + test the doohicky ?!AMP?! 
+ right now +} && + +GIT_EXTERNAL_DIFF=echo git diff | { + read path oldfile oldhex oldmode newfile newhex newmode && + test "z$oh" = "z$oldhex" +} diff --git a/t/chainlint/chained-block.test b/t/chainlint/chained-block.test new file mode 100644 index 0000000..86f81ec --- /dev/null +++ b/t/chainlint/chained-block.test @@ -0,0 +1,11 @@ +# LINT: start of block chained to preceding command +echo nobody home && { + test the doohicky + right now +} && + +# LINT: preceding command pipes to block on same line +GIT_EXTERNAL_DIFF=echo git diff | { + read path oldfile oldhex oldmode newfile newhex newmode && + test "z$oh" = "z$oldhex" +} diff --git a/t/chainlint/chained-subshell.expect b/t/chainlint/chained-subshell.expect new file mode 100644 index 0000000..af0369d --- /dev/null +++ b/t/chainlint/chained-subshell.expect @@ -0,0 +1,10 @@ +mkdir sub && ( + cd sub && + foo the bar ?!AMP?! + nuff said +) && + +cut "-d " -f actual | ( read s1 s2 s3 && +test -f $s1 ?!AMP?! +test $(cat $s2) = tree2path1 && +test $(cat $s3) = tree3path1 ) diff --git a/t/chainlint/chained-subshell.test b/t/chainlint/chained-subshell.test new file mode 100644 index 0000000..4ff6ddd --- /dev/null +++ b/t/chainlint/chained-subshell.test @@ -0,0 +1,13 @@ +# LINT: start of subshell chained to preceding command +mkdir sub && ( + cd sub && + foo the bar + nuff said +) && + +# LINT: preceding command pipes to subshell on same line +cut "-d " -f actual | (read s1 s2 s3 && +test -f $s1 +test $(cat $s2) = tree2path1 && +# LINT: closing subshell ")" correctly detected on same line as "$(...)" +test $(cat $s3) = tree3path1) diff --git a/t/chainlint/command-substitution-subsubshell.expect b/t/chainlint/command-substitution-subsubshell.expect new file mode 100644 index 0000000..ab2f79e --- /dev/null +++ b/t/chainlint/command-substitution-subsubshell.expect @@ -0,0 +1,2 @@ +OUT=$(( ( large_git 1 >& 3 ) | : ) 3 >& 1) && +test_match_signal 13 "$OUT" diff --git a/t/chainlint/command-substitution-subsubshell.test 
b/t/chainlint/command-substitution-subsubshell.test new file mode 100644 index 0000000..321de29 --- /dev/null +++ b/t/chainlint/command-substitution-subsubshell.test @@ -0,0 +1,3 @@ +# LINT: subshell nested in subshell nested in command substitution +OUT=$( ((large_git 1>&3) | :) 3>&1 ) && +test_match_signal 13 "$OUT" diff --git a/t/chainlint/double-here-doc.expect b/t/chainlint/double-here-doc.expect new file mode 100644 index 0000000..75477bb --- /dev/null +++ b/t/chainlint/double-here-doc.expect @@ -0,0 +1,2 @@ +run_sub_test_lib_test_err run-inv-range-start "--run invalid range start" --run="a-5" <<-EOF && +check_sub_test_lib_test_err run-inv-range-start <<-EOF_OUT 3 <<-EOF_ERR diff --git a/t/chainlint/double-here-doc.test b/t/chainlint/double-here-doc.test new file mode 100644 index 0000000..cd584a4 --- /dev/null +++ b/t/chainlint/double-here-doc.test @@ -0,0 +1,12 @@ +run_sub_test_lib_test_err run-inv-range-start \ + "--run invalid range start" \ + --run="a-5" <<-\EOF && +test_expect_success "passing test #1" "true" +test_done +EOF +check_sub_test_lib_test_err run-inv-range-start \ + <<-\EOF_OUT 3<<-EOF_ERR +> FATAL: Unexpected exit with code 1 +EOF_OUT +> error: --run: invalid non-numeric in range start: ${SQ}a-5${SQ} +EOF_ERR diff --git a/t/chainlint/dqstring-line-splice.expect b/t/chainlint/dqstring-line-splice.expect new file mode 100644 index 0000000..bf9ced6 --- /dev/null +++ b/t/chainlint/dqstring-line-splice.expect @@ -0,0 +1,3 @@ +echo 'fatal: reword option of --fixup is mutually exclusive with' '--patch/--interactive/--all/--include/--only' > expect && +test_must_fail git commit --fixup=reword:HEAD~ $1 2 > actual && +test_cmp expect actual diff --git a/t/chainlint/dqstring-line-splice.test b/t/chainlint/dqstring-line-splice.test new file mode 100644 index 0000000..b407144 --- /dev/null +++ b/t/chainlint/dqstring-line-splice.test @@ -0,0 +1,7 @@ +# LINT: line-splice within DQ-string +'" +echo 'fatal: reword option of --fixup is mutually exclusive 
with'\ + '--patch/--interactive/--all/--include/--only' >expect && +test_must_fail git commit --fixup=reword:HEAD~ $1 2>actual && +test_cmp expect actual +"' diff --git a/t/chainlint/dqstring-no-interpolate.expect b/t/chainlint/dqstring-no-interpolate.expect new file mode 100644 index 0000000..1072498 --- /dev/null +++ b/t/chainlint/dqstring-no-interpolate.expect @@ -0,0 +1,11 @@ +grep "^ ! [rejected][ ]*$BRANCH -> $BRANCH (non-fast-forward)$" out && + +grep "^\.git$" output.txt && + + +( + cd client$version && + GIT_TEST_PROTOCOL_VERSION=$version git fetch-pack --no-progress .. $(cat ../input) +) > output && + cut -d ' ' -f 2 < output | sort > actual && + test_cmp expect actual diff --git a/t/chainlint/dqstring-no-interpolate.test b/t/chainlint/dqstring-no-interpolate.test new file mode 100644 index 0000000..d2f4219 --- /dev/null +++ b/t/chainlint/dqstring-no-interpolate.test @@ -0,0 +1,15 @@ +# LINT: regex dollar-sign eol anchor in double-quoted string not special +grep "^ ! \[rejected\][ ]*$BRANCH -> $BRANCH (non-fast-forward)$" out && + +# LINT: escaped "$" not mistaken for variable expansion +grep "^\\.git\$" output.txt && + +'" +( + cd client$version && +# LINT: escaped dollar-sign in double-quoted test body + GIT_TEST_PROTOCOL_VERSION=$version git fetch-pack --no-progress .. \$(cat ../input) +) >output && + cut -d ' ' -f 2 actual && + test_cmp expect actual +"' diff --git a/t/chainlint/empty-here-doc.expect b/t/chainlint/empty-here-doc.expect new file mode 100644 index 0000000..f42f2d4 --- /dev/null +++ b/t/chainlint/empty-here-doc.expect @@ -0,0 +1,3 @@ +git ls-tree $tree path > current && +cat > expected <current && +# LINT: empty here-doc +cat >expected <<\EOF && +EOF +test_output diff --git a/t/chainlint/exclamation.expect b/t/chainlint/exclamation.expect new file mode 100644 index 0000000..2d961a5 --- /dev/null +++ b/t/chainlint/exclamation.expect @@ -0,0 +1,4 @@ +if ! 
condition ; then echo nope ; else yep ; fi && +test_prerequisite !MINGW && +mail uucp!address && +echo !whatever! diff --git a/t/chainlint/exclamation.test b/t/chainlint/exclamation.test new file mode 100644 index 0000000..323595b --- /dev/null +++ b/t/chainlint/exclamation.test @@ -0,0 +1,8 @@ +# LINT: "! word" is two tokens +if ! condition; then echo nope; else yep; fi && +# LINT: "!word" is single token, not two tokens "!" and "word" +test_prerequisite !MINGW && +# LINT: "word!word" is single token, not three tokens "word", "!", and "word" +mail uucp!address && +# LINT: "!word!" is single token, not three tokens "!", "word", and "!" +echo !whatever! diff --git a/t/chainlint/for-loop-abbreviated.expect b/t/chainlint/for-loop-abbreviated.expect new file mode 100644 index 0000000..a21007a --- /dev/null +++ b/t/chainlint/for-loop-abbreviated.expect @@ -0,0 +1,5 @@ +for it +do + path=$(expr "$it" : ( [^:]*) ) && + git update-index --add "$path" || exit +done diff --git a/t/chainlint/for-loop-abbreviated.test b/t/chainlint/for-loop-abbreviated.test new file mode 100644 index 0000000..1084ecc --- /dev/null +++ b/t/chainlint/for-loop-abbreviated.test @@ -0,0 +1,6 @@ +# LINT: for-loop lacking optional "in [word...]" before "do" +for it +do + path=$(expr "$it" : '\([^:]*\)') && + git update-index --add "$path" || exit +done diff --git a/t/chainlint/function.expect b/t/chainlint/function.expect new file mode 100644 index 0000000..a14388e --- /dev/null +++ b/t/chainlint/function.expect @@ -0,0 +1,11 @@ +sha1_file ( ) { + echo "$*" | sed "s#..#.git/objects/&/#" +} && + +remove_object ( ) { + file=$(sha1_file "$*") && + test -e "$file" ?!AMP?! + rm -f "$file" +} ?!AMP?! 
+ +sha1_file arg && remove_object arg diff --git a/t/chainlint/function.test b/t/chainlint/function.test new file mode 100644 index 0000000..5ee5956 --- /dev/null +++ b/t/chainlint/function.test @@ -0,0 +1,13 @@ +# LINT: "()" in function definition not mistaken for subshell +sha1_file() { + echo "$*" | sed "s#..#.git/objects/&/#" +} && + +# LINT: broken &&-chain in function and after function +remove_object() { + file=$(sha1_file "$*") && + test -e "$file" + rm -f "$file" +} + +sha1_file arg && remove_object arg diff --git a/t/chainlint/here-doc-indent-operator.expect b/t/chainlint/here-doc-indent-operator.expect new file mode 100644 index 0000000..fb6cf72 --- /dev/null +++ b/t/chainlint/here-doc-indent-operator.expect @@ -0,0 +1,5 @@ +cat > expect <<-EOF && + +cat > expect <<-EOF ?!AMP?! + +cleanup diff --git a/t/chainlint/here-doc-indent-operator.test b/t/chainlint/here-doc-indent-operator.test new file mode 100644 index 0000000..c8a6f18 --- /dev/null +++ b/t/chainlint/here-doc-indent-operator.test @@ -0,0 +1,13 @@ +# LINT: whitespace between operator "<<-" and tag legal +cat >expect <<- EOF && +header: 43475048 1 $(test_oid oid_version) $NUM_CHUNKS 0 +num_commits: $1 +chunks: oid_fanout oid_lookup commit_metadata generation_data bloom_indexes bloom_data +EOF + +# LINT: not an indented here-doc; just a plain here-doc with tag named "-EOF" +cat >expect << -EOF +this is not indented +-EOF + +cleanup diff --git a/t/chainlint/if-condition-split.expect b/t/chainlint/if-condition-split.expect new file mode 100644 index 0000000..ee745ef --- /dev/null +++ b/t/chainlint/if-condition-split.expect @@ -0,0 +1,7 @@ +if bob && + marcia || + kevin +then + echo "nomads" ?!AMP?! 
+ echo "for sure" +fi diff --git a/t/chainlint/if-condition-split.test b/t/chainlint/if-condition-split.test new file mode 100644 index 0000000..240daa9 --- /dev/null +++ b/t/chainlint/if-condition-split.test @@ -0,0 +1,8 @@ +# LINT: "if" condition split across multiple lines at "&&" or "||" +if bob && + marcia || + kevin +then + echo "nomads" + echo "for sure" +fi diff --git a/t/chainlint/one-liner-for-loop.expect b/t/chainlint/one-liner-for-loop.expect new file mode 100644 index 0000000..51a3dc7 --- /dev/null +++ b/t/chainlint/one-liner-for-loop.expect @@ -0,0 +1,9 @@ +git init dir-rename-and-content && +( + cd dir-rename-and-content && + test_write_lines 1 2 3 4 5 >foo && + mkdir olddir && + for i in a b c; do echo $i >olddir/$i; ?!LOOP?! done ?!AMP?! + git add foo olddir && + git commit -m "original" && +) diff --git a/t/chainlint/one-liner-for-loop.test b/t/chainlint/one-liner-for-loop.test new file mode 100644 index 0000000..4bd8c06 --- /dev/null +++ b/t/chainlint/one-liner-for-loop.test @@ -0,0 +1,10 @@ +git init dir-rename-and-content && +( + cd dir-rename-and-content && + test_write_lines 1 2 3 4 5 >foo && + mkdir olddir && +# LINT: one-liner for-loop missing "|| exit"; also broken &&-chain + for i in a b c; do echo $i >olddir/$i; done + git add foo olddir && + git commit -m "original" && +) diff --git a/t/chainlint/sqstring-in-sqstring.expect b/t/chainlint/sqstring-in-sqstring.expect new file mode 100644 index 0000000..cf0b591 --- /dev/null +++ b/t/chainlint/sqstring-in-sqstring.expect @@ -0,0 +1,4 @@ +perl -e ' + defined($_ = -s $_) or die for @ARGV; + exit 1 if $ARGV[0] <= $ARGV[1]; +' test-2-$packname_2.pack test-3-$packname_3.pack diff --git a/t/chainlint/sqstring-in-sqstring.test b/t/chainlint/sqstring-in-sqstring.test new file mode 100644 index 0000000..77a425e --- /dev/null +++ b/t/chainlint/sqstring-in-sqstring.test @@ -0,0 +1,5 @@ +# LINT: SQ-string Perl code fragment within SQ-string +perl -e '\'' + defined($_ = -s $_) or die for @ARGV; + exit 1 
if $ARGV[0] <= $ARGV[1]; +'\'' test-2-$packname_2.pack test-3-$packname_3.pack diff --git a/t/chainlint/token-pasting.expect b/t/chainlint/token-pasting.expect new file mode 100644 index 0000000..342360b --- /dev/null +++ b/t/chainlint/token-pasting.expect @@ -0,0 +1,27 @@ +git config filter.rot13.smudge ./rot13.sh && +git config filter.rot13.clean ./rot13.sh && + +{ + echo "*.t filter=rot13" ?!AMP?! + echo "*.i ident" +} > .gitattributes && + +{ + echo a b c d e f g h i j k l m ?!AMP?! + echo n o p q r s t u v w x y z ?!AMP?! + echo '$Id$' +} > test && +cat test > test.t && +cat test > test.o && +cat test > test.i && +git add test test.t test.i && +rm -f test test.t test.i && +git checkout -- test test.t test.i && + +echo "content-test2" > test2.o && +echo "content-test3 - filename with special characters" > "test3 'sq',$x=.o" ?!AMP?! + +downstream_url_for_sed=$( + printf "%sn" "$downstream_url" | + sed -e 's/\/\\/g' -e 's/[[/.*^$]/\&/g' +) diff --git a/t/chainlint/token-pasting.test b/t/chainlint/token-pasting.test new file mode 100644 index 0000000..b4610ce --- /dev/null +++ b/t/chainlint/token-pasting.test @@ -0,0 +1,32 @@ +# LINT: single token; composite of multiple strings +git config filter.rot13.smudge ./rot13.sh && +git config filter.rot13.clean ./rot13.sh && + +{ + echo "*.t filter=rot13" + echo "*.i ident" +} >.gitattributes && + +{ + echo a b c d e f g h i j k l m + echo n o p q r s t u v w x y z +# LINT: exit/enter string context and escaped-quote outside of string + echo '\''$Id$'\'' +} >test && +cat test >test.t && +cat test >test.o && +cat test >test.i && +git add test test.t test.i && +rm -f test test.t test.i && +git checkout -- test test.t test.i && + +echo "content-test2" >test2.o && +# LINT: exit/enter string context and escaped-quote outside of string +echo "content-test3 - filename with special characters" >"test3 '\''sq'\'',\$x=.o" + +# LINT: single token; composite of multiple strings +downstream_url_for_sed=$( + printf "%s\n" 
"$downstream_url" | +# LINT: exit/enter string context; "&" inside string not command terminator + sed -e '\''s/\\/\\\\/g'\'' -e '\''s/[[/.*^$]/\\&/g'\'' +) -- cgit v0.10.2-6-g49f6 From 9fd911237f94680e0d1985e1f2fba751b16f5a94 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:53 +0000 Subject: test-lib: retire "lint harder" optimization hack `test_run_` in test-lib.sh "lints" the body of a test by sending it down a `sed chainlint.sed | grep` pipeline; this happens once for each test run by a test script. Although this pipeline may seem relatively cheap in isolation, it can become expensive when invoked 26800+ times by `make test`, once for each test run, despite the existence of only 16500+ test definitions across all tests scripts. This difference in the number of tests defined in the scripts (16500+) and the number of tests actually run by `make test` (26800+) is explained by the fact that some test scripts run a very large number of small tests, all driven by a series of functions/loops which fill in the test bodies. This means that certain test definitions are being linted repeatedly (tens or hundreds of times) unnecessarily. To avoid such unnecessary work, 2d86a96220 (t: avoid sed-based chain-linting in some expensive cases, 2021-05-13) added an optimization hack which allows individual scripts to manually suppress the unnecessary repeated linting of the same test definition. However, unlike chainlint.sed which checks a test body as the test is run, chainlint.pl checks each test definition just once, no matter how many times the test is run, thus the sort of optimization hack introduced by 2d86a96220 is no longer needed and can be retired. Therefore, revert 2d86a96220. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano diff --git a/t/README b/t/README index 2f439f9..979b2d4 100644 --- a/t/README +++ b/t/README @@ -196,11 +196,6 @@ appropriately before running "make". Short options can be bundled, i.e. 
this feature by setting the GIT_TEST_CHAIN_LINT environment variable to "1" or "0", respectively. - A few test scripts disable some of the more advanced - chain-linting detection in the name of efficiency. You can - override this by setting the GIT_TEST_CHAIN_LINT_HARDER - environment variable to "1". - --stress:: Run the test script repeatedly in multiple parallel jobs until one of them fails. Useful for reproducing rare failures in diff --git a/t/t0027-auto-crlf.sh b/t/t0027-auto-crlf.sh index a22e0e1..a94ac1e 100755 --- a/t/t0027-auto-crlf.sh +++ b/t/t0027-auto-crlf.sh @@ -387,9 +387,7 @@ test_expect_success 'setup main' ' test_tick ' -# Disable extra chain-linting for the next set of tests. There are many -# auto-generated ones that are not worth checking over and over. -GIT_TEST_CHAIN_LINT_HARDER_DEFAULT=0 + warn_LF_CRLF="LF will be replaced by CRLF" warn_CRLF_LF="CRLF will be replaced by LF" @@ -606,9 +604,6 @@ do checkout_files "" "$id" "crlf" true "" CRLF CRLF CRLF CRLF_mix_CR CRLF_nul done -# The rest of the tests are unique; do the usual linting. -unset GIT_TEST_CHAIN_LINT_HARDER_DEFAULT - # Should be the last test case: remove some files from the worktree test_expect_success 'ls-files --eol -d -z' ' rm crlf_false_attr__CRLF.txt crlf_false_attr__CRLF_mix_LF.txt crlf_false_attr__LF.txt .gitattributes && diff --git a/t/t3070-wildmatch.sh b/t/t3070-wildmatch.sh index f953996..5d871fd 100755 --- a/t/t3070-wildmatch.sh +++ b/t/t3070-wildmatch.sh @@ -5,11 +5,6 @@ test_description='wildmatch tests' TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh -# Disable expensive chain-lint tests; all of the tests in this script -# are variants of a few trivial test-tool invocations, and there are a lot of -# them. 
-GIT_TEST_CHAIN_LINT_HARDER_DEFAULT=0 - should_create_test_file() { file=$1 diff --git a/t/test-lib.sh b/t/test-lib.sh index 377cc1c..dc0d059 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1091,11 +1091,8 @@ test_run_ () { trace= # 117 is magic because it is unlikely to match the exit # code of other programs - if test "OK-117" != "$(test_eval_ "(exit 117) && $1${LF}${LF}echo OK-\$?" 3>&1)" || - { - test "${GIT_TEST_CHAIN_LINT_HARDER:-${GIT_TEST_CHAIN_LINT_HARDER_DEFAULT:-1}}" != 0 && - $(printf '%s\n' "$1" | sed -f "$GIT_BUILD_DIR/t/chainlint.sed" | grep -q '?![A-Z][A-Z]*?!') - } + if $(printf '%s\n' "$1" | sed -f "$GIT_BUILD_DIR/t/chainlint.sed" | grep -q '?![A-Z][A-Z]*?!') || + test "OK-117" != "$(test_eval_ "(exit 117) && $1${LF}${LF}echo OK-\$?" 3>&1)" then BUG "broken &&-chain or run-away HERE-DOC: $1" fi -- cgit v0.10.2-6-g49f6 From 23a14f301662df6d003b5bf4dc598f02311c6b30 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:54 +0000 Subject: test-lib: replace chainlint.sed with chainlint.pl By automatically invoking chainlint.sed upon each test it runs, `test_run_` in test-lib.sh ensures that broken &&-chains will be detected early as tests are modified or new tests are created since it is typical to run a test script manually (i.e. `./t1234-test-script.sh`) during test development. Now that the implementation of chainlint.pl is complete, modify test-lib.sh to invoke it automatically instead of chainlint.sed each time a test script is run. This change reduces the number of "linter" invocations from 26800+ (once per test run) down to 1050+ (once per test script), however, a subsequent change will drop the number of invocations to 1 per `make test`, thus fully realizing the benefit of the new linter.
Note that the "magic exit code 117" &&-chain checker added by bb79af9d09 (t/test-lib: introduce --chain-lint option, 2015-03-20) which is built into t/test-lib.sh is retained since it has near zero-cost and (theoretically) may catch a broken &&-chain not caught by chainlint.pl. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index 2237109..ca358a2 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -1076,7 +1076,7 @@ if(NOT ${CMAKE_BINARY_DIR}/CMakeCache.txt STREQUAL ${CACHE_PATH}) "string(REPLACE \"\${GIT_BUILD_DIR_REPL}\" \"GIT_BUILD_DIR=\\\"$TEST_DIRECTORY/../${BUILD_DIR_RELATIVE}\\\"\" content \"\${content}\")\n" "file(WRITE ${CMAKE_SOURCE_DIR}/t/test-lib.sh \${content})") #misc copies - file(COPY ${CMAKE_SOURCE_DIR}/t/chainlint.sed DESTINATION ${CMAKE_BINARY_DIR}/t/) + file(COPY ${CMAKE_SOURCE_DIR}/t/chainlint.pl DESTINATION ${CMAKE_BINARY_DIR}/t/) file(COPY ${CMAKE_SOURCE_DIR}/po/is.po DESTINATION ${CMAKE_BINARY_DIR}/po/) file(COPY ${CMAKE_SOURCE_DIR}/mergetools/tkdiff DESTINATION ${CMAKE_BINARY_DIR}/mergetools/) file(COPY ${CMAKE_SOURCE_DIR}/contrib/completion/git-prompt.sh DESTINATION ${CMAKE_BINARY_DIR}/contrib/completion/) diff --git a/t/test-lib.sh b/t/test-lib.sh index dc0d059..a65df2f 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1091,8 +1091,7 @@ test_run_ () { trace= # 117 is magic because it is unlikely to match the exit # code of other programs - if $(printf '%s\n' "$1" | sed -f "$GIT_BUILD_DIR/t/chainlint.sed" | grep -q '?![A-Z][A-Z]*?!') || - test "OK-117" != "$(test_eval_ "(exit 117) && $1${LF}${LF}echo OK-\$?" 3>&1)" + if test "OK-117" != "$(test_eval_ "(exit 117) && $1${LF}${LF}echo OK-\$?" 
3>&1)" then BUG "broken &&-chain or run-away HERE-DOC: $1" fi @@ -1588,6 +1587,12 @@ then BAIL_OUT_ENV_NEEDS_SANITIZE_LEAK "GIT_TEST_SANITIZE_LEAK_LOG=true" fi +if test "${GIT_TEST_CHAIN_LINT:-1}" != 0 +then + "$PERL_PATH" "$TEST_DIRECTORY/chainlint.pl" "$0" || + BUG "lint error (see '?!...!? annotations above)" +fi + # Last-minute variable setup USER_HOME="$HOME" HOME="$TRASH_DIRECTORY" -- cgit v0.10.2-6-g49f6 From 69b9924b875079babb1d3f665bdc719c4871ba73 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:55 +0000 Subject: t/Makefile: teach `make test` and `make prove` to run chainlint.pl Unlike chainlint.sed which "lints" a single test body at a time, thus is invoked once per test, chainlint.pl can check all test bodies in all test scripts with a single invocation. As such, it is akin to other bulk "linters" run by the Makefile, such as `test-lint-shell-syntax`, `test-lint-duplicates`, etc. Therefore, teach `make test` and `make prove` to invoke chainlint.pl along with the other bulk linters. Also, since the single chainlint.pl invocation by `make test` or `make prove` has already checked all tests in all scripts, instruct the individual test scripts not to run chainlint.pl on themselves unnecessarily. 
Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano diff --git a/t/Makefile b/t/Makefile index 11f2767..3db48c0 100644 --- a/t/Makefile +++ b/t/Makefile @@ -36,14 +36,21 @@ CHAINLINTTMP_SQ = $(subst ','\'',$(CHAINLINTTMP)) T = $(sort $(wildcard t[0-9][0-9][0-9][0-9]-*.sh)) THELPERS = $(sort $(filter-out $(T),$(wildcard *.sh))) +TLIBS = $(sort $(wildcard lib-*.sh)) annotate-tests.sh TPERF = $(sort $(wildcard perf/p[0-9][0-9][0-9][0-9]-*.sh)) +TINTEROP = $(sort $(wildcard interop/i[0-9][0-9][0-9][0-9]-*.sh)) CHAINLINTTESTS = $(sort $(patsubst chainlint/%.test,%,$(wildcard chainlint/*.test))) CHAINLINT = '$(PERL_PATH_SQ)' chainlint.pl +# `test-chainlint` (which is a dependency of `test-lint`, `test` and `prove`) +# checks all tests in all scripts via a single invocation, so tell individual +# scripts not to "chainlint" themselves +CHAINLINTSUPPRESS = GIT_TEST_CHAIN_LINT=0 && export GIT_TEST_CHAIN_LINT && + all: $(DEFAULT_TEST_TARGET) test: pre-clean check-chainlint $(TEST_LINT) - $(MAKE) aggregate-results-and-cleanup + $(CHAINLINTSUPPRESS) $(MAKE) aggregate-results-and-cleanup failed: @failed=$$(cd '$(TEST_RESULTS_DIRECTORY_SQ)' && \ @@ -52,7 +59,7 @@ failed: test -z "$$failed" || $(MAKE) $$failed prove: pre-clean check-chainlint $(TEST_LINT) - @echo "*** prove ***"; $(PROVE) --exec '$(TEST_SHELL_PATH_SQ)' $(GIT_PROVE_OPTS) $(T) :: $(GIT_TEST_OPTS) + @echo "*** prove ***"; $(CHAINLINTSUPPRESS) $(PROVE) --exec '$(TEST_SHELL_PATH_SQ)' $(GIT_PROVE_OPTS) $(T) :: $(GIT_TEST_OPTS) $(MAKE) clean-except-prove-cache $(T): @@ -99,6 +106,9 @@ check-chainlint: test-lint: test-lint-duplicates test-lint-executable test-lint-shell-syntax \ test-lint-filenames +ifneq ($(GIT_TEST_CHAIN_LINT),0) +test-lint: test-chainlint +endif test-lint-duplicates: @dups=`echo $(T) $(TPERF) | tr ' ' '\n' | sed 's/-.*//' | sort | uniq -d` && \ @@ -121,6 +131,9 @@ test-lint-filenames: test -z "$$bad" || { \ echo >&2 "non-portable file name(s): $$bad"; exit 1; } +test-chainlint: + @$(CHAINLINT) 
$(T) $(TLIBS) $(TPERF) $(TINTEROP) + aggregate-results-and-cleanup: $(T) $(MAKE) aggregate-results $(MAKE) clean @@ -136,4 +149,5 @@ valgrind: perf: $(MAKE) -C perf/ all -.PHONY: pre-clean $(T) aggregate-results clean valgrind perf check-chainlint clean-chainlint +.PHONY: pre-clean $(T) aggregate-results clean valgrind perf \ + check-chainlint clean-chainlint test-chainlint -- cgit v0.10.2-6-g49f6 From fb41727b7ed7f62d121cd846f826fb1c62d1bc6a Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 1 Sep 2022 00:29:56 +0000 Subject: t: retire unused chainlint.sed Retire chainlint.sed since it has been replaced by a more accurate and functional &&-chain "linter", thus is no longer used. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano diff --git a/t/chainlint.sed b/t/chainlint.sed deleted file mode 100644 index dc4ce37..0000000 --- a/t/chainlint.sed +++ /dev/null @@ -1,399 +0,0 @@ -#------------------------------------------------------------------------------ -# Detect broken &&-chains in tests. -# -# At present, only &&-chains in subshells are examined by this linter; -# top-level &&-chains are instead checked directly by the test framework. Like -# the top-level &&-chain linter, the subshell linter (intentionally) does not -# check &&-chains within {...} blocks. -# -# Checking for &&-chain breakage is done line-by-line by pure textual -# inspection. -# -# Incomplete lines (those ending with "\") are stitched together with following -# lines to simplify processing, particularly of "one-liner" statements. -# Top-level here-docs are swallowed to avoid false positives within the -# here-doc body, although the statement to which the here-doc is attached is -# retained. -# -# Heuristics are used to detect end-of-subshell when the closing ")" is cuddled -# with the final subshell statement on the same line: -# -# (cd foo && -# bar) -# -# in order to avoid misinterpreting the ")" in constructs such as "x=$(...)" -# and "case $x in *)" as ending the subshell. 
-# -# Lines missing a final "&&" are flagged with "?!AMP?!", as are lines which -# chain commands with ";" internally rather than "&&". A line may be flagged -# for both violations. -# -# Detection of a missing &&-link in a multi-line subshell is complicated by the -# fact that the last statement before the closing ")" must not end with "&&". -# Since processing is line-by-line, it is not known whether a missing "&&" is -# legitimate or not until the _next_ line is seen. To accommodate this, within -# multi-line subshells, each line is stored in sed's "hold" area until after -# the next line is seen and processed. If the next line is a stand-alone ")", -# then a missing "&&" on the previous line is legitimate; otherwise a missing -# "&&" is a break in the &&-chain. -# -# ( -# cd foo && -# bar -# ) -# -# In practical terms, when "bar" is encountered, it is flagged with "?!AMP?!", -# but when the stand-alone ")" line is seen which closes the subshell, the -# "?!AMP?!" violation is removed from the "bar" line (retrieved from the "hold" -# area) since the final statement of a subshell must not end with "&&". The -# final line of a subshell may still break the &&-chain by using ";" internally -# to chain commands together rather than "&&", but an internal "?!AMP?!" is -# never removed from a line even though a line-ending "?!AMP?!" might be. -# -# Care is taken to recognize the last _statement_ of a multi-line subshell, not -# necessarily the last textual _line_ within the subshell, since &&-chaining -# applies to statements, not to lines. Consequently, blank lines, comment -# lines, and here-docs are swallowed (but not the command to which the here-doc -# is attached), leaving the last statement in the "hold" area, not the last -# line, thus simplifying &&-link checking. -# -# The final statement before "done" in for- and while-loops, and before "elif", -# "else", and "fi" in if-then-else likewise must not end with "&&", thus -# receives similar treatment. 
-# -# Swallowing here-docs with arbitrary tags requires a bit of finesse. When a -# line such as "cat <cat <\n\1$/ is attempted to see if -# the content inside "<...>" matches the entirety of the newly-read line. For -# instance, if the next line read is "some data", when concatenated with the -# target line, it becomes "cat <cat <" does match the text following the -# newline, thus the closing here-doc tag has been found. The closing tag line -# and the "<...>" prefix on the target line are then discarded, leaving just -# the target line "cat <\1\2/ - :hered - N - /^<\([^>]*\)>.*\n[ ]*\1[ ]*$/!{ - s/\n.*$// - bhered - } - s/^<[^>]*>// - s/\n.*$// -} -:notdoc - -# one-liner "(...) &&" -/^[ ]*!*[ ]*(..*)[ ]*&&[ ]*$/boneline - -# same as above but without trailing "&&" -/^[ ]*!*[ ]*(..*)[ ]*$/boneline - -# one-liner "(...) >x" (or "2>x" or "|&]/boneline - -# multi-line "(...\n...)" -/^[ ]*(/bsubsh - -# innocuous line -- print it and advance to next line -b - -# found one-liner "(...)" -- mark suspect if it uses ";" internally rather than -# "&&" (but not ";" in a string) -:oneline -/;/{ - /"[^"]*;[^"]*"/!s/;/; ?!AMP?!/ -} -b - -:subsh -# bare "(" line? -- stash for later printing -/^[ ]*([ ]*$/ { - h - bnextln -} -# "(..." line -- "(" opening subshell cuddled with command; temporarily replace -# "(" with sentinel "^" and process the line as if "(" had been seen solo on -# the preceding line; this temporary replacement prevents several rules from -# accidentally thinking "(" introduces a nested subshell; "^" is changed back -# to "(" at output time -x -s/.*// -x -s/(/^/ -bslurp - -:nextln -N -s/.*\n// - -:slurp -# incomplete line "...\" -/\\$/bicmplte -# multi-line quoted string "...\n..."? -/"/bdqstr -# multi-line quoted string '...\n...'? 
(but not contraction in string "it's") -/'/{ - /"[^'"]*'[^'"]*"/!bsqstr -} -:folded -# here-doc -- swallow it (but not "<<" in a string) -/<<-*[ ]*[\\'"]*[A-Za-z0-9_]/{ - /"[^"]*<<[^"]*"/!bheredoc -} -# comment or empty line -- discard since final non-comment, non-empty line -# before closing ")", "done", "elsif", "else", or "fi" will need to be -# re-visited to drop "suspect" marking since final line of those constructs -# legitimately lacks "&&", so "suspect" mark must be removed -/^[ ]*#/bnextln -/^[ ]*$/bnextln -# in-line comment -- strip it (but not "#" in a string, Bash ${#...} array -# length, or Perforce "//depot/path#42" revision in filespec) -/[ ]#/{ - /"[^"]*#[^"]*"/!s/[ ]#.*$// -} -# one-liner "case ... esac" -/^[ ^]*case[ ]*..*esac/bchkchn -# multi-line "case ... esac" -/^[ ^]*case[ ]..*[ ]in/bcase -# multi-line "for ... done" or "while ... done" -/^[ ^]*for[ ]..*[ ]in/bcont -/^[ ^]*while[ ]/bcont -/^[ ]*do[ ]/bcont -/^[ ]*do[ ]*$/bcont -/;[ ]*do/bcont -/^[ ]*done[ ]*&&[ ]*$/bdone -/^[ ]*done[ ]*$/bdone -/^[ ]*done[ ]*[<>|]/bdone -/^[ ]*done[ ]*)/bdone -/||[ ]*exit[ ]/bcont -/||[ ]*exit[ ]*$/bcont -# multi-line "if...elsif...else...fi" -/^[ ^]*if[ ]/bcont -/^[ ]*then[ ]/bcont -/^[ ]*then[ ]*$/bcont -/;[ ]*then/bcont -/^[ ]*elif[ ]/belse -/^[ ]*elif[ ]*$/belse -/^[ ]*else[ ]/belse -/^[ ]*else[ ]*$/belse -/^[ ]*fi[ ]*&&[ ]*$/bdone -/^[ ]*fi[ ]*$/bdone -/^[ ]*fi[ ]*[<>|]/bdone -/^[ ]*fi[ ]*)/bdone -# nested one-liner "(...) &&" -/^[ ^]*(.*)[ ]*&&[ ]*$/bchkchn -# nested one-liner "(...)" -/^[ ^]*(.*)[ ]*$/bchkchn -# nested one-liner "(...) 
>x" (or "2>x" or "|]/bchkchn -# nested multi-line "(...\n...)" -/^[ ^]*(/bnest -# multi-line "{...\n...}" -/^[ ^]*{/bblock -# closing ")" on own line -- exit subshell -/^[ ]*)/bclssolo -# "$((...))" -- arithmetic expansion; not closing ")" -/\$(([^)][^)]*))[^)]*$/bchkchn -# "$(...)" -- command substitution; not closing ")" -/\$([^)][^)]*)[^)]*$/bchkchn -# multi-line "$(...\n...)" -- command substitution; treat as nested subshell -/\$([^)]*$/bnest -# "=(...)" -- Bash array assignment; not closing ")" -/=(/bchkchn -# closing "...) &&" -/)[ ]*&&[ ]*$/bclose -# closing "...)" -/)[ ]*$/bclose -# closing "...) >x" (or "2>x" or "|]/bclose -:chkchn -# mark suspect if line uses ";" internally rather than "&&" (but not ";" in a -# string and not ";;" in one-liner "case...esac") -/;/{ - /;;/!{ - /"[^"]*;[^"]*"/!s/;/; ?!AMP?!/ - } -} -# line ends with pipe "...|" -- valid; not missing "&&" -/|[ ]*$/bcont -# missing end-of-line "&&" -- mark suspect -/&&[ ]*$/!s/$/ ?!AMP?!/ -:cont -# retrieve and print previous line -x -s/^\([ ]*\)^/\1(/ -s/?!HERE?!/<\1?!HERE?!\2\3/ -:hdocsub -N -/^<\([^>]*\)>.*\n[ ]*\1[ ]*$/!{ - s/\n.*$// - bhdocsub -} -s/^<[^>]*>// -s/\n.*$// -bfolded - -# found "case ... in" -- pass through untouched -:case -x -s/^\([ ]*\)^/\1(/ -s/?!HERE?!/< Date: Tue, 13 Sep 2022 04:01:47 +0000 Subject: chainlint: colorize problem annotations and test delimiters When `chainlint.pl` detects problems in a test definition, it emits the test definition with "?!FOO?!" annotations highlighting the problems it discovered. For instance, given this problematic test: test_expect_success 'discombobulate frobnitz' ' git frob babble && (echo balderdash; echo gnabgib) >expect && for i in three two one do git nitfol $i done >actual test_cmp expect actual ' chainlint.pl will output: # chainlint: t1234-confusing.sh # chainlint: discombobulate frobnitz git frob babble && (echo balderdash ; ?!AMP?! echo gnabgib) >expect && for i in three two one do git nitfol $i ?!LOOP?! done >actual ?!AMP?! 
test_cmp expect actual in which it may be difficult to spot the "?!FOO?!" annotations. The problem is compounded when multiple tests, possibly in multiple scripts, fail "linting", in which case it may be difficult to spot the "# chainlint:" lines which delimit one problematic test from another. To ameliorate this potential problem, colorize the "?!FOO?!" annotations in order to quickly draw the test author's attention to the problem spots, and colorize the "# chainlint:" lines to help the author identify the name of each script and each problematic test. Colorization is disabled automatically if output is not directed to a terminal or if NO_COLOR environment variable is set. The implementation is specific to Unix (it employs `tput` if available) but works equally well in the Git for Windows development environment which emulates Unix sufficiently. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano diff --git a/t/chainlint.pl b/t/chainlint.pl index 386999c..976db4b 100755 --- a/t/chainlint.pl +++ b/t/chainlint.pl @@ -585,12 +585,14 @@ sub check_test { my $parser = TestParser->new(\$body); my @tokens = $parser->parse(); return unless $emit_all || grep(/\?![^?]+\?!/, @tokens); + my $c = main::fd_colors(1); my $checked = join(' ', @tokens); $checked =~ s/^\n//; $checked =~ s/^ //mg; $checked =~ s/ $//mg; + $checked =~ s/(\?![^?]+\?!)/$c->{rev}$c->{red}$1$c->{reset}/mg; $checked .= "\n" unless $checked =~ /\n$/; - push(@{$self->{output}}, "# chainlint: $title\n$checked"); + push(@{$self->{output}}, "$c->{blue}# chainlint: $title$c->{reset}\n$checked"); } sub parse_cmd { @@ -615,6 +617,41 @@ if (eval {require Time::HiRes; Time::HiRes->import(); 1;}) { $interval = sub { return Time::HiRes::tv_interval(shift); }; } +# Restore TERM if test framework set it to "dumb" so 'tput' will work; do this +# outside of get_colors() since under 'ithreads' all threads use %ENV of main +# thread and ignore %ENV changes in subthreads. 
+$ENV{TERM} = $ENV{USER_TERM} if $ENV{USER_TERM}; + +my @NOCOLORS = (bold => '', rev => '', reset => '', blue => '', green => '', red => ''); +my %COLORS = (); +sub get_colors { + return \%COLORS if %COLORS; + if (exists($ENV{NO_COLOR}) || + system("tput sgr0 >/dev/null 2>&1") != 0 || + system("tput bold >/dev/null 2>&1") != 0 || + system("tput rev >/dev/null 2>&1") != 0 || + system("tput setaf 1 >/dev/null 2>&1") != 0) { + %COLORS = @NOCOLORS; + return \%COLORS; + } + %COLORS = (bold => `tput bold`, + rev => `tput rev`, + reset => `tput sgr0`, + blue => `tput setaf 4`, + green => `tput setaf 2`, + red => `tput setaf 1`); + chomp(%COLORS); + return \%COLORS; +} + +my %FD_COLORS = (); +sub fd_colors { + my $fd = shift; + return $FD_COLORS{$fd} if exists($FD_COLORS{$fd}); + $FD_COLORS{$fd} = -t $fd ? get_colors() : {@NOCOLORS}; + return $FD_COLORS{$fd}; +} + sub ncores { # Windows return $ENV{NUMBER_OF_PROCESSORS} if exists($ENV{NUMBER_OF_PROCESSORS}); @@ -630,6 +667,8 @@ sub show_stats { my $walltime = $interval->($start_time); my ($usertime) = times(); my ($total_workers, $total_scripts, $total_tests, $total_errs) = (0, 0, 0, 0); + my $c = fd_colors(2); + print(STDERR $c->{green}); for (@$stats) { my ($worker, $nscripts, $ntests, $nerrs) = @$_; print(STDERR "worker $worker: $nscripts scripts, $ntests tests, $nerrs errors\n"); @@ -638,7 +677,7 @@ sub show_stats { $total_tests += $ntests; $total_errs += $nerrs; } - printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)\n", $total_workers, $total_scripts, $total_tests, $total_errs, $walltime, $usertime); + printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)$c->{reset}\n", $total_workers, $total_scripts, $total_tests, $total_errs, $walltime, $usertime); } sub check_script { @@ -656,8 +695,9 @@ sub check_script { my $parser = ScriptParser->new(\$s); 1 while $parser->parse_cmd(); if (@{$parser->{output}}) { + my $c = fd_colors(1); my $s = join('', 
@{$parser->{output}}); - $emit->("# chainlint: $path\n" . $s); + $emit->("$c->{bold}$c->{blue}# chainlint: $path$c->{reset}\n" . $s); $nerrs += () = $s =~ /\?![^?]+\?!/g; } $ntests += $parser->{ntests}; -- cgit v0.10.2-6-g49f6