t/chainlint.pl

   1 #!/usr/bin/env perl
   2 #
   3 # Copyright (c) 2021-2022 Eric Sunshine <sunshine@sunshineco.com>
   4 #
   5 # This tool scans shell scripts for test definitions and checks those tests for
   6 # problems, such as broken &&-chains, which might hide bugs in the tests
   7 # themselves or in behaviors being exercised by the tests.
   8 #
   9 # Input arguments are pathnames of shell scripts containing test definitions,
  10 # or globs referencing a collection of scripts. For each problem discovered,
  11 # the pathname of the script containing the test is printed along with the test
  12 # name and the test body with a `?!LINT: ...?!` annotation at the location of
  13 # each detected problem, where "..." is an explanation of the problem. Returns
  14 # zero if no problems are discovered, otherwise non-zero.
  15
  16 use warnings;
  17 use strict;
  18 use Config;
  19 use File::Glob;
  20 use Getopt::Long;
  21
  22 my $jobs = -1;
  23 my $show_stats;
  24 my $emit_all;
  25
  26 # Lexer tokenizes POSIX shell scripts. It is roughly modeled after section 2.3
  27 # "Token Recognition" of POSIX chapter 2 "Shell Command Language". Although
  28 # similar to lexical analyzers for other languages, this one differs in a few
  29 # substantial ways due to quirks of the shell command language.
  30 #
  31 # For instance, in many languages, newline is just whitespace like space or
  32 # TAB, but in shell a newline is a command separator, thus a distinct lexical
  33 # token. A newline is significant and returned as a distinct token even at the
  34 # end of a shell comment.
  35 #
  36 # In other languages, `1+2` would typically be scanned as three tokens
  37 # (`1`, `+`, and `2`), but in shell it is a single token. However, the similar
   38 # `1 + 2`, which embeds whitespace, is scanned as three tokens in shell, as well.
  39 # In shell, several characters with special meaning lose that meaning when not
  40 # surrounded by whitespace. For instance, the negation operator `!` is special
  41 # when standing alone surrounded by whitespace; whereas in `foo!uucp` it is
  42 # just a plain character in the longer token "foo!uucp". In many other
  43 # languages, `"string"/foo:'string'` might be scanned as five tokens ("string",
  44 # `/`, `foo`, `:`, and 'string'), but in shell, it is just a single token.
  45 #
  46 # The lexical analyzer for the shell command language is also somewhat unusual
  47 # in that it recursively invokes the parser to handle the body of `$(...)`
  48 # expressions which can contain arbitrary shell code. Such expressions may be
  49 # encountered both inside and outside of double-quoted strings.
  50 #
  51 # The lexical analyzer is responsible for consuming shell here-doc bodies which
  52 # extend from the line following a `<<TAG` operator until a line consisting
  53 # solely of `TAG`. Here-doc consumption begins when a newline is encountered.
  54 # It is legal for multiple here-doc `<<TAG` operators to be present on a single
  55 # line, in which case their bodies must be present one following the next, and
  56 # are consumed in the (left-to-right) order the `<<TAG` operators appear on the
  57 # line. A special complication is that the bodies of all here-docs must be
  58 # consumed when the newline is encountered even if the parse context depth has
  59 # changed. For instance, in `cat <<A && x=$(cat <<B &&\n`, bodies of here-docs
  60 # "A" and "B" must be consumed even though "A" was introduced outside the
  61 # recursive parse context in which "B" was introduced and in which the newline
  62 # is encountered.
  63 package Lexer;
  64
  65 sub new {
  66         my ($class, $parser, $s) = @_;
  67         bless {
  68                 parser => $parser,
  69                 buff => $s,
  70                 lineno => 1,
  71                 heretags => []
  72         } => $class;
  73 }
  74
  75 sub scan_heredoc_tag {
  76         my $self = shift @_;
  77         ${$self->{buff}} =~ /\G(-?)/gc;
  78         my $indented = $1;
  79         my $token = $self->scan_token();
  80         return "<<$indented" unless $token;
  81         my $tag = $token->[0];
  82         $tag =~ s/['"\\]//g;
  83         $$token[0] = $indented ? "\t$tag" : "$tag";
  84         push(@{$self->{heretags}}, $token);
  85         return "<<$indented$tag";
  86 }
  87
  88 sub scan_op {
  89         my ($self, $c) = @_;
  90         my $b = $self->{buff};
  91         return $c unless $$b =~ /\G(.)/sgc;
  92         my $cc = $c . $1;
  93         return scan_heredoc_tag($self) if $cc eq '<<';
  94         return $cc if $cc =~ /^(?:&&|\|\||>>|;;|<&|>&|<>|>\|)$/;
  95         pos($$b)--;
  96         return $c;
  97 }
  98
  99 sub scan_sqstring {
 100         my $self = shift @_;
 101         ${$self->{buff}} =~ /\G([^']*'|.*\z)/sgc;
 102         my $s = $1;
 103         $self->{lineno} += () = $s =~ /\n/sg;
 104         return "'" . $s;
 105 }
 106
 107 sub scan_dqstring {
 108         my $self = shift @_;
 109         my $b = $self->{buff};
 110         my $s = '"';
 111         while (1) {
 112                 # slurp up non-special characters
 113                 $s .= $1 if $$b =~ /\G([^"\$\\]+)/gc;
 114                 # handle special characters
 115                 last unless $$b =~ /\G(.)/sgc;
 116                 my $c = $1;
 117                 $s .= '"', last if $c eq '"';
 118                 $s .= '$' . $self->scan_dollar(), next if $c eq '$';
 119                 if ($c eq '\\') {
 120                         $s .= '\\', last unless $$b =~ /\G(.)/sgc;
 121                         $c = $1;
 122                         $self->{lineno}++, next if $c eq "\n"; # line splice
 123                         # backslash escapes only $, `, ", \ in dq-string
 124                         $s .= '\\' unless $c =~ /^[\$`"\\]$/;
 125                         $s .= $c;
 126                         next;
 127                 }
 128                 die("internal error scanning dq-string '$c'\n");
 129         }
 130         $self->{lineno} += () = $s =~ /\n/sg;
 131         return $s;
 132 }
 133
 134 sub scan_balanced {
 135         my ($self, $c1, $c2) = @_;
 136         my $b = $self->{buff};
 137         my $depth = 1;
 138         my $s = $c1;
 139         while ($$b =~ /\G([^\Q$c1$c2\E]*(?:[\Q$c1$c2\E]|\z))/gc) {
 140                 $s .= $1;
 141                 $depth++, next if $s =~ /\Q$c1\E$/;
 142                 $depth--;
 143                 last if $depth == 0;
 144         }
 145         $self->{lineno} += () = $s =~ /\n/sg;
 146         return $s;
 147 }
 148
 149 sub scan_subst {
 150         my $self = shift @_;
 151         my @tokens = $self->{parser}->parse(qr/^\)$/);
 152         $self->{parser}->next_token(); # closing ")"
 153         return @tokens;
 154 }
 155
 156 sub scan_dollar {
 157         my $self = shift @_;
 158         my $b = $self->{buff};
 159         return $self->scan_balanced('(', ')') if $$b =~ /\G\((?=\()/gc; # $((...))
 160         return '(' . join(' ', map {$_->[0]} $self->scan_subst()) . ')' if $$b =~ /\G\(/gc; # $(...)
 161         return $self->scan_balanced('{', '}') if $$b =~ /\G\{/gc; # ${...}
 162         return $1 if $$b =~ /\G(\w+)/gc; # $var
 163         return $1 if $$b =~ /\G([@*#?$!0-9-])/gc; # $*, $1, $$, etc.
 164         return '';
 165 }
 166
 167 sub swallow_heredocs {
 168         my $self = shift @_;
 169         my $b = $self->{buff};
 170         my $tags = $self->{heretags};
 171         while (my $tag = shift @$tags) {
 172                 my $start = pos($$b);
 173                 my $indent = $$tag[0] =~ s/^\t// ? '\\s*' : '';
 174                 $$b =~ /(?:\G|\n)$indent\Q$$tag[0]\E(?:\n|\z)/gc;
 175                 if (pos($$b) > $start) {
 176                         my $body = substr($$b, $start, pos($$b) - $start);
 177                         $self->{parser}->{heredocs}->{$$tag[0]} = {
 178                                 content => substr($body, 0, length($body) - length($&)),
 179                                 start_line => $self->{lineno},
 180                         };
 181                         $self->{lineno} += () = $body =~ /\n/sg;
 182                         next;
 183                 }
 184                 push(@{$self->{parser}->{problems}}, ['HEREDOC', $tag]);
 185                 $$b =~ /(?:\G|\n).*\z/gc; # consume rest of input
 186                 my $body = substr($$b, $start, pos($$b) - $start);
 187                 $self->{lineno} += () = $body =~ /\n/sg;
 188                 last;
 189         }
 190 }
 191
 192 sub scan_token {
 193         my $self = shift @_;
 194         my $b = $self->{buff};
 195         my $token = '';
 196         my ($start, $startln);
 197 RESTART:
 198         $startln = $self->{lineno};
 199         $$b =~ /\G[ \t]+/gc; # skip whitespace (but not newline)
 200         $start = pos($$b) || 0;
 201         $self->{lineno}++, return ["\n", $start, pos($$b), $startln, $startln] if $$b =~ /\G#[^\n]*(?:\n|\z)/gc; # comment
 202         while (1) {
 203                 # slurp up non-special characters
 204                 $token .= $1 if $$b =~ /\G([^\\;&|<>(){}'"\$\s]+)/gc;
 205                 # handle special characters
 206                 last unless $$b =~ /\G(.)/sgc;
 207                 my $c = $1;
 208                 pos($$b)--, last if $c =~ /^[ \t]$/; # whitespace ends token
 209                 pos($$b)--, last if length($token) && $c =~ /^[;&|<>(){}\n]$/;
 210                 $token .= $self->scan_sqstring(), next if $c eq "'";
 211                 $token .= $self->scan_dqstring(), next if $c eq '"';
 212                 $token .= $c . $self->scan_dollar(), next if $c eq '$';
 213                 $self->{lineno}++, $self->swallow_heredocs(), $token = $c, last if $c eq "\n";
 214                 $token = $self->scan_op($c), last if $c =~ /^[;&|<>]$/;
 215                 $token = $c, last if $c =~ /^[(){}]$/;
 216                 if ($c eq '\\') {
 217                         $token .= '\\', last unless $$b =~ /\G(.)/sgc;
 218                         $c = $1;
 219                         $self->{lineno}++, next if $c eq "\n" && length($token); # line splice
 220                         $self->{lineno}++, goto RESTART if $c eq "\n"; # line splice
 221                         $token .= '\\' . $c;
 222                         next;
 223                 }
 224                 die("internal error scanning character '$c'\n");
 225         }
 226         return length($token) ? [$token, $start, pos($$b), $startln, $self->{lineno}] : undef;
 227 }
 228
 229 # ShellParser parses POSIX shell scripts (with minor extensions for Bash). It
 230 # is a recursive descent parser very roughly modeled after section 2.10 "Shell
 231 # Grammar" of POSIX chapter 2 "Shell Command Language".
 232 package ShellParser;
 233
 234 sub new {
 235         my ($class, $s) = @_;
 236         my $self = bless {
 237                 buff => [],
 238                 stop => [],
 239                 output => [],
 240                 heredocs => {},
 241                 insubshell => 0,
 242         } => $class;
 243         $self->{lexer} = Lexer->new($self, $s);
 244         return $self;
 245 }
 246
 247 sub next_token {
 248         my $self = shift @_;
 249         return pop(@{$self->{buff}}) if @{$self->{buff}};
 250         return $self->{lexer}->scan_token();
 251 }
 252
 253 sub untoken {
 254         my $self = shift @_;
 255         push(@{$self->{buff}}, @_);
 256 }
 257
 258 sub peek {
 259         my $self = shift @_;
 260         my $token = $self->next_token();
 261         return undef unless defined($token);
 262         $self->untoken($token);
 263         return $token;
 264 }
 265
 266 sub stop_at {
 267         my ($self, $token) = @_;
 268         return 1 unless defined($token);
 269         my $stop = ${$self->{stop}}[-1] if @{$self->{stop}};
 270         return defined($stop) && $token->[0] =~ $stop;
 271 }
 272
 273 sub expect {
 274         my ($self, $expect) = @_;
 275         my $token = $self->next_token();
 276         return $token if defined($token) && $token->[0] eq $expect;
 277         push(@{$self->{output}}, "?!ERR?! expected '$expect' but found '" . (defined($token) ? $token->[0] : "<end-of-input>") . "'\n");
 278         $self->untoken($token) if defined($token);
 279         return ();
 280 }
 281
 282 sub optional_newlines {
 283         my $self = shift @_;
 284         my @tokens;
 285         while (my $token = $self->peek()) {
 286                 last unless $token->[0] eq "\n";
 287                 push(@tokens, $self->next_token());
 288         }
 289         return @tokens;
 290 }
 291
 292 sub parse_group {
 293         my $self = shift @_;
 294         return ($self->parse(qr/^}$/),
 295                 $self->expect('}'));
 296 }
 297
 298 sub parse_subshell {
 299         my $self = shift @_;
 300         $self->{insubshell}++;
 301         my @tokens = ($self->parse(qr/^\)$/),
 302                       $self->expect(')'));
 303         $self->{insubshell}--;
 304         return @tokens;
 305 }
 306
 307 sub parse_case_pattern {
 308         my $self = shift @_;
 309         my @tokens;
 310         while (defined(my $token = $self->next_token())) {
 311                 push(@tokens, $token);
 312                 last if $token->[0] eq ')';
 313         }
 314         return @tokens;
 315 }
 316
 317 sub parse_case {
 318         my $self = shift @_;
 319         my @tokens;
 320         push(@tokens,
 321              $self->next_token(), # subject
 322              $self->optional_newlines(),
 323              $self->expect('in'),
 324              $self->optional_newlines());
 325         while (1) {
 326                 my $token = $self->peek();
 327                 last unless defined($token) && $token->[0] ne 'esac';
 328                 push(@tokens,
 329                      $self->parse_case_pattern(),
 330                      $self->optional_newlines(),
 331                      $self->parse(qr/^(?:;;|esac)$/)); # item body
 332                 $token = $self->peek();
 333                 last unless defined($token) && $token->[0] ne 'esac';
 334                 push(@tokens,
 335                      $self->expect(';;'),
 336                      $self->optional_newlines());
 337         }
 338         push(@tokens, $self->expect('esac'));
 339         return @tokens;
 340 }
 341
 342 sub parse_for {
 343         my $self = shift @_;
 344         my @tokens;
 345         push(@tokens,
 346              $self->next_token(), # variable
 347              $self->optional_newlines());
 348         my $token = $self->peek();
 349         if (defined($token) && $token->[0] eq 'in') {
 350                 push(@tokens,
 351                      $self->expect('in'),
 352                      $self->optional_newlines());
 353         }
 354         push(@tokens,
 355              $self->parse(qr/^do$/), # items
 356              $self->expect('do'),
 357              $self->optional_newlines(),
 358              $self->parse_loop_body(),
 359              $self->expect('done'));
 360         return @tokens;
 361 }
 362
 363 sub parse_if {
 364         my $self = shift @_;
 365         my @tokens;
 366         while (1) {
 367                 push(@tokens,
 368                      $self->parse(qr/^then$/), # if/elif condition
 369                      $self->expect('then'),
 370                      $self->optional_newlines(),
 371                      $self->parse(qr/^(?:elif|else|fi)$/)); # if/elif body
 372                 my $token = $self->peek();
 373                 last unless defined($token) && $token->[0] eq 'elif';
 374                 push(@tokens, $self->expect('elif'));
 375         }
 376         my $token = $self->peek();
 377         if (defined($token) && $token->[0] eq 'else') {
 378                 push(@tokens,
 379                      $self->expect('else'),
 380                      $self->optional_newlines(),
 381                      $self->parse(qr/^fi$/)); # else body
 382         }
 383         push(@tokens, $self->expect('fi'));
 384         return @tokens;
 385 }
 386
 387 sub parse_loop_body {
 388         my $self = shift @_;
 389         return $self->parse(qr/^done$/);
 390 }
 391
 392 sub parse_loop {
 393         my $self = shift @_;
 394         return ($self->parse(qr/^do$/), # condition
 395                 $self->expect('do'),
 396                 $self->optional_newlines(),
 397                 $self->parse_loop_body(),
 398                 $self->expect('done'));
 399 }
 400
 401 sub parse_func {
 402         my $self = shift @_;
 403         return ($self->expect('('),
 404                 $self->expect(')'),
 405                 $self->optional_newlines(),
 406                 $self->parse_cmd()); # body
 407 }
 408
 409 sub parse_bash_array_assignment {
 410         my $self = shift @_;
 411         my @tokens = $self->expect('(');
 412         while (defined(my $token = $self->next_token())) {
 413                 push(@tokens, $token);
 414                 last if $token->[0] eq ')';
 415         }
 416         return @tokens;
 417 }
 418
 419 my %compound = (
 420         '{' => \&parse_group,
 421         '(' => \&parse_subshell,
 422         'case' => \&parse_case,
 423         'for' => \&parse_for,
 424         'if' => \&parse_if,
 425         'until' => \&parse_loop,
 426         'while' => \&parse_loop);
 427
 428 sub parse_cmd {
 429         my $self = shift @_;
 430         my $cmd = $self->next_token();
 431         return () unless defined($cmd);
 432         return $cmd if $cmd->[0] eq "\n";
 433
 434         my $token;
 435         my @tokens = $cmd;
 436         if ($cmd->[0] eq '!') {
 437                 push(@tokens, $self->parse_cmd());
 438                 return @tokens;
 439         } elsif (my $f = $compound{$cmd->[0]}) {
 440                 push(@tokens, $self->$f());
 441         } elsif (defined($token = $self->peek()) && $token->[0] eq '(') {
 442                 if ($cmd->[0] !~ /\w=$/) {
 443                         push(@tokens, $self->parse_func());
 444                         return @tokens;
 445                 }
 446                 my @array = $self->parse_bash_array_assignment();
 447                 $tokens[-1]->[0] .= join(' ', map {$_->[0]} @array);
 448                 $tokens[-1]->[2] = $array[$#array][2] if @array;
 449         }
 450
 451         while (defined(my $token = $self->next_token())) {
 452                 $self->untoken($token), last if $self->stop_at($token);
 453                 push(@tokens, $token);
 454                 last if $token->[0] =~ /^(?:[;&\n|]|&&|\|\|)$/;
 455         }
 456         push(@tokens, $self->next_token()) if $tokens[-1]->[0] ne "\n" && defined($token = $self->peek()) && $token->[0] eq "\n";
 457         return @tokens;
 458 }
 459
 460 sub accumulate {
 461         my ($self, $tokens, $cmd) = @_;
 462         push(@$tokens, @$cmd);
 463 }
 464
 465 sub parse {
 466         my ($self, $stop) = @_;
 467         push(@{$self->{stop}}, $stop);
 468         goto DONE if $self->stop_at($self->peek());
 469         my @tokens;
 470         while (my @cmd = $self->parse_cmd()) {
 471                 $self->accumulate(\@tokens, \@cmd);
 472                 last if $self->stop_at($self->peek());
 473         }
 474 DONE:
 475         pop(@{$self->{stop}});
 476         return @tokens;
 477 }
 478
 479 # TestParser is a subclass of ShellParser which, beyond parsing shell script
 480 # code, is also imbued with semantic knowledge of test construction, and checks
 481 # tests for common problems (such as broken &&-chains) which might hide bugs in
 482 # the tests themselves or in behaviors being exercised by the tests. As such,
 483 # TestParser is only called upon to parse test bodies, not the top-level
 484 # scripts in which the tests are defined.
 485 package TestParser;
 486
 487 use base 'ShellParser';
 488
 489 sub new {
 490         my $class = shift @_;
 491         my $self = $class->SUPER::new(@_);
 492         $self->{problems} = [];
 493         return $self;
 494 }
 495
 496 sub find_non_nl {
 497         my $tokens = shift @_;
 498         my $n = shift @_;
 499         $n = $#$tokens if !defined($n);
 500         $n-- while $n >= 0 && $$tokens[$n]->[0] eq "\n";
 501         return $n;
 502 }
 503
 504 sub ends_with {
 505         my ($tokens, $needles) = @_;
 506         my $n = find_non_nl($tokens);
 507         for my $needle (reverse(@$needles)) {
 508                 return undef if $n < 0;
 509                 $n = find_non_nl($tokens, $n), next if $needle eq "\n";
 510                 return undef if $$tokens[$n]->[0] !~ $needle;
 511                 $n--;
 512         }
 513         return 1;
 514 }
 515
 516 sub match_ending {
 517         my ($tokens, $endings) = @_;
 518         for my $needles (@$endings) {
 519                 next if @$tokens < scalar(grep {$_ ne "\n"} @$needles);
 520                 return 1 if ends_with($tokens, $needles);
 521         }
 522         return undef;
 523 }
 524
 525 sub parse_loop_body {
 526         my $self = shift @_;
 527         my @tokens = $self->SUPER::parse_loop_body(@_);
 528         # did loop signal failure via "|| return" or "|| exit"?
 529         return @tokens if !@tokens || grep {$_->[0] =~ /^(?:return|exit|\$\?)$/} @tokens;
 530         # did loop upstream of a pipe signal failure via "|| echo 'impossible
 531         # text'" as the final command in the loop body?
 532         return @tokens if ends_with(\@tokens, [qr/^\|\|$/, "\n", qr/^echo$/, qr/^.+$/]);
 533         # flag missing "return/exit" handling explicit failure in loop body
 534         my $n = find_non_nl(\@tokens);
 535         push(@{$self->{problems}}, [$self->{insubshell} ? 'LOOPEXIT' : 'LOOPRETURN', $tokens[$n]]);
 536         return @tokens;
 537 }
 538
 539 my @safe_endings = (
 540         [qr/^(?:&&|\|\||\||&)$/],
 541         [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/],
 542         [qr/^(?:exit|return)$/, qr/^(?:\d+|\$\?)$/, qr/^;$/],
 543         [qr/^(?:exit|return|continue)$/],
 544         [qr/^(?:exit|return|continue)$/, qr/^;$/]);
 545
 546 sub accumulate {
 547         my ($self, $tokens, $cmd) = @_;
 548         my $problems = $self->{problems};
 549
 550         # no previous command to check for missing "&&"
 551         goto DONE unless @$tokens;
 552
 553         # new command is empty line; can't yet check if previous is missing "&&"
 554         goto DONE if @$cmd == 1 && $$cmd[0]->[0] eq "\n";
 555
 556         # did previous command end with "&&", "|", "|| return" or similar?
 557         goto DONE if match_ending($tokens, \@safe_endings);
 558
 559         # if this command handles "$?" specially, then okay for previous
 560         # command to be missing "&&"
 561         for my $token (@$cmd) {
 562                 goto DONE if $token->[0] =~ /\$\?/;
 563         }
 564
 565         # if this command is "false", "return 1", or "exit 1" (which signal
 566         # failure explicitly), then okay for all preceding commands to be
 567         # missing "&&"
 568         if ($$cmd[0]->[0] =~ /^(?:false|return|exit)$/) {
 569                 @$problems = grep {$_->[0] ne 'AMP'} @$problems;
 570                 goto DONE;
 571         }
 572
 573         # flag missing "&&" at end of previous command
 574         my $n = find_non_nl($tokens);
 575         push(@$problems, ['AMP', $tokens->[$n]]) unless $n < 0;
 576
 577 DONE:
 578         $self->SUPER::accumulate($tokens, $cmd);
 579 }
 580
 581 # ScriptParser is a subclass of ShellParser which identifies individual test
 582 # definitions within test scripts, and passes each test body through TestParser
  583 # to identify possible problems. ScriptParser detects test definitions not only
 584 # at the top-level of test scripts but also within compound commands such as
 585 # loops and function definitions.
 586 package ScriptParser;
 587
 588 use base 'ShellParser';
 589
 590 sub new {
 591         my $class = shift @_;
 592         my $self = $class->SUPER::new(@_);
 593         $self->{ntests} = 0;
 594         $self->{nerrs} = 0;
 595         return $self;
 596 }
 597
 598 # extract the raw content of a token, which may be a single string or a
 599 # composition of multiple strings and non-string character runs; for instance,
 600 # `"test body"` unwraps to `test body`; `word"a b"42'c d'` to `worda b42c d`
 601 sub unwrap {
 602         my $token = (@_ ? shift @_ : $_)->[0];
 603         # simple case: 'sqstring' or "dqstring"
 604         return $token if $token =~ s/^'([^']*)'$/$1/;
 605         return $token if $token =~ s/^"([^"]*)"$/$1/;
 606
 607         # composite case
 608         my ($s, $q, $escaped);
 609         while (1) {
 610                 # slurp up non-special characters
 611                 $s .= $1 if $token =~ /\G([^\\'"]*)/gc;
 612                 # handle special characters
 613                 last unless $token =~ /\G(.)/sgc;
 614                 my $c = $1;
 615                 $q = undef, next if defined($q) && $c eq $q;
 616                 $q = $c, next if !defined($q) && $c =~ /^['"]$/;
 617                 if ($c eq '\\') {
 618                         last unless $token =~ /\G(.)/sgc;
 619                         $c = $1;
 620                         $s .= '\\' if $c eq "\n"; # preserve line splice
 621                 }
 622                 $s .= $c;
 623         }
 624         return $s
 625 }
 626
 627 sub format_problem {
 628         local $_ = shift;
 629         /^AMP$/ && return "missing '&&'";
 630         /^LOOPRETURN$/ && return "missing '|| return 1'";
 631         /^LOOPEXIT$/ && return "missing '|| exit 1'";
 632         /^HEREDOC$/ && return 'unclosed heredoc';
 633         die("unrecognized problem type '$_'\n");
 634 }
 635
 636 sub check_test {
 637         my $self = shift @_;
 638         my $title = unwrap(shift @_);
 639         my $body = shift @_;
 640         my $lineno = $body->[3];
 641         $body = unwrap($body);
 642         if ($body eq '-') {
 643                 my $herebody = shift @_;
 644                 $body = $herebody->{content};
 645                 $lineno = $herebody->{start_line};
 646         }
 647         $self->{ntests}++;
 648         my $parser = TestParser->new(\$body);
 649         my @tokens = $parser->parse();
 650         my $problems = $parser->{problems};
 651         $self->{nerrs} += @$problems;
 652         return unless $emit_all || @$problems;
 653         my $c = main::fd_colors(1);
 654         my ($erropen, $errclose) = -t 1 ? ("$c->{rev}$c->{red}", $c->{reset}) : ('?!', '?!');
 655         my $start = 0;
 656         my $checked = '';
 657         for (sort {$a->[1]->[2] <=> $b->[1]->[2]} @$problems) {
 658                 my ($label, $token) = @$_;
 659                 my $pos = $token->[2];
 660                 my $err = format_problem($label);
 661                 $checked .= substr($body, $start, $pos - $start);
 662                 $checked .= ' ' unless $checked =~ /\s$/;
 663                 $checked .= "${erropen}LINT: $err$errclose";
 664                 $checked .= ' ' unless $pos >= length($body) ||
 665                     substr($body, $pos, 1) =~ /^\s/;
 666                 $start = $pos;
 667         }
 668         $checked .= substr($body, $start);
 669         $checked =~ s/^/$lineno++ . ' '/mge;
 670         $checked =~ s/^\d+ \n//;
 671         $checked =~ s/^\d+/$c->{dim}$&$c->{reset}/mg;
 672         $checked .= "\n" unless $checked =~ /\n$/;
 673         push(@{$self->{output}}, "$c->{blue}# chainlint: $title$c->{reset}\n$checked");
 674 }
 675
 676 sub parse_cmd {
 677         my $self = shift @_;
 678         my @tokens = $self->SUPER::parse_cmd();
 679         return @tokens unless @tokens && $tokens[0]->[0] =~ /^test_expect_(?:success|failure)$/;
 680         my $n = $#tokens;
 681         $n-- while $n >= 0 && $tokens[$n]->[0] =~ /^(?:[;&\n|]|&&|\|\|)$/;
 682         my $herebody;
 683         if ($n >= 2 && $tokens[$n-1]->[0] eq '-' && $tokens[$n]->[0] =~ /^<<-?(.+)$/) {
 684                 $herebody = $self->{heredocs}->{$1};
 685                 $n--;
 686         }
 687         $self->check_test($tokens[1], $tokens[2], $herebody) if $n == 2; # title body
 688         $self->check_test($tokens[2], $tokens[3], $herebody) if $n > 2;  # prereq title body
 689         return @tokens;
 690 }
 691
 692 # main contains high-level functionality for processing command-line switches,
 693 # feeding input test scripts to ScriptParser, and reporting results.
 694 package main;
 695
 696 my $getnow = sub { return time(); };
 697 my $interval = sub { return time() - shift; };
 698 if (eval {require Time::HiRes; Time::HiRes->import(); 1;}) {
 699         $getnow = sub { return [Time::HiRes::gettimeofday()]; };
 700         $interval = sub { return Time::HiRes::tv_interval(shift); };
 701 }
 702
 703 # Restore TERM if test framework set it to "dumb" so 'tput' will work; do this
 704 # outside of get_colors() since under 'ithreads' all threads use %ENV of main
 705 # thread and ignore %ENV changes in subthreads.
 706 $ENV{TERM} = $ENV{USER_TERM} if $ENV{USER_TERM};
 707
 708 my @NOCOLORS = (bold => '', rev => '', dim => '', reset => '', blue => '', green => '', red => '');
 709 my %COLORS = ();
 710 sub get_colors {
 711         return \%COLORS if %COLORS;
 712         if (exists($ENV{NO_COLOR})) {
 713                 %COLORS = @NOCOLORS;
 714                 return \%COLORS;
 715         }
 716         if ($ENV{TERM} =~ /xterm|xterm-\d+color|xterm-new|xterm-direct|nsterm|nsterm-\d+color|nsterm-direct/) {
 717                 %COLORS = (bold  => "\e[1m",
 718                            rev   => "\e[7m",
 719                            dim   => "\e[2m",
 720                            reset => "\e[0m",
 721                            blue  => "\e[34m",
 722                            green => "\e[32m",
 723                            red   => "\e[31m");
 724                 return \%COLORS;
 725         }
 726         if (system("tput sgr0 >/dev/null 2>&1") == 0 &&
 727             system("tput bold >/dev/null 2>&1") == 0 &&
 728             system("tput rev  >/dev/null 2>&1") == 0 &&
 729             system("tput dim  >/dev/null 2>&1") == 0 &&
 730             system("tput setaf 1 >/dev/null 2>&1") == 0) {
 731                 %COLORS = (bold  => `tput bold`,
 732                            rev   => `tput rev`,
 733                            dim   => `tput dim`,
 734                            reset => `tput sgr0`,
 735                            blue  => `tput setaf 4`,
 736                            green => `tput setaf 2`,
 737                            red   => `tput setaf 1`);
 738                 return \%COLORS;
 739         }
 740         %COLORS = @NOCOLORS;
 741         return \%COLORS;
 742 }
 743
 744 my %FD_COLORS = ();
 745 sub fd_colors {
 746         my $fd = shift;
 747         return $FD_COLORS{$fd} if exists($FD_COLORS{$fd});
 748         $FD_COLORS{$fd} = -t $fd ? get_colors() : {@NOCOLORS};
 749         return $FD_COLORS{$fd};
 750 }
 751
 752 sub ncores {
 753         # Windows
 754         if (exists($ENV{NUMBER_OF_PROCESSORS})) {
 755                 my $ncpu = $ENV{NUMBER_OF_PROCESSORS};
 756                 return $ncpu > 0 ? $ncpu : 1;
 757         }
 758         # Linux / MSYS2 / Cygwin / WSL
 759         if (open my $fh, '<', '/proc/cpuinfo') {
 760                 my $cpuinfo = do { local $/; <$fh> };
 761                 close($fh);
 762                 if ($cpuinfo =~ /^n?cpus active\s*:\s*(\d+)/m) {
 763                         return $1 if $1 > 0;
 764                 }
 765                 my @matches = ($cpuinfo =~ /^(processor|CPU)[\s\d]*:/mg);
 766                 return @matches ? scalar(@matches) : 1;
 767         }
 768         # macOS & BSD
 769         if ($^O =~ /(?:^darwin$|bsd)/) {
 770                 my $ncpu = qx/sysctl -n hw.ncpu/;
 771                 return $ncpu > 0 ? $ncpu : 1;
 772         }
 773         return 1;
 774 }
 775
 776 sub show_stats {
 777         my ($start_time, $stats) = @_;
 778         my $walltime = $interval->($start_time);
 779         my ($usertime) = times();
 780         my ($total_workers, $total_scripts, $total_tests, $total_errs) = (0, 0, 0, 0);
 781         my $c = fd_colors(2);
 782         print(STDERR $c->{green});
 783         for (@$stats) {
 784                 my ($worker, $nscripts, $ntests, $nerrs) = @$_;
 785                 print(STDERR "worker $worker: $nscripts scripts, $ntests tests, $nerrs errors\n");
 786                 $total_workers++;
 787                 $total_scripts += $nscripts;
 788                 $total_tests += $ntests;
 789                 $total_errs += $nerrs;
 790         }
 791         printf(STDERR "total: %d workers, %d scripts, %d tests, %d errors, %.2fs/%.2fs (wall/user)$c->{reset}\n", $total_workers, $total_scripts, $total_tests, $total_errs, $walltime, $usertime);
 792 }
 793
 794 sub check_script {
 795         my ($id, $next_script, $emit) = @_;
 796         my ($nscripts, $ntests, $nerrs) = (0, 0, 0);
 797         while (my $path = $next_script->()) {
 798                 $nscripts++;
 799                 my $fh;
 800                 unless (open($fh, "<:unix:crlf", $path)) {
 801                         $emit->("?!ERR?! $path: $!\n");
 802                         next;
 803                 }
 804                 my $s = do { local $/; <$fh> };
 805                 close($fh);
 806                 my $parser = ScriptParser->new(\$s);
 807                 1 while $parser->parse_cmd();
 808                 if (@{$parser->{output}}) {
 809                         my $c = fd_colors(1);
 810                         my $s = join('', @{$parser->{output}});
 811                         $emit->("$c->{bold}$c->{blue}# chainlint: $path$c->{reset}\n" . $s);
 812                 }
 813                 $ntests += $parser->{ntests};
 814                 $nerrs += $parser->{nerrs};
 815         }
 816         return [$id, $nscripts, $ntests, $nerrs];
 817 }
 818
 819 sub exit_code {
 820         my $stats = shift @_;
 821         for (@$stats) {
 822                 my ($worker, $nscripts, $ntests, $nerrs) = @$_;
 823                 return 1 if $nerrs;
 824         }
 825         return 0;
 826 }
 827
 828 Getopt::Long::Configure(qw{bundling});
 829 GetOptions(
 830         "emit-all!" => \$emit_all,
 831         "jobs|j=i" => \$jobs,
 832         "stats|show-stats!" => \$show_stats) or die("option error\n");
 833 $jobs = ncores() if $jobs < 1;
 834
 835 my $start_time = $getnow->();
 836 my @stats;
 837
 838 my @scripts;
 839 push(@scripts, File::Glob::bsd_glob($_)) for (@ARGV);
 840 unless (@scripts) {
 841         show_stats($start_time, \@stats) if $show_stats;
 842         exit;
 843 }
 844 $jobs = @scripts if @scripts < $jobs;
 845
 846 unless ($jobs > 1 &&
 847         $Config{useithreads} && eval {
 848         require threads; threads->import();
 849         require Thread::Queue; Thread::Queue->import();
 850         1;
 851         }) {
 852         push(@stats, check_script(1, sub { shift(@scripts); }, sub { print(@_); }));
 853         show_stats($start_time, \@stats) if $show_stats;
 854         exit(exit_code(\@stats));
 855 }
 856
 857 my $script_queue = Thread::Queue->new();
 858 my $output_queue = Thread::Queue->new();
 859
 860 sub next_script { return $script_queue->dequeue(); }
 861 sub emit { $output_queue->enqueue(@_); }
 862
 863 sub monitor {
 864         while (my $s = $output_queue->dequeue()) {
 865                 print($s);
 866         }
 867 }
 868
 869 my $mon = threads->create({'context' => 'void'}, \&monitor);
 870 threads->create({'context' => 'list'}, \&check_script, $_, \&next_script, \&emit) for 1..$jobs;
 871
 872 $script_queue->enqueue(@scripts);
 873 $script_queue->end();
 874
 875 for (threads->list()) {
 876         push(@stats, $_->join()) unless $_ == $mon;
 877 }
 878
 879 $output_queue->end();
 880 $mon->join();
 881
 882 show_stats($start_time, \@stats) if $show_stats;
 883 exit(exit_code(\@stats));