2 # Copyright (c) 2007-2020 Olly Betts
4 # Permission is hereby granted, free of charge, to any person obtaining a copy
5 # of this software and associated documentation files (the "Software"), to
6 # deal in the Software without restriction, including without limitation the
7 # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8 # sell copies of the Software, and to permit persons to whom the Software is
9 # furnished to do so, subject to the following conditions:
11 # The above copyright notice and this permission notice shall be included in
12 # all copies or substantial portions of the Software.
14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 if (defined $ARGV[0] && $ARGV[0] eq '--help') {
30 Nit-pick Xapian patches.
32 A patch can be supplied on stdin, or one or more patch files listed on the
35 Produces output suitable for use with vim's quick-fix mode, and similar
36 features in other editors.
40 git diff master.. | xapian-check-patch > tmp.qf
47 my ($last_fullline, $fullline);
52 my ($type, $msg, $n, $l) = @_;
53 print "$fnm:$n: $type: $msg";
62 # Report a diagnostic in the current line.
64 my ($type, $msg) = @_;
65 diagnostic_
($type, $msg, $lineno, $fullline);
68 # Report a diagnostic in the previous line.
70 my ($type, $msg) = @_;
71 diagnostic_
($type, $msg, $lineno - 1, $last_fullline);
77 for my $i (0 .. length($s) - 1) {
78 if (substr($s, $i, 1) eq "\t") {
79 # Advance to next multiple of 8 column.
80 $len = $len + (8 - $len % 8);
88 sub check_comment_content
{
90 if (/\\([abcefp]|brief|code|deprecated|endcode|exception|file|internal|li|param|private|return|todo)\b/) {
91 diagnostic
('error', "Doxygen command '\\$1' introduced by '\\' not '\@'");
93 if (/\@\s+([abcefp]|brief|code|deprecated|endcode|exception|file|internal|li|param|private|return|todo)\b/) {
94 diagnostic
('error', "Broken Doxygen command: whitespace between '\@' and '$1'");
96 if (/(\@[a-z]+)\s+\1\b/) {
97 diagnostic
('error', "Double Doxygen command: '$1 $1'");
104 # SVN property changes don't have an "Index: [...]" line.
106 my $check_indent = 0;
107 my $check_trailing = 0;
108 my $check_space_tab = 0;
109 my $check_end_new_line = 0;
112 my $header_guard_macro;
113 my $last_first_char = '';
116 my $preproc_continuation;
117 my ($top_level, $next_top_level); # undef for unknown, 0 for no, 1 for yes.
118 my $last_line_blank = 0;
119 my $last_line_block_start;
120 my $last_line_block_end;
121 my $penultimate_line_block_start;
122 # Indent in columns expected for this line (undef if we don't know).
124 # True if the indent increased due to a "case" or "default" without a { - this
125 # means that a following "case"/"default" should not be indented.
127 # Current file is a C/C++ header file.
129 # First line number for doxygen @file comment check.
130 my $doxygen_first_line;
132 if (defined $next_top_level) {
133 $top_level = $next_top_level;
134 $next_top_level = undef;
137 if (/^Index: (.+)/ || m!^diff --git a/.+ b/(.+)! || m!^\+\+\+ (\S+)!) {
140 (($ext) = ($fnm =~ /\.([\w.]+)$/)) or $ext = '';
144 $header_guard_macro = undef;
147 $preproc_continuation = 0;
153 $check_space_tab = 1;
155 $doxygen_first_line = 1;
156 $check_end_new_line = 1;
157 if ($fnm =~ m!xapian-applications/omega/testfiles!) {
158 $check_space_tab = 0;
160 $doxygen_first_line = 0;
161 $check_end_new_line = 0;
162 } elsif ($ext eq 'cc') {
163 if ($fnm =~ m!\b(?:cdb|portability/mkdtemp)! ||
164 $fnm =~ m!\bcommon/getopt\.cc$! ||
165 $fnm =~ m!\bomega/md5\.cc$! ||
166 $fnm =~ m!\bcommon/msvc_dirent\.cc$!) {
170 $want_tabs = 1 unless ($fnm =~ m!\blanguages/steminternal\.cc$!);
173 } elsif ($ext eq 'c') {
174 if ($fnm =~ m!\blanguages/compiler/! ||
175 $fnm =~ m!/lemon\.c$!) {
182 } elsif ($ext eq 'h') {
183 if ($fnm =~ m!\binclude/xapian/intrusive_ptr\.h! ||
184 $fnm =~ m!\blanguages/compiler/! ||
185 $fnm =~ m!\bcommon/msvc_dirent\.h$! ||
186 $fnm =~ m!\bcommon/heap\.h$! ||
187 $fnm =~ m!/omega/cdb! ||
188 $fnm =~ m!\bportability/mkdtemp!) {
196 } elsif ($ext eq 'lemony') {
199 } elsif ($ext eq 'lt') {
202 } elsif ($ext eq 'py' || $ext eq 'py.in') {
205 } elsif ($ext eq 'rb') {
208 } elsif ($ext eq 'sbl') {
209 $check_space_tab = 0;
211 } elsif ($ext eq 'patch') {
212 $check_space_tab = 0;
213 } elsif ($ext eq 'txt') {
214 # Imported text file with trailing whitespace.
215 if ($fnm =~ m!/testdata/etext\.txt$!) {
218 } elsif ($fnm =~ m!(?:^|/)Makefile!) {
221 } elsif ($fnm =~ m!(?:^|/)ChangeLog\b!) {
225 # print STDERR "$fnm: lang=" . ($lang // "UNKNOWN") . "\n";
228 my $pre3 = substr($_, 0, 3);
229 if ($pre3 eq '@@ ') {
230 /^\@\@ -\d+,\d+ \+(\d+),\d+\b/ and $lineno = $1;
231 $next_top_level = ($lineno == 1) ?
1 : undef;
232 $in_comment = ($lineno == 1) ?
0 : undef;
233 $last_line_blank = 0;
234 $last_line_block_start = undef;
235 $last_line_block_end = undef;
236 $penultimate_line_block_start = undef;
238 $last_first_char = '';
239 $last_fullline = undef;
242 if ($pre3 eq '---' || $pre3 eq '+++') {
247 $next_top_level = (/^.\s/ ?
0 : 1);
250 my $line_blank = /^[+ ]\s*$/;
253 my $first_char = substr($fullline, 0, 1);
255 if (defined $lang && ($lang eq 'c++' || $lang eq 'c')) {
256 if (!defined $in_comment) {
257 # Decide if we're in a C-style comment for the first line of a hunk.
258 $in_comment = /^.\s*\*+\s/;
260 if ($lineno == $doxygen_first_line && m!^\+!) {
261 if ($doxygen_first_line == 1 && m
,^\
+%include\b\s
*\
{,) {
262 # If the first line is %include{... check the second.
263 $doxygen_first_line = 2;
264 } elsif (m!^\+/\*\*\s+\@file\s*(.*)!) {
266 diagnostic
('error', "Doxygen \@file should not list explicit filename");
268 } elsif ($fnm =~ m!\bomega/md5\.h$!) {
271 diagnostic
('error', "Doxygen \@file missing");
275 # Uncomment commented out parameter names: foo(int /*bar*/) -> foo(int bar)
276 s!/\*([A-Za-z_][A-Za-z_0-9]*)\*/([,)])!$1$2!g;
278 # Check for comments without a space before the comment text.
279 if (m!^\+.*\s/([*/]{1,2})[A-Za-z0-9]!) {
280 if ($ext eq 'lemony' && $1 eq '*' && $' =~ m!^\w*-overwrites-\w+\*/!) {
281 # Magic comment in lemon grammar - lemon requires no spaces.
283 diagnostic('error
', "Missing space between comment characters and comment text");
289 if (s! /\*(.*?)\*/ ! !g) {
290 # C-style comment with spaces around, e.g.
291 # { T = P->as_phrase_query(); /*T-overwrites-P*/ }
292 if ($first_char eq '+') {
293 check_comment_content($1);
297 if (s!/\*(.*?)\*/!!g) {
298 # C-style comment without spaces on both sides, e.g.:
299 # foo(); /* blah blah */
300 if ($first_char eq '+') {
301 check_comment_content($1);
306 # Single line comment, e.g.:
308 if ($first_char eq '+') {
309 check_comment_content($1);
313 # Take care to avoid interpreting "foo/*" as a comment start.
314 if (s!^.(?:[^"]+?|"(?:[^\\"]*?|\\.)*?")*?/\*(.*)!!g) {
315 if ($first_char eq '+') {
316 check_comment_content($1);
322 if (s!^.\s*\*+(.*)\*/!$first_char!) {
323 # End of multiline comment with leading *, e.g.:
325 if ($first_char eq '+') {
326 check_comment_content($1);
330 } elsif (s!^.(.*)\*/!$first_char!) {
331 # End of multiline comment without leading *, e.g.:
333 if ($first_char eq '+') {
334 check_comment_content($1);
338 if ($first_char eq '+') {
339 if (m!^.\s*\*+(.*)!) {
340 # In multiline comment with leading *.
341 check_comment_content($1);
343 # In multiline comment without leading *.
344 check_comment_content(substr($_, 1));
350 } elsif (defined $lang && $lang eq 'py
') {
355 } elsif (defined $lang && $lang eq 'rb
') {
362 # Default to not being in a comment for languages other than C/C++.
365 # Replace multiple spaces before line continuation marker:
368 if (defined $lang && ($lang eq 'c
++' || $lang eq 'c
')) {
369 if ($first_char eq '+') {
370 my $expandedline = '';
371 for my $i (1..length($fullline) - 1) {
372 my $ch = substr($fullline, $i, 1);
374 $expandedline .= ('.' x (8 - length($expandedline) % 8));
376 $expandedline .= $ch;
379 chomp($expandedline);
380 if (length($expandedline) > 80 &&
381 # Logging annotations aren't really for human eyes.
382 !/^\+[ \t]*LOGCALL/ &&
383 # Allow length up to 84 if " in first column for formatting
384 # text blocks (the extra 4 being "\n").
385 (length($expandedline) > 84 || !/^\+"/) &&
386 # Allow longer copyright lines.
387 $fullline !~ m
,^\
+[ /]\
* Copyright
, &&
388 # Allow long initialisers (e.g. for testcases).
389 ! /^\+\s*\{.*\},?$/ &&
390 # Don't force wrapping of a long #error message.
391 !/^\+#\d*(error|warning)\b/) {
392 diagnostic
('error', "Line extends beyond column 80 (to column ".length($expandedline).")");
395 if (m
,^\
+\s
+LOGCALL
(?
:_
[A
-Z0
-9]+)*\
([^"]*"[^"]*(?<!operator)\(,) {
396 diagnostic('error', "Don
't include parentheses in debug logging method/class name");
398 if (/^\+\s+LOGCALL(?:_[A-Z0-9]+)*\(.*,$/) {
399 diagnostic('error
', "Don't wrap long LOGCALL lines
");
401 if (/^\+\s+(LOGCALL(?:_STATIC)?)\([^,]*,\s*void,$/) {
402 diagnostic('error', "Use
$1_VOID for a method with a void
return type
");
404 # Replace string literals containing escaped quotes:
406 my $quote = substr($_, $-[0], 1);
410 QUOTELOOP
: while (1) {
411 if ($i >= length($_)) {
412 $_ = substr($_, 0, $start) . "X\n";
415 my $c = substr($_, $i, 1);
417 $_ = substr($_, 0, $start) . "X" . substr($_, $i);
419 # See if there's another string after this one:
420 while ($i != length($_)) {
421 $c = substr($_, $i, 1);
423 if ($c eq '"' || $c eq "'") {
434 $c = substr($_, $i, 1);
436 ++$i while (substr($_, $i, 1) =~ /^[A-Fa-f0-9]$/);
438 } elsif ($c =~ /^[0-7]/) {
440 ++$i while ($i - $j <= 3 && substr($_, $i, 1) =~ /^[0-7]$/);
442 } elsif ($c eq '"' || $c eq "'") {
451 if ($check_trailing && $fullline =~ /^\+.*[ \t]$/) {
452 diagnostic
('error', "added/changed line has trailing whitespace");
454 if ($check_space_tab && /^\+.* \t/) {
455 diagnostic
('error', "added/changed line has space before tab");
457 if ($want_tabs == 1 and /^\+\t* {8}/) {
458 diagnostic
('error', "added/changed line uses spaces for indentation rather than tab");
460 if (!$want_tabs and /^\+ *\t/) {
461 diagnostic
('error', "added/changed line uses tab for indentation rather than spaces");
463 if ((!defined $lang || $lang ne 'changelog') && $fullline =~ /^([-+]).*\bFIX(?:ME)\b/) {
464 # Break up the string in the regexp above and messages below to avoid
465 # this triggering on its own code!
467 # Not an error, but interesting information.
468 diagnostic
('info', "FIX"."ME removed");
470 # Not an error, but not good.
471 diagnostic
('warning', "FIX"."ME added");
474 if (defined $lang && ($lang eq 'c++' || $lang eq 'c')) {
475 if ($last_line_blank) {
477 # Allow multiple blank lines at the top level for now.
478 diagnostic
('error', "Extra blank line") unless ($top_level // 1);
479 } elsif (/^.\s+\}$/) {
480 # Closing } of a namespace often has a blank line before it,
481 # and that seems reasonable.
482 diagnostic_last
('error', "Blank line at end of block") unless ($top_level // 1);
483 } elsif ($penultimate_line_block_start && /^.(\s|\}$)/) {
484 diagnostic_last
('error', "Blank line at start of block");
488 if (/^([-+ ])(\s*)\#/) {
489 # Avoid misfiring for something like:
492 if (!$preproc_continuation) {
493 if ($1 eq '+' && $2 ne '') {
494 diagnostic
('error', "Whitespace before '#' on preprocessor line");
498 $preproc_continuation = /\\$/;
499 } elsif ($preproc_continuation) {
500 $preproc_continuation = /\\$/;
504 if ($check_space_tab && /^\+( (?:| | | ))[^ \t].*(?:[^)];|[^);,])\n/) {
505 # We only check for 1, 3, 5 and 7 space indents to avoid false
506 # positives for "public:", etc and for wrapped expressions.
508 # Exclude lines ending ');', ')', or ',' to avoid reporting for
509 # wrapped function arguments. This means we'll also miss some
510 # cases we should complain about, but it's likely that at least
511 # one line in a mis-indented block will trigger an error.
513 # Exclude potential comment continuation lines which might have
514 # been missed by the comment stripping code. Require whitespace
515 # after so we flag a mis-indented: *ptr = foo;
517 diagnostic
('error', "line indented by ".length($1)." spaces");
521 #if (/^\+.*(?<!\btypedef )\b([A-Za-z_][A-Za-z_0-9]*)\s+\(/ &&
522 if (/^\+.*\b([A-Za-z_][A-Za-z_0-9]*)\s+\(((?:[A-Za-z][A-Za-z0-9_]*::)?\*|[A-Za-z][A-Za-z0-9_]*\)\()?/) {
526 # `delete (*i)->foo();` rather than `delete(*i)->foo();` - the `(`
527 # isn't around function parameters here.
528 $name !~ /^(case|catch|delete|double|for|if|return|switch|throw|while)$/ &&
529 # Function pointer type `int (*)(void)` or parenthesised
530 # function name `int (foo)(`.
531 !($name =~ /^(?:bool|double|float|unsigned|void|[a-z][a-z0-9_]+_t|(?:(?:un)?signed\s+)?(?:char|int|long|short))$/ && length($post))) {
533 diagnostic
('error', "Whitespace between '$name' and '('");
535 # FIXME: We skip preprocessor lines for now to avoid triggering
536 # on things like «#define FOUR (4)» but it would be good to
537 # catch «#define FOO(x) foo (x)»
541 if (m!^\+\s*(case|class|do|for|if|namespace|struct|switch|try|union)\b([^ ]| \s)!) {
542 diagnostic
('error', "'$1' not followed by exactly one space");
544 if (m!^\+.*;[^\s\\]!) {
545 diagnostic
('error', "Missing space after ';'");
547 if (m!^\+.*[^(;]\s;!) {
548 # Stuff like this is OK: for ( ; ; ) {
549 # though for that exact case I'd suggest: while (true) {
550 diagnostic
('error', "Whitespace before ';'");
553 diagnostic
('error', "Missing space after '<<'");
556 diagnostic
('error', "Missing space before '<<'");
558 if (m!^\+.*?\b(return)\b([^ ;]| \s)!) {
559 diagnostic
('error', "'$1' not followed by exactly one space");
561 if (m!^\+.*?\b(else)\b([^ \n]| \s)!) {
562 diagnostic
('error', "'$1' not followed by exactly one space");
564 if (m!^\+.*?\b(catch|while)\b([^ ]| \s)!) {
565 diagnostic
('error', "'$1' not followed by exactly one space");
567 if (m!^\+.*?(?:}|}\s{2,}|}\t|^[^}]*)\b(catch)\b!) {
568 diagnostic
('error', "'$1' not preceded by exactly '} '");
570 if (m!^\+.*?(?:}|}\s{2,}|}\t)\b(else|while)\b!) {
571 diagnostic
('error', "'}' and '$1' not separated by exactly one space");
573 if (m
,^\
+.*?
\belse
\b\s
*(?
!if)[^\s
{],) {
574 diagnostic
('error', "Code after 'else' on same line");
576 if (m
,^\
+.*?
\belse\s
+if.*;\s
*$,) {
577 diagnostic
('error', "Code after 'else if' on same line");
579 if (m!^\+.*\((?: [^;]|\t)!) {
580 # Allow: for ( ; i != 10; ++i)
581 diagnostic
('error', "Whitespace after '('");
583 if (m!^\+.*\H.*\h\)!) {
584 diagnostic
('error', "Whitespace before ')'");
586 if (m!^\+.*;\s*(\w+)([-+]{2})\)!) {
587 diagnostic
('error', "Prefer '$2$1' to '$1$2'");
589 if (m!^\+.*?>\s+>!) {
590 diagnostic
('error', "We assume C++11 so can write '>>' instead of '> >'");
592 if (m!^\+.*?\b(?:enable_if|list|map|multimap|multiset|priority_queue|set|template|unordered_map|unordered_set|vector)\s+<!) {
593 diagnostic
('error', "Whitespace between template name and '<'");
595 if (/^\+.*?\bfor\s*\([^(]*([^:(]:[^:])/ && $1 ne ' : ') {
596 diagnostic
('error', "Missing spaces around ':' in 'for'");
598 if (m
,^\
+.*?
[\w
)](?
!-[->]|\
+\
+)((?
:\
&\
&|\
|\
||<<|>>|[-+/*%~=<>!&|^])=?
|[?
]),) {
602 if (substr($_, $pre[1] - 8, 8) eq 'operator') {
604 } elsif ($op eq '>' && substr($_, 0, $pre[1]) =~ /[A-Za-z0-9_]</) {
605 # y = static_cast<char>(x);
606 } elsif ($op eq '>') {
607 } elsif ($op eq '<' && substr($_, $pre[1] - 1, 1) =~ /^[A-Za-z0-9_]$/ && substr($_, $post[1]) =~ />/) {
608 # y = static_cast<char>(x);
609 } elsif ($op eq '<' &&
610 substr($_, 0, $pre[1]) =~ /\b(?:enable_if|list|map|multimap|multiset|priority_queue|set|template|unordered_map|unordered_set|vector)$/) {
611 # y = priority_queue<Foo*,
613 # template<typename A,
615 } elsif ($op eq '&&' && substr($_, 0, $pre[1]) =~ /\b(?:auto|bool|char|double|float|int(?:\d+_t)?|long|short|string|uint\d+_t|unsigned|void|[A-Z][A-Za-z0-9_]*)$/) {
617 # method(Class&& foo);
618 } elsif (($op eq '<<' || $op eq '>>') &&
619 substr($_, 0, $pre[1]) =~ /\b(?:0x[0-9a-fA-F]+|[0-9]+)$/ &&
620 substr($_, $post[1]) =~ /^(?:0x[0-9a-fA-F]+|[0-9]+)\b/) {
622 } elsif (($op eq '-' || $op eq '+') &&
623 substr($_, 0, $pre[1]) =~ /[0-9]\.?e$/) {
625 } elsif ($op eq '>>' &&
626 /[A-Za-z0-9_]<.+</) {
627 # vector<vector<int>> v;
628 } elsif ($op =~ /^[*&|]$/ &&
629 substr($_, 0, $pre[1]) !~ /(?:\b\d+)\s*$/) {
630 # FIXME: *: const char* x;
631 # FIXME: &: const char& x;
633 # (but we do catch "1234*x")
634 } elsif ($preproc && /^.\s*#\s*(?:include|error|warning)\b/) {
635 # Don't warn about missing whitespace in:
637 # #error nothing works!
639 diagnostic
('error', "Missing space before '$op'");
642 if ($first_char eq '+' && length($_)) {
643 # Replace leading `+` to avoid parsing as an operator or part of an
645 my $l = ' ' . substr($_, 1);
646 # Treat some operator combinations as a single pseudo-operator:
649 while ($l =~ m@
((?
:\
|\
||<<|>>|[=!/*%<>|^~])=?
|-[-=>]?
|&[&=]?
~?
|\
+[\
+=]?
|::?
|[?
,])@g) {
653 my $prech = substr($l, $pre[1] - 1, 1);
654 my $postch = substr($l, $post[1], 1) // '';
655 if ($lang eq 'c++' &&
656 ($op eq '*' || $op eq '&') &&
658 # `vector<some_type *> x;` `int f(some_type *);`
659 (($postch eq '>' || $postch eq ')') && $prech =~ /[ \t]/) ||
660 # `vector<int>*` `string&` `const foo*` `struct tm*` `Xapian::docid&`
661 # +static_assert(Xapian::DB_READONLY_ & Xapian::DB_NO_TERMLIST,
663 substr($l, 0, $pre[1]) =~ /(?:>|\b(?:auto|bool|char|const|double|float|int(?:\d+_t)?|long|short|string|uint\d+_t|unsigned|void|DIR|DWORD|FD|FILE|HANDLE|WSAOVERLAPPED|[A-Z][A-Z_]*_T|[A-Z]|[A-Z][A-Z0-9_]*?[a-z][A-Za-z0-9_]*|size_type|(?:(?:const|struct)\s+?|Xapian::)[A-Z]*[a-z][A-Za-z0-9_]*)[*&]*)\s+$/ &&
664 substr($l, $post[1]) !~ /^\s*\(/
668 diagnostic
('error', "Preferred style is 'int$op x' (not 'int ${op}x' or 'int ${op} x')");
669 } elsif ($op eq '::') {
670 if ($lang eq 'c++' && $postch =~ /\s/) {
671 diagnostic
('error', "Whitespace not expected after '::'");
673 } elsif ($op eq '->' && $prech !~ /\s/) {
677 if ($postch =~ /[ \t]/) {
678 diagnostic
('error', "Whitespace not expected after '->'");
680 } elsif (($op eq '++' || $op eq '--') && $prech !~ /[A-Za-z0-9_)]/) {
682 if ($postch =~ /[ \t]/) {
683 diagnostic
('error', "Whitespace not expected after '$op'");
685 } elsif ($op eq '!') {
687 if ($postch =~ /[ \t]/) {
688 diagnostic
('error', "Whitespace not expected after '!'");
690 } elsif (substr($l, $post[1]) !~ /^(?:\S| \s)/) {
691 # Check what follows the operator.
692 } elsif (($op eq '++' || $op eq '--') && $postch =~ /[\]),;]/) {
697 } elsif (($op eq '-' || $op eq '+' || $op eq '!' || $op eq '~') &&
698 substr($l, 0, $pre[1]) =~ m@
(?
:^\s
*|[-+/*%~=<>&|,;?
:] |[\
[(]|\b(?
:return|case
) |^\
+\s
*)$@
) {
699 # Unary -, +, !, ~: e.g. foo = +1; bar = x * (-y); baz = a * -b;
700 } elsif ($op eq ',' && (
701 /\b(?:AssertRel(?:Paranoid)?|TEST_REL)\(/ ||
704 } elsif ($op eq '>>' &&
705 /[A-Za-z0-9_]<.+</) {
706 # vector<vector<int>>&
707 } elsif ($op eq '*' &&
708 substr($l, 0, $pre[1]) !~ /(?:\b\d+)\s*$/ &&
710 substr($l, 0, $pre[1]) =~ /(?:>|\b(?:auto|bool|char|const|double|float|int(?:\d+_t)?|long|short|string|uint\d+_t|unsigned|void|[A-Z][A-Za-z0-9_]*|(?:struct\s*?|Xapian::)[a-z][a-z0-9_]*)[*&]*)\s+$/)) {
711 # FIXME: *ptr (dereference)
712 # (but we do catch "1234 *x" and common pointer types etc)
713 } elsif ($op eq '&' &&
714 substr($l, 0, $pre[1]) !~ /(?:\b\d+|[^*]\))\s*$/ &&
716 substr($l, 0, $pre[1]) =~ /(?:>|\b(?:auto|bool|char|const|double|float|int(?:\d+_t)?|long|short|string|uint\d+_t|unsigned|void|[A-Z][A-Za-z0-9_]*|(?:struct\s*?|Xapian::)[a-z][a-z0-9_]*)[*&]*)\s+$/)) {
717 # FIXME: &foo (address of)
718 # (but we do catch "...) &FLAG_FOO" and "1234 &x" and common reference types etc)
719 } elsif ($op eq '&&' && $postch =~ /[,)]/) {
720 # int f(int&&, bool&&);
721 } elsif ($op =~ /^[<|]$/ &&
722 substr($l, $post[1]) !~ /^\s*(?:\d+\b|\()/ &&
723 substr($l, 0, $pre[1]) !~ /(?:\b\d+|\))\s*$/) {
724 # FIXME: <: std::vector<std::string>
725 # (but we do catch "...) <foo" and "1234 >bar" etc)
727 } elsif (substr($l, $pre[1] - 8, 8) eq 'operator' && $postch eq '(') {
729 } elsif (($op eq '<<' || $op eq '>>') &&
730 substr($l, 0, $pre[1]) =~ /\b(?:0x[0-9a-fA-F]+|[0-9]+)$/ &&
731 substr($l, $post[1]) =~ /^(?:0x[0-9a-fA-F]+|[0-9]+)\b/) {
733 } elsif (($op eq '-' || $op eq '+') &&
734 substr($l, 0, $pre[1]) =~ /[0-9]\.?e$/) {
736 } elsif ($preproc && $op eq ',') {
737 # Currently there's a lot of: #define FOO(A,B) ...
738 } elsif ($preproc && /^.\s*#\s*(?:include|error|warning|pragma)\b/) {
739 # Don't warn about missing whitespace in:
742 # #pragma warning(disable:4146)
743 } elsif ($op eq '>' && ($postch =~ /[,)(;*&\\]/ || substr($l, $post[1], 2) eq '::')) {
744 # int f(vector<int>, vector<int>);
745 # static_cast<char>(7)
750 # vector<int>::size_type
751 } elsif ($op eq '=' && $postch =~ /[,\]]/) {
752 # Lambdas, e.g. [=]() {...} or [=, &a]() {...}
753 } elsif ($op eq '%' && $ext eq 'lemony' && $pre[1] == 1) {
754 # %-directive in Lemon grammar, e.g.:
756 } elsif ($op =~ /^([<>]|[<>=!]=)$/ && substr($l, 0, $pre[1]) =~ /\b(?:AssertRel(?:Paranoid)?|TEST_REL)\(/) {
759 } elsif ($op eq '~' && $postch =~ /[A-Za-z0-9_]/ && substr($l, 0, $pre[1]) =~ /(?:\s|::)$/) {
763 # Don't complain about this if it's actually whitespace at
765 if (substr($l, $post[1]) !~ /^\s*$/) {
766 diagnostic
('error', "Should have exactly one space after '$op'");
772 diagnostic
('error', "Extra ';' at end of line");
774 if (/^\+\s*?\S.*? (,|->)/) {
775 diagnostic
('error', "Space before '$1'");
777 if (/^\+[\s#]*?[^\s#] /) {
778 # Allow multiple spaces in "# ifdef FOO".
779 diagnostic
('error', "Multiple spaces");
781 if (/^\+\s*#\s*include([<"])/) {
782 # #include<foo.h> or #include"foo.h"
783 diagnostic
('error', "Missing space between #include and '$1'");
785 if (m!^\+(?:.*[;{])?\s*/[/*]{1,2}\w!) {
786 diagnostic
('error', "added/changed line has comment without whitespace before the text");
789 diagnostic
('error', "No space between ')' and '{'");
791 if (m!^\+.*?\bconst\{!) {
792 diagnostic
('error', "No space between 'const' and '{'");
794 if ($fnm !~ m!/(?:md5|posixy_wrapper|perftest)\.cc$! &&
795 m
,^\
+.*[^\w\
.>]([a
-z
][a
-z0
-9]*[A
-Z
]\w
*),) {
797 my $symbol_idx = $-[1];
798 if ($ext eq 'lemony' && $symbol =~ /^yy/) {
799 # Used in lemon parser grammar.
800 } elsif ($symbol =~ /^[gs]et[A-Z]$/) {
801 # For now, allow setD(), etc.
802 } elsif ($symbol =~ /^h(?:File|Read|Write|Pipe|Client)$/ || $symbol eq 'fdwCtrlType' || $symbol eq 'pShutdownSocket') {
803 # Platform specific names, allow for now.
804 } elsif ($symbol eq 'gzFile' || $symbol eq 'uInt' || $symbol =~ /^(?:de|in)flate[A-Z]/) {
805 # zlib API uses camelCase names.
806 } elsif ($symbol =~ /^pix[A-Z]/) {
807 # Tesseract's leptonica image library uses camelCase names.
808 } elsif (substr($_, 0, $symbol_idx) =~ /\bicu::(\w+::)?$/) {
809 # ICU library namespace uses camelCase method names.
810 } elsif (substr($_, 0, $symbol_idx) =~ /\b(?:EBOOK|Etonyek|RVNG)\w+::$/) {
811 # Libe-book/libetonyek/librevenge use camelCase method names.
813 diagnostic
('error', "camelCase identifier '$symbol' - Xapian coding convention is to use lower case and underscores for variables and functions, and CamelCase for class names");
816 if (/^\+.*\b(?:class|struct)\b.*:\s*$/) {
817 diagnostic
('error', "Inheritance list split after ':', should be before");
819 # Try to distinguish ternary operator (?:) correctly split after ":" vs
820 # constructor initialiser list incorrectly split after ":".
821 my $last_in_ternary = $in_ternary;
822 $in_ternary = / \?(?: |$)/;
823 if (!$last_in_ternary && !$in_ternary && /^\+.*\)\s*:\s*$/) {
824 diagnostic
('error', "Constructor initialiser list split after ':', should be before");
826 if (m
,^\
+\s
+([-+/%^]|[&|]{2})\s
,) {
827 diagnostic
('error', "Expression split before operator '$1', should be after");
829 if ($lang eq 'c++' && /^\+\s+inline\b/) {
830 diagnostic
('error', "Method defined inside a class is implicitly 'inline'");
833 if (/^\+using\s+namespace\b/) {
834 diagnostic
('error', "Avoid 'using namespace' at top level of header");
836 if (m!^\+\s*#\s*(ifndef|define|endif\s*/[*/])\s+((?:[A-Z]+_INCLUDED)?_?\w+_[Hh]\b)!) {
837 my ($type, $guard) = ($1, $2);
839 if (!defined $header_guard_macro) {
840 if ($type eq 'ifndef') {
841 $header_guard_macro = [$type, $guard];
842 my $expected_guard = uc $fnm;
843 $expected_guard =~ s![-.]!_!g;
845 if (length($expected_guard) > length($guard) &&
846 substr($expected_guard, -length($guard) - 1, 1) eq '/' &&
847 substr($expected_guard, -length($guard)) eq $guard) {
850 for my $i (1 .. length($guard)) {
851 my $ch_e = substr($expected_guard, -$i, 1);
852 my $ch_g = substr($guard, -$i, 1);
853 next if ($ch_e eq $ch_g);
854 last if ($ch_e ne '/' || $ch_g ne '_');
859 diagnostic
('error', "include guard macro should match filename");
861 my $prefix = 'XAPIAN_INCLUDED_';
862 if ($fnm =~ m!.*omega/(?:.*/)?!) {
863 $prefix = 'OMEGA_INCLUDED_';
865 #} elsif ($fnm =~ s!.*xapian-core/.*/!!) {
866 # $expected_guard = "XAPIAN_INCLUDED_" . $expected_guard;
867 #} elsif ($fnm =~ s!.*xapian-letor/.*/!!) {
868 #$expected_guard = "XAPIAN_INCLUDED_" . $expected_guard;
869 if (defined $cut && $cut == -1) {
870 diagnostic
('error', "include guard macro should use prefix '$prefix'");
871 } elsif (defined $cut && substr($guard, 0, length($guard) - $cut + 1) ne $prefix) {
872 diagnostic
('error', "include guard macro should use prefix '$prefix'");
873 } elsif ($guard !~ /^\Q$prefix\E/) {
874 diagnostic
('error', "include guard macro should use prefix '$prefix'");
878 if (!($type eq 'define' && $header_guard_macro->[0] ne 'ifndef')) {
879 my $expected_guard = $header_guard_macro->[1];
880 $header_guard_macro->[0] = $type;
881 if ($guard ne $expected_guard) {
882 diagnostic
('error', "include guard macro should be $expected_guard");
888 if (m!^\+\s*#\s*define\s+[A-Z]\+_INCLUDED_!) {
889 diagnostic
('error', "include guard macro defined in non-header");
893 if (defined $last_line_block_end &&
894 /^\+${last_line_block_end}(catch|else)\b/) {
895 # FIXME: while in do { ... } while can't be as easily checked.
896 diagnostic
('error', "'$1' should be on same line as preceding '}'");
898 } elsif (defined $lang && $lang eq 'py') {
900 diagnostic
('error', "';' at end of line of python code");
902 } elsif (defined $lang && $lang eq 'rb') {
904 diagnostic
('error', "';' at end of line of ruby code");
906 } elsif (defined $lang && $lang eq 'make') {
907 if (/^\+.*[A-Za-z0-9)}] +\s*$/) {
908 diagnostic
('error', "multiple spaces in Makefile");
911 if (defined $fnm && $fnm !~ m!xapian-check-patch|ChangeLog|NEWS|stemming/.*/(?:voc2?|output2?)\.txt$|omega/testfiles/|unicode/UnicodeData\.txt!) {
912 if ($fullline =~ /^\+.*?(?:\b|_)(xapain|the the|initialsing|ipv5|outputing|intened|wull|extrac|if it possible|betweem|differen|auxiliar|wat(?:|ed|ing|s)|wth|teh|ned|incase)(?:\b|_)/i ||
913 # Cases which just need to be the prefix of a word
914 $fullline =~ /^\+.*?(?:\b|_)((?:deafult|parm|peform|acessor|comptib|seach|seperat|seprat|separater|iteratat|calulat|delimitor|delimeter|charactor|databse|operatoar|implict|differnt|orignal|straterg|unecessar|comamnd|docuemnt|implment|initilias|capatil|reprensent|ommit|openning|openned|appropirate|labrar|returm|interati|termfrequenc|continous|juding|gradinet|clearling|clearled|retreiv|reteriv|filedescriptor|avalil*ab|assessem|contruct|particlar|revelan|releven|relv|intial|eal|specifiy|(?:tera|mega|kilo)?btye|comunic|accumlat|useage|existant|regrex|next(?!step)[eis])[a-z]*(?:\b|_))/i ||
915 # Case-sensitive cases
916 $fullline =~ /^\+.*?\b(and and|dont|Dont)\b/) {
917 diagnostic
('error', "Typo '$1'");
923 if (defined $indent && $first_char eq '+' &&
929 !/^\+[A-Za-z_][A-Za-z_0-9]*:/ &&
932 # Special handling for access specifiers, which should get a half indent.
933 if (/^\+\s*(?:private|protected|public):/) {
935 } elsif ($case_no_brace && /^\+(?:[ \t]*)(?:case\b.*|default):(?:\s*\{)?$/) {
936 # case or default following a case or default without a '{', so
937 # shouldn't be indented - reduce $indent by 4 columns.
941 if ($fullline =~ /^.([ \t]+)/) {
942 $this_indent = count_columns
($1);
944 my $extra = $this_indent - $indent;
952 $s = 's' if $extra > 1;
953 diagnostic
('error', "Line ${which}indented by $extra column$s");
957 #if (/^[-+ ]([ \t]*)(?:(?:(?:catch|for|if|for|switch|while)\b.*\)|(?:case|class|do|else|struct|try|union)\b.*) \{|case\b.*:)$/) {
958 if (/^[+ ]([ \t]*)(?:(catch|for|if|for|while|case|class|default|do|else|struct|try|union)\b.* \{|(case\b.*|default):)$/) {
959 $indent = count_columns
($1);
960 my $keyword = $2 // $3;
962 # FIXME: Might be OK in e.g. lambdas
963 #if (/^\+/ && $indent % 4 != 0) {
964 # diagnostic('error', "Indented by $len columns - not a multiple of 4");
966 $indent += $keyword =~ /^(?:case|switch|default)/ ?
2 : 4;
967 } elsif (/^[-+ ]([ \t]*)(?:(?:private|protected|public):)$/) {
968 # Access specifiers get a half indent and are followed by another half indent.
969 $indent = count_columns
($1);
970 $case_no_brace = undef;
971 if (/^\+/ && $indent % 4 != 2) {
972 diagnostic
('error', "Indented by $indent columns, should be 2 plus a multiple of 4");
975 } elsif (!/^.\s*$/) {
976 # Only reset for a non-blank line (after comment removal).
978 $case_no_brace = undef;
982 if ($first_char eq ' ') {
984 } elsif ($first_char eq '+') {
987 } elsif ($first_char eq '-') {
989 } elsif ($first_char eq '\\') {
990 # "\ No newline at end of file" - if preceded by a "+" line, this means
991 # that the patch leaves the file missing a newline at the end.
992 if ($check_end_new_line && $last_first_char eq '+') {
993 diagnostic_last
('error', 'No newline at end of file');
996 $last_fullline = $fullline;
997 $last_first_char = $first_char;
998 $last_line_blank = $line_blank;
999 if (/^.([ \t]+)\}$/) {
1000 $last_line_block_end = $1;
1002 $last_line_block_end = undef;
1004 $penultimate_line_block_start = $last_line_block_start;
1005 if (/^.(.*\{)\s*$/) {
1006 $last_line_block_start = $1;
1008 $last_line_block_start = undef;
1011 if (scalar keys %count) {
1012 for (sort keys %count) {
1013 print STDERR
"$_ count:\t$count{$_}\n";
1017 print STDERR
<<"__END__";
1018 Files patched
:\t$files
1019 Lines added
:\t$add_lines
1020 Lines removed
:\t$del_lines
1023 exit 0 unless exists $count{'error'};
1025 if (exists $ENV{GITHUB_JOB
} || exists $ENV{TRAVIS
}) {
1026 print STDERR
<<"__END__";
1028 You can run these checks locally before pushing with the xapian
-check
-patch
1029 which is
in the source tree
in the xapian
-maintainer
-tools directory
.
1031 E
.g
. to check any changes
in your working directory which aren
't on master:
1033 git diff master.. | xapian-maintainer-tools/xapian-check-patch