Update to Unicode 16.0.0
[xapian.git] / xapian-maintainer-tools / xapian-check-patch
blob83e7a0543488deaa62481eb71aacebe0b74b9bff
1 #! /usr/bin/perl -w
2 # Copyright (c) 2007-2024 Olly Betts
4 # Permission is hereby granted, free of charge, to any person obtaining a copy
5 # of this software and associated documentation files (the "Software"), to
6 # deal in the Software without restriction, including without limitation the
7 # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8 # sell copies of the Software, and to permit persons to whom the Software is
9 # furnished to do so, subject to the following conditions:
11 # The above copyright notice and this permission notice shall be included in
12 # all copies or substantial portions of the Software.
14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 # IN THE SOFTWARE.
# This script relies on the defined-or operators (// and //=) and the \h/\H
# regexp escapes, all of which first appeared in Perl 5.10.  Perform the
# version check in a BEGIN block so that it runs while the file is still being
# compiled: an older perl then reports a clear version error instead of an
# obscure syntax error from code further down the file.
BEGIN { require 5.010; }
use strict;
use POSIX;
# Handle "--help" as the first command line argument: print the usage message
# below to stdout and exit with status 0 without reading any patch.
26 if (defined $ARGV[0] && $ARGV[0] eq '--help') {
27 print <<END;
28 Syntax: $0 [PATCH]...
30 Nit-pick Xapian patches.
32 A patch can be supplied on stdin, or one or more patch files listed on the
33 command line.
35 Produces output suitable for use with vim's quick-fix mode, and similar
36 features in other editors.
38 Example usage:
40 git diff master.. | xapian-check-patch > tmp.qf
41 vim -q tmp.qf
42 END
43 exit 0;
46 my ($fnm, $lineno);
47 my ($last_fullline, $fullline);
48 my $ext;
49 my %count;
# Emit one diagnostic on stdout in "FILE:LINE: TYPE: MESSAGE" form and bump
# the running total kept in %count for that diagnostic type.
#
#   $type  diagnostic category; also the key counted in %count
#   $msg   message text
#   $n     line number to report (the filename comes from $fnm)
#   $l     optional text of the offending line; when defined it is appended
#          after ": " exactly as given, otherwise the output is terminated
#          with a newline
sub diagnostic_ {
    my ($type, $msg, $n, $l) = @_;
    my $tail = defined $l ? ": $l" : "\n";
    print "$fnm:$n: $type: $msg" . $tail;
    ++$count{$type};
}
# Report a diagnostic against the line currently being examined: it is
# attributed to line $lineno and quotes $fullline as the offending text.
sub diagnostic {
    my $type = shift;
    my $msg = shift;
    return diagnostic_($type, $msg, $lineno, $fullline);
}
# Report a diagnostic against the previous line: it is attributed to line
# $lineno - 1 and quotes $last_fullline as the offending text.
sub diagnostic_last {
    my $type = shift;
    my $msg = shift;
    my $previous_lineno = $lineno - 1;
    return diagnostic_($type, $msg, $previous_lineno, $last_fullline);
}
# Return the display width in columns of string $s, treating each tab as
# advancing to the next multiple of 8 columns and every other character as
# occupying one column.
sub count_columns {
    my $s = shift;
    my $width = 0;
    for my $ch (split //, $s) {
        # A tab jumps to the next tab stop; anything else is a single column.
        $width += ($ch eq "\t") ? 8 - $width % 8 : 1;
    }
    return $width;
}
# Check the text of a comment for malformed Doxygen markup.  Each problem
# found is reported against the current line via diagnostic().
#
# The single argument is the comment text (without the comment delimiters).
sub check_comment_content {
    my ($comment) = @_;

    # Doxygen commands should be introduced with '@' rather than a backslash.
    diagnostic('error', "Doxygen command '\\$1' introduced by '\\' not '\@'")
        if $comment =~ /\\([abcefp]|brief|code|deprecated|endcode|exception|file|internal|li|param|private|return|todo)\b/;

    # An '@' separated from the command name by whitespace.
    diagnostic('error', "Broken Doxygen command: whitespace between '\@' and '$1'")
        if $comment =~ /\@\s+([abcefp]|brief|code|deprecated|endcode|exception|file|internal|li|param|private|return|todo)\b/;

    # The same command written twice in succession.
    diagnostic('error', "Double Doxygen command: '$1 $1'")
        if $comment =~ /(\@[a-z]+)\s+\1\b/;

    return;
}
# Running totals reported in the summary printed at the end of the run.
101 my $add_lines = 0;
102 my $del_lines = 0;
103 my $files = 0;
104 # SVN property changes don't have an "Index: [...]" line.
# Indentation policy for the current file: 1 to require tabs, 0 to require
# spaces, -1 if unknown (in which case neither indentation-character check
# fires).
105 my $want_tabs = -1;
# Switches selecting which checks apply to the current file; they are set
# when a new file's diff header is seen.
106 my $check_indent = 0;
107 my $check_trailing = 0;
108 my $check_space_tab = 0;
109 my $check_end_new_line = 0;
# True while inside a multi-line C-style comment.  Set to undef at the start
# of a hunk which doesn't begin at line 1, meaning "not yet known".
110 my $in_comment = 0;
# Language tag chosen from the filename ('c++', 'c', 'py', 'rb', 'make' or
# 'changelog'), or undef when no language-specific checks apply.
111 my $lang;
# Once an include guard "#ifndef" has been seen in a header this is a
# reference to [most recent guard directive seen, guard macro name].
112 my $header_guard_macro;
# First character (the diff marker) of the previous line processed.
113 my $last_first_char = '';
# True if the line just processed contains " ?" followed by a space or the
# end of the line.
114 my $in_ternary;
# $preproc is true while on a preprocessor directive; $preproc_continuation
# is true if that directive ended with a backslash and so continues on the
# following line.
115 my $preproc;
116 my $preproc_continuation;
117 my ($top_level, $next_top_level); # undef for unknown, 0 for no, 1 for yes.
# Details of the previous one or two lines, used by the blank line and
# "} else" placement checks.
118 my $last_line_blank = 0;
119 my $last_line_block_start;
120 my $last_line_block_end;
121 my $penultimate_line_block_start;
122 # Indent in columns expected for this line (undef if we don't know).
123 my $indent;
124 # True if the indent increased due to a "case" or "default" without a { - this
125 # means that a following "case"/"default" should not be indented.
126 my $case_no_brace;
127 # Current file is a C/C++ header file.
128 my $header;
129 # First line number for doxygen @file comment check.
130 my $doxygen_first_line;
# Main loop: read the patch one line at a time from the files named on the
# command line, or from stdin if none were given.
131 while (<>) {
# $next_top_level was worked out from the previous line; it becomes the
# top-level state for this line.
132 if (defined $next_top_level) {
133 $top_level = $next_top_level;
134 $next_top_level = undef;
# Start of the diff for a file: record its name and extension, reset the
# per-file state, then choose the language and which checks to apply based on
# the extension and path.
# NOTE(review): each header form matched here bumps $files, so a diff which
# contains more than one of them for the same file (e.g. both "diff --git" and
# "+++" lines) counts that file more than once - confirm this is intended.
137 if (/^Index: (.+)/ || m!^diff --git a/.+ b/(.+)! || m!^\+\+\+ (\S+)!) {
138 ++$files;
139 $fnm = $1;
140 (($ext) = ($fnm =~ /\.([\w.]+)$/)) or $ext = '';
141 $lineno = 1;
142 $lang = undef;
143 $in_comment = 0;
144 $header_guard_macro = undef;
145 $in_ternary = 0;
146 $preproc = 0;
147 $preproc_continuation = 0;
148 $top_level = undef;
149 $header = 0;
150 # Don't know!
151 $want_tabs = -1;
152 $check_indent = 0;
153 $check_space_tab = 1;
154 $check_trailing = 1;
155 $doxygen_first_line = 1;
156 $check_end_new_line = 1;
# Omega test files are exempt from the whitespace, doxygen and
# newline-at-end-of-file checks.
157 if ($fnm =~ m!xapian-applications/omega/testfiles!) {
158 $check_space_tab = 0;
159 $check_trailing = 0;
160 $doxygen_first_line = 0;
161 $check_end_new_line = 0;
162 } elsif ($ext eq 'cc') {
# The files listed here only have the trailing whitespace check turned off
# and get no C++ specific checks.
163 if ($fnm =~ m!\b(?:cdb|portability/mkdtemp)! ||
164 $fnm =~ m!\bcommon/getopt\.cc$! ||
165 $fnm =~ m!\bomega/md5\.cc$! ||
166 $fnm =~ m!\bcommon/msvc_dirent\.cc$!) {
167 $check_trailing = 0;
168 } else {
169 $lang = 'c++';
170 $want_tabs = 1 unless ($fnm =~ m!\blanguages/steminternal\.cc$!);
171 $check_indent = 1;
173 } elsif ($ext eq 'c') {
174 if ($fnm =~ m!\blanguages/compiler/! ||
175 $fnm =~ m!/lemon\.c$!) {
176 $check_trailing = 0;
177 } else {
178 $lang = 'c';
179 $want_tabs = 1;
180 $check_indent = 1;
182 } elsif ($ext eq 'h') {
183 if ($fnm =~ m!\binclude/xapian/intrusive_ptr\.h! ||
184 $fnm =~ m!\blanguages/compiler/! ||
185 $fnm =~ m!\bcommon/msvc_dirent\.h$! ||
186 $fnm =~ m!\bcommon/heap\.h$! ||
187 $fnm =~ m!/omega/cdb! ||
188 $fnm =~ m!\bportability/mkdtemp!) {
189 $check_trailing = 0;
190 } else {
# Any other .h file is treated as a C++ header, which also enables the
# header-only checks.
191 $header = 1;
192 $lang = 'c++';
193 $want_tabs = 1;
194 $check_indent = 1;
196 } elsif ($ext eq 'lemony') {
197 $lang = 'c++';
198 $want_tabs = 1;
199 } elsif ($ext eq 'lt') {
200 $want_tabs = 0;
201 $check_trailing = 0;
202 } elsif ($ext eq 'py' || $ext eq 'py.in') {
203 $lang = 'py';
204 $want_tabs = 0;
205 } elsif ($ext eq 'rb') {
206 $lang = 'rb';
207 $want_tabs = 0;
208 } elsif ($ext eq 'sbl') {
209 $check_space_tab = 0;
210 $check_trailing = 0;
211 } elsif ($ext eq 'patch') {
212 $check_space_tab = 0;
213 } elsif ($ext eq 'txt') {
214 # Imported text file with trailing whitespace.
215 if ($fnm =~ m!/testdata/etext\.txt$!) {
216 $check_trailing = 0;
218 } elsif ($fnm =~ m!(?:^|/)Makefile!) {
219 $lang = 'make';
220 $want_tabs = 1;
221 } elsif ($fnm =~ m!(?:^|/)ChangeLog\b!) {
222 $lang = 'changelog';
223 $want_tabs = 1;
225 # print STDERR "$fnm: lang=" . ($lang // "UNKNOWN") . "\n";
# The file header line itself is not checked any further.
226 next;
# Dispatch on the first three characters of the line to spot hunk headers and
# the "---"/"+++" file header lines.
my $pre3 = substr($_, 0, 3);
if ($pre3 eq '@@ ') {
    # Hunk header: resynchronise $lineno with the starting line number in the
    # new file.  Unified diff format omits the ",COUNT" part of a range when
    # the count is 1 (e.g. "@@ -3 +3 @@"), so both counts are optional here -
    # otherwise $lineno would be left stale for single-line hunks and every
    # diagnostic in them would be reported against the wrong line.
    /^\@\@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)?\b/ and $lineno = $1;
    # Only a hunk starting at line 1 is known to start at the top level and
    # outside of a comment; otherwise these are unknown (undef).
    $next_top_level = ($lineno == 1) ? 1 : undef;
    $in_comment = ($lineno == 1) ? 0 : undef;
    # Nothing is known about the lines preceding this hunk.
    $last_line_blank = 0;
    $last_line_block_start = undef;
    $last_line_block_end = undef;
    $penultimate_line_block_start = undef;
    $indent = undef;
    $last_first_char = '';
    $last_fullline = undef;
    next;
}
if ($pre3 eq '---' || $pre3 eq '+++') {
    # File header lines aren't part of the patched content.
    next;
}
246 if (!/^.\s*$/) {
247 $next_top_level = (/^.\s/ ? 0 : 1);
250 my $line_blank = /^[+ ]\s*$/;
252 $fullline = $_;
253 my $first_char = substr($fullline, 0, 1);
255 if (defined $lang && ($lang eq 'c++' || $lang eq 'c')) {
256 if (!defined $in_comment) {
257 # Decide if we're in a C-style comment for the first line of a hunk.
258 $in_comment = /^.\s*\*+\s/;
260 if ($lineno == $doxygen_first_line && m!^\+!) {
261 if ($doxygen_first_line == 1 && m,^\+%include\b\s*\{,) {
262 # If the first line is %include{... check the second.
263 $doxygen_first_line = 2;
264 } elsif (m!^\+/\*\*\s+\@file\s*(.*)!) {
265 if (length($1)) {
266 diagnostic('error', "Doxygen \@file should not list explicit filename");
268 } elsif ($fnm =~ m!\bomega/md5\.h$!) {
269 # Imported file.
270 } else {
271 diagnostic('error', "Doxygen \@file missing");
275 # Uncomment commented out parameter names: foo(int /*bar*/) -> foo(int bar)
276 s!/\*([A-Za-z_][A-Za-z_0-9]*)\*/([,)])!$1$2!g;
278 # Check for comments without a space before the comment text.
279 if (m!^\+.*\s/([*/]{1,2})[A-Za-z0-9]!) {
280 if ($ext eq 'lemony' && $1 eq '*' && $' =~ m!^\w*-overwrites-\w+\*/!) {
281 # Magic comment in lemon grammar - lemon requires no spaces.
282 } else {
283 diagnostic('error', "Missing space between comment characters and comment text");
287 # Trim comments:
288 if (!$in_comment) {
289 if (s! /\*(.*?)\*/ ! !g) {
290 # C-style comment with spaces around, e.g.
291 # { T = P->as_phrase_query(); /*T-overwrites-P*/ }
292 if ($first_char eq '+') {
293 check_comment_content($1);
295 s/\s+$//;
297 if (s!/\*(.*?)\*/!!g) {
298 # C-style comment without spaces on both sides, e.g.:
299 # foo(); /* blah blah */
300 if ($first_char eq '+') {
301 check_comment_content($1);
303 s/\s+$//;
305 if (s!//(.*)!!) {
306 # Single line comment, e.g.:
307 # // blah blah
308 if ($first_char eq '+') {
309 check_comment_content($1);
311 s/\s+$//;
313 # Take care to avoid interpreting "foo/*" as a comment start.
314 if (s!^.(?:[^"]+?|"(?:[^\\"]*?|\\.)*?")*?/\*(.*)!!g) {
315 if ($first_char eq '+') {
316 check_comment_content($1);
318 s/\s+$//;
319 $in_comment = 1;
321 } else {
322 if (s!^.\s*\*+(.*)\*/!$first_char!) {
323 # End of multiline comment with leading *, e.g.:
324 # * blah blah */
325 if ($first_char eq '+') {
326 check_comment_content($1);
328 s/\s+$//;
329 $in_comment = 0;
330 } elsif (s!^.(.*)\*/!$first_char!) {
331 # End of multiline comment without leading *, e.g.:
332 # blah blah */
333 if ($first_char eq '+') {
334 check_comment_content($1);
336 $in_comment = 0;
337 } else {
338 if ($first_char eq '+') {
339 if (m!^.\s*\*+(.*)!) {
340 # In multiline comment with leading *.
341 check_comment_content($1);
342 } else {
343 # In multiline comment without leading *.
344 check_comment_content(substr($_, 1));
347 $_ = $first_char;
350 } elsif (defined $lang && $lang eq 'py') {
351 # Trim comments:
352 if (s!#.*!!g) {
353 s/\s+$//;
355 } elsif (defined $lang && $lang eq 'rb') {
356 # Trim comments:
357 if (s!#.*!!g) {
358 s/\s+$//;
362 # Default to not being in a comment for languages other than C/C++.
363 $in_comment //= 0;
365 # Replace multiple spaces before line continuation marker:
366 s! +\\$! \\!;
368 if (defined $lang && ($lang eq 'c++' || $lang eq 'c')) {
369 if ($first_char eq '+') {
370 my $expandedline = '';
371 for my $i (1..length($fullline) - 1) {
372 my $ch = substr($fullline, $i, 1);
373 if ($ch eq "\t") {
374 $expandedline .= ('.' x (8 - length($expandedline) % 8));
375 } else {
376 $expandedline .= $ch;
379 chomp($expandedline);
380 if (length($expandedline) > 80 &&
381 # Logging annotations aren't really for human eyes.
382 !/^\+[ \t]*LOGCALL/ &&
383 # Allow length up to 84 if " in first column for formatting
384 # text blocks (the extra 4 being "\n").
385 (length($expandedline) > 84 || !/^\+"/) &&
386 # Allow longer copyright lines.
387 $fullline !~ m,^\+[ /]\* Copyright , &&
388 # Allow long initialisers (e.g. for testcases).
389 ! /^\+\s*\{.*\},?$/ &&
390 # Don't force wrapping of a long #error message.
391 !/^\+#\d*(error|warning)\b/) {
392 diagnostic('error', "Line extends beyond column 80 (to column ".length($expandedline).")");
395 if (m,^\+\s+LOGCALL(?:_[A-Z0-9]+)*\([^"]*"[^"]*(?<!operator)\(,) {
396 diagnostic('error', "Don't include parentheses in debug logging method/class name");
398 if (/^\+\s+LOGCALL(?:_[A-Z0-9]+)*\(.*,$/) {
399 diagnostic('error', "Don't wrap long LOGCALL lines");
401 if (/^\+\s+(LOGCALL(?:_STATIC)?)\([^,]*,\s*void,$/) {
402 diagnostic('error', "Use $1_VOID for a method with a void return type");
404 # Replace string literals containing escaped quotes:
405 if (/['"]/) {
406 my $quote = substr($_, $-[0], 1);
407 my $start = $+[0];
408 my $i = $start;
409 my $esc = 0;
410 QUOTELOOP: while (1) {
411 if ($i >= length($_)) {
412 $_ = substr($_, 0, $start) . "X\n";
413 last;
415 my $c = substr($_, $i, 1);
416 if ($c eq $quote) {
417 $_ = substr($_, 0, $start) . "X" . substr($_, $i);
418 $i = $start + 2;
419 # See if there's another string after this one:
420 while ($i != length($_)) {
421 $c = substr($_, $i, 1);
422 ++$i;
423 if ($c eq '"' || $c eq "'") {
424 $quote = $c;
425 $start = $i;
426 $esc = 0;
427 next QUOTELOOP;
430 last;
432 if ($c eq '\\') {
433 ++$i;
434 $c = substr($_, $i, 1);
435 if ($c eq 'x') {
436 ++$i while (substr($_, $i, 1) =~ /^[A-Fa-f0-9]$/);
437 next;
438 } elsif ($c =~ /^[0-7]/) {
439 my $j = $i;
440 ++$i while ($i - $j <= 3 && substr($_, $i, 1) =~ /^[0-7]$/);
441 next;
442 } elsif ($c eq '"' || $c eq "'") {
443 ++$esc;
446 ++$i;
451 if ($check_trailing && $fullline =~ /^\+.*[ \t]$/) {
452 diagnostic('error', "added/changed line has trailing whitespace");
454 if ($check_space_tab && /^\+.* \t/) {
455 diagnostic('error', "added/changed line has space before tab");
457 if ($want_tabs == 1 and /^\+\t* {8}/) {
458 diagnostic('error', "added/changed line uses spaces for indentation rather than tab");
460 if (!$want_tabs and /^\+ *\t/) {
461 diagnostic('error', "added/changed line uses tab for indentation rather than spaces");
463 if ((!defined $lang || $lang ne 'changelog') && $fullline =~ /^([-+]).*\bFIX(?:ME)\b/) {
464 # Break up the string in the regexp above and messages below to avoid
465 # this triggering on its own code!
466 if ($1 eq '-') {
467 # Not an error, but interesting information.
468 diagnostic('info', "FIX"."ME removed");
469 } else {
470 # Not an error, but not good.
471 diagnostic('warning', "FIX"."ME added");
474 if (defined $lang && ($lang eq 'c++' || $lang eq 'c')) {
475 if ($last_line_blank) {
476 if ($line_blank) {
477 # Allow multiple blank lines at the top level for now.
478 diagnostic('error', "Extra blank line") unless ($top_level // 1);
479 } elsif (/^.\s+\}$/) {
480 # Closing } of a namespace often has a blank line before it,
481 # and that seems reasonable.
482 diagnostic_last('error', "Blank line at end of block") unless ($top_level // 1);
483 } elsif ($penultimate_line_block_start && /^.(\s|\}$)/) {
484 diagnostic_last('error', "Blank line at start of block");
488 if (/^([-+ ])(\s*)\#/) {
489 # Avoid misfiring for something like:
490 # #define FOO(x) \
491 # #x
492 if (!$preproc_continuation) {
493 if ($1 eq '+' && $2 ne '') {
494 diagnostic('error', "Whitespace before '#' on preprocessor line");
497 $preproc = 1;
498 $preproc_continuation = /\\$/;
499 } elsif ($preproc_continuation) {
500 $preproc_continuation = /\\$/;
501 } else {
502 $preproc = 0;
# Report added lines indented by an odd number of spaces.  The capture matches
# one space optionally followed by 2, 4 or 6 more, i.e. exactly 1, 3, 5 or 7
# spaces, as described below.
if ($check_space_tab && /^\+( (?:|  |    |      ))[^ \t].*(?:[^)];|[^);,])\n/) {
    # Save the width now rather than relying on $1 surviving the match below.
    my $spaces = length($1);
    # We only check for 1, 3, 5 and 7 space indents to avoid false
    # positives for "public:", etc and for wrapped expressions.
    #
    # Exclude lines ending ');', ')', or ',' to avoid reporting for
    # wrapped function arguments.  This means we'll also miss some
    # cases we should complain about, but it's likely that at least
    # one line in a mis-indented block will trigger an error.
    #
    # Exclude potential comment continuation lines which might have
    # been missed by the comment stripping code.  Require whitespace
    # after so we flag a mis-indented: *ptr = foo;
    if (!/^\+\s*\*\s/) {
        diagnostic('error', "line indented by $spaces spaces");
    }
}
521 #if (/^\+.*(?<!\btypedef )\b([A-Za-z_][A-Za-z_0-9]*)\s+\(/ &&
522 if (/^\+.*\b([A-Za-z_][A-Za-z_0-9]*)\s+\(((?:[A-Za-z][A-Za-z0-9_]*::)?\*|[A-Za-z][A-Za-z0-9_]*\)\()?/) {
523 my $name = $1;
524 my $post = $2;
525 if (
526 # `delete (*i)->foo();` rather than `delete(*i)->foo()'` - the `(`
527 # isn't around function parameters here.
528 $name !~ /^(case|catch|delete|double|for|if|return|switch|throw|while)$/ &&
529 # Function pointer type `int (*)(void)` or parenthesised
530 # function name `int (foo)(`.
531 !($name =~ /^(?:bool|double|float|unsigned|void|[a-z][a-z0-9_]+_t|(?:(?:un)?signed\s+)?(?:char|int|long|short))$/ && length($post))) {
532 if (!$preproc) {
533 diagnostic('error', "Whitespace between '$name' and '('");
534 } else {
535 # FIXME: We skip preprocessor lines for now to avoid triggering
536 # on things like «#define FOUR (4)» but it would be good to
537 # catch «#define FOO(x) foo (x)»
541 if (m!^\+\s*(case|class|do|for|if|namespace|struct|switch|try|union)\b([^ ]| \s)!) {
542 diagnostic('error', "'$1' not followed by exactly one space");
544 if (m!^\+.*;[^\s\\]!) {
545 diagnostic('error', "Missing space after ';'");
547 if (m!^\+.*[^(;]\s;!) {
548 # Stuff like this is OK: for ( ; ; ) {
549 # though for that exact case I'd suggest: while (true) {
550 diagnostic('error', "Whitespace before ';'");
552 if (m!^\+.*?<<"!) {
553 diagnostic('error', "Missing space after '<<'");
555 if (m!^\+.*?"<<!) {
556 diagnostic('error', "Missing space before '<<'");
558 if (m!^\+.*?\b(return)\b([^ ;]| \s)!) {
559 diagnostic('error', "'$1' not followed by exactly one space");
561 if (m!^\+.*?\b(else)\b([^ \n]| \s)!) {
562 diagnostic('error', "'$1' not followed by exactly one space");
564 if (m!^\+.*?\b(catch|while)\b([^ ]| \s)!) {
565 diagnostic('error', "'$1' not followed by exactly one space");
567 if (m!^\+.*?(?:}|}\s{2,}|}\t|^[^}]*)\b(catch)\b!) {
568 diagnostic('error', "'$1' not preceded by exactly '} '");
570 if (m!^\+.*?(?:}|}\s{2,}|}\t)\b(else|while)\b!) {
571 diagnostic('error', "'}' and '$1' not separated by exactly one space");
573 if (m,^\+.*?\belse\b\s*(?!if)[^\s{],) {
574 diagnostic('error', "Code after 'else' on same line");
576 if (m,^\+.*?\belse\s+if.*;\s*$,) {
577 diagnostic('error', "Code after 'else if' on same line");
579 if (m!^\+.*\((?: [^;]|\t)!) {
580 # Allow: for ( ; i != 10; ++i)
581 diagnostic('error', "Whitespace after '('");
583 if (m!^\+.*\H.*\h\)!) {
584 diagnostic('error', "Whitespace before ')'");
586 if (m!^\+.*;\s*(\w+)([-+]{2})\)!) {
587 diagnostic('error', "Prefer '$2$1' to '$1$2'");
589 if (m!^\+.*?>\s+>!) {
590 diagnostic('error', "We assume C++11 so can write '>>' instead of '> >'");
592 if (m!^\+.*?\b(?:enable_if|list|map|multimap|multiset|priority_queue|set|template|unordered_map|unordered_set|vector)\s+<!) {
593 diagnostic('error', "Whitespace between template name and '<'");
595 if (/^\+.*?\bfor\s*\([^(]*([^:(]:[^:])/ && $1 ne ' : ') {
596 diagnostic('error', "Missing spaces around ':' in 'for'");
598 if (m,^\+.*?[\w)](?!-[->]|\+\+)((?:\&\&|\|\||<<|>>|[-+/*%~=<>!&|^])=?|[?]),) {
599 my @pre = @-;
600 my @post = @+;
601 my $op = $1;
602 if (substr($_, $pre[1] - 8, 8) eq 'operator') {
603 # operator*() etc
604 } elsif ($op eq '>' && substr($_, 0, $pre[1]) =~ /[A-Za-z0-9_]</) {
605 # y = static_cast<char>(x);
606 } elsif ($op eq '>') {
607 } elsif ($op eq '<' && substr($_, $pre[1] - 1, 1) =~ /^[A-Za-z0-9_]$/ && substr($_, $post[1]) =~ />/) {
608 # y = static_cast<char>(x);
609 } elsif ($op eq '<' &&
610 substr($_, 0, $pre[1]) =~ /\b(?:enable_if|list|map|multimap|multiset|priority_queue|set|template|unordered_map|unordered_set|vector)$/) {
611 # y = priority_queue<Foo*,
612 # Bar>;
613 # template<typename A,
614 # typename B>
615 } elsif ($op eq '&&' && substr($_, 0, $pre[1]) =~ /\b(?:auto|bool|char|double|float|int(?:\d+_t)?|long|short|string|uint\d+_t|unsigned|void|[A-Z][A-Za-z0-9_]*)$/) {
616 # auto&& x;
617 # method(Class&& foo);
618 } elsif (($op eq '<<' || $op eq '>>') &&
619 substr($_, 0, $pre[1]) =~ /\b(?:0x[0-9a-fA-F]+|[0-9]+)$/ &&
620 substr($_, $post[1]) =~ /^(?:0x[0-9a-fA-F]+|[0-9]+)\b/) {
621 # 0x00b1<<26
622 } elsif (($op eq '-' || $op eq '+') &&
623 substr($_, 0, $pre[1]) =~ /[0-9]\.?e$/) {
624 # 1.2e-3, 7.e+3
625 } elsif ($op eq '>>' &&
626 /[A-Za-z0-9_]<.+</) {
627 # vector<vector<int>> v;
628 } elsif ($op =~ /^[*&|]$/ &&
629 substr($_, 0, $pre[1]) !~ /(?:\b\d+)\s*$/) {
630 # FIXME: *: const char* x;
631 # FIXME: &: const char& x;
632 # FIXME: |: FOO|BAR
633 # (but we do catch "1234*x"
634 } elsif ($preproc && /^.\s*#\s*(?:include|error|warning)\b/) {
635 # Don't warn about missing whitespace in:
636 # #include <a/b-c.h>
637 # #error nothing works!
638 } else {
639 diagnostic('error', "Missing space before '$op'");
642 if ($first_char eq '+' && length($_)) {
643 # Replace leading `+` to avoid parsing as an operator or part of an
644 # operator.
645 my $l = ' ' . substr($_, 1);
646 # Treat some operator combinations as a single pseudo-operator:
647 # x &=~ y;
648 # a = b &~ c;
649 while ($l =~ m@((?:\|\||<<|>>|[=!/*%<>|^~])=?|-[-=>]?|&[&=]?~?|\+[\+=]?|::?|[?,])@g) {
650 my @pre = @-;
651 my @post = @+;
652 my $op = $1;
653 my $prech = substr($l, $pre[1] - 1, 1);
654 my $postch = substr($l, $post[1], 1) // '';
655 if ($lang eq 'c++' &&
656 ($op eq '*' || $op eq '&') &&
658 # `vector<some_type *> x;` `int f(some_type *);`
659 (($postch eq '>' || $postch eq ')') && $prech =~ /[ \t]/) ||
660 # `vector<int>*` `string&` `const foo*` `struct tm*` `Xapian::docid&`
661 # +static_assert(Xapian::DB_READONLY_ & Xapian::DB_NO_TERMLIST,
663 substr($l, 0, $pre[1]) =~ /(?:>|\b(?:auto|bool|char|const|double|float|int(?:\d+_t)?|long|short|string|uint\d+_t|unsigned|void|DIR|DWORD|FD|FILE|HANDLE|WSAOVERLAPPED|[A-Z][A-Z_]*_T|[A-Z]|[A-Z][A-Z0-9_]*?[a-z][A-Za-z0-9_]*|size_type|(?:(?:const|struct)\s+?|Xapian::)[A-Z]*[a-z][A-Za-z0-9_]*)[*&]*)\s+$/ &&
664 substr($l, $post[1]) !~ /^\s*\(/
668 diagnostic('error', "Preferred style is 'int$op x' (not 'int ${op}x' or 'int ${op} x')");
669 } elsif ($op eq '::') {
670 if ($lang eq 'c++' && $postch =~ /\s/) {
671 diagnostic('error', "Whitespace not expected after '::'");
673 } elsif ($op eq '->' && $prech !~ /\s/) {
674 # a->b
675 # but not:
676 # auto f() -> bool
677 if ($postch =~ /[ \t]/) {
678 diagnostic('error', "Whitespace not expected after '->'");
680 } elsif (($op eq '++' || $op eq '--') && $prech !~ /[A-Za-z0-9_)]/) {
681 # ++a
682 if ($postch =~ /[ \t]/) {
683 diagnostic('error', "Whitespace not expected after '$op'");
685 } elsif ($op eq '!') {
686 # !a
687 if ($postch =~ /[ \t]/) {
688 diagnostic('error', "Whitespace not expected after '!'");
690 } elsif (substr($l, $post[1]) !~ /^(?:\S| \s)/) {
691 # Check what follows the operator.
692 } elsif (($op eq '++' || $op eq '--') && $postch =~ /[\]),;]/) {
693 # buf[len++] = 'a';
694 # f(x++);
695 # f(1, x++);
696 # a = b++;
697 } elsif (($op eq '-' || $op eq '+' || $op eq '!' || $op eq '~') &&
698 substr($l, 0, $pre[1]) =~ m@(?:^\s*|[-+/*%~=<>&|,;?:] |[\[(]|\b(?:return|case) |^\+\s*)$@) {
699 # Unary -, +, !, ~: e.g. foo = +1; bar = x * (-y); baz = a * -b;
700 } elsif ($op eq ',' && (
701 /\b(?:AssertRel(?:Paranoid)?|TEST_REL)\(/ ||
702 /{[^()]*}/)) {
703 # AssertRel(a,<,b);
704 } elsif ($op eq '>>' &&
705 /[A-Za-z0-9_]<.+</) {
706 # vector<vector<int>>&
707 } elsif ($op eq '*' &&
708 substr($l, 0, $pre[1]) !~ /(?:\b\d+)\s*$/ &&
709 !($lang eq 'c++' &&
710 substr($l, 0, $pre[1]) =~ /(?:>|\b(?:auto|bool|char|const|double|float|int(?:\d+_t)?|long|short|string|uint\d+_t|unsigned|void|[A-Z][A-Za-z0-9_]*|(?:struct\s*?|Xapian::)[a-z][a-z0-9_]*)[*&]*)\s+$/)) {
711 # FIXME: *ptr (dereference)
712 # (but we do catch "1234 *x" and common pointer types etc)
713 } elsif ($op eq '&' &&
714 substr($l, 0, $pre[1]) !~ /(?:\b\d+|[^*]\))\s*$/ &&
715 !($lang eq 'c++' &&
716 substr($l, 0, $pre[1]) =~ /(?:>|\b(?:auto|bool|char|const|double|float|int(?:\d+_t)?|long|short|string|uint\d+_t|unsigned|void|[A-Z][A-Za-z0-9_]*|(?:struct\s*?|Xapian::)[a-z][a-z0-9_]*)[*&]*)\s+$/)) {
717 # FIXME: &foo (address of)
718 # (but we do catch "...) &FLAG_FOO" and "1234 &x" and common reference types etc)
719 } elsif ($op eq '&&' && $postch =~ /[,)]/) {
720 # int f(int&&, bool&&);
721 } elsif ($op =~ /^[<|]$/ &&
722 substr($l, $post[1]) !~ /^\s*(?:\d+\b|\()/ &&
723 substr($l, 0, $pre[1]) !~ /(?:\b\d+|\))\s*$/) {
724 # FIXME: <: std::vector<std::string>
725 # (but we do catch "...) <foo" and "1234 >bar" etc)
726 # FIXME: |: FOO|BAR
727 } elsif (substr($l, $pre[1] - 8, 8) eq 'operator' && $postch eq '(') {
728 # operator==() etc
729 } elsif (($op eq '<<' || $op eq '>>') &&
730 substr($l, 0, $pre[1]) =~ /\b(?:0x[0-9a-fA-F]+|[0-9]+)$/ &&
731 substr($l, $post[1]) =~ /^(?:0x[0-9a-fA-F]+|[0-9]+)\b/) {
732 # 0x00b1<<26
733 } elsif (($op eq '-' || $op eq '+') &&
734 substr($l, 0, $pre[1]) =~ /[0-9]\.?e$/) {
735 # 1.2e-3, 7.e+3
736 } elsif ($preproc && $op eq ',') {
737 # Currently there's a lot of: #define FOO(A,B) ...
738 } elsif ($preproc && /^.\s*#\s*(?:include|error|warning|pragma)\b/) {
739 # Don't warn about missing whitespace in:
740 # #include <a/b-c.h>
741 # #warning so-so
742 # #pragma warning(disable:4146)
743 } elsif ($op eq '>' && ($postch =~ /[,)(;*&\\]/ || substr($l, $post[1], 2) eq '::')) {
744 # int f(vector<int>, vector<int>);
745 # static_cast<char>(7)
746 # return tmpl<true>;
747 # vector<int>* x;
748 # vector<int>& y;
749 # template<class S>\
750 # vector<int>::size_type
751 } elsif ($op eq '=' && $postch =~ /[,\]]/) {
752 # Lambdas, e.g. [=]() {...} or [=, &a]() {...}
753 } elsif ($op eq '%' && $ext eq 'lemony' && $pre[1] == 1) {
754 # %-directive in Lemon grammar, e.g.:
755 # %left OR.
756 } elsif ($op =~ /^([<>]|[<>=!]=)$/ && substr($l, 0, $pre[1]) =~ /\b(?:AssertRel(?:Paranoid)?|TEST_REL)\(/) {
757 # AssertRel(a,>=,b);
758 # TEST_REL(a,>=,b);
759 } elsif ($op eq '~' && $postch =~ /[A-Za-z0-9_]/ && substr($l, 0, $pre[1]) =~ /(?:\s|::)$/) {
760 # ~Foo()
761 # Foo::~Foo()
762 } elsif ($op eq '>>' && $prech eq '<') {
763 # std::map<std::string, int, std::less<>>*
764 } else {
765 # Don't complain about this if it's actually whitespace at
766 # line end.
767 if (substr($l, $post[1]) !~ /^\s*$/) {
768 diagnostic('error', "Should have exactly one space after '$op'");
773 if (/^\+.*;;\s*$/) {
774 diagnostic('error', "Extra ';' at end of line");
776 if (/^\+\s*?\S.*? (,|->)/) {
777 diagnostic('error', "Space before '$1'");
# Report two consecutive spaces following the first non-blank character of an
# added line.  The pattern must contain two spaces: with only one it would
# fire on correctly spaced code such as "} else {".
# NOTE(review): only the character which ends the leading run of whitespace
# and '#' is inspected - confirm that is the intended reach of this check.
if (/^\+[\s#]*?[^\s#]  /) {
    # Leading '#' characters are skipped along with whitespace, which allows
    # multiple spaces in "#  ifdef FOO".
    diagnostic('error', "Multiple spaces");
}
783 if (/^\+\s*#\s*include([<"])/) {
784 # #include<foo.h> or #include"foo.h"
785 diagnostic('error', "Missing space between #include and '$1'");
787 if (m!^\+(?:.*[;{])?\s*/[/*]{1,2}\w!) {
788 diagnostic('error', "added/changed line has comment without whitespace before the text");
790 if (m!^\+.*?\)\{!) {
791 diagnostic('error', "No space between ')' and '{'");
793 if (m!^\+.*?\bconst\{!) {
794 diagnostic('error', "No space between 'const' and '{'");
796 if ($fnm !~ m!/(?:md5|posixy_wrapper|perftest)\.cc$! &&
797 m,^\+.*[^\w\.>]([a-z][a-z0-9]*[A-Z]\w*),) {
798 my $symbol = $1;
799 my $symbol_idx = $-[1];
800 if ($ext eq 'lemony' && $symbol =~ /^yy/) {
801 # Used in lemon parser grammar.
802 } elsif ($symbol =~ /^[gs]et[A-Z]$/) {
803 # For now, allow setD(), etc.
804 } elsif ($symbol =~ /^h(?:File|Read|Write|Pipe|Client)$/ || $symbol eq 'fdwCtrlType' || $symbol eq 'pShutdownSocket') {
805 # Platform specific names, allow for now.
806 } elsif ($symbol eq 'gzFile' || $symbol eq 'uInt' || $symbol =~ /^(?:de|in)flate[A-Z]/) {
807 # zlib API uses camelCase names.
808 } elsif ($symbol =~ /^pix[A-Z]/) {
809 # Tesseract's leptonica image library uses camelCase names.
810 } elsif (substr($_, 0, $symbol_idx) =~ /\bicu::(\w+::)?$/) {
811 # ICU library namespace uses camelCase method names.
812 } elsif (substr($_, 0, $symbol_idx) =~ /\b(?:Abi|CDR|EBOOK|Etonyek|MWAW|RVNG)\w+::$/) {
813 # Libabw/Libe-book/libcdr/libetonyek/libmwaw/librevenge use camelCase method names.
814 } else {
815 diagnostic('error', "camelCase identifier '$symbol' - Xapian coding convention is to use lower case and underscores for variables and functions, and CamelCase for class names");
818 if (/^\+.*\b(?:class|struct)\b.*:\s*$/) {
819 diagnostic('error', "Inheritance list split after ':', should be before");
821 # Try to distinguish ternary operator (?:) correctly split after ":" vs
822 # constructor initialiser list incorrectly split after ":".
823 my $last_in_ternary = $in_ternary;
824 $in_ternary = / \?(?: |$)/;
825 if (!$last_in_ternary && !$in_ternary && /^\+.*\)\s*:\s*$/) {
826 diagnostic('error', "Constructor initialiser list split after ':', should be before");
828 if (m,^\+\s+([-+/%^]|[&|]{2})\s,) {
829 diagnostic('error', "Expression split before operator '$1', should be after");
831 if ($lang eq 'c++' && /^\+\s+inline\b/) {
832 diagnostic('error', "Method defined inside a class is implicitly 'inline'");
# Checks which depend on whether the current file is a C/C++ header.
if ($header) {
    if (/^\+using\s+namespace\b/) {
        diagnostic('error', "Avoid 'using namespace' at top level of header");
    }
    # Include guard directives: "#ifndef GUARD", "#define GUARD" and
    # "#endif // GUARD" (or "#endif /* GUARD"), where GUARD ends "_H" or "_h".
    if (m!^\+\s*#\s*(ifndef|define|endif\s*/[*/])\s+((?:[A-Z]+_INCLUDED)?_?\w+_[Hh]\b)!) {
        my ($type, $guard) = ($1, $2);
        if (!defined $header_guard_macro) {
            if ($type eq 'ifndef') {
                # First guard directive seen for this header: remember it and
                # check the macro name against the filename.
                $header_guard_macro = [$type, $guard];
                my $expected_guard = uc $fnm;
                $expected_guard =~ s![-.]!_!g;
                # $cut is set to -1 if the guard is exactly the final path
                # component of the (upper-cased) filename, i.e. it has no
                # prefix at all.  Otherwise it's the offset from the end of
                # the last position where a '/' in the filename lines up with
                # a '_' in the guard, or left undefined if there's no such
                # position.
                my $cut;
                if (length($expected_guard) > length($guard) &&
                    substr($expected_guard, -length($guard) - 1, 1) eq '/' &&
                    substr($expected_guard, -length($guard)) eq $guard) {
                    $cut = -1;
                } else {
                    for my $i (1 .. length($guard)) {
                        # Stop at the start of the filename - there's nothing
                        # further to compare, and indexing beyond it would
                        # only yield a "substr outside of string" warning.
                        last if $i > length($expected_guard);
                        my $ch_e = substr($expected_guard, -$i, 1);
                        my $ch_g = substr($guard, -$i, 1);
                        next if ($ch_e eq $ch_g);
                        last if ($ch_e ne '/' || $ch_g ne '_');
                        $cut = $i;
                    }
                }
                if (!defined $cut) {
                    diagnostic('error', "include guard macro should match filename");
                }
                my $prefix = 'XAPIAN_INCLUDED_';
                if ($fnm =~ m!.*omega/(?:.*/)?!) {
                    $prefix = 'OMEGA_INCLUDED_';
                }
                #} elsif ($fnm =~ s!.*xapian-core/.*/!!) {
                #    $expected_guard = "XAPIAN_INCLUDED_" . $expected_guard;
                #} elsif ($fnm =~ s!.*xapian-letor/.*/!!) {
                #    $expected_guard = "XAPIAN_INCLUDED_" . $expected_guard;
                # Complain if the guard has no prefix, if the part before the
                # filename-derived tail isn't exactly the prefix, or if the
                # guard simply doesn't start with the prefix.
                if ((defined $cut &&
                     ($cut == -1 ||
                      substr($guard, 0, length($guard) - $cut + 1) ne $prefix)) ||
                    $guard !~ /^\Q$prefix\E/) {
                    diagnostic('error', "include guard macro should use prefix '$prefix'");
                }
            }
        } else {
            # A guard has already been seen: later guard directives must use
            # the same macro.  A "#define" is only considered when the
            # directive recorded before it was the "#ifndef".
            if (!($type eq 'define' && $header_guard_macro->[0] ne 'ifndef')) {
                my $expected_guard = $header_guard_macro->[1];
                $header_guard_macro->[0] = $type;
                if ($guard ne $expected_guard) {
                    diagnostic('error', "include guard macro should be $expected_guard");
                }
            }
        }
    }
} else {
    # Note "+" here is the regexp quantifier: "\+" would instead match a
    # literal plus sign, which can never occur in a macro name.
    if (m!^\+\s*#\s*define\s+[A-Z]+_INCLUDED_!) {
        diagnostic('error', "include guard macro defined in non-header");
    }
}
895 if (defined $last_line_block_end &&
896 /^\+${last_line_block_end}(catch|else)\b/) {
897 # FIXME: while in do { ... } while can't be as easily checked.
898 diagnostic('error', "'$1' should be on same line as preceding '}'");
900 } elsif (defined $lang && $lang eq 'py') {
901 if (/^\+.*;\s*$/) {
902 diagnostic('error', "';' at end of line of python code");
904 } elsif (defined $lang && $lang eq 'rb') {
905 if (/^\+.*;\s*$/) {
906 diagnostic('error', "';' at end of line of ruby code");
908 } elsif (defined $lang && $lang eq 'make') {
909 if (/^\+.*[A-Za-z0-9)}] +\s*$/) {
910 diagnostic('error', "multiple spaces in Makefile");
913 if (defined $fnm && $fnm !~ m!xapian-check-patch|ChangeLog|NEWS|stemming/.*/(?:voc2?|output2?)\.txt$|omega/testfiles/|unicode/UnicodeData\.txt!) {
914 if ($fullline =~ /^\+.*?(?:\b|_)(xapain|the the|initialsing|ipv5|outputing|intened|wull|extrac|if it possible|betweem|differen|auxiliar|wat(?:|ed|ing|s)|wth|teh|ned|incase)(?:\b|_)/i ||
915 # Cases which just need to be the prefix of a word
916 $fullline =~ /^\+.*?(?:\b|_)((?:additon|deafult|parm|peform|acessor|comptib|seach|seperat|seprat|separater|iteratat|calulat|delimitor|delimeter|charactor|databse|operatoar|implict|differnt|orignal|straterg|unecessar|comamnd|docuemnt|implment|initilias|capatil|reprensent|ommit|openning|openned|appropirate|labrar|returm|interati|termfrequenc|continous|juding|gradinet|clearling|clearled|retreiv|reteriv|filedescriptor|avalil*ab|assessem|contruct|particlar|revelan|releven|relv|intial|eal|specifiy|(?:tera|mega|kilo)?btye|comunic|accumlat|useage|existant|regrex|next(?!step)[eis])[a-z]*(?:\b|_))/i ||
917 # Case-sensitive cases
918 $fullline =~ /^\+.*?\b(and and|dont|Dont)\b/) {
919 diagnostic('error', "Typo '$1'");
923 if ($check_indent) {
924 # Check indentation.
925 if (defined $indent && $first_char eq '+' &&
926 # blank
927 !/^\+\s*$/ &&
928 # Preprocessor line
929 !$preproc &&
930 # Label for goto
931 !/^\+[A-Za-z_][A-Za-z_0-9]*:/ &&
932 # outdent
933 !/^\+\s*}/) {
934 # Special handling for access specifiers, which should get a half indent.
935 if (/^\+\s*(?:private|protected|public):/) {
936 $indent -= 2;
937 } elsif ($case_no_brace && /^\+(?:[ \t]*)(?:case\b.*|default):(?:\s*\{)?$/) {
938 # case or default following a case or default without a '{', so
939 # shouldn't be indented - reduce $indent by 4 columns.
940 $indent -= 4;
942 my $this_indent = 0;
943 if ($fullline =~ /^.([ \t]+)/) {
944 $this_indent = count_columns($1);
946 my $extra = $this_indent - $indent;
947 if ($extra) {
948 my $which = 'over';
949 if ($extra < 0) {
950 $extra = -$extra;
951 $which = 'under';
953 my $s = '';
954 $s = 's' if $extra > 1;
955 diagnostic('error', "Line ${which}indented by $extra column$s");
959 #if (/^[-+ ]([ \t]*)(?:(?:(?:catch|for|if|for|switch|while)\b.*\)|(?:case|class|do|else|struct|try|union)\b.*) \{|case\b.*:)$/) {
960 if (/^[+ ]([ \t]*)(?:(catch|for|if|for|while|case|class|default|do|else|struct|try|union)\b.* \{|(case\b.*|default):)$/) {
961 $indent = count_columns($1);
962 my $keyword = $2 // $3;
963 $case_no_brace = $3;
964 # FIXME: Might be OK in e.g. lambdas
965 #if (/^\+/ && $indent % 4 != 0) {
966 # diagnostic('error', "Indented by $len columns - not a multiple of 4");
968 $indent += $keyword =~ /^(?:case|switch|default)/ ? 2 : 4;
969 } elsif (/^[-+ ]([ \t]*)(?:(?:private|protected|public):)$/) {
970 # Access specifiers get a half indent and are followed by another half indent.
971 $indent = count_columns($1);
972 $case_no_brace = undef;
973 if (/^\+/ && $indent % 4 != 2) {
974 diagnostic('error', "Indented by $indent columns, should be 2 plus a multiple of 4");
976 $indent += 2;
977 } elsif (!/^.\s*$/) {
978 # Only reset for a non-blank line (after comment removal).
979 $indent = undef;
980 $case_no_brace = undef;
# Update the counters according to the diff marker on this line: context and
# added lines advance the line number in the new file; added and removed lines
# are tallied for the final summary.
984 if ($first_char eq ' ') {
985 ++$lineno;
986 } elsif ($first_char eq '+') {
987 ++$lineno;
988 ++$add_lines;
989 } elsif ($first_char eq '-') {
990 ++$del_lines;
991 } elsif ($first_char eq '\\') {
992 # "\ No newline at end of file" - if preceded by a "+" line, this means
993 # that the patch leaves the file missing a newline at the end.
994 if ($check_end_new_line && $last_first_char eq '+') {
995 diagnostic_last('error', 'No newline at end of file');
# Remember details of this line for the checks made while processing the
# lines which follow it.
998 $last_fullline = $fullline;
999 $last_first_char = $first_char;
1000 $last_line_blank = $line_blank;
# A line consisting of just an indented '}' ends a block; record its
# indentation.
1001 if (/^.([ \t]+)\}$/) {
1002 $last_line_block_end = $1;
1003 } else {
1004 $last_line_block_end = undef;
1006 $penultimate_line_block_start = $last_line_block_start;
# A line ending with '{' starts a block; record the text up to and including
# the '{'.
1007 if (/^.(.*\{)\s*$/) {
1008 $last_line_block_start = $1;
1009 } else {
1010 $last_line_block_start = undef;
# Summary on stderr: the number of diagnostics of each type (if there were
# any), followed by statistics about the patch itself.
1013 if (scalar keys %count) {
1014 for (sort keys %count) {
1015 print STDERR "$_ count:\t$count{$_}\n";
1017 print STDERR "\n";
1019 print STDERR <<"__END__";
1020 Files patched:\t$files
1021 Lines added:\t$add_lines
1022 Lines removed:\t$del_lines
1023 __END__
# Only diagnostics of type 'error' cause a failure exit status.
1025 exit 0 unless exists $count{'error'};
# If GITHUB_JOB or TRAVIS is set in the environment (presumably meaning this
# is a CI run), explain how to run the checks locally.
1027 if (exists $ENV{GITHUB_JOB} || exists $ENV{TRAVIS}) {
1028 print STDERR <<"__END__";
1030 You can run these checks locally before pushing with the xapian-check-patch
1031 which is in the source tree in the xapian-maintainer-tools directory.
1033 E.g. to check any changes in your working directory which aren't on master:
1035 git diff master.. | xapian-maintainer-tools/xapian-check-patch
1036 __END__
1039 exit 1;