match_strval > try_val_to_str
[wireshark-wip.git] / tools / checkhf.pl
blob5de607cfe311236089ded9096fb099fa123df5a2
1 #!/usr/bin/env perl
3 # Copyright 2013, William Meier (See AUTHORS file)
5 # Validate hf_... usage for a dissector file;
7 # Usage: checkhf.pl [--debug=?] <file or files>
9 # $Id$
11 # Wireshark - Network traffic analyzer
12 # By Gerald Combs <gerald@wireshark.org>
13 # Copyright 1998 Gerald Combs
15 # This program is free software; you can redistribute it and/or
16 # modify it under the terms of the GNU General Public License
17 # as published by the Free Software Foundation; either version 2
18 # of the License, or (at your option) any later version.
20 # This program is distributed in the hope that it will be useful,
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 # GNU General Public License for more details.
25 # You should have received a copy of the GNU General Public License
26 # along with this program; if not, write to the Free Software
27 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
30 ## Note: This program is a re-implementation of the
31 ## original checkhf.pl written and (C) by Joerg Mayer.
32 ## The overall objective of the new implementation was to reduce
33 ## the number of false positives which occurred with the
34 ## original checkhf.pl
36 ## This program can be used to scan original .c source files or source
37 ## files which have been passed through a C pre-processor.
38 ## Operating on pre-processed source files is optimal; There should be
39 ## minimal false positives.
40 ## If the .c input is an original source file there may very well be
41 ## false positives/negatives due to the fact that the hf_... variables & etc
42 ## may be created via macros.
44 ## ----- (The following is extracted from the original checkhf.pl with thanks to Joerg) -------
45 ## Example:
46 ## ~/work/wireshark/trunk/epan/dissectors> ../../tools/checkhf.pl packet-afs.c
47 ## Unused entry: packet-afs.c, hf_afs_ubik_voteend
48 ## Unused entry: packet-afs.c, hf_afs_ubik_errcode
49 ## Unused entry: packet-afs.c, hf_afs_ubik_votetype
50 ## ERROR: NO ARRAY: packet-afs.c, hf_afs_fs_ipaddr
52 ## or checkhf.pl packet-*.c, which will check all the dissector files.
54 ## NOTE: This tool currently generates false positives!
56 ## The "NO ARRAY" messages - if accurate - point to an error that will
57 ## cause (t|wire)shark to report a DISSECTOR_BUG when a packet containing
58 ## this particular element is being dissected.
60 ## The "Unused entry" message indicates the opposite: We define an entry but
61 ## never use it (e.g., in a proto_...add... function).
62 ## ------------------------------------------------------------------------------------
64 # ------------------------------------------------------------------------------------
65 # Main
67 # Logic:
68 # 1. Clean the input: remove blank lines, comments, quoted strings and code under '#if 0'.
69 # 2. hf_defs:
70 # Find (and remove from input) list of hf_... variable
71 # definitions ('static? g?int hf_... ;')
72 # 3. hf_array_entries:
73 # Find (and remove from input) list of hf_... variables
74 # referenced in the hf[] entries;
75 # 4. hf_usage:
76 # From the remaining input, extract list of all strings of form hf_...
77 # (which may include strings which are not actually valid
78 # hf_... variable references).
79 # 5. Checks:
80 # If entries in hf_defs not in hf_usage then "unused" (for static hf_defs only)
81 # If entries in hf_defs not in hf_array_entries then "ERROR: NO ARRAY";
83 use strict;
84 use warnings;
86 use Getopt::Long;
# Command-line state.  $debug is read by the helper subs below to decide
# which intermediate results to print.
my $help_flag = '';
my $debug     = 0; # default: off; 1=cmt; 2=#if0; 3=hf_defs; 4=hf_array_entries; 5=hfusage (See code)

my $sts = GetOptions(
    'debug=i' => \$debug,
    'help|?'  => \$help_flag
);

# Look at the number of remaining arguments rather than the truth of
# $ARGV[0], so that a file literally named "0" still counts as an argument.
if (!$sts || $help_flag || !@ARGV) {
    usage();
}

my $error = 0;

# Check each file named on the command line in turn.
# 'defined' (rather than a plain truth test) keeps a filename such as "0"
# from ending the loop before the remaining files have been checked.
while (defined(my $filename = shift @ARGV)) {

    my ($file_contents);
    my (%hf_defs, %hf_static_defs, %hf_array_entries, %hf_usage);
    my ($unused_href, $no_array_href);

    # read_file() may also rewrite $filename (it strips a leading './')
    read_file(\$filename, \$file_contents);

    # Clean the input
    remove_comments      (\$file_contents, $filename);
    remove_blank_lines   (\$file_contents, $filename);
    remove_quoted_strings(\$file_contents, $filename);
    remove_if0_code      (\$file_contents, $filename);

    # Extract (and remove from the input) definitions and hf[] entries;
    # whatever hf_... strings remain afterwards are counted as usage.
    find_remove_hf_defs                    (\$file_contents, $filename, \%hf_defs);
    find_remove_hf_array_entries           (\$file_contents, $filename, \%hf_array_entries);
    find_remove_proto_get_id_hf_assignments(\$file_contents, $filename, \%hf_array_entries);
    find_hf_usage                          (\$file_contents, $filename, \%hf_usage);

    # Tests (See above)
    # 1. Are all the static hf_defs entries in hf_usage ?
    #    if not: "Unused entry:"

    # create a hash containing entries just for the static definitions
    @hf_static_defs{grep {$hf_defs{$_} == 0} keys %hf_defs} = (); # All values in the new hash will be undef

    $unused_href = diff_hash(\%hf_static_defs, \%hf_usage);
    remove_hf_pid_from_unused_if_add_oui_call(\$file_contents, $filename, $unused_href);

    print_list("Unused entry: $filename, ", $unused_href);

    # 2. Are all the hf_defs entries (static and global) in hf_array_entries ?
    #    (Note: if a static hfDef is "unused", don't check for same in hf_array_entries)
    #    if not: "ERROR: NO ARRAY"

    ## Checking for missing global defs currently gives false positives
    ## So: only check static defs for now.
    ## $no_array_href = diff_hash(\%hf_defs, \%hf_array_entries);
    $no_array_href = diff_hash(\%hf_static_defs, \%hf_array_entries);
    $no_array_href = diff_hash($no_array_href, $unused_href); # Remove "unused" hf_... from no_array list

    print_list("ERROR: NO ARRAY: $filename, ", $no_array_href);

    # Only "NO ARRAY" findings count towards the exit status.
    if ((keys %{$no_array_href}) != 0) {
        $error += 1;
    }
}

exit (($error == 0) ? 0 : 1);   # exit 1 if ERROR
# ---------------------------------------------------------------------
# action: print a one-line usage summary on standard output and
#         terminate the program with exit status 1.
# arg:    none

sub usage {
    my $summary = "Usage: $0 [--debug=n] Filename [...]\n";

    print $summary;
    exit(1);
}
# ---------------------------------------------------------------------
# action:  read contents of a file to specified string
# arg:     filename_ref, file_contents_ref
# effects: - a leading './' is deleted from the caller's filename
#          - the caller's contents string receives the whole file, read
#            through the ':crlf' layer
# dies:    if the file does not exist, or cannot be opened, read or closed

sub read_file {
    my ($filename_ref, $file_contents_ref) = @_;

    die "No such file: \"${$filename_ref}\"\n" if (! -e ${$filename_ref});

    # delete leading './'
    ${$filename_ref} =~ s{ ^ [.] / } {}xmso;

    # Read in the file (ouch, but it's easier that way).
    # Report the OS error as well: the file is known to exist at this
    # point, so the reason for the failure is the useful part.
    open(my $fci, "<:crlf", ${$filename_ref})
        or die "Couldn't open ${$filename_ref}: $!\n";

    # Slurp: with $/ undefined the first read returns the whole file.
    ${$file_contents_ref} = do { local( $/ ) ; <$fci> } ;

    # An undefined result means the read itself failed (for example when
    # the name refers to a directory); stop here instead of letting the
    # callers run regexes against undef.
    if (! defined ${$file_contents_ref}) {
        die "Couldn't read ${$filename_ref}: $!\n";
    }

    close($fci)
        or die "Couldn't close ${$filename_ref}: $!\n";

    return;
}
# ---------------------------------------------------------------------
# action:  Build a new hash holding every key of 'a' that is not a key of 'b'
#          (key presence is what counts; the values of 'a' and 'b' are ignored)
# arg:     a_href, b_href
# returns: reference to the new hash; each of its values is undef

sub diff_hash {
    my ($a_href, $b_href) = @_;

    my %only_in_a;

    for my $key (keys %{$a_href}) {
        next if exists $b_href->{$key};
        $only_in_a{$key} = undef;
    }

    return \%only_in_a;
}
# ---------------------------------------------------------------------
# action: print the keys of a hash, one per line, in sorted order;
#         each line consists of the header followed by the key
# arg:    hdr, list_href

sub print_list {
    my ($hdr, $list_href) = @_;

    my @names = sort keys %{$list_href};
    my @lines = map { $hdr . $_ . "\n" } @names;

    print @lines;

    return;
}
# ------------
# action: remove blank lines from input string.
#         Because \s also matches a newline, the pattern removes any run
#         of whitespace found at the start of a line: whole blank lines
#         as well as the leading indentation of non-blank lines.
# arg:    code_ref, filename  (filename is accepted but not used here)

sub remove_blank_lines {
    my ($code_ref, $filename) = @_;

    my $line_start_ws_regex = qr{ ^ \s* \n ? }xmso;

    ${$code_ref} =~ s{ $line_start_ws_regex } {}xmsog;

    return;
}
# ------------
# action: remove C-style (/* ... */) comments from input string;
#         with --debug=1 the resulting text is printed.
# arg:    code_ref, filename  (filename is used only in the debug output)

sub remove_comments {
    my ($code_ref, $filename) = @_;

    # The below Regexp is based on one from:
    # http://aspn.activestate.com/ASPN/Cookbook/Rx/Recipe/59811
    # It is in the public domain.
    # A complicated regex which matches C-style comments.
    my $c_comment_regex = qr{ / [*] [^*]* [*]+ (?: [^/*] [^*]* [*]+ )* / }xmso;

    ${$code_ref} =~ s{ $c_comment_regex } {}xmsog;

    if ($debug == 1) {
        print "==> After Remove Comments: code: [$filename]\n${$code_ref}\n===<\n";
    }

    return;
}
# ------------
# action: remove double-quoted and single-quoted strings (quotes included)
#         from input string; with --debug=1 the resulting text is printed.
# arg:    code_ref, filename  (filename is used only in the debug output)

sub remove_quoted_strings {
    my ($code_ref, $filename) = @_;

    # A regex which matches double-quoted strings.
    #  's' modifier added so that strings containing a 'line continuation'
    #  ( \ followed by a new-line) will match.
    my $double_quoted_str = qr{ (?: ["] (?: \\. | [^\"\\])* ["]) }xmso;

    # A regex which matches single-quoted strings.
    my $single_quoted_str = qr{ (?: ['] (?: \\. | [^\'\\])* [']) }xmso;

    # Both kinds are handled in one left-to-right pass, so a quote
    # character inside a string of the other kind does not start a string.
    ${$code_ref} =~ s{ $double_quoted_str | $single_quoted_str } {}xmsog;

    if ($debug == 1) {
        print "==> After Remove quoted strings: code: [$filename]\n${$code_ref}\n===<\n";
    }

    return;
}
# -------------
# action: remove '#if 0'd code from the input string
# args    code_ref, filename  (filename is used only in the debug output)
#
# Essentially: Use s//patsub/meg to pass each line to patsub.
#              patsub monitors #if/#if 0/etc and determines
#               if a particular code line should be removed.
# XXX: This is probably pretty inefficient;
#      I could imagine using another approach such as converting
#       the input string to an array of lines and then making
#       a pass through the array deleting lines as needed.
#
# The directive pattern accepts any amount of whitespace between 'if' and
# '0' (the same as the quick pre-check does) and requires the '0' to end
# at a word boundary.  Previously '#if<TAB>0' matched no branch in
# patsub, so the nesting level was never raised and the matching '#endif'
# could trigger the "mismatch" die; and '#if 0x10' was taken for '#if 0'.

{  # block begin
    # State shared between remove_if0_code() and patsub():
    #   $if_lvl  : current #if nesting depth
    #   $if0_lvl : nesting depth of the '#if 0' being tracked (0: none)
    #   $if0     : 1 while lines are to be removed
    my ($if_lvl, $if0_lvl, $if0);

    sub remove_if0_code {
        my ($code_ref, $filename)  = @_;

        # First see if any '#if 0' lines which need to be handled
        if (${$code_ref} !~ m{ \# \s* if \s+ 0 }xmso ) {
            return;
        }

        # NOTE(review): the '^', ') ?' and final ')' tokens sat on lines
        # that are missing from the reviewed copy of this file; they were
        # restored from context (patsub() expects $2 to be undefined for
        # ordinary lines, so the directive group has to be optional).
        # Confirm against the upstream file.
        my ($preproc_regex) = qr{
                                    (                                       # $1 (complete line)
                                        ^
                                        (?:                                 # non-capturing
                                            \s* \# \s*
                                            (if \s+ 0 \b | if | else | endif)  # $2 (only if #...)
                                        ) ?
                                        [^\n]*
                                        \n ?
                                    )
                            }xmso;

        ($if_lvl, $if0_lvl, $if0) = (0,0,0);
        ${$code_ref} =~ s{ $preproc_regex } { patsub($1,$2) }xmsoeg;

        ($debug == 2) && print "==> After Remove if0: code: [$filename]\n${$code_ref}\n===<\n";
        return;
    }

    # action:  decide the replacement text for one input line
    # args:    line, directive (undef unless the line is one of the
    #          preprocessor lines recognised by remove_if0_code())
    # returns: '' for an ordinary line under '#if 0', else the line itself
    # dies:    when more '#endif's than '#if's have been seen
    sub patsub {
        my ($line, $directive) = @_;

        if ($debug == 99) {
            print "-->$line\n";
            (defined $directive) && print " >$directive<\n";
        }

        # not preprocessor line: See if under #if 0: If so, remove
        if (! defined $directive) {
            return ($if0 == 1) ? '' : $line;
        }

        # #if/#if 0/#else/#endif processing
        if ($directive =~ m{ \A if \s+ 0 \z }xms) {
            $if_lvl += 1;
            if ($if0_lvl == 0) {
                $if0_lvl = $if_lvl;
                $if0     = 1;  # inside #if 0
            }
        }
        elsif ($directive eq 'if') {
            $if_lvl += 1;
        }
        elsif ($directive eq 'else') {
            if ($if0_lvl == $if_lvl) {
                $if0 = 0;
            }
        }
        elsif ($directive eq 'endif') {
            if ($if0_lvl == $if_lvl) {
                $if0     = 0;
                $if0_lvl = 0;
            }
            $if_lvl -= 1;
            if ($if_lvl < 0) {
                die "patsub: #if/#endif mismatch"
            }
        }

        return $line;  # don't remove preprocessor lines themselves
    }
}  # block end
# ---------------------------------------------------------------------
# action:  Add to hash an entry for each
#          'static? g?int hf_...' definition (including array names)
#          in the input string.
#          The entry value will be 0 for 'static' definitions and 1 for 'global' definitions;
#          Remove each definition found from the input string.
#          With --debug=3 the hash and the remaining input are printed.
# args:    code_ref, filename, hf_defs_href
# returns: nothing (the results are stored through hf_defs_href)

sub find_remove_hf_defs {
    my ($code_ref, $filename, $hf_defs_href) = @_;

    # Build pattern to match any of the following
    #  static? g?int hf_foo = -1;
    #  static? g?int hf_foo = HF_EMPTY;
    #  static? g?int hf_foo[xxx];
    #  static? g?int hf_foo[xxx] = {

    # NOTE(review): in the three patterns below the '^', '\s*', '\s+',
    # '(?:' and ')' tokens sat on lines that are missing from the reviewed
    # copy of this file; they were restored from context.
    # Confirm against the upstream file.

    # p1: 'static? g?int hf_foo'
    my $p1_regex = qr{
                         ^
                         \s*
                         (static \s+)?
                         g?int
                         \s+
                         (hf_[a-zA-Z0-9_]+)          # hf_..
                 }xmso;

    # p2a: ' = -1;' or ' = HF_EMPTY;'
    my  $p2a_regex = qr{
                           \s* = \s*
                           (?:
                               - \s* 1 | HF_EMPTY
                           )
                           \s* ;
                   }xmso;

    # p2b: '[xxx];' or '[xxx] = {'
    my  $p2b_regex = qr/
                           \s* \[ [^\]]+ \] \s*
                           (?:
                               = \s* [{] | ;
                           )
                       /xmso;

    my $hf_def_regex = qr{ $p1_regex (?: $p2a_regex | $p2b_regex ) }xmso;

    # First pass: record every definition.
    while (${$code_ref} =~ m{ $hf_def_regex }xmsog) {
        my ($static_kw, $hf_name) = ($1, $2);

        if (defined $static_kw) {
            $hf_defs_href->{$hf_name} = 0;   # 'static' definition
        }
        else {
            $hf_defs_href->{$hf_name} = 1;   # 'global' definition
        }
    }

    if ($debug == 3) {
        debug_print_hash("VD: $filename", $hf_defs_href); # VariableDefinition
    }

    # Second pass: remove all
    ${$code_ref} =~ s{ $hf_def_regex } {}xmsog;

    if ($debug == 3) {
        print "==> After remove hf_defs: code: [$filename]\n${$code_ref}\n===<\n";
    }

    return;
}
# ---------------------------------------------------------------------
# action: Add to hash an entry (hf_...) for each hf[] entry.
#         Remove each hf[] entries found from the input string.
#         With --debug=4 the hash and the remaining input are printed;
#         with --debug=98 each match is printed as it is found.
# args:   code_ref, filename, hf_array_entries_href

sub find_remove_hf_array_entries {
    my ($code_ref, $filename, $hf_array_entries_href) = @_;

    # hf[] entry regex (to extract an hf_index_name and associated field type)
    #
    # NOTE(review): the brace tokens, the leading '\s*' and the '(?:' /
    # ') ?' / ')' group delimiters sat on lines that are missing from the
    # reviewed copy of this file; they were restored from context.
    # Confirm against the upstream file.
    my $hf_array_entry_regex = qr /
                                      [{]
                                      \s*
                                      & \s* ( [a-zA-Z0-9_]+ )   # &hf
                                      (?:
                                          \s* [[] [^]]+ []]     # optional array ref
                                      ) ?
                                      \s* , \s*
                                      [{]
                                      [^}]+
                                      , \s*
                                      (FT_[a-zA-Z0-9_]+)        # field type
                                      \s* ,
                                      [^}]+
                                      , \s*
                                      (?:
                                          HFILL | HF_REF_TYPE_NONE
                                      )
                                      [^}]*
                                      [}]
                                      [\s,]*
                                      [}]
                                  /xmso;

    # find all the hf[] entries (searching ${$code_ref}).
    while (${$code_ref} =~ m{ $hf_array_entry_regex }xmsog) {
        my ($hf_name, $field_type) = ($1, $2);

        if ($debug == 98) {
            print "+++ $hf_name $field_type\n";
        }

        # only the key matters; the value is left undefined
        $hf_array_entries_href->{$hf_name} = undef;
    }

    if ($debug == 4) {
        debug_print_hash("AE: $filename", $hf_array_entries_href); # ArrayEntry
    }

    # now remove all
    ${$code_ref} =~ s{ $hf_array_entry_regex } {}xmsog;

    if ($debug == 4) {
        print "==> After remove hf_array_entries: code: [$filename]\n${$code_ref}\n===<\n";
    }

    return;
}
# ---------------------------------------------------------------------
# action: Add to hash an entry (hf_...) for each hf_... var
#         found in statements of the form:
#           'hf_... = proto_registrar_get_id_byname ...'
#           'hf_... = proto_get_id_by_filter_name ...'
#         Remove each such statement found from the input string.
#         (Only the 'hf_... = function-name' part is matched and removed.)
#         With --debug=4 the hash and the remaining input are printed.
# args:   code_ref, filename, hf_array_entries_href

sub find_remove_proto_get_id_hf_assignments {
    my ($code_ref, $filename, $hf_array_entries_href) = @_;

    my $_regex = qr{ ( hf_ [a-zA-Z0-9_]+ )
                     \s* = \s*
                     (?: proto_registrar_get_id_byname | proto_get_id_by_filter_name )
               }xmso;

    # list context + /g: one hf_... name per matching statement
    my @hfvars = ${$code_ref} =~ m{ $_regex }xmsog;

    if (@hfvars == 0) {
        return;
    }

    # found:
    # Sanity check: hf_vars shouldn't already be in hf_array_entries.
    # Test key presence with 'exists': the values in hf_array_entries are
    # always undef, so a 'defined' test on them can never succeed.
    if (grep { exists $hf_array_entries_href->{$_} } @hfvars) {
        print "? one or more of [@hfvars] initialized via proto_registrar_get_by_name() also in hf[] ??\n";
    }

    # Now: add to hf_array_entries
    @$hf_array_entries_href{@hfvars} = ();

    ($debug == 4) && debug_print_hash("PR: $filename", $hf_array_entries_href);

    # remove from input (so not considered as 'usage')
    ${$code_ref} =~ s{ $_regex } {}xmsog;

    ($debug == 4) && print "==> After remove proto_registrar_by_name: code: [$filename]\n${$code_ref}\n===<\n";

    return;
}
# ---------------------------------------------------------------------
# action:  Count, in the hash, every hf_... string remaining in input string.
#          With --debug=5 the hash is printed.
# args:    code_ref, filename, hf_usage_href
# returns: nothing (the counts are stored through hf_usage_href)
#
# The hash will include *all* strings of form hf_...
#   which are in the input string (even strings which
#   aren't actually vars).
#   We don't care since we'll be checking only
#   known valid vars against these strings.

sub find_hf_usage {
    my ($code_ref, $filename, $hf_usage_href) = @_;

    my $hf_usage_regex = qr{
                               \b ( hf_[a-zA-Z0-9_]+ )      # hf_...
                       }xmso;

    # list context + /g: every captured hf_... string, in order of appearance
    for my $hf_name (${$code_ref} =~ m{ $hf_usage_regex }xmsog) {
        $hf_usage_href->{$hf_name} += 1;
    }

    if ($debug == 5) {
        debug_print_hash("VU: $filename", $hf_usage_href); # VariableUsage
    }

    return;
}
# ---------------------------------------------------------------------
# action: Remove from 'unused' hash an instance of a variable named hf_..._pid
#         if the source has a call to llc_add_oui() or ieee802a_add_oui().
#         (This is rather a bit of a hack).
#         Nothing is removed unless exactly one hf_..._pid name is in
#         the 'unused' hash.
# args:   code_ref, filename, unused_href  (filename is accepted but not used here)

sub remove_hf_pid_from_unused_if_add_oui_call {
    my ($code_ref, $filename, $unused_href) = @_;

    # nothing is unused: nothing to do
    return if ! %{$unused_href};

    my @pid_vars = grep { m/ ^ hf_ [a-zA-Z0-9_]+ _pid $ /xmso} keys %{$unused_href};

    # none, or multiple unused hf_..._pid: leave the hash alone
    return if @pid_vars != 1;

    # no ..._add_oui() name in the source: leave the hash alone
    return if ${$code_ref} !~ m{ llc_add_oui | ieee802a_add_oui }xmso;

    # hf_...pid unused var && a call to ..._add_oui(); delete entry from unused
    # XXX: maybe hf_..._pid should really be added to hfUsed ?
    delete $unused_href->{ $pid_vars[0] };

    return;
}
# ---------------------------------------------------------------------
# action: print one line per hash entry, in sorted key order:
#         the title (padded/truncated to 40 characters), the value
#         (at most 5 characters; "undef" for an undefined value), the key
# args:   title, href

sub debug_print_hash {
    my ($title, $href) = @_;

    my $row_format = "%-40.40s %5.5s %s\n";

    ##print "==> $title\n";
    foreach my $name (sort keys %{$href}) {
        my $shown = defined $href->{$name} ? $href->{$name} : "undef";
        printf $row_format, $title, $shown, $name;
    }
}