3 # Copyright 2013, William Meier (See AUTHORS file)
5 # Validate hf_... and ei_... usage for a dissector file;
7 # Usage: checkhf.pl [--debug=?] <file or files>
9 # Wireshark - Network traffic analyzer
10 # By Gerald Combs <gerald@wireshark.org>
11 # Copyright 1998 Gerald Combs
13 # SPDX-License-Identifier: GPL-2.0-or-later
16 ## Note: This program is a re-implementation of the
17 ## original checkhf.pl written and (C) by Joerg Mayer.
18 ## The overall objective of the new implementation was to reduce
19 ## the number of false positives which occurred with the
20 ## original checkhf.pl
22 ## This program can be used to scan original .c source files or source
23 ## files which have been passed through a C pre-processor.
24 ## Operating on pre-processed source files is optimal; There should be
25 ## minimal false positives.
26 ## If the .c input is an original source file there may very well be
27 ## false positives/negatives due to the fact that the hf_... variables & etc
28 ## may be created via macros.
30 ## ----- (The following is extracted from the original checkhf.pl with thanks to Joerg) -------
32 ## ~/work/wireshark/trunk/epan/dissectors> ../../tools/checkhf.pl packet-afs.c
33 ## Unused entry: packet-afs.c, hf_afs_ubik_voteend
34 ## Unused entry: packet-afs.c, hf_afs_ubik_errcode
35 ## Unused entry: packet-afs.c, hf_afs_ubik_votetype
36 ## ERROR: NO ARRAY: packet-afs.c, hf_afs_fs_ipaddr
38 ## or checkhf.pl packet-*.c, which will check all the dissector files.
40 ## NOTE: This tool currently generates false positives!
42 ## The "NO ARRAY" messages - if accurate - point to an error that will
43 ## cause (t|wire)shark to report a DISSECTOR_BUG when a packet containing
44 ## this particular element is being dissected.
46 ## The "Unused entry" message indicates the opposite: We define an entry but
47 ## never use it (e.g., in a proto_...add... function).
48 ## ------------------------------------------------------------------------------------
50 # ------------------------------------------------------------------------------------
54 # 1. Clean the input: remove blank lines, comments, quoted strings and code under '#if 0'.
56 # Find (and remove from input) list of hf_... variable
57 # definitions ('static? g?int hf_... ;')
58 # 2. hf_array_entries:
59 # Find (and remove from input) list of hf_... variables
60 # referenced in the hf[] entries;
62 # From the remaining input, extract list of all strings of form hf_...
63 # (which may include strings which are not actually valid
64 # hf_... variable references).
66 # If entries in hf_defs not in hf_usage then "unused" (for static hf_defs only)
67 # If entries in hf_defs not in hf_array_entries then "ERROR: NO ARRAY";
75 my $debug = 0; # default: off; 1=cmt; 2=#if0; 3=hf_defs; 4=hf_array_entries; 5=hfusage (See code)
79 'help|?' => \
$help_flag
81 if (!$sts || $help_flag || !$ARGV[0]) {
87 while (my $filename = $ARGV[0]) {
91 my (%hf_defs, %hf_static_defs, %hf_array_entries, %hf_usage);
92 my ($unused_href, $no_array_href);
93 my (%ei_defs, %ei_static_defs, %ei_array_entries, %ei_usage);
94 my ($unused_ei, $no_array_ei);
96 read_file
(\
$filename, \
$file_contents);
98 remove_comments
(\
$file_contents, $filename);
99 remove_blank_lines
(\
$file_contents, $filename);
100 $file_contents =~ s/^\s+//m; # Remove leading spaces
101 remove_quoted_strings
(\
$file_contents, $filename);
102 remove_if0_code
(\
$file_contents, $filename);
104 find_remove_hf_defs
(\
$file_contents, $filename, \
%hf_defs);
105 find_remove_hf_array_entries
(\
$file_contents, $filename, \
%hf_array_entries);
106 find_remove_proto_get_id_hf_assignments
(\
$file_contents, $filename, \
%hf_array_entries);
107 find_hf_usage
(\
$file_contents, $filename, \
%hf_usage);
109 find_remove_ei_defs
(\
$file_contents, $filename, \
%ei_defs);
110 find_remove_ei_array_entries
(\
$file_contents, $filename, \
%ei_array_entries);
111 find_ei_usage
(\
$file_contents, $filename, \
%ei_usage);
114 # 1. Are all the static hf_defs and ei_defs entries in hf_usage and ei_usage?
115 # if not: "Unused entry:"
118 # create a hash containing entries just for the static definitions
119 @hf_static_defs{grep {$hf_defs{$_} == 0} keys %hf_defs} = (); # All values in the new hash will be undef
120 @ei_static_defs{grep {$ei_defs{$_} == 0} keys %ei_defs} = (); # All values in the new hash will be undef
122 $unused_href = diff_hash
(\
%hf_static_defs, \
%hf_usage);
123 remove_hf_pid_from_unused_if_add_oui_call
(\
$file_contents, $filename, $unused_href);
125 $unused_ei = diff_hash
(\
%ei_static_defs, \
%ei_usage);
127 print_list
("Unused href entry: $filename: ", $unused_href);
128 print_list
("Unused ei entry: $filename: ", $unused_ei);
130 # 2. Are all the hf_defs and ei_ entries (static and global) in [hf|ei]_array_entries ?
131 # (Note: if a static hf_def or ei is "unused", don't check for same in [hf|ei]_array_entries)
132 # if not: "ERROR: NO ARRAY"
134 ## Checking for missing global defs currently gives false positives
135 ## So: only check static defs for now.
136 ## $no_array_href = diff_hash(\%hf_defs, \%hf_array_entries);
137 $no_array_href = diff_hash
(\
%hf_static_defs, \
%hf_array_entries);
138 $no_array_href = diff_hash
($no_array_href, $unused_href); # Remove "unused" hf_... from no_array list
139 $no_array_ei = diff_hash
(\
%ei_static_defs, \
%ei_array_entries);
140 $no_array_ei = diff_hash
($no_array_ei, $unused_ei); # Remove "unused" ei_... from no_array list
142 print_list
("ERROR: NO ARRAY: $filename: ", $no_array_href);
143 print_list
("ERROR: NO ARRAY: $filename: ", $no_array_ei);
145 if ((keys %{$no_array_href}) != 0) {
148 if ((keys %{$no_array_ei}) != 0) {
153 exit (($error == 0) ?
0 : 1); # exit 1 if ERROR
156 # ---------------------------------------------------------------------
159 print "Usage: $0 [--debug=n] Filename [...]\n";
163 # ---------------------------------------------------------------------
164 # action: read contents of a file to specified string
165 # arg: filename_ref, file_contents_ref
168 my ($filename_ref, $file_contents_ref) = @_;
170 die "No such file: \"${$filename_ref}\"\n" if (! -e
${$filename_ref});
172 # delete leading './'
173 ${$filename_ref} =~ s{ ^ [.] / } {}xmso;
175 # Read in the file (ouch, but it's easier that way)
176 open(my $fci, "<:crlf", ${$filename_ref}) || die("Couldn't open ${$filename_ref}");
178 ${$file_contents_ref} = do { local( $/ ) ; <$fci> } ;
# ---------------------------------------------------------------------
# action:  Build a hash holding every key of 'a' that is not a key of 'b'
# args:    a_href, b_href  (hash references; neither hash is modified)
# returns: reference to a newly created hash; all of its values are undef
sub diff_hash {
    my ($a_href, $b_href) = @_;

    my %only_in_a;
    for my $key (keys %{$a_href}) {
        next if exists $b_href->{$key};    # present in 'b': not a difference
        $only_in_a{$key} = undef;
    }

    return \%only_in_a;
}
200 # ---------------------------------------------------------------------
201 # action: print a list
202 # arg: hdr, list_href
205 my ($hdr, $list_href) = @_;
# action: delete blank lines from the input string.  The pattern consumes
#         all whitespace found at the start of a line, so leading
#         indentation on the remaining lines is stripped as well.
# arg:    code_ref (the string is edited in place), filename (not used)
sub remove_blank_lines {
    my $text_ref = shift;    # the second argument (filename) is ignored

    for my $text (${$text_ref}) {
        $text =~ s{ ^ \s* \n ? } {}xmsog;
    }

    return;
}
# action:  build the regex used to recognise C string and character literals
# returns: compiled regex matching one double-quoted or one single-quoted
#          literal
sub get_quoted_str_regex {
    # Double-quoted literal: any run of escape pairs or ordinary characters.
    # The 's' modifier lets '\\.' match a backslash followed by a new-line,
    # so a literal containing a line continuation is matched as one unit.
    # An unescaped new-line is excluded, so the match never leaves the line.
    my $dq_literal = qr{ (?: ["] (?: \\. | [^\"\\\n])* ["]) }xmso;

    # Single-quoted (character) literal, same construction.
    my $sq_literal = qr{ (?: ['] (?: \\. | [^\'\\\n])* [']) }xmso;

    return qr{ $dq_literal | $sq_literal }xmso;
}
# action: remove C-style (/* ... */) and C++-style (// ...) comments from
#         the input string
# arg:    code_ref (the string is edited in place),
#         filename (used only in the debug trace)
sub remove_comments {
    my ($code_ref, $filename) = @_;

    # The regexp below is based on one from:
    # https://web.archive.org/web/20080614012925/http://aspn.activestate.com/ASPN/Cookbook/Rx/Recipe/59811
    # It is in the public domain.
    # It matches one complete C-style comment, including multi-line ones.
    my $block_comment = qr{ / [*] [^*]* [*]+ (?: [^/*] [^*]* [*]+ )* / }xmso;

    # A '//' only starts a comment when it is not inside a literal such as
    # "coap://".  So the text before the comment is consumed as a sequence of
    # quoted literals, lone '/' (division) and other characters, and kept
    # via $1.
    my $literal      = get_quoted_str_regex();
    my $line_comment = qr{ ^((?: $literal | /(?!/) | [^'"/\n] )*) // .*$ }xm;

    for my $text (${$code_ref}) {
        $text =~ s{ $block_comment } {}xmsog;
        # The replacement is " $1 ": the kept prefix padded by one blank on
        # each side.
        $text =~ s{ $line_comment } { $1 }xmg;
    }

    if ($debug == 1) {
        print "==> After Remove Comments: code: [$filename]\n${$code_ref}\n===<\n";
    }

    return;
}
# action: delete every double-quoted and single-quoted literal from the
#         input string
# arg:    code_ref (the string is edited in place),
#         filename (used only in the debug trace)
sub remove_quoted_strings {
    my ($code_ref, $filename) = @_;

    my $literal = get_quoted_str_regex();
    ${$code_ref} =~ s{ $literal } {}xmsog;

    if ($debug == 1) {
        print "==> After Remove quoted strings: code: [$filename]\n${$code_ref}\n===<\n";
    }

    return;
}
# action: remove '#if 0'd code from the input string
# args:   codeRef  (the string is edited in place),
#         fileName (used in the error message and the debug trace)
# returns: codeRef
#
# Essentially: split the input into blocks of code or lines of #if/#if 0/etc.
# Remove blocks that follow '#if 0' until the matching '#else/#endif' is found.
# The preprocessor lines themselves are always kept.
# Dies if an '#endif' is seen without a matching '#if'.
sub remove_if0_code {
    my ($codeRef, $fileName) = @_;

    # Preprocess input: no leading blanks/tabs before '#' on ANY line
    # (the split below only recognises a '#' in the first column) and a
    # trailing LF so that the last line is terminated like all the others.
    $$codeRef =~ s/^[ \t]*#/#/mg;
    if ($$codeRef !~ /\n\z/) { $$codeRef .= "\n"; }

    # Split into blocks of normal code or lines with conditionals.
    # The capturing group makes split() return the conditional lines too.
    my $ifRegExp = qr/if 0|if|else|endif/;
    my @blocks = split(/^(#\s*(?:$ifRegExp).*\n)/m, $$codeRef);

    # if_lvl:  current #if nesting depth
    # if0_lvl: depth of the '#if 0' whose body is being dropped (0 = none)
    # if0:     1 while inside the dropped part of that '#if 0'
    my ($if_lvl, $if0_lvl, $if0) = (0,0,0);
    my $lines = '';

    for my $block (@blocks) {
        my $if;    # stays undef for blocks of ordinary code
        if ($block =~ /^#\s*($ifRegExp)/) {
            # #if/#if 0/#else/#endif processing
            $if = $1;
            if ($debug == 99) {
                print(STDERR "if0=$if0 if0_lvl=$if0_lvl lvl=$if_lvl [$if] - $block");
            }
            if ($if eq 'if') {
                $if_lvl += 1;
            } elsif ($if eq 'if 0') {
                $if_lvl += 1;
                if ($if0_lvl == 0) {
                    $if0_lvl = $if_lvl;
                    $if0     = 1;  # inside #if 0
                }
            } elsif ($if eq 'else') {
                if ($if0_lvl == $if_lvl) {
                    # The '#else' branch of the '#if 0' is live code.  Forget
                    # the '#if 0' completely so that a further '#if 0' nested
                    # in this branch is recognised and removed in its turn.
                    $if0     = 0;
                    $if0_lvl = 0;
                }
            } elsif ($if eq 'endif') {
                if ($if0_lvl == $if_lvl) {
                    $if0     = 0;
                    $if0_lvl = 0;
                }
                $if_lvl -= 1;
                if ($if_lvl < 0) {
                    die "patsub: #if/#endif mismatch in $fileName"
                }
            }
        }

        if ($debug == 99) {
            print(STDERR "if0=$if0 if0_lvl=$if0_lvl lvl=$if_lvl\n");
        }
        # Keep preprocessor lines and blocks that are not enclosed in #if 0
        if ($if or $if0 != 1) {
            $lines .= $block;
        }
    }
    $$codeRef = $lines;

    ($debug == 2) && print "==> After Remove if0: code: [$fileName]\n$$codeRef\n===<\n";

    return $codeRef;
}
351 # ---------------------------------------------------------------------
352 # action: Add to hash an entry for each
353 # 'static? g?int hf_...' definition (including array names)
354 # in the input string.
355 # The entry value will be 0 for 'static' definitions and 1 for 'global' definitions;
356 # Remove each definition found from the input string.
357 # args: code_ref, filename, hf_defs_href
358 # returns: ref to the hash
360 sub find_remove_hf_defs
{
361 my ($code_ref, $filename, $hf_defs_href) = @_;
363 # Build pattern to match any of the following
364 # static? g?int hf_foo = -1;
365 # static? g?int hf_foo[xxx];
366 # static? g?int hf_foo[xxx] = {
368 # p1: 'static? g?int hf_foo'
375 (hf_
[a
-zA
-Z0
-9_
]+) # hf_..
387 # p2b: '[xxx];' or '[xxx] = {'
395 my $hf_def_regex = qr{ $p1_regex (?: $p2a_regex | $p2b_regex ) }xmso;
397 while (${$code_ref} =~ m{ $hf_def_regex }xmsog) {
398 #print ">%s< >$2<\n", (defined $1) ? $1 ; "";
399 $hf_defs_href->{$2} = (defined $1) ?
0 : 1; # 'static' if $1 is defined.
401 ($debug == 3) && debug_print_hash
("VD: $filename", $hf_defs_href); # VariableDefinition
404 ${$code_ref} =~ s{ $hf_def_regex } {}xmsog;
405 ($debug == 3) && print "==> After remove hf_defs: code: [$filename]\n${$code_ref}\n===<\n";
410 # ---------------------------------------------------------------------
411 # action: Add to hash an entry (hf_...) for each hf[] entry.
412 # Remove each hf[] entries found from the input string.
413 # args: code_ref, filename, hf_array_entries_href
415 sub find_remove_hf_array_entries
{
416 my ($code_ref, $filename, $hf_array_entries_href) = @_;
418 # hf[] entry regex (to extract an hf_index_name and associated field type)
419 my $hf_array_entry_regex = qr
/
422 & \s
* ( [a
-zA
-Z0
-9_
]+ ) # &hf
424 \s
* [[] [^]]+ []] # optional array ref
430 (FT_
[a
-zA
-Z0
-9_
]+) # field type
435 HFILL
| HF_REF_TYPE_NONE
443 # find all the hf[] entries (searching ${$code_ref}).
444 while (${$code_ref} =~ m{ $hf_array_entry_regex }xmsog) {
445 ($debug == 98) && print "+++ $1 $2\n";
446 $hf_array_entries_href->{$1} = undef;
449 ($debug == 4) && debug_print_hash
("AE: $filename", $hf_array_entries_href); # ArrayEntry
452 ${$code_ref} =~ s{ $hf_array_entry_regex } {}xmsog;
453 ($debug == 4) && print "==> After remove hf_array_entries: code: [$filename]\n${$code_ref}\n===<\n";
# ---------------------------------------------------------------------
# action: Add to hash an entry (hf_...) for each hf_... var
#         found in statements of the form:
#            'hf_... = proto_registrar_get_id_byname ...'
#            'hf_... = proto_get_id_by_filter_name ...'
#         Print a note when such a variable is already a key of the hash.
#         Remove each such statement found from the input string.
# args:   code_ref (the string is edited in place),
#         filename (used only in the debug trace),
#         hf_array_entries_href (keys are added; values are undef)
sub find_remove_proto_get_id_hf_assignments {
    my ($code_ref, $filename, $hf_array_entries_href) = @_;

    my $_regex = qr{ ( hf_ [a-zA-Z0-9_]+ )
                     \s* = \s*
                     (?: proto_registrar_get_id_byname | proto_get_id_by_filter_name )
               }xmso;

    my @hfvars = ${$code_ref} =~ m{ $_regex }xmsog;

    # Nothing of this form in the file: leave input and hash untouched.
    if (@hfvars == 0) {
        return;
    }

    # Sanity check: hf_vars shouldn't already be in hf_array_entries.
    # Entries are stored with undef values, so presence has to be tested
    # with exists() on each key; defined() on the slice can never be true.
    my @already_listed = grep { exists $hf_array_entries_href->{$_} } @hfvars;
    if (@already_listed) {
        print "? one or more of [@hfvars] initialized via proto_registrar_get_by_name() also in hf[] ??\n";
    }

    # Now: add to hf_array_entries
    @{$hf_array_entries_href}{@hfvars} = ();

    ($debug == 4) && debug_print_hash("PR: $filename", $hf_array_entries_href);

    # remove from input (so not considered as 'usage')
    ${$code_ref} =~ s{ $_regex } {}xmsog;

    ($debug == 4) && print "==> After remove proto_registrar_by_name: code: [$filename]\n${$code_ref}\n===<\n";

    return;
}
499 # ---------------------------------------------------------------------
500 # action: Add to hash all hf_... strings remaining in input string.
501 # args: code_ref, filename, hf_usage_href
502 # return: ref to hf_usage hash
504 # The hash will include *all* strings of form hf_...
505 # which are in the input string (even strings which
506 # aren't actually vars).
507 # We don't care since we'll be checking only
508 # known valid vars against these strings.
511 my ($code_ref, $filename, $hf_usage_href) = @_;
513 my $hf_usage_regex = qr{
514 \b ( hf_
[a
-zA
-Z0
-9_
]+ ) # hf_...
517 while (${$code_ref} =~ m{ $hf_usage_regex }xmsog) {
519 $hf_usage_href->{$1} += 1;
522 ($debug == 5) && debug_print_hash
("VU: $filename", $hf_usage_href); # VariableUsage
# ---------------------------------------------------------------------
# action: Remove from the 'unused' hash the variable named hf_..._pid
#         if the source has a call to llc_add_oui() or ieee802a_add_oui().
#         Nothing is removed unless exactly one hf_..._pid key is present.
#         (This is rather a bit of a hack).
# args:   code_ref (read only), filename (not used),
#         unused_href (hash is edited in place)
sub remove_hf_pid_from_unused_if_add_oui_call {
    my ($code_ref, $filename, $unused_href) = @_;

    # Nothing reported as unused: nothing to do.
    return if (keys %{$unused_href}) == 0;

    my @pid_vars = grep { m/ ^ hf_ [a-zA-Z0-9_]+ _pid $ /xmso} keys %{$unused_href};

    # Act only when there is exactly one candidate (none, or several
    # unused hf_..._pid variables, are left alone).
    return if @pid_vars != 1;

    # No ..._add_oui() call in the source: the variable really is unused.
    return if ${$code_ref} !~ m{ llc_add_oui | ieee802a_add_oui }xmso;

    # hf_...pid unused var && a call to ..._add_oui(); delete entry from unused
    # XXX: maybe hf_..._pid should really be added to hfUsed ?
    delete $unused_href->{ $pid_vars[0] };

    return;
}
557 # ---------------------------------------------------------------------
558 # action: Add to hash an entry for each
559 # 'static? expert_field ei_...' definition (including array names)
560 # in the input string.
561 # The entry value will be 0 for 'static' definitions and 1 for 'global' definitions;
562 # Remove each definition found from the input string.
563 # args: code_ref, filename, ei_defs_eiref
564 # returns: ref to the hash
566 sub find_remove_ei_defs
{
567 my ($code_ref, $filename, $ei_defs_eiref) = @_;
569 # Build pattern to match any of the following
570 # static? expert_field ei_foo = -1;
571 # static? expert_field ei_foo[xxx];
572 # static? expert_field ei_foo[xxx] = {
574 # p1: 'static? expert_field ei_foo'
580 (ei_
[a
-zA
-Z0
-9_
]+) # ei_..
592 # p2b: '[xxx];' or '[xxx] = {'
600 my $ei_def_regex = qr{ $p1_regex (?: $p2a_regex | $p2b_regex ) }xmso;
602 while (${$code_ref} =~ m{ $ei_def_regex }xmsog) {
603 #print ">%s< >$2<\n", (defined $1) ? $1 ; "";
604 $ei_defs_eiref->{$2} = (defined $1) ?
0 : 1; # 'static' if $1 is defined.
606 ($debug == 3) && debug_print_hash
("VD: $filename", $ei_defs_eiref); # VariableDefinition
609 ${$code_ref} =~ s{ $ei_def_regex } {}xmsog;
610 ($debug == 3) && print "==> After remove ei_defs: code: [$filename]\n${$code_ref}\n===<\n";
615 # ---------------------------------------------------------------------
616 # action: Add to hash an entry (ei_...) for each ei[] entry.
617 # Remove each ei[] entries found from the input string.
618 # args: code_ref, filename, ei_array_entries_href
620 sub find_remove_ei_array_entries
{
621 my ($code_ref, $filename, $ei_array_entries_eiref) = @_;
623 # ei[] entry regex (to extract an ei_index_name and associated field type)
624 my $ei_array_entry_regex = qr
/
627 & \s
* ( [a
-zA
-Z0
-9_
]+ ) # &ei
629 \s
* [ [^]]+ ] # optional array ref
633 # \s* "[^"]+" # (filter string has been removed already)
635 PI_
[A
-Z0
-9_
]+ # event group
637 PI_
[A
-Z0
-9_
]+ # event severity
639 [^,]* # description string (already removed) or NULL
648 # find all the ei[] entries (searching ${$code_ref}).
649 while (${$code_ref} =~ m{ $ei_array_entry_regex }xsg) {
650 ($debug == 98) && print "+++ $1\n";
651 $ei_array_entries_eiref->{$1} = undef;
654 ($debug == 4) && debug_print_hash
("AE: $filename", $ei_array_entries_eiref); # ArrayEntry
657 ${$code_ref} =~ s{ $ei_array_entry_regex } {}xmsog;
658 ($debug == 4) && print "==> After remove ei_array_entries: code: [$filename]\n${$code_ref}\n===<\n";
663 # ---------------------------------------------------------------------
664 # action: Add to hash all ei_... strings remaining in input string.
665 # args: code_ref, filename, ei_usage_eiref
666 # return: ref to ei_usage hash
668 # The hash will include *all* strings of form ei_...
669 # which are in the input string (even strings which
670 # aren't actually vars).
671 # We don't care since we'll be checking only
672 # known valid vars against these strings.
675 my ($code_ref, $filename, $ei_usage_eiref) = @_;
677 my $ei_usage_regex = qr{
678 \b ( ei_
[a
-zA
-Z0
-9_
]+ ) # ei_...
681 while (${$code_ref} =~ m{ $ei_usage_regex }xmsog) {
683 $ei_usage_eiref->{$1} += 1;
686 ($debug == 5) && debug_print_hash
("VU: $filename", $ei_usage_eiref); # VariableUsage
691 # ---------------------------------------------------------------------
692 sub debug_print_hash
{
693 my ($title, $href) = @_;
695 ##print "==> $title\n";
696 for my $k (sort keys %{$href}) {
697 my $h = defined($href->{$k}) ?
$href->{$k} : "undef";
698 printf "%-40.40s %5.5s %s\n", $title, $h, $k;