tools/checkAPIs.pl

   1 #!/usr/bin/env perl
   2
   3 #
   4 # Copyright 2006, Jeff Morriss <jeff.morriss.ws[AT]gmail.com>
   5 #
   6 # A simple tool to check source code for function calls that should not
   7 # be called by Wireshark code and to perform certain other checks.
   8 #
   9 # Usage:
  10 # checkAPIs.pl [-M] [-g group1] [-g group2] ...
  11 #              [-s summary-group1] [-s summary-group2] ...
  12 #              [--nocheck-hf]
  13 #              [--nocheck-value-string-array]
  14 #              [--nocheck-shadow]
  15 #              [--debug]
  16 #              file1 file2 ...
  17 #
  18 # Wireshark - Network traffic analyzer
  19 # By Gerald Combs <gerald@wireshark.org>
  20 # Copyright 1998 Gerald Combs
  21 #
  22 # SPDX-License-Identifier: GPL-2.0-or-later
  23 #
  24
  25 use strict;
  26 use Encode;
  27 use English;
  28 use Getopt::Long;
  29 use Text::Balanced qw(extract_bracketed);
  30
# Table of API groups checked by this script.  Each group maps to a hash
# holding a 'count_errors' flag and a 'functions' array of API names; the
# per-API 'function_counts' hash is filled in while files are scanned.
my %APIs = (
        # API groups.
        # Group name, e.g. 'prohibited'
        # '<name>' => {
        #   'count_errors'      => 1,                     # 1 if these are errors, 0 if warnings
        #   'functions'         => [ 'f1', 'f2', ...],    # Function array
        #   'function_counts'   => {'f1',0, 'f2',0, ...}, # Function Counts hash (initialized in the code)
        # }
        #
        # APIs that MUST NOT be used in Wireshark
        'prohibited' => { 'count_errors' => 1, 'functions' => [
                # Memory-unsafe APIs
                # Use something that won't overwrite the end of your buffer instead
                # of these.
                #
                # Microsoft provides lists of unsafe functions and their
                # recommended replacements in "Security Development Lifecycle
                # (SDL) Banned Function Calls"
                # https://docs.microsoft.com/en-us/previous-versions/bb288454(v=msdn.10)
                # and "Deprecated CRT Functions"
                # https://docs.microsoft.com/en-us/previous-versions/ms235384(v=vs.100)
                #
                'atoi', # use wsutil/strtoi.h functions
                'gets',
                'sprintf',
                'g_sprintf',
                'vsprintf',
                'g_vsprintf',
                'strcpy',
                'strncpy',
                'strcat',
                'strncat',
                'cftime',
                'ascftime',
                ### non-portable APIs
                # use glib (g_*) versions instead of these:
                'ntohl',
                'ntohs',
                'htonl',
                'htons',
                'strdup',
                'strndup',
                # Windows doesn't have this; use g_ascii_strtoull() instead
                'strtoull',
                ### non-portable: fails on Windows Wireshark built with VC newer than VC6
                # See https://gitlab.com/wireshark/wireshark/-/issues/6695#note_400659130
                'g_fprintf',
                'g_vfprintf',
                # use native snprintf() and vsnprintf() instead of these:
                'g_snprintf',
                'g_vsnprintf',
                ### non-ANSI C
                # use memset, memcpy, memcmp instead of these:
                'bzero',
                'bcopy',
                'bcmp',
                # The MSDN page for ZeroMemory recommends SecureZeroMemory
                # instead.
                'ZeroMemory',
                # use wmem_*, ep_*, or g_* functions instead of these:
                # (One thing to be aware of is that space allocated with malloc()
                # may not be freeable--at least on Windows--with g_free() and
                # vice-versa.)
                'malloc',
                'calloc',
                'realloc',
                'valloc',
                'free',
                'cfree',
                # Locale-unsafe APIs
                # These may have unexpected behaviors in some locales (e.g.,
                # "I" isn't always the upper-case form of "i", and "i" isn't
                # always the lower-case form of "I").  Use the g_ascii_* version
                # instead.
                'isalnum',
                'isascii',
                'isalpha',
                'iscntrl',
                'isdigit',
                'islower',
                'isgraph',
                'isprint',
                'ispunct',
                'isspace',
                'isupper',
                'isxdigit',
                'tolower',
                'atof',
                'strtod',
                'strcasecmp',
                'strncasecmp',
                # Deprecated in glib 2.68 in favor of g_memdup2
                # We have our local implementation for older versions
                'g_memdup',
                'g_strcasecmp',
                'g_strncasecmp',
                'g_strup',
                'g_strdown',
                'g_string_up',
                'g_string_down',
                'strerror',     # use g_strerror
                # Use the ws_* version of these:
                # (Necessary because on Windows we use UTF-8 throughout the code,
                # so we must tweak that to UTF-16 before operating on the file.  Code
                # using these functions will work unless the file/path name contains
                # non-ASCII chars.)
                'open',
                'rename',
                'mkdir',
                'stat',
                'unlink',
                'remove',
                'fopen',
                'freopen',
                'fstat',
                'lseek',
                # Misc
                'tmpnam',       # use mkstemp
                '_snwprintf'    # use StringCchPrintf
                ] },

        ### Soft-Deprecated functions that should not be used in new code but
        # have not been entirely removed from old code. These will become errors
        # once they've been removed from all existing code.
        'soft-deprecated' => { 'count_errors' => 0, 'functions' => [
                'tvb_length_remaining', # replaced with tvb_captured_length_remaining

                # Locale-unsafe APIs
                # These may have unexpected behaviors in some locales (e.g.,
                # "I" isn't always the upper-case form of "i", and "i" isn't
                # always the lower-case form of "I").  Use the g_ascii_* version
                # instead.
                'toupper'
            ] },

        # APIs that SHOULD NOT be used in Wireshark (any more)
        'deprecated' => { 'count_errors' => 1, 'functions' => [
                'perror',                                       # Use g_strerror() and report messages in whatever
                                                                #  fashion is appropriate for the code in question.
                'ctime',                                        # Use abs_time_secs_to_str()
                'next_tvb_add_port',                            # Use next_tvb_add_uint() (and a matching change
                                                                #  of NTVB_PORT -> NTVB_UINT)

                ### Deprecated GLib/GObject functions/macros
                # (The list is based upon the GLib 2.30.2 & GObject 2.30.2 documentation;
                #  An entry may be commented out if it is currently
                #  being used in Wireshark and if the replacement functionality
                #  is not available in all the GLib versions that Wireshark
                #  currently supports.
                # Note: Wireshark currently (Jan 2012) requires GLib 2.14 or newer.
                #  The Wireshark build currently (Jan 2012) defines G_DISABLE_DEPRECATED
                #  so use of any of the following should cause the Wireshark build to fail and
                #  therefore the tests for obsolete GLib function usage in checkAPIs should not be needed.)
                'G_ALLOC_AND_FREE',
                'G_ALLOC_ONLY',
                'g_allocator_free',                             # "use slice allocator" (avail since 2.10,2.14)
                'g_allocator_new',                              # "use slice allocator" (avail since 2.10,2.14)
                'g_async_queue_ref_unlocked',                   # g_async_queue_ref()   (OK since 2.8)
                'g_async_queue_unref_and_unlock',               # g_async_queue_unref() (OK since 2.8)
                'g_atomic_int_exchange_and_add',                # since 2.30
                'g_basename',
                'g_blow_chunks',                                # "use slice allocator" (avail since 2.10,2.14)
                'g_cache_value_foreach',                        # g_cache_key_foreach()
                'g_chunk_free',                                 # g_slice_free (avail since 2.10)
                'g_chunk_new',                                  # g_slice_new  (avail since 2.10)
                'g_chunk_new0',                                 # g_slice_new0 (avail since 2.10)
                'g_completion_add_items',                       # since 2.26
                'g_completion_clear_items',                     # since 2.26
                'g_completion_complete',                        # since 2.26
                'g_completion_complete_utf8',                   # since 2.26
                'g_completion_free',                            # since 2.26
                'g_completion_new',                             # since 2.26
                'g_completion_remove_items',                    # since 2.26
                'g_completion_set_compare',                     # since 2.26
                'G_CONST_RETURN',                               # since 2.26
                'g_date_set_time',                              # g_date_set_time_t (avail since 2.10)
                'g_dirname',
                'g_format_size_for_display',                    # since 2.30: use g_format_size()
                'G_GNUC_FUNCTION',
                'G_GNUC_PRETTY_FUNCTION',
                'g_hash_table_freeze',
                'g_hash_table_thaw',
                'G_HAVE_GINT64',
                'g_io_channel_close',
                'g_io_channel_read',
                'g_io_channel_seek',
                'g_io_channel_write',
                'g_list_pop_allocator',                         # "does nothing since 2.10"
                'g_list_push_allocator',                        # "does nothing since 2.10"
                'g_main_destroy',
                'g_main_is_running',
                'g_main_iteration',
                'g_main_new',
                'g_main_pending',
                'g_main_quit',
                'g_main_run',
                'g_main_set_poll_func',
                'g_mapped_file_free',                           # [as of 2.22: use g_map_file_unref]
                'g_mem_chunk_alloc',                            # "use slice allocator" (avail since 2.10)
                'g_mem_chunk_alloc0',                           # "use slice allocator" (avail since 2.10)
                'g_mem_chunk_clean',                            # "use slice allocator" (avail since 2.10)
                'g_mem_chunk_create',                           # "use slice allocator" (avail since 2.10)
                'g_mem_chunk_destroy',                          # "use slice allocator" (avail since 2.10)
                'g_mem_chunk_free',                             # "use slice allocator" (avail since 2.10)
                'g_mem_chunk_info',                             # "use slice allocator" (avail since 2.10)
                'g_mem_chunk_new',                              # "use slice allocator" (avail since 2.10)
                'g_mem_chunk_print',                            # "use slice allocator" (avail since 2.10)
                'g_mem_chunk_reset',                            # "use slice allocator" (avail since 2.10)
                'g_node_pop_allocator',                         # "does nothing since 2.10"
                'g_node_push_allocator',                        # "does nothing since 2.10"
                'g_relation_count',                             # since 2.26
                'g_relation_delete',                            # since 2.26
                'g_relation_destroy',                           # since 2.26
                'g_relation_exists',                            # since 2.26
                'g_relation_index',                             # since 2.26
                'g_relation_insert',                            # since 2.26
                'g_relation_new',                               # since 2.26
                'g_relation_print',                             # since 2.26
                'g_relation_select',                            # since 2.26
                'g_scanner_add_symbol',
                'g_scanner_remove_symbol',
                'g_scanner_foreach_symbol',
                'g_scanner_freeze_symbol_table',
                'g_scanner_thaw_symbol_table',
                'g_slist_pop_allocator',                        # "does nothing since 2.10"
                'g_slist_push_allocator',                       # "does nothing since 2.10"
                'g_source_get_current_time',                    # since 2.28: use g_source_get_time()
                'g_strcasecmp',                                 # also listed under 'prohibited'
                'g_strdown',                                    # also listed under 'prohibited'
                'g_string_down',                                # also listed under 'prohibited'
                'g_string_sprintf',                             # use g_string_printf() instead
                'g_string_sprintfa',                            # use g_string_append_printf instead
                'g_string_up',                                  # also listed under 'prohibited'
                'g_strncasecmp',                                # also listed under 'prohibited'
                'g_strup',                                      # also listed under 'prohibited'
                'g_tree_traverse',
                'g_tuples_destroy',                             # since 2.26
                'g_tuples_index',                               # since 2.26
                'g_unicode_canonical_decomposition',            # since 2.30: use g_unichar_fully_decompose()
                'G_UNICODE_COMBINING_MARK',                     # since 2.30:use G_UNICODE_SPACING_MARK
                'g_value_set_boxed_take_ownership',             # GObject
                'g_value_set_object_take_ownership',            # GObject
                'g_value_set_param_take_ownership',             # GObject
                'g_value_set_string_take_ownership',            # GObject
                'G_WIN32_DLLMAIN_FOR_DLL_NAME',
                'g_win32_get_package_installation_directory',
                'g_win32_get_package_installation_subdirectory',
                'qVariantFromValue'
                ] },

        'dissectors-prohibited' => { 'count_errors' => 1, 'functions' => [
                # APIs that make the program exit. Dissectors shouldn't call these.
                'abort',
                'assert',
                'assert_perror',
                'exit',
                'g_assert',
                'g_error',
                ] },

        'dissectors-restricted' => { 'count_errors' => 0, 'functions' => [
                # APIs that print to the terminal. Dissectors shouldn't call these.
                # FIXME: Explain what to use instead.
                'printf',
                'g_warning',
                ] },

);
 299
 300 my @apiGroups = qw(prohibited deprecated soft-deprecated);
 301
 302 # Defines array of pairs function/variable which are excluded
 303 # from prefs_register_*_preference checks
 304 my @excludePrefsCheck = (
 305          [ qw(prefs_register_password_preference), '(const char **)arg->pref_valptr' ],
 306          [ qw(prefs_register_string_preference), '(const char **)arg->pref_valptr' ],
 307 );
 308
 309
 310 # Given a ref to a hash containing "functions" and "functions_count" entries:
 311 # Determine if any item of the list of APIs contained in the array referenced by "functions"
 312 # exists in the file.
 313 # For each API which appears in the file:
 314 #     Push the API onto the provided list;
 315 #     Add the number of times the API appears in the file to the total count
 316 #      for the API (stored as the value of the API key in the hash referenced by "function_counts").
 317
 318 sub findAPIinFile($$$)
 319 {
 320         my ($groupHashRef, $fileContentsRef, $foundAPIsRef) = @_;
 321
 322         for my $api ( @{$groupHashRef->{functions}} )
 323         {
 324                 my $cnt = 0;
 325                 # Match function calls, but ignore false positives from:
 326                 # C++ method definition: int MyClass::open(...)
 327                 # Method invocation: myClass->open(...);
 328                 # Function declaration: int open(...);
 329                 # Method invocation: QString().sprintf(...)
 330                 while (${$fileContentsRef} =~ m/ \W (?<!::|->|\w\ ) (?<!\.) $api \W* \( /gx)
 331                 {
 332                         $cnt += 1;
 333                 }
 334                 if ($cnt > 0) {
 335                         push @{$foundAPIsRef}, $api;
 336                         $groupHashRef->{function_counts}->{$api} += 1;
 337                 }
 338         }
 339 }
 340
 341 # APIs which (generally) should not be called with an argument of tvb_get_ptr()
 342 my @TvbPtrAPIs = (
 343         # Use NULL for the value_ptr instead of tvb_get_ptr() (only if the
 344         # given offset and length are equal) with these:
 345         'proto_tree_add_bytes_format',
 346         'proto_tree_add_bytes_format_value',
 347         'proto_tree_add_ether',
 348         # Use the tvb_* version of these:
 349         # Use tvb_bytes_to_str[_punct] instead of:
 350         'bytes_to_str',
 351         'bytes_to_str_punct',
 352         'SET_ADDRESS',
 353         'SET_ADDRESS_HF',
 354 );
 355
 356 sub checkAPIsCalledWithTvbGetPtr($$$)
 357 {
 358         my ($APIs, $fileContentsRef, $foundAPIsRef) = @_;
 359
 360         for my $api (@{$APIs}) {
 361                 my @items;
 362                 my $cnt = 0;
 363
 364                 @items = (${$fileContentsRef} =~ m/ ($api [^;]* ; ) /xsg);
 365                 while (@items) {
 366                         my ($item) = @items;
 367                         shift @items;
 368                         if ($item =~ / tvb_get_ptr /xos) {
 369                                 $cnt += 1;
 370                         }
 371                 }
 372
 373                 if ($cnt > 0) {
 374                         push @{$foundAPIsRef}, $api;
 375                 }
 376         }
 377 }
 378
 379 # List of possible shadow variable (Majority coming from macOS..)
 380 my @ShadowVariable = (
 381         'index',
 382         'time',
 383         'strlen',
 384         'system'
 385 );
 386
 387 sub check_shadow_variable($$$)
 388 {
 389         my ($groupHashRef, $fileContentsRef, $foundAPIsRef) = @_;
 390
 391         for my $api ( @{$groupHashRef} )
 392         {
 393                 my $cnt = 0;
 394                 while (${$fileContentsRef} =~ m/ \s $api \s*+ [^\(\w] /gx)
 395                 {
 396                         $cnt += 1;
 397                 }
 398                 if ($cnt > 0) {
 399                         push @{$foundAPIsRef}, $api;
 400                 }
 401         }
 402 }
 403
 404 sub check_snprintf_plus_strlen($$)
 405 {
 406         my ($fileContentsRef, $filename) = @_;
 407         my @items;
 408         my $errorCount = 0;
 409
 410         # If we need to do more APIs, we can make this function look more like
 411         # checkAPIsCalledWithTvbGetPtr().
 412         @items = (${$fileContentsRef} =~ m/ (snprintf [^;]* ; ) /xsg);
 413         while (@items) {
 414                 my ($item) = @items;
 415                 shift @items;
 416                 if ($item =~ / strlen\s*\( /xos) {
 417                         print STDERR "Error: ".$filename." uses snprintf + strlen to assemble strings.\n";
 418                         $errorCount++;
 419                         last;
 420                 }
 421         }
 422         return $errorCount;
 423 }
 424
 425 sub check_complex_snprintf($$)
 426 {
 427         my ($fileContentsRef, $filename) = @_;
 428         my $errorCount = 0;
 429
 430         my @items = (${$fileContentsRef} =~ m/ (= \s* snprintf) /xsg);
 431         while (@items) {
 432                 my ($item) = @items;
 433                 shift @items;
 434                 print STDERR "Warning: ".$filename." appears to use snprintf to assemble\n" .
 435                         "strings. Consider using a wmem_strbuf or GString instead.\n";
 436                 # $errorCount++;
 437                 last;
 438         }
 439
 440         return $errorCount;
 441 }
 442
 443 #### Regex for use when searching for value-string definitions
 444 my $StaticRegex             = qr/ static \s+                                                            /xs;
 445 my $ConstRegex              = qr/ const  \s+                                                            /xs;
 446 my $Static_andor_ConstRegex = qr/ (?: $StaticRegex $ConstRegex | $StaticRegex | $ConstRegex)            /xs;
 447 my $ValueStringVarnameRegex = qr/ (?:value|val64|string|range|bytes)_string                             /xs;
 448 my $ValueStringRegex        = qr/ $Static_andor_ConstRegex ($ValueStringVarnameRegex) \ + [^;*#]+ = [^;]+ [{] .+? [}] \s*? ;  /xs;
 449 my $EnumValRegex            = qr/ $Static_andor_ConstRegex enum_val_t \ + [^;*]+ = [^;]+ [{] .+? [}] \s*? ;  /xs;
 450 my $NewlineStringRegex      = qr/ ["] [^"]* \\n [^"]* ["] /xs;
 451
 452 sub check_value_string_arrays($$$)
 453 {
 454         my ($fileContentsRef, $filename, $debug_flag) = @_;
 455         my $cnt = 0;
 456         # Brute force check for value_string (and string_string or range_string) arrays
 457         # which are missing {0, NULL} as the final (terminating) array entry
 458
 459         #  Assumption: definition is of form (pseudo-Regex):
 460         #    " (static const|static|const) (value|string|range)_string .+ = { .+ ;"
 461         #  (possibly over multiple lines)
 462         while (${$fileContentsRef} =~ / ( $ValueStringRegex ) /xsog) {
 463                 # XXX_string array definition found; check if NULL terminated
 464                 my $vs = my $vsx = $1;
 465                 my $type = $2;
 466                 if ($debug_flag) {
 467                         $vsx =~ / ( .+ $ValueStringVarnameRegex [^=]+ ) = /xo;
 468                         printf STDERR "==> %-35.35s: %s\n", $filename, $1;
 469                         printf STDERR "%s\n", $vs;
 470                 }
 471                 $vs =~ s{ \s } {}xg;
 472
 473                 # Check for expected trailer
 474                 my $expectedTrailer;
 475                 my $trailerHint;
 476                 if ($type eq "string_string") {
 477                         # XXX shouldn't we reject 0 since it is char *?
 478                         $expectedTrailer = "(NULL|0), NULL";
 479                         $trailerHint = "NULL, NULL";
 480                 } elsif ($type eq "range_string") {
 481                         $expectedTrailer = "0(x0+)?, 0(x0+)?, NULL";
 482                         $trailerHint = "0, 0, NULL";
 483                 } elsif ($type eq "bytes_string") {
 484                         # XXX shouldn't we reject 0 since it is uint8_t *?
 485                         $expectedTrailer = "(NULL|0), 0, NULL";
 486                         $trailerHint = "NULL, NULL";
 487                 } else {
 488                         $expectedTrailer = "0(x?0+)?, NULL";
 489                         $trailerHint = "0, NULL";
 490                 }
 491                 if ($vs !~ / [{] $expectedTrailer [}] ,? [}] ; $/x) {
 492                         $vsx =~ /( $ValueStringVarnameRegex [^=]+ ) = /xo;
 493                         printf STDERR "Error: %-35.35s: {%s} is required as the last %s array entry: %s\n", $filename, $trailerHint, $type, $1;
 494                         $cnt++;
 495                 }
 496
 497                 if ($vs !~ / (static)? const $ValueStringVarnameRegex /xo)  {
 498                         $vsx =~ /( $ValueStringVarnameRegex [^=]+ ) = /xo;
 499                         printf STDERR "Error: %-35.35s: Missing 'const': %s\n", $filename, $1;
 500                         $cnt++;
 501                 }
 502                 if ($vs =~ / $NewlineStringRegex /xo && $type ne "bytes_string")  {
 503                         $vsx =~ /( $ValueStringVarnameRegex [^=]+ ) = /xo;
 504                         printf STDERR "Error: %-35.35s: XXX_string contains a newline: %s\n", $filename, $1;
 505                         $cnt++;
 506                 }
 507         }
 508
 509         # Brute force check for enum_val_t arrays which are missing {NULL, NULL, ...}
 510         # as the final (terminating) array entry
 511         # For now use the same option to turn this and value_string checking on and off.
 512         # (Is the option even necessary?)
 513
 514         #  Assumption: definition is of form (pseudo-Regex):
 515         #    " (static const|static|const) enum_val_t .+ = { .+ ;"
 516         #  (possibly over multiple lines)
 517         while (${$fileContentsRef} =~ / ( $EnumValRegex ) /xsog) {
 518                 # enum_val_t array definition found; check if NULL terminated
 519                 my $vs = my $vsx = $1;
 520                 if ($debug_flag) {
 521                         $vsx =~ / ( .+ enum_val_t [^=]+ ) = /xo;
 522                         printf STDERR "==> %-35.35s: %s\n", $filename, $1;
 523                         printf STDERR "%s\n", $vs;
 524                 }
 525                 $vs =~ s{ \s } {}xg;
 526                 # README.developer says
 527                 #  "Don't put a comma after the last tuple of an initializer of an array"
 528                 # However: since this usage is present in some number of cases, we'll allow for now
 529                 if ($vs !~ / NULL, NULL, -?[0-9] [}] ,? [}] ; $/xo) {
 530                         $vsx =~ /( enum_val_t [^=]+ ) = /xo;
 531                         printf STDERR "Error: %-35.35s: {NULL, NULL, ...} is required as the last enum_val_t array entry: %s\n", $filename, $1;
 532                         $cnt++;
 533                 }
 534                 if ($vs !~ / (static)? const enum_val_t /xo)  {
 535                         $vsx =~ /( enum_val_t [^=]+ ) = /xo;
 536                         printf STDERR "Error: %-35.35s: Missing 'const': %s\n", $filename, $1;
 537                         $cnt++;
 538                 }
 539                 if ($vs =~ / $NewlineStringRegex /xo)  {
 540                         $vsx =~ /( (?:value|string|range)_string [^=]+ ) = /xo;
 541                         printf STDERR "Error: %-35.35s: enum_val_t contains a newline: %s\n", $filename, $1;
 542                         $cnt++;
 543                 }
 544         }
 545
 546         return $cnt;
 547 }
 548
 549
 550 sub check_included_files($$)
 551 {
 552         my ($fileContentsRef, $filename) = @_;
 553         my @incFiles;
 554
 555         @incFiles = (${$fileContentsRef} =~ m/\#include \s* ([<"].+[>"])/gox);
 556
 557         # files in the ui/qt directory should include the ui class includes
 558         # by using #include <>
 559         # this ensures that Visual Studio picks up these files from the
 560         # build directory if we're compiling with cmake
 561         if ($filename =~ m#ui/qt/# ) {
 562                 foreach (@incFiles) {
 563                         if ( m#"ui_.*\.h"$# ) {
 564                                 # strip the quotes to get the base name
 565                                 # for the error message
 566                                 s/\"//g;
 567
 568                                 print STDERR "$filename: ".
 569                                         "Please use #include <$_> ".
 570                                         "instead of #include \"$_\".\n";
 571                         }
 572                 }
 573         }
 574 }
 575
 576
 577 sub check_proto_tree_add_XXX($$)
 578 {
 579         my ($fileContentsRef, $filename) = @_;
 580         my @items;
 581         my $errorCount = 0;
 582
 583         @items = (${$fileContentsRef} =~ m/ (proto_tree_add_[_a-z0-9]+) \( ([^;]*) \) \s* ; /xsg);
 584
 585         while (@items) {
 586                 my ($func) = @items;
 587                 shift @items;
 588                 my ($args) = @items;
 589                 shift @items;
 590
 591                 #Check to make sure tvb_get* isn't used to pass into a proto_tree_add_<datatype>, when
 592                 #proto_tree_add_item could just be used instead
 593                 if ($args =~ /,\s*tvb_get_/xos) {
 594                         if (($func =~ m/^proto_tree_add_(time|bytes|ipxnet|ipv4|ipv6|ether|guid|oid|string|boolean|float|double|uint|uint64|int|int64|eui64|bitmask_list_value)$/)
 595                            ) {
 596                                 print STDERR "Error: ".$filename." uses $func with tvb_get_*. Use proto_tree_add_item instead\n";
 597                                 $errorCount++;
 598
 599                                 # Print out the function args to make it easier
 600                                 # to find the offending code.  But first make
 601                                 # it readable by eliminating extra white space.
 602                                 $args =~ s/\s+/ /g;
 603                                 print STDERR "\tArgs: " . $args . "\n";
 604                         }
 605                 }
 606
 607                 # Remove anything inside parenthesis in the arguments so we
 608                 # don't get false positives when someone calls
 609                 # proto_tree_add_XXX(..., tvb_YYY(..., ENC_ZZZ))
 610                 # and allow there to be newlines inside
 611                 $args =~ s/\(.*\)//sg;
 612
 613                 #Check for accidental usage of ENC_ parameter
 614                 if ($args =~ /,\s*ENC_/xos) {
 615                         if (!($func =~ /proto_tree_add_(time|item|bitmask|[a-z0-9]+_bits_format_value|bits_item|bits_ret_val|item_ret_int|item_ret_uint|bytes_item|checksum)/xos)
 616                            ) {
 617                                 print STDERR "Error: ".$filename." uses $func with ENC_*.\n";
 618                                 $errorCount++;
 619
 620                                 # Print out the function args to make it easier
 621                                 # to find the offending code.  But first make
 622                                 # it readable by eliminating extra white space.
 623                                 $args =~ s/\s+/ /g;
 624                                 print STDERR "\tArgs: " . $args . "\n";
 625                         }
 626                 }
 627         }
 628
 629         return $errorCount;
 630 }
 631
 632
 633 # Verify that all declared ett_ variables are registered.
 634 # Don't bother trying to check usage (for now)...
 635 sub check_ett_registration($$)
 636 {
 637         my ($fileContentsRef, $filename) = @_;
 638         my @ett_declarations;
 639         my @ett_address_uses;
 640         my %ett_uses;
 641         my @unUsedEtts;
 642         my $errorCount = 0;
 643
 644         # A pattern to match ett variable names.  Obviously this assumes that
 645         # they start with `ett_`
 646         my $EttVarName = qr{ (?: ett_[a-z0-9_]+ (?:\[[0-9]+\])? ) }xi;
 647
 648         # Find all the ett_ variables declared in the file
 649         @ett_declarations = (${$fileContentsRef} =~ m{
 650                 ^                       # assume declarations are on their own line
 651                 (?:static\s+)?          # some declarations aren't static
 652                 g?int                   # could be int or gint
 653                 \s+
 654                 ($EttVarName)           # variable name
 655                 \s*=\s*
 656                 -1\s*;
 657         }xgiom);
 658
 659         if (!@ett_declarations) {
 660                 # Only complain if the file looks like a dissector
 661                 #print STDERR "Found no etts in ".$filename."\n" if
 662                 #        (${$fileContentsRef} =~ m{proto_register_field_array}os);
 663                 return;
 664         }
 665         #print "Found these etts in ".$filename.": ".join(' ', @ett_declarations)."\n\n";
 666
 667         # Find all the uses of the *addresses* of ett variables in the file.
 668         # (We assume if someone is using the address they're using it to
 669         # register the ett.)
 670         @ett_address_uses = (${$fileContentsRef} =~ m{
 671                 &\s*($EttVarName)
 672         }xgiom);
 673
 674         if (!@ett_address_uses) {
 675                 print STDERR "Found no ett address uses in ".$filename."\n";
 676                 # Don't treat this as an error.
 677                 # It's more likely a problem with checkAPIs.
 678                 return;
 679         }
 680         #print "Found these etts addresses used in ".$filename.": ".join(' ', @ett_address_uses)."\n\n";
 681
 682         # Convert to a hash for fast lookup
 683         $ett_uses{$_}++ for (@ett_address_uses);
 684
 685         # Find which declared etts are not used.
 686         while (@ett_declarations) {
 687                 my ($ett_var) = @ett_declarations;
 688                 shift @ett_declarations;
 689
 690                 push(@unUsedEtts, $ett_var) if (not exists $ett_uses{$ett_var});
 691         }
 692
 693         if (@unUsedEtts) {
 694                 print STDERR "Error: found these unused ett variables in ".$filename.": ".join(' ', @unUsedEtts)."\n";
 695                 $errorCount++;
 696         }
 697
 698         return $errorCount;
 699 }
 700
 701 # Given the file contents and a file name, check all of the hf entries for
 702 # various problems (such as those checked for in proto.c).
 703 sub check_hf_entries($$)
 704 {
 705         my ($fileContentsRef, $filename) = @_;
 706         my $errorCount = 0;
 707
 708         my @items;
 709         my $hfRegex = qr{
 710                                   \{
 711                                   \s*
 712                                   &\s*([A-Z0-9_\[\]-]+)         # &hf
 713                                   \s*,\s*
 714         }xis;
 715         @items = (${$fileContentsRef} =~ m{
 716                                   $hfRegex                      # &hf
 717                                   \{\s*
 718                                   ("[A-Z0-9 '\./\(\)_:-]+")     # name
 719                                   \s*,\s*
 720                                   (NULL|"[A-Z0-9_\.-]*")        # abbrev
 721                                   \s*,\s*
 722                                   (FT_[A-Z0-9_]+)               # field type
 723                                   \s*,\s*
 724                                   ([A-Z0-9x\|_\s]+)             # display
 725                                   \s*,\s*
 726                                   ([^,]+?)                      # convert
 727                                   \s*,\s*
 728                                   ([A-Z0-9_]+)                  # bitmask
 729                                   \s*,\s*
 730                                   (NULL|"[A-Z0-9 '\./\(\)\?_:-]+")      # blurb (NULL or a string)
 731                                   \s*,\s*
 732                                   HFILL                         # HFILL
 733         }xgios);
 734
 735         #print "Found @items items\n";
 736         while (@items) {
 737                 ##my $errorCount_save = $errorCount;
 738                 my ($hf, $name, $abbrev, $ft, $display, $convert, $bitmask, $blurb) = @items;
 739                 shift @items; shift @items; shift @items; shift @items; shift @items; shift @items; shift @items; shift @items;
 740
 741                 $display =~ s/\s+//g;
 742                 $convert =~ s/\s+//g;
 743                 # GET_VALS_EXTP is a macro in packet-mq.h for packet-mq.c and packet-mq-pcf.c
 744                 $convert =~ s/\bGET_VALS_EXTP\(/VALS_EXT_PTR\(/;
 745
 746                 #print "name=$name, abbrev=$abbrev, ft=$ft, display=$display, convert=>$convert<, bitmask=$bitmask, blurb=$blurb\n";
 747
 748                 if ($abbrev eq '""' || $abbrev eq "NULL") {
 749                         print STDERR "Error: $hf does not have an abbreviation in $filename\n";
 750                         $errorCount++;
 751                 }
 752                 if ($abbrev =~ m/\.\.+/) {
 753                         print STDERR "Error: the abbreviation for $hf ($abbrev) contains two or more sequential periods in $filename\n";
 754                         $errorCount++;
 755                 }
 756                 if ($name eq $abbrev) {
 757                         print STDERR "Error: the abbreviation for $hf ($abbrev) matches the field name ($name) in $filename\n";
 758                         $errorCount++;
 759                 }
 760                 if (lc($name) eq lc($blurb)) {
 761                         print STDERR "Error: the blurb for $hf ($blurb) matches the field name ($name) in $filename\n";
 762                         $errorCount++;
 763                 }
 764                 if ($name =~ m/"\s+/) {
 765                         print STDERR "Error: the name for $hf ($name) has leading space in $filename\n";
 766                         $errorCount++;
 767                 }
 768                 if ($name =~ m/\s+"/) {
 769                         print STDERR "Error: the name for $hf ($name) has trailing space in $filename\n";
 770                         $errorCount++;
 771                 }
 772                 if ($blurb =~ m/"\s+/) {
 773                         print STDERR "Error: the blurb for $hf ($blurb) has leading space in $filename\n";
 774                         $errorCount++;
 775                 }
 776                 if ($blurb =~ m/\s+"/) {
 777                         print STDERR "Error: the blurb for $hf ($blurb) has trailing space in $filename\n";
 778                         $errorCount++;
 779                 }
 780                 if ($abbrev =~ m/\s+/) {
 781                         print STDERR "Error: the abbreviation for $hf ($abbrev) has white space in $filename\n";
 782                         $errorCount++;
 783                 }
 784                 if ("\"".$hf ."\"" eq $name) {
 785                         print STDERR "Error: name is the hf_variable_name in field $name ($abbrev) in $filename\n";
 786                         $errorCount++;
 787                 }
 788                 if ("\"".$hf ."\"" eq $abbrev) {
 789                         print STDERR "Error: abbreviation is the hf_variable_name in field $name ($abbrev) in $filename\n";
 790                         $errorCount++;
 791                 }
 792                 if ($ft ne "FT_BOOLEAN" && $convert =~ m/^TFS\(.*\)/) {
 793                         print STDERR "Error: $hf uses a true/false string but is an $ft instead of FT_BOOLEAN in $filename\n";
 794                         $errorCount++;
 795                 }
 796                 if ($ft eq "FT_BOOLEAN" && $convert =~ m/^VALS\(.*\)/) {
 797                         print STDERR "Error: $hf uses a value_string but is an FT_BOOLEAN in $filename\n";
 798                         $errorCount++;
 799                 }
 800                 if (($ft eq "FT_BOOLEAN") && ($bitmask !~ /^(0x)?0+$/) && ($display =~ /^BASE_/)) {
 801                         print STDERR "Error: $hf: FT_BOOLEAN with a bitmask must specify a 'parent field width' for 'display' in $filename\n";
 802                         $errorCount++;
 803                 }
 804                 if (($ft eq "FT_BOOLEAN") && ($convert !~ m/^((0[xX]0?)?0$|NULL$|TFS)/)) {
 805                         print STDERR "Error: $hf: FT_BOOLEAN with non-null 'convert' field missing TFS in $filename\n";
 806                         $errorCount++;
 807                 }
 808                 if ($convert =~ m/RVALS/ && $display !~ m/BASE_RANGE_STRING/) {
 809                         print STDERR "Error: $hf uses RVALS but 'display' does not include BASE_RANGE_STRING in $filename\n";
 810                         $errorCount++;
 811                 }
 812                 if ($convert =~ m/VALS64/ && $display !~ m/BASE_VAL64_STRING/) {
 813                         print STDERR "Error: $hf uses VALS64 but 'display' does not include BASE_VAL64_STRING in $filename\n";
 814                         $errorCount++;
 815                 }
 816                 if ($display =~ /BASE_EXT_STRING/ && $convert !~ /^(VALS_EXT_PTR\(|&)/) {
 817                         print STDERR "Error: $hf: BASE_EXT_STRING should use VALS_EXT_PTR for 'strings' instead of '$convert' in $filename\n";
 818                         $errorCount++;
 819                 }
 820                 if ($display =~ /BASE_UNIT_STRING/ && ($convert !~ m/^((0[xX]0?)?0$|NULL$|UNS)/)) {
 821                         print STDERR "Error: $hf: BASE_UNIT_STRING with non-null 'convert' field missing UNS in $filename\n";
 822                         $errorCount++;
 823                 }
 824                 if ($ft =~ m/^FT_U?INT(8|16|24|32)$/ && $convert =~ m/^VALS64\(/) {
 825                         print STDERR "Error: $hf: 32-bit field must use VALS instead of VALS64 in $filename\n";
 826                         $errorCount++;
 827                 }
 828                 if ($ft =~ m/^FT_U?INT(40|48|56|64)$/ && $convert =~ m/^VALS\(/) {
 829                         print STDERR "Error: $hf: 64-bit field must use VALS64 instead of VALS in $filename\n";
 830                         $errorCount++;
 831                 }
 832                 if ($convert =~ m/^(VALS|VALS64|RVALS)\(&.*\)/) {
 833                         print STDERR "Error: $hf is passing the address of a pointer to $1 in $filename\n";
 834                         $errorCount++;
 835                 }
 836                 if ($convert !~ m/^((0[xX]0?)?0$|NULL$|VALS|VALS64|VALS_EXT_PTR|RVALS|TFS|UNS|CF_FUNC|FRAMENUM_TYPE|&|STRINGS_ENTERPRISES)/ && $display !~ /BASE_CUSTOM/) {
 837                         print STDERR "Error: non-null $hf 'convert' field missing 'VALS|VALS64|RVALS|TFS|UNS|CF_FUNC|FRAMENUM_TYPE|&|STRINGS_ENTERPRISES' in $filename ?\n";
 838                         $errorCount++;
 839                 }
 840 ## Benign...
 841 ##              if (($ft eq "FT_BOOLEAN") && ($bitmask =~ /^(0x)?0+$/) && ($display ne "BASE_NONE")) {
 842 ##                      print STDERR "Error: $abbrev: FT_BOOLEAN with no bitmask must use BASE_NONE for 'display' in $filename\n";
 843 ##                      $errorCount++;
 844 ##              }
 845                 ##if ($errorCount != $errorCount_save) {
 846                 ##        print STDERR "name=$name, abbrev=$abbrev, ft=$ft, display=$display, convert=>$convert<, bitmask=$bitmask, blurb=$blurb\n";
 847                 ##}
 848
 849         }
 850
 851         return $errorCount;
 852 }
 853
 854 sub check_pref_var_dupes($$)
 855 {
 856         my ($filecontentsref, $filename) = @_;
 857         my $errorcount = 0;
 858
 859         # Avoid flagging the actual prototypes
 860         return 0 if $filename =~ /prefs\.[ch]$/;
 861
 862         # remove macro lines
 863         my $filecontents = ${$filecontentsref};
 864         $filecontents =~ s { ^\s*\#.*$} []xogm;
 865
 866         # At what position is the variable in the prefs_register_*_preference() call?
 867         my %prefs_register_var_pos = (
 868                 static_text => undef, obsolete => undef, # ignore
 869                 decode_as_range => -2, range => -2, filename => -2, # second to last
 870                 enum => -3, # third to last
 871                 # everything else is the last argument
 872         );
 873
 874         my @dupes;
 875         my %count;
 876         while ($filecontents =~ /prefs_register_(\w+?)_preference/gs) {
 877                 my ($func) = "prefs_register_$1_preference";
 878                 my ($args) = extract_bracketed(substr($filecontents, $+[0]), '()');
 879                 $args = substr($args, 1, -1); # strip parens
 880
 881                 my $pos = $prefs_register_var_pos{$1};
 882                 next if exists $prefs_register_var_pos{$1} and not defined $pos;
 883                 $pos //= -1;
 884                 my $var = (split /\s*,\s*(?![^(]*\))/, $args)[$pos]; # only commas outside parens
 885
 886                 my $ignore = 0;
 887                 for my $row (@excludePrefsCheck) {
 888                         my ($rfunc, $rvar) = @$row;
 889                         if (($rfunc eq $func) && ($rvar eq $var)) {
 890                                 $ignore = 1
 891                         }
 892                 }
 893                 if (!$ignore) {
 894                         push @dupes, $var if $count{$var}++ == 1;
 895                 }
 896         }
 897
 898         if (@dupes) {
 899                 print STDERR "$filename: error: found these preference variables used in more than one prefs_register_*_preference:\n\t".join(', ', @dupes)."\n";
 900                 $errorcount++;
 901         }
 902
 903         return $errorcount;
 904 }
 905
 906 # Check for forbidden control flow changes, see epan/exceptions.h
 907 sub check_try_catch($$)
 908 {
 909         my ($fileContentsRef, $filename) = @_;
 910         my $errorCount = 0;
 911
 912         # Match TRY { ... } ENDTRY (with an optional '\' in case of a macro).
 913         my @items = (${$fileContentsRef} =~ m/ \bTRY\s*\{ (.+?) \}\s* \\? \s*ENDTRY\b /xsg);
 914         for my $block (@items) {
 915                 if ($block =~ m/ \breturn\b /x) {
 916                         print STDERR "Error: return is forbidden in TRY/CATCH in $filename\n";
 917                         $errorCount++;
 918                 }
 919
 920                 my @gotoLabels = $block =~ m/ \bgoto\s+ (\w+) /xsg;
 921                 my %seen = ();
 922                 for my $gotoLabel (@gotoLabels) {
 923                         if ($seen{$gotoLabel}) {
 924                                 next;
 925                         }
 926                         $seen{$gotoLabel} = 1;
 927
 928                         if ($block !~ /^ \s* $gotoLabel \s* :/xsgm) {
 929                                 print STDERR "Error: goto to label '$gotoLabel' outside TRY/CATCH is forbidden in $filename\n";
 930                                 $errorCount++;
 931                         }
 932                 }
 933         }
 934
 935         return $errorCount;
 936 }
 937
 938 sub print_usage
 939 {
 940         print "Usage: checkAPIs.pl [-M] [-h] [-g group1[:count]] [-g group2] ... \n";
 941         print "                    [-summary-group group1] [-summary-group group2] ... \n";
 942         print "                    [--sourcedir=srcdir] \n";
 943         print "                    [--nocheck-hf]\n";
 944         print "                    [--nocheck-value-string-array] \n";
 945         print "                    [--nocheck-shadow]\n";
 946         print "                    [--debug]\n";
 947         print "                    [--file=/path/to/file_list]\n";
 948         print "                    file1 file2 ...\n";
 949         print "\n";
 950         print "       -M: Generate output for -g in 'machine-readable' format\n";
 951         print "       -p: used by the git pre-commit hook\n";
 952         print "       -h: help, print usage message\n";
 953         print "       -g <group>:  Check input files for use of APIs in <group>\n";
 954         print "                    (in addition to the default groups)\n";
 955         print "                    Maximum uses can be specified with <group>:<count>\n";
 956         print "       -summary-group <group>:  Output summary (count) for each API in <group>\n";
 957         print "                    (-g <group> also req'd)\n";
 958         print "       --nocheck-hf: Skip header field definition checks\n";
 959         print "       --nocheck-value-string-array: Skip value string array checks\n";
 960         print "       --nocheck-shadow: Skip shadow variable checks\n";
 961         print "       --debug: UNDOCUMENTED\n";
 962         print "\n";
 963         print "   Default Groups[-g]: ", join (", ", sort @apiGroups), "\n";
 964         print "   Available Groups:   ", join (", ", sort keys %APIs), "\n";
 965 }
 966
 967 # -------------
 968 # action:  remove '#if 0'd code from the input string
 969 # args     codeRef, fileName
 970 # returns: codeRef
 971 #
 972 # Essentially: split the input into blocks of code or lines of #if/#if 0/etc.
 973 #               Remove blocks that follow '#if 0' until '#else/#endif' is found.
 974
 975 {  # block begin
 976 my $debug = 0;
 977
 978     sub remove_if0_code {
 979         my ($codeRef, $fileName)  = @_;
 980
 981         # Preprocess output (ensure trailing LF and no leading WS before '#')
 982         $$codeRef =~ s/^\s*#/#/m;
 983         if ($$codeRef !~ /\n$/) { $$codeRef .= "\n"; }
 984
 985         # Split into blocks of normal code or lines with conditionals.
 986         my $ifRegExp = qr/if 0|if|else|endif/;
 987         my @blocks = split(/^(#\s*(?:$ifRegExp).*\n)/m, $$codeRef);
 988
 989         my ($if_lvl, $if0_lvl, $if0) = (0,0,0);
 990         my $lines = '';
 991         for my $block (@blocks) {
 992             my $if;
 993             if ($block =~ /^#\s*($ifRegExp)/) {
 994                 # #if/#if 0/#else/#endif processing
 995                 $if = $1;
 996                 if ($debug == 99) {
 997                     print(STDERR "if0=$if0 if0_lvl=$if0_lvl lvl=$if_lvl [$if] - $block");
 998                 }
 999                 if ($if eq 'if') {
1000                     $if_lvl += 1;
1001                 } elsif ($if eq 'if 0') {
1002                     $if_lvl += 1;
1003                     if ($if0_lvl == 0) {
1004                         $if0_lvl = $if_lvl;
1005                         $if0     = 1;  # inside #if 0
1006                     }
1007                 } elsif ($if eq 'else') {
1008                     if ($if0_lvl == $if_lvl) {
1009                         $if0 = 0;
1010                     }
1011                 } elsif ($if eq 'endif') {
1012                     if ($if0_lvl == $if_lvl) {
1013                         $if0     = 0;
1014                         $if0_lvl = 0;
1015                     }
1016                     $if_lvl -= 1;
1017                     if ($if_lvl < 0) {
1018                         die "patsub: #if/#endif mismatch in $fileName"
1019                     }
1020                 }
1021             }
1022
1023             if ($debug == 99) {
1024                 print(STDERR "if0=$if0 if0_lvl=$if0_lvl lvl=$if_lvl\n");
1025             }
1026             # Keep preprocessor lines and blocks that are not enclosed in #if 0
1027             if ($if or $if0 != 1) {
1028                 $lines .= $block;
1029             }
1030         }
1031         $$codeRef = $lines;
1032
1033         ($debug == 2) && print "==> After Remove if0: code: [$fileName]\n$$codeRef\n===<\n";
1034         return $codeRef;
1035     }
1036 }  # block end
1037
# The regular expressions below are based on those from:
# https://web.archive.org/web/20080614012925/http://aspn.activestate.com/ASPN/Cookbook/Rx/Recipe/59811
# They are in the public domain.

# A regex which matches a double-quoted string: an opening '"', then any
# number of escape sequences (a backslash plus one character) or characters
# other than '"' and '\', then the closing '"'.
# (?s: ...) is used so that '.' also matches a new-line; this way strings
# containing a 'line continuation' ( \ followed by a new-line) will match.
my $DoubleQuotedStr = qr{ (?: ["] (?s: \\. | [^\"\\])* ["]) }x;

# A regex which matches a single-quoted string (e.g. a character constant);
# same structure as above, but without the (?s:) modifier.
my $SingleQuotedStr = qr{ (?: \' (?: \\. | [^\'\\])* [']) }x;
1049
#
# MAIN
#
my $errorCount = 0;

# Groups for which a use count summary is printed at the end (-summary-group).
my @apiSummaryGroups = ();
my $machine_readable_output = 0;                        # default: disabled
my $check_hf = 1;                                       # default: enabled
my $check_value_string_array= 1;                        # default: enabled
my $check_shadow = 1;                                   # default: enabled
my $debug_flag = 0;                                     # default: disabled
my $source_dir = "";
my $filenamelist = "";
my $help_flag = 0;
my $pre_commit = 0;

# -g/--group adds to @apiGroups, the default list, which can be expanded.
my $result = GetOptions(
                        'group=s' => \@apiGroups,
                        'summary-group=s' => \@apiSummaryGroups,
                        'Machine-readable' => \$machine_readable_output,
                        'check-hf!' => \$check_hf,
                        'check-value-string-array!' => \$check_value_string_array,
                        'check-shadow!' => \$check_shadow,
                        'sourcedir=s' => \$source_dir,
                        'debug' => \$debug_flag,
                        'pre-commit' => \$pre_commit,
                        'file=s' => \$filenamelist,
                        'help' => \$help_flag
                        );
if (!$result || $help_flag) {
        print_usage();
        exit(1);
}

# the pre-commit hook only calls checkAPIs one file at a time, so this
# is safe to do globally (and easier)
if ($pre_commit) {
    my $filename = $ARGV[0];
    # if the filename is packet-*.c or packet-*.h, then we set the abort and termoutput groups.
    if ($filename =~ /\bpacket-[^\/\\]+\.[ch]$/) {
        push @apiGroups, "abort";
        push @apiGroups, "termoutput";
    }
}

# Add a 'function_counts' anonymous hash (plus the count limits) to each of
# the 'apiGroup' entries in the %APIs hash.
for my $apiGroup (keys %APIs) {
        my @functions = @{$APIs{$apiGroup}{functions}};

        $APIs{$apiGroup}->{function_counts}   = {};
        @{$APIs{$apiGroup}->{function_counts}}{@functions} = ();  # Add fcn names as keys to the anonymous hash
        # -1: no limit; groups whose uses are errors allow no uses at all
        $APIs{$apiGroup}->{max_function_count}   = -1;
        if ($APIs{$apiGroup}->{count_errors}) {
                $APIs{$apiGroup}->{max_function_count}   = 0;
        }
        $APIs{$apiGroup}->{cur_function_count}   = 0;
}

my @filelist;
push @filelist, @ARGV;
if ("$filenamelist" ne "") {
        # We have a file containing a list of files to check (possibly in
        # addition to those on the command line).
        open(my $listFh, '<', $filenamelist) || die("Couldn't open $filenamelist");

        while (my $listLine = <$listFh>) {
                # Remove the line ending; otherwise it would end up as part
                # of the last file name on the line.
                $listLine =~ s/[\r\n]+$//;

                # file names can be separated by ;
                # (empty names, e.g. from ";;" or blank lines, are dropped)
                push @filelist, grep { length } split(/;/, $listLine);
        }
        close($listFh);
}

die "no files to process" unless (scalar @filelist);
1124
# Read through the files; do various checks
# (The list is processed from its end towards its start.)
while (defined(my $listEntry = pop @filelist))
{
        # Testing for definedness (rather than truth) keeps a file named "0"
        # or an empty entry from silently ending the whole run.
        next if ($listEntry eq '');

        my $filename = $listEntry;
        my $fileContents = '';
        my @foundAPIs = ();
        my $line;

        if ($source_dir and ! -e $filename) {
                $filename = $source_dir . '/' . $filename;
        }
        if (! -e $filename) {
                warn "No such file: \"$filename\"";
                next;
        }

        # delete leading './'
        $filename =~ s{ ^ \. / } {}xo;
        unless (-f $filename) {
                print STDERR "Warning: $filename is not of type file - skipping.\n";
                next;
        }

        # Read in the file (ouch, but it's easier that way)
        # Three-argument open: the name is never interpreted as a mode or pipe.
        open(my $srcFh, '<', $filename) || die("Couldn't open $filename");
        $line = 1;
        while (my $text = <$srcFh>) {
                $fileContents .= $text;
                # Only the validity matters here; the decoded text is discarded.
                eval { decode( 'UTF-8', $text, Encode::FB_CROAK ) };
                if ($EVAL_ERROR) {
                        print STDERR "Error: Found an invalid UTF-8 sequence on line " .$line. " of " .$filename."\n";
                        $errorCount++;
                }
                $line++;
        }
        close($srcFh);

        if (($fileContents =~ m{ \$Id .* \$ }xo))
        {
                print STDERR "Warning: ".$filename." has an SVN Id tag. Please remove it!\n";
        }

        # NOTE(review): the character class [0-7|9] also contains a literal
        # '|'; presumably only the digits other than 8 were intended.
        if (($fileContents =~ m{ tab-width:\s*[0-7|9]+ | tabstop=[0-7|9]+ | tabSize=[0-7|9]+ }xo))
        {
                # To quote Icf0831717de10fc615971fa1cf75af2f1ea2d03d :
                # HT tab stops are set every 8 spaces on UN*X; UN*X tools that treat an HT character
                # as tabbing to 4-space tab stops, or that even are configurable but *default* to
                # 4-space tab stops (I'm looking at *you*, Xcode!) are broken. tab-width: 4,
                # tabstop=4, and tabSize=4 are errors if you ever expect anybody to look at your file
                # with a UN*X tool, and every text file will probably be looked at by a UN*X tool at
                # some point, so Don't Do That.
                #
                # Can I get an "amen!"?
                print STDERR "Error: Found modelines with tabstops set to something other than 8 in " .$filename."\n";
                $errorCount++;
        }

        # Remove C/C++ comments
        # The below pattern is modified (to keep newlines at the end of C++-style comments) from that at:
        # https://perldoc.perl.org/perlfaq6.html#How-do-I-use-a-regular-expression-to-strip-C-style-comments-from-a-file?
        $fileContents =~ s#/\*[^*]*\*+([^/*][^*]*\*+)*/|//([^\\]|[^\n][\n]?)*?\n|("(\\.|[^"\\])*"|'(\\.|[^'\\])*'|.[^/"'\\]*)#defined $3 ? $3 : "\n"#gse;

        # optionally check the hf entries (including those under #if 0)
        if ($check_hf) {
            $errorCount += check_hf_entries(\$fileContents, $filename);
        }

        if ($fileContents =~ m{ %\d*?ll }dxo)
        {
                # use PRI[dux...]N instead of ll
                print STDERR "Error: Found %ll in " .$filename."\n";
                $errorCount++;
        }

        if ($fileContents =~ m{ %hh }xo)
        {
                # %hh is C99 and Windows doesn't like it:
                # http://connect.microsoft.com/VisualStudio/feedback/details/416843/sscanf-cannot-not-handle-hhd-format
                # Need to use temporary variables instead.
                print STDERR "Error: Found %hh in " .$filename."\n";
                $errorCount++;
        }

        # check for files that we should not include directly
        # this must be done before quoted strings (#include "file.h") are removed
        check_included_files(\$fileContents, $filename);

        # Check for value_string and enum_val_t errors: NULL termination,
        # const-nes, and newlines within strings
        if ($check_value_string_array) {
                $errorCount += check_value_string_arrays(\$fileContents, $filename, $debug_flag);
        }

        # Remove all the quoted strings
        $fileContents =~ s{ $DoubleQuotedStr | $SingleQuotedStr } []xog;

        $errorCount += check_pref_var_dupes(\$fileContents, $filename);

        # Remove all blank lines
        # NOTE(review): without /m this only matches when the whole content
        # is white space - confirm whether /m was intended.
        $fileContents =~ s{ ^ \s* $ } []xog;

        # Remove all '#if 0'd' code
        remove_if0_code(\$fileContents, $filename);

        $errorCount += check_ett_registration(\$fileContents, $filename);

        #checkAPIsCalledWithTvbGetPtr(\@TvbPtrAPIs, \$fileContents, \@foundAPIs);
        #if (@foundAPIs) {
        #       print STDERR "Found APIs with embedded tvb_get_ptr() calls in ".$filename." : ".join(',', @foundAPIs)."\n"
        #}

        if ($check_shadow) {
                check_shadow_variable(\@ShadowVariable, \$fileContents, \@foundAPIs);
                if (@foundAPIs) {
                        print STDERR "Warning: Found shadow variable(s) in ".$filename." : ".join(',', @foundAPIs)."\n"
                }
        }


        $errorCount += check_snprintf_plus_strlen(\$fileContents, $filename);

        $errorCount += check_complex_snprintf(\$fileContents, $filename);

        $errorCount += check_proto_tree_add_XXX(\$fileContents, $filename);

        $errorCount += check_try_catch(\$fileContents, $filename);

        # Check and count APIs
        for my $groupArg (@apiGroups) {
                my $pfx = "Warning";
                @foundAPIs = ();
                # A group argument is either "<group>" or "<group>:<max count>"
                my @groupParts = split(/:/, $groupArg);
                my $apiGroup = $groupParts[0];
                my $curFuncCount = 0;

                if (scalar @groupParts > 1) {
                        $APIs{$apiGroup}->{max_function_count} = $groupParts[1];
                }

                findAPIinFile($APIs{$apiGroup}, \$fileContents, \@foundAPIs);

                # Total number of uses recorded for this group
                for my $api (keys %{$APIs{$apiGroup}->{function_counts}}   ) {
                        $curFuncCount += $APIs{$apiGroup}{function_counts}{$api};
                }

                # If we have a max function count and we've exceeded it, treat it
                # as an error.
                if (!$APIs{$apiGroup}->{count_errors} && $APIs{$apiGroup}->{max_function_count} >= 0) {
                        if ($curFuncCount > $APIs{$apiGroup}->{max_function_count}) {
                                print STDERR $pfx . ": " . $apiGroup . " exceeds maximum function count: " . $APIs{$apiGroup}->{max_function_count} . "\n";
                                $APIs{$apiGroup}->{count_errors} = 1;
                        }
                }

                # Still within the allowed number of uses: nothing to report
                if ($curFuncCount <= $APIs{$apiGroup}->{max_function_count}) {
                        next;
                }

                if ($APIs{$apiGroup}->{count_errors}) {
                        # the use of "prohibited" APIs is an error, increment the error count
                        $errorCount += @foundAPIs;
                        $pfx = "Error";
                }

                if (@foundAPIs && ! $machine_readable_output) {
                        print STDERR $pfx . ": Found " . $apiGroup . " APIs in ".$filename.": ".join(',', @foundAPIs)."\n";
                }
                if (@foundAPIs && $machine_readable_output) {
                        for my $api (@foundAPIs) {
                                printf STDERR "%-8.8s %-20.20s %-30.30s %-45.45s\n", $pfx, $apiGroup, $filename, $api;
                        }
                }
        }
}
1299
# Summary: Print Use Counts of each API in each requested summary group

if (scalar @apiSummaryGroups > 0) {
        my $fileline = join(", ", @ARGV);
        # Plain print: the file names must not be used as a printf format
        # string, since a '%' in a name would be taken as a conversion.
        print "\nSummary for " . substr($fileline, 0, 65) . "…\n";

        for my $apiGroup (@apiSummaryGroups) {
                printf "\nUse counts for %s (maximum allowed total is %d)\n", $apiGroup, $APIs{$apiGroup}->{max_function_count};
                # List the APIs that were used at least once, sorted by name
                # without regard to case.
                for my $api (sort {"\L$a" cmp "\L$b"} (keys %{$APIs{$apiGroup}->{function_counts}}   )) {
                        if ($APIs{$apiGroup}{function_counts}{$api} < 1) { next; }
                        printf "%5d  %-40.40s\n", $APIs{$apiGroup}{function_counts}{$api}, $api;
                }
        }
}

# The exit status is the error count, capped at 120
# (presumably to stay within the range of valid exit statuses - confirm).
exit($errorCount > 120 ? 120 : $errorCount);
1316
1317 #
1318 # Editor modelines  -  https://www.wireshark.org/tools/modelines.html
1319 #
1320 # Local variables:
1321 # c-basic-offset: 8
1322 # tab-width: 8
1323 # indent-tabs-mode: nil
1324 # End:
1325 #
1326 # vi: set shiftwidth=8 tabstop=8 expandtab:
1327 # :indentSize=8:tabSize=8:noTabs=true:
1328 #