2 # Wireshark - Network traffic analyzer
3 # By Gerald Combs <gerald@wireshark.org>
4 # Copyright 1998 Gerald Combs
6 # SPDX-License-Identifier: GPL-2.0-or-later
14 # This utility scans for tfs items, and works out if standard ones
15 # could have been used instead (from epan/tfs.c)
16 # Can also check for value_string where common tfs could be used instead.
19 # - consider merging Item class with check_typed_item_calls.py ?
22 # Try to exit soon after Ctrl-C is pressed.
25 def signal_handler(sig
, frame
):
28 print('You pressed Ctrl+C - exiting')
30 signal
.signal(signal
.SIGINT
, signal_handler
)
# Test for whether the given file was automatically generated.
def isGeneratedFile(filename):
    """Return True if 'filename' looks machine-generated (marker comment near top).

    Only the first few lines are examined, since generator banners appear at
    the top of a file.  Missing files are treated as hand-written (False).
    """
    # Check file exists - e.g. may have been deleted in a recent commit.
    if not os.path.exists(filename):
        return False

    # Banner strings that the various code generators emit.
    generated_markers = (
        'Generated automatically',
        'Generated Automatically',
        'Autogenerated from',
        'is autogenerated',
        'automatically generated by Pidl',
        'Created by: The Qt Meta Object Compiler',
        'This file was generated',
        'This filter was automatically generated',
        'This file is auto generated, do not edit!',
        'This file is auto generated',
    )

    # 'with' guarantees the handle is closed (the original opened without closing
    # on every return path).  Redundant os.path.join(filename) dropped.
    lines_tested = 0
    with open(filename, 'r', encoding="utf8", errors="ignore") as f_read:
        for line in f_read:
            # The comment to say that its generated is near the top, so give up
            # once we get a few lines down.
            if lines_tested > 10:
                return False
            if any(marker in line for marker in generated_markers):
                return True
            lines_tested = lines_tested + 1

    # OK, looks like a hand-written file!
    return False
# Keep track of custom entries that might appear in multiple dissectors,
# so we can consider adding them to tfs.c
custom_tfs_entries = {}


def AddCustomEntry(true_val, false_val, file):
    """Record that the custom TFS pair (true_val, false_val) was seen in 'file'."""
    global custom_tfs_entries
    # setdefault collapses the original first-seen/already-seen branches into one step.
    custom_tfs_entries.setdefault((true_val, false_val), []).append(file)
# Individual parsed TFS entry
class TFS:
    """One true_false_string found in a dissector, with sanity checks on its strings."""

    def __init__(self, file, name, true_val, false_val):
        self.file = file
        self.name = name
        self.true_val = true_val
        self.false_val = false_val

        # NOTE(review): assumes a module-level 'warnings_found' counter exists
        # (checkFile declares the same global) — confirm.
        global warnings_found

        # Should not be empty.
        if not len(true_val) or not len(false_val):
            print('Warning:', file, name, 'has an empty field', self)
            warnings_found += 1

        # Strange if one begins with capital but other doesn't?
        #if true_val[0].isalpha() and false_val[0].isalpha():
        #    if true_val[0].isupper() != false_val[0].isupper():
        #        print(file, name, 'one starts lowercase and the other upper', self)

        # Leading or trailing space should not be needed.
        if true_val.startswith(' ') or true_val.endswith(' '):
            print('Note: ' + self.file + ' ' + self.name + ' - true val begins or ends with space \"' + self.true_val + '\"')
        if false_val.startswith(' ') or false_val.endswith(' '):
            print('Note: ' + self.file + ' ' + self.name + ' - false val begins or ends with space \"' + self.false_val + '\"')

        # Should really not be identical...
        if true_val.lower() == false_val.lower():
            print('Warning:', file, name, 'true and false strings are the same', self)
            warnings_found += 1

        # Shouldn't both be negation (with exception for packet-smb.c).
        if (file != os.path.join('epan', 'dissectors', 'packet-smb.c') and
                (true_val.lower().find('not ') != -1) and (false_val.lower().find('not ') != -1)):
            print('Warning:', file, name, self, 'both strings contain not')
            warnings_found += 1

        # Not expecting full-stops inside strings..
        if true_val.find('.') != -1 or false_val.find('.') != -1:
            print('Warning:', file, name, 'Period found in string', self)
            warnings_found += 1

    def __str__(self):
        return '{' + '"' + self.true_val + '", "' + self.false_val + '"}'
# Only looking at in terms of could/should it be TFS instead.
class ValueString:
    """A parsed value_string, judged on whether it is really a 2-value TFS."""

    def __init__(self, file, name, vals):
        self.file = file
        self.name = name
        self.raw_vals = vals
        self.parsed_vals = {}
        self.looks_like_tfs = True

        # A TFS-like value_string has exactly 3 '{' (two entries + NULL terminator).
        no_lines = self.raw_vals.count('{')
        if no_lines != 3:
            self.looks_like_tfs = False
            return

        # Now parse out each entry in the value_string
        matches = re.finditer(r'\{([\"a-zA-Z\s\d\,]*)\}', self.raw_vals)
        for m in matches:
            entry = m.group(1)
            # Check each entry looks like part of a TFS entry.
            match = re.match(r'\s*([01])\,\s*\"([a-zA-Z\d\s]*\s*)\"', entry)
            if match:
                if match[1] == '1':
                    self.parsed_vals[True] = match[2]
                else:
                    self.parsed_vals[False] = match[2]

                # Now have both entries
                if len(self.parsed_vals) == 2:
                    break
            else:
                self.looks_like_tfs = False
                return

    def __str__(self):
        return '{' + '"' + self.raw_vals + '"}'
# Fixed widths (in bits) for the common integral field types.
# NOTE(review): only the FT_BOOLEAN entry is visible in this chunk; the other
# entries are reconstructed from the standard ftype widths — confirm upstream.
field_widths = {
    'FT_BOOLEAN' : 64,   # TODO: Width depends upon 'display' field
    'FT_CHAR'    : 8,
    'FT_UINT8'   : 8,
    'FT_INT8'    : 8,
    'FT_UINT16'  : 16,
    'FT_INT16'   : 16,
    'FT_UINT24'  : 24,
    'FT_INT24'   : 24,
    'FT_UINT32'  : 32,
    'FT_INT32'   : 32,
    'FT_UINT40'  : 40,
    'FT_INT40'   : 40,
    'FT_UINT48'  : 48,
    'FT_INT48'   : 48,
    'FT_UINT56'  : 56,
    'FT_INT56'   : 56,
    'FT_UINT64'  : 64,
    'FT_INT64'   : 64
}
# Simplified version of class that is in check_typed_item_calls.py
class Item:
    """One registered hf item, with its mask parsed so bit-width checks can run."""

    def __init__(self, filename, hf, filter, label, item_type, type_modifier, strings, macros, mask=None,
                 check_mask=False):
        self.filename = filename
        self.hf = hf
        self.filter = filter
        self.label = label
        self.strings = strings
        self.mask = mask

        # N.B. Not setting mask by looking up macros.

        self.item_type = item_type
        self.type_modifier = type_modifier

        self.set_mask_value(macros)

        # Count how many bits of the mask are set.
        self.bits_set = 0
        for n in range(0, self.get_field_width_in_bits()):
            if self.check_bit(self.mask_value, n):
                self.bits_set += 1

    def __str__(self):
        return 'Item ({0} "{1}" {2} type={3}:{4} strings={5} mask={6})'.format(self.filename, self.label, self.filter,
                                                                               self.item_type, self.type_modifier,
                                                                               self.strings, self.mask)

    def set_mask_value(self, macros):
        """Parse self.mask (possibly via a macro) into self.mask_value; flag failures."""
        try:
            self.mask_read = True

            # Substitute mask if found as a macro..
            if self.mask in macros:
                self.mask = macros[self.mask]
            elif any(c not in '0123456789abcdefABCDEFxX' for c in self.mask):
                # Not a plain numeric literal - give up.
                self.mask_read = False
                self.mask_value = 0
                return

            # Read according to the appropriate base.
            if self.mask.startswith('0x'):
                self.mask_value = int(self.mask, 16)
            elif self.mask.startswith('0'):
                self.mask_value = int(self.mask, 8)
            else:
                self.mask_value = int(self.mask, 10)
        except Exception:
            self.mask_read = False
            self.mask_value = 0

    # Return true if bit position n is set in value.
    def check_bit(self, value, n):
        return (value & (0x1 << n)) != 0

    def get_field_width_in_bits(self):
        """Return the field's width in bits (0 when unknown)."""
        if self.item_type == 'FT_BOOLEAN':
            if self.type_modifier == 'NULL':
                return 8  # i.e. 1 byte
            elif self.type_modifier == 'BASE_NONE':
                return 8
            elif self.type_modifier == 'SEP_DOT':   # from proto.h, only meant for FT_BYTES
                return 64
            else:
                try:
                    # For FT_BOOLEAN, modifier is just numerical number of bits. Round up to next nibble.
                    return int((int(self.type_modifier) + 3)/4)*4
                except Exception:
                    return 0
        else:
            if self.item_type in field_widths:
                # Lookup fixed width for this type
                return field_widths[self.item_type]
            else:
                #print('returning 0 for', self)
                return 0
271 def removeComments(code_string
):
272 code_string
= re
.sub(re
.compile(r
"/\*.*?\*/",re
.DOTALL
) ,"" ,code_string
) # C-style comment
273 code_string
= re
.sub(re
.compile(r
"//.*?\n" ) ,"" ,code_string
) # C++-style comment
274 code_string
= re
.sub(re
.compile(r
"#if 0.*?#endif",re
.DOTALL
) ,"" , code_string
) # Ignored region
# Look for true_false_string items in a dissector file.
def findTFS(filename):
    """Return dict of name -> TFS for every true_false_string defined in 'filename'."""
    tfs_found = {}

    with open(filename, 'r', encoding="utf8", errors="ignore") as f:
        contents = f.read()
        # Example: const true_false_string tfs_yes_no = { "Yes", "No" };

        # Remove comments so as not to trip up RE.
        contents = removeComments(contents)

        matches = re.finditer(r'\sconst\s*true_false_string\s*([a-zA-Z0-9_]*)\s*=\s*{\s*\"([a-zA-Z_0-9/:! ]*)\"\s*,\s*\"([a-zA-Z_0-9/:! ]*)\"', contents)
        for m in matches:
            name = m.group(1)
            true_val = m.group(2)
            false_val = m.group(3)
            tfs_found[name] = TFS(filename, name, true_val, false_val)

    return tfs_found
# Look for value_string entries in a dissector file.
def findValueStrings(filename):
    """Return dict of name -> ValueString for every value_string defined in 'filename'."""
    vals_found = {}

    # Example:
    #static const value_string radio_type_vals[] =
    #{
    #    { 0, "FDD"},
    #    { 1, "TDD"},
    #    { 0, NULL }
    #};

    with open(filename, 'r', encoding="utf8", errors="ignore") as f:
        contents = f.read()

        # Remove comments so as not to trip up RE.
        contents = removeComments(contents)

        matches = re.finditer(r'.*const value_string\s*([a-zA-Z0-9_]*)\s*\[\s*\]\s*\=\s*\{([\{\}\d\,a-zA-Z0-9\s\"]*)\};', contents)
        for m in matches:
            name = m.group(1)
            vals = m.group(2)
            vals_found[name] = ValueString(filename, name, vals)

    return vals_found
# Look for hf items (i.e. full item to be registered) in a dissector file.
def find_items(filename, macros, check_mask=False, mask_exact_width=False, check_label=False, check_consecutive=False):
    """Return dict of hf name -> Item for every hf entry registered in 'filename'."""
    items = {}
    with open(filename, 'r', encoding="utf8", errors="ignore") as f:
        contents = f.read()

        # Remove comments so as not to trip up RE.
        contents = removeComments(contents)

        # N.B. re extends all the way to HFILL to avoid greedy matching
        matches = re.finditer( r'.*\{\s*\&(hf_[a-z_A-Z0-9]*)\s*,\s*{\s*\"(.*?)\"\s*,\s*\"(.*?)\"\s*,\s*(.*?)\s*,\s*([0-9A-Z_\|\s]*?)\s*,\s*(.*?)\s*,\s*(.*?)\s*,\s*([a-zA-Z0-9\W\s_\u00f6\u00e4]*?)\s*,\s*HFILL', contents)
        for m in matches:
            # Store this item.
            hf = m.group(1)
            # NOTE(review): group->keyword mapping reconstructed; group 6 = strings,
            # group 7 = mask — confirm against check_typed_item_calls.py.
            items[hf] = Item(filename, hf, filter=m.group(3), label=m.group(2), item_type=m.group(4),
                             type_modifier=m.group(5),
                             strings=m.group(6),
                             macros=macros,
                             mask=m.group(7))
    return items
def find_macros(filename):
    """Return dict of macro name -> value for simple numeric #defines in 'filename'."""
    macros = {}
    with open(filename, 'r', encoding="utf8", errors="ignore") as f:
        contents = f.read()

        # Remove comments so as not to trip up RE.
        contents = removeComments(contents)

        matches = re.finditer( r'#define\s*([A-Z0-9_]*)\s*([0-9xa-fA-F]*)\n', contents)
        for m in matches:
            # Store this mapping.
            macros[m.group(1)] = m.group(2)
    return macros
def is_dissector_file(filename):
    """Return a truthy match when 'filename' follows the packet-*.c / file-*.c convention."""
    # re.match caches the compiled pattern, so this is equivalent to compiling first.
    return re.match(r'.*(packet|file)-.*\.c', filename)
def findDissectorFilesInFolder(folder):
    """Walk 'folder' recursively and return the set of dissector source paths."""
    files = set()
    for path, tmp_unused, names in os.walk(folder):
        for f in names:
            if is_dissector_file(f):
                files.add(os.path.join(path, f))
    return files
# Check the given dissector file.
def checkFile(filename, common_tfs, look_for_common=False, check_value_strings=False, count_common_usage=False):
    """Scan one dissector for TFS / value_string entries duplicating tfs.c.

    common_tfs maps name -> TFS for the shared entries from epan/tfs.c.
    Updates the module-level warnings_found / errors_found / common_usage counters.
    """
    global warnings_found
    global errors_found

    # Check file exists - e.g. may have been deleted in a recent commit.
    if not os.path.exists(filename):
        print(filename, 'does not exist!')
        return

    # Find locally-defined TFS entries.
    file_tfs = findTFS(filename)

    # See if any of these items already existed in tfs.c
    for f in file_tfs:
        found = False
        exact_case = False
        for c in common_tfs:
            # Do not do this check for plugins; plugins cannot import
            # data values from libwireshark (functions, yes; data
            # values, no).
            #
            # Test whether there's a common prefix for the file name
            # and "plugin/epan/"; if so, this is a plugin, and there
            # is no common path and os.path.commonprefix returns an
            # empty string, otherwise it returns the common path, so
            # we check whether the common path is an empty string.
            if os.path.commonprefix([filename, 'plugin/epan/']) == '':
                if file_tfs[f].true_val == common_tfs[c].true_val and file_tfs[f].false_val == common_tfs[c].false_val:
                    found = True
                    exact_case = True
                elif file_tfs[f].true_val.upper() == common_tfs[c].true_val.upper() and file_tfs[f].false_val.upper() == common_tfs[c].false_val.upper():
                    found = True

                if found:
                    print("Error:" if exact_case else "Warning: ", filename, f,
                          "- could have used", c, 'from tfs.c instead: ', common_tfs[c],
                          '' if exact_case else ' (capitalisation differs)')
                    if exact_case:
                        errors_found += 1
                    else:
                        warnings_found += 1
                    break

        # Remember custom entries so repeated ones can be promoted to tfs.c.
        if not found and look_for_common:
            AddCustomEntry(file_tfs[f].true_val, file_tfs[f].false_val, filename)

    if check_value_strings:
        # Get macros so masks can be resolved.
        macros = find_macros(filename)

        # Get value_string entries.
        vs = findValueStrings(filename)

        # Also get the hf items registered in this file.
        items = find_items(filename, macros, check_mask=True)

        for v in vs:
            if vs[v].looks_like_tfs:
                for c in common_tfs:
                    found = False
                    exact_case = False

                    # Do not do this check for plugins; plugins cannot import
                    # data values from libwireshark (functions, yes; data
                    # values, no).
                    #
                    # Test whether there's a common prefix for the file name
                    # and "plugin/epan/"; if so, this is a plugin, and there
                    # is no common path and os.path.commonprefix returns an
                    # empty string, otherwise it returns the common path, so
                    # we check whether the common path is an empty string.
                    if os.path.commonprefix([filename, 'plugin/epan/']) == '':
                        if common_tfs[c].true_val == vs[v].parsed_vals[True] and common_tfs[c].false_val == vs[v].parsed_vals[False]:
                            found = True
                            exact_case = True
                        elif common_tfs[c].true_val.upper() == vs[v].parsed_vals[True].upper() and common_tfs[c].false_val.upper() == vs[v].parsed_vals[False].upper():
                            found = True

                        if found:
                            # OK, now look for items that:
                            # - use this value_string, and
                            # - have a mask width of 1 bit (no good if field can have values > 1...)
                            for i in items:
                                if re.match(r'VALS\(\s*'+v+r'\s*\)', items[i].strings):
                                    if items[i].bits_set == 1:
                                        print("Warn:" if exact_case else "Note:", filename, 'value_string', "'"+v+"'",
                                              '- could have used tfs.c entry instead: for', i,
                                              ' - "FT_BOOLEAN,', str(items[i].get_field_width_in_bits()) + ', TFS(&' + c + '),"',
                                              '' if exact_case else ' (capitalisation differs)')
                                        if exact_case:
                                            warnings_found += 1

    if count_common_usage:
        # Look for TFS(&<name>) in dissector
        with open(filename, 'r') as f:
            contents = f.read()
            for c in common_tfs:
                m = re.search(r'TFS\(\s*\&' + c + r'\s*\)', contents)
                if m:
                    if c not in common_usage:
                        common_usage[c] = 1
                    else:
                        common_usage[c] += 1
#################################################################
# Main program.

# command-line args. Controls which dissector files should be checked.
# If no args given, will just scan epan/dissectors folder.
parser = argparse.ArgumentParser(description='Check calls in dissectors')
parser.add_argument('--file', action='append',
                    help='specify individual dissector file to test')
parser.add_argument('--commits', action='store',
                    help='last N commits to check')
parser.add_argument('--open', action='store_true',
                    help='check open files')
parser.add_argument('--check-value-strings', action='store_true',
                    help='check whether value_strings could have been tfs?')
parser.add_argument('--common', action='store_true',
                    help='check for potential new entries for tfs.c')
parser.add_argument('--common-usage', action='store_true',
                    help='count how many dissectors are using common tfs entries')

args = parser.parse_args()
# Get files from wherever command-line args indicate.
files = set()
if args.file:
    # Add specified file(s)
    for f in args.file:
        # Allow a bare dissector name to be resolved under epan/dissectors.
        if not os.path.isfile(f) and not f.startswith('epan'):
            f = os.path.join('epan', 'dissectors', f)
        if not os.path.isfile(f):
            print('Chosen file', f, 'does not exist.')
            exit(1)
        else:
            files.add(f)
elif args.commits:
    # Get files affected by specified number of commits.
    command = ['git', 'diff', '--name-only', 'HEAD~' + args.commits]
    files = {f.decode('utf-8')
             for f in subprocess.check_output(command).splitlines()}
    # Will examine dissector files only
    files = set(filter(is_dissector_file, files))
elif args.open:
    # Unstaged changes.
    command = ['git', 'diff', '--name-only']
    files = {f.decode('utf-8')
             for f in subprocess.check_output(command).splitlines()}
    # Only interested in dissector files.
    files = list(filter(is_dissector_file, files))
    # Staged changes.
    command = ['git', 'diff', '--staged', '--name-only']
    files_staged = {f.decode('utf-8')
                    for f in subprocess.check_output(command).splitlines()}
    # Only interested in dissector files.
    # Fix: filter into files_staged - assigning the filtered staged set to
    # 'files' (as before) discarded the unstaged results gathered above.
    files_staged = set(filter(is_dissector_file, files_staged))
    # Merge in staged files not already found among the unstaged ones.
    for f in files_staged:
        if f not in files:
            files.append(f)
else:
    # Find all dissector files from folder.
    files = findDissectorFilesInFolder(os.path.join('epan', 'dissectors'))
# If scanning a subset of files, list them here.
print('Examining:')
if args.file or args.commits or args.open:
    if files:
        print(' '.join(sorted(files)), '\n')
    else:
        print('No files to check.\n')
else:
    print('All dissector modules\n')

# Get standard/ shared ones.
common_tfs_entries = findTFS(os.path.join('epan', 'tfs.c'))

# Now check the files to see if they could have used shared ones instead.
# Look at files in sorted order, to give some idea of how far through we are.
for f in sorted(files):
    # Bail out promptly after Ctrl-C.
    if should_exit:
        exit(1)
    if not isGeneratedFile(f):
        checkFile(f, common_tfs_entries, look_for_common=args.common,
                  check_value_strings=args.check_value_strings,
                  count_common_usage=args.common_usage)
# Report on commonly-defined values.
if args.common:
    # Looking for items that could potentially be moved to tfs.c
    for c in custom_tfs_entries:
        # Only want to see items that have 3 or more occurrences.
        # Even then, probably only want to consider ones that sound generic.
        if len(custom_tfs_entries[c]) > 2:
            print(c, 'appears', len(custom_tfs_entries[c]), 'times, in: ', custom_tfs_entries[c])

if args.common_usage:
    for c in common_tfs_entries:
        if c in common_usage:
            print(c, 'used in', common_usage[c], 'dissectors')
        else:
            print('***', c, 'IS NOT USED! ***')

# Summary of problems found.
print(warnings_found, 'warnings found')
if errors_found:
    print(errors_found, 'errors found')