tools/check_typed_item_calls.py

   1 #!/usr/bin/env python3
   2 # Wireshark - Network traffic analyzer
   3 # By Gerald Combs <gerald@wireshark.org>
   4 # Copyright 1998 Gerald Combs
   5 #
   6 # SPDX-License-Identifier: GPL-2.0-or-later
   7
   8 import os
   9 import re
  10 import argparse
  11 import signal
  12 import subprocess
  13
  14 # This utility scans the dissector code for various issues.
  15 # TODO:
  16 # - Create maps from type -> display types for hf items (see display (FIELDDISPLAY)) in docs/README.dissector
  17
  18
  19 # Try to exit soon after Ctrl-C is pressed.
  20 should_exit = False
  21
  22 def signal_handler(sig, frame):
  23     global should_exit
  24     should_exit = True
  25     print('You pressed Ctrl+C - exiting')
  26
  27 signal.signal(signal.SIGINT, signal_handler)
  28
  29
  30 warnings_found = 0
  31 errors_found = 0
  32
  33 def name_has_one_of(name, substring_list):
  34     for word in substring_list:
  35         if name.lower().find(word) != -1:
  36             return True
  37     return False
  38
  39 # An individual call to an API we are interested in.
  40 # Used by APICheck below.
  41 class Call:
  42     def __init__(self, function_name, hf_name, macros, line_number=None, offset=None, length=None, fields=None):
  43         self.hf_name = hf_name
  44         self.line_number = line_number
  45         self.fields = fields
  46         self.length = None
  47         if length:
  48             try:
  49                 #if offset.find('*') != -1 and offset.find('*') != 0 and offset.find('8') != -1:
  50                 #    print(hf_name, function_name, offset)
  51                 self.length = int(length)
  52             except Exception:
  53                 if length.isupper():
  54                     if length in macros:
  55                         try:
  56                             self.length = int(macros[length])
  57                         except Exception:
  58                             pass
  59                 pass
  60
  61
  62 # These are variable names that have been seen to be used in calls..
  63 common_hf_var_names = { 'hf_index', 'hf_item', 'hf_idx', 'hf_x', 'hf_id', 'hf_cookie', 'hf_flag',
  64                         'hf_dos_time', 'hf_dos_date', 'hf_value', 'hf_num',
  65                         'hf_cause_value', 'hf_uuid',
  66                         'hf_endian', 'hf_ip', 'hf_port', 'hf_suff', 'hf_string', 'hf_uint',
  67                         'hf_tag', 'hf_type', 'hf_hdr', 'hf_field', 'hf_opcode', 'hf_size',
  68                         'hf_entry', 'field' }
  69
  70 item_lengths = {}
  71 item_lengths['FT_CHAR']  = 1
  72 item_lengths['FT_UINT8']  = 1
  73 item_lengths['FT_INT8']   = 1
  74 item_lengths['FT_UINT16'] = 2
  75 item_lengths['FT_INT16']  = 2
  76 item_lengths['FT_UINT24'] = 3
  77 item_lengths['FT_INT24']  = 3
  78 item_lengths['FT_UINT32'] = 4
  79 item_lengths['FT_INT32']  = 4
  80 item_lengths['FT_UINT40'] = 5
  81 item_lengths['FT_INT40']  = 5
  82 item_lengths['FT_UINT48'] = 6
  83 item_lengths['FT_INT48']  = 6
  84 item_lengths['FT_UINT56'] = 7
  85 item_lengths['FT_INT56']  = 7
  86 item_lengths['FT_UINT64'] = 8
  87 item_lengths['FT_INT64']  = 8
  88 item_lengths['FT_ETHER']  = 6
  89 item_lengths['FT_IPv4']   = 4
  90 item_lengths['FT_IPv6']   = 16
  91
  92 # TODO: other types...
  93
  94
  95 # A check for a particular API function.
  96 class APICheck:
  97     def __init__(self, fun_name, allowed_types, positive_length=False):
  98         self.fun_name = fun_name
  99         self.allowed_types = allowed_types
 100         self.positive_length = positive_length
 101         self.calls = []
 102
 103         if fun_name.startswith('ptvcursor'):
 104             # RE captures function name + 1st 2 args (always ptvc + hfindex)
 105             self.p = re.compile('[^\n]*' +  self.fun_name + r'\s*\(([a-zA-Z0-9_]+),\s*([a-zA-Z0-9_]+)')
 106         elif fun_name.find('add_bitmask') == -1:
 107             # Normal case.
 108             # RE captures function name + 1st 2 args (always tree + hfindex + length)
 109             self.p = re.compile('[^\n]*' +  self.fun_name + r'\s*\(([a-zA-Z0-9_]+),\s*([a-zA-Z0-9_]+),\s*[a-zA-Z0-9_]+,\s*[a-zA-Z0-9_]+,\s*([a-zA-Z0-9_]+)')
 110         else:
 111             # _add_bitmask functions.
 112             # RE captures function name + 1st + 4th args (always tree + hfindex)
 113             # 6th arg is 'fields'
 114             self.p = re.compile('[^\n]*' +  self.fun_name + r'\s*\(([a-zA-Z0-9_]+),\s*[a-zA-Z0-9_]+,\s*[a-zA-Z0-9_]+,\s*([a-zA-Z0-9_]+)\s*,\s*[a-zA-Z0-9_]+\s*,\s*([a-zA-Z0-9_]+)\s*,')
 115
 116         self.file = None
 117         self.mask_allowed = True
 118         if fun_name.find('proto_tree_add_bits_') != -1:
 119             self.mask_allowed = False
 120
 121
 122     def find_calls(self, file, macros):
 123         self.file = file
 124         self.calls = []
 125
 126         with open(file, 'r', encoding="utf8") as f:
 127             contents = f.read()
 128             lines = contents.splitlines()
 129             total_lines = len(lines)
 130             for line_number,line in enumerate(lines):
 131                 # Want to check this, and next few lines
 132                 to_check = lines[line_number-1] + '\n'
 133                 # Nothing to check if function name isn't in it
 134                 if to_check.find(self.fun_name) != -1:
 135                     # Ok, add the next file lines before trying RE
 136                     for i in range(1, 4):
 137                         if to_check.find(';') != -1:
 138                             break
 139                         elif line_number+i < total_lines:
 140                             to_check += (lines[line_number-1+i] + '\n')
 141                     m = self.p.search(to_check)
 142                     if m:
 143                         fields = None
 144                         length = None
 145
 146                         if self.fun_name.find('add_bitmask') != -1:
 147                             fields = m.group(3)
 148                         else:
 149                             if self.p.groups == 3:
 150                                 length = m.group(3)
 151
 152                         # Add call. We have length if re had 3 groups.
 153                         self.calls.append(Call(self.fun_name,
 154                                                m.group(2),
 155                                                macros,
 156                                                line_number=line_number,
 157                                                length=length,
 158                                                fields=fields))
 159
 160     # Return true if bit position n is set in value.
 161     def check_bit(self, value, n):
 162         return (value & (0x1 << n)) != 0
 163
 164     def does_mask_cover_value(self, mask, value):
 165         # Walk past any l.s. 0 bits in value
 166         n = 0
 167
 168         # Walk through any bits that are set and check they are in mask
 169         while self.check_bit(value, n) and n <= 63:
 170             if not self.check_bit(mask, n):
 171                 return False
 172             n += 1
 173
 174         return True
 175
 176     def check_against_items(self, items_defined, items_declared, items_declared_extern, check_missing_items=False,
 177                             field_arrays=None):
 178         global errors_found
 179         global warnings_found
 180
 181         for call in self.calls:
 182
 183             # Check lengths, but for now only for APIs that have length in bytes.
 184             if self.fun_name.find('add_bits') == -1 and call.hf_name in items_defined:
 185                 if call.length and items_defined[call.hf_name].item_type in item_lengths:
 186                     if item_lengths[items_defined[call.hf_name].item_type] < call.length:
 187                         # Don't warn if adding value - value is unlikely to just be bytes value
 188                         if self.fun_name.find('_add_uint') == -1:
 189                             print('Warning:', self.file + ':' + str(call.line_number),
 190                                 self.fun_name + ' called for', call.hf_name, ' - ',
 191                                 'item type is', items_defined[call.hf_name].item_type, 'but call has len', call.length)
 192                             warnings_found += 1
 193
 194             # Needs a +ve length
 195             if self.positive_length and call.length is not None:
 196                 if call.length != -1 and call.length <= 0:
 197                     print('Error: ' +  self.fun_name + '(.., ' + call.hf_name + ', ...) called at ' +
 198                           self.file + ':' + str(call.line_number) +
 199                           ' with length ' + str(call.length) + ' - must be > 0 or -1')
 200                     errors_found += 1
 201
 202             if call.hf_name in items_defined:
 203                 # Is type allowed?
 204                 if items_defined[call.hf_name].item_type not in self.allowed_types:
 205                     print('Error: ' +  self.fun_name + '(.., ' + call.hf_name + ', ...) called at ' +
 206                           self.file + ':' + str(call.line_number) +
 207                           ' with type ' + items_defined[call.hf_name].item_type)
 208                     print('    (allowed types are', self.allowed_types, ')\n')
 209                     errors_found += 1
 210                 # No mask allowed
 211                 if not self.mask_allowed and items_defined[call.hf_name].mask_value != 0:
 212                     print('Error: ' +  self.fun_name + '(.., ' + call.hf_name + ', ...) called at ' +
 213                           self.file + ':' + str(call.line_number) +
 214                           ' with mask ' + items_defined[call.hf_name].mask + '    (must be zero!)\n')
 215                     errors_found += 1
 216
 217             if self.fun_name.find('add_bitmask') != -1 and call.hf_name in items_defined and field_arrays:
 218                 if call.fields in field_arrays:
 219                     if (items_defined[call.hf_name].mask_value and
 220                         field_arrays[call.fields][1] != 0 and items_defined[call.hf_name].mask_value != field_arrays[call.fields][1]):
 221                         # TODO: only really a problem if bit is set in array but not in top-level item?
 222                         if not self.does_mask_cover_value(items_defined[call.hf_name].mask_value,
 223                                                           field_arrays[call.fields][1]):
 224                             print('Warning:', self.file, call.hf_name, call.fields, "masks don't match. root=",
 225                                 items_defined[call.hf_name].mask,
 226                                 "array has", hex(field_arrays[call.fields][1]))
 227                             warnings_found += 1
 228
 229             if check_missing_items:
 230                 if call.hf_name in items_declared and call.hf_name not in items_defined and call.hf_name not in items_declared_extern:
 231                 #not in common_hf_var_names:
 232                     print('Warning:', self.file + ':' + str(call.line_number),
 233                           self.fun_name + ' called for "' + call.hf_name + '"', ' - but no item found')
 234                     warnings_found += 1
 235
 236
 237 # Specialization of APICheck for add_item() calls
 238 class ProtoTreeAddItemCheck(APICheck):
 239     def __init__(self, ptv=None):
 240
 241         # RE will capture whole call.
 242
 243         if not ptv:
 244             # proto_item *
 245             # proto_tree_add_item(proto_tree *tree, int hfindex, tvbuff_t *tvb,
 246             #                     const gint start, gint length, const unsigned encoding)
 247             self.fun_name = 'proto_tree_add_item'
 248             self.p = re.compile('[^\n]*' + self.fun_name + r'\s*\(\s*[a-zA-Z0-9_]+?,\s*([a-zA-Z0-9_]+?),\s*[a-zA-Z0-9_\+\s]+?,\s*([^,.]+?),\s*(.+),\s*([^,.]+?)\);')
 249         else:
 250             # proto_item *
 251             # ptvcursor_add(ptvcursor_t *ptvc, int hfindex, gint length,
 252             #               const unsigned encoding)
 253             self.fun_name = 'ptvcursor_add'
 254             self.p = re.compile('[^\n]*' + self.fun_name + r'\s*\([^,.]+?,\s*([^,.]+?),\s*([^,.]+?),\s*([a-zA-Z0-9_\-\>]+)')
 255
 256
 257     def find_calls(self, file, macros):
 258         self.file = file
 259         self.calls = []
 260         with open(file, 'r', encoding="utf8") as f:
 261
 262             contents = f.read()
 263             lines = contents.splitlines()
 264             total_lines = len(lines)
 265             for line_number,line in enumerate(lines):
 266                 # Want to check this, and next few lines
 267                 to_check = lines[line_number-1] + '\n'
 268                 # Nothing to check if function name isn't in it
 269                 fun_idx = to_check.find(self.fun_name)
 270                 if fun_idx != -1:
 271                     # Ok, add the next file lines before trying RE
 272                     for i in range(1, 5):
 273                         if to_check.find(';') != -1:
 274                             break
 275                         elif line_number+i < total_lines:
 276                             to_check += (lines[line_number-1+i] + '\n')
 277                     # Lose anything before function call itself.
 278                     to_check = to_check[fun_idx:]
 279                     m = self.p.search(to_check)
 280                     if m:
 281                         # Throw out if parens not matched
 282                         if m.group(0).count('(') != m.group(0).count(')'):
 283                             continue
 284
 285                         enc = m.group(4)
 286                         hf_name = m.group(1)
 287                         if not enc.startswith('ENC_'):
 288                             if enc not in { 'encoding', 'enc', 'client_is_le', 'cigi_byte_order', 'endian', 'endianess', 'machine_encoding', 'byte_order', 'bLittleEndian',
 289                                             'p_mq_parm->mq_str_enc', 'p_mq_parm->mq_int_enc',
 290                                             'iEnc', 'strid_enc', 'iCod', 'nl_data->encoding',
 291                                             'argp->info->encoding', 'gquic_info->encoding', 'writer_encoding',
 292                                             'tds_get_int2_encoding(tds_info)',
 293                                             'tds_get_int4_encoding(tds_info)',
 294                                             'tds_get_char_encoding(tds_info)',
 295                                             'info->encoding',
 296                                             'item->encoding',
 297                                             'DREP_ENC_INTEGER(drep)', 'string_encoding', 'item', 'type',
 298                                             'dvb_enc_to_item_enc(encoding)',
 299                                             'packet->enc',
 300                                             'IS_EBCDIC(uCCS) ? ENC_EBCDIC : ENC_ASCII',
 301                                             'DREP_ENC_INTEGER(hdr->drep)',
 302                                             'dhcp_uuid_endian',
 303                                             'payload_le',
 304                                             'local_encoding',
 305                                             'big_endian',
 306                                             'hf_data_encoding',
 307                                             'IS_EBCDIC(eStr) ? ENC_EBCDIC : ENC_ASCII',
 308                                             'big_endian ? ENC_BIG_ENDIAN : ENC_LITTLE_ENDIAN',
 309                                             '(skip == 1) ? ENC_BIG_ENDIAN : ENC_LITTLE_ENDIAN',
 310                                             'pdu_info->sbc', 'pdu_info->mbc',
 311                                             'seq_info->txt_enc | ENC_NA',
 312                                             'BASE_SHOW_UTF_8_PRINTABLE',
 313                                             'dhcp_secs_endian',
 314                                             'is_mdns ? ENC_UTF_8|ENC_NA : ENC_ASCII|ENC_NA',
 315                                             'xl_encoding',
 316                                             'my_frame_data->encoding_client', 'my_frame_data->encoding_results'
 317
 318                                           }:
 319                                 global warnings_found
 320
 321                                 print('Warning:', self.file + ':' + str(line_number),
 322                                       self.fun_name + ' called for "' + hf_name + '"',  'check last/enc param:', enc, '?')
 323                                 warnings_found += 1
 324                         self.calls.append(Call(self.fun_name, hf_name, macros, line_number=line_number, offset=m.group(2), length=m.group(3)))
 325
 326     def check_against_items(self, items_defined, items_declared, items_declared_extern,
 327                             check_missing_items=False, field_arrays=None):
 328         # For now, only complaining if length if call is longer than the item type implies.
 329         #
 330         # Could also be bugs where the length is always less than the type allows.
 331         # Would involve keeping track (in the item) of whether any call had used the full length.
 332
 333         global warnings_found
 334
 335         for call in self.calls:
 336             if call.hf_name in items_defined:
 337                 if call.length and items_defined[call.hf_name].item_type in item_lengths:
 338                     if item_lengths[items_defined[call.hf_name].item_type] < call.length:
 339                         # On balance, it is not worth complaining about these - the value is unlikely to be
 340                         # just the value found in these bytes..
 341                         if self.fun_name.find('_add_uint') == -1:
 342                             print('Warning:', self.file + ':' + str(call.line_number),
 343                                 self.fun_name + ' called for', call.hf_name, ' - ',
 344                                 'item type is', items_defined[call.hf_name].item_type, 'but call has len', call.length)
 345                             warnings_found += 1
 346             elif check_missing_items:
 347                 if call.hf_name in items_declared and call.hf_name not in items_declared_extern:
 348                 #not in common_hf_var_names:
 349                     print('Warning:', self.file + ':' + str(call.line_number),
 350                           self.fun_name + ' called for "' + call.hf_name + '"', ' - but no item found')
 351                     warnings_found += 1
 352
 353
 354
 355 ##################################################################################################
 356 # This is a set of items (by filter name) where we know that the bitmask is non-contiguous,
 357 # but is still believed to be correct.
 358 known_non_contiguous_fields = { 'wlan.fixed.capabilities.cfpoll.sta',
 359                                 'wlan.wfa.ie.wme.qos_info.sta.reserved',
 360                                 'btrfcomm.frame_type',   # https://os.itec.kit.edu/downloads/sa_2006_roehricht-martin_flow-control-in-bluez.pdf
 361                                 'capwap.control.message_element.ac_descriptor.dtls_policy.r', # RFC 5415
 362                                 'couchbase.extras.subdoc.flags.reserved',
 363                                 'wlan.fixed.capabilities.cfpoll.ap',   # These are 3 separate bits...
 364                                 'wlan.wfa.ie.wme.tspec.ts_info.reserved', # matches other fields in same sequence
 365                                 'zbee_zcl_se.pp.attr.payment_control_configuration.reserved', # matches other fields in same sequence
 366                                 'zbee_zcl_se.pp.snapshot_payload_cause.reserved',  # matches other fields in same sequence
 367                                 'ebhscr.eth.rsv',  # matches other fields in same sequence
 368                                 'v120.lli',  # non-contiguous field (http://www.acacia-net.com/wwwcla/protocol/v120_l2.htm)
 369                                 'stun.type.class',
 370                                 'bssgp.csg_id', 'tiff.t6.unused', 'artnet.ip_prog_reply.unused',
 371                                 'telnet.auth.mod.enc', 'osc.message.midi.bender', 'btle.data_header.rfu',
 372                                 'stun.type.method', # figure 3 in rfc 5389
 373                                 'tds.done.status', # covers all bits in bitset
 374                                 'hf_iax2_video_csub',  # RFC 5456, table 8.7
 375                                 'iax2.video.subclass',
 376                                 'dnp3.al.ana.int',
 377                                 'pwcesopsn.cw.lm',
 378                                 'gsm_a.rr.format_id', # EN 301 503
 379                                 'siii.mst.phase', # comment in code seems convinced
 380                                 'xmcp.type.class',
 381                                 'xmcp.type.method',
 382                                 'hf_hiqnet_flags',
 383                                 'hf_hiqnet_flagmask',
 384                                 'hf_h223_mux_mpl',
 385                                 'rdp.flags.pkt',
 386                                 'erf.flags.if_raw',  # confirmed by Stephen Donnelly
 387                                 'oran_fh_cus.sReSMask'
 388                               }
 389 ##################################################################################################
 390
 391
 392 field_widths = {
 393     'FT_BOOLEAN' : 64,   # TODO: Width depends upon 'display' field
 394     'FT_CHAR'    : 8,
 395     'FT_UINT8'   : 8,
 396     'FT_INT8'    : 8,
 397     'FT_UINT16'  : 16,
 398     'FT_INT16'   : 16,
 399     'FT_UINT24'  : 24,
 400     'FT_INT24'   : 24,
 401     'FT_UINT32'  : 32,
 402     'FT_INT32'   : 32,
 403     'FT_UINT40'  : 40,
 404     'FT_INT40'   : 40,
 405     'FT_UINT48'  : 48,
 406     'FT_INT48'   : 48,
 407     'FT_UINT56'  : 56,
 408     'FT_INT56'   : 56,
 409     'FT_UINT64'  : 64,
 410     'FT_INT64'   : 64
 411 }
 412
 413 def is_ignored_consecutive_filter(filter):
 414     ignore_filters = {
 415         'elf.sh_type',
 416         'elf.p_type',
 417         'btavrcp.pdu_id',
 418         'netlogon.dummy_string',
 419         'opa.reserved',
 420         'wassp.data.mu_mac',
 421         'thrift.type',
 422         'quake2.game.client.command.move.angles',
 423         'ipp.enum_value',
 424         'idrp.error.subcode',
 425         'ftdi-ft.lValue',
 426         '6lowpan.src',
 427         'couchbase.flex_frame.frame.id',
 428         'rtps.param.id',
 429         'rtps.locator.port',
 430         'sigcomp.udvm.value',
 431         'opa.mad.attributemodifier.n',
 432         'smb.cmd',
 433         'sctp.checksum',
 434         'dhcp.option.end',
 435         'nfapi.num.bf.vector.bf.value',
 436         'dnp3.al.range.abs',
 437         'dnp3.al.range.quantity',
 438         'dnp3.al.index',
 439         'dnp3.al.size',
 440         'ftdi-ft.hValue',
 441         'homeplug_av.op_attr_cnf.data.sw_sub',
 442         'radiotap.he_mu.preamble_puncturing',
 443         'ndmp.file',
 444         'ocfs2.dlm.lvb',
 445         'oran_fh_cus.reserved',
 446         'qnet6.kif.msgsend.msg.read.xtypes0-7',
 447         'qnet6.kif.msgsend.msg.write.xtypes0-7',
 448         'mih.sig_strength',
 449         'couchbase.flex_frame.frame.len',
 450         'nvme-rdma.read_to_host_req',
 451         'rpcap.dummy',
 452         'sflow.flow_sample.output_interface',
 453         'socks.results',
 454         'opa.mad.attributemodifier.p',
 455         'v5ua.efa',
 456         'zbncp.data.tx_power',
 457         'zbncp.data.nwk_addr',
 458         'zbee_zcl_hvac.pump_config_control.attr.ctrl_mode',
 459         'nat-pmp.external_port',
 460         'zbee_zcl.attr.float',
 461         'wpan-tap.phr.fsk_ms.mode',
 462         'mysql.exec_flags',
 463         'pim.metric_pref',
 464         'modbus.regval_float',
 465         'alcap.cau.value',
 466         'bpv7.crc_field',
 467         'at.chld.mode',
 468         'btl2cap.psm',
 469         'srvloc.srvtypereq.nameauthlistlen',
 470         'a11.ext.code',
 471         'adwin_config.port',
 472         'afp.unknown',
 473         'ansi_a_bsmap.mid.digit_1',
 474         'ber.unknown.OCTETSTRING',
 475         'btatt.handle',
 476         'btl2cap.option_flushto',
 477         'cip.network_segment.prod_inhibit',
 478         'cql.result.rows.table_name',
 479         'dcom.sa.vartype',
 480         'f5ethtrailer.slot',
 481         'ipdr.cm_ipv6_addr',
 482         'mojito.kuid',
 483         'mtp3.priority',
 484         'pw.cw.length',
 485         'rlc.ciphered_data',
 486         'vp8.pld.pictureid',
 487         'gryphon.sched.channel',
 488         'pn_io.ioxs',
 489         'pn_dcp.block_qualifier_reset',
 490         'pn_dcp.suboption_device_instance',
 491         'nfs.attr',
 492         'nfs.create_session_flags',
 493         'rmt-lct.toi64',
 494         'gryphon.data.header_length',
 495         'quake2.game.client.command.move.movement',
 496         'isup.parameter_type',
 497         'cip.port',
 498         'adwin.fifo_no',
 499         'bthci_evt.hci_vers_nr',
 500         'gryphon.usdt.stmin_active',
 501         'dnp3.al.anaout.int',
 502         'dnp3.al.ana.int',
 503         'dnp3.al.cnt',
 504         'bthfp.chld.mode',
 505         'nat-pmp.pml',
 506         'isystemactivator.actproperties.ts.hdr',
 507         'rtpdump.txt_addr',
 508         'unistim.vocoder.id',
 509         'mac.ueid',
 510         'cip.symbol.size',
 511         'dnp3.al.range.start',
 512         'dnp3.al.range.stop',
 513         'gtpv2.mp',
 514         'gvcp.cmd.resend.firstpacketid',
 515         'gvcp.cmd.resend.lastpacketid',
 516         'wlan.bf.reserved',
 517         'opa.sa.reserved',
 518         'rmt-lct.ext_tol_transfer_len',
 519         'pn_io.error_code2',
 520         'gryphon.ldf.schedsize',
 521         'wimaxmacphy.burst_opt_mimo_matrix_indicator',
 522         'ccsds.packet_type',
 523         'iso15765.flow_control.stmin',
 524         'msdo.PieceSize',
 525         'opa.clasportinfo.redirect.reserved',
 526         'p_mul.unused',
 527         'opa.pm.dataportcounters.reserved',
 528         'opa.switchinfo.switchcapabilitymask.reserved',
 529         'nvme-rdma.read_from_host_resp',
 530         'nvme-rdma.write_to_host_req',
 531         'netlink-route.ifla_linkstats.rx_errors.fifo_errs',
 532         'mtp3mg.japan_spare',
 533         'ixveriwave.errors.ip_checksum_error',
 534         'bpsec.asb.result_count',
 535         'btle.control.phys.le_coded_phy',
 536         'gsm_rlcmac.ul.gprs_multislot_class_exist',
 537         'tpm.resp.size',
 538         'sasp.flags.quiesce',
 539         'canopen.sdo.n',
 540         'cigi.celestial_sphere_control.date',
 541         'corosync_totemsrp.orf_token.seq',
 542         'dec_dna.flags.msglen',
 543         'hiqnet.device',
 544         'ipdr.cm_ipv6_addr_len',
 545         'ipdr.cm_ipv6_addr_string',
 546         'mpeg_descr.phone.nat_code_len'
 547     }
 548     if filter in ignore_filters:
 549         return True
 550
 551
 552     ignore_patterns = [
 553         re.compile(r'^nstrace.trcdbg.val(\d+)'),
 554         re.compile(r'^mpls_pm.timestamp\d\..*'),
 555         re.compile(r'alcap.*bwt.*.[b|f]w'),
 556         re.compile(r'btle.control.phys.le_[1|2]m_phy'),
 557         re.compile(r'ansi_a_bsmap.cm2.scm.bc_entry.opmode[0|1]'),
 558         re.compile(r'cemi.[n|x]')
 559     ]
 560     for patt in ignore_patterns:
 561         if patt.match(filter):
 562             return True
 563
 564     return False
 565
 566
 567 class ValueString:
 568     def __init__(self, file, name, vals, macros, do_extra_checks=False):
 569         self.file = file
 570         self.name = name
 571         self.raw_vals = vals
 572         self.parsed_vals = {}
 573         self.seen_labels = set()
 574         self.valid = True
 575         self.min_value =  99999
 576         self.max_value = -99999
 577
 578         # Now parse out each entry in the value_string
 579         matches = re.finditer(r'\{\s*([0-9_A-Za-z]*)\s*,\s*(".*?")\s*}\s*,', self.raw_vals)
 580         for m in matches:
 581             value,label = m.group(1), m.group(2)
 582             if value in macros:
 583                 value = macros[value]
 584             elif any(c not in '0123456789abcdefABCDEFxX' for c in value):
 585                 self.valid = False
 586                 return
 587
 588             try:
 589                 # Read according to the appropriate base.
 590                 if value.lower().startswith('0x'):
 591                     value = int(value, 16)
 592                 elif value.startswith('0b'):
 593                     value = int(value[2:], 2)
 594                 elif value.startswith('0'):
 595                     value = int(value, 8)
 596                 else:
 597                     value = int(value, 10)
 598             except Exception:
 599                 return
 600
 601             global warnings_found
 602
 603             # Check for value conflict before inserting
 604             if do_extra_checks and value in self.parsed_vals and label == self.parsed_vals[value]:
 605                 print('Warning:', self.file, ': value_string', self.name, '- value ', value, 'repeated with same string - ', label)
 606                 warnings_found += 1
 607
 608             # Same value, different label
 609             if value in self.parsed_vals and label != self.parsed_vals[value]:
 610                 print('Warning:', self.file, ': value_string', self.name, '- value ', value, 'repeated with different values - was',
 611                     self.parsed_vals[value], 'now', label)
 612                 warnings_found += 1
 613             else:
 614                 # Add into table, while checking for repeated label
 615                 self.parsed_vals[value] = label
 616                 if do_extra_checks and label in self.seen_labels:
 617                     # These are commonly repeated..
 618                     exceptions = [ 'reserved', 'invalid', 'unused', 'not used', 'unknown', 'undefined', 'spare',
 619                                    'unallocated', 'not assigned', 'implementation specific', 'unspecified',
 620                                    'other', 'for further study', 'future', 'vendor specific', 'obsolete', 'none',
 621                                    'shall not be used', 'national use', 'unassigned', 'oem', 'user defined',
 622                                    'manufacturer specific', 'not specified', 'proprietary', 'operator-defined',
 623                                    'dynamically allocated', 'user specified', 'xxx', 'default', 'planned', 'not req',
 624                                    'deprecated', 'not measured', 'unspecified', 'nationally defined', 'nondisplay', 'general',
 625                                    'tbd' ]
 626                     excepted = False
 627                     for ex in exceptions:
 628                         if label.lower().find(ex) != -1:
 629                             excepted = True
 630                             break
 631
 632                     if not excepted and len(label)>2:
 633                         print('Warning:', self.file, ': value_string', self.name, '- label ', label, 'repeated')
 634                         warnings_found += 1
 635                 else:
 636                     self.seen_labels.add(label)
 637
 638                 if value > self.max_value:
 639                     self.max_value = value
 640                 if value < self.min_value:
 641                     self.min_value = value
 642
 643     def extraChecks(self):
 644         global warnings_found
 645
 646         # Look for one value missing in range (quite common...)
 647         num_items = len(self.parsed_vals)
 648         span = self.max_value - self.min_value + 1
 649         if num_items > 4 and span > num_items and (span-num_items <=1):
 650             for val in range(self.min_value, self.max_value):
 651                 if val not in self.parsed_vals:
 652                     print('Warning:', self.file, ': value_string', self.name, '- value', val, 'missing?', '(', num_items, 'entries)')
 653                     global warnings_found
 654                     warnings_found += 1
 655
 656         # Do most of the labels match the number?
 657         matching_label_entries = set()
 658         for val in self.parsed_vals:
 659             if self.parsed_vals[val].find(str(val)) != -1:
 660                 # TODO: pick out multiple values rather than concat into wrong number
 661                 parsed_value = int(''.join(d for d in self.parsed_vals[val] if d.isdecimal()))
 662                 if val == parsed_value:
 663                     matching_label_entries.add(val)
 664
 665         if len(matching_label_entries) >= 4 and len(matching_label_entries) > 0 and len(matching_label_entries) < num_items and len(matching_label_entries) >= num_items-1:
 666             # Be forgiving about first or last entry
 667             first_val = list(self.parsed_vals)[0]
 668             last_val =  list(self.parsed_vals)[-1]
 669             if first_val not in matching_label_entries or last_val not in matching_label_entries:
 670                 return
 671             print('Warning:', self.file, ': value_string', self.name, 'Labels match value except for 1!', matching_label_entries, num_items, self)
 672
 673         # Do all labels start with lower-or-upper char?
 674         startLower,startUpper = 0,0
 675         for val in self.parsed_vals:
 676             first_letter = self.parsed_vals[val][1]
 677             if first_letter.isalpha():
 678                 if first_letter.isupper():
 679                     startUpper += 1
 680                 else:
 681                     startLower += 1
 682         if startLower > 0 and startUpper > 0:
 683             if startLower+startUpper > 10 and (startLower <=3 or startUpper <=3):
 684                 standouts = []
 685                 if startLower < startUpper:
 686                     standouts += [self.parsed_vals[val] for val in self.parsed_vals if self.parsed_vals[val][1].islower()]
 687                 if startLower > startUpper:
 688                     standouts += [self.parsed_vals[val] for val in self.parsed_vals if self.parsed_vals[val][1].isupper()]
 689
 690                 print('Note:', self.file, ': value_string', self.name, 'mix of upper', startUpper, 'and lower', startLower, standouts)
 691
 692
 693     def __str__(self):
 694         return  self.name + '= { ' + self.raw_vals + ' }'
 695
 696
 697 class RangeStringEntry:
 698     def __init__(self, min, max, label):
 699         self.min = min
 700         self.max = max
 701         self.label = label
 702
 703     def hides(self, min, max):
 704         return min >= self.min and max <= self.max
 705
 706     def __str__(self):
 707         return '(' + str(self.min) + ', ' + str(self.max) + ') -> ' + self.label
 708
 709
 710 class RangeString:
 711     def __init__(self, file, name, vals, macros, do_extra_checks=False):
 712         self.file = file
 713         self.name = name
 714         self.raw_vals = vals
 715         self.parsed_vals = []
 716         self.seen_labels = set()
 717         self.valid = True
 718         self.min_value =  99999
 719         self.max_value = -99999
 720
 721         # Now parse out each entry in the value_string
 722         matches = re.finditer(r'\{\s*([0-9_A-Za-z]*)\s*,\s*([0-9_A-Za-z]*)\s*,\s*(".*?")\s*\}\s*,', self.raw_vals)
 723         for m in matches:
 724             min,max,label = m.group(1), m.group(2), m.group(3)
 725             if min in macros:
 726                 min = macros[min]
 727             elif any(c not in '0123456789abcdefABCDEFxX' for c in min):
 728                 self.valid = False
 729                 return
 730             if max in macros:
 731                 max = macros[max]
 732             elif any(c not in '0123456789abcdefABCDEFxX' for c in max):
 733                 self.valid = False
 734                 return
 735
 736             try:
 737                 # Read according to the appropriate base.
 738                 if min.lower().startswith('0x'):
 739                     min = int(min, 16)
 740                 elif min.startswith('0b'):
 741                     min = int(min[2:], 2)
 742                 elif min.startswith('0'):
 743                     min = int(min, 8)
 744                 else:
 745                     min = int(min, 10)
 746
 747                 if max.lower().startswith('0x'):
 748                     max = int(max, 16)
 749                 elif max.startswith('0b'):
 750                     max = int(max[2:], 2)
 751                 elif max.startswith('0'):
 752                     max = int(max, 8)
 753                 else:
 754                     max = int(max, 10)
 755             except Exception:
 756                 return
 757
 758             # Now check what we've found.
 759             global warnings_found
 760
 761             if min < self.min_value:
 762                 self.min_value = min
 763             # For overall max value, still use min of each entry.
 764             # It is common for entries to extend to e.g. 0xff, but at least we can check for items
 765             # that can never match if we only check the min.
 766             if min > self.max_value:
 767                 self.max_value = min
 768
 769             # This value should not be entirely hidden by earlier entries
 770             for prev in self.parsed_vals:
 771                 if prev.hides(min, max):
 772                     print('Warning:', self.file, ': range_string label', label, 'hidden by', prev)
 773                     warnings_found += 1
 774
 775             # Min should not be > max
 776             if min > max:
 777                 print('Warning:', self.file, ': range_string', self.name, 'entry', label, 'min', min, '>', max)
 778                 warnings_found += 1
 779
 780             # Check label.
 781             if label[1:-1].startswith(' ') or label[1:-1].endswith(' '):
 782                 print('Warning:', self.file, ': range_string', self.name, 'entry', label, 'starts or ends with space')
 783                 warnings_found += 1
 784
 785             # OK, add this entry
 786             self.parsed_vals.append(RangeStringEntry(min, max, label))
 787
 788         # TODO: mark as not valid if not all pairs were successfully parsed?
 789
 790     def extraChecks(self):
 791         global warnings_found
 792
 793         # if in all cases min==max, suggest value_string instead?
 794         could_use_value_string = True
 795         for val in self.parsed_vals:
 796             if val.min != val.max:
 797                 could_use_value_string = False
 798                 break
 799
 800         # Look for gaps
 801         gaps = []    # N.B. could become huge if added every number, so only record first number inside each gap
 802         current = None
 803         for val in self.parsed_vals:
 804             if current:
 805                 if val.min > current+1:
 806                     gaps.append(current+1)
 807             current = val.max
 808
 809         # Check whether each gap is actually covered.
 810         for n in gaps:
 811             covered = False
 812             for val in self.parsed_vals:
 813                 if n >= val.min and n <= val.max:
 814                     covered = True
 815                     break
 816             if not covered:
 817                 print('Warning:', self.file, ': range_string', self.name, 'value', str(n) + '-?', '(' + str(hex(n)) +'-?)', 'not covered by any entries')
 818                 warnings_found += 1
 819
 820         if could_use_value_string:
 821             print('Warning:', self.file, ': range_string', self.name, 'could be value_string instead!')
 822             warnings_found += 1
 823
 824         # TODO: can multiple values be coalesced into fewer?
 825         # TODO: Partial overlapping?
 826
 827
 828
 829 class StringString:
 830     def __init__(self, file, name, vals, macros, do_extra_checks=False):
 831         self.file = file
 832         self.name = name
 833         self.raw_vals = vals
 834         self.parsed_vals = {}
 835
 836         terminated = False
 837         global errors_found
 838
 839         # Now parse out each entry in the string_string
 840         matches = re.finditer(r'\{\s*(["0-9_A-Za-z\s\-]*?)\s*,\s*(["0-9_A-Za-z\s\-]*)\s*', self.raw_vals)
 841         for m in matches:
 842             key = m.group(1).strip()
 843             value = m.group(2).strip()
 844             if key in self.parsed_vals:
 845                 print('Error:', self.file, ': string_string', self.name, 'entry', key, 'has been added twice (values',
 846                       self.parsed_vals[key], 'and now', value, ')')
 847                 errors_found += 1
 848
 849             else:
 850                 self.parsed_vals[key] = value
 851                 # TODO: Also allow key to be "0" ?
 852                 if (key in { "NULL" }) and value == "NULL":
 853                     terminated = True
 854
 855         if not terminated:
 856             print('Error:', self.file, ': string_string', self.name, "is not terminated with { NULL, NULL }")
 857             errors_found += 1
 858
 859     def extraChecks(self):
 860         pass
 861         # TODO: ?
 862
 863
 864
 865 # Look for value_string entries in a dissector file.  Return a dict name -> ValueString
 866 def findValueStrings(filename, macros, do_extra_checks=False):
 867     vals_found = {}
 868
 869     #static const value_string radio_type_vals[] =
 870     #{
 871     #    { 0,      "FDD"},
 872     #    { 1,      "TDD"},
 873     #    { 0, NULL }
 874     #};
 875
 876     with open(filename, 'r', encoding="utf8") as f:
 877         contents = f.read()
 878
 879         # Remove comments so as not to trip up RE.
 880         contents = removeComments(contents)
 881
 882         matches =   re.finditer(r'.*const value_string\s*([a-zA-Z0-9_]*)\s*\[\s*\]\s*\=\s*\{([\{\}\d\,a-zA-Z0-9_\-\*\#\.:\/\(\)\'\s\"]*)\};', contents)
 883         for m in matches:
 884             name = m.group(1)
 885             vals = m.group(2)
 886             vals_found[name] = ValueString(filename, name, vals, macros, do_extra_checks)
 887
 888     return vals_found
 889
 890 # Look for range_string entries in a dissector file.  Return a dict name -> RangeString
 891 def findRangeStrings(filename, macros, do_extra_checks=False):
 892     vals_found = {}
 893
 894     #static const range_string symbol_table_shndx_rvals[] = {
 895     #    { 0x0000, 0x0000,  "Undefined" },
 896     #    { 0x0001, 0xfeff,  "Normal Section" },
 897     #    { 0, 0, NULL }
 898     #};
 899
 900     with open(filename, 'r', encoding="utf8") as f:
 901         contents = f.read()
 902
 903         # Remove comments so as not to trip up RE.
 904         contents = removeComments(contents)
 905
 906         matches =   re.finditer(r'.*const range_string\s*([a-zA-Z0-9_]*)\s*\[\s*\]\s*\=\s*\{([\{\}\d\,a-zA-Z0-9_\-\*\#\.:\/\(\)\'\s\"]*)\};', contents)
 907         for m in matches:
 908             name = m.group(1)
 909             vals = m.group(2)
 910             vals_found[name] = RangeString(filename, name, vals, macros, do_extra_checks)
 911
 912     return vals_found
 913
 914 # Look for string_string entries in a dissector file.  Return a dict name -> StringString
 915 def findStringStrings(filename, macros, do_extra_checks=False):
 916     vals_found = {}
 917
 918     #static const string_string ice_candidate_types[] = {
 919     #    { "host",       "Host candidate" },
 920     #    { "srflx",      "Server reflexive candidate" },
 921     #    { 0, NULL }
 922     #};
 923
 924     with open(filename, 'r', encoding="utf8") as f:
 925         contents = f.read()
 926
 927         # Remove comments so as not to trip up RE.
 928         contents = removeComments(contents)
 929
 930         matches =   re.finditer(r'.*const string_string\s*([a-zA-Z0-9_]*)\s*\[\s*\]\s*\=\s*\{([\{\}\d\,a-zA-Z0-9_\-\*\#\.:\/\(\)\'\s\"]*)\};', contents)
 931         for m in matches:
 932             name = m.group(1)
 933             vals = m.group(2)
 934             vals_found[name] = StringString(filename, name, vals, macros, do_extra_checks)
 935
 936     return vals_found
 937
 938
 939 # Look for expert entries in a dissector file.  Return ExpertEntries object
 940 def findExpertItems(filename, macros):
 941     with open(filename, 'r', encoding="utf8") as f:
 942         contents = f.read()
 943
 944         # Remove comments so as not to trip up RE.
 945         contents = removeComments(contents)
 946
 947         # Look for array of definitions. Looks something like this
 948         #static ei_register_info ei[] = {
 949         #    { &ei_oran_unsupported_bfw_compression_method, { "oran_fh_cus.unsupported_bfw_compression_method", PI_UNDECODED, PI_WARN, "Unsupported BFW Compression Method", EXPFILL }},
 950         #    { &ei_oran_invalid_sample_bit_width, { "oran_fh_cus.invalid_sample_bit_width", PI_UNDECODED, PI_ERROR, "Unsupported sample bit width", EXPFILL }},
 951         #};
 952
 953         expertEntries = ExpertEntries(filename)
 954
 955         definition_matches = re.finditer(r'static ei_register_info\s*([a-zA-Z0-9_]*)\s*\[\]\s*=\s*\{(.*?)\};', contents, re.MULTILINE|re.DOTALL)
 956         for d in definition_matches:
 957             entries = d.group(2)
 958
 959             # Now separate out each entry
 960             matches = re.finditer(r'\{\s*&([a-zA-Z0-9_]*)\s*\,\s*\{\s*\"(.*?)\"\s*\,\s*([A-Z_]*)\,\s*([A-Z_]*)\,\s*\"(.*?)\"\s*\,\s*EXPFILL\s*\}\s*\}',
 961                                     entries, re.MULTILINE|re.DOTALL)
 962             for match in matches:
 963                 expertEntry = ExpertEntry(filename, name=match.group(1), filter=match.group(2), group=match.group(3),
 964                                             severity=match.group(4), summary=match.group(5))
 965                 expertEntries.AddEntry(expertEntry)
 966
 967         return expertEntries
 968
 969 def findDeclaredTrees(filename):
 970     trees = []
 971     with open(filename, 'r', encoding="utf8") as f:
 972         contents = f.read()
 973
 974         # Remove comments so as not to trip up RE.
 975         contents = removeComments(contents)
 976
 977         definition_matches = re.finditer(r'static int\s*\s*(ett_[a-zA-Z0-9_]*)\s*;', contents, re.MULTILINE|re.DOTALL)
 978         for d in definition_matches:
 979             trees.append(d.group(1))
 980
 981     return trees
 982
 983 def findDefinedTrees(filename, declared):
 984     with open(filename, 'r', encoding="utf8") as f:
 985         contents = f.read()
 986
 987         # Remove comments so as not to trip up RE.
 988         contents = removeComments(contents)
 989
 990         # Look for array of definitions. Looks something like this
 991         # static int *ett[] = {
 992         #    &ett_oran,
 993         #    &ett_oran_ecpri_pcid,
 994         #    &ett_oran_ecpri_rtcid,
 995         #    &ett_oran_ecpri_seqid
 996         # };
 997
 998         trees = set()
 999
1000         # Not insisting that this array is static..
1001         definition_matches = re.finditer(r'int\s*\*\s*(?:const|)\s*[a-zA-Z0-9_]*?ett[a-zA-Z0-9_]*\s*\[\]\s*=\s*\{(.*?)\};', contents, re.MULTILINE|re.DOTALL)
1002         for d in definition_matches:
1003             entries = d.group(1)
1004
1005             # Now separate out each entry
1006             matches = re.finditer(r'\&(ett_[a-zA-Z0-9_]+)',
1007                                   entries, re.MULTILINE|re.DOTALL)
1008             for match in matches:
1009                 ett = match.group(1)
1010
1011                 if ett not in declared:
1012                     # N.B., this check will avoid matches with arrays (which won't match 'declared' re)
1013                     continue
1014
1015                 # Don't think this can happen..
1016                 #if ett in trees:
1017                 #    print('Warning:', filename, ett, 'appears twice!!!')
1018                 trees.add(match.group(1))
1019         return trees
1020
1021 def checkExpertCalls(filename, expertEntries):
1022         with open(filename, 'r', encoding="utf8") as f:
1023             contents = f.read()
1024
1025             # Remove comments so as not to trip up RE.
1026             contents = removeComments(contents)
1027
1028             # Look for array of definitions. Looks something like this
1029             # expert_add_info(NULL, tree, &ei_oran_invalid_eaxc_bit_width);
1030             # OR
1031             # expert_add_info_format(pinfo, ti_data_length, &ei_data_length, "Data Length %d is too small, should be %d", data_length, payload_size - ECPRI_MSG_TYPE_4_PAYLOAD_MIN_LENGTH);
1032             matches = re.finditer(r'expert_add_info(?:_format|)\s*\(([a-zA-Z_0-9]*)\s*,\s*([a-zA-Z_0-9]*)\s*,\s*(&[a-zA-Z_0-9]*)', contents, re.MULTILINE|re.DOTALL)
1033             for m in matches:
1034                 item = m.group(3)[1:]
1035                 expertEntries.VerifyCall(item)
1036
1037
1038
1039 # These are the valid values from expert.h
1040 valid_groups = set(['PI_GROUP_MASK', 'PI_CHECKSUM', 'PI_SEQUENCE',
1041                     'PI_RESPONSE_CODE', 'PI_REQUEST_CODE', 'PI_UNDECODED', 'PI_REASSEMBLE',
1042                     'PI_MALFORMED', 'PI_DEBUG', 'PI_PROTOCOL', 'PI_SECURITY', 'PI_COMMENTS_GROUP',
1043                     'PI_DECRYPTION', 'PI_ASSUMPTION', 'PI_DEPRECATED', 'PI_RECEIVE',
1044                     'PI_INTERFACE', 'PI_DISSECTOR_BUG'])
1045
1046 valid_levels = set(['PI_COMMENT', 'PI_CHAT', 'PI_NOTE',
1047                     'PI_WARN', 'PI_ERROR'])
1048
1049
1050 # An individual entry
1051 class ExpertEntry:
1052     def __init__(self, filename, name, filter, group, severity, summary):
1053         self.name = name
1054         self.filter = filter
1055         self.group = group
1056         self.severity = severity
1057         self.summary = summary
1058
1059         global errors_found, warnings_found
1060
1061         # Some immediate checks
1062         if group not in valid_groups:
1063             print('Error:', filename, 'Expert group', group, 'is not in', valid_groups)
1064             errors_found += 1
1065
1066         if severity not in valid_levels:
1067             print('Error:', filename, 'Expert severity', severity, 'is not in', valid_levels)
1068             errors_found += 1
1069
1070         # Checks on the summary field
1071         if summary.startswith(' '):
1072             print('Warning:', filename, 'Expert info summary', '"' + summary + '"', 'for', name, 'starts with space')
1073             warnings_found += 1
1074         if summary.endswith(' '):
1075             print('Warning:', filename, 'Expert info summary', '"' + summary + '"', 'for', name, 'ends with space')
1076             warnings_found += 1
1077
1078         # The summary field is shown in the expert window without substituting args..
1079         if summary.find('%') != -1:
1080             print('Warning:', filename, 'Expert info summary', '"' + summary + '"', 'for', name, 'has format specifiers in it?')
1081             warnings_found += 1
1082
1083
1084
1085 # Collection of entries for this dissector
1086 class ExpertEntries:
1087     def __init__(self, filename):
1088         self.filename = filename
1089         self.entries = []
1090         self.summaries = set()  # key is (name, severity)
1091         self.reverselookup = {}  # summary -> previous-item
1092         self.filters = set()
1093
1094     def AddEntry(self, entry):
1095         self.entries.append(entry)
1096
1097         global errors_found, warnings_found
1098
1099         # If summaries are not unique, can't tell apart from expert window (need to look into frame to see details)
1100         if (entry.summary, entry.severity) in self.summaries:
1101             print('Warning:', self.filename, 'Expert summary', '"' + entry.summary + '"',
1102                   'has already been seen (now in', entry.name, '- previously in', self.reverselookup[entry.summary], ')')
1103             warnings_found += 1
1104         self.summaries.add((entry.summary, entry.severity))
1105         self.reverselookup[entry.summary] = entry.name
1106
1107         # Not sure if anyone ever filters on these, but check if are unique
1108         if entry.filter in self.filters:
1109             print('Warning:', self.filename, 'Expert filter', '"' + entry.filter + '"', 'has already been seen (now in', entry.name+')')
1110             warnings_found += 1
1111         self.filters.add(entry.filter)
1112
1113     def VerifyCall(self, item):
1114         # TODO: ignore if wasn't declared in self.filename?
1115         for entry in self.entries:
1116             if entry.name == item:
1117                 # Found,
1118                 return
1119
1120         # None matched...
1121         if item not in [ 'hf', 'dissect_hf' ]:
1122             global warnings_found
1123             print('Warning:', self.filename, 'Expert info added with', '"' + item + '"', 'was it was not registered (in this file)')
1124             warnings_found += 1
1125
1126
1127
1128 # The relevant parts of an hf item.  Used as value in dict where hf variable name is key.
1129 class Item:
1130
1131     # Keep the previous few items
1132     previousItems = []
1133
1134     def __init__(self, filename, hf, filter, label, item_type, display, strings, macros,
1135                  value_strings, range_strings,
1136                  mask=None, check_mask=False, mask_exact_width=False, check_label=False,
1137                  check_consecutive=False, blurb=''):
1138         self.filename = filename
1139         self.hf = hf
1140         self.filter = filter
1141         self.label = label
1142         self.blurb = blurb
1143         self.mask = mask
1144         self.strings = strings
1145         self.mask_exact_width = mask_exact_width
1146
1147         global warnings_found, errors_found
1148
1149         if blurb == '0':
1150             print('Error:', filename, hf, ': - filter "' + filter +
1151                 '" has blurb of 0 - if no string, please set NULL instead')
1152             errors_found += 1
1153
1154         self.set_mask_value(macros)
1155
1156         if check_consecutive:
1157             for previous_index,previous_item in enumerate(Item.previousItems):
1158                 if previous_item.filter == filter:
1159                     if label != previous_item.label:
1160                         if not is_ignored_consecutive_filter(self.filter):
1161                             print('Warning:', filename, hf, ': - filter "' + filter +
1162                                 '" appears ' + str(previous_index+1) + ' items before - labels are "' + previous_item.label + '" and "' + label + '"')
1163                             warnings_found += 1
1164
1165             # Add this one to front of (short) previous list
1166             Item.previousItems = [self] + Item.previousItems
1167             if len(Item.previousItems) > 5:
1168                 # Get rid of oldest one now
1169                 #Item.previousItems = Item.previousItems[:-1]
1170                 Item.previousItems.pop()
1171
1172         self.item_type = item_type
1173
1174         self.display = display
1175         self.set_display_value(macros)
1176
1177         # Optionally check label (short and long).
1178         if check_label:
1179             self.check_label(label, 'label')
1180             #self.check_label(blurb, 'blurb')
1181             self.check_blurb_vs_label()
1182
1183         # Optionally check that mask bits are contiguous
1184         if check_mask:
1185             if self.mask_read and mask not in { 'NULL', '0x0', '0', '0x00' }:
1186                 self.check_contiguous_bits(mask)
1187                 self.check_num_digits(self.mask)
1188                 # N.B., if last entry in set is removed, see around 18,000 warnings
1189                 self.check_digits_all_zeros(self.mask)
1190
1191         # N.B. these checks are already done by checkApis.pl
1192         if strings.find('RVALS') != -1 and display.find('BASE_RANGE_STRING') == -1:
1193             print('Warning: ' + filename, hf, 'filter "' + filter + ' strings has RVALS but display lacks BASE_RANGE_STRING')
1194             warnings_found += 1
1195
1196         # For RVALS, is BASE_RANGE_STRING also set (checked by checkApis.pl)?
1197         if strings.find('VALS_EXT_PTR') != -1 and display.find('BASE_EXT_STRING') == -1:
1198             print('Warning: ' + filename, hf, 'filter "' + filter + ' strings has VALS_EXT_PTR but display lacks BASE_EXT_STRING')
1199             warnings_found += 1
1200
1201         # For VALS, lookup the corresponding ValueString and try to check range.
1202         vs_re = re.compile(r'VALS\(([a-zA-Z0-9_]*)\)')
1203         m = vs_re.search(strings)
1204         if m:
1205             self.vs_name = m.group(1)
1206             if self.vs_name in value_strings:
1207                 vs = value_strings[self.vs_name]
1208                 self.check_value_string_range(vs.min_value, vs.max_value)
1209
1210         # For RVALS, lookup the corresponding RangeString and try to check range.
1211         rs_re = re.compile(r'RVALS\(([a-zA-Z0-9_]*)\)')
1212         m = rs_re.search(strings)
1213         if m:
1214             self.rs_name = m.group(1)
1215             if self.rs_name in range_strings:
1216                 rs = range_strings[self.rs_name]
1217                 self.check_range_string_range(rs.min_value, rs.max_value)
1218
1219         # Could/should this item be FT_FRAMENUM ?
1220         #if ((self.label.lower().find(' frame') != -1 or self.label.lower().find('frame ') != -1) and self.label.lower().find('frames') == -1 and
1221         #    (self.label.lower().find('in') != -1 or self.label.lower().find('for') != -1) and
1222         #    self.item_type == 'FT_UINT32' and self.mask_value == 0x0):
1223         #    print('Warning: ' + self.filename, self.hf, 'filter "' + self.filter + '", label "' + label + '"', 'item type is', self.item_type, '- could be FT_FRANENUM?')
1224         #    warnings_found += 1
1225
1226
1227     def __str__(self):
1228         return 'Item ({0} {1} "{2}" {3} type={4}:{5} {6} mask={7})'.format(self.filename, self.hf, self.label, self.filter, self.item_type, self.display, self.strings, self.mask)
1229
1230     def check_label(self, label, label_name):
1231         global warnings_found
1232
1233         # TODO: this is masking a bug where the re for the item can't cope with macro for containing ',' for mask arg..
1234         if label.count('"') == 1:
1235             return
1236
1237         if label.startswith(' ') or label.endswith(' '):
1238             print('Warning: ' + self.filename, self.hf, 'filter "' + self.filter, label_name,  '"' + label + '" begins or ends with a space')
1239             warnings_found += 1
1240
1241         if (label.count('(') != label.count(')') or
1242             label.count('[') != label.count(']') or
1243             label.count('{') != label.count('}')):
1244             # Ignore if includes quotes, as may be unbalanced.
1245             if label.find("'") == -1:
1246                 print('Warning: ' + self.filename, self.hf, 'filter "' + self.filter + '"', label_name, '"' + label + '"', 'has unbalanced parens/braces/brackets')
1247                 warnings_found += 1
1248         if self.item_type != 'FT_NONE' and label.endswith(':'):
1249             print('Warning: ' + self.filename, self.hf, 'filter "' + self.filter + '"', label_name, '"' + label + '"', 'ends with an unnecessary colon')
1250             warnings_found += 1
1251
1252     def check_blurb_vs_label(self):
1253         global warnings_found
1254         if self.blurb == "NULL":
1255             return
1256
1257         # Is the label longer than the blurb?
1258         # Generated dissectors tend to write the type into the blurb field...
1259         #if len(self.label) > len(self.blurb):
1260         #    print('Warning:', self.filename, self.hf, 'label="' + self.label + '" blurb="' + self.blurb + '"', "- label longer than blurb!!!")
1261
1262         # Is the blurb just the label in a different order?
1263         label_words = self.label.lower().split(' ')
1264         label_words.sort()
1265         blurb_words = self.blurb.lower().split(' ')
1266         blurb_words.sort()
1267
1268         # Subset - often happens when part specific to that field is dropped
1269         if set(label_words) > set(blurb_words):
1270             print('Warning:', self.filename, self.hf, 'label="' + self.label + '" blurb="' + self.blurb + '"', "- words in blurb are subset of label words")
1271             warnings_found += 1
1272
1273         # Just a re-ordering (but may also contain capitalization changes.)
1274         if blurb_words == label_words:
1275             print('Warning:', self.filename, self.hf, 'label="' + self.label + '" blurb="' + self.blurb + '"', "- blurb words are label words (re-ordered?)")
1276             warnings_found += 1
1277
1278         # TODO: could have item know protocol name(s) from file this item was found in, and complain if blurb is just prot-name + label ?
1279
1280
1281     def set_mask_value(self, macros):
1282         try:
1283             self.mask_read = True
1284             # PIDL generator adds annoying parenthesis and spaces around mask..
1285             self.mask = self.mask.strip('() ')
1286
1287             # Substitute mask if found as a macro..
1288             if self.mask in macros:
1289                 self.mask = macros[self.mask]
1290             elif any(c not in '0123456789abcdefABCDEFxX' for c in self.mask):
1291                 self.mask_read = False
1292                 self.mask_value = 0
1293                 #print(self.filename, 'Could not read:', '"' + self.mask + '"')
1294                 return
1295
1296             # Read according to the appropriate base.
1297             if self.mask.startswith('0x'):
1298                 self.mask_value = int(self.mask, 16)
1299             elif self.mask.startswith('0'):
1300                 self.mask_value = int(self.mask, 8)
1301             else:
1302                 self.mask_value = int(self.mask, 10)
1303         except Exception:
1304             self.mask_read = False
1305             self.mask_value = 0
1306
1307         #if not self.mask_read:
1308         #    print('Could not read:', self.mask)
1309
1310
1311     def set_display_value(self, macros):
1312         try:
1313             self.display_read = True
1314             display = self.display
1315
1316             # Substitute display if found as a macro..
1317             if display in macros:
1318                 display = macros[display]
1319             elif any(c not in '0123456789abcdefABCDEFxX' for c in display):
1320                 self.display_read = False
1321                 self.display_value = 0
1322                 return
1323
1324             # Read according to the appropriate base.
1325             if self.display.startswith('0x'):
1326                 self.display_value = int(display, 16)
1327             elif self.display.startswith('0'):
1328                 self.display_value = int(display, 8)
1329             else:
1330                 self.display_value = int(display, 10)
1331         except Exception:
1332             self.display_read = False
1333             self.display_value = 0
1334
1335
1336     def check_value_string_range(self, vs_min, vs_max):
1337         item_width = self.get_field_width_in_bits()
1338
1339         if item_width is None:
1340             # Type field defined by macro?
1341             return
1342
1343         if self.mask_value > 0:
1344             # Distance between first and last '1'
1345             bitBools = bin(self.mask_value)[2:]
1346             mask_width = bitBools.rfind('1') - bitBools.find('1') + 1
1347         else:
1348             # No mask is effectively a full mask..
1349             mask_width = item_width
1350
1351         item_max = (2 ** mask_width)
1352         if vs_max > item_max:
1353             global warnings_found
1354             print('Warning:', self.filename, self.hf, 'filter=', self.filter,
1355                   self.strings, "has max value", vs_max, '(' + hex(vs_max) + ')', "which doesn't fit into", mask_width, 'bits',
1356                   '( mask is', hex(self.mask_value), ')')
1357             warnings_found += 1
1358
1359     def check_range_string_range(self, rs_min, rs_max):
1360         item_width = self.get_field_width_in_bits()
1361
1362         if item_width is None:
1363             # Type field defined by macro?
1364             return
1365
1366         if self.mask_value > 0:
1367             # Distance between first and last '1'
1368             bitBools = bin(self.mask_value)[2:]
1369             mask_width = bitBools.rfind('1') - bitBools.find('1') + 1
1370         else:
1371             # No mask is effectively a full mask..
1372             mask_width = item_width
1373
1374         item_max = (2 ** mask_width)
1375         if rs_max > item_max:
1376             global warnings_found
1377             print('Warning:', self.filename, self.hf, 'filter=', self.filter,
1378                   self.strings, "has values", rs_min, rs_max, '(' + hex(rs_max) + ')', "which doesn't fit into", mask_width, 'bits',
1379                   '( mask is', hex(self.mask_value), ')')
1380             warnings_found += 1
1381
1382
1383
1384
1385     # Return true if bit position n is set in value.
1386     def check_bit(self, value, n):
1387         return (value & (0x1 << n)) != 0
1388
1389     # Output a warning if non-contiguous bits are found in the mask (uint64_t).
1390     # Note that this legitimately happens in several dissectors where multiple reserved/unassigned
1391     # bits are conflated into one field.
1392     # - there is probably a cool/efficient way to check this (+1 => 1-bit set?)
1393     def check_contiguous_bits(self, mask):
1394         if not self.mask_value:
1395             return
1396
1397         # Do see legitimate non-contiguous bits often for these..
1398         if name_has_one_of(self.hf, ['reserved', 'unknown', 'unused', 'spare']):
1399             return
1400         if name_has_one_of(self.label, ['reserved', 'unknown', 'unused', 'spare']):
1401             return
1402
1403
1404         # Walk past any l.s. 0 bits
1405         n = 0
1406         while not self.check_bit(self.mask_value, n) and n <= 63:
1407             n += 1
1408         if n==63:
1409             return
1410
1411         mask_start = n
1412         # Walk through any bits that are set
1413         while self.check_bit(self.mask_value, n) and n <= 63:
1414             n += 1
1415         n += 1
1416
1417         if n >= 63:
1418             return
1419
1420         # Look up the field width
1421         field_width = 0
1422         if self.item_type not in field_widths:
1423             print('unexpected item_type is ', self.item_type)
1424             field_width = 64
1425         else:
1426             field_width = self.get_field_width_in_bits()
1427
1428
1429         # Its a problem is the mask_width is > field_width - some of the bits won't get looked at!?
1430         mask_width = n-1-mask_start
1431         if field_width is not None and (mask_width > field_width):
1432             # N.B. No call, so no line number.
1433             print(self.filename + ':', self.hf, 'filter=', self.filter, self.item_type, 'so field_width=', field_width,
1434                   'but mask is', mask, 'which is', mask_width, 'bits wide!')
1435             global warnings_found
1436             warnings_found += 1
1437         # Now, any more zero set bits are an error!
1438         if self.filter in known_non_contiguous_fields or self.filter.startswith('rtpmidi'):
1439             # Don't report if we know this one is Ok.
1440             # TODO: also exclude items that are used as root in add_bitmask() calls?
1441             return
1442         while n <= 63:
1443             if self.check_bit(self.mask_value, n):
1444                 print('Warning:', self.filename, self.hf, 'filter=', self.filter, ' - mask with non-contiguous bits',
1445                       mask, '(', hex(self.mask_value), ')')
1446                 warnings_found += 1
1447                 return
1448             n += 1
1449
1450     def get_field_width_in_bits(self):
1451         if self.item_type == 'FT_BOOLEAN':
1452             if self.display == 'NULL':
1453                 return 8  # i.e. 1 byte
1454             elif self.display == 'SEP_DOT':   # from proto.h, only meant for FT_BYTES
1455                 return 64
1456             else:
1457                 try:
1458                     # For FT_BOOLEAN, modifier is just numerical number of bits. Round up to next nibble.
1459                     return int((int(self.display) + 3)/4)*4
1460                 except Exception:
1461                     return None
1462         else:
1463             if self.item_type in field_widths:
1464                 # Lookup fixed width for this type
1465                 return field_widths[self.item_type]
1466             else:
1467                 return None
1468
1469     def check_num_digits(self, mask):
1470         if mask.startswith('0x') and len(mask) > 3:
1471             global warnings_found
1472             global errors_found
1473
1474             width_in_bits = self.get_field_width_in_bits()
1475             # Warn if odd number of digits.  TODO: only if >= 5?
1476             if len(mask) % 2  and self.item_type != 'FT_BOOLEAN':
1477                 print('Warning:', self.filename, self.hf, 'filter=', self.filter, ' - mask has odd number of digits', mask,
1478                       'expected max for', self.item_type, 'is', int(width_in_bits/4))
1479                 warnings_found += 1
1480
1481             if self.item_type in field_widths:
1482                 # Longer than it should be?
1483                 if width_in_bits is None:
1484                     return
1485                 if len(mask)-2 > width_in_bits/4:
1486                     extra_digits = mask[2:2+(len(mask)-2 - int(width_in_bits/4))]
1487                     # Its definitely an error if any of these are non-zero, as they won't have any effect!
1488                     if extra_digits != '0'*len(extra_digits):
1489                         print('Error:', self.filename, self.hf, 'filter=', self.filter, 'mask', self.mask, "with len is", len(mask)-2,
1490                               "but type", self.item_type, " indicates max of", int(width_in_bits/4),
1491                               "and extra digits are non-zero (" + extra_digits + ")")
1492                         errors_found += 1
1493                     else:
1494                         # Has extra leading zeros, still confusing, so warn.
1495                         print('Warning:', self.filename, self.hf, 'filter=', self.filter, 'mask', self.mask, "with len", len(mask)-2,
1496                               "but type", self.item_type, " indicates max of", int(width_in_bits/4))
1497                         warnings_found += 1
1498
1499                 # Strict/fussy check - expecting mask length to match field width exactly!
1500                 # Currently only doing for FT_BOOLEAN, and don't expect to be in full for 64-bit fields!
1501                 if self.mask_exact_width:
1502                     ideal_mask_width = int(width_in_bits/4)
1503                     if self.item_type == 'FT_BOOLEAN' and ideal_mask_width < 16 and len(mask)-2 != ideal_mask_width:
1504                         print('Warning:', self.filename, self.hf, 'filter=', self.filter, 'mask', self.mask, "with len", len(mask)-2,
1505                                 "but type", self.item_type, "|", self.display,  " indicates should be", int(width_in_bits/4))
1506                         warnings_found += 1
1507
1508             else:
1509                 # This type shouldn't have a mask set at all.
1510                 print('Warning:', self.filename, self.hf, 'filter=', self.filter, ' - item has type', self.item_type, 'but mask set:', mask)
1511                 warnings_found += 1
1512
1513     def check_digits_all_zeros(self, mask):
1514         if mask.startswith('0x') and len(mask) > 3:
1515             if mask[2:] == '0'*(len(mask)-2):
1516                 print('Warning:', self.filename, self.hf, 'filter=', self.filter, ' - item mask has all zeros - this is confusing! :', '"' + mask + '"')
1517                 global warnings_found
1518                 warnings_found += 1
1519
1520     # A mask where all bits are set should instead be 0.
1521     # Exceptions might be where:
1522     # - in add_bitmask()
1523     # - represents flags, but dissector is not yet decoding them
1524     def check_full_mask(self, mask, field_arrays):
1525         if self.item_type == "FT_BOOLEAN":
1526             return
1527         if self.label.lower().find('mask') != -1 or self.label.lower().find('flag') != -1 or self.label.lower().find('bitmap') != -1:
1528             return
1529         if mask.startswith('0x') and len(mask) > 3:
1530             width_in_bits = self.get_field_width_in_bits()
1531             if not width_in_bits:
1532                 return
1533             num_digits = int(width_in_bits / 4)
1534             if num_digits is None:
1535                 return
1536             if mask[2:] == 'f'*num_digits   or   mask[2:] == 'F'*num_digits:
1537                 # Don't report if appears in a 'fields' array
1538                 for arr in field_arrays:
1539                     list = field_arrays[arr][0]
1540                     if self.hf in list:
1541                         # These need to have a mask - don't judge for being 0
1542                         return
1543
1544                 # No point in setting all bits if only want decimal number..
1545                 if self.display == "BASE_DEC":
1546                     print('Note:', self.filename, self.hf, 'filter=', self.filter, " - mask is all set - if only want value (rather than bits), set 0 instead? :", '"' + mask + '"')
1547
1548     # An item that appears in a bitmask set, needs to have a non-zero mask.
1549     def check_mask_if_in_field_array(self, mask, field_arrays):
1550         # Work out if this item appears in a field array
1551         found = False
1552         for arr in field_arrays:
1553             list = field_arrays[arr][0]
1554             if self.hf in list:
1555                 # These need to have a mask - don't judge for being 0
1556                 found = True
1557                 break
1558
1559         if found:
1560             # It needs to have a non-zero mask.
1561             if self.mask_read and self.mask_value == 0:
1562                 print('Error:', self.filename, self.hf, 'is in fields array', arr, 'but has a zero mask - this is not allowed')
1563                 global errors_found
1564                 errors_found += 1
1565
1566
1567
1568     # Return True if appears to be a match
1569     def check_label_vs_filter(self, reportError=True, reportNumericalMismatch=True):
1570         global warnings_found
1571
1572         last_filter = self.filter.split('.')[-1]
1573         last_filter_orig = last_filter
1574         last_filter = last_filter.replace('-', '')
1575         last_filter = last_filter.replace('_', '')
1576         last_filter = last_filter.replace(' ', '')
1577         label = self.label
1578         label_orig = label
1579         label = label.replace(' ', '')
1580         label = label.replace('-', '')
1581         label = label.replace('_', '')
1582         label = label.replace('(', '')
1583         label = label.replace(')', '')
1584         label = label.replace('/', '')
1585         label = label.replace("'", '')
1586
1587
1588         # OK if filter is abbrev of label.
1589         label_words = self.label.split(' ')
1590         label_words = [w for w in label_words if len(w)]
1591         if len(label_words) == len(last_filter):
1592             #print(label_words)
1593             abbrev_letters = [w[0] for w in label_words]
1594             abbrev = ''.join(abbrev_letters)
1595             if abbrev.lower() == last_filter.lower():
1596                 return True
1597
1598         # If both have numbers, they should probably match!
1599         label_numbers =  [int(n) for n in re.findall(r'\d+', label_orig)]
1600         filter_numbers = [int(n) for n in re.findall(r'\d+', last_filter_orig)]
1601         if len(label_numbers) == len(filter_numbers) and label_numbers != filter_numbers:
1602             if reportNumericalMismatch:
1603                 print('Note:', self.filename, self.hf, 'label="' + self.label + '" has different **numbers** from  filter="' + self.filter + '"')
1604                 print(label_numbers, filter_numbers)
1605             return False
1606
1607         # If they match after trimming number from filter, they should match.
1608         if label.lower() == last_filter.lower().rstrip("0123456789"):
1609             return True
1610
1611         # Are they just different?
1612         if label.lower().find(last_filter.lower()) == -1:
1613             if reportError:
1614                 print('Warning:', self.filename, self.hf, 'label="' + self.label + '" does not seem to match filter="' + self.filter + '"')
1615                 warnings_found += 1
1616             return False
1617
1618         return True
1619
1620     def check_boolean_length(self):
1621         global errors_found
1622         # If mask is 0, display must be BASE_NONE.
1623         if self.item_type == 'FT_BOOLEAN' and self.mask_read and self.mask_value == 0 and self.display.find('BASE_NONE') == -1:
1624             print('Error:', self.filename, self.hf, 'type is FT_BOOLEAN, no mask set (', self.mask, ') - display should be BASE_NONE, is instead', self.display)
1625             errors_found += 1
1626         # TODO: check for length > 64?
1627
1628     def check_string_display(self):
1629         global warnings_found
1630         if self.item_type in { 'FT_STRING', 'FT_STRINGZ', 'FT_UINT_STRING'}:
1631             if self.display.find('BASE_NONE')==-1:
1632                 print('Warning:', self.filename, self.hf, 'type is', self.item_type, 'display must be BASE_NONE, is instead', self.display)
1633                 warnings_found += 1
1634
1635
1636
1637
1638     def check_ipv4_display(self):
1639         global errors_found
1640         if self.item_type == 'FT_IPv4' and self.display not in { 'BASE_NETMASK', 'BASE_NONE' }:
1641             print('Error:', self.filename, self.hf, 'type is FT_IPv4, should be BASE_NETMASK or BASE_NONE, is instead', self.display)
1642             errors_found += 1
1643
1644
1645 class CombinedCallsCheck:
1646     def __init__(self, file, apiChecks):
1647         self.file = file
1648         self.apiChecks = apiChecks
1649         self.get_all_calls()
1650
1651     def get_all_calls(self):
1652         self.all_calls = []
1653         # Combine calls into one list.
1654         for check in self.apiChecks:
1655             self.all_calls += check.calls
1656
1657         # Sort by line number.
1658         self.all_calls.sort(key=lambda x:x.line_number)
1659
1660     def check_consecutive_item_calls(self):
1661         lines = open(self.file, 'r', encoding="utf8").read().splitlines()
1662
1663         prev = None
1664         for call in self.all_calls:
1665
1666             # These names commonly do appear together..
1667             if name_has_one_of(call.hf_name, [ 'unused', 'unknown', 'spare', 'reserved', 'default']):
1668                 return
1669
1670             if prev and call.hf_name == prev.hf_name:
1671                 # More compelling if close together..
1672                 if call.line_number>prev.line_number and call.line_number-prev.line_number <= 4:
1673                     scope_different = False
1674                     for no in range(prev.line_number, call.line_number-1):
1675                         if lines[no].find('{') != -1 or lines[no].find('}') != -1 or lines[no].find('else') != -1 or lines[no].find('break;') != -1 or lines[no].find('if ') != -1:
1676                             scope_different = True
1677                             break
1678                     # Also more compelling if check for and scope changes { } in lines in-between?
1679                     if not scope_different:
1680                         print('Warning:', f + ':' + str(call.line_number),
1681                               call.hf_name + ' called consecutively at line', call.line_number, '- previous at', prev.line_number)
1682                         global warnings_found
1683                         warnings_found += 1
1684             prev = call
1685
1686
1687
1688
1689 # These are APIs in proto.c that check a set of types at runtime and can print '.. is not of type ..' to the console
1690 # if the type is not suitable.
1691 apiChecks = []
1692 apiChecks.append(APICheck('proto_tree_add_item_ret_uint', { 'FT_CHAR', 'FT_UINT8', 'FT_UINT16', 'FT_UINT24', 'FT_UINT32'}, positive_length=True))
1693 apiChecks.append(APICheck('proto_tree_add_item_ret_int', { 'FT_INT8', 'FT_INT16', 'FT_INT24', 'FT_INT32'}))
1694 apiChecks.append(APICheck('ptvcursor_add_ret_uint', { 'FT_CHAR', 'FT_UINT8', 'FT_UINT16', 'FT_UINT24', 'FT_UINT32'}, positive_length=True))
1695 apiChecks.append(APICheck('ptvcursor_add_ret_int', { 'FT_INT8', 'FT_INT16', 'FT_INT24', 'FT_INT32'}, positive_length=True))
1696 apiChecks.append(APICheck('ptvcursor_add_ret_string', { 'FT_STRING', 'FT_STRINGZ', 'FT_UINT_STRING', 'FT_STRINGZPAD', 'FT_STRINGZTRUNC'}))
1697 apiChecks.append(APICheck('ptvcursor_add_ret_boolean', { 'FT_BOOLEAN'}, positive_length=True))
1698 apiChecks.append(APICheck('proto_tree_add_item_ret_uint64', { 'FT_UINT40', 'FT_UINT48', 'FT_UINT56', 'FT_UINT64'}, positive_length=True))
1699 apiChecks.append(APICheck('proto_tree_add_item_ret_int64', { 'FT_INT40', 'FT_INT48', 'FT_INT56', 'FT_INT64'}, positive_length=True))
1700 apiChecks.append(APICheck('proto_tree_add_item_ret_boolean', { 'FT_BOOLEAN'}, positive_length=True))
1701 apiChecks.append(APICheck('proto_tree_add_item_ret_string_and_length', { 'FT_STRING', 'FT_STRINGZ', 'FT_UINT_STRING', 'FT_STRINGZPAD', 'FT_STRINGZTRUNC'}))
1702 apiChecks.append(APICheck('proto_tree_add_item_ret_display_string_and_length', { 'FT_STRING', 'FT_STRINGZ', 'FT_UINT_STRING',
1703                                                                                  'FT_STRINGZPAD', 'FT_STRINGZTRUNC', 'FT_BYTES', 'FT_UINT_BYTES'}))
1704 apiChecks.append(APICheck('proto_tree_add_item_ret_time_string', { 'FT_ABSOLUTE_TIME', 'FT_RELATIVE_TIME'}))
1705 apiChecks.append(APICheck('proto_tree_add_uint', {  'FT_CHAR', 'FT_UINT8', 'FT_UINT16', 'FT_UINT24', 'FT_UINT32', 'FT_FRAMENUM'}))
1706 apiChecks.append(APICheck('proto_tree_add_uint_format_value', {  'FT_CHAR', 'FT_UINT8', 'FT_UINT16', 'FT_UINT24', 'FT_UINT32', 'FT_FRAMENUM'}))
1707 apiChecks.append(APICheck('proto_tree_add_uint_format', {  'FT_CHAR', 'FT_UINT8', 'FT_UINT16', 'FT_UINT24', 'FT_UINT32', 'FT_FRAMENUM'}))
1708 apiChecks.append(APICheck('proto_tree_add_uint64', { 'FT_UINT40', 'FT_UINT48', 'FT_UINT56', 'FT_UINT64', 'FT_FRAMENUM'}))
1709 apiChecks.append(APICheck('proto_tree_add_int64', { 'FT_INT40', 'FT_INT48', 'FT_INT56', 'FT_INT64'}))
1710 apiChecks.append(APICheck('proto_tree_add_int64_format_value', { 'FT_INT40', 'FT_INT48', 'FT_INT56', 'FT_INT64'}))
1711 apiChecks.append(APICheck('proto_tree_add_int64_format', { 'FT_INT40', 'FT_INT48', 'FT_INT56', 'FT_INT64'}))
1712 apiChecks.append(APICheck('proto_tree_add_int', { 'FT_INT8', 'FT_INT16', 'FT_INT24', 'FT_INT32'}))
1713 apiChecks.append(APICheck('proto_tree_add_int_format_value', { 'FT_INT8', 'FT_INT16', 'FT_INT24', 'FT_INT32'}))
1714 apiChecks.append(APICheck('proto_tree_add_int_format', { 'FT_INT8', 'FT_INT16', 'FT_INT24', 'FT_INT32'}))
1715 apiChecks.append(APICheck('proto_tree_add_boolean', { 'FT_BOOLEAN'}))
1716 apiChecks.append(APICheck('proto_tree_add_boolean64', { 'FT_BOOLEAN'}))
1717 apiChecks.append(APICheck('proto_tree_add_float', { 'FT_FLOAT'}))
1718 apiChecks.append(APICheck('proto_tree_add_float_format', { 'FT_FLOAT'}))
1719 apiChecks.append(APICheck('proto_tree_add_float_format_value', { 'FT_FLOAT'}))
1720 apiChecks.append(APICheck('proto_tree_add_double', { 'FT_DOUBLE'}))
1721 apiChecks.append(APICheck('proto_tree_add_double_format', { 'FT_DOUBLE'}))
1722 apiChecks.append(APICheck('proto_tree_add_double_format_value', { 'FT_DOUBLE'}))
1723 apiChecks.append(APICheck('proto_tree_add_string', { 'FT_STRING', 'FT_STRINGZ', 'FT_UINT_STRING', 'FT_STRINGZPAD', 'FT_STRINGZTRUNC'}))
1724 apiChecks.append(APICheck('proto_tree_add_string_format', { 'FT_STRING', 'FT_STRINGZ', 'FT_UINT_STRING', 'FT_STRINGZPAD', 'FT_STRINGZTRUNC'}))
1725 apiChecks.append(APICheck('proto_tree_add_string_format_value', { 'FT_STRING', 'FT_STRINGZ', 'FT_UINT_STRING', 'FT_STRINGZPAD', 'FT_STRINGZTRUNC'}))
1726 apiChecks.append(APICheck('proto_tree_add_guid', { 'FT_GUID'}))
1727 apiChecks.append(APICheck('proto_tree_add_oid', { 'FT_OID'}))
1728 apiChecks.append(APICheck('proto_tree_add_none_format', { 'FT_NONE'}))
1729 apiChecks.append(APICheck('proto_tree_add_item_ret_varint', { 'FT_INT8', 'FT_INT16', 'FT_INT24', 'FT_INT32', 'FT_INT40', 'FT_INT48', 'FT_INT56', 'FT_INT64',
1730                                                               'FT_CHAR', 'FT_UINT8', 'FT_UINT16', 'FT_UINT24', 'FT_UINT32', 'FT_FRAMENUM',
1731                                                               'FT_UINT40', 'FT_UINT48', 'FT_UINT56', 'FT_UINT64',}))
1732 apiChecks.append(APICheck('proto_tree_add_boolean_bits_format_value', { 'FT_BOOLEAN'}))
1733 apiChecks.append(APICheck('proto_tree_add_ascii_7bits_item', { 'FT_STRING'}))
1734 # TODO: positions are different, and takes 2 hf_fields..
1735 #apiChecks.append(APICheck('proto_tree_add_checksum', { 'FT_UINT8', 'FT_UINT16', 'FT_UINT24', 'FT_UINT32'}))
1736 apiChecks.append(APICheck('proto_tree_add_int64_bits_format_value', { 'FT_INT40', 'FT_INT48', 'FT_INT56', 'FT_INT64'}))
1737
1738 # TODO: add proto_tree_add_bytes_item, proto_tree_add_time_item ?
1739
1740 bitmask_types = { 'FT_CHAR', 'FT_UINT8', 'FT_UINT16', 'FT_UINT24', 'FT_UINT32',
1741                   'FT_INT8', 'FT_INT16', 'FT_INT24', 'FT_INT32',
1742                   'FT_UINT40', 'FT_UINT48', 'FT_UINT56', 'FT_UINT64',
1743                   'FT_INT40', 'FT_INT48', 'FT_INT56', 'FT_INT64',
1744                    'FT_BOOLEAN'}
1745 apiChecks.append(APICheck('proto_tree_add_bitmask', bitmask_types))
1746 apiChecks.append(APICheck('proto_tree_add_bitmask_tree', bitmask_types))
1747 apiChecks.append(APICheck('proto_tree_add_bitmask_ret_uint64', bitmask_types))
1748 apiChecks.append(APICheck('proto_tree_add_bitmask_with_flags', bitmask_types))
1749 apiChecks.append(APICheck('proto_tree_add_bitmask_with_flags_ret_uint64', bitmask_types))
1750 apiChecks.append(APICheck('proto_tree_add_bitmask_value', bitmask_types))
1751 apiChecks.append(APICheck('proto_tree_add_bitmask_value_with_flags', bitmask_types))
1752 apiChecks.append(APICheck('proto_tree_add_bitmask_len', bitmask_types))
1753 # N.B., proto_tree_add_bitmask_list does not have a root item, just a subtree...
1754
1755 add_bits_types = { 'FT_CHAR', 'FT_BOOLEAN',
1756                    'FT_UINT8', 'FT_UINT16', 'FT_UINT24', 'FT_UINT32', 'FT_UINT40', 'FT_UINT48', 'FT_UINT56', 'FT_UINT64',
1757                    'FT_INT8', 'FT_INT16', 'FT_INT24', 'FT_INT32', 'FT_INT40', 'FT_INT48', 'FT_INT56', 'FT_INT64',
1758                     'FT_BYTES'}
1759 apiChecks.append(APICheck('proto_tree_add_bits_item',    add_bits_types))
1760 apiChecks.append(APICheck('proto_tree_add_bits_ret_val', add_bits_types))
1761
1762 # TODO: doesn't even have an hf_item !
1763 #apiChecks.append(APICheck('proto_tree_add_bitmask_text', bitmask_types))
1764
1765 # Check some ptvcuror calls too.
1766 apiChecks.append(APICheck('ptvcursor_add_ret_uint', { 'FT_CHAR', 'FT_UINT8', 'FT_UINT16', 'FT_UINT24', 'FT_UINT32'}))
1767 apiChecks.append(APICheck('ptvcursor_add_ret_int', { 'FT_INT8', 'FT_INT16', 'FT_INT24', 'FT_INT32'}))
1768 apiChecks.append(APICheck('ptvcursor_add_ret_boolean', { 'FT_BOOLEAN'}))
1769
1770
1771 # Also try to check proto_tree_add_item() calls (for length)
1772 apiChecks.append(ProtoTreeAddItemCheck())
1773 apiChecks.append(ProtoTreeAddItemCheck(True)) # for ptvcursor_add()
1774
1775
1776
1777 def removeComments(code_string):
1778     code_string = re.sub(re.compile(r"/\*.*?\*/",re.DOTALL ) ,"" , code_string) # C-style comment
1779     code_string = re.sub(re.compile(r"//.*?\n" ) ,"" , code_string)             # C++-style comment
1780     code_string = re.sub(re.compile(r"#if 0.*?#endif",re.DOTALL ) ,"" , code_string) # Ignored region
1781
1782     return code_string
1783
1784 # Test for whether the given file was automatically generated.
1785 def isGeneratedFile(filename):
1786     # Check file exists - e.g. may have been deleted in a recent commit.
1787     if not os.path.exists(filename):
1788         return False
1789
1790     # Open file
1791     f_read = open(os.path.join(filename), 'r', encoding="utf8")
1792     lines_tested = 0
1793     for line in f_read:
1794         # The comment to say that its generated is near the top, so give up once
1795         # get a few lines down.
1796         if lines_tested > 10:
1797             f_read.close()
1798             return False
1799         if (line.find('Generated automatically') != -1 or
1800             line.find('Generated Automatically') != -1 or
1801             line.find('Autogenerated from') != -1 or
1802             line.find('is autogenerated') != -1 or
1803             line.find('automatically generated by Pidl') != -1 or
1804             line.find('Created by: The Qt Meta Object Compiler') != -1 or
1805             line.find('This file was generated') != -1 or
1806             line.find('This filter was automatically generated') != -1 or
1807             line.find('This file is auto generated, do not edit!') != -1):
1808
1809             f_read.close()
1810             return True
1811         lines_tested = lines_tested + 1
1812
1813     # OK, looks like a hand-written file!
1814     f_read.close()
1815     return False
1816
1817
1818 # TODO: could also look for macros in related/included header file(s)?
1819 def find_macros(filename):
1820     # Pre-populate with some useful values..
1821     macros = { 'BASE_NONE' : 0,  'BASE_DEC' : 1 }
1822
1823     with open(filename, 'r', encoding="utf8") as f:
1824         contents = f.read()
1825         # Remove comments so as not to trip up RE.
1826         contents = removeComments(contents)
1827
1828         matches = re.finditer( r'#define\s*([A-Za-z0-9_]*)\s*([0-9xa-fA-F]*)\s*\n', contents)
1829         for m in matches:
1830             # Store this mapping.
1831             macros[m.group(1)] = m.group(2)
1832
1833         # Also look for what could be enumeration assignments
1834         matches = re.finditer( r'\s*([A-Za-z0-9_]*)\s*=\s*([0-9xa-fA-F]*)\s*,?\n', contents)
1835         for m in matches:
1836             # Store this mapping.
1837             macros[m.group(1)] = m.group(2)
1838
1839     return macros
1840
1841
1842 # Look for hf items (i.e. full item to be registered) in a dissector file.
1843 def find_items(filename, macros, value_strings, range_strings,
1844                check_mask=False, mask_exact_width=False, check_label=False, check_consecutive=False):
1845     is_generated = isGeneratedFile(filename)
1846     items = {}
1847     with open(filename, 'r', encoding="utf8") as f:
1848         contents = f.read()
1849         # Remove comments so as not to trip up RE.
1850         contents = removeComments(contents)
1851
1852         # N.B. re extends all the way to HFILL to avoid greedy matching
1853         # TODO: fix a problem where re can't cope with mask that involve a macro with commas in it...
1854         matches = re.finditer( r'.*\{\s*\&(hf_[a-z_A-Z0-9]*)\s*,\s*{\s*\"(.*?)\"\s*,\s*\"(.*?)\"\s*,\s*(.*?)\s*,\s*([0-9A-Z_\|\s]*?)\s*,\s*(.*?)\s*,\s*(.*?)\s*,\s*([a-zA-Z0-9\W\s_\u00f6\u00e4]*?)\s*,\s*HFILL', contents)
1855         for m in matches:
1856             # Store this item.
1857             hf = m.group(1)
1858
1859             blurb = m.group(8)
1860             if blurb.startswith('"'):
1861                 blurb = blurb[1:-1]
1862
1863             items[hf] = Item(filename, hf, filter=m.group(3), label=m.group(2), item_type=m.group(4),
1864                              display=m.group(5),
1865                              strings=m.group(6),
1866                              macros=macros,
1867                              value_strings=value_strings,
1868                              range_strings=range_strings,
1869                              mask=m.group(7),
1870                              blurb=blurb,
1871                              check_mask=check_mask,
1872                              mask_exact_width=mask_exact_width,
1873                              check_label=check_label,
1874                              check_consecutive=(not is_generated and check_consecutive))
1875     return items
1876
1877
1878 # Looking for args to ..add_bitmask_..() calls that are not NULL-terminated or  have repeated items.
1879 # TODO: some dissectors have similar-looking hf arrays for other reasons, so need to cross-reference with
1880 # the 6th arg of ..add_bitmask_..() calls...
1881 # TODO: return items (rather than local checks) from here so can be checked against list of calls for given filename
1882 def find_field_arrays(filename, all_fields, all_hf):
1883     field_entries = {}
1884     global warnings_found
1885     with open(filename, 'r', encoding="utf8") as f:
1886         contents = f.read()
1887         # Remove comments so as not to trip up RE.
1888         contents = removeComments(contents)
1889
1890         # Find definition of hf array
1891         matches = re.finditer(r'static\s*g?int\s*\*\s*const\s+([a-zA-Z0-9_]*)\s*\[\]\s*\=\s*\{([a-zA-Z0-9,_\&\s]*)\}', contents)
1892         for m in matches:
1893             name = m.group(1)
1894             # Ignore if not used in a call to an _add_bitmask_ API
1895             if name not in all_fields:
1896                 continue
1897
1898             fields_text = m.group(2)
1899             fields_text = fields_text.replace('&', '')
1900             fields_text = fields_text.replace(',', '')
1901
1902             # Get list of each hf field in the array
1903             fields = fields_text.split()
1904
1905             if fields[0].startswith('ett_'):
1906                 continue
1907             if fields[-1].find('NULL') == -1 and fields[-1] != '0':
1908                 print('Warning:', filename, name, 'is not NULL-terminated - {', ', '.join(fields), '}')
1909                 warnings_found += 1
1910                 continue
1911
1912             # Do any hf items reappear?
1913             seen_fields = set()
1914             for f in fields:
1915                 if f in seen_fields:
1916                     print(filename, name, f, 'already added!')
1917                     warnings_found += 1
1918                 seen_fields.add(f)
1919
1920             # Check for duplicated flags among entries..
1921             combined_mask = 0x0
1922             for f in fields[0:-1]:
1923                 if f in all_hf:
1924                     new_mask = all_hf[f].mask_value
1925                     if new_mask & combined_mask:
1926                         print('Warning:', filename, name, 'has overlapping mask - {', ', '.join(fields), '} combined currently', hex(combined_mask), f, 'adds', hex(new_mask))
1927                         warnings_found += 1
1928                     combined_mask |= new_mask
1929
1930             # Make sure all entries have the same width
1931             set_field_width = None
1932             for f in fields[0:-1]:
1933                 if f in all_hf:
1934                     new_field_width = all_hf[f].get_field_width_in_bits()
1935                     if set_field_width is not None and new_field_width != set_field_width:
1936                         # Its not uncommon for fields to be used in multiple sets, some of which can be different widths..
1937                         print('Note:', filename, name, 'set items not all same width - {', ', '.join(fields), '} seen', set_field_width, 'now', new_field_width)
1938                     set_field_width = new_field_width
1939
1940             # Add entry to table
1941             field_entries[name] = (fields[0:-1], combined_mask)
1942
1943     return field_entries
1944
1945 def find_item_declarations(filename):
1946     items = set()
1947
1948     with open(filename, 'r', encoding="utf8") as f:
1949         lines = f.read().splitlines()
1950         p = re.compile(r'^static int (hf_[a-zA-Z0-9_]*)\s*\=\s*-1;')
1951         for line in lines:
1952             m = p.search(line)
1953             if m:
1954                 items.add(m.group(1))
1955     return items
1956
1957 def find_item_extern_declarations(filename):
1958     items = set()
1959     with open(filename, 'r', encoding="utf8") as f:
1960         lines = f.read().splitlines()
1961         p = re.compile(r'^\s*(hf_[a-zA-Z0-9_]*)\s*\=\s*proto_registrar_get_id_byname\s*\(')
1962         for line in lines:
1963             m = p.search(line)
1964             if m:
1965                 items.add(m.group(1))
1966     return items
1967
1968
1969 def is_dissector_file(filename):
1970     p = re.compile(r'.*(packet|file)-.*\.c$')
1971     return p.match(filename)
1972
1973
1974 def findDissectorFilesInFolder(folder, recursive=False):
1975     dissector_files = []
1976
1977     if recursive:
1978         for root, subfolders, files in os.walk(folder):
1979             for f in files:
1980                 if should_exit:
1981                     return
1982                 f = os.path.join(root, f)
1983                 dissector_files.append(f)
1984     else:
1985         for f in sorted(os.listdir(folder)):
1986             if should_exit:
1987                 return
1988             filename = os.path.join(folder, f)
1989             dissector_files.append(filename)
1990
1991     return [x for x in filter(is_dissector_file, dissector_files)]
1992
1993
1994
1995 # Run checks on the given dissector file.
1996 def checkFile(filename, check_mask=False, mask_exact_width=False, check_label=False, check_consecutive=False,
1997               check_missing_items=False, check_bitmask_fields=False, label_vs_filter=False, extra_value_string_checks=False,
1998               check_expert_items=False, check_subtrees=False):
1999     # Check file exists - e.g. may have been deleted in a recent commit.
2000     if not os.path.exists(filename):
2001         print(filename, 'does not exist!')
2002         return
2003
2004     # Find simple macros so can substitute into items and calls.
2005     macros = find_macros(filename)
2006
2007     # Find (and sanity-check) value_strings
2008     value_strings = findValueStrings(filename, macros, do_extra_checks=extra_value_string_checks)
2009     if extra_value_string_checks:
2010         for name in value_strings:
2011             value_strings[name].extraChecks()
2012
2013     # Find (and sanity-check) range_strings
2014     range_strings = findRangeStrings(filename, macros, do_extra_checks=extra_value_string_checks)
2015     if extra_value_string_checks:
2016         for name in range_strings:
2017             range_strings[name].extraChecks()
2018
2019     # Find (and sanity-check) string_strings
2020     string_strings = findStringStrings(filename, macros, do_extra_checks=extra_value_string_checks)
2021     if extra_value_string_checks:
2022         for name in string_strings:
2023             string_strings[name].extraChecks()
2024
2025     # Find expert items
2026     if check_expert_items:
2027         expert_items = findExpertItems(filename, macros)
2028         checkExpertCalls(filename, expert_items)
2029
2030     # Find important parts of items.
2031     items_defined = find_items(filename, macros, value_strings, range_strings,
2032                                check_mask, mask_exact_width, check_label, check_consecutive)
2033     items_extern_declared = {}
2034
2035
2036     # Check that ett_ variables are registered
2037     if check_subtrees:
2038         ett_declared = findDeclaredTrees(filename)
2039         ett_defined =  findDefinedTrees(filename, ett_declared)
2040         for d in ett_declared:
2041             if d not in ett_defined:
2042                 global warnings_found
2043                 print('Warning:', filename, 'subtree identifier', d, 'is declared but not found in an array for registering')
2044                 warnings_found += 1
2045
2046     items_declared = {}
2047     if check_missing_items:
2048         items_declared = find_item_declarations(filename)
2049         items_extern_declared = find_item_extern_declarations(filename)
2050
2051     fields = set()
2052
2053     # Get 'fields' out of calls
2054     for c in apiChecks:
2055         c.find_calls(filename, macros)
2056         for call in c.calls:
2057             # From _add_bitmask() calls
2058             if call.fields:
2059                 fields.add(call.fields)
2060
2061     # Checking for lists of fields for add_bitmask calls
2062     field_arrays = {}
2063     if check_bitmask_fields:
2064         field_arrays = find_field_arrays(filename, fields, items_defined)
2065
2066     if check_mask and check_bitmask_fields:
2067         for i in items_defined:
2068             item = items_defined[i]
2069             item.check_full_mask(item.mask, field_arrays)
2070             item.check_mask_if_in_field_array(item.mask, field_arrays)
2071
2072     # Now actually check the calls
2073     for c in apiChecks:
2074         c.check_against_items(items_defined, items_declared, items_extern_declared, check_missing_items, field_arrays)
2075
2076
2077     if label_vs_filter:
2078         matches = 0
2079         for hf in items_defined:
2080             if items_defined[hf].check_label_vs_filter(reportError=False, reportNumericalMismatch=True):
2081                 matches += 1
2082
2083         # Only checking if almost every field does match.
2084         checking = len(items_defined) and matches<len(items_defined) and ((matches / len(items_defined)) > 0.93)
2085         if checking:
2086             print(filename, ':', matches, 'label-vs-filter matches out of', len(items_defined), 'so reporting mismatches')
2087             for hf in items_defined:
2088                 items_defined[hf].check_label_vs_filter(reportError=True, reportNumericalMismatch=False)
2089
2090     for hf in items_defined:
2091         items_defined[hf].check_boolean_length()
2092         items_defined[hf].check_string_display()
2093         items_defined[hf].check_ipv4_display()
2094
2095
2096
2097 #################################################################
2098 # Main logic.
2099
2100 # command-line args.  Controls which dissector files should be checked.
2101 # If no args given, will just scan epan/dissectors folder.
2102 parser = argparse.ArgumentParser(description='Check calls in dissectors')
2103 parser.add_argument('--file', action='append',
2104                     help='specify individual dissector file to test')
2105 parser.add_argument('--folder', action='store', default='',
2106                     help='specify folder to test')
2107 parser.add_argument('--commits', action='store',
2108                     help='last N commits to check')
2109 parser.add_argument('--open', action='store_true',
2110                     help='check open files')
2111 parser.add_argument('--mask', action='store_true',
2112                    help='when set, check mask field too')
2113 parser.add_argument('--mask-exact-width', action='store_true',
2114                    help='when set, check width of mask against field width')
2115 parser.add_argument('--label', action='store_true',
2116                    help='when set, check label field too')
2117 parser.add_argument('--consecutive', action='store_true',
2118                     help='when set, copy copy/paste errors between consecutive items')
2119 parser.add_argument('--missing-items', action='store_true',
2120                     help='when set, look for used items that were never registered')
2121 parser.add_argument('--check-bitmask-fields', action='store_true',
2122                     help='when set, attempt to check arrays of hf items passed to add_bitmask() calls')
2123 parser.add_argument('--label-vs-filter', action='store_true',
2124                     help='when set, check whether label matches last part of filter')
2125 parser.add_argument('--extra-value-string-checks', action='store_true',
2126                     help='when set, do extra checks on parsed value_strings')
2127 parser.add_argument('--check-expert-items', action='store_true',
2128                     help='when set, do extra checks on expert items')
2129 parser.add_argument('--check-subtrees', action='store_true',
2130                     help='when set, do extra checks ett variables')
2131 parser.add_argument('--all-checks', action='store_true',
2132                     help='when set, apply all checks to selected files')
2133
2134
2135 args = parser.parse_args()
2136
2137 # Turn all checks on.
2138 if args.all_checks:
2139     args.mask = True
2140     args.mask_exact_width = True
2141     args.consecutive = True
2142     args.check_bitmask_fields = True
2143     args.label = True
2144     args.label_vs_filter = True
2145     #args.extra_value_string_checks = True
2146     args.check_expert_items = True
2147     #args.check_subtrees = Truue
2148
2149 if args.check_bitmask_fields:
2150     args.mask = True
2151
2152
2153 # Get files from wherever command-line args indicate.
2154 files = []
2155 if args.file:
2156     # Add specified file(s)
2157     for f in args.file:
2158         if not os.path.isfile(f):
2159             print('Chosen file', f, 'does not exist.')
2160             exit(1)
2161         else:
2162             files.append(f)
2163 elif args.folder:
2164     # Add all files from a given folder.
2165     folder = args.folder
2166     if not os.path.isdir(folder):
2167         print('Folder', folder, 'not found!')
2168         exit(1)
2169     # Find files from folder.
2170     print('Looking for files in', folder)
2171     files = findDissectorFilesInFolder(folder, recursive=True)
2172 elif args.commits:
2173     # Get files affected by specified number of commits.
2174     command = ['git', 'diff', '--name-only', '--diff-filter=d', 'HEAD~' + args.commits]
2175     files = [f.decode('utf-8')
2176              for f in subprocess.check_output(command).splitlines()]
2177     # Will examine dissector files only
2178     files = list(filter(lambda f : is_dissector_file(f), files))
2179 elif args.open:
2180     # Unstaged changes.
2181     command = ['git', 'diff', '--name-only', '--diff-filter=d']
2182     files = [f.decode('utf-8')
2183              for f in subprocess.check_output(command).splitlines()]
2184     # Only interested in dissector files.
2185     files = list(filter(lambda f : is_dissector_file(f), files))
2186     # Staged changes.
2187     command = ['git', 'diff', '--staged', '--name-only', '--diff-filter=d']
2188     files_staged = [f.decode('utf-8')
2189                     for f in subprocess.check_output(command).splitlines()]
2190     # Only interested in dissector files.
2191     files_staged = list(filter(lambda f : is_dissector_file(f), files_staged))
2192     for f in files_staged:
2193         if f not in files:
2194             files.append(f)
2195 else:
2196     # Find all dissector files.
2197     files  = findDissectorFilesInFolder(os.path.join('epan', 'dissectors'))
2198     files += findDissectorFilesInFolder(os.path.join('plugins', 'epan'), recursive=True)
2199
2200
2201 # If scanning a subset of files, list them here.
2202 print('Examining:')
2203 if args.file or args.commits or args.open:
2204     if files:
2205         print(' '.join(files), '\n')
2206     else:
2207         print('No files to check.\n')
2208 else:
2209     print('All dissector modules\n')
2210
2211
2212 # Now check the files.
2213 for f in files:
2214     if should_exit:
2215         exit(1)
2216     checkFile(f, check_mask=args.mask, mask_exact_width=args.mask_exact_width, check_label=args.label,
2217               check_consecutive=args.consecutive, check_missing_items=args.missing_items,
2218               check_bitmask_fields=args.check_bitmask_fields, label_vs_filter=args.label_vs_filter,
2219               extra_value_string_checks=args.extra_value_string_checks,
2220               check_expert_items=args.check_expert_items, check_subtrees=args.check_subtrees)
2221
2222     # Do checks against all calls.
2223     if args.consecutive:
2224         combined_calls = CombinedCallsCheck(f, apiChecks)
2225         # This hasn't really found any issues, but shows lots of false positives (and are difficult to investigate)
2226         #combined_calls.check_consecutive_item_calls()
2227
2228
2229 # Show summary.
2230 print(warnings_found, 'warnings')
2231 if errors_found:
2232     print(errors_found, 'errors')
2233     exit(1)