2 # Wireshark - Network traffic analyzer
3 # By Gerald Combs <gerald@wireshark.org>
4 # Copyright 1998 Gerald Combs
6 # SPDX-License-Identifier: GPL-2.0-or-later
14 # This utility scans for tfs items, and works out if standard ones
15 # could have been used instead (from epan/tfs.c)
16 # Can also check for value_string where common tfs could be used instead.
19 # - consider merging Item class with check_typed_item_calls.py ?
22 # Try to exit soon after Ctrl-C is pressed.
25 def signal_handler(sig
, frame
):
28 print('You pressed Ctrl+C - exiting')
30 signal
.signal(signal
.SIGINT
, signal_handler
)
# Test for whether the given file was automatically generated.
def isGeneratedFile(filename):
    """Return True if 'filename' looks machine-generated (marker comment near top).

    Only the first few lines are examined, since generator banners appear at
    the top of a file.  Missing files are treated as hand-written (False).
    """
    # Check file exists - e.g. may have been deleted in a recent commit.
    if not os.path.exists(filename):
        return False

    # Banner strings that the various code generators emit.
    generated_markers = (
        'Generated automatically',
        'Generated Automatically',
        'Autogenerated from',
        'is autogenerated',
        'automatically generated by Pidl',
        'Created by: The Qt Meta Object Compiler',
        'This file was generated',
        'This filter was automatically generated',
        'This file is auto generated, do not edit!',
        'This file is auto generated',
    )

    # 'with' guarantees the handle is closed (the original opened without closing
    # on every return path).  Redundant os.path.join(filename) dropped.
    lines_tested = 0
    with open(filename, 'r', encoding="utf8", errors="ignore") as f_read:
        for line in f_read:
            # The comment to say that its generated is near the top, so give up
            # once we get a few lines down.
            if lines_tested > 10:
                return False
            if any(marker in line for marker in generated_markers):
                return True
            lines_tested = lines_tested + 1

    # OK, looks like a hand-written file!
    return False
# Keep track of custom entries that might appear in multiple dissectors,
# so we can consider adding them to tfs.c
custom_tfs_entries = {}


def AddCustomEntry(true_val, false_val, file):
    """Record that the custom TFS pair (true_val, false_val) was seen in 'file'."""
    global custom_tfs_entries
    # setdefault collapses the original first-seen/already-seen branches into one step.
    custom_tfs_entries.setdefault((true_val, false_val), []).append(file)
# Individual parsed TFS entry
class TFS:
    """One true_false_string found in a dissector, with sanity checks on its strings."""

    def __init__(self, file, name, true_val, false_val):
        self.file = file
        self.name = name
        self.true_val = true_val
        self.false_val = false_val

        # NOTE(review): assumes a module-level 'warnings_found' counter exists
        # (checkFile declares the same global) — confirm.
        global warnings_found

        # Should not be empty.
        if not len(true_val) or not len(false_val):
            print('Warning:', file, name, 'has an empty field', self)
            warnings_found += 1

        # Strange if one begins with capital but other doesn't?
        #if true_val[0].isalpha() and false_val[0].isalpha():
        #    if true_val[0].isupper() != false_val[0].isupper():
        #        print(file, name, 'one starts lowercase and the other upper', self)

        # Leading or trailing space should not be needed.
        if true_val.startswith(' ') or true_val.endswith(' '):
            print('Note: ' + self.file + ' ' + self.name + ' - true val begins or ends with space \"' + self.true_val + '\"')
        if false_val.startswith(' ') or false_val.endswith(' '):
            print('Note: ' + self.file + ' ' + self.name + ' - false val begins or ends with space \"' + self.false_val + '\"')

        # Should really not be identical...
        if true_val.lower() == false_val.lower():
            print('Warning:', file, name, 'true and false strings are the same', self)
            warnings_found += 1

        # Shouldn't both be negation (with exception for packet-smb.c).
        if (file != os.path.join('epan', 'dissectors', 'packet-smb.c') and
                (true_val.lower().find('not ') != -1) and (false_val.lower().find('not ') != -1)):
            print('Warning:', file, name, self, 'both strings contain not')
            warnings_found += 1

        # Not expecting full-stops inside strings..
        if true_val.find('.') != -1 or false_val.find('.') != -1:
            print('Warning:', file, name, 'Period found in string', self)
            warnings_found += 1

    def __str__(self):
        return '{' + '"' + self.true_val + '", "' + self.false_val + '"}'
# Only looking at in terms of could/should it be TFS instead.
class ValueString:
    """A parsed value_string, judged on whether it is really a 2-value TFS."""

    def __init__(self, file, name, vals):
        self.file = file
        self.name = name
        self.raw_vals = vals
        self.parsed_vals = {}
        self.looks_like_tfs = True

        # A TFS-like value_string has exactly 3 '{' (two entries + NULL terminator).
        no_lines = self.raw_vals.count('{')
        if no_lines != 3:
            self.looks_like_tfs = False
            return

        # Now parse out each entry in the value_string
        matches = re.finditer(r'\{([\"a-zA-Z\s\d\,]*)\}', self.raw_vals)
        for m in matches:
            entry = m.group(1)
            # Check each entry looks like part of a TFS entry.
            match = re.match(r'\s*([01])\,\s*\"([a-zA-Z\d\s]*\s*)\"', entry)
            if match:
                if match[1] == '1':
                    self.parsed_vals[True] = match[2]
                else:
                    self.parsed_vals[False] = match[2]

                # Now have both entries
                if len(self.parsed_vals) == 2:
                    break
            else:
                self.looks_like_tfs = False
                return

    def __str__(self):
        return '{' + '"' + self.raw_vals + '"}'
# Fixed widths (in bits) for the common integral field types.
# NOTE(review): only the FT_BOOLEAN entry is visible in this chunk; the other
# entries are reconstructed from the standard ftype widths — confirm upstream.
field_widths = {
    'FT_BOOLEAN' : 64,   # TODO: Width depends upon 'display' field
    'FT_CHAR'    : 8,
    'FT_UINT8'   : 8,
    'FT_INT8'    : 8,
    'FT_UINT16'  : 16,
    'FT_INT16'   : 16,
    'FT_UINT24'  : 24,
    'FT_INT24'   : 24,
    'FT_UINT32'  : 32,
    'FT_INT32'   : 32,
    'FT_UINT40'  : 40,
    'FT_INT40'   : 40,
    'FT_UINT48'  : 48,
    'FT_INT48'   : 48,
    'FT_UINT56'  : 56,
    'FT_INT56'   : 56,
    'FT_UINT64'  : 64,
    'FT_INT64'   : 64
}
# Simplified version of class that is in check_typed_item_calls.py
class Item:
    """One registered hf item, with its mask parsed so bit-width checks can run."""

    def __init__(self, filename, hf, filter, label, item_type, type_modifier, strings, macros, mask=None,
                 check_mask=False):
        self.filename = filename
        self.hf = hf
        self.filter = filter
        self.label = label
        self.strings = strings
        self.mask = mask

        # N.B. Not setting mask by looking up macros.

        self.item_type = item_type
        self.type_modifier = type_modifier

        self.set_mask_value(macros)

        # Count how many bits of the mask are set.
        self.bits_set = 0
        for n in range(0, self.get_field_width_in_bits()):
            if self.check_bit(self.mask_value, n):
                self.bits_set += 1

    def __str__(self):
        return 'Item ({0} "{1}" {2} type={3}:{4} strings={5} mask={6})'.format(self.filename, self.label, self.filter,
                                                                               self.item_type, self.type_modifier,
                                                                               self.strings, self.mask)

    def set_mask_value(self, macros):
        """Parse self.mask (possibly via a macro) into self.mask_value; flag failures."""
        try:
            self.mask_read = True

            # Substitute mask if found as a macro..
            if self.mask in macros:
                self.mask = macros[self.mask]
            elif any(c not in '0123456789abcdefABCDEFxX' for c in self.mask):
                # Not a plain numeric literal - give up.
                self.mask_read = False
                self.mask_value = 0
                return

            # Read according to the appropriate base.
            if self.mask.startswith('0x'):
                self.mask_value = int(self.mask, 16)
            elif self.mask.startswith('0'):
                self.mask_value = int(self.mask, 8)
            else:
                self.mask_value = int(self.mask, 10)
        except Exception:
            self.mask_read = False
            self.mask_value = 0

    # Return true if bit position n is set in value.
    def check_bit(self, value, n):
        return (value & (0x1 << n)) != 0

    def get_field_width_in_bits(self):
        """Return the field's width in bits (0 when unknown)."""
        if self.item_type == 'FT_BOOLEAN':
            if self.type_modifier == 'NULL':
                return 8  # i.e. 1 byte
            elif self.type_modifier == 'BASE_NONE':
                return 8
            elif self.type_modifier == 'SEP_DOT':   # from proto.h, only meant for FT_BYTES
                return 64
            else:
                try:
                    # For FT_BOOLEAN, modifier is just numerical number of bits. Round up to next nibble.
                    return int((int(self.type_modifier) + 3)/4)*4
                except Exception:
                    return 0
        else:
            if self.item_type in field_widths:
                # Lookup fixed width for this type
                return field_widths[self.item_type]
            else:
                #print('returning 0 for', self)
                return 0
271 def removeComments(code_string
):
272 code_string
= re
.sub(re
.compile(r
"/\*.*?\*/",re
.DOTALL
) ,"" ,code_string
) # C-style comment
273 code_string
= re
.sub(re
.compile(r
"//.*?\n" ) ,"" ,code_string
) # C++-style comment
274 code_string
= re
.sub(re
.compile(r
"#if 0.*?#endif",re
.DOTALL
) ,"" , code_string
) # Ignored region
# Look for true_false_string items in a dissector file.
def findTFS(filename):
    """Return dict of name -> TFS for every true_false_string defined in 'filename'."""
    tfs_found = {}

    with open(filename, 'r', encoding="utf8", errors="ignore") as f:
        contents = f.read()
        # Example: const true_false_string tfs_yes_no = { "Yes", "No" };

        # Remove comments so as not to trip up RE.
        contents = removeComments(contents)

        matches = re.finditer(r'\sconst\s*true_false_string\s*([a-zA-Z0-9_]*)\s*=\s*{\s*\"([a-zA-Z_0-9/:! ]*)\"\s*,\s*\"([a-zA-Z_0-9/:! ]*)\"', contents)
        for m in matches:
            name = m.group(1)
            true_val = m.group(2)
            false_val = m.group(3)
            tfs_found[name] = TFS(filename, name, true_val, false_val)

    return tfs_found
# Look for value_string entries in a dissector file.
def findValueStrings(filename):
    """Return dict of name -> ValueString for every value_string defined in 'filename'."""
    vals_found = {}

    # Example:
    #static const value_string radio_type_vals[] =
    #{
    #    { 0, "FDD"},
    #    { 1, "TDD"},
    #    { 0, NULL }
    #};

    with open(filename, 'r', encoding="utf8", errors="ignore") as f:
        contents = f.read()

        # Remove comments so as not to trip up RE.
        contents = removeComments(contents)

        matches = re.finditer(r'.*const value_string\s*([a-zA-Z0-9_]*)\s*\[\s*\]\s*\=\s*\{([\{\}\d\,a-zA-Z0-9\s\"]*)\};', contents)
        for m in matches:
            name = m.group(1)
            vals = m.group(2)
            vals_found[name] = ValueString(filename, name, vals)

    return vals_found
# Look for hf items (i.e. full item to be registered) in a dissector file.
def find_items(filename, macros, check_mask=False, mask_exact_width=False, check_label=False, check_consecutive=False):
    """Return dict of hf name -> Item for every hf entry registered in 'filename'."""
    items = {}
    with open(filename, 'r', encoding="utf8", errors="ignore") as f:
        contents = f.read()

        # Remove comments so as not to trip up RE.
        contents = removeComments(contents)

        # N.B. re extends all the way to HFILL to avoid greedy matching
        matches = re.finditer( r'.*\{\s*\&(hf_[a-z_A-Z0-9]*)\s*,\s*{\s*\"(.*?)\"\s*,\s*\"(.*?)\"\s*,\s*(.*?)\s*,\s*([0-9A-Z_\|\s]*?)\s*,\s*(.*?)\s*,\s*(.*?)\s*,\s*([a-zA-Z0-9\W\s_\u00f6\u00e4]*?)\s*,\s*HFILL', contents)
        for m in matches:
            # Store this item.
            hf = m.group(1)
            # NOTE(review): group->keyword mapping reconstructed; group 6 = strings,
            # group 7 = mask — confirm against check_typed_item_calls.py.
            items[hf] = Item(filename, hf, filter=m.group(3), label=m.group(2), item_type=m.group(4),
                             type_modifier=m.group(5),
                             strings=m.group(6),
                             macros=macros,
                             mask=m.group(7))
    return items
def find_macros(filename):
    """Return dict of macro name -> value for simple numeric #defines in 'filename'."""
    macros = {}
    with open(filename, 'r', encoding="utf8", errors="ignore") as f:
        contents = f.read()

        # Remove comments so as not to trip up RE.
        contents = removeComments(contents)

        matches = re.finditer( r'#define\s*([A-Z0-9_]*)\s*([0-9xa-fA-F]*)\n', contents)
        for m in matches:
            # Store this mapping.
            macros[m.group(1)] = m.group(2)
    return macros
def is_dissector_file(filename):
    """Return a truthy match when 'filename' follows the packet-*.c / file-*.c convention."""
    # re.match caches the compiled pattern, so this is equivalent to compiling first.
    return re.match(r'.*(packet|file)-.*\.c', filename)
def findDissectorFilesInFolder(folder):
    """Walk 'folder' recursively and return the set of dissector source paths."""
    files = set()
    for path, tmp_unused, names in os.walk(folder):
        for f in names:
            if is_dissector_file(f):
                files.add(os.path.join(path, f))
    return files
# Check the given dissector file.
def checkFile(filename, common_tfs, look_for_common=False, check_value_strings=False, count_common_usage=False):
    """Scan one dissector for TFS / value_string entries duplicating tfs.c.

    common_tfs maps name -> TFS for the shared entries from epan/tfs.c.
    Updates the module-level warnings_found / errors_found / common_usage counters.
    """
    global warnings_found
    global errors_found

    # Check file exists - e.g. may have been deleted in a recent commit.
    if not os.path.exists(filename):
        print(filename, 'does not exist!')
        return

    # Find locally-defined TFS entries.
    file_tfs = findTFS(filename)

    # See if any of these items already existed in tfs.c
    for f in file_tfs:
        found = False
        exact_case = False
        for c in common_tfs:
            # Do not do this check for plugins; plugins cannot import
            # data values from libwireshark (functions, yes; data
            # values, no).
            #
            # Test whether there's a common prefix for the file name
            # and "plugin/epan/"; if so, this is a plugin, and there
            # is no common path and os.path.commonprefix returns an
            # empty string, otherwise it returns the common path, so
            # we check whether the common path is an empty string.
            if os.path.commonprefix([filename, 'plugin/epan/']) == '':
                if file_tfs[f].true_val == common_tfs[c].true_val and file_tfs[f].false_val == common_tfs[c].false_val:
                    found = True
                    exact_case = True
                elif file_tfs[f].true_val.upper() == common_tfs[c].true_val.upper() and file_tfs[f].false_val.upper() == common_tfs[c].false_val.upper():
                    found = True

                if found:
                    print("Error:" if exact_case else "Warning: ", filename, f,
                          "- could have used", c, 'from tfs.c instead: ', common_tfs[c],
                          '' if exact_case else ' (capitalisation differs)')
                    if exact_case:
                        errors_found += 1
                    else:
                        warnings_found += 1
                    break

        # Remember custom entries so repeated ones can be promoted to tfs.c.
        if not found and look_for_common:
            AddCustomEntry(file_tfs[f].true_val, file_tfs[f].false_val, filename)

    if check_value_strings:
        # Get macros so masks can be resolved.
        macros = find_macros(filename)

        # Get value_string entries.
        vs = findValueStrings(filename)

        # Also get the hf items registered in this file.
        items = find_items(filename, macros, check_mask=True)

        for v in vs:
            if vs[v].looks_like_tfs:
                for c in common_tfs:
                    found = False
                    exact_case = False

                    # Do not do this check for plugins; plugins cannot import
                    # data values from libwireshark (functions, yes; data
                    # values, no).
                    #
                    # Test whether there's a common prefix for the file name
                    # and "plugin/epan/"; if so, this is a plugin, and there
                    # is no common path and os.path.commonprefix returns an
                    # empty string, otherwise it returns the common path, so
                    # we check whether the common path is an empty string.
                    if os.path.commonprefix([filename, 'plugin/epan/']) == '':
                        if common_tfs[c].true_val == vs[v].parsed_vals[True] and common_tfs[c].false_val == vs[v].parsed_vals[False]:
                            found = True
                            exact_case = True
                        elif common_tfs[c].true_val.upper() == vs[v].parsed_vals[True].upper() and common_tfs[c].false_val.upper() == vs[v].parsed_vals[False].upper():
                            found = True

                        if found:
                            # OK, now look for items that:
                            # - use this value_string, and
                            # - have a mask width of 1 bit (no good if field can have values > 1...)
                            for i in items:
                                if re.match(r'VALS\(\s*'+v+r'\s*\)', items[i].strings):
                                    if items[i].bits_set == 1:
                                        print("Warn:" if exact_case else "Note:", filename, 'value_string', "'"+v+"'",
                                              '- could have used tfs.c entry instead: for', i,
                                              ' - "FT_BOOLEAN,', str(items[i].get_field_width_in_bits()) + ', TFS(&' + c + '),"',
                                              '' if exact_case else ' (capitalisation differs)')
                                        if exact_case:
                                            warnings_found += 1

    if count_common_usage:
        # Look for TFS(&<name>) in dissector
        with open(filename, 'r') as f:
            contents = f.read()
            for c in common_tfs:
                m = re.search(r'TFS\(\s*\&' + c + r'\s*\)', contents)
                if m:
                    if c not in common_usage:
                        common_usage[c] = 1
                    else:
                        common_usage[c] += 1
#################################################################
# Main program.

# command-line args. Controls which dissector files should be checked.
# If no args given, will just scan epan/dissectors folder.
parser = argparse.ArgumentParser(description='Check calls in dissectors')
parser.add_argument('--file', action='append',
                    help='specify individual dissector file to test')
parser.add_argument('--commits', action='store',
                    help='last N commits to check')
parser.add_argument('--open', action='store_true',
                    help='check open files')
parser.add_argument('--check-value-strings', action='store_true',
                    help='check whether value_strings could have been tfs?')
parser.add_argument('--common', action='store_true',
                    help='check for potential new entries for tfs.c')
parser.add_argument('--common-usage', action='store_true',
                    help='count how many dissectors are using common tfs entries')

args = parser.parse_args()
# Get files from wherever command-line args indicate.
files = set()
if args.file:
    # Add specified file(s)
    for f in args.file:
        # Allow a bare dissector name to be resolved under epan/dissectors.
        if not os.path.isfile(f) and not f.startswith('epan'):
            f = os.path.join('epan', 'dissectors', f)
        if not os.path.isfile(f):
            print('Chosen file', f, 'does not exist.')
            exit(1)
        else:
            files.add(f)
elif args.commits:
    # Get files affected by specified number of commits.
    command = ['git', 'diff', '--name-only', 'HEAD~' + args.commits]
    files = {f.decode('utf-8')
             for f in subprocess.check_output(command).splitlines()}
    # Will examine dissector files only
    files = set(filter(is_dissector_file, files))
elif args.open:
    # Unstaged changes.
    command = ['git', 'diff', '--name-only']
    files = {f.decode('utf-8')
             for f in subprocess.check_output(command).splitlines()}
    # Only interested in dissector files.
    files = list(filter(is_dissector_file, files))
    # Staged changes.
    command = ['git', 'diff', '--staged', '--name-only']
    files_staged = {f.decode('utf-8')
                    for f in subprocess.check_output(command).splitlines()}
    # Only interested in dissector files.
    # Fix: filter into files_staged - assigning the filtered staged set to
    # 'files' (as before) discarded the unstaged results gathered above.
    files_staged = set(filter(is_dissector_file, files_staged))
    # Merge in staged files not already found among the unstaged ones.
    for f in files_staged:
        if f not in files:
            files.append(f)
else:
    # Find all dissector files from folder.
    files = findDissectorFilesInFolder(os.path.join('epan', 'dissectors'))
# If scanning a subset of files, list them here.
print('Examining:')
if args.file or args.commits or args.open:
    if files:
        print(' '.join(sorted(files)), '\n')
    else:
        print('No files to check.\n')
else:
    print('All dissector modules\n')

# Get standard/ shared ones.
common_tfs_entries = findTFS(os.path.join('epan', 'tfs.c'))

# Now check the files to see if they could have used shared ones instead.
# Look at files in sorted order, to give some idea of how far through we are.
for f in sorted(files):
    # Bail out promptly after Ctrl-C.
    if should_exit:
        exit(1)
    if not isGeneratedFile(f):
        checkFile(f, common_tfs_entries, look_for_common=args.common,
                  check_value_strings=args.check_value_strings,
                  count_common_usage=args.common_usage)
# Report on commonly-defined values.
if args.common:
    # Looking for items that could potentially be moved to tfs.c
    for c in custom_tfs_entries:
        # Only want to see items that have 3 or more occurrences.
        # Even then, probably only want to consider ones that sound generic.
        if len(custom_tfs_entries[c]) > 2:
            print(c, 'appears', len(custom_tfs_entries[c]), 'times, in: ', custom_tfs_entries[c])

if args.common_usage:
    for c in common_tfs_entries:
        if c in common_usage:
            print(c, 'used in', common_usage[c], 'dissectors')
        else:
            print('***', c, 'IS NOT USED! ***')

# Summary of problems found.
print(warnings_found, 'warnings found')
if errors_found:
    print(errors_found, 'errors found')