LATER... ei_kerberos_kdc_session_key ...
[wireshark-sm.git] / tools / check_tfs.py
blobf7c59377f3ca2a4cb7df62e62029312050253493
1 #!/usr/bin/env python3
2 # Wireshark - Network traffic analyzer
3 # By Gerald Combs <gerald@wireshark.org>
4 # Copyright 1998 Gerald Combs
6 # SPDX-License-Identifier: GPL-2.0-or-later
8 import os
9 import re
10 import subprocess
11 import argparse
12 import signal
14 # This utility scans for tfs items, and works out if standard ones
15 # could have been used instead (from epan/tfs.c)
16 # Can also check for value_string where common tfs could be used instead.
18 # TODO:
19 # - consider merging Item class with check_typed_item_calls.py ?
# Try to exit soon after Ctrl-C is pressed.
# Module-level flag: starts False and is set to True by signal_handler().
should_exit = False

def signal_handler(sig, frame):
    """Handle SIGINT by recording that an exit was requested.

    Sets the module-level should_exit flag and prints a short message.
    The sig and frame arguments are supplied by the signal machinery and
    are not used.
    """
    global should_exit
    should_exit = True
    print('You pressed Ctrl+C - exiting')

# Install the handler for SIGINT.
signal.signal(signal.SIGINT, signal_handler)
# Test for whether the given file was automatically generated.
def isGeneratedFile(filename):
    """Return True if filename looks like an automatically generated file.

    Only the first 11 lines are examined, because the comment saying that a
    file is generated is expected near the top.  A file that does not exist
    (e.g. deleted in a recent commit) is reported as not generated.

    The file is read as UTF-8 with undecodable bytes ignored.
    """
    # Check file exists - e.g. may have been deleted in a recent commit.
    if not os.path.exists(filename):
        return False

    # Any one of these substrings marks the file as generated.
    # ('This file is auto generated' also covers the longer
    # '..., do not edit!' variant.)
    markers = (
        'Generated automatically',
        'Generated Automatically',
        'Autogenerated from',
        'is autogenerated',
        'automatically generated by Pidl',
        'Created by: The Qt Meta Object Compiler',
        'This file was generated',
        'This filter was automatically generated',
        'This file is auto generated',
    )

    # The context manager closes the file on every exit path, including
    # when reading raises.
    with open(filename, 'r', encoding="utf8", errors="ignore") as f_read:
        for lines_tested, line in enumerate(f_read):
            # Give up once we get a few lines down.
            if lines_tested > 10:
                return False
            if any(marker in line for marker in markers):
                return True

    # OK, looks like a hand-written file!
    return False
# Keep track of custom entries that might appear in multiple dissectors,
# so we can consider adding them to tfs.c
# Maps (true_val, false_val) -> list of files where that pair was seen.
custom_tfs_entries = {}

def AddCustomEntry(true_val, false_val, file):
    """Record that file defines a custom TFS with the given pair of strings.

    The file is appended to the list kept for (true_val, false_val) in
    custom_tfs_entries, creating that list on first use.
    """
    custom_tfs_entries.setdefault((true_val, false_val), []).append(file)
# Individual parsed TFS entry
class TFS:
    """A single true_false_string definition found in a source file.

    Constructing an instance runs a series of sanity checks on the two
    strings.  Each failed check prints a message, and those reported as
    'Warning:' also increment the module-level warnings_found counter.
    """

    def __init__(self, file, name, true_val, false_val):
        global warnings_found

        self.file = file
        self.name = name
        self.true_val = true_val
        self.false_val = false_val

        lowered_true = true_val.lower()
        lowered_false = false_val.lower()

        # Should not be empty
        if len(true_val) == 0 or len(false_val) == 0:
            print('Warning:', file, name, 'has an empty field', self)
            warnings_found += 1

        # Leading or trailing space should not be needed.
        if true_val[:1] == ' ' or true_val[-1:] == ' ':
            print('Note: ' + self.file + ' ' + self.name + ' - true val begins or ends with space \"' + self.true_val + '\"')
        if false_val[:1] == ' ' or false_val[-1:] == ' ':
            print('Note: ' + self.file + ' ' + self.name + ' - false val begins or ends with space \"' + self.false_val + '\"')

        # Should really not be identical...
        if lowered_true == lowered_false:
            print('Warning:', file, name, 'true and false strings are the same', self)
            warnings_found += 1

        # Shouldn't both be negation (with exception..)
        is_smb = (file == os.path.join('epan', 'dissectors', 'packet-smb.c'))
        if not is_smb and 'not ' in lowered_true and 'not ' in lowered_false:
            print('Warning:', file, name, self, 'both strings contain not')
            warnings_found += 1

        # Not expecting full-stops inside strings..
        if '.' in true_val or '.' in false_val:
            print('Warning:', file, name, 'Period found in string', self)
            warnings_found += 1

    def __str__(self):
        """Render the pair in C initialiser form, e.g. {"Yes", "No"}."""
        return '{' + '"' + self.true_val + '", "' + self.false_val + '"}'
# Only looking at in terms of could/should it be TFS instead.
class ValueString:
    """A value_string definition, examined only for whether it could be a TFS.

    After construction:
      - raw_vals holds the text between the outer braces, as passed in.
      - parsed_vals maps True/False to the strings given for values 1/0.
      - looks_like_tfs is True only when the definition has exactly three
        brace-opened entries (two values plus the terminator) and strings
        for both 0 and 1 were parsed successfully.
    """

    def __init__(self, file, name, vals):
        self.file = file
        self.name = name
        self.raw_vals = vals
        self.parsed_vals = {}
        self.looks_like_tfs = True

        # Expect exactly 3 entries: value 0, value 1 and the terminator.
        if self.raw_vals.count('{') != 3:
            self.looks_like_tfs = False
            return

        # Now parse out each entry in the value_string
        for m in re.finditer(r'\{([\"a-zA-Z\s\d\,]*)\}', self.raw_vals):
            # Check each entry looks like part of a TFS entry.
            match = re.match(r'\s*([01])\,\s*\"([a-zA-Z\d\s]*\s*)\"', m[1])
            if not match:
                self.looks_like_tfs = False
                break

            # Value 1 supplies the 'true' string, value 0 the 'false' string.
            self.parsed_vals[match[1] == '1'] = match[2]

            # Now have both entries
            if len(self.parsed_vals) == 2:
                break

        # If the loop ran out of entries before both values were seen, this
        # cannot be a TFS.  Without this, users indexing parsed_vals[True]
        # and parsed_vals[False] would hit a KeyError.
        if len(self.parsed_vals) != 2:
            self.looks_like_tfs = False

    def __str__(self):
        """Return the raw entry text wrapped in braces and quotes."""
        return '{' + '"' + self.raw_vals + '"}'
# Width in bits of each field type, keyed by FT_* type name.
field_widths = {
    'FT_BOOLEAN' : 64,   # TODO: Width depends upon 'display' field
    'FT_CHAR'    : 8,
    'FT_UINT8'   : 8,
    'FT_INT8'    : 8,
    'FT_UINT16'  : 16,
    'FT_INT16'   : 16,
    'FT_UINT24'  : 24,
    'FT_INT24'   : 24,
    'FT_UINT32'  : 32,
    'FT_INT32'   : 32,
    'FT_UINT40'  : 40,
    'FT_INT40'   : 40,
    'FT_UINT48'  : 48,
    'FT_INT48'   : 48,
    'FT_UINT56'  : 56,
    'FT_INT56'   : 56,
    'FT_UINT64'  : 64,
    'FT_INT64'   : 64
}
# Simplified version of class that is in check_typed_item_calls.py
class Item:
    """A registered hf item, reduced to what is needed for the TFS checks.

    After construction:
      - mask is the mask text (replaced by the macro's value if it named one).
      - mask_read says whether the mask could be read as a number.
      - mask_value is the numeric mask (0 when it could not be read).
      - bits_set is the number of mask bits set within the field's width.
    """

    previousItem = None

    def __init__(self, filename, hf, filter, label, item_type, type_modifier, strings, macros, mask=None,
                 check_mask=False):
        # N.B. check_mask is accepted but not used by this simplified class.
        self.filename = filename
        self.hf = hf
        self.filter = filter
        self.label = label
        self.strings = strings
        self.mask = mask

        # N.B. Not setting mask by looking up macros.

        self.item_type = item_type
        self.type_modifier = type_modifier

        self.set_mask_value(macros)

        # Count the mask bits that fall inside the field's width.
        self.bits_set = sum(1 for n in range(0, self.get_field_width_in_bits())
                            if self.check_bit(self.mask_value, n))

    def __str__(self):
        return 'Item ({0} "{1}" {2} type={3}:{4} strings={5} mask={6})'.format(self.filename, self.label, self.filter,
                                                                               self.item_type, self.type_modifier, self.strings, self.mask)

    def set_mask_value(self, macros):
        """Work out mask_value (and mask_read) from the mask text.

        macros maps macro names to their replacement text.  If the mask is a
        known macro name, its replacement is used.  Otherwise a mask holding
        anything but hex digits and 'x'/'X' is treated as unreadable.
        Unreadable masks give mask_read = False and mask_value = 0.
        """
        self.mask_read = True
        try:
            # Substitute mask if found as a macro..
            if self.mask in macros:
                self.mask = macros[self.mask]
            elif any(c not in '0123456789abcdefABCDEFxX' for c in self.mask):
                self.mask_read = False
                self.mask_value = 0
                return

            # Read according to the appropriate base.  Both '0x' and '0X'
            # introduce hex; testing only for '0x' sent '0X..' masks down
            # the octal branch, where they failed to parse.
            if self.mask.startswith(('0x', '0X')):
                self.mask_value = int(self.mask, 16)
            elif self.mask.startswith('0'):
                self.mask_value = int(self.mask, 8)
            else:
                self.mask_value = int(self.mask, 10)
        except (AttributeError, TypeError, ValueError):
            # E.g. mask is None, or the text is not a valid number.
            self.mask_read = False
            self.mask_value = 0

    # Return true if bit position n is set in value.
    def check_bit(self, value, n):
        return (value & (0x1 << n)) != 0

    def get_field_width_in_bits(self):
        """Return the field width in bits, or 0 if it cannot be determined."""
        if self.item_type == 'FT_BOOLEAN':
            if self.type_modifier == 'NULL':
                return 8  # i.e. 1 byte
            elif self.type_modifier == 'BASE_NONE':
                return 8
            elif self.type_modifier == 'SEP_DOT':   # from proto.h, only meant for FT_BYTES
                return 64
            else:
                try:
                    # For FT_BOOLEAN, modifier is just numerical number of bits. Round up to next nibble.
                    return int((int(self.type_modifier) + 3)/4)*4
                except (TypeError, ValueError):
                    return 0
        else:
            # Lookup fixed width for this type; unknown types give 0.
            return field_widths.get(self.item_type, 0)
def removeComments(code_string):
    """Return code_string with comments and '#if 0' regions stripped.

    Three kinds of text are deleted, in this order:
      1. C-style comments (/* ... */), which may span lines.
      2. C++-style comments (// up to and including the newline).
      3. Ignored regions running from '#if 0' to the next '#endif'.
    """
    removals = (
        (r"/\*.*?\*/", re.DOTALL),       # C-style comment
        (r"//.*?\n", 0),                 # C++-style comment
        (r"#if 0.*?#endif", re.DOTALL),  # Ignored region
    )
    for pattern, flags in removals:
        code_string = re.compile(pattern, flags).sub("", code_string)

    return code_string
# Look for true_false_string items in a dissector file.
def findTFS(filename):
    """Return a dict mapping TFS name -> TFS object for each definition found.

    The file is read as UTF-8 (undecodable bytes ignored) and comments are
    removed before matching.  If a name is defined more than once, the last
    definition wins.
    """
    with open(filename, 'r', encoding="utf8", errors="ignore") as f:
        # Remove comments so as not to trip up RE.
        contents = removeComments(f.read())

    # Example: const true_false_string tfs_yes_no = { "Yes", "No" };
    tfs_re = r'\sconst\s*true_false_string\s*([a-zA-Z0-9_]*)\s*=\s*{\s*\"([a-zA-Z_0-9/:! ]*)\"\s*,\s*\"([a-zA-Z_0-9/:! ]*)\"'

    # Groups are: name, true string, false string.
    return {m.group(1): TFS(filename, m.group(1), m.group(2), m.group(3))
            for m in re.finditer(tfs_re, contents)}
# Look for value_string entries in a dissector file.
def findValueStrings(filename):
    """Return a dict mapping value_string name -> ValueString object.

    The file is read as UTF-8 (undecodable bytes ignored) and comments are
    removed before matching.  Only arrays whose entries consist of digits,
    letters, commas, quotes, braces and whitespace are matched.
    """
    # Example of what is being looked for:
    #static const value_string radio_type_vals[] =
    #{
    #    { 0,      "FDD"},
    #    { 1,      "TDD"},
    #    { 0, NULL }
    #};

    with open(filename, 'r', encoding="utf8", errors="ignore") as f:
        # Remove comments so as not to trip up RE.
        contents = removeComments(f.read())

    vs_re = r'.*const value_string\s*([a-zA-Z0-9_]*)\s*\[\s*\]\s*\=\s*\{([\{\}\d\,a-zA-Z0-9\s\"]*)\};'

    # Groups are: array name, text between the outer braces.
    return {m.group(1): ValueString(filename, m.group(1), m.group(2))
            for m in re.finditer(vs_re, contents)}
# Look for hf items (i.e. full item to be registered) in a dissector file.
def find_items(filename, macros, check_mask=False, mask_exact_width=False, check_label=False, check_consecutive=False):
    """Return a dict mapping hf variable name -> Item for each hf entry found.

    macros is handed to each Item so that a mask given as a macro name can
    be resolved.  The check_mask, mask_exact_width, check_label and
    check_consecutive arguments are accepted but not used here.
    """
    with open(filename, 'r', encoding="utf8", errors="ignore") as f:
        # Remove comments so as not to trip up RE.
        contents = removeComments(f.read())

    # N.B. re extends all the way to HFILL to avoid greedy matching
    hf_re = r'.*\{\s*\&(hf_[a-z_A-Z0-9]*)\s*,\s*{\s*\"(.*?)\"\s*,\s*\"(.*?)\"\s*,\s*(.*?)\s*,\s*([0-9A-Z_\|\s]*?)\s*,\s*(.*?)\s*,\s*(.*?)\s*,\s*([a-zA-Z0-9\W\s_\u00f6\u00e4]*?)\s*,\s*HFILL'

    items = {}
    for m in re.finditer(hf_re, contents):
        # The first seven groups, in order of appearance in the entry.
        hf, label, filter_name, item_type, type_modifier, strings, mask = m.group(1, 2, 3, 4, 5, 6, 7)
        # Store this item.
        items[hf] = Item(filename, hf, filter=filter_name, label=label, item_type=item_type,
                         type_modifier=type_modifier,
                         strings=strings,
                         macros=macros,
                         mask=mask)
    return items
def find_macros(filename):
    """Return a dict mapping macro name -> value text for simple #defines.

    Only defines whose name is made of upper-case letters, digits and
    underscores, and whose value is made of hex digits and 'x', are
    matched.  The file is read as UTF-8 (undecodable bytes ignored) and
    comments are removed first.
    """
    with open(filename, 'r', encoding="utf8", errors="ignore") as f:
        # Remove comments so as not to trip up RE.
        contents = removeComments(f.read())

    define_re = r'#define\s*([A-Z0-9_]*)\s*([0-9xa-fA-F]*)\n'

    # Store each name -> value mapping (a later define replaces an earlier one).
    return {m.group(1): m.group(2) for m in re.finditer(define_re, contents)}
def is_dissector_file(filename):
    """Return a match object if filename looks like a dissector, else None.

    A dissector name contains 'packet-' or 'file-' followed later by '.c'.
    The pattern is anchored at the start only, so text after '.c' is allowed.
    """
    return re.match(r'.*(packet|file)-.*\.c', filename)
def findDissectorFilesInFolder(folder):
    """Return the set of dissector file paths found anywhere under folder.

    If an exit has been requested (should_exit is set) the walk stops early
    and the files collected so far are returned.  A set is always returned,
    so callers can iterate or sort the result without a None check.
    """
    files = set()

    for path, _dirs, names in os.walk(folder):
        for f in names:
            if should_exit:
                # Return what we have rather than None; a bare return here
                # made the caller's sorted(files) raise TypeError.
                return files
            if is_dissector_file(f):
                files.add(os.path.join(path, f))

    return files
# Global counts
# Both are incremented as problems are reported and printed in the
# summary at the end of the run.
warnings_found = 0
errors_found = 0

# name -> count
# Keyed by common TFS name; the value is the number of checked files in
# which a TFS(&<name>) reference was found.
common_usage = {}
# Check the given dissector file.
def checkFile(filename, common_tfs, look_for_common=False, check_value_strings=False, count_common_usage=False):
    """Check one dissector file against the common TFS entries.

    filename              -- path of the file to check.
    common_tfs            -- dict of name -> TFS for the shared entries.
    look_for_common       -- if set, record each TFS that matched no shared
                             entry via AddCustomEntry().
    check_value_strings   -- if set, also report 1-bit items whose
                             value_string could be a shared TFS instead.
    count_common_usage    -- if set, count in common_usage each shared
                             entry that this file references.

    Results are printed; errors_found and warnings_found are updated.
    Returns None.
    """
    global warnings_found
    global errors_found

    # Check file exists - e.g. may have been deleted in a recent commit.
    if not os.path.exists(filename):
        print(filename, 'does not exist!')
        return

    # Do not do the matching checks for plugins; plugins cannot import
    # data values from libwireshark (functions, yes; data values, no).
    #
    # A file is treated as a plugin when its name shares any leading text
    # with 'plugin/epan/'.  This is the same for every comparison below,
    # so it is worked out once here.
    # NOTE(review): os.path.commonprefix compares character by character,
    # so any filename beginning with 'p' counts as a plugin here.  Kept
    # as-is to preserve the existing behaviour - confirm whether intended.
    not_plugin = os.path.commonprefix([filename, 'plugin/epan/']) == ''

    # Find items.
    file_tfs = findTFS(filename)

    # See if any of these items already existed in tfs.c
    for f in file_tfs:
        entry = file_tfs[f]
        # Initialised here so that it is defined even when common_tfs is empty.
        found = False

        if not_plugin:
            for c in common_tfs:
                common = common_tfs[c]
                exact_case = (entry.true_val == common.true_val and
                              entry.false_val == common.false_val)
                found = exact_case or (entry.true_val.upper() == common.true_val.upper() and
                                       entry.false_val.upper() == common.false_val.upper())

                if found:
                    print("Error:" if exact_case else "Warning: ", filename, f,
                          "- could have used", c, 'from tfs.c instead: ', common,
                          '' if exact_case else ' (capitalisation differs)')
                    if exact_case:
                        errors_found += 1
                    else:
                        warnings_found += 1
                    # Only report the first shared entry that matches.
                    break

        if not found and look_for_common:
            AddCustomEntry(entry.true_val, entry.false_val, filename)

    if check_value_strings:
        # Get macros
        macros = find_macros(filename)

        # Get value_string entries.
        vs = findValueStrings(filename)

        # Also get hf items
        items = find_items(filename, macros, check_mask=True)

        # As above, nothing is compared for plugins.
        if not_plugin:
            for v in vs:
                if not vs[v].looks_like_tfs:
                    continue

                parsed = vs[v].parsed_vals
                # Both strings are needed for the comparisons below.
                if True not in parsed or False not in parsed:
                    continue

                # Matches an item's strings field that refers to this value_string.
                vals_re = re.compile(r'VALS\(\s*'+v+r'\s*\)')

                for c in common_tfs:
                    common = common_tfs[c]
                    exact_case = (common.true_val == parsed[True] and
                                  common.false_val == parsed[False])
                    found = exact_case or (common.true_val.upper() == parsed[True].upper() and
                                           common.false_val.upper() == parsed[False].upper())

                    # Do values match?
                    if not found:
                        continue

                    # OK, now look for items that:
                    # - have VALS(v) AND
                    # - have a mask width of 1 bit (no good if field can have values > 1...)
                    for i in items:
                        if vals_re.match(items[i].strings) and items[i].bits_set == 1:
                            print("Warn:" if exact_case else "Note:", filename, 'value_string', "'"+v+"'",
                                  '- could have used tfs.c entry instead: for', i,
                                  ' - "FT_BOOLEAN,', str(items[i].get_field_width_in_bits()) + ', TFS(&' + c + '),"',
                                  '' if exact_case else ' (capitalisation differs)')
                            if exact_case:
                                warnings_found += 1

    if count_common_usage:
        # Look for TFS(&<name>) in dissector.
        # Read with the same encoding and error handling as the other
        # readers of dissector files, so undecodable bytes cannot abort
        # the run.
        with open(filename, 'r', encoding="utf8", errors="ignore") as f:
            contents = f.read()
        for c in common_tfs:
            if re.search(r'TFS\(\s*\&' + c + r'\s*\)', contents):
                common_usage[c] = common_usage.get(c, 0) + 1
#################################################################
# Main logic.

# command-line args.  Controls which dissector files should be checked.
# If no args given, will just scan epan/dissectors folder.
parser = argparse.ArgumentParser(description='Check calls in dissectors')
parser.add_argument('--file', action='append',
                    help='specify individual dissector file to test')
parser.add_argument('--commits', action='store',
                    help='last N commits to check')
parser.add_argument('--open', action='store_true',
                    help='check open files')
parser.add_argument('--check-value-strings', action='store_true',
                    help='check whether value_strings could have been tfs?')

parser.add_argument('--common', action='store_true',
                    help='check for potential new entries for tfs.c')
parser.add_argument('--common-usage', action='store_true',
                    help='count how many dissectors are using common tfs entries')

args = parser.parse_args()


# Get files from wherever command-line args indicate.
files = set()
if args.file:
    # Add specified file(s)
    for f in args.file:
        # A bare name is taken to be relative to epan/dissectors.
        if not os.path.isfile(f) and not f.startswith('epan'):
            f = os.path.join('epan', 'dissectors', f)
        if not os.path.isfile(f):
            print('Chosen file', f, 'does not exist.')
            exit(1)
        else:
            files.add(f)
elif args.commits:
    # Get files affected by specified number of commits.
    command = ['git', 'diff', '--name-only', 'HEAD~' + args.commits]
    files = {f.decode('utf-8')
             for f in subprocess.check_output(command).splitlines()}
    # Will examine dissector files only
    files = set(filter(is_dissector_file, files))
elif args.open:
    # Unstaged changes.
    command = ['git', 'diff', '--name-only']
    files_unstaged = {f.decode('utf-8')
                      for f in subprocess.check_output(command).splitlines()}
    # Staged changes.
    command = ['git', 'diff', '--staged', '--name-only']
    files_staged = {f.decode('utf-8')
                    for f in subprocess.check_output(command).splitlines()}
    # Only interested in dissector files, whether staged or not.  Both
    # lists are combined before filtering, so unstaged files are kept and
    # staged non-dissector files are left out.
    files = set(filter(is_dissector_file, files_unstaged | files_staged))
else:
    # Find all dissector files from folder.
    # Fall back to an empty set in case the scan hands back nothing
    # usable, so that sorted(files) below cannot fail.
    files = findDissectorFilesInFolder(os.path.join('epan', 'dissectors')) or set()


# If scanning a subset of files, list them here.
print('Examining:')
if args.file or args.commits or args.open:
    if files:
        print(' '.join(sorted(files)), '\n')
    else:
        print('No files to check.\n')
else:
    print('All dissector modules\n')


# Get standard/ shared ones.
common_tfs_entries = findTFS(os.path.join('epan', 'tfs.c'))

# Now check the files to see if they could have used shared ones instead.
# Look at files in sorted order, to give some idea of how far through we are.
for f in sorted(files):
    if should_exit:
        exit(1)
    if not isGeneratedFile(f):
        checkFile(f, common_tfs_entries, look_for_common=args.common,
                  check_value_strings=args.check_value_strings,
                  count_common_usage=args.common_usage)

# Report on commonly-defined values.
if args.common:
    # Looking for items that could potentially be moved to tfs.c
    for c in custom_tfs_entries:
        # Only want to see items that have 3 or more occurrences.
        # Even then, probably only want to consider ones that sound generic.
        if len(custom_tfs_entries[c]) > 2:
            print(c, 'appears', len(custom_tfs_entries[c]), 'times, in: ', custom_tfs_entries[c])

if args.common_usage:
    for c in common_tfs_entries:
        if c in common_usage:
            print(c, 'used in', common_usage[c], 'dissectors')
        else:
            print('***', c, 'IS NOT USED! ***')

# Show summary.
print(warnings_found, 'warnings found')
if errors_found:
    # A non-zero exit status signals that errors were reported.
    print(errors_found, 'errors found')
    exit(1)