2 # Wireshark - Network traffic analyzer
3 # By Gerald Combs <gerald@wireshark.org>
4 # Copyright 1998 Gerald Combs
6 # SPDX-License-Identifier: GPL-2.0-or-later
# Scan dissectors for calls to col_[set|add|append]_[f]str
# to check that the most appropriate API is being used
# Try to exit soon after Ctrl-C is pressed.
def signal_handler(sig, frame):
    """Handle SIGINT by announcing the interruption and stopping the script.

    Args:
        sig: Number of the signal that was delivered (unused).
        frame: Stack frame that was executing when it arrived (unused).

    Raises:
        SystemExit: always, with status 1.  Without it the handler would
            only print its message and Ctrl-C would no longer stop the run.
    """
    print('You pressed Ctrl+C - exiting')
    raise SystemExit(1)


signal.signal(signal.SIGINT, signal_handler)
# Phrases that generators leave in the header of the files they produce.
# 'This file is auto generated' also covers the longer
# 'This file is auto generated, do not edit!' variant.
_GENERATED_MARKERS = (
    'Generated automatically',
    'Generated Automatically',
    'Autogenerated from',
    'is autogenerated',
    'automatically generated by Pidl',
    'Created by: The Qt Meta Object Compiler',
    'This file was generated',
    'This filter was automatically generated',
    'This file is auto generated',
)


# Test for whether the given file was automatically generated.
def isGeneratedFile(filename, max_lines=11):
    """Return True if the top of *filename* says that it was generated.

    Args:
        filename: Path of the file to inspect; it is read as UTF-8 text.
        max_lines: How many leading lines to look at before giving up.

    Returns:
        True if one of the known marker phrases occurs within the first
        *max_lines* lines.  False otherwise, including when the file does
        not exist.
    """
    # Check file exists - e.g. may have been deleted in a recent commit.
    if not os.path.exists(filename):
        return False

    # The context manager closes the file on every exit path.
    with open(filename, 'r', encoding="utf8") as f_read:
        for lines_tested, line in enumerate(f_read):
            # The comment to say that it's generated is near the top, so
            # give up once we get a few lines down.
            if lines_tested >= max_lines:
                break
            if any(marker in line for marker in _GENERATED_MARKERS):
                return True

    # OK, looks like a hand-written file!
    return False
def removeComments(code_string):
    """Return *code_string* with C and C++ style comments removed.

    The removal is purely textual: comment markers that appear inside
    string literals are stripped as well.

    Args:
        code_string: Source text to clean up.

    Returns:
        The text without ``/* ... */`` and ``// ...`` comments.
    """
    # C-style comment; DOTALL lets one comment span several lines.
    code_string = re.sub(r"/\*.*?\*/", "", code_string, flags=re.DOTALL)
    # C++-style comment, removed together with its newline.  The newline is
    # optional so that a comment on an unterminated last line goes too.
    code_string = re.sub(r"//[^\n]*\n?", "", code_string)
    return code_string
# Compiled once rather than on every call.
_DISSECTOR_FILE_RE = re.compile(r'.*(packet|file)-.*\.c')


def is_dissector_file(filename):
    """Tell whether *filename* is named like a dissector source file.

    Args:
        filename: File name or path to test.

    Returns:
        A match object (truthy) when the name contains ``packet-`` or
        ``file-`` and ends in ``.c``, otherwise None.  The whole name must
        match, so ``packet-foo.c.orig`` or ``packet-foo.cpp`` do not count.
    """
    return _DISSECTOR_FILE_RE.fullmatch(filename)
def findDissectorFilesInFolder(folder, recursive=False):
    """List the dissector source files found in *folder*.

    Args:
        folder: Directory to look in.
        recursive: When true, descend into sub-directories with os.walk();
            otherwise only the entries directly inside *folder* are listed.

    Returns:
        Paths (joined onto *folder*) of the entries that
        is_dissector_file() accepts, in sorted order within each directory.
    """
    dissector_files = []

    if recursive:
        for root, subfolders, files in os.walk(folder):
            # Sorting in place makes os.walk() visit sub-directories in a
            # fixed order, so repeated runs report in the same order.
            subfolders.sort()
            for f in sorted(files):
                dissector_files.append(os.path.join(root, f))
    else:
        for f in sorted(os.listdir(folder)):
            dissector_files.append(os.path.join(folder, f))

    return [x for x in dissector_files if is_dissector_file(x)]
# Global counts, reported once all files have been checked.
warnings_found = 0
errors_found = 0


class ColCall:
    """One call to a col_[set|add|append]_[f]str function in a dissector.

    Attributes are plain values supplied by the caller:
      filename    - file that contains the call
      line_number - line the call was found on
      name        - name of the function that was called
      last_args   - text of the arguments that follow the first two
      generated   - true if the file was automatically generated
      verbose     - true if lower-priority notes should be shown too
    """

    # Ternary test with both outcomes being literal strings.
    _TERNARY_RE = re.compile(r'.*\s*\?\s*.*\".*\"\s*:\s*.*\".*\"')
    # First double-quoted run in the arguments, i.e. the format string.
    _FORMAT_STRING_RE = re.compile(r'"(.*?)"')
    # Lookup functions whose names mark their result as constant.
    _CONST_LOOKUPS = ('val_to_str_const', 'val_to_str_ext_const')

    def __init__(self, file, line_number, name, last_args, generated, verbose):
        self.filename = file
        self.line_number = line_number
        self.name = name
        self.last_args = last_args
        self.generated = generated
        self.verbose = verbose

    def issue_prefix(self):
        """Return the 'file:line : called X with ARGS' lead-in of a message."""
        generated = '(GENERATED) ' if self.generated else ''
        return (self.filename + ':' + generated + str(self.line_number) +
                ' : called ' + self.name + ' with ' + self.last_args)

    def _warn(self, message):
        """Print *message* as a warning about this call and count it."""
        global warnings_found
        print('Warning:', self.issue_prefix(), message)
        warnings_found += 1

    def _looks_persistent(self):
        """Tell whether the argument matches a pattern accepted for col_set_str."""
        args = self.last_args
        # Literal strings are safe, as well as some other patterns..
        # TODO: substitute macros to avoid some special cases..
        return (args.startswith('"') or
                args.startswith(self._CONST_LOOKUPS) or
                args.upper() == args or
                self._TERNARY_RE.match(args) is not None)

    def check(self):
        """Report likely misuse of the column API by this call.

        Warnings are printed and added to the global warnings_found count.
        The note about col_set_str string lifetime is printed only when
        verbose is set and is not counted.  self.last_args is normalised
        in place first.
        """
        # Turn escaped quotes into single quotes so that they are not taken
        # for the end of a string literal by the tests below.
        self.last_args = self.last_args.replace('\\\"', "'").strip()

        # Empty string never a good idea
        # TODO: pointless if appending, but unlikely to see
        if self.last_args == '""' and 'append' not in self.name:
            self._warn('- if want to clear column, use col_clear() instead')

        # This is never a good idea..
        if self.last_args.startswith('"%s"'):
            self._warn(" - don't need fstr API?")

        # Unlikely, but did someone accidentally include a specifier but call str() function with no args?
        if (self.last_args.startswith('"') and '%' in self.last_args and
                'fstr' not in self.name):
            self._warn(" - meant to call fstr version of function?")

        if self.name == 'col_set_str':
            # String should be static, or at least persist.
            # TODO: how persistent does it need to be.  Which memory scope is appropriate?
            if self.verbose and not self._looks_persistent():
                # Not easy/possible to judge lifetime of string..
                print('Note:', self.issue_prefix(), '- is this persistent enough??')

        elif self.name == 'col_add_str':
            # If literal string, could have used col_set_str instead?
            if self.last_args.startswith('"'):
                self._warn('- could call col_set_str() instead')
            elif self.last_args.startswith(self._CONST_LOOKUPS):
                self._warn('- const so could use col_set_str() instead')

        elif self.name in ('col_add_fstr', 'col_append_fstr'):
            # Look at format string
            m = self._FORMAT_STRING_RE.search(self.last_args)
            # Should contain at least one format specifier!
            if m and '%' not in m.group(1):
                self._warn('with no format specifiers - "' + m.group(1) +
                           '" - use str() version instead')

        # Nothing is checked for col_append_str beyond the general tests.
# Check the given dissector file.
def checkFile(filename, generated, verbose=False):
    """Find every column-API call in *filename* and check each one.

    Args:
        filename: Path of the dissector source file, read as UTF-8 text.
        generated: Passed on to each ColCall to mark the file as generated.
        verbose: Passed on to each ColCall.

    A file that does not exist is reported on stdout and skipped.
    """
    # Check file exists - e.g. may have been deleted in a recent commit.
    if not os.path.exists(filename):
        print(filename, 'does not exist!')
        return

    with open(filename, 'r', encoding="utf8") as f:
        full_contents = f.read()

    # Remove comments so as not to trip up RE.
    contents = removeComments(full_contents)

    # Look for all calls in this file
    matches = re.finditer(r'(col_set_str|col_add_str|col_add_fstr|col_append_str|col_append_fstr)\((.*?)\)\s*\;',
                          contents, re.MULTILINE | re.DOTALL)

    col_calls = []
    # Line number of the character at last_char_offset in full_contents.
    last_line_number = 1
    last_char_offset = 0

    for m in matches:
        # The match came from the comment-stripped text, so locate it in the
        # original text to learn its line.  May fail to find it if there
        # were comments inside the call, in which case the line stays -1.
        # Searching only from the previous hit onwards:
        # - avoids finding an earlier identical call
        # - speeds up searching by making it shorter
        line_number = -1
        match_offset = full_contents.find(m.group(0), last_char_offset)
        if match_offset != -1:
            # Counting newlines is exact even when the call starts at
            # column 0, where counting splitlines() entries is one short.
            line_number = last_line_number + full_contents.count('\n', last_char_offset, match_offset)
            last_line_number = line_number
            last_char_offset = match_offset + 1  # enough to not match again

        # Match first 2 args plus remainder
        args_m = re.match(r'(.*?),\s*(.*?),\s*(.*)', m.group(2))
        if args_m:
            col_calls.append(ColCall(filename, line_number, m.group(1), last_args=args_m.group(3),
                                     generated=generated, verbose=verbose))

    # Check them all
    for call in col_calls:
        call.check()
#################################################################
# Main logic.

# Command-line args control which dissector files should be checked.
# If no args are given, all dissectors are scanned.
parser = argparse.ArgumentParser(description='Check calls in dissectors')
for option, action, help_text in (
        ('--file', 'append', 'specify individual dissector file to test'),
        ('--commits', 'store', 'last N commits to check'),
        ('--open', 'store_true', 'check open files'),
        ('--verbose', 'store_true', 'show extra info')):
    parser.add_argument(option, action=action, help=help_text)

args = parser.parse_args()
def _changed_dissector_files(*diff_options):
    """Return the dissector files listed by `git diff --name-only <options>`."""
    command = ['git', 'diff', '--name-only'] + list(diff_options)
    names = [f.decode('utf-8')
             for f in subprocess.check_output(command).splitlines()]
    # Only interested in dissector files.
    return [f for f in names if is_dissector_file(f)]


# Get files from wherever command-line args indicate.
files = []
if args.file:
    # Add specified file(s)
    for f in args.file:
        # A bare file name is taken to be relative to the dissectors folder.
        if not os.path.isfile(f) and not f.startswith('epan'):
            f = os.path.join('epan', 'dissectors', f)
        if not os.path.isfile(f):
            print('Chosen file', f, 'does not exist.')
            raise SystemExit(1)
        files.append(f)
elif args.commits:
    # Get files affected by specified number of commits.
    files = _changed_dissector_files('HEAD~' + args.commits)
elif args.open:
    # Unstaged changes first, then any staged files not already listed.
    files = _changed_dissector_files()
    for f in _changed_dissector_files('--staged'):
        if f not in files:
            files.append(f)
else:
    # Find all dissector files from folder.
    files = findDissectorFilesInFolder(os.path.join('epan', 'dissectors'))
    files += findDissectorFilesInFolder(os.path.join('plugins', 'epan'), recursive=True)
    files += findDissectorFilesInFolder(os.path.join('epan', 'dissectors', 'asn1'), recursive=True)
# If scanning a subset of files, list them here.
print('Examining:')
if args.file or args.commits or args.open:
    if files:
        print(' '.join(files), '\n')
    else:
        print('No files to check.\n')
else:
    print('All dissectors\n')


# Now check the chosen files
for f in files:
    checkFile(f, isGeneratedFile(f), verbose=args.verbose)

# Show summary.
print(warnings_found, 'warnings found')
if errors_found:
    print(errors_found, 'errors found')
    # A non-zero status lets callers such as CI notice the failure.
    raise SystemExit(1)