regen pidl all: rm epan/dissectors/pidl/*-stamp; pushd epan/dissectors/pidl/ && make...
[wireshark-sm.git] / tools / generate-sysdig-event.py
blob0cec2d3ab96bcf433e47e2f9c29357a8581376ea
1 #!/usr/bin/env python3
3 # Wireshark - Network traffic analyzer
4 # By Gerald Combs <gerald@wireshark.org>
5 # Copyright 1998 Gerald Combs
7 # SPDX-License-Identifier: GPL-2.0-or-later
9 '''\
10 Generate Sysdig event dissector sections from the sysdig sources.
12 Reads driver/event_table.c and driver/ppm_events_public.h and generates
13 corresponding dissection code in packet-sysdig-event.c. Updates are
14 performed in-place in the dissector code.
16 Requires an Internet connection. Assets are loaded from GitHub over HTTPS, from falcosecurity/libs master.
17 '''
19 import logging
20 import os
21 import os.path
22 import re
23 import urllib.request, urllib.error, urllib.parse
24 import sys
# Base URL that the driver source paths used below are appended to.
sysdig_repo_pfx = (
    'https://raw.githubusercontent.com/falcosecurity/libs/master/'
)
def exit_msg(msg=None, status=1):
    '''Write an optional message and the module usage text to stderr, then exit.

    msg: text written ahead of the usage text, followed by a blank line.
         Nothing is written for it when it is None.
    status: value passed to sys.exit().

    Always raises SystemExit.
    '''
    if msg is not None:
        sys.stderr.write(msg + '\n\n')
    # __doc__ is None when docstrings are stripped (python -OO). Skip the
    # usage text in that case instead of raising TypeError and hiding msg.
    if __doc__ is not None:
        sys.stderr.write(__doc__ + '\n')
    sys.exit(status)
def get_url_lines(url):
    '''Open a URL.

    The request is sent with a fixed User-Agent header and the body is
    decoded with bytes.decode()'s default codec.

    Returns the URL body as a list of lines, without line terminators.

    Any failure is reported through exit_msg(), which terminates the
    program, so this function only returns on success.
    '''
    req_headers = {'User-Agent': 'Wireshark generate-sysdig-event'}
    try:
        req = urllib.request.Request(url, headers=req_headers)
        # The context manager closes the response even if read() or
        # decode() raises.
        with urllib.request.urlopen(req) as response:
            return response.read().decode().splitlines()
    except urllib.error.HTTPError as err:
        exit_msg("HTTP error fetching {0}: {1}".format(url, err.reason))
    except urllib.error.URLError as err:
        exit_msg("URL error fetching {0}: {1}".format(url, err.reason))
    except OSError as err:
        # strerror is None for OSErrors raised without an errno.
        exit_msg("OS error fetching {0}: {1}".format(url, err.strerror or err))
    except Exception as err:
        # Put the cause in the message. The second positional argument of
        # exit_msg() is the exit status, not extra message text.
        exit_msg("Unexpected error fetching {0}: {1!r}".format(url, err))
# Lines of driver/ppm_events_public.h, fetched when this module is loaded.
# get_event_defines() and get_syscall_code_defines() scan this list.
ppm_ev_pub_lines = get_url_lines(
    sysdig_repo_pfx + 'driver/ppm_events_public.h'
)
# Raw string literals: "\s", "\(" and "\d" are regex escapes, not Python
# string escapes, and newer Python versions warn about them in plain literals.

# Indented "PPME_<NAME>_E = <number>," or "PPME_<NAME>_X = <number>," line.
# Group 1 is the name without the PPME_ prefix, group 2 the number.
ppme_re = re.compile(r'^\s+PPME_([A-Z0-9_]+_[EX])\s*=\s*([0-9]+)\s*,')
# Indented "PPM_SC_X(<NAME>, <number>)" line.
# Group 1 is the name, group 2 the number.
ppm_sc_x_re = re.compile(r'^\s+PPM_SC_X\s*\(\s*(\S+)\s*,\s*(\d+)\s*\)')

event_info_d = {}
def get_event_defines():
    '''Map event numbers to event names found in ppm_ev_pub_lines.

    Every line matching ppme_re contributes one entry: the integer value
    of group 2 is the key and group 1 is the value. If a number occurs
    more than once, the last matching line wins.
    '''
    matches = (ppme_re.match(src_line) for src_line in ppm_ev_pub_lines)
    return {int(hit.group(2)): hit.group(1) for hit in matches if hit}
def get_syscall_code_defines():
    '''Map syscall code numbers to names found in ppm_ev_pub_lines.

    Every line matching ppm_sc_x_re contributes one entry: the integer
    value of group 2 is the key and group 1 is the value. Entries keep
    the order in which the lines appear.
    '''
    matches = (ppm_sc_x_re.match(src_line) for src_line in ppm_ev_pub_lines)
    return {int(hit.group(2)): hit.group(1) for hit in matches if hit}
# Lines of driver/event_table.c, fetched when this module is loaded.
# get_event_names() and get_event_params() scan this list.
ppm_ev_table_lines = get_url_lines(
    sysdig_repo_pfx + 'driver/event_table.c'
)

# main() builds and uses its own local dictionary of the same name.
hf_d = {}
# Raw string literals: "\s", "\[" and friends are regex escapes, not Python
# string escapes, and newer Python versions warn about them in plain literals.

# Indented '[PPME_...] = { "<name>", <field>, <field>, <count>' table entry.
# Group 1 is the quoted name and group 2 the number in the fourth field,
# which get_event_params() treats as the expected parameter count.
event_info_re = re.compile(r'^\s+\[\s*PPME_.*\]\s*=\s*{\s*"([A-Za-z0-9_]+)"\s*,[^,]+,[^,]+,\s*([0-9]+)\s*[,{}]')
# '{ "<name>", PT_<type>, PF_<format>' parameter entry, matched anywhere in
# a line. Groups are the name (spaces allowed), the type and the format,
# the latter two without their PT_ / PF_ prefixes.
event_param_re = re.compile(r'{\s*"([A-Za-z0-9_ ]+)"\s*,\s*PT_([A-Z0-9_]+)\s*,\s*PF_([A-Z0-9_]+)\s*[,}]')
def get_event_names():
    '''Return the event names found in ppm_ev_table_lines, in file order.

    One name (group 1 of event_info_re) is produced for every matching
    line. Names are returned exactly as they appear in the table; no case
    conversion is done here.
    '''
    matches = (event_info_re.match(src_line) for src_line in ppm_ev_table_lines)
    return [hit.group(1) for hit in matches if hit]
# PT_xxx to FT_xxx
# Keys are parameter types with the "PT_" prefix removed, values are field
# types without the "FT_" prefix. Types not listed here are handled by the
# fallbacks in get_event_params().
pt_to_ft = {
    # Buffers, strings and paths
    'BYTEBUF': 'BYTES',
    'CHARBUF': 'STRING',
    # Signed 64-bit values
    'ERRNO': 'INT64',
    'FD': 'INT64',
    # Flag words, by width
    'FLAGS8': 'INT8',
    'FLAGS16': 'INT16',
    'FLAGS32': 'INT32',
    'FSPATH': 'STRING',
    'FSRELPATH': 'STRING',
    'GID': 'INT32',
    'MODE': 'INT32',
    'PID': 'INT64',
    'UID': 'INT32',
    'SYSCALLID': 'UINT16',
}

# FT_xxx to BASE_xxx
# Keys are regular expressions tried with re.match() against the field
# type; a matching type has its display format forced to the value.
force_param_formats = {
    'STRING': 'NONE',
    'INT.*': 'DEC',
}
def get_event_params():
    '''Return a list of dictionaries containing event names and parameter info.

    ppm_ev_table_lines is scanned line by line. Every line matching
    event_info_re counts as one event, numbered from 0 in file order.
    Each parameter on such a line (event_param_re) produces a dictionary
    with the keys event_name, event_num, param_name, param_type and
    param_format.

    Surplus parameters are dropped with a warning when a line has more
    than the table entry declares. NameError is raised when it has fewer.
    '''
    # Parameters with these names are always treated as strings.
    string_param_names = ['args', 'env']
    collected = []
    event_index = 0
    for table_line in ppm_ev_table_lines:
        info = event_info_re.match(table_line)
        found = event_param_re.findall(table_line)
        if info and found:
            event_name = info.group(1)
            declared_count = int(info.group(2))
            if len(found) != declared_count:
                err_msg = '{}: found {} parameters. Expected {}. Params: {}'.format(
                    event_name, len(found), declared_count, repr(found))
                if len(found) < declared_count:
                    raise NameError(err_msg)
                logging.warning(err_msg)
                del found[declared_count:]

            for entry in found:
                src_name, src_type, src_format = entry

                # Pick the field type.
                if src_name in string_param_names:
                    param_type = 'STRING'
                elif src_type in pt_to_ft:
                    param_type = pt_to_ft[src_type]
                elif src_name == 'flags' and src_type.startswith('INT') and 'HEX' in src_format:
                    # Signed "flags" shown in hex become the unsigned type.
                    param_type = 'U' + src_type
                elif 'INT' in src_type:
                    # Ints
                    param_type = src_type
                else:
                    print(f"p fallback {entry}")
                    # Fall back to bytes
                    param_type = 'BYTES'

                # Pick the display format.
                if src_format == 'NA':
                    param_format = 'DEC' if 'INT' in param_type else 'NONE'
                elif param_type == 'BYTES':
                    param_format = 'NONE'
                else:
                    param_format = src_format

                # Apply the forced formats, warning about each override.
                for pt_pat, force_pf in force_param_formats.items():
                    if re.match(pt_pat, param_type) and param_format != force_pf:
                        err_msg = 'Forcing {} {} format to {}. Params: {}'.format(
                            event_name, param_type, force_pf, repr(found))
                        logging.warning(err_msg)
                        param_format = force_pf

                collected.append({
                    'event_name': event_name,
                    'event_num': event_index,
                    # use replace() to account for "plugin ID" param name (ie: param names with space)
                    'param_name': src_name.replace(" ", "_"),
                    'param_type': param_type,
                    'param_format': param_format,
                })
        if info:
            # Events are numbered by table position, parameters or not.
            event_index += 1
    return collected
def param_to_hf_name(param):
    '''Return the header field variable name for a parameter dictionary.

    The name is "hf_param_", the parameter name, an underscore and the
    lower-cased parameter type.
    '''
    name = param['param_name']
    type_suffix = param['param_type'].lower()
    return f'hf_param_{name}_{type_suffix}'
def param_to_value_string_name(param):
    '''Return the value_string array name for a parameter dictionary.

    The name is the parameter name, an underscore, the lower-cased
    parameter type and the suffix "_vals".
    '''
    name = param['param_name']
    type_suffix = param['param_type'].lower()
    return f'{name}_{type_suffix}_vals'
def get_param_desc(param):
    '''Return a human-friendly description for a parameter dictionary.

    Lookup order: a description specific to "<event name>.<parameter
    name>", then a general per-event description prefixed to the
    parameter name, then the bare parameter name.
    '''
    # XXX This could use some work.

    # Specific descriptions. Event name + parameter name.
    specific_descs = {
        'accept.queuepct': 'Accept queue per connection',
        'execve.args': 'Program arguments',
        'execve.comm': 'Command',
        'execve.cwd': 'Current working directory',
    }
    # General descriptions. Event name only.
    general_descs = {
        'ioctl': 'I/O control',
    }

    event_name = param['event_name']
    param_name = param['param_name']
    specific_key = '{}.{}'.format(event_name, param_name)

    if specific_key in specific_descs:
        return specific_descs[specific_key]
    if event_name in general_descs:
        return '{}: {}'.format(general_descs[event_name], param_name)
    return param_name
def main():
    '''Regenerate the generated sections of packet-sysdig-event.c in place.

    The dissector source is located relative to this script
    (../epan/dissectors/packet-sysdig-event.c). Previously generated
    lines are removed, then every line starting with one of the section
    marker comments ("/* Header fields", "/* Event names", ...) is
    replaced by a fresh marker comment followed by the generated lines
    for that section. All other lines are kept as they are.

    All content is generated in memory before the file is opened for
    writing, so a failure during generation leaves the file untouched.

    Returns None.
    '''
    logging.basicConfig(format='%(levelname)s: %(message)s')

    # Event list
    event_d = get_event_defines()
    event_nums = sorted(event_d)

    event_name_l = get_event_names()
    event_param_l = get_event_params()

    hf_d = {param_to_hf_name(param): param for param in event_param_l}

    # Group the parameters per event once instead of rescanning the whole
    # parameter list for every event.
    params_by_event = {}
    for param in event_param_l:
        params_by_event.setdefault(param['event_num'], []).append(param)

    # Events with an identical parameter signature share one index array;
    # later ones are #defined to the first. The empty signature maps to
    # the name "no", so events without parameters use "no_indexes".
    idx_id_to_name = {'': 'no'}
    parameter_index_l = []

    # Iterate the event numbers that were actually found rather than
    # range(len(...)), which fails if the numbering has gaps.
    for en in event_nums:
        event_var = event_d[en].lower()
        event_params = params_by_event.get(en, [])
        param_l = [param_to_hf_name(param) for param in event_params]
        param_id = ''.join(
            ':' + param['param_name'] + '_' + param['param_type']
            for param in event_params
        )

        if param_id not in idx_id_to_name:
            idx_id_to_name[param_id] = event_var
            ei_str = 'static int * const {}_indexes[] = {{ &{}, NULL }};'.format(
                event_var,
                ', &'.join(param_l)
            )
        else:
            ei_str = '#define {}_indexes {}_indexes'.format(event_var, idx_id_to_name[param_id])

        parameter_index_l.append(ei_str)

    dissector_path = os.path.join(os.path.dirname(__file__),
        '..', 'epan', 'dissectors', 'packet-sysdig-event.c')
    # Read-only for now; the file is rewritten at the very end.
    with open(dissector_path, 'r', encoding='utf-8') as dissector_f:
        dissector_lines = list(dissector_f)

    # Strip out old content
    strip_re_l = [
        re.compile(r'^static\s+int\s+hf_param_.*;'),
        re.compile(r'^#define\s+EVT_STR_[A-Z0-9_]+\s+"[A-Za-z0-9_]+"'),
        re.compile(r'^#define\s+EVT_[A-Z0-9_]+\s+[0-9]+'),
        re.compile(r'^\s*{\s*EVT_[A-Z0-9_]+\s*,\s*EVT_STR_[A-Z0-9_]+\s*}'),
        re.compile(r'^static\s+const\s+int\s+\*\s*[a-z0-9_]+_[ex]_indexes\[\]\s*=\s*\{\s*&hf_param_.*NULL\s*\}\s*;'),
        re.compile(r'^static\s+int\s*\*\s+const\s+[a-z0-9_]+_[ex]_indexes\[\]\s*=\s*\{\s*&hf_param_.*NULL\s*\}\s*;'),
        re.compile(r'^\s*#define\s+[a-z0-9_]+_[ex]_indexes\s+[a-z0-9_]+_indexes'),
        re.compile(r'^\s*\{\s*EVT_[A-Z0-9_]+_[EX]\s*,\s*[a-z0-9_]+_[ex]_indexes\s*}\s*,'),
        re.compile(r'^\s*\{\s*\d+\s*,\s*"\S+"\s*}\s*,\s*//\s*PPM_SC_\S+'),
        re.compile(r'^\s*{\s*&hf_param_.*},'),  # Must all be on one line
    ]
    dissector_lines = [
        line for line in dissector_lines
        if not any(strip_re.search(line) for strip_re in strip_re_l)
    ]

    # Find our value strings
    value_string_re = re.compile(r'static\s+const\s+value_string\s+([A-Za-z0-9_]+_vals)')
    value_string_l = [
        vs.group(1) for vs in map(value_string_re.match, dissector_lines) if vs
    ]

    # Build the new content for each section.
    # NOTE(review): the leading whitespace inside the emitted C lines below is
    # reproduced as seen in the reviewed copy of this script; confirm it
    # matches the indentation wanted in the dissector.

    header_fields_c = 'Header fields'
    header_fields_l = ['static int {};'.format(hf_name) for hf_name in sorted(hf_d)]

    event_names_c = 'Event names'
    event_names_l = [
        '#define EVT_STR_{0:24s} "{1:s}"'.format(evt_str.upper(), evt_str)
        for evt_str in sorted(set(event_name_l))
    ]

    event_definitions_c = 'Event definitions'
    event_definitions_l = [
        '#define EVT_{0:24s} {1:3d}'.format(event_d[evt], evt)
        for evt in event_nums
    ]

    value_strings_c = 'Value strings'
    value_strings_l = []
    for evt in event_nums:
        evt_num = 'EVT_{},'.format(event_d[evt])
        # The name list is indexed by event number; an IndexError here means
        # the two fetched sources disagree about the number of events.
        evt_str = 'EVT_STR_' + event_name_l[evt].upper()
        value_strings_l.append(' {{ {0:<32s} {1:s} }},'.format(evt_num, evt_str))

    parameter_index_c = 'Parameter indexes'
    # parameter_index_l defined above.

    event_tree_c = 'Event tree'
    event_tree_l = []
    for evt in event_nums:
        evt_num = 'EVT_{}'.format(event_d[evt])
        evt_idx = '{}_indexes'.format(event_d[evt].lower())
        event_tree_l.append(' {{ {}, {} }},'.format(evt_num, evt_idx))

    # Syscall codes
    syscall_code_c = 'Syscall codes'
    syscall_code_l = [
        f' {{ {sc_num:3}, "{sc_name.lower()}" }}, // PPM_SC_{sc_name}'
        for sc_num, sc_name in get_syscall_code_defines().items()
    ]

    header_field_reg_c = 'Header field registration'
    header_field_reg_l = []
    for hf_name in sorted(hf_d):
        param = hf_d[hf_name]
        param_type = param['param_type']
        fieldconvert = 'NULL'
        vs_name = param_to_value_string_name(param)
        # Only integer fields can use a value_string from the dissector.
        if vs_name in value_string_l and 'INT' in param_type:
            fieldconvert = 'VALS({})'.format(vs_name)
        header_field_reg_l.append(' {{ &{}, {{ "{}", "sysdig.param.{}.{}", FT_{}, BASE_{}, {}, 0, NULL, HFILL }} }},'.format(
            hf_name,
            get_param_desc(param),
            param['event_name'],
            param['param_name'],
            param_type,
            param['param_format'],
            fieldconvert
        ))

    # Section markers are tried in this order; the first match wins.
    sections = [
        (header_fields_c, header_fields_l),
        (event_names_c, event_names_l),
        (event_definitions_c, event_definitions_l),
        (value_strings_c, value_strings_l),
        (parameter_index_c, parameter_index_l),
        (event_tree_c, event_tree_l),
        (syscall_code_c, syscall_code_l),
        (header_field_reg_c, header_field_reg_l),
    ]
    section_matchers = [
        (re.compile(r'/\*\s+' + fill_comment, flags=re.IGNORECASE), fill_comment, fill_l)
        for fill_comment, fill_l in sections
    ]

    # Add in new content after comments.
    script_name = os.path.basename(__file__)
    output_lines = []
    for line in dissector_lines:
        for section_re, fill_comment, fill_l in section_matchers:
            if section_re.match(line):
                # Write our comment followed by the content
                print(('Generating {}, {:d} lines'.format(fill_comment, len(fill_l))))
                output_lines.append('/* {}. Automatically generated by tools/{} */\n'.format(
                    fill_comment,
                    script_name
                ))
                output_lines.extend('{}\n'.format(fill_line) for fill_line in fill_l)
                # Fill each section only once
                del fill_l[:]
                break
        else:
            # Existing content
            output_lines.append(line)

    # Everything was generated successfully; only now replace the file.
    with open(dissector_path, 'w', encoding='utf-8') as dissector_f:
        dissector_f.writelines(output_lines)
# On with the show
# Run the generator only when executed as a script; the exit status is
# whatever main() returns.
if __name__ == '__main__':
    sys.exit(main())