dcerpc-nt: add UNION_ALIGN_TO... helpers
[wireshark-sm.git] / tools / indexcap.py
blob31f4e3970cb884f0859feb2471b0d2de806ad482
1 #!/usr/bin/env python3
3 # Tool to index protocols that appear in the given capture files
5 # The script list_protos_in_cap.sh does the same thing.
7 # Copyright 2009, Kovarththanan Rajaratnam <kovarththanan.rajaratnam@gmail.com>
9 # Wireshark - Network traffic analyzer
10 # By Gerald Combs <gerald@wireshark.org>
11 # Copyright 1998 Gerald Combs
13 # SPDX-License-Identifier: GPL-2.0-or-later
16 from optparse import OptionParser
17 import multiprocessing
18 import sys
19 import os
20 import subprocess
21 import re
22 import pickle
23 import tempfile
24 import filecmp
25 import random
def extract_protos_from_file_proces(tshark, file):
    """Tally the protocols tshark reports for one capture file.

    Runs ``tshark -Tfields -e frame.protocols -r <file>`` and counts every
    colon-separated protocol name on each output line that consists solely
    of word characters, ``:`` and ``-``.

    Returns ``(file, counts)`` where ``counts`` maps a protocol name to its
    number of occurrences; ``counts`` is empty when tshark exits with a
    non-zero status. Returns ``None`` when interrupted from the keyboard.
    """
    try:
        child = subprocess.Popen(
            [tshark, "-Tfields", "-e", "frame.protocols", "-r", file],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        raw_out, _raw_err = child.communicate()
        text = raw_out.decode('utf-8')
        if child.returncode != 0:
            return (file, {})

        counts = {}
        # Only lines that look like "eth:ip:tcp" are counted.
        usable_lines = (entry for entry in text.splitlines()
                        if re.match(r'^[\w:-]+$', entry))
        for entry in usable_lines:
            for name in entry.split(':'):
                counts[name] = counts.get(name, 0) + 1

        return (file, counts)
    except KeyboardInterrupt:
        return None
def extract_protos_from_file(tshark, num_procs, max_files, cap_files, cap_hash, index_file_name):
    """Index the protocols of every capture file and save the index.

    Each file in ``cap_files`` is handed to ``extract_protos_from_file_proces``
    in a pool of ``num_procs`` worker processes. Every result is stored in
    ``cap_hash`` (file name -> protocol counts), which is then pickled to
    ``index_file_name``.

    ``max_files`` is only used for the progress output.

    This function does not return: it exits the process with status 0 after
    writing the index, or with status 1 when interrupted by the user.
    """
    with multiprocessing.Pool(num_procs) as pool:
        results = [pool.apply_async(extract_protos_from_file_proces, [tshark, file]) for file in cap_files]
        try:
            for cur_item_idx, result_async in enumerate(results):
                file_result = result_async.get()
                if file_result is None:
                    # The worker returns None when it caught the interrupt itself.
                    raise KeyboardInterrupt
                (file, proto_hash) = file_result
                # An empty result means tshark failed on the file (or found
                # nothing). "is {}" never matched, so test for emptiness instead.
                action = "PROCESSED" if proto_hash else "SKIPPED"
                print("%s [%u/%u] %s %u bytes" % (action, cur_item_idx+1, max_files, file, os.path.getsize(file)))
                cap_hash[file] = proto_hash
        except KeyboardInterrupt:
            print("%s was interrupted by user" % (sys.argv[0]))
            pool.terminate()
            sys.exit(1)

    with open(index_file_name, "wb") as index_file:
        pickle.dump(cap_hash, index_file)
    sys.exit(0)
def dissect_file_process(tshark, tmpdir, file):
    """Dissect one capture with ``tshark -nxVr`` into temporary files.

    tshark's stdout and stderr are redirected into two files created in
    ``tmpdir`` (suffixes ``_stdout`` and ``_stderr``). The files are left in
    place for the caller to inspect or remove.

    Returns ``(file, ok, stdout_path, stderr_path)`` where ``ok`` is True when
    tshark exited with status 0. Returns ``False`` when interrupted from the
    keyboard.
    """
    # Bind both handles up front so the cleanup below is safe even when
    # mkstemp() itself fails or the interrupt arrives before they are created.
    handle_o = handle_e = None
    try:
        (handle_o, tmpfile_o) = tempfile.mkstemp(suffix='_stdout', dir=tmpdir)
        (handle_e, tmpfile_e) = tempfile.mkstemp(suffix='_stderr', dir=tmpdir)
        cmd = [tshark, "-nxVr", file]
        p = subprocess.Popen(cmd, stdout=handle_o, stderr=handle_e)
        # No pipes are attached, so this only waits for tshark to finish.
        p.communicate()
        return (file, p.returncode == 0, tmpfile_o, tmpfile_e)

    except KeyboardInterrupt:
        return False

    finally:
        for handle in (handle_o, handle_e):
            if handle is not None:
                os.close(handle)
def dissect_files(tshark, tmpdir, num_procs, max_files, cap_files):
    """Dissect every capture file in parallel and print PASSED/FAILED per file.

    Each file in ``cap_files`` is handed to ``dissect_file_process`` in a pool
    of ``num_procs`` worker processes. A file is reported as FAILED when
    tshark exited with a non-zero status. The per-file output files created in
    ``tmpdir`` are not removed here.

    ``max_files`` is only used for the progress output. Exits the process with
    status 1 when interrupted by the user.
    """
    with multiprocessing.Pool(num_procs) as pool:
        results = [pool.apply_async(dissect_file_process, [tshark, tmpdir, file]) for file in cap_files]
        try:
            for cur_item_idx, result_async in enumerate(results):
                file_result = result_async.get()
                if file_result is False:
                    # The worker returns a bare False when it caught the
                    # interrupt itself; indexing it would raise TypeError.
                    raise KeyboardInterrupt
                action = "PASSED" if file_result[1] else "FAILED"
                print("%s [%u/%u] %s %u bytes" % (action, cur_item_idx+1, max_files, file_result[0], os.path.getsize(file_result[0])))
        except KeyboardInterrupt:
            print("%s was interrupted by user" % (sys.argv[0]))
            pool.terminate()
            sys.exit(1)
def compare_files(tshark_bin, tmpdir, tshark_cmp, num_procs, max_files, cap_files):
    """Dissect every capture with two tshark builds and compare the results.

    Each file in ``cap_files`` is dissected by both ``tshark_bin`` and
    ``tshark_cmp`` (via ``dissect_file_process``) in a pool of ``num_procs``
    worker processes. A file passes only when both runs exited with status 0
    and their stdout and stderr outputs are byte-for-byte equal. Output files
    of passing comparisons are deleted from ``tmpdir``; those of failing
    comparisons are kept.

    ``max_files`` is only used for the progress output. Exits the process with
    status 1 when interrupted by the user.
    """
    with multiprocessing.Pool(num_procs) as pool:
        results_bin = [pool.apply_async(dissect_file_process, [tshark_bin, tmpdir, file]) for file in cap_files]
        results_cmp = [pool.apply_async(dissect_file_process, [tshark_cmp, tmpdir, file]) for file in cap_files]
        try:
            for cur_item_idx, (result_async_bin, result_async_cmp) in enumerate(zip(results_bin, results_cmp)):
                file_result_bin = result_async_bin.get()
                file_result_cmp = result_async_cmp.get()
                if file_result_bin is False or file_result_cmp is False:
                    # A worker returns a bare False when it caught the
                    # interrupt itself; indexing it would raise TypeError.
                    raise KeyboardInterrupt

                # Start from PASSED and let each failed check override it.
                # Previously the final "else" belonged only to the stderr
                # check, so an exit-code or stdout failure was reported as
                # PASSED (and its files deleted) whenever stderr matched.
                action = "PASSED"
                if not (file_result_bin[1] and file_result_cmp[1]):
                    action = "FAILED (exitcode)"
                # shallow=False forces a content comparison instead of
                # trusting matching os.stat() signatures.
                if not filecmp.cmp(file_result_bin[2], file_result_cmp[2], shallow=False):
                    action = "FAILED (stdout)"
                if not filecmp.cmp(file_result_bin[3], file_result_cmp[3], shallow=False):
                    action = "FAILED (stderr)"

                if action == "PASSED":
                    os.remove(file_result_bin[2])
                    os.remove(file_result_cmp[2])
                    os.remove(file_result_bin[3])
                    os.remove(file_result_cmp[3])

                print("%s [%u/%u] %s %u bytes" % (action, cur_item_idx+1, max_files, file_result_bin[0], os.path.getsize(file_result_bin[0])))
                print("%s [%u/%u] %s %u bytes" % (action, cur_item_idx+1, max_files, file_result_cmp[0], os.path.getsize(file_result_cmp[0])))
        except KeyboardInterrupt:
            print("%s was interrupted by user" % (sys.argv[0]))
            pool.terminate()
            sys.exit(1)
def list_all_proto(cap_hash):
    """Sum the per-file protocol counts of the index into one mapping.

    ``cap_hash`` maps a file name to a ``{protocol: count}`` mapping; the
    result maps each protocol to its total count across all files.
    """
    totals = {}
    for per_file in cap_hash.values():
        for name in per_file:
            totals[name] = totals.get(name, 0) + per_file[name]

    return totals
def list_all_files(cap_hash):
    """Return the file names stored in the index as a sorted list."""
    return sorted(cap_hash)
def list_all_proto_files(cap_hash, proto_comma_delit):
    """Return the indexed files that contain any of the requested protocols.

    ``proto_comma_delit`` is a comma-separated protocol list; whitespace
    around each name is ignored. Files are returned in the iteration order
    of ``cap_hash``, each at most once.
    """
    wanted = {token.strip() for token in proto_comma_delit.split(',')}
    return [
        file
        for file, files_hash in cap_hash.items()
        if not wanted.isdisjoint(files_hash)
    ]
def index_file_action(options):
    """Tell whether an option that works on the index file alone was given.

    Returns the first truthy value among ``list_all_proto``,
    ``list_all_files`` and ``list_all_proto_files``; when none of them is
    truthy, the value of ``dissect_files`` is returned.
    """
    for name in ("list_all_proto", "list_all_files", "list_all_proto_files"):
        value = getattr(options, name)
        if value:
            return value
    return options.dissect_files
def find_capture_files(paths, cap_hash):
    """Collect the capture files under ``paths`` that are not yet indexed.

    A directory is walked recursively and every file below it is a
    candidate; any other path is taken as a candidate as it stands.
    Candidates already present as keys of ``cap_hash`` are left out.
    """
    pending = []
    for path in paths:
        if not os.path.isdir(path):
            if path not in cap_hash:
                pending.append(path)
            continue

        for root, _dirs, names in os.walk(os.path.normpath(path)):
            for name in names:
                candidate = os.path.join(root, name)
                if candidate not in cap_hash:
                    pending.append(candidate)
    return pending
def find_tshark_executable(bin_dir):
    """Return the path of the tshark binary in ``bin_dir``, or None.

    ``tshark.exe`` is tried before ``tshark``; the first candidate the
    current user may execute is returned.
    """
    candidates = (os.path.join(bin_dir, name) for name in ("tshark.exe", "tshark"))
    return next((path for path in candidates if os.access(path, os.X_OK)), None)
def main():
    """Parse the command line and run the requested action.

    The first positional argument is the index file; any further arguments
    are capture files or directories. Depending on the options this either
    lists the contents of the index, dissects the listed files, compares the
    output of two tshark builds on them, or indexes the protocols of the
    capture files that are not in the index yet.

    Exits the process with status 1 when a tshark executable cannot be found;
    invalid option combinations are reported through ``parser.error``.
    """
    parser = OptionParser(usage="usage: %prog [options] index_file [file_1|dir_1 [.. file_n|dir_n]]")
    parser.add_option("-d", "--dissect-files", dest="dissect_files", default=False, action="store_true",
                      help="Dissect all matching files")
    parser.add_option("-m", "--max-files", dest="max_files", default=sys.maxsize, type="int",
                      help="Max number of files to process")
    parser.add_option("-b", "--binary-dir", dest="bin_dir", default=os.getcwd(),
                      help="Directory containing tshark executable")
    parser.add_option("-c", "--compare-dir", dest="compare_dir", default=None,
                      help="Directory containing tshark executable which is used for comparison")
    parser.add_option("-j", dest="num_procs", default=multiprocessing.cpu_count(), type=int,
                      help="Max number of processes to spawn")
    parser.add_option("-r", "--randomize", default=False, action="store_true",
                      help="Randomize the file list order")
    parser.add_option("", "--list-all-proto", dest="list_all_proto", default=False, action="store_true",
                      help="List all protocols in index file")
    parser.add_option("", "--list-all-files", dest="list_all_files", default=False, action="store_true",
                      help="List all files in index file")
    parser.add_option("", "--list-all-proto-files", dest="list_all_proto_files", default=False,
                      metavar="PROTO_1[, .. PROTO_N]",
                      help="List all files in index file containing the given protocol")

    (options, args) = parser.parse_args()

    if len(args) == 0:
        parser.error("index_file is a required argument")

    if len(args) == 1 and not index_file_action(options):
        parser.error("one capture file/directory must be specified")

    if options.dissect_files and not options.list_all_files and not options.list_all_proto_files:
        parser.error("--list-all-files or --list-all-proto-files must be specified")

    if options.dissect_files and options.compare_dir is not None:
        parser.error("--dissect-files and --compare-dir cannot be specified at the same time")

    index_file_name = args.pop(0)
    paths = args
    cap_hash = {}
    try:
        with open(index_file_name, "rb") as index_file:
            print("index file: %s [OPENED]" % index_file.name)
            # NOTE: pickle.load() can execute arbitrary code; only use index
            # files that this tool wrote itself.
            cap_hash = pickle.load(index_file)
        print("%d files" % len(cap_hash))
    except IOError:
        print("index file: %s [NEW]" % index_file_name)

    if options.list_all_proto:
        print(list_all_proto(cap_hash))
        sys.exit(0)

    indexed_files = []
    if options.list_all_files:
        indexed_files = list_all_files(cap_hash)
        print(indexed_files)

    if options.list_all_proto_files:
        indexed_files = list_all_proto_files(cap_hash, options.list_all_proto_files)
        print(indexed_files)

    tshark_bin = find_tshark_executable(options.bin_dir)
    if tshark_bin is not None:
        print("tshark: %s [FOUND]" % tshark_bin)
    else:
        # tshark_bin is None here, so report the directory that was searched.
        print("tshark: %s [MISSING]" % options.bin_dir)
        sys.exit(1)

    if options.compare_dir is not None:
        tshark_cmp = find_tshark_executable(options.compare_dir)
        if tshark_cmp is not None:
            print("tshark: %s [FOUND]" % tshark_cmp)
        else:
            print("tshark: %s [MISSING]" % options.compare_dir)
            sys.exit(1)

    if options.dissect_files or options.compare_dir:
        cap_files = indexed_files
    elif options.list_all_proto_files or options.list_all_files:
        sys.exit(0)
    else:
        cap_files = find_capture_files(paths, cap_hash)

    if options.randomize:
        random.shuffle(cap_files)
    else:
        cap_files.sort()

    options.max_files = min(options.max_files, len(cap_files))
    print("%u total files, %u working files" % (len(cap_files), options.max_files))
    cap_files = cap_files[:options.max_files]
    if options.compare_dir or options.dissect_files:
        tmpdir = tempfile.mkdtemp()
        print("Temporary working dir: %s" % tmpdir)
    try:
        if options.compare_dir:
            compare_files(tshark_bin, tmpdir, tshark_cmp, options.num_procs, options.max_files, cap_files)
        elif options.dissect_files:
            dissect_files(tshark_bin, tmpdir, options.num_procs, options.max_files, cap_files)
        else:
            extract_protos_from_file(tshark_bin, options.num_procs, options.max_files, cap_files, cap_hash, index_file_name)
    finally:
        # Dissection may result in a non-empty directory.
        if options.compare_dir:
            try:
                os.rmdir(tmpdir)
            except OSError:
                # Failed comparisons leave their output files behind; keep
                # the directory instead of masking the real exit status.
                pass
# Run the command-line tool only when this file is executed as a script.
if __name__ == "__main__":
    main()