MSWSP: remove unused tree_item's
[wireshark-wip.git] / tools / indexcap.py
blobd252e27b4afafe12f487444243bb841235364571
1 #!/usr/bin/python
# Tool to index the protocols that appear in the given capture files
5 # The script list_protos_in_cap.sh does the same thing.
7 # Copyright 2009, Kovarththanan Rajaratnam <kovarththanan.rajaratnam@gmail.com>
9 # $Id$
11 # Wireshark - Network traffic analyzer
12 # By Gerald Combs <gerald@wireshark.org>
13 # Copyright 1998 Gerald Combs
15 # This program is free software; you can redistribute it and/or
16 # modify it under the terms of the GNU General Public License
17 # as published by the Free Software Foundation; either version 2
18 # of the License, or (at your option) any later version.
20 # This program is distributed in the hope that it will be useful,
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 # GNU General Public License for more details.
25 # You should have received a copy of the GNU General Public License
26 # along with this program; if not, write to the Free Software
27 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
30 from optparse import OptionParser
31 import multiprocessing
32 import sys
33 import os
34 import subprocess
35 import re
36 import pickle
37 import tempfile
38 import filecmp
39 import random
def extract_protos_from_file_proces(tshark, file):
    """Count the protocols that occur in one capture file.

    Runs ``tshark -Tfields -e frame.protocols -r <file>`` and tallies every
    protocol name found in the colon-separated protocol stacks it prints
    (one line per frame).  Lines that contain anything other than word
    characters, ':' and '-' are ignored.

    Returns ``(file, proto_hash)`` where ``proto_hash`` maps a protocol name
    to its number of occurrences; the dict is empty when tshark exits with a
    non-zero status.  Returns ``None`` when the worker is interrupted with
    Ctrl-C.
    """
    try:
        cmd = [tshark, "-Tfields", "-e", "frame.protocols", "-r", file]
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # stderr is piped only to keep tshark's diagnostics off the console.
        (stdout, _) = p.communicate()
        if p.returncode != 0:
            return (file, {})

        # Pipes deliver bytes on interpreters where str is a text type; the
        # str pattern below can only be matched against text.
        if not isinstance(stdout, str):
            stdout = stdout.decode("utf-8", "replace")

        valid_line = re.compile(r'^[\w:-]+$')
        proto_hash = {}
        for line in stdout.splitlines():
            if not valid_line.match(line):
                continue

            for proto in line.split(':'):
                proto_hash[proto] = proto_hash.get(proto, 0) + 1

        return (file, proto_hash)
    except KeyboardInterrupt:
        return None
def extract_protos_from_file(tshark, num_procs, max_files, cap_files, cap_hash, index_file_name):
    """Index the protocols of ``cap_files`` and write the index file.

    Every capture file is handed to ``extract_protos_from_file_proces`` in a
    pool of ``num_procs`` worker processes.  Each result is stored in
    ``cap_hash`` (capture file name -> {protocol: count}), which is finally
    pickled to ``index_file_name``.  ``max_files`` is used for the progress
    output only.

    This function does not return: it exits the process with status 0 once
    the index has been written, or with status 1 when interrupted.
    """
    pool = multiprocessing.Pool(num_procs)
    results = [pool.apply_async(extract_protos_from_file_proces, [tshark, file]) for file in cap_files]
    try:
        for (cur_item_idx, result_async) in enumerate(results):
            file_result = result_async.get()
            if file_result is None:
                # The worker swallowed a Ctrl-C and has no result for us;
                # handle it like an interrupt of this process.
                raise KeyboardInterrupt
            # An empty protocol dict is what the worker returns when tshark
            # failed.  Test for emptiness: an identity test against a fresh
            # {} literal can never be true.
            action = "SKIPPED" if not file_result[1] else "PROCESSED"
            print("%s [%u/%u] %s %u bytes" % (action, cur_item_idx+1, max_files, file_result[0], os.path.getsize(file_result[0])))
            cap_hash[file_result[0]] = file_result[1]
    except KeyboardInterrupt:
        print("%s was interrupted by user" % (sys.argv[0]))
        pool.terminate()
        sys.exit(1)

    # All results are in; let the workers shut down before writing the index.
    pool.close()
    pool.join()

    with open(index_file_name, "w") as index_file:
        pickle.dump(cap_hash, index_file)
    sys.exit(0)
def dissect_file_process(tshark, tmpdir, file):
    """Dissect one capture file with ``tshark -nxVr <file>``.

    tshark's stdout and stderr are redirected into two temporary files that
    are created inside ``tmpdir`` (suffixes ``_stdout`` and ``_stderr``).

    Returns ``(file, succeeded, stdout_path, stderr_path)`` where
    ``succeeded`` is True when tshark exited with status 0 and False
    otherwise.  Returns ``False`` when the worker is interrupted with Ctrl-C.
    The temporary files are left on disk; removing them is up to the caller.
    """
    # Only descriptors that were really opened end up here, so the cleanup
    # below cannot fail on an unbound name when mkstemp() itself raises.
    handles = []
    try:
        (handle_o, tmpfile_o) = tempfile.mkstemp(suffix='_stdout', dir=tmpdir)
        handles.append(handle_o)
        (handle_e, tmpfile_e) = tempfile.mkstemp(suffix='_stderr', dir=tmpdir)
        handles.append(handle_e)
        cmd = [tshark, "-nxVr", file]
        p = subprocess.Popen(cmd, stdout=handle_o, stderr=handle_e)
        # Output goes straight to the files, so there is nothing to read here.
        p.wait()
        return (file, p.returncode == 0, tmpfile_o, tmpfile_e)

    except KeyboardInterrupt:
        return False

    finally:
        for handle in handles:
            os.close(handle)
def dissect_files(tshark, tmpdir, num_procs, max_files, cap_files):
    """Dissect every file in ``cap_files`` and print PASSED/FAILED for each.

    The files are handed to ``dissect_file_process`` in a pool of
    ``num_procs`` worker processes; a file passes when tshark exits with
    status 0.  ``max_files`` is used for the progress output only.

    The captured tshark output of a passing file is deleted from ``tmpdir``;
    the output of a failing file is kept there for inspection.  Exits the
    process with status 1 when interrupted.
    """
    pool = multiprocessing.Pool(num_procs)
    results = [pool.apply_async(dissect_file_process, [tshark, tmpdir, file]) for file in cap_files]
    try:
        for (cur_item_idx, result_async) in enumerate(results):
            file_result = result_async.get()
            if file_result is False:
                # The worker swallowed a Ctrl-C and returned no result tuple;
                # handle it like an interrupt of this process.
                raise KeyboardInterrupt
            (cap_file, passed, tmpfile_o, tmpfile_e) = file_result
            if passed:
                action = "PASSED"
                # Nothing to investigate, so do not let the output pile up.
                os.remove(tmpfile_o)
                os.remove(tmpfile_e)
            else:
                action = "FAILED"
            print("%s [%u/%u] %s %u bytes" % (action, cur_item_idx+1, max_files, cap_file, os.path.getsize(cap_file)))
    except KeyboardInterrupt:
        print("%s was interrupted by user" % (sys.argv[0]))
        pool.terminate()
        sys.exit(1)

    pool.close()
    pool.join()
def compare_files(tshark_bin, tmpdir, tshark_cmp, num_procs, max_files, cap_files):
    """Dissect ``cap_files`` with two tshark builds and compare the results.

    Every capture file is dissected once with ``tshark_bin`` and once with
    ``tshark_cmp`` (via ``dissect_file_process`` in a pool of ``num_procs``
    workers).  A file passes when both runs exit with status 0 and their
    captured stdout and stderr are identical; otherwise the first failing
    check (exit code, stdout, stderr) is reported.  ``max_files`` is used for
    the progress output only.

    The captured output of a passing file is deleted from ``tmpdir``; the
    output of a failing file is kept there for inspection.  Exits the process
    with status 1 when interrupted.
    """
    pool = multiprocessing.Pool(num_procs)
    results_bin = [pool.apply_async(dissect_file_process, [tshark_bin, tmpdir, file]) for file in cap_files]
    results_cmp = [pool.apply_async(dissect_file_process, [tshark_cmp, tmpdir, file]) for file in cap_files]
    try:
        for (cur_item_idx, (result_async_bin, result_async_cmp)) in enumerate(zip(results_bin, results_cmp)):
            file_result_bin = result_async_bin.get()
            file_result_cmp = result_async_cmp.get()
            if file_result_bin is False or file_result_cmp is False:
                # A worker swallowed a Ctrl-C and returned no result tuple;
                # handle it like an interrupt of this process.
                raise KeyboardInterrupt

            # The checks form one chain so that a later check that happens to
            # succeed cannot turn an earlier failure into "PASSED".
            # shallow=False forces a byte-wise comparison instead of trusting
            # matching size/mtime of the two freshly written files.
            if file_result_cmp[1] is False or file_result_bin[1] is False:
                action = "FAILED (exitcode)"
            elif not filecmp.cmp(file_result_bin[2], file_result_cmp[2], shallow=False):
                action = "FAILED (stdout)"
            elif not filecmp.cmp(file_result_bin[3], file_result_cmp[3], shallow=False):
                action = "FAILED (stderr)"
            else:
                action = "PASSED"
                os.remove(file_result_bin[2])
                os.remove(file_result_cmp[2])
                os.remove(file_result_bin[3])
                os.remove(file_result_cmp[3])

            print("%s [%u/%u] %s %u bytes" % (action, cur_item_idx+1, max_files, file_result_bin[0], os.path.getsize(file_result_bin[0])))
            print("%s [%u/%u] %s %u bytes" % (action, cur_item_idx+1, max_files, file_result_cmp[0], os.path.getsize(file_result_cmp[0])))
    except KeyboardInterrupt:
        print("%s was interrupted by user" % (sys.argv[0]))
        pool.terminate()
        sys.exit(1)

    pool.close()
    pool.join()
def list_all_proto(cap_hash):
    """Sum the per-file protocol counts over the whole index.

    ``cap_hash`` maps a capture file name to a ``{protocol: count}`` dict.
    Returns a new ``{protocol: total count}`` dict; ``cap_hash`` itself is
    not modified.
    """
    proto_hash = {}
    # values()/items() exist on every Python version, unlike the iter*()
    # variants, and yield the same entries.
    for files_hash in cap_hash.values():
        for proto, count in files_hash.items():
            proto_hash[proto] = count + proto_hash.get(proto, 0)

    return proto_hash
def list_all_files(cap_hash):
    """Return the names of all indexed capture files as a sorted list.

    ``cap_hash`` is the index dict keyed by capture file name.
    """
    # sorted() always returns a new list, whether or not keys() would.
    return sorted(cap_hash)
def list_all_proto_files(cap_hash, proto_comma_delit):
    """Return the indexed files that contain at least one given protocol.

    ``cap_hash`` maps a capture file name to a ``{protocol: count}`` dict.
    ``proto_comma_delit`` is a comma-separated list of protocol names;
    whitespace around each name is ignored.  Each matching file is listed
    once, in the iteration order of ``cap_hash``.
    """
    protos = set(x.strip() for x in proto_comma_delit.split(','))
    # A file matches as soon as its protocol names share one entry with the
    # requested set.
    return [cap_file for (cap_file, files_hash) in cap_hash.items()
            if not protos.isdisjoint(files_hash)]
def index_file_action(options):
    """Tell whether an option was given that works on the index file alone.

    Returns the first true value among the ``list_all_proto``,
    ``list_all_files`` and ``list_all_proto_files`` options, and otherwise
    the value of ``dissect_files``.
    """
    for option_name in ("list_all_proto", "list_all_files", "list_all_proto_files"):
        selected = getattr(options, option_name)
        if selected:
            return selected
    return options.dissect_files
def find_capture_files(paths, cap_hash):
    """Collect the capture files below ``paths`` that are not indexed yet.

    A directory in ``paths`` is normalized and walked recursively, and every
    file found below it is a candidate; any other path is a candidate
    itself.  Candidates that are already a key of ``cap_hash`` are left out.
    Returns the remaining candidates as a list.
    """
    found = []
    for entry in paths:
        if not os.path.isdir(entry):
            if entry not in cap_hash:
                found.append(entry)
            continue

        top = os.path.normpath(entry)
        for (dirpath, _subdirs, filenames) in os.walk(top):
            for filename in filenames:
                candidate = os.path.join(dirpath, filename)
                if candidate not in cap_hash:
                    found.append(candidate)
    return found
def find_tshark_executable(bin_dir):
    """Locate the tshark executable inside ``bin_dir``.

    Tries ``tshark.exe`` first and ``tshark`` second.  Returns the path of
    the first candidate that is an executable file, or ``None`` when there
    is none.
    """
    for file in ("tshark.exe", "tshark"):
        tshark = os.path.join(bin_dir, file)
        # os.access(X_OK) alone also succeeds for a searchable directory
        # called "tshark", so insist on a regular file as well.
        if os.path.isfile(tshark) and os.access(tshark, os.X_OK):
            return tshark

    return None
def main():
    """Command-line entry point.

    The first positional argument is the index file; any further arguments
    are capture files or directories.  Depending on the options the script

    * prints information from the index (``--list-all-proto``,
      ``--list-all-files``, ``--list-all-proto-files``),
    * dissects the listed files (``--dissect-files``) or compares the
      dissection of two tshark builds (``--compare-dir``), or
    * by default indexes the given capture files that are not in the index
      yet and rewrites the index file.
    """
    parser = OptionParser(usage="usage: %prog [options] index_file [file_1|dir_1 [.. file_n|dir_n]]")
    parser.add_option("-d", "--dissect-files", dest="dissect_files", default=False, action="store_true",
                      help="Dissect all matching files")
    # sys.maxsize is available on every Python version; the value only acts
    # as "no limit" because it is clamped to the number of files below.
    parser.add_option("-m", "--max-files", dest="max_files", default=sys.maxsize, type="int",
                      help="Max number of files to process")
    parser.add_option("-b", "--binary-dir", dest="bin_dir", default=os.getcwd(),
                      help="Directory containing tshark executable")
    parser.add_option("-c", "--compare-dir", dest="compare_dir", default=None,
                      help="Directory containing tshark executable which is used for comparison")
    parser.add_option("-j", dest="num_procs", default=multiprocessing.cpu_count(), type="int",
                      help="Max number of processes to spawn")
    parser.add_option("-r", "--randomize", default=False, action="store_true",
                      help="Randomize the file list order")
    parser.add_option("--list-all-proto", dest="list_all_proto", default=False, action="store_true",
                      help="List all protocols in index file")
    parser.add_option("--list-all-files", dest="list_all_files", default=False, action="store_true",
                      help="List all files in index file")
    parser.add_option("--list-all-proto-files", dest="list_all_proto_files", default=False,
                      metavar="PROTO_1[, .. PROTO_N]",
                      help="List all files in index file containing the given protocol")

    (options, args) = parser.parse_args()

    if len(args) == 0:
        parser.error("index_file is a required argument")

    if len(args) == 1 and not index_file_action(options):
        parser.error("one capture file/directory must be specified")

    if options.dissect_files and not options.list_all_files and not options.list_all_proto_files:
        parser.error("--list-all-files or --list-all-proto-files must be specified")

    if options.dissect_files and options.compare_dir is not None:
        parser.error("--dissect-files and --compare-dir cannot be specified at the same time")

    index_file_name = args.pop(0)
    paths = args
    cap_hash = {}
    try:
        # NOTE(review): unpickling can execute arbitrary code; only use index
        # files that were written by this tool.
        with open(index_file_name, "r") as index_file:
            # Written without a newline so the file count ends up on the
            # same output line.
            sys.stdout.write("index file: %s [OPENED] " % (index_file.name,))
            cap_hash = pickle.load(index_file)
        print("%d files" % len(cap_hash))
    except IOError:
        print("index file: %s [NEW]" % (index_file_name,))

    if options.list_all_proto:
        print(list_all_proto(cap_hash))
        sys.exit(0)

    indexed_files = []
    if options.list_all_files:
        indexed_files = list_all_files(cap_hash)
        print(indexed_files)

    if options.list_all_proto_files:
        indexed_files = list_all_proto_files(cap_hash, options.list_all_proto_files)
        print(indexed_files)

    tshark_bin = find_tshark_executable(options.bin_dir)
    if tshark_bin is not None:
        print("tshark: %s [FOUND]" % (tshark_bin,))
    else:
        print("tshark: %s [MISSING]" % (tshark_bin,))
        sys.exit(1)

    tshark_cmp = None
    if options.compare_dir is not None:
        tshark_cmp = find_tshark_executable(options.compare_dir)
        if tshark_cmp is not None:
            print("tshark: %s [FOUND]" % (tshark_cmp,))
        else:
            print("tshark: %s [MISSING]" % (tshark_cmp,))
            sys.exit(1)

    if options.dissect_files or options.compare_dir:
        # Dissecting/comparing works on the files selected from the index.
        cap_files = indexed_files
    elif options.list_all_proto_files or options.list_all_files:
        # Pure listing: everything has been printed already.
        sys.exit(0)
    else:
        cap_files = find_capture_files(paths, cap_hash)

    if options.randomize:
        random.shuffle(cap_files)
    else:
        cap_files.sort()

    options.max_files = min(options.max_files, len(cap_files))
    print("%u total files, %u working files" % (len(cap_files), options.max_files))
    cap_files = cap_files[:options.max_files]
    tmpdir = tempfile.mkdtemp()
    print("Temporary working dir: %s" % tmpdir)

    try:
        if options.compare_dir:
            compare_files(tshark_bin, tmpdir, tshark_cmp, options.num_procs, options.max_files, cap_files)
        elif options.dissect_files:
            dissect_files(tshark_bin, tmpdir, options.num_procs, options.max_files, cap_files)
        else:
            # extract_protos_from_file() takes no tmpdir argument.
            extract_protos_from_file(tshark_bin, options.num_procs, options.max_files, cap_files, cap_hash, index_file_name)
    finally:
        # The workers above leave via sys.exit(); clean up on that path too.
        # The directory can only be removed while it is empty, so report a
        # failure instead of ending the run with a traceback.
        try:
            os.rmdir(tmpdir)
        except OSError as err:
            print("Temporary working dir %s not removed: %s" % (tmpdir, err))


if __name__ == "__main__":
    main()