# Tool to index protocols that appear in the given capture files
5 # The script list_protos_in_cap.sh does the same thing.
7 # Copyright 2009, Kovarththanan Rajaratnam <kovarththanan.rajaratnam@gmail.com>
9 # Wireshark - Network traffic analyzer
10 # By Gerald Combs <gerald@wireshark.org>
11 # Copyright 1998 Gerald Combs
13 # SPDX-License-Identifier: GPL-2.0-or-later
from optparse import OptionParser
import multiprocessing
import filecmp
import os
import pickle
import random
import re
import subprocess
import sys
import tempfile
def extract_protos_from_file_proces(tshark, file):
    """Count the protocols tshark reports for one capture file.

    Runs ``tshark -Tfields -e frame.protocols -r <file>`` and tallies every
    colon-separated protocol name found in the output lines.

    Args:
        tshark: Path of the tshark executable to run.
        file: Path of the capture file to read.

    Returns:
        ``(file, proto_hash)`` where ``proto_hash`` maps a protocol name to
        the number of times it occurred.  ``proto_hash`` is empty when tshark
        exits with a non-zero status.  ``None`` is returned if the worker is
        interrupted from the keyboard.
    """
    try:
        cmd = [tshark, "-Tfields", "-e", "frame.protocols", "-r", file]
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        (stdout, _stderr) = p.communicate()
        if p.returncode != 0:
            # A failed run produces no trustworthy protocol list.
            return (file, {})

        # Undecodable bytes must not abort the worker; a line containing the
        # replacement character fails the pattern below and is ignored.
        text = stdout.decode('utf-8', errors='replace')

        proto_hash = {}
        for line in text.splitlines():
            # Only accept lines that look like "eth:ip:tcp".
            if not re.match(r'^[\w:-]+$', line):
                continue
            for proto in line.split(':'):
                proto_hash[proto] = proto_hash.get(proto, 0) + 1

        return (file, proto_hash)
    except KeyboardInterrupt:
        return None
def extract_protos_from_file(tshark, num_procs, max_files, cap_files, cap_hash, index_file_name):
    """Index the protocols of every capture file and pickle the result.

    Each file is handed to ``extract_protos_from_file_proces`` in a worker
    pool.  Results are merged into ``cap_hash`` (file path -> protocol table)
    and the updated table is written to ``index_file_name``.

    Args:
        tshark: Path of the tshark executable.
        num_procs: Number of worker processes.
        max_files: Total shown in the ``[n/total]`` progress output.
        cap_files: Capture file paths to process.
        cap_hash: Existing index; updated in place.
        index_file_name: Path the pickled index is written to.

    On a keyboard interrupt the pool is terminated and the process exits with
    status 1 without writing the index.
    """
    pool = multiprocessing.Pool(num_procs)
    results = [pool.apply_async(extract_protos_from_file_proces, [tshark, file]) for file in cap_files]
    try:
        for (cur_item_idx, result_async) in enumerate(results):
            file_result = result_async.get()
            # Test for emptiness, not identity: ``x is {}`` is always False
            # because the literal creates a brand-new dict.
            action = "PROCESSED" if file_result[1] else "SKIPPED"
            print("%s [%u/%u] %s %u bytes" % (action, cur_item_idx+1, max_files, file_result[0], os.path.getsize(file_result[0])))
            cap_hash[file_result[0]] = file_result[1]
    except KeyboardInterrupt:
        print("%s was interrupted by user" % (sys.argv[0]))
        pool.terminate()
        sys.exit(1)

    pool.close()
    pool.join()

    with open(index_file_name, "wb") as index_file:
        pickle.dump(cap_hash, index_file)
def dissect_file_process(tshark, tmpdir, file):
    """Run a full dissection of one capture file, capturing its output.

    Executes ``tshark -nxVr <file>`` with stdout and stderr redirected into
    two freshly created temporary files inside ``tmpdir``.

    Args:
        tshark: Path of the tshark executable.
        tmpdir: Directory in which the output files are created.
        file: Path of the capture file to dissect.

    Returns:
        ``(file, ok, stdout_path, stderr_path)`` where ``ok`` is ``True`` when
        tshark exited with status 0 and ``False`` otherwise.  ``False`` is
        returned if the worker is interrupted from the keyboard.
    """
    handle_o = handle_e = None
    try:
        (handle_o, tmpfile_o) = tempfile.mkstemp(suffix='_stdout', dir=tmpdir)
        (handle_e, tmpfile_e) = tempfile.mkstemp(suffix='_stderr', dir=tmpdir)
        cmd = [tshark, "-nxVr", file]
        p = subprocess.Popen(cmd, stdout=handle_o, stderr=handle_e)
        p.communicate()
        return (file, p.returncode == 0, tmpfile_o, tmpfile_e)
    except KeyboardInterrupt:
        return False
    finally:
        # mkstemp hands back open descriptors; the child has its own copies,
        # so ours must be closed here or every dissected file leaks two fds.
        for handle in (handle_o, handle_e):
            if handle is not None:
                os.close(handle)
def dissect_files(tshark, tmpdir, num_procs, max_files, cap_files):
    """Dissect every capture file in parallel and report pass/fail per file.

    Each file is handed to ``dissect_file_process`` in a worker pool; one
    progress line is printed per file in submission order.

    Args:
        tshark: Path of the tshark executable.
        tmpdir: Directory receiving the per-file stdout/stderr captures.
        num_procs: Number of worker processes.
        max_files: Total shown in the ``[n/total]`` progress output.
        cap_files: Capture file paths to dissect.

    On a keyboard interrupt the pool is terminated and the process exits with
    status 1.
    """
    pool = multiprocessing.Pool(num_procs)
    results = [pool.apply_async(dissect_file_process, [tshark, tmpdir, file]) for file in cap_files]
    try:
        for (cur_item_idx, result_async) in enumerate(results):
            file_result = result_async.get()
            action = "FAILED" if file_result[1] is False else "PASSED"
            print("%s [%u/%u] %s %u bytes" % (action, cur_item_idx+1, max_files, file_result[0], os.path.getsize(file_result[0])))
    except KeyboardInterrupt:
        print("%s was interrupted by user" % (sys.argv[0]))
        pool.terminate()
        sys.exit(1)

    pool.close()
    pool.join()
def compare_files(tshark_bin, tmpdir, tshark_cmp, num_procs, max_files, cap_files):
    """Dissect every capture file with two tshark builds and compare output.

    Both builds are run on each file through ``dissect_file_process``.  A file
    passes when both runs exit successfully and their captured stdout and
    stderr are identical.  Output files of passing runs are deleted; those of
    failing runs are left in ``tmpdir`` for inspection.

    Args:
        tshark_bin: Path of the tshark executable under test.
        tmpdir: Directory receiving the per-file stdout/stderr captures.
        tshark_cmp: Path of the reference tshark executable.
        num_procs: Number of worker processes.
        max_files: Total shown in the ``[n/total]`` progress output.
        cap_files: Capture file paths to compare.

    On a keyboard interrupt the pool is terminated and the process exits with
    status 1.
    """
    pool = multiprocessing.Pool(num_procs)
    results_bin = [pool.apply_async(dissect_file_process, [tshark_bin, tmpdir, file]) for file in cap_files]
    results_cmp = [pool.apply_async(dissect_file_process, [tshark_cmp, tmpdir, file]) for file in cap_files]
    try:
        for (cur_item_idx, (result_async_bin, result_async_cmp)) in enumerate(zip(results_bin, results_cmp)):
            file_result_bin = result_async_bin.get()
            file_result_cmp = result_async_cmp.get()

            # One verdict per file: the first failing check wins, so a later
            # check can no longer overwrite an earlier failure.
            # shallow=False forces a content comparison; the default may
            # declare files equal from size and mtime alone.
            if file_result_cmp[1] is False or file_result_bin[1] is False:
                action = "FAILED (exitcode)"
            elif not filecmp.cmp(file_result_bin[2], file_result_cmp[2], shallow=False):
                action = "FAILED (stdout)"
            elif not filecmp.cmp(file_result_bin[3], file_result_cmp[3], shallow=False):
                action = "FAILED (stderr)"
            else:
                action = "PASSED"
                os.remove(file_result_bin[2])
                os.remove(file_result_cmp[2])
                os.remove(file_result_bin[3])
                os.remove(file_result_cmp[3])

            print("%s [%u/%u] %s %u bytes" % (action, cur_item_idx+1, max_files, file_result_bin[0], os.path.getsize(file_result_bin[0])))
            print("%s [%u/%u] %s %u bytes" % (action, cur_item_idx+1, max_files, file_result_cmp[0], os.path.getsize(file_result_cmp[0])))
    except KeyboardInterrupt:
        print("%s was interrupted by user" % (sys.argv[0]))
        pool.terminate()
        sys.exit(1)

    pool.close()
    pool.join()
def list_all_proto(cap_hash):
    """Sum the per-file protocol counts of the whole index.

    Args:
        cap_hash: Mapping of file path to that file's protocol table
            (protocol name -> count).

    Returns:
        A dict mapping each protocol name to its count summed over all files.
    """
    proto_hash = {}
    for files_hash in cap_hash.values():
        for proto, count in files_hash.items():
            proto_hash[proto] = proto_hash.get(proto, 0) + count

    return proto_hash
def list_all_files(cap_hash):
    """Return the paths of all indexed files in sorted order.

    Args:
        cap_hash: Mapping of file path to that file's protocol table.

    Returns:
        A new sorted list of the keys of ``cap_hash``.
    """
    # NOTE(review): the tail of this function is not visible in the excerpt
    # under review; sorting is applied so the listing is deterministic.
    files = list(cap_hash.keys())
    files.sort()

    return files
def list_all_proto_files(cap_hash, proto_comma_delit):
    """Return the indexed files that contain any of the given protocols.

    Args:
        cap_hash: Mapping of file path to that file's protocol table
            (protocol name -> count).
        proto_comma_delit: Comma-separated protocol names; surrounding
            whitespace around each name is ignored.

    Returns:
        A list of file paths, in index order, whose protocol table contains
        at least one of the requested names.
    """
    protos = {x.strip() for x in proto_comma_delit.split(',')}
    files = []
    for (file, files_hash) in cap_hash.items():
        # One matching protocol is enough; each file is listed once.
        if any(proto in protos for proto in files_hash):
            files.append(file)

    return files
def index_file_action(options):
    """Tell whether an option that works on the existing index was given.

    Args:
        options: Parsed command-line options providing ``list_all_proto``,
            ``list_all_files``, ``list_all_proto_files`` and ``dissect_files``.

    Returns:
        The first truthy value among those four options, otherwise the value
        of ``options.dissect_files``.
    """
    return (options.list_all_proto
            or options.list_all_files
            or options.list_all_proto_files
            or options.dissect_files)
def find_capture_files(paths, cap_hash):
    """Collect the files under ``paths`` that are not yet in the index.

    Args:
        paths: File and/or directory paths.  Directories are normalised and
            walked recursively; any other path is taken as a file as given.
        cap_hash: Existing index; paths that are keys of it are left out.

    Returns:
        A list of file paths that are not keys of ``cap_hash``.
    """
    cap_files = []
    for path in paths:
        if os.path.isdir(path):
            path = os.path.normpath(path)
            for root, dirs, files in os.walk(path):
                for name in files:
                    candidate = os.path.join(root, name)
                    if candidate not in cap_hash:
                        cap_files.append(candidate)
        elif path not in cap_hash:
            cap_files.append(path)

    return cap_files
def find_tshark_executable(bin_dir):
    """Locate an executable tshark binary inside ``bin_dir``.

    Args:
        bin_dir: Directory to search.

    Returns:
        The path of ``tshark.exe`` or ``tshark`` (tried in that order) if it
        is an executable regular file, otherwise ``None``.
    """
    for name in ["tshark.exe", "tshark"]:
        tshark = os.path.join(bin_dir, name)
        # X_OK alone is also true for a searchable directory, so a
        # sub-directory called "tshark" must not be mistaken for the binary.
        if os.path.isfile(tshark) and os.access(tshark, os.X_OK):
            return tshark

    return None
# Command-line interface: one positional index file, optionally followed by
# capture files and/or directories (see the usage string).
178 parser
= OptionParser(usage
="usage: %prog [options] index_file [file_1|dir_1 [.. file_n|dir_n]]")
179 parser
.add_option("-d", "--dissect-files", dest
="dissect_files", default
=False, action
="store_true",
180 help="Dissect all matching files")
181 parser
.add_option("-m", "--max-files", dest
="max_files", default
=sys
.maxsize
, type="int",
182 help="Max number of files to process")
183 parser
.add_option("-b", "--binary-dir", dest
="bin_dir", default
=os
.getcwd(),
184 help="Directory containing tshark executable")
185 parser
.add_option("-c", "--compare-dir", dest
="compare_dir", default
=None,
186 help="Directory containing tshark executable which is used for comparison")
187 parser
.add_option("-j", dest
="num_procs", default
=multiprocessing
.cpu_count(), type=int,
188 help="Max number of processes to spawn")
189 parser
.add_option("-r", "--randomize", default
=False, action
="store_true",
190 help="Randomize the file list order")
191 parser
.add_option("", "--list-all-proto", dest
="list_all_proto", default
=False, action
="store_true",
192 help="List all protocols in index file")
193 parser
.add_option("", "--list-all-files", dest
="list_all_files", default
=False, action
="store_true",
194 help="List all files in index file")
195 parser
.add_option("", "--list-all-proto-files", dest
="list_all_proto_files", default
=False,
196 metavar
="PROTO_1[, .. PROTO_N]",
197 help="List all files in index file containing the given protocol")
# Parse the command line, then reject argument/option combinations that
# cannot be served.
199 (options
, args
) = parser
.parse_args()
# NOTE(review): the condition guarding this error is not visible in this
# excerpt; presumably it fires when no positional argument was given.
202 parser
.error("index_file is a required argument")
204 if len(args
) == 1 and not index_file_action(options
):
205 parser
.error("one capture file/directory must be specified")
207 if options
.dissect_files
and not options
.list_all_files
and not options
.list_all_proto_files
:
208 parser
.error("--list-all-files or --list-all-proto-files must be specified")
210 if options
.dissect_files
and options
.compare_dir
is not None:
211 parser
.error("--dissect-files and --compare-dir cannot be specified at the same time")
# The first positional argument is the index file name.
213 index_file_name
= args
.pop(0)
# Load the pickled index (file path -> protocol table) from the index file.
# NOTE(review): pickle.load can execute arbitrary code from the file it
# reads; only point this tool at index files you trust.
217 index_file
= open(index_file_name
, "rb")
218 print("index file: %s [OPENED]" % index_file
.name
)
219 cap_hash
= pickle
.load(index_file
)
221 print("%d files" % len(cap_hash
))
# NOTE(review): the branch selecting the message below is not visible in this
# excerpt; presumably it is taken when the index file cannot be opened.
223 print("index file: %s [NEW]" % index_file_name
)
# Listing actions work on the loaded index only.
225 if options
.list_all_proto
:
226 print(list_all_proto(cap_hash
))
230 if options
.list_all_files
:
231 indexed_files
= list_all_files(cap_hash
)
234 if options
.list_all_proto_files
:
235 indexed_files
= list_all_proto_files(cap_hash
, options
.list_all_proto_files
)
# Locate the tshark binary to run and, when --compare-dir is given, the second
# binary used for comparison.
238 tshark_bin
= find_tshark_executable(options
.bin_dir
)
239 if tshark_bin
is not None:
240 print("tshark: %s [FOUND]" % tshark_bin
)
242 print("tshark: %s [MISSING]" % tshark_bin
)
245 if options
.compare_dir
is not None:
246 tshark_cmp
= find_tshark_executable(options
.compare_dir
)
247 if tshark_cmp
is not None:
248 print("tshark: %s [FOUND]" % tshark_cmp
)
250 print("tshark: %s [MISSING]" % tshark_cmp
)
# Choose the working set: the files selected from the index when dissecting or
# comparing, otherwise the not-yet-indexed files found under the given paths.
# NOTE(review): the body of the list-only branch is not visible in this excerpt.
253 if options
.dissect_files
or options
.compare_dir
:
254 cap_files
= indexed_files
255 elif options
.list_all_proto_files
or options
.list_all_files
:
258 cap_files
= find_capture_files(paths
, cap_hash
)
# Optionally shuffle the working set, then cap it at --max-files.
260 if options
.randomize
:
261 random
.shuffle(cap_files
)
265 options
.max_files
= min(options
.max_files
, len(cap_files
))
266 print("%u total files, %u working files" % (len(cap_files
), options
.max_files
))
267 cap_files
= cap_files
[:options
.max_files
]
# Dissect and compare runs capture tshark output in a fresh temporary directory.
268 if options
.compare_dir
or options
.dissect_files
:
269 tmpdir
= tempfile
.mkdtemp()
270 print("Temporary working dir: %s" % tmpdir
)
# Dispatch: compare two tshark builds, dissect with one build, or (default)
# index the protocols of the working set.
272 if options
.compare_dir
:
273 compare_files(tshark_bin
, tmpdir
, tshark_cmp
, options
.num_procs
, options
.max_files
, cap_files
)
274 elif options
.dissect_files
:
275 dissect_files(tshark_bin
, tmpdir
, options
.num_procs
, options
.max_files
, cap_files
)
277 extract_protos_from_file(tshark_bin
, options
.num_procs
, options
.max_files
, cap_files
, cap_hash
, index_file_name
)
279 # Dissection may result in a non-empty directory.
280 if options
.compare_dir
:
282 if __name__
== "__main__":