3 #######################################################################
5 # A script to parse samba (especially winbind) logfiles.
6 # Trace files should be in a non-syslog format (debug syslog format = no).
8 # --traceid ... Specify the traceid of the request to parse
9 # --pid ... Specify the pid
10 # --breakdown ... Break to separate files per each traceid
11 # --merge-by-timestamp ... Merge logs by timestamp
12 # --flow ... Show the request/sub-request call flow
13 # --flow-compact ... Show the request/sub-request call flow without dcerpc
16 # Copyright (c) 2023 Andreas Schneider <asn@samba.org>
17 # Copyright (c) 2023 Pavel Filipenský <pfilipen@redhat.com>
19 # This program is free software: you can redistribute it and/or modify
20 # it under the terms of the GNU General Public License as published by
21 # the Free Software Foundation, either version 3 of the License, or
22 # (at your option) any later version.
24 # This program is distributed in the hope that it will be useful,
25 # but WITHOUT ANY WARRANTY; without even the implied warranty of
26 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 # GNU General Public License for more details.
29 # You should have received a copy of the GNU General Public License
30 # along with this program. If not, see <http://www.gnu.org/licenses/>.
32 #######################################################################
39 from argparse import ArgumentParser
40 from collections import defaultdict
42 # Trace record consists of a trace header followed by one or more text lines.
44 # This tool expects trace header format based on these smb.conf parameters:
46 # debug syslog format = no
47 # debug hires timestamp = yes
48 # winbind debug traceid = yes
50 # If 'winbind debug traceid = no' is set, then the option --merge-by-timestamp
53 # Each trace header contains a traceid, which is the main identifier for this
54 # tool. A single traceid is either provided via command line option --traceid
55 # or a list of traceids is derived from the PID specified via option --pid.
56 # Creating and evaluating list of traceids from PID can be tricky:
57 # The traceid can appear in a trace record before trace record containing the
58 # PID is processed. So when we see a new traceid we are not sure if it belongs
60 # The PID appears only in the main winbind process (log.winbind). If a
61 # directory with many log files should be processed, we process the files in
63 # It might happen that e.g. log.wb-ADDOMAIN is processed before log.winbind so
64 # we do not know the list of traceids yet.
65 # To make all this easy we put into memory all trace records and do the final
66 # traceid filtering only after all files are read. This can require lot of
67 # memory if files are big.
70 def process_file_no_traceid(record_list, fname):
71 with open(fname, "r") as infile:
72 data = infile.readlines()
76 RE_HEADER_NO_TRACEID = re.compile(
77 r"^\[(\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}\.\d{6}).*")
79 header = RE_HEADER_NO_TRACEID.search(line)
81 # Append all previous trace lines of a record
83 record_list.append((date, None, record_lines, fname))
85 # Remember the new date
86 date = header.group(1)
87 record_lines.append(line)
90 def process_file(record_list, traceid_set, fname, opid, otraceid):
91 with open(fname, "r") as infile:
92 data = infile.readlines()
96 undecided_traceid = False
100 # If traceid option was provided the traceid_set will contain just it
102 traceid_set.add(otraceid)
104 RE_HEADER = re.compile(
105 r"^\[(\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}\.\d{6}).*?, .*, "
106 r"traceid=([0-9]+).*\]")
107 RE_INTERFACE_VERSION = re.compile(
108 r"^\s+winbindd_interface_version: \[\S* \((\d+)\)\]")
109 RE_ASYNC_REQUEST = re.compile(
110 r"^\s+process_request_send: "
111 r"\[\S* \((\d+)\)\] Handling async request:")
112 # Example of a header line
113 # [2023/05/01 07:40:45.439049, 3, pid=418844, effective(0, 0), real(0, 0), class=winbind, traceid=37] ../../source3/winbindd/winbindd_misc.c:355(winbindd_interface_version)
115 header = RE_HEADER.search(line)
117 # Append all previous trace lines of a record if the traceid is in
120 record_list.append((date, traceid, record_lines, fname))
122 # Remember the new date and the new traceid
123 date = header.group(1)
124 traceid = header.group(2)
125 if traceid != traceid_prev:
126 traceid_prev = traceid
127 undecided_traceid = True
129 # Search for lines that identify a new winbind client and the
132 # winbindd_interface_version: [nss_winbind (500725)]: request interface version (version = 32)
133 # process_request_send: [nss_winbind (500725)] Handling async request: SETPWENT
134 interface_version = RE_INTERFACE_VERSION.search(line)
135 async_request = RE_ASYNC_REQUEST.search(line)
136 if interface_version:
137 pid = interface_version.group(1)
138 if undecided_traceid:
140 traceid_set.add(traceid)
141 undecided_traceid = False
143 pid = async_request.group(1)
144 if undecided_traceid:
146 traceid_set.add(traceid)
147 undecided_traceid = False
148 # For --breakdown add every traceid
149 if not opid and not otraceid:
150 traceid_set.add(traceid)
152 record_lines.append(line)
155 def filter_traceids(record_list, traceid_set):
157 for (d, t, li, f) in record_list:
159 llist.append((d, t, li, f))
163 def filter_flow(record_list):
165 for (date, traceid, lines, filename) in record_list:
167 isflow = re.search(r"^(\s+)flow: (.*)", line)
169 local_list.append(isflow.group(1) + isflow.group(2))
173 def filter_flowcompact(flist):
178 local_list.append(fl)
179 dcerpc_start = re.search(r"^(\s+)-> dcerpc_", fl)
181 end_marker = dcerpc_start.group(1)
183 dcerpc_end = re.search(r"^" + end_marker + "<- dcerpc_", fl)
186 local_list.append(fl)
190 def print_record_list(record_list, file):
192 for (date, traceid, lines, filename) in record_list:
193 # Inform about filename change
194 if filename != f_prev:
195 print("-" * 72, file=file)
196 print("FILE: ", filename, file=file)
197 print("-" * 72, file=file)
199 print(line, end='', file=file)
202 # record_list ... list of quadruplets <date, traceid, [trace lines], filename>
203 # flow_list ... lines from record_list with 'flow' traces
204 # traceid_set ... list of traceids we want to trace
205 # with --traceid ... there is a single traceids
206 # with --pid ... there are all traceids for the PID
207 # with --breakdown ... there are all traceids
211 parser = ArgumentParser()
216 help="logfile or directory"
221 help="specify the traceid of the trace records",
227 help="specify the pid of winbind client",
235 help="breakdown the traces into per traceid files"
238 "--merge-by-timestamp",
242 help="merge logs by timestamp"
249 help="show the request/sub-request flow traces"
256 help="show the request/sub-request flow traces without dcerpc details"
266 parser = setup_parser()
267 options = parser.parse_args()
269 if (not options.traceid and not options.pid and not options.breakdown
270 and not options.merge):
271 print("One of --traceid or --pid is needed"
272 " or --breakdown or --merge-by-timestamp.")
274 elif options.traceid and options.pid:
275 print("Only one of --traceid or --pid is allowed.")
277 elif options.breakdown and (options.traceid or options.pid):
278 print("--breakdown cannot be combined with --traceid and --pid.")
281 if options.flow and not options.traceid:
282 print("Option --flow can be used only together with --traceid.")
285 if options.flowcompact and not options.traceid:
286 print("Option --flow-compact can be used only together with "
290 if options.flow and options.flowcompact:
291 print("Only one of --flow or --flow-compact is allowed.")
295 print("Path to logfile or directory with logs is needed.")
298 merge_with_no_traceid = (not options.traceid and not options.pid
299 and not options.breakdown) and options.merge
302 if os.path.isdir(path):
303 for root, dirs, files in os.walk(path):
305 if merge_with_no_traceid:
306 process_file_no_traceid(
308 os.path.join(root, name)
314 os.path.join(root, name),
318 elif os.path.isfile(path):
319 if merge_with_no_traceid:
320 process_file_no_traceid(
333 print(path, "Path is neither file or directory.")
336 # Sort only using timestamps, no use of traceid
337 if merge_with_no_traceid:
339 print_record_list(record_list, sys.stdout)
342 # Keep only records with matching traceids
343 if not options.breakdown:
344 record_list = filter_traceids(record_list, traceid_set)
346 if options.breakdown:
347 for traceid in traceid_set:
349 with open("%s.full" % traceid, "w") as full_f:
350 full_l = filter_traceids(record_list, {traceid})
353 print_record_list(full_l, full_f)
355 with open("%s.flow" % traceid, "w") as flow_f:
356 flow_l = filter_flow(full_l)
358 print(fl, file=flow_f)
360 with open("%s.flowcompact" % traceid, "w") as flowcompact_f:
361 flowcompact_l = filter_flowcompact(flow_l)
362 for fl in flowcompact_l:
363 print(fl, file=flowcompact_f)
365 flow_list = filter_flow(record_list)
368 elif options.flowcompact:
369 flow_list = filter_flow(record_list)
370 flow_list = filter_flowcompact(flow_list)
376 print_record_list(record_list, sys.stdout)
381 if __name__ == "__main__":