Merge tag 'trace-printf-v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/trace...
[drm/drm-misc.git] / tools / workqueue / wq_monitor.py
blob9e964c5be40c9ff7b9f52612e064faaada4e98bc
1 #!/usr/bin/env drgn
3 # Copyright (C) 2023 Tejun Heo <tj@kernel.org>
4 # Copyright (C) 2023 Meta Platforms, Inc. and affiliates.
# Help text shown by --help. The argument parser uses RawTextHelpFormatter,
# so this string is printed verbatim: the blank lines and the column
# alignment of the glossary below are part of the output and must be kept.
desc = """
This is a drgn script to monitor workqueues. For more info on drgn, visit
https://github.com/osandov/drgn.

  total    Total number of work items executed by the workqueue.

  infl     The number of currently in-flight work items.

  CPUtime  Total CPU time consumed by the workqueue in seconds. This is
           sampled from scheduler ticks and only provides ballpark
           measurement. "nohz_full=" CPUs are excluded from measurement.

  CPUitsv  The number of times a concurrency-managed work item hogged CPU
           longer than the threshold (workqueue.cpu_intensive_thresh_us)
           and got excluded from concurrency management to avoid stalling
           other work items.

  CMW/RPR  For per-cpu workqueues, the number of concurrency-management
           wake-ups while executing a work item of the workqueue. For
           unbound workqueues, the number of times a worker was repatriated
           to its affinity scope after being migrated to an off-scope CPU by
           the scheduler.

  mayday   The number of times the rescuer was requested while waiting for
           new worker creation.

  rescued  The number of work items executed by the rescuer.
"""
35 import signal
36 import re
37 import time
38 import json
40 import drgn
41 from drgn.helpers.linux.list import list_for_each_entry
43 import argparse
# Command-line interface. RawTextHelpFormatter makes argparse print `desc`
# exactly as written instead of re-wrapping it.
parser = argparse.ArgumentParser(
    description=desc,
    formatter_class=argparse.RawTextHelpFormatter,
)

# Zero or more regexes selecting workqueues by name.
parser.add_argument(
    'workqueue',
    metavar='REGEX',
    nargs='*',
    help='Target workqueue name patterns (all if empty)',
)

# Seconds between two reports; 0 means a single report.
parser.add_argument(
    '-i', '--interval',
    metavar='SECS',
    type=float,
    default=1,
    help='Monitoring interval (0 to print once and exit)',
)

# Switches from the human-readable table to one record per workqueue.
parser.add_argument(
    '-j', '--json',
    action='store_true',
    help='Output in json',
)

args = parser.parse_args()
# NOTE(review): `prog` is not defined in this file; it is presumably the
# program object that drgn injects when it runs the script - confirm.

# Head of the list that the monitoring loop walks to visit every workqueue.
workqueues = prog['workqueues']

# Bits tested against workqueue_struct.flags.
WQ_UNBOUND = prog['WQ_UNBOUND']
WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM']

# Indices into pool_workqueue.stats[].
# work items started execution
PWQ_STAT_STARTED = prog['PWQ_STAT_STARTED']
# work items completed execution
PWQ_STAT_COMPLETED = prog['PWQ_STAT_COMPLETED']
# total CPU time consumed
PWQ_STAT_CPU_TIME = prog['PWQ_STAT_CPU_TIME']
# wq_cpu_intensive_thresh_us violations
PWQ_STAT_CPU_INTENSIVE = prog['PWQ_STAT_CPU_INTENSIVE']
# concurrency-management worker wakeups
PWQ_STAT_CM_WAKEUP = prog['PWQ_STAT_CM_WAKEUP']
# unbound workers brought back into scope
PWQ_STAT_REPATRIATED = prog['PWQ_STAT_REPATRIATED']
# maydays to rescuer
PWQ_STAT_MAYDAY = prog['PWQ_STAT_MAYDAY']
# linked work items executed by rescuer
PWQ_STAT_RESCUED = prog['PWQ_STAT_RESCUED']

# Number of entries in pool_workqueue.stats[].
PWQ_NR_STATS = prog['PWQ_NR_STATS']
class WqStats:
    """Snapshot of one workqueue's counters, summed over its pool_workqueues.

    Attributes:
        name: Workqueue name decoded from ``wq.name``.
        unbound: True when ``WQ_UNBOUND`` is set in ``wq.flags``.
        mem_reclaim: True when ``WQ_MEM_RECLAIM`` is set in ``wq.flags``.
        stats: List of ``PWQ_NR_STATS`` integer totals, indexed by the
            ``PWQ_STAT_*`` constants.
    """

    def __init__(self, wq):
        """Read and aggregate the counters of workqueue *wq*.

        Args:
            wq: drgn object for a ``struct workqueue_struct``.
        """
        self.name = wq.name.string_().decode()
        # Coerce to plain bool so dict() only ever holds builtin types.
        self.unbound = bool(wq.flags & WQ_UNBOUND != 0)
        self.mem_reclaim = bool(wq.flags & WQ_MEM_RECLAIM != 0)

        nr_stats = int(PWQ_NR_STATS)
        totals = [0] * nr_stats
        # Each pool_workqueue linked on wq.pwqs keeps its own stats[] array;
        # the per-workqueue figure is the element-wise sum.
        for pwq in list_for_each_entry('struct pool_workqueue',
                                       wq.pwqs.address_of_(), 'pwqs_node'):
            for i in range(nr_stats):
                totals[i] += int(pwq.stats[i])
        self.stats = totals

    def dict(self, now):
        """Return the snapshot as a flat dict of builtin values.

        Args:
            now: Timestamp to store under the 'timestamp' key.

        Returns:
            A dict with the workqueue name, its two flags and the raw
            (unscaled) value of every counter.
        """
        return {
            'timestamp': now,
            'name': self.name,
            'unbound': self.unbound,
            'mem_reclaim': self.mem_reclaim,
            'started': self.stats[PWQ_STAT_STARTED],
            'completed': self.stats[PWQ_STAT_COMPLETED],
            'cpu_time': self.stats[PWQ_STAT_CPU_TIME],
            'cpu_intensive': self.stats[PWQ_STAT_CPU_INTENSIVE],
            'cm_wakeup': self.stats[PWQ_STAT_CM_WAKEUP],
            'repatriated': self.stats[PWQ_STAT_REPATRIATED],
            'mayday': self.stats[PWQ_STAT_MAYDAY],
            'rescued': self.stats[PWQ_STAT_RESCUED],
        }

    @staticmethod
    def table_header_str():
        """Return the column header line matching table_row_str().

        Declared static so it can be called on the class or on an instance.
        """
        return f'{"":>24} {"total":>8} {"infl":>5} {"CPUtime":>8} '\
               f'{"CPUitsv":>7} {"CMW/RPR":>7} {"mayday":>7} {"rescued":>7}'

    def table_row_str(self):
        """Return this workqueue formatted as one fixed-width table row.

        Columns that do not apply to this kind of workqueue show '-'. The
        row carries no trailing whitespace, like the header line.
        """
        cpu_intensive = '-'
        mayday = '-'
        rescued = '-'

        # The CMW/RPR column is shared: repatriations for unbound
        # workqueues, concurrency-management wakeups for per-cpu ones.
        if self.unbound:
            cmw_rpr = str(self.stats[PWQ_STAT_REPATRIATED])
        else:
            cpu_intensive = str(self.stats[PWQ_STAT_CPU_INTENSIVE])
            cmw_rpr = str(self.stats[PWQ_STAT_CM_WAKEUP])

        # Rescuer columns are only filled in for WQ_MEM_RECLAIM workqueues.
        if self.mem_reclaim:
            mayday = str(self.stats[PWQ_STAT_MAYDAY])
            rescued = str(self.stats[PWQ_STAT_RESCUED])

        started = self.stats[PWQ_STAT_STARTED]
        # Clamped at zero so the column never shows a negative count.
        # NOTE(review): presumably completed can momentarily exceed started
        # because the counters are sampled without synchronization - confirm.
        in_flight = max(started - self.stats[PWQ_STAT_COMPLETED], 0)
        # Scaled by 1e6 for display.
        # NOTE(review): assumes the raw counter is in microseconds - confirm.
        cpu_time = self.stats[PWQ_STAT_CPU_TIME] / 1000000

        # Long names keep their last 24 characters.
        return (f'{self.name[-24:]:24} '
                f'{started:8} '
                f'{in_flight:5} '
                f'{cpu_time:8.1f} '
                f'{cpu_intensive:>7} '
                f'{cmw_rpr:>7} '
                f'{mayday:>7} '
                f'{rescued:>7}')
# Raised by the SIGINT handler below; main()'s monitoring loop polls it so
# that the report in progress finishes before the script exits.
exit_req = False


def sigint_handler(signr, frame):
    """Signal handler that requests a graceful exit.

    Both arguments are the standard signal-handler parameters and are unused.
    """
    global exit_req
    exit_req = True
def main():
    """Print statistics for the selected workqueues, once or periodically.

    Reads its configuration from the module-level ``args``. With a non-zero
    interval the report repeats until SIGINT sets ``exit_req``. With
    ``--json`` each workqueue is emitted as one JSON object per line.
    """
    # handle args
    table_fmt = not args.json
    interval = args.interval

    # Several name patterns are OR-ed into a single regex; no pattern at all
    # means every workqueue is reported.
    filter_re = re.compile('|'.join(args.workqueue)) if args.workqueue else None

    # monitoring loop
    signal.signal(signal.SIGINT, sigint_handler)

    while not exit_req:
        now = time.time()

        if table_fmt:
            print()
            print(WqStats.table_header_str())

        for wq in list_for_each_entry('struct workqueue_struct',
                                      workqueues.address_of_(), 'list'):
            stats = WqStats(wq)
            if filter_re and not filter_re.search(stats.name):
                continue
            if table_fmt:
                print(stats.table_row_str())
            else:
                # Serialize explicitly: printing the dict itself would emit
                # Python repr (single quotes, True/False), which is not JSON.
                print(json.dumps(stats.dict(now)))

        if interval == 0:
            break
        time.sleep(interval)


if __name__ == "__main__":
    main()