1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _LINUX_PSI_TYPES_H
3 #define _LINUX_PSI_TYPES_H
5 #include <linux/kthread.h>
6 #include <linux/seqlock.h>
7 #include <linux/types.h>
8 #include <linux/kref.h>
9 #include <linux/wait.h>
13 /* Tracked task states */
19 * For IO and CPU stalls the presence of running/oncpu tasks
20 * in the domain means a partial rather than a full stall.
21 * For memory it's not so simple because of page reclaimers:
22 * they are running/oncpu while representing a stall. To tell
23 * whether a domain has productivity left or not, we need to
24 * distinguish between regular running (i.e. productive)
25 * threads and memstall ones.
28 NR_PSI_TASK_COUNTS
= 4,
31 /* Task state bitmasks */
/* Each mask is the single bit selected by its NR_* index (1 << index). */
32 #define TSK_IOWAIT (1 << NR_IOWAIT)
33 #define TSK_MEMSTALL (1 << NR_MEMSTALL)
34 #define TSK_RUNNING (1 << NR_RUNNING)
35 #define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING)
37 /* Only one task can be scheduled, no corresponding task count */
/*
 * Bit NR_PSI_TASK_COUNTS lies outside the index range of a
 * tasks[NR_PSI_TASK_COUNTS] array, so this state has no counter slot.
 */
38 #define TSK_ONCPU (1 << NR_PSI_TASK_COUNTS)
40 /* Resources that workloads could be stalled on */
45 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
52 * Pressure states for each resource:
54 * SOME: Stalled tasks & working tasks
55 * FULL: Stalled tasks & no working tasks
64 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
67 /* Only per-CPU, to weigh the CPU in the global average: */
72 /* Use one bit in the state mask to track TSK_ONCPU */
/* Bit NR_PSI_STATES: one past the last index of a [NR_PSI_STATES]-sized array. */
73 #define PSI_ONCPU (1 << NR_PSI_STATES)
75 /* Flag whether to re-arm avgs_work, see details in get_recent_times() */
/* Occupies bit NR_PSI_STATES + 1, i.e. the bit directly above PSI_ONCPU. */
76 #define PSI_STATE_RESCHEDULE (1 << (NR_PSI_STATES + 1))
78 enum psi_aggregators
{
84 struct psi_group_cpu
{
85 /* 1st cacheline updated by the scheduler */
87 /* Aggregator needs to know of concurrent changes */
88 seqcount_t seq ____cacheline_aligned_in_smp
;
90 /* States of the tasks belonging to this group */
91 unsigned int tasks
[NR_PSI_TASK_COUNTS
];
93 /* Aggregate pressure state derived from the tasks */
96 /* Period time sampling buckets for each state of interest (ns) */
97 u32 times
[NR_PSI_STATES
];
99 /* Time of last task change in this group (rq_clock) */
102 /* 2nd cacheline updated by the aggregator */
104 /* Delta detection against the sampling buckets */
105 u32 times_prev
[NR_PSI_AGGREGATORS
][NR_PSI_STATES
]
106 ____cacheline_aligned_in_smp
;
109 /* PSI growth tracking window */
111 /* Window size in ns */
114 /* Start time of the current window in ns */
117 /* Value at the start of the window */
120 /* Value growth in the previous window */
125 /* PSI state being monitored by the trigger */
126 enum psi_states state
;
128 /* User-specified threshold in ns */
131 /* List node inside triggers list */
132 struct list_head node
;
134 /* Backpointer needed during trigger destruction */
135 struct psi_group
*group
;
137 /* Wait queue for polling */
138 wait_queue_head_t event_wait
;
140 /* Kernfs file for cgroup triggers */
141 struct kernfs_open_file
*of
;
143 /* Pending event flag */
146 /* Tracking window */
147 struct psi_window win
;
150 * Time last event was generated. Used for rate-limiting
151 * events to one per window
155 /* Deferred event(s) from previous ratelimit window */
158 /* Trigger type - PSI_AVGS for unprivileged, PSI_POLL for RT */
159 enum psi_aggregators aggregator
;
163 struct psi_group
*parent
;
166 /* Protects data used by the aggregator */
167 struct mutex avgs_lock
;
169 /* Per-cpu task state & time tracking */
170 struct psi_group_cpu __percpu
*pcpu
;
172 /* Running pressure averages */
173 u64 avg_total
[NR_PSI_STATES
- 1];
177 /* Aggregator work control */
178 struct delayed_work avgs_work
;
180 /* Unprivileged triggers against N*PSI_FREQ windows */
181 struct list_head avg_triggers
;
182 u32 avg_nr_triggers
[NR_PSI_STATES
- 1];
184 /* Total stall times and sampled pressure averages */
185 u64 total
[NR_PSI_AGGREGATORS
][NR_PSI_STATES
- 1];
186 unsigned long avg
[NR_PSI_STATES
- 1][3];
188 /* Monitor RT polling work control */
189 struct task_struct __rcu
*rtpoll_task
;
190 struct timer_list rtpoll_timer
;
191 wait_queue_head_t rtpoll_wait
;
192 atomic_t rtpoll_wakeup
;
193 atomic_t rtpoll_scheduled
;
195 /* Protects data used by the monitor */
196 struct mutex rtpoll_trigger_lock
;
198 /* Configured RT polling triggers */
199 struct list_head rtpoll_triggers
;
200 u32 rtpoll_nr_triggers
[NR_PSI_STATES
- 1];
202 u64 rtpoll_min_period
;
204 /* Total stall times at the start of RT polling monitor activation */
205 u64 rtpoll_total
[NR_PSI_STATES
- 1];
206 u64 rtpoll_next_update
;
210 #else /* CONFIG_PSI */
/* !CONFIG_PSI branch: the resource count is defined as zero. */
212 #define NR_PSI_RESOURCES 0
/*
 * !CONFIG_PSI branch: empty definition of struct psi_group, so the type
 * still exists (with no members) when PSI is compiled out.
 */
214 struct psi_group
{ };
216 #endif /* CONFIG_PSI */
218 #endif /* _LINUX_PSI_TYPES_H */