Merge tag 'trace-printf-v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/trace...
[drm/drm-misc.git] / tools / sched_ext / scx_simple.bpf.c
blob31f915b286c6aa0eeb67455d224d0c8ba23b9933
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3 * A simple scheduler.
5 * By default, it operates as a simple global weighted vtime scheduler and can
6 * be switched to FIFO scheduling. It also demonstrates the following niceties.
8 * - Statistics tracking how many tasks are queued to local and global dsq's.
9 * - Termination notification for userspace.
11 * While very simple, this scheduler should work reasonably well on CPUs with a
12 * uniform L3 cache topology. While preemption is not implemented, the fact that
13 * the scheduling queue is shared across all CPUs means that whatever is at the
14 * front of the queue is likely to be executed fairly quickly given enough
15 * number of CPUs. The FIFO scheduling mode may be beneficial to some workloads
16 * but comes with the usual problems with FIFO scheduling where saturating
17 * threads can easily drown out interactive ones.
19 * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
20 * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
21 * Copyright (c) 2022 David Vernet <dvernet@meta.com>
23 #include <scx/common.bpf.h>
25 char _license[] SEC("license") = "GPL";
27 const volatile bool fifo_sched;
29 static u64 vtime_now;
30 UEI_DEFINE(uei);
33 * Built-in DSQs such as SCX_DSQ_GLOBAL cannot be used as priority queues
34 * (meaning, cannot be dispatched to with scx_bpf_dsq_insert_vtime()). We
35 * therefore create a separate DSQ with ID 0 that we dispatch to and consume
36 * from. If scx_simple only supported global FIFO scheduling, then we could just
37 * use SCX_DSQ_GLOBAL.
39 #define SHARED_DSQ 0
41 struct {
42 __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
43 __uint(key_size, sizeof(u32));
44 __uint(value_size, sizeof(u64));
45 __uint(max_entries, 2); /* [local, global] */
46 } stats SEC(".maps");
48 static void stat_inc(u32 idx)
50 u64 *cnt_p = bpf_map_lookup_elem(&stats, &idx);
51 if (cnt_p)
52 (*cnt_p)++;
55 static inline bool vtime_before(u64 a, u64 b)
57 return (s64)(a - b) < 0;
60 s32 BPF_STRUCT_OPS(simple_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags)
62 bool is_idle = false;
63 s32 cpu;
65 cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle);
66 if (is_idle) {
67 stat_inc(0); /* count local queueing */
68 scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
71 return cpu;
74 void BPF_STRUCT_OPS(simple_enqueue, struct task_struct *p, u64 enq_flags)
76 stat_inc(1); /* count global queueing */
78 if (fifo_sched) {
79 scx_bpf_dsq_insert(p, SHARED_DSQ, SCX_SLICE_DFL, enq_flags);
80 } else {
81 u64 vtime = p->scx.dsq_vtime;
84 * Limit the amount of budget that an idling task can accumulate
85 * to one slice.
87 if (vtime_before(vtime, vtime_now - SCX_SLICE_DFL))
88 vtime = vtime_now - SCX_SLICE_DFL;
90 scx_bpf_dsq_insert_vtime(p, SHARED_DSQ, SCX_SLICE_DFL, vtime,
91 enq_flags);
95 void BPF_STRUCT_OPS(simple_dispatch, s32 cpu, struct task_struct *prev)
97 scx_bpf_dsq_move_to_local(SHARED_DSQ);
100 void BPF_STRUCT_OPS(simple_running, struct task_struct *p)
102 if (fifo_sched)
103 return;
106 * Global vtime always progresses forward as tasks start executing. The
107 * test and update can be performed concurrently from multiple CPUs and
108 * thus racy. Any error should be contained and temporary. Let's just
109 * live with it.
111 if (vtime_before(vtime_now, p->scx.dsq_vtime))
112 vtime_now = p->scx.dsq_vtime;
115 void BPF_STRUCT_OPS(simple_stopping, struct task_struct *p, bool runnable)
117 if (fifo_sched)
118 return;
121 * Scale the execution time by the inverse of the weight and charge.
123 * Note that the default yield implementation yields by setting
124 * @p->scx.slice to zero and the following would treat the yielding task
125 * as if it has consumed all its slice. If this penalizes yielding tasks
126 * too much, determine the execution time by taking explicit timestamps
127 * instead of depending on @p->scx.slice.
129 p->scx.dsq_vtime += (SCX_SLICE_DFL - p->scx.slice) * 100 / p->scx.weight;
132 void BPF_STRUCT_OPS(simple_enable, struct task_struct *p)
134 p->scx.dsq_vtime = vtime_now;
137 s32 BPF_STRUCT_OPS_SLEEPABLE(simple_init)
139 return scx_bpf_create_dsq(SHARED_DSQ, -1);
142 void BPF_STRUCT_OPS(simple_exit, struct scx_exit_info *ei)
144 UEI_RECORD(uei, ei);
147 SCX_OPS_DEFINE(simple_ops,
148 .select_cpu = (void *)simple_select_cpu,
149 .enqueue = (void *)simple_enqueue,
150 .dispatch = (void *)simple_dispatch,
151 .running = (void *)simple_running,
152 .stopping = (void *)simple_stopping,
153 .enable = (void *)simple_enable,
154 .init = (void *)simple_init,
155 .exit = (void *)simple_exit,
156 .name = "simple");