1 /* SPDX-License-Identifier: GPL-2.0 */
5 * By default, it operates as a simple global weighted vtime scheduler and can
6 * be switched to FIFO scheduling. It also demonstrates the following niceties.
8 * - Statistics tracking how many tasks are queued to local and global dsq's.
9 * - Termination notification for userspace.
11 * While very simple, this scheduler should work reasonably well on CPUs with a
12 * uniform L3 cache topology. While preemption is not implemented, the fact that
13 * the scheduling queue is shared across all CPUs means that whatever is at the
14 * front of the queue is likely to be executed fairly quickly given enough
15 * number of CPUs. The FIFO scheduling mode may be beneficial to some workloads
16 * but comes with the usual problems with FIFO scheduling where saturating
17 * threads can easily drown out interactive ones.
19 * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
20 * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
21 * Copyright (c) 2022 David Vernet <dvernet@meta.com>
23 #include <scx/common.bpf.h>
25 char _license
[] SEC("license") = "GPL";
27 const volatile bool fifo_sched
;
33 * Built-in DSQs such as SCX_DSQ_GLOBAL cannot be used as priority queues
34 * (meaning, cannot be dispatched to with scx_bpf_dsq_insert_vtime()). We
35 * therefore create a separate DSQ with ID 0 that we dispatch to and consume
36 * from. If scx_simple only supported global FIFO scheduling, then we could just
42 __uint(type
, BPF_MAP_TYPE_PERCPU_ARRAY
);
43 __uint(key_size
, sizeof(u32
));
44 __uint(value_size
, sizeof(u64
));
45 __uint(max_entries
, 2); /* [local, global] */
48 static void stat_inc(u32 idx
)
50 u64
*cnt_p
= bpf_map_lookup_elem(&stats
, &idx
);
55 static inline bool vtime_before(u64 a
, u64 b
)
57 return (s64
)(a
- b
) < 0;
60 s32
BPF_STRUCT_OPS(simple_select_cpu
, struct task_struct
*p
, s32 prev_cpu
, u64 wake_flags
)
65 cpu
= scx_bpf_select_cpu_dfl(p
, prev_cpu
, wake_flags
, &is_idle
);
67 stat_inc(0); /* count local queueing */
68 scx_bpf_dsq_insert(p
, SCX_DSQ_LOCAL
, SCX_SLICE_DFL
, 0);
74 void BPF_STRUCT_OPS(simple_enqueue
, struct task_struct
*p
, u64 enq_flags
)
76 stat_inc(1); /* count global queueing */
79 scx_bpf_dsq_insert(p
, SHARED_DSQ
, SCX_SLICE_DFL
, enq_flags
);
81 u64 vtime
= p
->scx
.dsq_vtime
;
84 * Limit the amount of budget that an idling task can accumulate
87 if (vtime_before(vtime
, vtime_now
- SCX_SLICE_DFL
))
88 vtime
= vtime_now
- SCX_SLICE_DFL
;
90 scx_bpf_dsq_insert_vtime(p
, SHARED_DSQ
, SCX_SLICE_DFL
, vtime
,
95 void BPF_STRUCT_OPS(simple_dispatch
, s32 cpu
, struct task_struct
*prev
)
97 scx_bpf_dsq_move_to_local(SHARED_DSQ
);
100 void BPF_STRUCT_OPS(simple_running
, struct task_struct
*p
)
106 * Global vtime always progresses forward as tasks start executing. The
107 * test and update can be performed concurrently from multiple CPUs and
108 * thus racy. Any error should be contained and temporary. Let's just
111 if (vtime_before(vtime_now
, p
->scx
.dsq_vtime
))
112 vtime_now
= p
->scx
.dsq_vtime
;
115 void BPF_STRUCT_OPS(simple_stopping
, struct task_struct
*p
, bool runnable
)
121 * Scale the execution time by the inverse of the weight and charge.
123 * Note that the default yield implementation yields by setting
124 * @p->scx.slice to zero and the following would treat the yielding task
125 * as if it has consumed all its slice. If this penalizes yielding tasks
126 * too much, determine the execution time by taking explicit timestamps
127 * instead of depending on @p->scx.slice.
129 p
->scx
.dsq_vtime
+= (SCX_SLICE_DFL
- p
->scx
.slice
) * 100 / p
->scx
.weight
;
132 void BPF_STRUCT_OPS(simple_enable
, struct task_struct
*p
)
134 p
->scx
.dsq_vtime
= vtime_now
;
137 s32
BPF_STRUCT_OPS_SLEEPABLE(simple_init
)
139 return scx_bpf_create_dsq(SHARED_DSQ
, -1);
142 void BPF_STRUCT_OPS(simple_exit
, struct scx_exit_info
*ei
)
147 SCX_OPS_DEFINE(simple_ops
,
148 .select_cpu
= (void *)simple_select_cpu
,
149 .enqueue
= (void *)simple_enqueue
,
150 .dispatch
= (void *)simple_dispatch
,
151 .running
= (void *)simple_running
,
152 .stopping
= (void *)simple_stopping
,
153 .enable
= (void *)simple_enable
,
154 .init
= (void *)simple_init
,
155 .exit
= (void *)simple_exit
,