1 // SPDX-License-Identifier: GPL-2.0
3 #include <linux/version.h>
4 #include <linux/ptrace.h>
5 #include <uapi/linux/bpf.h>
6 #include <bpf/bpf_helpers.h>
9 * The CPU number, cstate number and pstate number are based
10 * on 96boards Hikey with octa CA53 CPUs.
12 * Every CPU has three idle states for cstate:
13 * WFI, CPU_OFF, CLUSTER_OFF
15 * Every CPU has 5 operating points:
16 * 208MHz, 432MHz, 729MHz, 960MHz, 1200MHz
18 * This code is based on these assumptions; other platforms
19 * need to adjust these definitions.
/*
 * Table sizes used for the per-CPU statistics maps: five operating points
 * (pstates) and three idle states (cstates), matching the platform
 * description given earlier in this file.
 */
22 #define MAX_PSTATE_ENTRIES 5
23 #define MAX_CSTATE_ENTRIES 3
/*
 * Operating-point table searched by find_cpu_pstate_idx(). The five entries
 * equal the MHz values listed in the platform description multiplied by
 * 1000, so they are presumably expressed in kHz -- TODO confirm against the
 * unit reported by the cpu_frequency tracepoint.
 */
25 static int cpu_opps
[] = { 208000, 432000, 729000, 960000, 1200000 };
28 * The my_map structure is used to record the cstate and pstate index and
29 * timestamp (Idx, Ts); when a new event arrives we need to update the
30 * combination with the new state index and timestamp (Idx`, Ts`).
32 * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time
33 * interval for the previous state: Duration(Idx) = Ts` - Ts.
35 * Every CPU has one array like the one below for recording the state index
36 * and timestamp, with separate records for cstate and pstate:
38 * +--------------------------+
39 * | cstate timestamp |
40 * +--------------------------+
42 * +--------------------------+
43 * | pstate timestamp |
44 * +--------------------------+
46 * +--------------------------+
/*
 * Slot offsets inside each CPU's group of my_map entries. The tracepoint
 * programs build a key as cpu * MAP_OFF_NUM + MAP_OFF_<slot>.
 * NOTE(review): MAP_OFF_NUM is used by the programs but its definition is
 * not present in this part of the file; presumably it is 4 -- confirm.
 */
48 #define MAP_OFF_CSTATE_TIME 0
49 #define MAP_OFF_CSTATE_IDX 1
50 #define MAP_OFF_PSTATE_TIME 2
51 #define MAP_OFF_PSTATE_IDX 3
/*
 * my_map: BPF array map with u32 keys and u64 values holding, per CPU, the
 * most recent cstate/pstate timestamp and index. It is sized for
 * MAX_CPU * MAP_OFF_NUM entries and is indexed by the tracepoint programs
 * as cpu * MAP_OFF_NUM + MAP_OFF_<slot>.
 */
54 struct bpf_map_def
SEC("maps") my_map
= {
55 .type
= BPF_MAP_TYPE_ARRAY
,
56 .key_size
= sizeof(u32
),
57 .value_size
= sizeof(u64
),
58 .max_entries
= MAX_CPU
* MAP_OFF_NUM
,
61 /* cstate_duration records duration time for every idle state per CPU */
/*
 * BPF array map with u32 keys and u64 values, sized for
 * MAX_CPU * MAX_CSTATE_ENTRIES entries. bpf_prog1 addresses it with
 * key = cpu * MAX_CSTATE_ENTRIES + <previous idle-state index> and adds the
 * elapsed time to the entry with __sync_fetch_and_add().
 */
62 struct bpf_map_def
SEC("maps") cstate_duration
= {
63 .type
= BPF_MAP_TYPE_ARRAY
,
64 .key_size
= sizeof(u32
),
65 .value_size
= sizeof(u64
),
66 .max_entries
= MAX_CPU
* MAX_CSTATE_ENTRIES
,
69 /* pstate_duration records duration time for every operating point per CPU */
/*
 * BPF array map with u32 keys and u64 values, sized for
 * MAX_CPU * MAX_PSTATE_ENTRIES entries. Both tracepoint programs address it
 * with key = cpu * MAX_PSTATE_ENTRIES + pstate_idx, where pstate_idx comes
 * from find_cpu_pstate_idx(), and add the elapsed time to the entry with
 * __sync_fetch_and_add().
 */
70 struct bpf_map_def
SEC("maps") pstate_duration
= {
71 .type
= BPF_MAP_TYPE_ARRAY
,
72 .key_size
= sizeof(u32
),
73 .value_size
= sizeof(u64
),
74 .max_entries
= MAX_CPU
* MAX_PSTATE_ENTRIES
,
78 * The trace events for cpu_idle and cpu_frequency are taken from:
79 * /sys/kernel/debug/tracing/events/power/cpu_idle/format
80 * /sys/kernel/debug/tracing/events/power/cpu_frequency/format
82 * These two events have same format, so define one common structure.
/*
 * find_cpu_pstate_idx() - map a frequency value to its index in cpu_opps[].
 * @frequency: frequency to look up; compared for exact equality against
 *             each cpu_opps[] entry.
 *
 * Performs a linear scan over cpu_opps[]. The return statements are not
 * present in this listing; per the existing comment below, the function
 * returns MAX_PSTATE_ENTRIES when no entry matches, and presumably the
 * matching index otherwise -- confirm against the full source.
 *
 * NOTE(review): the element count is computed as
 * sizeof(cpu_opps) / sizeof(u32) although cpu_opps is declared as int[];
 * this is only correct while sizeof(int) == sizeof(u32).
 */
90 /* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */
91 static u32
find_cpu_pstate_idx(u32 frequency
)
95 for (i
= 0; i
< sizeof(cpu_opps
) / sizeof(u32
); i
++) {
96 if (frequency
== cpu_opps
[i
])
/*
 * bpf_prog1() - handler attached to the power/cpu_idle tracepoint.
 * @ctx: tracepoint arguments; this program reads ctx->cpu_id and ctx->state.
 *
 * Visible behaviour:
 *  - looks up the four my_map slots of the CPU (cstate time, cstate index,
 *    pstate time, pstate index);
 *  - saves the old *cstate in prev_state and stores ctx->state in *cstate;
 *  - takes the current time with bpf_ktime_get_ns() and computes
 *    delta = cur_ts - *cts;
 *  - when ctx->state != (u32)-1 (CPU entering an idle state): recomputes
 *    delta = cur_ts - *pts, translates *pstate with find_cpu_pstate_idx()
 *    and adds delta to pstate_duration[cpu * MAX_PSTATE_ENTRIES + idx];
 *  - in the other path (CPU leaving an idle state): adds delta to
 *    cstate_duration[cpu * MAX_CSTATE_ENTRIES + prev_state].
 *
 * This listing is incomplete: braces, return statements, the assignment of
 * cpu, the declaration of val and any NULL checks on the lookups are not
 * visible here -- consult the full source before changing the logic.
 *
 * NOTE(review): the range check is "ctx->cpu_id > MAX_CPU", which lets
 * cpu_id == MAX_CPU through; ">=" looks intended -- confirm.
 * NOTE(review): the comment for the (u32)-1 case says "interval for the
 * pstate", but the diagram label and the code in that path update
 * cstate_duration; the comment presumably should say "cstate".
 */
103 SEC("tracepoint/power/cpu_idle")
104 int bpf_prog1(struct cpu_args
*ctx
)
106 u64
*cts
, *pts
, *cstate
, *pstate
, prev_state
, cur_ts
, delta
;
107 u32 key
, cpu
, pstate_idx
;
110 if (ctx
->cpu_id
> MAX_CPU
)
115 key
= cpu
* MAP_OFF_NUM
+ MAP_OFF_CSTATE_TIME
;
116 cts
= bpf_map_lookup_elem(&my_map
, &key
);
120 key
= cpu
* MAP_OFF_NUM
+ MAP_OFF_CSTATE_IDX
;
121 cstate
= bpf_map_lookup_elem(&my_map
, &key
);
125 key
= cpu
* MAP_OFF_NUM
+ MAP_OFF_PSTATE_TIME
;
126 pts
= bpf_map_lookup_elem(&my_map
, &key
);
130 key
= cpu
* MAP_OFF_NUM
+ MAP_OFF_PSTATE_IDX
;
131 pstate
= bpf_map_lookup_elem(&my_map
, &key
);
135 prev_state
= *cstate
;
136 *cstate
= ctx
->state
;
139 *cts
= bpf_ktime_get_ns();
143 cur_ts
= bpf_ktime_get_ns();
144 delta
= cur_ts
- *cts
;
148 * When state doesn't equal to (u32)-1, the cpu will enter
149 * one idle state; for this case we need to record interval
153 * +---------------------+
159 * |<- pstate duration ->|
163 if (ctx
->state
!= (u32
)-1) {
165 /* record pstate after have first cpu_frequency event */
169 delta
= cur_ts
- *pts
;
171 pstate_idx
= find_cpu_pstate_idx(*pstate
);
172 if (pstate_idx
>= MAX_PSTATE_ENTRIES
)
175 key
= cpu
* MAX_PSTATE_ENTRIES
+ pstate_idx
;
176 val
= bpf_map_lookup_elem(&pstate_duration
, &key
);
178 __sync_fetch_and_add((long *)val
, delta
);
181 * When state equal to (u32)-1, the cpu just exits from one
182 * specific idle state; for this case we need to record
183 * interval for the pstate.
190 * +---------------------+
192 * |<- cstate duration ->|
198 key
= cpu
* MAX_CSTATE_ENTRIES
+ prev_state
;
199 val
= bpf_map_lookup_elem(&cstate_duration
, &key
);
201 __sync_fetch_and_add((long *)val
, delta
);
204 /* Update timestamp for pstate as new start time */
/*
 * bpf_prog2() - handler attached to the power/cpu_frequency tracepoint.
 * @ctx: tracepoint arguments; the visible code reads ctx->state, which for
 *       this event carries the frequency value stored into *pstate.
 *
 * Visible behaviour:
 *  - looks up the pstate time, pstate index and cstate index slots of the
 *    CPU in my_map;
 *  - saves the old *pstate in prev_state and stores ctx->state in *pstate;
 *  - takes the current time with bpf_ktime_get_ns() and computes
 *    delta = cur_ts - *pts;
 *  - tests *cstate against (u32)(-1); per the existing comment, the program
 *    bails out while the CPU is idle so idle time is not counted as pstate
 *    time;
 *  - translates *pstate with find_cpu_pstate_idx() and adds delta to
 *    pstate_duration[cpu * MAX_PSTATE_ENTRIES + pstate_idx].
 *
 * This listing is incomplete: braces, return statements, the assignment of
 * cpu, the declaration of val and any NULL checks on the lookups are not
 * visible here -- consult the full source before changing the logic.
 *
 * NOTE(review): prev_state is saved before *pstate is overwritten, yet the
 * visible lookup calls find_cpu_pstate_idx(*pstate), i.e. with the NEW
 * frequency. The comment in the body says the interval belongs to the
 * previous OPP, so prev_state looks like the intended argument -- confirm.
 */
211 SEC("tracepoint/power/cpu_frequency")
212 int bpf_prog2(struct cpu_args
*ctx
)
214 u64
*pts
, *cstate
, *pstate
, prev_state
, cur_ts
, delta
;
215 u32 key
, cpu
, pstate_idx
;
220 key
= cpu
* MAP_OFF_NUM
+ MAP_OFF_PSTATE_TIME
;
221 pts
= bpf_map_lookup_elem(&my_map
, &key
);
225 key
= cpu
* MAP_OFF_NUM
+ MAP_OFF_PSTATE_IDX
;
226 pstate
= bpf_map_lookup_elem(&my_map
, &key
);
230 key
= cpu
* MAP_OFF_NUM
+ MAP_OFF_CSTATE_IDX
;
231 cstate
= bpf_map_lookup_elem(&my_map
, &key
);
235 prev_state
= *pstate
;
236 *pstate
= ctx
->state
;
239 *pts
= bpf_ktime_get_ns();
243 cur_ts
= bpf_ktime_get_ns();
244 delta
= cur_ts
- *pts
;
247 /* When CPU is in idle, bail out to skip pstate statistics */
248 if (*cstate
!= (u32
)(-1))
252 * The cpu changes to another different OPP (in below diagram
253 * change frequency from OPP3 to OPP1), need recording interval
254 * for previous frequency OPP3 and update timestamp as start
255 * time for new frequency OPP1.
258 * +---------------------+
264 * |<- pstate duration ->|
268 pstate_idx
= find_cpu_pstate_idx(*pstate
);
269 if (pstate_idx
>= MAX_PSTATE_ENTRIES
)
272 key
= cpu
* MAX_PSTATE_ENTRIES
+ pstate_idx
;
273 val
= bpf_map_lookup_elem(&pstate_duration
, &key
);
275 __sync_fetch_and_add((long *)val
, delta
);
/*
 * Object metadata: the license string is placed in the "license" ELF
 * section and the kernel version code (LINUX_VERSION_CODE) in the
 * "version" section.
 */
280 char _license
[] SEC("license") = "GPL";
281 u32 _version
SEC("version") = LINUX_VERSION_CODE
;