// SPDX-License-Identifier: GPL-2.0

/* Copyright (c) 2021 Facebook */
/* Copyright (c) 2021 Google */

#include <sys/resource.h>
#include <linux/err.h>
#include <linux/zalloc.h>
#include <linux/perf_event.h>
#include <api/fs/fs.h>
#include <perf/bpf_perf.h>

#include "bpf_counter.h"
#include "cgroup.h"
#include "counts.h"
#include "cpumap.h"
#include "debug.h"
#include "evsel.h"
#include "evlist.h"
#include "target.h"
#include "thread_map.h"

#include "bpf_skel/bperf_cgroup.skel.h"
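
/*
 * A software PERF_COUNT_SW_CGROUP_SWITCHES event is opened on every CPU and
 * used as the attach point for the BPF program: each cgroup switch fires the
 * event, letting the program snapshot the counters for the outgoing cgroup.
 */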
static struct perf_event_attr cgrp_switch_attr = {
	.type = PERF_TYPE_SOFTWARE,
	.config = PERF_COUNT_SW_CGROUP_SWITCHES,
	.size = sizeof(cgrp_switch_attr),
	.sample_period = 1,
	.disabled = 1,
};

static struct evsel *cgrp_switch;
static struct bperf_cgroup_bpf *skel;

#define FD(evt, cpu) (*(int *)xyarray__entry(evt->core.fd, cpu, 0))
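
/*
 * Open and load the shared BPF skeleton, attach its on_cgrp_switch program to
 * the per-cpu cgroup-switch events, and populate the events and cgrp_idx maps.
 * This runs once for the whole evlist.
 */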
static int bperf_load_program(struct evlist *evlist)
{
	struct bpf_link *link;
	struct evsel *evsel;
	struct cgroup *cgrp, *leader_cgrp;
	int i, j;
	struct perf_cpu cpu;
	int total_cpus = cpu__max_cpu().cpu;
	int map_size, map_fd;
	int prog_fd, err;

	skel = bperf_cgroup_bpf__open();
	if (!skel) {
		pr_err("Failed to open cgroup skeleton\n");
		return -1;
	}

	skel->rodata->num_cpus = total_cpus;
	skel->rodata->num_events = evlist->core.nr_entries / nr_cgroups;

	if (cgroup_is_v2("perf_event") > 0)
		skel->rodata->use_cgroup_v2 = 1;

	BUG_ON(evlist->core.nr_entries % nr_cgroups != 0);

	/* we need one copy of events per cpu for reading */
	map_size = total_cpus * evlist->core.nr_entries / nr_cgroups;
	bpf_map__set_max_entries(skel->maps.events, map_size);
	bpf_map__set_max_entries(skel->maps.cgrp_idx, nr_cgroups);
	/* previous result is saved in a per-cpu array */
	map_size = evlist->core.nr_entries / nr_cgroups;
	bpf_map__set_max_entries(skel->maps.prev_readings, map_size);
	/* cgroup result needs all events (per-cpu) */
	map_size = evlist->core.nr_entries;
	bpf_map__set_max_entries(skel->maps.cgrp_readings, map_size);
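
	/*
	 * Map sizes must be fixed before bperf_cgroup_bpf__load() below.
	 * For illustration (hypothetical numbers): with 2 cgroups, 3 events
	 * per cgroup and 8 CPUs, events holds 3 * 8 = 24 perf event fds,
	 * prev_readings holds 3 entries (a per-cpu array), and cgrp_readings
	 * holds 2 * 3 = 6 entries (also per-cpu).
	 */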
	err = bperf_cgroup_bpf__load(skel);
	if (err) {
		pr_err("Failed to load cgroup skeleton\n");
		goto out;
	}

	err = -1;

	cgrp_switch = evsel__new(&cgrp_switch_attr);
	if (evsel__open_per_cpu(cgrp_switch, evlist->core.all_cpus, -1) < 0) {
		pr_err("Failed to open cgroup switches event\n");
		goto out;
	}

	perf_cpu_map__for_each_cpu(cpu, i, evlist->core.all_cpus) {
		link = bpf_program__attach_perf_event(skel->progs.on_cgrp_switch,
						      FD(cgrp_switch, i));
		if (IS_ERR(link)) {
			pr_err("Failed to attach cgroup program\n");
			err = PTR_ERR(link);
			goto out;
		}
	}

	/*
	 * Update cgrp_idx map from cgroup-id to event index.
	 */
	cgrp = NULL;
	i = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (cgrp == NULL || evsel->cgrp == leader_cgrp) {
			leader_cgrp = evsel->cgrp;
			evsel->cgrp = NULL;

			/* open single copy of the events w/o cgroup */
			err = evsel__open_per_cpu(evsel, evsel->core.cpus, -1);
			if (err == 0)
				evsel->supported = true;

			map_fd = bpf_map__fd(skel->maps.events);
			perf_cpu_map__for_each_cpu(cpu, j, evsel->core.cpus) {
				int fd = FD(evsel, j);
				__u32 idx = evsel->core.idx * total_cpus + cpu.cpu;

				bpf_map_update_elem(map_fd, &idx, &fd, BPF_ANY);
			}

			evsel->cgrp = leader_cgrp;
		}

		if (evsel->cgrp == cgrp)
			continue;

		cgrp = evsel->cgrp;

		if (read_cgroup_id(cgrp) < 0) {
			pr_debug("Failed to get cgroup id for %s\n", cgrp->name);
			cgrp->id = 0;
		}

		map_fd = bpf_map__fd(skel->maps.cgrp_idx);
		err = bpf_map_update_elem(map_fd, &cgrp->id, &i, BPF_ANY);
		if (err < 0) {
			pr_err("Failed to update cgroup index map\n");
			goto out;
		}

		i++;
	}

	/*
	 * bperf uses BPF_PROG_TEST_RUN to get accurate readings.  Check
	 * whether the kernel supports it.
	 */
	prog_fd = bpf_program__fd(skel->progs.trigger_read);
	err = bperf_trigger_reading(prog_fd, 0);
	if (err) {
		pr_warning("The kernel does not support test_run for raw_tp BPF programs.\n"
			   "Therefore, --for-each-cgroup might show inaccurate readings\n");
		err = 0;
	}

out:
	return err;
}
static int bperf_cgrp__load(struct evsel *evsel,
			    struct target *target __maybe_unused)
{
	static bool bperf_loaded = false;

	evsel->bperf_leader_prog_fd = -1;
	evsel->bperf_leader_link_fd = -1;

	if (!bperf_loaded && bperf_load_program(evsel->evlist))
		return -1;

	bperf_loaded = true;
	/* just to bypass bpf_counter_skip() */
	evsel->follower_skel = (struct bperf_follower_bpf *)skel;

	return 0;
}
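
/*
 * The perf event fds were already stored in the events map by
 * bperf_load_program(), so there is nothing left to install here.
 */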
static int bperf_cgrp__install_pe(struct evsel *evsel __maybe_unused,
				  int cpu __maybe_unused, int fd __maybe_unused)
{
	return 0;
}

/*
 * Trigger the leader prog on each cpu, so the cgrp_readings map can get
 * the latest results.
 */
static int bperf_cgrp__sync_counters(struct evlist *evlist)
{
	struct perf_cpu cpu;
	int idx;
	int prog_fd = bpf_program__fd(skel->progs.trigger_read);

	perf_cpu_map__for_each_cpu(cpu, idx, evlist->core.all_cpus)
		bperf_trigger_reading(prog_fd, cpu.cpu);

	return 0;
}
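
/*
 * enable/disable toggle a single global flag in the skeleton's BSS; only the
 * first evsel in the list does the work, so the flag flips exactly once.
 */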
static int bperf_cgrp__enable(struct evsel *evsel)
{
	if (evsel->core.idx)
		return 0;

	bperf_cgrp__sync_counters(evsel->evlist);

	skel->bss->enabled = 1;
	return 0;
}

static int bperf_cgrp__disable(struct evsel *evsel)
{
	if (evsel->core.idx)
		return 0;

	bperf_cgrp__sync_counters(evsel->evlist);

	skel->bss->enabled = 0;
	return 0;
}
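
/*
 * Read back the per-cgroup results: trigger a final update on every cpu,
 * then copy each event's per-cpu values from the cgrp_readings map into
 * the evsel's perf_counts.
 */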
static int bperf_cgrp__read(struct evsel *evsel)
{
	struct evlist *evlist = evsel->evlist;
	int total_cpus = cpu__max_cpu().cpu;
	struct perf_counts_values *counts;
	struct bpf_perf_event_value *values;
	int reading_map_fd, err = 0;

	if (evsel->core.idx)
		return 0;

	bperf_cgrp__sync_counters(evsel->evlist);

	values = calloc(total_cpus, sizeof(*values));
	if (values == NULL)
		return -ENOMEM;

	reading_map_fd = bpf_map__fd(skel->maps.cgrp_readings);

	evlist__for_each_entry(evlist, evsel) {
		__u32 idx = evsel->core.idx;
		int i;
		struct perf_cpu cpu;

		err = bpf_map_lookup_elem(reading_map_fd, &idx, values);
		if (err) {
			pr_err("bpf map lookup failed: idx=%u, event=%s, cgrp=%s\n",
			       idx, evsel__name(evsel), evsel->cgrp->name);
			goto out;
		}

		perf_cpu_map__for_each_cpu(cpu, i, evsel->core.cpus) {
			counts = perf_counts(evsel->counts, i, 0);
			counts->val = values[cpu.cpu].counter;
			counts->ena = values[cpu.cpu].enabled;
			counts->run = values[cpu.cpu].running;
		}
	}

out:
	free(values);
	return err;
}
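
/*
 * Tear down the shared skeleton and the cgroup-switch event; the
 * evsel->core.idx check keeps this from running more than once.
 */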
static int bperf_cgrp__destroy(struct evsel *evsel)
{
	if (evsel->core.idx)
		return 0;

	bperf_cgroup_bpf__destroy(skel);
	evsel__delete(cgrp_switch);  // it'll destroy on_switch progs too

	return 0;
}
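
/* counter ops used for per-cgroup BPF counting (--bpf-counters with --for-each-cgroup) */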
struct bpf_counter_ops bperf_cgrp_ops = {
	.load		= bperf_cgrp__load,
	.enable		= bperf_cgrp__enable,
	.disable	= bperf_cgrp__disable,
	.read		= bperf_cgrp__read,
	.install_pe	= bperf_cgrp__install_pe,
	.destroy	= bperf_cgrp__destroy,
};