// SPDX-License-Identifier: GPL-2.0
#include <api/fs/fs.h>
#include "cpumap.h"
#include "debug.h"
#include "event.h"
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>
#include <linux/bitmap.h>

#include <linux/ctype.h>
#include <linux/zalloc.h>
#include <internal/cpumap.h>

static struct perf_cpu max_cpu_num;
static struct perf_cpu max_present_cpu_num;
static int max_node_num;
/*
 * The numa node X as read from /sys/devices/system/node/nodeX indexed by the
 * CPU number.
 */
static int *cpunode_map;
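
/*
 * Helpers for decoding PERF_RECORD_CPU_MAP payloads follow. A payload carries
 * either an explicit CPU list, a bitmask (in 32-bit or 64-bit words, selected
 * by mask32_data.long_size), or a [start_cpu, end_cpu] range.
 */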
bool perf_record_cpu_map_data__test_bit(int i,
					const struct perf_record_cpu_map_data *data)
{
	int bit_word32 = i / 32;
	__u32 bit_mask32 = 1U << (i & 31);
	int bit_word64 = i / 64;
	__u64 bit_mask64 = ((__u64)1) << (i & 63);

	return (data->mask32_data.long_size == 4)
		? (bit_word32 < data->mask32_data.nr) &&
		  (data->mask32_data.mask[bit_word32] & bit_mask32) != 0
		: (bit_word64 < data->mask64_data.nr) &&
		  (data->mask64_data.mask[bit_word64] & bit_mask64) != 0;
}
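
/*
 * Example (illustrative): for a record using 32-bit mask words with
 * mask[0] == 0x5, perf_record_cpu_map_data__test_bit(0, data) and
 * perf_record_cpu_map_data__test_bit(2, data) return true, while bit 1
 * tests false.
 */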
/* Read ith mask value from data into the given 64-bit sized bitmap */
static void perf_record_cpu_map_data__read_one_mask(const struct perf_record_cpu_map_data *data,
						     int i, unsigned long *bitmap)
{
#if __SIZEOF_LONG__ == 8
	if (data->mask32_data.long_size == 4)
		bitmap[0] = data->mask32_data.mask[i];
	else
		bitmap[0] = data->mask64_data.mask[i];
#else
	if (data->mask32_data.long_size == 4) {
		bitmap[0] = data->mask32_data.mask[i];
		bitmap[1] = 0;
	} else {
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
		bitmap[0] = (unsigned long)(data->mask64_data.mask[i] >> 32);
		bitmap[1] = (unsigned long)data->mask64_data.mask[i];
#else
		bitmap[0] = (unsigned long)data->mask64_data.mask[i];
		bitmap[1] = (unsigned long)(data->mask64_data.mask[i] >> 32);
#endif
	}
#endif
}
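
/*
 * The three cpu_map__from_*() constructors below rebuild a perf_cpu_map from
 * the corresponding PERF_CPU_MAP__* encodings; cpu_map__new_data() dispatches
 * on data->type.
 */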
static struct perf_cpu_map *cpu_map__from_entries(const struct perf_record_cpu_map_data *data)
{
	struct perf_cpu_map *map;

	map = perf_cpu_map__empty_new(data->cpus_data.nr);
	if (map) {
		unsigned int i;

		for (i = 0; i < data->cpus_data.nr; i++) {
			/*
			 * Special treatment for -1, which is not a real cpu
			 * number and must be stored as (int) -1 in map[i],
			 * otherwise it would become 65535.
			 */
			if (data->cpus_data.cpu[i] == (u16) -1)
				RC_CHK_ACCESS(map)->map[i].cpu = -1;
			else
				RC_CHK_ACCESS(map)->map[i].cpu = (int) data->cpus_data.cpu[i];
		}
	}

	return map;
}
static struct perf_cpu_map *cpu_map__from_mask(const struct perf_record_cpu_map_data *data)
{
	DECLARE_BITMAP(local_copy, 64);
	int weight = 0, mask_nr = data->mask32_data.nr;
	struct perf_cpu_map *map;

	for (int i = 0; i < mask_nr; i++) {
		perf_record_cpu_map_data__read_one_mask(data, i, local_copy);
		weight += bitmap_weight(local_copy, 64);
	}

	map = perf_cpu_map__empty_new(weight);
	if (!map)
		return NULL;

	for (int i = 0, j = 0; i < mask_nr; i++) {
		int cpus_per_i = (i * data->mask32_data.long_size * BITS_PER_BYTE);
		int cpu;

		perf_record_cpu_map_data__read_one_mask(data, i, local_copy);
		for_each_set_bit(cpu, local_copy, 64)
			RC_CHK_ACCESS(map)->map[j++].cpu = cpu + cpus_per_i;
	}
	return map;
}
static struct perf_cpu_map *cpu_map__from_range(const struct perf_record_cpu_map_data *data)
{
	struct perf_cpu_map *map;
	unsigned int i = 0;

	map = perf_cpu_map__empty_new(data->range_cpu_data.end_cpu -
				data->range_cpu_data.start_cpu + 1 + data->range_cpu_data.any_cpu);
	if (!map)
		return NULL;

	if (data->range_cpu_data.any_cpu)
		RC_CHK_ACCESS(map)->map[i++].cpu = -1;

	for (int cpu = data->range_cpu_data.start_cpu; cpu <= data->range_cpu_data.end_cpu;
	     i++, cpu++)
		RC_CHK_ACCESS(map)->map[i].cpu = cpu;

	return map;
}
struct perf_cpu_map *cpu_map__new_data(const struct perf_record_cpu_map_data *data)
{
	switch (data->type) {
	case PERF_CPU_MAP__CPUS:
		return cpu_map__from_entries(data);
	case PERF_CPU_MAP__MASK:
		return cpu_map__from_mask(data);
	case PERF_CPU_MAP__RANGE_CPUS:
		return cpu_map__from_range(data);
	default:
		pr_err("cpu_map__new_data unknown type %d\n", data->type);
		return NULL;
	}
}
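
/*
 * Sketch of typical use when handling a synthesized cpu map event (the
 * "event" variable is illustrative):
 *
 *	struct perf_cpu_map *map = cpu_map__new_data(&event->data);
 *
 *	if (map) {
 *		cpu_map__fprintf(map, stdout);
 *		perf_cpu_map__put(map);
 *	}
 */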
size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp)
{
#define BUFSIZE 1024
	char buf[BUFSIZE];

	cpu_map__snprint(map, buf, sizeof(buf));
	return fprintf(fp, "%s\n", buf);
#undef BUFSIZE
}
struct perf_cpu_map *perf_cpu_map__empty_new(int nr)
{
	struct perf_cpu_map *cpus = perf_cpu_map__alloc(nr);

	if (cpus != NULL) {
		for (int i = 0; i < nr; i++)
			RC_CHK_ACCESS(cpus)->map[i].cpu = -1;
	}

	return cpus;
}
struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr)
{
	struct cpu_aggr_map *cpus = malloc(sizeof(*cpus) + sizeof(struct aggr_cpu_id) * nr);

	if (cpus != NULL) {
		int i;

		cpus->nr = nr;
		refcount_set(&cpus->refcnt, 1);
		for (i = 0; i < nr; i++)
			cpus->map[i] = aggr_cpu_id__empty();
	}

	return cpus;
}
static int cpu__get_topology_int(int cpu, const char *name, int *value)
{
	char path[PATH_MAX];

	snprintf(path, PATH_MAX,
		"devices/system/cpu/cpu%d/topology/%s", cpu, name);

	return sysfs__read_int(path, value);
}
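
/*
 * Example: cpu__get_topology_int(0, "physical_package_id", &value) reads
 * "devices/system/cpu/cpu0/topology/physical_package_id" relative to the
 * sysfs mount point.
 */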
int cpu__get_socket_id(struct perf_cpu cpu)
{
	int value, ret = cpu__get_topology_int(cpu.cpu, "physical_package_id", &value);

	return ret ?: value;
}

struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data __maybe_unused)
{
	struct aggr_cpu_id id = aggr_cpu_id__empty();

	id.socket = cpu__get_socket_id(cpu);
	return id;
}
static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer)
{
	struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer;
	struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer;

	if (a->node != b->node)
		return a->node - b->node;
	else if (a->socket != b->socket)
		return a->socket - b->socket;
	else if (a->die != b->die)
		return a->die - b->die;
	else if (a->cluster != b->cluster)
		return a->cluster - b->cluster;
	else if (a->cache_lvl != b->cache_lvl)
		return a->cache_lvl - b->cache_lvl;
	else if (a->cache != b->cache)
		return a->cache - b->cache;
	else if (a->core != b->core)
		return a->core - b->core;
	else
		return a->thread_idx - b->thread_idx;
}
struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
				       aggr_cpu_id_get_t get_id,
				       void *data, bool needs_sort)
{
	int idx;
	struct perf_cpu cpu;
	struct cpu_aggr_map *c = cpu_aggr_map__empty_new(perf_cpu_map__nr(cpus));

	if (!c)
		return NULL;

	/* Reset size as it may only be partially filled */
	c->nr = 0;

	perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
		bool duplicate = false;
		struct aggr_cpu_id cpu_id = get_id(cpu, data);

		for (int j = 0; j < c->nr; j++) {
			if (aggr_cpu_id__equal(&cpu_id, &c->map[j])) {
				duplicate = true;
				break;
			}
		}
		if (!duplicate) {
			c->map[c->nr] = cpu_id;
			c->nr++;
		}
	}
	/* Trim the allocation if duplicates were dropped. */
	if (c->nr != perf_cpu_map__nr(cpus)) {
		struct cpu_aggr_map *trimmed_c =
			realloc(c,
				sizeof(struct cpu_aggr_map) + sizeof(struct aggr_cpu_id) * c->nr);

		if (trimmed_c)
			c = trimmed_c;
	}

	/* ensure we process ids in increasing order */
	if (needs_sort)
		qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp);

	return c;
}
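
/*
 * Sketch of typical use, aggregating a CPU map by socket (the "cpus" variable
 * is illustrative):
 *
 *	struct cpu_aggr_map *sockets =
 *		cpu_aggr_map__new(cpus, aggr_cpu_id__socket, NULL, true);
 */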
int cpu__get_die_id(struct perf_cpu cpu)
{
	int value, ret = cpu__get_topology_int(cpu.cpu, "die_id", &value);

	return ret ?: value;
}

struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data)
{
	struct aggr_cpu_id id;
	int die;

	die = cpu__get_die_id(cpu);
	/* There is no die_id on legacy systems. */
	if (die == -1)
		die = 0;

	/*
	 * die_id is relative to the socket, so start with the socket ID and
	 * then add the die to make a unique ID.
	 */
	id = aggr_cpu_id__socket(cpu, data);
	if (aggr_cpu_id__is_empty(&id))
		return id;

	id.die = die;
	return id;
}
int cpu__get_cluster_id(struct perf_cpu cpu)
{
	int value, ret = cpu__get_topology_int(cpu.cpu, "cluster_id", &value);

	return ret ?: value;
}

struct aggr_cpu_id aggr_cpu_id__cluster(struct perf_cpu cpu, void *data)
{
	int cluster = cpu__get_cluster_id(cpu);
	struct aggr_cpu_id id;

	/* There is no cluster_id on legacy systems. */
	if (cluster == -1)
		cluster = 0;

	id = aggr_cpu_id__die(cpu, data);
	if (aggr_cpu_id__is_empty(&id))
		return id;

	id.cluster = cluster;
	return id;
}
int cpu__get_core_id(struct perf_cpu cpu)
{
	int value, ret = cpu__get_topology_int(cpu.cpu, "core_id", &value);

	return ret ?: value;
}

struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data)
{
	struct aggr_cpu_id id;
	int core = cpu__get_core_id(cpu);

	/* aggr_cpu_id__cluster returns a struct with socket, die and cluster set. */
	id = aggr_cpu_id__cluster(cpu, data);
	if (aggr_cpu_id__is_empty(&id))
		return id;

	/*
	 * core_id is relative to socket and die; we need a global id, so
	 * combine the result from aggr_cpu_id__cluster with the core id.
	 */
	id.core = core;
	return id;
}
struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data)
{
	struct aggr_cpu_id id;

	/* aggr_cpu_id__core returns a struct with socket, die, cluster and core set. */
	id = aggr_cpu_id__core(cpu, data);
	if (aggr_cpu_id__is_empty(&id))
		return id;

	id.cpu = cpu;
	return id;
}
struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data __maybe_unused)
{
	struct aggr_cpu_id id = aggr_cpu_id__empty();

	id.node = cpu__get_node(cpu);
	return id;
}
struct aggr_cpu_id aggr_cpu_id__global(struct perf_cpu cpu, void *data __maybe_unused)
{
	struct aggr_cpu_id id = aggr_cpu_id__empty();

	/* it always aggregates to cpu 0 */
	cpu.cpu = 0;
	id.cpu = cpu;
	return id;
}
/* setup simple routines to easily access node numbers given a cpu number */
static int get_max_num(char *path, int *max)
{
	size_t num;
	char *buf;
	int err = 0;

	if (filename__read_str(path, &buf, &num))
		return -1;

	buf[num] = '\0';

	/* start on the right, to find highest node num */
	while (--num) {
		if ((buf[num] == ',') || (buf[num] == '-')) {
			num++;
			break;
		}
	}
	if (sscanf(&buf[num], "%d", max) < 1) {
		err = -1;
		goto out;
	}

	/* convert from 0-based to 1-based */
	(*max)++;

out:
	free(buf);
	return err;
}
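
/*
 * Example: for a sysfs "possible" file containing "0-7", the scan from the
 * right stops after the '-', sscanf() reads 7, and *max becomes 8.
 */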
/* Determine highest possible cpu in the system for sparse allocation */
static void set_max_cpu_num(void)
{
	const char *mnt;
	char path[PATH_MAX];
	int ret = -1;

	/* set up default */
	max_cpu_num.cpu = 4096;
	max_present_cpu_num.cpu = 4096;

	mnt = sysfs__mountpoint();
	if (!mnt)
		goto out;

	/* get the highest possible cpu number for a sparse allocation */
	ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/possible", mnt);
	if (ret >= PATH_MAX) {
		pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
		goto out;
	}

	ret = get_max_num(path, &max_cpu_num.cpu);
	if (ret)
		goto out;

	/* get the highest present cpu number for a sparse allocation */
	ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
	if (ret >= PATH_MAX) {
		pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
		goto out;
	}

	ret = get_max_num(path, &max_present_cpu_num.cpu);

out:
	if (ret)
		pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu);
}
/* Determine highest possible node in the system for sparse allocation */
static void set_max_node_num(void)
{
	const char *mnt;
	char path[PATH_MAX];
	int ret = -1;

	/* set up default */
	max_node_num = 8;

	mnt = sysfs__mountpoint();
	if (!mnt)
		goto out;

	/* get the highest possible node number for a sparse allocation */
	ret = snprintf(path, PATH_MAX, "%s/devices/system/node/possible", mnt);
	if (ret >= PATH_MAX) {
		pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
		goto out;
	}

	ret = get_max_num(path, &max_node_num);

out:
	if (ret)
		pr_err("Failed to read max nodes, using default of %d\n", max_node_num);
}
int cpu__max_node(void)
{
	if (unlikely(!max_node_num))
		set_max_node_num();

	return max_node_num;
}

struct perf_cpu cpu__max_cpu(void)
{
	if (unlikely(!max_cpu_num.cpu))
		set_max_cpu_num();

	return max_cpu_num;
}

struct perf_cpu cpu__max_present_cpu(void)
{
	if (unlikely(!max_present_cpu_num.cpu))
		set_max_cpu_num();

	return max_present_cpu_num;
}
int cpu__get_node(struct perf_cpu cpu)
{
	if (unlikely(cpunode_map == NULL)) {
		pr_debug("cpu_map not initialized\n");
		return -1;
	}

	return cpunode_map[cpu.cpu];
}
static int init_cpunode_map(void)
{
	int i;

	set_max_cpu_num();
	set_max_node_num();

	cpunode_map = calloc(max_cpu_num.cpu, sizeof(int));
	if (!cpunode_map) {
		pr_err("%s: calloc failed\n", __func__);
		return -1;
	}

	for (i = 0; i < max_cpu_num.cpu; i++)
		cpunode_map[i] = -1;

	return 0;
}
int cpu__setup_cpunode_map(void)
{
	struct dirent *dent1, *dent2;
	DIR *dir1, *dir2;
	unsigned int cpu, mem;
	char buf[PATH_MAX];
	char path[PATH_MAX];
	const char *mnt;
	int n;

	/* initialize globals */
	if (init_cpunode_map())
		return -1;

	mnt = sysfs__mountpoint();
	if (!mnt)
		return 0;

	n = snprintf(path, PATH_MAX, "%s/devices/system/node", mnt);
	if (n >= PATH_MAX) {
		pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
		return -1;
	}

	dir1 = opendir(path);
	if (!dir1)
		return 0;

	/* walk tree and setup map */
	while ((dent1 = readdir(dir1)) != NULL) {
		if (dent1->d_type != DT_DIR || sscanf(dent1->d_name, "node%u", &mem) < 1)
			continue;

		n = snprintf(buf, PATH_MAX, "%s/%s", path, dent1->d_name);
		if (n >= PATH_MAX) {
			pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
			continue;
		}

		dir2 = opendir(buf);
		if (!dir2)
			continue;
		while ((dent2 = readdir(dir2)) != NULL) {
			if (dent2->d_type != DT_LNK || sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
				continue;
			cpunode_map[cpu] = mem;
		}
		closedir(dir2);
	}
	closedir(dir1);
	return 0;
}
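
/*
 * Example: if /sys/devices/system/node/node1 contains the links cpu4 and
 * cpu5, the walk above sets cpunode_map[4] = cpunode_map[5] = 1.
 */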
size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size)
{
	int i, start = -1;
	bool first = true;
	size_t ret = 0;

#define COMMA first ? "" : ","

	for (i = 0; i < perf_cpu_map__nr(map) + 1; i++) {
		struct perf_cpu cpu = { .cpu = INT_MAX };
		bool last = i == perf_cpu_map__nr(map);

		if (!last)
			cpu = perf_cpu_map__cpu(map, i);

		if (start == -1) {
			start = i;
			if (last) {
				ret += snprintf(buf + ret, size - ret,
						"%s%d", COMMA,
						perf_cpu_map__cpu(map, i).cpu);
			}
		} else if (((i - start) != (cpu.cpu - perf_cpu_map__cpu(map, start).cpu)) || last) {
			int end = i - 1;

			if (start == end) {
				ret += snprintf(buf + ret, size - ret,
						"%s%d", COMMA,
						perf_cpu_map__cpu(map, start).cpu);
			} else {
				ret += snprintf(buf + ret, size - ret,
						"%s%d-%d", COMMA,
						perf_cpu_map__cpu(map, start).cpu, perf_cpu_map__cpu(map, end).cpu);
			}
			first = false;
			start = i;
		}
	}

#undef COMMA

	pr_debug2("cpumask list: %s\n", buf);
	return ret;
}
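
/*
 * Example: a map holding CPUs {0, 1, 2, 4, 5, 7} is printed as the list
 * "0-2,4-5,7".
 */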
static char hex_char(unsigned char val)
{
	if (val < 10)
		return val + '0';
	if (val < 16)
		return val - 10 + 'a';
	return '?';
}
size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size)
{
	int idx;
	char *ptr = buf;
	unsigned char *bitmap;
	struct perf_cpu c, last_cpu = perf_cpu_map__max(map);

	if (buf == NULL)
		return 0;

	bitmap = zalloc(last_cpu.cpu / 8 + 1);
	if (bitmap == NULL) {
		buf[0] = '\0';
		return 0;
	}

	perf_cpu_map__for_each_cpu(c, idx, map)
		bitmap[c.cpu / 8] |= 1 << (c.cpu % 8);

	for (int cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) {
		unsigned char bits = bitmap[cpu / 8];

		if (cpu % 8)
			bits >>= 4;
		else
			bits &= 0xf;

		*ptr++ = hex_char(bits);
		if ((cpu % 32) == 0 && cpu > 0)
			*ptr++ = ',';
	}
	*ptr = '\0';
	free(bitmap);

	buf[size - 1] = '\0';
	return ptr - buf;
}
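
/*
 * Example: a map holding CPUs {0, 1, 2, 3} has a maximum CPU of 3, so a
 * single hex digit is emitted and buf holds "f".
 */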
struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */
{
	static struct perf_cpu_map *online;

	if (!online)
		online = perf_cpu_map__new_online_cpus(); /* from /sys/devices/system/cpu/online */

	return online;
}
bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b)
{
	return a->thread_idx == b->thread_idx &&
		a->node == b->node &&
		a->socket == b->socket &&
		a->die == b->die &&
		a->cluster == b->cluster &&
		a->cache_lvl == b->cache_lvl &&
		a->cache == b->cache &&
		a->core == b->core &&
		a->cpu.cpu == b->cpu.cpu;
}
bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a)
{
	return a->thread_idx == -1 &&
		a->node == -1 &&
		a->socket == -1 &&
		a->die == -1 &&
		a->cluster == -1 &&
		a->cache_lvl == -1 &&
		a->cache == -1 &&
		a->core == -1 &&
		a->cpu.cpu == -1;
}
struct aggr_cpu_id aggr_cpu_id__empty(void)
{
	struct aggr_cpu_id ret = {
		.thread_idx = -1,
		.node = -1,
		.socket = -1,
		.die = -1,
		.cluster = -1,
		.cache_lvl = -1,
		.cache = -1,
		.core = -1,
		.cpu = (struct perf_cpu){ .cpu = -1 },
	};
	return ret;
}