1 // SPDX-License-Identifier: GPL-2.0
3 * Memory bandwidth monitoring and allocation library
5 * Copyright (C) 2018 Intel Corporation
8 * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
9 * Fenghua Yu <fenghua.yu@intel.com>
13 #define UNCORE_IMC "uncore_imc"
14 #define READ_FILE_NAME "events/cas_count_read"
15 #define DYN_PMU_PATH "/sys/bus/event_source/devices"
16 #define SCALE 0.00006103515625
20 #define CON_MBM_LOCAL_BYTES_PATH \
21 "%s/%s/mon_data/mon_L3_%02d/mbm_local_bytes"
23 struct membw_read_format
{
24 __u64 value
; /* The value of the event */
25 __u64 time_enabled
; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
26 __u64 time_running
; /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
27 __u64 id
; /* if PERF_FORMAT_ID */
30 struct imc_counter_config
{
34 struct perf_event_attr pe
;
35 struct membw_read_format return_value
;
39 static char mbm_total_path
[1024];
41 static struct imc_counter_config imc_counters_config
[MAX_IMCS
];
42 static const struct resctrl_test
*current_test
;
44 static void read_mem_bw_initialize_perf_event_attr(int i
)
46 memset(&imc_counters_config
[i
].pe
, 0,
47 sizeof(struct perf_event_attr
));
48 imc_counters_config
[i
].pe
.type
= imc_counters_config
[i
].type
;
49 imc_counters_config
[i
].pe
.size
= sizeof(struct perf_event_attr
);
50 imc_counters_config
[i
].pe
.disabled
= 1;
51 imc_counters_config
[i
].pe
.inherit
= 1;
52 imc_counters_config
[i
].pe
.exclude_guest
= 0;
53 imc_counters_config
[i
].pe
.config
=
54 imc_counters_config
[i
].umask
<< 8 |
55 imc_counters_config
[i
].event
;
56 imc_counters_config
[i
].pe
.sample_type
= PERF_SAMPLE_IDENTIFIER
;
57 imc_counters_config
[i
].pe
.read_format
=
58 PERF_FORMAT_TOTAL_TIME_ENABLED
| PERF_FORMAT_TOTAL_TIME_RUNNING
;
61 static void read_mem_bw_ioctl_perf_event_ioc_reset_enable(int i
)
63 ioctl(imc_counters_config
[i
].fd
, PERF_EVENT_IOC_RESET
, 0);
64 ioctl(imc_counters_config
[i
].fd
, PERF_EVENT_IOC_ENABLE
, 0);
67 static void read_mem_bw_ioctl_perf_event_ioc_disable(int i
)
69 ioctl(imc_counters_config
[i
].fd
, PERF_EVENT_IOC_DISABLE
, 0);
73 * get_read_event_and_umask: Parse config into event and umask
74 * @cas_count_cfg: Config
77 static void get_read_event_and_umask(char *cas_count_cfg
, int count
)
79 char *token
[MAX_TOKENS
];
82 token
[0] = strtok(cas_count_cfg
, "=,");
84 for (i
= 1; i
< MAX_TOKENS
; i
++)
85 token
[i
] = strtok(NULL
, "=,");
87 for (i
= 0; i
< MAX_TOKENS
- 1; i
++) {
90 if (strcmp(token
[i
], "event") == 0)
91 imc_counters_config
[count
].event
= strtol(token
[i
+ 1], NULL
, 16);
92 if (strcmp(token
[i
], "umask") == 0)
93 imc_counters_config
[count
].umask
= strtol(token
[i
+ 1], NULL
, 16);
97 static int open_perf_read_event(int i
, int cpu_no
)
99 imc_counters_config
[i
].fd
=
100 perf_event_open(&imc_counters_config
[i
].pe
, -1, cpu_no
, -1,
101 PERF_FLAG_FD_CLOEXEC
);
103 if (imc_counters_config
[i
].fd
== -1) {
104 fprintf(stderr
, "Error opening leader %llx\n",
105 imc_counters_config
[i
].pe
.config
);
113 /* Get type and config of an iMC counter's read event. */
114 static int read_from_imc_dir(char *imc_dir
, int count
)
116 char cas_count_cfg
[1024], imc_counter_cfg
[1024], imc_counter_type
[1024];
119 /* Get type of iMC counter */
120 sprintf(imc_counter_type
, "%s%s", imc_dir
, "type");
121 fp
= fopen(imc_counter_type
, "r");
123 ksft_perror("Failed to open iMC counter type file");
127 if (fscanf(fp
, "%u", &imc_counters_config
[count
].type
) <= 0) {
128 ksft_perror("Could not get iMC type");
135 /* Get read config */
136 sprintf(imc_counter_cfg
, "%s%s", imc_dir
, READ_FILE_NAME
);
137 fp
= fopen(imc_counter_cfg
, "r");
139 ksft_perror("Failed to open iMC config file");
143 if (fscanf(fp
, "%1023s", cas_count_cfg
) <= 0) {
144 ksft_perror("Could not get iMC cas count read");
151 get_read_event_and_umask(cas_count_cfg
, count
);
157 * A system can have 'n' number of iMC (Integrated Memory Controller)
158 * counters, get that 'n'. Discover the properties of the available
159 * counters in support of needed performance measurement via perf.
160 * For each iMC counter get its type and config. Also obtain each
161 * counter's event and umask for the memory read events that will be
164 * Enumerate all these details into an array of structures.
166 * Return: >= 0 on success. < 0 on failure.
168 static int num_of_imcs(void)
170 char imc_dir
[512], *temp
;
171 unsigned int count
= 0;
176 dp
= opendir(DYN_PMU_PATH
);
178 while ((ep
= readdir(dp
))) {
179 temp
= strstr(ep
->d_name
, UNCORE_IMC
);
184 * imc counters are named as "uncore_imc_<n>", hence
185 * increment the pointer to point to <n>. Note that
186 * sizeof(UNCORE_IMC) would count for null character as
187 * well and hence the last underscore character in
188 * uncore_imc'_' need not be counted.
190 temp
= temp
+ sizeof(UNCORE_IMC
);
193 * Some directories under "DYN_PMU_PATH" could have
194 * names like "uncore_imc_free_running", hence, check if
195 * first character is a numerical digit or not.
197 if (temp
[0] >= '0' && temp
[0] <= '9') {
198 sprintf(imc_dir
, "%s/%s/", DYN_PMU_PATH
,
200 ret
= read_from_imc_dir(imc_dir
, count
);
211 ksft_print_msg("Unable to find iMC counters\n");
216 ksft_perror("Unable to open PMU directory");
224 int initialize_read_mem_bw_imc(void)
228 imcs
= num_of_imcs();
232 /* Initialize perf_event_attr structures for all iMC's */
233 for (imc
= 0; imc
< imcs
; imc
++)
234 read_mem_bw_initialize_perf_event_attr(imc
);
239 static void perf_close_imc_read_mem_bw(void)
243 for (mc
= 0; mc
< imcs
; mc
++) {
244 if (imc_counters_config
[mc
].fd
!= -1)
245 close(imc_counters_config
[mc
].fd
);
250 * perf_open_imc_read_mem_bw - Open perf fds for IMCs
251 * @cpu_no: CPU number that the benchmark PID is bound to
253 * Return: = 0 on success. < 0 on failure.
255 static int perf_open_imc_read_mem_bw(int cpu_no
)
259 for (imc
= 0; imc
< imcs
; imc
++)
260 imc_counters_config
[imc
].fd
= -1;
262 for (imc
= 0; imc
< imcs
; imc
++) {
263 ret
= open_perf_read_event(imc
, cpu_no
);
271 perf_close_imc_read_mem_bw();
276 * do_imc_read_mem_bw_test - Perform memory bandwidth test
278 * Runs memory bandwidth test over one second period. Also, handles starting
279 * and stopping of the IMC perf counters around the test.
281 static void do_imc_read_mem_bw_test(void)
285 for (imc
= 0; imc
< imcs
; imc
++)
286 read_mem_bw_ioctl_perf_event_ioc_reset_enable(imc
);
290 /* Stop counters after a second to get results. */
291 for (imc
= 0; imc
< imcs
; imc
++)
292 read_mem_bw_ioctl_perf_event_ioc_disable(imc
);
296 * get_read_mem_bw_imc - Memory read bandwidth as reported by iMC counters
298 * Memory read bandwidth utilized by a process on a socket can be calculated
299 * using iMC counters' read events. Perf events are used to read these
302 * Return: = 0 on success. < 0 on failure.
304 static int get_read_mem_bw_imc(float *bw_imc
)
306 float reads
= 0, of_mul_read
= 1;
310 * Log read event values from all iMC counters into
311 * struct imc_counter_config.
312 * Take overflow into consideration before calculating total bandwidth.
314 for (imc
= 0; imc
< imcs
; imc
++) {
315 struct imc_counter_config
*r
=
316 &imc_counters_config
[imc
];
318 if (read(r
->fd
, &r
->return_value
,
319 sizeof(struct membw_read_format
)) == -1) {
320 ksft_perror("Couldn't get read bandwidth through iMC");
324 __u64 r_time_enabled
= r
->return_value
.time_enabled
;
325 __u64 r_time_running
= r
->return_value
.time_running
;
327 if (r_time_enabled
!= r_time_running
)
328 of_mul_read
= (float)r_time_enabled
/
329 (float)r_time_running
;
331 reads
+= r
->return_value
.value
* of_mul_read
* SCALE
;
339 * initialize_mem_bw_resctrl: Appropriately populate "mbm_total_path"
340 * @param: Parameters passed to resctrl_val()
341 * @domain_id: Domain ID (cache ID; for MB, L3 cache ID)
343 void initialize_mem_bw_resctrl(const struct resctrl_val_param
*param
,
346 sprintf(mbm_total_path
, CON_MBM_LOCAL_BYTES_PATH
, RESCTRL_PATH
,
347 param
->ctrlgrp
, domain_id
);
351 * Open file to read MBM local bytes from resctrl FS
353 static FILE *open_mem_bw_resctrl(const char *mbm_bw_file
)
357 fp
= fopen(mbm_bw_file
, "r");
359 ksft_perror("Failed to open total memory bandwidth file");
365 * Get MBM Local bytes as reported by resctrl FS
367 static int get_mem_bw_resctrl(FILE *fp
, unsigned long *mbm_total
)
369 if (fscanf(fp
, "%lu\n", mbm_total
) <= 0) {
370 ksft_perror("Could not get MBM local bytes");
378 void ctrlc_handler(int signum
, siginfo_t
*info
, void *ptr
)
380 /* Only kill child after bm_pid is set after fork() */
382 kill(bm_pid
, SIGKILL
);
384 if (current_test
&& current_test
->cleanup
)
385 current_test
->cleanup();
386 ksft_print_msg("Ending\n\n");
392 * Register CTRL-C handler for parent, as it has to kill
393 * child process before exiting.
395 int signal_handler_register(const struct resctrl_test
*test
)
397 struct sigaction sigact
= {};
403 sigact
.sa_sigaction
= ctrlc_handler
;
404 sigemptyset(&sigact
.sa_mask
);
405 sigact
.sa_flags
= SA_SIGINFO
;
406 if (sigaction(SIGINT
, &sigact
, NULL
) ||
407 sigaction(SIGTERM
, &sigact
, NULL
) ||
408 sigaction(SIGHUP
, &sigact
, NULL
)) {
409 ksft_perror("sigaction");
416 * Reset signal handler to SIG_DFL.
417 * Non-Value return because the caller should keep
418 * the error code of other path even if sigaction fails.
420 void signal_handler_unregister(void)
422 struct sigaction sigact
= {};
425 sigact
.sa_handler
= SIG_DFL
;
426 sigemptyset(&sigact
.sa_mask
);
427 if (sigaction(SIGINT
, &sigact
, NULL
) ||
428 sigaction(SIGTERM
, &sigact
, NULL
) ||
429 sigaction(SIGHUP
, &sigact
, NULL
)) {
430 ksft_perror("sigaction");
435 * print_results_bw: the memory bandwidth results are stored in a file
436 * @filename: file that stores the results
437 * @bm_pid: child pid that runs benchmark
438 * @bw_imc: perf imc counter value
439 * @bw_resc: memory bandwidth value
441 * Return: 0 on success, < 0 on error.
443 static int print_results_bw(char *filename
, pid_t bm_pid
, float bw_imc
,
444 unsigned long bw_resc
)
446 unsigned long diff
= fabs(bw_imc
- bw_resc
);
449 if (strcmp(filename
, "stdio") == 0 || strcmp(filename
, "stderr") == 0) {
450 printf("Pid: %d \t Mem_BW_iMC: %f \t ", (int)bm_pid
, bw_imc
);
451 printf("Mem_BW_resc: %lu \t Difference: %lu\n", bw_resc
, diff
);
453 fp
= fopen(filename
, "a");
455 ksft_perror("Cannot open results file");
459 if (fprintf(fp
, "Pid: %d \t Mem_BW_iMC: %f \t Mem_BW_resc: %lu \t Difference: %lu\n",
460 (int)bm_pid
, bw_imc
, bw_resc
, diff
) <= 0) {
461 ksft_print_msg("Could not log results\n");
473 * measure_read_mem_bw - Measures read memory bandwidth numbers while benchmark runs
474 * @uparams: User supplied parameters
475 * @param: Parameters passed to resctrl_val()
476 * @bm_pid: PID that runs the benchmark
478 * Measure memory bandwidth from resctrl and from another source which is
479 * perf imc value or could be something else if perf imc event is not
480 * available. Compare the two values to validate resctrl value. It takes
481 * 1 sec to measure the data.
482 * resctrl does not distinguish between read and write operations so
483 * its data includes all memory operations.
485 int measure_read_mem_bw(const struct user_params
*uparams
,
486 struct resctrl_val_param
*param
, pid_t bm_pid
)
488 unsigned long bw_resc
, bw_resc_start
, bw_resc_end
;
493 mem_bw_fp
= open_mem_bw_resctrl(mbm_total_path
);
497 ret
= perf_open_imc_read_mem_bw(uparams
->cpu
);
501 ret
= get_mem_bw_resctrl(mem_bw_fp
, &bw_resc_start
);
507 do_imc_read_mem_bw_test();
509 ret
= get_mem_bw_resctrl(mem_bw_fp
, &bw_resc_end
);
513 ret
= get_read_mem_bw_imc(&bw_imc
);
517 perf_close_imc_read_mem_bw();
520 bw_resc
= (bw_resc_end
- bw_resc_start
) / MB
;
522 return print_results_bw(param
->filename
, bm_pid
, bw_imc
, bw_resc
);
525 perf_close_imc_read_mem_bw();
532 * resctrl_val: execute benchmark and measure memory bandwidth on
534 * @test: test information structure
535 * @uparams: user supplied parameters
536 * @param: parameters passed to resctrl_val()
538 * Return: 0 when the test was run, < 0 on error.
540 int resctrl_val(const struct resctrl_test
*test
,
541 const struct user_params
*uparams
,
542 struct resctrl_val_param
*param
)
544 unsigned char *buf
= NULL
;
545 cpu_set_t old_affinity
;
550 if (strcmp(param
->filename
, "") == 0)
551 sprintf(param
->filename
, "stdio");
553 ret
= get_domain_id(test
->resource
, uparams
->cpu
, &domain_id
);
555 ksft_print_msg("Could not get domain ID\n");
561 /* Taskset test to specified CPU. */
562 ret
= taskset_benchmark(ppid
, uparams
->cpu
, &old_affinity
);
566 /* Write test to specified control & monitoring group in resctrl FS. */
567 ret
= write_bm_pid_to_resctrl(ppid
, param
->ctrlgrp
, param
->mongrp
);
572 ret
= param
->init(param
, domain_id
);
578 * If not running user provided benchmark, run the default
579 * "fill_buf". First phase of "fill_buf" is to prepare the
580 * buffer that the benchmark will operate on. No measurements
581 * are needed during this phase and prepared memory will be
582 * passed to next part of benchmark via copy-on-write thus
583 * no impact on the benchmark that relies on reading from
586 if (param
->fill_buf
) {
587 buf
= alloc_buffer(param
->fill_buf
->buf_size
,
588 param
->fill_buf
->memflush
);
599 ksft_perror("Unable to fork");
604 * What needs to be measured runs in separate process until
609 fill_cache_read(buf
, param
->fill_buf
->buf_size
, false);
610 else if (uparams
->benchmark_cmd
[0])
611 execvp(uparams
->benchmark_cmd
[0], (char **)uparams
->benchmark_cmd
);
615 ksft_print_msg("Benchmark PID: %d\n", (int)bm_pid
);
617 /* Give benchmark enough time to fully run. */
620 /* Test runs until the callback setup() tells the test to stop. */
622 ret
= param
->setup(test
, uparams
, param
);
623 if (ret
== END_OF_TESTS
) {
630 ret
= param
->measure(uparams
, param
, bm_pid
);
635 kill(bm_pid
, SIGKILL
);
639 taskset_restore(ppid
, &old_affinity
);