// SPDX-License-Identifier: GPL-2.0
/*
 * Cache Allocation Technology (CAT) test
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Authors:
 *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
 *    Fenghua Yu <fenghua.yu@intel.com>
 */

#include "resctrl.h"

#define RESULT_FILE_NAME	"result_cat"
#define NUM_OF_RUNS		5

/*
 * Minimum difference in LLC misses between a test with n+1 bits CBM to the
 * test with n bits is MIN_DIFF_PERCENT_PER_BIT * (n - 1). With e.g. 5 vs 4
 * bits in the CBM mask, the minimum difference must be at least
 * MIN_DIFF_PERCENT_PER_BIT * (4 - 1) = 3 percent.
 *
 * The relationship between the number of used CBM bits and the difference in
 * LLC misses is not expected to be linear. With a small number of bits, the
 * margin is smaller than with a larger number of bits. For selftest purposes,
 * however, the linear approach is enough because ultimately only a pass/fail
 * decision has to be made and the distinction between a strong and a stronger
 * signal is irrelevant.
 */
#define MIN_DIFF_PERCENT_PER_BIT	1UL
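
/*
 * Worked example of the threshold (numbers illustrative): when the current
 * mask has bits = 4 set, check_results() passes
 * MIN_DIFF_PERCENT_PER_BIT * (4 - 1) = 3 as min_diff_percent, so on Intel
 * the average LLC miss count must change by at least 3% relative to the
 * previous, one-bit-larger mask for that step to pass.
 */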

static int show_results_info(__u64 sum_llc_val, int no_of_bits,
			     unsigned long cache_span,
			     unsigned long min_diff_percent,
			     unsigned long num_of_runs, bool platform,
			     __s64 *prev_avg_llc_val)
{
	__u64 avg_llc_val = 0;
	float avg_diff;
	int ret = 0;

	avg_llc_val = sum_llc_val / num_of_runs;
	if (*prev_avg_llc_val) {
		float delta = (__s64)(avg_llc_val - *prev_avg_llc_val);

		avg_diff = delta / *prev_avg_llc_val;
		ret = platform && (avg_diff * 100) < (float)min_diff_percent;

		ksft_print_msg("%s Check cache miss rate changed more than %.1f%%\n",
			       ret ? "Fail:" : "Pass:", (float)min_diff_percent);
		ksft_print_msg("Percent diff=%.1f\n", avg_diff * 100);
	}
	*prev_avg_llc_val = avg_llc_val;

	show_cache_info(no_of_bits, avg_llc_val, cache_span, true);

	return ret;
}

/* Remove the highest bit from CBM */
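/*
 * For a contiguous mask this drops the top bit on each call, e.g.
 * next_mask(0x1f) == 0xf and next_mask(0xf) == 0x7; next_mask(0x1) == 0
 * eventually terminates the loops in cat_test() and check_results().
 */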
static unsigned long next_mask(unsigned long current_mask)
{
	return current_mask & (current_mask >> 1);
}

static int check_results(struct resctrl_val_param *param, const char *cache_type,
			 unsigned long cache_total_size, unsigned long full_cache_mask,
			 unsigned long current_mask)
{
	char *token_array[8], temp[512];
	__u64 sum_llc_perf_miss = 0;
	__s64 prev_avg_llc_val = 0;
	unsigned long alloc_size;
	int runs = 0;
	int fail = 0;
	int ret;
	FILE *fp;

	ksft_print_msg("Checking for pass/fail\n");
	fp = fopen(param->filename, "r");
	if (!fp) {
		ksft_perror("Cannot open file");

		return -1;
	}

	while (fgets(temp, sizeof(temp), fp)) {
		char *token = strtok(temp, ":\t");
		int fields = 0;
		int bits;

		while (token) {
			token_array[fields++] = token;
			token = strtok(NULL, ":\t");
		}

		/*
		 * token_array[3] is expected to carry the LLC miss count
		 * parsed from each result line written by perf_event_measure().
		 */
		sum_llc_perf_miss += strtoull(token_array[3], NULL, 0);
		runs++;

		if (runs < NUM_OF_RUNS)
			continue;
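
		/*
		 * One pass/fail evaluation is made per mask: the LLC miss
		 * counts of NUM_OF_RUNS consecutive result lines are summed
		 * above and averaged in show_results_info() below.
		 */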
105 ksft_print_msg("Unexpected empty cache mask\n");
109 alloc_size
= cache_portion_size(cache_total_size
, current_mask
, full_cache_mask
);
111 bits
= count_bits(current_mask
);
113 ret
= show_results_info(sum_llc_perf_miss
, bits
,
115 MIN_DIFF_PERCENT_PER_BIT
* (bits
- 1),
116 runs
, get_vendor() == ARCH_INTEL
,
122 sum_llc_perf_miss
= 0;
123 current_mask
= next_mask(current_mask
);

static void cat_test_cleanup(void)
{
	remove(RESULT_FILE_NAME);
}

/*
 * cat_test - Execute CAT benchmark and measure cache misses
 * @test:		Test information structure
 * @uparams:		User supplied parameters
 * @param:		Parameters passed to cat_test()
 * @span:		Buffer size for the benchmark
 * @current_mask:	Start mask for the first iteration
 *
 * Run the CAT selftest by varying the allocated cache portion and comparing
 * the impact on cache misses (the result analysis is done in check_results()
 * and show_results_info(), not in this function).
 *
 * One bit is removed from the CAT allocation bit mask (in current_mask) for
 * each subsequent test, which keeps reducing the size of the allocated cache
 * portion. A single test flushes the buffer, reads it to warm up the cache,
 * and reads the buffer again. The cache misses are measured during the last
 * read pass.
 *
 * Return:		0 when the test was run, < 0 on error.
 */
static int cat_test(const struct resctrl_test *test,
		    const struct user_params *uparams,
		    struct resctrl_val_param *param,
		    size_t span, unsigned long current_mask)
{
	struct perf_event_read pe_read;
	struct perf_event_attr pea;
	cpu_set_t old_affinity;
	unsigned char *buf;
	char schemata[64];
	int ret, i, pe_fd;
	pid_t bm_pid;

	if (strcmp(param->filename, "") == 0)
		sprintf(param->filename, "stdio");

	bm_pid = getpid();

	/* Taskset benchmark to specified cpu */
	ret = taskset_benchmark(bm_pid, uparams->cpu, &old_affinity);
	if (ret)
		return ret;

	/* Write benchmark to specified con_mon grp, mon_grp in resctrl FS */
	ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp);
	if (ret)
		goto reset_affinity;

	perf_event_attr_initialize(&pea, PERF_COUNT_HW_CACHE_MISSES);
	perf_event_initialize_read_format(&pe_read);
	pe_fd = perf_open(&pea, bm_pid, uparams->cpu);
	if (pe_fd < 0) {
		ret = -1;
		goto reset_affinity;
	}

	buf = alloc_buffer(span, 1);
	if (!buf) {
		ret = -1;
		goto pe_close;
	}
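
	/*
	 * For each mask iteration below, the complement of current_mask is
	 * written to the default resctrl group and current_mask itself to
	 * the test's control group, so the benchmark reads run with a
	 * progressively smaller exclusive cache portion.
	 */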
	while (current_mask) {
		snprintf(schemata, sizeof(schemata), "%lx", param->mask & ~current_mask);
		ret = write_schemata("", schemata, uparams->cpu, test->resource);
		if (ret)
			goto free_buf;

		snprintf(schemata, sizeof(schemata), "%lx", current_mask);
		ret = write_schemata(param->ctrlgrp, schemata, uparams->cpu, test->resource);
		if (ret)
			goto free_buf;

		for (i = 0; i < NUM_OF_RUNS; i++) {
			/* Flush the buffer, then read it once to warm up the cache */
			mem_flush(buf, span);
			fill_cache_read(buf, span, true);

			ret = perf_event_reset_enable(pe_fd);
			if (ret)
				goto free_buf;

			/* Cache misses are measured over this read pass */
			fill_cache_read(buf, span, true);

			ret = perf_event_measure(pe_fd, &pe_read, param->filename, bm_pid);
			if (ret)
				goto free_buf;
		}
		current_mask = next_mask(current_mask);
	}

free_buf:
	free(buf);
pe_close:
	close(pe_fd);
reset_affinity:
	taskset_restore(bm_pid, &old_affinity);

	return ret;
}

static int cat_run_test(const struct resctrl_test *test, const struct user_params *uparams)
{
	unsigned long long_mask, start_mask, full_cache_mask;
	unsigned long cache_total_size = 0;
	int n = uparams->bits;
	unsigned int start;
	int count_of_bits;
	size_t span;
	int ret;

	ret = get_full_cbm(test->resource, &full_cache_mask);
	if (ret)
		return ret;

	/* Get the largest contiguous exclusive portion of the cache */
	ret = get_mask_no_shareable(test->resource, &long_mask);
	if (ret)
		return ret;

	/* Get L3/L2 cache size */
	ret = get_cache_size(uparams->cpu, test->resource, &cache_total_size);
	if (ret)
		return ret;
	ksft_print_msg("Cache size :%lu\n", cache_total_size);

	count_of_bits = count_contiguous_bits(long_mask, &start);

	if (!n)
		n = count_of_bits / 2;

	if (n > count_of_bits - 1) {
		ksft_print_msg("Invalid input value for no_of_bits n!\n");
		ksft_print_msg("Please enter value in range 1 to %d\n",
			       count_of_bits - 1);
		return -1;
	}
	start_mask = create_bit_mask(start, n);
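
	/*
	 * Illustration (assuming create_bit_mask(start, len) sets len
	 * consecutive bits beginning at bit position start): start = 2 and
	 * n = 3 would yield a start_mask of 0x1c.
	 */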

	struct resctrl_val_param param = {
		.ctrlgrp	= "c1",
		.filename	= RESULT_FILE_NAME,
		.num_of_runs	= 0,
	};
	param.mask = long_mask;
	span = cache_portion_size(cache_total_size, start_mask, full_cache_mask);

	remove(param.filename);

	ret = cat_test(test, uparams, &param, span, start_mask);
	if (ret)
		return ret;

	ret = check_results(&param, test->resource,
			    cache_total_size, full_cache_mask, start_mask);
	return ret;
}

static bool arch_supports_noncont_cat(const struct resctrl_test *test)
{
	/* AMD always supports non-contiguous CBM. */
	if (get_vendor() == ARCH_AMD)
		return true;

#if defined(__i386__) || defined(__x86_64__) /* arch */
	unsigned int eax, ebx, ecx, edx;

	/* Intel support for non-contiguous CBM needs to be discovered. */
	if (!strcmp(test->resource, "L3"))
		__cpuid_count(0x10, 1, eax, ebx, ecx, edx);
	else if (!strcmp(test->resource, "L2"))
		__cpuid_count(0x10, 2, eax, ebx, ecx, edx);
	else
		return false;

	/* CPUID leaf 0x10: ECX bit 3 reports non-contiguous CBM support. */
	return ((ecx >> 3) & 1);
#endif /* end arch */

	return false;
}

static int noncont_cat_run_test(const struct resctrl_test *test,
				const struct user_params *uparams)
{
	unsigned long full_cache_mask, cont_mask, noncont_mask;
	unsigned int sparse_masks;
	int bit_center, ret;
	char schemata[64];

	/* Check to compare sparse_masks content to CPUID output. */
	ret = resource_info_unsigned_get(test->resource, "sparse_masks", &sparse_masks);
	if (ret)
		return ret;

	if (arch_supports_noncont_cat(test) != sparse_masks) {
		ksft_print_msg("Hardware and kernel differ on non-contiguous CBM support!\n");
		return 1;
	}

	/* Write checks initialization. */
	ret = get_full_cbm(test->resource, &full_cache_mask);
	if (ret < 0)
		return ret;
	bit_center = count_bits(full_cache_mask) / 2;

	/*
	 * The bit_center needs to be at least 3 to properly calculate the CBM
	 * hole in the noncont_mask. If it's smaller, return an error since the
	 * cache mask is too short and that shouldn't happen.
	 */
	if (bit_center < 3)
		return -EINVAL;
	cont_mask = full_cache_mask >> bit_center;

	/* Contiguous mask write check. */
	snprintf(schemata, sizeof(schemata), "%lx", cont_mask);
	ret = write_schemata("", schemata, uparams->cpu, test->resource);
	if (ret) {
		ksft_print_msg("Write of contiguous CBM failed\n");
		return 1;
	}
	/*
	 * Non-contiguous mask write check. CBM has a 0xf hole approximately in the middle.
	 * Output is compared with support information to catch any edge case errors.
	 */
	noncont_mask = ~(0xfUL << (bit_center - 2)) & full_cache_mask;
	snprintf(schemata, sizeof(schemata), "%lx", noncont_mask);
	ret = write_schemata("", schemata, uparams->cpu, test->resource);
	if (ret && sparse_masks)
		ksft_print_msg("Non-contiguous CBMs supported but write of non-contiguous CBM failed\n");
	else if (ret && !sparse_masks)
		ksft_print_msg("Non-contiguous CBMs not supported and write of non-contiguous CBM failed as expected\n");
	else if (!ret && !sparse_masks)
		ksft_print_msg("Non-contiguous CBMs not supported but write of non-contiguous CBM succeeded\n");

	return !ret == !sparse_masks;
}

static bool noncont_cat_feature_check(const struct resctrl_test *test)
{
	if (!resctrl_resource_exists(test->resource))
		return false;

	return resource_info_file_exists(test->resource, "sparse_masks");
}

struct resctrl_test l3_cat_test = {
	.name = "L3_CAT",
	.group = "CAT",
	.resource = "L3",
	.feature_check = test_resource_feature_check,
	.run_test = cat_run_test,
	.cleanup = cat_test_cleanup,
};

struct resctrl_test l3_noncont_cat_test = {
	.name = "L3_NONCONT_CAT",
	.group = "CAT",
	.resource = "L3",
	.feature_check = noncont_cat_feature_check,
	.run_test = noncont_cat_run_test,
};

struct resctrl_test l2_noncont_cat_test = {
	.name = "L2_NONCONT_CAT",
	.group = "CAT",
	.resource = "L2",
	.feature_check = noncont_cat_feature_check,
	.run_test = noncont_cat_run_test,
};