tools/testing/selftests/resctrl/cat_test.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Cache Allocation Technology (CAT) test
   4  *
   5  * Copyright (C) 2018 Intel Corporation
   6  *
   7  * Authors:
   8  *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
   9  *    Fenghua Yu <fenghua.yu@intel.com>
  10  */
  11 #include "resctrl.h"
  12 #include <unistd.h>
  13
  14 #define RESULT_FILE_NAME        "result_cat"
  15 #define NUM_OF_RUNS             5
  16
  17 /*
  18  * Minimum difference in LLC misses between a test with n+1 bits CBM to the
  19  * test with n bits is MIN_DIFF_PERCENT_PER_BIT * (n - 1). With e.g. 5 vs 4
  20  * bits in the CBM mask, the minimum difference must be at least
  21  * MIN_DIFF_PERCENT_PER_BIT * (4 - 1) = 3 percent.
  22  *
  23  * The relationship between number of used CBM bits and difference in LLC
  24  * misses is not expected to be linear. With a small number of bits, the
  25  * margin is smaller than with larger number of bits. For selftest purposes,
  26  * however, linear approach is enough because ultimately only pass/fail
  27  * decision has to be made and distinction between strong and stronger
  28  * signal is irrelevant.
  29  */
  30 #define MIN_DIFF_PERCENT_PER_BIT        1UL
  31
  32 static int show_results_info(__u64 sum_llc_val, int no_of_bits,
  33                              unsigned long cache_span,
  34                              unsigned long min_diff_percent,
  35                              unsigned long num_of_runs, bool platform,
  36                              __s64 *prev_avg_llc_val)
  37 {
  38         __u64 avg_llc_val = 0;
  39         float avg_diff;
  40         int ret = 0;
  41
  42         avg_llc_val = sum_llc_val / num_of_runs;
  43         if (*prev_avg_llc_val) {
  44                 float delta = (__s64)(avg_llc_val - *prev_avg_llc_val);
  45
  46                 avg_diff = delta / *prev_avg_llc_val;
  47                 ret = platform && (avg_diff * 100) < (float)min_diff_percent;
  48
  49                 ksft_print_msg("%s Check cache miss rate changed more than %.1f%%\n",
  50                                ret ? "Fail:" : "Pass:", (float)min_diff_percent);
  51
  52                 ksft_print_msg("Percent diff=%.1f\n", avg_diff * 100);
  53         }
  54         *prev_avg_llc_val = avg_llc_val;
  55
  56         show_cache_info(no_of_bits, avg_llc_val, cache_span, true);
  57
  58         return ret;
  59 }
  60
  61 /* Remove the highest bit from CBM */
  62 static unsigned long next_mask(unsigned long current_mask)
  63 {
  64         return current_mask & (current_mask >> 1);
  65 }
  66
  67 static int check_results(struct resctrl_val_param *param, const char *cache_type,
  68                          unsigned long cache_total_size, unsigned long full_cache_mask,
  69                          unsigned long current_mask)
  70 {
  71         char *token_array[8], temp[512];
  72         __u64 sum_llc_perf_miss = 0;
  73         __s64 prev_avg_llc_val = 0;
  74         unsigned long alloc_size;
  75         int runs = 0;
  76         int fail = 0;
  77         int ret;
  78         FILE *fp;
  79
  80         ksft_print_msg("Checking for pass/fail\n");
  81         fp = fopen(param->filename, "r");
  82         if (!fp) {
  83                 ksft_perror("Cannot open file");
  84
  85                 return -1;
  86         }
  87
  88         while (fgets(temp, sizeof(temp), fp)) {
  89                 char *token = strtok(temp, ":\t");
  90                 int fields = 0;
  91                 int bits;
  92
  93                 while (token) {
  94                         token_array[fields++] = token;
  95                         token = strtok(NULL, ":\t");
  96                 }
  97
  98                 sum_llc_perf_miss += strtoull(token_array[3], NULL, 0);
  99                 runs++;
 100
 101                 if (runs < NUM_OF_RUNS)
 102                         continue;
 103
 104                 if (!current_mask) {
 105                         ksft_print_msg("Unexpected empty cache mask\n");
 106                         break;
 107                 }
 108
 109                 alloc_size = cache_portion_size(cache_total_size, current_mask, full_cache_mask);
 110
 111                 bits = count_bits(current_mask);
 112
 113                 ret = show_results_info(sum_llc_perf_miss, bits,
 114                                         alloc_size / 64,
 115                                         MIN_DIFF_PERCENT_PER_BIT * (bits - 1),
 116                                         runs, get_vendor() == ARCH_INTEL,
 117                                         &prev_avg_llc_val);
 118                 if (ret)
 119                         fail = 1;
 120
 121                 runs = 0;
 122                 sum_llc_perf_miss = 0;
 123                 current_mask = next_mask(current_mask);
 124         }
 125
 126         fclose(fp);
 127
 128         return fail;
 129 }
 130
 131 static void cat_test_cleanup(void)
 132 {
 133         remove(RESULT_FILE_NAME);
 134 }
 135
 136 /*
 137  * cat_test - Execute CAT benchmark and measure cache misses
 138  * @test:               Test information structure
 139  * @uparams:            User supplied parameters
 140  * @param:              Parameters passed to cat_test()
 141  * @span:               Buffer size for the benchmark
 142  * @current_mask        Start mask for the first iteration
 143  *
 144  * Run CAT selftest by varying the allocated cache portion and comparing the
 145  * impact on cache misses (the result analysis is done in check_results()
 146  * and show_results_info(), not in this function).
 147  *
 148  * One bit is removed from the CAT allocation bit mask (in current_mask) for
 149  * each subsequent test which keeps reducing the size of the allocated cache
 150  * portion. A single test flushes the buffer, reads it to warm up the cache,
 151  * and reads the buffer again. The cache misses are measured during the last
 152  * read pass.
 153  *
 154  * Return:              0 when the test was run, < 0 on error.
 155  */
 156 static int cat_test(const struct resctrl_test *test,
 157                     const struct user_params *uparams,
 158                     struct resctrl_val_param *param,
 159                     size_t span, unsigned long current_mask)
 160 {
 161         struct perf_event_read pe_read;
 162         struct perf_event_attr pea;
 163         cpu_set_t old_affinity;
 164         unsigned char *buf;
 165         char schemata[64];
 166         int ret, i, pe_fd;
 167         pid_t bm_pid;
 168
 169         if (strcmp(param->filename, "") == 0)
 170                 sprintf(param->filename, "stdio");
 171
 172         bm_pid = getpid();
 173
 174         /* Taskset benchmark to specified cpu */
 175         ret = taskset_benchmark(bm_pid, uparams->cpu, &old_affinity);
 176         if (ret)
 177                 return ret;
 178
 179         /* Write benchmark to specified con_mon grp, mon_grp in resctrl FS*/
 180         ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp);
 181         if (ret)
 182                 goto reset_affinity;
 183
 184         perf_event_attr_initialize(&pea, PERF_COUNT_HW_CACHE_MISSES);
 185         perf_event_initialize_read_format(&pe_read);
 186         pe_fd = perf_open(&pea, bm_pid, uparams->cpu);
 187         if (pe_fd < 0) {
 188                 ret = -1;
 189                 goto reset_affinity;
 190         }
 191
 192         buf = alloc_buffer(span, 1);
 193         if (!buf) {
 194                 ret = -1;
 195                 goto pe_close;
 196         }
 197
 198         while (current_mask) {
 199                 snprintf(schemata, sizeof(schemata), "%lx", param->mask & ~current_mask);
 200                 ret = write_schemata("", schemata, uparams->cpu, test->resource);
 201                 if (ret)
 202                         goto free_buf;
 203                 snprintf(schemata, sizeof(schemata), "%lx", current_mask);
 204                 ret = write_schemata(param->ctrlgrp, schemata, uparams->cpu, test->resource);
 205                 if (ret)
 206                         goto free_buf;
 207
 208                 for (i = 0; i < NUM_OF_RUNS; i++) {
 209                         mem_flush(buf, span);
 210                         fill_cache_read(buf, span, true);
 211
 212                         ret = perf_event_reset_enable(pe_fd);
 213                         if (ret)
 214                                 goto free_buf;
 215
 216                         fill_cache_read(buf, span, true);
 217
 218                         ret = perf_event_measure(pe_fd, &pe_read, param->filename, bm_pid);
 219                         if (ret)
 220                                 goto free_buf;
 221                 }
 222                 current_mask = next_mask(current_mask);
 223         }
 224
 225 free_buf:
 226         free(buf);
 227 pe_close:
 228         close(pe_fd);
 229 reset_affinity:
 230         taskset_restore(bm_pid, &old_affinity);
 231
 232         return ret;
 233 }
 234
 235 static int cat_run_test(const struct resctrl_test *test, const struct user_params *uparams)
 236 {
 237         unsigned long long_mask, start_mask, full_cache_mask;
 238         unsigned long cache_total_size = 0;
 239         int n = uparams->bits;
 240         unsigned int start;
 241         int count_of_bits;
 242         size_t span;
 243         int ret;
 244
 245         ret = get_full_cbm(test->resource, &full_cache_mask);
 246         if (ret)
 247                 return ret;
 248         /* Get the largest contiguous exclusive portion of the cache */
 249         ret = get_mask_no_shareable(test->resource, &long_mask);
 250         if (ret)
 251                 return ret;
 252
 253         /* Get L3/L2 cache size */
 254         ret = get_cache_size(uparams->cpu, test->resource, &cache_total_size);
 255         if (ret)
 256                 return ret;
 257         ksft_print_msg("Cache size :%lu\n", cache_total_size);
 258
 259         count_of_bits = count_contiguous_bits(long_mask, &start);
 260
 261         if (!n)
 262                 n = count_of_bits / 2;
 263
 264         if (n > count_of_bits - 1) {
 265                 ksft_print_msg("Invalid input value for no_of_bits n!\n");
 266                 ksft_print_msg("Please enter value in range 1 to %d\n",
 267                                count_of_bits - 1);
 268                 return -1;
 269         }
 270         start_mask = create_bit_mask(start, n);
 271
 272         struct resctrl_val_param param = {
 273                 .ctrlgrp        = "c1",
 274                 .filename       = RESULT_FILE_NAME,
 275                 .num_of_runs    = 0,
 276         };
 277         param.mask = long_mask;
 278         span = cache_portion_size(cache_total_size, start_mask, full_cache_mask);
 279
 280         remove(param.filename);
 281
 282         ret = cat_test(test, uparams, &param, span, start_mask);
 283         if (ret)
 284                 return ret;
 285
 286         ret = check_results(&param, test->resource,
 287                             cache_total_size, full_cache_mask, start_mask);
 288         return ret;
 289 }
 290
 291 static bool arch_supports_noncont_cat(const struct resctrl_test *test)
 292 {
 293         /* AMD always supports non-contiguous CBM. */
 294         if (get_vendor() == ARCH_AMD)
 295                 return true;
 296
 297 #if defined(__i386__) || defined(__x86_64__) /* arch */
 298         unsigned int eax, ebx, ecx, edx;
 299         /* Intel support for non-contiguous CBM needs to be discovered. */
 300         if (!strcmp(test->resource, "L3"))
 301                 __cpuid_count(0x10, 1, eax, ebx, ecx, edx);
 302         else if (!strcmp(test->resource, "L2"))
 303                 __cpuid_count(0x10, 2, eax, ebx, ecx, edx);
 304         else
 305                 return false;
 306
 307         return ((ecx >> 3) & 1);
 308 #endif /* end arch */
 309
 310         return false;
 311 }
 312
 313 static int noncont_cat_run_test(const struct resctrl_test *test,
 314                                 const struct user_params *uparams)
 315 {
 316         unsigned long full_cache_mask, cont_mask, noncont_mask;
 317         unsigned int sparse_masks;
 318         int bit_center, ret;
 319         char schemata[64];
 320
 321         /* Check to compare sparse_masks content to CPUID output. */
 322         ret = resource_info_unsigned_get(test->resource, "sparse_masks", &sparse_masks);
 323         if (ret)
 324                 return ret;
 325
 326         if (arch_supports_noncont_cat(test) != sparse_masks) {
 327                 ksft_print_msg("Hardware and kernel differ on non-contiguous CBM support!\n");
 328                 return 1;
 329         }
 330
 331         /* Write checks initialization. */
 332         ret = get_full_cbm(test->resource, &full_cache_mask);
 333         if (ret < 0)
 334                 return ret;
 335         bit_center = count_bits(full_cache_mask) / 2;
 336
 337         /*
 338          * The bit_center needs to be at least 3 to properly calculate the CBM
 339          * hole in the noncont_mask. If it's smaller return an error since the
 340          * cache mask is too short and that shouldn't happen.
 341          */
 342         if (bit_center < 3)
 343                 return -EINVAL;
 344         cont_mask = full_cache_mask >> bit_center;
 345
 346         /* Contiguous mask write check. */
 347         snprintf(schemata, sizeof(schemata), "%lx", cont_mask);
 348         ret = write_schemata("", schemata, uparams->cpu, test->resource);
 349         if (ret) {
 350                 ksft_print_msg("Write of contiguous CBM failed\n");
 351                 return 1;
 352         }
 353
 354         /*
 355          * Non-contiguous mask write check. CBM has a 0xf hole approximately in the middle.
 356          * Output is compared with support information to catch any edge case errors.
 357          */
 358         noncont_mask = ~(0xfUL << (bit_center - 2)) & full_cache_mask;
 359         snprintf(schemata, sizeof(schemata), "%lx", noncont_mask);
 360         ret = write_schemata("", schemata, uparams->cpu, test->resource);
 361         if (ret && sparse_masks)
 362                 ksft_print_msg("Non-contiguous CBMs supported but write of non-contiguous CBM failed\n");
 363         else if (ret && !sparse_masks)
 364                 ksft_print_msg("Non-contiguous CBMs not supported and write of non-contiguous CBM failed as expected\n");
 365         else if (!ret && !sparse_masks)
 366                 ksft_print_msg("Non-contiguous CBMs not supported but write of non-contiguous CBM succeeded\n");
 367
 368         return !ret == !sparse_masks;
 369 }
 370
 371 static bool noncont_cat_feature_check(const struct resctrl_test *test)
 372 {
 373         if (!resctrl_resource_exists(test->resource))
 374                 return false;
 375
 376         return resource_info_file_exists(test->resource, "sparse_masks");
 377 }
 378
 379 struct resctrl_test l3_cat_test = {
 380         .name = "L3_CAT",
 381         .group = "CAT",
 382         .resource = "L3",
 383         .feature_check = test_resource_feature_check,
 384         .run_test = cat_run_test,
 385         .cleanup = cat_test_cleanup,
 386 };
 387
 388 struct resctrl_test l3_noncont_cat_test = {
 389         .name = "L3_NONCONT_CAT",
 390         .group = "CAT",
 391         .resource = "L3",
 392         .feature_check = noncont_cat_feature_check,
 393         .run_test = noncont_cat_run_test,
 394 };
 395
 396 struct resctrl_test l2_noncont_cat_test = {
 397         .name = "L2_NONCONT_CAT",
 398         .group = "CAT",
 399         .resource = "L2",
 400         .feature_check = noncont_cat_feature_check,
 401         .run_test = noncont_cat_run_test,
 402 };