// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018 Intel Corporation
 *
 * Authors:
 *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
 *    Fenghua Yu <fenghua.yu@intel.com>
 */
#include <stdlib.h>
#include <stdbool.h>
#include <inttypes.h>
#include <sys/types.h>

#include "resctrl.h"	/* get_cache_size(), MINIMUM_SPAN, value_sink */
#define CL_SIZE		(64)	/* x86 cacheline size in bytes */
#define PAGE_SIZE	(4 * 1024)
#define MB		(1024 * 1024)
/* Store fence: make the preceding cacheline flushes globally visible. */
static void sb(void)
{
#if defined(__i386) || defined(__x86_64)
	asm volatile("sfence\n\t"
		     : : : "memory");
#endif
}
/* Flush a single cacheline out of the cache hierarchy. */
static void cl_flush(void *p)
{
#if defined(__i386) || defined(__x86_64)
	asm volatile("clflush (%0)\n\t"
		     : : "r"(p) : "memory");
#endif
}
void mem_flush(unsigned char *buf, size_t buf_size)
{
	unsigned char *cp = buf;
	size_t i = 0;

	buf_size = buf_size / CL_SIZE; /* mem size in cache lines */

	for (i = 0; i < buf_size; i++)
		cl_flush(&cp[i * CL_SIZE]);

	sb();
}
/*
 * Buffer index step advance to work around HW prefetching interfering with
 * the measurements.
 *
 * Must be a prime to step through all indexes of the buffer.
 *
 * Some primes work better than others on some architectures (from MBA/MBM
 * result stability point of view).
 */
#define FILL_IDX_MULT	23
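/*
 * Worked example (editor's note): for a hypothetical size of 8 half-
 * cachelines, "i * FILL_IDX_MULT % size" visits the indexes
 *	0, 7, 6, 5, 4, 3, 2, 1
 * i.e. each exactly once, because gcd(23, 8) == 1. A non-prime step such
 * as 24 would keep hitting index 0 and leave the rest of the buffer cold.
 */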
static int fill_one_span_read(unsigned char *buf, size_t buf_size)
{
	unsigned int size = buf_size / (CL_SIZE / 2);
	unsigned int i, idx = 0;
	unsigned char sum = 0;

	/*
	 * Read the buffer in an order that is unexpected by HW prefetching
	 * optimizations to prevent them interfering with the caching pattern.
	 *
	 * The read order is (in terms of halves of cachelines):
	 *	i * FILL_IDX_MULT % size
	 * The formula is open-coded below to avoid a modulo inside the loop
	 * as it improves MBA/MBM result stability on some architectures.
	 */
	for (i = 0; i < size; i++) {
		sum += buf[idx * (CL_SIZE / 2)];

		/* Open-coded "(i + 1) * FILL_IDX_MULT % size" for the next read */
		idx += FILL_IDX_MULT;
		while (idx >= size)
			idx -= size;
	}

	return sum;
}
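/*
 * fill_cache_read - Read the whole buffer in a loop; stop after one pass
 * when @once is set, otherwise keep reading until the task is killed.
 */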
void fill_cache_read(unsigned char *buf, size_t buf_size, bool once)
{
	int ret = 0;

	while (1) {
		ret = fill_one_span_read(buf, buf_size);
		if (once)
			break;
	}

	/* Consume read result so that reading memory is not optimized out. */
	*value_sink = ret;
}
unsigned char *alloc_buffer(size_t buf_size, bool memflush)
{
	void *buf = NULL;
	uint64_t *p64;
	ssize_t s64;
	int ret;

	ret = posix_memalign(&buf, PAGE_SIZE, buf_size);
	if (ret) /* posix_memalign() returns a positive errno on failure */
		return NULL;
	/* Initialize the buffer: dirty one word in each cacheline */
	p64 = buf;
	s64 = buf_size / sizeof(uint64_t);

	while (s64 > 0) {
		*p64 = (uint64_t)rand();
		p64 += (CL_SIZE / sizeof(uint64_t));
		s64 -= (CL_SIZE / sizeof(uint64_t));
	}
	/* Flush the memory before using to avoid "cache hot pages" effect */
	if (memflush)
		mem_flush(buf, buf_size);

	return buf;
}
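/*
 * get_fill_buf_size - Return the buffer span to use for the given cache:
 * twice the total size of @cache_type on @cpu_no, but never less than
 * MINIMUM_SPAN. On error the code from get_cache_size() is passed through.
 */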
ssize_t get_fill_buf_size(int cpu_no, const char *cache_type)
{
	unsigned long cache_total_size = 0;
	int ret;

	ret = get_cache_size(cpu_no, cache_type, &cache_total_size);
	if (ret)
		return ret;
	return cache_total_size * 2 > MINIMUM_SPAN ?
	       cache_total_size * 2 : MINIMUM_SPAN;
}
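/*
 * Usage sketch (editor's illustration, not part of the original file):
 *
 *	ssize_t buf_size = get_fill_buf_size(cpu_no, "L3");
 *	unsigned char *buf;
 *
 *	if (buf_size < 0)
 *		return buf_size;
 *	buf = alloc_buffer(buf_size, true);
 *	if (!buf)
 *		return -1;
 *	fill_cache_read(buf, buf_size, true);
 *	free(buf);
 *
 * Passing once=false instead makes fill_cache_read() loop indefinitely,
 * for use as a continuous memory-bandwidth workload.
 */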