/* lib/libspl/include/sys/simd.h */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2006 Sun Microsystems, Inc.  All rights reserved.
 * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
 */
27 #ifndef _LIBSPL_SYS_SIMD_H
28 #define _LIBSPL_SYS_SIMD_H
30 #include <sys/isa_defs.h>
31 #include <sys/types.h>
33 /* including <sys/auxv.h> clashes with AT_UID and others */
34 #if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__)
35 #if defined(__FreeBSD__)
36 #define AT_HWCAP 25
37 #define AT_HWCAP2 26
38 extern int elf_aux_info(int aux, void *buf, int buflen);
39 static inline unsigned long getauxval(unsigned long key)
41 unsigned long val = 0UL;
43 if (elf_aux_info((int)key, &val, sizeof (val)) != 0)
44 return (0UL);
46 return (val);
48 #elif defined(__linux__)
49 #define AT_HWCAP 16
50 #define AT_HWCAP2 26
51 extern unsigned long getauxval(unsigned long type);
52 #endif /* __linux__ */
53 #endif /* arm || aarch64 || powerpc */
55 #if defined(__x86)
56 #include <cpuid.h>
58 #define kfpu_allowed() 1
59 #define kfpu_begin() do {} while (0)
60 #define kfpu_end() do {} while (0)
61 #define kfpu_init() 0
62 #define kfpu_fini() ((void) 0)
65 * CPUID feature tests for user-space.
67 * x86 registers used implicitly by CPUID
69 typedef enum cpuid_regs {
70 EAX = 0,
71 EBX,
72 ECX,
73 EDX,
74 CPUID_REG_CNT = 4
75 } cpuid_regs_t;
/*
 * List of instruction sets identified by CPUID
 */
typedef enum cpuid_inst_sets {
	SSE = 0,
	SSE2,
	SSE3,
	SSSE3,
	SSE4_1,
	SSE4_2,
	OSXSAVE,
	AVX,
	AVX2,
	BMI1,
	BMI2,
	AVX512F,
	AVX512CD,
	AVX512DQ,
	AVX512BW,
	AVX512IFMA,
	AVX512VBMI,
	AVX512PF,
	AVX512ER,
	AVX512VL,
	AES,
	PCLMULQDQ,
	MOVBE,
	SHA_NI
} cpuid_inst_sets_t;
108 * Instruction set descriptor.
110 typedef struct cpuid_feature_desc {
111 uint32_t leaf; /* CPUID leaf */
112 uint32_t subleaf; /* CPUID sub-leaf */
113 uint32_t flag; /* bit mask of the feature */
114 cpuid_regs_t reg; /* which CPUID return register to test */
115 } cpuid_feature_desc_t;
117 #define _AVX512F_BIT (1U << 16)
118 #define _AVX512CD_BIT (_AVX512F_BIT | (1U << 28))
119 #define _AVX512DQ_BIT (_AVX512F_BIT | (1U << 17))
120 #define _AVX512BW_BIT (_AVX512F_BIT | (1U << 30))
121 #define _AVX512IFMA_BIT (_AVX512F_BIT | (1U << 21))
122 #define _AVX512VBMI_BIT (1U << 1) /* AVX512F_BIT is on another leaf */
123 #define _AVX512PF_BIT (_AVX512F_BIT | (1U << 26))
124 #define _AVX512ER_BIT (_AVX512F_BIT | (1U << 27))
125 #define _AVX512VL_BIT (1U << 31) /* if used also check other levels */
126 #define _AES_BIT (1U << 25)
127 #define _PCLMULQDQ_BIT (1U << 1)
128 #define _MOVBE_BIT (1U << 22)
129 #define _SHA_NI_BIT (1U << 29)
132 * Descriptions of supported instruction sets
134 static const cpuid_feature_desc_t cpuid_features[] = {
135 [SSE] = {1U, 0U, 1U << 25, EDX },
136 [SSE2] = {1U, 0U, 1U << 26, EDX },
137 [SSE3] = {1U, 0U, 1U << 0, ECX },
138 [SSSE3] = {1U, 0U, 1U << 9, ECX },
139 [SSE4_1] = {1U, 0U, 1U << 19, ECX },
140 [SSE4_2] = {1U, 0U, 1U << 20, ECX },
141 [OSXSAVE] = {1U, 0U, 1U << 27, ECX },
142 [AVX] = {1U, 0U, 1U << 28, ECX },
143 [AVX2] = {7U, 0U, 1U << 5, EBX },
144 [BMI1] = {7U, 0U, 1U << 3, EBX },
145 [BMI2] = {7U, 0U, 1U << 8, EBX },
146 [AVX512F] = {7U, 0U, _AVX512F_BIT, EBX },
147 [AVX512CD] = {7U, 0U, _AVX512CD_BIT, EBX },
148 [AVX512DQ] = {7U, 0U, _AVX512DQ_BIT, EBX },
149 [AVX512BW] = {7U, 0U, _AVX512BW_BIT, EBX },
150 [AVX512IFMA] = {7U, 0U, _AVX512IFMA_BIT, EBX },
151 [AVX512VBMI] = {7U, 0U, _AVX512VBMI_BIT, ECX },
152 [AVX512PF] = {7U, 0U, _AVX512PF_BIT, EBX },
153 [AVX512ER] = {7U, 0U, _AVX512ER_BIT, EBX },
154 [AVX512VL] = {7U, 0U, _AVX512ER_BIT, EBX },
155 [AES] = {1U, 0U, _AES_BIT, ECX },
156 [PCLMULQDQ] = {1U, 0U, _PCLMULQDQ_BIT, ECX },
157 [MOVBE] = {1U, 0U, _MOVBE_BIT, ECX },
158 [SHA_NI] = {7U, 0U, _SHA_NI_BIT, EBX },
/*
 * Check if OS supports AVX and AVX2 by checking XCR0
 * Only call this function if CPUID indicates that AVX feature is
 * supported by the CPU, otherwise it might be an illegal instruction.
 */
static inline uint64_t
xgetbv(uint32_t index)
{
	uint32_t eax, edx;
	/* xgetbv - instruction byte code */
	__asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0"
	    : "=a" (eax), "=d" (edx)
	    : "c" (index));

	return ((((uint64_t)edx)<<32) | (uint64_t)eax);
}
179 * Check if CPU supports a feature
181 static inline boolean_t
182 __cpuid_check_feature(const cpuid_feature_desc_t *desc)
184 uint32_t r[CPUID_REG_CNT];
186 if (__get_cpuid_max(0, NULL) >= desc->leaf) {
188 * __cpuid_count is needed to properly check
189 * for AVX2. It is a macro, so return parameters
190 * are passed by value.
192 __cpuid_count(desc->leaf, desc->subleaf,
193 r[EAX], r[EBX], r[ECX], r[EDX]);
194 return ((r[desc->reg] & desc->flag) == desc->flag);
196 return (B_FALSE);
199 #define CPUID_FEATURE_CHECK(name, id) \
200 static inline boolean_t \
201 __cpuid_has_ ## name(void) \
203 return (__cpuid_check_feature(&cpuid_features[id])); \
207 * Define functions for user-space CPUID features testing
209 CPUID_FEATURE_CHECK(sse, SSE);
210 CPUID_FEATURE_CHECK(sse2, SSE2);
211 CPUID_FEATURE_CHECK(sse3, SSE3);
212 CPUID_FEATURE_CHECK(ssse3, SSSE3);
213 CPUID_FEATURE_CHECK(sse4_1, SSE4_1);
214 CPUID_FEATURE_CHECK(sse4_2, SSE4_2);
215 CPUID_FEATURE_CHECK(avx, AVX);
216 CPUID_FEATURE_CHECK(avx2, AVX2);
217 CPUID_FEATURE_CHECK(osxsave, OSXSAVE);
218 CPUID_FEATURE_CHECK(bmi1, BMI1);
219 CPUID_FEATURE_CHECK(bmi2, BMI2);
220 CPUID_FEATURE_CHECK(avx512f, AVX512F);
221 CPUID_FEATURE_CHECK(avx512cd, AVX512CD);
222 CPUID_FEATURE_CHECK(avx512dq, AVX512DQ);
223 CPUID_FEATURE_CHECK(avx512bw, AVX512BW);
224 CPUID_FEATURE_CHECK(avx512ifma, AVX512IFMA);
225 CPUID_FEATURE_CHECK(avx512vbmi, AVX512VBMI);
226 CPUID_FEATURE_CHECK(avx512pf, AVX512PF);
227 CPUID_FEATURE_CHECK(avx512er, AVX512ER);
228 CPUID_FEATURE_CHECK(avx512vl, AVX512VL);
229 CPUID_FEATURE_CHECK(aes, AES);
230 CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ);
231 CPUID_FEATURE_CHECK(movbe, MOVBE);
232 CPUID_FEATURE_CHECK(shani, SHA_NI);
235 * Detect register set support
237 static inline boolean_t
238 __simd_state_enabled(const uint64_t state)
240 boolean_t has_osxsave;
241 uint64_t xcr0;
243 has_osxsave = __cpuid_has_osxsave();
244 if (!has_osxsave)
245 return (B_FALSE);
247 xcr0 = xgetbv(0);
248 return ((xcr0 & state) == state);
251 #define _XSTATE_SSE_AVX (0x2 | 0x4)
252 #define _XSTATE_AVX512 (0xE0 | _XSTATE_SSE_AVX)
254 #define __ymm_enabled() __simd_state_enabled(_XSTATE_SSE_AVX)
255 #define __zmm_enabled() __simd_state_enabled(_XSTATE_AVX512)
258 * Check if SSE instruction set is available
260 static inline boolean_t
261 zfs_sse_available(void)
263 return (__cpuid_has_sse());
267 * Check if SSE2 instruction set is available
269 static inline boolean_t
270 zfs_sse2_available(void)
272 return (__cpuid_has_sse2());
276 * Check if SSE3 instruction set is available
278 static inline boolean_t
279 zfs_sse3_available(void)
281 return (__cpuid_has_sse3());
285 * Check if SSSE3 instruction set is available
287 static inline boolean_t
288 zfs_ssse3_available(void)
290 return (__cpuid_has_ssse3());
294 * Check if SSE4.1 instruction set is available
296 static inline boolean_t
297 zfs_sse4_1_available(void)
299 return (__cpuid_has_sse4_1());
303 * Check if SSE4.2 instruction set is available
305 static inline boolean_t
306 zfs_sse4_2_available(void)
308 return (__cpuid_has_sse4_2());
312 * Check if AVX instruction set is available
314 static inline boolean_t
315 zfs_avx_available(void)
317 return (__cpuid_has_avx() && __ymm_enabled());
321 * Check if AVX2 instruction set is available
323 static inline boolean_t
324 zfs_avx2_available(void)
326 return (__cpuid_has_avx2() && __ymm_enabled());
330 * Check if BMI1 instruction set is available
332 static inline boolean_t
333 zfs_bmi1_available(void)
335 return (__cpuid_has_bmi1());
339 * Check if BMI2 instruction set is available
341 static inline boolean_t
342 zfs_bmi2_available(void)
344 return (__cpuid_has_bmi2());
348 * Check if AES instruction set is available
350 static inline boolean_t
351 zfs_aes_available(void)
353 return (__cpuid_has_aes());
357 * Check if PCLMULQDQ instruction set is available
359 static inline boolean_t
360 zfs_pclmulqdq_available(void)
362 return (__cpuid_has_pclmulqdq());
366 * Check if MOVBE instruction is available
368 static inline boolean_t
369 zfs_movbe_available(void)
371 return (__cpuid_has_movbe());
375 * Check if SHA_NI instruction is available
377 static inline boolean_t
378 zfs_shani_available(void)
380 return (__cpuid_has_shani());
384 * AVX-512 family of instruction sets:
386 * AVX512F Foundation
387 * AVX512CD Conflict Detection Instructions
388 * AVX512ER Exponential and Reciprocal Instructions
389 * AVX512PF Prefetch Instructions
391 * AVX512BW Byte and Word Instructions
392 * AVX512DQ Double-word and Quadword Instructions
393 * AVX512VL Vector Length Extensions
395 * AVX512IFMA Integer Fused Multiply Add (Not supported by kernel 4.4)
396 * AVX512VBMI Vector Byte Manipulation Instructions
400 * Check if AVX512F instruction set is available
402 static inline boolean_t
403 zfs_avx512f_available(void)
405 return (__cpuid_has_avx512f() && __zmm_enabled());
409 * Check if AVX512CD instruction set is available
411 static inline boolean_t
412 zfs_avx512cd_available(void)
414 return (__cpuid_has_avx512cd() && __zmm_enabled());
418 * Check if AVX512ER instruction set is available
420 static inline boolean_t
421 zfs_avx512er_available(void)
423 return (__cpuid_has_avx512er() && __zmm_enabled());
427 * Check if AVX512PF instruction set is available
429 static inline boolean_t
430 zfs_avx512pf_available(void)
432 return (__cpuid_has_avx512pf() && __zmm_enabled());
436 * Check if AVX512BW instruction set is available
438 static inline boolean_t
439 zfs_avx512bw_available(void)
441 return (__cpuid_has_avx512bw() && __zmm_enabled());
445 * Check if AVX512DQ instruction set is available
447 static inline boolean_t
448 zfs_avx512dq_available(void)
450 return (__cpuid_has_avx512dq() && __zmm_enabled());
454 * Check if AVX512VL instruction set is available
456 static inline boolean_t
457 zfs_avx512vl_available(void)
459 return (__cpuid_has_avx512vl() && __zmm_enabled());
463 * Check if AVX512IFMA instruction set is available
465 static inline boolean_t
466 zfs_avx512ifma_available(void)
468 return (__cpuid_has_avx512ifma() && __zmm_enabled());
472 * Check if AVX512VBMI instruction set is available
474 static inline boolean_t
475 zfs_avx512vbmi_available(void)
477 return (__cpuid_has_avx512f() && __cpuid_has_avx512vbmi() &&
478 __zmm_enabled());
481 #elif defined(__arm__)
483 #define kfpu_allowed() 1
484 #define kfpu_initialize(tsk) do {} while (0)
485 #define kfpu_begin() do {} while (0)
486 #define kfpu_end() do {} while (0)
488 #define HWCAP_NEON 0x00001000
489 #define HWCAP2_SHA2 0x00000008
492 * Check if NEON is available
494 static inline boolean_t
495 zfs_neon_available(void)
497 unsigned long hwcap = getauxval(AT_HWCAP);
498 return (hwcap & HWCAP_NEON);
502 * Check if SHA2 is available
504 static inline boolean_t
505 zfs_sha256_available(void)
507 unsigned long hwcap = getauxval(AT_HWCAP);
508 return (hwcap & HWCAP2_SHA2);
511 #elif defined(__aarch64__)
513 #define kfpu_allowed() 1
514 #define kfpu_initialize(tsk) do {} while (0)
515 #define kfpu_begin() do {} while (0)
516 #define kfpu_end() do {} while (0)
518 #define HWCAP_FP 0x00000001
519 #define HWCAP_SHA2 0x00000040
520 #define HWCAP_SHA512 0x00200000
523 * Check if NEON is available
525 static inline boolean_t
526 zfs_neon_available(void)
528 unsigned long hwcap = getauxval(AT_HWCAP);
529 return (hwcap & HWCAP_FP);
533 * Check if SHA2 is available
535 static inline boolean_t
536 zfs_sha256_available(void)
538 unsigned long hwcap = getauxval(AT_HWCAP);
539 return (hwcap & HWCAP_SHA2);
543 * Check if SHA512 is available
545 static inline boolean_t
546 zfs_sha512_available(void)
548 unsigned long hwcap = getauxval(AT_HWCAP);
549 return (hwcap & HWCAP_SHA512);
552 #elif defined(__powerpc__)
554 #define kfpu_allowed() 0
555 #define kfpu_initialize(tsk) do {} while (0)
556 #define kfpu_begin() do {} while (0)
557 #define kfpu_end() do {} while (0)
559 #define PPC_FEATURE_HAS_ALTIVEC 0x10000000
560 #define PPC_FEATURE_HAS_VSX 0x00000080
561 #define PPC_FEATURE2_ARCH_2_07 0x80000000
563 static inline boolean_t
564 zfs_altivec_available(void)
566 unsigned long hwcap = getauxval(AT_HWCAP);
567 return (hwcap & PPC_FEATURE_HAS_ALTIVEC);
570 static inline boolean_t
571 zfs_vsx_available(void)
573 unsigned long hwcap = getauxval(AT_HWCAP);
574 return (hwcap & PPC_FEATURE_HAS_VSX);
577 static inline boolean_t
578 zfs_isa207_available(void)
580 unsigned long hwcap = getauxval(AT_HWCAP);
581 unsigned long hwcap2 = getauxval(AT_HWCAP2);
582 return ((hwcap & PPC_FEATURE_HAS_VSX) &&
583 (hwcap2 & PPC_FEATURE2_ARCH_2_07));
586 #else
588 #define kfpu_allowed() 0
589 #define kfpu_initialize(tsk) do {} while (0)
590 #define kfpu_begin() do {} while (0)
591 #define kfpu_end() do {} while (0)
593 #endif
595 extern void simd_stat_init(void);
596 extern void simd_stat_fini(void);
598 #endif /* _LIBSPL_SYS_SIMD_H */