4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or https://opensource.org/licenses/CDDL-1.0.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved.
24 * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
27 #ifndef _LIBSPL_SYS_SIMD_H
28 #define _LIBSPL_SYS_SIMD_H
30 #include <sys/isa_defs.h>
31 #include <sys/types.h>
33 /* including <sys/auxv.h> clashes with AT_UID and others */
34 #if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__)
35 #if defined(__FreeBSD__)
/*
 * FreeBSD has no getauxval(3); emulate it on top of elf_aux_info(3),
 * which copies the requested aux-vector entry into *buf and returns 0
 * on success.
 */
extern int elf_aux_info(int aux, void *buf, int buflen);

static inline unsigned long
getauxval(unsigned long key)
{
	unsigned long val = 0UL;

	/* On lookup failure report "no capabilities" rather than garbage. */
	if (elf_aux_info((int)key, &val, sizeof (val)) != 0)
		return (0UL);

	return (val);
}
48 #elif defined(__linux__)
51 extern unsigned long getauxval(unsigned long type
);
52 #endif /* __linux__ */
53 #endif /* arm || aarch64 || powerpc */
/*
 * User-space code may always use SIMD on x86: there is no kernel FPU
 * context to save/restore, so the kfpu_* hooks are no-ops.
 * kfpu_init() returns 0 because initialization cannot fail here
 * (restored to pair with kfpu_fini(), which callers invoke together).
 */
#define	kfpu_allowed()		1
#define	kfpu_begin()		do {} while (0)
#define	kfpu_end()		do {} while (0)
#define	kfpu_init()		0
#define	kfpu_fini()		((void) 0)
/*
 * CPUID feature tests for user-space.
 *
 * x86 registers used implicitly by CPUID; CPUID_REG_CNT sizes the
 * result array passed to __cpuid_count().
 */
typedef enum cpuid_regs {
	EAX = 0,
	EBX,
	ECX,
	EDX,
	CPUID_REG_CNT = 4
} cpuid_regs_t;
/*
 * List of instruction sets identified by CPUID.  These index the
 * cpuid_features[] descriptor table below.
 */
typedef enum cpuid_inst_sets {
	SSE = 0,
	SSE2,
	SSE3,
	SSSE3,
	SSE4_1,
	SSE4_2,
	OSXSAVE,
	AVX,
	AVX2,
	BMI1,
	BMI2,
	AVX512F,
	AVX512CD,
	AVX512DQ,
	AVX512BW,
	AVX512IFMA,
	AVX512VBMI,
	AVX512PF,
	AVX512ER,
	AVX512VL,
	AES,
	PCLMULQDQ,
	MOVBE,
	SHA_NI
} cpuid_inst_sets_t;
108 * Instruction set descriptor.
110 typedef struct cpuid_feature_desc
{
111 uint32_t leaf
; /* CPUID leaf */
112 uint32_t subleaf
; /* CPUID sub-leaf */
113 uint32_t flag
; /* bit mask of the feature */
114 cpuid_regs_t reg
; /* which CPUID return register to test */
115 } cpuid_feature_desc_t
;
/*
 * Feature flag bit masks within the CPUID return registers.  The
 * AVX-512 masks that OR in _AVX512F_BIT live in the same leaf-7 EBX
 * word as AVX512F, so testing them also verifies the foundation bit.
 */
#define	_AVX512F_BIT		(1U << 16)
#define	_AVX512CD_BIT		(_AVX512F_BIT | (1U << 28))
#define	_AVX512DQ_BIT		(_AVX512F_BIT | (1U << 17))
#define	_AVX512BW_BIT		(_AVX512F_BIT | (1U << 30))
#define	_AVX512IFMA_BIT		(_AVX512F_BIT | (1U << 21))
#define	_AVX512VBMI_BIT		(1U << 1) /* AVX512F_BIT is on another leaf */
#define	_AVX512PF_BIT		(_AVX512F_BIT | (1U << 26))
#define	_AVX512ER_BIT		(_AVX512F_BIT | (1U << 27))
#define	_AVX512VL_BIT		(1U << 31) /* if used also check other levels */
#define	_AES_BIT		(1U << 25)
#define	_PCLMULQDQ_BIT		(1U << 1)
#define	_MOVBE_BIT		(1U << 22)
#define	_SHA_NI_BIT		(1U << 29)
132 * Descriptions of supported instruction sets
134 static const cpuid_feature_desc_t cpuid_features
[] = {
135 [SSE
] = {1U, 0U, 1U << 25, EDX
},
136 [SSE2
] = {1U, 0U, 1U << 26, EDX
},
137 [SSE3
] = {1U, 0U, 1U << 0, ECX
},
138 [SSSE3
] = {1U, 0U, 1U << 9, ECX
},
139 [SSE4_1
] = {1U, 0U, 1U << 19, ECX
},
140 [SSE4_2
] = {1U, 0U, 1U << 20, ECX
},
141 [OSXSAVE
] = {1U, 0U, 1U << 27, ECX
},
142 [AVX
] = {1U, 0U, 1U << 28, ECX
},
143 [AVX2
] = {7U, 0U, 1U << 5, EBX
},
144 [BMI1
] = {7U, 0U, 1U << 3, EBX
},
145 [BMI2
] = {7U, 0U, 1U << 8, EBX
},
146 [AVX512F
] = {7U, 0U, _AVX512F_BIT
, EBX
},
147 [AVX512CD
] = {7U, 0U, _AVX512CD_BIT
, EBX
},
148 [AVX512DQ
] = {7U, 0U, _AVX512DQ_BIT
, EBX
},
149 [AVX512BW
] = {7U, 0U, _AVX512BW_BIT
, EBX
},
150 [AVX512IFMA
] = {7U, 0U, _AVX512IFMA_BIT
, EBX
},
151 [AVX512VBMI
] = {7U, 0U, _AVX512VBMI_BIT
, ECX
},
152 [AVX512PF
] = {7U, 0U, _AVX512PF_BIT
, EBX
},
153 [AVX512ER
] = {7U, 0U, _AVX512ER_BIT
, EBX
},
154 [AVX512VL
] = {7U, 0U, _AVX512ER_BIT
, EBX
},
155 [AES
] = {1U, 0U, _AES_BIT
, ECX
},
156 [PCLMULQDQ
] = {1U, 0U, _PCLMULQDQ_BIT
, ECX
},
157 [MOVBE
] = {1U, 0U, _MOVBE_BIT
, ECX
},
158 [SHA_NI
] = {7U, 0U, _SHA_NI_BIT
, EBX
},
/*
 * Check if OS supports AVX and AVX2 by checking XCR0
 * Only call this function if CPUID indicates that AVX feature is
 * supported by the CPU, otherwise it might be an illegal instruction.
 *
 * Returns the 64-bit extended control register selected by 'index'
 * (index 0 = XCR0, which reports OS-enabled XSAVE state components).
 */
static inline uint64_t
xgetbv(uint32_t index)
{
	uint32_t eax, edx;
	/* xgetbv - instruction byte code (for toolchains without mnemonic) */
	__asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0"
	    : "=a" (eax), "=d" (edx)
	    : "c" (index));

	return ((((uint64_t)edx)<<32) | (uint64_t)eax);
}
179 * Check if CPU supports a feature
181 static inline boolean_t
182 __cpuid_check_feature(const cpuid_feature_desc_t
*desc
)
184 uint32_t r
[CPUID_REG_CNT
];
186 if (__get_cpuid_max(0, NULL
) >= desc
->leaf
) {
188 * __cpuid_count is needed to properly check
189 * for AVX2. It is a macro, so return parameters
190 * are passed by value.
192 __cpuid_count(desc
->leaf
, desc
->subleaf
,
193 r
[EAX
], r
[EBX
], r
[ECX
], r
[EDX
]);
194 return ((r
[desc
->reg
] & desc
->flag
) == desc
->flag
);
/*
 * Generate a zero-argument predicate __cpuid_has_<name>() that tests
 * the cpuid_features[] entry selected by 'id'.
 */
#define	CPUID_FEATURE_CHECK(name, id)				\
static inline boolean_t						\
__cpuid_has_ ## name(void)					\
{								\
	return (__cpuid_check_feature(&cpuid_features[id]));	\
}
/*
 * Define functions for user-space CPUID features testing
 */
CPUID_FEATURE_CHECK(sse, SSE);
CPUID_FEATURE_CHECK(sse2, SSE2);
CPUID_FEATURE_CHECK(sse3, SSE3);
CPUID_FEATURE_CHECK(ssse3, SSSE3);
CPUID_FEATURE_CHECK(sse4_1, SSE4_1);
CPUID_FEATURE_CHECK(sse4_2, SSE4_2);
CPUID_FEATURE_CHECK(avx, AVX);
CPUID_FEATURE_CHECK(avx2, AVX2);
CPUID_FEATURE_CHECK(osxsave, OSXSAVE);
CPUID_FEATURE_CHECK(bmi1, BMI1);
CPUID_FEATURE_CHECK(bmi2, BMI2);
CPUID_FEATURE_CHECK(avx512f, AVX512F);
CPUID_FEATURE_CHECK(avx512cd, AVX512CD);
CPUID_FEATURE_CHECK(avx512dq, AVX512DQ);
CPUID_FEATURE_CHECK(avx512bw, AVX512BW);
CPUID_FEATURE_CHECK(avx512ifma, AVX512IFMA);
CPUID_FEATURE_CHECK(avx512vbmi, AVX512VBMI);
CPUID_FEATURE_CHECK(avx512pf, AVX512PF);
CPUID_FEATURE_CHECK(avx512er, AVX512ER);
CPUID_FEATURE_CHECK(avx512vl, AVX512VL);
CPUID_FEATURE_CHECK(aes, AES);
CPUID_FEATURE_CHECK(pclmulqdq, PCLMULQDQ);
CPUID_FEATURE_CHECK(movbe, MOVBE);
CPUID_FEATURE_CHECK(shani, SHA_NI);
235 * Detect register set support
237 static inline boolean_t
238 __simd_state_enabled(const uint64_t state
)
240 boolean_t has_osxsave
;
243 has_osxsave
= __cpuid_has_osxsave();
248 return ((xcr0
& state
) == state
);
251 #define _XSTATE_SSE_AVX (0x2 | 0x4)
252 #define _XSTATE_AVX512 (0xE0 | _XSTATE_SSE_AVX)
254 #define __ymm_enabled() __simd_state_enabled(_XSTATE_SSE_AVX)
255 #define __zmm_enabled() __simd_state_enabled(_XSTATE_AVX512)
258 * Check if SSE instruction set is available
260 static inline boolean_t
261 zfs_sse_available(void)
263 return (__cpuid_has_sse());
267 * Check if SSE2 instruction set is available
269 static inline boolean_t
270 zfs_sse2_available(void)
272 return (__cpuid_has_sse2());
276 * Check if SSE3 instruction set is available
278 static inline boolean_t
279 zfs_sse3_available(void)
281 return (__cpuid_has_sse3());
285 * Check if SSSE3 instruction set is available
287 static inline boolean_t
288 zfs_ssse3_available(void)
290 return (__cpuid_has_ssse3());
294 * Check if SSE4.1 instruction set is available
296 static inline boolean_t
297 zfs_sse4_1_available(void)
299 return (__cpuid_has_sse4_1());
303 * Check if SSE4.2 instruction set is available
305 static inline boolean_t
306 zfs_sse4_2_available(void)
308 return (__cpuid_has_sse4_2());
312 * Check if AVX instruction set is available
314 static inline boolean_t
315 zfs_avx_available(void)
317 return (__cpuid_has_avx() && __ymm_enabled());
321 * Check if AVX2 instruction set is available
323 static inline boolean_t
324 zfs_avx2_available(void)
326 return (__cpuid_has_avx2() && __ymm_enabled());
330 * Check if BMI1 instruction set is available
332 static inline boolean_t
333 zfs_bmi1_available(void)
335 return (__cpuid_has_bmi1());
339 * Check if BMI2 instruction set is available
341 static inline boolean_t
342 zfs_bmi2_available(void)
344 return (__cpuid_has_bmi2());
348 * Check if AES instruction set is available
350 static inline boolean_t
351 zfs_aes_available(void)
353 return (__cpuid_has_aes());
357 * Check if PCLMULQDQ instruction set is available
359 static inline boolean_t
360 zfs_pclmulqdq_available(void)
362 return (__cpuid_has_pclmulqdq());
366 * Check if MOVBE instruction is available
368 static inline boolean_t
369 zfs_movbe_available(void)
371 return (__cpuid_has_movbe());
375 * Check if SHA_NI instruction is available
377 static inline boolean_t
378 zfs_shani_available(void)
380 return (__cpuid_has_shani());
384 * AVX-512 family of instruction sets:
387 * AVX512CD Conflict Detection Instructions
388 * AVX512ER Exponential and Reciprocal Instructions
389 * AVX512PF Prefetch Instructions
391 * AVX512BW Byte and Word Instructions
392 * AVX512DQ Double-word and Quadword Instructions
393 * AVX512VL Vector Length Extensions
395 * AVX512IFMA Integer Fused Multiply Add (Not supported by kernel 4.4)
396 * AVX512VBMI Vector Byte Manipulation Instructions
400 * Check if AVX512F instruction set is available
402 static inline boolean_t
403 zfs_avx512f_available(void)
405 return (__cpuid_has_avx512f() && __zmm_enabled());
409 * Check if AVX512CD instruction set is available
411 static inline boolean_t
412 zfs_avx512cd_available(void)
414 return (__cpuid_has_avx512cd() && __zmm_enabled());
418 * Check if AVX512ER instruction set is available
420 static inline boolean_t
421 zfs_avx512er_available(void)
423 return (__cpuid_has_avx512er() && __zmm_enabled());
427 * Check if AVX512PF instruction set is available
429 static inline boolean_t
430 zfs_avx512pf_available(void)
432 return (__cpuid_has_avx512pf() && __zmm_enabled());
436 * Check if AVX512BW instruction set is available
438 static inline boolean_t
439 zfs_avx512bw_available(void)
441 return (__cpuid_has_avx512bw() && __zmm_enabled());
445 * Check if AVX512DQ instruction set is available
447 static inline boolean_t
448 zfs_avx512dq_available(void)
450 return (__cpuid_has_avx512dq() && __zmm_enabled());
454 * Check if AVX512VL instruction set is available
456 static inline boolean_t
457 zfs_avx512vl_available(void)
459 return (__cpuid_has_avx512vl() && __zmm_enabled());
463 * Check if AVX512IFMA instruction set is available
465 static inline boolean_t
466 zfs_avx512ifma_available(void)
468 return (__cpuid_has_avx512ifma() && __zmm_enabled());
472 * Check if AVX512VBMI instruction set is available
474 static inline boolean_t
475 zfs_avx512vbmi_available(void)
477 return (__cpuid_has_avx512f() && __cpuid_has_avx512vbmi() &&
481 #elif defined(__arm__)
/*
 * 32-bit ARM: user-space SIMD needs no kernel FPU management; the
 * kfpu_* hooks are no-ops.  HWCAP bits below match the Linux ELF
 * hwcap definitions for this architecture.
 */
#define	kfpu_allowed()		1
#define	kfpu_initialize(tsk)	do {} while (0)
#define	kfpu_begin()		do {} while (0)
#define	kfpu_end()		do {} while (0)

#define	HWCAP_NEON		0x00001000
#define	HWCAP2_SHA2		0x00000008
492 * Check if NEON is available
494 static inline boolean_t
495 zfs_neon_available(void)
497 unsigned long hwcap
= getauxval(AT_HWCAP
);
498 return (hwcap
& HWCAP_NEON
);
502 * Check if SHA2 is available
504 static inline boolean_t
505 zfs_sha256_available(void)
507 unsigned long hwcap
= getauxval(AT_HWCAP
);
508 return (hwcap
& HWCAP2_SHA2
);
511 #elif defined(__aarch64__)
/*
 * AArch64: user-space SIMD needs no kernel FPU management; the kfpu_*
 * hooks are no-ops.  HWCAP bits below match the Linux arm64 ELF hwcap
 * definitions (AT_HWCAP word).
 */
#define	kfpu_allowed()		1
#define	kfpu_initialize(tsk)	do {} while (0)
#define	kfpu_begin()		do {} while (0)
#define	kfpu_end()		do {} while (0)

#define	HWCAP_FP		0x00000001
#define	HWCAP_SHA2		0x00000040
#define	HWCAP_SHA512		0x00200000
523 * Check if NEON is available
525 static inline boolean_t
526 zfs_neon_available(void)
528 unsigned long hwcap
= getauxval(AT_HWCAP
);
529 return (hwcap
& HWCAP_FP
);
533 * Check if SHA2 is available
535 static inline boolean_t
536 zfs_sha256_available(void)
538 unsigned long hwcap
= getauxval(AT_HWCAP
);
539 return (hwcap
& HWCAP_SHA2
);
543 * Check if SHA512 is available
545 static inline boolean_t
546 zfs_sha512_available(void)
548 unsigned long hwcap
= getauxval(AT_HWCAP
);
549 return (hwcap
& HWCAP_SHA512
);
552 #elif defined(__powerpc__)
/*
 * PowerPC: SIMD use is disallowed here (kfpu_allowed() == 0); the
 * remaining kfpu_* hooks are no-ops.  Feature bits below match the
 * Linux powerpc AT_HWCAP/AT_HWCAP2 definitions.
 */
#define	kfpu_allowed()			0
#define	kfpu_initialize(tsk)		do {} while (0)
#define	kfpu_begin()			do {} while (0)
#define	kfpu_end()			do {} while (0)

#define	PPC_FEATURE_HAS_ALTIVEC		0x10000000
#define	PPC_FEATURE_HAS_VSX		0x00000080
#define	PPC_FEATURE2_ARCH_2_07		0x80000000
563 static inline boolean_t
564 zfs_altivec_available(void)
566 unsigned long hwcap
= getauxval(AT_HWCAP
);
567 return (hwcap
& PPC_FEATURE_HAS_ALTIVEC
);
570 static inline boolean_t
571 zfs_vsx_available(void)
573 unsigned long hwcap
= getauxval(AT_HWCAP
);
574 return (hwcap
& PPC_FEATURE_HAS_VSX
);
577 static inline boolean_t
578 zfs_isa207_available(void)
580 unsigned long hwcap
= getauxval(AT_HWCAP
);
581 unsigned long hwcap2
= getauxval(AT_HWCAP2
);
582 return ((hwcap
& PPC_FEATURE_HAS_VSX
) &&
583 (hwcap2
& PPC_FEATURE2_ARCH_2_07
));
/*
 * Fallback for architectures with no SIMD runtime detection here:
 * disallow SIMD use; the kfpu_* hooks are no-ops.
 */
#define	kfpu_allowed()		0
#define	kfpu_initialize(tsk)	do {} while (0)
#define	kfpu_begin()		do {} while (0)
#define	kfpu_end()		do {} while (0)
/* SIMD capability statistics setup/teardown, defined elsewhere in libspl. */
extern void simd_stat_init(void);
extern void simd_stat_fini(void);
598 #endif /* _LIBSPL_SYS_SIMD_H */