6 #include <string.h> // memset
/* Scalar type aliases used by the declarations and test macros in this
   header.  Each name is declared exactly once. */
typedef unsigned char          UChar;
typedef unsigned short int     UShort;
typedef unsigned int           UInt;
typedef signed int             Int;
typedef unsigned long long int ULong;
typedef signed long long int   Long;
typedef double                 Double;
/* Return type of randFloat(), declared further down in this header. */
typedef float                  Float;
/* Half-precision floating point is not universally available, so use a
   synthesized 16 bit type.  This allows the testing framework to be shared
   across all SIMD tests.  The functions halfToSingleFPAsInt() and
   shortToSingle() below are used to create a Float16 type for testing
   purposes. */
typedef unsigned short int Float16;
/* Byte-sized boolean type together with its two constants. */
typedef unsigned char Bool;
#define False ((Bool)0)
#define True  ((Bool)1)
/* Lane type selector, passed as the `ty` argument to randV128() and to the
   functions generated by the GEN_*_TEST macros below.  Enumerator values
   start at 1234 and increase by one.
   NOTE(review): presumably HF/SF/DF name half/single/double FP lanes and
   B/H/S/D name integer lane widths -- confirm against the implementation. */
typedef
   enum { TyHF=1234, TySF, TyDF, TyB, TyH, TyS, TyD, TyNONE }
   LaneTy;

/* Vector value type used for the test data blocks.  Only the typedef name
   is introduced here; the members of union _V128 are defined elsewhere. */
typedef union _V128 V128;
46 /* Conversion based on IEEE half-precision, as described in the IEEE 754-2008
47 standard and Arm Reference Manual 'A1.4.2 Half-precision floating-point
48 formats' where hardware capability supports __fp16 (VEX_HWCAPS_ARM64_FP16
49 and VEX_HWCAPS_ARM64_VFP16 set).
51 UInt
halfToSingleFPAsInt(UShort y
);
/* Passes `imm` to halfToSingleFPAsInt() and stores the UInt result in the
   `i` member of a local union that overlays a float `f` with a UInt `i`.
   NOTE(review): the function's braces and its return statement are not
   present in this view; presumably it returns v.f -- confirm against the
   complete header. */
53 static inline float shortToSingle(UShort imm
)
55 union { float f
; UInt i
; } v
;
56 v
.i
= halfToSingleFPAsInt(imm
);
60 UChar
randUChar ( void );
/* Accumulates a ULong in `r`: on each of 8 loop iterations r is shifted left
   by 8 bits and the low 8 bits of a fresh randUChar() result are OR-ed in.
   The `ty` parameter is not referenced by any line visible here.
   NOTE(review): the declarations of `i` and `r`, the closing braces and the
   return statement are not present in this view; presumably `r` is
   returned -- confirm against the complete header. */
62 static inline ULong
randULong ( LaneTy ty
)
66 for (i
= 0; i
< 8; i
++) {
67 r
= (r
<< 8) | (ULong
)(0xFF & randUChar());
72 /* Generates a random V128. Ensures that that it contains normalised FP numbers
73 when viewed as either F16x8, F32x4 or F64x2, so that it is reasonable to use
75 void randV128 ( /*OUT*/V128
* v
, LaneTy ty
);
/* Prints the bytes v->u8[15] down to v->u8[0], in that order, each as two
   lowercase hex digits ("%02x") with no separator; the visible code prints
   no trailing newline.
   NOTE(review): the function's braces and the declaration of `i` are not
   present in this view. */
77 static inline void showV128 ( V128
* v
)
80 for (i
= 15; i
>= 0; i
--)
81 printf("%02x", (Int
)v
->u8
[i
]);
/* Takes a message string, a pointer to V128 values and a count `nBlock`,
   and loops i from 0 up to (but not including) nBlock.
   NOTE(review): the loop body and the rest of the function are not present
   in this view; presumably it prints `msg` followed by each block[i] --
   confirm against the complete header. */
84 static inline void showBlock ( const char* msg
, V128
* block
, Int nBlock
)
88 for (i
= 0; i
< nBlock
; i
++) {
/* Takes a UInt and returns a ULong.
   NOTE(review): only the signature is present in this view.  Judging by the
   name alone, it presumably replicates a 16-bit value into four 16-bit
   lanes -- confirm against the complete header. */
95 static inline ULong
dup4x16 ( UInt x
)
105 // Generate a random double- or single-precision number. About 1 time in 2,
106 // instead return a special value (+/- Inf, +/-Nan, denorm). This ensures that
107 // many of the groups of 4 calls here will return a special value.
108 Double
randDouble ( void );
109 Float
randFloat ( void );
111 void randBlock_Doubles ( V128
* block
, Int nBlock
);
112 void randBlock_Floats ( V128
* block
, Int nBlock
);
/* ---------------------------------------------------------------- */
/* -- Parameterisable test macros                                -- */
/* ---------------------------------------------------------------- */
/* DO50(_action): the visible part declares an Int counter `_qq` and
   executes `_action` 50 times.
   NOTE(review): the lines that wrap this loop (original lines 120 and
   122-123) are not present in this view, and the last visible line still
   ends in a continuation backslash. */
119 #define DO50(_action) \
121 Int _qq; for (_qq = 0; _qq < 50; _qq++) { _action ; } \
125 /* GEN_UNARY_TEST(INSN,SUFFIXD,SUFFIXN) defines a noinline static function
   test_<INSN>_<SUFFIXD>_<SUFFIXN>(LaneTy ty).  On each of ITERS iterations
   it fills block[] with 0x55 bytes, randomises block[0] and block[1] with
   randV128(), and runs an asm sequence that:
     - writes 0 to FPSR (via x30),
     - loads q7 from block[0] and q8 from block[1],
     - executes "INSN v8.SUFFIXD, v7.SUFFIXN",
     - stores q8 back at byte offset 16 (over block[1]),
     - stores FPSR at byte offset 32, later read as block[2].u32[0].
   It then prints the instruction text, block[0], block[1] and the FPSR
   value masked with 0xFFFFFF60.
   The destination register v8 is preloaded because it can sometimes be an
   input to the instruction too.
   NOTE(review): the earlier wording said the destination is set to a known
   value (0x55..55), but in the visible code block[1] is randomised after
   the memset, so v8 is loaded with random data.
   NOTE(review): the declarations of `i` and `block`, the end of the asm
   statement and the closing braces are not present in this view. */
127 #define GEN_UNARY_TEST(INSN,SUFFIXD,SUFFIXN) \
128 __attribute__((noinline)) \
129 static void test_##INSN##_##SUFFIXD##_##SUFFIXN ( LaneTy ty ) { \
131 for (i = 0; i < ITERS; i++) { \
133 memset(block, 0x55, sizeof(block)); \
134 randV128(&block[0], ty); \
135 randV128(&block[1], ty); \
136 __asm__ __volatile__( \
137 "mov x30, #0 ; msr fpsr, x30 ; " \
138 "ldr q7, [%0, #0] ; " \
139 "ldr q8, [%0, #16] ; " \
140 #INSN " v8." #SUFFIXD ", v7." #SUFFIXN " ; " \
141 "str q8, [%0, #16] ; " \
142 "mrs x30, fpsr ; str x30, [%0, #32] " \
143 : : "r"(&block[0]) : "memory", "v7", "v8", "x30" \
145 printf(#INSN " v8." #SUFFIXD ", v7." #SUFFIXN); \
146 UInt fpsr = 0xFFFFFF60 & block[2].u32[0]; \
147 showV128(&block[0]); printf(" "); \
148 showV128(&block[1]); printf(" fpsr=%08x\n", fpsr); \
153 /* GEN_BINARY_TEST(INSN,SUFFIXD,SUFFIXN,SUFFIXM) defines a noinline static
   function test_<INSN>_<SUFFIXD>_<SUFFIXN>_<SUFFIXM>(LaneTy ty).  On each of
   ITERS iterations it fills block[] with 0x55 bytes, randomises block[0],
   block[1] and block[2] with randV128(), and runs an asm sequence that:
     - writes 0 to FPSR (via x30),
     - loads q7, q8 and q9 from block[0], block[1] and block[2],
     - executes "INSN v9.SUFFIXD, v7.SUFFIXN, v8.SUFFIXM",
     - stores q9 back at byte offset 32 (over block[2]),
     - stores FPSR at byte offset 48, later read as block[3].u32[0].
   It then prints the instruction text, block[0], block[1], block[2] and the
   FPSR value masked with 0xFFFFFF60.
   The destination register v9 is preloaded because it can sometimes be an
   input to the instruction too.
   NOTE(review): the earlier wording said the destination is set to a known
   value (0x55..55), but in the visible code block[2] is randomised after
   the memset, so v9 is loaded with random data.
   NOTE(review): the declarations of `i` and `block`, the end of the asm
   statement and the closing braces are not present in this view. */
155 #define GEN_BINARY_TEST(INSN,SUFFIXD,SUFFIXN,SUFFIXM) \
156 __attribute__((noinline)) \
157 static void test_##INSN##_##SUFFIXD##_##SUFFIXN##_##SUFFIXM ( LaneTy ty ) { \
159 for (i = 0; i < ITERS; i++) { \
161 memset(block, 0x55, sizeof(block)); \
162 randV128(&block[0], ty); \
163 randV128(&block[1], ty); \
164 randV128(&block[2], ty); \
165 __asm__ __volatile__( \
166 "mov x30, #0 ; msr fpsr, x30 ; " \
167 "ldr q7, [%0, #0] ; " \
168 "ldr q8, [%0, #16] ; " \
169 "ldr q9, [%0, #32] ; " \
170 #INSN " v9." #SUFFIXD ", v7." #SUFFIXN ", v8." #SUFFIXM " ; " \
171 "str q9, [%0, #32] ; " \
172 "mrs x30, fpsr ; str x30, [%0, #48] " \
173 : : "r"(&block[0]) : "memory", "v7", "v8", "v9", "x30" \
175 printf(#INSN " v9." #SUFFIXD \
176 ", v7." #SUFFIXN ", v8." #SUFFIXM " "); \
177 UInt fpsr = 0xFFFFFF60 & block[3].u32[0]; \
178 showV128(&block[0]); printf(" "); \
179 showV128(&block[1]); printf(" "); \
180 showV128(&block[2]); printf(" fpsr=%08x\n", fpsr); \
185 /* GEN_SHIFT_TEST(INSN,SUFFIXD,SUFFIXN,AMOUNT) defines a noinline static
   function test_<INSN>_<SUFFIXD>_<SUFFIXN>_<AMOUNT>(LaneTy ty).  On each of
   ITERS iterations it fills block[] with 0x55 bytes, randomises block[0]
   and block[1] with randV128(), and runs an asm sequence that:
     - writes 0 to FPSR (via x30),
     - loads q7 from block[0] and q8 from block[1],
     - executes "INSN v8.SUFFIXD, v7.SUFFIXN, #AMOUNT",
     - stores q8 back at byte offset 16 (over block[1]),
     - stores FPSR at byte offset 32, later read as block[2].u32[0].
   It then prints the instruction text, block[0], block[1] and the FPSR
   value masked with 0xFFFFFF60.
   The destination register v8 is preloaded because it can sometimes be an
   input to the instruction too.
   NOTE(review): the earlier wording said the destination is set to a known
   value (0x55..55), but in the visible code block[1] is randomised after
   the memset, so v8 is loaded with random data.
   NOTE(review): the declarations of `i` and `block`, the end of the asm
   statement and the closing braces are not present in this view. */
187 #define GEN_SHIFT_TEST(INSN,SUFFIXD,SUFFIXN,AMOUNT) \
188 __attribute__((noinline)) \
189 static void test_##INSN##_##SUFFIXD##_##SUFFIXN##_##AMOUNT ( LaneTy ty ) { \
191 for (i = 0; i < ITERS; i++) { \
193 memset(block, 0x55, sizeof(block)); \
194 randV128(&block[0], ty); \
195 randV128(&block[1], ty); \
196 __asm__ __volatile__( \
197 "mov x30, #0 ; msr fpsr, x30 ; " \
198 "ldr q7, [%0, #0] ; " \
199 "ldr q8, [%0, #16] ; " \
200 #INSN " v8." #SUFFIXD ", v7." #SUFFIXN ", #" #AMOUNT " ; " \
201 "str q8, [%0, #16] ; " \
202 "mrs x30, fpsr ; str x30, [%0, #32] " \
203 : : "r"(&block[0]) : "memory", "v7", "v8", "x30" \
205 printf(#INSN " v8." #SUFFIXD ", v7." #SUFFIXN ", #" #AMOUNT " "); \
206 UInt fpsr = 0xFFFFFF60 & block[2].u32[0]; \
207 showV128(&block[0]); printf(" "); \
208 showV128(&block[1]); printf(" fpsr=%08x\n", fpsr); \
213 /* Generate a test that involves one integer reg and one vector reg,
   with no bias as to which is input or output.
   GEN_ONEINT_ONEVEC_TEST(TESTNAME,INSN,INTREGNO,VECREGNO) defines a noinline
   static function test_<TESTNAME>(LaneTy ty).  It asserts that INTREGNO is
   not 30, since x30 is used by the asm sequence to access FPSR.  On each of
   ITERS iterations it fills block[] with 0x55 bytes, randomises block[0] to
   block[3] with randV128(), and runs an asm sequence that:
     - writes 0 to FPSR (via x30),
     - loads q<VECREGNO> from byte offset 0 and x<INTREGNO> from offset 16,
     - stores q<VECREGNO> at offset 32 and x<INTREGNO> at offset 48,
     - stores FPSR at offset 64, later read as block[4].u32[0].
   It then prints block[0] to block[3] and the FPSR value masked with
   0xFFFFFF60.
   NOTE(review): INSN is not referenced by any line visible here; the line
   between the loads and the stores (original line 231), the declarations
   of `i` and `block`, the end of the asm statement and the closing braces
   are not present in this view. */
215 #define GEN_ONEINT_ONEVEC_TEST(TESTNAME,INSN,INTREGNO,VECREGNO) \
216 __attribute__((noinline)) \
217 static void test_##TESTNAME ( LaneTy ty ) { \
219 assert(INTREGNO != 30); \
220 for (i = 0; i < ITERS; i++) { \
222 memset(block, 0x55, sizeof(block)); \
223 randV128(&block[0], ty); \
224 randV128(&block[1], ty); \
225 randV128(&block[2], ty); \
226 randV128(&block[3], ty); \
227 __asm__ __volatile__( \
228 "mov x30, #0 ; msr fpsr, x30 ; " \
229 "ldr q"#VECREGNO", [%0, #0] ; " \
230 "ldr x"#INTREGNO", [%0, #16] ; " \
232 "str q"#VECREGNO", [%0, #32] ; " \
233 "str x"#INTREGNO", [%0, #48] ; " \
234 "mrs x30, fpsr ; str x30, [%0, #64] " \
235 : : "r"(&block[0]) : "memory", "v"#VECREGNO, "x"#INTREGNO, "x30" \
238 UInt fpsr = 0xFFFFFF60 & block[4].u32[0]; \
239 showV128(&block[0]); printf(" "); \
240 showV128(&block[1]); printf(" "); \
241 showV128(&block[2]); printf(" "); \
242 showV128(&block[3]); printf(" fpsr=%08x\n", fpsr); \
247 /* Generate a test that involves two vector regs,
   with no bias as to which is input or output.
   It's OK to use x10 as scratch (it is listed as clobbered).
   GEN_TWOVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO) defines a noinline
   static function test_<TESTNAME>(LaneTy ty).  On each of ITERS iterations
   it fills block[] with 0x55 bytes, randomises block[0] to block[3] with
   randV128(), and runs an asm sequence that:
     - writes 0 to FPSR (via x30),
     - loads q<VECREG1NO> from byte offset 0 and q<VECREG2NO> from offset 16,
     - stores q<VECREG1NO> at offset 32 and q<VECREG2NO> at offset 48,
     - stores FPSR at offset 64, later read as block[4].u32[0].
   It then prints block[0] to block[3] and the FPSR value masked with
   0xFFFFFF60.
   NOTE(review): INSN is not referenced by any line visible here; the line
   between the loads and the stores (original line 265), the asm operand
   line, the declarations of `i` and `block`, the end of the asm statement
   and the closing braces are not present in this view. */
250 #define GEN_TWOVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO) \
251 __attribute__((noinline)) \
252 static void test_##TESTNAME ( LaneTy ty ) { \
254 for (i = 0; i < ITERS; i++) { \
256 memset(block, 0x55, sizeof(block)); \
257 randV128(&block[0], ty); \
258 randV128(&block[1], ty); \
259 randV128(&block[2], ty); \
260 randV128(&block[3], ty); \
261 __asm__ __volatile__( \
262 "mov x30, #0 ; msr fpsr, x30 ; " \
263 "ldr q"#VECREG1NO", [%0, #0] ; " \
264 "ldr q"#VECREG2NO", [%0, #16] ; " \
266 "str q"#VECREG1NO", [%0, #32] ; " \
267 "str q"#VECREG2NO", [%0, #48] ; " \
268 "mrs x30, fpsr ; str x30, [%0, #64] " \
270 : "memory", "v"#VECREG1NO, "v"#VECREG2NO, "x10", "x30" \
273 UInt fpsr = 0xFFFFFF60 & block[4].u32[0]; \
274 showV128(&block[0]); printf(" "); \
275 showV128(&block[1]); printf(" "); \
276 showV128(&block[2]); printf(" "); \
277 showV128(&block[3]); printf(" fpsr=%08x\n", fpsr); \
282 /* Generate a test that involves three vector regs,
   with no bias as to which is input or output.  It's also OK
   to use v16, v17, v18 as scratch (they are listed as clobbered).
   GEN_THREEVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO,VECREG3NO) defines a
   noinline static function test_<TESTNAME>(LaneTy ty).  On each of ITERS
   iterations it fills block[] with 0x55 bytes, randomises block[0] to
   block[5] with randV128(), and runs an asm sequence that:
     - writes 0 to FPSR (via x30),
     - loads the three q registers from byte offsets 0, 16 and 32,
     - stores the same three q registers at offsets 48, 64 and 80,
     - stores FPSR at offset 96, later read as block[6].u32[0].
   It then prints block[0] to block[5] and the FPSR value masked with
   0xFFFFFF60.
   NOTE(review): INSN is not referenced by any line visible here; the line
   between the loads and the stores (original line 303), the asm operand
   line, the declarations of `i` and `block`, the end of the asm statement
   and the closing braces are not present in this view. */
285 #define GEN_THREEVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO,VECREG3NO) \
286 __attribute__((noinline)) \
287 static void test_##TESTNAME ( LaneTy ty ) { \
289 for (i = 0; i < ITERS; i++) { \
291 memset(block, 0x55, sizeof(block)); \
292 randV128(&block[0], ty); \
293 randV128(&block[1], ty); \
294 randV128(&block[2], ty); \
295 randV128(&block[3], ty); \
296 randV128(&block[4], ty); \
297 randV128(&block[5], ty); \
298 __asm__ __volatile__( \
299 "mov x30, #0 ; msr fpsr, x30 ; " \
300 "ldr q"#VECREG1NO", [%0, #0] ; " \
301 "ldr q"#VECREG2NO", [%0, #16] ; " \
302 "ldr q"#VECREG3NO", [%0, #32] ; " \
304 "str q"#VECREG1NO", [%0, #48] ; " \
305 "str q"#VECREG2NO", [%0, #64] ; " \
306 "str q"#VECREG3NO", [%0, #80] ; " \
307 "mrs x30, fpsr ; str x30, [%0, #96] " \
309 : "memory", "v"#VECREG1NO, "v"#VECREG2NO, "v"#VECREG3NO, \
310 "v16", "v17", "v18", "x30" \
313 UInt fpsr = 0xFFFFFF60 & block[6].u32[0]; \
314 showV128(&block[0]); printf(" "); \
315 showV128(&block[1]); printf(" "); \
316 showV128(&block[2]); printf(" "); \
317 showV128(&block[3]); printf(" "); \
318 showV128(&block[4]); printf(" "); \
319 showV128(&block[5]); printf(" fpsr=%08x\n", fpsr); \
324 /* Generate a test that involves four vector regs,
   with no bias as to which is input or output.  It's also OK
   to use v16, v17, v18 as scratch (they are listed as clobbered).
   GEN_FOURVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO,VECREG3NO,VECREG4NO)
   defines a noinline static function test_<TESTNAME>(LaneTy ty).  On each
   of ITERS iterations it fills block[] with 0x55 bytes, randomises block[0]
   to block[7] with randV128(), and runs an asm sequence that:
     - writes 0 to FPSR (via x30),
     - loads the four q registers from byte offsets 0, 16, 32 and 48,
     - stores the same four q registers at offsets 64, 80, 96 and 112,
     - stores FPSR at offset 128, later read as block[8].u32[0].
   It then prints block[0] to block[7] and the FPSR value masked with
   0xFFFFFF60.
   NOTE(review): INSN is not referenced by any line visible here; the line
   between the loads and the stores (original line 349), the asm operand
   line, the declarations of `i` and `block`, the end of the asm statement
   and the closing braces are not present in this view. */
327 #define GEN_FOURVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO, \
328 VECREG3NO,VECREG4NO) \
329 __attribute__((noinline)) \
330 static void test_##TESTNAME ( LaneTy ty ) { \
332 for (i = 0; i < ITERS; i++) { \
334 memset(block, 0x55, sizeof(block)); \
335 randV128(&block[0], ty); \
336 randV128(&block[1], ty); \
337 randV128(&block[2], ty); \
338 randV128(&block[3], ty); \
339 randV128(&block[4], ty); \
340 randV128(&block[5], ty); \
341 randV128(&block[6], ty); \
342 randV128(&block[7], ty); \
343 __asm__ __volatile__( \
344 "mov x30, #0 ; msr fpsr, x30 ; " \
345 "ldr q"#VECREG1NO", [%0, #0] ; " \
346 "ldr q"#VECREG2NO", [%0, #16] ; " \
347 "ldr q"#VECREG3NO", [%0, #32] ; " \
348 "ldr q"#VECREG4NO", [%0, #48] ; " \
350 "str q"#VECREG1NO", [%0, #64] ; " \
351 "str q"#VECREG2NO", [%0, #80] ; " \
352 "str q"#VECREG3NO", [%0, #96] ; " \
353 "str q"#VECREG4NO", [%0, #112] ; " \
354 "mrs x30, fpsr ; str x30, [%0, #128] " \
356 : "memory", "v"#VECREG1NO, "v"#VECREG2NO, \
357 "v"#VECREG3NO, "v"#VECREG4NO, \
358 "v16", "v17", "v18", "x30" \
361 UInt fpsr = 0xFFFFFF60 & block[8].u32[0]; \
362 showV128(&block[0]); printf(" "); \
363 showV128(&block[1]); printf(" "); \
364 showV128(&block[2]); printf(" "); \
365 showV128(&block[3]); printf(" "); \
366 showV128(&block[4]); printf(" "); \
367 showV128(&block[5]); printf(" "); \
368 showV128(&block[6]); printf(" "); \
369 showV128(&block[7]); printf(" fpsr=%08x\n", fpsr); \
374 #endif /* ARM64_SIMD_H */