regtest: broaden none/tests/linux/bug498317 suppression for PPC
[valgrind.git] / none / tests / arm64 / simd.h
blobefaceb58f46fbebdcba7e026051eb6a846619ccf
1 #ifndef ARM64_SIMD_H
2 #define ARM64_SIMD_H
4 #include <assert.h>
5 #include <stdio.h>
6 #include <string.h> // memset
/* Short names for the scalar types used throughout the SIMD tests.
   Each name is defined exactly once: repeating a typedef is only
   permitted from C11 onwards. */
typedef  unsigned char           UChar;
typedef  unsigned short int      UShort;
typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned long long int  ULong;
typedef  signed long long int    Long;
typedef  double                  Double;
typedef  float                   Float;

/* Half-precision floating point is not universally available, so use a
   synthesized 16 bit type.  This allows the testing framework to be shared
   across all SIMD tests.  The functions halfToSingleFPAsInt() and
   shortToSingle() below are used to create a Float16 type for testing
   purposes. */
typedef  unsigned short int  Float16;

/* Minimal boolean type and its two values. */
typedef  unsigned char  Bool;
#define False ((Bool)0)
#define True  ((Bool)1)

/* Number of times each generated test function runs its body. */
#define ITERS 1

/* Lane-type tag handed to the random generators and to every generated
   test.  Presumably HF/SF/DF are half/single/double FP lanes and B/H/S/D
   are 8/16/32/64-bit integer lanes -- confirm against randV128(). */
typedef
   enum { TyHF=1234, TySF, TyDF, TyB, TyH, TyS, TyD, TyNONE }
   LaneTy;

/* One vector register's worth of data, viewable at each lane width. */
union _V128 {
   UChar   u8[16];
   UShort  u16[8];
   UInt    u32[4];
   ULong   u64[2];
   Float16 f16[8];
   Float   f32[4];
   Double  f64[2];
};
typedef  union _V128  V128;
46 /* Conversion based on IEEE half-precision, as described in the IEEE 754-2008
47 standard and Arm Reference Manual 'A1.4.2 Half-precision floating-point
48 formats' where hardware capability supports __fp16 (VEX_HWCAPS_ARM64_FP16
49 and VEX_HWCAPS_ARM64_VFP16 set).
51 UInt halfToSingleFPAsInt(UShort y);
53 static inline float shortToSingle(UShort imm)
55 union { float f; UInt i; } v;
56 v.i = halfToSingleFPAsInt(imm);
57 return v.f;
60 UChar randUChar ( void );
62 static inline ULong randULong ( LaneTy ty )
64 Int i;
65 ULong r = 0;
66 for (i = 0; i < 8; i++) {
67 r = (r << 8) | (ULong)(0xFF & randUChar());
69 return r;
/* Generates a random V128.  Ensures that it contains normalised FP numbers
   when viewed as either F16x8, F32x4 or F64x2, so that it is reasonable to use
   in FP test cases. */
75 void randV128 ( /*OUT*/V128* v, LaneTy ty );
77 static inline void showV128 ( V128* v )
79 Int i;
80 for (i = 15; i >= 0; i--)
81 printf("%02x", (Int)v->u8[i]);
84 static inline void showBlock ( const char* msg, V128* block, Int nBlock )
86 Int i;
87 printf("%s\n", msg);
88 for (i = 0; i < nBlock; i++) {
89 printf(" ");
90 showV128(&block[i]);
91 printf("\n");
95 static inline ULong dup4x16 ( UInt x )
97 ULong r = x & 0xF;
98 r |= (r << 4);
99 r |= (r << 8);
100 r |= (r << 16);
101 r |= (r << 32);
102 return r;
// Generate a random double- or single-precision number.  About 1 time in 2,
// instead return a special value (+/- Inf, +/- NaN, denorm).  This ensures that
// many of the groups of 4 calls here will return a special value.
108 Double randDouble ( void );
109 Float randFloat ( void );
111 void randBlock_Doubles ( V128* block, Int nBlock );
112 void randBlock_Floats ( V128* block, Int nBlock );
115 /* ---------------------------------------------------------------- */
116 /* -- Parameterisable test macros -- */
117 /* ---------------------------------------------------------------- */
/* Executes |_action| fifty times in a row.  The expansion is a single
   do/while(0) statement, so it can be used wherever one statement is
   expected.  The loop counter is an Int named _qq, which is visible to
   |_action|. */
#define DO50(_action)                          \
   do {                                        \
      Int _qq;                                 \
      for (_qq = 0; _qq < 50; _qq++) {         \
         _action ;                             \
      }                                        \
   } while (0)
/* Defines test_<INSN>_<SUFFIXD>_<SUFFIXN>(ty), which runs
   "INSN v8.SUFFIXD, v7.SUFFIXN" ITERS times.

   Per iteration: a 3-slot block is filled with 0x55 bytes, slots 0 and 1
   are passed to randV128(), FPSR is zeroed, v7 is loaded from slot 0 and
   v8 from slot 1.  The destination v8 is therefore pre-loaded with a
   defined value, since it can sometimes be an input to the instruction
   too.  After the instruction v8 is stored back to slot 1 and FPSR to
   slot 2.  The line printed is the instruction text, slot 0, slot 1 and
   the saved FPSR with bits 0-4 and 7 masked off.

   NOTE(review): as shown here, no space is printed between the instruction
   text and the first vector, unlike the other generators -- check the
   expected-output files before changing this. */
#define GEN_UNARY_TEST(INSN,SUFFIXD,SUFFIXN) \
  __attribute__((noinline)) \
  static void test_##INSN##_##SUFFIXD##_##SUFFIXN ( LaneTy ty ) { \
     Int iter; \
     for (iter = 0; iter < ITERS; iter++) { \
        V128 block[3]; /* [0]: v7 in, [1]: v8 in/out, [2]: fpsr */ \
        UInt fpsr; \
        memset(block, 0x55, sizeof block); \
        randV128(&block[0], ty); \
        randV128(&block[1], ty); \
        __asm__ __volatile__( \
           "mov x30, #0 ; msr fpsr, x30 ; " \
           "ldr q7, [%0, #0] ; " \
           "ldr q8, [%0, #16] ; " \
           #INSN " v8." #SUFFIXD ", v7." #SUFFIXN " ; " \
           "str q8, [%0, #16] ; " \
           "mrs x30, fpsr ; str x30, [%0, #32] " \
           : /* no outputs */ \
           : "r"(&block[0]) \
           : "memory", "v7", "v8", "x30" \
        ); \
        fpsr = 0xFFFFFF60 & block[2].u32[0]; \
        printf(#INSN " v8." #SUFFIXD ", v7." #SUFFIXN); \
        showV128(&block[0]); \
        printf(" "); \
        showV128(&block[1]); \
        printf(" fpsr=%08x\n", fpsr); \
     } \
  }
/* Defines test_<INSN>_<SUFFIXD>_<SUFFIXN>_<SUFFIXM>(ty), which runs
   "INSN v9.SUFFIXD, v7.SUFFIXN, v8.SUFFIXM" ITERS times.

   Per iteration: a 4-slot block is filled with 0x55 bytes, slots 0..2 are
   passed to randV128(), FPSR is zeroed, and v7, v8, v9 are loaded from
   slots 0, 1, 2.  The destination v9 is therefore pre-loaded with a
   defined value, since it can sometimes be an input to the instruction
   too.  After the instruction v9 is stored back to slot 2 and FPSR to
   slot 3.  The line printed is the instruction text, slots 0..2 and the
   saved FPSR with bits 0-4 and 7 masked off. */
#define GEN_BINARY_TEST(INSN,SUFFIXD,SUFFIXN,SUFFIXM) \
  __attribute__((noinline)) \
  static void test_##INSN##_##SUFFIXD##_##SUFFIXN##_##SUFFIXM ( LaneTy ty ) { \
     Int iter; \
     for (iter = 0; iter < ITERS; iter++) { \
        V128 block[4]; /* [0]: v7, [1]: v8, [2]: v9 in/out, [3]: fpsr */ \
        UInt fpsr; \
        memset(block, 0x55, sizeof block); \
        randV128(&block[0], ty); \
        randV128(&block[1], ty); \
        randV128(&block[2], ty); \
        __asm__ __volatile__( \
           "mov x30, #0 ; msr fpsr, x30 ; " \
           "ldr q7, [%0, #0] ; " \
           "ldr q8, [%0, #16] ; " \
           "ldr q9, [%0, #32] ; " \
           #INSN " v9." #SUFFIXD ", v7." #SUFFIXN ", v8." #SUFFIXM " ; " \
           "str q9, [%0, #32] ; " \
           "mrs x30, fpsr ; str x30, [%0, #48] " \
           : /* no outputs */ \
           : "r"(&block[0]) \
           : "memory", "v7", "v8", "v9", "x30" \
        ); \
        fpsr = 0xFFFFFF60 & block[3].u32[0]; \
        printf(#INSN " v9." #SUFFIXD \
               ", v7." #SUFFIXN ", v8." #SUFFIXM " "); \
        showV128(&block[0]); \
        printf(" "); \
        showV128(&block[1]); \
        printf(" "); \
        showV128(&block[2]); \
        printf(" fpsr=%08x\n", fpsr); \
     } \
  }
/* Defines test_<INSN>_<SUFFIXD>_<SUFFIXN>_<AMOUNT>(ty), which runs
   "INSN v8.SUFFIXD, v7.SUFFIXN, #AMOUNT" ITERS times.

   Per iteration: a 3-slot block is filled with 0x55 bytes, slots 0 and 1
   are passed to randV128(), FPSR is zeroed, v7 is loaded from slot 0 and
   v8 from slot 1.  The destination v8 is therefore pre-loaded with a
   defined value, since it can sometimes be an input to the instruction
   too.  After the instruction v8 is stored back to slot 1 and FPSR to
   slot 2.  The line printed is the instruction text, slot 0, slot 1 and
   the saved FPSR with bits 0-4 and 7 masked off. */
#define GEN_SHIFT_TEST(INSN,SUFFIXD,SUFFIXN,AMOUNT) \
  __attribute__((noinline)) \
  static void test_##INSN##_##SUFFIXD##_##SUFFIXN##_##AMOUNT ( LaneTy ty ) { \
     Int iter; \
     for (iter = 0; iter < ITERS; iter++) { \
        V128 block[3]; /* [0]: v7 in, [1]: v8 in/out, [2]: fpsr */ \
        UInt fpsr; \
        memset(block, 0x55, sizeof block); \
        randV128(&block[0], ty); \
        randV128(&block[1], ty); \
        __asm__ __volatile__( \
           "mov x30, #0 ; msr fpsr, x30 ; " \
           "ldr q7, [%0, #0] ; " \
           "ldr q8, [%0, #16] ; " \
           #INSN " v8." #SUFFIXD ", v7." #SUFFIXN ", #" #AMOUNT " ; " \
           "str q8, [%0, #16] ; " \
           "mrs x30, fpsr ; str x30, [%0, #32] " \
           : /* no outputs */ \
           : "r"(&block[0]) \
           : "memory", "v7", "v8", "x30" \
        ); \
        fpsr = 0xFFFFFF60 & block[2].u32[0]; \
        printf(#INSN " v8." #SUFFIXD ", v7." #SUFFIXN ", #" #AMOUNT " "); \
        showV128(&block[0]); \
        printf(" "); \
        showV128(&block[1]); \
        printf(" fpsr=%08x\n", fpsr); \
     } \
  }
/* Defines test_<TESTNAME>(ty) for an instruction that involves one integer
   register and one vector register, with no bias as to which is input and
   which is output.  INSN is the instruction text as a string literal.

   Per iteration: a 5-slot block is filled with 0x55 bytes, slots 0..3 are
   passed to randV128(), FPSR is zeroed, q<VECREGNO> is loaded from slot 0
   and x<INTREGNO> from the first 8 bytes of slot 1.  After INSN the vector
   register is stored to slot 2, the integer register to the first 8 bytes
   of slot 3, and FPSR to slot 4.  The line printed is INSN, slots 0..3 and
   the saved FPSR with bits 0-4 and 7 masked off.

   INTREGNO must not be 30: x30 is the scratch register used to zero and
   read FPSR. */
#define GEN_ONEINT_ONEVEC_TEST(TESTNAME,INSN,INTREGNO,VECREGNO) \
  __attribute__((noinline)) \
  static void test_##TESTNAME ( LaneTy ty ) { \
     Int iter; \
     assert(INTREGNO != 30); \
     for (iter = 0; iter < ITERS; iter++) { \
        V128 block[5]; /* [0],[1]: inputs, [2],[3]: outputs, [4]: fpsr */ \
        UInt fpsr; \
        memset(block, 0x55, sizeof block); \
        randV128(&block[0], ty); \
        randV128(&block[1], ty); \
        randV128(&block[2], ty); \
        randV128(&block[3], ty); \
        __asm__ __volatile__( \
           "mov x30, #0 ; msr fpsr, x30 ; " \
           "ldr q"#VECREGNO", [%0, #0] ; " \
           "ldr x"#INTREGNO", [%0, #16] ; " \
           INSN " ; " \
           "str q"#VECREGNO", [%0, #32] ; " \
           "str x"#INTREGNO", [%0, #48] ; " \
           "mrs x30, fpsr ; str x30, [%0, #64] " \
           : /* no outputs */ \
           : "r"(&block[0]) \
           : "memory", "v"#VECREGNO, "x"#INTREGNO, "x30" \
        ); \
        fpsr = 0xFFFFFF60 & block[4].u32[0]; \
        printf(INSN " "); \
        showV128(&block[0]); \
        printf(" "); \
        showV128(&block[1]); \
        printf(" "); \
        showV128(&block[2]); \
        printf(" "); \
        showV128(&block[3]); \
        printf(" fpsr=%08x\n", fpsr); \
     } \
  }
/* Defines test_<TESTNAME>(ty) for an instruction that involves two vector
   registers, with no bias as to which is input and which is output.  INSN
   is the instruction text as a string literal; it may use x10 as scratch,
   since x10 is declared clobbered.

   Per iteration: a 5-slot block is filled with 0x55 bytes, slots 0..3 are
   passed to randV128(), FPSR is zeroed, and q<VECREG1NO>, q<VECREG2NO> are
   loaded from slots 0 and 1.  After INSN the two registers are stored to
   slots 2 and 3 and FPSR to slot 4.  The line printed is INSN, slots 0..3
   and the saved FPSR with bits 0-4 and 7 masked off. */
#define GEN_TWOVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO) \
  __attribute__((noinline)) \
  static void test_##TESTNAME ( LaneTy ty ) { \
     Int iter; \
     for (iter = 0; iter < ITERS; iter++) { \
        V128 block[5]; /* [0],[1]: inputs, [2],[3]: outputs, [4]: fpsr */ \
        UInt fpsr; \
        memset(block, 0x55, sizeof block); \
        randV128(&block[0], ty); \
        randV128(&block[1], ty); \
        randV128(&block[2], ty); \
        randV128(&block[3], ty); \
        __asm__ __volatile__( \
           "mov x30, #0 ; msr fpsr, x30 ; " \
           "ldr q"#VECREG1NO", [%0, #0] ; " \
           "ldr q"#VECREG2NO", [%0, #16] ; " \
           INSN " ; " \
           "str q"#VECREG1NO", [%0, #32] ; " \
           "str q"#VECREG2NO", [%0, #48] ; " \
           "mrs x30, fpsr ; str x30, [%0, #64] " \
           : /* no outputs */ \
           : "r"(&block[0]) \
           : "memory", "v"#VECREG1NO, "v"#VECREG2NO, "x10", "x30" \
        ); \
        fpsr = 0xFFFFFF60 & block[4].u32[0]; \
        printf(INSN " "); \
        showV128(&block[0]); \
        printf(" "); \
        showV128(&block[1]); \
        printf(" "); \
        showV128(&block[2]); \
        printf(" "); \
        showV128(&block[3]); \
        printf(" fpsr=%08x\n", fpsr); \
     } \
  }
/* Defines test_<TESTNAME>(ty) for an instruction that involves three vector
   registers, with no bias as to which is input and which is output.  INSN
   is the instruction text as a string literal; it may use v16, v17 and v18
   as scratch, since they are declared clobbered.

   Per iteration: a 7-slot block is filled with 0x55 bytes, slots 0..5 are
   passed to randV128(), FPSR is zeroed, and the three registers are loaded
   from slots 0..2.  After INSN they are stored to slots 3..5 and FPSR to
   slot 6.  The line printed is INSN, slots 0..5 and the saved FPSR with
   bits 0-4 and 7 masked off. */
#define GEN_THREEVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO,VECREG3NO) \
  __attribute__((noinline)) \
  static void test_##TESTNAME ( LaneTy ty ) { \
     Int iter; \
     for (iter = 0; iter < ITERS; iter++) { \
        V128 block[7]; /* [0..2]: inputs, [3..5]: outputs, [6]: fpsr */ \
        UInt fpsr; \
        memset(block, 0x55, sizeof block); \
        randV128(&block[0], ty); \
        randV128(&block[1], ty); \
        randV128(&block[2], ty); \
        randV128(&block[3], ty); \
        randV128(&block[4], ty); \
        randV128(&block[5], ty); \
        __asm__ __volatile__( \
           "mov x30, #0 ; msr fpsr, x30 ; " \
           "ldr q"#VECREG1NO", [%0, #0] ; " \
           "ldr q"#VECREG2NO", [%0, #16] ; " \
           "ldr q"#VECREG3NO", [%0, #32] ; " \
           INSN " ; " \
           "str q"#VECREG1NO", [%0, #48] ; " \
           "str q"#VECREG2NO", [%0, #64] ; " \
           "str q"#VECREG3NO", [%0, #80] ; " \
           "mrs x30, fpsr ; str x30, [%0, #96] " \
           : /* no outputs */ \
           : "r"(&block[0]) \
           : "memory", "v"#VECREG1NO, "v"#VECREG2NO, "v"#VECREG3NO, \
             "v16", "v17", "v18", "x30" \
        ); \
        fpsr = 0xFFFFFF60 & block[6].u32[0]; \
        printf(INSN " "); \
        showV128(&block[0]); \
        printf(" "); \
        showV128(&block[1]); \
        printf(" "); \
        showV128(&block[2]); \
        printf(" "); \
        showV128(&block[3]); \
        printf(" "); \
        showV128(&block[4]); \
        printf(" "); \
        showV128(&block[5]); \
        printf(" fpsr=%08x\n", fpsr); \
     } \
  }
/* Defines test_<TESTNAME>(ty) for an instruction that involves four vector
   registers, with no bias as to which is input and which is output.  INSN
   is the instruction text as a string literal; it may use v16, v17 and v18
   as scratch, since they are declared clobbered.

   Per iteration: a 9-slot block is filled with 0x55 bytes, slots 0..7 are
   passed to randV128(), FPSR is zeroed, and the four registers are loaded
   from slots 0..3.  After INSN they are stored to slots 4..7 and FPSR to
   slot 8.  The line printed is INSN, slots 0..7 and the saved FPSR with
   bits 0-4 and 7 masked off. */
#define GEN_FOURVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO, \
                         VECREG3NO,VECREG4NO) \
  __attribute__((noinline)) \
  static void test_##TESTNAME ( LaneTy ty ) { \
     Int iter; \
     for (iter = 0; iter < ITERS; iter++) { \
        V128 block[9]; /* [0..3]: inputs, [4..7]: outputs, [8]: fpsr */ \
        UInt fpsr; \
        memset(block, 0x55, sizeof block); \
        randV128(&block[0], ty); \
        randV128(&block[1], ty); \
        randV128(&block[2], ty); \
        randV128(&block[3], ty); \
        randV128(&block[4], ty); \
        randV128(&block[5], ty); \
        randV128(&block[6], ty); \
        randV128(&block[7], ty); \
        __asm__ __volatile__( \
           "mov x30, #0 ; msr fpsr, x30 ; " \
           "ldr q"#VECREG1NO", [%0, #0] ; " \
           "ldr q"#VECREG2NO", [%0, #16] ; " \
           "ldr q"#VECREG3NO", [%0, #32] ; " \
           "ldr q"#VECREG4NO", [%0, #48] ; " \
           INSN " ; " \
           "str q"#VECREG1NO", [%0, #64] ; " \
           "str q"#VECREG2NO", [%0, #80] ; " \
           "str q"#VECREG3NO", [%0, #96] ; " \
           "str q"#VECREG4NO", [%0, #112] ; " \
           "mrs x30, fpsr ; str x30, [%0, #128] " \
           : /* no outputs */ \
           : "r"(&block[0]) \
           : "memory", "v"#VECREG1NO, "v"#VECREG2NO, \
             "v"#VECREG3NO, "v"#VECREG4NO, \
             "v16", "v17", "v18", "x30" \
        ); \
        fpsr = 0xFFFFFF60 & block[8].u32[0]; \
        printf(INSN " "); \
        showV128(&block[0]); \
        printf(" "); \
        showV128(&block[1]); \
        printf(" "); \
        showV128(&block[2]); \
        printf(" "); \
        showV128(&block[3]); \
        printf(" "); \
        showV128(&block[4]); \
        printf(" "); \
        showV128(&block[5]); \
        printf(" "); \
        showV128(&block[6]); \
        printf(" "); \
        showV128(&block[7]); \
        printf(" fpsr=%08x\n", fpsr); \
     } \
  }
374 #endif /* ARM64_SIMD_H */