5 #include "tests/malloc.h"
/* Basic scalar types used throughout this test.  Sizes assume an
   LP64 target (x86_64 Linux), which this AVX/FMA4 test requires. */
typedef  unsigned char           UChar;
typedef  unsigned int            UInt;
typedef  unsigned long int       UWord;
typedef  unsigned long long int  ULong;
/* Float was referenced (XMM.f32 below) but its typedef was missing
   from the garbled source; restored here.  C11 allows an identical
   redefinition, so this is safe even if it also exists elsewhere. */
typedef  float                   Float;
typedef  double                  Double;

/* True iff _ptr is 32-byte aligned (required for vmovdqa on YMM data). */
#define IS_32_ALIGNED(_ptr) (0 == (0x1F & (UWord)(_ptr)))
/* A 128-bit SSE/AVX register image, viewable as bytes, 32-bit ints,
   floats or doubles. */
typedef  union { UChar u8[16];  UInt u32[4];  Float f32[4];  Double f64[2]; }
         XMM;

/* A 256-bit AVX register image, viewable as bytes, 32-bit ints, or
   as two XMM halves. */
typedef  union { UChar u8[32];  UInt u32[8];  XMM xmm[2]; }
         YMM;

/* One test working set: images for four YMM registers plus one
   YMM-sized in-memory operand (m). */
typedef  struct { YMM r1;  YMM r2;  YMM r3;  YMM r4;  YMM m; }
         Block;
20 void showFloat ( XMM
* vec
, int idx
)
22 Float f
= vec
->f32
[idx
];
23 int neg
= signbit (f
);
24 char sign
= neg
!= 0 ? '-' : ' ';
25 switch (fpclassify (f
)) {
27 for (int i
= idx
* 4 + 3; i
>= idx
* 4; i
--)
28 printf("%02x", (UInt
)vec
->u8
[i
]);
32 printf ("[ %cINF ]", sign
);
36 printf ("[%cZERO ]", sign
);
44 printf ("[%cSUBNR]", sign
);
50 void showDouble ( XMM
* vec
, int idx
)
52 Double d
= vec
->f64
[idx
];
53 int neg
= signbit (d
);
54 char sign
= neg
!= 0 ? '-' : ' ';
55 switch (fpclassify (d
)) {
57 for (int i
= idx
* 8 + 7; i
>= idx
* 8; i
--)
58 printf("%02x", (UInt
)vec
->u8
[i
]);
62 printf ("[ %cINF ]", sign
);
66 printf ("[ %cZERO ]", sign
);
74 printf ("[ %cSUBNORMAL ]", sign
);
80 void showXMM ( XMM
* vec
, int isDouble
)
83 showDouble ( vec
, 1 );
85 showDouble ( vec
, 0 );
97 void showYMM ( YMM
* vec
, int isDouble
)
99 assert(IS_32_ALIGNED(vec
));
100 showXMM ( &vec
->xmm
[1], isDouble
);
102 showXMM ( &vec
->xmm
[0], isDouble
);
105 void showBlock ( char* msg
, Block
* block
, int isDouble
)
107 printf(" %s\n", msg
);
108 printf("r1: "); showYMM(&block
->r1
, isDouble
); printf("\n");
109 printf("r2: "); showYMM(&block
->r2
, isDouble
); printf("\n");
110 printf("r3: "); showYMM(&block
->r3
, isDouble
); printf("\n");
111 printf("r4: "); showYMM(&block
->r4
, isDouble
); printf("\n");
112 printf(" m: "); showYMM(&block
->m
, isDouble
); printf("\n");
/* Ten "special" doubles (+/-0, +/-1, +/-inf, +/-NaN, +/-denormal),
   filled in at run time by init_special_values(). */
static Double special_values[10];
/* The helpers below are deliberately 'noinline' and built from calls
   on one another, so the compiler cannot constant-fold the special
   values (infinities, NaNs, denormals) at build time -- they must be
   produced by real FP operations at run time.  Do not simplify. */

static __attribute__((noinline))
Double negate ( Double d ) { return -d; }

static __attribute__((noinline))
Double divf64 ( Double x, Double y ) { return x/y; }

static __attribute__((noinline))
Double plusZero ( void ) { return 0.0; }

static __attribute__((noinline))
Double minusZero ( void ) { return negate(plusZero()); }

static __attribute__((noinline))
Double plusOne ( void ) { return 1.0; }

static __attribute__((noinline))
Double minusOne ( void ) { return negate(plusOne()); }

/* 1.0/0.0 -> +inf at run time (noinline blocks folding) */
static __attribute__((noinline))
Double plusInf ( void ) { return 1.0 / 0.0; }

static __attribute__((noinline))
Double minusInf ( void ) { return negate(plusInf()); }

/* inf/inf -> NaN */
static __attribute__((noinline))
Double plusNaN ( void ) { return divf64(plusInf(),plusInf()); }

static __attribute__((noinline))
Double minusNaN ( void ) { return negate(plusNaN()); }

/* 1.23e-315 is already subnormal; /1e3 keeps the result subnormal */
static __attribute__((noinline))
Double plusDenorm ( void ) { return 1.23e-315 / 1e3; }

static __attribute__((noinline))
Double minusDenorm ( void ) { return negate(plusDenorm()); }
147 static void init_special_values ( void )
149 special_values
[0] = plusZero();
150 special_values
[1] = minusZero();
151 special_values
[2] = plusOne();
152 special_values
[3] = minusOne();
153 special_values
[4] = plusInf();
154 special_values
[5] = minusInf();
155 special_values
[6] = plusNaN();
156 special_values
[7] = minusNaN();
157 special_values
[8] = plusDenorm();
158 special_values
[9] = minusDenorm();
161 void specialFBlock ( Block
* b
)
164 Float
* p
= (Float
*)b
;
165 for (i
= 0; i
< sizeof(Block
) / sizeof(Float
); i
++)
166 p
[i
] = (Float
) special_values
[i
% 10];
169 void specialDBlock ( Block
* b
)
172 Double
* p
= (Double
*)b
;
173 for (i
= 0; i
< sizeof(Block
) / sizeof(Double
); i
++)
174 p
[i
] = special_values
[i
% 10];
177 UChar
randUChar ( void )
179 static UInt seed
= 80021;
180 seed
= 1103515245 * seed
+ 12345;
181 return (seed
>> 17) & 0xFF;
184 void randBlock ( Block
* b
)
187 UChar
* p
= (UChar
*)b
;
188 for (i
= 0; i
< sizeof(Block
); i
++)
192 void oneBlock ( Block
* b
)
195 UChar
* p
= (UChar
*)b
;
196 for (i
= 0; i
< sizeof(Block
); i
++)
/* Generate one test function test_<name>(label, block): print the
   block, load b->r1..r4 into ymm7/ymm8/ymm6/ymm9, point r14 at the
   in-memory operand b->m, execute _instr, write the registers back
   into the block, and print it again.
   NOTE(review): the '_instr' insertion, the asm output/input operand
   lines (': :"r"(b)'), and the closing of the asm statement and the
   function were not visible in the garbled source and have been
   reconstructed -- confirm against the original. */
#define GEN_test(_name, _instr, _isD) \
   __attribute__ ((noinline)) void \
   test_##_name ( const char *n, Block* b) \
   { \
      printf("%s %s\n", #_name, n); \
      showBlock("before", b, _isD); \
      __asm__ __volatile__( \
         "vmovdqa 0(%0),%%ymm7"   "\n\t" \
         "vmovdqa 32(%0),%%ymm8"  "\n\t" \
         "vmovdqa 64(%0),%%ymm6"  "\n\t" \
         "vmovdqa 96(%0),%%ymm9"  "\n\t" \
         "leaq 128(%0),%%r14"     "\n\t" \
         _instr                   "\n\t" \
         "vmovdqa %%ymm7, 0(%0)"  "\n\t" \
         "vmovdqa %%ymm8, 32(%0)" "\n\t" \
         "vmovdqa %%ymm6, 64(%0)" "\n\t" \
         "vmovdqa %%ymm9, 96(%0)" "\n\t" \
         : /*OUT*/ \
         : /*IN*/"r"(b) \
         : /*TRASH*/"xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \
      ); \
      showBlock("after", b, _isD); \
   }
/* All these defines do the same thing (and someone with stronger
   preprocessor-fu could probably express them much more compactly).
   Each generates four functions that test four variants of one FMA4
   instruction: one taking four registers as input, one where the
   output register is also one of the input registers, and two where
   one of the inputs is a memory location.  The xmm variants operate
   on 128-bit data, the ymm variants on 256-bit data. */
/* VFMADD*: fused multiply-add.  PD/PS = packed double/single
   (xmm and ymm widths), SD/SS = scalar double/single (xmm only).
   Each macro instantiates the four operand variants described above;
   the trailing flag is isDouble for showBlock(). */
#define GEN_test_VFMADDPD_xmm(_name) \
   GEN_test(_name##_xmm, \
            "vfmaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
   GEN_test(_name##_xmm_src_dst, \
            "vfmaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
   GEN_test(_name##_xmm_mem1, \
            "vfmaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
   GEN_test(_name##_xmm_mem2, \
            "vfmaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
GEN_test_VFMADDPD_xmm(VFMADDPD)

#define GEN_test_VFMADDPD_ymm(_name) \
   GEN_test(_name##_ymm, \
            "vfmaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
   GEN_test(_name##_ymm_src_dst, \
            "vfmaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
   GEN_test(_name##_ymm_mem1, \
            "vfmaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
   GEN_test(_name##_ymm_mem2, \
            "vfmaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
GEN_test_VFMADDPD_ymm(VFMADDPD)

#define GEN_test_VFMADDPS_xmm(_name) \
   GEN_test(_name##_xmm, \
            "vfmaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
   GEN_test(_name##_xmm_src_dst, \
            "vfmaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
   GEN_test(_name##_xmm_mem1, \
            "vfmaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
   GEN_test(_name##_xmm_mem2, \
            "vfmaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
GEN_test_VFMADDPS_xmm(VFMADDPS)

#define GEN_test_VFMADDPS_ymm(_name) \
   GEN_test(_name##_ymm, \
            "vfmaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
   GEN_test(_name##_ymm_src_dst, \
            "vfmaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
   GEN_test(_name##_ymm_mem1, \
            "vfmaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
   GEN_test(_name##_ymm_mem2, \
            "vfmaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
GEN_test_VFMADDPS_ymm(VFMADDPS)

#define GEN_test_VFMADDSD_xmm(_name) \
   GEN_test(_name##_xmm, \
            "vfmaddsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
   GEN_test(_name##_xmm_src_dst, \
            "vfmaddsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
   GEN_test(_name##_xmm_mem1, \
            "vfmaddsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
   GEN_test(_name##_xmm_mem2, \
            "vfmaddsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
GEN_test_VFMADDSD_xmm(VFMADDSD)

#define GEN_test_VFMADDSS_xmm(_name) \
   GEN_test(_name##_xmm, \
            "vfmaddss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
   GEN_test(_name##_xmm_src_dst, \
            "vfmaddss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
   GEN_test(_name##_xmm_mem1, \
            "vfmaddss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
   GEN_test(_name##_xmm_mem2, \
            "vfmaddss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
GEN_test_VFMADDSS_xmm(VFMADDSS)
/* VFMADDSUB* / VFMSUBADD*: fused multiply with alternating
   add/subtract across lanes (packed only, so PD/PS at xmm and ymm
   widths; no scalar forms).  Same four operand variants as above. */
#define GEN_test_VFMADDSUBPD_xmm(_name) \
   GEN_test(_name##_xmm, \
            "vfmaddsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
   GEN_test(_name##_xmm_src_dst, \
            "vfmaddsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
   GEN_test(_name##_xmm_mem1, \
            "vfmaddsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
   GEN_test(_name##_xmm_mem2, \
            "vfmaddsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
GEN_test_VFMADDSUBPD_xmm(VFMADDSUBPD)

#define GEN_test_VFMADDSUBPD_ymm(_name) \
   GEN_test(_name##_ymm, \
            "vfmaddsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
   GEN_test(_name##_ymm_src_dst, \
            "vfmaddsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
   GEN_test(_name##_ymm_mem1, \
            "vfmaddsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
   GEN_test(_name##_ymm_mem2, \
            "vfmaddsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
GEN_test_VFMADDSUBPD_ymm(VFMADDSUBPD)

#define GEN_test_VFMADDSUBPS_xmm(_name) \
   GEN_test(_name##_xmm, \
            "vfmaddsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
   GEN_test(_name##_xmm_src_dst, \
            "vfmaddsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
   GEN_test(_name##_xmm_mem1, \
            "vfmaddsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
   GEN_test(_name##_xmm_mem2, \
            "vfmaddsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
GEN_test_VFMADDSUBPS_xmm(VFMADDSUBPS)

#define GEN_test_VFMADDSUBPS_ymm(_name) \
   GEN_test(_name##_ymm, \
            "vfmaddsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
   GEN_test(_name##_ymm_src_dst, \
            "vfmaddsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
   GEN_test(_name##_ymm_mem1, \
            "vfmaddsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
   GEN_test(_name##_ymm_mem2, \
            "vfmaddsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
GEN_test_VFMADDSUBPS_ymm(VFMADDSUBPS)

#define GEN_test_VFMSUBADDPD_xmm(_name) \
   GEN_test(_name##_xmm, \
            "vfmsubaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
   GEN_test(_name##_xmm_src_dst, \
            "vfmsubaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
   GEN_test(_name##_xmm_mem1, \
            "vfmsubaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
   GEN_test(_name##_xmm_mem2, \
            "vfmsubaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
GEN_test_VFMSUBADDPD_xmm(VFMSUBADDPD)

#define GEN_test_VFMSUBADDPD_ymm(_name) \
   GEN_test(_name##_ymm, \
            "vfmsubaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
   GEN_test(_name##_ymm_src_dst, \
            "vfmsubaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
   GEN_test(_name##_ymm_mem1, \
            "vfmsubaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
   GEN_test(_name##_ymm_mem2, \
            "vfmsubaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
GEN_test_VFMSUBADDPD_ymm(VFMSUBADDPD)

#define GEN_test_VFMSUBADDPS_xmm(_name) \
   GEN_test(_name##_xmm, \
            "vfmsubaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
   GEN_test(_name##_xmm_src_dst, \
            "vfmsubaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
   GEN_test(_name##_xmm_mem1, \
            "vfmsubaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
   GEN_test(_name##_xmm_mem2, \
            "vfmsubaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
GEN_test_VFMSUBADDPS_xmm(VFMSUBADDPS)

#define GEN_test_VFMSUBADDPS_ymm(_name) \
   GEN_test(_name##_ymm, \
            "vfmsubaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
   GEN_test(_name##_ymm_src_dst, \
            "vfmsubaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
   GEN_test(_name##_ymm_mem1, \
            "vfmsubaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
   GEN_test(_name##_ymm_mem2, \
            "vfmsubaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
GEN_test_VFMSUBADDPS_ymm(VFMSUBADDPS)
/* VFMSUB*: fused multiply-subtract.  PD/PS at xmm and ymm widths,
   SD/SS scalar (xmm only).  Same four operand variants as above. */
#define GEN_test_VFMSUBPD_xmm(_name) \
   GEN_test(_name##_xmm, \
            "vfmsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
   GEN_test(_name##_xmm_src_dst, \
            "vfmsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
   GEN_test(_name##_xmm_mem1, \
            "vfmsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
   GEN_test(_name##_xmm_mem2, \
            "vfmsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
GEN_test_VFMSUBPD_xmm(VFMSUBPD)

#define GEN_test_VFMSUBPD_ymm(_name) \
   GEN_test(_name##_ymm, \
            "vfmsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
   GEN_test(_name##_ymm_src_dst, \
            "vfmsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
   GEN_test(_name##_ymm_mem1, \
            "vfmsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
   GEN_test(_name##_ymm_mem2, \
            "vfmsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
GEN_test_VFMSUBPD_ymm(VFMSUBPD)

#define GEN_test_VFMSUBPS_xmm(_name) \
   GEN_test(_name##_xmm, \
            "vfmsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
   GEN_test(_name##_xmm_src_dst, \
            "vfmsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
   GEN_test(_name##_xmm_mem1, \
            "vfmsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
   GEN_test(_name##_xmm_mem2, \
            "vfmsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
GEN_test_VFMSUBPS_xmm(VFMSUBPS)

#define GEN_test_VFMSUBPS_ymm(_name) \
   GEN_test(_name##_ymm, \
            "vfmsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
   GEN_test(_name##_ymm_src_dst, \
            "vfmsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
   GEN_test(_name##_ymm_mem1, \
            "vfmsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
   GEN_test(_name##_ymm_mem2, \
            "vfmsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
GEN_test_VFMSUBPS_ymm(VFMSUBPS)

#define GEN_test_VFMSUBSD_xmm(_name) \
   GEN_test(_name##_xmm, \
            "vfmsubsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
   GEN_test(_name##_xmm_src_dst, \
            "vfmsubsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
   GEN_test(_name##_xmm_mem1, \
            "vfmsubsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
   GEN_test(_name##_xmm_mem2, \
            "vfmsubsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
GEN_test_VFMSUBSD_xmm(VFMSUBSD)

#define GEN_test_VFMSUBSS_xmm(_name) \
   GEN_test(_name##_xmm, \
            "vfmsubss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
   GEN_test(_name##_xmm_src_dst, \
            "vfmsubss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
   GEN_test(_name##_xmm_mem1, \
            "vfmsubss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
   GEN_test(_name##_xmm_mem2, \
            "vfmsubss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
GEN_test_VFMSUBSS_xmm(VFMSUBSS)
/* VFNMADD*: fused negated multiply-add.  PD/PS at xmm and ymm
   widths, SD/SS scalar (xmm only).  Same four operand variants. */
#define GEN_test_VFNMADDPD_xmm(_name) \
   GEN_test(_name##_xmm, \
            "vfnmaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
   GEN_test(_name##_xmm_src_dst, \
            "vfnmaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
   GEN_test(_name##_xmm_mem1, \
            "vfnmaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
   GEN_test(_name##_xmm_mem2, \
            "vfnmaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
GEN_test_VFNMADDPD_xmm(VFNMADDPD)

#define GEN_test_VFNMADDPD_ymm(_name) \
   GEN_test(_name##_ymm, \
            "vfnmaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
   GEN_test(_name##_ymm_src_dst, \
            "vfnmaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
   GEN_test(_name##_ymm_mem1, \
            "vfnmaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
   GEN_test(_name##_ymm_mem2, \
            "vfnmaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
GEN_test_VFNMADDPD_ymm(VFNMADDPD)

#define GEN_test_VFNMADDPS_xmm(_name) \
   GEN_test(_name##_xmm, \
            "vfnmaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
   GEN_test(_name##_xmm_src_dst, \
            "vfnmaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
   GEN_test(_name##_xmm_mem1, \
            "vfnmaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
   GEN_test(_name##_xmm_mem2, \
            "vfnmaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
GEN_test_VFNMADDPS_xmm(VFNMADDPS)

#define GEN_test_VFNMADDPS_ymm(_name) \
   GEN_test(_name##_ymm, \
            "vfnmaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
   GEN_test(_name##_ymm_src_dst, \
            "vfnmaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
   GEN_test(_name##_ymm_mem1, \
            "vfnmaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
   GEN_test(_name##_ymm_mem2, \
            "vfnmaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
GEN_test_VFNMADDPS_ymm(VFNMADDPS)

#define GEN_test_VFNMADDSD_xmm(_name) \
   GEN_test(_name##_xmm, \
            "vfnmaddsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
   GEN_test(_name##_xmm_src_dst, \
            "vfnmaddsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
   GEN_test(_name##_xmm_mem1, \
            "vfnmaddsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
   GEN_test(_name##_xmm_mem2, \
            "vfnmaddsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
GEN_test_VFNMADDSD_xmm(VFNMADDSD)

#define GEN_test_VFNMADDSS_xmm(_name) \
   GEN_test(_name##_xmm, \
            "vfnmaddss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
   GEN_test(_name##_xmm_src_dst, \
            "vfnmaddss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
   GEN_test(_name##_xmm_mem1, \
            "vfnmaddss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
   GEN_test(_name##_xmm_mem2, \
            "vfnmaddss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
GEN_test_VFNMADDSS_xmm(VFNMADDSS)
/* VFNMSUB*: fused negated multiply-subtract.  PD/PS at xmm and ymm
   widths, SD/SS scalar (xmm only).  Same four operand variants. */
#define GEN_test_VFNMSUBPD_xmm(_name) \
   GEN_test(_name##_xmm, \
            "vfnmsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
   GEN_test(_name##_xmm_src_dst, \
            "vfnmsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
   GEN_test(_name##_xmm_mem1, \
            "vfnmsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
   GEN_test(_name##_xmm_mem2, \
            "vfnmsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
GEN_test_VFNMSUBPD_xmm(VFNMSUBPD)

#define GEN_test_VFNMSUBPD_ymm(_name) \
   GEN_test(_name##_ymm, \
            "vfnmsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
   GEN_test(_name##_ymm_src_dst, \
            "vfnmsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
   GEN_test(_name##_ymm_mem1, \
            "vfnmsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
   GEN_test(_name##_ymm_mem2, \
            "vfnmsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
GEN_test_VFNMSUBPD_ymm(VFNMSUBPD)

#define GEN_test_VFNMSUBPS_xmm(_name) \
   GEN_test(_name##_xmm, \
            "vfnmsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
   GEN_test(_name##_xmm_src_dst, \
            "vfnmsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
   GEN_test(_name##_xmm_mem1, \
            "vfnmsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
   GEN_test(_name##_xmm_mem2, \
            "vfnmsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
GEN_test_VFNMSUBPS_xmm(VFNMSUBPS)

#define GEN_test_VFNMSUBPS_ymm(_name) \
   GEN_test(_name##_ymm, \
            "vfnmsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
   GEN_test(_name##_ymm_src_dst, \
            "vfnmsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
   GEN_test(_name##_ymm_mem1, \
            "vfnmsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
   GEN_test(_name##_ymm_mem2, \
            "vfnmsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
GEN_test_VFNMSUBPS_ymm(VFNMSUBPS)

#define GEN_test_VFNMSUBSD_xmm(_name) \
   GEN_test(_name##_xmm, \
            "vfnmsubsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
   GEN_test(_name##_xmm_src_dst, \
            "vfnmsubsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
   GEN_test(_name##_xmm_mem1, \
            "vfnmsubsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
   GEN_test(_name##_xmm_mem2, \
            "vfnmsubsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
GEN_test_VFNMSUBSD_xmm(VFNMSUBSD)

#define GEN_test_VFNMSUBSS_xmm(_name) \
   GEN_test(_name##_xmm, \
            "vfnmsubss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
   GEN_test(_name##_xmm_src_dst, \
            "vfnmsubss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
   GEN_test(_name##_xmm_mem1, \
            "vfnmsubss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
   GEN_test(_name##_xmm_mem2, \
            "vfnmsubss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
GEN_test_VFNMSUBSS_xmm(VFNMSUBSS)
/* Run one generated test function against one named input block. */
#define DO_test_block(_name, _sub, _bname, _block) \
      test_##_name##_##_sub(_bname,_block);

/* Allocate a 32-byte-aligned Block, run the test on four inputs
   (all-ones, special doubles, special floats, pseudo-random), then
   release it.
   NOTE(review): the oneBlock/specialDBlock/specialFBlock/randBlock
   calls and the free() were not visible in the garbled source; they
   are reconstructed from the "ones"/"specialD"/"specialF"/"rand"
   labels and the memalign32 allocation -- confirm against the
   original. */
#define DO_test(_name, _sub, _isD) { \
      Block* b = memalign32(sizeof(Block)); \
      oneBlock(b); \
      DO_test_block(_name, _sub, "ones", b); \
      specialDBlock(b); \
      DO_test_block(_name, _sub, "specialD", b); \
      specialFBlock(b); \
      DO_test_block(_name, _sub, "specialF", b); \
      randBlock(b); \
      DO_test_block(_name, _sub, "rand", b); \
      free(b); \
   }

/* Exercise all four xmm operand variants of one instruction. */
#define DO_tests_xmm(_name,_isD) \
      DO_test(_name, xmm, _isD); \
      DO_test(_name, xmm_src_dst, _isD); \
      DO_test(_name, xmm_mem1, _isD); \
      DO_test(_name, xmm_mem2, _isD);

/* Exercise all four ymm operand variants of one instruction. */
#define DO_tests_ymm(_name,_isD) \
      DO_test(_name, ymm, _isD); \
      DO_test(_name, ymm_src_dst, _isD); \
      DO_test(_name, ymm_mem1, _isD); \
      DO_test(_name, ymm_mem2, _isD);
   /* Body of main() (its header and return are outside this view):
      build the special-value table once, then run every generated
      test -- all xmm variants first, then the ymm variants.  The
      second argument of each DO_tests_* is isDouble for the output
      formatting. */
   init_special_values();

   /* 128-bit (xmm) forms */
   DO_tests_xmm(VFMADDPD, 1);
   DO_tests_xmm(VFMADDPS, 0);
   DO_tests_xmm(VFMADDSD, 1);
   DO_tests_xmm(VFMADDSS, 0);
   DO_tests_xmm(VFMADDSUBPD, 1);
   DO_tests_xmm(VFMADDSUBPS, 0);
   DO_tests_xmm(VFMSUBADDPD, 1);
   DO_tests_xmm(VFMSUBADDPS, 0);
   DO_tests_xmm(VFMSUBPD, 1);
   DO_tests_xmm(VFMSUBPS, 0);
   DO_tests_xmm(VFMSUBSD, 1);
   DO_tests_xmm(VFMSUBSS, 0);
   DO_tests_xmm(VFNMADDPD, 1);
   DO_tests_xmm(VFNMADDPS, 0);
   DO_tests_xmm(VFNMADDSD, 1);
   DO_tests_xmm(VFNMADDSS, 0);
   DO_tests_xmm(VFNMSUBPD, 1);
   DO_tests_xmm(VFNMSUBPS, 0);
   DO_tests_xmm(VFNMSUBSD, 1);
   DO_tests_xmm(VFNMSUBSS, 0);

   /* 256-bit (ymm) forms -- packed only; there are no scalar ymm
      variants */
   DO_tests_ymm(VFMADDPD, 1);
   DO_tests_ymm(VFMADDPS, 0);
   DO_tests_ymm(VFMADDSUBPD, 1);
   DO_tests_ymm(VFMADDSUBPS, 0);
   DO_tests_ymm(VFMSUBADDPD, 1);
   DO_tests_ymm(VFMSUBADDPS, 0);
   DO_tests_ymm(VFMSUBPD, 1);
   DO_tests_ymm(VFMSUBPS, 0);
   DO_tests_ymm(VFNMADDPD, 1);
   DO_tests_ymm(VFNMADDPS, 0);
   DO_tests_ymm(VFNMSUBPD, 1);
   DO_tests_ymm(VFNMSUBPS, 0);