6 float x
[N
], y
[N
], z
[N
], expected
[N
], res
[N
];
7 } ft
__attribute__((aligned (32)));
10 double x
[N
], y
[N
], z
[N
], expected
[N
], res
[N
];
11 } dt
__attribute__((aligned (32)));
13 float plus_zero
, plus_infty
, minus_infty
, nan_value
;
15 static int testf( float x
, float y
)
18 memcpy( &a
, &x
, sizeof (a
) );
19 memcpy( &b
, &y
, sizeof (b
) );
20 if ((a
& 0x7fc00000U
) == 0x7fc00000U
)
21 return (b
& 0x7fc00000U
) != 0x7fc00000U
;
22 return memcmp( &a
, &b
, sizeof (a
) ) != 0;
25 static int test_fmaf( void )
29 for (i
= 0; i
< N
; i
++) {
31 __asm
__volatile__ ("vfmadd132ss %2, %3, %0" : "=x" (w
) : "0" (ft
.x
[i
]), "x" (ft
.y
[i
]), "x" (ft
.z
[i
]));
32 thisres
|= testf( w
, ft
.expected
[i
] );
33 __asm
__volatile__ ("vfmadd132ss %2, %3, %0" : "=x" (w
) : "0" (ft
.x
[i
]), "m" (ft
.y
[i
]), "x" (ft
.z
[i
]));
34 thisres
|= testf( w
, ft
.expected
[i
] );
35 __asm
__volatile__ ("vfmadd213ss %3, %2, %0" : "=x" (w
) : "0" (ft
.x
[i
]), "x" (ft
.y
[i
]), "x" (ft
.z
[i
]));
36 thisres
|= testf( w
, ft
.expected
[i
] );
37 __asm
__volatile__ ("vfmadd213ss %3, %2, %0" : "=x" (w
) : "0" (ft
.x
[i
]), "x" (ft
.y
[i
]), "m" (ft
.z
[i
]));
38 thisres
|= testf( w
, ft
.expected
[i
] );
39 __asm
__volatile__ ("vfmadd231ss %2, %1, %0" : "=x" (w
) : "x" (ft
.x
[i
]), "x" (ft
.y
[i
]), "0" (ft
.z
[i
]));
40 thisres
|= testf( w
, ft
.expected
[i
] );
41 __asm
__volatile__ ("vfmadd231ss %2, %1, %0" : "=x" (w
) : "x" (ft
.x
[i
]), "m" (ft
.y
[i
]), "0" (ft
.z
[i
]));
42 thisres
|= testf( w
, ft
.expected
[i
] );
44 printf( "Failure 1 %d %a %a\n", i
, w
, ft
.expected
[i
] );
47 __asm
__volatile__ ("vfnmsub132ss %2, %3, %0" : "=x" (w
) : "0" (ft
.x
[i
]), "x" (ft
.y
[i
]), "x" (ft
.z
[i
]));
48 thisres
|= testf( -w
, ft
.expected
[i
] );
49 __asm
__volatile__ ("vfnmsub132ss %2, %3, %0" : "=x" (w
) : "0" (ft
.x
[i
]), "m" (ft
.y
[i
]), "x" (ft
.z
[i
]));
50 thisres
|= testf( -w
, ft
.expected
[i
] );
51 __asm
__volatile__ ("vfnmsub213ss %3, %2, %0" : "=x" (w
) : "0" (ft
.x
[i
]), "x" (ft
.y
[i
]), "x" (ft
.z
[i
]));
52 thisres
|= testf( -w
, ft
.expected
[i
] );
53 __asm
__volatile__ ("vfnmsub213ss %3, %2, %0" : "=x" (w
) : "0" (ft
.x
[i
]), "x" (ft
.y
[i
]), "m" (ft
.z
[i
]));
54 thisres
|= testf( -w
, ft
.expected
[i
] );
55 __asm
__volatile__ ("vfnmsub231ss %2, %1, %0" : "=x" (w
) : "x" (ft
.x
[i
]), "x" (ft
.y
[i
]), "0" (ft
.z
[i
]));
56 thisres
|= testf( -w
, ft
.expected
[i
] );
57 __asm
__volatile__ ("vfnmsub231ss %2, %1, %0" : "=x" (w
) : "x" (ft
.x
[i
]), "m" (ft
.y
[i
]), "0" (ft
.z
[i
]));
58 thisres
|= testf( -w
, ft
.expected
[i
] );
60 printf( "Failure 2 %d %a %a\n", i
, w
, ft
.expected
[i
] );
63 for (i
= 0; i
< N
; i
++)
65 for (i
= 0; i
< N
; i
++) {
67 __asm
__volatile__ ("vfmsub132ss %2, %3, %0" : "=x" (w
) : "0" (ft
.x
[i
]), "x" (ft
.y
[i
]), "x" (ft
.z
[i
]));
68 thisres
|= testf( w
, ft
.expected
[i
] );
69 __asm
__volatile__ ("vfmsub132ss %2, %3, %0" : "=x" (w
) : "0" (ft
.x
[i
]), "m" (ft
.y
[i
]), "x" (ft
.z
[i
]));
70 thisres
|= testf( w
, ft
.expected
[i
] );
71 __asm
__volatile__ ("vfmsub213ss %3, %2, %0" : "=x" (w
) : "0" (ft
.x
[i
]), "x" (ft
.y
[i
]), "x" (ft
.z
[i
]));
72 thisres
|= testf( w
, ft
.expected
[i
] );
73 __asm
__volatile__ ("vfmsub213ss %3, %2, %0" : "=x" (w
) : "0" (ft
.x
[i
]), "x" (ft
.y
[i
]), "m" (ft
.z
[i
]));
74 thisres
|= testf( w
, ft
.expected
[i
] );
75 __asm
__volatile__ ("vfmsub231ss %2, %1, %0" : "=x" (w
) : "x" (ft
.x
[i
]), "x" (ft
.y
[i
]), "0" (ft
.z
[i
]));
76 thisres
|= testf( w
, ft
.expected
[i
] );
77 __asm
__volatile__ ("vfmsub231ss %2, %1, %0" : "=x" (w
) : "x" (ft
.x
[i
]), "m" (ft
.y
[i
]), "0" (ft
.z
[i
]));
78 thisres
|= testf( w
, ft
.expected
[i
] );
80 printf( "Failure 3 %d %a %a\n", i
, w
, ft
.expected
[i
] );
83 __asm
__volatile__ ("vfnmadd132ss %2, %3, %0" : "=x" (w
) : "0" (ft
.x
[i
]), "x" (ft
.y
[i
]), "x" (ft
.z
[i
]));
84 thisres
|= testf( -w
, ft
.expected
[i
] );
85 __asm
__volatile__ ("vfnmadd132ss %2, %3, %0" : "=x" (w
) : "0" (ft
.x
[i
]), "m" (ft
.y
[i
]), "x" (ft
.z
[i
]));
86 thisres
|= testf( -w
, ft
.expected
[i
] );
87 __asm
__volatile__ ("vfnmadd213ss %3, %2, %0" : "=x" (w
) : "0" (ft
.x
[i
]), "x" (ft
.y
[i
]), "x" (ft
.z
[i
]));
88 thisres
|= testf( -w
, ft
.expected
[i
] );
89 __asm
__volatile__ ("vfnmadd213ss %3, %2, %0" : "=x" (w
) : "0" (ft
.x
[i
]), "x" (ft
.y
[i
]), "m" (ft
.z
[i
]));
90 thisres
|= testf( -w
, ft
.expected
[i
] );
91 __asm
__volatile__ ("vfnmadd231ss %2, %1, %0" : "=x" (w
) : "x" (ft
.x
[i
]), "x" (ft
.y
[i
]), "0" (ft
.z
[i
]));
92 thisres
|= testf( -w
, ft
.expected
[i
] );
93 __asm
__volatile__ ("vfnmadd231ss %2, %1, %0" : "=x" (w
) : "x" (ft
.x
[i
]), "m" (ft
.y
[i
]), "0" (ft
.z
[i
]));
94 thisres
|= testf( -w
, ft
.expected
[i
] );
96 printf( "Failure 4 %d %a %a\n", i
, w
, ft
.expected
[i
] );
99 for (i
= 0; i
< N
; i
++)
101 for (i
= 0; i
< N
; i
+= 4) {
103 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;"
104 "vfmadd132ps %%xmm7, %%xmm8, %%xmm9;"
105 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
106 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
107 for (j
= 0; j
< 4; j
++)
108 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
109 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;"
110 "vfmadd132ps (%2), %%xmm8, %%xmm9;"
111 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
112 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
113 for (j
= 0; j
< 4; j
++)
114 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
115 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;"
116 "vfmadd213ps %%xmm7, %%xmm8, %%xmm9;"
117 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
118 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
119 for (j
= 0; j
< 4; j
++)
120 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
121 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;"
122 "vfmadd213ps (%3), %%xmm8, %%xmm9;"
123 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
124 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
125 for (j
= 0; j
< 4; j
++)
126 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
127 __asm
__volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;"
128 "vfmadd231ps %%xmm7, %%xmm8, %%xmm9;"
129 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
130 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
131 for (j
= 0; j
< 4; j
++)
132 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
133 __asm
__volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;"
134 "vfmadd231ps (%2), %%xmm8, %%xmm9;"
135 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
136 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
137 for (j
= 0; j
< 4; j
++)
138 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
140 printf( "Failure 5 %d", i
);
141 for (j
= 0; j
< 4; j
++)
142 printf( " %a %a", ft
.res
[i
+j
], ft
.expected
[i
+j
] );
147 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;"
148 "vfnmsub132ps %%xmm7, %%xmm8, %%xmm9;"
149 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
150 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
151 for (j
= 0; j
< 4; j
++)
152 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
153 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;"
154 "vfnmsub132ps (%2), %%xmm8, %%xmm9;"
155 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
156 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
157 for (j
= 0; j
< 4; j
++)
158 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
159 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;"
160 "vfnmsub213ps %%xmm7, %%xmm8, %%xmm9;"
161 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
162 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
163 for (j
= 0; j
< 4; j
++)
164 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
165 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;"
166 "vfnmsub213ps (%3), %%xmm8, %%xmm9;"
167 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
168 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
169 for (j
= 0; j
< 4; j
++)
170 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
171 __asm
__volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;"
172 "vfnmsub231ps %%xmm7, %%xmm8, %%xmm9;"
173 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
174 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
175 for (j
= 0; j
< 4; j
++)
176 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
177 __asm
__volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;"
178 "vfnmsub231ps (%2), %%xmm8, %%xmm9;"
179 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
180 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
181 for (j
= 0; j
< 4; j
++)
182 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
184 printf( "Failure 6 %d", i
);
185 for (j
= 0; j
< 4; j
++)
186 printf( " %a %a", ft
.res
[i
+j
], ft
.expected
[i
+j
] );
191 for (i
= 0; i
< N
; i
++)
193 for (i
= 0; i
< N
; i
+= 4) {
195 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;"
196 "vfmsub132ps %%xmm7, %%xmm8, %%xmm9;"
197 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
198 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
199 for (j
= 0; j
< 4; j
++)
200 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
201 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;"
202 "vfmsub132ps (%2), %%xmm8, %%xmm9;"
203 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
204 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
205 for (j
= 0; j
< 4; j
++)
206 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
207 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;"
208 "vfmsub213ps %%xmm7, %%xmm8, %%xmm9;"
209 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
210 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
211 for (j
= 0; j
< 4; j
++)
212 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
213 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;"
214 "vfmsub213ps (%3), %%xmm8, %%xmm9;"
215 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
216 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
217 for (j
= 0; j
< 4; j
++)
218 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
219 __asm
__volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;"
220 "vfmsub231ps %%xmm7, %%xmm8, %%xmm9;"
221 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
222 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
223 for (j
= 0; j
< 4; j
++)
224 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
225 __asm
__volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;"
226 "vfmsub231ps (%2), %%xmm8, %%xmm9;"
227 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
228 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
229 for (j
= 0; j
< 4; j
++)
230 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
232 printf( "Failure 7 %d", i
);
233 for (j
= 0; j
< 4; j
++)
234 printf( " %a %a", ft
.res
[i
+j
], ft
.expected
[i
+j
] );
239 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;"
240 "vfnmadd132ps %%xmm7, %%xmm8, %%xmm9;"
241 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
242 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
243 for (j
= 0; j
< 4; j
++)
244 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
245 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;"
246 "vfnmadd132ps (%2), %%xmm8, %%xmm9;"
247 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
248 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
249 for (j
= 0; j
< 4; j
++)
250 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
251 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;"
252 "vfnmadd213ps %%xmm7, %%xmm8, %%xmm9;"
253 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
254 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
255 for (j
= 0; j
< 4; j
++)
256 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
257 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;"
258 "vfnmadd213ps (%3), %%xmm8, %%xmm9;"
259 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
260 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
261 for (j
= 0; j
< 4; j
++)
262 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
263 __asm
__volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;"
264 "vfnmadd231ps %%xmm7, %%xmm8, %%xmm9;"
265 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
266 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
267 for (j
= 0; j
< 4; j
++)
268 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
269 __asm
__volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;"
270 "vfnmadd231ps (%2), %%xmm8, %%xmm9;"
271 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
272 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
273 for (j
= 0; j
< 4; j
++)
274 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
276 printf( "Failure 8 %d", i
);
277 for (j
= 0; j
< 4; j
++)
278 printf( " %a %a", ft
.res
[i
+j
], ft
.expected
[i
+j
] );
283 for (i
= 1; i
< N
; i
+= 2)
285 for (i
= 0; i
< N
; i
+= 4) {
287 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;"
288 "vfmaddsub132ps %%xmm7, %%xmm8, %%xmm9;"
289 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
290 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
291 for (j
= 0; j
< 4; j
++)
292 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
293 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;"
294 "vfmaddsub132ps (%2), %%xmm8, %%xmm9;"
295 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
296 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
297 for (j
= 0; j
< 4; j
++)
298 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
299 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;"
300 "vfmaddsub213ps %%xmm7, %%xmm8, %%xmm9;"
301 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
302 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
303 for (j
= 0; j
< 4; j
++)
304 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
305 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;"
306 "vfmaddsub213ps (%3), %%xmm8, %%xmm9;"
307 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
308 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
309 for (j
= 0; j
< 4; j
++)
310 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
311 __asm
__volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;"
312 "vfmaddsub231ps %%xmm7, %%xmm8, %%xmm9;"
313 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
314 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
315 for (j
= 0; j
< 4; j
++)
316 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
317 __asm
__volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;"
318 "vfmaddsub231ps (%2), %%xmm8, %%xmm9;"
319 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
320 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
321 for (j
= 0; j
< 4; j
++)
322 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
324 printf( "Failure 9 %d", i
);
325 for (j
= 0; j
< 4; j
++)
326 printf( " %a %a", ft
.res
[i
+j
], ft
.expected
[i
+j
] );
331 for (i
= 0; i
< N
; i
++)
333 for (i
= 0; i
< N
; i
+= 4) {
335 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%3), %%xmm8;"
336 "vfmsubadd132ps %%xmm7, %%xmm8, %%xmm9;"
337 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
338 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
339 for (j
= 0; j
< 4; j
++)
340 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
341 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm8;"
342 "vfmsubadd132ps (%2), %%xmm8, %%xmm9;"
343 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
344 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
345 for (j
= 0; j
< 4; j
++)
346 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
347 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%3), %%xmm7; vmovaps (%2), %%xmm8;"
348 "vfmsubadd213ps %%xmm7, %%xmm8, %%xmm9;"
349 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
350 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
351 for (j
= 0; j
< 4; j
++)
352 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
353 __asm
__volatile__ ("vmovaps (%1), %%xmm9; vmovaps (%2), %%xmm8;"
354 "vfmsubadd213ps (%3), %%xmm8, %%xmm9;"
355 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
356 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
357 for (j
= 0; j
< 4; j
++)
358 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
359 __asm
__volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%2), %%xmm7; vmovaps (%1), %%xmm8;"
360 "vfmsubadd231ps %%xmm7, %%xmm8, %%xmm9;"
361 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
362 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
363 for (j
= 0; j
< 4; j
++)
364 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
365 __asm
__volatile__ ("vmovaps (%3), %%xmm9; vmovaps (%1), %%xmm8;"
366 "vfmsubadd231ps (%2), %%xmm8, %%xmm9;"
367 "vmovaps %%xmm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
368 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
369 for (j
= 0; j
< 4; j
++)
370 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
372 printf( "Failure 10 %d", i
);
373 for (j
= 0; j
< 4; j
++)
374 printf( " %a %a", ft
.res
[i
+j
], ft
.expected
[i
+j
] );
379 for (i
= 1; i
< N
; i
+= 2)
381 for (i
= 0; i
< N
; i
+= 8) {
383 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;"
384 "vfmadd132ps %%ymm7, %%ymm8, %%ymm9;"
385 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
386 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
387 for (j
= 0; j
< 8; j
++)
388 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
389 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;"
390 "vfmadd132ps (%2), %%ymm8, %%ymm9;"
391 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
392 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
393 for (j
= 0; j
< 8; j
++)
394 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
395 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;"
396 "vfmadd213ps %%ymm7, %%ymm8, %%ymm9;"
397 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
398 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
399 for (j
= 0; j
< 8; j
++)
400 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
401 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;"
402 "vfmadd213ps (%3), %%ymm8, %%ymm9;"
403 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
404 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
405 for (j
= 0; j
< 8; j
++)
406 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
407 __asm
__volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;"
408 "vfmadd231ps %%ymm7, %%ymm8, %%ymm9;"
409 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
410 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
411 for (j
= 0; j
< 8; j
++)
412 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
413 __asm
__volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;"
414 "vfmadd231ps (%2), %%ymm8, %%ymm9;"
415 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
416 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
417 for (j
= 0; j
< 8; j
++)
418 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
420 printf( "Failure 11 %d", i
);
421 for (j
= 0; j
< 8; j
++)
422 printf( " %a %a", ft
.res
[i
+j
], ft
.expected
[i
+j
] );
427 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;"
428 "vfnmsub132ps %%ymm7, %%ymm8, %%ymm9;"
429 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
430 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
431 for (j
= 0; j
< 8; j
++)
432 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
433 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;"
434 "vfnmsub132ps (%2), %%ymm8, %%ymm9;"
435 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
436 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
437 for (j
= 0; j
< 8; j
++)
438 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
439 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;"
440 "vfnmsub213ps %%ymm7, %%ymm8, %%ymm9;"
441 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
442 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
443 for (j
= 0; j
< 8; j
++)
444 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
445 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;"
446 "vfnmsub213ps (%3), %%ymm8, %%ymm9;"
447 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
448 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
449 for (j
= 0; j
< 8; j
++)
450 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
451 __asm
__volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;"
452 "vfnmsub231ps %%ymm7, %%ymm8, %%ymm9;"
453 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
454 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
455 for (j
= 0; j
< 8; j
++)
456 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
457 __asm
__volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;"
458 "vfnmsub231ps (%2), %%ymm8, %%ymm9;"
459 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
460 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
461 for (j
= 0; j
< 8; j
++)
462 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
464 printf( "Failure 12 %d", i
);
465 for (j
= 0; j
< 8; j
++)
466 printf( " %a %a", ft
.res
[i
+j
], ft
.expected
[i
+j
] );
471 for (i
= 0; i
< N
; i
++)
473 for (i
= 0; i
< N
; i
+= 8) {
475 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;"
476 "vfmsub132ps %%ymm7, %%ymm8, %%ymm9;"
477 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
478 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
479 for (j
= 0; j
< 8; j
++)
480 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
481 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;"
482 "vfmsub132ps (%2), %%ymm8, %%ymm9;"
483 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
484 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
485 for (j
= 0; j
< 8; j
++)
486 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
487 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;"
488 "vfmsub213ps %%ymm7, %%ymm8, %%ymm9;"
489 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
490 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
491 for (j
= 0; j
< 8; j
++)
492 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
493 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;"
494 "vfmsub213ps (%3), %%ymm8, %%ymm9;"
495 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
496 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
497 for (j
= 0; j
< 8; j
++)
498 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
499 __asm
__volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;"
500 "vfmsub231ps %%ymm7, %%ymm8, %%ymm9;"
501 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
502 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
503 for (j
= 0; j
< 8; j
++)
504 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
505 __asm
__volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;"
506 "vfmsub231ps (%2), %%ymm8, %%ymm9;"
507 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
508 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
509 for (j
= 0; j
< 8; j
++)
510 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
512 printf( "Failure 13 %d", i
);
513 for (j
= 0; j
< 8; j
++)
514 printf( " %a %a", ft
.res
[i
+j
], ft
.expected
[i
+j
] );
519 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;"
520 "vfnmadd132ps %%ymm7, %%ymm8, %%ymm9;"
521 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
522 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
523 for (j
= 0; j
< 8; j
++)
524 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
525 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;"
526 "vfnmadd132ps (%2), %%ymm8, %%ymm9;"
527 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
528 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
529 for (j
= 0; j
< 8; j
++)
530 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
531 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;"
532 "vfnmadd213ps %%ymm7, %%ymm8, %%ymm9;"
533 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
534 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
535 for (j
= 0; j
< 8; j
++)
536 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
537 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;"
538 "vfnmadd213ps (%3), %%ymm8, %%ymm9;"
539 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
540 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
541 for (j
= 0; j
< 8; j
++)
542 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
543 __asm
__volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;"
544 "vfnmadd231ps %%ymm7, %%ymm8, %%ymm9;"
545 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
546 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
547 for (j
= 0; j
< 8; j
++)
548 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
549 __asm
__volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;"
550 "vfnmadd231ps (%2), %%ymm8, %%ymm9;"
551 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
552 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
553 for (j
= 0; j
< 8; j
++)
554 thisres
|= testf( -ft
.res
[i
+j
], ft
.expected
[i
+j
] );
556 printf( "Failure 14 %d", i
);
557 for (j
= 0; j
< 8; j
++)
558 printf( " %a %a", ft
.res
[i
+j
], ft
.expected
[i
+j
] );
563 for (i
= 1; i
< N
; i
+= 2)
565 for (i
= 0; i
< N
; i
+= 8) {
567 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;"
568 "vfmaddsub132ps %%ymm7, %%ymm8, %%ymm9;"
569 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
570 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
571 for (j
= 0; j
< 8; j
++)
572 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
573 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;"
574 "vfmaddsub132ps (%2), %%ymm8, %%ymm9;"
575 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
576 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
577 for (j
= 0; j
< 8; j
++)
578 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
579 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;"
580 "vfmaddsub213ps %%ymm7, %%ymm8, %%ymm9;"
581 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
582 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
583 for (j
= 0; j
< 8; j
++)
584 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
585 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;"
586 "vfmaddsub213ps (%3), %%ymm8, %%ymm9;"
587 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
588 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
589 for (j
= 0; j
< 8; j
++)
590 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
591 __asm
__volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;"
592 "vfmaddsub231ps %%ymm7, %%ymm8, %%ymm9;"
593 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
594 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
595 for (j
= 0; j
< 8; j
++)
596 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
597 __asm
__volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;"
598 "vfmaddsub231ps (%2), %%ymm8, %%ymm9;"
599 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
600 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
601 for (j
= 0; j
< 8; j
++)
602 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
604 printf( "Failure 15 %d", i
);
605 for (j
= 0; j
< 8; j
++)
606 printf( " %a %a", ft
.res
[i
+j
], ft
.expected
[i
+j
] );
611 for (i
= 0; i
< N
; i
++)
613 for (i
= 0; i
< N
; i
+= 8) {
615 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%3), %%ymm8;"
616 "vfmsubadd132ps %%ymm7, %%ymm8, %%ymm9;"
617 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
618 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
619 for (j
= 0; j
< 8; j
++)
620 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
621 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm8;"
622 "vfmsubadd132ps (%2), %%ymm8, %%ymm9;"
623 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
624 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
625 for (j
= 0; j
< 8; j
++)
626 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
627 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%3), %%ymm7; vmovaps (%2), %%ymm8;"
628 "vfmsubadd213ps %%ymm7, %%ymm8, %%ymm9;"
629 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
630 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
631 for (j
= 0; j
< 8; j
++)
632 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
633 __asm
__volatile__ ("vmovaps (%1), %%ymm9; vmovaps (%2), %%ymm8;"
634 "vfmsubadd213ps (%3), %%ymm8, %%ymm9;"
635 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
636 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
637 for (j
= 0; j
< 8; j
++)
638 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
639 __asm
__volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%2), %%ymm7; vmovaps (%1), %%ymm8;"
640 "vfmsubadd231ps %%ymm7, %%ymm8, %%ymm9;"
641 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
642 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
643 for (j
= 0; j
< 8; j
++)
644 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
645 __asm
__volatile__ ("vmovaps (%3), %%ymm9; vmovaps (%1), %%ymm8;"
646 "vfmsubadd231ps (%2), %%ymm8, %%ymm9;"
647 "vmovaps %%ymm9, (%0)" : : "r" (&ft
.res
[i
]), "r" (&ft
.x
[i
]),
648 "r" (&ft
.y
[i
]), "r" (&ft
.z
[i
]) : "xmm7", "xmm8", "xmm9");
649 for (j
= 0; j
< 8; j
++)
650 thisres
|= testf( ft
.res
[i
+j
], ft
.expected
[i
+j
] );
652 printf( "Failure 16 %d", i
);
653 for (j
= 0; j
< 8; j
++)
654 printf( " %a %a", ft
.res
[i
+j
], ft
.expected
[i
+j
] );
659 for (i
= 1; i
< N
; i
+= 2)
664 static int test( double x
, double y
)
666 unsigned long long a
, b
;
667 memcpy( &a
, &x
, sizeof (a
) );
668 memcpy( &b
, &y
, sizeof (b
) );
669 if ((a
& 0x7ff8000000000000ULL
) == 0x7ff8000000000000ULL
)
670 return (b
& 0x7ff8000000000000ULL
) != 0x7ff8000000000000ULL
;
671 return memcmp( &a
, &b
, sizeof (a
) ) != 0;
674 static int test_fma( void )
678 for (i
= 0; i
< N
; i
++) {
680 __asm
__volatile__ ("vfmadd132sd %2, %3, %0" : "=x" (w
) : "0" (dt
.x
[i
]), "x" (dt
.y
[i
]), "x" (dt
.z
[i
]));
681 thisres
|= test( w
, dt
.expected
[i
] );
682 __asm
__volatile__ ("vfmadd132sd %2, %3, %0" : "=x" (w
) : "0" (dt
.x
[i
]), "m" (dt
.y
[i
]), "x" (dt
.z
[i
]));
683 thisres
|= test( w
, dt
.expected
[i
] );
684 __asm
__volatile__ ("vfmadd213sd %3, %2, %0" : "=x" (w
) : "0" (dt
.x
[i
]), "x" (dt
.y
[i
]), "x" (dt
.z
[i
]));
685 thisres
|= test( w
, dt
.expected
[i
] );
686 __asm
__volatile__ ("vfmadd213sd %3, %2, %0" : "=x" (w
) : "0" (dt
.x
[i
]), "x" (dt
.y
[i
]), "m" (dt
.z
[i
]));
687 thisres
|= test( w
, dt
.expected
[i
] );
688 __asm
__volatile__ ("vfmadd231sd %2, %1, %0" : "=x" (w
) : "x" (dt
.x
[i
]), "x" (dt
.y
[i
]), "0" (dt
.z
[i
]));
689 thisres
|= test( w
, dt
.expected
[i
] );
690 __asm
__volatile__ ("vfmadd231sd %2, %1, %0" : "=x" (w
) : "x" (dt
.x
[i
]), "m" (dt
.y
[i
]), "0" (dt
.z
[i
]));
691 thisres
|= test( w
, dt
.expected
[i
] );
693 printf( "Failure 1 %d %a %a\n", i
, w
, dt
.expected
[i
] );
696 __asm
__volatile__ ("vfnmsub132sd %2, %3, %0" : "=x" (w
) : "0" (dt
.x
[i
]), "x" (dt
.y
[i
]), "x" (dt
.z
[i
]));
697 thisres
|= test( -w
, dt
.expected
[i
] );
698 __asm
__volatile__ ("vfnmsub132sd %2, %3, %0" : "=x" (w
) : "0" (dt
.x
[i
]), "m" (dt
.y
[i
]), "x" (dt
.z
[i
]));
699 thisres
|= test( -w
, dt
.expected
[i
] );
700 __asm
__volatile__ ("vfnmsub213sd %3, %2, %0" : "=x" (w
) : "0" (dt
.x
[i
]), "x" (dt
.y
[i
]), "x" (dt
.z
[i
]));
701 thisres
|= test( -w
, dt
.expected
[i
] );
702 __asm
__volatile__ ("vfnmsub213sd %3, %2, %0" : "=x" (w
) : "0" (dt
.x
[i
]), "x" (dt
.y
[i
]), "m" (dt
.z
[i
]));
703 thisres
|= test( -w
, dt
.expected
[i
] );
704 __asm
__volatile__ ("vfnmsub231sd %2, %1, %0" : "=x" (w
) : "x" (dt
.x
[i
]), "x" (dt
.y
[i
]), "0" (dt
.z
[i
]));
705 thisres
|= test( -w
, dt
.expected
[i
] );
706 __asm
__volatile__ ("vfnmsub231sd %2, %1, %0" : "=x" (w
) : "x" (dt
.x
[i
]), "m" (dt
.y
[i
]), "0" (dt
.z
[i
]));
707 thisres
|= test( -w
, dt
.expected
[i
] );
709 printf( "Failure 2 %d %a %a\n", i
, w
, dt
.expected
[i
] );
712 for (i
= 0; i
< N
; i
++)
714 for (i
= 0; i
< N
; i
++) {
716 __asm
__volatile__ ("vfmsub132sd %2, %3, %0" : "=x" (w
) : "0" (dt
.x
[i
]), "x" (dt
.y
[i
]), "x" (dt
.z
[i
]));
717 thisres
|= test( w
, dt
.expected
[i
] );
718 __asm
__volatile__ ("vfmsub132sd %2, %3, %0" : "=x" (w
) : "0" (dt
.x
[i
]), "m" (dt
.y
[i
]), "x" (dt
.z
[i
]));
719 thisres
|= test( w
, dt
.expected
[i
] );
720 __asm
__volatile__ ("vfmsub213sd %3, %2, %0" : "=x" (w
) : "0" (dt
.x
[i
]), "x" (dt
.y
[i
]), "x" (dt
.z
[i
]));
721 thisres
|= test( w
, dt
.expected
[i
] );
722 __asm
__volatile__ ("vfmsub213sd %3, %2, %0" : "=x" (w
) : "0" (dt
.x
[i
]), "x" (dt
.y
[i
]), "m" (dt
.z
[i
]));
723 thisres
|= test( w
, dt
.expected
[i
] );
724 __asm
__volatile__ ("vfmsub231sd %2, %1, %0" : "=x" (w
) : "x" (dt
.x
[i
]), "x" (dt
.y
[i
]), "0" (dt
.z
[i
]));
725 thisres
|= test( w
, dt
.expected
[i
] );
726 __asm
__volatile__ ("vfmsub231sd %2, %1, %0" : "=x" (w
) : "x" (dt
.x
[i
]), "m" (dt
.y
[i
]), "0" (dt
.z
[i
]));
727 thisres
|= test( w
, dt
.expected
[i
] );
729 printf( "Failure 3 %d %a %a\n", i
, w
, dt
.expected
[i
] );
732 __asm
__volatile__ ("vfnmadd132sd %2, %3, %0" : "=x" (w
) : "0" (dt
.x
[i
]), "x" (dt
.y
[i
]), "x" (dt
.z
[i
]));
733 thisres
|= test( -w
, dt
.expected
[i
] );
734 __asm
__volatile__ ("vfnmadd132sd %2, %3, %0" : "=x" (w
) : "0" (dt
.x
[i
]), "m" (dt
.y
[i
]), "x" (dt
.z
[i
]));
735 thisres
|= test( -w
, dt
.expected
[i
] );
736 __asm
__volatile__ ("vfnmadd213sd %3, %2, %0" : "=x" (w
) : "0" (dt
.x
[i
]), "x" (dt
.y
[i
]), "x" (dt
.z
[i
]));
737 thisres
|= test( -w
, dt
.expected
[i
] );
738 __asm
__volatile__ ("vfnmadd213sd %3, %2, %0" : "=x" (w
) : "0" (dt
.x
[i
]), "x" (dt
.y
[i
]), "m" (dt
.z
[i
]));
739 thisres
|= test( -w
, dt
.expected
[i
] );
740 __asm
__volatile__ ("vfnmadd231sd %2, %1, %0" : "=x" (w
) : "x" (dt
.x
[i
]), "x" (dt
.y
[i
]), "0" (dt
.z
[i
]));
741 thisres
|= test( -w
, dt
.expected
[i
] );
742 __asm
__volatile__ ("vfnmadd231sd %2, %1, %0" : "=x" (w
) : "x" (dt
.x
[i
]), "m" (dt
.y
[i
]), "0" (dt
.z
[i
]));
743 thisres
|= test( -w
, dt
.expected
[i
] );
745 printf( "Failure 4 %d %a %a\n", i
, w
, dt
.expected
[i
] );
748 for (i
= 0; i
< N
; i
++)
750 for (i
= 0; i
< N
; i
+= 2) {
752 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
753 "vfmadd132pd %%xmm7, %%xmm8, %%xmm9;"
754 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
755 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
756 for (j
= 0; j
< 2; j
++)
757 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
758 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
759 "vfmadd132pd (%2), %%xmm8, %%xmm9;"
760 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
761 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
762 for (j
= 0; j
< 2; j
++)
763 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
764 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
765 "vfmadd213pd %%xmm7, %%xmm8, %%xmm9;"
766 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
767 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
768 for (j
= 0; j
< 2; j
++)
769 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
770 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
771 "vfmadd213pd (%3), %%xmm8, %%xmm9;"
772 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
773 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
774 for (j
= 0; j
< 2; j
++)
775 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
776 __asm
__volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
777 "vfmadd231pd %%xmm7, %%xmm8, %%xmm9;"
778 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
779 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
780 for (j
= 0; j
< 2; j
++)
781 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
782 __asm
__volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
783 "vfmadd231pd (%2), %%xmm8, %%xmm9;"
784 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
785 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
786 for (j
= 0; j
< 2; j
++)
787 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
789 printf( "Failure 5 %d", i
);
790 for (j
= 0; j
< 2; j
++)
791 printf( " %a %a", dt
.res
[i
+j
], dt
.expected
[i
+j
] );
796 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
797 "vfnmsub132pd %%xmm7, %%xmm8, %%xmm9;"
798 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
799 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
800 for (j
= 0; j
< 2; j
++)
801 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
802 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
803 "vfnmsub132pd (%2), %%xmm8, %%xmm9;"
804 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
805 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
806 for (j
= 0; j
< 2; j
++)
807 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
808 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
809 "vfnmsub213pd %%xmm7, %%xmm8, %%xmm9;"
810 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
811 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
812 for (j
= 0; j
< 2; j
++)
813 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
814 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
815 "vfnmsub213pd (%3), %%xmm8, %%xmm9;"
816 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
817 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
818 for (j
= 0; j
< 2; j
++)
819 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
820 __asm
__volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
821 "vfnmsub231pd %%xmm7, %%xmm8, %%xmm9;"
822 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
823 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
824 for (j
= 0; j
< 2; j
++)
825 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
826 __asm
__volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
827 "vfnmsub231pd (%2), %%xmm8, %%xmm9;"
828 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
829 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
830 for (j
= 0; j
< 2; j
++)
831 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
833 printf( "Failure 6 %d", i
);
834 for (j
= 0; j
< 2; j
++)
835 printf( " %a %a", dt
.res
[i
+j
], dt
.expected
[i
+j
] );
840 for (i
= 0; i
< N
; i
++)
842 for (i
= 0; i
< N
; i
+= 2) {
844 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
845 "vfmsub132pd %%xmm7, %%xmm8, %%xmm9;"
846 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
847 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
848 for (j
= 0; j
< 2; j
++)
849 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
850 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
851 "vfmsub132pd (%2), %%xmm8, %%xmm9;"
852 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
853 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
854 for (j
= 0; j
< 2; j
++)
855 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
856 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
857 "vfmsub213pd %%xmm7, %%xmm8, %%xmm9;"
858 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
859 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
860 for (j
= 0; j
< 2; j
++)
861 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
862 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
863 "vfmsub213pd (%3), %%xmm8, %%xmm9;"
864 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
865 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
866 for (j
= 0; j
< 2; j
++)
867 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
868 __asm
__volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
869 "vfmsub231pd %%xmm7, %%xmm8, %%xmm9;"
870 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
871 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
872 for (j
= 0; j
< 2; j
++)
873 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
874 __asm
__volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
875 "vfmsub231pd (%2), %%xmm8, %%xmm9;"
876 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
877 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
878 for (j
= 0; j
< 2; j
++)
879 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
881 printf( "Failure 7 %d", i
);
882 for (j
= 0; j
< 2; j
++)
883 printf( " %a %a", dt
.res
[i
+j
], dt
.expected
[i
+j
] );
888 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
889 "vfnmadd132pd %%xmm7, %%xmm8, %%xmm9;"
890 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
891 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
892 for (j
= 0; j
< 2; j
++)
893 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
894 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
895 "vfnmadd132pd (%2), %%xmm8, %%xmm9;"
896 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
897 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
898 for (j
= 0; j
< 2; j
++)
899 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
900 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
901 "vfnmadd213pd %%xmm7, %%xmm8, %%xmm9;"
902 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
903 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
904 for (j
= 0; j
< 2; j
++)
905 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
906 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
907 "vfnmadd213pd (%3), %%xmm8, %%xmm9;"
908 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
909 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
910 for (j
= 0; j
< 2; j
++)
911 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
912 __asm
__volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
913 "vfnmadd231pd %%xmm7, %%xmm8, %%xmm9;"
914 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
915 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
916 for (j
= 0; j
< 2; j
++)
917 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
918 __asm
__volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
919 "vfnmadd231pd (%2), %%xmm8, %%xmm9;"
920 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
921 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
922 for (j
= 0; j
< 2; j
++)
923 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
925 printf( "Failure 8 %d", i
);
926 for (j
= 0; j
< 2; j
++)
927 printf( " %a %a", dt
.res
[i
+j
], dt
.expected
[i
+j
] );
932 for (i
= 1; i
< N
; i
+= 2)
934 for (i
= 0; i
< N
; i
+= 2) {
936 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
937 "vfmaddsub132pd %%xmm7, %%xmm8, %%xmm9;"
938 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
939 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
940 for (j
= 0; j
< 2; j
++)
941 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
942 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
943 "vfmaddsub132pd (%2), %%xmm8, %%xmm9;"
944 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
945 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
946 for (j
= 0; j
< 2; j
++)
947 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
948 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
949 "vfmaddsub213pd %%xmm7, %%xmm8, %%xmm9;"
950 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
951 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
952 for (j
= 0; j
< 2; j
++)
953 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
954 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
955 "vfmaddsub213pd (%3), %%xmm8, %%xmm9;"
956 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
957 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
958 for (j
= 0; j
< 2; j
++)
959 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
960 __asm
__volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
961 "vfmaddsub231pd %%xmm7, %%xmm8, %%xmm9;"
962 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
963 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
964 for (j
= 0; j
< 2; j
++)
965 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
966 __asm
__volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
967 "vfmaddsub231pd (%2), %%xmm8, %%xmm9;"
968 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
969 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
970 for (j
= 0; j
< 2; j
++)
971 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
973 printf( "Failure 9 %d", i
);
974 for (j
= 0; j
< 2; j
++)
975 printf( " %a %a", dt
.res
[i
+j
], dt
.expected
[i
+j
] );
980 for (i
= 0; i
< N
; i
++)
982 for (i
= 0; i
< N
; i
+= 2) {
984 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%3), %%xmm8;"
985 "vfmsubadd132pd %%xmm7, %%xmm8, %%xmm9;"
986 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
987 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
988 for (j
= 0; j
< 2; j
++)
989 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
990 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm8;"
991 "vfmsubadd132pd (%2), %%xmm8, %%xmm9;"
992 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
993 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
994 for (j
= 0; j
< 2; j
++)
995 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
996 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%3), %%xmm7; vmovapd (%2), %%xmm8;"
997 "vfmsubadd213pd %%xmm7, %%xmm8, %%xmm9;"
998 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
999 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1000 for (j
= 0; j
< 2; j
++)
1001 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1002 __asm
__volatile__ ("vmovapd (%1), %%xmm9; vmovapd (%2), %%xmm8;"
1003 "vfmsubadd213pd (%3), %%xmm8, %%xmm9;"
1004 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1005 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1006 for (j
= 0; j
< 2; j
++)
1007 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1008 __asm
__volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%2), %%xmm7; vmovapd (%1), %%xmm8;"
1009 "vfmsubadd231pd %%xmm7, %%xmm8, %%xmm9;"
1010 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1011 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1012 for (j
= 0; j
< 2; j
++)
1013 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1014 __asm
__volatile__ ("vmovapd (%3), %%xmm9; vmovapd (%1), %%xmm8;"
1015 "vfmsubadd231pd (%2), %%xmm8, %%xmm9;"
1016 "vmovapd %%xmm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1017 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1018 for (j
= 0; j
< 2; j
++)
1019 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1021 printf( "Failure 10 %d", i
);
1022 for (j
= 0; j
< 2; j
++)
1023 printf( " %a %a", dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1028 for (i
= 1; i
< N
; i
+= 2)
1030 for (i
= 0; i
< N
; i
+= 4) {
1032 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
1033 "vfmadd132pd %%ymm7, %%ymm8, %%ymm9;"
1034 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1035 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1036 for (j
= 0; j
< 4; j
++)
1037 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1038 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
1039 "vfmadd132pd (%2), %%ymm8, %%ymm9;"
1040 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1041 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1042 for (j
= 0; j
< 4; j
++)
1043 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1044 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
1045 "vfmadd213pd %%ymm7, %%ymm8, %%ymm9;"
1046 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1047 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1048 for (j
= 0; j
< 4; j
++)
1049 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1050 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
1051 "vfmadd213pd (%3), %%ymm8, %%ymm9;"
1052 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1053 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1054 for (j
= 0; j
< 4; j
++)
1055 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1056 __asm
__volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
1057 "vfmadd231pd %%ymm7, %%ymm8, %%ymm9;"
1058 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1059 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1060 for (j
= 0; j
< 4; j
++)
1061 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1062 __asm
__volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
1063 "vfmadd231pd (%2), %%ymm8, %%ymm9;"
1064 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1065 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1066 for (j
= 0; j
< 4; j
++)
1067 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1069 printf( "Failure 11 %d", i
);
1070 for (j
= 0; j
< 4; j
++)
1071 printf( " %a %a", dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1076 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
1077 "vfnmsub132pd %%ymm7, %%ymm8, %%ymm9;"
1078 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1079 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1080 for (j
= 0; j
< 4; j
++)
1081 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1082 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
1083 "vfnmsub132pd (%2), %%ymm8, %%ymm9;"
1084 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1085 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1086 for (j
= 0; j
< 4; j
++)
1087 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1088 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
1089 "vfnmsub213pd %%ymm7, %%ymm8, %%ymm9;"
1090 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1091 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1092 for (j
= 0; j
< 4; j
++)
1093 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1094 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
1095 "vfnmsub213pd (%3), %%ymm8, %%ymm9;"
1096 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1097 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1098 for (j
= 0; j
< 4; j
++)
1099 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1100 __asm
__volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
1101 "vfnmsub231pd %%ymm7, %%ymm8, %%ymm9;"
1102 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1103 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1104 for (j
= 0; j
< 4; j
++)
1105 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1106 __asm
__volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
1107 "vfnmsub231pd (%2), %%ymm8, %%ymm9;"
1108 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1109 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1110 for (j
= 0; j
< 4; j
++)
1111 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1113 printf( "Failure 12 %d", i
);
1114 for (j
= 0; j
< 4; j
++)
1115 printf( " %a %a", dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1120 for (i
= 0; i
< N
; i
++)
1122 for (i
= 0; i
< N
; i
+= 4) {
1124 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
1125 "vfmsub132pd %%ymm7, %%ymm8, %%ymm9;"
1126 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1127 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1128 for (j
= 0; j
< 4; j
++)
1129 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1130 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
1131 "vfmsub132pd (%2), %%ymm8, %%ymm9;"
1132 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1133 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1134 for (j
= 0; j
< 4; j
++)
1135 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1136 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
1137 "vfmsub213pd %%ymm7, %%ymm8, %%ymm9;"
1138 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1139 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1140 for (j
= 0; j
< 4; j
++)
1141 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1142 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
1143 "vfmsub213pd (%3), %%ymm8, %%ymm9;"
1144 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1145 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1146 for (j
= 0; j
< 4; j
++)
1147 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1148 __asm
__volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
1149 "vfmsub231pd %%ymm7, %%ymm8, %%ymm9;"
1150 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1151 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1152 for (j
= 0; j
< 4; j
++)
1153 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1154 __asm
__volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
1155 "vfmsub231pd (%2), %%ymm8, %%ymm9;"
1156 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1157 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1158 for (j
= 0; j
< 4; j
++)
1159 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1161 printf( "Failure 13 %d", i
);
1162 for (j
= 0; j
< 4; j
++)
1163 printf( " %a %a", dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1168 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
1169 "vfnmadd132pd %%ymm7, %%ymm8, %%ymm9;"
1170 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1171 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1172 for (j
= 0; j
< 4; j
++)
1173 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1174 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
1175 "vfnmadd132pd (%2), %%ymm8, %%ymm9;"
1176 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1177 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1178 for (j
= 0; j
< 4; j
++)
1179 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1180 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
1181 "vfnmadd213pd %%ymm7, %%ymm8, %%ymm9;"
1182 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1183 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1184 for (j
= 0; j
< 4; j
++)
1185 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1186 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
1187 "vfnmadd213pd (%3), %%ymm8, %%ymm9;"
1188 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1189 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1190 for (j
= 0; j
< 4; j
++)
1191 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1192 __asm
__volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
1193 "vfnmadd231pd %%ymm7, %%ymm8, %%ymm9;"
1194 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1195 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1196 for (j
= 0; j
< 4; j
++)
1197 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1198 __asm
__volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
1199 "vfnmadd231pd (%2), %%ymm8, %%ymm9;"
1200 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1201 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1202 for (j
= 0; j
< 4; j
++)
1203 thisres
|= test( -dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1205 printf( "Failure 14 %d", i
);
1206 for (j
= 0; j
< 4; j
++)
1207 printf( " %a %a", dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1212 for (i
= 1; i
< N
; i
+= 2)
1214 for (i
= 0; i
< N
; i
+= 4) {
1216 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
1217 "vfmaddsub132pd %%ymm7, %%ymm8, %%ymm9;"
1218 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1219 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1220 for (j
= 0; j
< 4; j
++)
1221 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1222 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
1223 "vfmaddsub132pd (%2), %%ymm8, %%ymm9;"
1224 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1225 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1226 for (j
= 0; j
< 4; j
++)
1227 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1228 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
1229 "vfmaddsub213pd %%ymm7, %%ymm8, %%ymm9;"
1230 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1231 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1232 for (j
= 0; j
< 4; j
++)
1233 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1234 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
1235 "vfmaddsub213pd (%3), %%ymm8, %%ymm9;"
1236 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1237 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1238 for (j
= 0; j
< 4; j
++)
1239 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1240 __asm
__volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
1241 "vfmaddsub231pd %%ymm7, %%ymm8, %%ymm9;"
1242 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1243 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1244 for (j
= 0; j
< 4; j
++)
1245 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1246 __asm
__volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
1247 "vfmaddsub231pd (%2), %%ymm8, %%ymm9;"
1248 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1249 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1250 for (j
= 0; j
< 4; j
++)
1251 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
1253 printf( "Failure 15 %d", i
);
1254 for (j
= 0; j
< 4; j
++)
1255 printf( " %a %a", dt
.res
[i
+j
], dt
.expected
[i
+j
] );
/* vfmsubadd{132,213,231}pd section of the packed-double test routine.
 * NOTE: this excerpt omits several original lines (the function header,
 * declarations, the bodies of the short for-loops and the failure guard),
 * so the comments below describe only what is visible. */
/* Pass over every element ahead of the vector loop; its body is not visible
 * here -- presumably it prepares the dt inputs for the alternating
 * add/subtract lane pattern of vfmsubadd (TODO confirm). */
1260 for (i
= 0; i
< N
; i
++)
/* Main loop: four doubles (one ymm register) per iteration. */
1262 for (i
= 0; i
< N
; i
+= 4) {
/* 132 form, register operands: ymm9 = x, ymm7 = y, ymm8 = z, so the
 * multiply is x*y and z is the alternately added/subtracted term; the
 * result left in ymm9 is stored to dt.res[i..i+3].
 * NOTE(review): every asm in this loop stores through the pointer in %0 but
 * has no output operand and no memory clobber, so the compiler is not told
 * that dt.res changes (or that dt.x/y/z are read).  Worth confirming this is
 * safe with the compilers in use.  The vector registers are listed in the
 * clobbers under their xmm names. */
1264 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%3), %%ymm8;"
1265 "vfmsubadd132pd %%ymm7, %%ymm8, %%ymm9;"
1266 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1267 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
/* Compare each of the four lanes with its expected value.  test() is defined
 * outside this excerpt; any nonzero return is OR-ed into thisres. */
1268 for (j
= 0; j
< 4; j
++)
1269 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
/* 132 form again, this time with y supplied as the memory operand (%2). */
1270 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm8;"
1271 "vfmsubadd132pd (%2), %%ymm8, %%ymm9;"
1272 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1273 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1274 for (j
= 0; j
< 4; j
++)
1275 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
/* 213 form, register operands: ymm9 = x, ymm8 = y, ymm7 = z. */
1276 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%3), %%ymm7; vmovapd (%2), %%ymm8;"
1277 "vfmsubadd213pd %%ymm7, %%ymm8, %%ymm9;"
1278 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1279 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1280 for (j
= 0; j
< 4; j
++)
1281 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
/* 213 form with z supplied as the memory operand (%3). */
1282 __asm
__volatile__ ("vmovapd (%1), %%ymm9; vmovapd (%2), %%ymm8;"
1283 "vfmsubadd213pd (%3), %%ymm8, %%ymm9;"
1284 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1285 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1286 for (j
= 0; j
< 4; j
++)
1287 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
/* 231 form, register operands: ymm9 = z (the accumulator that is also the
 * destination), ymm8 = x, ymm7 = y. */
1288 __asm
__volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%2), %%ymm7; vmovapd (%1), %%ymm8;"
1289 "vfmsubadd231pd %%ymm7, %%ymm8, %%ymm9;"
1290 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1291 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1292 for (j
= 0; j
< 4; j
++)
1293 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
/* 231 form with y supplied as the memory operand (%2). */
1294 __asm
__volatile__ ("vmovapd (%3), %%ymm9; vmovapd (%1), %%ymm8;"
1295 "vfmsubadd231pd (%2), %%ymm8, %%ymm9;"
1296 "vmovapd %%ymm9, (%0)" : : "r" (&dt
.res
[i
]), "r" (&dt
.x
[i
]),
1297 "r" (&dt
.y
[i
]), "r" (&dt
.z
[i
]) : "xmm7", "xmm8", "xmm9");
1298 for (j
= 0; j
< 4; j
++)
1299 thisres
|= test( dt
.res
[i
+j
], dt
.expected
[i
+j
] );
/* Failure report for this section (tag 16): the chunk start index, then a
 * result/expected pair for each of the four lanes in hex-float (%a) form.
 * Only the lanes left in dt.res by the last asm above are shown.  The
 * condition guarding the report is not visible in this excerpt. */
1301 printf( "Failure 16 %d", i
);
1302 for (j
= 0; j
< 4; j
++)
1303 printf( " %a %a", dt
.res
[i
+j
], dt
.expected
[i
+j
] );
/* Loop over the odd-numbered elements; its body is not visible here
 * (presumably it restores inputs changed earlier -- TODO confirm). */
1308 for (i
= 1; i
< N
; i
+= 2)
/* Test-driver setup and the single-precision case table.  This excerpt omits
 * several original lines (the enclosing function header, declarations and
 * most of the TEST_F macro body), so the comments describe only what is
 * visible. */
/* Empty asm that takes the address of plus_zero and declares a memory
 * clobber -- presumably so the compiler cannot constant-fold the division
 * below (TODO confirm). */
1318 __asm
__volatile__ ("" : : "r" (&plus_zero
) : "memory");
/* 0/0 yields NaN, assuming plus_zero still holds zero at this point. */
1319 nan_value
= plus_zero
/ plus_zero
;
/* The literal is the largest finite float; multiplying it by 16 is expected
 * to overflow to +infinity. */
1320 plus_infty
= 3.40282346638528859812e+38F
* 16.0F
;
1321 minus_infty
= -plus_infty
;
/* TEST_F( a, b, c, d ): only the line storing d into ft.expected[i] is
 * visible here.  Presumably a, b and c are stored as the x, y and z inputs so
 * that each row states a*b + c == d (the first row, 1*2 + 3 = 5, fits) --
 * TODO confirm against the full macro. */
1322 #define TEST_F( a, b, c, d ) \
1327 ft.expected[i] = d; \
1330 TEST_F( 1.0, 2.0, 3.0, 5.0 );
/* A NaN in any operand (also combined with an infinity-times-zero product)
 * is expected to give NaN. */
1331 TEST_F( nan_value
, 2.0, 3.0, nan_value
);
1332 TEST_F( 1.0, nan_value
, 3.0, nan_value
);
1333 TEST_F( 1.0, 2.0, nan_value
, nan_value
);
1334 TEST_F( plus_infty
, 0.0, nan_value
, nan_value
);
1335 TEST_F( minus_infty
, 0.0, nan_value
, nan_value
);
1336 TEST_F( 0.0, plus_infty
, nan_value
, nan_value
);
1337 TEST_F( 0.0, minus_infty
, nan_value
, nan_value
);
/* Infinity times zero with a finite addend: expected NaN. */
1338 TEST_F( plus_infty
, 0.0, 1.0, nan_value
);
1339 TEST_F( minus_infty
, 0.0, 1.0, nan_value
);
1340 TEST_F( 0.0, plus_infty
, 1.0, nan_value
);
1341 TEST_F( 0.0, minus_infty
, 1.0, nan_value
);
/* Infinite product plus an infinity of the opposite sign: expected NaN. */
1342 TEST_F( plus_infty
, plus_infty
, minus_infty
, nan_value
);
1343 TEST_F( minus_infty
, plus_infty
, plus_infty
, nan_value
);
1344 TEST_F( plus_infty
, minus_infty
, plus_infty
, nan_value
);
1345 TEST_F( minus_infty
, minus_infty
, minus_infty
, nan_value
);
1346 TEST_F( plus_infty
, 3.5L, minus_infty
, nan_value
);
1347 TEST_F( minus_infty
, -7.5L, minus_infty
, nan_value
);
1348 TEST_F( -13.5L, plus_infty
, plus_infty
, nan_value
);
1349 TEST_F( minus_infty
, 7.5L, plus_infty
, nan_value
);
/* Small exact case: 1.25*0.75 + 0.0625 = 1.0. */
1350 TEST_F( 1.25L, 0.75L, 0.0625L, 1.0L );
/* Finite operands of very large magnitude with an infinite addend: the
 * expected result is the addend itself. */
1351 TEST_F( -3.40282346638528859812e+38F
, -3.40282346638528859812e+38F
, minus_infty
, minus_infty
);
1352 TEST_F( 3.40282346638528859812e+38F
/ 2, 3.40282346638528859812e+38F
/ 2, minus_infty
, minus_infty
);
1353 TEST_F( -3.40282346638528859812e+38F
, 3.40282346638528859812e+38F
, plus_infty
, plus_infty
);
1354 TEST_F( 3.40282346638528859812e+38F
/ 2, -3.40282346638528859812e+38F
/ 4, plus_infty
, plus_infty
);
/* Infinite product plus an infinity of the same sign keeps that infinity. */
1355 TEST_F( plus_infty
, 4, plus_infty
, plus_infty
);
1356 TEST_F( 2, minus_infty
, minus_infty
, minus_infty
);
1357 TEST_F( minus_infty
, minus_infty
, plus_infty
, plus_infty
);
1358 TEST_F( plus_infty
, minus_infty
, minus_infty
, minus_infty
);
/* Finite cases written as hex-float literals; presumably picked so that a
 * single (fused) rounding differs from rounding the product first.  In the
 * last row, for instance, twice the largest finite float is not itself a
 * finite float although the expected result is. */
1359 TEST_F( 0x1.7ff8p
+13, 0x1.000002p
+0, 0x1.ffffp
-24, 0x1.7ff802p
+13 );
1360 TEST_F( 0x1.fffp
+0, 0x1.00001p
+0, -0x1.fffp
+0, 0x1.fffp
-20 );
1361 TEST_F( 0x1.9abcdep
+127, 0x0.9abcdep
-126, -0x1.f08948p
+0, 0x1.bb421p
-25 );
1362 TEST_F( 0x1.9abcdep
+100, 0x0.9abcdep
-126, -0x1.f08948p
-27, 0x1.bb421p
-52 );
1363 TEST_F( 0x1.fffffep
+127, 0x1.001p
+0, -0x1.fffffep
+127, 0x1.fffffep
+115 );
1364 TEST_F( -0x1.fffffep
+127, 0x1.fffffep
+0, 0x1.fffffep
+127, -0x1.fffffap
+127 );
1365 TEST_F( 0x1.fffffep
+127, 2.0, -0x1.fffffep
+127, 0x1.fffffep
+127 );
/* Run the scalar single-precision tests and merge the outcome into res
 * (other test calls, if any, are not visible in this excerpt). */
1367 res
|= test_fmaf( );
/* Double-precision case table of the test driver.  This excerpt omits
 * several original lines (most of the TEST macro body among them), so the
 * comments describe only what is visible.
 * TEST( a, b, c, d ): only the line storing d into dt.expected[i] is visible
 * here.  Presumably a, b and c are stored as the x, y and z inputs so that
 * each row states a*b + c == d (the first row, 1*2 + 3 = 5, fits) -- TODO
 * confirm against the full macro. */
1369 #define TEST( a, b, c, d ) \
1374 dt.expected[i] = d; \
1377 TEST( 1.0, 2.0, 3.0, 5.0 );
/* A NaN in any operand (also combined with an infinity-times-zero product)
 * is expected to give NaN. */
1378 TEST( nan_value
, 2.0, 3.0, nan_value
);
1379 TEST( 1.0, nan_value
, 3.0, nan_value
);
1380 TEST( 1.0, 2.0, nan_value
, nan_value
);
1381 TEST( plus_infty
, 0.0, nan_value
, nan_value
);
1382 TEST( minus_infty
, 0.0, nan_value
, nan_value
);
1383 TEST( 0.0, plus_infty
, nan_value
, nan_value
);
1384 TEST( 0.0, minus_infty
, nan_value
, nan_value
);
/* Infinity times zero with a finite addend: expected NaN. */
1385 TEST( plus_infty
, 0.0, 1.0, nan_value
);
1386 TEST( minus_infty
, 0.0, 1.0, nan_value
);
1387 TEST( 0.0, plus_infty
, 1.0, nan_value
);
1388 TEST( 0.0, minus_infty
, 1.0, nan_value
);
/* Infinite product plus an infinity of the opposite sign: expected NaN. */
1389 TEST( plus_infty
, plus_infty
, minus_infty
, nan_value
);
1390 TEST( minus_infty
, plus_infty
, plus_infty
, nan_value
);
1391 TEST( plus_infty
, minus_infty
, plus_infty
, nan_value
);
1392 TEST( minus_infty
, minus_infty
, minus_infty
, nan_value
);
1393 TEST( plus_infty
, 3.5L, minus_infty
, nan_value
);
1394 TEST( minus_infty
, -7.5L, minus_infty
, nan_value
);
1395 TEST( -13.5L, plus_infty
, plus_infty
, nan_value
);
1396 TEST( minus_infty
, 7.5L, plus_infty
, nan_value
);
/* Small exact case: 1.25*0.75 + 0.0625 = 1.0. */
1397 TEST( 1.25L, 0.75L, 0.0625L, 1.0L );
/* Finite operands of very large magnitude (the literal is the largest finite
 * double, written with an L suffix) with an infinite addend: the expected
 * result is the addend itself. */
1398 TEST( -1.79769313486231570815e+308L, -1.79769313486231570815e+308L, minus_infty
, minus_infty
);
1399 TEST( 1.79769313486231570815e+308L / 2, 1.79769313486231570815e+308L / 2, minus_infty
, minus_infty
);
1400 TEST( -1.79769313486231570815e+308L, 1.79769313486231570815e+308L, plus_infty
, plus_infty
);
1401 TEST( 1.79769313486231570815e+308L / 2, -1.79769313486231570815e+308L / 4, plus_infty
, plus_infty
);
/* Infinite product plus an infinity of the same sign keeps that infinity. */
1402 TEST( plus_infty
, 4, plus_infty
, plus_infty
);
1403 TEST( 2, minus_infty
, minus_infty
, minus_infty
);
1404 TEST( minus_infty
, minus_infty
, plus_infty
, plus_infty
);
1405 TEST( plus_infty
, minus_infty
, minus_infty
, minus_infty
);
/* Finite cases written as hex-float literals, including very large and
 * subnormal inputs; presumably picked so that a single (fused) rounding
 * differs from rounding the product first. */
1406 TEST( 0x1.7fp
+13, 0x1.0000000000001p
+0, 0x1.ffep
-48, 0x1.7f00000000001p
+13 );
1407 TEST( 0x1.fffp
+0, 0x1.0000000000001p
+0, -0x1.fffp
+0, 0x1.fffp
-52 );
1408 TEST( 0x1.0000002p
+0, 0x1.ffffffcp
-1, 0x1p
-300, 1.0 );
1409 TEST( 0x1.0000002p
+0, 0x1.ffffffcp
-1, -0x1p
-300, 0x1.fffffffffffffp
-1 );
1410 TEST( 0x1.deadbeef2feedp
+1023, 0x0.deadbeef2feedp
-1022, -0x1.a05f8c01a4bfbp
+1, 0x1.0989687bc9da4p
-53 );
1411 TEST( 0x1.deadbeef2feedp
+900, 0x0.deadbeef2feedp
-1022, -0x1.a05f8c01a4bfbp
-122, 0x1.0989687bc9da4p
-176 );
1412 TEST( 0x1.fffffffffffffp
+1023, 0x1.001p
+0, -0x1.fffffffffffffp
+1023, 0x1.fffffffffffffp
+1011 );
1413 TEST( -0x1.fffffffffffffp
+1023, 0x1.fffffffffffffp
+0, 0x1.fffffffffffffp
+1023, -0x1.ffffffffffffdp
+1023 );
1414 TEST( 0x1.fffffffffffffp
+1023, 2.0, -0x1.fffffffffffffp
+1023, 0x1.fffffffffffffp
+1023 );
/* Cases with tiny magnitudes: most expect a zero or subnormal result (the
 * 0x0.xxxp-1022 literals). */
1415 TEST( 0x1.6a09e667f3bccp
-538, 0x1.6a09e667f3bccp
-538, 0.0, 0.0 );
1416 TEST( 0x1.deadbeef2feedp
-495, 0x1.deadbeef2feedp
-495, -0x1.bf86a5786a574p
-989, 0x0.0000042625a1fp
-1022 );
1417 TEST( 0x1.deadbeef2feedp
-503, 0x1.deadbeef2feedp
-503, -0x1.bf86a5786a574p
-1005, 0x0.0000000004262p
-1022 );
1418 TEST( 0x1p
-537, 0x1p
-538, 0x1p
-1074, 0x0.0000000000002p
-1022 );
1419 TEST( 0x1.7fffff8p
-968, 0x1p
-106, 0x0.000001p
-1022, 0x0.0000010000001p
-1022 );
1420 TEST( 0x1.4000004p
-967, 0x1p
-106, 0x0.000001p
-1022, 0x0.0000010000003p
-1022 );
1421 TEST( 0x1.4p
-967, -0x1p
-106, -0x0.000001p
-1022, -0x0.0000010000002p
-1022 );
1422 TEST( -0x1.19cab66d73e17p
-959, 0x1.c7108a8c5ff51p
-107, -0x0.80b0ad65d9b64p
-1022, -0x0.80b0ad65d9d59p
-1022 );
1423 TEST( -0x1.d2eaed6e8e9d3p
-979, -0x1.4e066c62ac9ddp
-63, -0x0.9245e6b003454p
-1022, -0x0.9245c09c5fb5dp
-1022 );
1424 TEST( 0x1.153d650bb9f06p
-907, 0x1.2d01230d48407p
-125, -0x0.b278d5acfc3cp
-1022, -0x0.b22757123bbe9p
-1022 );
1425 TEST( -0x1.fffffffffffffp
-711, 0x1.fffffffffffffp
-275, 0x1.fffffe00007ffp
-983, 0x1.7ffffe00007ffp
-983 );
/* Success message; the condition under which it is printed is not visible
 * in this excerpt. */
1429 printf( "Testing successful\n");