; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX1
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX512,X86-AVX512VL
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X86,X86-AVX512,X86-AVX512FP16
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX512,X86-AVX512VLDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X64,X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512FP16
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512VLDQ
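
; FNEG is defined as subtraction from -0.0, so a vector fneg should lower to
; an XOR that flips only the sign bits: a constant-pool operand on SSE/AVX1,
; a broadcast register constant on AVX2, and an embedded broadcast ({1toN})
; memory operand on AVX512 for f32/f64 elements.
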
define <2 x double> @fneg_v2f64(<2 x double> %p) nounwind {
; X86-SSE-LABEL: fneg_v2f64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: fneg_v2f64:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X86-AVX512VL-LABEL: fneg_v2f64:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpxorq {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86-AVX512VL-NEXT: retl
;
; X86-AVX512FP16-LABEL: fneg_v2f64:
; X86-AVX512FP16: # %bb.0:
; X86-AVX512FP16-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86-AVX512FP16-NEXT: retl
;
; X86-AVX512VLDQ-LABEL: fneg_v2f64:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86-AVX512VLDQ-NEXT: retl
;
; X64-SSE-LABEL: fneg_v2f64:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fneg_v2f64:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
;
; X64-AVX512VL-LABEL: fneg_v2f64:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpxorq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512FP16-LABEL: fneg_v2f64:
; X64-AVX512FP16: # %bb.0:
; X64-AVX512FP16-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; X64-AVX512FP16-NEXT: retq
;
; X64-AVX512VLDQ-LABEL: fneg_v2f64:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; X64-AVX512VLDQ-NEXT: retq
  %t = fsub <2 x double> <double -0.0, double -0.0>, %p
  ret <2 x double> %t
}

define <4 x float> @fneg_v4f32(<4 x float> %p) nounwind {
; X86-SSE-LABEL: fneg_v4f32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX1-LABEL: fneg_v4f32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: fneg_v4f32:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX2-NEXT: vxorps %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X86-AVX512VL-LABEL: fneg_v4f32:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; X86-AVX512VL-NEXT: retl
;
; X86-AVX512FP16-LABEL: fneg_v4f32:
; X86-AVX512FP16: # %bb.0:
; X86-AVX512FP16-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; X86-AVX512FP16-NEXT: retl
;
; X86-AVX512VLDQ-LABEL: fneg_v4f32:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; X86-AVX512VLDQ-NEXT: retl
;
; X64-SSE-LABEL: fneg_v4f32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: fneg_v4f32:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: fneg_v4f32:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-AVX2-NEXT: vxorps %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: retq
;
; X64-AVX512VL-LABEL: fneg_v4f32:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512FP16-LABEL: fneg_v4f32:
; X64-AVX512FP16: # %bb.0:
; X64-AVX512FP16-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512FP16-NEXT: retq
;
; X64-AVX512VLDQ-LABEL: fneg_v4f32:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512VLDQ-NEXT: retq
  %t = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %p
  ret <4 x float> %t
}

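; The half-precision tests load their operand from memory, and all
; configurations flip the sign bit of each element with a bitwise XOR against
; a mask of -0.0 halves (0x8000 in each 16-bit lane); AVX2 and AVX512
; materialize the mask with vpbroadcastw.
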
define <8 x half> @fneg_v8f16(ptr %p) nounwind {
; X86-SSE-LABEL: fneg_v8f16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movaps (%eax), %xmm0
; X86-SSE-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX1-LABEL: fneg_v8f16:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX1-NEXT: vmovaps (%eax), %xmm0
; X86-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: fneg_v8f16:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX2-NEXT: vpbroadcastw {{.*#+}} xmm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX2-NEXT: vpxor (%eax), %xmm0, %xmm0
; X86-AVX2-NEXT: retl
;
; X86-AVX512-LABEL: fneg_v8f16:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT: vpbroadcastw {{.*#+}} xmm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX512-NEXT: vpxor (%eax), %xmm0, %xmm0
; X86-AVX512-NEXT: retl
;
; X64-SSE-LABEL: fneg_v8f16:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps (%rdi), %xmm0
; X64-SSE-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: fneg_v8f16:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: fneg_v8f16:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpbroadcastw {{.*#+}} xmm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-AVX2-NEXT: vpxor (%rdi), %xmm0, %xmm0
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: fneg_v8f16:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpbroadcastw {{.*#+}} xmm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-AVX512-NEXT: vpxor (%rdi), %xmm0, %xmm0
; X64-AVX512-NEXT: retq
  %v = load <8 x half>, ptr %p, align 16
  %nnv = fsub <8 x half> <half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0>, %v
  ret <8 x half> %nnv
}

define <4 x double> @fneg_v4f64(<4 x double> %p) nounwind {
; X86-SSE-LABEL: fneg_v4f64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movaps {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0]
; X86-SSE-NEXT: xorps %xmm2, %xmm0
; X86-SSE-NEXT: xorps %xmm2, %xmm1
; X86-SSE-NEXT: retl
;
; X86-AVX1-LABEL: fneg_v4f64:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: fneg_v4f64:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX2-NEXT: vxorps %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
;
; X86-AVX512VL-LABEL: fneg_v4f64:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpxorq {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm0, %ymm0
; X86-AVX512VL-NEXT: retl
;
; X86-AVX512FP16-LABEL: fneg_v4f64:
; X86-AVX512FP16: # %bb.0:
; X86-AVX512FP16-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm0, %ymm0
; X86-AVX512FP16-NEXT: retl
;
; X86-AVX512VLDQ-LABEL: fneg_v4f64:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm0, %ymm0
; X86-AVX512VLDQ-NEXT: retl
;
; X64-SSE-LABEL: fneg_v4f64:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0]
; X64-SSE-NEXT: xorps %xmm2, %xmm0
; X64-SSE-NEXT: xorps %xmm2, %xmm1
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: fneg_v4f64:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: fneg_v4f64:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-AVX2-NEXT: vxorps %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: retq
;
; X64-AVX512VL-LABEL: fneg_v4f64:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpxorq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512FP16-LABEL: fneg_v4f64:
; X64-AVX512FP16: # %bb.0:
; X64-AVX512FP16-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; X64-AVX512FP16-NEXT: retq
;
; X64-AVX512VLDQ-LABEL: fneg_v4f64:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; X64-AVX512VLDQ-NEXT: retq
  %t = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %p
  ret <4 x double> %t
}

define <8 x float> @fneg_v8f32(<8 x float> %p) nounwind {
; X86-SSE-LABEL: fneg_v8f32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movaps {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-SSE-NEXT: xorps %xmm2, %xmm0
; X86-SSE-NEXT: xorps %xmm2, %xmm1
; X86-SSE-NEXT: retl
;
; X86-AVX1-LABEL: fneg_v8f32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: fneg_v8f32:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX2-NEXT: vxorps %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: retl
;
; X86-AVX512VL-LABEL: fneg_v8f32:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0
; X86-AVX512VL-NEXT: retl
;
; X86-AVX512FP16-LABEL: fneg_v8f32:
; X86-AVX512FP16: # %bb.0:
; X86-AVX512FP16-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0
; X86-AVX512FP16-NEXT: retl
;
; X86-AVX512VLDQ-LABEL: fneg_v8f32:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0
; X86-AVX512VLDQ-NEXT: retl
;
; X64-SSE-LABEL: fneg_v8f32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-SSE-NEXT: xorps %xmm2, %xmm0
; X64-SSE-NEXT: xorps %xmm2, %xmm1
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: fneg_v8f32:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: fneg_v8f32:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-AVX2-NEXT: vxorps %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: retq
;
; X64-AVX512VL-LABEL: fneg_v8f32:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512FP16-LABEL: fneg_v8f32:
; X64-AVX512FP16: # %bb.0:
; X64-AVX512FP16-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512FP16-NEXT: retq
;
; X64-AVX512VLDQ-LABEL: fneg_v8f32:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512VLDQ-NEXT: retq
  %t = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %p
  ret <8 x float> %t
}

define <16 x half> @fneg_v16f16(ptr %p) nounwind {
; X86-SSE-LABEL: fneg_v16f16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movaps {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-SSE-NEXT: movaps (%eax), %xmm0
; X86-SSE-NEXT: xorps %xmm1, %xmm0
; X86-SSE-NEXT: xorps 16(%eax), %xmm1
; X86-SSE-NEXT: retl
;
; X86-AVX1-LABEL: fneg_v16f16:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX1-NEXT: vmovups (%eax), %ymm0
; X86-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: fneg_v16f16:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX2-NEXT: vpbroadcastw {{.*#+}} ymm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX2-NEXT: vpxor (%eax), %ymm0, %ymm0
; X86-AVX2-NEXT: retl
;
; X86-AVX512-LABEL: fneg_v16f16:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT: vpbroadcastw {{.*#+}} ymm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX512-NEXT: vpxor (%eax), %ymm0, %ymm0
; X86-AVX512-NEXT: retl
;
; X64-SSE-LABEL: fneg_v16f16:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-SSE-NEXT: movaps (%rdi), %xmm0
; X64-SSE-NEXT: xorps %xmm1, %xmm0
; X64-SSE-NEXT: xorps 16(%rdi), %xmm1
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: fneg_v16f16:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
; X64-AVX1-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: fneg_v16f16:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpbroadcastw {{.*#+}} ymm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-AVX2-NEXT: vpxor (%rdi), %ymm0, %ymm0
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: fneg_v16f16:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpbroadcastw {{.*#+}} ymm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-AVX512-NEXT: vpxor (%rdi), %ymm0, %ymm0
; X64-AVX512-NEXT: retq
  %v = load <16 x half>, ptr %p, align 16
  %nnv = fsub <16 x half> <half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0>, %v
  ret <16 x half> %nnv
}

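; For the 512-bit tests, SSE splits the negation into four 128-bit XORs and
; AVX into two 256-bit XORs, while AVX512 uses a single zmm XOR, folding the
; f32/f64 sign mask as an embedded broadcast.
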
define <8 x double> @fneg_v8f64(<8 x double> %p) nounwind {
; X86-SSE-LABEL: fneg_v8f64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: movl %esp, %ebp
; X86-SSE-NEXT: andl $-16, %esp
; X86-SSE-NEXT: subl $16, %esp
; X86-SSE-NEXT: movaps {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0]
; X86-SSE-NEXT: xorps %xmm3, %xmm0
; X86-SSE-NEXT: xorps %xmm3, %xmm1
; X86-SSE-NEXT: xorps %xmm3, %xmm2
; X86-SSE-NEXT: xorps 8(%ebp), %xmm3
; X86-SSE-NEXT: movl %ebp, %esp
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: fneg_v8f64:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX-NEXT: vxorps %ymm2, %ymm0, %ymm0
; X86-AVX-NEXT: vxorps %ymm2, %ymm1, %ymm1
; X86-AVX-NEXT: retl
;
; X86-AVX512VL-LABEL: fneg_v8f64:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpxorq {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm0, %zmm0
; X86-AVX512VL-NEXT: retl
;
; X86-AVX512FP16-LABEL: fneg_v8f64:
; X86-AVX512FP16: # %bb.0:
; X86-AVX512FP16-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm0, %zmm0
; X86-AVX512FP16-NEXT: retl
;
; X86-AVX512VLDQ-LABEL: fneg_v8f64:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm0, %zmm0
; X86-AVX512VLDQ-NEXT: retl
;
; X64-SSE-LABEL: fneg_v8f64:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps {{.*#+}} xmm4 = [-0.0E+0,-0.0E+0]
; X64-SSE-NEXT: xorps %xmm4, %xmm0
; X64-SSE-NEXT: xorps %xmm4, %xmm1
; X64-SSE-NEXT: xorps %xmm4, %xmm2
; X64-SSE-NEXT: xorps %xmm4, %xmm3
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fneg_v8f64:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-AVX-NEXT: vxorps %ymm2, %ymm0, %ymm0
; X64-AVX-NEXT: vxorps %ymm2, %ymm1, %ymm1
; X64-AVX-NEXT: retq
;
; X64-AVX512VL-LABEL: fneg_v8f64:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpxorq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512FP16-LABEL: fneg_v8f64:
; X64-AVX512FP16: # %bb.0:
; X64-AVX512FP16-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; X64-AVX512FP16-NEXT: retq
;
; X64-AVX512VLDQ-LABEL: fneg_v8f64:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; X64-AVX512VLDQ-NEXT: retq
  %t = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %p
  ret <8 x double> %t
}

define <16 x float> @fneg_v16f32(<16 x float> %p) nounwind {
; X86-SSE-LABEL: fneg_v16f32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %ebp
; X86-SSE-NEXT: movl %esp, %ebp
; X86-SSE-NEXT: andl $-16, %esp
; X86-SSE-NEXT: subl $16, %esp
; X86-SSE-NEXT: movaps {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-SSE-NEXT: xorps %xmm3, %xmm0
; X86-SSE-NEXT: xorps %xmm3, %xmm1
; X86-SSE-NEXT: xorps %xmm3, %xmm2
; X86-SSE-NEXT: xorps 8(%ebp), %xmm3
; X86-SSE-NEXT: movl %ebp, %esp
; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: fneg_v16f32:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vbroadcastss {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX-NEXT: vxorps %ymm2, %ymm0, %ymm0
; X86-AVX-NEXT: vxorps %ymm2, %ymm1, %ymm1
; X86-AVX-NEXT: retl
;
; X86-AVX512VL-LABEL: fneg_v16f32:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; X86-AVX512VL-NEXT: retl
;
; X86-AVX512FP16-LABEL: fneg_v16f32:
; X86-AVX512FP16: # %bb.0:
; X86-AVX512FP16-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; X86-AVX512FP16-NEXT: retl
;
; X86-AVX512VLDQ-LABEL: fneg_v16f32:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; X86-AVX512VLDQ-NEXT: retl
;
; X64-SSE-LABEL: fneg_v16f32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps {{.*#+}} xmm4 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-SSE-NEXT: xorps %xmm4, %xmm0
; X64-SSE-NEXT: xorps %xmm4, %xmm1
; X64-SSE-NEXT: xorps %xmm4, %xmm2
; X64-SSE-NEXT: xorps %xmm4, %xmm3
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fneg_v16f32:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vbroadcastss {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-AVX-NEXT: vxorps %ymm2, %ymm0, %ymm0
; X64-AVX-NEXT: vxorps %ymm2, %ymm1, %ymm1
; X64-AVX-NEXT: retq
;
; X64-AVX512VL-LABEL: fneg_v16f32:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512FP16-LABEL: fneg_v16f32:
; X64-AVX512FP16: # %bb.0:
; X64-AVX512FP16-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; X64-AVX512FP16-NEXT: retq
;
; X64-AVX512VLDQ-LABEL: fneg_v16f32:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; X64-AVX512VLDQ-NEXT: retq
  %t = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %p
  ret <16 x float> %t
}

define <32 x half> @fneg_v32f16(ptr %p) nounwind {
; X86-SSE-LABEL: fneg_v32f16:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movaps {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-SSE-NEXT: movaps (%eax), %xmm0
; X86-SSE-NEXT: xorps %xmm3, %xmm0
; X86-SSE-NEXT: movaps 16(%eax), %xmm1
; X86-SSE-NEXT: xorps %xmm3, %xmm1
; X86-SSE-NEXT: movaps 32(%eax), %xmm2
; X86-SSE-NEXT: xorps %xmm3, %xmm2
; X86-SSE-NEXT: xorps 48(%eax), %xmm3
; X86-SSE-NEXT: retl
;
; X86-AVX1-LABEL: fneg_v32f16:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX1-NEXT: vxorps (%eax), %ymm1, %ymm0
; X86-AVX1-NEXT: vxorps 32(%eax), %ymm1, %ymm1
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: fneg_v32f16:
; X86-AVX2: # %bb.0:
; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX2-NEXT: vpbroadcastw {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX2-NEXT: vpxor (%eax), %ymm1, %ymm0
; X86-AVX2-NEXT: vpxor 32(%eax), %ymm1, %ymm1
; X86-AVX2-NEXT: retl
;
; X86-AVX512VL-LABEL: fneg_v32f16:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512VL-NEXT: vpbroadcastw {{.*#+}} ymm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; X86-AVX512VL-NEXT: vpxorq (%eax), %zmm0, %zmm0
; X86-AVX512VL-NEXT: retl
;
; X86-AVX512FP16-LABEL: fneg_v32f16:
; X86-AVX512FP16: # %bb.0:
; X86-AVX512FP16-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512FP16-NEXT: vpbroadcastw {{.*#+}} zmm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX512FP16-NEXT: vpxorq (%eax), %zmm0, %zmm0
; X86-AVX512FP16-NEXT: retl
;
; X86-AVX512VLDQ-LABEL: fneg_v32f16:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512VLDQ-NEXT: vpbroadcastw {{.*#+}} ymm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; X86-AVX512VLDQ-NEXT: vpxorq (%eax), %zmm0, %zmm0
; X86-AVX512VLDQ-NEXT: retl
;
; X64-SSE-LABEL: fneg_v32f16:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps {{.*#+}} xmm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-SSE-NEXT: movaps (%rdi), %xmm0
; X64-SSE-NEXT: xorps %xmm3, %xmm0
; X64-SSE-NEXT: movaps 16(%rdi), %xmm1
; X64-SSE-NEXT: xorps %xmm3, %xmm1
; X64-SSE-NEXT: movaps 32(%rdi), %xmm2
; X64-SSE-NEXT: xorps %xmm3, %xmm2
; X64-SSE-NEXT: xorps 48(%rdi), %xmm3
; X64-SSE-NEXT: retq
;
; X64-AVX1-LABEL: fneg_v32f16:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-AVX1-NEXT: vxorps (%rdi), %ymm1, %ymm0
; X64-AVX1-NEXT: vxorps 32(%rdi), %ymm1, %ymm1
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: fneg_v32f16:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vpbroadcastw {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-AVX2-NEXT: vpxor (%rdi), %ymm1, %ymm0
; X64-AVX2-NEXT: vpxor 32(%rdi), %ymm1, %ymm1
; X64-AVX2-NEXT: retq
;
; X64-AVX512VL-LABEL: fneg_v32f16:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpbroadcastw {{.*#+}} ymm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; X64-AVX512VL-NEXT: vpxorq (%rdi), %zmm0, %zmm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512FP16-LABEL: fneg_v32f16:
; X64-AVX512FP16: # %bb.0:
; X64-AVX512FP16-NEXT: vpbroadcastw {{.*#+}} zmm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-AVX512FP16-NEXT: vpxorq (%rdi), %zmm0, %zmm0
; X64-AVX512FP16-NEXT: retq
;
; X64-AVX512VLDQ-LABEL: fneg_v32f16:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vpbroadcastw {{.*#+}} ymm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; X64-AVX512VLDQ-NEXT: vpxorq (%rdi), %zmm0, %zmm0
; X64-AVX512VLDQ-NEXT: retq
  %v = load <32 x half>, ptr %p, align 16
  %nnv = fsub <32 x half> <half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0>, %v
  ret <32 x half> %nnv
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: