1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX
3 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX512VL
4 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX512VLDQ
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512VL
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512VLDQ
; fabs of a 128-bit <2 x double> vector.
; Per the checks below: the plain AVX runs expect a vandps against an
; LCPI-labelled memory operand; the AVX512VL runs expect vpandq with a {1to2}
; broadcast memory operand; the AVX512VL+DQ runs expect vandpd with a {1to2}
; broadcast memory operand. The 64-bit runs address the constant via %rip.
9 define <2 x double> @fabs_v2f64(<2 x double> %p) {
10 ; X86-AVX-LABEL: fabs_v2f64:
12 ; X86-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
15 ; X86-AVX512VL-LABEL: fabs_v2f64:
16 ; X86-AVX512VL: # %bb.0:
17 ; X86-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
18 ; X86-AVX512VL-NEXT: retl
20 ; X86-AVX512VLDQ-LABEL: fabs_v2f64:
21 ; X86-AVX512VLDQ: # %bb.0:
22 ; X86-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
23 ; X86-AVX512VLDQ-NEXT: retl
25 ; X64-AVX-LABEL: fabs_v2f64:
27 ; X64-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
30 ; X64-AVX512VL-LABEL: fabs_v2f64:
31 ; X64-AVX512VL: # %bb.0:
32 ; X64-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
33 ; X64-AVX512VL-NEXT: retq
35 ; X64-AVX512VLDQ-LABEL: fabs_v2f64:
36 ; X64-AVX512VLDQ: # %bb.0:
37 ; X64-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
38 ; X64-AVX512VLDQ-NEXT: retq
; The only IR operation under test: a call to the vector fabs intrinsic.
39 %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
42 declare <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
; fabs of a 128-bit <4 x float> vector.
; Per the checks below: the plain AVX runs expect a vandps against an
; LCPI-labelled memory operand; the AVX512VL runs expect vpandd with a {1to4}
; broadcast memory operand; the AVX512VL+DQ runs expect vandps with a {1to4}
; broadcast memory operand. The 64-bit runs address the constant via %rip.
44 define <4 x float> @fabs_v4f32(<4 x float> %p) {
45 ; X86-AVX-LABEL: fabs_v4f32:
47 ; X86-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
50 ; X86-AVX512VL-LABEL: fabs_v4f32:
51 ; X86-AVX512VL: # %bb.0:
52 ; X86-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
53 ; X86-AVX512VL-NEXT: retl
55 ; X86-AVX512VLDQ-LABEL: fabs_v4f32:
56 ; X86-AVX512VLDQ: # %bb.0:
57 ; X86-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
58 ; X86-AVX512VLDQ-NEXT: retl
60 ; X64-AVX-LABEL: fabs_v4f32:
62 ; X64-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
65 ; X64-AVX512VL-LABEL: fabs_v4f32:
66 ; X64-AVX512VL: # %bb.0:
67 ; X64-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
68 ; X64-AVX512VL-NEXT: retq
70 ; X64-AVX512VLDQ-LABEL: fabs_v4f32:
71 ; X64-AVX512VLDQ: # %bb.0:
72 ; X64-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
73 ; X64-AVX512VLDQ-NEXT: retq
; The only IR operation under test: a call to the vector fabs intrinsic.
74 %t = call <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
77 declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
; fabs of a 256-bit <4 x double> vector (ymm register in the checks).
; Per the checks below: the plain AVX runs expect a vandps against an
; LCPI-labelled memory operand; the AVX512VL runs expect vpandq with a {1to4}
; broadcast memory operand; the AVX512VL+DQ runs expect vandpd with a {1to4}
; broadcast memory operand. The 64-bit runs address the constant via %rip.
79 define <4 x double> @fabs_v4f64(<4 x double> %p) {
80 ; X86-AVX-LABEL: fabs_v4f64:
82 ; X86-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
85 ; X86-AVX512VL-LABEL: fabs_v4f64:
86 ; X86-AVX512VL: # %bb.0:
87 ; X86-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm0, %ymm0
88 ; X86-AVX512VL-NEXT: retl
90 ; X86-AVX512VLDQ-LABEL: fabs_v4f64:
91 ; X86-AVX512VLDQ: # %bb.0:
92 ; X86-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm0, %ymm0
93 ; X86-AVX512VLDQ-NEXT: retl
95 ; X64-AVX-LABEL: fabs_v4f64:
97 ; X64-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
100 ; X64-AVX512VL-LABEL: fabs_v4f64:
101 ; X64-AVX512VL: # %bb.0:
102 ; X64-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
103 ; X64-AVX512VL-NEXT: retq
105 ; X64-AVX512VLDQ-LABEL: fabs_v4f64:
106 ; X64-AVX512VLDQ: # %bb.0:
107 ; X64-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
108 ; X64-AVX512VLDQ-NEXT: retq
; The only IR operation under test: a call to the vector fabs intrinsic.
109 %t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
112 declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
; fabs of a 256-bit <8 x float> vector (ymm register in the checks).
; Per the checks below: the plain AVX runs expect a vandps against an
; LCPI-labelled memory operand; the AVX512VL runs expect vpandd with a {1to8}
; broadcast memory operand; the AVX512VL+DQ runs expect vandps with a {1to8}
; broadcast memory operand. The 64-bit runs address the constant via %rip.
114 define <8 x float> @fabs_v8f32(<8 x float> %p) {
115 ; X86-AVX-LABEL: fabs_v8f32:
117 ; X86-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
120 ; X86-AVX512VL-LABEL: fabs_v8f32:
121 ; X86-AVX512VL: # %bb.0:
122 ; X86-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0
123 ; X86-AVX512VL-NEXT: retl
125 ; X86-AVX512VLDQ-LABEL: fabs_v8f32:
126 ; X86-AVX512VLDQ: # %bb.0:
127 ; X86-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0
128 ; X86-AVX512VLDQ-NEXT: retl
130 ; X64-AVX-LABEL: fabs_v8f32:
132 ; X64-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
135 ; X64-AVX512VL-LABEL: fabs_v8f32:
136 ; X64-AVX512VL: # %bb.0:
137 ; X64-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
138 ; X64-AVX512VL-NEXT: retq
140 ; X64-AVX512VLDQ-LABEL: fabs_v8f32:
141 ; X64-AVX512VLDQ: # %bb.0:
142 ; X64-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
143 ; X64-AVX512VLDQ-NEXT: retq
; The only IR operation under test: a call to the vector fabs intrinsic.
144 %t = call <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
147 declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
; fabs of a 512-bit <8 x double> vector.
; Per the checks below: the plain AVX runs expect the mask to be broadcast
; once into ymm2 (vbroadcastsd) and then applied with two vandps, one on ymm0
; and one on ymm1, i.e. the value is handled as two 256-bit registers. The
; AVX512VL runs expect a single vpandq on zmm0 with a {1to8} broadcast memory
; operand; the AVX512VL+DQ runs expect a single vandpd on zmm0 with {1to8}.
; NOTE(review): the "NaN" elements in the broadcast check are how the asm
; comment renders the mask constant when shown as doubles - presumably the
; all-bits-but-sign pattern; confirm against the generated constant pool.
149 define <8 x double> @fabs_v8f64(<8 x double> %p) {
150 ; X86-AVX-LABEL: fabs_v8f64:
152 ; X86-AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
153 ; X86-AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
154 ; X86-AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
157 ; X86-AVX512VL-LABEL: fabs_v8f64:
158 ; X86-AVX512VL: # %bb.0:
159 ; X86-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm0, %zmm0
160 ; X86-AVX512VL-NEXT: retl
162 ; X86-AVX512VLDQ-LABEL: fabs_v8f64:
163 ; X86-AVX512VLDQ: # %bb.0:
164 ; X86-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm0, %zmm0
165 ; X86-AVX512VLDQ-NEXT: retl
167 ; X64-AVX-LABEL: fabs_v8f64:
169 ; X64-AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
170 ; X64-AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
171 ; X64-AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
174 ; X64-AVX512VL-LABEL: fabs_v8f64:
175 ; X64-AVX512VL: # %bb.0:
176 ; X64-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
177 ; X64-AVX512VL-NEXT: retq
179 ; X64-AVX512VLDQ-LABEL: fabs_v8f64:
180 ; X64-AVX512VLDQ: # %bb.0:
181 ; X64-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
182 ; X64-AVX512VLDQ-NEXT: retq
; The only IR operation under test: a call to the vector fabs intrinsic.
183 %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
186 declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
; fabs of a 512-bit <16 x float> vector.
; Per the checks below: the plain AVX runs expect the mask to be broadcast
; once into ymm2 (vbroadcastss) and then applied with two vandps, one on ymm0
; and one on ymm1, i.e. the value is handled as two 256-bit registers. The
; AVX512VL runs expect a single vpandd on zmm0 with a {1to16} broadcast memory
; operand; the AVX512VL+DQ runs expect a single vandps on zmm0 with {1to16}.
; NOTE(review): the "NaN" elements in the broadcast check are how the asm
; comment renders the mask constant when shown as floats - presumably the
; all-bits-but-sign pattern; confirm against the generated constant pool.
188 define <16 x float> @fabs_v16f32(<16 x float> %p) {
189 ; X86-AVX-LABEL: fabs_v16f32:
191 ; X86-AVX-NEXT: vbroadcastss {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
192 ; X86-AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
193 ; X86-AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
196 ; X86-AVX512VL-LABEL: fabs_v16f32:
197 ; X86-AVX512VL: # %bb.0:
198 ; X86-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
199 ; X86-AVX512VL-NEXT: retl
201 ; X86-AVX512VLDQ-LABEL: fabs_v16f32:
202 ; X86-AVX512VLDQ: # %bb.0:
203 ; X86-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
204 ; X86-AVX512VLDQ-NEXT: retl
206 ; X64-AVX-LABEL: fabs_v16f32:
208 ; X64-AVX-NEXT: vbroadcastss {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
209 ; X64-AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
210 ; X64-AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
213 ; X64-AVX512VL-LABEL: fabs_v16f32:
214 ; X64-AVX512VL: # %bb.0:
215 ; X64-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
216 ; X64-AVX512VL-NEXT: retq
218 ; X64-AVX512VLDQ-LABEL: fabs_v16f32:
219 ; X64-AVX512VLDQ: # %bb.0:
220 ; X64-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
221 ; X64-AVX512VLDQ-NEXT: retq
; The only IR operation under test: a call to the vector fabs intrinsic.
222 %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
225 declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
227 ; PR20354: when generating code for a vector fabs op,
228 ; make sure that we're only turning off the sign bit of each float value.
229 ; No constant pool loads or vector ops are needed for the fabs of a
230 ; bitcasted integer constant; we should just return an integer constant
231 ; that has the sign bits turned off.
233 ; So instead of something like this:
234 ; movabsq (constant pool load of mask for sign bits)
235 ; vmovq (move from integer register to vector/fp register)
236 ; vandps (mask off sign bits)
237 ; vmovq (move vector/fp register back to integer return register)
239 ; We should generate:
240 ; mov (put constant value in return register)
; Constant-input case: the i64 constant 0xFFFFFFFF00000000 is bitcast to
; <2 x float>, passed through fabs, and bitcast back to i64.
; The checks expect only immediate moves: on the 32-bit runs %eax is zeroed
; and %edx receives 0x7FFFFFFF; on the 64-bit runs a single movabsq places
; 0x7FFFFFFF00000000 in %rax. That is the input with bit 31 of each 32-bit
; element cleared.
242 define i64 @fabs_v2f32_1() {
243 ; X86-LABEL: fabs_v2f32_1:
245 ; X86-NEXT: xorl %eax, %eax
246 ; X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
249 ; X64-LABEL: fabs_v2f32_1:
251 ; X64-NEXT: movabsq $9223372032559808512, %rax # imm = 0x7FFFFFFF00000000
; Upper 32-bit element is all ones, lower element is all zeros.
253 %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000
254 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
255 %ret = bitcast <2 x float> %fabs to i64
; Constant-input case mirroring fabs_v2f32_1 with the halves swapped: the i64
; constant 0x00000000FFFFFFFF is bitcast to <2 x float>, passed through fabs,
; and bitcast back to i64.
; The checks expect only immediate moves: on the 32-bit runs %eax receives
; 0x7FFFFFFF and %edx is zeroed; on the 64-bit runs a single movl places
; 0x7FFFFFFF in %eax.
259 define i64 @fabs_v2f32_2() {
260 ; X86-LABEL: fabs_v2f32_2:
262 ; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
263 ; X86-NEXT: xorl %edx, %edx
266 ; X64-LABEL: fabs_v2f32_2:
268 ; X64-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
; Lower 32-bit element is all ones, upper element is all zeros.
270 %bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF
271 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
272 %ret = bitcast <2 x float> %fabs to i64
276 declare <2 x float> @llvm.fabs.v2f32(<2 x float> %p)