1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32 --check-prefix=X32_AVX
3 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X32 --check-prefix=X32_AVX512VL
4 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X32 --check-prefix=X32_AVX512VLDQ
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=X64_AVX
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64_AVX512VL
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64_AVX512VLDQ
; fabs of a <2 x double> argument through the llvm.fabs.v2f64 intrinsic.
; The visible checks are shared by all three feature sets of each target and
; expect a vandps on xmm0 with a memory operand: a .LCPI label on i686 and a
; RIP-relative address on x86_64.
9 define <2 x double> @fabs_v2f64(<2 x double> %p) {
10 ; X32-LABEL: fabs_v2f64:
12 ; X32-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
15 ; X64-LABEL: fabs_v2f64:
17 ; X64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
19 %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
; Declaration of the intrinsic exercised by fabs_v2f64.
22 declare <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
; fabs of a <4 x float> argument through the llvm.fabs.v4f32 intrinsic.
; Expected instruction on xmm0, per feature set (same shape on both targets):
;   avx only            - vandps with a plain memory operand
;   avx512vl            - vpandd with a {1to4} broadcast memory operand
;   avx512vl + avx512dq - vandps with a {1to4} broadcast memory operand
; The memory operand is a .LCPI label on i686 and RIP-relative on x86_64.
24 define <4 x float> @fabs_v4f32(<4 x float> %p) {
25 ; X32_AVX-LABEL: fabs_v4f32:
27 ; X32_AVX-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
30 ; X32_AVX512VL-LABEL: fabs_v4f32:
31 ; X32_AVX512VL: # %bb.0:
32 ; X32_AVX512VL-NEXT: vpandd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
33 ; X32_AVX512VL-NEXT: retl
35 ; X32_AVX512VLDQ-LABEL: fabs_v4f32:
36 ; X32_AVX512VLDQ: # %bb.0:
37 ; X32_AVX512VLDQ-NEXT: vandps {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
38 ; X32_AVX512VLDQ-NEXT: retl
40 ; X64_AVX-LABEL: fabs_v4f32:
42 ; X64_AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
45 ; X64_AVX512VL-LABEL: fabs_v4f32:
46 ; X64_AVX512VL: # %bb.0:
47 ; X64_AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
48 ; X64_AVX512VL-NEXT: retq
50 ; X64_AVX512VLDQ-LABEL: fabs_v4f32:
51 ; X64_AVX512VLDQ: # %bb.0:
52 ; X64_AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to4}, %xmm0, %xmm0
53 ; X64_AVX512VLDQ-NEXT: retq
54 %t = call <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
; Declaration of the intrinsic exercised by fabs_v4f32.
57 declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
; fabs of a <4 x double> argument through the llvm.fabs.v4f64 intrinsic.
; Expected instruction on ymm0, per feature set (same shape on both targets):
;   avx only            - vandps with a plain memory operand
;   avx512vl            - vpandq with a {1to4} broadcast memory operand
;   avx512vl + avx512dq - vandpd with a {1to4} broadcast memory operand
; The memory operand is a .LCPI label on i686 and RIP-relative on x86_64.
59 define <4 x double> @fabs_v4f64(<4 x double> %p) {
60 ; X32_AVX-LABEL: fabs_v4f64:
62 ; X32_AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
65 ; X32_AVX512VL-LABEL: fabs_v4f64:
66 ; X32_AVX512VL: # %bb.0:
67 ; X32_AVX512VL-NEXT: vpandq {{\.LCPI.*}}{1to4}, %ymm0, %ymm0
68 ; X32_AVX512VL-NEXT: retl
70 ; X32_AVX512VLDQ-LABEL: fabs_v4f64:
71 ; X32_AVX512VLDQ: # %bb.0:
72 ; X32_AVX512VLDQ-NEXT: vandpd {{\.LCPI.*}}{1to4}, %ymm0, %ymm0
73 ; X32_AVX512VLDQ-NEXT: retl
75 ; X64_AVX-LABEL: fabs_v4f64:
77 ; X64_AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
80 ; X64_AVX512VL-LABEL: fabs_v4f64:
81 ; X64_AVX512VL: # %bb.0:
82 ; X64_AVX512VL-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0
83 ; X64_AVX512VL-NEXT: retq
85 ; X64_AVX512VLDQ-LABEL: fabs_v4f64:
86 ; X64_AVX512VLDQ: # %bb.0:
87 ; X64_AVX512VLDQ-NEXT: vandpd {{.*}}(%rip){1to4}, %ymm0, %ymm0
88 ; X64_AVX512VLDQ-NEXT: retq
89 %t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
; Declaration of the intrinsic exercised by fabs_v4f64.
92 declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
; fabs of an <8 x float> argument through the llvm.fabs.v8f32 intrinsic.
; Expected instruction on ymm0, per feature set (same shape on both targets):
;   avx only            - vandps with a plain memory operand
;   avx512vl            - vpandd with a {1to8} broadcast memory operand
;   avx512vl + avx512dq - vandps with a {1to8} broadcast memory operand
; The memory operand is a .LCPI label on i686 and RIP-relative on x86_64.
94 define <8 x float> @fabs_v8f32(<8 x float> %p) {
95 ; X32_AVX-LABEL: fabs_v8f32:
97 ; X32_AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
100 ; X32_AVX512VL-LABEL: fabs_v8f32:
101 ; X32_AVX512VL: # %bb.0:
102 ; X32_AVX512VL-NEXT: vpandd {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
103 ; X32_AVX512VL-NEXT: retl
105 ; X32_AVX512VLDQ-LABEL: fabs_v8f32:
106 ; X32_AVX512VLDQ: # %bb.0:
107 ; X32_AVX512VLDQ-NEXT: vandps {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
108 ; X32_AVX512VLDQ-NEXT: retl
110 ; X64_AVX-LABEL: fabs_v8f32:
112 ; X64_AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
115 ; X64_AVX512VL-LABEL: fabs_v8f32:
116 ; X64_AVX512VL: # %bb.0:
117 ; X64_AVX512VL-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0
118 ; X64_AVX512VL-NEXT: retq
120 ; X64_AVX512VLDQ-LABEL: fabs_v8f32:
121 ; X64_AVX512VLDQ: # %bb.0:
122 ; X64_AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to8}, %ymm0, %ymm0
123 ; X64_AVX512VLDQ-NEXT: retq
124 %t = call <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
; Declaration of the intrinsic exercised by fabs_v8f32.
127 declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
; fabs of an <8 x double> argument through the llvm.fabs.v8f64 intrinsic.
; Expected code per feature set (same shape on both targets):
;   avx only            - the value occupies ymm0 and ymm1; the mask is loaded
;                         once into ymm2 with vmovaps and applied to each half
;                         with vandps
;   avx512vl            - one vpandq on zmm0 with a {1to8} broadcast operand
;   avx512vl + avx512dq - one vandpd on zmm0 with a {1to8} broadcast operand
; NOTE(review): the mask elements are printed as NaN in the asm comment,
; presumably because the sign-cleared all-ones bit pattern is a NaN encoding.
129 define <8 x double> @fabs_v8f64(<8 x double> %p) {
130 ; X32_AVX-LABEL: fabs_v8f64:
132 ; X32_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
133 ; X32_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
134 ; X32_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
137 ; X32_AVX512VL-LABEL: fabs_v8f64:
138 ; X32_AVX512VL: # %bb.0:
139 ; X32_AVX512VL-NEXT: vpandq {{\.LCPI.*}}{1to8}, %zmm0, %zmm0
140 ; X32_AVX512VL-NEXT: retl
142 ; X32_AVX512VLDQ-LABEL: fabs_v8f64:
143 ; X32_AVX512VLDQ: # %bb.0:
144 ; X32_AVX512VLDQ-NEXT: vandpd {{\.LCPI.*}}{1to8}, %zmm0, %zmm0
145 ; X32_AVX512VLDQ-NEXT: retl
147 ; X64_AVX-LABEL: fabs_v8f64:
149 ; X64_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
150 ; X64_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
151 ; X64_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
154 ; X64_AVX512VL-LABEL: fabs_v8f64:
155 ; X64_AVX512VL: # %bb.0:
156 ; X64_AVX512VL-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
157 ; X64_AVX512VL-NEXT: retq
159 ; X64_AVX512VLDQ-LABEL: fabs_v8f64:
160 ; X64_AVX512VLDQ: # %bb.0:
161 ; X64_AVX512VLDQ-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
162 ; X64_AVX512VLDQ-NEXT: retq
163 %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
; Declaration of the intrinsic exercised by fabs_v8f64.
166 declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
; fabs of a <16 x float> argument through the llvm.fabs.v16f32 intrinsic.
; Expected code per feature set (same shape on both targets):
;   avx only            - the value occupies ymm0 and ymm1; the mask is loaded
;                         once into ymm2 with vmovaps and applied to each half
;                         with vandps
;   avx512vl            - one vpandd on zmm0 with a {1to16} broadcast operand
;   avx512vl + avx512dq - one vandps on zmm0 with a {1to16} broadcast operand
; NOTE(review): the mask elements are printed as NaN in the asm comment,
; presumably because the sign-cleared all-ones bit pattern is a NaN encoding.
168 define <16 x float> @fabs_v16f32(<16 x float> %p) {
169 ; X32_AVX-LABEL: fabs_v16f32:
171 ; X32_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
172 ; X32_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
173 ; X32_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
176 ; X32_AVX512VL-LABEL: fabs_v16f32:
177 ; X32_AVX512VL: # %bb.0:
178 ; X32_AVX512VL-NEXT: vpandd {{\.LCPI.*}}{1to16}, %zmm0, %zmm0
179 ; X32_AVX512VL-NEXT: retl
181 ; X32_AVX512VLDQ-LABEL: fabs_v16f32:
182 ; X32_AVX512VLDQ: # %bb.0:
183 ; X32_AVX512VLDQ-NEXT: vandps {{\.LCPI.*}}{1to16}, %zmm0, %zmm0
184 ; X32_AVX512VLDQ-NEXT: retl
186 ; X64_AVX-LABEL: fabs_v16f32:
188 ; X64_AVX-NEXT: vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
189 ; X64_AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
190 ; X64_AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
193 ; X64_AVX512VL-LABEL: fabs_v16f32:
194 ; X64_AVX512VL: # %bb.0:
195 ; X64_AVX512VL-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
196 ; X64_AVX512VL-NEXT: retq
198 ; X64_AVX512VLDQ-LABEL: fabs_v16f32:
199 ; X64_AVX512VLDQ: # %bb.0:
200 ; X64_AVX512VLDQ-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
201 ; X64_AVX512VLDQ-NEXT: retq
202 %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
; Declaration of the intrinsic exercised by fabs_v16f32.
205 declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
207 ; PR20354: when generating code for a vector fabs op,
208 ; make sure that we're only turning off the sign bit of each float value.
209 ; No constant pool loads or vector ops are needed for the fabs of a
210 ; bitcasted integer constant; we should just return an integer constant
211 ; that has the sign bits turned off.
213 ; So instead of something like this:
214 ; movabsq (constant pool load of mask for sign bits)
215 ; vmovq (move from integer register to vector/fp register)
216 ; vandps (mask off sign bits)
217 ; vmovq (move vector/fp register back to integer return register)
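219 ; We should generate only integer moves of the folded constant:
220 ; mov (put constant value in return register; on i686 the i64 result is split across a movl/xorl register pair)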
; Constant-folding case with the sign bit set only in the high float:
; the i64 constant 0xFFFF_FFFF_0000_0000 is bitcast to <2 x float>, passed
; through llvm.fabs.v2f32, and bitcast back to i64.
; The checks expect the folded value 0x7FFFFFFF00000000 to be materialized
; directly: a movabsq into rax on x86_64, and on i686 an xorl of eax plus a
; movl of 0x7FFFFFFF into edx.
222 define i64 @fabs_v2f32_1() {
223 ; X32-LABEL: fabs_v2f32_1:
225 ; X32-NEXT: xorl %eax, %eax
226 ; X32-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
229 ; X64-LABEL: fabs_v2f32_1:
231 ; X64-NEXT: movabsq $9223372032559808512, %rax # imm = 0x7FFFFFFF00000000
233 %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000
234 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
235 %ret = bitcast <2 x float> %fabs to i64
; Constant-folding case with the sign bit set only in the low float:
; the i64 constant 0x0000_0000_FFFF_FFFF is bitcast to <2 x float>, passed
; through llvm.fabs.v2f32, and bitcast back to i64.
; The checks expect the folded value 0x7FFFFFFF to be materialized directly:
; a movl into eax on x86_64, and on i686 a movl into eax plus an xorl of edx.
239 define i64 @fabs_v2f32_2() {
240 ; X32-LABEL: fabs_v2f32_2:
242 ; X32-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
243 ; X32-NEXT: xorl %edx, %edx
246 ; X64-LABEL: fabs_v2f32_2:
248 ; X64-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
250 %bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF
251 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
252 %ret = bitcast <2 x float> %fabs to i64
; Declaration of the intrinsic exercised by fabs_v2f32_1 and fabs_v2f32_2.
256 declare <2 x float> @llvm.fabs.v2f32(<2 x float> %p)