; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX

; NOTE: this is generated by utils/update_llc_test_checks.py but we can't check NAN types (PR30443),
; so we need to edit it to remove the NAN constant comments
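; (The [NaN,...] splats in the checks below are the asm printer's rendering of
; the 0x7fffffff magnitude-mask constant, whose bit pattern happens to be a NaN;
; they are not literal NaN operands.)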
; copysign(x, c1) -> fabs(x) iff ispos(c1)
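; A known-positive sign operand means copysign only has to clear the sign bit,
; i.e. a single andps with the 0x7fffffff splat.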
define <4 x float> @combine_vec_fcopysign_pos_constant0(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_pos_constant0:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_pos_constant0:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_pos_constant1(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_pos_constant1:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_pos_constant1:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float 0.0, float 2.0, float 4.0, float 8.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_fabs_sgn(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fabs_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fabs_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %y)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
  ret <4 x float> %2
}

; copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
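; A known-negative sign operand means copysign only has to set the sign bit,
; i.e. a single orps with the 0x80000000 (-0.0) splat.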
define <4 x float> @combine_vec_fcopysign_neg_constant0(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_neg_constant0:
; SSE:       # %bb.0:
; SSE-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_neg_constant0:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -2.0, float -2.0, float -2.0, float -2.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_neg_constant1(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_neg_constant1:
; SSE:       # %bb.0:
; SSE-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_neg_constant1:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -0.0, float -2.0, float -4.0, float -8.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_fneg_fabs_sgn(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fneg_fabs_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fneg_fabs_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %y)
  %2 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %1
  %3 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %2)
  ret <4 x float> %3
}

; copysign(fabs(x), y) -> copysign(x, y)
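; fabs only clears the sign bit of x, which copysign overwrites anyway, so the
; fabs is redundant and the generic (x & 0x7fffffff) | (y & 0x80000000)
; lowering remains.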
define <4 x float> @combine_vec_fcopysign_fabs_mag(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fabs_mag:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fabs_mag:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
  ret <4 x float> %2
}

; copysign(fneg(x), y) -> copysign(x, y)
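; fneg only flips the sign bit of x, which copysign replaces with the sign of
; y, so the fneg is dead.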
define <4 x float> @combine_vec_fcopysign_fneg_mag(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fneg_mag:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fneg_mag:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %x
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
  ret <4 x float> %2
}

; copysign(copysign(x,z), y) -> copysign(x, y)
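; The inner copysign only rewrites the sign bit of x, and the outer copysign
; overwrites that bit again with the sign of y, so z has no effect.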
define <4 x float> @combine_vec_fcopysign_fcopysign_mag(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; SSE-LABEL: combine_vec_fcopysign_fcopysign_mag:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fcopysign_mag:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %z)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
  ret <4 x float> %2
}

; copysign(x, copysign(y,z)) -> copysign(x, z)
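; copysign(y,z) carries the sign of z, so x can take its sign from z directly.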
define <4 x float> @combine_vec_fcopysign_fcopysign_sgn(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; SSE-LABEL: combine_vec_fcopysign_fcopysign_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fcopysign_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vandps %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %y, <4 x float> %z)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
  ret <4 x float> %2
}

; copysign(x, fp_extend(y)) -> copysign(x, y)
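; ISD::FCOPYSIGN does not require its two operands to have the same type, so
; the fp_extend can be dropped from the sign operand in the DAG.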
define <4 x double> @combine_vec_fcopysign_fpext_sgn(<4 x double> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fpext_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtps2pd %xmm2, %xmm3
; SSE-NEXT:    movhlps {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT:    cvtps2pd %xmm2, %xmm2
; SSE-NEXT:    movaps {{.*#+}} xmm4 = [NaN,NaN]
; SSE-NEXT:    andps %xmm4, %xmm0
; SSE-NEXT:    movaps %xmm4, %xmm5
; SSE-NEXT:    andnps %xmm3, %xmm5
; SSE-NEXT:    orps %xmm5, %xmm0
; SSE-NEXT:    andps %xmm4, %xmm1
; SSE-NEXT:    andnps %xmm2, %xmm4
; SSE-NEXT:    orps %xmm4, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fpext_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtps2pd %xmm1, %ymm1
; AVX-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = fpext <4 x float> %y to <4 x double>
  %2 = call <4 x double> @llvm.copysign.v4f64(<4 x double> %x, <4 x double> %1)
  ret <4 x double> %2
}

; copysign(x, fp_round(y)) -> copysign(x, y)
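; The mirror of the fp_extend fold above: rounding preserves the sign, so only
; the sign bit of y is needed and the fp_round on the sign operand can go.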
define <4 x float> @combine_vec_fcopysign_fptrunc_sgn(<4 x float> %x, <4 x double> %y) {
; SSE-LABEL: combine_vec_fcopysign_fptrunc_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    cvtpd2ps %xmm2, %xmm2
; SSE-NEXT:    cvtpd2ps %xmm1, %xmm1
; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT:    andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    orpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fptrunc_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvtpd2ps %ymm1, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; AVX-NEXT:    vandpd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX-NEXT:    vandpd %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vorpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %1 = fptrunc <4 x double> %y to <4 x float>
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
  ret <4 x float> %2
}

declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
declare <4 x float> @llvm.copysign.v4f32(<4 x float> %Mag, <4 x float> %Sgn)
declare <4 x double> @llvm.copysign.v4f64(<4 x double> %Mag, <4 x double> %Sgn)