1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-- -mattr=avx | FileCheck %s
4 ; PR38527 - https://bugs.llvm.org/show_bug.cgi?id=38527
6 ; Use an AVX target to show that the potential problem
7 ; is not limited to 128-bit types/registers. I.e., widening
8 ; up to 256-bits may also result in bogus libcalls.
10 ; Use fsin as the representative test for various data types.
12 declare <1 x float> @llvm.sin.v1f32(<1 x float>)
13 declare <2 x float> @llvm.sin.v2f32(<2 x float>)
14 declare <3 x float> @llvm.sin.v3f32(<3 x float>)
15 declare <4 x float> @llvm.sin.v4f32(<4 x float>)
16 declare <5 x float> @llvm.sin.v5f32(<5 x float>)
17 declare <6 x float> @llvm.sin.v6f32(<6 x float>)
18 declare <3 x double> @llvm.sin.v3f64(<3 x double>)
20 declare <1 x float> @llvm.tan.v1f32(<1 x float>)
21 declare <2 x float> @llvm.tan.v2f32(<2 x float>)
22 declare <3 x float> @llvm.tan.v3f32(<3 x float>)
23 declare <4 x float> @llvm.tan.v4f32(<4 x float>)
24 declare <5 x float> @llvm.tan.v5f32(<5 x float>)
25 declare <6 x float> @llvm.tan.v6f32(<6 x float>)
26 declare <3 x double> @llvm.tan.v3f64(<3 x double>)
28 ; Verify that all of the potential libcall candidates are handled.
29 ; Some of these have custom lowering, so those cases won't have libcalls.
32 declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
33 declare <2 x float> @llvm.ceil.v2f32(<2 x float>)
34 declare <2 x float> @llvm.cos.v2f32(<2 x float>)
35 declare <2 x float> @llvm.exp.v2f32(<2 x float>)
36 declare <2 x float> @llvm.exp2.v2f32(<2 x float>)
37 declare <2 x float> @llvm.floor.v2f32(<2 x float>)
38 declare <2 x float> @llvm.log.v2f32(<2 x float>)
39 declare <2 x float> @llvm.log10.v2f32(<2 x float>)
40 declare <2 x float> @llvm.log2.v2f32(<2 x float>)
41 declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>)
42 declare <2 x float> @llvm.rint.v2f32(<2 x float>)
43 declare <2 x float> @llvm.round.v2f32(<2 x float>)
44 declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
45 declare <2 x float> @llvm.trunc.v2f32(<2 x float>)
; <1 x float>: scalarized to a single sinf libcall -- no widened vector libcall.
47 define <1 x float> @sin_v1f32(<1 x float> %x) nounwind {
48 ; CHECK-LABEL: sin_v1f32:
50 ; CHECK-NEXT: pushq %rax
51 ; CHECK-NEXT: callq sinf@PLT
52 ; CHECK-NEXT: popq %rax
54 %r = call <1 x float> @llvm.sin.v1f32(<1 x float> %x)
; <2 x float>: exactly two scalar sinf calls; elements are spilled, processed one
; at a time, and reassembled with vinsertps -- widening must not add extra calls.
58 define <2 x float> @sin_v2f32(<2 x float> %x) nounwind {
59 ; CHECK-LABEL: sin_v2f32:
61 ; CHECK-NEXT: subq $40, %rsp
62 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
63 ; CHECK-NEXT: callq sinf@PLT
64 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
65 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
66 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
67 ; CHECK-NEXT: callq sinf@PLT
68 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
69 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
70 ; CHECK-NEXT: addq $40, %rsp
72 %r = call <2 x float> @llvm.sin.v2f32(<2 x float> %x)
; <3 x float>: exactly three sinf calls (one per element); widening to <4 x float>
; must not introduce a fourth bogus call.
76 define <3 x float> @sin_v3f32(<3 x float> %x) nounwind {
77 ; CHECK-LABEL: sin_v3f32:
79 ; CHECK-NEXT: subq $40, %rsp
80 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
81 ; CHECK-NEXT: callq sinf@PLT
82 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
83 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
84 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
85 ; CHECK-NEXT: callq sinf@PLT
86 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
87 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
88 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
89 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
90 ; CHECK-NEXT: # xmm0 = mem[1,0]
91 ; CHECK-NEXT: callq sinf@PLT
92 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
93 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
94 ; CHECK-NEXT: addq $40, %rsp
96 %r = call <3 x float> @llvm.sin.v3f32(<3 x float> %x)
; <4 x float>: four sinf calls, one per lane, with spill/reload between calls.
100 define <4 x float> @sin_v4f32(<4 x float> %x) nounwind {
101 ; CHECK-LABEL: sin_v4f32:
103 ; CHECK-NEXT: subq $40, %rsp
104 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
105 ; CHECK-NEXT: callq sinf@PLT
106 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
107 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
108 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
109 ; CHECK-NEXT: callq sinf@PLT
110 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
111 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
112 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
113 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
114 ; CHECK-NEXT: # xmm0 = mem[1,0]
115 ; CHECK-NEXT: callq sinf@PLT
116 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
117 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
118 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
119 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
120 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
121 ; CHECK-NEXT: callq sinf@PLT
122 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
123 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
124 ; CHECK-NEXT: addq $40, %rsp
126 %r = call <4 x float> @llvm.sin.v4f32(<4 x float> %x)
; <5 x float>: exactly five sinf calls. The value is spilled as a ymm; the fifth
; element is pulled from the upper half with vextractf128 -- widening to 256 bits
; must not add extra calls.
130 define <5 x float> @sin_v5f32(<5 x float> %x) nounwind {
131 ; CHECK-LABEL: sin_v5f32:
133 ; CHECK-NEXT: subq $72, %rsp
134 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
135 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
136 ; CHECK-NEXT: vzeroupper
137 ; CHECK-NEXT: callq sinf@PLT
138 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
139 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
140 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
141 ; CHECK-NEXT: callq sinf@PLT
142 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
143 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
144 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
145 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
146 ; CHECK-NEXT: # xmm0 = mem[1,0]
147 ; CHECK-NEXT: callq sinf@PLT
148 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
149 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
150 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
151 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
152 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
153 ; CHECK-NEXT: callq sinf@PLT
154 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
155 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
156 ; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
157 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
158 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
159 ; CHECK-NEXT: vzeroupper
160 ; CHECK-NEXT: callq sinf@PLT
161 ; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
162 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
163 ; CHECK-NEXT: addq $72, %rsp
165 %r = call <5 x float> @llvm.sin.v5f32(<5 x float> %x)
; <6 x float>: exactly six sinf calls across both 128-bit halves of the ymm,
; recombined at the end with vinsertf128.
169 define <6 x float> @sin_v6f32(<6 x float> %x) nounwind {
170 ; CHECK-LABEL: sin_v6f32:
172 ; CHECK-NEXT: subq $72, %rsp
173 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
174 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
175 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
176 ; CHECK-NEXT: vzeroupper
177 ; CHECK-NEXT: callq sinf@PLT
178 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
179 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
180 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
181 ; CHECK-NEXT: callq sinf@PLT
182 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
183 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
184 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
185 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
186 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
187 ; CHECK-NEXT: vzeroupper
188 ; CHECK-NEXT: callq sinf@PLT
189 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
190 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
191 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
192 ; CHECK-NEXT: callq sinf@PLT
193 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
194 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
195 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
196 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
197 ; CHECK-NEXT: # xmm0 = mem[1,0]
198 ; CHECK-NEXT: callq sinf@PLT
199 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
200 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
201 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
202 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
203 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
204 ; CHECK-NEXT: callq sinf@PLT
205 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
206 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
207 ; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
208 ; CHECK-NEXT: addq $72, %rsp
210 %r = call <6 x float> @llvm.sin.v6f32(<6 x float> %x)
; <3 x double>: exactly three double-precision sin calls; low pair is merged with
; vunpcklpd and the third element comes from the upper ymm half via vextractf128.
214 define <3 x double> @sin_v3f64(<3 x double> %x) nounwind {
215 ; CHECK-LABEL: sin_v3f64:
217 ; CHECK-NEXT: subq $72, %rsp
218 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
219 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
220 ; CHECK-NEXT: vzeroupper
221 ; CHECK-NEXT: callq sin@PLT
222 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
223 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
224 ; CHECK-NEXT: # xmm0 = mem[1,0]
225 ; CHECK-NEXT: callq sin@PLT
226 ; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
227 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
228 ; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
229 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
230 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
231 ; CHECK-NEXT: vzeroupper
232 ; CHECK-NEXT: callq sin@PLT
233 ; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
234 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
235 ; CHECK-NEXT: addq $72, %rsp
237 %r = call <3 x double> @llvm.sin.v3f64(<3 x double> %x)
; <1 x float>: scalarized to a single tanf libcall, mirroring sin_v1f32.
241 define <1 x float> @tan_v1f32(<1 x float> %x) nounwind {
242 ; CHECK-LABEL: tan_v1f32:
244 ; CHECK-NEXT: pushq %rax
245 ; CHECK-NEXT: callq tanf@PLT
246 ; CHECK-NEXT: popq %rax
248 %r = call <1 x float> @llvm.tan.v1f32(<1 x float> %x)
; <2 x float>: exactly two tanf calls, mirroring sin_v2f32.
252 define <2 x float> @tan_v2f32(<2 x float> %x) nounwind {
253 ; CHECK-LABEL: tan_v2f32:
255 ; CHECK-NEXT: subq $40, %rsp
256 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
257 ; CHECK-NEXT: callq tanf@PLT
258 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
259 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
260 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
261 ; CHECK-NEXT: callq tanf@PLT
262 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
263 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
264 ; CHECK-NEXT: addq $40, %rsp
266 %r = call <2 x float> @llvm.tan.v2f32(<2 x float> %x)
; <3 x float>: exactly three tanf calls -- no fourth call from widening.
270 define <3 x float> @tan_v3f32(<3 x float> %x) nounwind {
271 ; CHECK-LABEL: tan_v3f32:
273 ; CHECK-NEXT: subq $40, %rsp
274 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
275 ; CHECK-NEXT: callq tanf@PLT
276 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
277 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
278 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
279 ; CHECK-NEXT: callq tanf@PLT
280 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
281 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
282 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
283 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
284 ; CHECK-NEXT: # xmm0 = mem[1,0]
285 ; CHECK-NEXT: callq tanf@PLT
286 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
287 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
288 ; CHECK-NEXT: addq $40, %rsp
290 %r = call <3 x float> @llvm.tan.v3f32(<3 x float> %x)
; <4 x float>: four tanf calls, one per lane, mirroring sin_v4f32.
294 define <4 x float> @tan_v4f32(<4 x float> %x) nounwind {
295 ; CHECK-LABEL: tan_v4f32:
297 ; CHECK-NEXT: subq $40, %rsp
298 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
299 ; CHECK-NEXT: callq tanf@PLT
300 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
301 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
302 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
303 ; CHECK-NEXT: callq tanf@PLT
304 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
305 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
306 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
307 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
308 ; CHECK-NEXT: # xmm0 = mem[1,0]
309 ; CHECK-NEXT: callq tanf@PLT
310 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
311 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
312 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
313 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
314 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
315 ; CHECK-NEXT: callq tanf@PLT
316 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
317 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
318 ; CHECK-NEXT: addq $40, %rsp
320 %r = call <4 x float> @llvm.tan.v4f32(<4 x float> %x)
; <5 x float>: exactly five tanf calls; fifth element comes from the upper ymm
; half via vextractf128, mirroring sin_v5f32.
324 define <5 x float> @tan_v5f32(<5 x float> %x) nounwind {
325 ; CHECK-LABEL: tan_v5f32:
327 ; CHECK-NEXT: subq $72, %rsp
328 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
329 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
330 ; CHECK-NEXT: vzeroupper
331 ; CHECK-NEXT: callq tanf@PLT
332 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
333 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
334 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
335 ; CHECK-NEXT: callq tanf@PLT
336 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
337 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
338 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
339 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
340 ; CHECK-NEXT: # xmm0 = mem[1,0]
341 ; CHECK-NEXT: callq tanf@PLT
342 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
343 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
344 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
345 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
346 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
347 ; CHECK-NEXT: callq tanf@PLT
348 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
349 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
350 ; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
351 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
352 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
353 ; CHECK-NEXT: vzeroupper
354 ; CHECK-NEXT: callq tanf@PLT
355 ; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
356 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
357 ; CHECK-NEXT: addq $72, %rsp
359 %r = call <5 x float> @llvm.tan.v5f32(<5 x float> %x)
; <6 x float>: exactly six tanf calls across both 128-bit halves, mirroring
; sin_v6f32.
363 define <6 x float> @tan_v6f32(<6 x float> %x) nounwind {
364 ; CHECK-LABEL: tan_v6f32:
366 ; CHECK-NEXT: subq $72, %rsp
367 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
368 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
369 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
370 ; CHECK-NEXT: vzeroupper
371 ; CHECK-NEXT: callq tanf@PLT
372 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
373 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
374 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
375 ; CHECK-NEXT: callq tanf@PLT
376 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
377 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
378 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
379 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
380 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
381 ; CHECK-NEXT: vzeroupper
382 ; CHECK-NEXT: callq tanf@PLT
383 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
384 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
385 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
386 ; CHECK-NEXT: callq tanf@PLT
387 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
388 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
389 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
390 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
391 ; CHECK-NEXT: # xmm0 = mem[1,0]
392 ; CHECK-NEXT: callq tanf@PLT
393 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
394 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
395 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
396 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
397 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
398 ; CHECK-NEXT: callq tanf@PLT
399 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
400 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
401 ; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
402 ; CHECK-NEXT: addq $72, %rsp
404 %r = call <6 x float> @llvm.tan.v6f32(<6 x float> %x)
; <3 x double>: exactly three double-precision tan calls, mirroring sin_v3f64.
408 define <3 x double> @tan_v3f64(<3 x double> %x) nounwind {
409 ; CHECK-LABEL: tan_v3f64:
411 ; CHECK-NEXT: subq $72, %rsp
412 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
413 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
414 ; CHECK-NEXT: vzeroupper
415 ; CHECK-NEXT: callq tan@PLT
416 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
417 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
418 ; CHECK-NEXT: # xmm0 = mem[1,0]
419 ; CHECK-NEXT: callq tan@PLT
420 ; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
421 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
422 ; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
423 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
424 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
425 ; CHECK-NEXT: vzeroupper
426 ; CHECK-NEXT: callq tan@PLT
427 ; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
428 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
429 ; CHECK-NEXT: addq $72, %rsp
431 %r = call <3 x double> @llvm.tan.v3f64(<3 x double> %x)
; fabs has custom lowering: a single vandps with a constant-pool mask, no libcall.
435 define <2 x float> @fabs_v2f32(<2 x float> %x) nounwind {
436 ; CHECK-LABEL: fabs_v2f32:
438 ; CHECK-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
440 %r = call <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
; ceil lowers to a single vroundps -- no ceilf libcall.
444 define <2 x float> @ceil_v2f32(<2 x float> %x) nounwind {
445 ; CHECK-LABEL: ceil_v2f32:
447 ; CHECK-NEXT: vroundps $10, %xmm0, %xmm0
449 %r = call <2 x float> @llvm.ceil.v2f32(<2 x float> %x)
; cos has no instruction lowering: exactly two scalar cosf calls.
453 define <2 x float> @cos_v2f32(<2 x float> %x) nounwind {
454 ; CHECK-LABEL: cos_v2f32:
456 ; CHECK-NEXT: subq $40, %rsp
457 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
458 ; CHECK-NEXT: callq cosf@PLT
459 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
460 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
461 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
462 ; CHECK-NEXT: callq cosf@PLT
463 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
464 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
465 ; CHECK-NEXT: addq $40, %rsp
467 %r = call <2 x float> @llvm.cos.v2f32(<2 x float> %x)
; exp: exactly two scalar expf calls.
471 define <2 x float> @exp_v2f32(<2 x float> %x) nounwind {
472 ; CHECK-LABEL: exp_v2f32:
474 ; CHECK-NEXT: subq $40, %rsp
475 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
476 ; CHECK-NEXT: callq expf@PLT
477 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
478 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
479 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
480 ; CHECK-NEXT: callq expf@PLT
481 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
482 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
483 ; CHECK-NEXT: addq $40, %rsp
485 %r = call <2 x float> @llvm.exp.v2f32(<2 x float> %x)
; exp2: exactly two scalar exp2f calls.
489 define <2 x float> @exp2_v2f32(<2 x float> %x) nounwind {
490 ; CHECK-LABEL: exp2_v2f32:
492 ; CHECK-NEXT: subq $40, %rsp
493 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
494 ; CHECK-NEXT: callq exp2f@PLT
495 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
496 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
497 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
498 ; CHECK-NEXT: callq exp2f@PLT
499 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
500 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
501 ; CHECK-NEXT: addq $40, %rsp
503 %r = call <2 x float> @llvm.exp2.v2f32(<2 x float> %x)
; floor lowers to a single vroundps -- no floorf libcall.
507 define <2 x float> @floor_v2f32(<2 x float> %x) nounwind {
508 ; CHECK-LABEL: floor_v2f32:
510 ; CHECK-NEXT: vroundps $9, %xmm0, %xmm0
512 %r = call <2 x float> @llvm.floor.v2f32(<2 x float> %x)
; log: exactly two scalar logf calls.
516 define <2 x float> @log_v2f32(<2 x float> %x) nounwind {
517 ; CHECK-LABEL: log_v2f32:
519 ; CHECK-NEXT: subq $40, %rsp
520 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
521 ; CHECK-NEXT: callq logf@PLT
522 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
523 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
524 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
525 ; CHECK-NEXT: callq logf@PLT
526 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
527 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
528 ; CHECK-NEXT: addq $40, %rsp
530 %r = call <2 x float> @llvm.log.v2f32(<2 x float> %x)
; log10: exactly two scalar log10f calls.
534 define <2 x float> @log10_v2f32(<2 x float> %x) nounwind {
535 ; CHECK-LABEL: log10_v2f32:
537 ; CHECK-NEXT: subq $40, %rsp
538 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
539 ; CHECK-NEXT: callq log10f@PLT
540 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
541 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
542 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
543 ; CHECK-NEXT: callq log10f@PLT
544 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
545 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
546 ; CHECK-NEXT: addq $40, %rsp
548 %r = call <2 x float> @llvm.log10.v2f32(<2 x float> %x)
; log2: exactly two scalar log2f calls.
552 define <2 x float> @log2_v2f32(<2 x float> %x) nounwind {
553 ; CHECK-LABEL: log2_v2f32:
555 ; CHECK-NEXT: subq $40, %rsp
556 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
557 ; CHECK-NEXT: callq log2f@PLT
558 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
559 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
560 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
561 ; CHECK-NEXT: callq log2f@PLT
562 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
563 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
564 ; CHECK-NEXT: addq $40, %rsp
566 %r = call <2 x float> @llvm.log2.v2f32(<2 x float> %x)
; nearbyint lowers to a single vroundps -- no nearbyintf libcall.
570 define <2 x float> @nearbyint__v2f32(<2 x float> %x) nounwind {
571 ; CHECK-LABEL: nearbyint__v2f32:
573 ; CHECK-NEXT: vroundps $12, %xmm0, %xmm0
575 %r = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %x)
; rint lowers to a single vroundps -- no rintf libcall.
579 define <2 x float> @rint_v2f32(<2 x float> %x) nounwind {
580 ; CHECK-LABEL: rint_v2f32:
582 ; CHECK-NEXT: vroundps $4, %xmm0, %xmm0
584 %r = call <2 x float> @llvm.rint.v2f32(<2 x float> %x)
; round (half-away-from-zero) has no single instruction on AVX: it is expanded
; inline as a sign-copied 0.5 bias (vandps/vorps/vaddps) plus vroundps -- no
; roundf libcall.
588 define <2 x float> @round_v2f32(<2 x float> %x) nounwind {
589 ; CHECK-LABEL: round_v2f32:
591 ; CHECK-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
592 ; CHECK-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
593 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
594 ; CHECK-NEXT: vroundps $11, %xmm0, %xmm0
596 %r = call <2 x float> @llvm.round.v2f32(<2 x float> %x)
; sqrt lowers to a single vsqrtps -- no sqrtf libcall.
600 define <2 x float> @sqrt_v2f32(<2 x float> %x) nounwind {
601 ; CHECK-LABEL: sqrt_v2f32:
603 ; CHECK-NEXT: vsqrtps %xmm0, %xmm0
605 %r = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
; trunc lowers to a single vroundps -- no truncf libcall.
609 define <2 x float> @trunc_v2f32(<2 x float> %x) nounwind {
610 ; CHECK-LABEL: trunc_v2f32:
612 ; CHECK-NEXT: vroundps $11, %xmm0, %xmm0
614 %r = call <2 x float> @llvm.trunc.v2f32(<2 x float> %x)