1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-- -mattr=avx | FileCheck %s
4 ; PR38527 - https://bugs.llvm.org/show_bug.cgi?id=38527
6 ; Use an AVX target to show that the potential problem
7 ; is not limited to 128-bit types/registers. Ie, widening
8 ; up to 256-bits may also result in bogus libcalls.
10 ; Use fsin as the representative test for various data types.
; llvm.sin declarations: f32 vectors of 1 to 6 elements plus <3 x double>.
12 declare <1 x float> @llvm.sin.v1f32(<1 x float>)
13 declare <2 x float> @llvm.sin.v2f32(<2 x float>)
14 declare <3 x float> @llvm.sin.v3f32(<3 x float>)
15 declare <4 x float> @llvm.sin.v4f32(<4 x float>)
16 declare <5 x float> @llvm.sin.v5f32(<5 x float>)
17 declare <6 x float> @llvm.sin.v6f32(<6 x float>)
18 declare <3 x double> @llvm.sin.v3f64(<3 x double>)
; llvm.tan declarations for the same set of vector types.
20 declare <1 x float> @llvm.tan.v1f32(<1 x float>)
21 declare <2 x float> @llvm.tan.v2f32(<2 x float>)
22 declare <3 x float> @llvm.tan.v3f32(<3 x float>)
23 declare <4 x float> @llvm.tan.v4f32(<4 x float>)
24 declare <5 x float> @llvm.tan.v5f32(<5 x float>)
25 declare <6 x float> @llvm.tan.v6f32(<6 x float>)
26 declare <3 x double> @llvm.tan.v3f64(<3 x double>)
; llvm.acos declarations: f32 vectors of 1 to 6 elements plus <3 x double>.
28 declare <1 x float> @llvm.acos.v1f32(<1 x float>)
29 declare <2 x float> @llvm.acos.v2f32(<2 x float>)
30 declare <3 x float> @llvm.acos.v3f32(<3 x float>)
31 declare <4 x float> @llvm.acos.v4f32(<4 x float>)
32 declare <5 x float> @llvm.acos.v5f32(<5 x float>)
33 declare <6 x float> @llvm.acos.v6f32(<6 x float>)
34 declare <3 x double> @llvm.acos.v3f64(<3 x double>)
; llvm.asin declarations: f32 vectors of 1 to 6 elements plus <3 x double>.
36 declare <1 x float> @llvm.asin.v1f32(<1 x float>)
37 declare <2 x float> @llvm.asin.v2f32(<2 x float>)
38 declare <3 x float> @llvm.asin.v3f32(<3 x float>)
39 declare <4 x float> @llvm.asin.v4f32(<4 x float>)
40 declare <5 x float> @llvm.asin.v5f32(<5 x float>)
41 declare <6 x float> @llvm.asin.v6f32(<6 x float>)
42 declare <3 x double> @llvm.asin.v3f64(<3 x double>)
; llvm.atan declarations for the same set of vector types.
44 declare <1 x float> @llvm.atan.v1f32(<1 x float>)
45 declare <2 x float> @llvm.atan.v2f32(<2 x float>)
46 declare <3 x float> @llvm.atan.v3f32(<3 x float>)
47 declare <4 x float> @llvm.atan.v4f32(<4 x float>)
48 declare <5 x float> @llvm.atan.v5f32(<5 x float>)
49 declare <6 x float> @llvm.atan.v6f32(<6 x float>)
50 declare <3 x double> @llvm.atan.v3f64(<3 x double>)
; llvm.cosh declarations for the same set of vector types.
52 declare <1 x float> @llvm.cosh.v1f32(<1 x float>)
53 declare <2 x float> @llvm.cosh.v2f32(<2 x float>)
54 declare <3 x float> @llvm.cosh.v3f32(<3 x float>)
55 declare <4 x float> @llvm.cosh.v4f32(<4 x float>)
56 declare <5 x float> @llvm.cosh.v5f32(<5 x float>)
57 declare <6 x float> @llvm.cosh.v6f32(<6 x float>)
58 declare <3 x double> @llvm.cosh.v3f64(<3 x double>)
; llvm.sinh declarations for the same set of vector types.
60 declare <1 x float> @llvm.sinh.v1f32(<1 x float>)
61 declare <2 x float> @llvm.sinh.v2f32(<2 x float>)
62 declare <3 x float> @llvm.sinh.v3f32(<3 x float>)
63 declare <4 x float> @llvm.sinh.v4f32(<4 x float>)
64 declare <5 x float> @llvm.sinh.v5f32(<5 x float>)
65 declare <6 x float> @llvm.sinh.v6f32(<6 x float>)
66 declare <3 x double> @llvm.sinh.v3f64(<3 x double>)
; llvm.tanh declarations for the same set of vector types.
68 declare <1 x float> @llvm.tanh.v1f32(<1 x float>)
69 declare <2 x float> @llvm.tanh.v2f32(<2 x float>)
70 declare <3 x float> @llvm.tanh.v3f32(<3 x float>)
71 declare <4 x float> @llvm.tanh.v4f32(<4 x float>)
72 declare <5 x float> @llvm.tanh.v5f32(<5 x float>)
73 declare <6 x float> @llvm.tanh.v6f32(<6 x float>)
74 declare <3 x double> @llvm.tanh.v3f64(<3 x double>)
76 ; Verify that all of the potential libcall candidates are handled.
77 ; Some of these have custom lowering, so those cases won't have
; <2 x float> declarations of further math intrinsics; no use of them is
; visible in this part of the file (presumably exercised by later tests).
80 declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
81 declare <2 x float> @llvm.ceil.v2f32(<2 x float>)
82 declare <2 x float> @llvm.cos.v2f32(<2 x float>)
83 declare <2 x float> @llvm.exp.v2f32(<2 x float>)
84 declare <2 x float> @llvm.exp2.v2f32(<2 x float>)
85 declare <2 x float> @llvm.floor.v2f32(<2 x float>)
86 declare <2 x float> @llvm.log.v2f32(<2 x float>)
87 declare <2 x float> @llvm.log10.v2f32(<2 x float>)
88 declare <2 x float> @llvm.log2.v2f32(<2 x float>)
89 declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>)
90 declare <2 x float> @llvm.rint.v2f32(<2 x float>)
91 declare <2 x float> @llvm.round.v2f32(<2 x float>)
92 declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
93 declare <2 x float> @llvm.trunc.v2f32(<2 x float>)
; <1 x float> sin: expected to lower to a single scalar sinf libcall.
95 define <1 x float> @sin_v1f32(<1 x float> %x) nounwind {
96 ; CHECK-LABEL: sin_v1f32:
97 ; CHECK: # %bb.0:
98 ; CHECK-NEXT: pushq %rax
99 ; CHECK-NEXT: callq sinf@PLT
100 ; CHECK-NEXT: popq %rax
101 ; CHECK-NEXT: retq
102 %r = call <1 x float> @llvm.sin.v1f32(<1 x float> %x)
103 ret <1 x float> %r
104 }
; <2 x float> sin: scalarized into two sinf libcalls, merged with vinsertps.
106 define <2 x float> @sin_v2f32(<2 x float> %x) nounwind {
107 ; CHECK-LABEL: sin_v2f32:
108 ; CHECK: # %bb.0:
109 ; CHECK-NEXT: subq $40, %rsp
110 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
111 ; CHECK-NEXT: callq sinf@PLT
112 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
113 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
114 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
115 ; CHECK-NEXT: callq sinf@PLT
116 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
117 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
118 ; CHECK-NEXT: addq $40, %rsp
119 ; CHECK-NEXT: retq
120 %r = call <2 x float> @llvm.sin.v2f32(<2 x float> %x)
121 ret <2 x float> %r
122 }
; <3 x float> sin: scalarized into three sinf libcalls, merged with vinsertps.
124 define <3 x float> @sin_v3f32(<3 x float> %x) nounwind {
125 ; CHECK-LABEL: sin_v3f32:
126 ; CHECK: # %bb.0:
127 ; CHECK-NEXT: subq $40, %rsp
128 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
129 ; CHECK-NEXT: callq sinf@PLT
130 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
131 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
132 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
133 ; CHECK-NEXT: callq sinf@PLT
134 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
135 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
136 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
137 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
138 ; CHECK-NEXT: # xmm0 = mem[1,0]
139 ; CHECK-NEXT: callq sinf@PLT
140 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
141 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
142 ; CHECK-NEXT: addq $40, %rsp
143 ; CHECK-NEXT: retq
144 %r = call <3 x float> @llvm.sin.v3f32(<3 x float> %x)
145 ret <3 x float> %r
146 }
; <4 x float> sin: scalarized into four sinf libcalls, merged with vinsertps.
148 define <4 x float> @sin_v4f32(<4 x float> %x) nounwind {
149 ; CHECK-LABEL: sin_v4f32:
150 ; CHECK: # %bb.0:
151 ; CHECK-NEXT: subq $40, %rsp
152 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
153 ; CHECK-NEXT: callq sinf@PLT
154 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
155 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
156 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
157 ; CHECK-NEXT: callq sinf@PLT
158 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
159 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
160 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
161 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
162 ; CHECK-NEXT: # xmm0 = mem[1,0]
163 ; CHECK-NEXT: callq sinf@PLT
164 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
165 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
166 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
167 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
168 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
169 ; CHECK-NEXT: callq sinf@PLT
170 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
171 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
172 ; CHECK-NEXT: addq $40, %rsp
173 ; CHECK-NEXT: retq
174 %r = call <4 x float> @llvm.sin.v4f32(<4 x float> %x)
175 ret <4 x float> %r
176 }
; <5 x float> sin: five sinf libcalls; the low four lanes are rebuilt with
; vinsertps and the fifth result is placed with vinsertf128.
178 define <5 x float> @sin_v5f32(<5 x float> %x) nounwind {
179 ; CHECK-LABEL: sin_v5f32:
180 ; CHECK: # %bb.0:
181 ; CHECK-NEXT: subq $72, %rsp
182 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
183 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
184 ; CHECK-NEXT: vzeroupper
185 ; CHECK-NEXT: callq sinf@PLT
186 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
187 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
188 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
189 ; CHECK-NEXT: callq sinf@PLT
190 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
191 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
192 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
193 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
194 ; CHECK-NEXT: # xmm0 = mem[1,0]
195 ; CHECK-NEXT: callq sinf@PLT
196 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
197 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
198 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
199 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
200 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
201 ; CHECK-NEXT: callq sinf@PLT
202 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
203 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
204 ; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
205 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
206 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
207 ; CHECK-NEXT: vzeroupper
208 ; CHECK-NEXT: callq sinf@PLT
209 ; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
210 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
211 ; CHECK-NEXT: addq $72, %rsp
212 ; CHECK-NEXT: retq
213 %r = call <5 x float> @llvm.sin.v5f32(<5 x float> %x)
214 ret <5 x float> %r
215 }
; <6 x float> sin: six sinf libcalls; the upper two lanes are computed first,
; then the low four, and the halves are joined with vinsertf128.
217 define <6 x float> @sin_v6f32(<6 x float> %x) nounwind {
218 ; CHECK-LABEL: sin_v6f32:
219 ; CHECK: # %bb.0:
220 ; CHECK-NEXT: subq $72, %rsp
221 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
222 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
223 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
224 ; CHECK-NEXT: vzeroupper
225 ; CHECK-NEXT: callq sinf@PLT
226 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
227 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
228 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
229 ; CHECK-NEXT: callq sinf@PLT
230 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
231 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
232 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
233 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
234 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
235 ; CHECK-NEXT: vzeroupper
236 ; CHECK-NEXT: callq sinf@PLT
237 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
238 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
239 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
240 ; CHECK-NEXT: callq sinf@PLT
241 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
242 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
243 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
244 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
245 ; CHECK-NEXT: # xmm0 = mem[1,0]
246 ; CHECK-NEXT: callq sinf@PLT
247 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
248 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
249 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
250 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
251 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
252 ; CHECK-NEXT: callq sinf@PLT
253 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
254 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
255 ; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
256 ; CHECK-NEXT: addq $72, %rsp
257 ; CHECK-NEXT: retq
258 %r = call <6 x float> @llvm.sin.v6f32(<6 x float> %x)
259 ret <6 x float> %r
260 }
; <3 x double> sin: three sin libcalls; the low pair is joined with vunpcklpd
; and the third result is placed with vinsertf128.
262 define <3 x double> @sin_v3f64(<3 x double> %x) nounwind {
263 ; CHECK-LABEL: sin_v3f64:
264 ; CHECK: # %bb.0:
265 ; CHECK-NEXT: subq $72, %rsp
266 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
267 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
268 ; CHECK-NEXT: vzeroupper
269 ; CHECK-NEXT: callq sin@PLT
270 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
271 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
272 ; CHECK-NEXT: # xmm0 = mem[1,0]
273 ; CHECK-NEXT: callq sin@PLT
274 ; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
275 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
276 ; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
277 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
278 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
279 ; CHECK-NEXT: vzeroupper
280 ; CHECK-NEXT: callq sin@PLT
281 ; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
282 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
283 ; CHECK-NEXT: addq $72, %rsp
284 ; CHECK-NEXT: retq
285 %r = call <3 x double> @llvm.sin.v3f64(<3 x double> %x)
286 ret <3 x double> %r
287 }
; <1 x float> tan: expected to lower to a single scalar tanf libcall.
289 define <1 x float> @tan_v1f32(<1 x float> %x) nounwind {
290 ; CHECK-LABEL: tan_v1f32:
291 ; CHECK: # %bb.0:
292 ; CHECK-NEXT: pushq %rax
293 ; CHECK-NEXT: callq tanf@PLT
294 ; CHECK-NEXT: popq %rax
295 ; CHECK-NEXT: retq
296 %r = call <1 x float> @llvm.tan.v1f32(<1 x float> %x)
297 ret <1 x float> %r
298 }
; <2 x float> tan: scalarized into two tanf libcalls, merged with vinsertps.
300 define <2 x float> @tan_v2f32(<2 x float> %x) nounwind {
301 ; CHECK-LABEL: tan_v2f32:
302 ; CHECK: # %bb.0:
303 ; CHECK-NEXT: subq $40, %rsp
304 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
305 ; CHECK-NEXT: callq tanf@PLT
306 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
307 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
308 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
309 ; CHECK-NEXT: callq tanf@PLT
310 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
311 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
312 ; CHECK-NEXT: addq $40, %rsp
313 ; CHECK-NEXT: retq
314 %r = call <2 x float> @llvm.tan.v2f32(<2 x float> %x)
315 ret <2 x float> %r
316 }
; <3 x float> tan: scalarized into three tanf libcalls, merged with vinsertps.
318 define <3 x float> @tan_v3f32(<3 x float> %x) nounwind {
319 ; CHECK-LABEL: tan_v3f32:
320 ; CHECK: # %bb.0:
321 ; CHECK-NEXT: subq $40, %rsp
322 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
323 ; CHECK-NEXT: callq tanf@PLT
324 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
325 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
326 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
327 ; CHECK-NEXT: callq tanf@PLT
328 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
329 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
330 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
331 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
332 ; CHECK-NEXT: # xmm0 = mem[1,0]
333 ; CHECK-NEXT: callq tanf@PLT
334 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
335 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
336 ; CHECK-NEXT: addq $40, %rsp
337 ; CHECK-NEXT: retq
338 %r = call <3 x float> @llvm.tan.v3f32(<3 x float> %x)
339 ret <3 x float> %r
340 }
; <4 x float> tan: scalarized into four tanf libcalls, merged with vinsertps.
342 define <4 x float> @tan_v4f32(<4 x float> %x) nounwind {
343 ; CHECK-LABEL: tan_v4f32:
344 ; CHECK: # %bb.0:
345 ; CHECK-NEXT: subq $40, %rsp
346 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
347 ; CHECK-NEXT: callq tanf@PLT
348 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
349 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
350 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
351 ; CHECK-NEXT: callq tanf@PLT
352 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
353 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
354 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
355 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
356 ; CHECK-NEXT: # xmm0 = mem[1,0]
357 ; CHECK-NEXT: callq tanf@PLT
358 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
359 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
360 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
361 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
362 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
363 ; CHECK-NEXT: callq tanf@PLT
364 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
365 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
366 ; CHECK-NEXT: addq $40, %rsp
367 ; CHECK-NEXT: retq
368 %r = call <4 x float> @llvm.tan.v4f32(<4 x float> %x)
369 ret <4 x float> %r
370 }
; <5 x float> tan: five tanf libcalls; the low four lanes are rebuilt with
; vinsertps and the fifth result is placed with vinsertf128.
372 define <5 x float> @tan_v5f32(<5 x float> %x) nounwind {
373 ; CHECK-LABEL: tan_v5f32:
374 ; CHECK: # %bb.0:
375 ; CHECK-NEXT: subq $72, %rsp
376 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
377 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
378 ; CHECK-NEXT: vzeroupper
379 ; CHECK-NEXT: callq tanf@PLT
380 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
381 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
382 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
383 ; CHECK-NEXT: callq tanf@PLT
384 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
385 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
386 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
387 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
388 ; CHECK-NEXT: # xmm0 = mem[1,0]
389 ; CHECK-NEXT: callq tanf@PLT
390 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
391 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
392 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
393 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
394 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
395 ; CHECK-NEXT: callq tanf@PLT
396 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
397 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
398 ; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
399 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
400 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
401 ; CHECK-NEXT: vzeroupper
402 ; CHECK-NEXT: callq tanf@PLT
403 ; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
404 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
405 ; CHECK-NEXT: addq $72, %rsp
406 ; CHECK-NEXT: retq
407 %r = call <5 x float> @llvm.tan.v5f32(<5 x float> %x)
408 ret <5 x float> %r
409 }
; <6 x float> tan: six tanf libcalls; the upper two lanes are computed first,
; then the low four, and the halves are joined with vinsertf128.
411 define <6 x float> @tan_v6f32(<6 x float> %x) nounwind {
412 ; CHECK-LABEL: tan_v6f32:
413 ; CHECK: # %bb.0:
414 ; CHECK-NEXT: subq $72, %rsp
415 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
416 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
417 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
418 ; CHECK-NEXT: vzeroupper
419 ; CHECK-NEXT: callq tanf@PLT
420 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
421 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
422 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
423 ; CHECK-NEXT: callq tanf@PLT
424 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
425 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
426 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
427 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
428 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
429 ; CHECK-NEXT: vzeroupper
430 ; CHECK-NEXT: callq tanf@PLT
431 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
432 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
433 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
434 ; CHECK-NEXT: callq tanf@PLT
435 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
436 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
437 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
438 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
439 ; CHECK-NEXT: # xmm0 = mem[1,0]
440 ; CHECK-NEXT: callq tanf@PLT
441 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
442 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
443 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
444 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
445 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
446 ; CHECK-NEXT: callq tanf@PLT
447 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
448 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
449 ; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
450 ; CHECK-NEXT: addq $72, %rsp
451 ; CHECK-NEXT: retq
452 %r = call <6 x float> @llvm.tan.v6f32(<6 x float> %x)
453 ret <6 x float> %r
454 }
; <3 x double> tan: three tan libcalls; the low pair is joined with vunpcklpd
; and the third result is placed with vinsertf128.
456 define <3 x double> @tan_v3f64(<3 x double> %x) nounwind {
457 ; CHECK-LABEL: tan_v3f64:
458 ; CHECK: # %bb.0:
459 ; CHECK-NEXT: subq $72, %rsp
460 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
461 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
462 ; CHECK-NEXT: vzeroupper
463 ; CHECK-NEXT: callq tan@PLT
464 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
465 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
466 ; CHECK-NEXT: # xmm0 = mem[1,0]
467 ; CHECK-NEXT: callq tan@PLT
468 ; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
469 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
470 ; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
471 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
472 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
473 ; CHECK-NEXT: vzeroupper
474 ; CHECK-NEXT: callq tan@PLT
475 ; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
476 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
477 ; CHECK-NEXT: addq $72, %rsp
478 ; CHECK-NEXT: retq
479 %r = call <3 x double> @llvm.tan.v3f64(<3 x double> %x)
480 ret <3 x double> %r
481 }
; <1 x float> acos: expected to lower to a single scalar acosf libcall.
483 define <1 x float> @acos_v1f32(<1 x float> %x) nounwind {
484 ; CHECK-LABEL: acos_v1f32:
485 ; CHECK: # %bb.0:
486 ; CHECK-NEXT: pushq %rax
487 ; CHECK-NEXT: callq acosf@PLT
488 ; CHECK-NEXT: popq %rax
489 ; CHECK-NEXT: retq
490 %r = call <1 x float> @llvm.acos.v1f32(<1 x float> %x)
491 ret <1 x float> %r
492 }
; <2 x float> acos: scalarized into two acosf libcalls, merged with vinsertps.
494 define <2 x float> @acos_v2f32(<2 x float> %x) nounwind {
495 ; CHECK-LABEL: acos_v2f32:
496 ; CHECK: # %bb.0:
497 ; CHECK-NEXT: subq $40, %rsp
498 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
499 ; CHECK-NEXT: callq acosf@PLT
500 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
501 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
502 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
503 ; CHECK-NEXT: callq acosf@PLT
504 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
505 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
506 ; CHECK-NEXT: addq $40, %rsp
507 ; CHECK-NEXT: retq
508 %r = call <2 x float> @llvm.acos.v2f32(<2 x float> %x)
509 ret <2 x float> %r
510 }
; <3 x float> acos: scalarized into three acosf libcalls, merged with vinsertps.
512 define <3 x float> @acos_v3f32(<3 x float> %x) nounwind {
513 ; CHECK-LABEL: acos_v3f32:
514 ; CHECK: # %bb.0:
515 ; CHECK-NEXT: subq $40, %rsp
516 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
517 ; CHECK-NEXT: callq acosf@PLT
518 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
519 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
520 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
521 ; CHECK-NEXT: callq acosf@PLT
522 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
523 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
524 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
525 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
526 ; CHECK-NEXT: # xmm0 = mem[1,0]
527 ; CHECK-NEXT: callq acosf@PLT
528 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
529 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
530 ; CHECK-NEXT: addq $40, %rsp
531 ; CHECK-NEXT: retq
532 %r = call <3 x float> @llvm.acos.v3f32(<3 x float> %x)
533 ret <3 x float> %r
534 }
; <4 x float> acos: scalarized into four acosf libcalls, merged with vinsertps.
536 define <4 x float> @acos_v4f32(<4 x float> %x) nounwind {
537 ; CHECK-LABEL: acos_v4f32:
538 ; CHECK: # %bb.0:
539 ; CHECK-NEXT: subq $40, %rsp
540 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
541 ; CHECK-NEXT: callq acosf@PLT
542 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
543 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
544 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
545 ; CHECK-NEXT: callq acosf@PLT
546 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
547 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
548 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
549 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
550 ; CHECK-NEXT: # xmm0 = mem[1,0]
551 ; CHECK-NEXT: callq acosf@PLT
552 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
553 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
554 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
555 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
556 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
557 ; CHECK-NEXT: callq acosf@PLT
558 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
559 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
560 ; CHECK-NEXT: addq $40, %rsp
561 ; CHECK-NEXT: retq
562 %r = call <4 x float> @llvm.acos.v4f32(<4 x float> %x)
563 ret <4 x float> %r
564 }
; <5 x float> acos: five acosf libcalls; the low four lanes are rebuilt with
; vinsertps and the fifth result is placed with vinsertf128.
566 define <5 x float> @acos_v5f32(<5 x float> %x) nounwind {
567 ; CHECK-LABEL: acos_v5f32:
568 ; CHECK: # %bb.0:
569 ; CHECK-NEXT: subq $72, %rsp
570 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
571 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
572 ; CHECK-NEXT: vzeroupper
573 ; CHECK-NEXT: callq acosf@PLT
574 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
575 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
576 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
577 ; CHECK-NEXT: callq acosf@PLT
578 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
579 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
580 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
581 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
582 ; CHECK-NEXT: # xmm0 = mem[1,0]
583 ; CHECK-NEXT: callq acosf@PLT
584 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
585 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
586 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
587 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
588 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
589 ; CHECK-NEXT: callq acosf@PLT
590 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
591 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
592 ; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
593 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
594 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
595 ; CHECK-NEXT: vzeroupper
596 ; CHECK-NEXT: callq acosf@PLT
597 ; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
598 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
599 ; CHECK-NEXT: addq $72, %rsp
600 ; CHECK-NEXT: retq
601 %r = call <5 x float> @llvm.acos.v5f32(<5 x float> %x)
602 ret <5 x float> %r
603 }
; <6 x float> acos: six acosf libcalls; the upper two lanes are computed first,
; then the low four, and the halves are joined with vinsertf128.
605 define <6 x float> @acos_v6f32(<6 x float> %x) nounwind {
606 ; CHECK-LABEL: acos_v6f32:
607 ; CHECK: # %bb.0:
608 ; CHECK-NEXT: subq $72, %rsp
609 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
610 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
611 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
612 ; CHECK-NEXT: vzeroupper
613 ; CHECK-NEXT: callq acosf@PLT
614 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
615 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
616 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
617 ; CHECK-NEXT: callq acosf@PLT
618 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
619 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
620 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
621 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
622 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
623 ; CHECK-NEXT: vzeroupper
624 ; CHECK-NEXT: callq acosf@PLT
625 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
626 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
627 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
628 ; CHECK-NEXT: callq acosf@PLT
629 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
630 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
631 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
632 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
633 ; CHECK-NEXT: # xmm0 = mem[1,0]
634 ; CHECK-NEXT: callq acosf@PLT
635 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
636 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
637 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
638 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
639 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
640 ; CHECK-NEXT: callq acosf@PLT
641 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
642 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
643 ; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
644 ; CHECK-NEXT: addq $72, %rsp
645 ; CHECK-NEXT: retq
646 %r = call <6 x float> @llvm.acos.v6f32(<6 x float> %x)
647 ret <6 x float> %r
648 }
; <3 x double> acos: three acos libcalls; the low pair is joined with
; vunpcklpd and the third result is placed with vinsertf128.
650 define <3 x double> @acos_v3f64(<3 x double> %x) nounwind {
651 ; CHECK-LABEL: acos_v3f64:
652 ; CHECK: # %bb.0:
653 ; CHECK-NEXT: subq $72, %rsp
654 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
655 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
656 ; CHECK-NEXT: vzeroupper
657 ; CHECK-NEXT: callq acos@PLT
658 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
659 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
660 ; CHECK-NEXT: # xmm0 = mem[1,0]
661 ; CHECK-NEXT: callq acos@PLT
662 ; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
663 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
664 ; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
665 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
666 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
667 ; CHECK-NEXT: vzeroupper
668 ; CHECK-NEXT: callq acos@PLT
669 ; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
670 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
671 ; CHECK-NEXT: addq $72, %rsp
672 ; CHECK-NEXT: retq
673 %r = call <3 x double> @llvm.acos.v3f64(<3 x double> %x)
674 ret <3 x double> %r
675 }
; <1 x float> asin: expected to lower to a single scalar asinf libcall.
677 define <1 x float> @asin_v1f32(<1 x float> %x) nounwind {
678 ; CHECK-LABEL: asin_v1f32:
679 ; CHECK: # %bb.0:
680 ; CHECK-NEXT: pushq %rax
681 ; CHECK-NEXT: callq asinf@PLT
682 ; CHECK-NEXT: popq %rax
683 ; CHECK-NEXT: retq
684 %r = call <1 x float> @llvm.asin.v1f32(<1 x float> %x)
685 ret <1 x float> %r
686 }
; <2 x float> asin: scalarized into two asinf libcalls, merged with vinsertps.
688 define <2 x float> @asin_v2f32(<2 x float> %x) nounwind {
689 ; CHECK-LABEL: asin_v2f32:
690 ; CHECK: # %bb.0:
691 ; CHECK-NEXT: subq $40, %rsp
692 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
693 ; CHECK-NEXT: callq asinf@PLT
694 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
695 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
696 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
697 ; CHECK-NEXT: callq asinf@PLT
698 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
699 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
700 ; CHECK-NEXT: addq $40, %rsp
701 ; CHECK-NEXT: retq
702 %r = call <2 x float> @llvm.asin.v2f32(<2 x float> %x)
703 ret <2 x float> %r
704 }
; <3 x float> asin: scalarized into three asinf libcalls, merged with vinsertps.
706 define <3 x float> @asin_v3f32(<3 x float> %x) nounwind {
707 ; CHECK-LABEL: asin_v3f32:
708 ; CHECK: # %bb.0:
709 ; CHECK-NEXT: subq $40, %rsp
710 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
711 ; CHECK-NEXT: callq asinf@PLT
712 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
713 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
714 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
715 ; CHECK-NEXT: callq asinf@PLT
716 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
717 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
718 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
719 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
720 ; CHECK-NEXT: # xmm0 = mem[1,0]
721 ; CHECK-NEXT: callq asinf@PLT
722 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
723 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
724 ; CHECK-NEXT: addq $40, %rsp
725 ; CHECK-NEXT: retq
726 %r = call <3 x float> @llvm.asin.v3f32(<3 x float> %x)
727 ret <3 x float> %r
728 }
; <4 x float> asin: scalarized into four asinf libcalls, merged with vinsertps.
730 define <4 x float> @asin_v4f32(<4 x float> %x) nounwind {
731 ; CHECK-LABEL: asin_v4f32:
732 ; CHECK: # %bb.0:
733 ; CHECK-NEXT: subq $40, %rsp
734 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
735 ; CHECK-NEXT: callq asinf@PLT
736 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
737 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
738 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
739 ; CHECK-NEXT: callq asinf@PLT
740 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
741 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
742 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
743 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
744 ; CHECK-NEXT: # xmm0 = mem[1,0]
745 ; CHECK-NEXT: callq asinf@PLT
746 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
747 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
748 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
749 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
750 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
751 ; CHECK-NEXT: callq asinf@PLT
752 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
753 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
754 ; CHECK-NEXT: addq $40, %rsp
755 ; CHECK-NEXT: retq
756 %r = call <4 x float> @llvm.asin.v4f32(<4 x float> %x)
757 ret <4 x float> %r
758 }
; <5 x float> asin: the checks expect five scalar asinf libcalls and no vector
; libcall. Lanes 0-3 are rebuilt in an xmm register with vinsertps; lane 4 is
; taken from the upper 128-bit half of the spilled ymm0 (vextractf128 $1) and
; its result is placed back with vinsertf128. vzeroupper is expected before the
; two calls that directly follow a ymm access.
760 define <5 x float> @asin_v5f32(<5 x float> %x) nounwind {
761 ; CHECK-LABEL: asin_v5f32:
763 ; CHECK-NEXT: subq $72, %rsp
764 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
765 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
766 ; CHECK-NEXT: vzeroupper
767 ; CHECK-NEXT: callq asinf@PLT
768 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
769 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
770 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
771 ; CHECK-NEXT: callq asinf@PLT
772 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
773 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
774 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
775 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
776 ; CHECK-NEXT: # xmm0 = mem[1,0]
777 ; CHECK-NEXT: callq asinf@PLT
778 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
779 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
780 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
781 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
782 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
783 ; CHECK-NEXT: callq asinf@PLT
784 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
785 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
786 ; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
787 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
788 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
789 ; CHECK-NEXT: vzeroupper
790 ; CHECK-NEXT: callq asinf@PLT
791 ; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
792 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
793 ; CHECK-NEXT: addq $72, %rsp
795 %r = call <5 x float> @llvm.asin.v5f32(<5 x float> %x)
; <6 x float> asin: the checks expect six scalar asinf libcalls and no vector
; libcall. The upper 128-bit half (lanes 4-5) is extracted with vextractf128 $1
; and processed first, then lanes 0-3 are rebuilt with vinsertps; the final
; vinsertf128 folds the reload of the upper-half result into ymm0.
799 define <6 x float> @asin_v6f32(<6 x float> %x) nounwind {
800 ; CHECK-LABEL: asin_v6f32:
802 ; CHECK-NEXT: subq $72, %rsp
803 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
804 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
805 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
806 ; CHECK-NEXT: vzeroupper
807 ; CHECK-NEXT: callq asinf@PLT
808 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
809 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
810 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
811 ; CHECK-NEXT: callq asinf@PLT
812 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
813 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
814 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
815 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
816 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
817 ; CHECK-NEXT: vzeroupper
818 ; CHECK-NEXT: callq asinf@PLT
819 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
820 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
821 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
822 ; CHECK-NEXT: callq asinf@PLT
823 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
824 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
825 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
826 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
827 ; CHECK-NEXT: # xmm0 = mem[1,0]
828 ; CHECK-NEXT: callq asinf@PLT
829 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
830 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
831 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
832 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
833 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
834 ; CHECK-NEXT: callq asinf@PLT
835 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
836 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
837 ; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
838 ; CHECK-NEXT: addq $72, %rsp
840 %r = call <6 x float> @llvm.asin.v6f32(<6 x float> %x)
; <3 x double> asin: the checks expect three scalar asin (double-precision)
; libcalls and no vector libcall. Lanes 0 and 1 are joined with vunpcklpd;
; lane 2 comes from the upper 128-bit half of the spilled ymm0
; (vextractf128 $1) and its result is placed back with vinsertf128.
844 define <3 x double> @asin_v3f64(<3 x double> %x) nounwind {
845 ; CHECK-LABEL: asin_v3f64:
847 ; CHECK-NEXT: subq $72, %rsp
848 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
849 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
850 ; CHECK-NEXT: vzeroupper
851 ; CHECK-NEXT: callq asin@PLT
852 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
853 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
854 ; CHECK-NEXT: # xmm0 = mem[1,0]
855 ; CHECK-NEXT: callq asin@PLT
856 ; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
857 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
858 ; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
859 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
860 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
861 ; CHECK-NEXT: vzeroupper
862 ; CHECK-NEXT: callq asin@PLT
863 ; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
864 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
865 ; CHECK-NEXT: addq $72, %rsp
867 %r = call <3 x double> @llvm.asin.v3f64(<3 x double> %x)
; <1 x float> atan: the checks expect a single scalar atanf libcall, bracketed
; by a pushq/popq of %rax (presumably to keep the stack aligned for the call).
871 define <1 x float> @atan_v1f32(<1 x float> %x) nounwind {
872 ; CHECK-LABEL: atan_v1f32:
874 ; CHECK-NEXT: pushq %rax
875 ; CHECK-NEXT: callq atanf@PLT
876 ; CHECK-NEXT: popq %rax
878 %r = call <1 x float> @llvm.atan.v1f32(<1 x float> %x)
; <2 x float> atan: the checks expect two scalar atanf libcalls and no vector
; libcall. Lane 0 is passed straight from xmm0 and lane 1 comes from a folded
; vmovshdup reload; the two results are merged with vinsertps.
882 define <2 x float> @atan_v2f32(<2 x float> %x) nounwind {
883 ; CHECK-LABEL: atan_v2f32:
885 ; CHECK-NEXT: subq $40, %rsp
886 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
887 ; CHECK-NEXT: callq atanf@PLT
888 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
889 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
890 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
891 ; CHECK-NEXT: callq atanf@PLT
892 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
893 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
894 ; CHECK-NEXT: addq $40, %rsp
896 %r = call <2 x float> @llvm.atan.v2f32(<2 x float> %x)
; <3 x float> atan: the checks expect three scalar atanf libcalls and no vector
; libcall. Lane 0 is passed straight from xmm0, lane 1 comes from a folded
; vmovshdup reload and lane 2 from a folded vpermilpd $1 reload; each result is
; merged into the accumulated vector with vinsertps.
900 define <3 x float> @atan_v3f32(<3 x float> %x) nounwind {
901 ; CHECK-LABEL: atan_v3f32:
903 ; CHECK-NEXT: subq $40, %rsp
904 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
905 ; CHECK-NEXT: callq atanf@PLT
906 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
907 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
908 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
909 ; CHECK-NEXT: callq atanf@PLT
910 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
911 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
912 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
913 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
914 ; CHECK-NEXT: # xmm0 = mem[1,0]
915 ; CHECK-NEXT: callq atanf@PLT
916 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
917 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
918 ; CHECK-NEXT: addq $40, %rsp
920 %r = call <3 x float> @llvm.atan.v3f32(<3 x float> %x)
; <4 x float> atan: the checks expect four scalar atanf libcalls and no vector
; libcall. Lane 0 is passed straight from xmm0; lanes 1, 2 and 3 come from
; folded vmovshdup, vpermilpd $1 and vpermilps $255 reloads respectively. Each
; result is merged into the accumulated vector with vinsertps.
924 define <4 x float> @atan_v4f32(<4 x float> %x) nounwind {
925 ; CHECK-LABEL: atan_v4f32:
927 ; CHECK-NEXT: subq $40, %rsp
928 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
929 ; CHECK-NEXT: callq atanf@PLT
930 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
931 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
932 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
933 ; CHECK-NEXT: callq atanf@PLT
934 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
935 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
936 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
937 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
938 ; CHECK-NEXT: # xmm0 = mem[1,0]
939 ; CHECK-NEXT: callq atanf@PLT
940 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
941 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
942 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
943 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
944 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
945 ; CHECK-NEXT: callq atanf@PLT
946 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
947 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
948 ; CHECK-NEXT: addq $40, %rsp
950 %r = call <4 x float> @llvm.atan.v4f32(<4 x float> %x)
; <5 x float> atan: the checks expect five scalar atanf libcalls and no vector
; libcall. Lanes 0-3 are rebuilt in an xmm register with vinsertps; lane 4 is
; taken from the upper 128-bit half of the spilled ymm0 (vextractf128 $1) and
; its result is placed back with vinsertf128. vzeroupper is expected before the
; two calls that directly follow a ymm access.
954 define <5 x float> @atan_v5f32(<5 x float> %x) nounwind {
955 ; CHECK-LABEL: atan_v5f32:
957 ; CHECK-NEXT: subq $72, %rsp
958 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
959 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
960 ; CHECK-NEXT: vzeroupper
961 ; CHECK-NEXT: callq atanf@PLT
962 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
963 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
964 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
965 ; CHECK-NEXT: callq atanf@PLT
966 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
967 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
968 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
969 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
970 ; CHECK-NEXT: # xmm0 = mem[1,0]
971 ; CHECK-NEXT: callq atanf@PLT
972 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
973 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
974 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
975 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
976 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
977 ; CHECK-NEXT: callq atanf@PLT
978 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
979 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
980 ; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
981 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
982 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
983 ; CHECK-NEXT: vzeroupper
984 ; CHECK-NEXT: callq atanf@PLT
985 ; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
986 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
987 ; CHECK-NEXT: addq $72, %rsp
989 %r = call <5 x float> @llvm.atan.v5f32(<5 x float> %x)
; <6 x float> atan: the checks expect six scalar atanf libcalls and no vector
; libcall. The upper 128-bit half (lanes 4-5) is extracted with vextractf128 $1
; and processed first, then lanes 0-3 are rebuilt with vinsertps; the final
; vinsertf128 folds the reload of the upper-half result into ymm0.
993 define <6 x float> @atan_v6f32(<6 x float> %x) nounwind {
994 ; CHECK-LABEL: atan_v6f32:
996 ; CHECK-NEXT: subq $72, %rsp
997 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
998 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
999 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1000 ; CHECK-NEXT: vzeroupper
1001 ; CHECK-NEXT: callq atanf@PLT
1002 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1003 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
1004 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1005 ; CHECK-NEXT: callq atanf@PLT
1006 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1007 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1008 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1009 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
1010 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1011 ; CHECK-NEXT: vzeroupper
1012 ; CHECK-NEXT: callq atanf@PLT
1013 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1014 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1015 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1016 ; CHECK-NEXT: callq atanf@PLT
1017 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1018 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1019 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1020 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1021 ; CHECK-NEXT: # xmm0 = mem[1,0]
1022 ; CHECK-NEXT: callq atanf@PLT
1023 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1024 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
1025 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1026 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1027 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
1028 ; CHECK-NEXT: callq atanf@PLT
1029 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1030 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1031 ; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
1032 ; CHECK-NEXT: addq $72, %rsp
1034 %r = call <6 x float> @llvm.atan.v6f32(<6 x float> %x)
; <3 x double> atan: the checks expect three scalar atan (double-precision)
; libcalls and no vector libcall. Lanes 0 and 1 are joined with vunpcklpd;
; lane 2 comes from the upper 128-bit half of the spilled ymm0
; (vextractf128 $1) and its result is placed back with vinsertf128.
1038 define <3 x double> @atan_v3f64(<3 x double> %x) nounwind {
1039 ; CHECK-LABEL: atan_v3f64:
1041 ; CHECK-NEXT: subq $72, %rsp
1042 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
1043 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1044 ; CHECK-NEXT: vzeroupper
1045 ; CHECK-NEXT: callq atan@PLT
1046 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1047 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1048 ; CHECK-NEXT: # xmm0 = mem[1,0]
1049 ; CHECK-NEXT: callq atan@PLT
1050 ; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
1051 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1052 ; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
1053 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
1054 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
1055 ; CHECK-NEXT: vzeroupper
1056 ; CHECK-NEXT: callq atan@PLT
1057 ; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
1058 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1059 ; CHECK-NEXT: addq $72, %rsp
1061 %r = call <3 x double> @llvm.atan.v3f64(<3 x double> %x)
; <1 x float> cosh: the checks expect a single scalar coshf libcall, bracketed
; by a pushq/popq of %rax (presumably to keep the stack aligned for the call).
1065 define <1 x float> @cosh_v1f32(<1 x float> %x) nounwind {
1066 ; CHECK-LABEL: cosh_v1f32:
1068 ; CHECK-NEXT: pushq %rax
1069 ; CHECK-NEXT: callq coshf@PLT
1070 ; CHECK-NEXT: popq %rax
1072 %r = call <1 x float> @llvm.cosh.v1f32(<1 x float> %x)
; <2 x float> cosh: the checks expect two scalar coshf libcalls and no vector
; libcall. Lane 0 is passed straight from xmm0 and lane 1 comes from a folded
; vmovshdup reload; the two results are merged with vinsertps.
1076 define <2 x float> @cosh_v2f32(<2 x float> %x) nounwind {
1077 ; CHECK-LABEL: cosh_v2f32:
1079 ; CHECK-NEXT: subq $40, %rsp
1080 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1081 ; CHECK-NEXT: callq coshf@PLT
1082 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1083 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
1084 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1085 ; CHECK-NEXT: callq coshf@PLT
1086 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1087 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1088 ; CHECK-NEXT: addq $40, %rsp
1090 %r = call <2 x float> @llvm.cosh.v2f32(<2 x float> %x)
; <3 x float> cosh: the checks expect three scalar coshf libcalls and no vector
; libcall. Lane 0 is passed straight from xmm0, lane 1 comes from a folded
; vmovshdup reload and lane 2 from a folded vpermilpd $1 reload; each result is
; merged into the accumulated vector with vinsertps.
1094 define <3 x float> @cosh_v3f32(<3 x float> %x) nounwind {
1095 ; CHECK-LABEL: cosh_v3f32:
1097 ; CHECK-NEXT: subq $40, %rsp
1098 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1099 ; CHECK-NEXT: callq coshf@PLT
1100 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1101 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1102 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1103 ; CHECK-NEXT: callq coshf@PLT
1104 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1105 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1106 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1107 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1108 ; CHECK-NEXT: # xmm0 = mem[1,0]
1109 ; CHECK-NEXT: callq coshf@PLT
1110 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1111 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
1112 ; CHECK-NEXT: addq $40, %rsp
1114 %r = call <3 x float> @llvm.cosh.v3f32(<3 x float> %x)
; <4 x float> cosh: the checks expect four scalar coshf libcalls and no vector
; libcall. Lane 0 is passed straight from xmm0; lanes 1, 2 and 3 come from
; folded vmovshdup, vpermilpd $1 and vpermilps $255 reloads respectively. Each
; result is merged into the accumulated vector with vinsertps.
1118 define <4 x float> @cosh_v4f32(<4 x float> %x) nounwind {
1119 ; CHECK-LABEL: cosh_v4f32:
1121 ; CHECK-NEXT: subq $40, %rsp
1122 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1123 ; CHECK-NEXT: callq coshf@PLT
1124 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1125 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1126 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1127 ; CHECK-NEXT: callq coshf@PLT
1128 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1129 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1130 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1131 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1132 ; CHECK-NEXT: # xmm0 = mem[1,0]
1133 ; CHECK-NEXT: callq coshf@PLT
1134 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1135 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
1136 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1137 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1138 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
1139 ; CHECK-NEXT: callq coshf@PLT
1140 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1141 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1142 ; CHECK-NEXT: addq $40, %rsp
1144 %r = call <4 x float> @llvm.cosh.v4f32(<4 x float> %x)
; <5 x float> cosh: the checks expect five scalar coshf libcalls and no vector
; libcall. Lanes 0-3 are rebuilt in an xmm register with vinsertps; lane 4 is
; taken from the upper 128-bit half of the spilled ymm0 (vextractf128 $1) and
; its result is placed back with vinsertf128. vzeroupper is expected before the
; two calls that directly follow a ymm access.
1148 define <5 x float> @cosh_v5f32(<5 x float> %x) nounwind {
1149 ; CHECK-LABEL: cosh_v5f32:
1151 ; CHECK-NEXT: subq $72, %rsp
1152 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
1153 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1154 ; CHECK-NEXT: vzeroupper
1155 ; CHECK-NEXT: callq coshf@PLT
1156 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1157 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1158 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1159 ; CHECK-NEXT: callq coshf@PLT
1160 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1161 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1162 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1163 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1164 ; CHECK-NEXT: # xmm0 = mem[1,0]
1165 ; CHECK-NEXT: callq coshf@PLT
1166 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1167 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
1168 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1169 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1170 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
1171 ; CHECK-NEXT: callq coshf@PLT
1172 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1173 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1174 ; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
1175 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
1176 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
1177 ; CHECK-NEXT: vzeroupper
1178 ; CHECK-NEXT: callq coshf@PLT
1179 ; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
1180 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1181 ; CHECK-NEXT: addq $72, %rsp
1183 %r = call <5 x float> @llvm.cosh.v5f32(<5 x float> %x)
; <6 x float> cosh: the checks expect six scalar coshf libcalls and no vector
; libcall. The upper 128-bit half (lanes 4-5) is extracted with vextractf128 $1
; and processed first, then lanes 0-3 are rebuilt with vinsertps; the final
; vinsertf128 folds the reload of the upper-half result into ymm0.
1187 define <6 x float> @cosh_v6f32(<6 x float> %x) nounwind {
1188 ; CHECK-LABEL: cosh_v6f32:
1190 ; CHECK-NEXT: subq $72, %rsp
1191 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
1192 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
1193 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1194 ; CHECK-NEXT: vzeroupper
1195 ; CHECK-NEXT: callq coshf@PLT
1196 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1197 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
1198 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1199 ; CHECK-NEXT: callq coshf@PLT
1200 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1201 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1202 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1203 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
1204 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1205 ; CHECK-NEXT: vzeroupper
1206 ; CHECK-NEXT: callq coshf@PLT
1207 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1208 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1209 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1210 ; CHECK-NEXT: callq coshf@PLT
1211 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1212 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1213 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1214 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1215 ; CHECK-NEXT: # xmm0 = mem[1,0]
1216 ; CHECK-NEXT: callq coshf@PLT
1217 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1218 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
1219 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1220 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1221 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
1222 ; CHECK-NEXT: callq coshf@PLT
1223 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1224 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1225 ; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
1226 ; CHECK-NEXT: addq $72, %rsp
1228 %r = call <6 x float> @llvm.cosh.v6f32(<6 x float> %x)
; <3 x double> cosh: the checks expect three scalar cosh (double-precision)
; libcalls and no vector libcall. Lanes 0 and 1 are joined with vunpcklpd;
; lane 2 comes from the upper 128-bit half of the spilled ymm0
; (vextractf128 $1) and its result is placed back with vinsertf128.
1232 define <3 x double> @cosh_v3f64(<3 x double> %x) nounwind {
1233 ; CHECK-LABEL: cosh_v3f64:
1235 ; CHECK-NEXT: subq $72, %rsp
1236 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
1237 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1238 ; CHECK-NEXT: vzeroupper
1239 ; CHECK-NEXT: callq cosh@PLT
1240 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1241 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1242 ; CHECK-NEXT: # xmm0 = mem[1,0]
1243 ; CHECK-NEXT: callq cosh@PLT
1244 ; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
1245 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1246 ; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
1247 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
1248 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
1249 ; CHECK-NEXT: vzeroupper
1250 ; CHECK-NEXT: callq cosh@PLT
1251 ; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
1252 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1253 ; CHECK-NEXT: addq $72, %rsp
1255 %r = call <3 x double> @llvm.cosh.v3f64(<3 x double> %x)
; <1 x float> sinh: the checks expect a single scalar sinhf libcall, bracketed
; by a pushq/popq of %rax (presumably to keep the stack aligned for the call).
1259 define <1 x float> @sinh_v1f32(<1 x float> %x) nounwind {
1260 ; CHECK-LABEL: sinh_v1f32:
1262 ; CHECK-NEXT: pushq %rax
1263 ; CHECK-NEXT: callq sinhf@PLT
1264 ; CHECK-NEXT: popq %rax
1266 %r = call <1 x float> @llvm.sinh.v1f32(<1 x float> %x)
; <2 x float> sinh: the checks expect two scalar sinhf libcalls and no vector
; libcall. Lane 0 is passed straight from xmm0 and lane 1 comes from a folded
; vmovshdup reload; the two results are merged with vinsertps.
1270 define <2 x float> @sinh_v2f32(<2 x float> %x) nounwind {
1271 ; CHECK-LABEL: sinh_v2f32:
1273 ; CHECK-NEXT: subq $40, %rsp
1274 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1275 ; CHECK-NEXT: callq sinhf@PLT
1276 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1277 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
1278 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1279 ; CHECK-NEXT: callq sinhf@PLT
1280 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1281 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1282 ; CHECK-NEXT: addq $40, %rsp
1284 %r = call <2 x float> @llvm.sinh.v2f32(<2 x float> %x)
; <3 x float> sinh: the checks expect three scalar sinhf libcalls and no vector
; libcall. Lane 0 is passed straight from xmm0, lane 1 comes from a folded
; vmovshdup reload and lane 2 from a folded vpermilpd $1 reload; each result is
; merged into the accumulated vector with vinsertps.
1288 define <3 x float> @sinh_v3f32(<3 x float> %x) nounwind {
1289 ; CHECK-LABEL: sinh_v3f32:
1291 ; CHECK-NEXT: subq $40, %rsp
1292 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1293 ; CHECK-NEXT: callq sinhf@PLT
1294 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1295 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1296 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1297 ; CHECK-NEXT: callq sinhf@PLT
1298 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1299 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1300 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1301 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1302 ; CHECK-NEXT: # xmm0 = mem[1,0]
1303 ; CHECK-NEXT: callq sinhf@PLT
1304 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1305 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
1306 ; CHECK-NEXT: addq $40, %rsp
1308 %r = call <3 x float> @llvm.sinh.v3f32(<3 x float> %x)
; <4 x float> sinh: the checks expect four scalar sinhf libcalls and no vector
; libcall. Lane 0 is passed straight from xmm0; lanes 1, 2 and 3 come from
; folded vmovshdup, vpermilpd $1 and vpermilps $255 reloads respectively. Each
; result is merged into the accumulated vector with vinsertps.
1312 define <4 x float> @sinh_v4f32(<4 x float> %x) nounwind {
1313 ; CHECK-LABEL: sinh_v4f32:
1315 ; CHECK-NEXT: subq $40, %rsp
1316 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1317 ; CHECK-NEXT: callq sinhf@PLT
1318 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1319 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1320 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1321 ; CHECK-NEXT: callq sinhf@PLT
1322 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1323 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1324 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1325 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1326 ; CHECK-NEXT: # xmm0 = mem[1,0]
1327 ; CHECK-NEXT: callq sinhf@PLT
1328 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1329 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
1330 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1331 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1332 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
1333 ; CHECK-NEXT: callq sinhf@PLT
1334 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1335 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1336 ; CHECK-NEXT: addq $40, %rsp
1338 %r = call <4 x float> @llvm.sinh.v4f32(<4 x float> %x)
; <5 x float> sinh: the checks expect five scalar sinhf libcalls and no vector
; libcall. Lanes 0-3 are rebuilt in an xmm register with vinsertps; lane 4 is
; taken from the upper 128-bit half of the spilled ymm0 (vextractf128 $1) and
; its result is placed back with vinsertf128. vzeroupper is expected before the
; two calls that directly follow a ymm access.
1342 define <5 x float> @sinh_v5f32(<5 x float> %x) nounwind {
1343 ; CHECK-LABEL: sinh_v5f32:
1345 ; CHECK-NEXT: subq $72, %rsp
1346 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
1347 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1348 ; CHECK-NEXT: vzeroupper
1349 ; CHECK-NEXT: callq sinhf@PLT
1350 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1351 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1352 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1353 ; CHECK-NEXT: callq sinhf@PLT
1354 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1355 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1356 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1357 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1358 ; CHECK-NEXT: # xmm0 = mem[1,0]
1359 ; CHECK-NEXT: callq sinhf@PLT
1360 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1361 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
1362 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1363 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1364 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
1365 ; CHECK-NEXT: callq sinhf@PLT
1366 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1367 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1368 ; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
1369 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
1370 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
1371 ; CHECK-NEXT: vzeroupper
1372 ; CHECK-NEXT: callq sinhf@PLT
1373 ; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
1374 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1375 ; CHECK-NEXT: addq $72, %rsp
1377 %r = call <5 x float> @llvm.sinh.v5f32(<5 x float> %x)
; <6 x float> sinh: the checks expect six scalar sinhf libcalls and no vector
; libcall. The upper 128-bit half (lanes 4-5) is extracted with vextractf128 $1
; and processed first, then lanes 0-3 are rebuilt with vinsertps; the final
; vinsertf128 folds the reload of the upper-half result into ymm0.
1381 define <6 x float> @sinh_v6f32(<6 x float> %x) nounwind {
1382 ; CHECK-LABEL: sinh_v6f32:
1384 ; CHECK-NEXT: subq $72, %rsp
1385 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
1386 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
1387 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1388 ; CHECK-NEXT: vzeroupper
1389 ; CHECK-NEXT: callq sinhf@PLT
1390 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1391 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
1392 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1393 ; CHECK-NEXT: callq sinhf@PLT
1394 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1395 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1396 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1397 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
1398 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1399 ; CHECK-NEXT: vzeroupper
1400 ; CHECK-NEXT: callq sinhf@PLT
1401 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1402 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1403 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1404 ; CHECK-NEXT: callq sinhf@PLT
1405 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1406 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1407 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1408 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1409 ; CHECK-NEXT: # xmm0 = mem[1,0]
1410 ; CHECK-NEXT: callq sinhf@PLT
1411 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1412 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
1413 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1414 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1415 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
1416 ; CHECK-NEXT: callq sinhf@PLT
1417 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1418 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1419 ; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
1420 ; CHECK-NEXT: addq $72, %rsp
1422 %r = call <6 x float> @llvm.sinh.v6f32(<6 x float> %x)
; <3 x double> sinh: the checks expect three scalar sinh (double-precision)
; libcalls and no vector libcall. Lanes 0 and 1 are joined with vunpcklpd;
; lane 2 comes from the upper 128-bit half of the spilled ymm0
; (vextractf128 $1) and its result is placed back with vinsertf128.
1426 define <3 x double> @sinh_v3f64(<3 x double> %x) nounwind {
1427 ; CHECK-LABEL: sinh_v3f64:
1429 ; CHECK-NEXT: subq $72, %rsp
1430 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
1431 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1432 ; CHECK-NEXT: vzeroupper
1433 ; CHECK-NEXT: callq sinh@PLT
1434 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1435 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1436 ; CHECK-NEXT: # xmm0 = mem[1,0]
1437 ; CHECK-NEXT: callq sinh@PLT
1438 ; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
1439 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1440 ; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
1441 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
1442 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
1443 ; CHECK-NEXT: vzeroupper
1444 ; CHECK-NEXT: callq sinh@PLT
1445 ; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
1446 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1447 ; CHECK-NEXT: addq $72, %rsp
1449 %r = call <3 x double> @llvm.sinh.v3f64(<3 x double> %x)
; <1 x float> tanh: the checks expect a single scalar tanhf libcall, bracketed
; by a pushq/popq of %rax (presumably to keep the stack aligned for the call).
1453 define <1 x float> @tanh_v1f32(<1 x float> %x) nounwind {
1454 ; CHECK-LABEL: tanh_v1f32:
1456 ; CHECK-NEXT: pushq %rax
1457 ; CHECK-NEXT: callq tanhf@PLT
1458 ; CHECK-NEXT: popq %rax
1460 %r = call <1 x float> @llvm.tanh.v1f32(<1 x float> %x)
; <2 x float> tanh: the checks expect two scalar tanhf libcalls and no vector
; libcall. Lane 0 is passed straight from xmm0 and lane 1 comes from a folded
; vmovshdup reload; the two results are merged with vinsertps.
1464 define <2 x float> @tanh_v2f32(<2 x float> %x) nounwind {
1465 ; CHECK-LABEL: tanh_v2f32:
1467 ; CHECK-NEXT: subq $40, %rsp
1468 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1469 ; CHECK-NEXT: callq tanhf@PLT
1470 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1471 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
1472 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1473 ; CHECK-NEXT: callq tanhf@PLT
1474 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1475 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1476 ; CHECK-NEXT: addq $40, %rsp
1478 %r = call <2 x float> @llvm.tanh.v2f32(<2 x float> %x)
; tanh on <3 x float>. The expected assembly makes three calls to tanhf.
; Elements 1 and 2 are moved into the low lane by vmovshdup and vpermilpd
; respectively, and each result is merged into the accumulated vector with
; vinsertps. There is no fourth call for the unused lane.
1482 define <3 x float> @tanh_v3f32(<3 x float> %x) nounwind {
1483 ; CHECK-LABEL: tanh_v3f32:
1485 ; CHECK-NEXT: subq $40, %rsp
1486 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1487 ; CHECK-NEXT: callq tanhf@PLT
1488 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1489 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1490 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1491 ; CHECK-NEXT: callq tanhf@PLT
1492 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1493 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1494 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1495 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1496 ; CHECK-NEXT: # xmm0 = mem[1,0]
1497 ; CHECK-NEXT: callq tanhf@PLT
1498 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1499 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
1500 ; CHECK-NEXT: addq $40, %rsp
1502 %r = call <3 x float> @llvm.tanh.v3f32(<3 x float> %x)
; tanh on <4 x float>. The expected assembly makes four calls to tanhf, one per
; element. Elements 1, 2 and 3 are moved into the low lane by vmovshdup,
; vpermilpd and vpermilps respectively, and each result is merged into the
; accumulated vector with vinsertps.
1506 define <4 x float> @tanh_v4f32(<4 x float> %x) nounwind {
1507 ; CHECK-LABEL: tanh_v4f32:
1509 ; CHECK-NEXT: subq $40, %rsp
1510 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1511 ; CHECK-NEXT: callq tanhf@PLT
1512 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1513 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1514 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1515 ; CHECK-NEXT: callq tanhf@PLT
1516 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1517 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1518 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1519 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1520 ; CHECK-NEXT: # xmm0 = mem[1,0]
1521 ; CHECK-NEXT: callq tanhf@PLT
1522 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1523 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
1524 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1525 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1526 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
1527 ; CHECK-NEXT: callq tanhf@PLT
1528 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1529 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1530 ; CHECK-NEXT: addq $40, %rsp
1532 %r = call <4 x float> @llvm.tanh.v4f32(<4 x float> %x)
; tanh on <5 x float>, held in a 256-bit register. The expected assembly makes
; five calls to tanhf: four on the elements of the low 128 bits, then one on
; the low element of the upper 128 bits (vextractf128). The fifth result is
; placed in the upper half with vinsertf128. vzeroupper is emitted before the
; calls that follow a ymm access.
1536 define <5 x float> @tanh_v5f32(<5 x float> %x) nounwind {
1537 ; CHECK-LABEL: tanh_v5f32:
1539 ; CHECK-NEXT: subq $72, %rsp
1540 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
1541 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1542 ; CHECK-NEXT: vzeroupper
1543 ; CHECK-NEXT: callq tanhf@PLT
1544 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1545 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1546 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1547 ; CHECK-NEXT: callq tanhf@PLT
1548 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1549 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1550 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1551 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1552 ; CHECK-NEXT: # xmm0 = mem[1,0]
1553 ; CHECK-NEXT: callq tanhf@PLT
1554 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1555 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
1556 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1557 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1558 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
1559 ; CHECK-NEXT: callq tanhf@PLT
1560 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1561 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1562 ; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
1563 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
1564 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
1565 ; CHECK-NEXT: vzeroupper
1566 ; CHECK-NEXT: callq tanhf@PLT
1567 ; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
1568 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1569 ; CHECK-NEXT: addq $72, %rsp
1571 %r = call <5 x float> @llvm.tanh.v5f32(<5 x float> %x)
; tanh on <6 x float>, held in a 256-bit register. The expected assembly makes
; six calls to tanhf. The upper 128 bits are handled first: vextractf128, then
; two calls whose results are merged with vinsertps and spilled. The four
; elements of the low 128 bits follow, and the spilled upper pair is folded
; back in with vinsertf128.
1575 define <6 x float> @tanh_v6f32(<6 x float> %x) nounwind {
1576 ; CHECK-LABEL: tanh_v6f32:
1578 ; CHECK-NEXT: subq $72, %rsp
1579 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
1580 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
1581 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1582 ; CHECK-NEXT: vzeroupper
1583 ; CHECK-NEXT: callq tanhf@PLT
1584 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1585 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
1586 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1587 ; CHECK-NEXT: callq tanhf@PLT
1588 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1589 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1590 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1591 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
1592 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1593 ; CHECK-NEXT: vzeroupper
1594 ; CHECK-NEXT: callq tanhf@PLT
1595 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1596 ; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1597 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1598 ; CHECK-NEXT: callq tanhf@PLT
1599 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1600 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1601 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1602 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1603 ; CHECK-NEXT: # xmm0 = mem[1,0]
1604 ; CHECK-NEXT: callq tanhf@PLT
1605 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1606 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
1607 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1608 ; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1609 ; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
1610 ; CHECK-NEXT: callq tanhf@PLT
1611 ; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
1612 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
1613 ; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
1614 ; CHECK-NEXT: addq $72, %rsp
1616 %r = call <6 x float> @llvm.tanh.v6f32(<6 x float> %x)
; tanh on <3 x double>. The expected AVX assembly makes three scalar calls to
; tanh: one on the low element, one on element 1 (moved down by vpermilpd), and
; one on the low element of the upper 128 bits (vextractf128). The results are
; recombined with vunpcklpd and vinsertf128.
1620 define <3 x double> @tanh_v3f64(<3 x double> %x) nounwind {
1621 ; CHECK-LABEL: tanh_v3f64:
1623 ; CHECK-NEXT: subq $72, %rsp
1624 ; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
1625 ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
1626 ; CHECK-NEXT: vzeroupper
1627 ; CHECK-NEXT: callq tanh@PLT
1628 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1629 ; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1630 ; CHECK-NEXT: # xmm0 = mem[1,0]
1631 ; CHECK-NEXT: callq tanh@PLT
1632 ; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
1633 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
1634 ; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
1635 ; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
1636 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
1637 ; CHECK-NEXT: vzeroupper
1638 ; CHECK-NEXT: callq tanh@PLT
1639 ; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
1640 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1641 ; CHECK-NEXT: addq $72, %rsp
1643 %r = call <3 x double> @llvm.tanh.v3f64(<3 x double> %x)
; fabs on <2 x float>. No libcall is expected: the operation is a single
; vandps of the input with a constant-pool operand.
1647 define <2 x float> @fabs_v2f32(<2 x float> %x) nounwind {
1648 ; CHECK-LABEL: fabs_v2f32:
1650 ; CHECK-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1652 %r = call <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
; ceil on <2 x float>. No libcall is expected: the operation is a single
; vroundps with immediate 10.
1656 define <2 x float> @ceil_v2f32(<2 x float> %x) nounwind {
1657 ; CHECK-LABEL: ceil_v2f32:
1659 ; CHECK-NEXT: vroundps $10, %xmm0, %xmm0
1661 %r = call <2 x float> @llvm.ceil.v2f32(<2 x float> %x)
; cos on <2 x float>. The expected assembly makes exactly two calls to cosf,
; one per element: element 0 is passed as-is, element 1 is moved into the low
; lane by vmovshdup, and the two results are merged with vinsertps.
1665 define <2 x float> @cos_v2f32(<2 x float> %x) nounwind {
1666 ; CHECK-LABEL: cos_v2f32:
1668 ; CHECK-NEXT: subq $40, %rsp
1669 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1670 ; CHECK-NEXT: callq cosf@PLT
1671 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1672 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
1673 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1674 ; CHECK-NEXT: callq cosf@PLT
1675 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1676 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1677 ; CHECK-NEXT: addq $40, %rsp
1679 %r = call <2 x float> @llvm.cos.v2f32(<2 x float> %x)
; exp on <2 x float>. The expected assembly makes exactly two calls to expf,
; one per element: element 0 is passed as-is, element 1 is moved into the low
; lane by vmovshdup, and the two results are merged with vinsertps.
1683 define <2 x float> @exp_v2f32(<2 x float> %x) nounwind {
1684 ; CHECK-LABEL: exp_v2f32:
1686 ; CHECK-NEXT: subq $40, %rsp
1687 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1688 ; CHECK-NEXT: callq expf@PLT
1689 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1690 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
1691 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1692 ; CHECK-NEXT: callq expf@PLT
1693 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1694 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1695 ; CHECK-NEXT: addq $40, %rsp
1697 %r = call <2 x float> @llvm.exp.v2f32(<2 x float> %x)
; exp2 on <2 x float>. The expected assembly makes exactly two calls to exp2f,
; one per element: element 0 is passed as-is, element 1 is moved into the low
; lane by vmovshdup, and the two results are merged with vinsertps.
1701 define <2 x float> @exp2_v2f32(<2 x float> %x) nounwind {
1702 ; CHECK-LABEL: exp2_v2f32:
1704 ; CHECK-NEXT: subq $40, %rsp
1705 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1706 ; CHECK-NEXT: callq exp2f@PLT
1707 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1708 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
1709 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1710 ; CHECK-NEXT: callq exp2f@PLT
1711 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1712 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1713 ; CHECK-NEXT: addq $40, %rsp
1715 %r = call <2 x float> @llvm.exp2.v2f32(<2 x float> %x)
; floor on <2 x float>. No libcall is expected: the operation is a single
; vroundps with immediate 9.
1719 define <2 x float> @floor_v2f32(<2 x float> %x) nounwind {
1720 ; CHECK-LABEL: floor_v2f32:
1722 ; CHECK-NEXT: vroundps $9, %xmm0, %xmm0
1724 %r = call <2 x float> @llvm.floor.v2f32(<2 x float> %x)
; log on <2 x float>. The expected assembly makes exactly two calls to logf,
; one per element: element 0 is passed as-is, element 1 is moved into the low
; lane by vmovshdup, and the two results are merged with vinsertps.
1728 define <2 x float> @log_v2f32(<2 x float> %x) nounwind {
1729 ; CHECK-LABEL: log_v2f32:
1731 ; CHECK-NEXT: subq $40, %rsp
1732 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1733 ; CHECK-NEXT: callq logf@PLT
1734 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1735 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
1736 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1737 ; CHECK-NEXT: callq logf@PLT
1738 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1739 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1740 ; CHECK-NEXT: addq $40, %rsp
1742 %r = call <2 x float> @llvm.log.v2f32(<2 x float> %x)
; log10 on <2 x float>. The expected assembly makes exactly two calls to
; log10f, one per element: element 0 is passed as-is, element 1 is moved into
; the low lane by vmovshdup, and the two results are merged with vinsertps.
1746 define <2 x float> @log10_v2f32(<2 x float> %x) nounwind {
1747 ; CHECK-LABEL: log10_v2f32:
1749 ; CHECK-NEXT: subq $40, %rsp
1750 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1751 ; CHECK-NEXT: callq log10f@PLT
1752 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1753 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
1754 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1755 ; CHECK-NEXT: callq log10f@PLT
1756 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1757 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1758 ; CHECK-NEXT: addq $40, %rsp
1760 %r = call <2 x float> @llvm.log10.v2f32(<2 x float> %x)
; log2 on <2 x float>. The expected assembly makes exactly two calls to log2f,
; one per element: element 0 is passed as-is, element 1 is moved into the low
; lane by vmovshdup, and the two results are merged with vinsertps.
1764 define <2 x float> @log2_v2f32(<2 x float> %x) nounwind {
1765 ; CHECK-LABEL: log2_v2f32:
1767 ; CHECK-NEXT: subq $40, %rsp
1768 ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
1769 ; CHECK-NEXT: callq log2f@PLT
1770 ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1771 ; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
1772 ; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
1773 ; CHECK-NEXT: callq log2f@PLT
1774 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1775 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
1776 ; CHECK-NEXT: addq $40, %rsp
1778 %r = call <2 x float> @llvm.log2.v2f32(<2 x float> %x)
; nearbyint on <2 x float>. No libcall is expected: the operation is a single
; vroundps with immediate 12. The double underscore in the function name is
; matched by the label assertion, so the two must stay in sync.
1782 define <2 x float> @nearbyint__v2f32(<2 x float> %x) nounwind {
1783 ; CHECK-LABEL: nearbyint__v2f32:
1785 ; CHECK-NEXT: vroundps $12, %xmm0, %xmm0
1787 %r = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %x)
; rint on <2 x float>. No libcall is expected: the operation is a single
; vroundps with immediate 4.
1791 define <2 x float> @rint_v2f32(<2 x float> %x) nounwind {
1792 ; CHECK-LABEL: rint_v2f32:
1794 ; CHECK-NEXT: vroundps $4, %xmm0, %xmm0
1796 %r = call <2 x float> @llvm.rint.v2f32(<2 x float> %x)
; round on <2 x float>. No libcall is expected. The expected sequence is
; inline: vandps and vorps with constant-pool operands build a per-element
; addend from the input, vaddps adds it to the input, and vroundps with
; immediate 11 produces the result.
1800 define <2 x float> @round_v2f32(<2 x float> %x) nounwind {
1801 ; CHECK-LABEL: round_v2f32:
1803 ; CHECK-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1804 ; CHECK-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1805 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
1806 ; CHECK-NEXT: vroundps $11, %xmm0, %xmm0
1808 %r = call <2 x float> @llvm.round.v2f32(<2 x float> %x)
; sqrt on <2 x float>. No libcall is expected: the operation is a single
; vsqrtps on the 128-bit register.
1812 define <2 x float> @sqrt_v2f32(<2 x float> %x) nounwind {
1813 ; CHECK-LABEL: sqrt_v2f32:
1815 ; CHECK-NEXT: vsqrtps %xmm0, %xmm0
1817 %r = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
; trunc on <2 x float>. No libcall is expected: the operation is a single
; vroundps with immediate 11.
1821 define <2 x float> @trunc_v2f32(<2 x float> %x) nounwind {
1822 ; CHECK-LABEL: trunc_v2f32:
1824 ; CHECK-NEXT: vroundps $11, %xmm0, %xmm0
1826 %r = call <2 x float> @llvm.trunc.v2f32(<2 x float> %x)