1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s
3 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
4 ; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s
5 ; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
; Fast llvm.sqrt.v4f32 on <4 x float>. The checks expect one scalar vsqrt.f32
; per lane, performed in place on s3, s2, s1 and s0 in that order.
7 define arm_aapcs_vfpcc <4 x float> @sqrt_float32_t(<4 x float> %src) {
8 ; CHECK-LABEL: sqrt_float32_t:
9 ; CHECK: @ %bb.0: @ %entry
10 ; CHECK-NEXT: vsqrt.f32 s3, s3
11 ; CHECK-NEXT: vsqrt.f32 s2, s2
12 ; CHECK-NEXT: vsqrt.f32 s1, s1
13 ; CHECK-NEXT: vsqrt.f32 s0, s0
16 %0 = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %src)
; Fast llvm.sqrt.v8f16 on <8 x half>. For each of s0-s3 the checks expect the
; top half to be extracted into s4 with vmovx.f16, both halves square-rooted
; with scalar vsqrt.f16, and the pair re-packed with vins.f16.
20 define arm_aapcs_vfpcc <8 x half> @sqrt_float16_t(<8 x half> %src) {
21 ; CHECK-LABEL: sqrt_float16_t:
22 ; CHECK: @ %bb.0: @ %entry
23 ; CHECK-NEXT: vmovx.f16 s4, s0
24 ; CHECK-NEXT: vsqrt.f16 s0, s0
25 ; CHECK-NEXT: vsqrt.f16 s4, s4
26 ; CHECK-NEXT: vins.f16 s0, s4
27 ; CHECK-NEXT: vmovx.f16 s4, s1
28 ; CHECK-NEXT: vsqrt.f16 s4, s4
29 ; CHECK-NEXT: vsqrt.f16 s1, s1
30 ; CHECK-NEXT: vins.f16 s1, s4
31 ; CHECK-NEXT: vmovx.f16 s4, s2
32 ; CHECK-NEXT: vsqrt.f16 s4, s4
33 ; CHECK-NEXT: vsqrt.f16 s2, s2
34 ; CHECK-NEXT: vins.f16 s2, s4
35 ; CHECK-NEXT: vmovx.f16 s4, s3
36 ; CHECK-NEXT: vsqrt.f16 s4, s4
37 ; CHECK-NEXT: vsqrt.f16 s3, s3
38 ; CHECK-NEXT: vins.f16 s3, s4
41 %0 = call fast <8 x half> @llvm.sqrt.v8f16(<8 x half> %src)
; Fast llvm.sqrt.v2f64 on <2 x double>. The checks expect r7/lr and d8-d9 to
; be saved, the input kept in q4, each double lane (d9 first, then d8) moved
; out to core registers and written back from r0/r1, and q4 copied to q0
; before the registers are restored.
45 define arm_aapcs_vfpcc <2 x double> @sqrt_float64_t(<2 x double> %src) {
46 ; CHECK-LABEL: sqrt_float64_t:
47 ; CHECK: @ %bb.0: @ %entry
48 ; CHECK-NEXT: .save {r7, lr}
49 ; CHECK-NEXT: push {r7, lr}
50 ; CHECK-NEXT: .vsave {d8, d9}
51 ; CHECK-NEXT: vpush {d8, d9}
52 ; CHECK-NEXT: vmov q4, q0
53 ; CHECK-NEXT: vmov r0, r1, d9
55 ; CHECK-NEXT: vmov r2, r3, d8
56 ; CHECK-NEXT: vmov d9, r0, r1
57 ; CHECK-NEXT: mov r0, r2
58 ; CHECK-NEXT: mov r1, r3
60 ; CHECK-NEXT: vmov d8, r0, r1
61 ; CHECK-NEXT: vmov q0, q4
62 ; CHECK-NEXT: vpop {d8, d9}
63 ; CHECK-NEXT: pop {r7, pc}
65 %0 = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %src)
; Fast llvm.cos.v4f32 on <4 x float>. The checks expect the input to be kept
; in q4, its lanes moved out to core registers via d9 and then d8, and s19,
; s18, s17 and s16 each refilled from a core register before q4 is copied to
; q0 for the return.
69 define arm_aapcs_vfpcc <4 x float> @cos_float32_t(<4 x float> %src) {
70 ; CHECK-LABEL: cos_float32_t:
71 ; CHECK: @ %bb.0: @ %entry
72 ; CHECK-NEXT: .save {r4, r5, r7, lr}
73 ; CHECK-NEXT: push {r4, r5, r7, lr}
74 ; CHECK-NEXT: .vsave {d8, d9}
75 ; CHECK-NEXT: vpush {d8, d9}
76 ; CHECK-NEXT: vmov q4, q0
77 ; CHECK-NEXT: vmov r0, r4, d9
79 ; CHECK-NEXT: mov r5, r0
80 ; CHECK-NEXT: mov r0, r4
82 ; CHECK-NEXT: vmov r4, r1, d8
83 ; CHECK-NEXT: vmov s19, r0
84 ; CHECK-NEXT: vmov s18, r5
85 ; CHECK-NEXT: mov r0, r1
87 ; CHECK-NEXT: vmov s17, r0
88 ; CHECK-NEXT: mov r0, r4
90 ; CHECK-NEXT: vmov s16, r0
91 ; CHECK-NEXT: vmov q0, q4
92 ; CHECK-NEXT: vpop {d8, d9}
93 ; CHECK-NEXT: pop {r4, r5, r7, pc}
95 %0 = call fast <4 x float> @llvm.cos.v4f32(<4 x float> %src)
; Fast llvm.cos.v8f16 on <8 x half>. The checks expect the input to be kept in
; q4; each half lane is widened with vcvtb/vcvtt.f32.f16, passed in r0 to
; `bl cosf`, and the r0 result narrowed with vcvtb/vcvtt.f16.f32 into q5
; (s20-s23), which is copied to q0 for the return.
99 define arm_aapcs_vfpcc <8 x half> @cos_float16_t(<8 x half> %src) {
100 ; CHECK-LABEL: cos_float16_t:
101 ; CHECK: @ %bb.0: @ %entry
102 ; CHECK-NEXT: .save {r7, lr}
103 ; CHECK-NEXT: push {r7, lr}
104 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
105 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
106 ; CHECK-NEXT: vmov q4, q0
107 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16
108 ; CHECK-NEXT: vmov r0, s0
109 ; CHECK-NEXT: bl cosf
110 ; CHECK-NEXT: vcvtt.f32.f16 s0, s16
111 ; CHECK-NEXT: vmov s16, r0
112 ; CHECK-NEXT: vmov r1, s0
113 ; CHECK-NEXT: mov r0, r1
114 ; CHECK-NEXT: bl cosf
115 ; CHECK-NEXT: vmov s0, r0
116 ; CHECK-NEXT: vcvtb.f16.f32 s20, s16
117 ; CHECK-NEXT: vcvtt.f16.f32 s20, s0
118 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17
119 ; CHECK-NEXT: vmov r0, s0
120 ; CHECK-NEXT: bl cosf
121 ; CHECK-NEXT: vmov s0, r0
122 ; CHECK-NEXT: vcvtb.f16.f32 s21, s0
123 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17
124 ; CHECK-NEXT: vmov r0, s0
125 ; CHECK-NEXT: bl cosf
126 ; CHECK-NEXT: vmov s0, r0
127 ; CHECK-NEXT: vcvtt.f16.f32 s21, s0
128 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18
129 ; CHECK-NEXT: vmov r0, s0
130 ; CHECK-NEXT: bl cosf
131 ; CHECK-NEXT: vmov s0, r0
132 ; CHECK-NEXT: vcvtb.f16.f32 s22, s0
133 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18
134 ; CHECK-NEXT: vmov r0, s0
135 ; CHECK-NEXT: bl cosf
136 ; CHECK-NEXT: vmov s0, r0
137 ; CHECK-NEXT: vcvtt.f16.f32 s22, s0
138 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19
139 ; CHECK-NEXT: vmov r0, s0
140 ; CHECK-NEXT: bl cosf
141 ; CHECK-NEXT: vmov s0, r0
142 ; CHECK-NEXT: vcvtb.f16.f32 s23, s0
143 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19
144 ; CHECK-NEXT: vmov r0, s0
145 ; CHECK-NEXT: bl cosf
146 ; CHECK-NEXT: vmov s0, r0
147 ; CHECK-NEXT: vcvtt.f16.f32 s23, s0
148 ; CHECK-NEXT: vmov q0, q5
149 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
150 ; CHECK-NEXT: pop {r7, pc}
152 %0 = call fast <8 x half> @llvm.cos.v8f16(<8 x half> %src)
; Fast llvm.cos.v2f64 on <2 x double>. The checks expect r7/lr and d8-d9 to be
; saved, the input kept in q4, each double lane (d9 first, then d8) moved out
; to core registers and written back from r0/r1, and q4 copied to q0 before
; the registers are restored.
156 define arm_aapcs_vfpcc <2 x double> @cos_float64_t(<2 x double> %src) {
157 ; CHECK-LABEL: cos_float64_t:
158 ; CHECK: @ %bb.0: @ %entry
159 ; CHECK-NEXT: .save {r7, lr}
160 ; CHECK-NEXT: push {r7, lr}
161 ; CHECK-NEXT: .vsave {d8, d9}
162 ; CHECK-NEXT: vpush {d8, d9}
163 ; CHECK-NEXT: vmov q4, q0
164 ; CHECK-NEXT: vmov r0, r1, d9
166 ; CHECK-NEXT: vmov r2, r3, d8
167 ; CHECK-NEXT: vmov d9, r0, r1
168 ; CHECK-NEXT: mov r0, r2
169 ; CHECK-NEXT: mov r1, r3
171 ; CHECK-NEXT: vmov d8, r0, r1
172 ; CHECK-NEXT: vmov q0, q4
173 ; CHECK-NEXT: vpop {d8, d9}
174 ; CHECK-NEXT: pop {r7, pc}
176 %0 = call fast <2 x double> @llvm.cos.v2f64(<2 x double> %src)
; Fast llvm.sin.v4f32 on <4 x float>. The checks expect the input to be kept
; in q4 and each of the four lanes to be passed in r0 to `bl sinf`; r4/r5 hold
; values across the calls, and the r0 results are moved into s19, s18, s17 and
; s16 before q4 is copied to q0 for the return.
180 define arm_aapcs_vfpcc <4 x float> @sin_float32_t(<4 x float> %src) {
181 ; CHECK-LABEL: sin_float32_t:
182 ; CHECK: @ %bb.0: @ %entry
183 ; CHECK-NEXT: .save {r4, r5, r7, lr}
184 ; CHECK-NEXT: push {r4, r5, r7, lr}
185 ; CHECK-NEXT: .vsave {d8, d9}
186 ; CHECK-NEXT: vpush {d8, d9}
187 ; CHECK-NEXT: vmov q4, q0
188 ; CHECK-NEXT: vmov r0, r4, d9
189 ; CHECK-NEXT: bl sinf
190 ; CHECK-NEXT: mov r5, r0
191 ; CHECK-NEXT: mov r0, r4
192 ; CHECK-NEXT: bl sinf
193 ; CHECK-NEXT: vmov r4, r1, d8
194 ; CHECK-NEXT: vmov s19, r0
195 ; CHECK-NEXT: vmov s18, r5
196 ; CHECK-NEXT: mov r0, r1
197 ; CHECK-NEXT: bl sinf
198 ; CHECK-NEXT: vmov s17, r0
199 ; CHECK-NEXT: mov r0, r4
200 ; CHECK-NEXT: bl sinf
201 ; CHECK-NEXT: vmov s16, r0
202 ; CHECK-NEXT: vmov q0, q4
203 ; CHECK-NEXT: vpop {d8, d9}
204 ; CHECK-NEXT: pop {r4, r5, r7, pc}
206 %0 = call fast <4 x float> @llvm.sin.v4f32(<4 x float> %src)
; Fast llvm.sin.v8f16 on <8 x half>. The checks expect the input to be kept in
; q4; each half lane is widened with vcvtb/vcvtt.f32.f16, passed in r0 to
; `bl sinf`, and the r0 result narrowed with vcvtb/vcvtt.f16.f32 into q5
; (s20-s23), which is copied to q0 for the return.
210 define arm_aapcs_vfpcc <8 x half> @sin_float16_t(<8 x half> %src) {
211 ; CHECK-LABEL: sin_float16_t:
212 ; CHECK: @ %bb.0: @ %entry
213 ; CHECK-NEXT: .save {r7, lr}
214 ; CHECK-NEXT: push {r7, lr}
215 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
216 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
217 ; CHECK-NEXT: vmov q4, q0
218 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16
219 ; CHECK-NEXT: vmov r0, s0
220 ; CHECK-NEXT: bl sinf
221 ; CHECK-NEXT: vcvtt.f32.f16 s0, s16
222 ; CHECK-NEXT: vmov s16, r0
223 ; CHECK-NEXT: vmov r1, s0
224 ; CHECK-NEXT: mov r0, r1
225 ; CHECK-NEXT: bl sinf
226 ; CHECK-NEXT: vmov s0, r0
227 ; CHECK-NEXT: vcvtb.f16.f32 s20, s16
228 ; CHECK-NEXT: vcvtt.f16.f32 s20, s0
229 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17
230 ; CHECK-NEXT: vmov r0, s0
231 ; CHECK-NEXT: bl sinf
232 ; CHECK-NEXT: vmov s0, r0
233 ; CHECK-NEXT: vcvtb.f16.f32 s21, s0
234 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17
235 ; CHECK-NEXT: vmov r0, s0
236 ; CHECK-NEXT: bl sinf
237 ; CHECK-NEXT: vmov s0, r0
238 ; CHECK-NEXT: vcvtt.f16.f32 s21, s0
239 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18
240 ; CHECK-NEXT: vmov r0, s0
241 ; CHECK-NEXT: bl sinf
242 ; CHECK-NEXT: vmov s0, r0
243 ; CHECK-NEXT: vcvtb.f16.f32 s22, s0
244 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18
245 ; CHECK-NEXT: vmov r0, s0
246 ; CHECK-NEXT: bl sinf
247 ; CHECK-NEXT: vmov s0, r0
248 ; CHECK-NEXT: vcvtt.f16.f32 s22, s0
249 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19
250 ; CHECK-NEXT: vmov r0, s0
251 ; CHECK-NEXT: bl sinf
252 ; CHECK-NEXT: vmov s0, r0
253 ; CHECK-NEXT: vcvtb.f16.f32 s23, s0
254 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19
255 ; CHECK-NEXT: vmov r0, s0
256 ; CHECK-NEXT: bl sinf
257 ; CHECK-NEXT: vmov s0, r0
258 ; CHECK-NEXT: vcvtt.f16.f32 s23, s0
259 ; CHECK-NEXT: vmov q0, q5
260 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
261 ; CHECK-NEXT: pop {r7, pc}
263 %0 = call fast <8 x half> @llvm.sin.v8f16(<8 x half> %src)
; Fast llvm.sin.v2f64 on <2 x double>. The checks expect r7/lr and d8-d9 to be
; saved, the input kept in q4, each double lane (d9 first, then d8) moved out
; to core registers and written back from r0/r1, and q4 copied to q0 before
; the registers are restored.
267 define arm_aapcs_vfpcc <2 x double> @sin_float64_t(<2 x double> %src) {
268 ; CHECK-LABEL: sin_float64_t:
269 ; CHECK: @ %bb.0: @ %entry
270 ; CHECK-NEXT: .save {r7, lr}
271 ; CHECK-NEXT: push {r7, lr}
272 ; CHECK-NEXT: .vsave {d8, d9}
273 ; CHECK-NEXT: vpush {d8, d9}
274 ; CHECK-NEXT: vmov q4, q0
275 ; CHECK-NEXT: vmov r0, r1, d9
277 ; CHECK-NEXT: vmov r2, r3, d8
278 ; CHECK-NEXT: vmov d9, r0, r1
279 ; CHECK-NEXT: mov r0, r2
280 ; CHECK-NEXT: mov r1, r3
282 ; CHECK-NEXT: vmov d8, r0, r1
283 ; CHECK-NEXT: vmov q0, q4
284 ; CHECK-NEXT: vpop {d8, d9}
285 ; CHECK-NEXT: pop {r7, pc}
287 %0 = call fast <2 x double> @llvm.sin.v2f64(<2 x double> %src)
; Fast llvm.exp.v4f32 on <4 x float>. The checks expect the input to be kept
; in q4 and each of the four lanes to be passed in r0 to `bl expf`; r4/r5 hold
; values across the calls, and the r0 results are moved into s19, s18, s17 and
; s16 before q4 is copied to q0 for the return.
291 define arm_aapcs_vfpcc <4 x float> @exp_float32_t(<4 x float> %src) {
292 ; CHECK-LABEL: exp_float32_t:
293 ; CHECK: @ %bb.0: @ %entry
294 ; CHECK-NEXT: .save {r4, r5, r7, lr}
295 ; CHECK-NEXT: push {r4, r5, r7, lr}
296 ; CHECK-NEXT: .vsave {d8, d9}
297 ; CHECK-NEXT: vpush {d8, d9}
298 ; CHECK-NEXT: vmov q4, q0
299 ; CHECK-NEXT: vmov r0, r4, d9
300 ; CHECK-NEXT: bl expf
301 ; CHECK-NEXT: mov r5, r0
302 ; CHECK-NEXT: mov r0, r4
303 ; CHECK-NEXT: bl expf
304 ; CHECK-NEXT: vmov r4, r1, d8
305 ; CHECK-NEXT: vmov s19, r0
306 ; CHECK-NEXT: vmov s18, r5
307 ; CHECK-NEXT: mov r0, r1
308 ; CHECK-NEXT: bl expf
309 ; CHECK-NEXT: vmov s17, r0
310 ; CHECK-NEXT: mov r0, r4
311 ; CHECK-NEXT: bl expf
312 ; CHECK-NEXT: vmov s16, r0
313 ; CHECK-NEXT: vmov q0, q4
314 ; CHECK-NEXT: vpop {d8, d9}
315 ; CHECK-NEXT: pop {r4, r5, r7, pc}
317 %0 = call fast <4 x float> @llvm.exp.v4f32(<4 x float> %src)
; Fast llvm.exp.v8f16 on <8 x half>. The checks expect the input to be kept in
; q4; each half lane is widened with vcvtb/vcvtt.f32.f16, passed in r0 to
; `bl expf`, and the r0 result narrowed with vcvtb/vcvtt.f16.f32 into q5
; (s20-s23), which is copied to q0 for the return.
321 define arm_aapcs_vfpcc <8 x half> @exp_float16_t(<8 x half> %src) {
322 ; CHECK-LABEL: exp_float16_t:
323 ; CHECK: @ %bb.0: @ %entry
324 ; CHECK-NEXT: .save {r7, lr}
325 ; CHECK-NEXT: push {r7, lr}
326 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
327 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
328 ; CHECK-NEXT: vmov q4, q0
329 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16
330 ; CHECK-NEXT: vmov r0, s0
331 ; CHECK-NEXT: bl expf
332 ; CHECK-NEXT: vcvtt.f32.f16 s0, s16
333 ; CHECK-NEXT: vmov s16, r0
334 ; CHECK-NEXT: vmov r1, s0
335 ; CHECK-NEXT: mov r0, r1
336 ; CHECK-NEXT: bl expf
337 ; CHECK-NEXT: vmov s0, r0
338 ; CHECK-NEXT: vcvtb.f16.f32 s20, s16
339 ; CHECK-NEXT: vcvtt.f16.f32 s20, s0
340 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17
341 ; CHECK-NEXT: vmov r0, s0
342 ; CHECK-NEXT: bl expf
343 ; CHECK-NEXT: vmov s0, r0
344 ; CHECK-NEXT: vcvtb.f16.f32 s21, s0
345 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17
346 ; CHECK-NEXT: vmov r0, s0
347 ; CHECK-NEXT: bl expf
348 ; CHECK-NEXT: vmov s0, r0
349 ; CHECK-NEXT: vcvtt.f16.f32 s21, s0
350 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18
351 ; CHECK-NEXT: vmov r0, s0
352 ; CHECK-NEXT: bl expf
353 ; CHECK-NEXT: vmov s0, r0
354 ; CHECK-NEXT: vcvtb.f16.f32 s22, s0
355 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18
356 ; CHECK-NEXT: vmov r0, s0
357 ; CHECK-NEXT: bl expf
358 ; CHECK-NEXT: vmov s0, r0
359 ; CHECK-NEXT: vcvtt.f16.f32 s22, s0
360 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19
361 ; CHECK-NEXT: vmov r0, s0
362 ; CHECK-NEXT: bl expf
363 ; CHECK-NEXT: vmov s0, r0
364 ; CHECK-NEXT: vcvtb.f16.f32 s23, s0
365 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19
366 ; CHECK-NEXT: vmov r0, s0
367 ; CHECK-NEXT: bl expf
368 ; CHECK-NEXT: vmov s0, r0
369 ; CHECK-NEXT: vcvtt.f16.f32 s23, s0
370 ; CHECK-NEXT: vmov q0, q5
371 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
372 ; CHECK-NEXT: pop {r7, pc}
374 %0 = call fast <8 x half> @llvm.exp.v8f16(<8 x half> %src)
; Fast llvm.exp.v2f64 on <2 x double>. The checks expect r7/lr and d8-d9 to be
; saved, the input kept in q4, each double lane (d9 first, then d8) moved out
; to core registers and written back from r0/r1, and q4 copied to q0 before
; the registers are restored.
378 define arm_aapcs_vfpcc <2 x double> @exp_float64_t(<2 x double> %src) {
379 ; CHECK-LABEL: exp_float64_t:
380 ; CHECK: @ %bb.0: @ %entry
381 ; CHECK-NEXT: .save {r7, lr}
382 ; CHECK-NEXT: push {r7, lr}
383 ; CHECK-NEXT: .vsave {d8, d9}
384 ; CHECK-NEXT: vpush {d8, d9}
385 ; CHECK-NEXT: vmov q4, q0
386 ; CHECK-NEXT: vmov r0, r1, d9
388 ; CHECK-NEXT: vmov r2, r3, d8
389 ; CHECK-NEXT: vmov d9, r0, r1
390 ; CHECK-NEXT: mov r0, r2
391 ; CHECK-NEXT: mov r1, r3
393 ; CHECK-NEXT: vmov d8, r0, r1
394 ; CHECK-NEXT: vmov q0, q4
395 ; CHECK-NEXT: vpop {d8, d9}
396 ; CHECK-NEXT: pop {r7, pc}
398 %0 = call fast <2 x double> @llvm.exp.v2f64(<2 x double> %src)
; Fast llvm.exp2.v4f32 on <4 x float>. The checks expect the input to be kept
; in q4 and each of the four lanes to be passed in r0 to `bl exp2f`; r4/r5
; hold values across the calls, and the r0 results are moved into s19, s18,
; s17 and s16 before q4 is copied to q0 for the return.
402 define arm_aapcs_vfpcc <4 x float> @exp2_float32_t(<4 x float> %src) {
403 ; CHECK-LABEL: exp2_float32_t:
404 ; CHECK: @ %bb.0: @ %entry
405 ; CHECK-NEXT: .save {r4, r5, r7, lr}
406 ; CHECK-NEXT: push {r4, r5, r7, lr}
407 ; CHECK-NEXT: .vsave {d8, d9}
408 ; CHECK-NEXT: vpush {d8, d9}
409 ; CHECK-NEXT: vmov q4, q0
410 ; CHECK-NEXT: vmov r0, r4, d9
411 ; CHECK-NEXT: bl exp2f
412 ; CHECK-NEXT: mov r5, r0
413 ; CHECK-NEXT: mov r0, r4
414 ; CHECK-NEXT: bl exp2f
415 ; CHECK-NEXT: vmov r4, r1, d8
416 ; CHECK-NEXT: vmov s19, r0
417 ; CHECK-NEXT: vmov s18, r5
418 ; CHECK-NEXT: mov r0, r1
419 ; CHECK-NEXT: bl exp2f
420 ; CHECK-NEXT: vmov s17, r0
421 ; CHECK-NEXT: mov r0, r4
422 ; CHECK-NEXT: bl exp2f
423 ; CHECK-NEXT: vmov s16, r0
424 ; CHECK-NEXT: vmov q0, q4
425 ; CHECK-NEXT: vpop {d8, d9}
426 ; CHECK-NEXT: pop {r4, r5, r7, pc}
428 %0 = call fast <4 x float> @llvm.exp2.v4f32(<4 x float> %src)
; Fast llvm.exp2.v8f16 on <8 x half>. The checks expect the input to be kept
; in q4; each half lane is widened with vcvtb/vcvtt.f32.f16, passed in r0 to
; `bl exp2f`, and the r0 result narrowed with vcvtb/vcvtt.f16.f32 into q5
; (s20-s23), which is copied to q0 for the return.
432 define arm_aapcs_vfpcc <8 x half> @exp2_float16_t(<8 x half> %src) {
433 ; CHECK-LABEL: exp2_float16_t:
434 ; CHECK: @ %bb.0: @ %entry
435 ; CHECK-NEXT: .save {r7, lr}
436 ; CHECK-NEXT: push {r7, lr}
437 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
438 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
439 ; CHECK-NEXT: vmov q4, q0
440 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16
441 ; CHECK-NEXT: vmov r0, s0
442 ; CHECK-NEXT: bl exp2f
443 ; CHECK-NEXT: vcvtt.f32.f16 s0, s16
444 ; CHECK-NEXT: vmov s16, r0
445 ; CHECK-NEXT: vmov r1, s0
446 ; CHECK-NEXT: mov r0, r1
447 ; CHECK-NEXT: bl exp2f
448 ; CHECK-NEXT: vmov s0, r0
449 ; CHECK-NEXT: vcvtb.f16.f32 s20, s16
450 ; CHECK-NEXT: vcvtt.f16.f32 s20, s0
451 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17
452 ; CHECK-NEXT: vmov r0, s0
453 ; CHECK-NEXT: bl exp2f
454 ; CHECK-NEXT: vmov s0, r0
455 ; CHECK-NEXT: vcvtb.f16.f32 s21, s0
456 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17
457 ; CHECK-NEXT: vmov r0, s0
458 ; CHECK-NEXT: bl exp2f
459 ; CHECK-NEXT: vmov s0, r0
460 ; CHECK-NEXT: vcvtt.f16.f32 s21, s0
461 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18
462 ; CHECK-NEXT: vmov r0, s0
463 ; CHECK-NEXT: bl exp2f
464 ; CHECK-NEXT: vmov s0, r0
465 ; CHECK-NEXT: vcvtb.f16.f32 s22, s0
466 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18
467 ; CHECK-NEXT: vmov r0, s0
468 ; CHECK-NEXT: bl exp2f
469 ; CHECK-NEXT: vmov s0, r0
470 ; CHECK-NEXT: vcvtt.f16.f32 s22, s0
471 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19
472 ; CHECK-NEXT: vmov r0, s0
473 ; CHECK-NEXT: bl exp2f
474 ; CHECK-NEXT: vmov s0, r0
475 ; CHECK-NEXT: vcvtb.f16.f32 s23, s0
476 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19
477 ; CHECK-NEXT: vmov r0, s0
478 ; CHECK-NEXT: bl exp2f
479 ; CHECK-NEXT: vmov s0, r0
480 ; CHECK-NEXT: vcvtt.f16.f32 s23, s0
481 ; CHECK-NEXT: vmov q0, q5
482 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
483 ; CHECK-NEXT: pop {r7, pc}
485 %0 = call fast <8 x half> @llvm.exp2.v8f16(<8 x half> %src)
; Fast llvm.exp2.v2f64 on <2 x double>. The checks expect the input to be kept
; in q4 and each double lane (d9 first, then d8) to be passed in r0/r1 to
; `bl exp2`, with the r0/r1 result written back to the same d register before
; q4 is copied to q0 for the return.
489 define arm_aapcs_vfpcc <2 x double> @exp2_float64_t(<2 x double> %src) {
490 ; CHECK-LABEL: exp2_float64_t:
491 ; CHECK: @ %bb.0: @ %entry
492 ; CHECK-NEXT: .save {r7, lr}
493 ; CHECK-NEXT: push {r7, lr}
494 ; CHECK-NEXT: .vsave {d8, d9}
495 ; CHECK-NEXT: vpush {d8, d9}
496 ; CHECK-NEXT: vmov q4, q0
497 ; CHECK-NEXT: vmov r0, r1, d9
498 ; CHECK-NEXT: bl exp2
499 ; CHECK-NEXT: vmov r2, r3, d8
500 ; CHECK-NEXT: vmov d9, r0, r1
501 ; CHECK-NEXT: mov r0, r2
502 ; CHECK-NEXT: mov r1, r3
503 ; CHECK-NEXT: bl exp2
504 ; CHECK-NEXT: vmov d8, r0, r1
505 ; CHECK-NEXT: vmov q0, q4
506 ; CHECK-NEXT: vpop {d8, d9}
507 ; CHECK-NEXT: pop {r7, pc}
509 %0 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %src)
; Fast llvm.log.v4f32 on <4 x float>. The checks expect the input to be kept
; in q4 and each of the four lanes to be passed in r0 to `bl logf`; r4/r5 hold
; values across the calls, and the r0 results are moved into s19, s18, s17 and
; s16 before q4 is copied to q0 for the return.
513 define arm_aapcs_vfpcc <4 x float> @log_float32_t(<4 x float> %src) {
514 ; CHECK-LABEL: log_float32_t:
515 ; CHECK: @ %bb.0: @ %entry
516 ; CHECK-NEXT: .save {r4, r5, r7, lr}
517 ; CHECK-NEXT: push {r4, r5, r7, lr}
518 ; CHECK-NEXT: .vsave {d8, d9}
519 ; CHECK-NEXT: vpush {d8, d9}
520 ; CHECK-NEXT: vmov q4, q0
521 ; CHECK-NEXT: vmov r0, r4, d9
522 ; CHECK-NEXT: bl logf
523 ; CHECK-NEXT: mov r5, r0
524 ; CHECK-NEXT: mov r0, r4
525 ; CHECK-NEXT: bl logf
526 ; CHECK-NEXT: vmov r4, r1, d8
527 ; CHECK-NEXT: vmov s19, r0
528 ; CHECK-NEXT: vmov s18, r5
529 ; CHECK-NEXT: mov r0, r1
530 ; CHECK-NEXT: bl logf
531 ; CHECK-NEXT: vmov s17, r0
532 ; CHECK-NEXT: mov r0, r4
533 ; CHECK-NEXT: bl logf
534 ; CHECK-NEXT: vmov s16, r0
535 ; CHECK-NEXT: vmov q0, q4
536 ; CHECK-NEXT: vpop {d8, d9}
537 ; CHECK-NEXT: pop {r4, r5, r7, pc}
539 %0 = call fast <4 x float> @llvm.log.v4f32(<4 x float> %src)
; Fast llvm.log.v8f16 on <8 x half>. The checks expect the input to be kept in
; q4; each half lane is widened with vcvtb/vcvtt.f32.f16, passed in r0 to
; `bl logf`, and the r0 result narrowed with vcvtb/vcvtt.f16.f32 into q5
; (s20-s23), which is copied to q0 for the return.
543 define arm_aapcs_vfpcc <8 x half> @log_float16_t(<8 x half> %src) {
544 ; CHECK-LABEL: log_float16_t:
545 ; CHECK: @ %bb.0: @ %entry
546 ; CHECK-NEXT: .save {r7, lr}
547 ; CHECK-NEXT: push {r7, lr}
548 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
549 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
550 ; CHECK-NEXT: vmov q4, q0
551 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16
552 ; CHECK-NEXT: vmov r0, s0
553 ; CHECK-NEXT: bl logf
554 ; CHECK-NEXT: vcvtt.f32.f16 s0, s16
555 ; CHECK-NEXT: vmov s16, r0
556 ; CHECK-NEXT: vmov r1, s0
557 ; CHECK-NEXT: mov r0, r1
558 ; CHECK-NEXT: bl logf
559 ; CHECK-NEXT: vmov s0, r0
560 ; CHECK-NEXT: vcvtb.f16.f32 s20, s16
561 ; CHECK-NEXT: vcvtt.f16.f32 s20, s0
562 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17
563 ; CHECK-NEXT: vmov r0, s0
564 ; CHECK-NEXT: bl logf
565 ; CHECK-NEXT: vmov s0, r0
566 ; CHECK-NEXT: vcvtb.f16.f32 s21, s0
567 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17
568 ; CHECK-NEXT: vmov r0, s0
569 ; CHECK-NEXT: bl logf
570 ; CHECK-NEXT: vmov s0, r0
571 ; CHECK-NEXT: vcvtt.f16.f32 s21, s0
572 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18
573 ; CHECK-NEXT: vmov r0, s0
574 ; CHECK-NEXT: bl logf
575 ; CHECK-NEXT: vmov s0, r0
576 ; CHECK-NEXT: vcvtb.f16.f32 s22, s0
577 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18
578 ; CHECK-NEXT: vmov r0, s0
579 ; CHECK-NEXT: bl logf
580 ; CHECK-NEXT: vmov s0, r0
581 ; CHECK-NEXT: vcvtt.f16.f32 s22, s0
582 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19
583 ; CHECK-NEXT: vmov r0, s0
584 ; CHECK-NEXT: bl logf
585 ; CHECK-NEXT: vmov s0, r0
586 ; CHECK-NEXT: vcvtb.f16.f32 s23, s0
587 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19
588 ; CHECK-NEXT: vmov r0, s0
589 ; CHECK-NEXT: bl logf
590 ; CHECK-NEXT: vmov s0, r0
591 ; CHECK-NEXT: vcvtt.f16.f32 s23, s0
592 ; CHECK-NEXT: vmov q0, q5
593 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
594 ; CHECK-NEXT: pop {r7, pc}
596 %0 = call fast <8 x half> @llvm.log.v8f16(<8 x half> %src)
; Fast llvm.log.v2f64 on <2 x double>. The checks expect r7/lr and d8-d9 to be
; saved, the input kept in q4, each double lane (d9 first, then d8) moved out
; to core registers and written back from r0/r1, and q4 copied to q0 before
; the registers are restored.
600 define arm_aapcs_vfpcc <2 x double> @log_float64_t(<2 x double> %src) {
601 ; CHECK-LABEL: log_float64_t:
602 ; CHECK: @ %bb.0: @ %entry
603 ; CHECK-NEXT: .save {r7, lr}
604 ; CHECK-NEXT: push {r7, lr}
605 ; CHECK-NEXT: .vsave {d8, d9}
606 ; CHECK-NEXT: vpush {d8, d9}
607 ; CHECK-NEXT: vmov q4, q0
608 ; CHECK-NEXT: vmov r0, r1, d9
610 ; CHECK-NEXT: vmov r2, r3, d8
611 ; CHECK-NEXT: vmov d9, r0, r1
612 ; CHECK-NEXT: mov r0, r2
613 ; CHECK-NEXT: mov r1, r3
615 ; CHECK-NEXT: vmov d8, r0, r1
616 ; CHECK-NEXT: vmov q0, q4
617 ; CHECK-NEXT: vpop {d8, d9}
618 ; CHECK-NEXT: pop {r7, pc}
620 %0 = call fast <2 x double> @llvm.log.v2f64(<2 x double> %src)
; Fast llvm.log2.v4f32 on <4 x float>. The checks expect the input to be kept
; in q4 and each of the four lanes to be passed in r0 to `bl log2f`; r4/r5
; hold values across the calls, and the r0 results are moved into s19, s18,
; s17 and s16 before q4 is copied to q0 for the return.
624 define arm_aapcs_vfpcc <4 x float> @log2_float32_t(<4 x float> %src) {
625 ; CHECK-LABEL: log2_float32_t:
626 ; CHECK: @ %bb.0: @ %entry
627 ; CHECK-NEXT: .save {r4, r5, r7, lr}
628 ; CHECK-NEXT: push {r4, r5, r7, lr}
629 ; CHECK-NEXT: .vsave {d8, d9}
630 ; CHECK-NEXT: vpush {d8, d9}
631 ; CHECK-NEXT: vmov q4, q0
632 ; CHECK-NEXT: vmov r0, r4, d9
633 ; CHECK-NEXT: bl log2f
634 ; CHECK-NEXT: mov r5, r0
635 ; CHECK-NEXT: mov r0, r4
636 ; CHECK-NEXT: bl log2f
637 ; CHECK-NEXT: vmov r4, r1, d8
638 ; CHECK-NEXT: vmov s19, r0
639 ; CHECK-NEXT: vmov s18, r5
640 ; CHECK-NEXT: mov r0, r1
641 ; CHECK-NEXT: bl log2f
642 ; CHECK-NEXT: vmov s17, r0
643 ; CHECK-NEXT: mov r0, r4
644 ; CHECK-NEXT: bl log2f
645 ; CHECK-NEXT: vmov s16, r0
646 ; CHECK-NEXT: vmov q0, q4
647 ; CHECK-NEXT: vpop {d8, d9}
648 ; CHECK-NEXT: pop {r4, r5, r7, pc}
650 %0 = call fast <4 x float> @llvm.log2.v4f32(<4 x float> %src)
; Fast llvm.log2.v8f16 on <8 x half>. The checks expect the input to be kept
; in q4; each half lane is widened with vcvtb/vcvtt.f32.f16, passed in r0 to
; `bl log2f`, and the r0 result narrowed with vcvtb/vcvtt.f16.f32 into q5
; (s20-s23), which is copied to q0 for the return.
654 define arm_aapcs_vfpcc <8 x half> @log2_float16_t(<8 x half> %src) {
655 ; CHECK-LABEL: log2_float16_t:
656 ; CHECK: @ %bb.0: @ %entry
657 ; CHECK-NEXT: .save {r7, lr}
658 ; CHECK-NEXT: push {r7, lr}
659 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
660 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
661 ; CHECK-NEXT: vmov q4, q0
662 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16
663 ; CHECK-NEXT: vmov r0, s0
664 ; CHECK-NEXT: bl log2f
665 ; CHECK-NEXT: vcvtt.f32.f16 s0, s16
666 ; CHECK-NEXT: vmov s16, r0
667 ; CHECK-NEXT: vmov r1, s0
668 ; CHECK-NEXT: mov r0, r1
669 ; CHECK-NEXT: bl log2f
670 ; CHECK-NEXT: vmov s0, r0
671 ; CHECK-NEXT: vcvtb.f16.f32 s20, s16
672 ; CHECK-NEXT: vcvtt.f16.f32 s20, s0
673 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17
674 ; CHECK-NEXT: vmov r0, s0
675 ; CHECK-NEXT: bl log2f
676 ; CHECK-NEXT: vmov s0, r0
677 ; CHECK-NEXT: vcvtb.f16.f32 s21, s0
678 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17
679 ; CHECK-NEXT: vmov r0, s0
680 ; CHECK-NEXT: bl log2f
681 ; CHECK-NEXT: vmov s0, r0
682 ; CHECK-NEXT: vcvtt.f16.f32 s21, s0
683 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18
684 ; CHECK-NEXT: vmov r0, s0
685 ; CHECK-NEXT: bl log2f
686 ; CHECK-NEXT: vmov s0, r0
687 ; CHECK-NEXT: vcvtb.f16.f32 s22, s0
688 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18
689 ; CHECK-NEXT: vmov r0, s0
690 ; CHECK-NEXT: bl log2f
691 ; CHECK-NEXT: vmov s0, r0
692 ; CHECK-NEXT: vcvtt.f16.f32 s22, s0
693 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19
694 ; CHECK-NEXT: vmov r0, s0
695 ; CHECK-NEXT: bl log2f
696 ; CHECK-NEXT: vmov s0, r0
697 ; CHECK-NEXT: vcvtb.f16.f32 s23, s0
698 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19
699 ; CHECK-NEXT: vmov r0, s0
700 ; CHECK-NEXT: bl log2f
701 ; CHECK-NEXT: vmov s0, r0
702 ; CHECK-NEXT: vcvtt.f16.f32 s23, s0
703 ; CHECK-NEXT: vmov q0, q5
704 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
705 ; CHECK-NEXT: pop {r7, pc}
707 %0 = call fast <8 x half> @llvm.log2.v8f16(<8 x half> %src)
; Fast llvm.log2.v2f64 on <2 x double>. The checks expect the input to be kept
; in q4 and each double lane (d9 first, then d8) to be passed in r0/r1 to
; `bl log2`, with the r0/r1 result written back to the same d register before
; q4 is copied to q0 for the return.
711 define arm_aapcs_vfpcc <2 x double> @log2_float64_t(<2 x double> %src) {
712 ; CHECK-LABEL: log2_float64_t:
713 ; CHECK: @ %bb.0: @ %entry
714 ; CHECK-NEXT: .save {r7, lr}
715 ; CHECK-NEXT: push {r7, lr}
716 ; CHECK-NEXT: .vsave {d8, d9}
717 ; CHECK-NEXT: vpush {d8, d9}
718 ; CHECK-NEXT: vmov q4, q0
719 ; CHECK-NEXT: vmov r0, r1, d9
720 ; CHECK-NEXT: bl log2
721 ; CHECK-NEXT: vmov r2, r3, d8
722 ; CHECK-NEXT: vmov d9, r0, r1
723 ; CHECK-NEXT: mov r0, r2
724 ; CHECK-NEXT: mov r1, r3
725 ; CHECK-NEXT: bl log2
726 ; CHECK-NEXT: vmov d8, r0, r1
727 ; CHECK-NEXT: vmov q0, q4
728 ; CHECK-NEXT: vpop {d8, d9}
729 ; CHECK-NEXT: pop {r7, pc}
731 %0 = call fast <2 x double> @llvm.log2.v2f64(<2 x double> %src)
; Fast llvm.log10.v4f32 on <4 x float>. The checks expect the input to be kept
; in q4 and each of the four lanes to be passed in r0 to `bl log10f`; r4/r5
; hold values across the calls, and the r0 results are moved into s19, s18,
; s17 and s16 before q4 is copied to q0 for the return.
735 define arm_aapcs_vfpcc <4 x float> @log10_float32_t(<4 x float> %src) {
736 ; CHECK-LABEL: log10_float32_t:
737 ; CHECK: @ %bb.0: @ %entry
738 ; CHECK-NEXT: .save {r4, r5, r7, lr}
739 ; CHECK-NEXT: push {r4, r5, r7, lr}
740 ; CHECK-NEXT: .vsave {d8, d9}
741 ; CHECK-NEXT: vpush {d8, d9}
742 ; CHECK-NEXT: vmov q4, q0
743 ; CHECK-NEXT: vmov r0, r4, d9
744 ; CHECK-NEXT: bl log10f
745 ; CHECK-NEXT: mov r5, r0
746 ; CHECK-NEXT: mov r0, r4
747 ; CHECK-NEXT: bl log10f
748 ; CHECK-NEXT: vmov r4, r1, d8
749 ; CHECK-NEXT: vmov s19, r0
750 ; CHECK-NEXT: vmov s18, r5
751 ; CHECK-NEXT: mov r0, r1
752 ; CHECK-NEXT: bl log10f
753 ; CHECK-NEXT: vmov s17, r0
754 ; CHECK-NEXT: mov r0, r4
755 ; CHECK-NEXT: bl log10f
756 ; CHECK-NEXT: vmov s16, r0
757 ; CHECK-NEXT: vmov q0, q4
758 ; CHECK-NEXT: vpop {d8, d9}
759 ; CHECK-NEXT: pop {r4, r5, r7, pc}
761 %0 = call fast <4 x float> @llvm.log10.v4f32(<4 x float> %src)
; Fast llvm.log10.v8f16 on <8 x half>. The checks expect the input to be kept
; in q4; each half lane is widened with vcvtb/vcvtt.f32.f16, passed in r0 to
; `bl log10f`, and the r0 result narrowed with vcvtb/vcvtt.f16.f32 into q5
; (s20-s23), which is copied to q0 for the return.
765 define arm_aapcs_vfpcc <8 x half> @log10_float16_t(<8 x half> %src) {
766 ; CHECK-LABEL: log10_float16_t:
767 ; CHECK: @ %bb.0: @ %entry
768 ; CHECK-NEXT: .save {r7, lr}
769 ; CHECK-NEXT: push {r7, lr}
770 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
771 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
772 ; CHECK-NEXT: vmov q4, q0
773 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16
774 ; CHECK-NEXT: vmov r0, s0
775 ; CHECK-NEXT: bl log10f
776 ; CHECK-NEXT: vcvtt.f32.f16 s0, s16
777 ; CHECK-NEXT: vmov s16, r0
778 ; CHECK-NEXT: vmov r1, s0
779 ; CHECK-NEXT: mov r0, r1
780 ; CHECK-NEXT: bl log10f
781 ; CHECK-NEXT: vmov s0, r0
782 ; CHECK-NEXT: vcvtb.f16.f32 s20, s16
783 ; CHECK-NEXT: vcvtt.f16.f32 s20, s0
784 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17
785 ; CHECK-NEXT: vmov r0, s0
786 ; CHECK-NEXT: bl log10f
787 ; CHECK-NEXT: vmov s0, r0
788 ; CHECK-NEXT: vcvtb.f16.f32 s21, s0
789 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17
790 ; CHECK-NEXT: vmov r0, s0
791 ; CHECK-NEXT: bl log10f
792 ; CHECK-NEXT: vmov s0, r0
793 ; CHECK-NEXT: vcvtt.f16.f32 s21, s0
794 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18
795 ; CHECK-NEXT: vmov r0, s0
796 ; CHECK-NEXT: bl log10f
797 ; CHECK-NEXT: vmov s0, r0
798 ; CHECK-NEXT: vcvtb.f16.f32 s22, s0
799 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18
800 ; CHECK-NEXT: vmov r0, s0
801 ; CHECK-NEXT: bl log10f
802 ; CHECK-NEXT: vmov s0, r0
803 ; CHECK-NEXT: vcvtt.f16.f32 s22, s0
804 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19
805 ; CHECK-NEXT: vmov r0, s0
806 ; CHECK-NEXT: bl log10f
807 ; CHECK-NEXT: vmov s0, r0
808 ; CHECK-NEXT: vcvtb.f16.f32 s23, s0
809 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19
810 ; CHECK-NEXT: vmov r0, s0
811 ; CHECK-NEXT: bl log10f
812 ; CHECK-NEXT: vmov s0, r0
813 ; CHECK-NEXT: vcvtt.f16.f32 s23, s0
814 ; CHECK-NEXT: vmov q0, q5
815 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
816 ; CHECK-NEXT: pop {r7, pc}
818 %0 = call fast <8 x half> @llvm.log10.v8f16(<8 x half> %src)
; Fast llvm.log10.v2f64 on <2 x double>. The checks expect the input to be
; kept in q4 and each double lane (d9 first, then d8) to be passed in r0/r1 to
; `bl log10`, with the r0/r1 result written back to the same d register before
; q4 is copied to q0 for the return.
822 define arm_aapcs_vfpcc <2 x double> @log10_float64_t(<2 x double> %src) {
823 ; CHECK-LABEL: log10_float64_t:
824 ; CHECK: @ %bb.0: @ %entry
825 ; CHECK-NEXT: .save {r7, lr}
826 ; CHECK-NEXT: push {r7, lr}
827 ; CHECK-NEXT: .vsave {d8, d9}
828 ; CHECK-NEXT: vpush {d8, d9}
829 ; CHECK-NEXT: vmov q4, q0
830 ; CHECK-NEXT: vmov r0, r1, d9
831 ; CHECK-NEXT: bl log10
832 ; CHECK-NEXT: vmov r2, r3, d8
833 ; CHECK-NEXT: vmov d9, r0, r1
834 ; CHECK-NEXT: mov r0, r2
835 ; CHECK-NEXT: mov r1, r3
836 ; CHECK-NEXT: bl log10
837 ; CHECK-NEXT: vmov d8, r0, r1
838 ; CHECK-NEXT: vmov q0, q4
839 ; CHECK-NEXT: vpop {d8, d9}
840 ; CHECK-NEXT: pop {r7, pc}
842 %0 = call fast <2 x double> @llvm.log10.v2f64(<2 x double> %src)
; Fast llvm.pow.v4f32 on two <4 x float> operands. The checks expect q0 to be
; copied to q5 and q1 to q4; for each lane the q5 element is passed in r0 and
; the q4 element in r1 to `bl powf`, with r4-r6 holding values across calls.
; The r0 results are moved into s19, s18, s17 and s16, and q4 is copied to q0
; for the return.
846 define arm_aapcs_vfpcc <4 x float> @pow_float32_t(<4 x float> %src1, <4 x float> %src2) {
847 ; CHECK-LABEL: pow_float32_t:
848 ; CHECK: @ %bb.0: @ %entry
849 ; CHECK-NEXT: .save {r4, r5, r6, lr}
850 ; CHECK-NEXT: push {r4, r5, r6, lr}
851 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
852 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
853 ; CHECK-NEXT: vmov q4, q1
854 ; CHECK-NEXT: vmov q5, q0
855 ; CHECK-NEXT: vmov r0, r4, d11
856 ; CHECK-NEXT: vmov r1, r5, d9
857 ; CHECK-NEXT: bl powf
858 ; CHECK-NEXT: mov r6, r0
859 ; CHECK-NEXT: mov r0, r4
860 ; CHECK-NEXT: mov r1, r5
861 ; CHECK-NEXT: bl powf
862 ; CHECK-NEXT: vmov r4, r2, d10
863 ; CHECK-NEXT: vmov r5, r1, d8
864 ; CHECK-NEXT: vmov s19, r0
865 ; CHECK-NEXT: vmov s18, r6
866 ; CHECK-NEXT: mov r0, r2
867 ; CHECK-NEXT: bl powf
868 ; CHECK-NEXT: vmov s17, r0
869 ; CHECK-NEXT: mov r0, r4
870 ; CHECK-NEXT: mov r1, r5
871 ; CHECK-NEXT: bl powf
872 ; CHECK-NEXT: vmov s16, r0
873 ; CHECK-NEXT: vmov q0, q4
874 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
875 ; CHECK-NEXT: pop {r4, r5, r6, pc}
877 %0 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %src1, <4 x float> %src2)
; Fast llvm.pow.v8f16 on two <8 x half> operands. The checks expect q0 to be
; copied to q5 and q1 to q4; each pair of half lanes is widened with
; vcvtb/vcvtt.f32.f16, passed in r0 (from q5) and r1 (from q4) to `bl powf`,
; and the r0 result narrowed with vcvtb/vcvtt.f16.f32 into q6 (s24-s27),
; which is copied to q0 for the return.
881 define arm_aapcs_vfpcc <8 x half> @pow_float16_t(<8 x half> %src1, <8 x half> %src2) {
882 ; CHECK-LABEL: pow_float16_t:
883 ; CHECK: @ %bb.0: @ %entry
884 ; CHECK-NEXT: .save {r7, lr}
885 ; CHECK-NEXT: push {r7, lr}
886 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
887 ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
888 ; CHECK-NEXT: vmov q5, q0
889 ; CHECK-NEXT: vmov q4, q1
890 ; CHECK-NEXT: vcvtb.f32.f16 s0, s20
891 ; CHECK-NEXT: vmov r0, s0
892 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16
893 ; CHECK-NEXT: vmov r1, s0
894 ; CHECK-NEXT: bl powf
895 ; CHECK-NEXT: vcvtt.f32.f16 s0, s20
896 ; CHECK-NEXT: vmov r2, s0
897 ; CHECK-NEXT: vcvtt.f32.f16 s0, s16
898 ; CHECK-NEXT: vmov r1, s0
899 ; CHECK-NEXT: vmov s16, r0
900 ; CHECK-NEXT: mov r0, r2
901 ; CHECK-NEXT: bl powf
902 ; CHECK-NEXT: vmov s0, r0
903 ; CHECK-NEXT: vcvtb.f16.f32 s24, s16
904 ; CHECK-NEXT: vcvtt.f16.f32 s24, s0
905 ; CHECK-NEXT: vcvtb.f32.f16 s0, s21
906 ; CHECK-NEXT: vmov r0, s0
907 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17
908 ; CHECK-NEXT: vmov r1, s0
909 ; CHECK-NEXT: bl powf
910 ; CHECK-NEXT: vmov s0, r0
911 ; CHECK-NEXT: vcvtb.f16.f32 s25, s0
912 ; CHECK-NEXT: vcvtt.f32.f16 s0, s21
913 ; CHECK-NEXT: vmov r0, s0
914 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17
915 ; CHECK-NEXT: vmov r1, s0
916 ; CHECK-NEXT: bl powf
917 ; CHECK-NEXT: vmov s0, r0
918 ; CHECK-NEXT: vcvtt.f16.f32 s25, s0
919 ; CHECK-NEXT: vcvtb.f32.f16 s0, s22
920 ; CHECK-NEXT: vmov r0, s0
921 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18
922 ; CHECK-NEXT: vmov r1, s0
923 ; CHECK-NEXT: bl powf
924 ; CHECK-NEXT: vmov s0, r0
925 ; CHECK-NEXT: vcvtb.f16.f32 s26, s0
926 ; CHECK-NEXT: vcvtt.f32.f16 s0, s22
927 ; CHECK-NEXT: vmov r0, s0
928 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18
929 ; CHECK-NEXT: vmov r1, s0
930 ; CHECK-NEXT: bl powf
931 ; CHECK-NEXT: vmov s0, r0
932 ; CHECK-NEXT: vcvtt.f16.f32 s26, s0
933 ; CHECK-NEXT: vcvtb.f32.f16 s0, s23
934 ; CHECK-NEXT: vmov r0, s0
935 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19
936 ; CHECK-NEXT: vmov r1, s0
937 ; CHECK-NEXT: bl powf
938 ; CHECK-NEXT: vmov s0, r0
939 ; CHECK-NEXT: vcvtb.f16.f32 s27, s0
940 ; CHECK-NEXT: vcvtt.f32.f16 s0, s23
941 ; CHECK-NEXT: vmov r0, s0
942 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19
943 ; CHECK-NEXT: vmov r1, s0
944 ; CHECK-NEXT: bl powf
945 ; CHECK-NEXT: vmov s0, r0
946 ; CHECK-NEXT: vcvtt.f16.f32 s27, s0
947 ; CHECK-NEXT: vmov q0, q6
948 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
949 ; CHECK-NEXT: pop {r7, pc}
951 %0 = call fast <8 x half> @llvm.pow.v8f16(<8 x half> %src1, <8 x half> %src2)
; Fast llvm.pow.v2f64 on two <2 x double> operands. The checks expect q0 to be
; copied to q5 and q1 to q4; for each lane the q5 double is placed in r0/r1
; (the second one staged through lr/r12) and the q4 double in r2/r3, and the
; r0/r1 values are written to d9 and then d8 before q4 is copied to q0 for
; the return.
955 define arm_aapcs_vfpcc <2 x double> @pow_float64_t(<2 x double> %src1, <2 x double> %src2) {
956 ; CHECK-LABEL: pow_float64_t:
957 ; CHECK: @ %bb.0: @ %entry
958 ; CHECK-NEXT: .save {r7, lr}
959 ; CHECK-NEXT: push {r7, lr}
960 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
961 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
962 ; CHECK-NEXT: vmov q4, q1
963 ; CHECK-NEXT: vmov q5, q0
964 ; CHECK-NEXT: vmov r0, r1, d11
965 ; CHECK-NEXT: vmov r2, r3, d9
967 ; CHECK-NEXT: vmov lr, r12, d10
968 ; CHECK-NEXT: vmov r2, r3, d8
969 ; CHECK-NEXT: vmov d9, r0, r1
970 ; CHECK-NEXT: mov r0, lr
971 ; CHECK-NEXT: mov r1, r12
973 ; CHECK-NEXT: vmov d8, r0, r1
974 ; CHECK-NEXT: vmov q0, q4
975 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
976 ; CHECK-NEXT: pop {r7, pc}
978 %0 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %src1, <2 x double> %src2)
; Fast llvm.copysign.v4f32 on two <4 x float> operands. The checks show no
; calls: all lanes of q0 (d0/d1) and q1 (d2/d3) are moved to core registers,
; the top bit of each q1 lane is isolated with a logical shift right by 31 and
; inserted into bit 31 of the matching q0 lane with bfi, and the four results
; are moved back into s0-s3.
982 define arm_aapcs_vfpcc <4 x float> @copysign_float32_t(<4 x float> %src1, <4 x float> %src2) {
983 ; CHECK-LABEL: copysign_float32_t:
984 ; CHECK: @ %bb.0: @ %entry
985 ; CHECK-NEXT: .save {r4, r5, r7, lr}
986 ; CHECK-NEXT: push {r4, r5, r7, lr}
987 ; CHECK-NEXT: vmov r12, r1, d2
988 ; CHECK-NEXT: vmov r2, lr, d3
989 ; CHECK-NEXT: vmov r3, r0, d0
990 ; CHECK-NEXT: vmov r4, r5, d1
991 ; CHECK-NEXT: lsrs r1, r1, #31
992 ; CHECK-NEXT: bfi r0, r1, #31, #1
993 ; CHECK-NEXT: lsrs r1, r2, #31
994 ; CHECK-NEXT: bfi r4, r1, #31, #1
995 ; CHECK-NEXT: lsr.w r1, lr, #31
996 ; CHECK-NEXT: bfi r5, r1, #31, #1
997 ; CHECK-NEXT: lsr.w r1, r12, #31
998 ; CHECK-NEXT: bfi r3, r1, #31, #1
999 ; CHECK-NEXT: vmov s2, r4
1000 ; CHECK-NEXT: vmov s3, r5
1001 ; CHECK-NEXT: vmov s1, r0
1002 ; CHECK-NEXT: vmov s0, r3
1003 ; CHECK-NEXT: pop {r4, r5, r7, pc}
1005 %0 = call fast <4 x float> @llvm.copysign.v4f32(<4 x float> %src1, <4 x float> %src2)
; copysign_float16_t: codegen test for the <8 x half> copysign intrinsic.
;
; Shape of the expected assembly asserted below:
;   1. 32 bytes of stack are reserved. The eight half lanes held in s4-s7
;      (upper halves extracted with vmovx.f16) are each stored with vstr.16
;      to their own slot; the slots are 4 bytes apart (offsets 0..28).
;   2. Per lane, ldrb reloads the byte at "slot offset + 1" and
;      "lsls r0, r0, #24" moves that byte's top bit up to bit 31.
;      NOTE(review): this is presumably the sign bit of the stored half on
;      this little-endian target - confirm.
;   3. Per lane, vabs.f16 produces the magnitude of the matching lane from
;      s0-s3 and vneg.f16 its negation; the conditional vmovpl then replaces
;      the negated value with the magnitude when the "pl" condition holds.
;   4. vins.f16 merges each upper-half result back into s0..s3.
;   5. The 32 bytes of stack are released again.
;
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
1009 define arm_aapcs_vfpcc <8 x half> @copysign_float16_t(<8 x half> %src1, <8 x half> %src2) {
1010 ; CHECK-LABEL: copysign_float16_t:
1011 ; CHECK: @ %bb.0: @ %entry
1012 ; CHECK-NEXT: .pad #32
1013 ; CHECK-NEXT: sub sp, #32
1014 ; CHECK-NEXT: vmovx.f16 s8, s4
1015 ; CHECK-NEXT: vstr.16 s8, [sp, #24]
1016 ; CHECK-NEXT: vstr.16 s4, [sp, #28]
1017 ; CHECK-NEXT: vmovx.f16 s4, s5
1018 ; CHECK-NEXT: vstr.16 s4, [sp, #16]
1019 ; CHECK-NEXT: vmovx.f16 s4, s6
1020 ; CHECK-NEXT: vstr.16 s5, [sp, #20]
1021 ; CHECK-NEXT: vstr.16 s4, [sp, #8]
1022 ; CHECK-NEXT: vmovx.f16 s4, s7
1023 ; CHECK-NEXT: vstr.16 s6, [sp, #12]
1024 ; CHECK-NEXT: vstr.16 s4, [sp]
1025 ; CHECK-NEXT: vstr.16 s7, [sp, #4]
1026 ; CHECK-NEXT: ldrb.w r0, [sp, #25]
1027 ; CHECK-NEXT: vmovx.f16 s4, s0
1028 ; CHECK-NEXT: vabs.f16 s4, s4
1029 ; CHECK-NEXT: vneg.f16 s6, s4
1030 ; CHECK-NEXT: lsls r0, r0, #24
1032 ; CHECK-NEXT: vmovpl.f32 s6, s4
1033 ; CHECK-NEXT: ldrb.w r0, [sp, #29]
1034 ; CHECK-NEXT: vabs.f16 s4, s0
1035 ; CHECK-NEXT: vneg.f16 s0, s4
1036 ; CHECK-NEXT: lsls r0, r0, #24
1038 ; CHECK-NEXT: vmovpl.f32 s0, s4
1039 ; CHECK-NEXT: ldrb.w r0, [sp, #17]
1040 ; CHECK-NEXT: vmovx.f16 s4, s1
1041 ; CHECK-NEXT: vabs.f16 s4, s4
1042 ; CHECK-NEXT: vins.f16 s0, s6
1043 ; CHECK-NEXT: vneg.f16 s6, s4
1044 ; CHECK-NEXT: lsls r0, r0, #24
1046 ; CHECK-NEXT: vmovpl.f32 s6, s4
1047 ; CHECK-NEXT: ldrb.w r0, [sp, #21]
1048 ; CHECK-NEXT: vabs.f16 s4, s1
1049 ; CHECK-NEXT: vneg.f16 s1, s4
1050 ; CHECK-NEXT: lsls r0, r0, #24
1052 ; CHECK-NEXT: vmovpl.f32 s1, s4
1053 ; CHECK-NEXT: ldrb.w r0, [sp, #9]
1054 ; CHECK-NEXT: vmovx.f16 s4, s2
1055 ; CHECK-NEXT: vabs.f16 s4, s4
1056 ; CHECK-NEXT: vins.f16 s1, s6
1057 ; CHECK-NEXT: vneg.f16 s6, s4
1058 ; CHECK-NEXT: lsls r0, r0, #24
1060 ; CHECK-NEXT: vmovpl.f32 s6, s4
1061 ; CHECK-NEXT: ldrb.w r0, [sp, #13]
1062 ; CHECK-NEXT: vabs.f16 s4, s2
1063 ; CHECK-NEXT: vneg.f16 s2, s4
1064 ; CHECK-NEXT: lsls r0, r0, #24
1066 ; CHECK-NEXT: vmovpl.f32 s2, s4
1067 ; CHECK-NEXT: ldrb.w r0, [sp, #1]
1068 ; CHECK-NEXT: vmovx.f16 s4, s3
1069 ; CHECK-NEXT: vabs.f16 s4, s4
1070 ; CHECK-NEXT: vins.f16 s2, s6
1071 ; CHECK-NEXT: vneg.f16 s6, s4
1072 ; CHECK-NEXT: lsls r0, r0, #24
1074 ; CHECK-NEXT: vmovpl.f32 s6, s4
1075 ; CHECK-NEXT: ldrb.w r0, [sp, #5]
1076 ; CHECK-NEXT: vabs.f16 s4, s3
1077 ; CHECK-NEXT: vneg.f16 s3, s4
1078 ; CHECK-NEXT: lsls r0, r0, #24
1080 ; CHECK-NEXT: vmovpl.f32 s3, s4
1081 ; CHECK-NEXT: vins.f16 s3, s6
1082 ; CHECK-NEXT: add sp, #32
; IR under test: a single fast-math call to the <8 x half> copysign intrinsic.
1085 %0 = call fast <8 x half> @llvm.copysign.v8f16(<8 x half> %src1, <8 x half> %src2)
; copysign_float64_t: codegen test for the <2 x double> copysign intrinsic.
;
; The expected assembly below performs the operation on core registers:
;   * each of d3, d2, d1 and d0 is split into a pair of 32-bit registers;
;   * bit 31 of the second word read from d3 (r1) and from d2 (lr) is isolated
;     with a shift right by 31 and written, via "bfi ..., #31, #1", into
;     bit 31 of the second word read from d1 (r3) and from d0 (r2);
;   * d1 and d0 are rebuilt from (r0, r3) and (r12, r2).
; Note that r0 is written by three consecutive vmov instructions; only the
; value from the last one (first word of d1) is read afterwards, when d1 is
; rebuilt. The first words read from d3 and d2 are never used.
;
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
1089 define arm_aapcs_vfpcc <2 x double> @copysign_float64_t(<2 x double> %src1, <2 x double> %src2) {
1090 ; CHECK-LABEL: copysign_float64_t:
1091 ; CHECK: @ %bb.0: @ %entry
1092 ; CHECK-NEXT: .save {r7, lr}
1093 ; CHECK-NEXT: push {r7, lr}
1094 ; CHECK-NEXT: vmov r0, r1, d3
1095 ; CHECK-NEXT: vmov r0, lr, d2
1096 ; CHECK-NEXT: vmov r0, r3, d1
1097 ; CHECK-NEXT: vmov r12, r2, d0
1098 ; CHECK-NEXT: lsrs r1, r1, #31
1099 ; CHECK-NEXT: bfi r3, r1, #31, #1
1100 ; CHECK-NEXT: lsr.w r1, lr, #31
1101 ; CHECK-NEXT: bfi r2, r1, #31, #1
1102 ; CHECK-NEXT: vmov d1, r0, r3
1103 ; CHECK-NEXT: vmov d0, r12, r2
1104 ; CHECK-NEXT: pop {r7, pc}
; IR under test: a single fast-math call to the <2 x double> copysign intrinsic.
1106 %0 = call fast <2 x double> @llvm.copysign.v2f64(<2 x double> %src1, <2 x double> %src2)
; Declarations of the LLVM math intrinsics called by the tests in this file:
; sqrt, cos, sin, exp, exp2, log, log2, log10, pow and copysign, declared once
; per vector type. pow and copysign take two vector operands; the rest take one.

; <4 x float> variants.
1110 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
1111 declare <4 x float> @llvm.cos.v4f32(<4 x float>)
1112 declare <4 x float> @llvm.sin.v4f32(<4 x float>)
1113 declare <4 x float> @llvm.exp.v4f32(<4 x float>)
1114 declare <4 x float> @llvm.exp2.v4f32(<4 x float>)
1115 declare <4 x float> @llvm.log.v4f32(<4 x float>)
1116 declare <4 x float> @llvm.log2.v4f32(<4 x float>)
1117 declare <4 x float> @llvm.log10.v4f32(<4 x float>)
1118 declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
1119 declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)
; <8 x half> variants.
1120 declare <8 x half> @llvm.sqrt.v8f16(<8 x half>)
1121 declare <8 x half> @llvm.cos.v8f16(<8 x half>)
1122 declare <8 x half> @llvm.sin.v8f16(<8 x half>)
1123 declare <8 x half> @llvm.exp.v8f16(<8 x half>)
1124 declare <8 x half> @llvm.exp2.v8f16(<8 x half>)
1125 declare <8 x half> @llvm.log.v8f16(<8 x half>)
1126 declare <8 x half> @llvm.log2.v8f16(<8 x half>)
1127 declare <8 x half> @llvm.log10.v8f16(<8 x half>)
1128 declare <8 x half> @llvm.pow.v8f16(<8 x half>, <8 x half>)
1129 declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)
; <2 x double> variants.
1130 declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
1131 declare <2 x double> @llvm.cos.v2f64(<2 x double>)
1132 declare <2 x double> @llvm.sin.v2f64(<2 x double>)
1133 declare <2 x double> @llvm.exp.v2f64(<2 x double>)
1134 declare <2 x double> @llvm.exp2.v2f64(<2 x double>)
1135 declare <2 x double> @llvm.log.v2f64(<2 x double>)
1136 declare <2 x double> @llvm.log2.v2f64(<2 x double>)
1137 declare <2 x double> @llvm.log10.v2f64(<2 x double>)
1138 declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
1139 declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)