1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,FULLFP16
3 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,MVEFP
4 ; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,FULLFP16
5 ; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,MVEFP
; sqrt_float32_t: fast llvm.sqrt.v4f32 on a <4 x float> argument.
; The expected assembly uses one scalar vsqrt.f32 per lane (s3 down to s0),
; each operating in place on its own s register.
7 define arm_aapcs_vfpcc <4 x float> @sqrt_float32_t(<4 x float> %src) {
8 ; CHECK-LABEL: sqrt_float32_t:
9 ; CHECK: @ %bb.0: @ %entry
10 ; CHECK-NEXT: vsqrt.f32 s3, s3
11 ; CHECK-NEXT: vsqrt.f32 s2, s2
12 ; CHECK-NEXT: vsqrt.f32 s1, s1
13 ; CHECK-NEXT: vsqrt.f32 s0, s0
16 %0 = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %src)
; sqrt_float16_t: fast llvm.sqrt.v8f16 on an <8 x half> argument.
; For each of s0-s3 the expected assembly is a four-instruction group:
; vmovx.f16 into s4, a vsqrt.f16 on s4 and on the original register, then
; vins.f16 of s4 back into that register (eight vsqrt.f16 in total).
20 define arm_aapcs_vfpcc <8 x half> @sqrt_float16_t(<8 x half> %src) {
21 ; CHECK-LABEL: sqrt_float16_t:
22 ; CHECK: @ %bb.0: @ %entry
23 ; CHECK-NEXT: vmovx.f16 s4, s0
24 ; CHECK-NEXT: vsqrt.f16 s0, s0
25 ; CHECK-NEXT: vsqrt.f16 s4, s4
26 ; CHECK-NEXT: vins.f16 s0, s4
27 ; CHECK-NEXT: vmovx.f16 s4, s1
28 ; CHECK-NEXT: vsqrt.f16 s4, s4
29 ; CHECK-NEXT: vsqrt.f16 s1, s1
30 ; CHECK-NEXT: vins.f16 s1, s4
31 ; CHECK-NEXT: vmovx.f16 s4, s2
32 ; CHECK-NEXT: vsqrt.f16 s4, s4
33 ; CHECK-NEXT: vsqrt.f16 s2, s2
34 ; CHECK-NEXT: vins.f16 s2, s4
35 ; CHECK-NEXT: vmovx.f16 s4, s3
36 ; CHECK-NEXT: vsqrt.f16 s4, s4
37 ; CHECK-NEXT: vsqrt.f16 s3, s3
38 ; CHECK-NEXT: vins.f16 s3, s4
41 %0 = call fast <8 x half> @llvm.sqrt.v8f16(<8 x half> %src)
; sqrt_float64_t: fast llvm.sqrt.v2f64 on a <2 x double> argument.
; The expected assembly saves r7/lr and d8/d9, copies the input to q4, and
; handles the two doubles separately: d9 and then d8 are moved out to core
; register pairs and written back from r0/r1 before q4 is copied to q0.
45 define arm_aapcs_vfpcc <2 x double> @sqrt_float64_t(<2 x double> %src) {
46 ; CHECK-LABEL: sqrt_float64_t:
47 ; CHECK: @ %bb.0: @ %entry
48 ; CHECK-NEXT: .save {r7, lr}
49 ; CHECK-NEXT: push {r7, lr}
50 ; CHECK-NEXT: .vsave {d8, d9}
51 ; CHECK-NEXT: vpush {d8, d9}
52 ; CHECK-NEXT: vmov q4, q0
53 ; CHECK-NEXT: vmov r0, r1, d9
55 ; CHECK-NEXT: vmov r2, r3, d8
56 ; CHECK-NEXT: vmov d9, r0, r1
57 ; CHECK-NEXT: mov r0, r2
58 ; CHECK-NEXT: mov r1, r3
60 ; CHECK-NEXT: vmov d8, r0, r1
61 ; CHECK-NEXT: vmov q0, q4
62 ; CHECK-NEXT: vpop {d8, d9}
63 ; CHECK-NEXT: pop {r7, pc}
65 %0 = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %src)
; cos_float32_t: fast llvm.cos.v4f32 on a <4 x float> argument.
; The expected assembly saves r4/r5/r7/lr and d8/d9, copies the input to q4,
; and processes the lanes one at a time through r0, writing the four results
; back into s19, s18, s17 and s16 before q4 is copied to q0.
69 define arm_aapcs_vfpcc <4 x float> @cos_float32_t(<4 x float> %src) {
70 ; CHECK-LABEL: cos_float32_t:
71 ; CHECK: @ %bb.0: @ %entry
72 ; CHECK-NEXT: .save {r4, r5, r7, lr}
73 ; CHECK-NEXT: push {r4, r5, r7, lr}
74 ; CHECK-NEXT: .vsave {d8, d9}
75 ; CHECK-NEXT: vpush {d8, d9}
76 ; CHECK-NEXT: vmov q4, q0
77 ; CHECK-NEXT: vmov r0, r4, d9
79 ; CHECK-NEXT: mov r5, r0
80 ; CHECK-NEXT: mov r0, r4
82 ; CHECK-NEXT: vmov r4, r1, d8
83 ; CHECK-NEXT: vmov s19, r0
84 ; CHECK-NEXT: vmov s18, r5
85 ; CHECK-NEXT: mov r0, r1
87 ; CHECK-NEXT: vmov s17, r0
88 ; CHECK-NEXT: mov r0, r4
90 ; CHECK-NEXT: vmov s16, r0
91 ; CHECK-NEXT: vmov q0, q4
92 ; CHECK-NEXT: vpop {d8, d9}
93 ; CHECK-NEXT: pop {r4, r5, r7, pc}
95 %0 = call fast <4 x float> @llvm.cos.v4f32(<4 x float> %src)
; cos_float16_t: fast llvm.cos.v8f16 on an <8 x half> argument.
; The expected assembly copies the input to q4 and makes eight calls to cosf.
; Each half lane is converted to f32 (vcvtb/vcvtt.f32.f16), passed and
; returned in r0, and converted back (vcvtb/vcvtt.f16.f32) into s20-s23;
; q5 is then copied to q0.
99 define arm_aapcs_vfpcc <8 x half> @cos_float16_t(<8 x half> %src) {
100 ; CHECK-LABEL: cos_float16_t:
101 ; CHECK: @ %bb.0: @ %entry
102 ; CHECK-NEXT: .save {r7, lr}
103 ; CHECK-NEXT: push {r7, lr}
104 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
105 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
106 ; CHECK-NEXT: vmov q4, q0
107 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16
108 ; CHECK-NEXT: vmov r0, s0
109 ; CHECK-NEXT: bl cosf
110 ; CHECK-NEXT: vcvtt.f32.f16 s0, s16
111 ; CHECK-NEXT: vmov s16, r0
112 ; CHECK-NEXT: vmov r1, s0
113 ; CHECK-NEXT: mov r0, r1
114 ; CHECK-NEXT: bl cosf
115 ; CHECK-NEXT: vmov s0, r0
116 ; CHECK-NEXT: vcvtb.f16.f32 s20, s16
117 ; CHECK-NEXT: vcvtt.f16.f32 s20, s0
118 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17
119 ; CHECK-NEXT: vmov r0, s0
120 ; CHECK-NEXT: bl cosf
121 ; CHECK-NEXT: vmov s0, r0
122 ; CHECK-NEXT: vcvtb.f16.f32 s21, s0
123 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17
124 ; CHECK-NEXT: vmov r0, s0
125 ; CHECK-NEXT: bl cosf
126 ; CHECK-NEXT: vmov s0, r0
127 ; CHECK-NEXT: vcvtt.f16.f32 s21, s0
128 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18
129 ; CHECK-NEXT: vmov r0, s0
130 ; CHECK-NEXT: bl cosf
131 ; CHECK-NEXT: vmov s0, r0
132 ; CHECK-NEXT: vcvtb.f16.f32 s22, s0
133 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18
134 ; CHECK-NEXT: vmov r0, s0
135 ; CHECK-NEXT: bl cosf
136 ; CHECK-NEXT: vmov s0, r0
137 ; CHECK-NEXT: vcvtt.f16.f32 s22, s0
138 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19
139 ; CHECK-NEXT: vmov r0, s0
140 ; CHECK-NEXT: bl cosf
141 ; CHECK-NEXT: vmov s0, r0
142 ; CHECK-NEXT: vcvtb.f16.f32 s23, s0
143 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19
144 ; CHECK-NEXT: vmov r0, s0
145 ; CHECK-NEXT: bl cosf
146 ; CHECK-NEXT: vmov s0, r0
147 ; CHECK-NEXT: vcvtt.f16.f32 s23, s0
148 ; CHECK-NEXT: vmov q0, q5
149 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
150 ; CHECK-NEXT: pop {r7, pc}
152 %0 = call fast <8 x half> @llvm.cos.v8f16(<8 x half> %src)
; cos_float64_t: fast llvm.cos.v2f64 on a <2 x double> argument.
; The expected assembly saves r7/lr and d8/d9, copies the input to q4, and
; handles the two doubles separately: d9 and then d8 are moved out to core
; register pairs and written back from r0/r1 before q4 is copied to q0.
156 define arm_aapcs_vfpcc <2 x double> @cos_float64_t(<2 x double> %src) {
157 ; CHECK-LABEL: cos_float64_t:
158 ; CHECK: @ %bb.0: @ %entry
159 ; CHECK-NEXT: .save {r7, lr}
160 ; CHECK-NEXT: push {r7, lr}
161 ; CHECK-NEXT: .vsave {d8, d9}
162 ; CHECK-NEXT: vpush {d8, d9}
163 ; CHECK-NEXT: vmov q4, q0
164 ; CHECK-NEXT: vmov r0, r1, d9
166 ; CHECK-NEXT: vmov r2, r3, d8
167 ; CHECK-NEXT: vmov d9, r0, r1
168 ; CHECK-NEXT: mov r0, r2
169 ; CHECK-NEXT: mov r1, r3
171 ; CHECK-NEXT: vmov d8, r0, r1
172 ; CHECK-NEXT: vmov q0, q4
173 ; CHECK-NEXT: vpop {d8, d9}
174 ; CHECK-NEXT: pop {r7, pc}
176 %0 = call fast <2 x double> @llvm.cos.v2f64(<2 x double> %src)
; sin_float32_t: fast llvm.sin.v4f32 on a <4 x float> argument.
; The expected assembly copies the input to q4 and makes four calls to sinf,
; one per lane, passing and receiving each value in r0; the results are
; written into s19, s18, s17 and s16 before q4 is copied to q0.
180 define arm_aapcs_vfpcc <4 x float> @sin_float32_t(<4 x float> %src) {
181 ; CHECK-LABEL: sin_float32_t:
182 ; CHECK: @ %bb.0: @ %entry
183 ; CHECK-NEXT: .save {r4, r5, r7, lr}
184 ; CHECK-NEXT: push {r4, r5, r7, lr}
185 ; CHECK-NEXT: .vsave {d8, d9}
186 ; CHECK-NEXT: vpush {d8, d9}
187 ; CHECK-NEXT: vmov q4, q0
188 ; CHECK-NEXT: vmov r0, r4, d9
189 ; CHECK-NEXT: bl sinf
190 ; CHECK-NEXT: mov r5, r0
191 ; CHECK-NEXT: mov r0, r4
192 ; CHECK-NEXT: bl sinf
193 ; CHECK-NEXT: vmov r4, r1, d8
194 ; CHECK-NEXT: vmov s19, r0
195 ; CHECK-NEXT: vmov s18, r5
196 ; CHECK-NEXT: mov r0, r1
197 ; CHECK-NEXT: bl sinf
198 ; CHECK-NEXT: vmov s17, r0
199 ; CHECK-NEXT: mov r0, r4
200 ; CHECK-NEXT: bl sinf
201 ; CHECK-NEXT: vmov s16, r0
202 ; CHECK-NEXT: vmov q0, q4
203 ; CHECK-NEXT: vpop {d8, d9}
204 ; CHECK-NEXT: pop {r4, r5, r7, pc}
206 %0 = call fast <4 x float> @llvm.sin.v4f32(<4 x float> %src)
; sin_float16_t: fast llvm.sin.v8f16 on an <8 x half> argument.
; The expected assembly copies the input to q4 and makes eight calls to sinf.
; Each half lane is converted to f32 (vcvtb/vcvtt.f32.f16), passed and
; returned in r0, and converted back (vcvtb/vcvtt.f16.f32) into s20-s23;
; q5 is then copied to q0.
210 define arm_aapcs_vfpcc <8 x half> @sin_float16_t(<8 x half> %src) {
211 ; CHECK-LABEL: sin_float16_t:
212 ; CHECK: @ %bb.0: @ %entry
213 ; CHECK-NEXT: .save {r7, lr}
214 ; CHECK-NEXT: push {r7, lr}
215 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
216 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
217 ; CHECK-NEXT: vmov q4, q0
218 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16
219 ; CHECK-NEXT: vmov r0, s0
220 ; CHECK-NEXT: bl sinf
221 ; CHECK-NEXT: vcvtt.f32.f16 s0, s16
222 ; CHECK-NEXT: vmov s16, r0
223 ; CHECK-NEXT: vmov r1, s0
224 ; CHECK-NEXT: mov r0, r1
225 ; CHECK-NEXT: bl sinf
226 ; CHECK-NEXT: vmov s0, r0
227 ; CHECK-NEXT: vcvtb.f16.f32 s20, s16
228 ; CHECK-NEXT: vcvtt.f16.f32 s20, s0
229 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17
230 ; CHECK-NEXT: vmov r0, s0
231 ; CHECK-NEXT: bl sinf
232 ; CHECK-NEXT: vmov s0, r0
233 ; CHECK-NEXT: vcvtb.f16.f32 s21, s0
234 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17
235 ; CHECK-NEXT: vmov r0, s0
236 ; CHECK-NEXT: bl sinf
237 ; CHECK-NEXT: vmov s0, r0
238 ; CHECK-NEXT: vcvtt.f16.f32 s21, s0
239 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18
240 ; CHECK-NEXT: vmov r0, s0
241 ; CHECK-NEXT: bl sinf
242 ; CHECK-NEXT: vmov s0, r0
243 ; CHECK-NEXT: vcvtb.f16.f32 s22, s0
244 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18
245 ; CHECK-NEXT: vmov r0, s0
246 ; CHECK-NEXT: bl sinf
247 ; CHECK-NEXT: vmov s0, r0
248 ; CHECK-NEXT: vcvtt.f16.f32 s22, s0
249 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19
250 ; CHECK-NEXT: vmov r0, s0
251 ; CHECK-NEXT: bl sinf
252 ; CHECK-NEXT: vmov s0, r0
253 ; CHECK-NEXT: vcvtb.f16.f32 s23, s0
254 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19
255 ; CHECK-NEXT: vmov r0, s0
256 ; CHECK-NEXT: bl sinf
257 ; CHECK-NEXT: vmov s0, r0
258 ; CHECK-NEXT: vcvtt.f16.f32 s23, s0
259 ; CHECK-NEXT: vmov q0, q5
260 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
261 ; CHECK-NEXT: pop {r7, pc}
263 %0 = call fast <8 x half> @llvm.sin.v8f16(<8 x half> %src)
; sin_float64_t: fast llvm.sin.v2f64 on a <2 x double> argument.
; The expected assembly saves r7/lr and d8/d9, copies the input to q4, and
; handles the two doubles separately: d9 and then d8 are moved out to core
; register pairs and written back from r0/r1 before q4 is copied to q0.
267 define arm_aapcs_vfpcc <2 x double> @sin_float64_t(<2 x double> %src) {
268 ; CHECK-LABEL: sin_float64_t:
269 ; CHECK: @ %bb.0: @ %entry
270 ; CHECK-NEXT: .save {r7, lr}
271 ; CHECK-NEXT: push {r7, lr}
272 ; CHECK-NEXT: .vsave {d8, d9}
273 ; CHECK-NEXT: vpush {d8, d9}
274 ; CHECK-NEXT: vmov q4, q0
275 ; CHECK-NEXT: vmov r0, r1, d9
277 ; CHECK-NEXT: vmov r2, r3, d8
278 ; CHECK-NEXT: vmov d9, r0, r1
279 ; CHECK-NEXT: mov r0, r2
280 ; CHECK-NEXT: mov r1, r3
282 ; CHECK-NEXT: vmov d8, r0, r1
283 ; CHECK-NEXT: vmov q0, q4
284 ; CHECK-NEXT: vpop {d8, d9}
285 ; CHECK-NEXT: pop {r7, pc}
287 %0 = call fast <2 x double> @llvm.sin.v2f64(<2 x double> %src)
; tan_float32_t: fast llvm.tan.v4f32 on a <4 x float> argument.
; The expected assembly copies the input to q4 and makes four calls to tanf,
; one per lane, passing and receiving each value in r0; the results are
; written into s19, s18, s17 and s16 before q4 is copied to q0.
291 define arm_aapcs_vfpcc <4 x float> @tan_float32_t(<4 x float> %src) {
292 ; CHECK-LABEL: tan_float32_t:
293 ; CHECK: @ %bb.0: @ %entry
294 ; CHECK-NEXT: .save {r4, r5, r7, lr}
295 ; CHECK-NEXT: push {r4, r5, r7, lr}
296 ; CHECK-NEXT: .vsave {d8, d9}
297 ; CHECK-NEXT: vpush {d8, d9}
298 ; CHECK-NEXT: vmov q4, q0
299 ; CHECK-NEXT: vmov r0, r4, d9
300 ; CHECK-NEXT: bl tanf
301 ; CHECK-NEXT: mov r5, r0
302 ; CHECK-NEXT: mov r0, r4
303 ; CHECK-NEXT: bl tanf
304 ; CHECK-NEXT: vmov r4, r1, d8
305 ; CHECK-NEXT: vmov s19, r0
306 ; CHECK-NEXT: vmov s18, r5
307 ; CHECK-NEXT: mov r0, r1
308 ; CHECK-NEXT: bl tanf
309 ; CHECK-NEXT: vmov s17, r0
310 ; CHECK-NEXT: mov r0, r4
311 ; CHECK-NEXT: bl tanf
312 ; CHECK-NEXT: vmov s16, r0
313 ; CHECK-NEXT: vmov q0, q4
314 ; CHECK-NEXT: vpop {d8, d9}
315 ; CHECK-NEXT: pop {r4, r5, r7, pc}
317 %0 = call fast <4 x float> @llvm.tan.v4f32(<4 x float> %src)
; tan_float16_t: fast llvm.tan.v8f16 on an <8 x half> argument.
; The expected assembly copies the input to q4 and makes eight calls to tanf.
; Each half lane is converted to f32 (vcvtb/vcvtt.f32.f16), passed and
; returned in r0, and converted back (vcvtb/vcvtt.f16.f32) into s20-s23;
; q5 is then copied to q0.
321 define arm_aapcs_vfpcc <8 x half> @tan_float16_t(<8 x half> %src) {
322 ; CHECK-LABEL: tan_float16_t:
323 ; CHECK: @ %bb.0: @ %entry
324 ; CHECK-NEXT: .save {r7, lr}
325 ; CHECK-NEXT: push {r7, lr}
326 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
327 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
328 ; CHECK-NEXT: vmov q4, q0
329 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16
330 ; CHECK-NEXT: vmov r0, s0
331 ; CHECK-NEXT: bl tanf
332 ; CHECK-NEXT: vcvtt.f32.f16 s0, s16
333 ; CHECK-NEXT: vmov s16, r0
334 ; CHECK-NEXT: vmov r1, s0
335 ; CHECK-NEXT: mov r0, r1
336 ; CHECK-NEXT: bl tanf
337 ; CHECK-NEXT: vmov s0, r0
338 ; CHECK-NEXT: vcvtb.f16.f32 s20, s16
339 ; CHECK-NEXT: vcvtt.f16.f32 s20, s0
340 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17
341 ; CHECK-NEXT: vmov r0, s0
342 ; CHECK-NEXT: bl tanf
343 ; CHECK-NEXT: vmov s0, r0
344 ; CHECK-NEXT: vcvtb.f16.f32 s21, s0
345 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17
346 ; CHECK-NEXT: vmov r0, s0
347 ; CHECK-NEXT: bl tanf
348 ; CHECK-NEXT: vmov s0, r0
349 ; CHECK-NEXT: vcvtt.f16.f32 s21, s0
350 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18
351 ; CHECK-NEXT: vmov r0, s0
352 ; CHECK-NEXT: bl tanf
353 ; CHECK-NEXT: vmov s0, r0
354 ; CHECK-NEXT: vcvtb.f16.f32 s22, s0
355 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18
356 ; CHECK-NEXT: vmov r0, s0
357 ; CHECK-NEXT: bl tanf
358 ; CHECK-NEXT: vmov s0, r0
359 ; CHECK-NEXT: vcvtt.f16.f32 s22, s0
360 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19
361 ; CHECK-NEXT: vmov r0, s0
362 ; CHECK-NEXT: bl tanf
363 ; CHECK-NEXT: vmov s0, r0
364 ; CHECK-NEXT: vcvtb.f16.f32 s23, s0
365 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19
366 ; CHECK-NEXT: vmov r0, s0
367 ; CHECK-NEXT: bl tanf
368 ; CHECK-NEXT: vmov s0, r0
369 ; CHECK-NEXT: vcvtt.f16.f32 s23, s0
370 ; CHECK-NEXT: vmov q0, q5
371 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
372 ; CHECK-NEXT: pop {r7, pc}
374 %0 = call fast <8 x half> @llvm.tan.v8f16(<8 x half> %src)
; tan_float64_t: fast llvm.tan.v2f64 on a <2 x double> argument.
; The expected assembly saves r7/lr and d8/d9, copies the input to q4, and
; handles the two doubles separately: d9 and then d8 are moved out to core
; register pairs and written back from r0/r1 before q4 is copied to q0.
378 define arm_aapcs_vfpcc <2 x double> @tan_float64_t(<2 x double> %src) {
379 ; CHECK-LABEL: tan_float64_t:
380 ; CHECK: @ %bb.0: @ %entry
381 ; CHECK-NEXT: .save {r7, lr}
382 ; CHECK-NEXT: push {r7, lr}
383 ; CHECK-NEXT: .vsave {d8, d9}
384 ; CHECK-NEXT: vpush {d8, d9}
385 ; CHECK-NEXT: vmov q4, q0
386 ; CHECK-NEXT: vmov r0, r1, d9
388 ; CHECK-NEXT: vmov r2, r3, d8
389 ; CHECK-NEXT: vmov d9, r0, r1
390 ; CHECK-NEXT: mov r0, r2
391 ; CHECK-NEXT: mov r1, r3
393 ; CHECK-NEXT: vmov d8, r0, r1
394 ; CHECK-NEXT: vmov q0, q4
395 ; CHECK-NEXT: vpop {d8, d9}
396 ; CHECK-NEXT: pop {r7, pc}
398 %0 = call fast <2 x double> @llvm.tan.v2f64(<2 x double> %src)
; exp_float32_t: fast llvm.exp.v4f32 on a <4 x float> argument.
; The expected assembly copies the input to q4 and makes four calls to expf,
; one per lane, passing and receiving each value in r0; the results are
; written into s19, s18, s17 and s16 before q4 is copied to q0.
402 define arm_aapcs_vfpcc <4 x float> @exp_float32_t(<4 x float> %src) {
403 ; CHECK-LABEL: exp_float32_t:
404 ; CHECK: @ %bb.0: @ %entry
405 ; CHECK-NEXT: .save {r4, r5, r7, lr}
406 ; CHECK-NEXT: push {r4, r5, r7, lr}
407 ; CHECK-NEXT: .vsave {d8, d9}
408 ; CHECK-NEXT: vpush {d8, d9}
409 ; CHECK-NEXT: vmov q4, q0
410 ; CHECK-NEXT: vmov r0, r4, d9
411 ; CHECK-NEXT: bl expf
412 ; CHECK-NEXT: mov r5, r0
413 ; CHECK-NEXT: mov r0, r4
414 ; CHECK-NEXT: bl expf
415 ; CHECK-NEXT: vmov r4, r1, d8
416 ; CHECK-NEXT: vmov s19, r0
417 ; CHECK-NEXT: vmov s18, r5
418 ; CHECK-NEXT: mov r0, r1
419 ; CHECK-NEXT: bl expf
420 ; CHECK-NEXT: vmov s17, r0
421 ; CHECK-NEXT: mov r0, r4
422 ; CHECK-NEXT: bl expf
423 ; CHECK-NEXT: vmov s16, r0
424 ; CHECK-NEXT: vmov q0, q4
425 ; CHECK-NEXT: vpop {d8, d9}
426 ; CHECK-NEXT: pop {r4, r5, r7, pc}
428 %0 = call fast <4 x float> @llvm.exp.v4f32(<4 x float> %src)
; exp_float16_t: fast llvm.exp.v8f16 on an <8 x half> argument.
; The expected assembly copies the input to q4 and makes eight calls to expf.
; Each half lane is converted to f32 (vcvtb/vcvtt.f32.f16), passed and
; returned in r0, and converted back (vcvtb/vcvtt.f16.f32) into s20-s23;
; q5 is then copied to q0.
432 define arm_aapcs_vfpcc <8 x half> @exp_float16_t(<8 x half> %src) {
433 ; CHECK-LABEL: exp_float16_t:
434 ; CHECK: @ %bb.0: @ %entry
435 ; CHECK-NEXT: .save {r7, lr}
436 ; CHECK-NEXT: push {r7, lr}
437 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
438 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
439 ; CHECK-NEXT: vmov q4, q0
440 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16
441 ; CHECK-NEXT: vmov r0, s0
442 ; CHECK-NEXT: bl expf
443 ; CHECK-NEXT: vcvtt.f32.f16 s0, s16
444 ; CHECK-NEXT: vmov s16, r0
445 ; CHECK-NEXT: vmov r1, s0
446 ; CHECK-NEXT: mov r0, r1
447 ; CHECK-NEXT: bl expf
448 ; CHECK-NEXT: vmov s0, r0
449 ; CHECK-NEXT: vcvtb.f16.f32 s20, s16
450 ; CHECK-NEXT: vcvtt.f16.f32 s20, s0
451 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17
452 ; CHECK-NEXT: vmov r0, s0
453 ; CHECK-NEXT: bl expf
454 ; CHECK-NEXT: vmov s0, r0
455 ; CHECK-NEXT: vcvtb.f16.f32 s21, s0
456 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17
457 ; CHECK-NEXT: vmov r0, s0
458 ; CHECK-NEXT: bl expf
459 ; CHECK-NEXT: vmov s0, r0
460 ; CHECK-NEXT: vcvtt.f16.f32 s21, s0
461 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18
462 ; CHECK-NEXT: vmov r0, s0
463 ; CHECK-NEXT: bl expf
464 ; CHECK-NEXT: vmov s0, r0
465 ; CHECK-NEXT: vcvtb.f16.f32 s22, s0
466 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18
467 ; CHECK-NEXT: vmov r0, s0
468 ; CHECK-NEXT: bl expf
469 ; CHECK-NEXT: vmov s0, r0
470 ; CHECK-NEXT: vcvtt.f16.f32 s22, s0
471 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19
472 ; CHECK-NEXT: vmov r0, s0
473 ; CHECK-NEXT: bl expf
474 ; CHECK-NEXT: vmov s0, r0
475 ; CHECK-NEXT: vcvtb.f16.f32 s23, s0
476 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19
477 ; CHECK-NEXT: vmov r0, s0
478 ; CHECK-NEXT: bl expf
479 ; CHECK-NEXT: vmov s0, r0
480 ; CHECK-NEXT: vcvtt.f16.f32 s23, s0
481 ; CHECK-NEXT: vmov q0, q5
482 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
483 ; CHECK-NEXT: pop {r7, pc}
485 %0 = call fast <8 x half> @llvm.exp.v8f16(<8 x half> %src)
; exp_float64_t: fast llvm.exp.v2f64 on a <2 x double> argument.
; The expected assembly saves r7/lr and d8/d9, copies the input to q4, and
; handles the two doubles separately: d9 and then d8 are moved out to core
; register pairs and written back from r0/r1 before q4 is copied to q0.
489 define arm_aapcs_vfpcc <2 x double> @exp_float64_t(<2 x double> %src) {
490 ; CHECK-LABEL: exp_float64_t:
491 ; CHECK: @ %bb.0: @ %entry
492 ; CHECK-NEXT: .save {r7, lr}
493 ; CHECK-NEXT: push {r7, lr}
494 ; CHECK-NEXT: .vsave {d8, d9}
495 ; CHECK-NEXT: vpush {d8, d9}
496 ; CHECK-NEXT: vmov q4, q0
497 ; CHECK-NEXT: vmov r0, r1, d9
499 ; CHECK-NEXT: vmov r2, r3, d8
500 ; CHECK-NEXT: vmov d9, r0, r1
501 ; CHECK-NEXT: mov r0, r2
502 ; CHECK-NEXT: mov r1, r3
504 ; CHECK-NEXT: vmov d8, r0, r1
505 ; CHECK-NEXT: vmov q0, q4
506 ; CHECK-NEXT: vpop {d8, d9}
507 ; CHECK-NEXT: pop {r7, pc}
509 %0 = call fast <2 x double> @llvm.exp.v2f64(<2 x double> %src)
; exp2_float32_t: fast llvm.exp2.v4f32 on a <4 x float> argument.
; The expected assembly copies the input to q4 and makes four calls to exp2f,
; one per lane, passing and receiving each value in r0; the results are
; written into s19, s18, s17 and s16 before q4 is copied to q0.
513 define arm_aapcs_vfpcc <4 x float> @exp2_float32_t(<4 x float> %src) {
514 ; CHECK-LABEL: exp2_float32_t:
515 ; CHECK: @ %bb.0: @ %entry
516 ; CHECK-NEXT: .save {r4, r5, r7, lr}
517 ; CHECK-NEXT: push {r4, r5, r7, lr}
518 ; CHECK-NEXT: .vsave {d8, d9}
519 ; CHECK-NEXT: vpush {d8, d9}
520 ; CHECK-NEXT: vmov q4, q0
521 ; CHECK-NEXT: vmov r0, r4, d9
522 ; CHECK-NEXT: bl exp2f
523 ; CHECK-NEXT: mov r5, r0
524 ; CHECK-NEXT: mov r0, r4
525 ; CHECK-NEXT: bl exp2f
526 ; CHECK-NEXT: vmov r4, r1, d8
527 ; CHECK-NEXT: vmov s19, r0
528 ; CHECK-NEXT: vmov s18, r5
529 ; CHECK-NEXT: mov r0, r1
530 ; CHECK-NEXT: bl exp2f
531 ; CHECK-NEXT: vmov s17, r0
532 ; CHECK-NEXT: mov r0, r4
533 ; CHECK-NEXT: bl exp2f
534 ; CHECK-NEXT: vmov s16, r0
535 ; CHECK-NEXT: vmov q0, q4
536 ; CHECK-NEXT: vpop {d8, d9}
537 ; CHECK-NEXT: pop {r4, r5, r7, pc}
539 %0 = call fast <4 x float> @llvm.exp2.v4f32(<4 x float> %src)
; exp2_float16_t: fast llvm.exp2.v8f16 on an <8 x half> argument.
; The expected assembly copies the input to q4 and makes eight calls to
; exp2f. Each half lane is converted to f32 (vcvtb/vcvtt.f32.f16), passed and
; returned in r0, and converted back (vcvtb/vcvtt.f16.f32) into s20-s23;
; q5 is then copied to q0.
543 define arm_aapcs_vfpcc <8 x half> @exp2_float16_t(<8 x half> %src) {
544 ; CHECK-LABEL: exp2_float16_t:
545 ; CHECK: @ %bb.0: @ %entry
546 ; CHECK-NEXT: .save {r7, lr}
547 ; CHECK-NEXT: push {r7, lr}
548 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
549 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
550 ; CHECK-NEXT: vmov q4, q0
551 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16
552 ; CHECK-NEXT: vmov r0, s0
553 ; CHECK-NEXT: bl exp2f
554 ; CHECK-NEXT: vcvtt.f32.f16 s0, s16
555 ; CHECK-NEXT: vmov s16, r0
556 ; CHECK-NEXT: vmov r1, s0
557 ; CHECK-NEXT: mov r0, r1
558 ; CHECK-NEXT: bl exp2f
559 ; CHECK-NEXT: vmov s0, r0
560 ; CHECK-NEXT: vcvtb.f16.f32 s20, s16
561 ; CHECK-NEXT: vcvtt.f16.f32 s20, s0
562 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17
563 ; CHECK-NEXT: vmov r0, s0
564 ; CHECK-NEXT: bl exp2f
565 ; CHECK-NEXT: vmov s0, r0
566 ; CHECK-NEXT: vcvtb.f16.f32 s21, s0
567 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17
568 ; CHECK-NEXT: vmov r0, s0
569 ; CHECK-NEXT: bl exp2f
570 ; CHECK-NEXT: vmov s0, r0
571 ; CHECK-NEXT: vcvtt.f16.f32 s21, s0
572 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18
573 ; CHECK-NEXT: vmov r0, s0
574 ; CHECK-NEXT: bl exp2f
575 ; CHECK-NEXT: vmov s0, r0
576 ; CHECK-NEXT: vcvtb.f16.f32 s22, s0
577 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18
578 ; CHECK-NEXT: vmov r0, s0
579 ; CHECK-NEXT: bl exp2f
580 ; CHECK-NEXT: vmov s0, r0
581 ; CHECK-NEXT: vcvtt.f16.f32 s22, s0
582 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19
583 ; CHECK-NEXT: vmov r0, s0
584 ; CHECK-NEXT: bl exp2f
585 ; CHECK-NEXT: vmov s0, r0
586 ; CHECK-NEXT: vcvtb.f16.f32 s23, s0
587 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19
588 ; CHECK-NEXT: vmov r0, s0
589 ; CHECK-NEXT: bl exp2f
590 ; CHECK-NEXT: vmov s0, r0
591 ; CHECK-NEXT: vcvtt.f16.f32 s23, s0
592 ; CHECK-NEXT: vmov q0, q5
593 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
594 ; CHECK-NEXT: pop {r7, pc}
596 %0 = call fast <8 x half> @llvm.exp2.v8f16(<8 x half> %src)
; exp2_float64_t: fast llvm.exp2.v2f64 on a <2 x double> argument.
; The expected assembly copies the input to q4 and makes two calls to exp2,
; first for d9 and then for d8, passing and receiving each double in the
; r0/r1 pair; the results overwrite d9 and d8 and q4 is copied to q0.
600 define arm_aapcs_vfpcc <2 x double> @exp2_float64_t(<2 x double> %src) {
601 ; CHECK-LABEL: exp2_float64_t:
602 ; CHECK: @ %bb.0: @ %entry
603 ; CHECK-NEXT: .save {r7, lr}
604 ; CHECK-NEXT: push {r7, lr}
605 ; CHECK-NEXT: .vsave {d8, d9}
606 ; CHECK-NEXT: vpush {d8, d9}
607 ; CHECK-NEXT: vmov q4, q0
608 ; CHECK-NEXT: vmov r0, r1, d9
609 ; CHECK-NEXT: bl exp2
610 ; CHECK-NEXT: vmov r2, r3, d8
611 ; CHECK-NEXT: vmov d9, r0, r1
612 ; CHECK-NEXT: mov r0, r2
613 ; CHECK-NEXT: mov r1, r3
614 ; CHECK-NEXT: bl exp2
615 ; CHECK-NEXT: vmov d8, r0, r1
616 ; CHECK-NEXT: vmov q0, q4
617 ; CHECK-NEXT: vpop {d8, d9}
618 ; CHECK-NEXT: pop {r7, pc}
620 %0 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %src)
; log_float32_t: fast llvm.log.v4f32 on a <4 x float> argument.
; The expected assembly copies the input to q4 and makes four calls to logf,
; one per lane, passing and receiving each value in r0; the results are
; written into s19, s18, s17 and s16 before q4 is copied to q0.
624 define arm_aapcs_vfpcc <4 x float> @log_float32_t(<4 x float> %src) {
625 ; CHECK-LABEL: log_float32_t:
626 ; CHECK: @ %bb.0: @ %entry
627 ; CHECK-NEXT: .save {r4, r5, r7, lr}
628 ; CHECK-NEXT: push {r4, r5, r7, lr}
629 ; CHECK-NEXT: .vsave {d8, d9}
630 ; CHECK-NEXT: vpush {d8, d9}
631 ; CHECK-NEXT: vmov q4, q0
632 ; CHECK-NEXT: vmov r0, r4, d9
633 ; CHECK-NEXT: bl logf
634 ; CHECK-NEXT: mov r5, r0
635 ; CHECK-NEXT: mov r0, r4
636 ; CHECK-NEXT: bl logf
637 ; CHECK-NEXT: vmov r4, r1, d8
638 ; CHECK-NEXT: vmov s19, r0
639 ; CHECK-NEXT: vmov s18, r5
640 ; CHECK-NEXT: mov r0, r1
641 ; CHECK-NEXT: bl logf
642 ; CHECK-NEXT: vmov s17, r0
643 ; CHECK-NEXT: mov r0, r4
644 ; CHECK-NEXT: bl logf
645 ; CHECK-NEXT: vmov s16, r0
646 ; CHECK-NEXT: vmov q0, q4
647 ; CHECK-NEXT: vpop {d8, d9}
648 ; CHECK-NEXT: pop {r4, r5, r7, pc}
650 %0 = call fast <4 x float> @llvm.log.v4f32(<4 x float> %src)
; log_float16_t: fast llvm.log.v8f16 on an <8 x half> argument.
; The expected assembly copies the input to q4 and makes eight calls to logf.
; Each half lane is converted to f32 (vcvtb/vcvtt.f32.f16), passed and
; returned in r0, and converted back (vcvtb/vcvtt.f16.f32) into s20-s23;
; q5 is then copied to q0.
654 define arm_aapcs_vfpcc <8 x half> @log_float16_t(<8 x half> %src) {
655 ; CHECK-LABEL: log_float16_t:
656 ; CHECK: @ %bb.0: @ %entry
657 ; CHECK-NEXT: .save {r7, lr}
658 ; CHECK-NEXT: push {r7, lr}
659 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
660 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
661 ; CHECK-NEXT: vmov q4, q0
662 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16
663 ; CHECK-NEXT: vmov r0, s0
664 ; CHECK-NEXT: bl logf
665 ; CHECK-NEXT: vcvtt.f32.f16 s0, s16
666 ; CHECK-NEXT: vmov s16, r0
667 ; CHECK-NEXT: vmov r1, s0
668 ; CHECK-NEXT: mov r0, r1
669 ; CHECK-NEXT: bl logf
670 ; CHECK-NEXT: vmov s0, r0
671 ; CHECK-NEXT: vcvtb.f16.f32 s20, s16
672 ; CHECK-NEXT: vcvtt.f16.f32 s20, s0
673 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17
674 ; CHECK-NEXT: vmov r0, s0
675 ; CHECK-NEXT: bl logf
676 ; CHECK-NEXT: vmov s0, r0
677 ; CHECK-NEXT: vcvtb.f16.f32 s21, s0
678 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17
679 ; CHECK-NEXT: vmov r0, s0
680 ; CHECK-NEXT: bl logf
681 ; CHECK-NEXT: vmov s0, r0
682 ; CHECK-NEXT: vcvtt.f16.f32 s21, s0
683 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18
684 ; CHECK-NEXT: vmov r0, s0
685 ; CHECK-NEXT: bl logf
686 ; CHECK-NEXT: vmov s0, r0
687 ; CHECK-NEXT: vcvtb.f16.f32 s22, s0
688 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18
689 ; CHECK-NEXT: vmov r0, s0
690 ; CHECK-NEXT: bl logf
691 ; CHECK-NEXT: vmov s0, r0
692 ; CHECK-NEXT: vcvtt.f16.f32 s22, s0
693 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19
694 ; CHECK-NEXT: vmov r0, s0
695 ; CHECK-NEXT: bl logf
696 ; CHECK-NEXT: vmov s0, r0
697 ; CHECK-NEXT: vcvtb.f16.f32 s23, s0
698 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19
699 ; CHECK-NEXT: vmov r0, s0
700 ; CHECK-NEXT: bl logf
701 ; CHECK-NEXT: vmov s0, r0
702 ; CHECK-NEXT: vcvtt.f16.f32 s23, s0
703 ; CHECK-NEXT: vmov q0, q5
704 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
705 ; CHECK-NEXT: pop {r7, pc}
707 %0 = call fast <8 x half> @llvm.log.v8f16(<8 x half> %src)
; log_float64_t: fast llvm.log.v2f64 on a <2 x double> argument.
; The expected assembly saves r7/lr and d8/d9, copies the input to q4, and
; handles the two doubles separately: d9 and then d8 are moved out to core
; register pairs and written back from r0/r1 before q4 is copied to q0.
711 define arm_aapcs_vfpcc <2 x double> @log_float64_t(<2 x double> %src) {
712 ; CHECK-LABEL: log_float64_t:
713 ; CHECK: @ %bb.0: @ %entry
714 ; CHECK-NEXT: .save {r7, lr}
715 ; CHECK-NEXT: push {r7, lr}
716 ; CHECK-NEXT: .vsave {d8, d9}
717 ; CHECK-NEXT: vpush {d8, d9}
718 ; CHECK-NEXT: vmov q4, q0
719 ; CHECK-NEXT: vmov r0, r1, d9
721 ; CHECK-NEXT: vmov r2, r3, d8
722 ; CHECK-NEXT: vmov d9, r0, r1
723 ; CHECK-NEXT: mov r0, r2
724 ; CHECK-NEXT: mov r1, r3
726 ; CHECK-NEXT: vmov d8, r0, r1
727 ; CHECK-NEXT: vmov q0, q4
728 ; CHECK-NEXT: vpop {d8, d9}
729 ; CHECK-NEXT: pop {r7, pc}
731 %0 = call fast <2 x double> @llvm.log.v2f64(<2 x double> %src)
; log2_float32_t: fast llvm.log2.v4f32 on a <4 x float> argument.
; The expected assembly copies the input to q4 and makes four calls to log2f,
; one per lane, passing and receiving each value in r0; the results are
; written into s19, s18, s17 and s16 before q4 is copied to q0.
735 define arm_aapcs_vfpcc <4 x float> @log2_float32_t(<4 x float> %src) {
736 ; CHECK-LABEL: log2_float32_t:
737 ; CHECK: @ %bb.0: @ %entry
738 ; CHECK-NEXT: .save {r4, r5, r7, lr}
739 ; CHECK-NEXT: push {r4, r5, r7, lr}
740 ; CHECK-NEXT: .vsave {d8, d9}
741 ; CHECK-NEXT: vpush {d8, d9}
742 ; CHECK-NEXT: vmov q4, q0
743 ; CHECK-NEXT: vmov r0, r4, d9
744 ; CHECK-NEXT: bl log2f
745 ; CHECK-NEXT: mov r5, r0
746 ; CHECK-NEXT: mov r0, r4
747 ; CHECK-NEXT: bl log2f
748 ; CHECK-NEXT: vmov r4, r1, d8
749 ; CHECK-NEXT: vmov s19, r0
750 ; CHECK-NEXT: vmov s18, r5
751 ; CHECK-NEXT: mov r0, r1
752 ; CHECK-NEXT: bl log2f
753 ; CHECK-NEXT: vmov s17, r0
754 ; CHECK-NEXT: mov r0, r4
755 ; CHECK-NEXT: bl log2f
756 ; CHECK-NEXT: vmov s16, r0
757 ; CHECK-NEXT: vmov q0, q4
758 ; CHECK-NEXT: vpop {d8, d9}
759 ; CHECK-NEXT: pop {r4, r5, r7, pc}
761 %0 = call fast <4 x float> @llvm.log2.v4f32(<4 x float> %src)
; log2_float16_t: fast llvm.log2.v8f16 on an <8 x half> argument.
; The expected assembly copies the input to q4 and makes eight calls to
; log2f. Each half lane is converted to f32 (vcvtb/vcvtt.f32.f16), passed and
; returned in r0, and converted back (vcvtb/vcvtt.f16.f32) into s20-s23;
; q5 is then copied to q0.
765 define arm_aapcs_vfpcc <8 x half> @log2_float16_t(<8 x half> %src) {
766 ; CHECK-LABEL: log2_float16_t:
767 ; CHECK: @ %bb.0: @ %entry
768 ; CHECK-NEXT: .save {r7, lr}
769 ; CHECK-NEXT: push {r7, lr}
770 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
771 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
772 ; CHECK-NEXT: vmov q4, q0
773 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16
774 ; CHECK-NEXT: vmov r0, s0
775 ; CHECK-NEXT: bl log2f
776 ; CHECK-NEXT: vcvtt.f32.f16 s0, s16
777 ; CHECK-NEXT: vmov s16, r0
778 ; CHECK-NEXT: vmov r1, s0
779 ; CHECK-NEXT: mov r0, r1
780 ; CHECK-NEXT: bl log2f
781 ; CHECK-NEXT: vmov s0, r0
782 ; CHECK-NEXT: vcvtb.f16.f32 s20, s16
783 ; CHECK-NEXT: vcvtt.f16.f32 s20, s0
784 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17
785 ; CHECK-NEXT: vmov r0, s0
786 ; CHECK-NEXT: bl log2f
787 ; CHECK-NEXT: vmov s0, r0
788 ; CHECK-NEXT: vcvtb.f16.f32 s21, s0
789 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17
790 ; CHECK-NEXT: vmov r0, s0
791 ; CHECK-NEXT: bl log2f
792 ; CHECK-NEXT: vmov s0, r0
793 ; CHECK-NEXT: vcvtt.f16.f32 s21, s0
794 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18
795 ; CHECK-NEXT: vmov r0, s0
796 ; CHECK-NEXT: bl log2f
797 ; CHECK-NEXT: vmov s0, r0
798 ; CHECK-NEXT: vcvtb.f16.f32 s22, s0
799 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18
800 ; CHECK-NEXT: vmov r0, s0
801 ; CHECK-NEXT: bl log2f
802 ; CHECK-NEXT: vmov s0, r0
803 ; CHECK-NEXT: vcvtt.f16.f32 s22, s0
804 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19
805 ; CHECK-NEXT: vmov r0, s0
806 ; CHECK-NEXT: bl log2f
807 ; CHECK-NEXT: vmov s0, r0
808 ; CHECK-NEXT: vcvtb.f16.f32 s23, s0
809 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19
810 ; CHECK-NEXT: vmov r0, s0
811 ; CHECK-NEXT: bl log2f
812 ; CHECK-NEXT: vmov s0, r0
813 ; CHECK-NEXT: vcvtt.f16.f32 s23, s0
814 ; CHECK-NEXT: vmov q0, q5
815 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
816 ; CHECK-NEXT: pop {r7, pc}
818 %0 = call fast <8 x half> @llvm.log2.v8f16(<8 x half> %src)
; log2_float64_t: fast llvm.log2.v2f64 on a <2 x double> argument.
; The expected assembly copies the input to q4 and makes two calls to log2,
; first for d9 and then for d8, passing and receiving each double in the
; r0/r1 pair; the results overwrite d9 and d8 and q4 is copied to q0.
822 define arm_aapcs_vfpcc <2 x double> @log2_float64_t(<2 x double> %src) {
823 ; CHECK-LABEL: log2_float64_t:
824 ; CHECK: @ %bb.0: @ %entry
825 ; CHECK-NEXT: .save {r7, lr}
826 ; CHECK-NEXT: push {r7, lr}
827 ; CHECK-NEXT: .vsave {d8, d9}
828 ; CHECK-NEXT: vpush {d8, d9}
829 ; CHECK-NEXT: vmov q4, q0
830 ; CHECK-NEXT: vmov r0, r1, d9
831 ; CHECK-NEXT: bl log2
832 ; CHECK-NEXT: vmov r2, r3, d8
833 ; CHECK-NEXT: vmov d9, r0, r1
834 ; CHECK-NEXT: mov r0, r2
835 ; CHECK-NEXT: mov r1, r3
836 ; CHECK-NEXT: bl log2
837 ; CHECK-NEXT: vmov d8, r0, r1
838 ; CHECK-NEXT: vmov q0, q4
839 ; CHECK-NEXT: vpop {d8, d9}
840 ; CHECK-NEXT: pop {r7, pc}
842 %0 = call fast <2 x double> @llvm.log2.v2f64(<2 x double> %src)
; log10_float32_t: fast llvm.log10.v4f32 on a <4 x float> argument.
; The expected assembly copies the input to q4 and makes four calls to
; log10f, one per lane, passing and receiving each value in r0; the results
; are written into s19, s18, s17 and s16 before q4 is copied to q0.
846 define arm_aapcs_vfpcc <4 x float> @log10_float32_t(<4 x float> %src) {
847 ; CHECK-LABEL: log10_float32_t:
848 ; CHECK: @ %bb.0: @ %entry
849 ; CHECK-NEXT: .save {r4, r5, r7, lr}
850 ; CHECK-NEXT: push {r4, r5, r7, lr}
851 ; CHECK-NEXT: .vsave {d8, d9}
852 ; CHECK-NEXT: vpush {d8, d9}
853 ; CHECK-NEXT: vmov q4, q0
854 ; CHECK-NEXT: vmov r0, r4, d9
855 ; CHECK-NEXT: bl log10f
856 ; CHECK-NEXT: mov r5, r0
857 ; CHECK-NEXT: mov r0, r4
858 ; CHECK-NEXT: bl log10f
859 ; CHECK-NEXT: vmov r4, r1, d8
860 ; CHECK-NEXT: vmov s19, r0
861 ; CHECK-NEXT: vmov s18, r5
862 ; CHECK-NEXT: mov r0, r1
863 ; CHECK-NEXT: bl log10f
864 ; CHECK-NEXT: vmov s17, r0
865 ; CHECK-NEXT: mov r0, r4
866 ; CHECK-NEXT: bl log10f
867 ; CHECK-NEXT: vmov s16, r0
868 ; CHECK-NEXT: vmov q0, q4
869 ; CHECK-NEXT: vpop {d8, d9}
870 ; CHECK-NEXT: pop {r4, r5, r7, pc}
872 %0 = call fast <4 x float> @llvm.log10.v4f32(<4 x float> %src)
; log10_float16_t: fast llvm.log10.v8f16 on an <8 x half> argument.
; The expected assembly copies the input to q4 and makes eight calls to
; log10f. Each half lane is converted to f32 (vcvtb/vcvtt.f32.f16), passed
; and returned in r0, and converted back (vcvtb/vcvtt.f16.f32) into s20-s23;
; q5 is then copied to q0.
876 define arm_aapcs_vfpcc <8 x half> @log10_float16_t(<8 x half> %src) {
877 ; CHECK-LABEL: log10_float16_t:
878 ; CHECK: @ %bb.0: @ %entry
879 ; CHECK-NEXT: .save {r7, lr}
880 ; CHECK-NEXT: push {r7, lr}
881 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
882 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
883 ; CHECK-NEXT: vmov q4, q0
884 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16
885 ; CHECK-NEXT: vmov r0, s0
886 ; CHECK-NEXT: bl log10f
887 ; CHECK-NEXT: vcvtt.f32.f16 s0, s16
888 ; CHECK-NEXT: vmov s16, r0
889 ; CHECK-NEXT: vmov r1, s0
890 ; CHECK-NEXT: mov r0, r1
891 ; CHECK-NEXT: bl log10f
892 ; CHECK-NEXT: vmov s0, r0
893 ; CHECK-NEXT: vcvtb.f16.f32 s20, s16
894 ; CHECK-NEXT: vcvtt.f16.f32 s20, s0
895 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17
896 ; CHECK-NEXT: vmov r0, s0
897 ; CHECK-NEXT: bl log10f
898 ; CHECK-NEXT: vmov s0, r0
899 ; CHECK-NEXT: vcvtb.f16.f32 s21, s0
900 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17
901 ; CHECK-NEXT: vmov r0, s0
902 ; CHECK-NEXT: bl log10f
903 ; CHECK-NEXT: vmov s0, r0
904 ; CHECK-NEXT: vcvtt.f16.f32 s21, s0
905 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18
906 ; CHECK-NEXT: vmov r0, s0
907 ; CHECK-NEXT: bl log10f
908 ; CHECK-NEXT: vmov s0, r0
909 ; CHECK-NEXT: vcvtb.f16.f32 s22, s0
910 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18
911 ; CHECK-NEXT: vmov r0, s0
912 ; CHECK-NEXT: bl log10f
913 ; CHECK-NEXT: vmov s0, r0
914 ; CHECK-NEXT: vcvtt.f16.f32 s22, s0
915 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19
916 ; CHECK-NEXT: vmov r0, s0
917 ; CHECK-NEXT: bl log10f
918 ; CHECK-NEXT: vmov s0, r0
919 ; CHECK-NEXT: vcvtb.f16.f32 s23, s0
920 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19
921 ; CHECK-NEXT: vmov r0, s0
922 ; CHECK-NEXT: bl log10f
923 ; CHECK-NEXT: vmov s0, r0
924 ; CHECK-NEXT: vcvtt.f16.f32 s23, s0
925 ; CHECK-NEXT: vmov q0, q5
926 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
927 ; CHECK-NEXT: pop {r7, pc}
929 %0 = call fast <8 x half> @llvm.log10.v8f16(<8 x half> %src)
; log10_float64_t: fast llvm.log10.v2f64 on a <2 x double> argument.
; The expected assembly copies the input to q4 and makes two calls to log10,
; first for d9 and then for d8, passing and receiving each double in the
; r0/r1 pair; the results overwrite d9 and d8 and q4 is copied to q0.
933 define arm_aapcs_vfpcc <2 x double> @log10_float64_t(<2 x double> %src) {
934 ; CHECK-LABEL: log10_float64_t:
935 ; CHECK: @ %bb.0: @ %entry
936 ; CHECK-NEXT: .save {r7, lr}
937 ; CHECK-NEXT: push {r7, lr}
938 ; CHECK-NEXT: .vsave {d8, d9}
939 ; CHECK-NEXT: vpush {d8, d9}
940 ; CHECK-NEXT: vmov q4, q0
941 ; CHECK-NEXT: vmov r0, r1, d9
942 ; CHECK-NEXT: bl log10
943 ; CHECK-NEXT: vmov r2, r3, d8
944 ; CHECK-NEXT: vmov d9, r0, r1
945 ; CHECK-NEXT: mov r0, r2
946 ; CHECK-NEXT: mov r1, r3
947 ; CHECK-NEXT: bl log10
948 ; CHECK-NEXT: vmov d8, r0, r1
949 ; CHECK-NEXT: vmov q0, q4
950 ; CHECK-NEXT: vpop {d8, d9}
951 ; CHECK-NEXT: pop {r7, pc}
953 %0 = call fast <2 x double> @llvm.log10.v2f64(<2 x double> %src)
; pow_float32_t: fast llvm.pow.v4f32 on two <4 x float> arguments.
; The expected assembly copies q0 to q5 and q1 to q4, then makes four calls
; to powf, one per lane, with the lane taken from q5 in r0 and the lane taken
; from q4 in r1. Results are written into s19, s18, s17 and s16 and q4 is
; copied to q0.
957 define arm_aapcs_vfpcc <4 x float> @pow_float32_t(<4 x float> %src1, <4 x float> %src2) {
958 ; CHECK-LABEL: pow_float32_t:
959 ; CHECK: @ %bb.0: @ %entry
960 ; CHECK-NEXT: .save {r4, r5, r6, lr}
961 ; CHECK-NEXT: push {r4, r5, r6, lr}
962 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
963 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
964 ; CHECK-NEXT: vmov q4, q1
965 ; CHECK-NEXT: vmov q5, q0
966 ; CHECK-NEXT: vmov r0, r4, d11
967 ; CHECK-NEXT: vmov r1, r5, d9
968 ; CHECK-NEXT: bl powf
969 ; CHECK-NEXT: mov r6, r0
970 ; CHECK-NEXT: mov r0, r4
971 ; CHECK-NEXT: mov r1, r5
972 ; CHECK-NEXT: bl powf
973 ; CHECK-NEXT: vmov r4, r2, d10
974 ; CHECK-NEXT: vmov r5, r1, d8
975 ; CHECK-NEXT: vmov s19, r0
976 ; CHECK-NEXT: vmov s18, r6
977 ; CHECK-NEXT: mov r0, r2
978 ; CHECK-NEXT: bl powf
979 ; CHECK-NEXT: vmov s17, r0
980 ; CHECK-NEXT: mov r0, r4
981 ; CHECK-NEXT: mov r1, r5
982 ; CHECK-NEXT: bl powf
983 ; CHECK-NEXT: vmov s16, r0
984 ; CHECK-NEXT: vmov q0, q4
985 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
986 ; CHECK-NEXT: pop {r4, r5, r6, pc}
988 %0 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %src1, <4 x float> %src2)
; pow_float16_t: fast llvm.pow.v8f16 on two <8 x half> arguments.
; The expected assembly copies q0 to q5 and q1 to q4, then makes eight calls
; to powf. For each half lane both operands are converted to f32
; (vcvtb/vcvtt.f32.f16) and passed in r0 and r1; the r0 result is converted
; back (vcvtb/vcvtt.f16.f32) into s24-s27, and q6 is then copied to q0.
992 define arm_aapcs_vfpcc <8 x half> @pow_float16_t(<8 x half> %src1, <8 x half> %src2) {
993 ; CHECK-LABEL: pow_float16_t:
994 ; CHECK: @ %bb.0: @ %entry
995 ; CHECK-NEXT: .save {r7, lr}
996 ; CHECK-NEXT: push {r7, lr}
997 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
998 ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
999 ; CHECK-NEXT: vmov q5, q0
1000 ; CHECK-NEXT: vmov q4, q1
1001 ; CHECK-NEXT: vcvtb.f32.f16 s0, s20
1002 ; CHECK-NEXT: vmov r0, s0
1003 ; CHECK-NEXT: vcvtb.f32.f16 s0, s16
1004 ; CHECK-NEXT: vmov r1, s0
1005 ; CHECK-NEXT: bl powf
1006 ; CHECK-NEXT: vcvtt.f32.f16 s0, s20
1007 ; CHECK-NEXT: vmov r2, s0
1008 ; CHECK-NEXT: vcvtt.f32.f16 s0, s16
1009 ; CHECK-NEXT: vmov r1, s0
1010 ; CHECK-NEXT: vmov s16, r0
1011 ; CHECK-NEXT: mov r0, r2
1012 ; CHECK-NEXT: bl powf
1013 ; CHECK-NEXT: vmov s0, r0
1014 ; CHECK-NEXT: vcvtb.f16.f32 s24, s16
1015 ; CHECK-NEXT: vcvtt.f16.f32 s24, s0
1016 ; CHECK-NEXT: vcvtb.f32.f16 s0, s21
1017 ; CHECK-NEXT: vmov r0, s0
1018 ; CHECK-NEXT: vcvtb.f32.f16 s0, s17
1019 ; CHECK-NEXT: vmov r1, s0
1020 ; CHECK-NEXT: bl powf
1021 ; CHECK-NEXT: vmov s0, r0
1022 ; CHECK-NEXT: vcvtb.f16.f32 s25, s0
1023 ; CHECK-NEXT: vcvtt.f32.f16 s0, s21
1024 ; CHECK-NEXT: vmov r0, s0
1025 ; CHECK-NEXT: vcvtt.f32.f16 s0, s17
1026 ; CHECK-NEXT: vmov r1, s0
1027 ; CHECK-NEXT: bl powf
1028 ; CHECK-NEXT: vmov s0, r0
1029 ; CHECK-NEXT: vcvtt.f16.f32 s25, s0
1030 ; CHECK-NEXT: vcvtb.f32.f16 s0, s22
1031 ; CHECK-NEXT: vmov r0, s0
1032 ; CHECK-NEXT: vcvtb.f32.f16 s0, s18
1033 ; CHECK-NEXT: vmov r1, s0
1034 ; CHECK-NEXT: bl powf
1035 ; CHECK-NEXT: vmov s0, r0
1036 ; CHECK-NEXT: vcvtb.f16.f32 s26, s0
1037 ; CHECK-NEXT: vcvtt.f32.f16 s0, s22
1038 ; CHECK-NEXT: vmov r0, s0
1039 ; CHECK-NEXT: vcvtt.f32.f16 s0, s18
1040 ; CHECK-NEXT: vmov r1, s0
1041 ; CHECK-NEXT: bl powf
1042 ; CHECK-NEXT: vmov s0, r0
1043 ; CHECK-NEXT: vcvtt.f16.f32 s26, s0
1044 ; CHECK-NEXT: vcvtb.f32.f16 s0, s23
1045 ; CHECK-NEXT: vmov r0, s0
1046 ; CHECK-NEXT: vcvtb.f32.f16 s0, s19
1047 ; CHECK-NEXT: vmov r1, s0
1048 ; CHECK-NEXT: bl powf
1049 ; CHECK-NEXT: vmov s0, r0
1050 ; CHECK-NEXT: vcvtb.f16.f32 s27, s0
1051 ; CHECK-NEXT: vcvtt.f32.f16 s0, s23
1052 ; CHECK-NEXT: vmov r0, s0
1053 ; CHECK-NEXT: vcvtt.f32.f16 s0, s19
1054 ; CHECK-NEXT: vmov r1, s0
1055 ; CHECK-NEXT: bl powf
1056 ; CHECK-NEXT: vmov s0, r0
1057 ; CHECK-NEXT: vcvtt.f16.f32 s27, s0
1058 ; CHECK-NEXT: vmov q0, q6
1059 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
1060 ; CHECK-NEXT: pop {r7, pc}
1062 %0 = call fast <8 x half> @llvm.pow.v8f16(<8 x half> %src1, <8 x half> %src2)
; pow_float64_t: llvm.pow.v2f64 on two <2 x double> operands, called with the
; `fast` flag. The expected output below is shared by every RUN configuration.
; The operands are copied to q5 and q4, then `bl pow` is called twice: first
; with d11/d9 moved into r0-r3, then with d10/d8. The two results are written
; to d9 and d8, and q4 is copied to q0 as the return value.
; NOTE(review): the assertion lines are autogenerated (see the note at the top
; of the file); regenerate them with the update script instead of hand-editing.
1066 define arm_aapcs_vfpcc <2 x double> @pow_float64_t(<2 x double> %src1, <2 x double> %src2) {
1067 ; CHECK-LABEL: pow_float64_t:
1068 ; CHECK: @ %bb.0: @ %entry
1069 ; CHECK-NEXT: .save {r7, lr}
1070 ; CHECK-NEXT: push {r7, lr}
1071 ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
1072 ; CHECK-NEXT: vpush {d8, d9, d10, d11}
1073 ; CHECK-NEXT: vmov q4, q1
1074 ; CHECK-NEXT: vmov q5, q0
1075 ; CHECK-NEXT: vmov r0, r1, d11
1076 ; CHECK-NEXT: vmov r2, r3, d9
1077 ; CHECK-NEXT: bl pow
1078 ; CHECK-NEXT: vmov lr, r12, d10
1079 ; CHECK-NEXT: vmov r2, r3, d8
1080 ; CHECK-NEXT: vmov d9, r0, r1
1081 ; CHECK-NEXT: mov r0, lr
1082 ; CHECK-NEXT: mov r1, r12
1083 ; CHECK-NEXT: bl pow
1084 ; CHECK-NEXT: vmov d8, r0, r1
1085 ; CHECK-NEXT: vmov q0, q4
1086 ; CHECK-NEXT: vpop {d8, d9, d10, d11}
1087 ; CHECK-NEXT: pop {r7, pc}
1089 %0 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %src1, <2 x double> %src2)
; copysign_float32_t: llvm.copysign.v4f32 on two <4 x float> operands, called
; with the `fast` flag. The expected output differs by -mattr setting:
;  * +mve,+fullfp16: all lanes of d0-d3 are moved to core registers. For each
;    lane, a right shift by 31 takes the top bit of the element from d2/d3 and
;    `bfi ..., #31, #1` inserts it into bit 31 of the matching element from
;    d0/d1. The four results are then moved back into s0-s3.
;  * +mve.fp: whole-vector bitwise ops. vbic clears 0x80000000 in every lane
;    of q0, vand keeps only 0x80000000 of every lane of q1, and vorr merges
;    the two into q0.
; NOTE(review): the assertion lines are autogenerated (see the note at the top
; of the file); regenerate them with the update script instead of hand-editing.
1093 define arm_aapcs_vfpcc <4 x float> @copysign_float32_t(<4 x float> %src1, <4 x float> %src2) {
1094 ; FULLFP16-LABEL: copysign_float32_t:
1095 ; FULLFP16: @ %bb.0: @ %entry
1096 ; FULLFP16-NEXT: .save {r4, r5, r7, lr}
1097 ; FULLFP16-NEXT: push {r4, r5, r7, lr}
1098 ; FULLFP16-NEXT: vmov r12, r1, d2
1099 ; FULLFP16-NEXT: vmov r2, lr, d3
1100 ; FULLFP16-NEXT: vmov r3, r0, d0
1101 ; FULLFP16-NEXT: vmov r4, r5, d1
1102 ; FULLFP16-NEXT: lsrs r1, r1, #31
1103 ; FULLFP16-NEXT: bfi r0, r1, #31, #1
1104 ; FULLFP16-NEXT: lsrs r1, r2, #31
1105 ; FULLFP16-NEXT: bfi r4, r1, #31, #1
1106 ; FULLFP16-NEXT: lsr.w r1, lr, #31
1107 ; FULLFP16-NEXT: bfi r5, r1, #31, #1
1108 ; FULLFP16-NEXT: lsr.w r1, r12, #31
1109 ; FULLFP16-NEXT: bfi r3, r1, #31, #1
1110 ; FULLFP16-NEXT: vmov s2, r4
1111 ; FULLFP16-NEXT: vmov s3, r5
1112 ; FULLFP16-NEXT: vmov s1, r0
1113 ; FULLFP16-NEXT: vmov s0, r3
1114 ; FULLFP16-NEXT: pop {r4, r5, r7, pc}
1116 ; MVEFP-LABEL: copysign_float32_t:
1117 ; MVEFP: @ %bb.0: @ %entry
1118 ; MVEFP-NEXT: vmov.i32 q2, #0x80000000
1119 ; MVEFP-NEXT: vbic.i32 q0, #0x80000000
1120 ; MVEFP-NEXT: vand q1, q1, q2
1121 ; MVEFP-NEXT: vorr q0, q0, q1
1124 %0 = call fast <4 x float> @llvm.copysign.v4f32(<4 x float> %src1, <4 x float> %src2)
; copysign_float16_t: llvm.copysign.v8f16 on two <8 x half> operands, called
; with the `fast` flag. The expected output differs by -mattr setting:
;  * +mve,+fullfp16: 32 bytes of stack are reserved and each half element of
;    s4-s7 (top halves extracted with vmovx.f16) is stored there with vstr.16.
;    For every lane, the byte at offset +1 of the stored half is reloaded with
;    ldrb and shifted left by 24 (lsls), which sets the flags. The matching
;    element of s0-s3 goes through vabs.f16 and vneg.f16, and `it pl` /
;    `vmovpl` replaces the negated value with the absolute value when the
;    shifted byte is non-negative. Lane pairs are recombined with vins.f16.
;  * +mve.fp: whole-vector bitwise ops. vbic clears 0x8000 in every lane of
;    q0, vand keeps only 0x8000 of every lane of q1, and vorr merges the two
;    into q0.
; NOTE(review): the assertion lines are autogenerated (see the note at the top
; of the file); regenerate them with the update script instead of hand-editing.
1128 define arm_aapcs_vfpcc <8 x half> @copysign_float16_t(<8 x half> %src1, <8 x half> %src2) {
1129 ; FULLFP16-LABEL: copysign_float16_t:
1130 ; FULLFP16: @ %bb.0: @ %entry
1131 ; FULLFP16-NEXT: .pad #32
1132 ; FULLFP16-NEXT: sub sp, #32
1133 ; FULLFP16-NEXT: vmovx.f16 s8, s4
1134 ; FULLFP16-NEXT: vstr.16 s8, [sp, #24]
1135 ; FULLFP16-NEXT: vstr.16 s4, [sp, #28]
1136 ; FULLFP16-NEXT: vmovx.f16 s4, s5
1137 ; FULLFP16-NEXT: vstr.16 s4, [sp, #16]
1138 ; FULLFP16-NEXT: vmovx.f16 s4, s6
1139 ; FULLFP16-NEXT: vstr.16 s5, [sp, #20]
1140 ; FULLFP16-NEXT: vstr.16 s4, [sp, #8]
1141 ; FULLFP16-NEXT: vmovx.f16 s4, s7
1142 ; FULLFP16-NEXT: vstr.16 s6, [sp, #12]
1143 ; FULLFP16-NEXT: vstr.16 s4, [sp]
1144 ; FULLFP16-NEXT: vstr.16 s7, [sp, #4]
1145 ; FULLFP16-NEXT: ldrb.w r0, [sp, #25]
1146 ; FULLFP16-NEXT: vmovx.f16 s4, s0
1147 ; FULLFP16-NEXT: vabs.f16 s4, s4
1148 ; FULLFP16-NEXT: vneg.f16 s6, s4
1149 ; FULLFP16-NEXT: lsls r0, r0, #24
1150 ; FULLFP16-NEXT: it pl
1151 ; FULLFP16-NEXT: vmovpl.f32 s6, s4
1152 ; FULLFP16-NEXT: ldrb.w r0, [sp, #29]
1153 ; FULLFP16-NEXT: vabs.f16 s4, s0
1154 ; FULLFP16-NEXT: vneg.f16 s0, s4
1155 ; FULLFP16-NEXT: lsls r0, r0, #24
1156 ; FULLFP16-NEXT: it pl
1157 ; FULLFP16-NEXT: vmovpl.f32 s0, s4
1158 ; FULLFP16-NEXT: ldrb.w r0, [sp, #17]
1159 ; FULLFP16-NEXT: vmovx.f16 s4, s1
1160 ; FULLFP16-NEXT: vabs.f16 s4, s4
1161 ; FULLFP16-NEXT: vins.f16 s0, s6
1162 ; FULLFP16-NEXT: vneg.f16 s6, s4
1163 ; FULLFP16-NEXT: lsls r0, r0, #24
1164 ; FULLFP16-NEXT: it pl
1165 ; FULLFP16-NEXT: vmovpl.f32 s6, s4
1166 ; FULLFP16-NEXT: ldrb.w r0, [sp, #21]
1167 ; FULLFP16-NEXT: vabs.f16 s4, s1
1168 ; FULLFP16-NEXT: vneg.f16 s1, s4
1169 ; FULLFP16-NEXT: lsls r0, r0, #24
1170 ; FULLFP16-NEXT: it pl
1171 ; FULLFP16-NEXT: vmovpl.f32 s1, s4
1172 ; FULLFP16-NEXT: ldrb.w r0, [sp, #9]
1173 ; FULLFP16-NEXT: vmovx.f16 s4, s2
1174 ; FULLFP16-NEXT: vabs.f16 s4, s4
1175 ; FULLFP16-NEXT: vins.f16 s1, s6
1176 ; FULLFP16-NEXT: vneg.f16 s6, s4
1177 ; FULLFP16-NEXT: lsls r0, r0, #24
1178 ; FULLFP16-NEXT: it pl
1179 ; FULLFP16-NEXT: vmovpl.f32 s6, s4
1180 ; FULLFP16-NEXT: ldrb.w r0, [sp, #13]
1181 ; FULLFP16-NEXT: vabs.f16 s4, s2
1182 ; FULLFP16-NEXT: vneg.f16 s2, s4
1183 ; FULLFP16-NEXT: lsls r0, r0, #24
1184 ; FULLFP16-NEXT: it pl
1185 ; FULLFP16-NEXT: vmovpl.f32 s2, s4
1186 ; FULLFP16-NEXT: ldrb.w r0, [sp, #1]
1187 ; FULLFP16-NEXT: vmovx.f16 s4, s3
1188 ; FULLFP16-NEXT: vabs.f16 s4, s4
1189 ; FULLFP16-NEXT: vins.f16 s2, s6
1190 ; FULLFP16-NEXT: vneg.f16 s6, s4
1191 ; FULLFP16-NEXT: lsls r0, r0, #24
1192 ; FULLFP16-NEXT: it pl
1193 ; FULLFP16-NEXT: vmovpl.f32 s6, s4
1194 ; FULLFP16-NEXT: ldrb.w r0, [sp, #5]
1195 ; FULLFP16-NEXT: vabs.f16 s4, s3
1196 ; FULLFP16-NEXT: vneg.f16 s3, s4
1197 ; FULLFP16-NEXT: lsls r0, r0, #24
1198 ; FULLFP16-NEXT: it pl
1199 ; FULLFP16-NEXT: vmovpl.f32 s3, s4
1200 ; FULLFP16-NEXT: vins.f16 s3, s6
1201 ; FULLFP16-NEXT: add sp, #32
1202 ; FULLFP16-NEXT: bx lr
1204 ; MVEFP-LABEL: copysign_float16_t:
1205 ; MVEFP: @ %bb.0: @ %entry
1206 ; MVEFP-NEXT: vmov.i16 q2, #0x8000
1207 ; MVEFP-NEXT: vbic.i16 q0, #0x8000
1208 ; MVEFP-NEXT: vand q1, q1, q2
1209 ; MVEFP-NEXT: vorr q0, q0, q1
1212 %0 = call fast <8 x half> @llvm.copysign.v8f16(<8 x half> %src1, <8 x half> %src2)
; copysign_float64_t: llvm.copysign.v2f64 on two <2 x double> operands, called
; with the `fast` flag. The expected output below is shared by every RUN
; configuration. d3, d2, d1 and d0 are each moved to a core-register pair.
; The first three moves all name r0 as their first destination, so only the
; value loaded from d1 is still in r0 when `vmov d1, r0, r3` rebuilds the lane.
; The second register of the d3/d2 pairs (r1, lr) is shifted right by 31 and
; inserted with `bfi ..., #31, #1` into bit 31 of the second register of the
; d1/d0 pairs (r3, r2). d1 and d0 are then reassembled from the core registers.
; NOTE(review): the assertion lines are autogenerated (see the note at the top
; of the file); the repeated r0 destination is recorded compiler output, so
; regenerate with the update script instead of hand-editing.
1216 define arm_aapcs_vfpcc <2 x double> @copysign_float64_t(<2 x double> %src1, <2 x double> %src2) {
1217 ; CHECK-LABEL: copysign_float64_t:
1218 ; CHECK: @ %bb.0: @ %entry
1219 ; CHECK-NEXT: .save {r7, lr}
1220 ; CHECK-NEXT: push {r7, lr}
1221 ; CHECK-NEXT: vmov r0, r1, d3
1222 ; CHECK-NEXT: vmov r0, lr, d2
1223 ; CHECK-NEXT: vmov r0, r3, d1
1224 ; CHECK-NEXT: vmov r12, r2, d0
1225 ; CHECK-NEXT: lsrs r1, r1, #31
1226 ; CHECK-NEXT: bfi r3, r1, #31, #1
1227 ; CHECK-NEXT: lsr.w r1, lr, #31
1228 ; CHECK-NEXT: bfi r2, r1, #31, #1
1229 ; CHECK-NEXT: vmov d1, r0, r3
1230 ; CHECK-NEXT: vmov d0, r12, r2
1231 ; CHECK-NEXT: pop {r7, pc}
1233 %0 = call fast <2 x double> @llvm.copysign.v2f64(<2 x double> %src1, <2 x double> %src2)
; Declarations of the floating-point math intrinsics called by the tests in
; this file. The same ten operations (sqrt, cos, sin, exp, exp2, log, log2,
; log10, pow, copysign) are declared once per vector type: <4 x float>,
; <8 x half> and <2 x double>. pow and copysign take two vector operands;
; the others take one.
1237 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
1238 declare <4 x float> @llvm.cos.v4f32(<4 x float>)
1239 declare <4 x float> @llvm.sin.v4f32(<4 x float>)
1240 declare <4 x float> @llvm.exp.v4f32(<4 x float>)
1241 declare <4 x float> @llvm.exp2.v4f32(<4 x float>)
1242 declare <4 x float> @llvm.log.v4f32(<4 x float>)
1243 declare <4 x float> @llvm.log2.v4f32(<4 x float>)
1244 declare <4 x float> @llvm.log10.v4f32(<4 x float>)
1245 declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
1246 declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)
1247 declare <8 x half> @llvm.sqrt.v8f16(<8 x half>)
1248 declare <8 x half> @llvm.cos.v8f16(<8 x half>)
1249 declare <8 x half> @llvm.sin.v8f16(<8 x half>)
1250 declare <8 x half> @llvm.exp.v8f16(<8 x half>)
1251 declare <8 x half> @llvm.exp2.v8f16(<8 x half>)
1252 declare <8 x half> @llvm.log.v8f16(<8 x half>)
1253 declare <8 x half> @llvm.log2.v8f16(<8 x half>)
1254 declare <8 x half> @llvm.log10.v8f16(<8 x half>)
1255 declare <8 x half> @llvm.pow.v8f16(<8 x half>, <8 x half>)
1256 declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)
1257 declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
1258 declare <2 x double> @llvm.cos.v2f64(<2 x double>)
1259 declare <2 x double> @llvm.sin.v2f64(<2 x double>)
1260 declare <2 x double> @llvm.exp.v2f64(<2 x double>)
1261 declare <2 x double> @llvm.exp2.v2f64(<2 x double>)
1262 declare <2 x double> @llvm.log.v2f64(<2 x double>)
1263 declare <2 x double> @llvm.log2.v2f64(<2 x double>)
1264 declare <2 x double> @llvm.log10.v2f64(<2 x double>)
1265 declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
1266 declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)