1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cyclone | FileCheck --check-prefixes=CHECK,CHECK-CYC %s
3 ; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cortex-a57 | FileCheck --check-prefixes=CHECK,CHECK-A57 %s
; t1: signed i32 loaded from %src and converted to float. The expected code
; loads straight into s0 and converts in place with scvtf.
5 define float @t1(ptr nocapture %src) nounwind ssp {
7 ; CHECK: // %bb.0: // %entry
8 ; CHECK-NEXT: ldr s0, [x0]
9 ; CHECK-NEXT: scvtf s0, s0
12 %tmp1 = load i32, ptr %src, align 4
13 %tmp2 = sitofp i32 %tmp1 to float
; t2: unsigned i32 loaded from %src and converted to float. The expected code
; loads straight into s0 and converts in place with ucvtf.
17 define float @t2(ptr nocapture %src) nounwind ssp {
19 ; CHECK: // %bb.0: // %entry
20 ; CHECK-NEXT: ldr s0, [x0]
21 ; CHECK-NEXT: ucvtf s0, s0
24 %tmp1 = load i32, ptr %src, align 4
25 %tmp2 = uitofp i32 %tmp1 to float
; t3: signed i64 loaded from %src and converted to double. The expected code
; loads straight into d0 and converts in place with scvtf.
29 define double @t3(ptr nocapture %src) nounwind ssp {
31 ; CHECK: // %bb.0: // %entry
32 ; CHECK-NEXT: ldr d0, [x0]
33 ; CHECK-NEXT: scvtf d0, d0
36 %tmp1 = load i64, ptr %src, align 4
37 %tmp2 = sitofp i64 %tmp1 to double
; t4: unsigned i64 loaded from %src and converted to double. The expected code
; loads straight into d0 and converts in place with ucvtf.
41 define double @t4(ptr nocapture %src) nounwind ssp {
43 ; CHECK: // %bb.0: // %entry
44 ; CHECK-NEXT: ldr d0, [x0]
45 ; CHECK-NEXT: ucvtf d0, d0
48 %tmp1 = load i64, ptr %src, align 4
49 %tmp2 = uitofp i64 %tmp1 to double
; t5: signed i32 -> double in a function marked optsize. The expected code
; loads into a GPR (w8) and converts with scvtf d0, w8.
54 define double @t5(ptr nocapture %src) nounwind ssp optsize {
56 ; CHECK: // %bb.0: // %entry
57 ; CHECK-NEXT: ldr w8, [x0]
58 ; CHECK-NEXT: scvtf d0, w8
61 %tmp1 = load i32, ptr %src, align 4
62 %tmp2 = sitofp i32 %tmp1 to double
66 ; Check that we load directly into an FP register when the loaded value is
67 ; converted to a floating-point value.
68 ; This is much faster than loading into a GPR and making the conversion
70 ; <rdar://problem/14599607>
72 ; Check the following patterns for signed/unsigned:
73 ; 1. load with scaled imm to float.
74 ; 2. load with scaled register to float.
75 ; 3. load with scaled imm to double.
76 ; 4. load with scaled register to double.
77 ; 5. load with unscaled imm to float.
78 ; 6. load with unscaled imm to double.
79 ; With loading size: 8, 16, 32, and 64-bits.
81 ; ********* 1. load with scaled imm to float. *********
; fct1: unsigned i8 at %sp0 + 1 converted to float and squared. The expected
; code loads the byte into b0 with an immediate offset and uses ucvtf s0, s0.
82 define float @fct1(ptr nocapture %sp0) {
84 ; CHECK: // %bb.0: // %entry
85 ; CHECK-NEXT: ldr b0, [x0, #1]
86 ; CHECK-NEXT: ucvtf s0, s0
87 ; CHECK-NEXT: fmul s0, s0, s0
90 %addr = getelementptr i8, ptr %sp0, i64 1
91 %pix_sp0.0.copyload = load i8, ptr %addr, align 1
92 %val = uitofp i8 %pix_sp0.0.copyload to float
93 %vmull.i = fmul float %val, %val
; fct2: unsigned i16 at element index 1 (byte offset 2) converted to float and
; squared. The expected code loads into h0 and uses ucvtf s0, s0.
97 define float @fct2(ptr nocapture %sp0) {
99 ; CHECK: // %bb.0: // %entry
100 ; CHECK-NEXT: ldr h0, [x0, #2]
101 ; CHECK-NEXT: ucvtf s0, s0
102 ; CHECK-NEXT: fmul s0, s0, s0
105 %addr = getelementptr i16, ptr %sp0, i64 1
106 %pix_sp0.0.copyload = load i16, ptr %addr, align 1
107 %val = uitofp i16 %pix_sp0.0.copyload to float
108 %vmull.i = fmul float %val, %val
; fct3: unsigned i32 at element index 1 (byte offset 4) converted to float and
; squared. The expected code loads into s0 and uses ucvtf s0, s0.
112 define float @fct3(ptr nocapture %sp0) {
114 ; CHECK: // %bb.0: // %entry
115 ; CHECK-NEXT: ldr s0, [x0, #4]
116 ; CHECK-NEXT: ucvtf s0, s0
117 ; CHECK-NEXT: fmul s0, s0, s0
120 %addr = getelementptr i32, ptr %sp0, i64 1
121 %pix_sp0.0.copyload = load i32, ptr %addr, align 1
122 %val = uitofp i32 %pix_sp0.0.copyload to float
123 %vmull.i = fmul float %val, %val
127 ; i64 -> f32 is not supported on floating point unit.
; fct4: unsigned i64 at element index 1 (byte offset 8) converted to float and
; squared. The expected code loads into a GPR (x8) and uses ucvtf s0, x8.
128 define float @fct4(ptr nocapture %sp0) {
130 ; CHECK: // %bb.0: // %entry
131 ; CHECK-NEXT: ldr x8, [x0, #8]
132 ; CHECK-NEXT: ucvtf s0, x8
133 ; CHECK-NEXT: fmul s0, s0, s0
136 %addr = getelementptr i64, ptr %sp0, i64 1
137 %pix_sp0.0.copyload = load i64, ptr %addr, align 1
138 %val = uitofp i64 %pix_sp0.0.copyload to float
139 %vmull.i = fmul float %val, %val
143 ; ********* 2. load with scaled register to float. *********
; fct5: unsigned i8 at %sp0 indexed by %offset, converted to float and squared.
; The expected code uses a register-offset load into b0, then ucvtf s0, s0.
144 define float @fct5(ptr nocapture %sp0, i64 %offset) {
146 ; CHECK: // %bb.0: // %entry
147 ; CHECK-NEXT: ldr b0, [x0, x1]
148 ; CHECK-NEXT: ucvtf s0, s0
149 ; CHECK-NEXT: fmul s0, s0, s0
152 %addr = getelementptr i8, ptr %sp0, i64 %offset
153 %pix_sp0.0.copyload = load i8, ptr %addr, align 1
154 %val = uitofp i8 %pix_sp0.0.copyload to float
155 %vmull.i = fmul float %val, %val
; fct6: unsigned i16 at %sp0 indexed by %offset, converted to float and squared.
; The expected code uses a register-offset load (lsl #1) into h0, then ucvtf.
159 define float @fct6(ptr nocapture %sp0, i64 %offset) {
161 ; CHECK: // %bb.0: // %entry
162 ; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
163 ; CHECK-NEXT: ucvtf s0, s0
164 ; CHECK-NEXT: fmul s0, s0, s0
167 %addr = getelementptr i16, ptr %sp0, i64 %offset
168 %pix_sp0.0.copyload = load i16, ptr %addr, align 1
169 %val = uitofp i16 %pix_sp0.0.copyload to float
170 %vmull.i = fmul float %val, %val
; fct7: unsigned i32 at %sp0 indexed by %offset, converted to float and squared.
; The expected code uses a register-offset load (lsl #2) into s0, then ucvtf.
174 define float @fct7(ptr nocapture %sp0, i64 %offset) {
176 ; CHECK: // %bb.0: // %entry
177 ; CHECK-NEXT: ldr s0, [x0, x1, lsl #2]
178 ; CHECK-NEXT: ucvtf s0, s0
179 ; CHECK-NEXT: fmul s0, s0, s0
182 %addr = getelementptr i32, ptr %sp0, i64 %offset
183 %pix_sp0.0.copyload = load i32, ptr %addr, align 1
184 %val = uitofp i32 %pix_sp0.0.copyload to float
185 %vmull.i = fmul float %val, %val
189 ; i64 -> f32 is not supported on floating point unit.
; fct8: unsigned i64 at %sp0 indexed by %offset, converted to float and squared.
; The expected code loads into a GPR (x8, lsl #3 addressing) and uses
; ucvtf s0, x8.
190 define float @fct8(ptr nocapture %sp0, i64 %offset) {
192 ; CHECK: // %bb.0: // %entry
193 ; CHECK-NEXT: ldr x8, [x0, x1, lsl #3]
194 ; CHECK-NEXT: ucvtf s0, x8
195 ; CHECK-NEXT: fmul s0, s0, s0
198 %addr = getelementptr i64, ptr %sp0, i64 %offset
199 %pix_sp0.0.copyload = load i64, ptr %addr, align 1
200 %val = uitofp i64 %pix_sp0.0.copyload to float
201 %vmull.i = fmul float %val, %val
206 ; ********* 3. load with scaled imm to double. *********
; fct9: unsigned i8 at %sp0 + 1 converted to double and squared. The expected
; code loads the byte into b0 and converts with ucvtf d0, d0.
207 define double @fct9(ptr nocapture %sp0) {
209 ; CHECK: // %bb.0: // %entry
210 ; CHECK-NEXT: ldr b0, [x0, #1]
211 ; CHECK-NEXT: ucvtf d0, d0
212 ; CHECK-NEXT: fmul d0, d0, d0
215 %addr = getelementptr i8, ptr %sp0, i64 1
216 %pix_sp0.0.copyload = load i8, ptr %addr, align 1
217 %val = uitofp i8 %pix_sp0.0.copyload to double
218 %vmull.i = fmul double %val, %val
; fct10: unsigned i16 at element index 1 (byte offset 2) converted to double
; and squared. The expected code loads into h0 and converts with ucvtf d0, d0.
222 define double @fct10(ptr nocapture %sp0) {
223 ; CHECK-LABEL: fct10:
224 ; CHECK: // %bb.0: // %entry
225 ; CHECK-NEXT: ldr h0, [x0, #2]
226 ; CHECK-NEXT: ucvtf d0, d0
227 ; CHECK-NEXT: fmul d0, d0, d0
230 %addr = getelementptr i16, ptr %sp0, i64 1
231 %pix_sp0.0.copyload = load i16, ptr %addr, align 1
232 %val = uitofp i16 %pix_sp0.0.copyload to double
233 %vmull.i = fmul double %val, %val
; fct11: unsigned i32 at element index 1 (byte offset 4) converted to double
; and squared. The expected code loads into s0 and converts with ucvtf d0, d0.
237 define double @fct11(ptr nocapture %sp0) {
238 ; CHECK-LABEL: fct11:
239 ; CHECK: // %bb.0: // %entry
240 ; CHECK-NEXT: ldr s0, [x0, #4]
241 ; CHECK-NEXT: ucvtf d0, d0
242 ; CHECK-NEXT: fmul d0, d0, d0
245 %addr = getelementptr i32, ptr %sp0, i64 1
246 %pix_sp0.0.copyload = load i32, ptr %addr, align 1
247 %val = uitofp i32 %pix_sp0.0.copyload to double
248 %vmull.i = fmul double %val, %val
; fct12: unsigned i64 at element index 1 (byte offset 8) converted to double
; and squared. The expected code loads into d0 and converts with ucvtf d0, d0.
252 define double @fct12(ptr nocapture %sp0) {
253 ; CHECK-LABEL: fct12:
254 ; CHECK: // %bb.0: // %entry
255 ; CHECK-NEXT: ldr d0, [x0, #8]
256 ; CHECK-NEXT: ucvtf d0, d0
257 ; CHECK-NEXT: fmul d0, d0, d0
260 %addr = getelementptr i64, ptr %sp0, i64 1
261 %pix_sp0.0.copyload = load i64, ptr %addr, align 1
262 %val = uitofp i64 %pix_sp0.0.copyload to double
263 %vmull.i = fmul double %val, %val
267 ; ********* 4. load with scaled register to double. *********
; fct13: unsigned i8 at %sp0 indexed by %offset, converted to double and
; squared. The expected code uses a register-offset load into b0, then
; ucvtf d0, d0.
268 define double @fct13(ptr nocapture %sp0, i64 %offset) {
269 ; CHECK-LABEL: fct13:
270 ; CHECK: // %bb.0: // %entry
271 ; CHECK-NEXT: ldr b0, [x0, x1]
272 ; CHECK-NEXT: ucvtf d0, d0
273 ; CHECK-NEXT: fmul d0, d0, d0
276 %addr = getelementptr i8, ptr %sp0, i64 %offset
277 %pix_sp0.0.copyload = load i8, ptr %addr, align 1
278 %val = uitofp i8 %pix_sp0.0.copyload to double
279 %vmull.i = fmul double %val, %val
; fct14: unsigned i16 at %sp0 indexed by %offset, converted to double and
; squared. The expected code uses a register-offset load (lsl #1) into h0, then
; ucvtf d0, d0.
283 define double @fct14(ptr nocapture %sp0, i64 %offset) {
284 ; CHECK-LABEL: fct14:
285 ; CHECK: // %bb.0: // %entry
286 ; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
287 ; CHECK-NEXT: ucvtf d0, d0
288 ; CHECK-NEXT: fmul d0, d0, d0
291 %addr = getelementptr i16, ptr %sp0, i64 %offset
292 %pix_sp0.0.copyload = load i16, ptr %addr, align 1
293 %val = uitofp i16 %pix_sp0.0.copyload to double
294 %vmull.i = fmul double %val, %val
; fct15: unsigned i32 at %sp0 indexed by %offset, converted to double and
; squared. The expected code uses a register-offset load (lsl #2) into s0, then
; ucvtf d0, d0.
298 define double @fct15(ptr nocapture %sp0, i64 %offset) {
299 ; CHECK-LABEL: fct15:
300 ; CHECK: // %bb.0: // %entry
301 ; CHECK-NEXT: ldr s0, [x0, x1, lsl #2]
302 ; CHECK-NEXT: ucvtf d0, d0
303 ; CHECK-NEXT: fmul d0, d0, d0
306 %addr = getelementptr i32, ptr %sp0, i64 %offset
307 %pix_sp0.0.copyload = load i32, ptr %addr, align 1
308 %val = uitofp i32 %pix_sp0.0.copyload to double
309 %vmull.i = fmul double %val, %val
; fct16: unsigned i64 at %sp0 indexed by %offset, converted to double and
; squared. The expected code uses a register-offset load (lsl #3) into d0, then
; ucvtf d0, d0.
313 define double @fct16(ptr nocapture %sp0, i64 %offset) {
314 ; CHECK-LABEL: fct16:
315 ; CHECK: // %bb.0: // %entry
316 ; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
317 ; CHECK-NEXT: ucvtf d0, d0
318 ; CHECK-NEXT: fmul d0, d0, d0
321 %addr = getelementptr i64, ptr %sp0, i64 %offset
322 %pix_sp0.0.copyload = load i64, ptr %addr, align 1
323 %val = uitofp i64 %pix_sp0.0.copyload to double
324 %vmull.i = fmul double %val, %val
328 ; ********* 5. load with unscaled imm to float. *********
; fct17: unsigned i8 at address %sp0 - 1 (built with ptrtoint/add/inttoptr),
; converted to float and squared. The expected code uses the unscaled ldur form
; into b0, then ucvtf s0, s0.
329 define float @fct17(ptr nocapture %sp0) {
330 ; CHECK-LABEL: fct17:
331 ; CHECK: // %bb.0: // %entry
332 ; CHECK-NEXT: ldur b0, [x0, #-1]
333 ; CHECK-NEXT: ucvtf s0, s0
334 ; CHECK-NEXT: fmul s0, s0, s0
337 %bitcast = ptrtoint ptr %sp0 to i64
338 %add = add i64 %bitcast, -1
339 %addr = inttoptr i64 %add to ptr
340 %pix_sp0.0.copyload = load i8, ptr %addr, align 1
341 %val = uitofp i8 %pix_sp0.0.copyload to float
342 %vmull.i = fmul float %val, %val
; fct18: unsigned i16 at address %sp0 + 1 (built with ptrtoint/add/inttoptr),
; converted to float and squared. The expected code uses the unscaled ldur form
; into h0, then ucvtf s0, s0.
346 define float @fct18(ptr nocapture %sp0) {
347 ; CHECK-LABEL: fct18:
349 ; CHECK-NEXT: ldur h0, [x0, #1]
350 ; CHECK-NEXT: ucvtf s0, s0
351 ; CHECK-NEXT: fmul s0, s0, s0
353 %bitcast = ptrtoint ptr %sp0 to i64
354 %add = add i64 %bitcast, 1
355 %addr = inttoptr i64 %add to ptr
356 %pix_sp0.0.copyload = load i16, ptr %addr, align 1
357 %val = uitofp i16 %pix_sp0.0.copyload to float
358 %vmull.i = fmul float %val, %val
; fct19: unsigned i32 at address %sp0 + 1 (built with ptrtoint/add/inttoptr),
; converted to float and squared. The expected code uses the unscaled ldur form
; into s0, then ucvtf s0, s0.
362 define float @fct19(ptr nocapture %sp0) {
363 ; CHECK-LABEL: fct19:
365 ; CHECK-NEXT: ldur s0, [x0, #1]
366 ; CHECK-NEXT: ucvtf s0, s0
367 ; CHECK-NEXT: fmul s0, s0, s0
369 %bitcast = ptrtoint ptr %sp0 to i64
370 %add = add i64 %bitcast, 1
371 %addr = inttoptr i64 %add to ptr
372 %pix_sp0.0.copyload = load i32, ptr %addr, align 1
373 %val = uitofp i32 %pix_sp0.0.copyload to float
374 %vmull.i = fmul float %val, %val
378 ; i64 -> f32 is not supported on floating point unit.
; fct20: unsigned i64 at address %sp0 + 1 (built with ptrtoint/add/inttoptr),
; converted to float and squared. The expected code uses an unscaled ldur into
; a GPR (x8), then ucvtf s0, x8.
379 define float @fct20(ptr nocapture %sp0) {
380 ; CHECK-LABEL: fct20:
382 ; CHECK-NEXT: ldur x8, [x0, #1]
383 ; CHECK-NEXT: ucvtf s0, x8
384 ; CHECK-NEXT: fmul s0, s0, s0
386 %bitcast = ptrtoint ptr %sp0 to i64
387 %add = add i64 %bitcast, 1
388 %addr = inttoptr i64 %add to ptr
389 %pix_sp0.0.copyload = load i64, ptr %addr, align 1
390 %val = uitofp i64 %pix_sp0.0.copyload to float
391 %vmull.i = fmul float %val, %val
396 ; ********* 6. load with unscaled imm to double. *********
; fct21: unsigned i8 at address %sp0 - 1 (built with ptrtoint/add/inttoptr),
; converted to double and squared. The expected code uses the unscaled ldur
; form into b0, then ucvtf d0, d0.
397 define double @fct21(ptr nocapture %sp0) {
398 ; CHECK-LABEL: fct21:
399 ; CHECK: // %bb.0: // %entry
400 ; CHECK-NEXT: ldur b0, [x0, #-1]
401 ; CHECK-NEXT: ucvtf d0, d0
402 ; CHECK-NEXT: fmul d0, d0, d0
405 %bitcast = ptrtoint ptr %sp0 to i64
406 %add = add i64 %bitcast, -1
407 %addr = inttoptr i64 %add to ptr
408 %pix_sp0.0.copyload = load i8, ptr %addr, align 1
409 %val = uitofp i8 %pix_sp0.0.copyload to double
410 %vmull.i = fmul double %val, %val
; fct22: unsigned i16 at address %sp0 + 1 (built with ptrtoint/add/inttoptr),
; converted to double and squared. The expected code uses the unscaled ldur
; form into h0, then ucvtf d0, d0.
414 define double @fct22(ptr nocapture %sp0) {
415 ; CHECK-LABEL: fct22:
417 ; CHECK-NEXT: ldur h0, [x0, #1]
418 ; CHECK-NEXT: ucvtf d0, d0
419 ; CHECK-NEXT: fmul d0, d0, d0
421 %bitcast = ptrtoint ptr %sp0 to i64
422 %add = add i64 %bitcast, 1
423 %addr = inttoptr i64 %add to ptr
424 %pix_sp0.0.copyload = load i16, ptr %addr, align 1
425 %val = uitofp i16 %pix_sp0.0.copyload to double
426 %vmull.i = fmul double %val, %val
; fct23: unsigned i32 at address %sp0 + 1 (built with ptrtoint/add/inttoptr),
; converted to double and squared. The expected code uses the unscaled ldur
; form into s0, then ucvtf d0, d0.
430 define double @fct23(ptr nocapture %sp0) {
431 ; CHECK-LABEL: fct23:
433 ; CHECK-NEXT: ldur s0, [x0, #1]
434 ; CHECK-NEXT: ucvtf d0, d0
435 ; CHECK-NEXT: fmul d0, d0, d0
437 %bitcast = ptrtoint ptr %sp0 to i64
438 %add = add i64 %bitcast, 1
439 %addr = inttoptr i64 %add to ptr
440 %pix_sp0.0.copyload = load i32, ptr %addr, align 1
441 %val = uitofp i32 %pix_sp0.0.copyload to double
442 %vmull.i = fmul double %val, %val
; fct24: unsigned i64 at address %sp0 + 1 (built with ptrtoint/add/inttoptr),
; converted to double and squared. The expected code uses the unscaled ldur
; form into d0, then ucvtf d0, d0.
446 define double @fct24(ptr nocapture %sp0) {
447 ; CHECK-LABEL: fct24:
449 ; CHECK-NEXT: ldur d0, [x0, #1]
450 ; CHECK-NEXT: ucvtf d0, d0
451 ; CHECK-NEXT: fmul d0, d0, d0
453 %bitcast = ptrtoint ptr %sp0 to i64
454 %add = add i64 %bitcast, 1
455 %addr = inttoptr i64 %add to ptr
456 %pix_sp0.0.copyload = load i64, ptr %addr, align 1
457 %val = uitofp i64 %pix_sp0.0.copyload to double
458 %vmull.i = fmul double %val, %val
463 ; ********* 1s. load with scaled imm to float. *********
; sfct1: signed i8 at %sp0 + 1 converted to float and squared. The two RUN
; configurations expect different code: for cyclone, a load into b0 widened by
; two sshll steps before scvtf s0, s0; for cortex-a57, a sign-extending ldrsb
; into w8 followed by scvtf s0, w8.
464 define float @sfct1(ptr nocapture %sp0) {
465 ; CHECK-CYC-LABEL: sfct1:
466 ; CHECK-CYC: // %bb.0: // %entry
467 ; CHECK-CYC-NEXT: ldr b0, [x0, #1]
468 ; CHECK-CYC-NEXT: sshll v0.8h, v0.8b, #0
469 ; CHECK-CYC-NEXT: sshll v0.4s, v0.4h, #0
470 ; CHECK-CYC-NEXT: scvtf s0, s0
471 ; CHECK-CYC-NEXT: fmul s0, s0, s0
472 ; CHECK-CYC-NEXT: ret
474 ; CHECK-A57-LABEL: sfct1:
475 ; CHECK-A57: // %bb.0: // %entry
476 ; CHECK-A57-NEXT: ldrsb w8, [x0, #1]
477 ; CHECK-A57-NEXT: scvtf s0, w8
478 ; CHECK-A57-NEXT: fmul s0, s0, s0
479 ; CHECK-A57-NEXT: ret
481 %addr = getelementptr i8, ptr %sp0, i64 1
482 %pix_sp0.0.copyload = load i8, ptr %addr, align 1
483 %val = sitofp i8 %pix_sp0.0.copyload to float
484 %vmull.i = fmul float %val, %val
; sfct2: signed i16 at element index 1 (byte offset 2) converted to float and
; squared. For cyclone the expected code loads into h0 and widens with one
; sshll before scvtf s0, s0; for cortex-a57 it uses ldrsh into w8 and
; scvtf s0, w8.
488 define float @sfct2(ptr nocapture %sp0) {
489 ; CHECK-CYC-LABEL: sfct2:
490 ; CHECK-CYC: // %bb.0: // %entry
491 ; CHECK-CYC-NEXT: ldr h0, [x0, #2]
492 ; CHECK-CYC-NEXT: sshll v0.4s, v0.4h, #0
493 ; CHECK-CYC-NEXT: scvtf s0, s0
494 ; CHECK-CYC-NEXT: fmul s0, s0, s0
495 ; CHECK-CYC-NEXT: ret
497 ; CHECK-A57-LABEL: sfct2:
498 ; CHECK-A57: // %bb.0: // %entry
499 ; CHECK-A57-NEXT: ldrsh w8, [x0, #2]
500 ; CHECK-A57-NEXT: scvtf s0, w8
501 ; CHECK-A57-NEXT: fmul s0, s0, s0
502 ; CHECK-A57-NEXT: ret
504 %addr = getelementptr i16, ptr %sp0, i64 1
505 %pix_sp0.0.copyload = load i16, ptr %addr, align 1
506 %val = sitofp i16 %pix_sp0.0.copyload to float
507 %vmull.i = fmul float %val, %val
; sfct3: signed i32 at element index 1 (byte offset 4) converted to float and
; squared. Both configurations expect a load into s0 and scvtf s0, s0.
511 define float @sfct3(ptr nocapture %sp0) {
512 ; CHECK-LABEL: sfct3:
513 ; CHECK: // %bb.0: // %entry
514 ; CHECK-NEXT: ldr s0, [x0, #4]
515 ; CHECK-NEXT: scvtf s0, s0
516 ; CHECK-NEXT: fmul s0, s0, s0
519 %addr = getelementptr i32, ptr %sp0, i64 1
520 %pix_sp0.0.copyload = load i32, ptr %addr, align 1
521 %val = sitofp i32 %pix_sp0.0.copyload to float
522 %vmull.i = fmul float %val, %val
526 ; i64 -> f32 is not supported on floating point unit.
; sfct4: signed i64 at element index 1 (byte offset 8) converted to float and
; squared. The expected code loads into a GPR (x8) and uses scvtf s0, x8.
527 define float @sfct4(ptr nocapture %sp0) {
528 ; CHECK-LABEL: sfct4:
529 ; CHECK: // %bb.0: // %entry
530 ; CHECK-NEXT: ldr x8, [x0, #8]
531 ; CHECK-NEXT: scvtf s0, x8
532 ; CHECK-NEXT: fmul s0, s0, s0
535 %addr = getelementptr i64, ptr %sp0, i64 1
536 %pix_sp0.0.copyload = load i64, ptr %addr, align 1
537 %val = sitofp i64 %pix_sp0.0.copyload to float
538 %vmull.i = fmul float %val, %val
542 ; ********* 2s. load with scaled register to float. *********
; sfct5: signed i8 at %sp0 indexed by %offset, converted to float and squared.
; For cyclone the expected code is a register-offset load into b0 widened by
; two sshll steps before scvtf s0, s0; for cortex-a57 it is ldrsb into w8 and
; scvtf s0, w8.
543 define float @sfct5(ptr nocapture %sp0, i64 %offset) {
544 ; CHECK-CYC-LABEL: sfct5:
545 ; CHECK-CYC: // %bb.0: // %entry
546 ; CHECK-CYC-NEXT: ldr b0, [x0, x1]
547 ; CHECK-CYC-NEXT: sshll v0.8h, v0.8b, #0
548 ; CHECK-CYC-NEXT: sshll v0.4s, v0.4h, #0
549 ; CHECK-CYC-NEXT: scvtf s0, s0
550 ; CHECK-CYC-NEXT: fmul s0, s0, s0
551 ; CHECK-CYC-NEXT: ret
553 ; CHECK-A57-LABEL: sfct5:
554 ; CHECK-A57: // %bb.0: // %entry
555 ; CHECK-A57-NEXT: ldrsb w8, [x0, x1]
556 ; CHECK-A57-NEXT: scvtf s0, w8
557 ; CHECK-A57-NEXT: fmul s0, s0, s0
558 ; CHECK-A57-NEXT: ret
560 %addr = getelementptr i8, ptr %sp0, i64 %offset
561 %pix_sp0.0.copyload = load i8, ptr %addr, align 1
562 %val = sitofp i8 %pix_sp0.0.copyload to float
563 %vmull.i = fmul float %val, %val
; sfct6: signed i16 at %sp0 indexed by %offset, converted to float and squared.
; For cyclone the expected code is a register-offset load (lsl #1) into h0
; widened by one sshll before scvtf s0, s0; for cortex-a57 it is ldrsh into w8
; and scvtf s0, w8.
567 define float @sfct6(ptr nocapture %sp0, i64 %offset) {
568 ; CHECK-CYC-LABEL: sfct6:
569 ; CHECK-CYC: // %bb.0: // %entry
570 ; CHECK-CYC-NEXT: ldr h0, [x0, x1, lsl #1]
571 ; CHECK-CYC-NEXT: sshll v0.4s, v0.4h, #0
572 ; CHECK-CYC-NEXT: scvtf s0, s0
573 ; CHECK-CYC-NEXT: fmul s0, s0, s0
574 ; CHECK-CYC-NEXT: ret
576 ; CHECK-A57-LABEL: sfct6:
577 ; CHECK-A57: // %bb.0: // %entry
578 ; CHECK-A57-NEXT: ldrsh w8, [x0, x1, lsl #1]
579 ; CHECK-A57-NEXT: scvtf s0, w8
580 ; CHECK-A57-NEXT: fmul s0, s0, s0
581 ; CHECK-A57-NEXT: ret
583 %addr = getelementptr i16, ptr %sp0, i64 %offset
584 %pix_sp0.0.copyload = load i16, ptr %addr, align 1
585 %val = sitofp i16 %pix_sp0.0.copyload to float
586 %vmull.i = fmul float %val, %val
; sfct7: signed i32 at %sp0 indexed by %offset, converted to float and squared.
; Both configurations expect a register-offset load (lsl #2) into s0 and
; scvtf s0, s0.
590 define float @sfct7(ptr nocapture %sp0, i64 %offset) {
591 ; CHECK-LABEL: sfct7:
592 ; CHECK: // %bb.0: // %entry
593 ; CHECK-NEXT: ldr s0, [x0, x1, lsl #2]
594 ; CHECK-NEXT: scvtf s0, s0
595 ; CHECK-NEXT: fmul s0, s0, s0
598 %addr = getelementptr i32, ptr %sp0, i64 %offset
599 %pix_sp0.0.copyload = load i32, ptr %addr, align 1
600 %val = sitofp i32 %pix_sp0.0.copyload to float
601 %vmull.i = fmul float %val, %val
605 ; i64 -> f32 is not supported on floating point unit.
; sfct8: signed i64 at %sp0 indexed by %offset, converted to float and squared.
; The expected code loads into a GPR (x8, lsl #3 addressing) and uses
; scvtf s0, x8.
606 define float @sfct8(ptr nocapture %sp0, i64 %offset) {
607 ; CHECK-LABEL: sfct8:
608 ; CHECK: // %bb.0: // %entry
609 ; CHECK-NEXT: ldr x8, [x0, x1, lsl #3]
610 ; CHECK-NEXT: scvtf s0, x8
611 ; CHECK-NEXT: fmul s0, s0, s0
614 %addr = getelementptr i64, ptr %sp0, i64 %offset
615 %pix_sp0.0.copyload = load i64, ptr %addr, align 1
616 %val = sitofp i64 %pix_sp0.0.copyload to float
617 %vmull.i = fmul float %val, %val
621 ; ********* 3s. load with scaled imm to double. *********
; sfct9: signed i8 at %sp0 + 1 converted to double and squared. Both
; configurations expect a sign-extending ldrsb into w8 and scvtf d0, w8.
622 define double @sfct9(ptr nocapture %sp0) {
623 ; CHECK-LABEL: sfct9:
624 ; CHECK: // %bb.0: // %entry
625 ; CHECK-NEXT: ldrsb w8, [x0, #1]
626 ; CHECK-NEXT: scvtf d0, w8
627 ; CHECK-NEXT: fmul d0, d0, d0
630 %addr = getelementptr i8, ptr %sp0, i64 1
631 %pix_sp0.0.copyload = load i8, ptr %addr, align 1
632 %val = sitofp i8 %pix_sp0.0.copyload to double
633 %vmull.i = fmul double %val, %val
; sfct10: signed i16 at element index 1 (byte offset 2) converted to double and
; squared. For cyclone the expected code loads into h0 and widens with two
; sshll steps before scvtf d0, d0; for cortex-a57 it uses ldrsh into w8 and
; scvtf d0, w8.
637 define double @sfct10(ptr nocapture %sp0) {
638 ; CHECK-CYC-LABEL: sfct10:
639 ; CHECK-CYC: // %bb.0: // %entry
640 ; CHECK-CYC-NEXT: ldr h0, [x0, #2]
641 ; CHECK-CYC-NEXT: sshll v0.4s, v0.4h, #0
642 ; CHECK-CYC-NEXT: sshll v0.2d, v0.2s, #0
643 ; CHECK-CYC-NEXT: scvtf d0, d0
644 ; CHECK-CYC-NEXT: fmul d0, d0, d0
645 ; CHECK-CYC-NEXT: ret
647 ; CHECK-A57-LABEL: sfct10:
648 ; CHECK-A57: // %bb.0: // %entry
649 ; CHECK-A57-NEXT: ldrsh w8, [x0, #2]
650 ; CHECK-A57-NEXT: scvtf d0, w8
651 ; CHECK-A57-NEXT: fmul d0, d0, d0
652 ; CHECK-A57-NEXT: ret
654 %addr = getelementptr i16, ptr %sp0, i64 1
655 %pix_sp0.0.copyload = load i16, ptr %addr, align 1
656 %val = sitofp i16 %pix_sp0.0.copyload to double
657 %vmull.i = fmul double %val, %val
; sfct11: signed i32 at element index 1 (byte offset 4) converted to double and
; squared. For cyclone the expected code loads into s0 and widens with one
; sshll before scvtf d0, d0; for cortex-a57 it loads into w8 and uses
; scvtf d0, w8.
661 define double @sfct11(ptr nocapture %sp0) {
662 ; CHECK-CYC-LABEL: sfct11:
663 ; CHECK-CYC: // %bb.0: // %entry
664 ; CHECK-CYC-NEXT: ldr s0, [x0, #4]
665 ; CHECK-CYC-NEXT: sshll v0.2d, v0.2s, #0
666 ; CHECK-CYC-NEXT: scvtf d0, d0
667 ; CHECK-CYC-NEXT: fmul d0, d0, d0
668 ; CHECK-CYC-NEXT: ret
670 ; CHECK-A57-LABEL: sfct11:
671 ; CHECK-A57: // %bb.0: // %entry
672 ; CHECK-A57-NEXT: ldr w8, [x0, #4]
673 ; CHECK-A57-NEXT: scvtf d0, w8
674 ; CHECK-A57-NEXT: fmul d0, d0, d0
675 ; CHECK-A57-NEXT: ret
677 %addr = getelementptr i32, ptr %sp0, i64 1
678 %pix_sp0.0.copyload = load i32, ptr %addr, align 1
679 %val = sitofp i32 %pix_sp0.0.copyload to double
680 %vmull.i = fmul double %val, %val
; sfct12: signed i64 at element index 1 (byte offset 8) converted to double and
; squared. Both configurations expect a load into d0 and scvtf d0, d0.
684 define double @sfct12(ptr nocapture %sp0) {
685 ; CHECK-LABEL: sfct12:
686 ; CHECK: // %bb.0: // %entry
687 ; CHECK-NEXT: ldr d0, [x0, #8]
688 ; CHECK-NEXT: scvtf d0, d0
689 ; CHECK-NEXT: fmul d0, d0, d0
692 %addr = getelementptr i64, ptr %sp0, i64 1
693 %pix_sp0.0.copyload = load i64, ptr %addr, align 1
694 %val = sitofp i64 %pix_sp0.0.copyload to double
695 %vmull.i = fmul double %val, %val
699 ; ********* 4s. load with scaled register to double. *********
; sfct13: signed i8 at %sp0 indexed by %offset, converted to double and
; squared. Both configurations expect a register-offset ldrsb into w8 and
; scvtf d0, w8.
700 define double @sfct13(ptr nocapture %sp0, i64 %offset) {
701 ; CHECK-LABEL: sfct13:
702 ; CHECK: // %bb.0: // %entry
703 ; CHECK-NEXT: ldrsb w8, [x0, x1]
704 ; CHECK-NEXT: scvtf d0, w8
705 ; CHECK-NEXT: fmul d0, d0, d0
708 %addr = getelementptr i8, ptr %sp0, i64 %offset
709 %pix_sp0.0.copyload = load i8, ptr %addr, align 1
710 %val = sitofp i8 %pix_sp0.0.copyload to double
711 %vmull.i = fmul double %val, %val
; sfct14: signed i16 at %sp0 indexed by %offset, converted to double and
; squared. For cyclone the expected code is a register-offset load (lsl #1)
; into h0 widened by two sshll steps before scvtf d0, d0; for cortex-a57 it is
; ldrsh into w8 and scvtf d0, w8.
715 define double @sfct14(ptr nocapture %sp0, i64 %offset) {
716 ; CHECK-CYC-LABEL: sfct14:
717 ; CHECK-CYC: // %bb.0: // %entry
718 ; CHECK-CYC-NEXT: ldr h0, [x0, x1, lsl #1]
719 ; CHECK-CYC-NEXT: sshll v0.4s, v0.4h, #0
720 ; CHECK-CYC-NEXT: sshll v0.2d, v0.2s, #0
721 ; CHECK-CYC-NEXT: scvtf d0, d0
722 ; CHECK-CYC-NEXT: fmul d0, d0, d0
723 ; CHECK-CYC-NEXT: ret
725 ; CHECK-A57-LABEL: sfct14:
726 ; CHECK-A57: // %bb.0: // %entry
727 ; CHECK-A57-NEXT: ldrsh w8, [x0, x1, lsl #1]
728 ; CHECK-A57-NEXT: scvtf d0, w8
729 ; CHECK-A57-NEXT: fmul d0, d0, d0
730 ; CHECK-A57-NEXT: ret
732 %addr = getelementptr i16, ptr %sp0, i64 %offset
733 %pix_sp0.0.copyload = load i16, ptr %addr, align 1
734 %val = sitofp i16 %pix_sp0.0.copyload to double
735 %vmull.i = fmul double %val, %val
; sfct15: signed i32 at %sp0 indexed by %offset, converted to double and
; squared. For cyclone the expected code is a register-offset load (lsl #2)
; into s0 widened by one sshll before scvtf d0, d0; for cortex-a57 it loads
; into w8 and uses scvtf d0, w8.
739 define double @sfct15(ptr nocapture %sp0, i64 %offset) {
740 ; CHECK-CYC-LABEL: sfct15:
741 ; CHECK-CYC: // %bb.0: // %entry
742 ; CHECK-CYC-NEXT: ldr s0, [x0, x1, lsl #2]
743 ; CHECK-CYC-NEXT: sshll v0.2d, v0.2s, #0
744 ; CHECK-CYC-NEXT: scvtf d0, d0
745 ; CHECK-CYC-NEXT: fmul d0, d0, d0
746 ; CHECK-CYC-NEXT: ret
748 ; CHECK-A57-LABEL: sfct15:
749 ; CHECK-A57: // %bb.0: // %entry
750 ; CHECK-A57-NEXT: ldr w8, [x0, x1, lsl #2]
751 ; CHECK-A57-NEXT: scvtf d0, w8
752 ; CHECK-A57-NEXT: fmul d0, d0, d0
753 ; CHECK-A57-NEXT: ret
755 %addr = getelementptr i32, ptr %sp0, i64 %offset
756 %pix_sp0.0.copyload = load i32, ptr %addr, align 1
757 %val = sitofp i32 %pix_sp0.0.copyload to double
758 %vmull.i = fmul double %val, %val
; sfct16: signed i64 at %sp0 indexed by %offset, converted to double and
; squared. Both configurations expect a register-offset load (lsl #3) into d0
; and scvtf d0, d0.
762 define double @sfct16(ptr nocapture %sp0, i64 %offset) {
763 ; CHECK-LABEL: sfct16:
764 ; CHECK: // %bb.0: // %entry
765 ; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
766 ; CHECK-NEXT: scvtf d0, d0
767 ; CHECK-NEXT: fmul d0, d0, d0
770 %addr = getelementptr i64, ptr %sp0, i64 %offset
771 %pix_sp0.0.copyload = load i64, ptr %addr, align 1
772 %val = sitofp i64 %pix_sp0.0.copyload to double
773 %vmull.i = fmul double %val, %val
777 ; ********* 5s. load with unscaled imm to float. *********
; sfct17: signed i8 at address %sp0 - 1 (built with ptrtoint/add/inttoptr),
; converted to float and squared. For cyclone the expected code is an unscaled
; ldur into b0 widened by two sshll steps before scvtf s0, s0; for cortex-a57
; it is ldursb into w8 and scvtf s0, w8.
778 define float @sfct17(ptr nocapture %sp0) {
779 ; CHECK-CYC-LABEL: sfct17:
780 ; CHECK-CYC: // %bb.0: // %entry
781 ; CHECK-CYC-NEXT: ldur b0, [x0, #-1]
782 ; CHECK-CYC-NEXT: sshll v0.8h, v0.8b, #0
783 ; CHECK-CYC-NEXT: sshll v0.4s, v0.4h, #0
784 ; CHECK-CYC-NEXT: scvtf s0, s0
785 ; CHECK-CYC-NEXT: fmul s0, s0, s0
786 ; CHECK-CYC-NEXT: ret
788 ; CHECK-A57-LABEL: sfct17:
789 ; CHECK-A57: // %bb.0: // %entry
790 ; CHECK-A57-NEXT: ldursb w8, [x0, #-1]
791 ; CHECK-A57-NEXT: scvtf s0, w8
792 ; CHECK-A57-NEXT: fmul s0, s0, s0
793 ; CHECK-A57-NEXT: ret
795 %bitcast = ptrtoint ptr %sp0 to i64
796 %add = add i64 %bitcast, -1
797 %addr = inttoptr i64 %add to ptr
798 %pix_sp0.0.copyload = load i8, ptr %addr, align 1
799 %val = sitofp i8 %pix_sp0.0.copyload to float
800 %vmull.i = fmul float %val, %val
; sfct18: signed i16 at address %sp0 + 1 (built with ptrtoint/add/inttoptr),
; converted to float and squared. For cyclone the expected code is an unscaled
; ldur into h0 widened by one sshll before scvtf s0, s0; for cortex-a57 it is
; ldursh into w8 and scvtf s0, w8.
804 define float @sfct18(ptr nocapture %sp0) {
805 ; CHECK-CYC-LABEL: sfct18:
806 ; CHECK-CYC: // %bb.0:
807 ; CHECK-CYC-NEXT: ldur h0, [x0, #1]
808 ; CHECK-CYC-NEXT: sshll v0.4s, v0.4h, #0
809 ; CHECK-CYC-NEXT: scvtf s0, s0
810 ; CHECK-CYC-NEXT: fmul s0, s0, s0
811 ; CHECK-CYC-NEXT: ret
813 ; CHECK-A57-LABEL: sfct18:
814 ; CHECK-A57: // %bb.0:
815 ; CHECK-A57-NEXT: ldursh w8, [x0, #1]
816 ; CHECK-A57-NEXT: scvtf s0, w8
817 ; CHECK-A57-NEXT: fmul s0, s0, s0
818 ; CHECK-A57-NEXT: ret
819 %bitcast = ptrtoint ptr %sp0 to i64
820 %add = add i64 %bitcast, 1
821 %addr = inttoptr i64 %add to ptr
822 %pix_sp0.0.copyload = load i16, ptr %addr, align 1
823 %val = sitofp i16 %pix_sp0.0.copyload to float
824 %vmull.i = fmul float %val, %val
; sfct19: signed i32 at address %sp0 + 1 (built with ptrtoint/add/inttoptr),
; converted to float and squared. Both configurations expect an unscaled ldur
; into s0 and scvtf s0, s0.
828 define float @sfct19(ptr nocapture %sp0) {
829 ; CHECK-LABEL: sfct19:
831 ; CHECK-NEXT: ldur s0, [x0, #1]
832 ; CHECK-NEXT: scvtf s0, s0
833 ; CHECK-NEXT: fmul s0, s0, s0
835 %bitcast = ptrtoint ptr %sp0 to i64
836 %add = add i64 %bitcast, 1
837 %addr = inttoptr i64 %add to ptr
838 %pix_sp0.0.copyload = load i32, ptr %addr, align 1
839 %val = sitofp i32 %pix_sp0.0.copyload to float
840 %vmull.i = fmul float %val, %val
844 ; i64 -> f32 is not supported on floating point unit.
; sfct20: signed i64 at address %sp0 + 1 (built with ptrtoint/add/inttoptr),
; converted to float and squared. The expected code uses an unscaled ldur into
; a GPR (x8), then scvtf s0, x8.
845 define float @sfct20(ptr nocapture %sp0) {
846 ; CHECK-LABEL: sfct20:
848 ; CHECK-NEXT: ldur x8, [x0, #1]
849 ; CHECK-NEXT: scvtf s0, x8
850 ; CHECK-NEXT: fmul s0, s0, s0
852 %bitcast = ptrtoint ptr %sp0 to i64
853 %add = add i64 %bitcast, 1
854 %addr = inttoptr i64 %add to ptr
855 %pix_sp0.0.copyload = load i64, ptr %addr, align 1
856 %val = sitofp i64 %pix_sp0.0.copyload to float
857 %vmull.i = fmul float %val, %val
862 ; ********* 6s. load with unscaled imm to double. *********
; sfct21: signed i8 at address %sp0 - 1 (built with ptrtoint/add/inttoptr),
; converted to double and squared. Both configurations expect ldursb into w8
; and scvtf d0, w8.
863 define double @sfct21(ptr nocapture %sp0) {
864 ; CHECK-LABEL: sfct21:
865 ; CHECK: // %bb.0: // %entry
866 ; CHECK-NEXT: ldursb w8, [x0, #-1]
867 ; CHECK-NEXT: scvtf d0, w8
868 ; CHECK-NEXT: fmul d0, d0, d0
871 %bitcast = ptrtoint ptr %sp0 to i64
872 %add = add i64 %bitcast, -1
873 %addr = inttoptr i64 %add to ptr
874 %pix_sp0.0.copyload = load i8, ptr %addr, align 1
875 %val = sitofp i8 %pix_sp0.0.copyload to double
876 %vmull.i = fmul double %val, %val
; sfct22: signed i16 at address %sp0 + 1 (built with ptrtoint/add/inttoptr),
; converted to double and squared. For cyclone the expected code is an unscaled
; ldur into h0 widened by two sshll steps before scvtf d0, d0; for cortex-a57
; it is ldursh into w8 and scvtf d0, w8.
880 define double @sfct22(ptr nocapture %sp0) {
881 ; CHECK-CYC-LABEL: sfct22:
882 ; CHECK-CYC: // %bb.0:
883 ; CHECK-CYC-NEXT: ldur h0, [x0, #1]
884 ; CHECK-CYC-NEXT: sshll v0.4s, v0.4h, #0
885 ; CHECK-CYC-NEXT: sshll v0.2d, v0.2s, #0
886 ; CHECK-CYC-NEXT: scvtf d0, d0
887 ; CHECK-CYC-NEXT: fmul d0, d0, d0
888 ; CHECK-CYC-NEXT: ret
890 ; CHECK-A57-LABEL: sfct22:
891 ; CHECK-A57: // %bb.0:
892 ; CHECK-A57-NEXT: ldursh w8, [x0, #1]
893 ; CHECK-A57-NEXT: scvtf d0, w8
894 ; CHECK-A57-NEXT: fmul d0, d0, d0
895 ; CHECK-A57-NEXT: ret
896 %bitcast = ptrtoint ptr %sp0 to i64
897 %add = add i64 %bitcast, 1
898 %addr = inttoptr i64 %add to ptr
899 %pix_sp0.0.copyload = load i16, ptr %addr, align 1
900 %val = sitofp i16 %pix_sp0.0.copyload to double
901 %vmull.i = fmul double %val, %val
; sfct23: signed i32 at address %sp0 + 1 (built with ptrtoint/add/inttoptr),
; converted to double and squared. For cyclone the expected code is an unscaled
; ldur into s0 widened by one sshll before scvtf d0, d0; for cortex-a57 it is
; ldur into w8 and scvtf d0, w8.
905 define double @sfct23(ptr nocapture %sp0) {
906 ; CHECK-CYC-LABEL: sfct23:
907 ; CHECK-CYC: // %bb.0:
908 ; CHECK-CYC-NEXT: ldur s0, [x0, #1]
909 ; CHECK-CYC-NEXT: sshll v0.2d, v0.2s, #0
910 ; CHECK-CYC-NEXT: scvtf d0, d0
911 ; CHECK-CYC-NEXT: fmul d0, d0, d0
912 ; CHECK-CYC-NEXT: ret
914 ; CHECK-A57-LABEL: sfct23:
915 ; CHECK-A57: // %bb.0:
916 ; CHECK-A57-NEXT: ldur w8, [x0, #1]
917 ; CHECK-A57-NEXT: scvtf d0, w8
918 ; CHECK-A57-NEXT: fmul d0, d0, d0
919 ; CHECK-A57-NEXT: ret
920 %bitcast = ptrtoint ptr %sp0 to i64
921 %add = add i64 %bitcast, 1
922 %addr = inttoptr i64 %add to ptr
923 %pix_sp0.0.copyload = load i32, ptr %addr, align 1
924 %val = sitofp i32 %pix_sp0.0.copyload to double
925 %vmull.i = fmul double %val, %val
; sfct24: signed i64 at address %sp0 + 1 (built with ptrtoint/add/inttoptr),
; converted to double and squared. Both configurations expect an unscaled ldur
; into d0 and scvtf d0, d0.
929 define double @sfct24(ptr nocapture %sp0) {
930 ; CHECK-LABEL: sfct24:
932 ; CHECK-NEXT: ldur d0, [x0, #1]
933 ; CHECK-NEXT: scvtf d0, d0
934 ; CHECK-NEXT: fmul d0, d0, d0
936 %bitcast = ptrtoint ptr %sp0 to i64
937 %add = add i64 %bitcast, 1
938 %addr = inttoptr i64 %add to ptr
939 %pix_sp0.0.copyload = load i64, ptr %addr, align 1
940 %val = sitofp i64 %pix_sp0.0.copyload to double
941 %vmull.i = fmul double %val, %val
946 ; Check that we do not use the SSHLL code sequence when code size is a concern.
; codesize_sfct17: same IR body as sfct17 (signed i8 at %sp0 - 1 to float,
; squared) but marked optsize. Both configurations expect the short ldursb
; into w8 followed by scvtf s0, w8, with no sshll widening.
947 define float @codesize_sfct17(ptr nocapture %sp0) optsize {
948 ; CHECK-LABEL: codesize_sfct17:
949 ; CHECK: // %bb.0: // %entry
950 ; CHECK-NEXT: ldursb w8, [x0, #-1]
951 ; CHECK-NEXT: scvtf s0, w8
952 ; CHECK-NEXT: fmul s0, s0, s0
955 %bitcast = ptrtoint ptr %sp0 to i64
956 %add = add i64 %bitcast, -1
957 %addr = inttoptr i64 %add to ptr
958 %pix_sp0.0.copyload = load i8, ptr %addr, align 1
959 %val = sitofp i8 %pix_sp0.0.copyload to float
960 %vmull.i = fmul float %val, %val
; codesize_sfct11: same IR body as sfct11 (signed i32 at byte offset 4 to
; double, squared) but marked minsize. Both configurations expect a load into
; w8 followed by scvtf d0, w8, with no sshll widening.
964 define double @codesize_sfct11(ptr nocapture %sp0) minsize {
965 ; CHECK-LABEL: codesize_sfct11:
966 ; CHECK: // %bb.0: // %entry
967 ; CHECK-NEXT: ldr w8, [x0, #4]
968 ; CHECK-NEXT: scvtf d0, w8
969 ; CHECK-NEXT: fmul d0, d0, d0
972 %addr = getelementptr i32, ptr %sp0, i64 1
973 %pix_sp0.0.copyload = load i32, ptr %addr, align 1
974 %val = sitofp i32 %pix_sp0.0.copyload to double
975 %vmull.i = fmul double %val, %val
979 ; Adding fp128 custom lowering makes these a little fragile since we have to
980 ; return the correct mix of Legal/Expand from the custom method.
982 ; rdar://problem/14991489
; float_from_i128: unsigned i128 -> float. The expected code is a call to
; __floatuntisf with x30 spilled before and reloaded after the call.
984 define float @float_from_i128(i128 %in) {
985 ; CHECK-LABEL: float_from_i128:
987 ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
988 ; CHECK-NEXT: .cfi_def_cfa_offset 16
989 ; CHECK-NEXT: .cfi_offset w30, -16
990 ; CHECK-NEXT: bl __floatuntisf
991 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
993 %conv = uitofp i128 %in to float
; double_from_i128: signed i128 -> double. The expected code is a call to
; __floattidf with x30 spilled before and reloaded after the call.
997 define double @double_from_i128(i128 %in) {
998 ; CHECK-LABEL: double_from_i128:
1000 ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
1001 ; CHECK-NEXT: .cfi_def_cfa_offset 16
1002 ; CHECK-NEXT: .cfi_offset w30, -16
1003 ; CHECK-NEXT: bl __floattidf
1004 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
1006 %conv = sitofp i128 %in to double
; fp128_from_i128: unsigned i128 -> fp128. The expected code is a call to
; __floatuntitf with x30 spilled before and reloaded after the call.
1010 define fp128 @fp128_from_i128(i128 %in) {
1011 ; CHECK-LABEL: fp128_from_i128:
1013 ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
1014 ; CHECK-NEXT: .cfi_def_cfa_offset 16
1015 ; CHECK-NEXT: .cfi_offset w30, -16
1016 ; CHECK-NEXT: bl __floatuntitf
1017 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
1019 %conv = uitofp i128 %in to fp128
; i128_from_float: float -> signed i128. The expected code is a call to
; __fixsfti with x30 spilled before and reloaded after the call.
1023 define i128 @i128_from_float(float %in) {
1024 ; CHECK-LABEL: i128_from_float:
1026 ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
1027 ; CHECK-NEXT: .cfi_def_cfa_offset 16
1028 ; CHECK-NEXT: .cfi_offset w30, -16
1029 ; CHECK-NEXT: bl __fixsfti
1030 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
1032 %conv = fptosi float %in to i128
; i128_from_double: double -> unsigned i128. The expected code is a call to
; __fixunsdfti with x30 spilled before and reloaded after the call.
1036 define i128 @i128_from_double(double %in) {
1037 ; CHECK-LABEL: i128_from_double:
1039 ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
1040 ; CHECK-NEXT: .cfi_def_cfa_offset 16
1041 ; CHECK-NEXT: .cfi_offset w30, -16
1042 ; CHECK-NEXT: bl __fixunsdfti
1043 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
1045 %conv = fptoui double %in to i128
; i128_from_fp128: fp128 -> signed i128. The expected code is a call to
; __fixtfti with x30 spilled before and reloaded after the call.
1049 define i128 @i128_from_fp128(fp128 %in) {
1050 ; CHECK-LABEL: i128_from_fp128:
1052 ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
1053 ; CHECK-NEXT: .cfi_def_cfa_offset 16
1054 ; CHECK-NEXT: .cfi_offset w30, -16
1055 ; CHECK-NEXT: bl __fixtfti
1056 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
1058 %conv = fptosi fp128 %in to i128