; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

; Ensure we don't attempt to combine into an extending fp128 load.
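; AArch64 has no instruction that extends f64 directly to f128, so (as the CHECK
; lines below show) the expected lowering loads the <4 x double> with ld1d,
; extracts each element, and widens it through a __extenddftf2 libcall instead of
; folding the extension into the load.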
define void @fcvt_v4f64_v4f128(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v4f64_v4f128:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-32]! // 8-byte Folded Spill
; CHECK-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #48
; CHECK-NEXT:    addvl sp, sp, #-2
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    add x8, sp, #48
; CHECK-NEXT:    mov x19, x1
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #16
; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    bl __extenddftf2
; CHECK-NEXT:    add x8, sp, #48
; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    ldr z1, [x8] // 16-byte Folded Reload
; CHECK-NEXT:    mov d1, v1.d[1]
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    bl __extenddftf2
; CHECK-NEXT:    add x8, sp, #48
; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    ldr z0, [x8, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    bl __extenddftf2
; CHECK-NEXT:    add x8, sp, #48
; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    mov d1, v1.d[1]
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    bl __extenddftf2
; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    stp q1, q0, [x19]
; CHECK-NEXT:    ldp q1, q0, [sp, #16] // 32-byte Folded Reload
; CHECK-NEXT:    stp q0, q1, [x19, #32]
; CHECK-NEXT:    addvl sp, sp, #2
; CHECK-NEXT:    add sp, sp, #48
; CHECK-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x29, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %res = fpext <4 x double> %op1 to <4 x fp128>
  store <4 x fp128> %res, ptr %b
  ret void
}

; Ensure we don't attempt to combine into a truncating fp128 store.
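; The mirror case: each fp128 element is reloaded, narrowed to f64 with a
; __trunctfdf2 libcall, and the pieces are spliced back into SVE registers before
; the st1d stores, rather than folding the truncation into the store.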
define void @fcvt_v4f128_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fcvt_v4f128_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-32]! // 8-byte Folded Spill
; CHECK-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #128
; CHECK-NEXT:    addvl sp, sp, #-2
; CHECK-NEXT:    ldp q1, q0, [x0, #64]
; CHECK-NEXT:    mov x19, x1
; CHECK-NEXT:    stp q0, q1, [sp, #96] // 32-byte Folded Spill
; CHECK-NEXT:    ldp q1, q0, [x0, #96]
; CHECK-NEXT:    stp q0, q1, [sp, #64] // 32-byte Folded Spill
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    stp q0, q1, [sp, #32] // 32-byte Folded Spill
; CHECK-NEXT:    ldp q1, q0, [x0, #32]
; CHECK-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    bl __trunctfdf2
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    bl __trunctfdf2
; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    add x8, sp, #128
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    bl __trunctfdf2
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    bl __trunctfdf2
; CHECK-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    add x8, sp, #128
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    bl __trunctfdf2
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT:    bl __trunctfdf2
; CHECK-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    add x8, sp, #128
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT:    bl __trunctfdf2
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
; CHECK-NEXT:    bl __trunctfdf2
; CHECK-NEXT:    ldr q1, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    add x8, sp, #128
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ldr z1, [x8] // 16-byte Folded Reload
; CHECK-NEXT:    mov x8, #4 // =0x4
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    st1d { z0.d }, p0, [x19, x8, lsl #3]
; CHECK-NEXT:    add x8, sp, #128
; CHECK-NEXT:    ldr z0, [x8, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    st1d { z0.d }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #2
; CHECK-NEXT:    add sp, sp, #128
; CHECK-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x29, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %op1 = load <8 x fp128>, ptr %a
  %res = fptrunc <8 x fp128> %op1 to <8 x double>
  store <8 x double> %res, ptr %b
  ret void
}

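; Attribute #0 enables SVE; together with vscale_range(2,0) on each function it
; guarantees a minimum SVE register width of 256 bits, which is what lets the
; fixed-length vectors above be lowered with SVE loads and stores.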
attributes #0 = { nounwind "target-features"="+sve" }