1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s | FileCheck %s
4 target triple = "aarch64-unknown-linux-gnu"
6 ; Ensure we don't attempt to combine into an extending fp128 load.
; Test body: loads a <4 x double> from %a, extends it to <4 x fp128> with
; fpext, and stores the result to %b.
; vscale_range(2,0) sets a minimum vscale of 2 with no upper bound (see the
; LLVM LangRef entry for the vscale_range function attribute).
; Expected codegen, as pinned by the CHECK lines below:
;   - the input is read with a single predicated ld1d (ptrue p0.d, vl4);
;   - each of the four lanes is extended by its own call to __extenddftf2,
;     with the vector halves spilled to / reloaded from the stack around the
;     calls;
;   - the four fp128 results are written to %b (held in x19) with two stp
;     instructions.
; The CHECK lines are autogenerated (see the NOTE at the top of the file), so
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
7 define void @fcvt_v4f64_v4f128(ptr %a, ptr %b) vscale_range(2,0) #0 {
8 ; CHECK-LABEL: fcvt_v4f64_v4f128:
10 ; CHECK-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill
11 ; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
12 ; CHECK-NEXT: addvl sp, sp, #-2
13 ; CHECK-NEXT: sub sp, sp, #48
14 ; CHECK-NEXT: ptrue p0.d, vl4
15 ; CHECK-NEXT: add x8, sp, #48
16 ; CHECK-NEXT: mov x19, x1
17 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
18 ; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill
19 ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16
20 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
21 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
22 ; CHECK-NEXT: bl __extenddftf2
23 ; CHECK-NEXT: add x8, sp, #48
24 ; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
25 ; CHECK-NEXT: ldr z1, [x8] // 16-byte Folded Reload
26 ; CHECK-NEXT: mov d1, v1.d[1]
27 ; CHECK-NEXT: fmov d0, d1
28 ; CHECK-NEXT: bl __extenddftf2
29 ; CHECK-NEXT: add x8, sp, #48
30 ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
31 ; CHECK-NEXT: ldr z0, [x8, #1, mul vl] // 16-byte Folded Reload
32 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
33 ; CHECK-NEXT: bl __extenddftf2
34 ; CHECK-NEXT: add x8, sp, #48
35 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
36 ; CHECK-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
37 ; CHECK-NEXT: mov d1, v1.d[1]
38 ; CHECK-NEXT: fmov d0, d1
39 ; CHECK-NEXT: bl __extenddftf2
40 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
41 ; CHECK-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
42 ; CHECK-NEXT: stp q1, q0, [x19]
43 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
44 ; CHECK-NEXT: stp q0, q2, [x19, #32]
45 ; CHECK-NEXT: addvl sp, sp, #2
46 ; CHECK-NEXT: add sp, sp, #48
47 ; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
48 ; CHECK-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload
50 %op1 = load <4 x double>, ptr %a
51 %res = fpext <4 x double> %op1 to <4 x fp128>
52 store <4 x fp128> %res, ptr %b
56 ; Ensure we don't attempt to combine into a truncating fp128 store.
; Test body: loads an <8 x fp128> from %a, truncates it to <8 x double> with
; fptrunc, and stores the result to %b.
; NOTE(review): the function name says v4, but the IR below operates on
; 8-element vectors -- confirm whether the name or the element count is the
; intended one before renaming anything (the CHECK-LABEL depends on the name).
; Expected codegen, as pinned by the CHECK lines below:
;   - the eight fp128 inputs are read with plain ldr q loads and spilled to
;     the stack;
;   - each element is truncated by its own call to __trunctfdf2 (eight calls);
;   - results are paired with "mov v0.d[1], v1.d[0]", pairs are joined with
;     splice under a vl2 predicate, and the two resulting vectors are written
;     to %b (held in x19) with two st1d stores under a vl4 predicate.
; The CHECK lines are autogenerated (see the NOTE at the top of the file), so
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
57 define void @fcvt_v4f128_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
58 ; CHECK-LABEL: fcvt_v4f128_v4f64:
60 ; CHECK-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill
61 ; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
62 ; CHECK-NEXT: addvl sp, sp, #-2
63 ; CHECK-NEXT: sub sp, sp, #128
64 ; CHECK-NEXT: ldr q1, [x0, #64]
65 ; CHECK-NEXT: ldr q0, [x0, #80]
66 ; CHECK-NEXT: mov x19, x1
67 ; CHECK-NEXT: stp q0, q1, [sp, #96] // 32-byte Folded Spill
68 ; CHECK-NEXT: ldr q1, [x0, #96]
69 ; CHECK-NEXT: ldr q0, [x0, #112]
70 ; CHECK-NEXT: stp q0, q1, [sp, #64] // 32-byte Folded Spill
71 ; CHECK-NEXT: ldr q1, [x0]
72 ; CHECK-NEXT: ldr q0, [x0, #16]
73 ; CHECK-NEXT: stp q0, q1, [sp, #32] // 32-byte Folded Spill
74 ; CHECK-NEXT: ldr q0, [x0, #32]
75 ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
76 ; CHECK-NEXT: ldr q0, [x0, #48]
77 ; CHECK-NEXT: bl __trunctfdf2
78 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
79 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
80 ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
81 ; CHECK-NEXT: bl __trunctfdf2
82 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
83 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
84 ; CHECK-NEXT: add x8, sp, #128
85 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
86 ; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill
87 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
88 ; CHECK-NEXT: bl __trunctfdf2
89 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
90 ; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
91 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
92 ; CHECK-NEXT: bl __trunctfdf2
93 ; CHECK-NEXT: ptrue p0.d, vl2
94 ; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
95 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
96 ; CHECK-NEXT: add x8, sp, #128
97 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
98 ; CHECK-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
99 ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
100 ; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill
101 ; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
102 ; CHECK-NEXT: bl __trunctfdf2
103 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
104 ; CHECK-NEXT: str q0, [sp, #64] // 16-byte Folded Spill
105 ; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
106 ; CHECK-NEXT: bl __trunctfdf2
107 ; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
108 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
109 ; CHECK-NEXT: add x8, sp, #128
110 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
111 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
112 ; CHECK-NEXT: ldr q0, [sp, #96] // 16-byte Folded Reload
113 ; CHECK-NEXT: bl __trunctfdf2
114 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
115 ; CHECK-NEXT: str q0, [sp, #96] // 16-byte Folded Spill
116 ; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
117 ; CHECK-NEXT: bl __trunctfdf2
118 ; CHECK-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload
119 ; CHECK-NEXT: ptrue p1.d, vl2
120 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
121 ; CHECK-NEXT: add x8, sp, #128
122 ; CHECK-NEXT: ptrue p0.d, vl4
123 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
124 ; CHECK-NEXT: ldr z1, [x8] // 16-byte Folded Reload
125 ; CHECK-NEXT: mov x8, #4 // =0x4
126 ; CHECK-NEXT: splice z0.d, p1, z0.d, z1.d
127 ; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3]
128 ; CHECK-NEXT: add x8, sp, #128
129 ; CHECK-NEXT: ldr z0, [x8, #1, mul vl] // 16-byte Folded Reload
130 ; CHECK-NEXT: st1d { z0.d }, p0, [x19]
131 ; CHECK-NEXT: addvl sp, sp, #2
132 ; CHECK-NEXT: add sp, sp, #128
133 ; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
134 ; CHECK-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload
136 %op1 = load <8 x fp128>, ptr %a
137 %res = fptrunc <8 x fp128> %op1 to <8 x double>
138 store <8 x double> %res, ptr %b
; Attribute group #0, referenced by the function definitions in this file:
; marks them nounwind and enables the SVE target feature for code generation.
142 attributes #0 = { nounwind "target-features"="+sve" }