; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | not grep ptrue
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_2048
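
; Each RUN line pins a different minimum SVE vector length; runs that share a
; check prefix are expected to produce identical code.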

target triple = "aarch64-unknown-linux-gnu"

; Don't use SVE for 64-bit vectors.
define void @load_v2f32(ptr %a, ptr %b) #0 {
; CHECK-LABEL: load_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    str d0, [x1]
; CHECK-NEXT:    ret
  %load = load <2 x float>, ptr %a
  store <2 x float> %load, ptr %b
  ret void
}

; Don't use SVE for 128-bit vectors.
define void @load_v4f32(ptr %a, ptr %b) #0 {
; CHECK-LABEL: load_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
  %load = load <4 x float>, ptr %a
  store <4 x float> %load, ptr %b
  ret void
}
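
; A 256-bit vector fits in a single SVE register for every tested length, so a
; single predicated ld1w/st1w pair with a vl8 predicate is expected.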
define void @load_v8f32(ptr %a, ptr %b) #0 {
; CHECK-LABEL: load_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %load = load <8 x float>, ptr %a
  store <8 x float> %load, ptr %b
  ret void
}
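
; A 512-bit vector is split into two vl8 accesses when the minimum SVE length
; is 256 bits, and uses a single predicated vl16 access at 512 bits or more.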
define void @load_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: load_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: load_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
;
; VBITS_GE_1024-LABEL: load_v16f32:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.s, vl16
; VBITS_GE_1024-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_v16f32:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.s, vl16
; VBITS_GE_2048-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %load = load <16 x float>, ptr %a
  store <16 x float> %load, ptr %b
  ret void
}
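
; A 1024-bit vector is split into four vl8 accesses at 256 bits, two vl16
; accesses at 512 bits, and a single vl32 access at 1024 bits or more.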
define void @load_v32f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: load_v32f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    mov x9, #24 // =0x18
; VBITS_GE_256-NEXT:    mov x10, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0, x9, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0, x10, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1, x9, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z2.s }, p0, [x1, x10, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z3.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: load_v32f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_512-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
;
; VBITS_GE_1024-LABEL: load_v32f32:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.s, vl32
; VBITS_GE_1024-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_v32f32:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.s, vl32
; VBITS_GE_2048-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %load = load <32 x float>, ptr %a
  store <32 x float> %load, ptr %b
  ret void
}
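
; A 2048-bit vector is split into eight vl8 accesses at 256 bits, four vl16
; accesses at 512 bits, two vl32 accesses at 1024 bits, and a single vl64
; access at 2048 bits.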
define void @load_v64f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: load_v64f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    mov x9, #24 // =0x18
; VBITS_GE_256-NEXT:    mov x10, #16 // =0x10
; VBITS_GE_256-NEXT:    mov x11, #48 // =0x30
; VBITS_GE_256-NEXT:    mov x12, #40 // =0x28
; VBITS_GE_256-NEXT:    mov x13, #56 // =0x38
; VBITS_GE_256-NEXT:    mov x14, #32 // =0x20
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x11, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0, x13, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x0, x9, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z4.s }, p0/z, [x0, x10, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z5.s }, p0/z, [x0, x14, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z6.s }, p0/z, [x0, x12, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z7.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x11, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z2.s }, p0, [x1, x13, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z5.s }, p0, [x1, x14, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z6.s }, p0, [x1, x12, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z4.s }, p0, [x1, x10, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z3.s }, p0, [x1, x9, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z7.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: load_v64f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    mov x8, #32 // =0x20
; VBITS_GE_512-NEXT:    mov x9, #48 // =0x30
; VBITS_GE_512-NEXT:    mov x10, #16 // =0x10
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x0, x9, lsl #2]
; VBITS_GE_512-NEXT:    ld1w { z2.s }, p0/z, [x0, x10, lsl #2]
; VBITS_GE_512-NEXT:    ld1w { z3.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_512-NEXT:    st1w { z1.s }, p0, [x1, x9, lsl #2]
; VBITS_GE_512-NEXT:    st1w { z2.s }, p0, [x1, x10, lsl #2]
; VBITS_GE_512-NEXT:    st1w { z3.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
;
; VBITS_GE_1024-LABEL: load_v64f32:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.s, vl32
; VBITS_GE_1024-NEXT:    mov x8, #32 // =0x20
; VBITS_GE_1024-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_1024-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_1024-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_v64f32:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.s, vl64
; VBITS_GE_2048-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %load = load <64 x float>, ptr %a
  store <64 x float> %load, ptr %b
  ret void
}

attributes #0 = { "target-features"="+sve" }