; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s
; Loads a full <vscale x 16 x i8> vector from %addr indexed by %off, where the
; offset is counted in i8 elements. Expected codegen: a byte-granular ptrue and
; one ld1b using the unscaled register+register form [x0, x1].
define <vscale x 16 x i8> @ld1_nxv16i8(i8* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
  %ptrcast = bitcast i8* %ptr to <vscale x 16 x i8>*
  %val = load volatile <vscale x 16 x i8>, <vscale x 16 x i8>* %ptrcast
  ret <vscale x 16 x i8> %val
}
; Loads <vscale x 8 x i16> through a pointer that was indexed in i8 units.
; Because the offset is a byte offset, the expected instruction is still the
; byte form (ld1b with z0.b / p0.b) and an unscaled [x0, x1] address.
define <vscale x 8 x i16> @ld1_nxv16i8_bitcast_to_i16(i8* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
  %ptrcast = bitcast i8* %ptr to <vscale x 8 x i16>*
  %val = load volatile <vscale x 8 x i16>, <vscale x 8 x i16>* %ptrcast
  ret <vscale x 8 x i16> %val
}
; Loads <vscale x 4 x i32> through a pointer that was indexed in i8 units.
; Because the offset is a byte offset, the expected instruction is still the
; byte form (ld1b with z0.b / p0.b) and an unscaled [x0, x1] address.
define <vscale x 4 x i32> @ld1_nxv16i8_bitcast_to_i32(i8* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
  %ptrcast = bitcast i8* %ptr to <vscale x 4 x i32>*
  %val = load volatile <vscale x 4 x i32>, <vscale x 4 x i32>* %ptrcast
  ret <vscale x 4 x i32> %val
}
; Loads <vscale x 2 x i64> through a pointer that was indexed in i8 units.
; Because the offset is a byte offset, the expected instruction is still the
; byte form (ld1b with z0.b / p0.b) and an unscaled [x0, x1] address.
define <vscale x 2 x i64> @ld1_nxv16i8_bitcast_to_i64(i8* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv16i8_bitcast_to_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
  %ptrcast = bitcast i8* %ptr to <vscale x 2 x i64>*
  %val = load volatile <vscale x 2 x i64>, <vscale x 2 x i64>* %ptrcast
  ret <vscale x 2 x i64> %val
}
; Loads <vscale x 8 x i8> at an i8-indexed address and zero-extends each lane
; to i16. Expected codegen folds the extension into the load: ld1b writing
; halfword lanes (z0.h) under a halfword predicate, address [x0, x1].
define <vscale x 8 x i16> @ld1_nxv8i16_zext8(i8* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv8i16_zext8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0, x1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
  %ptrcast = bitcast i8* %ptr to <vscale x 8 x i8>*
  %val = load volatile <vscale x 8 x i8>, <vscale x 8 x i8>* %ptrcast
  %zext = zext <vscale x 8 x i8> %val to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %zext
}
; Loads <vscale x 4 x i8> at an i8-indexed address and zero-extends each lane
; to i32. Expected codegen folds the extension into the load: ld1b writing
; word lanes (z0.s) under a word predicate, address [x0, x1].
define <vscale x 4 x i32> @ld1_nxv4i32_zext8(i8* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv4i32_zext8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, x1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
  %ptrcast = bitcast i8* %ptr to <vscale x 4 x i8>*
  %val = load volatile <vscale x 4 x i8>, <vscale x 4 x i8>* %ptrcast
  %zext = zext <vscale x 4 x i8> %val to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %zext
}
; Loads <vscale x 2 x i8> at an i8-indexed address and zero-extends each lane
; to i64. Expected codegen folds the extension into the load: ld1b writing
; doubleword lanes (z0.d) under a doubleword predicate, address [x0, x1].
define <vscale x 2 x i64> @ld1_nxv2i64_zext8(i8* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv2i64_zext8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, x1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
  %ptrcast = bitcast i8* %ptr to <vscale x 2 x i8>*
  %val = load volatile <vscale x 2 x i8>, <vscale x 2 x i8>* %ptrcast
  %zext = zext <vscale x 2 x i8> %val to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %zext
}
; Loads <vscale x 8 x i8> at an i8-indexed address and sign-extends each lane
; to i16. Expected codegen folds the extension into the load by selecting the
; signed variant ld1sb into halfword lanes (z0.h), address [x0, x1].
define <vscale x 8 x i16> @ld1_nxv8i16_sext8(i8* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv8i16_sext8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1sb { z0.h }, p0/z, [x0, x1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
  %ptrcast = bitcast i8* %ptr to <vscale x 8 x i8>*
  %val = load volatile <vscale x 8 x i8>, <vscale x 8 x i8>* %ptrcast
  %sext = sext <vscale x 8 x i8> %val to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %sext
}
; Loads <vscale x 4 x i8> at an i8-indexed address and sign-extends each lane
; to i32. Expected codegen folds the extension into the load by selecting the
; signed variant ld1sb into word lanes (z0.s), address [x0, x1].
define <vscale x 4 x i32> @ld1_nxv4i32_sext8(i8* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv4i32_sext8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x0, x1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
  %ptrcast = bitcast i8* %ptr to <vscale x 4 x i8>*
  %val = load volatile <vscale x 4 x i8>, <vscale x 4 x i8>* %ptrcast
  %sext = sext <vscale x 4 x i8> %val to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %sext
}
; Loads <vscale x 2 x i8> at an i8-indexed address and sign-extends each lane
; to i64. Expected codegen folds the extension into the load by selecting the
; signed variant ld1sb into doubleword lanes (z0.d), address [x0, x1].
define <vscale x 2 x i64> @ld1_nxv2i64_sext8(i8* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv2i64_sext8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, x1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i8, i8* %addr, i64 %off
  %ptrcast = bitcast i8* %ptr to <vscale x 2 x i8>*
  %val = load volatile <vscale x 2 x i8>, <vscale x 2 x i8>* %ptrcast
  %sext = sext <vscale x 2 x i8> %val to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %sext
}
; Loads a full <vscale x 8 x i16> vector from %addr indexed by %off, where the
; offset is counted in i16 elements. Expected codegen: ld1h with the index
; register scaled by the element size, i.e. [x0, x1, lsl #1].
define <vscale x 8 x i16> @ld1_nxv8i16(i16* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i16, i16* %addr, i64 %off
  %ptrcast = bitcast i16* %ptr to <vscale x 8 x i16>*
  %val = load volatile <vscale x 8 x i16>, <vscale x 8 x i16>* %ptrcast
  ret <vscale x 8 x i16> %val
}
; Loads <vscale x 4 x i16> at an i16-indexed address and zero-extends each lane
; to i32. Expected codegen folds the extension into the load: ld1h writing
; word lanes (z0.s) with the scaled address [x0, x1, lsl #1].
define <vscale x 4 x i32> @ld1_nxv4i32_zext16(i16* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv4i32_zext16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i16, i16* %addr, i64 %off
  %ptrcast = bitcast i16* %ptr to <vscale x 4 x i16>*
  %val = load volatile <vscale x 4 x i16>, <vscale x 4 x i16>* %ptrcast
  %zext = zext <vscale x 4 x i16> %val to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %zext
}
; Loads <vscale x 2 x i16> at an i16-indexed address and zero-extends each lane
; to i64. Expected codegen folds the extension into the load: ld1h writing
; doubleword lanes (z0.d) with the scaled address [x0, x1, lsl #1].
define <vscale x 2 x i64> @ld1_nxv2i64_zext16(i16* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv2i64_zext16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i16, i16* %addr, i64 %off
  %ptrcast = bitcast i16* %ptr to <vscale x 2 x i16>*
  %val = load volatile <vscale x 2 x i16>, <vscale x 2 x i16>* %ptrcast
  %zext = zext <vscale x 2 x i16> %val to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %zext
}
; Loads <vscale x 4 x i16> at an i16-indexed address and sign-extends each lane
; to i32. Expected codegen folds the extension into the load by selecting the
; signed variant ld1sh into word lanes (z0.s), address [x0, x1, lsl #1].
define <vscale x 4 x i32> @ld1_nxv4i32_sext16(i16* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv4i32_sext16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i16, i16* %addr, i64 %off
  %ptrcast = bitcast i16* %ptr to <vscale x 4 x i16>*
  %val = load volatile <vscale x 4 x i16>, <vscale x 4 x i16>* %ptrcast
  %sext = sext <vscale x 4 x i16> %val to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %sext
}
; Loads <vscale x 2 x i16> at an i16-indexed address and sign-extends each lane
; to i64. Expected codegen folds the extension into the load by selecting the
; signed variant ld1sh into doubleword lanes (z0.d), address [x0, x1, lsl #1].
define <vscale x 2 x i64> @ld1_nxv2i64_sext16(i16* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv2i64_sext16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i16, i16* %addr, i64 %off
  %ptrcast = bitcast i16* %ptr to <vscale x 2 x i16>*
  %val = load volatile <vscale x 2 x i16>, <vscale x 2 x i16>* %ptrcast
  %sext = sext <vscale x 2 x i16> %val to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %sext
}
; Loads a full <vscale x 8 x half> vector from %addr indexed by %off, where the
; offset is counted in half elements. Expected codegen: ld1h into halfword
; lanes with the scaled address [x0, x1, lsl #1].
define <vscale x 8 x half> @ld1_nxv8f16(half* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds half, half* %addr, i64 %off
  %ptrcast = bitcast half* %ptr to <vscale x 8 x half>*
  %val = load volatile <vscale x 8 x half>, <vscale x 8 x half>* %ptrcast
  ret <vscale x 8 x half> %val
}
; Loads a full <vscale x 8 x bfloat> vector from %addr indexed by %off, where
; the offset is counted in bfloat elements. Expected codegen matches the half
; case: ld1h into halfword lanes with the scaled address [x0, x1, lsl #1].
define <vscale x 8 x bfloat> @ld1_nxv8bf16(bfloat* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv8bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds bfloat, bfloat* %addr, i64 %off
  %ptrcast = bitcast bfloat* %ptr to <vscale x 8 x bfloat>*
  %val = load volatile <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* %ptrcast
  ret <vscale x 8 x bfloat> %val
}
; Loads <vscale x 4 x half> (fewer lanes than a full halfword vector) at a
; half-indexed address. Expected codegen: ld1h placing each half element in a
; word lane (z0.s) under a word predicate, address [x0, x1, lsl #1].
define <vscale x 4 x half> @ld1_nxv4f16(half* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds half, half* %addr, i64 %off
  %ptrcast = bitcast half* %ptr to <vscale x 4 x half>*
  %val = load volatile <vscale x 4 x half>, <vscale x 4 x half>* %ptrcast
  ret <vscale x 4 x half> %val
}
; Loads <vscale x 2 x half> (fewer lanes than a full halfword vector) at a
; half-indexed address. Expected codegen: ld1h placing each half element in a
; doubleword lane (z0.d) under a doubleword predicate, address
; [x0, x1, lsl #1].
define <vscale x 2 x half> @ld1_nxv2f16(half* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds half, half* %addr, i64 %off
  %ptrcast = bitcast half* %ptr to <vscale x 2 x half>*
  %val = load volatile <vscale x 2 x half>, <vscale x 2 x half>* %ptrcast
  ret <vscale x 2 x half> %val
}
; Loads a full <vscale x 4 x i32> vector from %addr indexed by %off, where the
; offset is counted in i32 elements. Expected codegen: ld1w with the index
; register scaled by the element size, i.e. [x0, x1, lsl #2].
define <vscale x 4 x i32> @ld1_nxv4i32(i32* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i32, i32* %addr, i64 %off
  %ptrcast = bitcast i32* %ptr to <vscale x 4 x i32>*
  %val = load volatile <vscale x 4 x i32>, <vscale x 4 x i32>* %ptrcast
  ret <vscale x 4 x i32> %val
}
; Loads <vscale x 2 x i32> at an i32-indexed address and zero-extends each lane
; to i64. Expected codegen folds the extension into the load: ld1w writing
; doubleword lanes (z0.d) with the scaled address [x0, x1, lsl #2].
define <vscale x 2 x i64> @ld1_nxv2i64_zext32(i32* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv2i64_zext32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i32, i32* %addr, i64 %off
  %ptrcast = bitcast i32* %ptr to <vscale x 2 x i32>*
  %val = load volatile <vscale x 2 x i32>, <vscale x 2 x i32>* %ptrcast
  %zext = zext <vscale x 2 x i32> %val to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %zext
}
; Loads <vscale x 2 x i32> at an i32-indexed address and sign-extends each lane
; to i64. Expected codegen folds the extension into the load by selecting the
; signed variant ld1sw into doubleword lanes (z0.d), address [x0, x1, lsl #2].
define <vscale x 2 x i64> @ld1_nxv2i64_sext32(i32* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv2i64_sext32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i32, i32* %addr, i64 %off
  %ptrcast = bitcast i32* %ptr to <vscale x 2 x i32>*
  %val = load volatile <vscale x 2 x i32>, <vscale x 2 x i32>* %ptrcast
  %sext = sext <vscale x 2 x i32> %val to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %sext
}
; Loads a full <vscale x 4 x float> vector from %addr indexed by %off, where
; the offset is counted in float elements. Expected codegen: ld1w into word
; lanes with the scaled address [x0, x1, lsl #2].
define <vscale x 4 x float> @ld1_nxv4f32(float* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds float, float* %addr, i64 %off
  %ptrcast = bitcast float* %ptr to <vscale x 4 x float>*
  %val = load volatile <vscale x 4 x float>, <vscale x 4 x float>* %ptrcast
  ret <vscale x 4 x float> %val
}
; Loads <vscale x 2 x float> (fewer lanes than a full word vector) at a
; float-indexed address. Expected codegen: ld1w placing each float element in
; a doubleword lane (z0.d) under a doubleword predicate, address
; [x0, x1, lsl #2].
define <vscale x 2 x float> @ld1_nxv2f32(float* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds float, float* %addr, i64 %off
  %ptrcast = bitcast float* %ptr to <vscale x 2 x float>*
  %val = load volatile <vscale x 2 x float>, <vscale x 2 x float>* %ptrcast
  ret <vscale x 2 x float> %val
}
; Loads a full <vscale x 2 x i64> vector from %addr indexed by %off, where the
; offset is counted in i64 elements. Expected codegen: ld1d with the index
; register scaled by the element size, i.e. [x0, x1, lsl #3].
define <vscale x 2 x i64> @ld1_nxv2i64(i64* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds i64, i64* %addr, i64 %off
  %ptrcast = bitcast i64* %ptr to <vscale x 2 x i64>*
  %val = load volatile <vscale x 2 x i64>, <vscale x 2 x i64>* %ptrcast
  ret <vscale x 2 x i64> %val
}
; Loads a full <vscale x 2 x double> vector from %addr indexed by %off, where
; the offset is counted in double elements. Expected codegen: ld1d into
; doubleword lanes with the scaled address [x0, x1, lsl #3].
define <vscale x 2 x double> @ld1_nxv2f64(double* %addr, i64 %off) {
; CHECK-LABEL: ld1_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT: ret
  %ptr = getelementptr inbounds double, double* %addr, i64 %off
  %ptrcast = bitcast double* %ptr to <vscale x 2 x double>*
  %val = load volatile <vscale x 2 x double>, <vscale x 2 x double>* %ptrcast
  ret <vscale x 2 x double> %val
}