; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
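
; These tests check codegen for the contiguous LD1 intrinsics with a
; reg+reg addressing mode: the getelementptr index should fold into the
; load as [x0, x1], shifted left by log2(element size) for multi-byte
; element types.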

;
; LD1B
;

define <vscale x 16 x i8> @ld1b_i8(<vscale x 16 x i1> %pg, ptr %a, i64 %index) {
; CHECK-LABEL: ld1b_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %a, i64 %index
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret <vscale x 16 x i8> %load
}
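
; The extending-load tests below perform a predicated narrow load followed
; by a zext/sext in IR; the extension is expected to fold into a single
; unpacked extending load (ld1b zero-extends, ld1sb sign-extends).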

define <vscale x 8 x i16> @ld1b_h(<vscale x 8 x i1> %pred, ptr %a, i64 %index) {
; CHECK-LABEL: ld1b_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %a, i64 %index
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> %pred, ptr %base)
  %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ld1sb_h(<vscale x 8 x i1> %pred, ptr %a, i64 %index) {
; CHECK-LABEL: ld1sb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.h }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %a, i64 %index
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> %pred, ptr %base)
  %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @ld1b_s(<vscale x 4 x i1> %pred, ptr %a, i64 %index) {
; CHECK-LABEL: ld1b_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %a, i64 %index
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> %pred, ptr %base)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ld1sb_s(<vscale x 4 x i1> %pred, ptr %a, i64 %index) {
; CHECK-LABEL: ld1sb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %a, i64 %index
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> %pred, ptr %base)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @ld1b_d(<vscale x 2 x i1> %pred, ptr %a, i64 %index) {
; CHECK-LABEL: ld1b_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %a, i64 %index
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> %pred, ptr %base)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ld1sb_d(<vscale x 2 x i1> %pred, ptr %a, i64 %index) {
; CHECK-LABEL: ld1sb_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %a, i64 %index
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> %pred, ptr %base)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LD1H
;
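
; 16-bit element types: the index should be scaled by the element size
; (lsl #1).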

define <vscale x 8 x i16> @ld1h_i16(<vscale x 8 x i1> %pg, ptr %a, i64 %index) {
; CHECK-LABEL: ld1h_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %base = getelementptr i16, ptr %a, i64 %index
  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> %pg, ptr %base)
  ret <vscale x 8 x i16> %load
}

define <vscale x 8 x half> @ld1h_f16(<vscale x 8 x i1> %pg, ptr %a, i64 %index) {
; CHECK-LABEL: ld1h_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %base = getelementptr half, ptr %a, i64 %index
  %load = call <vscale x 8 x half> @llvm.aarch64.sve.ld1.nxv8f16(<vscale x 8 x i1> %pg, ptr %base)
  ret <vscale x 8 x half> %load
}

define <vscale x 8 x bfloat> @ld1h_bf16(<vscale x 8 x i1> %pg, ptr %a, i64 %index) #0 {
; CHECK-LABEL: ld1h_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %base = getelementptr bfloat, ptr %a, i64 %index
  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1.nxv8bf16(<vscale x 8 x i1> %pg, ptr %base)
  ret <vscale x 8 x bfloat> %load
}

define <vscale x 4 x i32> @ld1h_s(<vscale x 4 x i1> %pred, ptr %a, i64 %index) {
; CHECK-LABEL: ld1h_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %base = getelementptr i16, ptr %a, i64 %index
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> %pred, ptr %base)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ld1sh_s(<vscale x 4 x i1> %pred, ptr %a, i64 %index) {
; CHECK-LABEL: ld1sh_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %base = getelementptr i16, ptr %a, i64 %index
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> %pred, ptr %base)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @ld1h_d(<vscale x 2 x i1> %pred, ptr %a, i64 %index) {
; CHECK-LABEL: ld1h_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %base = getelementptr i16, ptr %a, i64 %index
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> %pred, ptr %base)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ld1sh_d(<vscale x 2 x i1> %pred, ptr %a, i64 %index) {
; CHECK-LABEL: ld1sh_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %base = getelementptr i16, ptr %a, i64 %index
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> %pred, ptr %base)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LD1W
;
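
; 32-bit element types: the index should be scaled by lsl #2.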

define <vscale x 4 x i32> @ld1w(<vscale x 4 x i1> %pg, ptr %a, i64 %index) {
; CHECK-LABEL: ld1w:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %base = getelementptr i32, ptr %a, i64 %index
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %pg, ptr %base)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x float> @ld1w_f32(<vscale x 4 x i1> %pg, ptr %a, i64 %index) {
; CHECK-LABEL: ld1w_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %base = getelementptr float, ptr %a, i64 %index
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.nxv4f32(<vscale x 4 x i1> %pg, ptr %base)
  ret <vscale x 4 x float> %load
}

define <vscale x 2 x i64> @ld1w_d(<vscale x 2 x i1> %pred, ptr %a, i64 %index) {
; CHECK-LABEL: ld1w_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %base = getelementptr i32, ptr %a, i64 %index
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> %pred, ptr %base)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ld1sw_d(<vscale x 2 x i1> %pred, ptr %a, i64 %index) {
; CHECK-LABEL: ld1sw_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %base = getelementptr i32, ptr %a, i64 %index
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> %pred, ptr %base)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LD1D
;
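
; 64-bit element types: the index should be scaled by lsl #3.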

define <vscale x 2 x i64> @ld1d(<vscale x 2 x i1> %pg, ptr %a, i64 %index) {
; CHECK-LABEL: ld1d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT:    ret
  %base = getelementptr i64, ptr %a, i64 %index
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1> %pg, ptr %base)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @ld1d_f64(<vscale x 2 x i1> %pg, ptr %a, i64 %index) {
; CHECK-LABEL: ld1d_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT:    ret
  %base = getelementptr double, ptr %a, i64 %index
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1> %pg, ptr %base)
  ret <vscale x 2 x double> %load
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1>, ptr)

declare <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x half> @llvm.aarch64.sve.ld1.nxv8f16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1.nxv8bf16(<vscale x 8 x i1>, ptr)

declare <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.nxv4f32(<vscale x 4 x i1>, ptr)

declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1>, ptr)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }