1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=sve < %s | FileCheck %s
3 ; RUN: llc -mtriple=aarch64--linux-gnu -mattr=sme < %s | FileCheck %s
6 define { <vscale x 16 x i8>, <vscale x 16 x i8> } @ld2.nxv32i8(<vscale x 16 x i1> %Pg, ptr %addr, i64 %a) {
7 ; CHECK-LABEL: ld2.nxv32i8:
9 ; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x0, x1]
11 %addr2 = getelementptr i8, ptr %addr, i64 %a
12 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1> %Pg, ptr %addr2)
13 ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
17 define { <vscale x 8 x i16>, <vscale x 8 x i16> } @ld2.nxv16i16(<vscale x 8 x i1> %Pg, ptr %addr, i64 %a) {
18 ; CHECK-LABEL: ld2.nxv16i16:
20 ; CHECK-NEXT: ld2h { z0.h, z1.h }, p0/z, [x0, x1, lsl #1]
22 %addr2 = getelementptr i16, ptr %addr, i64 %a
23 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1> %Pg, ptr %addr2)
24 ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
27 define { <vscale x 8 x half>, <vscale x 8 x half> } @ld2.nxv16f16(<vscale x 8 x i1> %Pg, ptr %addr, i64 %a) {
28 ; CHECK-LABEL: ld2.nxv16f16:
30 ; CHECK-NEXT: ld2h { z0.h, z1.h }, p0/z, [x0, x1, lsl #1]
32 %addr2 = getelementptr half, ptr %addr, i64 %a
33 %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2.sret.nxv8f16(<vscale x 8 x i1> %Pg, ptr %addr2)
34 ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
37 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld2.nxv16bf16(<vscale x 8 x i1> %Pg, ptr %addr, i64 %a) #0 {
38 ; CHECK-LABEL: ld2.nxv16bf16:
40 ; CHECK-NEXT: ld2h { z0.h, z1.h }, p0/z, [x0, x1, lsl #1]
42 %addr2 = getelementptr bfloat, ptr %addr, i64 %a
43 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16(<vscale x 8 x i1> %Pg, ptr %addr2)
44 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
48 define { <vscale x 4 x i32>, <vscale x 4 x i32> } @ld2.nxv8i32(<vscale x 4 x i1> %Pg, ptr %addr, i64 %a) {
49 ; CHECK-LABEL: ld2.nxv8i32:
51 ; CHECK-NEXT: ld2w { z0.s, z1.s }, p0/z, [x0, x1, lsl #2]
53 %addr2 = getelementptr i32, ptr %addr, i64 %a
54 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1> %Pg, ptr %addr2)
55 ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
58 define { <vscale x 4 x float>, <vscale x 4 x float> } @ld2.nxv8f32(<vscale x 4 x i1> %Pg, ptr %addr, i64 %a) {
59 ; CHECK-LABEL: ld2.nxv8f32:
61 ; CHECK-NEXT: ld2w { z0.s, z1.s }, p0/z, [x0, x1, lsl #2]
63 %addr2 = getelementptr float, ptr %addr, i64 %a
64 %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2.sret.nxv4f32(<vscale x 4 x i1> %Pg, ptr %addr2)
65 ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
69 define { <vscale x 2 x i64>, <vscale x 2 x i64> } @ld2.nxv4i64(<vscale x 2 x i1> %Pg, ptr %addr, i64 %a) {
70 ; CHECK-LABEL: ld2.nxv4i64:
72 ; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x0, x1, lsl #3]
74 %addr2 = getelementptr i64, ptr %addr, i64 %a
75 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1> %Pg, ptr %addr2)
76 ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
79 define { <vscale x 2 x double>, <vscale x 2 x double> } @ld2.nxv4f64(<vscale x 2 x i1> %Pg, ptr %addr, i64 %a) {
80 ; CHECK-LABEL: ld2.nxv4f64:
82 ; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x0, x1, lsl #3]
84 %addr2 = getelementptr double, ptr %addr, i64 %a
85 %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> %Pg, ptr %addr2)
86 ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
90 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld3.nxv48i8(<vscale x 16 x i1> %Pg, ptr %addr, i64 %a) {
91 ; CHECK-LABEL: ld3.nxv48i8:
93 ; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, x1]
95 %addr2 = getelementptr i8, ptr %addr, i64 %a
96 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1> %Pg, ptr %addr2)
97 ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
101 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @ld3.nxv24i16(<vscale x 8 x i1> %Pg, ptr %addr, i64 %a) {
102 ; CHECK-LABEL: ld3.nxv24i16:
104 ; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [x0, x1, lsl #1]
106 %addr2 = getelementptr i16, ptr %addr, i64 %a
107 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1> %Pg, ptr %addr2)
108 ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
111 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @ld3.nxv24f16(<vscale x 8 x i1> %Pg, ptr %addr, i64 %a) {
112 ; CHECK-LABEL: ld3.nxv24f16:
114 ; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [x0, x1, lsl #1]
116 %addr2 = getelementptr half, ptr %addr, i64 %a
117 %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3.sret.nxv8f16(<vscale x 8 x i1> %Pg, ptr %addr2)
118 ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
121 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld3.nxv24bf16(<vscale x 8 x i1> %Pg, ptr %addr, i64 %a) #0 {
122 ; CHECK-LABEL: ld3.nxv24bf16:
124 ; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [x0, x1, lsl #1]
126 %addr2 = getelementptr bfloat, ptr %addr, i64 %a
127 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3.sret.nxv8bf16(<vscale x 8 x i1> %Pg, ptr %addr2)
128 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
132 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @ld3.nxv12i32(<vscale x 4 x i1> %Pg, ptr %addr, i64 %a) {
133 ; CHECK-LABEL: ld3.nxv12i32:
135 ; CHECK-NEXT: ld3w { z0.s - z2.s }, p0/z, [x0, x1, lsl #2]
137 %addr2 = getelementptr i32, ptr %addr, i64 %a
138 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1> %Pg, ptr %addr2)
139 ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
142 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @ld3.nxv12f32(<vscale x 4 x i1> %Pg, ptr %addr, i64 %a) {
143 ; CHECK-LABEL: ld3.nxv12f32:
145 ; CHECK-NEXT: ld3w { z0.s - z2.s }, p0/z, [x0, x1, lsl #2]
147 %addr2 = getelementptr float, ptr %addr, i64 %a
148 %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32(<vscale x 4 x i1> %Pg, ptr %addr2)
149 ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
153 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @ld3.nxv6i64(<vscale x 2 x i1> %Pg, ptr %addr, i64 %a) {
154 ; CHECK-LABEL: ld3.nxv6i64:
156 ; CHECK-NEXT: ld3d { z0.d - z2.d }, p0/z, [x0, x1, lsl #3]
158 %addr2 = getelementptr i64, ptr %addr, i64 %a
159 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1> %Pg, ptr %addr2)
160 ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
163 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @ld3.nxv6f64(<vscale x 2 x i1> %Pg, ptr %addr, i64 %a) {
164 ; CHECK-LABEL: ld3.nxv6f64:
166 ; CHECK-NEXT: ld3d { z0.d - z2.d }, p0/z, [x0, x1, lsl #3]
168 %addr2 = getelementptr double, ptr %addr, i64 %a
169 %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3.sret.nxv2f64(<vscale x 2 x i1> %Pg, ptr %addr2)
170 ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
174 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld4.nxv64i8(<vscale x 16 x i1> %Pg, ptr %addr, i64 %a) {
175 ; CHECK-LABEL: ld4.nxv64i8:
177 ; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, x1]
179 %addr2 = getelementptr i8, ptr %addr, i64 %a
180 %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1> %Pg, ptr %addr2)
181 ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
185 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @ld4.nxv32i16(<vscale x 8 x i1> %Pg, ptr %addr, i64 %a) {
186 ; CHECK-LABEL: ld4.nxv32i16:
188 ; CHECK-NEXT: ld4h { z0.h - z3.h }, p0/z, [x0, x1, lsl #1]
190 %addr2 = getelementptr i16, ptr %addr, i64 %a
191 %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1> %Pg, ptr %addr2)
192 ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
195 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @ld4.nxv32f16(<vscale x 8 x i1> %Pg, ptr %addr, i64 %a) {
196 ; CHECK-LABEL: ld4.nxv32f16:
198 ; CHECK-NEXT: ld4h { z0.h - z3.h }, p0/z, [x0, x1, lsl #1]
200 %addr2 = getelementptr half, ptr %addr, i64 %a
201 %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4.sret.nxv8f16(<vscale x 8 x i1> %Pg, ptr %addr2)
202 ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
205 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @ld4.nxv32bf16(<vscale x 8 x i1> %Pg, ptr %addr, i64 %a) #0 {
206 ; CHECK-LABEL: ld4.nxv32bf16:
208 ; CHECK-NEXT: ld4h { z0.h - z3.h }, p0/z, [x0, x1, lsl #1]
210 %addr2 = getelementptr bfloat, ptr %addr, i64 %a
211 %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4.sret.nxv8bf16(<vscale x 8 x i1> %Pg, ptr %addr2)
212 ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
216 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @ld4.nxv16i32(<vscale x 4 x i1> %Pg, ptr %addr, i64 %a) {
217 ; CHECK-LABEL: ld4.nxv16i32:
219 ; CHECK-NEXT: ld4w { z0.s - z3.s }, p0/z, [x0, x1, lsl #2]
221 %addr2 = getelementptr i32, ptr %addr, i64 %a
222 %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1> %Pg, ptr %addr2)
223 ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
226 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @ld4.nxv16f32(<vscale x 4 x i1> %Pg, ptr %addr, i64 %a) {
227 ; CHECK-LABEL: ld4.nxv16f32:
229 ; CHECK-NEXT: ld4w { z0.s - z3.s }, p0/z, [x0, x1, lsl #2]
231 %addr2 = getelementptr float, ptr %addr, i64 %a
232 %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4.sret.nxv4f32(<vscale x 4 x i1> %Pg, ptr %addr2)
233 ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
237 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @ld4.nxv8i64(<vscale x 2 x i1> %Pg, ptr %addr, i64 %a) {
238 ; CHECK-LABEL: ld4.nxv8i64:
240 ; CHECK-NEXT: ld4d { z0.d - z3.d }, p0/z, [x0, x1, lsl #3]
242 %addr2 = getelementptr i64, ptr %addr, i64 %a
243 %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1> %Pg, ptr %addr2)
244 ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
247 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @ld4.nxv8f64(<vscale x 2 x i1> %Pg, ptr %addr, i64 %a) {
248 ; CHECK-LABEL: ld4.nxv8f64:
250 ; CHECK-NEXT: ld4d { z0.d - z3.d }, p0/z, [x0, x1, lsl #3]
252 %addr2 = getelementptr double, ptr %addr, i64 %a
253 %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1> %Pg, ptr %addr2)
254 ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
257 declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld2.sret.nxv16i8(<vscale x 16 x i1>, ptr)
258 declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld2.sret.nxv8i16(<vscale x 8 x i1>, ptr)
259 declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld2.sret.nxv4i32(<vscale x 4 x i1>, ptr)
260 declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld2.sret.nxv2i64(<vscale x 2 x i1>, ptr)
261 declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld2.sret.nxv8f16(<vscale x 8 x i1>, ptr)
262 declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld2.sret.nxv8bf16(<vscale x 8 x i1>, ptr)
263 declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld2.sret.nxv4f32(<vscale x 4 x i1>, ptr)
264 declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1>, ptr)
266 declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld3.sret.nxv16i8(<vscale x 16 x i1>, ptr)
267 declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld3.sret.nxv8i16(<vscale x 8 x i1>, ptr)
268 declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld3.sret.nxv4i32(<vscale x 4 x i1>, ptr)
269 declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld3.sret.nxv2i64(<vscale x 2 x i1>, ptr)
270 declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld3.sret.nxv8f16(<vscale x 8 x i1>, ptr)
271 declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld3.sret.nxv8bf16(<vscale x 8 x i1>, ptr)
272 declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld3.sret.nxv4f32(<vscale x 4 x i1>, ptr)
273 declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld3.sret.nxv2f64(<vscale x 2 x i1>, ptr)
275 declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld4.sret.nxv16i8(<vscale x 16 x i1>, ptr)
276 declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld4.sret.nxv8i16(<vscale x 8 x i1>, ptr)
277 declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld4.sret.nxv4i32(<vscale x 4 x i1>, ptr)
278 declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld4.sret.nxv2i64(<vscale x 2 x i1>, ptr)
279 declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld4.sret.nxv8f16(<vscale x 8 x i1>, ptr)
280 declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.ld4.sret.nxv8bf16(<vscale x 8 x i1>, ptr)
281 declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld4.sret.nxv4f32(<vscale x 4 x i1>, ptr)
282 declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld4.sret.nxv2f64(<vscale x 2 x i1>, ptr)
284 ; +bf16 is required for the bfloat version.
285 attributes #0 = { "target-features"="+bf16" }