// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3
// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -O1 -Werror -o /dev/null %s

#include <arm_sme.h>
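
// These tests cover the SME svld1_hor_vnum_* and svld1_ver_vnum_* intrinsics,
// which load a horizontal or vertical ZA tile slice at a vector-length
// multiple (vnum) offset from the base pointer, for 8-, 16-, 32-, 64- and
// 128-bit element tiles.
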
// CHECK-C-LABEL: define dso_local void @test_svld1_hor_vnum_za8(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-C-NEXT: entry:
// CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM]]
// CHECK-C-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-C-NEXT: [[TMP2:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP3]])
// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[ADD]], 15
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP4]])
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z23test_svld1_hor_vnum_za8ju10__SVBool_tPKvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-CXX-NEXT: entry:
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM]]
// CHECK-CXX-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-CXX-NEXT: [[TMP2:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP3]])
// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[ADD]], 15
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP4]])
// CHECK-CXX-NEXT: ret void
//
void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
  svld1_hor_vnum_za8(0, slice_base, pg, ptr, vnum);
  svld1_hor_vnum_za8(0, slice_base + 15, pg, ptr, vnum);
}

// CHECK-C-LABEL: define dso_local void @test_svld1_hor_vnum_za16(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
// CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 7
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[TMP5]])
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_hor_vnum_za16ju10__SVBool_tPKvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 7
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[TMP5]])
// CHECK-CXX-NEXT: ret void
//
void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
  svld1_hor_vnum_za16(0, slice_base, pg, ptr, vnum);
  svld1_hor_vnum_za16(1, slice_base + 7, pg, ptr, vnum);
}

// CHECK-C-LABEL: define dso_local void @test_svld1_hor_vnum_za32(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
// CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 3
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[TMP5]])
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_hor_vnum_za32ju10__SVBool_tPKvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 3
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[TMP5]])
// CHECK-CXX-NEXT: ret void
//
void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
  svld1_hor_vnum_za32(0, slice_base, pg, ptr, vnum);
  svld1_hor_vnum_za32(3, slice_base + 3, pg, ptr, vnum);
}

// CHECK-C-LABEL: define dso_local void @test_svld1_hor_vnum_za64(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
// CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 1
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[TMP5]])
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_hor_vnum_za64ju10__SVBool_tPKvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 1
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[TMP5]])
// CHECK-CXX-NEXT: ret void
//
void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
  svld1_hor_vnum_za64(0, slice_base, pg, ptr, vnum);
  svld1_hor_vnum_za64(7, slice_base + 1, pg, ptr, vnum);
}

// CHECK-C-LABEL: define dso_local void @test_svld1_hor_vnum_za128(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG]])
// CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[TMP4]])
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z25test_svld1_hor_vnum_za128ju10__SVBool_tPKvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG]])
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[TMP4]])
// CHECK-CXX-NEXT: ret void
//
void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
  svld1_hor_vnum_za128(0, slice_base, pg, ptr, vnum);
  svld1_hor_vnum_za128(15, slice_base, pg, ptr, vnum);
}

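// The remaining tests exercise the vertical-slice forms (svld1_ver_vnum_*),
// which lower to the corresponding @llvm.aarch64.sme.ld1*.vert intrinsics.
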
// CHECK-C-LABEL: define dso_local void @test_svld1_ver_hor_za8(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
// CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM]]
// CHECK-C-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-C-NEXT: [[TMP2:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP3]])
// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[ADD]], 15
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP4]])
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z22test_svld1_ver_hor_za8ju10__SVBool_tPKvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM]]
// CHECK-CXX-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-CXX-NEXT: [[TMP2:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP3]])
// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP2]]
// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[ADD]], 15
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[TMP4]])
// CHECK-CXX-NEXT: ret void
//
void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
  svld1_ver_vnum_za8(0, slice_base, pg, ptr, vnum);
  svld1_ver_vnum_za8(0, slice_base + 15, pg, ptr, vnum);
}

// CHECK-C-LABEL: define dso_local void @test_svld1_ver_vnum_za16(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
// CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 7
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[TMP5]])
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_ver_vnum_za16ju10__SVBool_tPKvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 7
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[TMP5]])
// CHECK-CXX-NEXT: ret void
//
void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
  svld1_ver_vnum_za16(0, slice_base, pg, ptr, vnum);
  svld1_ver_vnum_za16(1, slice_base + 7, pg, ptr, vnum);
}

// CHECK-C-LABEL: define dso_local void @test_svld1_ver_vnum_za32(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
// CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 3
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[TMP5]])
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_ver_vnum_za32ju10__SVBool_tPKvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 3
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[TMP5]])
// CHECK-CXX-NEXT: ret void
//
void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
  svld1_ver_vnum_za32(0, slice_base, pg, ptr, vnum);
  svld1_ver_vnum_za32(3, slice_base + 3, pg, ptr, vnum);
}

// CHECK-C-LABEL: define dso_local void @test_svld1_ver_vnum_za64(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
// CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
// CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 1
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[TMP5]])
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_ver_vnum_za64ju10__SVBool_tPKvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
// CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: [[TMP5:%.*]] = add i32 [[ADD]], 1
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[TMP5]])
// CHECK-CXX-NEXT: ret void
//
void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
  svld1_ver_vnum_za64(0, slice_base, pg, ptr, vnum);
  svld1_ver_vnum_za64(7, slice_base + 1, pg, ptr, vnum);
}

// CHECK-C-LABEL: define dso_local void @test_svld1_ver_vnum_za128(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-C-NEXT: entry:
// CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG]])
// CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-C-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-C-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
// CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[TMP4]])
// CHECK-C-NEXT: ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z25test_svld1_ver_vnum_za128ju10__SVBool_tPKvl(
// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT: entry:
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG]])
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
// CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
// CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
// CHECK-CXX-NEXT: [[TMP3:%.*]] = trunc i64 [[VNUM]] to i32
// CHECK-CXX-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE]], [[TMP3]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[TMP4]])
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[TMP4]])
// CHECK-CXX-NEXT: ret void
//
void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
  svld1_ver_vnum_za128(0, slice_base, pg, ptr, vnum);
  svld1_ver_vnum_za128(15, slice_base, pg, ptr, vnum);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
// CHECK: {{.*}}