Revert "[llvm] Improve llvm.objectsize computation by computing GEP, alloca and mallo...
[llvm-project.git] / clang / test / CodeGen / AArch64 / sme-intrinsics / acle_sme_ld1_vnum.c
blobfcbd17559dc7026dbb9970677de6fe75f5e2e3d1
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3
2 // REQUIRES: aarch64-registered-target
3 // RUN: %clang_cc1 -triple aarch64 -target-feature +sme -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
4 // RUN: %clang_cc1 -triple aarch64 -target-feature +sme -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
5 // RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -O1 -Werror -o /dev/null %s
7 #include <arm_sme.h>
9 // CHECK-C-LABEL: define dso_local void @test_svld1_hor_vnum_za8(
10 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
11 // CHECK-C-NEXT: entry:
12 // CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
13 // CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM]]
14 // CHECK-C-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
15 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[SLICE_BASE]])
16 // CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15
17 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
18 // CHECK-C-NEXT: ret void
20 // CHECK-CXX-LABEL: define dso_local void @_Z23test_svld1_hor_vnum_za8ju10__SVBool_tPKvl(
21 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
22 // CHECK-CXX-NEXT: entry:
23 // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
24 // CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM]]
25 // CHECK-CXX-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
26 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[SLICE_BASE]])
27 // CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15
28 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
29 // CHECK-CXX-NEXT: ret void
31 void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
32 svld1_hor_vnum_za8(0, slice_base, pg, ptr, vnum);
33 svld1_hor_vnum_za8(0, slice_base + 15, pg, ptr, vnum);
36 // CHECK-C-LABEL: define dso_local void @test_svld1_hor_vnum_za16(
37 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
38 // CHECK-C-NEXT: entry:
39 // CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
40 // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
41 // CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
42 // CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
43 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
44 // CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 7
45 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
46 // CHECK-C-NEXT: ret void
48 // CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_hor_vnum_za16ju10__SVBool_tPKvl(
49 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
50 // CHECK-CXX-NEXT: entry:
51 // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
52 // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
53 // CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
54 // CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
55 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
56 // CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 7
57 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
58 // CHECK-CXX-NEXT: ret void
60 void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
61 svld1_hor_vnum_za16(0, slice_base, pg, ptr, vnum);
62 svld1_hor_vnum_za16(1, slice_base + 7, pg, ptr, vnum);
65 // CHECK-C-LABEL: define dso_local void @test_svld1_hor_vnum_za32(
66 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
67 // CHECK-C-NEXT: entry:
68 // CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
69 // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
70 // CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
71 // CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
72 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
73 // CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 3
74 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
75 // CHECK-C-NEXT: ret void
77 // CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_hor_vnum_za32ju10__SVBool_tPKvl(
78 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
79 // CHECK-CXX-NEXT: entry:
80 // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
81 // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
82 // CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
83 // CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
84 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
85 // CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 3
86 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
87 // CHECK-CXX-NEXT: ret void
89 void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
90 svld1_hor_vnum_za32(0, slice_base, pg, ptr, vnum);
91 svld1_hor_vnum_za32(3, slice_base + 3, pg, ptr, vnum);
94 // CHECK-C-LABEL: define dso_local void @test_svld1_hor_vnum_za64(
95 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
96 // CHECK-C-NEXT: entry:
97 // CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
98 // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
99 // CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
100 // CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
101 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
102 // CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 1
103 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
104 // CHECK-C-NEXT: ret void
106 // CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_hor_vnum_za64ju10__SVBool_tPKvl(
107 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
108 // CHECK-CXX-NEXT: entry:
109 // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
110 // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
111 // CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
112 // CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
113 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
114 // CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 1
115 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
116 // CHECK-CXX-NEXT: ret void
118 void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
119 svld1_hor_vnum_za64(0, slice_base, pg, ptr, vnum);
120 svld1_hor_vnum_za64(7, slice_base + 1, pg, ptr, vnum);
123 // CHECK-C-LABEL: define dso_local void @test_svld1_hor_vnum_za128(
124 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
125 // CHECK-C-NEXT: entry:
126 // CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG]])
127 // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
128 // CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
129 // CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
130 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
131 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
132 // CHECK-C-NEXT: ret void
134 // CHECK-CXX-LABEL: define dso_local void @_Z25test_svld1_hor_vnum_za128ju10__SVBool_tPKvl(
135 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
136 // CHECK-CXX-NEXT: entry:
137 // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG]])
138 // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
139 // CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
140 // CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
141 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
142 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
143 // CHECK-CXX-NEXT: ret void
145 void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
146 svld1_hor_vnum_za128(0, slice_base, pg, ptr, vnum);
147 svld1_hor_vnum_za128(15, slice_base, pg, ptr, vnum);
150 // CHECK-C-LABEL: define dso_local void @test_svld1_ver_hor_za8(
151 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
152 // CHECK-C-NEXT: entry:
153 // CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
154 // CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM]]
155 // CHECK-C-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
156 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[SLICE_BASE]])
157 // CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15
158 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
159 // CHECK-C-NEXT: ret void
161 // CHECK-CXX-LABEL: define dso_local void @_Z22test_svld1_ver_hor_za8ju10__SVBool_tPKvl(
162 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
163 // CHECK-CXX-NEXT: entry:
164 // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
165 // CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM]]
166 // CHECK-CXX-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
167 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[SLICE_BASE]])
168 // CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15
169 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
170 // CHECK-CXX-NEXT: ret void
172 void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
173 svld1_ver_vnum_za8(0, slice_base, pg, ptr, vnum);
174 svld1_ver_vnum_za8(0, slice_base + 15, pg, ptr, vnum);
177 // CHECK-C-LABEL: define dso_local void @test_svld1_ver_vnum_za16(
178 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
179 // CHECK-C-NEXT: entry:
180 // CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
181 // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
182 // CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
183 // CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
184 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
185 // CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 7
186 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
187 // CHECK-C-NEXT: ret void
189 // CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_ver_vnum_za16ju10__SVBool_tPKvl(
190 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
191 // CHECK-CXX-NEXT: entry:
192 // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
193 // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
194 // CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
195 // CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
196 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
197 // CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 7
198 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
199 // CHECK-CXX-NEXT: ret void
201 void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
202 svld1_ver_vnum_za16(0, slice_base, pg, ptr, vnum);
203 svld1_ver_vnum_za16(1, slice_base + 7, pg, ptr, vnum);
206 // CHECK-C-LABEL: define dso_local void @test_svld1_ver_vnum_za32(
207 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
208 // CHECK-C-NEXT: entry:
209 // CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
210 // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
211 // CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
212 // CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
213 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
214 // CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 3
215 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
216 // CHECK-C-NEXT: ret void
218 // CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_ver_vnum_za32ju10__SVBool_tPKvl(
219 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
220 // CHECK-CXX-NEXT: entry:
221 // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
222 // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
223 // CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
224 // CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
225 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
226 // CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 3
227 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
228 // CHECK-CXX-NEXT: ret void
230 void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
231 svld1_ver_vnum_za32(0, slice_base, pg, ptr, vnum);
232 svld1_ver_vnum_za32(3, slice_base + 3, pg, ptr, vnum);
235 // CHECK-C-LABEL: define dso_local void @test_svld1_ver_vnum_za64(
236 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
237 // CHECK-C-NEXT: entry:
238 // CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
239 // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
240 // CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
241 // CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
242 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
243 // CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 1
244 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
245 // CHECK-C-NEXT: ret void
247 // CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_ver_vnum_za64ju10__SVBool_tPKvl(
248 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
249 // CHECK-CXX-NEXT: entry:
250 // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
251 // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
252 // CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
253 // CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
254 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
255 // CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 1
256 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
257 // CHECK-CXX-NEXT: ret void
259 void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
260 svld1_ver_vnum_za64(0, slice_base, pg, ptr, vnum);
261 svld1_ver_vnum_za64(7, slice_base + 1, pg, ptr, vnum);
264 // CHECK-C-LABEL: define dso_local void @test_svld1_ver_vnum_za128(
265 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
266 // CHECK-C-NEXT: entry:
267 // CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG]])
268 // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
269 // CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
270 // CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
271 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
272 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
273 // CHECK-C-NEXT: ret void
275 // CHECK-CXX-LABEL: define dso_local void @_Z25test_svld1_ver_vnum_za128ju10__SVBool_tPKvl(
276 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
277 // CHECK-CXX-NEXT: entry:
278 // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG]])
279 // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
280 // CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
281 // CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
282 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
283 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
284 // CHECK-CXX-NEXT: ret void
286 void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming __arm_out("za") {
287 svld1_ver_vnum_za128(0, slice_base, pg, ptr, vnum);
288 svld1_ver_vnum_za128(15, slice_base, pg, ptr, vnum);
290 //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
291 // CHECK: {{.*}}