Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / CodeGen / aarch64-sme-intrinsics / acle_sme_ld1_vnum.c
blobcf49f62664eee8c0a1aa527b9965a943ac16101a
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3
2 // REQUIRES: aarch64-registered-target
3 // RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
4 // RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
5 // RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
7 #include <arm_sme_draft_spec_subject_to_change.h>
9 #ifdef DISABLE_SME_ATTRIBUTES
10 #define ARM_STREAMING_ATTR
11 #else
12 #define ARM_STREAMING_ATTR __attribute__((arm_streaming))
13 #endif
15 // CHECK-C-LABEL: define dso_local void @test_svld1_hor_vnum_za8(
16 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
17 // CHECK-C-NEXT: entry:
18 // CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
19 // CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM]]
20 // CHECK-C-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
21 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[SLICE_BASE]])
22 // CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15
23 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
24 // CHECK-C-NEXT: ret void
26 // CHECK-CXX-LABEL: define dso_local void @_Z23test_svld1_hor_vnum_za8ju10__SVBool_tPKvl(
27 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
28 // CHECK-CXX-NEXT: entry:
29 // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
30 // CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM]]
31 // CHECK-CXX-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
32 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[SLICE_BASE]])
33 // CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15
34 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
35 // CHECK-CXX-NEXT: ret void
37 ARM_STREAMING_ATTR void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
38 svld1_hor_vnum_za8(0, slice_base, pg, ptr, vnum);
39 svld1_hor_vnum_za8(0, slice_base + 15, pg, ptr, vnum);
42 // CHECK-C-LABEL: define dso_local void @test_svld1_hor_vnum_za16(
43 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
44 // CHECK-C-NEXT: entry:
45 // CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
46 // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
47 // CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
48 // CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
49 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
50 // CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 7
51 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
52 // CHECK-C-NEXT: ret void
54 // CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_hor_vnum_za16ju10__SVBool_tPKvl(
55 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
56 // CHECK-CXX-NEXT: entry:
57 // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
58 // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
59 // CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
60 // CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
61 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
62 // CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 7
63 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
64 // CHECK-CXX-NEXT: ret void
66 ARM_STREAMING_ATTR void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
67 svld1_hor_vnum_za16(0, slice_base, pg, ptr, vnum);
68 svld1_hor_vnum_za16(1, slice_base + 7, pg, ptr, vnum);
71 // CHECK-C-LABEL: define dso_local void @test_svld1_hor_vnum_za32(
72 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
73 // CHECK-C-NEXT: entry:
74 // CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
75 // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
76 // CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
77 // CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
78 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
79 // CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 3
80 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
81 // CHECK-C-NEXT: ret void
83 // CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_hor_vnum_za32ju10__SVBool_tPKvl(
84 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
85 // CHECK-CXX-NEXT: entry:
86 // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
87 // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
88 // CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
89 // CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
90 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
91 // CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 3
92 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
93 // CHECK-CXX-NEXT: ret void
95 ARM_STREAMING_ATTR void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
96 svld1_hor_vnum_za32(0, slice_base, pg, ptr, vnum);
97 svld1_hor_vnum_za32(3, slice_base + 3, pg, ptr, vnum);
100 // CHECK-C-LABEL: define dso_local void @test_svld1_hor_vnum_za64(
101 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
102 // CHECK-C-NEXT: entry:
103 // CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
104 // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
105 // CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
106 // CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
107 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
108 // CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 1
109 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
110 // CHECK-C-NEXT: ret void
112 // CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_hor_vnum_za64ju10__SVBool_tPKvl(
113 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
114 // CHECK-CXX-NEXT: entry:
115 // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
116 // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
117 // CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
118 // CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
119 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
120 // CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 1
121 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
122 // CHECK-CXX-NEXT: ret void
124 ARM_STREAMING_ATTR void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
125 svld1_hor_vnum_za64(0, slice_base, pg, ptr, vnum);
126 svld1_hor_vnum_za64(7, slice_base + 1, pg, ptr, vnum);
129 // CHECK-C-LABEL: define dso_local void @test_svld1_hor_vnum_za128(
130 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
131 // CHECK-C-NEXT: entry:
132 // CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG]])
133 // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
134 // CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
135 // CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
136 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
137 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
138 // CHECK-C-NEXT: ret void
140 // CHECK-CXX-LABEL: define dso_local void @_Z25test_svld1_hor_vnum_za128ju10__SVBool_tPKvl(
141 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
142 // CHECK-CXX-NEXT: entry:
143 // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG]])
144 // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
145 // CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
146 // CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
147 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
148 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
149 // CHECK-CXX-NEXT: ret void
151 ARM_STREAMING_ATTR void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
152 svld1_hor_vnum_za128(0, slice_base, pg, ptr, vnum);
153 svld1_hor_vnum_za128(15, slice_base, pg, ptr, vnum);
156 // CHECK-C-LABEL: define dso_local void @test_svld1_ver_hor_za8(
157 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
158 // CHECK-C-NEXT: entry:
159 // CHECK-C-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
160 // CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM]]
161 // CHECK-C-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
162 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[SLICE_BASE]])
163 // CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15
164 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
165 // CHECK-C-NEXT: ret void
167 // CHECK-CXX-LABEL: define dso_local void @_Z22test_svld1_ver_hor_za8ju10__SVBool_tPKvl(
168 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
169 // CHECK-CXX-NEXT: entry:
170 // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
171 // CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP0]], [[VNUM]]
172 // CHECK-CXX-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
173 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[SLICE_BASE]])
174 // CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 15
175 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
176 // CHECK-CXX-NEXT: ret void
178 ARM_STREAMING_ATTR void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
179 svld1_ver_vnum_za8(0, slice_base, pg, ptr, vnum);
180 svld1_ver_vnum_za8(0, slice_base + 15, pg, ptr, vnum);
183 // CHECK-C-LABEL: define dso_local void @test_svld1_ver_vnum_za16(
184 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
185 // CHECK-C-NEXT: entry:
186 // CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
187 // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
188 // CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
189 // CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
190 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
191 // CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 7
192 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
193 // CHECK-C-NEXT: ret void
195 // CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_ver_vnum_za16ju10__SVBool_tPKvl(
196 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
197 // CHECK-CXX-NEXT: entry:
198 // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
199 // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
200 // CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
201 // CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
202 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
203 // CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 7
204 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
205 // CHECK-CXX-NEXT: ret void
207 ARM_STREAMING_ATTR void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
208 svld1_ver_vnum_za16(0, slice_base, pg, ptr, vnum);
209 svld1_ver_vnum_za16(1, slice_base + 7, pg, ptr, vnum);
212 // CHECK-C-LABEL: define dso_local void @test_svld1_ver_vnum_za32(
213 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
214 // CHECK-C-NEXT: entry:
215 // CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
216 // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
217 // CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
218 // CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
219 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
220 // CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 3
221 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
222 // CHECK-C-NEXT: ret void
224 // CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_ver_vnum_za32ju10__SVBool_tPKvl(
225 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
226 // CHECK-CXX-NEXT: entry:
227 // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
228 // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
229 // CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
230 // CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
231 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
232 // CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 3
233 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
234 // CHECK-CXX-NEXT: ret void
236 ARM_STREAMING_ATTR void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
237 svld1_ver_vnum_za32(0, slice_base, pg, ptr, vnum);
238 svld1_ver_vnum_za32(3, slice_base + 3, pg, ptr, vnum);
241 // CHECK-C-LABEL: define dso_local void @test_svld1_ver_vnum_za64(
242 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
243 // CHECK-C-NEXT: entry:
244 // CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
245 // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
246 // CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
247 // CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
248 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
249 // CHECK-C-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 1
250 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
251 // CHECK-C-NEXT: ret void
253 // CHECK-CXX-LABEL: define dso_local void @_Z24test_svld1_ver_vnum_za64ju10__SVBool_tPKvl(
254 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
255 // CHECK-CXX-NEXT: entry:
256 // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
257 // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
258 // CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
259 // CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
260 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
261 // CHECK-CXX-NEXT: [[ADD:%.*]] = add i32 [[SLICE_BASE]], 1
262 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
263 // CHECK-CXX-NEXT: ret void
265 ARM_STREAMING_ATTR void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
266 svld1_ver_vnum_za64(0, slice_base, pg, ptr, vnum);
267 svld1_ver_vnum_za64(7, slice_base + 1, pg, ptr, vnum);
270 // CHECK-C-LABEL: define dso_local void @test_svld1_ver_vnum_za128(
271 // CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
272 // CHECK-C-NEXT: entry:
273 // CHECK-C-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG]])
274 // CHECK-C-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
275 // CHECK-C-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
276 // CHECK-C-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
277 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
278 // CHECK-C-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
279 // CHECK-C-NEXT: ret void
281 // CHECK-CXX-LABEL: define dso_local void @_Z25test_svld1_ver_vnum_za128ju10__SVBool_tPKvl(
282 // CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0]] {
283 // CHECK-CXX-NEXT: entry:
284 // CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv1i1(<vscale x 16 x i1> [[PG]])
285 // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb()
286 // CHECK-CXX-NEXT: [[MULVL:%.*]] = mul i64 [[TMP1]], [[VNUM]]
287 // CHECK-CXX-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[MULVL]]
288 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 0, i32 [[SLICE_BASE]])
289 // CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
290 // CHECK-CXX-NEXT: ret void
292 ARM_STREAMING_ATTR void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
293 svld1_ver_vnum_za128(0, slice_base, pg, ptr, vnum);
294 svld1_ver_vnum_za128(15, slice_base, pg, ptr, vnum);
296 //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
297 // CHECK: {{.*}}