Revert "[llvm] Improve llvm.objectsize computation by computing GEP, alloca and mallo...
[llvm-project.git] / clang / test / CodeGen / AArch64 / sme2-intrinsics / acle_sme2_mlall.c
blob4efc226c10e68538d0e17f1e83d4bc1e75aa8614
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2 // REQUIRES: aarch64-registered-target
4 // RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
5 // RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
6 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
7 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
8 // RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-i16i64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
9 #include <arm_sme.h>
11 #ifdef SVE_OVERLOADED_FORMS
12 // A simple used,unused... macro, long enough to represent any SVE builtin.
13 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5
14 #else
15 #define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5
16 #endif
19 // Single x 1
22 // MLAL
24 // CHECK-LABEL: @test_svmla_single_x1_s8(
25 // CHECK-NEXT: entry:
26 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.single.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
27 // CHECK-NEXT: ret void
29 // CPP-CHECK-LABEL: @_Z23test_svmla_single_x1_s8ju10__SVInt8_tS_(
30 // CPP-CHECK-NEXT: entry:
31 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.single.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
32 // CPP-CHECK-NEXT: ret void
34 void test_svmla_single_x1_s8(uint32_t slice_base, svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za")
36 SVE_ACLE_FUNC(svmla_za32,_s8,_vg4x1,,)(slice_base, zn, zm);
39 // CHECK-LABEL: @test_svmla_single_x1_s16(
40 // CHECK-NEXT: entry:
41 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.single.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
42 // CHECK-NEXT: ret void
44 // CPP-CHECK-LABEL: @_Z24test_svmla_single_x1_s16ju11__SVInt16_tS_(
45 // CPP-CHECK-NEXT: entry:
46 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.single.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
47 // CPP-CHECK-NEXT: ret void
49 void test_svmla_single_x1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za")
51 SVE_ACLE_FUNC(svmla_za64,_s16,_vg4x1,,)(slice_base, zn, zm);
54 // CHECK-LABEL: @test_uvmlal_single_x1_u8(
55 // CHECK-NEXT: entry:
56 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.single.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
57 // CHECK-NEXT: ret void
59 // CPP-CHECK-LABEL: @_Z24test_uvmlal_single_x1_u8ju11__SVUint8_tS_(
60 // CPP-CHECK-NEXT: entry:
61 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.single.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
62 // CPP-CHECK-NEXT: ret void
64 void test_uvmlal_single_x1_u8(uint32_t slice_base, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za")
66 SVE_ACLE_FUNC(svmla_za32,_u8,_vg4x1,,)(slice_base, zn, zm);
69 // CHECK-LABEL: @test_uvmlal_single_x1_u16(
70 // CHECK-NEXT: entry:
71 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.single.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
72 // CHECK-NEXT: ret void
74 // CPP-CHECK-LABEL: @_Z25test_uvmlal_single_x1_u16ju12__SVUint16_tS_(
75 // CPP-CHECK-NEXT: entry:
76 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.single.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
77 // CPP-CHECK-NEXT: ret void
79 void test_uvmlal_single_x1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za")
81 SVE_ACLE_FUNC(svmla_za64,_u16,_vg4x1,,)(slice_base, zn, zm);
84 // MLSL
86 // CHECK-LABEL: @test_svmls_single_x1_s8(
87 // CHECK-NEXT: entry:
88 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.single.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
89 // CHECK-NEXT: ret void
91 // CPP-CHECK-LABEL: @_Z23test_svmls_single_x1_s8ju10__SVInt8_tS_(
92 // CPP-CHECK-NEXT: entry:
93 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.single.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
94 // CPP-CHECK-NEXT: ret void
96 void test_svmls_single_x1_s8(uint32_t slice_base, svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za")
98 SVE_ACLE_FUNC(svmls_za32,_s8,_vg4x1,,)(slice_base, zn, zm);
101 // CHECK-LABEL: @test_svmls_single_x1_s16(
102 // CHECK-NEXT: entry:
103 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.single.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
104 // CHECK-NEXT: ret void
106 // CPP-CHECK-LABEL: @_Z24test_svmls_single_x1_s16ju11__SVInt16_tS_(
107 // CPP-CHECK-NEXT: entry:
108 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.single.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
109 // CPP-CHECK-NEXT: ret void
111 void test_svmls_single_x1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za")
113 SVE_ACLE_FUNC(svmls_za64,_s16,_vg4x1,,)(slice_base, zn, zm);
116 // CHECK-LABEL: @test_uvmlsl_single_x1_u8(
117 // CHECK-NEXT: entry:
118 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.single.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
119 // CHECK-NEXT: ret void
121 // CPP-CHECK-LABEL: @_Z24test_uvmlsl_single_x1_u8ju11__SVUint8_tS_(
122 // CPP-CHECK-NEXT: entry:
123 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.single.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
124 // CPP-CHECK-NEXT: ret void
126 void test_uvmlsl_single_x1_u8(uint32_t slice_base, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za")
128 SVE_ACLE_FUNC(svmls_za32,_u8,_vg4x1,,)(slice_base, zn, zm);
131 // CHECK-LABEL: @test_uvmlsl_single_x1_u16(
132 // CHECK-NEXT: entry:
133 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.single.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
134 // CHECK-NEXT: ret void
136 // CPP-CHECK-LABEL: @_Z25test_uvmlsl_single_x1_u16ju12__SVUint16_tS_(
137 // CPP-CHECK-NEXT: entry:
138 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.single.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
139 // CPP-CHECK-NEXT: ret void
141 void test_uvmlsl_single_x1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za")
143 SVE_ACLE_FUNC(svmls_za64,_u16,_vg4x1,,)(slice_base, zn, zm);
146 // SUMLALL
148 // CHECK-LABEL: @test_sumlall_single_x1_s8(
149 // CHECK-NEXT: entry:
150 // CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.single.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZN:%.*]])
151 // CHECK-NEXT: ret void
153 // CPP-CHECK-LABEL: @_Z25test_sumlall_single_x1_s8ju10__SVInt8_tu11__SVUint8_t(
154 // CPP-CHECK-NEXT: entry:
155 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.single.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZM:%.*]], <vscale x 16 x i8> [[ZN:%.*]])
156 // CPP-CHECK-NEXT: ret void
158 void test_sumlall_single_x1_s8(uint32_t slice_base, svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za")
160 SVE_ACLE_FUNC(svsumla_za32,_s8,_vg4x1,,)(slice_base, zn, zm);
163 // USMLALL
165 // CHECK-LABEL: @test_usmlall_single_x1_u8(
166 // CHECK-NEXT: entry:
167 // CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.single.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
168 // CHECK-NEXT: ret void
170 // CPP-CHECK-LABEL: @_Z25test_usmlall_single_x1_u8ju11__SVUint8_tu10__SVInt8_t(
171 // CPP-CHECK-NEXT: entry:
172 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.single.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
173 // CPP-CHECK-NEXT: ret void
175 void test_usmlall_single_x1_u8(uint32_t slice_base, svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za")
177 SVE_ACLE_FUNC(svusmla_za32,_u8,_vg4x1,,)(slice_base, zn, zm);
181 // Single x 2
184 // MLAL
186 // CHECK-LABEL: @test_svmla_single_x2_s8(
187 // CHECK-NEXT: entry:
188 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
189 // CHECK-NEXT: ret void
191 // CPP-CHECK-LABEL: @_Z23test_svmla_single_x2_s8j10svint8x2_tu10__SVInt8_t(
192 // CPP-CHECK-NEXT: entry:
193 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
194 // CPP-CHECK-NEXT: ret void
196 void test_svmla_single_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za")
198 SVE_ACLE_FUNC(svmla,_single,_za32,_s8,_vg4x2)(slice_base, zn, zm);
201 // CHECK-LABEL: @test_svmla_single_x2_s16(
202 // CHECK-NEXT: entry:
203 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
204 // CHECK-NEXT: ret void
206 // CPP-CHECK-LABEL: @_Z24test_svmla_single_x2_s16j11svint16x2_tu11__SVInt16_t(
207 // CPP-CHECK-NEXT: entry:
208 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
209 // CPP-CHECK-NEXT: ret void
211 void test_svmla_single_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za")
213 SVE_ACLE_FUNC(svmla,_single,_za64,_s16,_vg4x2)(slice_base, zn, zm);
216 // CHECK-LABEL: @test_svmla_single_x2_u8(
217 // CHECK-NEXT: entry:
218 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
219 // CHECK-NEXT: ret void
221 // CPP-CHECK-LABEL: @_Z23test_svmla_single_x2_u8j11svuint8x2_tu11__SVUint8_t(
222 // CPP-CHECK-NEXT: entry:
223 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
224 // CPP-CHECK-NEXT: ret void
226 void test_svmla_single_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za")
228 SVE_ACLE_FUNC(svmla,_single,_za32,_u8,_vg4x2)(slice_base, zn, zm);
231 // CHECK-LABEL: @test_svmla_single_x2_u16(
232 // CHECK-NEXT: entry:
233 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
234 // CHECK-NEXT: ret void
236 // CPP-CHECK-LABEL: @_Z24test_svmla_single_x2_u16j12svuint16x2_tu12__SVUint16_t(
237 // CPP-CHECK-NEXT: entry:
238 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
239 // CPP-CHECK-NEXT: ret void
241 void test_svmla_single_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za")
243 SVE_ACLE_FUNC(svmla,_single,_za64,_u16,_vg4x2)(slice_base, zn, zm);
246 // MLSL
248 // CHECK-LABEL: @test_svmls_single_x2_s8(
249 // CHECK-NEXT: entry:
250 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
251 // CHECK-NEXT: ret void
253 // CPP-CHECK-LABEL: @_Z23test_svmls_single_x2_s8j10svint8x2_tu10__SVInt8_t(
254 // CPP-CHECK-NEXT: entry:
255 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
256 // CPP-CHECK-NEXT: ret void
258 void test_svmls_single_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za")
260 SVE_ACLE_FUNC(svmls,_single,_za32,_s8,_vg4x2)(slice_base, zn, zm);
263 // CHECK-LABEL: @test_svmls_single_x2_s16(
264 // CHECK-NEXT: entry:
265 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
266 // CHECK-NEXT: ret void
268 // CPP-CHECK-LABEL: @_Z24test_svmls_single_x2_s16j11svint16x2_tu11__SVInt16_t(
269 // CPP-CHECK-NEXT: entry:
270 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
271 // CPP-CHECK-NEXT: ret void
273 void test_svmls_single_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za")
275 SVE_ACLE_FUNC(svmls,_single,_za64,_s16,_vg4x2)(slice_base, zn, zm);
278 // CHECK-LABEL: @test_svmls_single_x2_u8(
279 // CHECK-NEXT: entry:
280 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
281 // CHECK-NEXT: ret void
283 // CPP-CHECK-LABEL: @_Z23test_svmls_single_x2_u8j11svuint8x2_tu11__SVUint8_t(
284 // CPP-CHECK-NEXT: entry:
285 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
286 // CPP-CHECK-NEXT: ret void
288 void test_svmls_single_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za")
290 SVE_ACLE_FUNC(svmls,_single,_za32,_u8,_vg4x2)(slice_base, zn, zm);
293 // CHECK-LABEL: @test_svmls_single_x2_u16(
294 // CHECK-NEXT: entry:
295 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
296 // CHECK-NEXT: ret void
298 // CPP-CHECK-LABEL: @_Z24test_svmls_single_x2_u16j12svuint16x2_tu12__SVUint16_t(
299 // CPP-CHECK-NEXT: entry:
300 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.single.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
301 // CPP-CHECK-NEXT: ret void
303 void test_svmls_single_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za")
305 SVE_ACLE_FUNC(svmls,_single,_za64,_u16,_vg4x2)(slice_base, zn, zm);
308 // SUMLALL
310 // CHECK-LABEL: @test_svsumla_single_x2_s8(
311 // CHECK-NEXT: entry:
312 // CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
313 // CHECK-NEXT: ret void
315 // CPP-CHECK-LABEL: @_Z25test_svsumla_single_x2_s8j10svint8x2_tu11__SVUint8_t(
316 // CPP-CHECK-NEXT: entry:
317 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
318 // CPP-CHECK-NEXT: ret void
320 void test_svsumla_single_x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za")
322 SVE_ACLE_FUNC(svsumla,_single,_za32,_s8,_vg4x2)(slice_base, zn, zm);
325 // USMLALL
327 // CHECK-LABEL: @test_usmlall_single_x2_u8(
328 // CHECK-NEXT: entry:
329 // CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
330 // CHECK-NEXT: ret void
332 // CPP-CHECK-LABEL: @_Z25test_usmlall_single_x2_u8j11svuint8x2_tu10__SVInt8_t(
333 // CPP-CHECK-NEXT: entry:
334 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.single.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
335 // CPP-CHECK-NEXT: ret void
337 void test_usmlall_single_x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za")
339 SVE_ACLE_FUNC(svusmla,_single,_za32,_u8,_vg4x2)(slice_base, zn, zm);
343 // Single x 4
346 // MLAL
348 // CHECK-LABEL: @test_svmla_single_x4_s8(
349 // CHECK-NEXT: entry:
350 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
351 // CHECK-NEXT: ret void
353 // CPP-CHECK-LABEL: @_Z23test_svmla_single_x4_s8j10svint8x4_tu10__SVInt8_t(
354 // CPP-CHECK-NEXT: entry:
355 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
356 // CPP-CHECK-NEXT: ret void
358 void test_svmla_single_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za")
360 SVE_ACLE_FUNC(svmla,_single,_za32,_s8,_vg4x4)(slice_base, zn, zm);
363 // CHECK-LABEL: @test_svmla_single_x4_s16(
364 // CHECK-NEXT: entry:
365 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
366 // CHECK-NEXT: ret void
368 // CPP-CHECK-LABEL: @_Z24test_svmla_single_x4_s16j11svint16x4_tu11__SVInt16_t(
369 // CPP-CHECK-NEXT: entry:
370 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
371 // CPP-CHECK-NEXT: ret void
373 void test_svmla_single_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za")
375 SVE_ACLE_FUNC(svmla,_single,_za64,_s16,_vg4x4)(slice_base, zn, zm);
378 // CHECK-LABEL: @test_svmla_single_x4_u8(
379 // CHECK-NEXT: entry:
380 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
381 // CHECK-NEXT: ret void
383 // CPP-CHECK-LABEL: @_Z23test_svmla_single_x4_u8j11svuint8x4_tu11__SVUint8_t(
384 // CPP-CHECK-NEXT: entry:
385 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
386 // CPP-CHECK-NEXT: ret void
388 void test_svmla_single_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za")
390 SVE_ACLE_FUNC(svmla,_single,_za32,_u8,_vg4x4)(slice_base, zn, zm);
393 // CHECK-LABEL: @test_svmla_single_x4_u16(
394 // CHECK-NEXT: entry:
395 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
396 // CHECK-NEXT: ret void
398 // CPP-CHECK-LABEL: @_Z24test_svmla_single_x4_u16j12svuint16x4_tu12__SVUint16_t(
399 // CPP-CHECK-NEXT: entry:
400 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
401 // CPP-CHECK-NEXT: ret void
403 void test_svmla_single_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za")
405 SVE_ACLE_FUNC(svmla,_single,_za64,_u16,_vg4x4)(slice_base, zn, zm);
408 // MLSL
410 // CHECK-LABEL: @test_svmls_single_x4_s8(
411 // CHECK-NEXT: entry:
412 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
413 // CHECK-NEXT: ret void
415 // CPP-CHECK-LABEL: @_Z23test_svmls_single_x4_s8j10svint8x4_tu10__SVInt8_t(
416 // CPP-CHECK-NEXT: entry:
417 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
418 // CPP-CHECK-NEXT: ret void
420 void test_svmls_single_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za")
422 SVE_ACLE_FUNC(svmls,_single,_za32,_s8,_vg4x4)(slice_base, zn, zm);
425 // CHECK-LABEL: @test_svmls_single_x4_s16(
426 // CHECK-NEXT: entry:
427 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
428 // CHECK-NEXT: ret void
430 // CPP-CHECK-LABEL: @_Z24test_svmls_single_x4_s16j11svint16x4_tu11__SVInt16_t(
431 // CPP-CHECK-NEXT: entry:
432 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
433 // CPP-CHECK-NEXT: ret void
435 void test_svmls_single_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za")
437 SVE_ACLE_FUNC(svmls,_single,_za64,_s16,_vg4x4)(slice_base, zn, zm);
440 // CHECK-LABEL: @test_svmls_single_x4_u8(
441 // CHECK-NEXT: entry:
442 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
443 // CHECK-NEXT: ret void
445 // CPP-CHECK-LABEL: @_Z23test_svmls_single_x4_u8j11svuint8x4_tu11__SVUint8_t(
446 // CPP-CHECK-NEXT: entry:
447 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
448 // CPP-CHECK-NEXT: ret void
450 void test_svmls_single_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za")
452 SVE_ACLE_FUNC(svmls,_single,_za32,_u8,_vg4x4)(slice_base, zn, zm);
455 // CHECK-LABEL: @test_svmls_single_x4_u16(
456 // CHECK-NEXT: entry:
457 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
458 // CHECK-NEXT: ret void
460 // CPP-CHECK-LABEL: @_Z24test_svmls_single_x4_u16j12svuint16x4_tu12__SVUint16_t(
461 // CPP-CHECK-NEXT: entry:
462 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.single.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
463 // CPP-CHECK-NEXT: ret void
465 void test_svmls_single_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za")
467 SVE_ACLE_FUNC(svmls,_single,_za64,_u16,_vg4x4)(slice_base, zn, zm);
470 // SUMLALL
472 // CHECK-LABEL: @test_svsumla_single_x4_s8(
473 // CHECK-NEXT: entry:
474 // CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
475 // CHECK-NEXT: ret void
477 // CPP-CHECK-LABEL: @_Z25test_svsumla_single_x4_s8j10svint8x4_tu11__SVUint8_t(
478 // CPP-CHECK-NEXT: entry:
479 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
480 // CPP-CHECK-NEXT: ret void
482 void test_svsumla_single_x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za")
484 SVE_ACLE_FUNC(svsumla,_single,_za32,_s8,_vg4x4)(slice_base, zn, zm);
487 // USMLALL
489 // CHECK-LABEL: @test_usmlall_single_x4_u8(
490 // CHECK-NEXT: entry:
491 // CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
492 // CHECK-NEXT: ret void
494 // CPP-CHECK-LABEL: @_Z25test_usmlall_single_x4_u8j11svuint8x4_tu10__SVInt8_t(
495 // CPP-CHECK-NEXT: entry:
496 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.single.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]])
497 // CPP-CHECK-NEXT: ret void
499 void test_usmlall_single_x4_u8(uint32_t slice_base, svuint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za")
501 SVE_ACLE_FUNC(svusmla,_single,_za32,_u8,_vg4x4)(slice_base, zn, zm);
505 // Multi x 2
508 // MLAL
510 // CHECK-LABEL: @test_mlal_multi_x2_s8(
511 // CHECK-NEXT: entry:
512 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
513 // CHECK-NEXT: ret void
515 // CPP-CHECK-LABEL: @_Z21test_mlal_multi_x2_s8j10svint8x2_tS_(
516 // CPP-CHECK-NEXT: entry:
517 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
518 // CPP-CHECK-NEXT: ret void
520 void test_mlal_multi_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za")
522 SVE_ACLE_FUNC(svmla_za32,_s8,_vg4x2,,)(slice_base, zn, zm);
525 // CHECK-LABEL: @test_mlal_multi_x2_s16(
526 // CHECK-NEXT: entry:
527 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
528 // CHECK-NEXT: ret void
530 // CPP-CHECK-LABEL: @_Z22test_mlal_multi_x2_s16j11svint16x2_tS_(
531 // CPP-CHECK-NEXT: entry:
532 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
533 // CPP-CHECK-NEXT: ret void
535 void test_mlal_multi_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za")
537 SVE_ACLE_FUNC(svmla_za64,_s16,_vg4x2,,)(slice_base, zn, zm);
540 // CHECK-LABEL: @test_mlal_multi_x2_u8(
541 // CHECK-NEXT: entry:
542 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
543 // CHECK-NEXT: ret void
545 // CPP-CHECK-LABEL: @_Z21test_mlal_multi_x2_u8j11svuint8x2_tS_(
546 // CPP-CHECK-NEXT: entry:
547 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
548 // CPP-CHECK-NEXT: ret void
550 void test_mlal_multi_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8x2_t zm) __arm_streaming __arm_inout("za")
552 SVE_ACLE_FUNC(svmla_za32,_u8,_vg4x2,,)(slice_base, zn, zm);
555 // CHECK-LABEL: @test_mlal_multi_x2_u16(
556 // CHECK-NEXT: entry:
557 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
558 // CHECK-NEXT: ret void
560 // CPP-CHECK-LABEL: @_Z22test_mlal_multi_x2_u16j12svuint16x2_tS_(
561 // CPP-CHECK-NEXT: entry:
562 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
563 // CPP-CHECK-NEXT: ret void
565 void test_mlal_multi_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za")
567 SVE_ACLE_FUNC(svmla_za64,_u16,_vg4x2,,)(slice_base, zn, zm);
570 // MLSL
572 // CHECK-LABEL: @test_mlsl_multi_x2_s8(
573 // CHECK-NEXT: entry:
574 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
575 // CHECK-NEXT: ret void
577 // CPP-CHECK-LABEL: @_Z21test_mlsl_multi_x2_s8j10svint8x2_tS_(
578 // CPP-CHECK-NEXT: entry:
579 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
580 // CPP-CHECK-NEXT: ret void
582 void test_mlsl_multi_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za")
584 SVE_ACLE_FUNC(svmls_za32,_s8,_vg4x2,,)(slice_base, zn, zm);
587 // CHECK-LABEL: @test_mlsl_multi_x2_s16(
588 // CHECK-NEXT: entry:
589 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
590 // CHECK-NEXT: ret void
592 // CPP-CHECK-LABEL: @_Z22test_mlsl_multi_x2_s16j11svint16x2_tS_(
593 // CPP-CHECK-NEXT: entry:
594 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
595 // CPP-CHECK-NEXT: ret void
597 void test_mlsl_multi_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_streaming __arm_inout("za")
599 SVE_ACLE_FUNC(svmls_za64,_s16,_vg4x2,,)(slice_base, zn, zm);
602 // CHECK-LABEL: @test_mlsl_multi_x2_u8(
603 // CHECK-NEXT: entry:
604 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
605 // CHECK-NEXT: ret void
607 // CPP-CHECK-LABEL: @_Z21test_mlsl_multi_x2_u8j11svuint8x2_tS_(
608 // CPP-CHECK-NEXT: entry:
609 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
610 // CPP-CHECK-NEXT: ret void
612 void test_mlsl_multi_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8x2_t zm) __arm_streaming __arm_inout("za")
614 SVE_ACLE_FUNC(svmls_za32,_u8,_vg4x2,,)(slice_base, zn, zm);
617 // CHECK-LABEL: @test_mlsl_multi_x2_u16(
618 // CHECK-NEXT: entry:
619 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
620 // CHECK-NEXT: ret void
622 // CPP-CHECK-LABEL: @_Z22test_mlsl_multi_x2_u16j12svuint16x2_tS_(
623 // CPP-CHECK-NEXT: entry:
624 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]])
625 // CPP-CHECK-NEXT: ret void
627 void test_mlsl_multi_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming __arm_inout("za")
629 SVE_ACLE_FUNC(svmls_za64,_u16,_vg4x2,,)(slice_base, zn, zm);
632 // SUMLALL
634 // CHECK-LABEL: @test_sumlal_multi_x2_s8(
635 // CHECK-NEXT: entry:
636 // CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]])
637 // CHECK-NEXT: ret void
639 // CPP-CHECK-LABEL: @_Z23test_sumlal_multi_x2_s8j10svint8x2_t11svuint8x2_t(
640 // CPP-CHECK-NEXT: entry:
641 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]])
642 // CPP-CHECK-NEXT: ret void
644 void test_sumlal_multi_x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8x2_t zm) __arm_streaming __arm_inout("za")
646 SVE_ACLE_FUNC(svsumla_za32,_s8,_vg4x2,,)(slice_base, zn, zm);
649 // USMLALL
651 // CHECK-LABEL: @test_usmlal_multi_x2_u8(
652 // CHECK-NEXT: entry:
653 // CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
654 // CHECK-NEXT: ret void
656 // CPP-CHECK-LABEL: @_Z23test_usmlal_multi_x2_u8j11svuint8x2_t10svint8x2_t(
657 // CPP-CHECK-NEXT: entry:
658 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]])
659 // CPP-CHECK-NEXT: ret void
661 void test_usmlal_multi_x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8x2_t zm) __arm_streaming __arm_inout("za")
663 SVE_ACLE_FUNC(svusmla_za32,_u8,_vg4x2,,)(slice_base, zn, zm);
667 // Multi x 4
670 // MLAL
672 // CHECK-LABEL: @test_mlal_multi_x4_s8(
673 // CHECK-NEXT: entry:
674 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]])
675 // CHECK-NEXT: ret void
677 // CPP-CHECK-LABEL: @_Z21test_mlal_multi_x4_s8j10svint8x4_tS_(
678 // CPP-CHECK-NEXT: entry:
679 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]])
680 // CPP-CHECK-NEXT: ret void
682 void test_mlal_multi_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8x4_t zm) __arm_streaming __arm_inout("za")
684 SVE_ACLE_FUNC(svmla_za32,_s8,_vg4x4,,)(slice_base, zn, zm);
687 // CHECK-LABEL: @test_mlal_multi_x4_s16(
688 // CHECK-NEXT: entry:
689 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE2:%.*]], <vscale x 8 x i16> [[ZM_COERCE3:%.*]])
690 // CHECK-NEXT: ret void
692 // CPP-CHECK-LABEL: @_Z22test_mlal_multi_x4_s16j11svint16x4_tS_(
693 // CPP-CHECK-NEXT: entry:
694 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE2:%.*]], <vscale x 8 x i16> [[ZM_COERCE3:%.*]])
695 // CPP-CHECK-NEXT: ret void
697 void test_mlal_multi_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_streaming __arm_inout("za")
699 SVE_ACLE_FUNC(svmla_za64,_s16,_vg4x4,,)(slice_base, zn, zm);
702 // CHECK-LABEL: @test_mlal_multi_x4_u8(
703 // CHECK-NEXT: entry:
704 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]])
705 // CHECK-NEXT: ret void
707 // CPP-CHECK-LABEL: @_Z21test_mlal_multi_x4_u8j11svuint8x4_tS_(
708 // CPP-CHECK-NEXT: entry:
709 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]])
710 // CPP-CHECK-NEXT: ret void
712 void test_mlal_multi_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8x4_t zm) __arm_streaming __arm_inout("za")
714 SVE_ACLE_FUNC(svmla_za32,_u8,_vg4x4,,)(slice_base, zn, zm);
717 // CHECK-LABEL: @test_mlal_multi_x4_u16(
718 // CHECK-NEXT: entry:
719 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE2:%.*]], <vscale x 8 x i16> [[ZM_COERCE3:%.*]])
720 // CHECK-NEXT: ret void
722 // CPP-CHECK-LABEL: @_Z22test_mlal_multi_x4_u16j12svuint16x4_tS_(
723 // CPP-CHECK-NEXT: entry:
724 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE2:%.*]], <vscale x 8 x i16> [[ZM_COERCE3:%.*]])
725 // CPP-CHECK-NEXT: ret void
727 void test_mlal_multi_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __arm_streaming __arm_inout("za")
729 SVE_ACLE_FUNC(svmla_za64,_u16,_vg4x4,,)(slice_base, zn, zm);
732 // MLSL
734 // CHECK-LABEL: @test_mlsl_multi_x4_s8(
735 // CHECK-NEXT: entry:
736 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]])
737 // CHECK-NEXT: ret void
739 // CPP-CHECK-LABEL: @_Z21test_mlsl_multi_x4_s8j10svint8x4_tS_(
740 // CPP-CHECK-NEXT: entry:
741 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]])
742 // CPP-CHECK-NEXT: ret void
744 void test_mlsl_multi_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8x4_t zm) __arm_streaming __arm_inout("za")
746 SVE_ACLE_FUNC(svmls_za32,_s8,_vg4x4,,)(slice_base, zn, zm);
749 // CHECK-LABEL: @test_mlsl_multi_x4_s16(
750 // CHECK-NEXT: entry:
751 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE2:%.*]], <vscale x 8 x i16> [[ZM_COERCE3:%.*]])
752 // CHECK-NEXT: ret void
754 // CPP-CHECK-LABEL: @_Z22test_mlsl_multi_x4_s16j11svint16x4_tS_(
755 // CPP-CHECK-NEXT: entry:
756 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE2:%.*]], <vscale x 8 x i16> [[ZM_COERCE3:%.*]])
757 // CPP-CHECK-NEXT: ret void
759 void test_mlsl_multi_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_streaming __arm_inout("za")
761 SVE_ACLE_FUNC(svmls_za64,_s16,_vg4x4,,)(slice_base, zn, zm);
764 // CHECK-LABEL: @test_mlsl_multi_x4_u8(
765 // CHECK-NEXT: entry:
766 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]])
767 // CHECK-NEXT: ret void
769 // CPP-CHECK-LABEL: @_Z21test_mlsl_multi_x4_u8j11svuint8x4_tS_(
770 // CPP-CHECK-NEXT: entry:
771 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]])
772 // CPP-CHECK-NEXT: ret void
774 void test_mlsl_multi_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8x4_t zm) __arm_streaming __arm_inout("za")
776 SVE_ACLE_FUNC(svmls_za32,_u8,_vg4x4,,)(slice_base, zn, zm);
779 // CHECK-LABEL: @test_mlsl_multi_x4_u16(
780 // CHECK-NEXT: entry:
781 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE2:%.*]], <vscale x 8 x i16> [[ZM_COERCE3:%.*]])
782 // CHECK-NEXT: ret void
784 // CPP-CHECK-LABEL: @_Z22test_mlsl_multi_x4_u16j12svuint16x4_tS_(
785 // CPP-CHECK-NEXT: entry:
786 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM_COERCE0:%.*]], <vscale x 8 x i16> [[ZM_COERCE1:%.*]], <vscale x 8 x i16> [[ZM_COERCE2:%.*]], <vscale x 8 x i16> [[ZM_COERCE3:%.*]])
787 // CPP-CHECK-NEXT: ret void
789 void test_mlsl_multi_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __arm_streaming __arm_inout("za")
791 SVE_ACLE_FUNC(svmls_za64,_u16,_vg4x4,,)(slice_base, zn, zm);
794 // SUMLALL
796 // CHECK-LABEL: @test_sumlal_multi_x4_s8(
797 // CHECK-NEXT: entry:
798 // CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]])
799 // CHECK-NEXT: ret void
801 // CPP-CHECK-LABEL: @_Z23test_sumlal_multi_x4_s8j10svint8x4_t11svuint8x4_t(
802 // CPP-CHECK-NEXT: entry:
803 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]])
804 // CPP-CHECK-NEXT: ret void
806 void test_sumlal_multi_x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8x4_t zm) __arm_streaming __arm_inout("za")
808 SVE_ACLE_FUNC(svsumla_za32,_s8,_vg4x4,,)(slice_base, zn, zm);
811 // USMLALL
813 // CHECK-LABEL: @test_usmlal_multi_x4_u8(
814 // CHECK-NEXT: entry:
815 // CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]])
816 // CHECK-NEXT: ret void
818 // CPP-CHECK-LABEL: @_Z23test_usmlal_multi_x4_u8j11svuint8x4_t10svint8x4_t(
819 // CPP-CHECK-NEXT: entry:
820 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]])
821 // CPP-CHECK-NEXT: ret void
823 void test_usmlal_multi_x4_u8(uint32_t slice_base, svuint8x4_t zn, svint8x4_t zm) __arm_streaming __arm_inout("za")
825 SVE_ACLE_FUNC(svusmla_za32,_u8,_vg4x4,,)(slice_base, zn, zm);
829 // Indexed x 1
832 // SMLAL
834 // CHECK-LABEL: @test_smlal_lane_x1_s8(
835 // CHECK-NEXT: entry:
836 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.lane.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
837 // CHECK-NEXT: ret void
839 // CPP-CHECK-LABEL: @_Z21test_smlal_lane_x1_s8ju10__SVInt8_tS_(
840 // CPP-CHECK-NEXT: entry:
841 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.lane.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
842 // CPP-CHECK-NEXT: ret void
844 void test_smlal_lane_x1_s8(uint32_t slice_base, svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za")
846 SVE_ACLE_FUNC(svmla_lane_za32,_s8,_vg4x1,,)(slice_base, zn, zm, 15);
849 // CHECK-LABEL: @test_smlal_lane_x1_s16(
850 // CHECK-NEXT: entry:
851 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.lane.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
852 // CHECK-NEXT: ret void
854 // CPP-CHECK-LABEL: @_Z22test_smlal_lane_x1_s16ju11__SVInt16_tS_(
855 // CPP-CHECK-NEXT: entry:
856 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.lane.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
857 // CPP-CHECK-NEXT: ret void
859 void test_smlal_lane_x1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za")
861 SVE_ACLE_FUNC(svmla_lane_za64,_s16,_vg4x1,,)(slice_base, zn, zm, 7);
864 // CHECK-LABEL: @test_smlal_lane_x1_u8(
865 // CHECK-NEXT: entry:
866 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.lane.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
867 // CHECK-NEXT: ret void
869 // CPP-CHECK-LABEL: @_Z21test_smlal_lane_x1_u8ju11__SVUint8_tS_(
870 // CPP-CHECK-NEXT: entry:
871 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.lane.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
872 // CPP-CHECK-NEXT: ret void
874 void test_smlal_lane_x1_u8(uint32_t slice_base, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za")
876 SVE_ACLE_FUNC(svmla_lane_za32,_u8,_vg4x1,,)(slice_base, zn, zm, 15);
879 // CHECK-LABEL: @test_smlal_lane_x1_u16(
880 // CHECK-NEXT: entry:
881 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.lane.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
882 // CHECK-NEXT: ret void
884 // CPP-CHECK-LABEL: @_Z22test_smlal_lane_x1_u16ju12__SVUint16_tS_(
885 // CPP-CHECK-NEXT: entry:
886 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.lane.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
887 // CPP-CHECK-NEXT: ret void
889 void test_smlal_lane_x1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za")
891 SVE_ACLE_FUNC(svmla_lane_za64,_u16,_vg4x1,,)(slice_base, zn, zm, 7);
894 // SMLSL
896 // CHECK-LABEL: @test_smlsl_lane_x1_s8(
897 // CHECK-NEXT: entry:
898 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.lane.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
899 // CHECK-NEXT: ret void
901 // CPP-CHECK-LABEL: @_Z21test_smlsl_lane_x1_s8ju10__SVInt8_tS_(
902 // CPP-CHECK-NEXT: entry:
903 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.lane.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
904 // CPP-CHECK-NEXT: ret void
906 void test_smlsl_lane_x1_s8(uint32_t slice_base, svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za")
908 SVE_ACLE_FUNC(svmls_lane_za32,_s8,_vg4x1,,)(slice_base, zn, zm, 15);
911 // CHECK-LABEL: @test_smlsl_lane_x1_s16(
912 // CHECK-NEXT: entry:
913 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.lane.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
914 // CHECK-NEXT: ret void
916 // CPP-CHECK-LABEL: @_Z22test_smlsl_lane_x1_s16ju11__SVInt16_tS_(
917 // CPP-CHECK-NEXT: entry:
918 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.lane.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
919 // CPP-CHECK-NEXT: ret void
921 void test_smlsl_lane_x1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za")
923 SVE_ACLE_FUNC(svmls_lane_za64,_s16,_vg4x1,,)(slice_base, zn, zm, 7);
926 // CHECK-LABEL: @test_smlsl_lane_x1_u8(
927 // CHECK-NEXT: entry:
928 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.lane.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
929 // CHECK-NEXT: ret void
931 // CPP-CHECK-LABEL: @_Z21test_smlsl_lane_x1_u8ju11__SVUint8_tS_(
932 // CPP-CHECK-NEXT: entry:
933 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.lane.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
934 // CPP-CHECK-NEXT: ret void
936 void test_smlsl_lane_x1_u8(uint32_t slice_base, svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za")
938 SVE_ACLE_FUNC(svmls_lane_za32,_u8,_vg4x1,,)(slice_base, zn, zm, 15);
941 // CHECK-LABEL: @test_smlsl_lane_x1_u16(
942 // CHECK-NEXT: entry:
943 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.lane.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
944 // CHECK-NEXT: ret void
946 // CPP-CHECK-LABEL: @_Z22test_smlsl_lane_x1_u16ju12__SVUint16_tS_(
947 // CPP-CHECK-NEXT: entry:
948 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.lane.vg4x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
949 // CPP-CHECK-NEXT: ret void
951 void test_smlsl_lane_x1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za")
953 SVE_ACLE_FUNC(svmls_lane_za64,_u16,_vg4x1,,)(slice_base, zn, zm, 7);
956 // SUMLALL
958 // CHECK-LABEL: @test_sumlall_lane_x1_s8(
959 // CHECK-NEXT: entry:
960 // CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.lane.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
961 // CHECK-NEXT: ret void
963 // CPP-CHECK-LABEL: @_Z23test_sumlall_lane_x1_s8ju10__SVInt8_tu11__SVUint8_t(
964 // CPP-CHECK-NEXT: entry:
965 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.lane.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
966 // CPP-CHECK-NEXT: ret void
968 void test_sumlall_lane_x1_s8(uint32_t slice_base, svint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za")
970 SVE_ACLE_FUNC(svsumla_lane_za32,_s8,_vg4x1,,)(slice_base, zn, zm, 15);
973 // USMLALL
975 // CHECK-LABEL: @test_usmlall_lane_x1_u8(
976 // CHECK-NEXT: entry:
977 // CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.lane.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
978 // CHECK-NEXT: ret void
980 // CPP-CHECK-LABEL: @_Z23test_usmlall_lane_x1_u8ju11__SVUint8_tu10__SVInt8_t(
981 // CPP-CHECK-NEXT: entry:
982 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.lane.vg4x1.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
983 // CPP-CHECK-NEXT: ret void
985 void test_usmlall_lane_x1_u8(uint32_t slice_base, svuint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za")
987 SVE_ACLE_FUNC(svusmla_lane_za32,_u8,_vg4x1,,)(slice_base, zn, zm, 15);
991 // Indexed x 2
994 // SMLAL
996 // CHECK-LABEL: @test_smlal_lane_x2_s8(
997 // CHECK-NEXT: entry:
998 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
999 // CHECK-NEXT: ret void
1001 // CPP-CHECK-LABEL: @_Z21test_smlal_lane_x2_s8j10svint8x2_tu10__SVInt8_t(
1002 // CPP-CHECK-NEXT: entry:
1003 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1004 // CPP-CHECK-NEXT: ret void
1006 void test_smlal_lane_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za")
1008 SVE_ACLE_FUNC(svmla_lane_za32,_s8,_vg4x2,,)(slice_base, zn, zm, 15);
1011 // CHECK-LABEL: @test_smlal_lane_x2_s16(
1012 // CHECK-NEXT: entry:
1013 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
1014 // CHECK-NEXT: ret void
1016 // CPP-CHECK-LABEL: @_Z22test_smlal_lane_x2_s16j11svint16x2_tu11__SVInt16_t(
1017 // CPP-CHECK-NEXT: entry:
1018 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
1019 // CPP-CHECK-NEXT: ret void
1021 void test_smlal_lane_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za")
1023 SVE_ACLE_FUNC(svmla_lane_za64,_s16,_vg4x2,,)(slice_base, zn, zm, 7);
1026 // CHECK-LABEL: @test_smlal_lane_x2_u8(
1027 // CHECK-NEXT: entry:
1028 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1029 // CHECK-NEXT: ret void
1031 // CPP-CHECK-LABEL: @_Z21test_smlal_lane_x2_u8j11svuint8x2_tu11__SVUint8_t(
1032 // CPP-CHECK-NEXT: entry:
1033 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1034 // CPP-CHECK-NEXT: ret void
1036 void test_smlal_lane_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za")
1038 SVE_ACLE_FUNC(svmla_lane_za32,_u8,_vg4x2,,)(slice_base, zn, zm, 15);
1041 // CHECK-LABEL: @test_smlal_lane_x2_u16(
1042 // CHECK-NEXT: entry:
1043 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
1044 // CHECK-NEXT: ret void
1046 // CPP-CHECK-LABEL: @_Z22test_smlal_lane_x2_u16j12svuint16x2_tu12__SVUint16_t(
1047 // CPP-CHECK-NEXT: entry:
1048 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
1049 // CPP-CHECK-NEXT: ret void
1051 void test_smlal_lane_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za")
1053 SVE_ACLE_FUNC(svmla_lane_za64,_u16,_vg4x2,,)(slice_base, zn, zm, 7);
1056 // SMLSL
1058 // CHECK-LABEL: @test_smlsl_lane_x2_s8(
1059 // CHECK-NEXT: entry:
1060 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1061 // CHECK-NEXT: ret void
1063 // CPP-CHECK-LABEL: @_Z21test_smlsl_lane_x2_s8j10svint8x2_tu10__SVInt8_t(
1064 // CPP-CHECK-NEXT: entry:
1065 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1066 // CPP-CHECK-NEXT: ret void
1068 void test_smlsl_lane_x2_s8(uint32_t slice_base, svint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za")
1070 SVE_ACLE_FUNC(svmls_lane_za32,_s8,_vg4x2,,)(slice_base, zn, zm, 15);
1073 // CHECK-LABEL: @test_smlsl_lane_x2_s16(
1074 // CHECK-NEXT: entry:
1075 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
1076 // CHECK-NEXT: ret void
1078 // CPP-CHECK-LABEL: @_Z22test_smlsl_lane_x2_s16j11svint16x2_tu11__SVInt16_t(
1079 // CPP-CHECK-NEXT: entry:
1080 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
1081 // CPP-CHECK-NEXT: ret void
1083 void test_smlsl_lane_x2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_inout("za")
1085 SVE_ACLE_FUNC(svmls_lane_za64,_s16,_vg4x2,,)(slice_base, zn, zm, 7);
1088 // CHECK-LABEL: @test_smlsl_lane_x2_u8(
1089 // CHECK-NEXT: entry:
1090 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1091 // CHECK-NEXT: ret void
1093 // CPP-CHECK-LABEL: @_Z21test_smlsl_lane_x2_u8j11svuint8x2_tu11__SVUint8_t(
1094 // CPP-CHECK-NEXT: entry:
1095 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1096 // CPP-CHECK-NEXT: ret void
1098 void test_smlsl_lane_x2_u8(uint32_t slice_base, svuint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za")
1100 SVE_ACLE_FUNC(svmls_lane_za32,_u8,_vg4x2,,)(slice_base, zn, zm, 15);
1103 // CHECK-LABEL: @test_smlsl_lane_x2_u16(
1104 // CHECK-NEXT: entry:
1105 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
1106 // CHECK-NEXT: ret void
1108 // CPP-CHECK-LABEL: @_Z22test_smlsl_lane_x2_u16j12svuint16x2_tu12__SVUint16_t(
1109 // CPP-CHECK-NEXT: entry:
1110 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.lane.vg4x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
1111 // CPP-CHECK-NEXT: ret void
1113 void test_smlsl_lane_x2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_inout("za")
1115 SVE_ACLE_FUNC(svmls_lane_za64,_u16,_vg4x2,,)(slice_base, zn, zm, 7);
1118 // SUMLALL
1120 // CHECK-LABEL: @test_sumlall_lane_x2_s8(
1121 // CHECK-NEXT: entry:
1122 // CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1123 // CHECK-NEXT: ret void
1125 // CPP-CHECK-LABEL: @_Z23test_sumlall_lane_x2_s8j10svint8x2_tu11__SVUint8_t(
1126 // CPP-CHECK-NEXT: entry:
1127 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1128 // CPP-CHECK-NEXT: ret void
1130 void test_sumlall_lane_x2_s8(uint32_t slice_base, svint8x2_t zn, svuint8_t zm) __arm_streaming __arm_inout("za")
1132 SVE_ACLE_FUNC(svsumla_lane_za32,_s8,_vg4x2,,)(slice_base, zn, zm, 15);
1135 // CHECK-LABEL: @test_usmlall_lane_x2_u8(
1136 // CHECK-NEXT: entry:
1137 // CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1138 // CHECK-NEXT: ret void
1140 // CPP-CHECK-LABEL: @_Z23test_usmlall_lane_x2_u8j11svuint8x2_tu10__SVInt8_t(
1141 // CPP-CHECK-NEXT: entry:
1142 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.lane.vg4x2.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1143 // CPP-CHECK-NEXT: ret void
1145 void test_usmlall_lane_x2_u8(uint32_t slice_base, svuint8x2_t zn, svint8_t zm) __arm_streaming __arm_inout("za")
1147 SVE_ACLE_FUNC(svusmla_lane_za32,_u8,_vg4x2,,)(slice_base, zn, zm, 15);
1151 // Indexed x 4
1154 // MLAL
1156 // CHECK-LABEL: @test_smlal_lane_x4_s8(
1157 // CHECK-NEXT: entry:
1158 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1159 // CHECK-NEXT: ret void
1161 // CPP-CHECK-LABEL: @_Z21test_smlal_lane_x4_s8j10svint8x4_tu10__SVInt8_t(
1162 // CPP-CHECK-NEXT: entry:
1163 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1164 // CPP-CHECK-NEXT: ret void
1166 void test_smlal_lane_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za")
1168 SVE_ACLE_FUNC(svmla_lane_za32,_s8,_vg4x4,,)(slice_base, zn, zm, 15);
1171 // CHECK-LABEL: @test_smlal_lane_x4_s16(
1172 // CHECK-NEXT: entry:
1173 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
1174 // CHECK-NEXT: ret void
1176 // CPP-CHECK-LABEL: @_Z22test_smlal_lane_x4_s16j11svint16x4_tu11__SVInt16_t(
1177 // CPP-CHECK-NEXT: entry:
1178 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smla.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
1179 // CPP-CHECK-NEXT: ret void
1181 void test_smlal_lane_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za")
1183 SVE_ACLE_FUNC(svmla_lane_za64,_s16,_vg4x4,,)(slice_base, zn, zm, 7);
1186 // CHECK-LABEL: @test_smlal_lane_x4_u8(
1187 // CHECK-NEXT: entry:
1188 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1189 // CHECK-NEXT: ret void
1191 // CPP-CHECK-LABEL: @_Z21test_smlal_lane_x4_u8j11svuint8x4_tu11__SVUint8_t(
1192 // CPP-CHECK-NEXT: entry:
1193 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1194 // CPP-CHECK-NEXT: ret void
1196 void test_smlal_lane_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za")
1198 SVE_ACLE_FUNC(svmla_lane_za32,_u8,_vg4x4,,)(slice_base, zn, zm, 15);
1201 // CHECK-LABEL: @test_smlal_lane_x4_u16(
1202 // CHECK-NEXT: entry:
1203 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
1204 // CHECK-NEXT: ret void
1206 // CPP-CHECK-LABEL: @_Z22test_smlal_lane_x4_u16j12svuint16x4_tu12__SVUint16_t(
1207 // CPP-CHECK-NEXT: entry:
1208 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umla.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
1209 // CPP-CHECK-NEXT: ret void
1211 void test_smlal_lane_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za")
1213 SVE_ACLE_FUNC(svmla_lane_za64,_u16,_vg4x4,,)(slice_base, zn, zm, 7);
1216 // MLSL
1218 // CHECK-LABEL: @test_smlsl_lane_x4_s8(
1219 // CHECK-NEXT: entry:
1220 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1221 // CHECK-NEXT: ret void
1223 // CPP-CHECK-LABEL: @_Z21test_smlsl_lane_x4_s8j10svint8x4_tu10__SVInt8_t(
1224 // CPP-CHECK-NEXT: entry:
1225 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1226 // CPP-CHECK-NEXT: ret void
1228 void test_smlsl_lane_x4_s8(uint32_t slice_base, svint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za")
1230 SVE_ACLE_FUNC(svmls_lane_za32,_s8,_vg4x4,,)(slice_base, zn, zm, 15);
1233 // CHECK-LABEL: @test_smlsl_lane_x4_s16(
1234 // CHECK-NEXT: entry:
1235 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
1236 // CHECK-NEXT: ret void
1238 // CPP-CHECK-LABEL: @_Z22test_smlsl_lane_x4_s16j11svint16x4_tu11__SVInt16_t(
1239 // CPP-CHECK-NEXT: entry:
1240 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smls.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
1241 // CPP-CHECK-NEXT: ret void
1243 void test_smlsl_lane_x4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_inout("za")
1245 SVE_ACLE_FUNC(svmls_lane_za64,_s16,_vg4x4,,)(slice_base, zn, zm, 7);
1248 // CHECK-LABEL: @test_smlsl_lane_x4_u8(
1249 // CHECK-NEXT: entry:
1250 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1251 // CHECK-NEXT: ret void
1253 // CPP-CHECK-LABEL: @_Z21test_smlsl_lane_x4_u8j11svuint8x4_tu11__SVUint8_t(
1254 // CPP-CHECK-NEXT: entry:
1255 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1256 // CPP-CHECK-NEXT: ret void
1258 void test_smlsl_lane_x4_u8(uint32_t slice_base, svuint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za")
1260 SVE_ACLE_FUNC(svmls_lane_za32,_u8,_vg4x4,,)(slice_base, zn, zm, 15);
1263 // CHECK-LABEL: @test_smlsl_lane_x4_u16(
1264 // CHECK-NEXT: entry:
1265 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
1266 // CHECK-NEXT: ret void
1268 // CPP-CHECK-LABEL: @_Z22test_smlsl_lane_x4_u16j12svuint16x4_tu12__SVUint16_t(
1269 // CPP-CHECK-NEXT: entry:
1270 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umls.za64.lane.vg4x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN_COERCE0:%.*]], <vscale x 8 x i16> [[ZN_COERCE1:%.*]], <vscale x 8 x i16> [[ZN_COERCE2:%.*]], <vscale x 8 x i16> [[ZN_COERCE3:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
1271 // CPP-CHECK-NEXT: ret void
1273 void test_smlsl_lane_x4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_inout("za")
1275 SVE_ACLE_FUNC(svmls_lane_za64,_u16,_vg4x4,,)(slice_base, zn, zm, 7);
1278 // SUMLALL
1280 // CHECK-LABEL: @test_sumlall_lane_x4_s8(
1281 // CHECK-NEXT: entry:
1282 // CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1283 // CHECK-NEXT: ret void
1285 // CPP-CHECK-LABEL: @_Z23test_sumlall_lane_x4_s8j10svint8x4_tu11__SVUint8_t(
1286 // CPP-CHECK-NEXT: entry:
1287 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1288 // CPP-CHECK-NEXT: ret void
1290 void test_sumlall_lane_x4_s8(uint32_t slice_base, svint8x4_t zn, svuint8_t zm) __arm_streaming __arm_inout("za")
1292 SVE_ACLE_FUNC(svsumla_lane_za32,_s8,_vg4x4,,)(slice_base, zn, zm, 15);
1295 // USMLALL
1297 // CHECK-LABEL: @test_usmlall_lane_x4_s8(
1298 // CHECK-NEXT: entry:
1299 // CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1300 // CHECK-NEXT: ret void
1302 // CPP-CHECK-LABEL: @_Z23test_usmlall_lane_x4_s8j11svuint8x4_tu10__SVInt8_t(
1303 // CPP-CHECK-NEXT: entry:
1304 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmla.za32.lane.vg4x4.nxv16i8(i32 [[SLICE_BASE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i32 15)
1305 // CPP-CHECK-NEXT: ret void
1307 void test_usmlall_lane_x4_s8(uint32_t slice_base, svuint8x4_t zn, svint8_t zm) __arm_streaming __arm_inout("za")
1309 SVE_ACLE_FUNC(svusmla_lane_za32,_u8,_vg4x4,,)(slice_base, zn, zm, 15);