1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2 // REQUIRES: aarch64-registered-target
4 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
5 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
6 // RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
7 // RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
8 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme-f8f16 -target-feature +sme-f8f32 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
12 #ifdef SME_OVERLOADED_FORMS
13 #define SME_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED, A5) A1##A3##A5
15 #define SME_ACLE_FUNC(A1, A2, A3, A4, A5) A1##A2##A3##A4##A5
20 // CHECK-LABEL: define dso_local void @test_svmla_lane_za16_vg2x1(
21 // CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
22 // CHECK-NEXT: [[ENTRY:.*:]]
23 // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
24 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlal.lane.za16.vg2x1(i32 [[SLICE]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
25 // CHECK-NEXT: ret void
27 // CPP-CHECK-LABEL: define dso_local void @_Z26test_svmla_lane_za16_vg2x1ju13__SVMfloat8_tS_m(
28 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
29 // CPP-CHECK-NEXT: [[ENTRY:.*:]]
30 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
31 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlal.lane.za16.vg2x1(i32 [[SLICE]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
32 // CPP-CHECK-NEXT: ret void
34 void test_svmla_lane_za16_vg2x1(uint32_t slice
, svmfloat8_t zn
, svmfloat8_t zm
, fpm_t fpm
) __arm_streaming
__arm_inout("za") {
35 SME_ACLE_FUNC(svmla_lane_za16
,_mf8
,_vg2x1_fpm
,,)(slice
, zn
, zm
, 0, fpm
);
38 // CHECK-LABEL: define dso_local void @test_svmla_lane_za16_vg2x2(
39 // CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
40 // CHECK-NEXT: [[ENTRY:.*:]]
41 // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
42 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlal.lane.za16.vg2x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]], i32 15)
43 // CHECK-NEXT: ret void
45 // CPP-CHECK-LABEL: define dso_local void @_Z26test_svmla_lane_za16_vg2x2j13svmfloat8x2_tu13__SVMfloat8_tm(
46 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
47 // CPP-CHECK-NEXT: [[ENTRY:.*:]]
48 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
49 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlal.lane.za16.vg2x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]], i32 15)
50 // CPP-CHECK-NEXT: ret void
52 void test_svmla_lane_za16_vg2x2(uint32_t slice
, svmfloat8x2_t zn
, svmfloat8_t zm
, fpm_t fpm
) __arm_streaming
__arm_inout("za") {
53 SME_ACLE_FUNC(svmla_lane_za16
,_mf8
,_vg2x2_fpm
,,)(slice
, zn
, zm
, 15, fpm
);
56 // CHECK-LABEL: define dso_local void @test_svmla_lane_za16_vg2x4(
57 // CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
58 // CHECK-NEXT: [[ENTRY:.*:]]
59 // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
60 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlal.lane.za16.vg2x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]], i32 7)
61 // CHECK-NEXT: ret void
63 // CPP-CHECK-LABEL: define dso_local void @_Z26test_svmla_lane_za16_vg2x4j13svmfloat8x4_tu13__SVMfloat8_tm(
64 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
65 // CPP-CHECK-NEXT: [[ENTRY:.*:]]
66 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
67 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlal.lane.za16.vg2x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]], i32 7)
68 // CPP-CHECK-NEXT: ret void
70 void test_svmla_lane_za16_vg2x4(uint32_t slice
, svmfloat8x4_t zn
, svmfloat8_t zm
, fpm_t fpm
) __arm_streaming
__arm_inout("za") {
71 SME_ACLE_FUNC(svmla_lane_za16
,_mf8
,_vg2x4_fpm
,,)(slice
, zn
, zm
, 7, fpm
);
76 // CHECK-LABEL: define dso_local void @test_svmla_lane_za32_vg4x1(
77 // CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
78 // CHECK-NEXT: [[ENTRY:.*:]]
79 // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
80 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlall.lane.za32.vg4x1(i32 [[SLICE]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
81 // CHECK-NEXT: ret void
83 // CPP-CHECK-LABEL: define dso_local void @_Z26test_svmla_lane_za32_vg4x1ju13__SVMfloat8_tS_m(
84 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
85 // CPP-CHECK-NEXT: [[ENTRY:.*:]]
86 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
87 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlall.lane.za32.vg4x1(i32 [[SLICE]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]], i32 0)
88 // CPP-CHECK-NEXT: ret void
90 void test_svmla_lane_za32_vg4x1(uint32_t slice
, svmfloat8_t zn
, svmfloat8_t zm
, fpm_t fpm
) __arm_streaming
__arm_inout("za") {
91 SME_ACLE_FUNC(svmla_lane_za32
,_mf8
,_vg4x1_fpm
,,)(slice
, zn
, zm
, 0, fpm
);
94 // CHECK-LABEL: define dso_local void @test_svmla_lane_za32_vg4x2(
95 // CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
96 // CHECK-NEXT: [[ENTRY:.*:]]
97 // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
98 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlall.lane.za32.vg4x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]], i32 15)
99 // CHECK-NEXT: ret void
101 // CPP-CHECK-LABEL: define dso_local void @_Z26test_svmla_lane_za32_vg4x2j13svmfloat8x2_tu13__SVMfloat8_tm(
102 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
103 // CPP-CHECK-NEXT: [[ENTRY:.*:]]
104 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
105 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlall.lane.za32.vg4x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]], i32 15)
106 // CPP-CHECK-NEXT: ret void
108 void test_svmla_lane_za32_vg4x2(uint32_t slice
, svmfloat8x2_t zn
, svmfloat8_t zm
, fpm_t fpm
) __arm_streaming
__arm_inout("za") {
109 SME_ACLE_FUNC(svmla_lane_za32
,_mf8
,_vg4x2_fpm
,,)(slice
, zn
, zm
, 15, fpm
);
112 // CHECK-LABEL: define dso_local void @test_svmla_lane_za32_vg4x4(
113 // CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
114 // CHECK-NEXT: [[ENTRY:.*:]]
115 // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
116 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlall.lane.za32.vg4x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]], i32 7)
117 // CHECK-NEXT: ret void
119 // CPP-CHECK-LABEL: define dso_local void @_Z26test_svmla_lane_za32_vg4x4j13svmfloat8x4_tu13__SVMfloat8_tm(
120 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
121 // CPP-CHECK-NEXT: [[ENTRY:.*:]]
122 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
123 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlall.lane.za32.vg4x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]], i32 7)
124 // CPP-CHECK-NEXT: ret void
126 void test_svmla_lane_za32_vg4x4(uint32_t slice
, svmfloat8x4_t zn
, svmfloat8_t zm
, fpm_t fpm
) __arm_streaming
__arm_inout("za") {
127 SME_ACLE_FUNC(svmla_lane_za32
,_mf8
,_vg4x4_fpm
,,)(slice
, zn
, zm
, 7, fpm
);
132 // CHECK-LABEL: define dso_local void @test_svmla_single_za16_vg2x1(
133 // CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
134 // CHECK-NEXT: [[ENTRY:.*:]]
135 // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
136 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlal.single.za16.vg2x1(i32 [[SLICE]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
137 // CHECK-NEXT: ret void
139 // CPP-CHECK-LABEL: define dso_local void @_Z28test_svmla_single_za16_vg2x1ju13__SVMfloat8_tS_m(
140 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
141 // CPP-CHECK-NEXT: [[ENTRY:.*:]]
142 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
143 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlal.single.za16.vg2x1(i32 [[SLICE]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
144 // CPP-CHECK-NEXT: ret void
146 void test_svmla_single_za16_vg2x1(uint32_t slice
, svmfloat8_t zn
, svmfloat8_t zm
, fpm_t fpm
) __arm_streaming
__arm_inout("za") {
147 SME_ACLE_FUNC(svmla
,_single
,_za16
,_mf8
,_vg2x1_fpm
)(slice
, zn
, zm
, fpm
);
150 // CHECK-LABEL: define dso_local void @test_svmla_single_za16_vg2x2(
151 // CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
152 // CHECK-NEXT: [[ENTRY:.*:]]
153 // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
154 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlal.single.za16.vg2x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]])
155 // CHECK-NEXT: ret void
157 // CPP-CHECK-LABEL: define dso_local void @_Z28test_svmla_single_za16_vg2x2j13svmfloat8x2_tu13__SVMfloat8_tm(
158 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
159 // CPP-CHECK-NEXT: [[ENTRY:.*:]]
160 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
161 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlal.single.za16.vg2x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]])
162 // CPP-CHECK-NEXT: ret void
164 void test_svmla_single_za16_vg2x2(uint32_t slice
, svmfloat8x2_t zn
, svmfloat8_t zm
, fpm_t fpm
) __arm_streaming
__arm_inout("za") {
165 SME_ACLE_FUNC(svmla
,_single
,_za16
,_mf8
,_vg2x2_fpm
)(slice
, zn
, zm
, fpm
);
168 // CHECK-LABEL: define dso_local void @test_svmla_single_za16_vg2x4(
169 // CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
170 // CHECK-NEXT: [[ENTRY:.*:]]
171 // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
172 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlal.single.za16.vg2x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]])
173 // CHECK-NEXT: ret void
175 // CPP-CHECK-LABEL: define dso_local void @_Z28test_svmla_single_za16_vg2x4j13svmfloat8x4_tu13__SVMfloat8_tm(
176 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
177 // CPP-CHECK-NEXT: [[ENTRY:.*:]]
178 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
179 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlal.single.za16.vg2x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]])
180 // CPP-CHECK-NEXT: ret void
182 void test_svmla_single_za16_vg2x4(uint32_t slice
, svmfloat8x4_t zn
, svmfloat8_t zm
, fpm_t fpm
) __arm_streaming
__arm_inout("za") {
183 SME_ACLE_FUNC(svmla
,_single
,_za16
,_mf8
,_vg2x4_fpm
)(slice
, zn
, zm
, fpm
);
188 // CHECK-LABEL: define dso_local void @test_svmla_single_za32_vg4x1(
189 // CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
190 // CHECK-NEXT: [[ENTRY:.*:]]
191 // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
192 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlall.single.za32.vg4x1(i32 [[SLICE]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
193 // CHECK-NEXT: ret void
195 // CPP-CHECK-LABEL: define dso_local void @_Z28test_svmla_single_za32_vg4x1ju13__SVMfloat8_tS_m(
196 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
197 // CPP-CHECK-NEXT: [[ENTRY:.*:]]
198 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
199 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlall.single.za32.vg4x1(i32 [[SLICE]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
200 // CPP-CHECK-NEXT: ret void
202 void test_svmla_single_za32_vg4x1(uint32_t slice
, svmfloat8_t zn
, svmfloat8_t zm
, fpm_t fpm
) __arm_streaming
__arm_inout("za") {
203 SME_ACLE_FUNC(svmla
,_single
,_za32
,_mf8
,_vg4x1_fpm
)(slice
, zn
, zm
, fpm
);
206 // CHECK-LABEL: define dso_local void @test_svmla_single_za32_vg4x2(
207 // CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
208 // CHECK-NEXT: [[ENTRY:.*:]]
209 // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
210 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlall.single.za32.vg4x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]])
211 // CHECK-NEXT: ret void
213 // CPP-CHECK-LABEL: define dso_local void @_Z28test_svmla_single_za32_vg4x2j13svmfloat8x2_tu13__SVMfloat8_tm(
214 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
215 // CPP-CHECK-NEXT: [[ENTRY:.*:]]
216 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
217 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlall.single.za32.vg4x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]])
218 // CPP-CHECK-NEXT: ret void
220 void test_svmla_single_za32_vg4x2(uint32_t slice
, svmfloat8x2_t zn
, svmfloat8_t zm
, fpm_t fpm
) __arm_streaming
__arm_inout("za") {
221 SME_ACLE_FUNC(svmla
,_single
,_za32
,_mf8
,_vg4x2_fpm
)(slice
, zn
, zm
, fpm
);
224 // CHECK-LABEL: define dso_local void @test_svmla_single_za32_vg4x4(
225 // CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
226 // CHECK-NEXT: [[ENTRY:.*:]]
227 // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
228 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlall.single.za32.vg4x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]])
229 // CHECK-NEXT: ret void
231 // CPP-CHECK-LABEL: define dso_local void @_Z28test_svmla_single_za32_vg4x4j13svmfloat8x4_tu13__SVMfloat8_tm(
232 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
233 // CPP-CHECK-NEXT: [[ENTRY:.*:]]
234 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
235 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlall.single.za32.vg4x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]])
236 // CPP-CHECK-NEXT: ret void
238 void test_svmla_single_za32_vg4x4(uint32_t slice
, svmfloat8x4_t zn
, svmfloat8_t zm
, fpm_t fpm
) __arm_streaming
__arm_inout("za") {
239 SME_ACLE_FUNC(svmla
,_single
,_za32
,_mf8
,_vg4x4_fpm
)(slice
, zn
, zm
, fpm
);
244 // CHECK-LABEL: define dso_local void @test_svmla_multi_za16_vg2x2(
245 // CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
246 // CHECK-NEXT: [[ENTRY:.*:]]
247 // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
248 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlal.multi.za16.vg2x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
249 // CHECK-NEXT: ret void
251 // CPP-CHECK-LABEL: define dso_local void @_Z27test_svmla_multi_za16_vg2x2j13svmfloat8x2_tS_m(
252 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
253 // CPP-CHECK-NEXT: [[ENTRY:.*:]]
254 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
255 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlal.multi.za16.vg2x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
256 // CPP-CHECK-NEXT: ret void
258 void test_svmla_multi_za16_vg2x2(uint32_t slice
, svmfloat8x2_t zn
, svmfloat8x2_t zm
, fpm_t fpm
) __arm_streaming
__arm_inout("za") {
259 SME_ACLE_FUNC(svmla_za16
,_mf8
,_vg2x2_fpm
,,)(slice
, zn
, zm
, fpm
);
262 // CHECK-LABEL: define dso_local void @test_svmla_multi_za16_vg2x4(
263 // CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
264 // CHECK-NEXT: [[ENTRY:.*:]]
265 // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
266 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlal.multi.za16.vg2x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE2]], <vscale x 16 x i8> [[ZM_COERCE3]])
267 // CHECK-NEXT: ret void
269 // CPP-CHECK-LABEL: define dso_local void @_Z27test_svmla_multi_za16_vg2x4j13svmfloat8x4_tS_m(
270 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
271 // CPP-CHECK-NEXT: [[ENTRY:.*:]]
272 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
273 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlal.multi.za16.vg2x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE2]], <vscale x 16 x i8> [[ZM_COERCE3]])
274 // CPP-CHECK-NEXT: ret void
276 void test_svmla_multi_za16_vg2x4(uint32_t slice
, svmfloat8x4_t zn
, svmfloat8x4_t zm
, fpm_t fpm
) __arm_streaming
__arm_inout("za") {
277 SME_ACLE_FUNC(svmla_za16
,_mf8
,_vg2x4_fpm
,,)(slice
, zn
, zm
, fpm
);
282 // CHECK-LABEL: define dso_local void @test_svmla_multi_za32_vg4x2(
283 // CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
284 // CHECK-NEXT: [[ENTRY:.*:]]
285 // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
286 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlall.multi.za32.vg4x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
287 // CHECK-NEXT: ret void
289 // CPP-CHECK-LABEL: define dso_local void @_Z27test_svmla_multi_za32_vg4x2j13svmfloat8x2_tS_m(
290 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
291 // CPP-CHECK-NEXT: [[ENTRY:.*:]]
292 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
293 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlall.multi.za32.vg4x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]])
294 // CPP-CHECK-NEXT: ret void
296 void test_svmla_multi_za32_vg4x2(uint32_t slice
, svmfloat8x2_t zn
, svmfloat8x2_t zm
, fpm_t fpm
) __arm_streaming
__arm_inout("za") {
297 SME_ACLE_FUNC(svmla_za32
,_mf8
,_vg4x2_fpm
,,)(slice
, zn
, zm
, fpm
);
300 // CHECK-LABEL: define dso_local void @test_svmla_multi_za32_vg4x4(
301 // CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
302 // CHECK-NEXT: [[ENTRY:.*:]]
303 // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
304 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlall.multi.za32.vg4x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE2]], <vscale x 16 x i8> [[ZM_COERCE3]])
305 // CHECK-NEXT: ret void
307 // CPP-CHECK-LABEL: define dso_local void @_Z27test_svmla_multi_za32_vg4x4j13svmfloat8x4_tS_m(
308 // CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
309 // CPP-CHECK-NEXT: [[ENTRY:.*:]]
310 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
311 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmlall.multi.za32.vg4x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE2]], <vscale x 16 x i8> [[ZM_COERCE3]])
312 // CPP-CHECK-NEXT: ret void
314 void test_svmla_multi_za32_vg4x4(uint32_t slice
, svmfloat8x4_t zn
, svmfloat8x4_t zm
, fpm_t fpm
) __arm_streaming
__arm_inout("za") {
315 SME_ACLE_FUNC(svmla_za32
,_mf8
,_vg4x4_fpm
,,)(slice
, zn
, zm
, fpm
);