[Clang][SME2] Fix PSEL builtin predicates (#77097)
[llvm-project.git] / clang / test / CodeGen / aarch64-sme2-intrinsics / acle_sme2_mlsl.c
blobafc7db797c9dfcee74e053eb23eb4c32ce9a1772
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2 // REQUIRES: aarch64-registered-target
4 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
5 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
6 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
7 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
8 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
9 #include <arm_sme_draft_spec_subject_to_change.h>
11 #ifdef SVE_OVERLOADED_FORMS
12 // A simple used,unused... macro, long enough to represent any SVE builtin.
13 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5
14 #else
15 #define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5
16 #endif
19 // Multi, multi
20 // CHECK-LABEL: @test_svmls2_f16(
21 // CHECK-NEXT: entry:
22 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN:%.*]], i64 0)
23 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
24 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZM:%.*]], i64 0)
25 // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZM]], i64 8)
26 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]])
27 // CHECK-NEXT: ret void
29 // CPP-CHECK-LABEL: @_Z15test_svmls2_f16j13svfloat16x2_tS_(
30 // CPP-CHECK-NEXT: entry:
31 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN:%.*]], i64 0)
32 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
33 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZM:%.*]], i64 0)
34 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZM]], i64 8)
35 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]])
36 // CPP-CHECK-NEXT: ret void
38 void test_svmls2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16x2_t zm) __arm_streaming __arm_shared_za
40 SVE_ACLE_FUNC(svmls_za32,_f16,_vg2x2,,)(slice_base, zn, zm);
43 // CHECK-LABEL: @test_svmls2_bf16(
44 // CHECK-NEXT: entry:
45 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN:%.*]], i64 0)
46 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 8)
47 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZM:%.*]], i64 0)
48 // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZM]], i64 8)
49 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]])
50 // CHECK-NEXT: ret void
52 // CPP-CHECK-LABEL: @_Z16test_svmls2_bf16j14svbfloat16x2_tS_(
53 // CPP-CHECK-NEXT: entry:
54 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN:%.*]], i64 0)
55 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 8)
56 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZM:%.*]], i64 0)
57 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZM]], i64 8)
58 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]])
59 // CPP-CHECK-NEXT: ret void
61 void test_svmls2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16x2_t zm) __arm_streaming __arm_shared_za
63 SVE_ACLE_FUNC(svmls_za32,_bf16,_vg2x2,,)(slice_base, zn, zm);
66 // CHECK-LABEL: @test_svmls2_u16(
67 // CHECK-NEXT: entry:
68 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
69 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
70 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM:%.*]], i64 0)
71 // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM]], i64 8)
72 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
73 // CHECK-NEXT: ret void
75 // CPP-CHECK-LABEL: @_Z15test_svmls2_u16j12svuint16x2_tS_(
76 // CPP-CHECK-NEXT: entry:
77 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
78 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
79 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM:%.*]], i64 0)
80 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM]], i64 8)
81 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
82 // CPP-CHECK-NEXT: ret void
84 void test_svmls2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) __arm_streaming __arm_shared_za
86 SVE_ACLE_FUNC(svmls_za32,_u16,_vg2x2,,)(slice_base, zn, zm);
89 // CHECK-LABEL: @test_svmls2_s16(
90 // CHECK-NEXT: entry:
91 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
92 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
93 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM:%.*]], i64 0)
94 // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM]], i64 8)
95 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
96 // CHECK-NEXT: ret void
98 // CPP-CHECK-LABEL: @_Z15test_svmls2_s16j11svint16x2_tS_(
99 // CPP-CHECK-NEXT: entry:
100 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
101 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
102 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM:%.*]], i64 0)
103 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZM]], i64 8)
104 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]])
105 // CPP-CHECK-NEXT: ret void
107 void test_svmls2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) __arm_streaming __arm_shared_za
109 SVE_ACLE_FUNC(svmls_za32,_s16,_vg2x2,,)(slice_base, zn, zm);
112 // CHECK-LABEL: @test_svmls4_f16(
113 // CHECK-NEXT: entry:
114 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN:%.*]], i64 0)
115 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
116 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
117 // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
118 // CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM:%.*]], i64 0)
119 // CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 8)
120 // CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 16)
121 // CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 24)
122 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]])
123 // CHECK-NEXT: ret void
125 // CPP-CHECK-LABEL: @_Z15test_svmls4_f16j13svfloat16x4_tS_(
126 // CPP-CHECK-NEXT: entry:
127 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN:%.*]], i64 0)
128 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
129 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
130 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
131 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM:%.*]], i64 0)
132 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 8)
133 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 16)
134 // CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZM]], i64 24)
135 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]])
136 // CPP-CHECK-NEXT: ret void
138 void test_svmls4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16x4_t zm) __arm_streaming __arm_shared_za
140 SVE_ACLE_FUNC(svmls_za32,_f16,_vg2x4,,)(slice_base, zn, zm);
143 // CHECK-LABEL: @test_svmls4_bf16(
144 // CHECK-NEXT: entry:
145 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN:%.*]], i64 0)
146 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 8)
147 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 16)
148 // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 24)
149 // CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM:%.*]], i64 0)
150 // CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM]], i64 8)
151 // CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM]], i64 16)
152 // CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM]], i64 24)
153 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]])
154 // CHECK-NEXT: ret void
156 // CPP-CHECK-LABEL: @_Z16test_svmls4_bf16j14svbfloat16x4_tS_(
157 // CPP-CHECK-NEXT: entry:
158 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN:%.*]], i64 0)
159 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 8)
160 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 16)
161 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 24)
162 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM:%.*]], i64 0)
163 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM]], i64 8)
164 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM]], i64 16)
165 // CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM]], i64 24)
166 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]])
167 // CPP-CHECK-NEXT: ret void
169 void test_svmls4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16x4_t zm) __arm_streaming __arm_shared_za
171 SVE_ACLE_FUNC(svmls_za32,_bf16,_vg2x4,,)(slice_base, zn, zm);
174 // CHECK-LABEL: @test_svmls4_u16(
175 // CHECK-NEXT: entry:
176 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
177 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
178 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
179 // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
180 // CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM:%.*]], i64 0)
181 // CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 8)
182 // CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 16)
183 // CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 24)
184 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]])
185 // CHECK-NEXT: ret void
187 // CPP-CHECK-LABEL: @_Z15test_svmls4_u16j12svuint16x4_tS_(
188 // CPP-CHECK-NEXT: entry:
189 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
190 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
191 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
192 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
193 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM:%.*]], i64 0)
194 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 8)
195 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 16)
196 // CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 24)
197 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]])
198 // CPP-CHECK-NEXT: ret void
200 void test_svmls4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) __arm_streaming __arm_shared_za
202 SVE_ACLE_FUNC(svmls_za32,_u16,_vg2x4,,)(slice_base, zn, zm);
205 // CHECK-LABEL: @test_svmls4_s16(
206 // CHECK-NEXT: entry:
207 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
208 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
209 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
210 // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
211 // CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM:%.*]], i64 0)
212 // CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 8)
213 // CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 16)
214 // CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 24)
215 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]])
216 // CHECK-NEXT: ret void
218 // CPP-CHECK-LABEL: @_Z15test_svmls4_s16j11svint16x4_tS_(
219 // CPP-CHECK-NEXT: entry:
220 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
221 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
222 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
223 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
224 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM:%.*]], i64 0)
225 // CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 8)
226 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 16)
227 // CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZM]], i64 24)
228 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]])
229 // CPP-CHECK-NEXT: ret void
231 void test_svmls4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) __arm_streaming __arm_shared_za
233 SVE_ACLE_FUNC(svmls_za32,_s16,_vg2x4,,)(slice_base, zn, zm);
237 // Multi, single
238 // CHECK-LABEL: @test_svmls_single1_f16(
239 // CHECK-NEXT: entry:
240 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x1.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]])
241 // CHECK-NEXT: ret void
243 // CPP-CHECK-LABEL: @_Z22test_svmls_single1_f16ju13__SVFloat16_tS_(
244 // CPP-CHECK-NEXT: entry:
245 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x1.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]])
246 // CPP-CHECK-NEXT: ret void
248 void test_svmls_single1_f16(uint32_t slice_base, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za
250 SVE_ACLE_FUNC(svmls_za32,_f16,_vg2x1,,)(slice_base, zn, zm);
253 // CHECK-LABEL: @test_svmls_single1_bf16(
254 // CHECK-NEXT: entry:
255 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x1.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
256 // CHECK-NEXT: ret void
258 // CPP-CHECK-LABEL: @_Z23test_svmls_single1_bf16ju14__SVBfloat16_tS_(
259 // CPP-CHECK-NEXT: entry:
260 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x1.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
261 // CPP-CHECK-NEXT: ret void
263 void test_svmls_single1_bf16(uint32_t slice_base, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za
265 SVE_ACLE_FUNC(svmls_za32,_bf16,_vg2x1,,)(slice_base, zn, zm);
268 // CHECK-LABEL: @test_svmls_single1_u16(
269 // CHECK-NEXT: entry:
270 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
271 // CHECK-NEXT: ret void
273 // CPP-CHECK-LABEL: @_Z22test_svmls_single1_u16ju12__SVUint16_tS_(
274 // CPP-CHECK-NEXT: entry:
275 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
276 // CPP-CHECK-NEXT: ret void
278 void test_svmls_single1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za
280 SVE_ACLE_FUNC(svmls_za32,_u16,_vg2x1,,)(slice_base, zn, zm);
283 // CHECK-LABEL: @test_svmls_single1_s16(
284 // CHECK-NEXT: entry:
285 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
286 // CHECK-NEXT: ret void
288 // CPP-CHECK-LABEL: @_Z22test_svmls_single1_s16ju11__SVInt16_tS_(
289 // CPP-CHECK-NEXT: entry:
290 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]])
291 // CPP-CHECK-NEXT: ret void
293 void test_svmls_single1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za
295 SVE_ACLE_FUNC(svmls_za32,_s16,_vg2x1,,)(slice_base, zn, zm);
298 // CHECK-LABEL: @test_svmls_single2_f16(
299 // CHECK-NEXT: entry:
300 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN:%.*]], i64 0)
301 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
302 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[ZM:%.*]])
303 // CHECK-NEXT: ret void
305 // CPP-CHECK-LABEL: @_Z22test_svmls_single2_f16j13svfloat16x2_tu13__SVFloat16_t(
306 // CPP-CHECK-NEXT: entry:
307 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN:%.*]], i64 0)
308 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
309 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[ZM:%.*]])
310 // CPP-CHECK-NEXT: ret void
312 void test_svmls_single2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za
314 SVE_ACLE_FUNC(svmls,_single,_za32,_f16,_vg2x2)(slice_base, zn, zm);
317 // CHECK-LABEL: @test_svmls_single2_bf16(
318 // CHECK-NEXT: entry:
319 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN:%.*]], i64 0)
320 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 8)
321 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[ZM:%.*]])
322 // CHECK-NEXT: ret void
324 // CPP-CHECK-LABEL: @_Z23test_svmls_single2_bf16j14svbfloat16x2_tu14__SVBfloat16_t(
325 // CPP-CHECK-NEXT: entry:
326 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN:%.*]], i64 0)
327 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 8)
328 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[ZM:%.*]])
329 // CPP-CHECK-NEXT: ret void
331 void test_svmls_single2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za
333 SVE_ACLE_FUNC(svmls,_single,_za32,_bf16,_vg2x2)(slice_base, zn, zm);
336 // CHECK-LABEL: @test_svmls_single2_u16(
337 // CHECK-NEXT: entry:
338 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
339 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
340 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]])
341 // CHECK-NEXT: ret void
343 // CPP-CHECK-LABEL: @_Z22test_svmls_single2_u16j12svuint16x2_tu12__SVUint16_t(
344 // CPP-CHECK-NEXT: entry:
345 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
346 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
347 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]])
348 // CPP-CHECK-NEXT: ret void
350 void test_svmls_single2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_shared_za
352 SVE_ACLE_FUNC(svmls,_single,_za32,_u16,_vg2x2)(slice_base, zn, zm);
355 // CHECK-LABEL: @test_svmls_single2_s16(
356 // CHECK-NEXT: entry:
357 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
358 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
359 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]])
360 // CHECK-NEXT: ret void
362 // CPP-CHECK-LABEL: @_Z22test_svmls_single2_s16j11svint16x2_tu11__SVInt16_t(
363 // CPP-CHECK-NEXT: entry:
364 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
365 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
366 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]])
367 // CPP-CHECK-NEXT: ret void
369 void test_svmls_single2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_shared_za
371 SVE_ACLE_FUNC(svmls,_single,_za32,_s16,_vg2x2)(slice_base, zn, zm);
374 // CHECK-LABEL: @test_svmls_single4_f16(
375 // CHECK-NEXT: entry:
376 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN:%.*]], i64 0)
377 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
378 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
379 // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
380 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[ZM:%.*]])
381 // CHECK-NEXT: ret void
383 // CPP-CHECK-LABEL: @_Z22test_svmls_single4_f16j13svfloat16x4_tu13__SVFloat16_t(
384 // CPP-CHECK-NEXT: entry:
385 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN:%.*]], i64 0)
386 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
387 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
388 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
389 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[ZM:%.*]])
390 // CPP-CHECK-NEXT: ret void
392 void test_svmls_single4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za
394 SVE_ACLE_FUNC(svmls,_single,_za32,_f16,_vg2x4)(slice_base, zn, zm);
397 // CHECK-LABEL: @test_svmls_single4_bf16(
398 // CHECK-NEXT: entry:
399 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN:%.*]], i64 0)
400 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 8)
401 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 16)
402 // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 24)
403 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[ZM:%.*]])
404 // CHECK-NEXT: ret void
406 // CPP-CHECK-LABEL: @_Z23test_svmls_single4_bf16j14svbfloat16x4_tu14__SVBfloat16_t(
407 // CPP-CHECK-NEXT: entry:
408 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN:%.*]], i64 0)
409 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 8)
410 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 16)
411 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 24)
412 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[ZM:%.*]])
413 // CPP-CHECK-NEXT: ret void
415 void test_svmls_single4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za
417 SVE_ACLE_FUNC(svmls,_single,_za32,_bf16,_vg2x4)(slice_base, zn, zm);
420 // CHECK-LABEL: @test_svmls_single4_u16(
421 // CHECK-NEXT: entry:
422 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
423 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
424 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
425 // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
426 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]])
427 // CHECK-NEXT: ret void
429 // CPP-CHECK-LABEL: @_Z22test_svmls_single4_u16j12svuint16x4_tu12__SVUint16_t(
430 // CPP-CHECK-NEXT: entry:
431 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
432 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
433 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
434 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
435 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]])
436 // CPP-CHECK-NEXT: ret void
438 void test_svmls_single4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_shared_za
440 SVE_ACLE_FUNC(svmls,_single,_za32,_u16,_vg2x4)(slice_base, zn, zm);
443 // CHECK-LABEL: @test_svmls_single4_s16(
444 // CHECK-NEXT: entry:
445 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
446 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
447 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
448 // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
449 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]])
450 // CHECK-NEXT: ret void
452 // CPP-CHECK-LABEL: @_Z22test_svmls_single4_s16j11svint16x4_tu11__SVInt16_t(
453 // CPP-CHECK-NEXT: entry:
454 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
455 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
456 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
457 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
458 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]])
459 // CPP-CHECK-NEXT: ret void
461 void test_svmls_single4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_shared_za
463 SVE_ACLE_FUNC(svmls,_single,_za32,_s16,_vg2x4)(slice_base, zn, zm);
467 // Multi, indexed
470 // CHECK-LABEL: @test_svmls_lane1_f16(
471 // CHECK-NEXT: entry:
472 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x1.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]], i32 7)
473 // CHECK-NEXT: ret void
475 // CPP-CHECK-LABEL: @_Z20test_svmls_lane1_f16ju13__SVFloat16_tS_(
476 // CPP-CHECK-NEXT: entry:
477 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x1.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]], i32 7)
478 // CPP-CHECK-NEXT: ret void
480 void test_svmls_lane1_f16(uint32_t slice_base, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za
482 SVE_ACLE_FUNC(svmls_lane_za32,_f16,_vg2x1,,)(slice_base, zn, zm, 7);
485 // CHECK-LABEL: @test_svmls_lane1_bf16(
486 // CHECK-NEXT: entry:
487 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x1.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
488 // CHECK-NEXT: ret void
490 // CPP-CHECK-LABEL: @_Z21test_svmls_lane1_bf16ju14__SVBfloat16_tS_(
491 // CPP-CHECK-NEXT: entry:
492 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x1.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
493 // CPP-CHECK-NEXT: ret void
495 void test_svmls_lane1_bf16(uint32_t slice_base, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za
497 SVE_ACLE_FUNC(svmls_lane_za32,_bf16,_vg2x1,,)(slice_base, zn, zm, 7);
500 // CHECK-LABEL: @test_svmls_lane1_u16(
501 // CHECK-NEXT: entry:
502 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.lane.vg2x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
503 // CHECK-NEXT: ret void
505 // CPP-CHECK-LABEL: @_Z20test_svmls_lane1_u16ju12__SVUint16_tS_(
506 // CPP-CHECK-NEXT: entry:
507 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.lane.vg2x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
508 // CPP-CHECK-NEXT: ret void
510 void test_svmls_lane1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) __arm_streaming __arm_shared_za
512 SVE_ACLE_FUNC(svmls_lane_za32,_u16,_vg2x1,,)(slice_base, zn, zm, 7);
515 // CHECK-LABEL: @test_svmls_lane1_s16(
516 // CHECK-NEXT: entry:
517 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
518 // CHECK-NEXT: ret void
520 // CPP-CHECK-LABEL: @_Z20test_svmls_lane1_s16ju11__SVInt16_tS_(
521 // CPP-CHECK-NEXT: entry:
522 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x1.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
523 // CPP-CHECK-NEXT: ret void
525 void test_svmls_lane1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) __arm_streaming __arm_shared_za
527 SVE_ACLE_FUNC(svmls_lane_za32,_s16,_vg2x1,,)(slice_base, zn, zm, 7);
530 // CHECK-LABEL: @test_svmls_lane2_f16(
531 // CHECK-NEXT: entry:
532 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN:%.*]], i64 0)
533 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
534 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[ZM:%.*]], i32 7)
535 // CHECK-NEXT: ret void
537 // CPP-CHECK-LABEL: @_Z20test_svmls_lane2_f16j13svfloat16x2_tu13__SVFloat16_t(
538 // CPP-CHECK-NEXT: entry:
539 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN:%.*]], i64 0)
540 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZN]], i64 8)
541 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[ZM:%.*]], i32 7)
542 // CPP-CHECK-NEXT: ret void
544 void test_svmls_lane2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za
546 SVE_ACLE_FUNC(svmls_lane_za32,_f16,_vg2x2,,)(slice_base, zn, zm, 7);
549 // CHECK-LABEL: @test_svmls_lane2_bf16(
550 // CHECK-NEXT: entry:
551 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN:%.*]], i64 0)
552 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 8)
553 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
554 // CHECK-NEXT: ret void
556 // CPP-CHECK-LABEL: @_Z21test_svmls_lane2_bf16j14svbfloat16x2_tu14__SVBfloat16_t(
557 // CPP-CHECK-NEXT: entry:
558 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN:%.*]], i64 0)
559 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZN]], i64 8)
560 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
561 // CPP-CHECK-NEXT: ret void
563 void test_svmls_lane2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za
565 SVE_ACLE_FUNC(svmls_lane_za32,_bf16,_vg2x2,,)(slice_base, zn, zm, 7);
568 // CHECK-LABEL: @test_svmls_lane2_u16(
569 // CHECK-NEXT: entry:
570 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
571 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
572 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
573 // CHECK-NEXT: ret void
575 // CPP-CHECK-LABEL: @_Z20test_svmls_lane2_u16j12svuint16x2_tu12__SVUint16_t(
576 // CPP-CHECK-NEXT: entry:
577 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
578 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
579 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
580 // CPP-CHECK-NEXT: ret void
582 void test_svmls_lane2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) __arm_streaming __arm_shared_za
584 SVE_ACLE_FUNC(svmls_lane_za32,_u16,_vg2x2,,)(slice_base, zn, zm, 7);
587 // CHECK-LABEL: @test_svmls_lane2_s16(
588 // CHECK-NEXT: entry:
589 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
590 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
591 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
592 // CHECK-NEXT: ret void
594 // CPP-CHECK-LABEL: @_Z20test_svmls_lane2_s16j11svint16x2_tu11__SVInt16_t(
595 // CPP-CHECK-NEXT: entry:
596 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN:%.*]], i64 0)
597 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[ZN]], i64 8)
598 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x2.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
599 // CPP-CHECK-NEXT: ret void
601 void test_svmls_lane2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) __arm_streaming __arm_shared_za
603 SVE_ACLE_FUNC(svmls_lane_za32,_s16,_vg2x2,,)(slice_base, zn, zm, 7);
606 // CHECK-LABEL: @test_svmls_lane4_f16(
607 // CHECK-NEXT: entry:
608 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN:%.*]], i64 0)
609 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
610 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
611 // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
612 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[ZM:%.*]], i32 7)
613 // CHECK-NEXT: ret void
615 // CPP-CHECK-LABEL: @_Z20test_svmls_lane4_f16j13svfloat16x4_tu13__SVFloat16_t(
616 // CPP-CHECK-NEXT: entry:
617 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN:%.*]], i64 0)
618 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 8)
619 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 16)
620 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZN]], i64 24)
621 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8f16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[ZM:%.*]], i32 7)
622 // CPP-CHECK-NEXT: ret void
624 void test_svmls_lane4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_shared_za
626 SVE_ACLE_FUNC(svmls_lane_za32,_f16,_vg2x4,,)(slice_base, zn, zm, 7);
629 // CHECK-LABEL: @test_svmls_lane4_bf16(
630 // CHECK-NEXT: entry:
631 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN:%.*]], i64 0)
632 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 8)
633 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 16)
634 // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 24)
635 // CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
636 // CHECK-NEXT: ret void
638 // CPP-CHECK-LABEL: @_Z21test_svmls_lane4_bf16j14svbfloat16x4_tu14__SVBfloat16_t(
639 // CPP-CHECK-NEXT: entry:
640 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN:%.*]], i64 0)
641 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 8)
642 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 16)
643 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZN]], i64 24)
644 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8bf16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
645 // CPP-CHECK-NEXT: ret void
647 void test_svmls_lane4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za
649 SVE_ACLE_FUNC(svmls_lane_za32,_bf16,_vg2x4,,)(slice_base, zn, zm, 7);
652 // CHECK-LABEL: @test_svmls_lane4_u16(
653 // CHECK-NEXT: entry:
654 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
655 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
656 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
657 // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
658 // CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
659 // CHECK-NEXT: ret void
661 // CPP-CHECK-LABEL: @_Z20test_svmls_lane4_u16j12svuint16x4_tu12__SVUint16_t(
662 // CPP-CHECK-NEXT: entry:
663 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
664 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
665 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
666 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
667 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
668 // CPP-CHECK-NEXT: ret void
670 void test_svmls_lane4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) __arm_streaming __arm_shared_za
672 SVE_ACLE_FUNC(svmls_lane_za32,_u16,_vg2x4,,)(slice_base, zn, zm, 7);
675 // CHECK-LABEL: @test_svmls_lane4_s16(
676 // CHECK-NEXT: entry:
677 // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
678 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
679 // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
680 // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
681 // CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
682 // CHECK-NEXT: ret void
684 // CPP-CHECK-LABEL: @_Z20test_svmls_lane4_s16j11svint16x4_tu11__SVInt16_t(
685 // CPP-CHECK-NEXT: entry:
686 // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN:%.*]], i64 0)
687 // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 8)
688 // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 16)
689 // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[ZN]], i64 24)
690 // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x4.nxv8i16(i32 [[SLICE_BASE:%.*]], <vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[ZM:%.*]], i32 7)
691 // CPP-CHECK-NEXT: ret void
693 void test_svmls_lane4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_shared_za
695 SVE_ACLE_FUNC(svmls_lane_za32,_s16,_vg2x4,,)(slice_base, zn, zm, 7);