1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2 // RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
3 // RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
5 // REQUIRES: aarch64-registered-target || arm-registered-target
9 // CHECK-LABEL: @test_vcmlaq_f16(
11 // CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 0, <8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[C:%.*]])
12 // CHECK-NEXT: ret <8 x half> [[TMP0]]
14 float16x8_t
test_vcmlaq_f16(float16x8_t a
, float16x8_t b
, float16x8_t c
)
17 return vcmlaq(a
, b
, c
);
19 return vcmlaq_f16(a
, b
, c
);
23 // CHECK-LABEL: @test_vcmlaq_f32(
25 // CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]])
26 // CHECK-NEXT: ret <4 x float> [[TMP0]]
28 float32x4_t
test_vcmlaq_f32(float32x4_t a
, float32x4_t b
, float32x4_t c
)
31 return vcmlaq(a
, b
, c
);
33 return vcmlaq_f32(a
, b
, c
);
37 // CHECK-LABEL: @test_vcmlaq_rot90_f16(
39 // CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 1, <8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[C:%.*]])
40 // CHECK-NEXT: ret <8 x half> [[TMP0]]
42 float16x8_t
test_vcmlaq_rot90_f16(float16x8_t a
, float16x8_t b
, float16x8_t c
)
45 return vcmlaq_rot90(a
, b
, c
);
47 return vcmlaq_rot90_f16(a
, b
, c
);
51 // CHECK-LABEL: @test_vcmlaq_rot90_f32(
53 // CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]])
54 // CHECK-NEXT: ret <4 x float> [[TMP0]]
56 float32x4_t
test_vcmlaq_rot90_f32(float32x4_t a
, float32x4_t b
, float32x4_t c
)
59 return vcmlaq_rot90(a
, b
, c
);
61 return vcmlaq_rot90_f32(a
, b
, c
);
65 // CHECK-LABEL: @test_vcmlaq_rot180_f16(
67 // CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 2, <8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[C:%.*]])
68 // CHECK-NEXT: ret <8 x half> [[TMP0]]
70 float16x8_t
test_vcmlaq_rot180_f16(float16x8_t a
, float16x8_t b
, float16x8_t c
)
73 return vcmlaq_rot180(a
, b
, c
);
75 return vcmlaq_rot180_f16(a
, b
, c
);
79 // CHECK-LABEL: @test_vcmlaq_rot180_f32(
81 // CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 2, <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]])
82 // CHECK-NEXT: ret <4 x float> [[TMP0]]
84 float32x4_t
test_vcmlaq_rot180_f32(float32x4_t a
, float32x4_t b
, float32x4_t c
)
87 return vcmlaq_rot180(a
, b
, c
);
89 return vcmlaq_rot180_f32(a
, b
, c
);
93 // CHECK-LABEL: @test_vcmlaq_rot270_f16(
95 // CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 3, <8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[C:%.*]])
96 // CHECK-NEXT: ret <8 x half> [[TMP0]]
98 float16x8_t
test_vcmlaq_rot270_f16(float16x8_t a
, float16x8_t b
, float16x8_t c
)
101 return vcmlaq_rot270(a
, b
, c
);
103 return vcmlaq_rot270_f16(a
, b
, c
);
107 // CHECK-LABEL: @test_vcmlaq_rot270_f32(
108 // CHECK-NEXT: entry:
109 // CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 3, <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]])
110 // CHECK-NEXT: ret <4 x float> [[TMP0]]
112 float32x4_t
test_vcmlaq_rot270_f32(float32x4_t a
, float32x4_t b
, float32x4_t c
)
115 return vcmlaq_rot270(a
, b
, c
);
117 return vcmlaq_rot270_f32(a
, b
, c
);
121 // CHECK-LABEL: @test_vcmlaq_m_f16(
122 // CHECK-NEXT: entry:
123 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
124 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
125 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcmlaq.predicated.v8f16.v8i1(i32 0, <8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[C:%.*]], <8 x i1> [[TMP1]])
126 // CHECK-NEXT: ret <8 x half> [[TMP2]]
128 float16x8_t
test_vcmlaq_m_f16(float16x8_t a
, float16x8_t b
, float16x8_t c
, mve_pred16_t p
)
131 return vcmlaq_m(a
, b
, c
, p
);
133 return vcmlaq_m_f16(a
, b
, c
, p
);
137 // CHECK-LABEL: @test_vcmlaq_m_f32(
138 // CHECK-NEXT: entry:
139 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
140 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
141 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 0, <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], <4 x i1> [[TMP1]])
142 // CHECK-NEXT: ret <4 x float> [[TMP2]]
144 float32x4_t
test_vcmlaq_m_f32(float32x4_t a
, float32x4_t b
, float32x4_t c
, mve_pred16_t p
)
147 return vcmlaq_m(a
, b
, c
, p
);
149 return vcmlaq_m_f32(a
, b
, c
, p
);
153 // CHECK-LABEL: @test_vcmlaq_rot90_m_f16(
154 // CHECK-NEXT: entry:
155 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
156 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
157 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcmlaq.predicated.v8f16.v8i1(i32 1, <8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[C:%.*]], <8 x i1> [[TMP1]])
158 // CHECK-NEXT: ret <8 x half> [[TMP2]]
160 float16x8_t
test_vcmlaq_rot90_m_f16(float16x8_t a
, float16x8_t b
, float16x8_t c
, mve_pred16_t p
)
163 return vcmlaq_rot90_m(a
, b
, c
, p
);
165 return vcmlaq_rot90_m_f16(a
, b
, c
, p
);
169 // CHECK-LABEL: @test_vcmlaq_rot90_m_f32(
170 // CHECK-NEXT: entry:
171 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
172 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
173 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 1, <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], <4 x i1> [[TMP1]])
174 // CHECK-NEXT: ret <4 x float> [[TMP2]]
176 float32x4_t
test_vcmlaq_rot90_m_f32(float32x4_t a
, float32x4_t b
, float32x4_t c
, mve_pred16_t p
)
179 return vcmlaq_rot90_m(a
, b
, c
, p
);
181 return vcmlaq_rot90_m_f32(a
, b
, c
, p
);
185 // CHECK-LABEL: @test_vcmlaq_rot180_m_f16(
186 // CHECK-NEXT: entry:
187 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
188 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
189 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcmlaq.predicated.v8f16.v8i1(i32 2, <8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[C:%.*]], <8 x i1> [[TMP1]])
190 // CHECK-NEXT: ret <8 x half> [[TMP2]]
192 float16x8_t
test_vcmlaq_rot180_m_f16(float16x8_t a
, float16x8_t b
, float16x8_t c
, mve_pred16_t p
)
195 return vcmlaq_rot180_m(a
, b
, c
, p
);
197 return vcmlaq_rot180_m_f16(a
, b
, c
, p
);
201 // CHECK-LABEL: @test_vcmlaq_rot180_m_f32(
202 // CHECK-NEXT: entry:
203 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
204 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
205 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 2, <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], <4 x i1> [[TMP1]])
206 // CHECK-NEXT: ret <4 x float> [[TMP2]]
208 float32x4_t
test_vcmlaq_rot180_m_f32(float32x4_t a
, float32x4_t b
, float32x4_t c
, mve_pred16_t p
)
211 return vcmlaq_rot180_m(a
, b
, c
, p
);
213 return vcmlaq_rot180_m_f32(a
, b
, c
, p
);
217 // CHECK-LABEL: @test_vcmlaq_rot270_m_f16(
218 // CHECK-NEXT: entry:
219 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
220 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
221 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcmlaq.predicated.v8f16.v8i1(i32 3, <8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x half> [[C:%.*]], <8 x i1> [[TMP1]])
222 // CHECK-NEXT: ret <8 x half> [[TMP2]]
224 float16x8_t
test_vcmlaq_rot270_m_f16(float16x8_t a
, float16x8_t b
, float16x8_t c
, mve_pred16_t p
)
227 return vcmlaq_rot270_m(a
, b
, c
, p
);
229 return vcmlaq_rot270_m_f16(a
, b
, c
, p
);
233 // CHECK-LABEL: @test_vcmlaq_rot270_m_f32(
234 // CHECK-NEXT: entry:
235 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
236 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
237 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcmlaq.predicated.v4f32.v4i1(i32 3, <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], <4 x i1> [[TMP1]])
238 // CHECK-NEXT: ret <4 x float> [[TMP2]]
240 float32x4_t
test_vcmlaq_rot270_m_f32(float32x4_t a
, float32x4_t b
, float32x4_t c
, mve_pred16_t p
)
243 return vcmlaq_rot270_m(a
, b
, c
, p
);
245 return vcmlaq_rot270_m_f32(a
, b
, c
, p
);