// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s

// REQUIRES: aarch64-registered-target || arm-registered-target

#include <arm_mve.h>

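// Note: each vcmulq variant below lowers to @llvm.arm.mve.vcmulq (or its
// .predicated form), whose leading i32 immediate encodes the rotation of
// the complex multiply: 0, 1, 2 and 3 for 0, 90, 180 and 270 degrees,
// matching the _rot90/_rot180/_rot270 intrinsic suffixes.
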
// CHECK-LABEL: @test_vcmulq_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 0, <8 x half> [[A:%.*]], <8 x half> [[B:%.*]])
// CHECK-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vcmulq_f16(float16x8_t a, float16x8_t b)
{
#ifdef POLYMORPHIC
    return vcmulq(a, b);
#else /* POLYMORPHIC */
    return vcmulq_f16(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vcmulq_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
// CHECK-NEXT: ret <4 x float> [[TMP0]]
//
float32x4_t test_vcmulq_f32(float32x4_t a, float32x4_t b)
{
#ifdef POLYMORPHIC
    return vcmulq(a, b);
#else /* POLYMORPHIC */
    return vcmulq_f32(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vcmulq_rot90_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 1, <8 x half> [[A:%.*]], <8 x half> [[B:%.*]])
// CHECK-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vcmulq_rot90_f16(float16x8_t a, float16x8_t b)
{
#ifdef POLYMORPHIC
    return vcmulq_rot90(a, b);
#else /* POLYMORPHIC */
    return vcmulq_rot90_f16(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vcmulq_rot90_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 1, <4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
// CHECK-NEXT: ret <4 x float> [[TMP0]]
//
float32x4_t test_vcmulq_rot90_f32(float32x4_t a, float32x4_t b)
{
#ifdef POLYMORPHIC
    return vcmulq_rot90(a, b);
#else /* POLYMORPHIC */
    return vcmulq_rot90_f32(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vcmulq_rot180_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 2, <8 x half> [[A:%.*]], <8 x half> [[B:%.*]])
// CHECK-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vcmulq_rot180_f16(float16x8_t a, float16x8_t b)
{
#ifdef POLYMORPHIC
    return vcmulq_rot180(a, b);
#else /* POLYMORPHIC */
    return vcmulq_rot180_f16(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vcmulq_rot180_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 2, <4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
// CHECK-NEXT: ret <4 x float> [[TMP0]]
//
float32x4_t test_vcmulq_rot180_f32(float32x4_t a, float32x4_t b)
{
#ifdef POLYMORPHIC
    return vcmulq_rot180(a, b);
#else /* POLYMORPHIC */
    return vcmulq_rot180_f32(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vcmulq_rot270_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 3, <8 x half> [[A:%.*]], <8 x half> [[B:%.*]])
// CHECK-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vcmulq_rot270_f16(float16x8_t a, float16x8_t b)
{
#ifdef POLYMORPHIC
    return vcmulq_rot270(a, b);
#else /* POLYMORPHIC */
    return vcmulq_rot270_f16(a, b);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vcmulq_rot270_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 3, <4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
// CHECK-NEXT: ret <4 x float> [[TMP0]]
//
float32x4_t test_vcmulq_rot270_f32(float32x4_t a, float32x4_t b)
{
#ifdef POLYMORPHIC
    return vcmulq_rot270(a, b);
#else /* POLYMORPHIC */
    return vcmulq_rot270_f32(a, b);
#endif /* POLYMORPHIC */
}

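// The _m ("merging") predicated forms take an extra `inactive` vector: the
// predicate p is expanded to a vector of i1 lanes, and lanes that are
// predicated off take their result from `inactive`, as the .predicated
// intrinsic calls below show.
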
// CHECK-LABEL: @test_vcmulq_m_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 0, <8 x half> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]])
// CHECK-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vcmulq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vcmulq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
    return vcmulq_m_f16(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vcmulq_m_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 0, <4 x float> [[INACTIVE:%.*]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vcmulq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vcmulq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
    return vcmulq_m_f32(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vcmulq_rot90_m_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 1, <8 x half> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]])
// CHECK-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vcmulq_rot90_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vcmulq_rot90_m(inactive, a, b, p);
#else /* POLYMORPHIC */
    return vcmulq_rot90_m_f16(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vcmulq_rot90_m_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 1, <4 x float> [[INACTIVE:%.*]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vcmulq_rot90_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vcmulq_rot90_m(inactive, a, b, p);
#else /* POLYMORPHIC */
    return vcmulq_rot90_m_f32(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vcmulq_rot180_m_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 2, <8 x half> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]])
// CHECK-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vcmulq_rot180_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vcmulq_rot180_m(inactive, a, b, p);
#else /* POLYMORPHIC */
    return vcmulq_rot180_m_f16(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vcmulq_rot180_m_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 2, <4 x float> [[INACTIVE:%.*]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vcmulq_rot180_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vcmulq_rot180_m(inactive, a, b, p);
#else /* POLYMORPHIC */
    return vcmulq_rot180_m_f32(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vcmulq_rot270_m_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 3, <8 x half> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]])
// CHECK-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vcmulq_rot270_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vcmulq_rot270_m(inactive, a, b, p);
#else /* POLYMORPHIC */
    return vcmulq_rot270_m_f16(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vcmulq_rot270_m_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 3, <4 x float> [[INACTIVE:%.*]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vcmulq_rot270_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vcmulq_rot270_m(inactive, a, b, p);
#else /* POLYMORPHIC */
    return vcmulq_rot270_m_f32(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

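// The _x ("don't care") predicated forms lower like _m, but pass undef as
// the inactive-lane operand, so falsely-predicated lanes are left with an
// unspecified value.
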
// CHECK-LABEL: @test_vcmulq_x_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 0, <8 x half> undef, <8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]])
// CHECK-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vcmulq_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vcmulq_x(a, b, p);
#else /* POLYMORPHIC */
    return vcmulq_x_f16(a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vcmulq_x_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 0, <4 x float> undef, <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vcmulq_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vcmulq_x(a, b, p);
#else /* POLYMORPHIC */
    return vcmulq_x_f32(a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vcmulq_rot90_x_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 1, <8 x half> undef, <8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]])
// CHECK-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vcmulq_rot90_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vcmulq_rot90_x(a, b, p);
#else /* POLYMORPHIC */
    return vcmulq_rot90_x_f16(a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vcmulq_rot90_x_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 1, <4 x float> undef, <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vcmulq_rot90_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vcmulq_rot90_x(a, b, p);
#else /* POLYMORPHIC */
    return vcmulq_rot90_x_f32(a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vcmulq_rot180_x_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 2, <8 x half> undef, <8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]])
// CHECK-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vcmulq_rot180_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vcmulq_rot180_x(a, b, p);
#else /* POLYMORPHIC */
    return vcmulq_rot180_x_f16(a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vcmulq_rot180_x_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 2, <4 x float> undef, <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vcmulq_rot180_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vcmulq_rot180_x(a, b, p);
#else /* POLYMORPHIC */
    return vcmulq_rot180_x_f32(a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vcmulq_rot270_x_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 3, <8 x half> undef, <8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]])
// CHECK-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vcmulq_rot270_x_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vcmulq_rot270_x(a, b, p);
#else /* POLYMORPHIC */
    return vcmulq_rot270_x_f16(a, b, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vcmulq_rot270_x_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 3, <4 x float> undef, <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vcmulq_rot270_x_f32(float32x4_t a, float32x4_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vcmulq_rot270_x(a, b, p);
#else /* POLYMORPHIC */
    return vcmulq_rot270_x_f32(a, b, p);
#endif /* POLYMORPHIC */
}