// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s

// REQUIRES: aarch64-registered-target || arm-registered-target

#include <arm_mve.h>
9 // CHECK-LABEL: @test_vcaddq_rot90_u8(
11 // CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 1, i32 0, <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]])
12 // CHECK-NEXT: ret <16 x i8> [[TMP0]]
14 uint8x16_t
test_vcaddq_rot90_u8(uint8x16_t a
, uint8x16_t b
)
17 return vcaddq_rot90(a
, b
);
19 return vcaddq_rot90_u8(a
, b
);
23 // CHECK-LABEL: @test_vcaddq_rot90_u16(
25 // CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 1, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
26 // CHECK-NEXT: ret <8 x i16> [[TMP0]]
28 uint16x8_t
test_vcaddq_rot90_u16(uint16x8_t a
, uint16x8_t b
)
31 return vcaddq_rot90(a
, b
);
33 return vcaddq_rot90_u16(a
, b
);
37 // CHECK-LABEL: @test_vcaddq_rot90_u32(
39 // CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 1, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
40 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
42 uint32x4_t
test_vcaddq_rot90_u32(uint32x4_t a
, uint32x4_t b
)
45 return vcaddq_rot90(a
, b
);
47 return vcaddq_rot90_u32(a
, b
);
51 // CHECK-LABEL: @test_vcaddq_rot90_s8(
53 // CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 1, i32 0, <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]])
54 // CHECK-NEXT: ret <16 x i8> [[TMP0]]
56 int8x16_t
test_vcaddq_rot90_s8(int8x16_t a
, int8x16_t b
)
59 return vcaddq_rot90(a
, b
);
61 return vcaddq_rot90_s8(a
, b
);
65 // CHECK-LABEL: @test_vcaddq_rot90_s16(
67 // CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 1, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
68 // CHECK-NEXT: ret <8 x i16> [[TMP0]]
70 int16x8_t
test_vcaddq_rot90_s16(int16x8_t a
, int16x8_t b
)
73 return vcaddq_rot90(a
, b
);
75 return vcaddq_rot90_s16(a
, b
);
79 // CHECK-LABEL: @test_vcaddq_rot90_s32(
81 // CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 1, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
82 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
84 int32x4_t
test_vcaddq_rot90_s32(int32x4_t a
, int32x4_t b
)
87 return vcaddq_rot90(a
, b
);
89 return vcaddq_rot90_s32(a
, b
);
93 // CHECK-LABEL: @test_vcaddq_rot90_f16(
95 // CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcaddq.v8f16(i32 1, i32 0, <8 x half> [[A:%.*]], <8 x half> [[B:%.*]])
96 // CHECK-NEXT: ret <8 x half> [[TMP0]]
98 float16x8_t
test_vcaddq_rot90_f16(float16x8_t a
, float16x8_t b
)
101 return vcaddq_rot90(a
, b
);
103 return vcaddq_rot90_f16(a
, b
);
107 // CHECK-LABEL: @test_vcaddq_rot90_f32(
108 // CHECK-NEXT: entry:
109 // CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcaddq.v4f32(i32 1, i32 0, <4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
110 // CHECK-NEXT: ret <4 x float> [[TMP0]]
112 float32x4_t
test_vcaddq_rot90_f32(float32x4_t a
, float32x4_t b
)
115 return vcaddq_rot90(a
, b
);
117 return vcaddq_rot90_f32(a
, b
);
121 // CHECK-LABEL: @test_vcaddq_rot270_u8(
122 // CHECK-NEXT: entry:
123 // CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 1, i32 1, <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]])
124 // CHECK-NEXT: ret <16 x i8> [[TMP0]]
126 uint8x16_t
test_vcaddq_rot270_u8(uint8x16_t a
, uint8x16_t b
)
129 return vcaddq_rot270(a
, b
);
131 return vcaddq_rot270_u8(a
, b
);
135 // CHECK-LABEL: @test_vcaddq_rot270_u16(
136 // CHECK-NEXT: entry:
137 // CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 1, i32 1, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
138 // CHECK-NEXT: ret <8 x i16> [[TMP0]]
140 uint16x8_t
test_vcaddq_rot270_u16(uint16x8_t a
, uint16x8_t b
)
143 return vcaddq_rot270(a
, b
);
145 return vcaddq_rot270_u16(a
, b
);
149 // CHECK-LABEL: @test_vcaddq_rot270_u32(
150 // CHECK-NEXT: entry:
151 // CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 1, i32 1, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
152 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
154 uint32x4_t
test_vcaddq_rot270_u32(uint32x4_t a
, uint32x4_t b
)
157 return vcaddq_rot270(a
, b
);
159 return vcaddq_rot270_u32(a
, b
);
163 // CHECK-LABEL: @test_vcaddq_rot270_s8(
164 // CHECK-NEXT: entry:
165 // CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 1, i32 1, <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]])
166 // CHECK-NEXT: ret <16 x i8> [[TMP0]]
168 int8x16_t
test_vcaddq_rot270_s8(int8x16_t a
, int8x16_t b
)
171 return vcaddq_rot270(a
, b
);
173 return vcaddq_rot270_s8(a
, b
);
177 // CHECK-LABEL: @test_vcaddq_rot270_s16(
178 // CHECK-NEXT: entry:
179 // CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 1, i32 1, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
180 // CHECK-NEXT: ret <8 x i16> [[TMP0]]
182 int16x8_t
test_vcaddq_rot270_s16(int16x8_t a
, int16x8_t b
)
185 return vcaddq_rot270(a
, b
);
187 return vcaddq_rot270_s16(a
, b
);
191 // CHECK-LABEL: @test_vcaddq_rot270_s32(
192 // CHECK-NEXT: entry:
193 // CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 1, i32 1, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
194 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
196 int32x4_t
test_vcaddq_rot270_s32(int32x4_t a
, int32x4_t b
)
199 return vcaddq_rot270(a
, b
);
201 return vcaddq_rot270_s32(a
, b
);
205 // CHECK-LABEL: @test_vcaddq_rot270_f16(
206 // CHECK-NEXT: entry:
207 // CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcaddq.v8f16(i32 1, i32 1, <8 x half> [[A:%.*]], <8 x half> [[B:%.*]])
208 // CHECK-NEXT: ret <8 x half> [[TMP0]]
210 float16x8_t
test_vcaddq_rot270_f16(float16x8_t a
, float16x8_t b
)
213 return vcaddq_rot270(a
, b
);
215 return vcaddq_rot270_f16(a
, b
);
219 // CHECK-LABEL: @test_vcaddq_rot270_f32(
220 // CHECK-NEXT: entry:
221 // CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcaddq.v4f32(i32 1, i32 1, <4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
222 // CHECK-NEXT: ret <4 x float> [[TMP0]]
224 float32x4_t
test_vcaddq_rot270_f32(float32x4_t a
, float32x4_t b
)
227 return vcaddq_rot270(a
, b
);
229 return vcaddq_rot270_f32(a
, b
);
234 // CHECK-LABEL: @test_vcaddq_rot90_m_u8(
235 // CHECK-NEXT: entry:
236 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
237 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
238 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 0, <16 x i8> [[INACTIVE:%.*]], <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]])
239 // CHECK-NEXT: ret <16 x i8> [[TMP2]]
241 uint8x16_t
test_vcaddq_rot90_m_u8(uint8x16_t inactive
, uint8x16_t a
, uint8x16_t b
, mve_pred16_t p
)
244 return vcaddq_rot90_m(inactive
, a
, b
, p
);
246 return vcaddq_rot90_m_u8(inactive
, a
, b
, p
);
250 // CHECK-LABEL: @test_vcaddq_rot90_m_u16(
251 // CHECK-NEXT: entry:
252 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
253 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
254 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 0, <8 x i16> [[INACTIVE:%.*]], <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
255 // CHECK-NEXT: ret <8 x i16> [[TMP2]]
257 uint16x8_t
test_vcaddq_rot90_m_u16(uint16x8_t inactive
, uint16x8_t a
, uint16x8_t b
, mve_pred16_t p
)
260 return vcaddq_rot90_m(inactive
, a
, b
, p
);
262 return vcaddq_rot90_m_u16(inactive
, a
, b
, p
);
266 // CHECK-LABEL: @test_vcaddq_rot90_m_u32(
267 // CHECK-NEXT: entry:
268 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
269 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
270 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 0, <4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
271 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
273 uint32x4_t
test_vcaddq_rot90_m_u32(uint32x4_t inactive
, uint32x4_t a
, uint32x4_t b
, mve_pred16_t p
)
276 return vcaddq_rot90_m(inactive
, a
, b
, p
);
278 return vcaddq_rot90_m_u32(inactive
, a
, b
, p
);
282 // CHECK-LABEL: @test_vcaddq_rot90_m_s8(
283 // CHECK-NEXT: entry:
284 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
285 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
286 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 0, <16 x i8> [[INACTIVE:%.*]], <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]])
287 // CHECK-NEXT: ret <16 x i8> [[TMP2]]
289 int8x16_t
test_vcaddq_rot90_m_s8(int8x16_t inactive
, int8x16_t a
, int8x16_t b
, mve_pred16_t p
)
292 return vcaddq_rot90_m(inactive
, a
, b
, p
);
294 return vcaddq_rot90_m_s8(inactive
, a
, b
, p
);
298 // CHECK-LABEL: @test_vcaddq_rot90_m_s16(
299 // CHECK-NEXT: entry:
300 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
301 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
302 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 0, <8 x i16> [[INACTIVE:%.*]], <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
303 // CHECK-NEXT: ret <8 x i16> [[TMP2]]
305 int16x8_t
test_vcaddq_rot90_m_s16(int16x8_t inactive
, int16x8_t a
, int16x8_t b
, mve_pred16_t p
)
308 return vcaddq_rot90_m(inactive
, a
, b
, p
);
310 return vcaddq_rot90_m_s16(inactive
, a
, b
, p
);
314 // CHECK-LABEL: @test_vcaddq_rot90_m_s32(
315 // CHECK-NEXT: entry:
316 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
317 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
318 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 0, <4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
319 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
321 int32x4_t
test_vcaddq_rot90_m_s32(int32x4_t inactive
, int32x4_t a
, int32x4_t b
, mve_pred16_t p
)
324 return vcaddq_rot90_m(inactive
, a
, b
, p
);
326 return vcaddq_rot90_m_s32(inactive
, a
, b
, p
);
330 // CHECK-LABEL: @test_vcaddq_rot90_m_f16(
331 // CHECK-NEXT: entry:
332 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
333 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
334 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcaddq.predicated.v8f16.v8i1(i32 1, i32 0, <8 x half> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]])
335 // CHECK-NEXT: ret <8 x half> [[TMP2]]
337 float16x8_t
test_vcaddq_rot90_m_f16(float16x8_t inactive
, float16x8_t a
, float16x8_t b
, mve_pred16_t p
)
340 return vcaddq_rot90_m(inactive
, a
, b
, p
);
342 return vcaddq_rot90_m_f16(inactive
, a
, b
, p
);
346 // CHECK-LABEL: @test_vcaddq_rot90_m_f32(
347 // CHECK-NEXT: entry:
348 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
349 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
350 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcaddq.predicated.v4f32.v4i1(i32 1, i32 0, <4 x float> [[INACTIVE:%.*]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]])
351 // CHECK-NEXT: ret <4 x float> [[TMP2]]
353 float32x4_t
test_vcaddq_rot90_m_f32(float32x4_t inactive
, float32x4_t a
, float32x4_t b
, mve_pred16_t p
)
356 return vcaddq_rot90_m(inactive
, a
, b
, p
);
358 return vcaddq_rot90_m_f32(inactive
, a
, b
, p
);
362 // CHECK-LABEL: @test_vcaddq_rot270_m_u8(
363 // CHECK-NEXT: entry:
364 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
365 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
366 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 1, <16 x i8> [[INACTIVE:%.*]], <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]])
367 // CHECK-NEXT: ret <16 x i8> [[TMP2]]
369 uint8x16_t
test_vcaddq_rot270_m_u8(uint8x16_t inactive
, uint8x16_t a
, uint8x16_t b
, mve_pred16_t p
)
372 return vcaddq_rot270_m(inactive
, a
, b
, p
);
374 return vcaddq_rot270_m_u8(inactive
, a
, b
, p
);
378 // CHECK-LABEL: @test_vcaddq_rot270_m_u16(
379 // CHECK-NEXT: entry:
380 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
381 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
382 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 1, <8 x i16> [[INACTIVE:%.*]], <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
383 // CHECK-NEXT: ret <8 x i16> [[TMP2]]
385 uint16x8_t
test_vcaddq_rot270_m_u16(uint16x8_t inactive
, uint16x8_t a
, uint16x8_t b
, mve_pred16_t p
)
388 return vcaddq_rot270_m(inactive
, a
, b
, p
);
390 return vcaddq_rot270_m_u16(inactive
, a
, b
, p
);
394 // CHECK-LABEL: @test_vcaddq_rot270_m_u32(
395 // CHECK-NEXT: entry:
396 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
397 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
398 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 1, <4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
399 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
401 uint32x4_t
test_vcaddq_rot270_m_u32(uint32x4_t inactive
, uint32x4_t a
, uint32x4_t b
, mve_pred16_t p
)
404 return vcaddq_rot270_m(inactive
, a
, b
, p
);
406 return vcaddq_rot270_m_u32(inactive
, a
, b
, p
);
410 // CHECK-LABEL: @test_vcaddq_rot270_m_s8(
411 // CHECK-NEXT: entry:
412 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
413 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
414 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 1, <16 x i8> [[INACTIVE:%.*]], <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]])
415 // CHECK-NEXT: ret <16 x i8> [[TMP2]]
417 int8x16_t
test_vcaddq_rot270_m_s8(int8x16_t inactive
, int8x16_t a
, int8x16_t b
, mve_pred16_t p
)
420 return vcaddq_rot270_m(inactive
, a
, b
, p
);
422 return vcaddq_rot270_m_s8(inactive
, a
, b
, p
);
426 // CHECK-LABEL: @test_vcaddq_rot270_m_s16(
427 // CHECK-NEXT: entry:
428 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
429 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
430 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 1, <8 x i16> [[INACTIVE:%.*]], <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
431 // CHECK-NEXT: ret <8 x i16> [[TMP2]]
433 int16x8_t
test_vcaddq_rot270_m_s16(int16x8_t inactive
, int16x8_t a
, int16x8_t b
, mve_pred16_t p
)
436 return vcaddq_rot270_m(inactive
, a
, b
, p
);
438 return vcaddq_rot270_m_s16(inactive
, a
, b
, p
);
442 // CHECK-LABEL: @test_vcaddq_rot270_m_s32(
443 // CHECK-NEXT: entry:
444 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
445 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
446 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 1, <4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
447 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
449 int32x4_t
test_vcaddq_rot270_m_s32(int32x4_t inactive
, int32x4_t a
, int32x4_t b
, mve_pred16_t p
)
452 return vcaddq_rot270_m(inactive
, a
, b
, p
);
454 return vcaddq_rot270_m_s32(inactive
, a
, b
, p
);
458 // CHECK-LABEL: @test_vcaddq_rot270_m_f16(
459 // CHECK-NEXT: entry:
460 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
461 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
462 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcaddq.predicated.v8f16.v8i1(i32 1, i32 1, <8 x half> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]])
463 // CHECK-NEXT: ret <8 x half> [[TMP2]]
465 float16x8_t
test_vcaddq_rot270_m_f16(float16x8_t inactive
, float16x8_t a
, float16x8_t b
, mve_pred16_t p
)
468 return vcaddq_rot270_m(inactive
, a
, b
, p
);
470 return vcaddq_rot270_m_f16(inactive
, a
, b
, p
);
474 // CHECK-LABEL: @test_vcaddq_rot270_m_f32(
475 // CHECK-NEXT: entry:
476 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
477 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
478 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcaddq.predicated.v4f32.v4i1(i32 1, i32 1, <4 x float> [[INACTIVE:%.*]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]])
479 // CHECK-NEXT: ret <4 x float> [[TMP2]]
481 float32x4_t
test_vcaddq_rot270_m_f32(float32x4_t inactive
, float32x4_t a
, float32x4_t b
, mve_pred16_t p
)
484 return vcaddq_rot270_m(inactive
, a
, b
, p
);
486 return vcaddq_rot270_m_f32(inactive
, a
, b
, p
);
490 // CHECK-LABEL: @test_vcaddq_rot90_x_u8(
491 // CHECK-NEXT: entry:
492 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
493 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
494 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 0, <16 x i8> undef, <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]])
495 // CHECK-NEXT: ret <16 x i8> [[TMP2]]
497 uint8x16_t
test_vcaddq_rot90_x_u8(uint8x16_t a
, uint8x16_t b
, mve_pred16_t p
)
500 return vcaddq_rot90_x(a
, b
, p
);
502 return vcaddq_rot90_x_u8(a
, b
, p
);
506 // CHECK-LABEL: @test_vcaddq_rot90_x_u16(
507 // CHECK-NEXT: entry:
508 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
509 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
510 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 0, <8 x i16> undef, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
511 // CHECK-NEXT: ret <8 x i16> [[TMP2]]
513 uint16x8_t
test_vcaddq_rot90_x_u16(uint16x8_t a
, uint16x8_t b
, mve_pred16_t p
)
516 return vcaddq_rot90_x(a
, b
, p
);
518 return vcaddq_rot90_x_u16(a
, b
, p
);
522 // CHECK-LABEL: @test_vcaddq_rot90_x_u32(
523 // CHECK-NEXT: entry:
524 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
525 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
526 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 0, <4 x i32> undef, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
527 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
529 uint32x4_t
test_vcaddq_rot90_x_u32(uint32x4_t a
, uint32x4_t b
, mve_pred16_t p
)
532 return vcaddq_rot90_x(a
, b
, p
);
534 return vcaddq_rot90_x_u32(a
, b
, p
);
538 // CHECK-LABEL: @test_vcaddq_rot90_x_s8(
539 // CHECK-NEXT: entry:
540 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
541 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
542 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 0, <16 x i8> undef, <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]])
543 // CHECK-NEXT: ret <16 x i8> [[TMP2]]
545 int8x16_t
test_vcaddq_rot90_x_s8(int8x16_t a
, int8x16_t b
, mve_pred16_t p
)
548 return vcaddq_rot90_x(a
, b
, p
);
550 return vcaddq_rot90_x_s8(a
, b
, p
);
554 // CHECK-LABEL: @test_vcaddq_rot90_x_s16(
555 // CHECK-NEXT: entry:
556 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
557 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
558 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 0, <8 x i16> undef, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
559 // CHECK-NEXT: ret <8 x i16> [[TMP2]]
561 int16x8_t
test_vcaddq_rot90_x_s16(int16x8_t a
, int16x8_t b
, mve_pred16_t p
)
564 return vcaddq_rot90_x(a
, b
, p
);
566 return vcaddq_rot90_x_s16(a
, b
, p
);
570 // CHECK-LABEL: @test_vcaddq_rot90_x_s32(
571 // CHECK-NEXT: entry:
572 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
573 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
574 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 0, <4 x i32> undef, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
575 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
577 int32x4_t
test_vcaddq_rot90_x_s32(int32x4_t a
, int32x4_t b
, mve_pred16_t p
)
580 return vcaddq_rot90_x(a
, b
, p
);
582 return vcaddq_rot90_x_s32(a
, b
, p
);
586 // CHECK-LABEL: @test_vcaddq_rot90_x_f16(
587 // CHECK-NEXT: entry:
588 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
589 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
590 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcaddq.predicated.v8f16.v8i1(i32 1, i32 0, <8 x half> undef, <8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]])
591 // CHECK-NEXT: ret <8 x half> [[TMP2]]
593 float16x8_t
test_vcaddq_rot90_x_f16(float16x8_t a
, float16x8_t b
, mve_pred16_t p
)
596 return vcaddq_rot90_x(a
, b
, p
);
598 return vcaddq_rot90_x_f16(a
, b
, p
);
602 // CHECK-LABEL: @test_vcaddq_rot90_x_f32(
603 // CHECK-NEXT: entry:
604 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
605 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
606 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcaddq.predicated.v4f32.v4i1(i32 1, i32 0, <4 x float> undef, <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]])
607 // CHECK-NEXT: ret <4 x float> [[TMP2]]
609 float32x4_t
test_vcaddq_rot90_x_f32(float32x4_t a
, float32x4_t b
, mve_pred16_t p
)
612 return vcaddq_rot90_x(a
, b
, p
);
614 return vcaddq_rot90_x_f32(a
, b
, p
);
618 // CHECK-LABEL: @test_vcaddq_rot270_x_u8(
619 // CHECK-NEXT: entry:
620 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
621 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
622 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 1, <16 x i8> undef, <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]])
623 // CHECK-NEXT: ret <16 x i8> [[TMP2]]
625 uint8x16_t
test_vcaddq_rot270_x_u8(uint8x16_t a
, uint8x16_t b
, mve_pred16_t p
)
628 return vcaddq_rot270_x(a
, b
, p
);
630 return vcaddq_rot270_x_u8(a
, b
, p
);
634 // CHECK-LABEL: @test_vcaddq_rot270_x_u16(
635 // CHECK-NEXT: entry:
636 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
637 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
638 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 1, <8 x i16> undef, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
639 // CHECK-NEXT: ret <8 x i16> [[TMP2]]
641 uint16x8_t
test_vcaddq_rot270_x_u16(uint16x8_t a
, uint16x8_t b
, mve_pred16_t p
)
644 return vcaddq_rot270_x(a
, b
, p
);
646 return vcaddq_rot270_x_u16(a
, b
, p
);
650 // CHECK-LABEL: @test_vcaddq_rot270_x_u32(
651 // CHECK-NEXT: entry:
652 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
653 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
654 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 1, <4 x i32> undef, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
655 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
657 uint32x4_t
test_vcaddq_rot270_x_u32(uint32x4_t a
, uint32x4_t b
, mve_pred16_t p
)
660 return vcaddq_rot270_x(a
, b
, p
);
662 return vcaddq_rot270_x_u32(a
, b
, p
);
666 // CHECK-LABEL: @test_vcaddq_rot270_x_s8(
667 // CHECK-NEXT: entry:
668 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
669 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
670 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 1, <16 x i8> undef, <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]])
671 // CHECK-NEXT: ret <16 x i8> [[TMP2]]
673 int8x16_t
test_vcaddq_rot270_x_s8(int8x16_t a
, int8x16_t b
, mve_pred16_t p
)
676 return vcaddq_rot270_x(a
, b
, p
);
678 return vcaddq_rot270_x_s8(a
, b
, p
);
682 // CHECK-LABEL: @test_vcaddq_rot270_x_s16(
683 // CHECK-NEXT: entry:
684 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
685 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
686 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 1, <8 x i16> undef, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
687 // CHECK-NEXT: ret <8 x i16> [[TMP2]]
689 int16x8_t
test_vcaddq_rot270_x_s16(int16x8_t a
, int16x8_t b
, mve_pred16_t p
)
692 return vcaddq_rot270_x(a
, b
, p
);
694 return vcaddq_rot270_x_s16(a
, b
, p
);
698 // CHECK-LABEL: @test_vcaddq_rot270_x_s32(
699 // CHECK-NEXT: entry:
700 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
701 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
702 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 1, <4 x i32> undef, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
703 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
705 int32x4_t
test_vcaddq_rot270_x_s32(int32x4_t a
, int32x4_t b
, mve_pred16_t p
)
708 return vcaddq_rot270_x(a
, b
, p
);
710 return vcaddq_rot270_x_s32(a
, b
, p
);
714 // CHECK-LABEL: @test_vcaddq_rot270_x_f16(
715 // CHECK-NEXT: entry:
716 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
717 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
718 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcaddq.predicated.v8f16.v8i1(i32 1, i32 1, <8 x half> undef, <8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]])
719 // CHECK-NEXT: ret <8 x half> [[TMP2]]
721 float16x8_t
test_vcaddq_rot270_x_f16(float16x8_t a
, float16x8_t b
, mve_pred16_t p
)
724 return vcaddq_rot270_x(a
, b
, p
);
726 return vcaddq_rot270_x_f16(a
, b
, p
);
730 // CHECK-LABEL: @test_vcaddq_rot270_x_f32(
731 // CHECK-NEXT: entry:
732 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
733 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
734 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcaddq.predicated.v4f32.v4i1(i32 1, i32 1, <4 x float> undef, <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]])
735 // CHECK-NEXT: ret <4 x float> [[TMP2]]
737 float32x4_t
test_vcaddq_rot270_x_f32(float32x4_t a
, float32x4_t b
, mve_pred16_t p
)
740 return vcaddq_rot270_x(a
, b
, p
);
742 return vcaddq_rot270_x_f32(a
, b
, p
);