// RUN: %clang_cc1 -triple armv8-linux-gnueabihf -target-cpu cortex-a75 -target-feature +dotprod \
// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=instcombine | FileCheck %s

// REQUIRES: arm-registered-target

// Test ARM v8.2-A dot product intrinsics

#include <arm_neon.h>

10 uint32x2_t
test_vdot_u32(uint32x2_t a
, uint8x8_t b
, uint8x8_t c
) {
11 // CHECK-LABEL: define{{.*}} <2 x i32> @test_vdot_u32(<2 x i32> noundef %a, <8 x i8> noundef %b, <8 x i8> noundef %c)
12 // CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.arm.neon.udot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c)
13 // CHECK: ret <2 x i32> [[RESULT]]
14 return vdot_u32(a
, b
, c
);
17 uint32x4_t
test_vdotq_u32(uint32x4_t a
, uint8x16_t b
, uint8x16_t c
) {
18 // CHECK-LABEL: define{{.*}} <4 x i32> @test_vdotq_u32(<4 x i32> noundef %a, <16 x i8> noundef %b, <16 x i8> noundef %c)
19 // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.arm.neon.udot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c)
20 // CHECK: ret <4 x i32> [[RESULT]]
21 return vdotq_u32(a
, b
, c
);
24 int32x2_t
test_vdot_s32(int32x2_t a
, int8x8_t b
, int8x8_t c
) {
25 // CHECK-LABEL: define{{.*}} <2 x i32> @test_vdot_s32(<2 x i32> noundef %a, <8 x i8> noundef %b, <8 x i8> noundef %c)
26 // CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.arm.neon.sdot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c)
27 // CHECK: ret <2 x i32> [[RESULT]]
28 return vdot_s32(a
, b
, c
);
31 int32x4_t
test_vdotq_s32(int32x4_t a
, int8x16_t b
, int8x16_t c
) {
32 // CHECK-LABEL: define{{.*}} <4 x i32> @test_vdotq_s32(<4 x i32> noundef %a, <16 x i8> noundef %b, <16 x i8> noundef %c)
33 // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.arm.neon.sdot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c)
34 // CHECK: ret <4 x i32> [[RESULT]]
35 return vdotq_s32(a
, b
, c
);
38 uint32x2_t
test_vdot_lane_u32(uint32x2_t a
, uint8x8_t b
, uint8x8_t c
) {
39 // CHECK-LABEL: define{{.*}} <2 x i32> @test_vdot_lane_u32(<2 x i32> noundef %a, <8 x i8> noundef %b, <8 x i8> noundef %c)
40 // CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32>
41 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
42 // CHECK: [[CAST2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
43 // CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.arm.neon.udot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> [[CAST2]])
44 // CHECK: ret <2 x i32> [[RESULT]]
45 return vdot_lane_u32(a
, b
, c
, 1);
48 uint32x4_t
test_vdotq_lane_u32(uint32x4_t a
, uint8x16_t b
, uint8x8_t c
) {
49 // CHECK-LABEL: define{{.*}} <4 x i32> @test_vdotq_lane_u32(<4 x i32> noundef %a, <16 x i8> noundef %b, <8 x i8> noundef %c)
50 // CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32>
51 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
52 // CHECK: [[CAST2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
53 // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.arm.neon.udot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> [[CAST2]])
54 // CHECK: ret <4 x i32> [[RESULT]]
55 return vdotq_lane_u32(a
, b
, c
, 1);
58 int32x2_t
test_vdot_lane_s32(int32x2_t a
, int8x8_t b
, int8x8_t c
) {
59 // CHECK-LABEL: define{{.*}} <2 x i32> @test_vdot_lane_s32(<2 x i32> noundef %a, <8 x i8> noundef %b, <8 x i8> noundef %c)
60 // CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32>
61 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
62 // CHECK: [[CAST2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
63 // CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.arm.neon.sdot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> [[CAST2]])
64 // CHECK: ret <2 x i32> [[RESULT]]
65 return vdot_lane_s32(a
, b
, c
, 1);
68 int32x4_t
test_vdotq_lane_s32(int32x4_t a
, int8x16_t b
, int8x8_t c
) {
69 // CHECK-LABEL: define{{.*}} <4 x i32> @test_vdotq_lane_s32(<4 x i32> noundef %a, <16 x i8> noundef %b, <8 x i8> noundef %c)
70 // CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32>
71 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
72 // CHECK: [[CAST2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
73 // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.arm.neon.sdot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> [[CAST2]])
74 // CHECK: ret <4 x i32> [[RESULT]]
75 return vdotq_lane_s32(a
, b
, c
, 1);