// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +dotprod \
// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=instcombine | FileCheck %s
// REQUIRES: aarch64-registered-target
// Test AArch64 Armv8.2-A dot product intrinsics
10 uint32x2_t
test_vdot_u32(uint32x2_t a
, uint8x8_t b
, uint8x8_t c
) {
11 // CHECK-LABEL: define{{.*}} <2 x i32> @test_vdot_u32(<2 x i32> noundef %a, <8 x i8> noundef %b, <8 x i8> noundef %c)
12 // CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c)
13 // CHECK: ret <2 x i32> [[RESULT]]
14 return vdot_u32(a
, b
, c
);
17 uint32x4_t
test_vdotq_u32(uint32x4_t a
, uint8x16_t b
, uint8x16_t c
) {
18 // CHECK-LABEL: define{{.*}} <4 x i32> @test_vdotq_u32(<4 x i32> noundef %a, <16 x i8> noundef %b, <16 x i8> noundef %c)
19 // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c)
20 // CHECK: ret <4 x i32> [[RESULT]]
21 return vdotq_u32(a
, b
, c
);
24 int32x2_t
test_vdot_s32(int32x2_t a
, int8x8_t b
, int8x8_t c
) {
25 // CHECK-LABEL: define{{.*}} <2 x i32> @test_vdot_s32(<2 x i32> noundef %a, <8 x i8> noundef %b, <8 x i8> noundef %c)
26 // CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.sdot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c)
27 // CHECK: ret <2 x i32> [[RESULT]]
28 return vdot_s32(a
, b
, c
);
31 int32x4_t
test_vdotq_s32(int32x4_t a
, int8x16_t b
, int8x16_t c
) {
32 // CHECK-LABEL: define{{.*}} <4 x i32> @test_vdotq_s32(<4 x i32> noundef %a, <16 x i8> noundef %b, <16 x i8> noundef %c)
33 // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.sdot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c)
34 // CHECK: ret <4 x i32> [[RESULT]]
35 return vdotq_s32(a
, b
, c
);
38 uint32x2_t
test_vdot_lane_u32(uint32x2_t a
, uint8x8_t b
, uint8x8_t c
) {
39 // CHECK-LABEL: define{{.*}} <2 x i32> @test_vdot_lane_u32(<2 x i32> noundef %a, <8 x i8> noundef %b, <8 x i8> noundef %c)
40 // CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32>
41 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
42 // CHECK: [[CAST2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
43 // CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> [[CAST2]])
44 // CHECK: ret <2 x i32> [[RESULT]]
45 return vdot_lane_u32(a
, b
, c
, 1);
48 uint32x4_t
test_vdotq_lane_u32(uint32x4_t a
, uint8x16_t b
, uint8x8_t c
) {
49 // CHECK-LABEL: define{{.*}} <4 x i32> @test_vdotq_lane_u32(<4 x i32> noundef %a, <16 x i8> noundef %b, <8 x i8> noundef %c)
50 // CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32>
51 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
52 // CHECK: [[CAST2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
53 // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> [[CAST2]])
54 // CHECK: ret <4 x i32> [[RESULT]]
55 return vdotq_lane_u32(a
, b
, c
, 1);
58 uint32x2_t
test_vdot_laneq_u32(uint32x2_t a
, uint8x8_t b
, uint8x16_t c
) {
59 // CHECK-LABEL: define{{.*}} <2 x i32> @test_vdot_laneq_u32(<2 x i32> noundef %a, <8 x i8> noundef %b, <16 x i8> noundef %c)
60 // CHECK: [[CAST1:%.*]] = bitcast <16 x i8> %c to <4 x i32>
61 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> poison, <2 x i32> <i32 1, i32 1>
62 // CHECK: [[CAST2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
63 // CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> [[CAST2]])
64 // CHECK: ret <2 x i32> [[RESULT]]
65 return vdot_laneq_u32(a
, b
, c
, 1);
68 uint32x4_t
test_vdotq_laneq_u32(uint32x4_t a
, uint8x16_t b
, uint8x16_t c
) {
69 // CHECK-LABEL: define{{.*}} <4 x i32> @test_vdotq_laneq_u32(<4 x i32> noundef %a, <16 x i8> noundef %b, <16 x i8> noundef %c)
70 // CHECK: [[CAST1:%.*]] = bitcast <16 x i8> %c to <4 x i32>
71 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
72 // CHECK: [[CAST2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
73 // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> [[CAST2]])
74 // CHECK: ret <4 x i32> [[RESULT]]
75 return vdotq_laneq_u32(a
, b
, c
, 1);
78 int32x2_t
test_vdot_lane_s32(int32x2_t a
, int8x8_t b
, int8x8_t c
) {
79 // CHECK-LABEL: define{{.*}} <2 x i32> @test_vdot_lane_s32(<2 x i32> noundef %a, <8 x i8> noundef %b, <8 x i8> noundef %c)
80 // CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32>
81 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
82 // CHECK: [[CAST2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
83 // CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.sdot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> [[CAST2]])
84 // CHECK: ret <2 x i32> [[RESULT]]
85 return vdot_lane_s32(a
, b
, c
, 1);
88 int32x4_t
test_vdotq_lane_s32(int32x4_t a
, int8x16_t b
, int8x8_t c
) {
89 // CHECK-LABEL: define{{.*}} <4 x i32> @test_vdotq_lane_s32(<4 x i32> noundef %a, <16 x i8> noundef %b, <8 x i8> noundef %c)
90 // CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32>
91 // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
92 // CHECK: [[CAST2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
93 // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.sdot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> [[CAST2]])
94 // CHECK: ret <4 x i32> [[RESULT]]
95 return vdotq_lane_s32(a
, b
, c
, 1);
98 int32x2_t
test_vdot_laneq_s32(int32x2_t a
, int8x8_t b
, int8x16_t c
) {
99 // CHECK-LABEL: define{{.*}} <2 x i32> @test_vdot_laneq_s32(<2 x i32> noundef %a, <8 x i8> noundef %b, <16 x i8> noundef %c)
100 // CHECK: [[CAST1:%.*]] = bitcast <16 x i8> %c to <4 x i32>
101 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> poison, <2 x i32> <i32 1, i32 1>
102 // CHECK: [[CAST2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
103 // CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.sdot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> [[CAST2]])
104 // CHECK: ret <2 x i32> [[RESULT]]
105 return vdot_laneq_s32(a
, b
, c
, 1);
108 int32x4_t
test_vdotq_laneq_s32(int32x4_t a
, int8x16_t b
, int8x16_t c
) {
109 // CHECK-LABEL: define{{.*}} <4 x i32> @test_vdotq_laneq_s32(<4 x i32> noundef %a, <16 x i8> noundef %b, <16 x i8> noundef %c)
110 // CHECK: [[CAST1:%.*]] = bitcast <16 x i8> %c to <4 x i32>
111 // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
112 // CHECK: [[CAST2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
113 // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.sdot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> [[CAST2]])
114 // CHECK: ret <4 x i32> [[RESULT]]
115 return vdotq_laneq_s32(a
, b
, c
, 1);