[clang][modules] Don't prevent translation of FW_Private includes when explicitly...
[llvm-project.git] / clang / test / CodeGen / aarch64-v8.6a-neon-intrinsics.c
blob4001776fcb3c35a36f183d5a1710b554f657cd88
1 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.6a -target-feature +i8mm \
2 // RUN: -S -disable-O0-optnone -emit-llvm -o - %s \
3 // RUN: | opt -S -passes=mem2reg,sroa \
4 // RUN: | FileCheck %s
6 // REQUIRES: aarch64-registered-target
8 #include <arm_neon.h>
10 // CHECK-LABEL: test_vmmlaq_s32
11 // CHECK: [[VAL:%.*]] = call <4 x i32> @llvm.aarch64.neon.smmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b)
12 // CHECK: ret <4 x i32> [[VAL]]
13 int32x4_t test_vmmlaq_s32(int32x4_t r, int8x16_t a, int8x16_t b) {
14 return vmmlaq_s32(r, a, b);
17 // CHECK-LABEL: test_vmmlaq_u32
18 // CHECK: [[VAL:%.*]] = call <4 x i32> @llvm.aarch64.neon.ummla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b)
19 // CHECK: ret <4 x i32> [[VAL]]
20 uint32x4_t test_vmmlaq_u32(uint32x4_t r, uint8x16_t a, uint8x16_t b) {
21 return vmmlaq_u32(r, a, b);
24 // CHECK-LABEL: test_vusmmlaq_s32
25 // CHECK: [[VAL:%.*]] = call <4 x i32> @llvm.aarch64.neon.usmmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b)
26 // CHECK: ret <4 x i32> [[VAL]]
27 int32x4_t test_vusmmlaq_s32(int32x4_t r, uint8x16_t a, int8x16_t b) {
28 return vusmmlaq_s32(r, a, b);
31 // CHECK-LABEL: test_vusdot_s32
32 // CHECK: [[VAL:%.*]] = call <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b)
33 // CHECK: ret <2 x i32> [[VAL]]
34 int32x2_t test_vusdot_s32(int32x2_t r, uint8x8_t a, int8x8_t b) {
35 return vusdot_s32(r, a, b);
38 // CHECK-LABEL: test_vusdot_lane_s32
39 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %b to <2 x i32>
40 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
41 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
42 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
43 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
44 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> %r to <8 x i8>
45 // CHECK: [[OP:%.*]] = call <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> [[TMP4]])
46 // CHECK: ret <2 x i32> [[OP]]
47 int32x2_t test_vusdot_lane_s32(int32x2_t r, uint8x8_t a, int8x8_t b) {
48 return vusdot_lane_s32(r, a, b, 0);
51 // CHECK-LABEL: test_vsudot_lane_s32
52 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %b to <2 x i32>
53 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %0 to <8 x i8>
54 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> %1 to <2 x i32>
55 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer
56 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
57 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> %r to <8 x i8>
58 // CHECK: [[OP:%.*]] = call <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> [[TMP4]], <8 x i8> %a)
59 // CHECK: ret <2 x i32> [[OP]]
60 int32x2_t test_vsudot_lane_s32(int32x2_t r, int8x8_t a, uint8x8_t b) {
61 return vsudot_lane_s32(r, a, b, 0);
64 // CHECK-LABEL: test_vusdot_laneq_s32
65 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %b to <4 x i32>
66 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
67 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
68 // CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP2]], <2 x i32> zeroinitializer
69 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
70 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> %r to <8 x i8>
71 // CHECK: [[OP:%.*]] = call <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> [[TMP4]])
72 // CHECK: ret <2 x i32> [[OP]]
73 int32x2_t test_vusdot_laneq_s32(int32x2_t r, uint8x8_t a, int8x16_t b) {
74 return vusdot_laneq_s32(r, a, b, 0);
77 // CHECK-LABEL: test_vsudot_laneq_s32
78 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %b to <4 x i32>
79 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
80 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
81 // CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP2]], <2 x i32> zeroinitializer
82 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
83 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> %r to <8 x i8>
84 // CHECK: [[OP:%.*]] = call <2 x i32> @llvm.aarch64.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> [[TMP4]], <8 x i8> %a)
85 // CHECK: ret <2 x i32> [[OP]]
86 int32x2_t test_vsudot_laneq_s32(int32x2_t r, int8x8_t a, uint8x16_t b) {
87 return vsudot_laneq_s32(r, a, b, 0);
90 // CHECK-LABEL: test_vusdotq_s32
91 // CHECK: [[VAL:%.*]] = call <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b)
92 // CHECK: ret <4 x i32> [[VAL]]
93 int32x4_t test_vusdotq_s32(int32x4_t r, uint8x16_t a, int8x16_t b) {
94 return vusdotq_s32(r, a, b);
97 // CHECK-LABEL: test_vusdotq_lane_s32
98 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %b to <2 x i32>
99 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
100 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
101 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <4 x i32> zeroinitializer
102 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
103 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> %r to <16 x i8>
104 // CHECK: [[OP:%.*]] = call <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> [[TMP4]])
105 // CHECK: ret <4 x i32> [[OP]]
106 int32x4_t test_vusdotq_lane_s32(int32x4_t r, uint8x16_t a, int8x8_t b) {
107 return vusdotq_lane_s32(r, a, b, 0);
110 // CHECK-LABEL: test_vsudotq_lane_s32
111 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %b to <2 x i32>
112 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
113 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
114 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <4 x i32> zeroinitializer
115 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
116 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> %r to <16 x i8>
117 // CHECK: [[OP:%.*]] = call <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> [[TMP4]], <16 x i8> %a)
118 // CHECK: ret <4 x i32> [[OP]]
119 int32x4_t test_vsudotq_lane_s32(int32x4_t r, int8x16_t a, uint8x8_t b) {
120 return vsudotq_lane_s32(r, a, b, 0);
123 // CHECK-LABEL: test_vusdotq_laneq_s32
124 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %b to <4 x i32>
125 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
126 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
127 // CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP2]], <4 x i32> zeroinitializer
128 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
129 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> %r to <16 x i8>
130 // CHECK: [[OP:%.*]] = call <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> [[TMP4]])
131 // CHECK: ret <4 x i32> [[OP]]
132 int32x4_t test_vusdotq_laneq_s32(int32x4_t r, uint8x16_t a, int8x16_t b) {
133 return vusdotq_laneq_s32(r, a, b, 0);
136 // CHECK-LABEL: test_vsudotq_laneq_s32
137 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %b to <4 x i32>
138 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
139 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
140 // CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP2]], <4 x i32> zeroinitializer
141 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
142 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> %r to <16 x i8>
143 // CHECK: [[OP:%.*]] = call <4 x i32> @llvm.aarch64.neon.usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> [[TMP4]], <16 x i8> %a)
144 // CHECK: ret <4 x i32> [[OP]]
145 int32x4_t test_vsudotq_laneq_s32(int32x4_t r, int8x16_t a, uint8x16_t b) {
146 return vsudotq_laneq_s32(r, a, b, 0);