// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s

// REQUIRES: aarch64-registered-target || arm-registered-target

#include <arm_mve.h>
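// The tests below check that the vshlcq intrinsics, in both their polymorphic
// (vshlcq) and explicitly typed (vshlcq_<type>) spellings, lower to the
// llvm.arm.mve.vshlc IR intrinsic with the expected element type and shift
// immediate.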
// CHECK-LABEL: @test_vshlcq_s8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, <16 x i8> } @llvm.arm.mve.vshlc.v16i8(<16 x i8> [[A:%.*]], i32 [[TMP0]], i32 18)
// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i32, <16 x i8> } [[TMP1]], 0
// CHECK-NEXT:    store i32 [[TMP2]], ptr [[B]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, <16 x i8> } [[TMP1]], 1
// CHECK-NEXT:    ret <16 x i8> [[TMP3]]
//
int8x16_t test_vshlcq_s8(int8x16_t a, uint32_t *b) {
#ifdef POLYMORPHIC
  return vshlcq(a, b, 18);
#else /* POLYMORPHIC */
  return vshlcq_s8(a, b, 18);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vshlcq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, <8 x i16> } @llvm.arm.mve.vshlc.v8i16(<8 x i16> [[A:%.*]], i32 [[TMP0]], i32 16)
// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i32, <8 x i16> } [[TMP1]], 0
// CHECK-NEXT:    store i32 [[TMP2]], ptr [[B]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, <8 x i16> } [[TMP1]], 1
// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
//
int16x8_t test_vshlcq_s16(int16x8_t a, uint32_t *b) {
#ifdef POLYMORPHIC
  return vshlcq(a, b, 16);
#else /* POLYMORPHIC */
  return vshlcq_s16(a, b, 16);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vshlcq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, <4 x i32> } @llvm.arm.mve.vshlc.v4i32(<4 x i32> [[A:%.*]], i32 [[TMP0]], i32 4)
// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i32, <4 x i32> } [[TMP1]], 0
// CHECK-NEXT:    store i32 [[TMP2]], ptr [[B]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, <4 x i32> } [[TMP1]], 1
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
int32x4_t test_vshlcq_s32(int32x4_t a, uint32_t *b) {
#ifdef POLYMORPHIC
  return vshlcq(a, b, 4);
#else /* POLYMORPHIC */
  return vshlcq_s32(a, b, 4);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vshlcq_u8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, <16 x i8> } @llvm.arm.mve.vshlc.v16i8(<16 x i8> [[A:%.*]], i32 [[TMP0]], i32 17)
// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i32, <16 x i8> } [[TMP1]], 0
// CHECK-NEXT:    store i32 [[TMP2]], ptr [[B]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, <16 x i8> } [[TMP1]], 1
// CHECK-NEXT:    ret <16 x i8> [[TMP3]]
//
uint8x16_t test_vshlcq_u8(uint8x16_t a, uint32_t *b) {
#ifdef POLYMORPHIC
  return vshlcq(a, b, 17);
#else /* POLYMORPHIC */
  return vshlcq_u8(a, b, 17);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vshlcq_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, <8 x i16> } @llvm.arm.mve.vshlc.v8i16(<8 x i16> [[A:%.*]], i32 [[TMP0]], i32 17)
// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i32, <8 x i16> } [[TMP1]], 0
// CHECK-NEXT:    store i32 [[TMP2]], ptr [[B]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, <8 x i16> } [[TMP1]], 1
// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
//
uint16x8_t test_vshlcq_u16(uint16x8_t a, uint32_t *b) {
#ifdef POLYMORPHIC
  return vshlcq(a, b, 17);
#else /* POLYMORPHIC */
  return vshlcq_u16(a, b, 17);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vshlcq_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = call { i32, <4 x i32> } @llvm.arm.mve.vshlc.v4i32(<4 x i32> [[A:%.*]], i32 [[TMP0]], i32 20)
// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i32, <4 x i32> } [[TMP1]], 0
// CHECK-NEXT:    store i32 [[TMP2]], ptr [[B]], align 4
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, <4 x i32> } [[TMP1]], 1
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
uint32x4_t test_vshlcq_u32(uint32x4_t a, uint32_t *b) {
#ifdef POLYMORPHIC
  return vshlcq(a, b, 20);
#else /* POLYMORPHIC */
  return vshlcq_u32(a, b, 20);
#endif /* POLYMORPHIC */
}

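// The remaining tests cover the predicated vshlcq_m variants: the
// mve_pred16_t argument is expanded via llvm.arm.mve.pred.i2v before the
// call to llvm.arm.mve.vshlc.predicated.
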
// CHECK-LABEL: @test_vshlcq_m_s8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, <16 x i8> } @llvm.arm.mve.vshlc.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 [[TMP0]], i32 29, <16 x i1> [[TMP2]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, <16 x i8> } [[TMP3]], 0
// CHECK-NEXT:    store i32 [[TMP4]], ptr [[B]], align 4
// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { i32, <16 x i8> } [[TMP3]], 1
// CHECK-NEXT:    ret <16 x i8> [[TMP5]]
//
int8x16_t test_vshlcq_m_s8(int8x16_t a, uint32_t *b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vshlcq_m(a, b, 29, p);
#else /* POLYMORPHIC */
  return vshlcq_m_s8(a, b, 29, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vshlcq_m_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, <8 x i16> } @llvm.arm.mve.vshlc.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 [[TMP0]], i32 17, <8 x i1> [[TMP2]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, <8 x i16> } [[TMP3]], 0
// CHECK-NEXT:    store i32 [[TMP4]], ptr [[B]], align 4
// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { i32, <8 x i16> } [[TMP3]], 1
// CHECK-NEXT:    ret <8 x i16> [[TMP5]]
//
int16x8_t test_vshlcq_m_s16(int16x8_t a, uint32_t *b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vshlcq_m(a, b, 17, p);
#else /* POLYMORPHIC */
  return vshlcq_m_s16(a, b, 17, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vshlcq_m_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, <4 x i32> } @llvm.arm.mve.vshlc.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 [[TMP0]], i32 9, <4 x i1> [[TMP2]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, <4 x i32> } [[TMP3]], 0
// CHECK-NEXT:    store i32 [[TMP4]], ptr [[B]], align 4
// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { i32, <4 x i32> } [[TMP3]], 1
// CHECK-NEXT:    ret <4 x i32> [[TMP5]]
//
int32x4_t test_vshlcq_m_s32(int32x4_t a, uint32_t *b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vshlcq_m(a, b, 9, p);
#else /* POLYMORPHIC */
  return vshlcq_m_s32(a, b, 9, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vshlcq_m_u8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, <16 x i8> } @llvm.arm.mve.vshlc.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 [[TMP0]], i32 21, <16 x i1> [[TMP2]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, <16 x i8> } [[TMP3]], 0
// CHECK-NEXT:    store i32 [[TMP4]], ptr [[B]], align 4
// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { i32, <16 x i8> } [[TMP3]], 1
// CHECK-NEXT:    ret <16 x i8> [[TMP5]]
//
uint8x16_t test_vshlcq_m_u8(uint8x16_t a, uint32_t *b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vshlcq_m(a, b, 21, p);
#else /* POLYMORPHIC */
  return vshlcq_m_u8(a, b, 21, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vshlcq_m_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, <8 x i16> } @llvm.arm.mve.vshlc.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 [[TMP0]], i32 24, <8 x i1> [[TMP2]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, <8 x i16> } [[TMP3]], 0
// CHECK-NEXT:    store i32 [[TMP4]], ptr [[B]], align 4
// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { i32, <8 x i16> } [[TMP3]], 1
// CHECK-NEXT:    ret <8 x i16> [[TMP5]]
//
uint16x8_t test_vshlcq_m_u16(uint16x8_t a, uint32_t *b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vshlcq_m(a, b, 24, p);
#else /* POLYMORPHIC */
  return vshlcq_m_u16(a, b, 24, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vshlcq_m_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, <4 x i32> } @llvm.arm.mve.vshlc.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 [[TMP0]], i32 26, <4 x i1> [[TMP2]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, <4 x i32> } [[TMP3]], 0
// CHECK-NEXT:    store i32 [[TMP4]], ptr [[B]], align 4
// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { i32, <4 x i32> } [[TMP3]], 1
// CHECK-NEXT:    ret <4 x i32> [[TMP5]]
//
uint32x4_t test_vshlcq_m_u32(uint32x4_t a, uint32_t *b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vshlcq_m(a, b, 26, p);
#else /* POLYMORPHIC */
  return vshlcq_m_u32(a, b, 26, p);
#endif /* POLYMORPHIC */
}