// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
// REQUIRES: aarch64-registered-target || arm-registered-target

#include <arm_mve.h>
9 // CHECK-LABEL: @test_vadciq_s32(
11 // CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0)
12 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 1
13 // CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 29
14 // CHECK-NEXT: [[TMP3:%.*]] = and i32 1, [[TMP2]]
15 // CHECK-NEXT: store i32 [[TMP3]], ptr [[CARRY_OUT:%.*]], align 4
16 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
17 // CHECK-NEXT: ret <4 x i32> [[TMP4]]
19 int32x4_t
test_vadciq_s32(int32x4_t a
, int32x4_t b
, unsigned *carry_out
)
22 return vadciq(a
, b
, carry_out
);
23 #else /* POLYMORPHIC */
24 return vadciq_s32(a
, b
, carry_out
);
25 #endif /* POLYMORPHIC */
28 // CHECK-LABEL: @test_vadcq_u32(
30 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CARRY:%.*]], align 4
31 // CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 29
32 // CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[TMP1]])
33 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
34 // CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
35 // CHECK-NEXT: [[TMP5:%.*]] = and i32 1, [[TMP4]]
36 // CHECK-NEXT: store i32 [[TMP5]], ptr [[CARRY]], align 4
37 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
38 // CHECK-NEXT: ret <4 x i32> [[TMP6]]
40 uint32x4_t
test_vadcq_u32(uint32x4_t a
, uint32x4_t b
, unsigned *carry
)
43 return vadcq(a
, b
, carry
);
44 #else /* POLYMORPHIC */
45 return vadcq_u32(a
, b
, carry
);
46 #endif /* POLYMORPHIC */
49 // CHECK-LABEL: @test_vadciq_m_u32(
51 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
52 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
53 // CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
54 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
55 // CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
56 // CHECK-NEXT: [[TMP5:%.*]] = and i32 1, [[TMP4]]
57 // CHECK-NEXT: store i32 [[TMP5]], ptr [[CARRY_OUT:%.*]], align 4
58 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
59 // CHECK-NEXT: ret <4 x i32> [[TMP6]]
61 uint32x4_t
test_vadciq_m_u32(uint32x4_t inactive
, uint32x4_t a
, uint32x4_t b
, unsigned *carry_out
, mve_pred16_t p
)
64 return vadciq_m(inactive
, a
, b
, carry_out
, p
);
65 #else /* POLYMORPHIC */
66 return vadciq_m_u32(inactive
, a
, b
, carry_out
, p
);
67 #endif /* POLYMORPHIC */
70 // CHECK-LABEL: @test_vadcq_m_s32(
72 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CARRY:%.*]], align 4
73 // CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 29
74 // CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
75 // CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]])
76 // CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[TMP1]], <4 x i1> [[TMP3]])
77 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP4]], 1
78 // CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP5]], 29
79 // CHECK-NEXT: [[TMP7:%.*]] = and i32 1, [[TMP6]]
80 // CHECK-NEXT: store i32 [[TMP7]], ptr [[CARRY]], align 4
81 // CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP4]], 0
82 // CHECK-NEXT: ret <4 x i32> [[TMP8]]
84 int32x4_t
test_vadcq_m_s32(int32x4_t inactive
, int32x4_t a
, int32x4_t b
, unsigned *carry
, mve_pred16_t p
)
87 return vadcq_m(inactive
, a
, b
, carry
, p
);
88 #else /* POLYMORPHIC */
89 return vadcq_m_s32(inactive
, a
, b
, carry
, p
);
90 #endif /* POLYMORPHIC */
93 // CHECK-LABEL: @test_vsbciq_s32(
95 // CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0)
96 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 1
97 // CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 29
98 // CHECK-NEXT: [[TMP3:%.*]] = and i32 1, [[TMP2]]
99 // CHECK-NEXT: store i32 [[TMP3]], ptr [[CARRY_OUT:%.*]], align 4
100 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
101 // CHECK-NEXT: ret <4 x i32> [[TMP4]]
103 int32x4_t
test_vsbciq_s32(int32x4_t a
, int32x4_t b
, unsigned *carry_out
) {
105 return vsbciq(a
, b
, carry_out
);
106 #else /* POLYMORPHIC */
107 return vsbciq_s32(a
, b
, carry_out
);
108 #endif /* POLYMORPHIC */
111 // CHECK-LABEL: @test_vsbciq_u32(
112 // CHECK-NEXT: entry:
113 // CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0)
114 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 1
115 // CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 29
116 // CHECK-NEXT: [[TMP3:%.*]] = and i32 1, [[TMP2]]
117 // CHECK-NEXT: store i32 [[TMP3]], ptr [[CARRY_OUT:%.*]], align 4
118 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
119 // CHECK-NEXT: ret <4 x i32> [[TMP4]]
121 uint32x4_t
test_vsbciq_u32(uint32x4_t a
, uint32x4_t b
, unsigned *carry_out
) {
123 return vsbciq(a
, b
, carry_out
);
124 #else /* POLYMORPHIC */
125 return vsbciq_u32(a
, b
, carry_out
);
126 #endif /* POLYMORPHIC */
129 // CHECK-LABEL: @test_vsbcq_s32(
130 // CHECK-NEXT: entry:
131 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CARRY:%.*]], align 4
132 // CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 29
133 // CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[TMP1]])
134 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
135 // CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
136 // CHECK-NEXT: [[TMP5:%.*]] = and i32 1, [[TMP4]]
137 // CHECK-NEXT: store i32 [[TMP5]], ptr [[CARRY]], align 4
138 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
139 // CHECK-NEXT: ret <4 x i32> [[TMP6]]
141 int32x4_t
test_vsbcq_s32(int32x4_t a
, int32x4_t b
, unsigned *carry
) {
143 return vsbcq(a
, b
, carry
);
144 #else /* POLYMORPHIC */
145 return vsbcq_s32(a
, b
, carry
);
146 #endif /* POLYMORPHIC */
149 // CHECK-LABEL: @test_vsbcq_u32(
150 // CHECK-NEXT: entry:
151 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CARRY:%.*]], align 4
152 // CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 29
153 // CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[TMP1]])
154 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
155 // CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
156 // CHECK-NEXT: [[TMP5:%.*]] = and i32 1, [[TMP4]]
157 // CHECK-NEXT: store i32 [[TMP5]], ptr [[CARRY]], align 4
158 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
159 // CHECK-NEXT: ret <4 x i32> [[TMP6]]
161 uint32x4_t
test_vsbcq_u32(uint32x4_t a
, uint32x4_t b
, unsigned *carry
) {
163 return vsbcq(a
, b
, carry
);
164 #else /* POLYMORPHIC */
165 return vsbcq_u32(a
, b
, carry
);
166 #endif /* POLYMORPHIC */
169 // CHECK-LABEL: @test_vsbciq_m_s32(
170 // CHECK-NEXT: entry:
171 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
172 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
173 // CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
174 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
175 // CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
176 // CHECK-NEXT: [[TMP5:%.*]] = and i32 1, [[TMP4]]
177 // CHECK-NEXT: store i32 [[TMP5]], ptr [[CARRY_OUT:%.*]], align 4
178 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
179 // CHECK-NEXT: ret <4 x i32> [[TMP6]]
181 int32x4_t
test_vsbciq_m_s32(int32x4_t inactive
, int32x4_t a
, int32x4_t b
, unsigned *carry_out
, mve_pred16_t p
) {
183 return vsbciq_m(inactive
, a
, b
, carry_out
, p
);
184 #else /* POLYMORPHIC */
185 return vsbciq_m_s32(inactive
, a
, b
, carry_out
, p
);
186 #endif /* POLYMORPHIC */
189 // CHECK-LABEL: @test_vsbciq_m_u32(
190 // CHECK-NEXT: entry:
191 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
192 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
193 // CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
194 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
195 // CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
196 // CHECK-NEXT: [[TMP5:%.*]] = and i32 1, [[TMP4]]
197 // CHECK-NEXT: store i32 [[TMP5]], ptr [[CARRY_OUT:%.*]], align 4
198 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
199 // CHECK-NEXT: ret <4 x i32> [[TMP6]]
201 uint32x4_t
test_vsbciq_m_u32(uint32x4_t inactive
, uint32x4_t a
, uint32x4_t b
, unsigned *carry_out
, mve_pred16_t p
) {
203 return vsbciq_m(inactive
, a
, b
, carry_out
, p
);
204 #else /* POLYMORPHIC */
205 return vsbciq_m_u32(inactive
, a
, b
, carry_out
, p
);
206 #endif /* POLYMORPHIC */
209 // CHECK-LABEL: @test_vsbcq_m_s32(
210 // CHECK-NEXT: entry:
211 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CARRY:%.*]], align 4
212 // CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 29
213 // CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
214 // CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]])
215 // CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[TMP1]], <4 x i1> [[TMP3]])
216 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP4]], 1
217 // CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP5]], 29
218 // CHECK-NEXT: [[TMP7:%.*]] = and i32 1, [[TMP6]]
219 // CHECK-NEXT: store i32 [[TMP7]], ptr [[CARRY]], align 4
220 // CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP4]], 0
221 // CHECK-NEXT: ret <4 x i32> [[TMP8]]
223 int32x4_t
test_vsbcq_m_s32(int32x4_t inactive
, int32x4_t a
, int32x4_t b
, unsigned *carry
, mve_pred16_t p
) {
225 return vsbcq_m(inactive
, a
, b
, carry
, p
);
226 #else /* POLYMORPHIC */
227 return vsbcq_m_s32(inactive
, a
, b
, carry
, p
);
228 #endif /* POLYMORPHIC */
231 // CHECK-LABEL: @test_vsbcq_m_u32(
232 // CHECK-NEXT: entry:
233 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[CARRY:%.*]], align 4
234 // CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 29
235 // CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
236 // CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]])
237 // CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[TMP1]], <4 x i1> [[TMP3]])
238 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP4]], 1
239 // CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP5]], 29
240 // CHECK-NEXT: [[TMP7:%.*]] = and i32 1, [[TMP6]]
241 // CHECK-NEXT: store i32 [[TMP7]], ptr [[CARRY]], align 4
242 // CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP4]], 0
243 // CHECK-NEXT: ret <4 x i32> [[TMP8]]
245 uint32x4_t
test_vsbcq_m_u32(uint32x4_t inactive
, uint32x4_t a
, uint32x4_t b
, unsigned *carry
, mve_pred16_t p
) {
247 return vsbcq_m(inactive
, a
, b
, carry
, p
);
248 #else /* POLYMORPHIC */
249 return vsbcq_m_u32(inactive
, a
, b
, carry
, p
);
250 #endif /* POLYMORPHIC */