1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2 // RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
3 // RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
// REQUIRES: aarch64-registered-target || arm-registered-target

#include <arm_mve.h>
9 // CHECK-LABEL: @test_vmlaldavaq_s16(
11 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
12 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
13 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
14 // CHECK-NEXT: [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
15 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
16 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
17 // CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 32
18 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
19 // CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
20 // CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
21 // CHECK-NEXT: ret i64 [[TMP9]]
23 int64_t test_vmlaldavaq_s16(int64_t a
, int16x8_t b
, int16x8_t c
) {
25 return vmlaldavaq(a
, b
, c
);
27 return vmlaldavaq_s16(a
, b
, c
);
31 // CHECK-LABEL: @test_vmlaldavaq_s32(
33 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
34 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
35 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
36 // CHECK-NEXT: [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
37 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
38 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
39 // CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 32
40 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
41 // CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
42 // CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
43 // CHECK-NEXT: ret i64 [[TMP9]]
45 int64_t test_vmlaldavaq_s32(int64_t a
, int32x4_t b
, int32x4_t c
) {
47 return vmlaldavaq(a
, b
, c
);
49 return vmlaldavaq_s32(a
, b
, c
);
53 // CHECK-LABEL: @test_vmlaldavaq_u16(
55 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
56 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
57 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
58 // CHECK-NEXT: [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 1, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
59 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
60 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
61 // CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 32
62 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
63 // CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
64 // CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
65 // CHECK-NEXT: ret i64 [[TMP9]]
67 uint64_t test_vmlaldavaq_u16(uint64_t a
, uint16x8_t b
, uint16x8_t c
) {
69 return vmlaldavaq(a
, b
, c
);
71 return vmlaldavaq_u16(a
, b
, c
);
75 // CHECK-LABEL: @test_vmlaldavaq_u32(
77 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
78 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
79 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
80 // CHECK-NEXT: [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 1, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
81 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
82 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
83 // CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 32
84 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
85 // CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
86 // CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
87 // CHECK-NEXT: ret i64 [[TMP9]]
89 uint64_t test_vmlaldavaq_u32(uint64_t a
, uint32x4_t b
, uint32x4_t c
) {
91 return vmlaldavaq(a
, b
, c
);
93 return vmlaldavaq_u32(a
, b
, c
);
97 // CHECK-LABEL: @test_vmlaldavaxq_s16(
99 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
100 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
101 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
102 // CHECK-NEXT: [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 1, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
103 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
104 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
105 // CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 32
106 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
107 // CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
108 // CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
109 // CHECK-NEXT: ret i64 [[TMP9]]
111 int64_t test_vmlaldavaxq_s16(int64_t a
, int16x8_t b
, int16x8_t c
) {
113 return vmlaldavaxq(a
, b
, c
);
115 return vmlaldavaxq_s16(a
, b
, c
);
119 // CHECK-LABEL: @test_vmlaldavaxq_s32(
120 // CHECK-NEXT: entry:
121 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
122 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
123 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
124 // CHECK-NEXT: [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 0, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
125 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
126 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
127 // CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 32
128 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
129 // CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
130 // CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
131 // CHECK-NEXT: ret i64 [[TMP9]]
133 int64_t test_vmlaldavaxq_s32(int64_t a
, int32x4_t b
, int32x4_t c
) {
135 return vmlaldavaxq(a
, b
, c
);
137 return vmlaldavaxq_s32(a
, b
, c
);
141 // CHECK-LABEL: @test_vmlsldavaq_s16(
142 // CHECK-NEXT: entry:
143 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
144 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
145 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
146 // CHECK-NEXT: [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 1, i32 0, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
147 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
148 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
149 // CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 32
150 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
151 // CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
152 // CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
153 // CHECK-NEXT: ret i64 [[TMP9]]
155 int64_t test_vmlsldavaq_s16(int64_t a
, int16x8_t b
, int16x8_t c
) {
157 return vmlsldavaq(a
, b
, c
);
159 return vmlsldavaq_s16(a
, b
, c
);
163 // CHECK-LABEL: @test_vmlsldavaq_s32(
164 // CHECK-NEXT: entry:
165 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
166 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
167 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
168 // CHECK-NEXT: [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 1, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
169 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
170 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
171 // CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 32
172 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
173 // CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
174 // CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
175 // CHECK-NEXT: ret i64 [[TMP9]]
177 int64_t test_vmlsldavaq_s32(int64_t a
, int32x4_t b
, int32x4_t c
) {
179 return vmlsldavaq(a
, b
, c
);
181 return vmlsldavaq_s32(a
, b
, c
);
185 // CHECK-LABEL: @test_vmlsldaxvaq_s16(
186 // CHECK-NEXT: entry:
187 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
188 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
189 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
190 // CHECK-NEXT: [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 1, i32 1, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
191 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
192 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
193 // CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 32
194 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
195 // CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
196 // CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
197 // CHECK-NEXT: ret i64 [[TMP9]]
199 int64_t test_vmlsldaxvaq_s16(int64_t a
, int16x8_t b
, int16x8_t c
) {
201 return vmlsldavaxq(a
, b
, c
);
203 return vmlsldavaxq_s16(a
, b
, c
);
207 // CHECK-LABEL: @test_vmlsldavaxq_s32(
208 // CHECK-NEXT: entry:
209 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
210 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
211 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
212 // CHECK-NEXT: [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 1, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
213 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
214 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
215 // CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 32
216 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
217 // CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
218 // CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
219 // CHECK-NEXT: ret i64 [[TMP9]]
221 int64_t test_vmlsldavaxq_s32(int64_t a
, int32x4_t b
, int32x4_t c
) {
223 return vmlsldavaxq(a
, b
, c
);
225 return vmlsldavaxq_s32(a
, b
, c
);
229 // CHECK-LABEL: @test_vrmlaldavhaq_s32(
230 // CHECK-NEXT: entry:
231 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
232 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
233 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
234 // CHECK-NEXT: [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
235 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
236 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
237 // CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 32
238 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
239 // CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
240 // CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
241 // CHECK-NEXT: ret i64 [[TMP9]]
243 int64_t test_vrmlaldavhaq_s32(int64_t a
, int32x4_t b
, int32x4_t c
) {
245 return vrmlaldavhaq(a
, b
, c
);
247 return vrmlaldavhaq_s32(a
, b
, c
);
251 // CHECK-LABEL: @test_vrmlaldavhaq_u32(
252 // CHECK-NEXT: entry:
253 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
254 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
255 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
256 // CHECK-NEXT: [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 1, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
257 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
258 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
259 // CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 32
260 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
261 // CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
262 // CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
263 // CHECK-NEXT: ret i64 [[TMP9]]
265 uint64_t test_vrmlaldavhaq_u32(uint64_t a
, uint32x4_t b
, uint32x4_t c
) {
267 return vrmlaldavhaq(a
, b
, c
);
269 return vrmlaldavhaq_u32(a
, b
, c
);
273 // CHECK-LABEL: @test_vrmlaldavhaxq_s32(
274 // CHECK-NEXT: entry:
275 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
276 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
277 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
278 // CHECK-NEXT: [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 0, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
279 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
280 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
281 // CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 32
282 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
283 // CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
284 // CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
285 // CHECK-NEXT: ret i64 [[TMP9]]
287 int64_t test_vrmlaldavhaxq_s32(int64_t a
, int32x4_t b
, int32x4_t c
) {
289 return vrmlaldavhaxq(a
, b
, c
);
291 return vrmlaldavhaxq_s32(a
, b
, c
);
295 // CHECK-LABEL: @test_vrmlsldavhaq_s32(
296 // CHECK-NEXT: entry:
297 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
298 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
299 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
300 // CHECK-NEXT: [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 1, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
301 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
302 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
303 // CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 32
304 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
305 // CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
306 // CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
307 // CHECK-NEXT: ret i64 [[TMP9]]
309 int64_t test_vrmlsldavhaq_s32(int64_t a
, int32x4_t b
, int32x4_t c
) {
311 return vrmlsldavhaq(a
, b
, c
);
313 return vrmlsldavhaq_s32(a
, b
, c
);
317 // CHECK-LABEL: @test_vrmlsldavhaxq_s32(
318 // CHECK-NEXT: entry:
319 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
320 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
321 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
322 // CHECK-NEXT: [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 1, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
323 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
324 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
325 // CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 32
326 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
327 // CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
328 // CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
329 // CHECK-NEXT: ret i64 [[TMP9]]
331 int64_t test_vrmlsldavhaxq_s32(int64_t a
, int32x4_t b
, int32x4_t c
) {
333 return vrmlsldavhaxq(a
, b
, c
);
335 return vrmlsldavhaxq_s32(a
, b
, c
);
339 // CHECK-LABEL: @test_vmlaldavaq_p_s16(
340 // CHECK-NEXT: entry:
341 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
342 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
343 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
344 // CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
345 // CHECK-NEXT: [[TMP4:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP3]])
346 // CHECK-NEXT: [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], <8 x i1> [[TMP4]])
347 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
348 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
349 // CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 32
350 // CHECK-NEXT: [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
351 // CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
352 // CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
353 // CHECK-NEXT: ret i64 [[TMP11]]
355 int64_t test_vmlaldavaq_p_s16(int64_t a
, int16x8_t b
, int16x8_t c
, mve_pred16_t p
) {
357 return vmlaldavaq_p(a
, b
, c
, p
);
359 return vmlaldavaq_p_s16(a
, b
, c
, p
);
363 // CHECK-LABEL: @test_vmlaldavaq_p_s32(
364 // CHECK-NEXT: entry:
365 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
366 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
367 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
368 // CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
369 // CHECK-NEXT: [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
370 // CHECK-NEXT: [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
371 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
372 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
373 // CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 32
374 // CHECK-NEXT: [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
375 // CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
376 // CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
377 // CHECK-NEXT: ret i64 [[TMP11]]
379 int64_t test_vmlaldavaq_p_s32(int64_t a
, int32x4_t b
, int32x4_t c
, mve_pred16_t p
) {
381 return vmlaldavaq_p(a
, b
, c
, p
);
383 return vmlaldavaq_p_s32(a
, b
, c
, p
);
387 // CHECK-LABEL: @test_vmlaldavaq_p_u16(
388 // CHECK-NEXT: entry:
389 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
390 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
391 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
392 // CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
393 // CHECK-NEXT: [[TMP4:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP3]])
394 // CHECK-NEXT: [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 1, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], <8 x i1> [[TMP4]])
395 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
396 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
397 // CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 32
398 // CHECK-NEXT: [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
399 // CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
400 // CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
401 // CHECK-NEXT: ret i64 [[TMP11]]
403 uint64_t test_vmlaldavaq_p_u16(uint64_t a
, uint16x8_t b
, uint16x8_t c
, mve_pred16_t p
) {
405 return vmlaldavaq_p(a
, b
, c
, p
);
407 return vmlaldavaq_p_u16(a
, b
, c
, p
);
411 // CHECK-LABEL: @test_vmlaldavaq_p_u32(
412 // CHECK-NEXT: entry:
413 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
414 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
415 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
416 // CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
417 // CHECK-NEXT: [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
418 // CHECK-NEXT: [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 1, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
419 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
420 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
421 // CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 32
422 // CHECK-NEXT: [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
423 // CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
424 // CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
425 // CHECK-NEXT: ret i64 [[TMP11]]
427 uint64_t test_vmlaldavaq_p_u32(uint64_t a
, uint32x4_t b
, uint32x4_t c
, mve_pred16_t p
) {
429 return vmlaldavaq_p(a
, b
, c
, p
);
431 return vmlaldavaq_p_u32(a
, b
, c
, p
);
435 // CHECK-LABEL: @test_vmlaldavaxq_p_s16(
436 // CHECK-NEXT: entry:
437 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
438 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
439 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
440 // CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
441 // CHECK-NEXT: [[TMP4:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP3]])
442 // CHECK-NEXT: [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 1, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], <8 x i1> [[TMP4]])
443 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
444 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
445 // CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 32
446 // CHECK-NEXT: [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
447 // CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
448 // CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
449 // CHECK-NEXT: ret i64 [[TMP11]]
451 int64_t test_vmlaldavaxq_p_s16(int64_t a
, int16x8_t b
, int16x8_t c
, mve_pred16_t p
) {
453 return vmlaldavaxq_p(a
, b
, c
, p
);
455 return vmlaldavaxq_p_s16(a
, b
, c
, p
);
459 // CHECK-LABEL: @test_vmlaldavaxq_p_s32(
460 // CHECK-NEXT: entry:
461 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
462 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
463 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
464 // CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
465 // CHECK-NEXT: [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
466 // CHECK-NEXT: [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 0, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
467 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
468 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
469 // CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 32
470 // CHECK-NEXT: [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
471 // CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
472 // CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
473 // CHECK-NEXT: ret i64 [[TMP11]]
475 int64_t test_vmlaldavaxq_p_s32(int64_t a
, int32x4_t b
, int32x4_t c
, mve_pred16_t p
) {
477 return vmlaldavaxq_p(a
, b
, c
, p
);
479 return vmlaldavaxq_p_s32(a
, b
, c
, p
);
483 // CHECK-LABEL: @test_vmlsldavaq_p_s16(
484 // CHECK-NEXT: entry:
485 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
486 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
487 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
488 // CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
489 // CHECK-NEXT: [[TMP4:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP3]])
490 // CHECK-NEXT: [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 1, i32 0, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], <8 x i1> [[TMP4]])
491 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
492 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
493 // CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 32
494 // CHECK-NEXT: [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
495 // CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
496 // CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
497 // CHECK-NEXT: ret i64 [[TMP11]]
499 int64_t test_vmlsldavaq_p_s16(int64_t a
, int16x8_t b
, int16x8_t c
, mve_pred16_t p
) {
501 return vmlsldavaq_p(a
, b
, c
, p
);
503 return vmlsldavaq_p_s16(a
, b
, c
, p
);
507 // CHECK-LABEL: @test_vmlsldavaq_p_s32(
508 // CHECK-NEXT: entry:
509 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
510 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
511 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
512 // CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
513 // CHECK-NEXT: [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
514 // CHECK-NEXT: [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 1, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
515 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
516 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
517 // CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 32
518 // CHECK-NEXT: [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
519 // CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
520 // CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
521 // CHECK-NEXT: ret i64 [[TMP11]]
523 int64_t test_vmlsldavaq_p_s32(int64_t a
, int32x4_t b
, int32x4_t c
, mve_pred16_t p
) {
525 return vmlsldavaq_p(a
, b
, c
, p
);
527 return vmlsldavaq_p_s32(a
, b
, c
, p
);
531 // CHECK-LABEL: @test_vmlsldaxvaq_p_s16(
532 // CHECK-NEXT: entry:
533 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
534 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
535 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
536 // CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
537 // CHECK-NEXT: [[TMP4:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP3]])
538 // CHECK-NEXT: [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 1, i32 1, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], <8 x i1> [[TMP4]])
539 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
540 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
541 // CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 32
542 // CHECK-NEXT: [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
543 // CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
544 // CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
545 // CHECK-NEXT: ret i64 [[TMP11]]
547 int64_t test_vmlsldaxvaq_p_s16(int64_t a
, int16x8_t b
, int16x8_t c
, mve_pred16_t p
) {
549 return vmlsldavaxq_p(a
, b
, c
, p
);
551 return vmlsldavaxq_p_s16(a
, b
, c
, p
);
555 // CHECK-LABEL: @test_vmlsldavaxq_p_s32(
556 // CHECK-NEXT: entry:
557 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
558 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
559 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
560 // CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
561 // CHECK-NEXT: [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
562 // CHECK-NEXT: [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 1, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
563 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
564 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
565 // CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 32
566 // CHECK-NEXT: [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
567 // CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
568 // CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
569 // CHECK-NEXT: ret i64 [[TMP11]]
571 int64_t test_vmlsldavaxq_p_s32(int64_t a
, int32x4_t b
, int32x4_t c
, mve_pred16_t p
) {
573 return vmlsldavaxq_p(a
, b
, c
, p
);
575 return vmlsldavaxq_p_s32(a
, b
, c
, p
);
579 // CHECK-LABEL: @test_vrmlaldavhaq_p_s32(
580 // CHECK-NEXT: entry:
581 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
582 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
583 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
584 // CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
585 // CHECK-NEXT: [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
586 // CHECK-NEXT: [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
587 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
588 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
589 // CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 32
590 // CHECK-NEXT: [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
591 // CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
592 // CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
593 // CHECK-NEXT: ret i64 [[TMP11]]
595 int64_t test_vrmlaldavhaq_p_s32(int64_t a
, int32x4_t b
, int32x4_t c
, mve_pred16_t p
) {
597 return vrmlaldavhaq_p(a
, b
, c
, p
);
599 return vrmlaldavhaq_p_s32(a
, b
, c
, p
);
603 // CHECK-LABEL: @test_vrmlaldavhaq_p_u32(
604 // CHECK-NEXT: entry:
605 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
606 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
607 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
608 // CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
609 // CHECK-NEXT: [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
610 // CHECK-NEXT: [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 1, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
611 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
612 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
613 // CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 32
614 // CHECK-NEXT: [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
615 // CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
616 // CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
617 // CHECK-NEXT: ret i64 [[TMP11]]
619 uint64_t test_vrmlaldavhaq_p_u32(uint64_t a
, uint32x4_t b
, uint32x4_t c
, mve_pred16_t p
) {
621 return vrmlaldavhaq_p(a
, b
, c
, p
);
623 return vrmlaldavhaq_p_u32(a
, b
, c
, p
);
627 // CHECK-LABEL: @test_vrmlaldavhaxq_p_s32(
628 // CHECK-NEXT: entry:
629 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
630 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
631 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
632 // CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
633 // CHECK-NEXT: [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
634 // CHECK-NEXT: [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 0, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
635 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
636 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
637 // CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 32
638 // CHECK-NEXT: [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
639 // CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
640 // CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
641 // CHECK-NEXT: ret i64 [[TMP11]]
643 int64_t test_vrmlaldavhaxq_p_s32(int64_t a
, int32x4_t b
, int32x4_t c
, mve_pred16_t p
) {
645 return vrmlaldavhaxq_p(a
, b
, c
, p
);
647 return vrmlaldavhaxq_p_s32(a
, b
, c
, p
);
651 // CHECK-LABEL: @test_vrmlsldavhaq_p_s32(
652 // CHECK-NEXT: entry:
653 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
654 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
655 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
656 // CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
657 // CHECK-NEXT: [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
658 // CHECK-NEXT: [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 1, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
659 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
660 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
661 // CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 32
662 // CHECK-NEXT: [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
663 // CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
664 // CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
665 // CHECK-NEXT: ret i64 [[TMP11]]
667 int64_t test_vrmlsldavhaq_p_s32(int64_t a
, int32x4_t b
, int32x4_t c
, mve_pred16_t p
) {
669 return vrmlsldavhaq_p(a
, b
, c
, p
);
671 return vrmlsldavhaq_p_s32(a
, b
, c
, p
);
675 // CHECK-LABEL: @test_vrmlsldavhaxq_p_s32(
676 // CHECK-NEXT: entry:
677 // CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
678 // CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
679 // CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[A]] to i32
680 // CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
681 // CHECK-NEXT: [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
682 // CHECK-NEXT: [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 1, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
683 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
684 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
685 // CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 32
686 // CHECK-NEXT: [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
687 // CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
688 // CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
689 // CHECK-NEXT: ret i64 [[TMP11]]
// Test wrapper for vrmlsldavhaxq_p taking a 64-bit scalar a, two int32x4_t
// vectors b and c, and a predicate p. The autogenerated assertions just above
// expect a call to @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1.
// The body returns the same call under two spellings: un-suffixed
// (vrmlsldavhaxq_p) and type-suffixed (vrmlsldavhaxq_p_s32).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
691 int64_t test_vrmlsldavhaxq_p_s32(int64_t a
, int32x4_t b
, int32x4_t c
, mve_pred16_t p
) {
693 return vrmlsldavhaxq_p(a
, b
, c
, p
);
695 return vrmlsldavhaxq_p_s32(a
, b
, c
, p
);
699 // CHECK-LABEL: @test_vmlaldavq_s16(
700 // CHECK-NEXT: entry:
701 // CHECK-NEXT: [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 0, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
702 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
703 // CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
704 // CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 32
705 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
706 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
707 // CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
708 // CHECK-NEXT: ret i64 [[TMP6]]
// Test wrapper for vmlaldavq on two int16x8_t vectors. The autogenerated
// assertions just above expect a call to @llvm.arm.mve.vmlldava.v8i16.
// The body returns the same call under two spellings: un-suffixed
// (vmlaldavq) and type-suffixed (vmlaldavq_s16).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
710 int64_t test_vmlaldavq_s16(int16x8_t a
, int16x8_t b
) {
712 return vmlaldavq(a
, b
);
714 return vmlaldavq_s16(a
, b
);
718 // CHECK-LABEL: @test_vmlaldavq_s32(
719 // CHECK-NEXT: entry:
720 // CHECK-NEXT: [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
721 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
722 // CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
723 // CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 32
724 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
725 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
726 // CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
727 // CHECK-NEXT: ret i64 [[TMP6]]
// Test wrapper for vmlaldavq on two int32x4_t vectors. The autogenerated
// assertions just above expect a call to @llvm.arm.mve.vmlldava.v4i32.
// The body returns the same call under two spellings: un-suffixed
// (vmlaldavq) and type-suffixed (vmlaldavq_s32).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
729 int64_t test_vmlaldavq_s32(int32x4_t a
, int32x4_t b
) {
731 return vmlaldavq(a
, b
);
733 return vmlaldavq_s32(a
, b
);
737 // CHECK-LABEL: @test_vmlaldavq_u16(
738 // CHECK-NEXT: entry:
739 // CHECK-NEXT: [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 1, i32 0, i32 0, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
740 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
741 // CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
742 // CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 32
743 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
744 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
745 // CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
746 // CHECK-NEXT: ret i64 [[TMP6]]
// Test wrapper for vmlaldavq on two uint16x8_t vectors, returning uint64_t.
// The autogenerated assertions just above expect a call to
// @llvm.arm.mve.vmlldava.v8i16 whose first constant argument is 1.
// The body returns the same call under two spellings: un-suffixed
// (vmlaldavq) and type-suffixed (vmlaldavq_u16).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
748 uint64_t test_vmlaldavq_u16(uint16x8_t a
, uint16x8_t b
) {
750 return vmlaldavq(a
, b
);
752 return vmlaldavq_u16(a
, b
);
756 // CHECK-LABEL: @test_vmlaldavq_u32(
757 // CHECK-NEXT: entry:
758 // CHECK-NEXT: [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 1, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
759 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
760 // CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
761 // CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 32
762 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
763 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
764 // CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
765 // CHECK-NEXT: ret i64 [[TMP6]]
// Test wrapper for vmlaldavq on two uint32x4_t vectors, returning uint64_t.
// The autogenerated assertions just above expect a call to
// @llvm.arm.mve.vmlldava.v4i32 whose first constant argument is 1.
// The body returns the same call under two spellings: un-suffixed
// (vmlaldavq) and type-suffixed (vmlaldavq_u32).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
767 uint64_t test_vmlaldavq_u32(uint32x4_t a
, uint32x4_t b
) {
769 return vmlaldavq(a
, b
);
771 return vmlaldavq_u32(a
, b
);
775 // CHECK-LABEL: @test_vmlaldavxq_s16(
776 // CHECK-NEXT: entry:
777 // CHECK-NEXT: [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 1, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
778 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
779 // CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
780 // CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 32
781 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
782 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
783 // CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
784 // CHECK-NEXT: ret i64 [[TMP6]]
// Test wrapper for vmlaldavxq on two int16x8_t vectors. The autogenerated
// assertions just above expect a call to @llvm.arm.mve.vmlldava.v8i16.
// The body returns the same call under two spellings: un-suffixed
// (vmlaldavxq) and type-suffixed (vmlaldavxq_s16).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
786 int64_t test_vmlaldavxq_s16(int16x8_t a
, int16x8_t b
) {
788 return vmlaldavxq(a
, b
);
790 return vmlaldavxq_s16(a
, b
);
794 // CHECK-LABEL: @test_vmlaldavxq_s32(
795 // CHECK-NEXT: entry:
796 // CHECK-NEXT: [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 0, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
797 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
798 // CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
799 // CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 32
800 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
801 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
802 // CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
803 // CHECK-NEXT: ret i64 [[TMP6]]
// Test wrapper for vmlaldavxq on two int32x4_t vectors. The autogenerated
// assertions just above expect a call to @llvm.arm.mve.vmlldava.v4i32.
// The body returns the same call under two spellings: un-suffixed
// (vmlaldavxq) and type-suffixed (vmlaldavxq_s32).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
805 int64_t test_vmlaldavxq_s32(int32x4_t a
, int32x4_t b
) {
807 return vmlaldavxq(a
, b
);
809 return vmlaldavxq_s32(a
, b
);
813 // CHECK-LABEL: @test_vmlsldavq_s16(
814 // CHECK-NEXT: entry:
815 // CHECK-NEXT: [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 1, i32 0, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
816 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
817 // CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
818 // CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 32
819 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
820 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
821 // CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
822 // CHECK-NEXT: ret i64 [[TMP6]]
// Test wrapper for vmlsldavq on two int16x8_t vectors. The autogenerated
// assertions just above expect a call to @llvm.arm.mve.vmlldava.v8i16.
// The body returns the same call under two spellings: un-suffixed
// (vmlsldavq) and type-suffixed (vmlsldavq_s16).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
824 int64_t test_vmlsldavq_s16(int16x8_t a
, int16x8_t b
) {
826 return vmlsldavq(a
, b
);
828 return vmlsldavq_s16(a
, b
);
832 // CHECK-LABEL: @test_vmlsldavq_s32(
833 // CHECK-NEXT: entry:
834 // CHECK-NEXT: [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 1, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
835 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
836 // CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
837 // CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 32
838 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
839 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
840 // CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
841 // CHECK-NEXT: ret i64 [[TMP6]]
// Test wrapper for vmlsldavq on two int32x4_t vectors. The autogenerated
// assertions just above expect a call to @llvm.arm.mve.vmlldava.v4i32.
// The body returns the same call under two spellings: un-suffixed
// (vmlsldavq) and type-suffixed (vmlsldavq_s32).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
843 int64_t test_vmlsldavq_s32(int32x4_t a
, int32x4_t b
) {
845 return vmlsldavq(a
, b
);
847 return vmlsldavq_s32(a
, b
);
851 // CHECK-LABEL: @test_vmlsldavxvq_s16(
852 // CHECK-NEXT: entry:
853 // CHECK-NEXT: [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 1, i32 1, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
854 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
855 // CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
856 // CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 32
857 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
858 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
859 // CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
860 // CHECK-NEXT: ret i64 [[TMP6]]
// Test wrapper for vmlsldavxq on two int16x8_t vectors. The autogenerated
// assertions just above expect a call to @llvm.arm.mve.vmlldava.v8i16.
// The body returns the same call under two spellings: un-suffixed
// (vmlsldavxq) and type-suffixed (vmlsldavxq_s16).
// NOTE(review): the function name spells "vmlsldavxvq" while the intrinsics
// called are spelled "vmlsldavxq" (the s32 sibling is test_vmlsldavxq_s32);
// this looks like a typo. A rename must also update the matching label
// assertion above, otherwise FileCheck will no longer find the function.
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of the two returns; the selecting directives are not visible here - confirm.
862 int64_t test_vmlsldavxvq_s16(int16x8_t a
, int16x8_t b
) {
864 return vmlsldavxq(a
, b
);
866 return vmlsldavxq_s16(a
, b
);
870 // CHECK-LABEL: @test_vmlsldavxq_s32(
871 // CHECK-NEXT: entry:
872 // CHECK-NEXT: [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 1, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
873 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
874 // CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
875 // CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 32
876 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
877 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
878 // CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
879 // CHECK-NEXT: ret i64 [[TMP6]]
// Test wrapper for vmlsldavxq on two int32x4_t vectors. The autogenerated
// assertions just above expect a call to @llvm.arm.mve.vmlldava.v4i32.
// The body returns the same call under two spellings: un-suffixed
// (vmlsldavxq) and type-suffixed (vmlsldavxq_s32).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
881 int64_t test_vmlsldavxq_s32(int32x4_t a
, int32x4_t b
) {
883 return vmlsldavxq(a
, b
);
885 return vmlsldavxq_s32(a
, b
);
889 // CHECK-LABEL: @test_vrmlaldavhq_s32(
890 // CHECK-NEXT: entry:
891 // CHECK-NEXT: [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
892 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
893 // CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
894 // CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 32
895 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
896 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
897 // CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
898 // CHECK-NEXT: ret i64 [[TMP6]]
// Test wrapper for vrmlaldavhq on two int32x4_t vectors. The autogenerated
// assertions just above expect a call to @llvm.arm.mve.vrmlldavha.v4i32.
// The body returns the same call under two spellings: un-suffixed
// (vrmlaldavhq) and type-suffixed (vrmlaldavhq_s32).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
900 int64_t test_vrmlaldavhq_s32(int32x4_t a
, int32x4_t b
) {
902 return vrmlaldavhq(a
, b
);
904 return vrmlaldavhq_s32(a
, b
);
908 // CHECK-LABEL: @test_vrmlaldavhq_u32(
909 // CHECK-NEXT: entry:
910 // CHECK-NEXT: [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 1, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
911 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
912 // CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
913 // CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 32
914 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
915 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
916 // CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
917 // CHECK-NEXT: ret i64 [[TMP6]]
// Test wrapper for vrmlaldavhq on two uint32x4_t vectors, returning uint64_t.
// The autogenerated assertions just above expect a call to
// @llvm.arm.mve.vrmlldavha.v4i32 whose first constant argument is 1.
// The body returns the same call under two spellings: un-suffixed
// (vrmlaldavhq) and type-suffixed (vrmlaldavhq_u32).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
919 uint64_t test_vrmlaldavhq_u32(uint32x4_t a
, uint32x4_t b
) {
921 return vrmlaldavhq(a
, b
);
923 return vrmlaldavhq_u32(a
, b
);
927 // CHECK-LABEL: @test_vrmlaldavhxq_s32(
928 // CHECK-NEXT: entry:
929 // CHECK-NEXT: [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 0, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
930 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
931 // CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
932 // CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 32
933 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
934 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
935 // CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
936 // CHECK-NEXT: ret i64 [[TMP6]]
// Test wrapper for vrmlaldavhxq on two int32x4_t vectors. The autogenerated
// assertions just above expect a call to @llvm.arm.mve.vrmlldavha.v4i32.
// The body returns the same call under two spellings: un-suffixed
// (vrmlaldavhxq) and type-suffixed (vrmlaldavhxq_s32).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
938 int64_t test_vrmlaldavhxq_s32(int32x4_t a
, int32x4_t b
) {
940 return vrmlaldavhxq(a
, b
);
942 return vrmlaldavhxq_s32(a
, b
);
946 // CHECK-LABEL: @test_vrmlsldavhq_s32(
947 // CHECK-NEXT: entry:
948 // CHECK-NEXT: [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 1, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
949 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
950 // CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
951 // CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 32
952 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
953 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
954 // CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
955 // CHECK-NEXT: ret i64 [[TMP6]]
// Test wrapper for vrmlsldavhq on two int32x4_t vectors. The autogenerated
// assertions just above expect a call to @llvm.arm.mve.vrmlldavha.v4i32.
// The body returns the same call under two spellings: un-suffixed
// (vrmlsldavhq) and type-suffixed (vrmlsldavhq_s32).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
957 int64_t test_vrmlsldavhq_s32(int32x4_t a
, int32x4_t b
) {
959 return vrmlsldavhq(a
, b
);
961 return vrmlsldavhq_s32(a
, b
);
965 // CHECK-LABEL: @test_vrmlsldavhxq_s32(
966 // CHECK-NEXT: entry:
967 // CHECK-NEXT: [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 1, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
968 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
969 // CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
970 // CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 32
971 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
972 // CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
973 // CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
974 // CHECK-NEXT: ret i64 [[TMP6]]
// Test wrapper for vrmlsldavhxq on two int32x4_t vectors. The autogenerated
// assertions just above expect a call to @llvm.arm.mve.vrmlldavha.v4i32.
// The body returns the same call under two spellings: un-suffixed
// (vrmlsldavhxq) and type-suffixed (vrmlsldavhxq_s32).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
976 int64_t test_vrmlsldavhxq_s32(int32x4_t a
, int32x4_t b
) {
978 return vrmlsldavhxq(a
, b
);
980 return vrmlsldavhxq_s32(a
, b
);
984 // CHECK-LABEL: @test_vmlaldavq_p_s16(
985 // CHECK-NEXT: entry:
986 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
987 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
988 // CHECK-NEXT: [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 0, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
989 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
990 // CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
991 // CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 32
992 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
993 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
994 // CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
995 // CHECK-NEXT: ret i64 [[TMP8]]
// Test wrapper for vmlaldavq_p on two int16x8_t vectors and a predicate p.
// The autogenerated assertions just above expect a call to
// @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1.
// The body returns the same call under two spellings: un-suffixed
// (vmlaldavq_p) and type-suffixed (vmlaldavq_p_s16).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
997 int64_t test_vmlaldavq_p_s16(int16x8_t a
, int16x8_t b
, mve_pred16_t p
) {
999 return vmlaldavq_p(a
, b
, p
);
1001 return vmlaldavq_p_s16(a
, b
, p
);
1005 // CHECK-LABEL: @test_vmlaldavq_p_s32(
1006 // CHECK-NEXT: entry:
1007 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1008 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
1009 // CHECK-NEXT: [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
1010 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
1011 // CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1012 // CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 32
1013 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
1014 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
1015 // CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
1016 // CHECK-NEXT: ret i64 [[TMP8]]
// Test wrapper for vmlaldavq_p on two int32x4_t vectors and a predicate p.
// The autogenerated assertions just above expect a call to
// @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1.
// The body returns the same call under two spellings: un-suffixed
// (vmlaldavq_p) and type-suffixed (vmlaldavq_p_s32).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
1018 int64_t test_vmlaldavq_p_s32(int32x4_t a
, int32x4_t b
, mve_pred16_t p
) {
1020 return vmlaldavq_p(a
, b
, p
);
1022 return vmlaldavq_p_s32(a
, b
, p
);
1026 // CHECK-LABEL: @test_vmlaldavq_p_u16(
1027 // CHECK-NEXT: entry:
1028 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1029 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
1030 // CHECK-NEXT: [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 1, i32 0, i32 0, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
1031 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
1032 // CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1033 // CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 32
1034 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
1035 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
1036 // CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
1037 // CHECK-NEXT: ret i64 [[TMP8]]
// Test wrapper for vmlaldavq_p on two uint16x8_t vectors and a predicate p,
// returning uint64_t. The autogenerated assertions just above expect a call to
// @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1 whose first constant argument
// is 1.
// The body returns the same call under two spellings: un-suffixed
// (vmlaldavq_p) and type-suffixed (vmlaldavq_p_u16).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
1039 uint64_t test_vmlaldavq_p_u16(uint16x8_t a
, uint16x8_t b
, mve_pred16_t p
) {
1041 return vmlaldavq_p(a
, b
, p
);
1043 return vmlaldavq_p_u16(a
, b
, p
);
1047 // CHECK-LABEL: @test_vmlaldavq_p_u32(
1048 // CHECK-NEXT: entry:
1049 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1050 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
1051 // CHECK-NEXT: [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 1, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
1052 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
1053 // CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1054 // CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 32
1055 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
1056 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
1057 // CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
1058 // CHECK-NEXT: ret i64 [[TMP8]]
// Test wrapper for vmlaldavq_p on two uint32x4_t vectors and a predicate p,
// returning uint64_t. The autogenerated assertions just above expect a call to
// @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1 whose first constant argument
// is 1.
// The body returns the same call under two spellings: un-suffixed
// (vmlaldavq_p) and type-suffixed (vmlaldavq_p_u32).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
1060 uint64_t test_vmlaldavq_p_u32(uint32x4_t a
, uint32x4_t b
, mve_pred16_t p
) {
1062 return vmlaldavq_p(a
, b
, p
);
1064 return vmlaldavq_p_u32(a
, b
, p
);
1068 // CHECK-LABEL: @test_vmlaldavxq_p_s16(
1069 // CHECK-NEXT: entry:
1070 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1071 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
1072 // CHECK-NEXT: [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 1, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
1073 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
1074 // CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1075 // CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 32
1076 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
1077 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
1078 // CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
1079 // CHECK-NEXT: ret i64 [[TMP8]]
// Test wrapper for vmlaldavxq_p on two int16x8_t vectors and a predicate p.
// The autogenerated assertions just above expect a call to
// @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1.
// The body returns the same call under two spellings: un-suffixed
// (vmlaldavxq_p) and type-suffixed (vmlaldavxq_p_s16).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
1081 int64_t test_vmlaldavxq_p_s16(int16x8_t a
, int16x8_t b
, mve_pred16_t p
) {
1083 return vmlaldavxq_p(a
, b
, p
);
1085 return vmlaldavxq_p_s16(a
, b
, p
);
1089 // CHECK-LABEL: @test_vmlaldavxq_p_s32(
1090 // CHECK-NEXT: entry:
1091 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1092 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
1093 // CHECK-NEXT: [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 0, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
1094 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
1095 // CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1096 // CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 32
1097 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
1098 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
1099 // CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
1100 // CHECK-NEXT: ret i64 [[TMP8]]
// Test wrapper for vmlaldavxq_p on two int32x4_t vectors and a predicate p.
// The autogenerated assertions just above expect a call to
// @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1.
// The body returns the same call under two spellings: un-suffixed
// (vmlaldavxq_p) and type-suffixed (vmlaldavxq_p_s32).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
1102 int64_t test_vmlaldavxq_p_s32(int32x4_t a
, int32x4_t b
, mve_pred16_t p
) {
1104 return vmlaldavxq_p(a
, b
, p
);
1106 return vmlaldavxq_p_s32(a
, b
, p
);
1110 // CHECK-LABEL: @test_vmlsldavq_p_s16(
1111 // CHECK-NEXT: entry:
1112 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1113 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
1114 // CHECK-NEXT: [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 1, i32 0, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
1115 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
1116 // CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1117 // CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 32
1118 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
1119 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
1120 // CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
1121 // CHECK-NEXT: ret i64 [[TMP8]]
// Test wrapper for vmlsldavq_p on two int16x8_t vectors and a predicate p.
// The autogenerated assertions just above expect a call to
// @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1.
// The body returns the same call under two spellings: un-suffixed
// (vmlsldavq_p) and type-suffixed (vmlsldavq_p_s16).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
1123 int64_t test_vmlsldavq_p_s16(int16x8_t a
, int16x8_t b
, mve_pred16_t p
) {
1125 return vmlsldavq_p(a
, b
, p
);
1127 return vmlsldavq_p_s16(a
, b
, p
);
1131 // CHECK-LABEL: @test_vmlsldavq_p_s32(
1132 // CHECK-NEXT: entry:
1133 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1134 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
1135 // CHECK-NEXT: [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 1, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
1136 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
1137 // CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1138 // CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 32
1139 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
1140 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
1141 // CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
1142 // CHECK-NEXT: ret i64 [[TMP8]]
// Test wrapper for vmlsldavq_p on two int32x4_t vectors and a predicate p.
// The autogenerated assertions just above expect a call to
// @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1.
// The body returns the same call under two spellings: un-suffixed
// (vmlsldavq_p) and type-suffixed (vmlsldavq_p_s32).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
1144 int64_t test_vmlsldavq_p_s32(int32x4_t a
, int32x4_t b
, mve_pred16_t p
) {
1146 return vmlsldavq_p(a
, b
, p
);
1148 return vmlsldavq_p_s32(a
, b
, p
);
1152 // CHECK-LABEL: @test_vmlsldaxvq_p_s16(
1153 // CHECK-NEXT: entry:
1154 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1155 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
1156 // CHECK-NEXT: [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 1, i32 1, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
1157 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
1158 // CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1159 // CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 32
1160 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
1161 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
1162 // CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
1163 // CHECK-NEXT: ret i64 [[TMP8]]
// Test wrapper for vmlsldavxq_p on two int16x8_t vectors and a predicate p.
// The autogenerated assertions just above expect a call to
// @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1.
// The body returns the same call under two spellings: un-suffixed
// (vmlsldavxq_p) and type-suffixed (vmlsldavxq_p_s16).
// NOTE(review): the function name spells "vmlsldaxvq" while the intrinsics
// called are spelled "vmlsldavxq" (the s32 sibling is
// test_vmlsldavxq_p_s32); this looks like a typo. A rename must also update
// the matching label assertion above, otherwise FileCheck will no longer find
// the function.
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of the two returns; the selecting directives are not visible here - confirm.
1165 int64_t test_vmlsldaxvq_p_s16(int16x8_t a
, int16x8_t b
, mve_pred16_t p
) {
1167 return vmlsldavxq_p(a
, b
, p
);
1169 return vmlsldavxq_p_s16(a
, b
, p
);
1173 // CHECK-LABEL: @test_vmlsldavxq_p_s32(
1174 // CHECK-NEXT: entry:
1175 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1176 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
1177 // CHECK-NEXT: [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 1, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
1178 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
1179 // CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1180 // CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 32
1181 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
1182 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
1183 // CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
1184 // CHECK-NEXT: ret i64 [[TMP8]]
// Test wrapper for vmlsldavxq_p on two int32x4_t vectors and a predicate p.
// The autogenerated assertions just above expect a call to
// @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1.
// The body returns the same call under two spellings: un-suffixed
// (vmlsldavxq_p) and type-suffixed (vmlsldavxq_p_s32).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
1186 int64_t test_vmlsldavxq_p_s32(int32x4_t a
, int32x4_t b
, mve_pred16_t p
) {
1188 return vmlsldavxq_p(a
, b
, p
);
1190 return vmlsldavxq_p_s32(a
, b
, p
);
1194 // CHECK-LABEL: @test_vrmlaldavhq_p_s32(
1195 // CHECK-NEXT: entry:
1196 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1197 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
1198 // CHECK-NEXT: [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
1199 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
1200 // CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1201 // CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 32
1202 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
1203 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
1204 // CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
1205 // CHECK-NEXT: ret i64 [[TMP8]]
// Test wrapper for vrmlaldavhq_p on two int32x4_t vectors and a predicate p.
// The autogenerated assertions just above expect a call to
// @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1.
// The body returns the same call under two spellings: un-suffixed
// (vrmlaldavhq_p) and type-suffixed (vrmlaldavhq_p_s32).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
1207 int64_t test_vrmlaldavhq_p_s32(int32x4_t a
, int32x4_t b
, mve_pred16_t p
) {
1209 return vrmlaldavhq_p(a
, b
, p
);
1211 return vrmlaldavhq_p_s32(a
, b
, p
);
1215 // CHECK-LABEL: @test_vrmlaldavhq_p_u32(
1216 // CHECK-NEXT: entry:
1217 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1218 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
1219 // CHECK-NEXT: [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 1, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
1220 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
1221 // CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1222 // CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 32
1223 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
1224 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
1225 // CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
1226 // CHECK-NEXT: ret i64 [[TMP8]]
// Test wrapper for vrmlaldavhq_p on two uint32x4_t vectors and a predicate p,
// returning uint64_t. The autogenerated assertions just above expect a call to
// @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1 whose first constant
// argument is 1.
// The body returns the same call under two spellings: un-suffixed
// (vrmlaldavhq_p) and type-suffixed (vrmlaldavhq_p_u32).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
1228 uint64_t test_vrmlaldavhq_p_u32(uint32x4_t a
, uint32x4_t b
, mve_pred16_t p
) {
1230 return vrmlaldavhq_p(a
, b
, p
);
1232 return vrmlaldavhq_p_u32(a
, b
, p
);
1236 // CHECK-LABEL: @test_vrmlaldavhxq_p_s32(
1237 // CHECK-NEXT: entry:
1238 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1239 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
1240 // CHECK-NEXT: [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 0, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
1241 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
1242 // CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1243 // CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 32
1244 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
1245 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
1246 // CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
1247 // CHECK-NEXT: ret i64 [[TMP8]]
// Test wrapper for vrmlaldavhxq_p on two int32x4_t vectors and a predicate p.
// The autogenerated assertions just above expect a call to
// @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1.
// The body returns the same call under two spellings: un-suffixed
// (vrmlaldavhxq_p) and type-suffixed (vrmlaldavhxq_p_s32).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
1249 int64_t test_vrmlaldavhxq_p_s32(int32x4_t a
, int32x4_t b
, mve_pred16_t p
) {
1251 return vrmlaldavhxq_p(a
, b
, p
);
1253 return vrmlaldavhxq_p_s32(a
, b
, p
);
1257 // CHECK-LABEL: @test_vrmlsldavhq_p_s32(
1258 // CHECK-NEXT: entry:
1259 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1260 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
1261 // CHECK-NEXT: [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 1, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
1262 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
1263 // CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1264 // CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 32
1265 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
1266 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
1267 // CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
1268 // CHECK-NEXT: ret i64 [[TMP8]]
// Test wrapper for vrmlsldavhq_p on two int32x4_t vectors and a predicate p.
// The autogenerated assertions just above expect a call to
// @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1.
// The body returns the same call under two spellings: un-suffixed
// (vrmlsldavhq_p) and type-suffixed (vrmlsldavhq_p_s32).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
1270 int64_t test_vrmlsldavhq_p_s32(int32x4_t a
, int32x4_t b
, mve_pred16_t p
) {
1272 return vrmlsldavhq_p(a
, b
, p
);
1274 return vrmlsldavhq_p_s32(a
, b
, p
);
1278 // CHECK-LABEL: @test_vrmlsldavhxq_p_s32(
1279 // CHECK-NEXT: entry:
1280 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1281 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
1282 // CHECK-NEXT: [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 1, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
1283 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
1284 // CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1285 // CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 32
1286 // CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
1287 // CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
1288 // CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
1289 // CHECK-NEXT: ret i64 [[TMP8]]
// Test wrapper for vrmlsldavhxq_p on two int32x4_t vectors and a predicate p.
// The autogenerated assertions just above expect a call to
// @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1.
// The body returns the same call under two spellings: un-suffixed
// (vrmlsldavhxq_p) and type-suffixed (vrmlsldavhxq_p_s32).
// NOTE(review): presumably the POLYMORPHIC macro from the RUN lines picks one
// of them; the selecting directives are not visible in this excerpt - confirm.
1291 int64_t test_vrmlsldavhxq_p_s32(int32x4_t a
, int32x4_t b
, mve_pred16_t p
) {
1293 return vrmlsldavhxq_p(a
, b
, p
);
1295 return vrmlsldavhxq_p_s32(a
, b
, p
);