// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s

// REQUIRES: aarch64-registered-target || arm-registered-target

#include <arm_mve.h>
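
// NOTE: Every test below follows the same lowering pattern, visible in the
// autogenerated CHECK lines: the 64-bit accumulator (where present) is split
// into its low and high i32 halves (trunc, and lshr+trunc), those halves are
// passed to the corresponding llvm.arm.mve.* intrinsic together with i32 flag
// operands selecting the unsigned, subtracting, and exchanging variants, and
// the { i32, i32 } result is reassembled into an i64 (zext, shl, or). Each
// function is compiled twice: once through the polymorphic intrinsic name
// (-DPOLYMORPHIC) and once through the explicitly typed name, and both runs
// must match the same CHECK lines.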
// CHECK-LABEL: @test_vmlaldavaq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vmlaldavaq_s16(int64_t a, int16x8_t b, int16x8_t c) {
#ifdef POLYMORPHIC
  return vmlaldavaq(a, b, c);
#else
  return vmlaldavaq_s16(a, b, c);
#endif
}

// CHECK-LABEL: @test_vmlaldavaq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vmlaldavaq_s32(int64_t a, int32x4_t b, int32x4_t c) {
#ifdef POLYMORPHIC
  return vmlaldavaq(a, b, c);
#else
  return vmlaldavaq_s32(a, b, c);
#endif
}

// CHECK-LABEL: @test_vmlaldavaq_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 1, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
uint64_t test_vmlaldavaq_u16(uint64_t a, uint16x8_t b, uint16x8_t c) {
#ifdef POLYMORPHIC
  return vmlaldavaq(a, b, c);
#else
  return vmlaldavaq_u16(a, b, c);
#endif
}

// CHECK-LABEL: @test_vmlaldavaq_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 1, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
uint64_t test_vmlaldavaq_u32(uint64_t a, uint32x4_t b, uint32x4_t c) {
#ifdef POLYMORPHIC
  return vmlaldavaq(a, b, c);
#else
  return vmlaldavaq_u32(a, b, c);
#endif
}

// CHECK-LABEL: @test_vmlaldavaxq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 1, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vmlaldavaxq_s16(int64_t a, int16x8_t b, int16x8_t c) {
#ifdef POLYMORPHIC
  return vmlaldavaxq(a, b, c);
#else
  return vmlaldavaxq_s16(a, b, c);
#endif
}

// CHECK-LABEL: @test_vmlaldavaxq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 0, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vmlaldavaxq_s32(int64_t a, int32x4_t b, int32x4_t c) {
#ifdef POLYMORPHIC
  return vmlaldavaxq(a, b, c);
#else
  return vmlaldavaxq_s32(a, b, c);
#endif
}

// CHECK-LABEL: @test_vmlsldavaq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 1, i32 0, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vmlsldavaq_s16(int64_t a, int16x8_t b, int16x8_t c) {
#ifdef POLYMORPHIC
  return vmlsldavaq(a, b, c);
#else
  return vmlsldavaq_s16(a, b, c);
#endif
}

// CHECK-LABEL: @test_vmlsldavaq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 1, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vmlsldavaq_s32(int64_t a, int32x4_t b, int32x4_t c) {
#ifdef POLYMORPHIC
  return vmlsldavaq(a, b, c);
#else
  return vmlsldavaq_s32(a, b, c);
#endif
}

// CHECK-LABEL: @test_vmlsldaxvaq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 1, i32 1, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vmlsldaxvaq_s16(int64_t a, int16x8_t b, int16x8_t c) {
#ifdef POLYMORPHIC
  return vmlsldavaxq(a, b, c);
#else
  return vmlsldavaxq_s16(a, b, c);
#endif
}

// CHECK-LABEL: @test_vmlsldavaxq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 1, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vmlsldavaxq_s32(int64_t a, int32x4_t b, int32x4_t c) {
#ifdef POLYMORPHIC
  return vmlsldavaxq(a, b, c);
#else
  return vmlsldavaxq_s32(a, b, c);
#endif
}

// CHECK-LABEL: @test_vrmlaldavhaq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vrmlaldavhaq_s32(int64_t a, int32x4_t b, int32x4_t c) {
#ifdef POLYMORPHIC
  return vrmlaldavhaq(a, b, c);
#else
  return vrmlaldavhaq_s32(a, b, c);
#endif
}

// CHECK-LABEL: @test_vrmlaldavhaq_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 1, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
uint64_t test_vrmlaldavhaq_u32(uint64_t a, uint32x4_t b, uint32x4_t c) {
#ifdef POLYMORPHIC
  return vrmlaldavhaq(a, b, c);
#else
  return vrmlaldavhaq_u32(a, b, c);
#endif
}

// CHECK-LABEL: @test_vrmlaldavhaxq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 0, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vrmlaldavhaxq_s32(int64_t a, int32x4_t b, int32x4_t c) {
#ifdef POLYMORPHIC
  return vrmlaldavhaxq(a, b, c);
#else
  return vrmlaldavhaxq_s32(a, b, c);
#endif
}

// CHECK-LABEL: @test_vrmlsldavhaq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 1, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vrmlsldavhaq_s32(int64_t a, int32x4_t b, int32x4_t c) {
#ifdef POLYMORPHIC
  return vrmlsldavhaq(a, b, c);
#else
  return vrmlsldavhaq_s32(a, b, c);
#endif
}

// CHECK-LABEL: @test_vrmlsldavhaxq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 1, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vrmlsldavhaxq_s32(int64_t a, int32x4_t b, int32x4_t c) {
#ifdef POLYMORPHIC
  return vrmlsldavhaxq(a, b, c);
#else
  return vrmlsldavhaxq_s32(a, b, c);
#endif
}

// CHECK-LABEL: @test_vmlaldavaq_p_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], <8 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vmlaldavaq_p_s16(int64_t a, int16x8_t b, int16x8_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlaldavaq_p(a, b, c, p);
#else
  return vmlaldavaq_p_s16(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vmlaldavaq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vmlaldavaq_p_s32(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlaldavaq_p(a, b, c, p);
#else
  return vmlaldavaq_p_s32(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vmlaldavaq_p_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 1, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], <8 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
uint64_t test_vmlaldavaq_p_u16(uint64_t a, uint16x8_t b, uint16x8_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlaldavaq_p(a, b, c, p);
#else
  return vmlaldavaq_p_u16(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vmlaldavaq_p_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 1, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
uint64_t test_vmlaldavaq_p_u32(uint64_t a, uint32x4_t b, uint32x4_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlaldavaq_p(a, b, c, p);
#else
  return vmlaldavaq_p_u32(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vmlaldavaxq_p_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 1, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], <8 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vmlaldavaxq_p_s16(int64_t a, int16x8_t b, int16x8_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlaldavaxq_p(a, b, c, p);
#else
  return vmlaldavaxq_p_s16(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vmlaldavaxq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 0, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vmlaldavaxq_p_s32(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlaldavaxq_p(a, b, c, p);
#else
  return vmlaldavaxq_p_s32(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vmlsldavaq_p_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 1, i32 0, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], <8 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vmlsldavaq_p_s16(int64_t a, int16x8_t b, int16x8_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlsldavaq_p(a, b, c, p);
#else
  return vmlsldavaq_p_s16(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vmlsldavaq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 1, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vmlsldavaq_p_s32(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlsldavaq_p(a, b, c, p);
#else
  return vmlsldavaq_p_s32(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vmlsldaxvaq_p_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 1, i32 1, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], <8 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vmlsldaxvaq_p_s16(int64_t a, int16x8_t b, int16x8_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlsldavaxq_p(a, b, c, p);
#else
  return vmlsldavaxq_p_s16(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vmlsldavaxq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 1, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vmlsldavaxq_p_s32(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlsldavaxq_p(a, b, c, p);
#else
  return vmlsldavaxq_p_s32(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vrmlaldavhaq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vrmlaldavhaq_p_s32(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vrmlaldavhaq_p(a, b, c, p);
#else
  return vrmlaldavhaq_p_s32(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vrmlaldavhaq_p_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 1, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
uint64_t test_vrmlaldavhaq_p_u32(uint64_t a, uint32x4_t b, uint32x4_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vrmlaldavhaq_p(a, b, c, p);
#else
  return vrmlaldavhaq_p_u32(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vrmlaldavhaxq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 0, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vrmlaldavhaxq_p_s32(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vrmlaldavhaxq_p(a, b, c, p);
#else
  return vrmlaldavhaxq_p_s32(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vrmlsldavhaq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 1, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vrmlsldavhaq_p_s32(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vrmlsldavhaq_p(a, b, c, p);
#else
  return vrmlsldavhaq_p_s32(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vrmlsldavhaxq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 1, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vrmlsldavhaxq_p_s32(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vrmlsldavhaxq_p(a, b, c, p);
#else
  return vrmlsldavhaxq_p_s32(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vmlaldavq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 0, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
int64_t test_vmlaldavq_s16(int16x8_t a, int16x8_t b) {
#ifdef POLYMORPHIC
  return vmlaldavq(a, b);
#else
  return vmlaldavq_s16(a, b);
#endif
}

// CHECK-LABEL: @test_vmlaldavq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
int64_t test_vmlaldavq_s32(int32x4_t a, int32x4_t b) {
#ifdef POLYMORPHIC
  return vmlaldavq(a, b);
#else
  return vmlaldavq_s32(a, b);
#endif
}

// CHECK-LABEL: @test_vmlaldavq_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 1, i32 0, i32 0, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
uint64_t test_vmlaldavq_u16(uint16x8_t a, uint16x8_t b) {
#ifdef POLYMORPHIC
  return vmlaldavq(a, b);
#else
  return vmlaldavq_u16(a, b);
#endif
}

// CHECK-LABEL: @test_vmlaldavq_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 1, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
uint64_t test_vmlaldavq_u32(uint32x4_t a, uint32x4_t b) {
#ifdef POLYMORPHIC
  return vmlaldavq(a, b);
#else
  return vmlaldavq_u32(a, b);
#endif
}

// CHECK-LABEL: @test_vmlaldavxq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 1, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
int64_t test_vmlaldavxq_s16(int16x8_t a, int16x8_t b) {
#ifdef POLYMORPHIC
  return vmlaldavxq(a, b);
#else
  return vmlaldavxq_s16(a, b);
#endif
}

// CHECK-LABEL: @test_vmlaldavxq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 0, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
int64_t test_vmlaldavxq_s32(int32x4_t a, int32x4_t b) {
#ifdef POLYMORPHIC
  return vmlaldavxq(a, b);
#else
  return vmlaldavxq_s32(a, b);
#endif
}

// CHECK-LABEL: @test_vmlsldavq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 1, i32 0, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
int64_t test_vmlsldavq_s16(int16x8_t a, int16x8_t b) {
#ifdef POLYMORPHIC
  return vmlsldavq(a, b);
#else
  return vmlsldavq_s16(a, b);
#endif
}

// CHECK-LABEL: @test_vmlsldavq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 1, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
int64_t test_vmlsldavq_s32(int32x4_t a, int32x4_t b) {
#ifdef POLYMORPHIC
  return vmlsldavq(a, b);
#else
  return vmlsldavq_s32(a, b);
#endif
}

// CHECK-LABEL: @test_vmlsldavxvq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 1, i32 1, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
int64_t test_vmlsldavxvq_s16(int16x8_t a, int16x8_t b) {
#ifdef POLYMORPHIC
  return vmlsldavxq(a, b);
#else
  return vmlsldavxq_s16(a, b);
#endif
}

// CHECK-LABEL: @test_vmlsldavxq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 1, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
int64_t test_vmlsldavxq_s32(int32x4_t a, int32x4_t b) {
#ifdef POLYMORPHIC
  return vmlsldavxq(a, b);
#else
  return vmlsldavxq_s32(a, b);
#endif
}

// CHECK-LABEL: @test_vrmlaldavhq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
int64_t test_vrmlaldavhq_s32(int32x4_t a, int32x4_t b) {
#ifdef POLYMORPHIC
  return vrmlaldavhq(a, b);
#else
  return vrmlaldavhq_s32(a, b);
#endif
}

// CHECK-LABEL: @test_vrmlaldavhq_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 1, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
uint64_t test_vrmlaldavhq_u32(uint32x4_t a, uint32x4_t b) {
#ifdef POLYMORPHIC
  return vrmlaldavhq(a, b);
#else
  return vrmlaldavhq_u32(a, b);
#endif
}

// CHECK-LABEL: @test_vrmlaldavhxq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 0, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
int64_t test_vrmlaldavhxq_s32(int32x4_t a, int32x4_t b) {
#ifdef POLYMORPHIC
  return vrmlaldavhxq(a, b);
#else
  return vrmlaldavhxq_s32(a, b);
#endif
}

// CHECK-LABEL: @test_vrmlsldavhq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 1, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
int64_t test_vrmlsldavhq_s32(int32x4_t a, int32x4_t b) {
#ifdef POLYMORPHIC
  return vrmlsldavhq(a, b);
#else
  return vrmlsldavhq_s32(a, b);
#endif
}

// CHECK-LABEL: @test_vrmlsldavhxq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 1, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
int64_t test_vrmlsldavhxq_s32(int32x4_t a, int32x4_t b) {
#ifdef POLYMORPHIC
  return vrmlsldavhxq(a, b);
#else
  return vrmlsldavhxq_s32(a, b);
#endif
}

// CHECK-LABEL: @test_vmlaldavq_p_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 0, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
int64_t test_vmlaldavq_p_s16(int16x8_t a, int16x8_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlaldavq_p(a, b, p);
#else
  return vmlaldavq_p_s16(a, b, p);
#endif
}

// CHECK-LABEL: @test_vmlaldavq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
int64_t test_vmlaldavq_p_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlaldavq_p(a, b, p);
#else
  return vmlaldavq_p_s32(a, b, p);
#endif
}

// CHECK-LABEL: @test_vmlaldavq_p_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 1, i32 0, i32 0, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
uint64_t test_vmlaldavq_p_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlaldavq_p(a, b, p);
#else
  return vmlaldavq_p_u16(a, b, p);
#endif
}

// CHECK-LABEL: @test_vmlaldavq_p_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 1, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
uint64_t test_vmlaldavq_p_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlaldavq_p(a, b, p);
#else
  return vmlaldavq_p_u32(a, b, p);
#endif
}

// CHECK-LABEL: @test_vmlaldavxq_p_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 1, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
int64_t test_vmlaldavxq_p_s16(int16x8_t a, int16x8_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlaldavxq_p(a, b, p);
#else
  return vmlaldavxq_p_s16(a, b, p);
#endif
}

// CHECK-LABEL: @test_vmlaldavxq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 0, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
int64_t test_vmlaldavxq_p_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlaldavxq_p(a, b, p);
#else
  return vmlaldavxq_p_s32(a, b, p);
#endif
}

// CHECK-LABEL: @test_vmlsldavq_p_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 1, i32 0, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
int64_t test_vmlsldavq_p_s16(int16x8_t a, int16x8_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlsldavq_p(a, b, p);
#else
  return vmlsldavq_p_s16(a, b, p);
#endif
}

// CHECK-LABEL: @test_vmlsldavq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 1, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
int64_t test_vmlsldavq_p_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlsldavq_p(a, b, p);
#else
  return vmlsldavq_p_s32(a, b, p);
#endif
}

// CHECK-LABEL: @test_vmlsldaxvq_p_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 1, i32 1, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
int64_t test_vmlsldaxvq_p_s16(int16x8_t a, int16x8_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlsldavxq_p(a, b, p);
#else
  return vmlsldavxq_p_s16(a, b, p);
#endif
}

// CHECK-LABEL: @test_vmlsldavxq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 1, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
int64_t test_vmlsldavxq_p_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlsldavxq_p(a, b, p);
#else
  return vmlsldavxq_p_s32(a, b, p);
#endif
}

// CHECK-LABEL: @test_vrmlaldavhq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
int64_t test_vrmlaldavhq_p_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vrmlaldavhq_p(a, b, p);
#else
  return vrmlaldavhq_p_s32(a, b, p);
#endif
}

// CHECK-LABEL: @test_vrmlaldavhq_p_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 1, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
uint64_t test_vrmlaldavhq_p_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vrmlaldavhq_p(a, b, p);
#else
  return vrmlaldavhq_p_u32(a, b, p);
#endif
}

// CHECK-LABEL: @test_vrmlaldavhxq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 0, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
int64_t test_vrmlaldavhxq_p_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vrmlaldavhxq_p(a, b, p);
#else
  return vrmlaldavhxq_p_s32(a, b, p);
#endif
}

// CHECK-LABEL: @test_vrmlsldavhq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 1, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
int64_t test_vrmlsldavhq_p_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vrmlsldavhq_p(a, b, p);
#else
  return vrmlsldavhq_p_s32(a, b, p);
#endif
}

// CHECK-LABEL: @test_vrmlsldavhxq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 1, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
int64_t test_vrmlsldavhxq_p_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vrmlsldavhxq_p(a, b, p);
#else
  return vrmlsldavhxq_p_s32(a, b, p);
#endif
}