// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s

// REQUIRES: aarch64-registered-target || arm-registered-target

#include <arm_mve.h>
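
/*
 * Tests for the MVE increment/decrement-and-duplicate intrinsic
 * families: vidupq (increment), vddupq (decrement), viwdupq (increment
 * with wrap) and vdwdupq (decrement with wrap). Each function is
 * compiled twice: with -DPOLYMORPHIC it uses the overloaded name
 * (e.g. vidupq_u8), otherwise the fully suffixed name (e.g.
 * vidupq_n_u8); both spellings must lower to the same llvm.arm.mve.*
 * intrinsic, so a single set of CHECK lines serves both RUN lines. The
 * immediate step operand is always 1, 2, 4 or 8, the only values these
 * instructions encode.
 */
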
// CHECK-LABEL: @test_vidupq_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.v16i8(i32 [[A:%.*]], i32 4)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP0]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP1]]
//
uint8x16_t test_vidupq_n_u8(uint32_t a)
{
#ifdef POLYMORPHIC
    return vidupq_u8(a, 4);
#else /* POLYMORPHIC */
    return vidupq_n_u8(a, 4);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vidupq_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.v8i16(i32 [[A:%.*]], i32 1)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP0]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP1]]
//
uint16x8_t test_vidupq_n_u16(uint32_t a)
{
#ifdef POLYMORPHIC
    return vidupq_u16(a, 1);
#else /* POLYMORPHIC */
    return vidupq_n_u16(a, 1);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vidupq_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 [[A:%.*]], i32 4)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
uint32x4_t test_vidupq_n_u32(uint32_t a)
{
#ifdef POLYMORPHIC
    return vidupq_u32(a, 4);
#else /* POLYMORPHIC */
    return vidupq_n_u32(a, 4);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vddupq_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.v16i8(i32 [[A:%.*]], i32 2)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP0]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP1]]
//
uint8x16_t test_vddupq_n_u8(uint32_t a)
{
#ifdef POLYMORPHIC
    return vddupq_u8(a, 2);
#else /* POLYMORPHIC */
    return vddupq_n_u8(a, 2);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vddupq_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.v8i16(i32 [[A:%.*]], i32 4)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP0]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP1]]
//
uint16x8_t test_vddupq_n_u16(uint32_t a)
{
#ifdef POLYMORPHIC
    return vddupq_u16(a, 4);
#else /* POLYMORPHIC */
    return vddupq_n_u16(a, 4);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vddupq_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.v4i32(i32 [[A:%.*]], i32 2)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
uint32x4_t test_vddupq_n_u32(uint32_t a)
{
#ifdef POLYMORPHIC
    return vddupq_u32(a, 2);
#else /* POLYMORPHIC */
    return vddupq_n_u32(a, 2);
#endif /* POLYMORPHIC */
}
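
/*
 * The wrapping forms take a second scalar, b, as the wrap limit:
 * viwdupq counts up from a and wraps around to zero on reaching b,
 * while vdwdupq counts down from a and wraps back to b on reaching
 * zero. In the IR this is the extra i32 operand of the
 * llvm.arm.mve.viwdup/vdwdup intrinsics.
 */
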
// CHECK-LABEL: @test_viwdupq_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.v16i8(i32 [[A:%.*]], i32 [[B:%.*]], i32 4)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP0]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP1]]
//
uint8x16_t test_viwdupq_n_u8(uint32_t a, uint32_t b)
{
#ifdef POLYMORPHIC
    return viwdupq_u8(a, b, 4);
#else /* POLYMORPHIC */
    return viwdupq_n_u8(a, b, 4);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_viwdupq_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.v8i16(i32 [[A:%.*]], i32 [[B:%.*]], i32 2)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP0]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP1]]
//
uint16x8_t test_viwdupq_n_u16(uint32_t a, uint32_t b)
{
#ifdef POLYMORPHIC
    return viwdupq_u16(a, b, 2);
#else /* POLYMORPHIC */
    return viwdupq_n_u16(a, b, 2);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_viwdupq_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.v4i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 8)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
uint32x4_t test_viwdupq_n_u32(uint32_t a, uint32_t b)
{
#ifdef POLYMORPHIC
    return viwdupq_u32(a, b, 8);
#else /* POLYMORPHIC */
    return viwdupq_n_u32(a, b, 8);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vdwdupq_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.v16i8(i32 [[A:%.*]], i32 [[B:%.*]], i32 4)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP0]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP1]]
//
uint8x16_t test_vdwdupq_n_u8(uint32_t a, uint32_t b)
{
#ifdef POLYMORPHIC
    return vdwdupq_u8(a, b, 4);
#else /* POLYMORPHIC */
    return vdwdupq_n_u8(a, b, 4);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vdwdupq_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.v8i16(i32 [[A:%.*]], i32 [[B:%.*]], i32 8)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP0]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP1]]
//
uint16x8_t test_vdwdupq_n_u16(uint32_t a, uint32_t b)
{
#ifdef POLYMORPHIC
    return vdwdupq_u16(a, b, 8);
#else /* POLYMORPHIC */
    return vdwdupq_n_u16(a, b, 8);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vdwdupq_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.v4i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 1)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP1]]
//
uint32x4_t test_vdwdupq_n_u32(uint32_t a, uint32_t b)
{
#ifdef POLYMORPHIC
    return vdwdupq_u32(a, b, 1);
#else /* POLYMORPHIC */
    return vdwdupq_n_u32(a, b, 1);
#endif /* POLYMORPHIC */
}
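
/*
 * The _wb (writeback) forms take the start offset by pointer instead of
 * by value. The scalar half of the { vector, i32 } pair returned by the
 * intrinsic (field 1) is the updated offset, which is stored back
 * through the pointer, mirroring the instruction's writeback of its
 * scalar operand.
 */
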
// CHECK-LABEL: @test_vidupq_wb_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.v16i8(i32 [[TMP0]], i32 8)
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 1
// CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
//
uint8x16_t test_vidupq_wb_u8(uint32_t *a)
{
#ifdef POLYMORPHIC
    return vidupq_u8(a, 8);
#else /* POLYMORPHIC */
    return vidupq_wb_u8(a, 8);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vidupq_wb_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.v8i16(i32 [[TMP0]], i32 1)
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 1
// CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
//
uint16x8_t test_vidupq_wb_u16(uint32_t *a)
{
#ifdef POLYMORPHIC
    return vidupq_u16(a, 1);
#else /* POLYMORPHIC */
    return vidupq_wb_u16(a, 1);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vidupq_wb_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 [[TMP0]], i32 4)
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 1
// CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
uint32x4_t test_vidupq_wb_u32(uint32_t *a)
{
#ifdef POLYMORPHIC
    return vidupq_u32(a, 4);
#else /* POLYMORPHIC */
    return vidupq_wb_u32(a, 4);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vddupq_wb_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.v16i8(i32 [[TMP0]], i32 2)
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 1
// CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
//
uint8x16_t test_vddupq_wb_u8(uint32_t *a)
{
#ifdef POLYMORPHIC
    return vddupq_u8(a, 2);
#else /* POLYMORPHIC */
    return vddupq_wb_u8(a, 2);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vddupq_wb_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.v8i16(i32 [[TMP0]], i32 8)
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 1
// CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
//
uint16x8_t test_vddupq_wb_u16(uint32_t *a)
{
#ifdef POLYMORPHIC
    return vddupq_u16(a, 8);
#else /* POLYMORPHIC */
    return vddupq_wb_u16(a, 8);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vddupq_wb_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.v4i32(i32 [[TMP0]], i32 2)
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 1
// CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
uint32x4_t test_vddupq_wb_u32(uint32_t *a)
{
#ifdef POLYMORPHIC
    return vddupq_u32(a, 2);
#else /* POLYMORPHIC */
    return vddupq_wb_u32(a, 2);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vdwdupq_wb_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.v16i8(i32 [[TMP0]], i32 [[B:%.*]], i32 4)
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 1
// CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
//
uint8x16_t test_vdwdupq_wb_u8(uint32_t *a, uint32_t b)
{
#ifdef POLYMORPHIC
    return vdwdupq_u8(a, b, 4);
#else /* POLYMORPHIC */
    return vdwdupq_wb_u8(a, b, 4);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vdwdupq_wb_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.v8i16(i32 [[TMP0]], i32 [[B:%.*]], i32 4)
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 1
// CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
//
uint16x8_t test_vdwdupq_wb_u16(uint32_t *a, uint32_t b)
{
#ifdef POLYMORPHIC
    return vdwdupq_u16(a, b, 4);
#else /* POLYMORPHIC */
    return vdwdupq_wb_u16(a, b, 4);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_viwdupq_wb_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.v16i8(i32 [[TMP0]], i32 [[B:%.*]], i32 1)
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 1
// CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
//
uint8x16_t test_viwdupq_wb_u8(uint32_t *a, uint32_t b)
{
#ifdef POLYMORPHIC
    return viwdupq_u8(a, b, 1);
#else /* POLYMORPHIC */
    return viwdupq_wb_u8(a, b, 1);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_viwdupq_wb_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.v8i16(i32 [[TMP0]], i32 [[B:%.*]], i32 1)
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 1
// CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
//
uint16x8_t test_viwdupq_wb_u16(uint32_t *a, uint32_t b)
{
#ifdef POLYMORPHIC
    return viwdupq_u16(a, b, 1);
#else /* POLYMORPHIC */
    return viwdupq_wb_u16(a, b, 1);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_viwdupq_wb_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.v4i32(i32 [[TMP0]], i32 [[B:%.*]], i32 8)
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 1
// CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
uint32x4_t test_viwdupq_wb_u32(uint32_t *a, uint32_t b)
{
#ifdef POLYMORPHIC
    return viwdupq_u32(a, b, 8);
#else /* POLYMORPHIC */
    return viwdupq_wb_u32(a, b, 8);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vdwdupq_wb_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.v4i32(i32 [[TMP0]], i32 [[B:%.*]], i32 2)
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 1
// CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
uint32x4_t test_vdwdupq_wb_u32(uint32_t *a, uint32_t b)
{
#ifdef POLYMORPHIC
    return vdwdupq_u32(a, b, 2);
#else /* POLYMORPHIC */
    return vdwdupq_wb_u32(a, b, 2);
#endif /* POLYMORPHIC */
}
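
/*
 * The _m (merging) predicated forms add an inactive vector and an
 * mve_pred16_t predicate. The predicate is zero-extended to i32 and
 * converted to a <N x i1> mask with llvm.arm.mve.pred.i2v, then passed
 * to the .predicated variant of the intrinsic; lanes with a false
 * predicate take their value from the inactive operand.
 */
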
// CHECK-LABEL: @test_vidupq_m_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 8, <16 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
//
uint8x16_t test_vidupq_m_n_u8(uint8x16_t inactive, uint32_t a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vidupq_m(inactive, a, 8, p);
#else /* POLYMORPHIC */
    return vidupq_m_n_u8(inactive, a, 8, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vidupq_m_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 8, <8 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
//
uint16x8_t test_vidupq_m_n_u16(uint16x8_t inactive, uint32_t a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vidupq_m(inactive, a, 8, p);
#else /* POLYMORPHIC */
    return vidupq_m_n_u16(inactive, a, 8, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vidupq_m_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 2, <4 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
uint32x4_t test_vidupq_m_n_u32(uint32x4_t inactive, uint32_t a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vidupq_m(inactive, a, 2, p);
#else /* POLYMORPHIC */
    return vidupq_m_n_u32(inactive, a, 2, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vddupq_m_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 8, <16 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
//
uint8x16_t test_vddupq_m_n_u8(uint8x16_t inactive, uint32_t a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vddupq_m(inactive, a, 8, p);
#else /* POLYMORPHIC */
    return vddupq_m_n_u8(inactive, a, 8, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vddupq_m_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 2, <8 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
//
uint16x8_t test_vddupq_m_n_u16(uint16x8_t inactive, uint32_t a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vddupq_m(inactive, a, 2, p);
#else /* POLYMORPHIC */
    return vddupq_m_n_u16(inactive, a, 2, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vddupq_m_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 8, <4 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
uint32x4_t test_vddupq_m_n_u32(uint32x4_t inactive, uint32_t a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vddupq_m(inactive, a, 8, p);
#else /* POLYMORPHIC */
    return vddupq_m_n_u32(inactive, a, 8, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_viwdupq_m_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 8, <16 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
//
uint8x16_t test_viwdupq_m_n_u8(uint8x16_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return viwdupq_m(inactive, a, b, 8, p);
#else /* POLYMORPHIC */
    return viwdupq_m_n_u8(inactive, a, b, 8, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_viwdupq_m_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 8, <8 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
//
uint16x8_t test_viwdupq_m_n_u16(uint16x8_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return viwdupq_m(inactive, a, b, 8, p);
#else /* POLYMORPHIC */
    return viwdupq_m_n_u16(inactive, a, b, 8, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_viwdupq_m_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 4, <4 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
uint32x4_t test_viwdupq_m_n_u32(uint32x4_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return viwdupq_m(inactive, a, b, 4, p);
#else /* POLYMORPHIC */
    return viwdupq_m_n_u32(inactive, a, b, 4, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vdwdupq_m_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 1, <16 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
//
uint8x16_t test_vdwdupq_m_n_u8(uint8x16_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vdwdupq_m(inactive, a, b, 1, p);
#else /* POLYMORPHIC */
    return vdwdupq_m_n_u8(inactive, a, b, 1, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vdwdupq_m_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 2, <8 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
//
uint16x8_t test_vdwdupq_m_n_u16(uint16x8_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vdwdupq_m(inactive, a, b, 2, p);
#else /* POLYMORPHIC */
    return vdwdupq_m_n_u16(inactive, a, b, 2, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vdwdupq_m_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 4, <4 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
uint32x4_t test_vdwdupq_m_n_u32(uint32x4_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vdwdupq_m(inactive, a, b, 4, p);
#else /* POLYMORPHIC */
    return vdwdupq_m_n_u32(inactive, a, b, 4, p);
#endif /* POLYMORPHIC */
}
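
/*
 * Predicated writeback forms: the start offset is loaded through the
 * pointer, the .predicated intrinsic is called, and the updated offset
 * (field 1 of the result pair) is stored back, combining the writeback
 * and merging patterns checked above.
 */
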
// CHECK-LABEL: @test_vidupq_m_wb_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[TMP0]], i32 8, <16 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP5]]
//
uint8x16_t test_vidupq_m_wb_u8(uint8x16_t inactive, uint32_t *a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vidupq_m(inactive, a, 8, p);
#else /* POLYMORPHIC */
    return vidupq_m_wb_u8(inactive, a, 8, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vidupq_m_wb_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[TMP0]], i32 2, <8 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP5]]
//
uint16x8_t test_vidupq_m_wb_u16(uint16x8_t inactive, uint32_t *a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vidupq_m(inactive, a, 2, p);
#else /* POLYMORPHIC */
    return vidupq_m_wb_u16(inactive, a, 2, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vidupq_m_wb_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[TMP0]], i32 8, <4 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP5]]
//
uint32x4_t test_vidupq_m_wb_u32(uint32x4_t inactive, uint32_t *a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vidupq_m(inactive, a, 8, p);
#else /* POLYMORPHIC */
    return vidupq_m_wb_u32(inactive, a, 8, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vddupq_m_wb_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[TMP0]], i32 1, <16 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP5]]
//
uint8x16_t test_vddupq_m_wb_u8(uint8x16_t inactive, uint32_t *a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vddupq_m(inactive, a, 1, p);
#else /* POLYMORPHIC */
    return vddupq_m_wb_u8(inactive, a, 1, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vddupq_m_wb_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[TMP0]], i32 1, <8 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP5]]
//
uint16x8_t test_vddupq_m_wb_u16(uint16x8_t inactive, uint32_t *a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vddupq_m(inactive, a, 1, p);
#else /* POLYMORPHIC */
    return vddupq_m_wb_u16(inactive, a, 1, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vddupq_m_wb_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[TMP0]], i32 4, <4 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP5]]
//
uint32x4_t test_vddupq_m_wb_u32(uint32x4_t inactive, uint32_t *a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vddupq_m(inactive, a, 4, p);
#else /* POLYMORPHIC */
    return vddupq_m_wb_u32(inactive, a, 4, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_viwdupq_m_wb_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 8, <16 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP5]]
//
uint8x16_t test_viwdupq_m_wb_u8(uint8x16_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return viwdupq_m(inactive, a, b, 8, p);
#else /* POLYMORPHIC */
    return viwdupq_m_wb_u8(inactive, a, b, 8, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_viwdupq_m_wb_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 8, <8 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP5]]
//
uint16x8_t test_viwdupq_m_wb_u16(uint16x8_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return viwdupq_m(inactive, a, b, 8, p);
#else /* POLYMORPHIC */
    return viwdupq_m_wb_u16(inactive, a, b, 8, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_viwdupq_m_wb_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 4, <4 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP5]]
//
uint32x4_t test_viwdupq_m_wb_u32(uint32x4_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return viwdupq_m(inactive, a, b, 4, p);
#else /* POLYMORPHIC */
    return viwdupq_m_wb_u32(inactive, a, b, 4, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vdwdupq_m_wb_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 1, <16 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP5]]
//
uint8x16_t test_vdwdupq_m_wb_u8(uint8x16_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vdwdupq_m(inactive, a, b, 1, p);
#else /* POLYMORPHIC */
    return vdwdupq_m_wb_u8(inactive, a, b, 1, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vdwdupq_m_wb_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 4, <8 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP5]]
//
uint16x8_t test_vdwdupq_m_wb_u16(uint16x8_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vdwdupq_m(inactive, a, b, 4, p);
#else /* POLYMORPHIC */
    return vdwdupq_m_wb_u16(inactive, a, b, 4, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vdwdupq_m_wb_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 4, <4 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP5]]
//
uint32x4_t test_vdwdupq_m_wb_u32(uint32x4_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vdwdupq_m(inactive, a, b, 4, p);
#else /* POLYMORPHIC */
    return vdwdupq_m_wb_u32(inactive, a, b, 4, p);
#endif /* POLYMORPHIC */
}
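
/*
 * The _x (don't-care) predicated forms pass undef as the inactive
 * operand: lanes with a false predicate are left unspecified, so no
 * merge input is required.
 */
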
// CHECK-LABEL: @test_vidupq_x_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[A:%.*]], i32 2, <16 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
//
uint8x16_t test_vidupq_x_n_u8(uint32_t a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vidupq_x_u8(a, 2, p);
#else /* POLYMORPHIC */
    return vidupq_x_n_u8(a, 2, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vidupq_x_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[A:%.*]], i32 2, <8 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
//
uint16x8_t test_vidupq_x_n_u16(uint32_t a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vidupq_x_u16(a, 2, p);
#else /* POLYMORPHIC */
    return vidupq_x_n_u16(a, 2, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vidupq_x_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[A:%.*]], i32 8, <4 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
uint32x4_t test_vidupq_x_n_u32(uint32_t a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vidupq_x_u32(a, 8, p);
#else /* POLYMORPHIC */
    return vidupq_x_n_u32(a, 8, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vddupq_x_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[A:%.*]], i32 8, <16 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
//
uint8x16_t test_vddupq_x_n_u8(uint32_t a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vddupq_x_u8(a, 8, p);
#else /* POLYMORPHIC */
    return vddupq_x_n_u8(a, 8, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vddupq_x_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[A:%.*]], i32 4, <8 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
//
uint16x8_t test_vddupq_x_n_u16(uint32_t a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vddupq_x_u16(a, 4, p);
#else /* POLYMORPHIC */
    return vddupq_x_n_u16(a, 4, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vddupq_x_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[A:%.*]], i32 2, <4 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
uint32x4_t test_vddupq_x_n_u32(uint32_t a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vddupq_x_u32(a, 2, p);
#else /* POLYMORPHIC */
    return vddupq_x_n_u32(a, 2, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_viwdupq_x_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 2, <16 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
//
uint8x16_t test_viwdupq_x_n_u8(uint32_t a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return viwdupq_x_u8(a, b, 2, p);
#else /* POLYMORPHIC */
    return viwdupq_x_n_u8(a, b, 2, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_viwdupq_x_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 4, <8 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
//
uint16x8_t test_viwdupq_x_n_u16(uint32_t a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return viwdupq_x_u16(a, b, 4, p);
#else /* POLYMORPHIC */
    return viwdupq_x_n_u16(a, b, 4, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_viwdupq_x_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 2, <4 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
uint32x4_t test_viwdupq_x_n_u32(uint32_t a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return viwdupq_x_u32(a, b, 2, p);
#else /* POLYMORPHIC */
    return viwdupq_x_n_u32(a, b, 2, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vdwdupq_x_n_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 2, <16 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP3]]
//
uint8x16_t test_vdwdupq_x_n_u8(uint32_t a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vdwdupq_x_u8(a, b, 2, p);
#else /* POLYMORPHIC */
    return vdwdupq_x_n_u8(a, b, 2, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vdwdupq_x_n_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 2, <8 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP3]]
//
uint16x8_t test_vdwdupq_x_n_u16(uint32_t a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vdwdupq_x_u16(a, b, 2, p);
#else /* POLYMORPHIC */
    return vdwdupq_x_n_u16(a, b, 2, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vdwdupq_x_n_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 8, <4 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
uint32x4_t test_vdwdupq_x_n_u32(uint32_t a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vdwdupq_x_u32(a, b, 8, p);
#else /* POLYMORPHIC */
    return vdwdupq_x_n_u32(a, b, 8, p);
#endif /* POLYMORPHIC */
}
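
/*
 * Finally, the _x writeback forms combine the undef inactive operand
 * with the load and store of the offset through the pointer.
 */
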
// CHECK-LABEL: @test_vidupq_x_wb_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[TMP0]], i32 2, <16 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP5]]
//
uint8x16_t test_vidupq_x_wb_u8(uint32_t *a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vidupq_x_u8(a, 2, p);
#else /* POLYMORPHIC */
    return vidupq_x_wb_u8(a, 2, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vidupq_x_wb_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[TMP0]], i32 4, <8 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP5]]
//
uint16x8_t test_vidupq_x_wb_u16(uint32_t *a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vidupq_x_u16(a, 4, p);
#else /* POLYMORPHIC */
    return vidupq_x_wb_u16(a, 4, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vidupq_x_wb_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[TMP0]], i32 2, <4 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP5]]
//
uint32x4_t test_vidupq_x_wb_u32(uint32_t *a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vidupq_x_u32(a, 2, p);
#else /* POLYMORPHIC */
    return vidupq_x_wb_u32(a, 2, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vddupq_x_wb_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[TMP0]], i32 1, <16 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP5]]
//
uint8x16_t test_vddupq_x_wb_u8(uint32_t *a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vddupq_x_u8(a, 1, p);
#else /* POLYMORPHIC */
    return vddupq_x_wb_u8(a, 1, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vddupq_x_wb_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[TMP0]], i32 4, <8 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP5]]
//
uint16x8_t test_vddupq_x_wb_u16(uint32_t *a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vddupq_x_u16(a, 4, p);
#else /* POLYMORPHIC */
    return vddupq_x_wb_u16(a, 4, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vddupq_x_wb_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[TMP0]], i32 4, <4 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP5]]
//
uint32x4_t test_vddupq_x_wb_u32(uint32_t *a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vddupq_x_u32(a, 4, p);
#else /* POLYMORPHIC */
    return vddupq_x_wb_u32(a, 4, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_viwdupq_x_wb_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 1, <16 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP5]]
//
uint8x16_t test_viwdupq_x_wb_u8(uint32_t *a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return viwdupq_x_u8(a, b, 1, p);
#else /* POLYMORPHIC */
    return viwdupq_x_wb_u8(a, b, 1, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_viwdupq_x_wb_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 2, <8 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP5]]
//
uint16x8_t test_viwdupq_x_wb_u16(uint32_t *a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return viwdupq_x_u16(a, b, 2, p);
#else /* POLYMORPHIC */
    return viwdupq_x_wb_u16(a, b, 2, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_viwdupq_x_wb_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 1, <4 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP5]]
//
uint32x4_t test_viwdupq_x_wb_u32(uint32_t *a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return viwdupq_x_u32(a, b, 1, p);
#else /* POLYMORPHIC */
    return viwdupq_x_wb_u32(a, b, 1, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vdwdupq_x_wb_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 4, <16 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <16 x i8> [[TMP5]]
//
uint8x16_t test_vdwdupq_x_wb_u8(uint32_t *a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vdwdupq_x_u8(a, b, 4, p);
#else /* POLYMORPHIC */
    return vdwdupq_x_wb_u8(a, b, 4, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vdwdupq_x_wb_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 4, <8 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <8 x i16> [[TMP5]]
//
uint16x8_t test_vdwdupq_x_wb_u16(uint32_t *a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vdwdupq_x_u16(a, b, 4, p);
#else /* POLYMORPHIC */
    return vdwdupq_x_wb_u16(a, b, 4, p);
#endif /* POLYMORPHIC */
}

// CHECK-LABEL: @test_vdwdupq_x_wb_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 4, <4 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
// CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP5]]
//
uint32x4_t test_vdwdupq_x_wb_u32(uint32_t *a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vdwdupq_x_u32(a, b, 4, p);
#else /* POLYMORPHIC */
    return vdwdupq_x_wb_u32(a, b, 4, p);
#endif /* POLYMORPHIC */
}