// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN: -disable-O0-optnone \
// RUN: -flax-vector-conversions=none -emit-llvm -o - %s \
// RUN: | opt -S -passes=mem2reg \
// RUN: | FileCheck %s

// REQUIRES: aarch64-registered-target || arm-registered-target

#include <arm_neon.h>
// CHECK-LABEL: @test_vadd_s8(
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) {
  return vadd_s8(v1, v2);
}

// CHECK-LABEL: @test_vadd_s16(
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) {
  return vadd_s16(v1, v2);
}

// CHECK-LABEL: @test_vadd_s32(
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) {
  return vadd_s32(v1, v2);
}

// CHECK-LABEL: @test_vadd_s64(
// CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vadd_s64(int64x1_t v1, int64x1_t v2) {
  return vadd_s64(v1, v2);
}

// CHECK-LABEL: @test_vadd_f32(
// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, %v2
// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) {
  return vadd_f32(v1, v2);
}

// CHECK-LABEL: @test_vadd_u8(
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vadd_u8(v1, v2);
}

// CHECK-LABEL: @test_vadd_u16(
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vadd_u16(v1, v2);
}

// CHECK-LABEL: @test_vadd_u32(
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vadd_u32(v1, v2);
}

// CHECK-LABEL: @test_vadd_u64(
// CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
// CHECK: ret <1 x i64> [[ADD_I]]
uint64x1_t test_vadd_u64(uint64x1_t v1, uint64x1_t v2) {
  return vadd_u64(v1, v2);
}

// CHECK-LABEL: @test_vaddq_s8(
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vaddq_s8(v1, v2);
}

// CHECK-LABEL: @test_vaddq_s16(
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vaddq_s16(v1, v2);
}

// CHECK-LABEL: @test_vaddq_s32(
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vaddq_s32(v1, v2);
}

// CHECK-LABEL: @test_vaddq_s64(
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) {
  return vaddq_s64(v1, v2);
}

// CHECK-LABEL: @test_vaddq_f32(
// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, %v2
// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) {
  return vaddq_f32(v1, v2);
}

// CHECK-LABEL: @test_vaddq_f64(
// CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, %v2
// CHECK: ret <2 x double> [[ADD_I]]
float64x2_t test_vaddq_f64(float64x2_t v1, float64x2_t v2) {
  return vaddq_f64(v1, v2);
}

// CHECK-LABEL: @test_vaddq_u8(
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vaddq_u8(v1, v2);
}

// CHECK-LABEL: @test_vaddq_u16(
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vaddq_u16(v1, v2);
}

// CHECK-LABEL: @test_vaddq_u32(
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vaddq_u32(v1, v2);
}

// CHECK-LABEL: @test_vaddq_u64(
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vaddq_u64(v1, v2);
}

// CHECK-LABEL: @test_vsub_s8(
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[SUB_I]]
int8x8_t test_vsub_s8(int8x8_t v1, int8x8_t v2) {
  return vsub_s8(v1, v2);
}

// CHECK-LABEL: @test_vsub_s16(
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[SUB_I]]
int16x4_t test_vsub_s16(int16x4_t v1, int16x4_t v2) {
  return vsub_s16(v1, v2);
}

// CHECK-LABEL: @test_vsub_s32(
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vsub_s32(int32x2_t v1, int32x2_t v2) {
  return vsub_s32(v1, v2);
}

// CHECK-LABEL: @test_vsub_s64(
// CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
// CHECK: ret <1 x i64> [[SUB_I]]
int64x1_t test_vsub_s64(int64x1_t v1, int64x1_t v2) {
  return vsub_s64(v1, v2);
}

// CHECK-LABEL: @test_vsub_f32(
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, %v2
// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) {
  return vsub_f32(v1, v2);
}

// CHECK-LABEL: @test_vsub_u8(
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[SUB_I]]
uint8x8_t test_vsub_u8(uint8x8_t v1, uint8x8_t v2) {
  return vsub_u8(v1, v2);
}

// CHECK-LABEL: @test_vsub_u16(
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[SUB_I]]
uint16x4_t test_vsub_u16(uint16x4_t v1, uint16x4_t v2) {
  return vsub_u16(v1, v2);
}

// CHECK-LABEL: @test_vsub_u32(
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[SUB_I]]
uint32x2_t test_vsub_u32(uint32x2_t v1, uint32x2_t v2) {
  return vsub_u32(v1, v2);
}

// CHECK-LABEL: @test_vsub_u64(
// CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
// CHECK: ret <1 x i64> [[SUB_I]]
uint64x1_t test_vsub_u64(uint64x1_t v1, uint64x1_t v2) {
  return vsub_u64(v1, v2);
}

// CHECK-LABEL: @test_vsubq_s8(
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[SUB_I]]
int8x16_t test_vsubq_s8(int8x16_t v1, int8x16_t v2) {
  return vsubq_s8(v1, v2);
}

// CHECK-LABEL: @test_vsubq_s16(
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubq_s16(int16x8_t v1, int16x8_t v2) {
  return vsubq_s16(v1, v2);
}

// CHECK-LABEL: @test_vsubq_s32(
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubq_s32(int32x4_t v1, int32x4_t v2) {
  return vsubq_s32(v1, v2);
}

// CHECK-LABEL: @test_vsubq_s64(
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubq_s64(int64x2_t v1, int64x2_t v2) {
  return vsubq_s64(v1, v2);
}

// CHECK-LABEL: @test_vsubq_f32(
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, %v2
// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) {
  return vsubq_f32(v1, v2);
}

// CHECK-LABEL: @test_vsubq_f64(
// CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, %v2
// CHECK: ret <2 x double> [[SUB_I]]
float64x2_t test_vsubq_f64(float64x2_t v1, float64x2_t v2) {
  return vsubq_f64(v1, v2);
}

// CHECK-LABEL: @test_vsubq_u8(
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[SUB_I]]
uint8x16_t test_vsubq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vsubq_u8(v1, v2);
}

// CHECK-LABEL: @test_vsubq_u16(
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vsubq_u16(v1, v2);
}

// CHECK-LABEL: @test_vsubq_u32(
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vsubq_u32(v1, v2);
}

// CHECK-LABEL: @test_vsubq_u64(
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vsubq_u64(v1, v2);
}

// CHECK-LABEL: @test_vmul_s8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[MUL_I]]
int8x8_t test_vmul_s8(int8x8_t v1, int8x8_t v2) {
  return vmul_s8(v1, v2);
}

// CHECK-LABEL: @test_vmul_s16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[MUL_I]]
int16x4_t test_vmul_s16(int16x4_t v1, int16x4_t v2) {
  return vmul_s16(v1, v2);
}

// CHECK-LABEL: @test_vmul_s32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[MUL_I]]
int32x2_t test_vmul_s32(int32x2_t v1, int32x2_t v2) {
  return vmul_s32(v1, v2);
}

// CHECK-LABEL: @test_vmul_f32(
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v1, %v2
// CHECK: ret <2 x float> [[MUL_I]]
float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) {
  return vmul_f32(v1, v2);
}

// CHECK-LABEL: @test_vmul_u8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[MUL_I]]
uint8x8_t test_vmul_u8(uint8x8_t v1, uint8x8_t v2) {
  return vmul_u8(v1, v2);
}

// CHECK-LABEL: @test_vmul_u16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[MUL_I]]
uint16x4_t test_vmul_u16(uint16x4_t v1, uint16x4_t v2) {
  return vmul_u16(v1, v2);
}

// CHECK-LABEL: @test_vmul_u32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[MUL_I]]
uint32x2_t test_vmul_u32(uint32x2_t v1, uint32x2_t v2) {
  return vmul_u32(v1, v2);
}

// CHECK-LABEL: @test_vmulq_s8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[MUL_I]]
int8x16_t test_vmulq_s8(int8x16_t v1, int8x16_t v2) {
  return vmulq_s8(v1, v2);
}

// CHECK-LABEL: @test_vmulq_s16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[MUL_I]]
int16x8_t test_vmulq_s16(int16x8_t v1, int16x8_t v2) {
  return vmulq_s16(v1, v2);
}

// CHECK-LABEL: @test_vmulq_s32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[MUL_I]]
int32x4_t test_vmulq_s32(int32x4_t v1, int32x4_t v2) {
  return vmulq_s32(v1, v2);
}

// CHECK-LABEL: @test_vmulq_u8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[MUL_I]]
uint8x16_t test_vmulq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vmulq_u8(v1, v2);
}

// CHECK-LABEL: @test_vmulq_u16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[MUL_I]]
uint16x8_t test_vmulq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vmulq_u16(v1, v2);
}

// CHECK-LABEL: @test_vmulq_u32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[MUL_I]]
uint32x4_t test_vmulq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vmulq_u32(v1, v2);
}

// CHECK-LABEL: @test_vmulq_f32(
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v1, %v2
// CHECK: ret <4 x float> [[MUL_I]]
float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) {
  return vmulq_f32(v1, v2);
}

// CHECK-LABEL: @test_vmulq_f64(
// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v1, %v2
// CHECK: ret <2 x double> [[MUL_I]]
float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) {
  return vmulq_f64(v1, v2);
}

// CHECK-LABEL: @test_vmul_p8(
// CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK: ret <8 x i8> [[VMUL_V_I]]
poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) {
  return vmul_p8(v1, v2);
}

// CHECK-LABEL: @test_vmulq_p8(
// CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK: ret <16 x i8> [[VMULQ_V_I]]
poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) {
  return vmulq_p8(v1, v2);
}

// CHECK-LABEL: @test_vmla_s8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vmla_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_s16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[ADD_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return (int8x8_t)vmla_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_s32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vmla_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_f32(
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, [[MUL_I]]
// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vmla_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_u8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vmla_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vmla_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_u16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vmla_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_u32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vmla_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_s8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vmlaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vmlaq_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_s16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vmlaq_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_s32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vmlaq_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_f32(
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, [[MUL_I]]
// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vmlaq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_u8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vmlaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vmlaq_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_u16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vmlaq_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_u32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vmlaq_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_f64(
// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
// CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, [[MUL_I]]
// CHECK: ret <2 x double> [[ADD_I]]
float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vmlaq_f64(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_s8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
// CHECK: ret <8 x i8> [[SUB_I]]
int8x8_t test_vmls_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vmls_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_s16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SUB_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return (int8x8_t)vmls_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_s32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vmls_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vmls_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_f32(
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, [[MUL_I]]
// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vmls_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_u8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
// CHECK: ret <8 x i8> [[SUB_I]]
uint8x8_t test_vmls_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vmls_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_u16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
// CHECK: ret <4 x i16> [[SUB_I]]
uint16x4_t test_vmls_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vmls_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_u32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
uint32x2_t test_vmls_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vmls_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_s8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
// CHECK: ret <16 x i8> [[SUB_I]]
int8x16_t test_vmlsq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vmlsq_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_s16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vmlsq_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_s32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vmlsq_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_f32(
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, [[MUL_I]]
// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vmlsq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_u8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
// CHECK: ret <16 x i8> [[SUB_I]]
uint8x16_t test_vmlsq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vmlsq_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_u16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vmlsq_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_u32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vmlsq_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_f64(
// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
// CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, [[MUL_I]]
// CHECK: ret <2 x double> [[SUB_I]]
float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vmlsq_f64(v1, v2, v3);
}

// CHECK-LABEL: @test_vfma_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %v2, <2 x float> %v3, <2 x float> %v1)
// CHECK: ret <2 x float> [[TMP3]]
float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vfma_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vfmaq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %v2, <4 x float> %v3, <4 x float> %v1)
// CHECK: ret <4 x float> [[TMP3]]
float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vfmaq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vfmaq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
// CHECK: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %v2, <2 x double> %v3, <2 x double> %v1)
// CHECK: ret <2 x double> [[TMP3]]
float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vfmaq_f64(v1, v2, v3);
}

// CHECK-LABEL: @test_vfms_f32(
// CHECK: [[SUB_I:%.*]] = fneg <2 x float> %v2
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %v3, <2 x float> %v1)
// CHECK: ret <2 x float> [[TMP3]]
float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vfms_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vfmsq_f32(
// CHECK: [[SUB_I:%.*]] = fneg <4 x float> %v2
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %v3, <4 x float> %v1)
// CHECK: ret <4 x float> [[TMP3]]
float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vfmsq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vfmsq_f64(
// CHECK: [[SUB_I:%.*]] = fneg <2 x double> %v2
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
// CHECK: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> %v3, <2 x double> %v1)
// CHECK: ret <2 x double> [[TMP3]]
float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vfmsq_f64(v1, v2, v3);
}

// CHECK-LABEL: @test_vdivq_f64(
// CHECK: [[DIV_I:%.*]] = fdiv <2 x double> %v1, %v2
// CHECK: ret <2 x double> [[DIV_I]]
float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) {
  return vdivq_f64(v1, v2);
}

// CHECK-LABEL: @test_vdivq_f32(
// CHECK: [[DIV_I:%.*]] = fdiv <4 x float> %v1, %v2
// CHECK: ret <4 x float> [[DIV_I]]
float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) {
  return vdivq_f32(v1, v2);
}

// CHECK-LABEL: @test_vdiv_f32(
// CHECK: [[DIV_I:%.*]] = fdiv <2 x float> %v1, %v2
// CHECK: ret <2 x float> [[DIV_I]]
float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) {
  return vdiv_f32(v1, v2);
}

// CHECK-LABEL: @test_vaba_s8(
// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v2, <8 x i8> %v3)
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vaba_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v2, <4 x i16> %v3)
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vaba_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v2, <2 x i32> %v3)
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vaba_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_u8(
// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v2, <8 x i8> %v3)
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vaba_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v2, <4 x i16> %v3)
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vaba_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v2, <2 x i32> %v3)
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vaba_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_s8(
// CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v2, <16 x i8> %v3)
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vabaq_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v2, <8 x i16> %v3)
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vabaq_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v2, <4 x i32> %v3)
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vabaq_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_u8(
// CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v2, <16 x i8> %v3)
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vabaq_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v2, <8 x i16> %v3)
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vabaq_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v2, <4 x i32> %v3)
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vabaq_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vabd_s8(
// CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK: ret <8 x i8> [[VABD_I]]
int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) {
  return vabd_s8(v1, v2);
}

// CHECK-LABEL: @test_vabd_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK: ret <4 x i16> [[VABD2_I]]
int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) {
  return vabd_s16(v1, v2);
}

// CHECK-LABEL: @test_vabd_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK: ret <2 x i32> [[VABD2_I]]
int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
  return vabd_s32(v1, v2);
}

// CHECK-LABEL: @test_vabd_u8(
// CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK: ret <8 x i8> [[VABD_I]]
uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vabd_u8(v1, v2);
}

// CHECK-LABEL: @test_vabd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK: ret <4 x i16> [[VABD2_I]]
uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vabd_u16(v1, v2);
}

// CHECK-LABEL: @test_vabd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK: ret <2 x i32> [[VABD2_I]]
uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vabd_u32(v1, v2);
}

// CHECK-LABEL: @test_vabd_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %v1, <2 x float> %v2)
// CHECK: ret <2 x float> [[VABD2_I]]
float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
  return vabd_f32(v1, v2);
}

// CHECK-LABEL: @test_vabdq_s8(
// CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK: ret <16 x i8> [[VABD_I]]
int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) {
  return vabdq_s8(v1, v2);
}

// CHECK-LABEL: @test_vabdq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK: ret <8 x i16> [[VABD2_I]]
int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) {
  return vabdq_s16(v1, v2);
}

// CHECK-LABEL: @test_vabdq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK: ret <4 x i32> [[VABD2_I]]
int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) {
  return vabdq_s32(v1, v2);
}

// CHECK-LABEL: @test_vabdq_u8(
// CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK: ret <16 x i8> [[VABD_I]]
uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vabdq_u8(v1, v2);
}

// CHECK-LABEL: @test_vabdq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK: ret <8 x i16> [[VABD2_I]]
uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vabdq_u16(v1, v2);
}

// CHECK-LABEL: @test_vabdq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK: ret <4 x i32> [[VABD2_I]]
uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vabdq_u32(v1, v2);
}

// CHECK-LABEL: @test_vabdq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %v1, <4 x float> %v2)
// CHECK: ret <4 x float> [[VABD2_I]]
float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) {
  return vabdq_f32(v1, v2);
}

// CHECK-LABEL: @test_vabdq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %v1, <2 x double> %v2)
// CHECK: ret <2 x double> [[VABD2_I]]
float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) {
  return vabdq_f64(v1, v2);
}

// CHECK-LABEL: @test_vbsl_s8(
// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, splat (i8 -1)
// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <8 x i8> [[VBSL2_I]]
int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vbsl_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, splat (i16 -1)
// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[TMP4]]
int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return (int8x8_t)vbsl_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, splat (i32 -1)
// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i32> [[VBSL5_I]]
int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vbsl_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, splat (i64 -1)
// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <1 x i64> [[VBSL5_I]]
int64x1_t test_vbsl_s64(uint64x1_t v1, int64x1_t v2, int64x1_t v3) {
  return vbsl_s64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_u8(
// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, splat (i8 -1)
// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <8 x i8> [[VBSL2_I]]
uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vbsl_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, splat (i16 -1)
// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <4 x i16> [[VBSL5_I]]
uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vbsl_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, splat (i32 -1)
// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i32> [[VBSL5_I]]
uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vbsl_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, splat (i64 -1)
// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <1 x i64> [[VBSL5_I]]
uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
  return vbsl_u64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_f32(
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, [[VBSL1_I]]
// CHECK: [[TMP4:%.*]] = xor <2 x i32> %v1, splat (i32 -1)
// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP5]]
float32x2_t test_vbsl_f32(uint32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vbsl_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v3 to <8 x i8>
// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, splat (i64 -1)
// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double>
// CHECK: ret <1 x double> [[TMP4]]
float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) {
  return vbsl_f64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_p8(
// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, splat (i8 -1)
// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <8 x i8> [[VBSL2_I]]
poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) {
  return vbsl_p8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, splat (i16 -1)
// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <4 x i16> [[VBSL5_I]]
poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) {
  return vbsl_p16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_s8(
// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, splat (i8 -1)
// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <16 x i8> [[VBSL2_I]]
int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vbslq_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, splat (i16 -1)
// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <8 x i16> [[VBSL5_I]]
int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vbslq_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, splat (i32 -1)
// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <4 x i32> [[VBSL5_I]]
int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vbslq_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, splat (i64 -1)
// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i64> [[VBSL5_I]]
int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) {
  return vbslq_s64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_u8(
// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, splat (i8 -1)
// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <16 x i8> [[VBSL2_I]]
uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vbslq_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, splat (i16 -1)
// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <8 x i16> [[VBSL5_I]]
uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vbslq_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, splat (i32 -1)
// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <4 x i32> [[VBSL5_I]]
int32x4_t test_vbslq_u32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vbslq_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, splat (i64 -1)
// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i64> [[VBSL5_I]]
uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) {
  return vbslq_u64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, splat (i32 -1)
// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP4]]
float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vbslq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_p8(
// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, splat (i8 -1)
// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <16 x i8> [[VBSL2_I]]
poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) {
  return vbslq_p8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, splat (i16 -1)
// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <8 x i16> [[VBSL5_I]]
poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) {
  return vbslq_p16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, splat (i64 -1)
// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double>
// CHECK: ret <2 x double> [[TMP4]]
float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vbslq_f64(v1, v2, v3);
}

// CHECK-LABEL: @test_vrecps_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %v1, <2 x float> %v2)
// CHECK: ret <2 x float> [[VRECPS_V2_I]]
float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) {
  return vrecps_f32(v1, v2);
}

// CHECK-LABEL: @test_vrecpsq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %v1, <4 x float> %v2)
// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x float> [[VRECPSQ_V2_I]]
float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) {
  return vrecpsq_f32(v1, v2);
}

// CHECK-LABEL: @test_vrecpsq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %v1, <2 x double> %v2)
// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <2 x double> [[VRECPSQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x double> [[VRECPSQ_V2_I]]
float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) {
  return vrecpsq_f64(v1, v2);
}

// CHECK-LABEL: @test_vrsqrts_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %v1, <2 x float> %v2)
// CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VRSQRTS_V2_I]]
float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) {
  return vrsqrts_f32(v1, v2);
}

// CHECK-LABEL: @test_vrsqrtsq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %v1, <4 x float> %v2)
// CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x float> [[VRSQRTSQ_V2_I]]
float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) {
  return vrsqrtsq_f32(v1, v2);
}

// CHECK-LABEL: @test_vrsqrtsq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %v1, <2 x double> %v2)
// CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <2 x double> [[VRSQRTSQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x double> [[VRSQRTSQ_V2_I]]
float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) {
  return vrsqrtsq_f64(v1, v2);
}
1327 // CHECK-LABEL: @test_vcage_f32(
1328 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1329 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1330 // CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2)
1331 // CHECK: ret <2 x i32> [[VCAGE_V2_I]]
1332 uint32x2_t
test_vcage_f32(float32x2_t v1
, float32x2_t v2
) {
1333 return vcage_f32(v1
, v2
);
1336 // CHECK-LABEL: @test_vcage_f64(
1337 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1338 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1339 // CHECK: [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %a, <1 x double> %b)
1340 // CHECK: ret <1 x i64> [[VCAGE_V2_I]]
1341 uint64x1_t
test_vcage_f64(float64x1_t a
, float64x1_t b
) {
1342 return vcage_f64(a
, b
);
1345 // CHECK-LABEL: @test_vcageq_f32(
1346 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1347 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1348 // CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2)
1349 // CHECK: ret <4 x i32> [[VCAGEQ_V2_I]]
1350 uint32x4_t
test_vcageq_f32(float32x4_t v1
, float32x4_t v2
) {
1351 return vcageq_f32(v1
, v2
);
1354 // CHECK-LABEL: @test_vcageq_f64(
1355 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1356 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1357 // CHECK: [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2)
1358 // CHECK: ret <2 x i64> [[VCAGEQ_V2_I]]
1359 uint64x2_t
test_vcageq_f64(float64x2_t v1
, float64x2_t v2
) {
1360 return vcageq_f64(v1
, v2
);
1363 // CHECK-LABEL: @test_vcagt_f32(
1364 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1365 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1366 // CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2)
1367 // CHECK: ret <2 x i32> [[VCAGT_V2_I]]
uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) {
  return vcagt_f32(v1, v2);
}
1372 // CHECK-LABEL: @test_vcagt_f64(
1373 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1374 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1375 // CHECK: [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b)
1376 // CHECK: ret <1 x i64> [[VCAGT_V2_I]]
uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) {
  return vcagt_f64(a, b);
}
1381 // CHECK-LABEL: @test_vcagtq_f32(
1382 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1383 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1384 // CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2)
1385 // CHECK: ret <4 x i32> [[VCAGTQ_V2_I]]
uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) {
  return vcagtq_f32(v1, v2);
}
1390 // CHECK-LABEL: @test_vcagtq_f64(
1391 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1392 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1393 // CHECK: [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2)
1394 // CHECK: ret <2 x i64> [[VCAGTQ_V2_I]]
uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) {
  return vcagtq_f64(v1, v2);
}
1399 // CHECK-LABEL: @test_vcale_f32(
1400 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1401 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1402 // CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1)
1403 // CHECK: ret <2 x i32> [[VCALE_V2_I]]
uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) {
  return vcale_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}
1409 // CHECK-LABEL: @test_vcale_f64(
1410 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1411 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1412 // CHECK: [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %b, <1 x double> %a)
1413 // CHECK: ret <1 x i64> [[VCALE_V2_I]]
uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) {
  return vcale_f64(a, b);
}
1418 // CHECK-LABEL: @test_vcaleq_f32(
1419 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1420 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1421 // CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1)
1422 // CHECK: ret <4 x i32> [[VCALEQ_V2_I]]
uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) {
  return vcaleq_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}
1428 // CHECK-LABEL: @test_vcaleq_f64(
1429 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1430 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1431 // CHECK: [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1)
1432 // CHECK: ret <2 x i64> [[VCALEQ_V2_I]]
uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) {
  return vcaleq_f64(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}
1438 // CHECK-LABEL: @test_vcalt_f32(
1439 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1440 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1441 // CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1)
1442 // CHECK: ret <2 x i32> [[VCALT_V2_I]]
uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) {
  return vcalt_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}
1448 // CHECK-LABEL: @test_vcalt_f64(
1449 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1450 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1451 // CHECK: [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a)
1452 // CHECK: ret <1 x i64> [[VCALT_V2_I]]
uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) {
  return vcalt_f64(a, b);
}
1457 // CHECK-LABEL: @test_vcaltq_f32(
1458 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1459 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1460 // CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1)
1461 // CHECK: ret <4 x i32> [[VCALTQ_V2_I]]
uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) {
  return vcaltq_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}
1467 // CHECK-LABEL: @test_vcaltq_f64(
1468 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1469 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1470 // CHECK: [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1)
1471 // CHECK: ret <2 x i64> [[VCALTQ_V2_I]]
uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) {
  return vcaltq_f64(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}
1477 // CHECK-LABEL: @test_vtst_s8(
1478 // CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
1479 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
1480 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
1481 // CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) {
  return vtst_s8(v1, v2);
}
1486 // CHECK-LABEL: @test_vtst_s16(
1487 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1488 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1489 // CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2
1490 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
1491 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
1492 // CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) {
  return vtst_s16(v1, v2);
}
1497 // CHECK-LABEL: @test_vtst_s32(
1498 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
1499 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
1500 // CHECK: [[TMP2:%.*]] = and <2 x i32> %v1, %v2
1501 // CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
1502 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
1503 // CHECK: ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) {
  return vtst_s32(v1, v2);
}
1508 // CHECK-LABEL: @test_vtst_u8(
1509 // CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
1510 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
1511 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
1512 // CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) {
  return vtst_u8(v1, v2);
}
1517 // CHECK-LABEL: @test_vtst_u16(
1518 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1519 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1520 // CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2
1521 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
1522 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
1523 // CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) {
  return vtst_u16(v1, v2);
}
1528 // CHECK-LABEL: @test_vtst_u32(
1529 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
1530 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
1531 // CHECK: [[TMP2:%.*]] = and <2 x i32> %v1, %v2
1532 // CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
1533 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
1534 // CHECK: ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) {
  return vtst_u32(v1, v2);
}
1539 // CHECK-LABEL: @test_vtstq_s8(
1540 // CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
1541 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
1542 // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
1543 // CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) {
  return vtstq_s8(v1, v2);
}
1548 // CHECK-LABEL: @test_vtstq_s16(
1549 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1550 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1551 // CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2
1552 // CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
1553 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
1554 // CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) {
  return vtstq_s16(v1, v2);
}
1559 // CHECK-LABEL: @test_vtstq_s32(
1560 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1561 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
1562 // CHECK: [[TMP2:%.*]] = and <4 x i32> %v1, %v2
1563 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
1564 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
1565 // CHECK: ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) {
  return vtstq_s32(v1, v2);
}
1570 // CHECK-LABEL: @test_vtstq_u8(
1571 // CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
1572 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
1573 // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
1574 // CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vtstq_u8(v1, v2);
}
1579 // CHECK-LABEL: @test_vtstq_u16(
1580 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1581 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1582 // CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2
1583 // CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
1584 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
1585 // CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vtstq_u16(v1, v2);
}
1590 // CHECK-LABEL: @test_vtstq_u32(
1591 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1592 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
1593 // CHECK: [[TMP2:%.*]] = and <4 x i32> %v1, %v2
1594 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
1595 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
1596 // CHECK: ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vtstq_u32(v1, v2);
}
1601 // CHECK-LABEL: @test_vtstq_s64(
1602 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1603 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
1604 // CHECK: [[TMP2:%.*]] = and <2 x i64> %v1, %v2
1605 // CHECK: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
1606 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
1607 // CHECK: ret <2 x i64> [[VTST_I]]
uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) {
  return vtstq_s64(v1, v2);
}
1612 // CHECK-LABEL: @test_vtstq_u64(
1613 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1614 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
1615 // CHECK: [[TMP2:%.*]] = and <2 x i64> %v1, %v2
1616 // CHECK: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
1617 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
1618 // CHECK: ret <2 x i64> [[VTST_I]]
uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vtstq_u64(v1, v2);
}
1623 // CHECK-LABEL: @test_vtst_p8(
1624 // CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
1625 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
1626 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
1627 // CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_p8(poly8x8_t v1, poly8x8_t v2) {
  return vtst_p8(v1, v2);
}
1632 // CHECK-LABEL: @test_vtst_p16(
1633 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1634 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1635 // CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2
1636 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
1637 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
1638 // CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_p16(poly16x4_t v1, poly16x4_t v2) {
  return vtst_p16(v1, v2);
}
1643 // CHECK-LABEL: @test_vtstq_p8(
1644 // CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
1645 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
1646 // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
1647 // CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_p8(poly8x16_t v1, poly8x16_t v2) {
  return vtstq_p8(v1, v2);
}
1652 // CHECK-LABEL: @test_vtstq_p16(
1653 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1654 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1655 // CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2
1656 // CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
1657 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
1658 // CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_p16(poly16x8_t v1, poly16x8_t v2) {
  return vtstq_p16(v1, v2);
}
1663 // CHECK-LABEL: @test_vtst_s64(
1664 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
1665 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
1666 // CHECK: [[TMP2:%.*]] = and <1 x i64> %a, %b
1667 // CHECK: [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
1668 // CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
1669 // CHECK: ret <1 x i64> [[VTST_I]]
uint64x1_t test_vtst_s64(int64x1_t a, int64x1_t b) {
  return vtst_s64(a, b);
}
1674 // CHECK-LABEL: @test_vtst_u64(
1675 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
1676 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
1677 // CHECK: [[TMP2:%.*]] = and <1 x i64> %a, %b
1678 // CHECK: [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
1679 // CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
1680 // CHECK: ret <1 x i64> [[VTST_I]]
uint64x1_t test_vtst_u64(uint64x1_t a, uint64x1_t b) {
  return vtst_u64(a, b);
}
1685 // CHECK-LABEL: @test_vceq_s8(
1686 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
1687 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1688 // CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) {
  return vceq_s8(v1, v2);
}
1693 // CHECK-LABEL: @test_vceq_s16(
1694 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
1695 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1696 // CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_s16(int16x4_t v1, int16x4_t v2) {
  return vceq_s16(v1, v2);
}
1701 // CHECK-LABEL: @test_vceq_s32(
1702 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
1703 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1704 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) {
  return vceq_s32(v1, v2);
}
1709 // CHECK-LABEL: @test_vceq_s64(
1710 // CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
1711 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1712 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_s64(int64x1_t a, int64x1_t b) {
  return vceq_s64(a, b);
}
1717 // CHECK-LABEL: @test_vceq_u64(
1718 // CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
1719 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1720 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_u64(uint64x1_t a, uint64x1_t b) {
  return vceq_u64(a, b);
}
1725 // CHECK-LABEL: @test_vceq_f32(
1726 // CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x float> %v1, %v2
1727 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1728 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) {
  return vceq_f32(v1, v2);
}
1733 // CHECK-LABEL: @test_vceq_f64(
1734 // CHECK: [[CMP_I:%.*]] = fcmp oeq <1 x double> %a, %b
1735 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1736 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) {
  return vceq_f64(a, b);
}
1741 // CHECK-LABEL: @test_vceq_u8(
1742 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
1743 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1744 // CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) {
  return vceq_u8(v1, v2);
}
1749 // CHECK-LABEL: @test_vceq_u16(
1750 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
1751 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1752 // CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_u16(uint16x4_t v1, uint16x4_t v2) {
  return vceq_u16(v1, v2);
}
1757 // CHECK-LABEL: @test_vceq_u32(
1758 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
1759 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1760 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_u32(uint32x2_t v1, uint32x2_t v2) {
  return vceq_u32(v1, v2);
}
1765 // CHECK-LABEL: @test_vceq_p8(
1766 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
1767 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1768 // CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_p8(poly8x8_t v1, poly8x8_t v2) {
  return vceq_p8(v1, v2);
}
1773 // CHECK-LABEL: @test_vceqq_s8(
1774 // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
1775 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1776 // CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_s8(int8x16_t v1, int8x16_t v2) {
  return vceqq_s8(v1, v2);
}
1781 // CHECK-LABEL: @test_vceqq_s16(
1782 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
1783 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1784 // CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_s16(int16x8_t v1, int16x8_t v2) {
  return vceqq_s16(v1, v2);
}
1789 // CHECK-LABEL: @test_vceqq_s32(
1790 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
1791 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1792 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_s32(int32x4_t v1, int32x4_t v2) {
  return vceqq_s32(v1, v2);
}
1797 // CHECK-LABEL: @test_vceqq_f32(
1798 // CHECK: [[CMP_I:%.*]] = fcmp oeq <4 x float> %v1, %v2
1799 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1800 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) {
  return vceqq_f32(v1, v2);
}
1805 // CHECK-LABEL: @test_vceqq_u8(
1806 // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
1807 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1808 // CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vceqq_u8(v1, v2);
}
1813 // CHECK-LABEL: @test_vceqq_u16(
1814 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
1815 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1816 // CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vceqq_u16(v1, v2);
}
1821 // CHECK-LABEL: @test_vceqq_u32(
1822 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
1823 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1824 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vceqq_u32(v1, v2);
}
1829 // CHECK-LABEL: @test_vceqq_p8(
1830 // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
1831 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1832 // CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_p8(poly8x16_t v1, poly8x16_t v2) {
  return vceqq_p8(v1, v2);
}
1837 // CHECK-LABEL: @test_vceqq_s64(
1838 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
1839 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1840 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_s64(int64x2_t v1, int64x2_t v2) {
  return vceqq_s64(v1, v2);
}
1845 // CHECK-LABEL: @test_vceqq_u64(
1846 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
1847 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1848 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vceqq_u64(v1, v2);
}
1853 // CHECK-LABEL: @test_vceqq_f64(
1854 // CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x double> %v1, %v2
1855 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1856 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) {
  return vceqq_f64(v1, v2);
}
1861 // CHECK-LABEL: @test_vcge_s8(
1862 // CHECK: [[CMP_I:%.*]] = icmp sge <8 x i8> %v1, %v2
1863 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1864 // CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_s8(int8x8_t v1, int8x8_t v2) {
  return vcge_s8(v1, v2);
}
1869 // CHECK-LABEL: @test_vcge_s16(
1870 // CHECK: [[CMP_I:%.*]] = icmp sge <4 x i16> %v1, %v2
1871 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1872 // CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_s16(int16x4_t v1, int16x4_t v2) {
  return vcge_s16(v1, v2);
}
1877 // CHECK-LABEL: @test_vcge_s32(
1878 // CHECK: [[CMP_I:%.*]] = icmp sge <2 x i32> %v1, %v2
1879 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1880 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) {
  return vcge_s32(v1, v2);
}
1885 // CHECK-LABEL: @test_vcge_s64(
1886 // CHECK: [[CMP_I:%.*]] = icmp sge <1 x i64> %a, %b
1887 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1888 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcge_s64(int64x1_t a, int64x1_t b) {
  return vcge_s64(a, b);
}
1893 // CHECK-LABEL: @test_vcge_u64(
1894 // CHECK: [[CMP_I:%.*]] = icmp uge <1 x i64> %a, %b
1895 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1896 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcge_u64(uint64x1_t a, uint64x1_t b) {
  return vcge_u64(a, b);
}
1901 // CHECK-LABEL: @test_vcge_f32(
1902 // CHECK: [[CMP_I:%.*]] = fcmp oge <2 x float> %v1, %v2
1903 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1904 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) {
  return vcge_f32(v1, v2);
}
1909 // CHECK-LABEL: @test_vcge_f64(
1910 // CHECK: [[CMP_I:%.*]] = fcmp oge <1 x double> %a, %b
1911 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1912 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcge_f64(float64x1_t a, float64x1_t b) {
  return vcge_f64(a, b);
}
1917 // CHECK-LABEL: @test_vcge_u8(
1918 // CHECK: [[CMP_I:%.*]] = icmp uge <8 x i8> %v1, %v2
1919 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1920 // CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcge_u8(v1, v2);
}
1925 // CHECK-LABEL: @test_vcge_u16(
1926 // CHECK: [[CMP_I:%.*]] = icmp uge <4 x i16> %v1, %v2
1927 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1928 // CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcge_u16(v1, v2);
}
1933 // CHECK-LABEL: @test_vcge_u32(
1934 // CHECK: [[CMP_I:%.*]] = icmp uge <2 x i32> %v1, %v2
1935 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1936 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcge_u32(v1, v2);
}
1941 // CHECK-LABEL: @test_vcgeq_s8(
1942 // CHECK: [[CMP_I:%.*]] = icmp sge <16 x i8> %v1, %v2
1943 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1944 // CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_s8(int8x16_t v1, int8x16_t v2) {
  return vcgeq_s8(v1, v2);
}
1949 // CHECK-LABEL: @test_vcgeq_s16(
1950 // CHECK: [[CMP_I:%.*]] = icmp sge <8 x i16> %v1, %v2
1951 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1952 // CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_s16(int16x8_t v1, int16x8_t v2) {
  return vcgeq_s16(v1, v2);
}
1957 // CHECK-LABEL: @test_vcgeq_s32(
1958 // CHECK: [[CMP_I:%.*]] = icmp sge <4 x i32> %v1, %v2
1959 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1960 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_s32(int32x4_t v1, int32x4_t v2) {
  return vcgeq_s32(v1, v2);
}
1965 // CHECK-LABEL: @test_vcgeq_f32(
1966 // CHECK: [[CMP_I:%.*]] = fcmp oge <4 x float> %v1, %v2
1967 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1968 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) {
  return vcgeq_f32(v1, v2);
}
1973 // CHECK-LABEL: @test_vcgeq_u8(
1974 // CHECK: [[CMP_I:%.*]] = icmp uge <16 x i8> %v1, %v2
1975 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1976 // CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcgeq_u8(v1, v2);
}
1981 // CHECK-LABEL: @test_vcgeq_u16(
1982 // CHECK: [[CMP_I:%.*]] = icmp uge <8 x i16> %v1, %v2
1983 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1984 // CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcgeq_u16(v1, v2);
}
1989 // CHECK-LABEL: @test_vcgeq_u32(
1990 // CHECK: [[CMP_I:%.*]] = icmp uge <4 x i32> %v1, %v2
1991 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1992 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcgeq_u32(v1, v2);
}
1997 // CHECK-LABEL: @test_vcgeq_s64(
1998 // CHECK: [[CMP_I:%.*]] = icmp sge <2 x i64> %v1, %v2
1999 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2000 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgeq_s64(int64x2_t v1, int64x2_t v2) {
  return vcgeq_s64(v1, v2);
}
2005 // CHECK-LABEL: @test_vcgeq_u64(
2006 // CHECK: [[CMP_I:%.*]] = icmp uge <2 x i64> %v1, %v2
2007 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2008 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgeq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcgeq_u64(v1, v2);
}
2013 // CHECK-LABEL: @test_vcgeq_f64(
2014 // CHECK: [[CMP_I:%.*]] = fcmp oge <2 x double> %v1, %v2
2015 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2016 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) {
  return vcgeq_f64(v1, v2);
}
2021 // CHECK-LABEL: @test_vcle_s8(
2022 // CHECK: [[CMP_I:%.*]] = icmp sle <8 x i8> %v1, %v2
2023 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2024 // CHECK: ret <8 x i8> [[SEXT_I]]
// Notes about vcle:
// LE condition predicate implemented as GE, so check reversed operands.
// Using registers other than v0, v1 is possible, but would be odd.
uint8x8_t test_vcle_s8(int8x8_t v1, int8x8_t v2) {
  return vcle_s8(v1, v2);
}
2032 // CHECK-LABEL: @test_vcle_s16(
2033 // CHECK: [[CMP_I:%.*]] = icmp sle <4 x i16> %v1, %v2
2034 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2035 // CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_s16(int16x4_t v1, int16x4_t v2) {
  return vcle_s16(v1, v2);
}
2040 // CHECK-LABEL: @test_vcle_s32(
2041 // CHECK: [[CMP_I:%.*]] = icmp sle <2 x i32> %v1, %v2
2042 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2043 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) {
  return vcle_s32(v1, v2);
}
2048 // CHECK-LABEL: @test_vcle_s64(
2049 // CHECK: [[CMP_I:%.*]] = icmp sle <1 x i64> %a, %b
2050 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2051 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_s64(int64x1_t a, int64x1_t b) {
  return vcle_s64(a, b);
}
2056 // CHECK-LABEL: @test_vcle_u64(
2057 // CHECK: [[CMP_I:%.*]] = icmp ule <1 x i64> %a, %b
2058 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2059 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_u64(uint64x1_t a, uint64x1_t b) {
  return vcle_u64(a, b);
}
2064 // CHECK-LABEL: @test_vcle_f32(
2065 // CHECK: [[CMP_I:%.*]] = fcmp ole <2 x float> %v1, %v2
2066 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2067 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) {
  return vcle_f32(v1, v2);
}
2072 // CHECK-LABEL: @test_vcle_f64(
2073 // CHECK: [[CMP_I:%.*]] = fcmp ole <1 x double> %a, %b
2074 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2075 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) {
  return vcle_f64(a, b);
}
2080 // CHECK-LABEL: @test_vcle_u8(
2081 // CHECK: [[CMP_I:%.*]] = icmp ule <8 x i8> %v1, %v2
2082 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2083 // CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcle_u8(v1, v2);
}
2088 // CHECK-LABEL: @test_vcle_u16(
2089 // CHECK: [[CMP_I:%.*]] = icmp ule <4 x i16> %v1, %v2
2090 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2091 // CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcle_u16(v1, v2);
}
2096 // CHECK-LABEL: @test_vcle_u32(
2097 // CHECK: [[CMP_I:%.*]] = icmp ule <2 x i32> %v1, %v2
2098 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2099 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcle_u32(v1, v2);
}
2104 // CHECK-LABEL: @test_vcleq_s8(
2105 // CHECK: [[CMP_I:%.*]] = icmp sle <16 x i8> %v1, %v2
2106 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2107 // CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_s8(int8x16_t v1, int8x16_t v2) {
  return vcleq_s8(v1, v2);
}
2112 // CHECK-LABEL: @test_vcleq_s16(
2113 // CHECK: [[CMP_I:%.*]] = icmp sle <8 x i16> %v1, %v2
2114 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
2115 // CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_s16(int16x8_t v1, int16x8_t v2) {
  return vcleq_s16(v1, v2);
}
2120 // CHECK-LABEL: @test_vcleq_s32(
2121 // CHECK: [[CMP_I:%.*]] = icmp sle <4 x i32> %v1, %v2
2122 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2123 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_s32(int32x4_t v1, int32x4_t v2) {
  return vcleq_s32(v1, v2);
}
2128 // CHECK-LABEL: @test_vcleq_f32(
2129 // CHECK: [[CMP_I:%.*]] = fcmp ole <4 x float> %v1, %v2
2130 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2131 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) {
  return vcleq_f32(v1, v2);
}
2136 // CHECK-LABEL: @test_vcleq_u8(
2137 // CHECK: [[CMP_I:%.*]] = icmp ule <16 x i8> %v1, %v2
2138 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2139 // CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcleq_u8(v1, v2);
}
2144 // CHECK-LABEL: @test_vcleq_u16(
2145 // CHECK: [[CMP_I:%.*]] = icmp ule <8 x i16> %v1, %v2
2146 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
2147 // CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcleq_u16(v1, v2);
}
2152 // CHECK-LABEL: @test_vcleq_u32(
2153 // CHECK: [[CMP_I:%.*]] = icmp ule <4 x i32> %v1, %v2
2154 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2155 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcleq_u32(v1, v2);
}
2160 // CHECK-LABEL: @test_vcleq_s64(
2161 // CHECK: [[CMP_I:%.*]] = icmp sle <2 x i64> %v1, %v2
2162 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2163 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_s64(int64x2_t v1, int64x2_t v2) {
  return vcleq_s64(v1, v2);
}
2168 // CHECK-LABEL: @test_vcleq_u64(
2169 // CHECK: [[CMP_I:%.*]] = icmp ule <2 x i64> %v1, %v2
2170 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2171 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcleq_u64(v1, v2);
}
2176 // CHECK-LABEL: @test_vcleq_f64(
2177 // CHECK: [[CMP_I:%.*]] = fcmp ole <2 x double> %v1, %v2
2178 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2179 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) {
  return vcleq_f64(v1, v2);
}
2184 // CHECK-LABEL: @test_vcgt_s8(
2185 // CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i8> %v1, %v2
2186 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2187 // CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_s8(int8x8_t v1, int8x8_t v2) {
  return vcgt_s8(v1, v2);
}
2192 // CHECK-LABEL: @test_vcgt_s16(
2193 // CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i16> %v1, %v2
2194 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2195 // CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_s16(int16x4_t v1, int16x4_t v2) {
  return vcgt_s16(v1, v2);
}
2200 // CHECK-LABEL: @test_vcgt_s32(
2201 // CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i32> %v1, %v2
2202 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2203 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) {
  return vcgt_s32(v1, v2);
}
2208 // CHECK-LABEL: @test_vcgt_s64(
2209 // CHECK: [[CMP_I:%.*]] = icmp sgt <1 x i64> %a, %b
2210 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2211 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_s64(int64x1_t a, int64x1_t b) {
  return vcgt_s64(a, b);
}
2216 // CHECK-LABEL: @test_vcgt_u64(
2217 // CHECK: [[CMP_I:%.*]] = icmp ugt <1 x i64> %a, %b
2218 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2219 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_u64(uint64x1_t a, uint64x1_t b) {
  return vcgt_u64(a, b);
}
2224 // CHECK-LABEL: @test_vcgt_f32(
2225 // CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x float> %v1, %v2
2226 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2227 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) {
  return vcgt_f32(v1, v2);
}
2232 // CHECK-LABEL: @test_vcgt_f64(
2233 // CHECK: [[CMP_I:%.*]] = fcmp ogt <1 x double> %a, %b
2234 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2235 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) {
  return vcgt_f64(a, b);
}
2240 // CHECK-LABEL: @test_vcgt_u8(
2241 // CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i8> %v1, %v2
2242 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2243 // CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcgt_u8(v1, v2);
}
2248 // CHECK-LABEL: @test_vcgt_u16(
2249 // CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i16> %v1, %v2
2250 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2251 // CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcgt_u16(v1, v2);
}
2256 // CHECK-LABEL: @test_vcgt_u32(
2257 // CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i32> %v1, %v2
2258 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2259 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcgt_u32(v1, v2);
}
2264 // CHECK-LABEL: @test_vcgtq_s8(
2265 // CHECK: [[CMP_I:%.*]] = icmp sgt <16 x i8> %v1, %v2
2266 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2267 // CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_s8(int8x16_t v1, int8x16_t v2) {
  return vcgtq_s8(v1, v2);
}
2272 // CHECK-LABEL: @test_vcgtq_s16(
2273 // CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i16> %v1, %v2
2274 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
2275 // CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_s16(int16x8_t v1, int16x8_t v2) {
  return vcgtq_s16(v1, v2);
}
2280 // CHECK-LABEL: @test_vcgtq_s32(
2281 // CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i32> %v1, %v2
2282 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2283 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_s32(int32x4_t v1, int32x4_t v2) {
  return vcgtq_s32(v1, v2);
}
2288 // CHECK-LABEL: @test_vcgtq_f32(
2289 // CHECK: [[CMP_I:%.*]] = fcmp ogt <4 x float> %v1, %v2
2290 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2291 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) {
  return vcgtq_f32(v1, v2);
}
2296 // CHECK-LABEL: @test_vcgtq_u8(
2297 // CHECK: [[CMP_I:%.*]] = icmp ugt <16 x i8> %v1, %v2
2298 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2299 // CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcgtq_u8(v1, v2);
}
2304 // CHECK-LABEL: @test_vcgtq_u16(
2305 // CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i16> %v1, %v2
2306 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
2307 // CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcgtq_u16(v1, v2);
}
2312 // CHECK-LABEL: @test_vcgtq_u32(
2313 // CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i32> %v1, %v2
2314 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2315 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcgtq_u32(v1, v2);
}
2320 // CHECK-LABEL: @test_vcgtq_s64(
2321 // CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i64> %v1, %v2
2322 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2323 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_s64(int64x2_t v1, int64x2_t v2) {
  return vcgtq_s64(v1, v2);
}
2328 // CHECK-LABEL: @test_vcgtq_u64(
2329 // CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i64> %v1, %v2
2330 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2331 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcgtq_u64(v1, v2);
}
2336 // CHECK-LABEL: @test_vcgtq_f64(
2337 // CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x double> %v1, %v2
2338 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2339 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) {
  return vcgtq_f64(v1, v2);
}
2344 // CHECK-LABEL: @test_vclt_s8(
2345 // CHECK: [[CMP_I:%.*]] = icmp slt <8 x i8> %v1, %v2
2346 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2347 // CHECK: ret <8 x i8> [[SEXT_I]]
// Notes about vclt:
// LT condition predicate implemented as GT, so check reversed operands.
// Using registers other than v0, v1 is possible, but would be odd.
uint8x8_t test_vclt_s8(int8x8_t v1, int8x8_t v2) {
  return vclt_s8(v1, v2);
}
2355 // CHECK-LABEL: @test_vclt_s16(
2356 // CHECK: [[CMP_I:%.*]] = icmp slt <4 x i16> %v1, %v2
2357 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2358 // CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_s16(int16x4_t v1, int16x4_t v2) {
  return vclt_s16(v1, v2);
}
2363 // CHECK-LABEL: @test_vclt_s32(
2364 // CHECK: [[CMP_I:%.*]] = icmp slt <2 x i32> %v1, %v2
2365 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2366 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) {
  return vclt_s32(v1, v2);
}
2371 // CHECK-LABEL: @test_vclt_s64(
2372 // CHECK: [[CMP_I:%.*]] = icmp slt <1 x i64> %a, %b
2373 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2374 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_s64(int64x1_t a, int64x1_t b) {
  return vclt_s64(a, b);
}
2379 // CHECK-LABEL: @test_vclt_u64(
2380 // CHECK: [[CMP_I:%.*]] = icmp ult <1 x i64> %a, %b
2381 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2382 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_u64(uint64x1_t a, uint64x1_t b) {
  return vclt_u64(a, b);
}
2387 // CHECK-LABEL: @test_vclt_f32(
2388 // CHECK: [[CMP_I:%.*]] = fcmp olt <2 x float> %v1, %v2
2389 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2390 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) {
  return vclt_f32(v1, v2);
}
2395 // CHECK-LABEL: @test_vclt_f64(
2396 // CHECK: [[CMP_I:%.*]] = fcmp olt <1 x double> %a, %b
2397 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2398 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) {
  return vclt_f64(a, b);
}
2403 // CHECK-LABEL: @test_vclt_u8(
2404 // CHECK: [[CMP_I:%.*]] = icmp ult <8 x i8> %v1, %v2
2405 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2406 // CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) {
  return vclt_u8(v1, v2);
}
2411 // CHECK-LABEL: @test_vclt_u16(
2412 // CHECK: [[CMP_I:%.*]] = icmp ult <4 x i16> %v1, %v2
2413 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2414 // CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_u16(uint16x4_t v1, uint16x4_t v2) {
  return vclt_u16(v1, v2);
}
2419 // CHECK-LABEL: @test_vclt_u32(
2420 // CHECK: [[CMP_I:%.*]] = icmp ult <2 x i32> %v1, %v2
2421 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2422 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_u32(uint32x2_t v1, uint32x2_t v2) {
  return vclt_u32(v1, v2);
}
2427 // CHECK-LABEL: @test_vcltq_s8(
2428 // CHECK: [[CMP_I:%.*]] = icmp slt <16 x i8> %v1, %v2
2429 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2430 // CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_s8(int8x16_t v1, int8x16_t v2) {
  return vcltq_s8(v1, v2);
}
2435 // CHECK-LABEL: @test_vcltq_s16(
2436 // CHECK: [[CMP_I:%.*]] = icmp slt <8 x i16> %v1, %v2
2437 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
2438 // CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_s16(int16x8_t v1, int16x8_t v2) {
  return vcltq_s16(v1, v2);
}
2443 // CHECK-LABEL: @test_vcltq_s32(
2444 // CHECK: [[CMP_I:%.*]] = icmp slt <4 x i32> %v1, %v2
2445 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2446 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_s32(int32x4_t v1, int32x4_t v2) {
  return vcltq_s32(v1, v2);
}
2451 // CHECK-LABEL: @test_vcltq_f32(
2452 // CHECK: [[CMP_I:%.*]] = fcmp olt <4 x float> %v1, %v2
2453 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2454 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) {
  return vcltq_f32(v1, v2);
}
2459 // CHECK-LABEL: @test_vcltq_u8(
2460 // CHECK: [[CMP_I:%.*]] = icmp ult <16 x i8> %v1, %v2
2461 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2462 // CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcltq_u8(v1, v2);
}
2467 // CHECK-LABEL: @test_vcltq_u16(
2468 // CHECK: [[CMP_I:%.*]] = icmp ult <8 x i16> %v1, %v2
2469 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
2470 // CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcltq_u16(v1, v2);
}
2475 // CHECK-LABEL: @test_vcltq_u32(
2476 // CHECK: [[CMP_I:%.*]] = icmp ult <4 x i32> %v1, %v2
2477 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2478 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcltq_u32(v1, v2);
}
2483 // CHECK-LABEL: @test_vcltq_s64(
2484 // CHECK: [[CMP_I:%.*]] = icmp slt <2 x i64> %v1, %v2
2485 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2486 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_s64(int64x2_t v1, int64x2_t v2) {
  return vcltq_s64(v1, v2);
}
2491 // CHECK-LABEL: @test_vcltq_u64(
2492 // CHECK: [[CMP_I:%.*]] = icmp ult <2 x i64> %v1, %v2
2493 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2494 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcltq_u64(v1, v2);
}
2499 // CHECK-LABEL: @test_vcltq_f64(
2500 // CHECK: [[CMP_I:%.*]] = fcmp olt <2 x double> %v1, %v2
2501 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2502 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) {
  return vcltq_f64(v1, v2);
}
2507 // CHECK-LABEL: @test_vhadd_s8(
2508 // CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
2509 // CHECK: ret <8 x i8> [[VHADD_V_I]]
int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) {
  return vhadd_s8(v1, v2);
}
2514 // CHECK-LABEL: @test_vhadd_s16(
2515 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
2516 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
2517 // CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
2518 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
2519 // CHECK: ret <4 x i16> [[VHADD_V2_I]]
int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) {
  return vhadd_s16(v1, v2);
}
2524 // CHECK-LABEL: @test_vhadd_s32(
2525 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
2526 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
2527 // CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
2528 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
2529 // CHECK: ret <2 x i32> [[VHADD_V2_I]]
int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) {
  return vhadd_s32(v1, v2);
}
2534 // CHECK-LABEL: @test_vhadd_u8(
2535 // CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
2536 // CHECK: ret <8 x i8> [[VHADD_V_I]]
uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vhadd_u8(v1, v2);
}
2541 // CHECK-LABEL: @test_vhadd_u16(
2542 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
2543 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
2544 // CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
2545 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
2546 // CHECK: ret <4 x i16> [[VHADD_V2_I]]
uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vhadd_u16(v1, v2);
}
2551 // CHECK-LABEL: @test_vhadd_u32(
2552 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
2553 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
2554 // CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
2555 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
2556 // CHECK: ret <2 x i32> [[VHADD_V2_I]]
uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vhadd_u32(v1, v2);
}
2561 // CHECK-LABEL: @test_vhaddq_s8(
2562 // CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
2563 // CHECK: ret <16 x i8> [[VHADDQ_V_I]]
int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vhaddq_s8(v1, v2);
}
2568 // CHECK-LABEL: @test_vhaddq_s16(
2569 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
2570 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
2571 // CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
2572 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
2573 // CHECK: ret <8 x i16> [[VHADDQ_V2_I]]
int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vhaddq_s16(v1, v2);
}
2578 // CHECK-LABEL: @test_vhaddq_s32(
2579 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
2580 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
2581 // CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
2582 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
2583 // CHECK: ret <4 x i32> [[VHADDQ_V2_I]]
int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vhaddq_s32(v1, v2);
}
2588 // CHECK-LABEL: @test_vhaddq_u8(
2589 // CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
2590 // CHECK: ret <16 x i8> [[VHADDQ_V_I]]
uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vhaddq_u8(v1, v2);
}
2595 // CHECK-LABEL: @test_vhaddq_u16(
2596 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
2597 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
2598 // CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
2599 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
2600 // CHECK: ret <8 x i16> [[VHADDQ_V2_I]]
uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vhaddq_u16(v1, v2);
}
2605 // CHECK-LABEL: @test_vhaddq_u32(
2606 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
2607 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
2608 // CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
2609 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
2610 // CHECK: ret <4 x i32> [[VHADDQ_V2_I]]
uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vhaddq_u32(v1, v2);
}
2615 // CHECK-LABEL: @test_vhsub_s8(
2616 // CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %v1, <8 x i8> %v2)
2617 // CHECK: ret <8 x i8> [[VHSUB_V_I]]
int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) {
  return vhsub_s8(v1, v2);
}
2622 // CHECK-LABEL: @test_vhsub_s16(
2623 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
2624 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
2625 // CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> %v1, <4 x i16> %v2)
2626 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
2627 // CHECK: ret <4 x i16> [[VHSUB_V2_I]]
int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) {
  return vhsub_s16(v1, v2);
}
2632 // CHECK-LABEL: @test_vhsub_s32(
2633 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
2634 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
2635 // CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> %v1, <2 x i32> %v2)
2636 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
2637 // CHECK: ret <2 x i32> [[VHSUB_V2_I]]
int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) {
  return vhsub_s32(v1, v2);
}
2642 // CHECK-LABEL: @test_vhsub_u8(
2643 // CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %v1, <8 x i8> %v2)
2644 // CHECK: ret <8 x i8> [[VHSUB_V_I]]
uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) {
  return vhsub_u8(v1, v2);
}
2649 // CHECK-LABEL: @test_vhsub_u16(
2650 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
2651 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
2652 // CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> %v1, <4 x i16> %v2)
2653 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
2654 // CHECK: ret <4 x i16> [[VHSUB_V2_I]]
uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) {
  return vhsub_u16(v1, v2);
}
2659 // CHECK-LABEL: @test_vhsub_u32(
2660 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
2661 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
2662 // CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> %v1, <2 x i32> %v2)
2663 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
2664 // CHECK: ret <2 x i32> [[VHSUB_V2_I]]
uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) {
  return vhsub_u32(v1, v2);
}
2669 // CHECK-LABEL: @test_vhsubq_s8(
2670 // CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %v1, <16 x i8> %v2)
2671 // CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) {
  return vhsubq_s8(v1, v2);
}
2676 // CHECK-LABEL: @test_vhsubq_s16(
2677 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
2678 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
2679 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> %v1, <8 x i16> %v2)
2680 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
2681 // CHECK: ret <8 x i16> [[VHSUBQ_V2_I]]
int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) {
  return vhsubq_s16(v1, v2);
}
2686 // CHECK-LABEL: @test_vhsubq_s32(
2687 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
2688 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
2689 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> %v1, <4 x i32> %v2)
2690 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
2691 // CHECK: ret <4 x i32> [[VHSUBQ_V2_I]]
int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) {
  return vhsubq_s32(v1, v2);
}
2696 // CHECK-LABEL: @test_vhsubq_u8(
2697 // CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %v1, <16 x i8> %v2)
2698 // CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vhsubq_u8(v1, v2);
}
2703 // CHECK-LABEL: @test_vhsubq_u16(
2704 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
2705 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
2706 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> %v1, <8 x i16> %v2)
2707 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
2708 // CHECK: ret <8 x i16> [[VHSUBQ_V2_I]]
uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vhsubq_u16(v1, v2);
}
2713 // CHECK-LABEL: @test_vhsubq_u32(
2714 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
2715 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
2716 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> %v1, <4 x i32> %v2)
2717 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
2718 // CHECK: ret <4 x i32> [[VHSUBQ_V2_I]]
uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vhsubq_u32(v1, v2);
}
2723 // CHECK-LABEL: @test_vrhadd_s8(
2724 // CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
2725 // CHECK: ret <8 x i8> [[VRHADD_V_I]]
int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) {
  return vrhadd_s8(v1, v2);
}
2730 // CHECK-LABEL: @test_vrhadd_s16(
2731 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
2732 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
2733 // CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
2734 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
2735 // CHECK: ret <4 x i16> [[VRHADD_V2_I]]
int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) {
  return vrhadd_s16(v1, v2);
}
2740 // CHECK-LABEL: @test_vrhadd_s32(
2741 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
2742 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
2743 // CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
2744 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
2745 // CHECK: ret <2 x i32> [[VRHADD_V2_I]]
2746 int32x2_t
test_vrhadd_s32(int32x2_t v1
, int32x2_t v2
) {
2747 return vrhadd_s32(v1
, v2
);
2750 // CHECK-LABEL: @test_vrhadd_u8(
2751 // CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
2752 // CHECK: ret <8 x i8> [[VRHADD_V_I]]
2753 uint8x8_t
test_vrhadd_u8(uint8x8_t v1
, uint8x8_t v2
) {
2754 return vrhadd_u8(v1
, v2
);
2757 // CHECK-LABEL: @test_vrhadd_u16(
2758 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
2759 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
2760 // CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
2761 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
2762 // CHECK: ret <4 x i16> [[VRHADD_V2_I]]
2763 uint16x4_t
test_vrhadd_u16(uint16x4_t v1
, uint16x4_t v2
) {
2764 return vrhadd_u16(v1
, v2
);
2767 // CHECK-LABEL: @test_vrhadd_u32(
2768 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
2769 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
2770 // CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
2771 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
2772 // CHECK: ret <2 x i32> [[VRHADD_V2_I]]
2773 uint32x2_t
test_vrhadd_u32(uint32x2_t v1
, uint32x2_t v2
) {
2774 return vrhadd_u32(v1
, v2
);
// CHECK-LABEL: @test_vrhaddq_s8(
// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vrhaddq_s8(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VRHADDQ_V2_I]]
int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vrhaddq_s16(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VRHADDQ_V2_I]]
int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vrhaddq_s32(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_u8(
// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vrhaddq_u8(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VRHADDQ_V2_I]]
uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vrhaddq_u16(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VRHADDQ_V2_I]]
uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vrhaddq_u32(v1, v2);
}
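
// Editorial note (not part of the original test): vrhadd/vrhaddq map to
// srhadd/urhadd, the rounding halving add: each lane computes (a + b + 1) >> 1
// on the widened sum, so the halved result rounds up instead of truncating.
// Hypothetical example (illustrative only):
//   uint8x8_t r = vrhadd_u8(vdup_n_u8(3), vdup_n_u8(4));  // (3 + 4 + 1) >> 1 = 4 in every lane
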
// CHECK-LABEL: @test_vqadd_s8(
// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQADD_V_I]]
int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
  return vqadd_s8(a, b);
}

// CHECK-LABEL: @test_vqadd_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQADD_V2_I]]
int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
  return vqadd_s16(a, b);
}

// CHECK-LABEL: @test_vqadd_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQADD_V2_I]]
int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
  return vqadd_s32(a, b);
}

// CHECK-LABEL: @test_vqadd_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQADD_V2_I]]
int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
  return vqadd_s64(a, b);
}

// CHECK-LABEL: @test_vqadd_u8(
// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQADD_V_I]]
uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
  return vqadd_u8(a, b);
}

// CHECK-LABEL: @test_vqadd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQADD_V2_I]]
uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
  return vqadd_u16(a, b);
}

// CHECK-LABEL: @test_vqadd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQADD_V2_I]]
uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
  return vqadd_u32(a, b);
}

// CHECK-LABEL: @test_vqadd_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQADD_V2_I]]
uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
  return vqadd_u64(a, b);
}
// CHECK-LABEL: @test_vqaddq_s8(
// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
  return vqaddq_s8(a, b);
}

// CHECK-LABEL: @test_vqaddq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQADDQ_V2_I]]
int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
  return vqaddq_s16(a, b);
}

// CHECK-LABEL: @test_vqaddq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQADDQ_V2_I]]
int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
  return vqaddq_s32(a, b);
}

// CHECK-LABEL: @test_vqaddq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQADDQ_V2_I]]
int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
  return vqaddq_s64(a, b);
}

// CHECK-LABEL: @test_vqaddq_u8(
// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vqaddq_u8(a, b);
}

// CHECK-LABEL: @test_vqaddq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQADDQ_V2_I]]
uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vqaddq_u16(a, b);
}

// CHECK-LABEL: @test_vqaddq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQADDQ_V2_I]]
uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vqaddq_u32(a, b);
}

// CHECK-LABEL: @test_vqaddq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQADDQ_V2_I]]
uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vqaddq_u64(a, b);
}
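
// Editorial note (not part of the original test): vqadd/vqaddq lower to the
// sqadd/uqadd intrinsics, which clamp to the element type's limits instead of
// wrapping. Hypothetical example (illustrative only, not checked above):
//   int8x8_t s = vqadd_s8(vdup_n_s8(100), vdup_n_s8(100));   // saturates to 127, not -56
//   uint8x8_t u = vqadd_u8(vdup_n_u8(200), vdup_n_u8(100));  // saturates to 255, not 44
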
// CHECK-LABEL: @test_vqsub_s8(
// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQSUB_V_I]]
int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
  return vqsub_s8(a, b);
}

// CHECK-LABEL: @test_vqsub_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQSUB_V2_I]]
int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
  return vqsub_s16(a, b);
}

// CHECK-LABEL: @test_vqsub_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQSUB_V2_I]]
int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
  return vqsub_s32(a, b);
}

// CHECK-LABEL: @test_vqsub_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQSUB_V2_I]]
int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
  return vqsub_s64(a, b);
}

// CHECK-LABEL: @test_vqsub_u8(
// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQSUB_V_I]]
uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
  return vqsub_u8(a, b);
}

// CHECK-LABEL: @test_vqsub_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQSUB_V2_I]]
uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
  return vqsub_u16(a, b);
}

// CHECK-LABEL: @test_vqsub_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQSUB_V2_I]]
uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
  return vqsub_u32(a, b);
}

// CHECK-LABEL: @test_vqsub_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQSUB_V2_I]]
uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
  return vqsub_u64(a, b);
}
// CHECK-LABEL: @test_vqsubq_s8(
// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
  return vqsubq_s8(a, b);
}

// CHECK-LABEL: @test_vqsubq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQSUBQ_V2_I]]
int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
  return vqsubq_s16(a, b);
}

// CHECK-LABEL: @test_vqsubq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQSUBQ_V2_I]]
int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
  return vqsubq_s32(a, b);
}

// CHECK-LABEL: @test_vqsubq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQSUBQ_V2_I]]
int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
  return vqsubq_s64(a, b);
}

// CHECK-LABEL: @test_vqsubq_u8(
// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vqsubq_u8(a, b);
}

// CHECK-LABEL: @test_vqsubq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQSUBQ_V2_I]]
uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vqsubq_u16(a, b);
}

// CHECK-LABEL: @test_vqsubq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQSUBQ_V2_I]]
uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vqsubq_u32(a, b);
}

// CHECK-LABEL: @test_vqsubq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQSUBQ_V2_I]]
uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
  return vqsubq_u64(a, b);
}
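
// Editorial note (not part of the original test): vqsub/vqsubq use sqsub/uqsub,
// the saturating subtract. Unsigned results clamp at 0; signed results clamp at
// the element's minimum/maximum. Hypothetical example (illustrative only):
//   uint8x8_t z = vqsub_u8(vdup_n_u8(5), vdup_n_u8(9));  // clamps to 0 rather than wrapping to 252
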
// CHECK-LABEL: @test_vshl_s8(
// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VSHL_V_I]]
int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
  return vshl_s8(a, b);
}

// CHECK-LABEL: @test_vshl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VSHL_V2_I]]
int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
  return vshl_s16(a, b);
}

// CHECK-LABEL: @test_vshl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VSHL_V2_I]]
int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
  return vshl_s32(a, b);
}

// CHECK-LABEL: @test_vshl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VSHL_V2_I]]
int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
  return vshl_s64(a, b);
}

// CHECK-LABEL: @test_vshl_u8(
// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.ushl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VSHL_V_I]]
uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
  return vshl_u8(a, b);
}

// CHECK-LABEL: @test_vshl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.ushl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VSHL_V2_I]]
uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
  return vshl_u16(a, b);
}

// CHECK-LABEL: @test_vshl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ushl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VSHL_V2_I]]
uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
  return vshl_u32(a, b);
}

// CHECK-LABEL: @test_vshl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VSHL_V2_I]]
uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
  return vshl_u64(a, b);
}
// CHECK-LABEL: @test_vshlq_s8(
// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
  return vshlq_s8(a, b);
}

// CHECK-LABEL: @test_vshlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VSHLQ_V2_I]]
int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
  return vshlq_s16(a, b);
}

// CHECK-LABEL: @test_vshlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VSHLQ_V2_I]]
int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
  return vshlq_s32(a, b);
}

// CHECK-LABEL: @test_vshlq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VSHLQ_V2_I]]
int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
  return vshlq_s64(a, b);
}

// CHECK-LABEL: @test_vshlq_u8(
// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
  return vshlq_u8(a, b);
}

// CHECK-LABEL: @test_vshlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VSHLQ_V2_I]]
uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
  return vshlq_u16(a, b);
}

// CHECK-LABEL: @test_vshlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VSHLQ_V2_I]]
uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
  return vshlq_u32(a, b);
}

// CHECK-LABEL: @test_vshlq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VSHLQ_V2_I]]
uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
  return vshlq_u64(a, b);
}
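
// Editorial note (not part of the original test): the register-shift forms
// (vshl/vshlq, lowered to sshl/ushl above) take a *signed* per-lane shift count
// in the second operand even for unsigned data, which is why the unsigned tests
// pass an int vector as b; a negative count shifts right (arithmetically for
// sshl, logically for ushl). Hypothetical example (illustrative only):
//   int32x2_t l = vshl_s32(vdup_n_s32(16), vdup_n_s32(-2));  // 16 >> 2 = 4 in each lane
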
3275 // CHECK-LABEL: @test_vqshl_s8(
3276 // CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b)
3277 // CHECK: ret <8 x i8> [[VQSHL_V_I]]
3278 int8x8_t
test_vqshl_s8(int8x8_t a
, int8x8_t b
) {
3279 return vqshl_s8(a
, b
);
3282 // CHECK-LABEL: @test_vqshl_s16(
3283 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3284 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3285 // CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3286 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
3287 // CHECK: ret <4 x i16> [[VQSHL_V2_I]]
3288 int16x4_t
test_vqshl_s16(int16x4_t a
, int16x4_t b
) {
3289 return vqshl_s16(a
, b
);
3292 // CHECK-LABEL: @test_vqshl_s32(
3293 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3294 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3295 // CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3296 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
3297 // CHECK: ret <2 x i32> [[VQSHL_V2_I]]
3298 int32x2_t
test_vqshl_s32(int32x2_t a
, int32x2_t b
) {
3299 return vqshl_s32(a
, b
);
3302 // CHECK-LABEL: @test_vqshl_s64(
3303 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3304 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3305 // CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3306 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
3307 // CHECK: ret <1 x i64> [[VQSHL_V2_I]]
3308 int64x1_t
test_vqshl_s64(int64x1_t a
, int64x1_t b
) {
3309 return vqshl_s64(a
, b
);
3312 // CHECK-LABEL: @test_vqshl_u8(
3313 // CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> %b)
3314 // CHECK: ret <8 x i8> [[VQSHL_V_I]]
3315 uint8x8_t
test_vqshl_u8(uint8x8_t a
, int8x8_t b
) {
3316 return vqshl_u8(a
, b
);
3319 // CHECK-LABEL: @test_vqshl_u16(
3320 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3321 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3322 // CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3323 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
3324 // CHECK: ret <4 x i16> [[VQSHL_V2_I]]
3325 uint16x4_t
test_vqshl_u16(uint16x4_t a
, int16x4_t b
) {
3326 return vqshl_u16(a
, b
);
3329 // CHECK-LABEL: @test_vqshl_u32(
3330 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3331 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3332 // CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3333 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
3334 // CHECK: ret <2 x i32> [[VQSHL_V2_I]]
3335 uint32x2_t
test_vqshl_u32(uint32x2_t a
, int32x2_t b
) {
3336 return vqshl_u32(a
, b
);
3339 // CHECK-LABEL: @test_vqshl_u64(
3340 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3341 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3342 // CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3343 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
3344 // CHECK: ret <1 x i64> [[VQSHL_V2_I]]
3345 uint64x1_t
test_vqshl_u64(uint64x1_t a
, int64x1_t b
) {
3346 return vqshl_u64(a
, b
);
3349 // CHECK-LABEL: @test_vqshlq_s8(
3350 // CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> %b)
3351 // CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
3352 int8x16_t
test_vqshlq_s8(int8x16_t a
, int8x16_t b
) {
3353 return vqshlq_s8(a
, b
);
3356 // CHECK-LABEL: @test_vqshlq_s16(
3357 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3358 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3359 // CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %a, <8 x i16> %b)
3360 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
3361 // CHECK: ret <8 x i16> [[VQSHLQ_V2_I]]
3362 int16x8_t
test_vqshlq_s16(int16x8_t a
, int16x8_t b
) {
3363 return vqshlq_s16(a
, b
);
3366 // CHECK-LABEL: @test_vqshlq_s32(
3367 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3368 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3369 // CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %a, <4 x i32> %b)
3370 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
3371 // CHECK: ret <4 x i32> [[VQSHLQ_V2_I]]
3372 int32x4_t
test_vqshlq_s32(int32x4_t a
, int32x4_t b
) {
3373 return vqshlq_s32(a
, b
);
3376 // CHECK-LABEL: @test_vqshlq_s64(
3377 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3378 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3379 // CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %a, <2 x i64> %b)
3380 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
3381 // CHECK: ret <2 x i64> [[VQSHLQ_V2_I]]
3382 int64x2_t
test_vqshlq_s64(int64x2_t a
, int64x2_t b
) {
3383 return vqshlq_s64(a
, b
);
3386 // CHECK-LABEL: @test_vqshlq_u8(
3387 // CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> %b)
3388 // CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
3389 uint8x16_t
test_vqshlq_u8(uint8x16_t a
, int8x16_t b
) {
3390 return vqshlq_u8(a
, b
);
3393 // CHECK-LABEL: @test_vqshlq_u16(
3394 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3395 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3396 // CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %a, <8 x i16> %b)
3397 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
3398 // CHECK: ret <8 x i16> [[VQSHLQ_V2_I]]
3399 uint16x8_t
test_vqshlq_u16(uint16x8_t a
, int16x8_t b
) {
3400 return vqshlq_u16(a
, b
);
3403 // CHECK-LABEL: @test_vqshlq_u32(
3404 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3405 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3406 // CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %a, <4 x i32> %b)
3407 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
3408 // CHECK: ret <4 x i32> [[VQSHLQ_V2_I]]
3409 uint32x4_t
test_vqshlq_u32(uint32x4_t a
, int32x4_t b
) {
3410 return vqshlq_u32(a
, b
);
3413 // CHECK-LABEL: @test_vqshlq_u64(
3414 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3415 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3416 // CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %a, <2 x i64> %b)
3417 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
3418 // CHECK: ret <2 x i64> [[VQSHLQ_V2_I]]
3419 uint64x2_t
test_vqshlq_u64(uint64x2_t a
, int64x2_t b
) {
3420 return vqshlq_u64(a
, b
);
3423 // CHECK-LABEL: @test_vrshl_s8(
3424 // CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b)
3425 // CHECK: ret <8 x i8> [[VRSHL_V_I]]
3426 int8x8_t
test_vrshl_s8(int8x8_t a
, int8x8_t b
) {
3427 return vrshl_s8(a
, b
);
3430 // CHECK-LABEL: @test_vrshl_s16(
3431 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3432 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3433 // CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3434 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
3435 // CHECK: ret <4 x i16> [[VRSHL_V2_I]]
3436 int16x4_t
test_vrshl_s16(int16x4_t a
, int16x4_t b
) {
3437 return vrshl_s16(a
, b
);
3440 // CHECK-LABEL: @test_vrshl_s32(
3441 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3442 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3443 // CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3444 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
3445 // CHECK: ret <2 x i32> [[VRSHL_V2_I]]
3446 int32x2_t
test_vrshl_s32(int32x2_t a
, int32x2_t b
) {
3447 return vrshl_s32(a
, b
);
3450 // CHECK-LABEL: @test_vrshl_s64(
3451 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3452 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3453 // CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3454 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
3455 // CHECK: ret <1 x i64> [[VRSHL_V2_I]]
3456 int64x1_t
test_vrshl_s64(int64x1_t a
, int64x1_t b
) {
3457 return vrshl_s64(a
, b
);
3460 // CHECK-LABEL: @test_vrshl_u8(
3461 // CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> %b)
3462 // CHECK: ret <8 x i8> [[VRSHL_V_I]]
3463 uint8x8_t
test_vrshl_u8(uint8x8_t a
, int8x8_t b
) {
3464 return vrshl_u8(a
, b
);
3467 // CHECK-LABEL: @test_vrshl_u16(
3468 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3469 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3470 // CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3471 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
3472 // CHECK: ret <4 x i16> [[VRSHL_V2_I]]
3473 uint16x4_t
test_vrshl_u16(uint16x4_t a
, int16x4_t b
) {
3474 return vrshl_u16(a
, b
);
3477 // CHECK-LABEL: @test_vrshl_u32(
3478 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3479 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3480 // CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3481 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
3482 // CHECK: ret <2 x i32> [[VRSHL_V2_I]]
3483 uint32x2_t
test_vrshl_u32(uint32x2_t a
, int32x2_t b
) {
3484 return vrshl_u32(a
, b
);
3487 // CHECK-LABEL: @test_vrshl_u64(
3488 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3489 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3490 // CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3491 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
3492 // CHECK: ret <1 x i64> [[VRSHL_V2_I]]
3493 uint64x1_t
test_vrshl_u64(uint64x1_t a
, int64x1_t b
) {
3494 return vrshl_u64(a
, b
);
3497 // CHECK-LABEL: @test_vrshlq_s8(
3498 // CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> %b)
3499 // CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
3500 int8x16_t
test_vrshlq_s8(int8x16_t a
, int8x16_t b
) {
3501 return vrshlq_s8(a
, b
);
3504 // CHECK-LABEL: @test_vrshlq_s16(
3505 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3506 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3507 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %a, <8 x i16> %b)
3508 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
3509 // CHECK: ret <8 x i16> [[VRSHLQ_V2_I]]
3510 int16x8_t
test_vrshlq_s16(int16x8_t a
, int16x8_t b
) {
3511 return vrshlq_s16(a
, b
);
3514 // CHECK-LABEL: @test_vrshlq_s32(
3515 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3516 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3517 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %a, <4 x i32> %b)
3518 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
3519 // CHECK: ret <4 x i32> [[VRSHLQ_V2_I]]
3520 int32x4_t
test_vrshlq_s32(int32x4_t a
, int32x4_t b
) {
3521 return vrshlq_s32(a
, b
);
3524 // CHECK-LABEL: @test_vrshlq_s64(
3525 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3526 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3527 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %a, <2 x i64> %b)
3528 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
3529 // CHECK: ret <2 x i64> [[VRSHLQ_V2_I]]
3530 int64x2_t
test_vrshlq_s64(int64x2_t a
, int64x2_t b
) {
3531 return vrshlq_s64(a
, b
);
3534 // CHECK-LABEL: @test_vrshlq_u8(
3535 // CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> %b)
3536 // CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
3537 uint8x16_t
test_vrshlq_u8(uint8x16_t a
, int8x16_t b
) {
3538 return vrshlq_u8(a
, b
);
3541 // CHECK-LABEL: @test_vrshlq_u16(
3542 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3543 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3544 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %a, <8 x i16> %b)
3545 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
3546 // CHECK: ret <8 x i16> [[VRSHLQ_V2_I]]
3547 uint16x8_t
test_vrshlq_u16(uint16x8_t a
, int16x8_t b
) {
3548 return vrshlq_u16(a
, b
);
3551 // CHECK-LABEL: @test_vrshlq_u32(
3552 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3553 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3554 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %a, <4 x i32> %b)
3555 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
3556 // CHECK: ret <4 x i32> [[VRSHLQ_V2_I]]
3557 uint32x4_t
test_vrshlq_u32(uint32x4_t a
, int32x4_t b
) {
3558 return vrshlq_u32(a
, b
);
3561 // CHECK-LABEL: @test_vrshlq_u64(
3562 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3563 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3564 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %a, <2 x i64> %b)
3565 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
3566 // CHECK: ret <2 x i64> [[VRSHLQ_V2_I]]
3567 uint64x2_t
test_vrshlq_u64(uint64x2_t a
, int64x2_t b
) {
3568 return vrshlq_u64(a
, b
);
3571 // CHECK-LABEL: @test_vqrshl_s8(
3572 // CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b)
3573 // CHECK: ret <8 x i8> [[VQRSHL_V_I]]
3574 int8x8_t
test_vqrshl_s8(int8x8_t a
, int8x8_t b
) {
3575 return vqrshl_s8(a
, b
);
3578 // CHECK-LABEL: @test_vqrshl_s16(
3579 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3580 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3581 // CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3582 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
3583 // CHECK: ret <4 x i16> [[VQRSHL_V2_I]]
3584 int16x4_t
test_vqrshl_s16(int16x4_t a
, int16x4_t b
) {
3585 return vqrshl_s16(a
, b
);
3588 // CHECK-LABEL: @test_vqrshl_s32(
3589 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3590 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3591 // CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3592 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
3593 // CHECK: ret <2 x i32> [[VQRSHL_V2_I]]
3594 int32x2_t
test_vqrshl_s32(int32x2_t a
, int32x2_t b
) {
3595 return vqrshl_s32(a
, b
);
3598 // CHECK-LABEL: @test_vqrshl_s64(
3599 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3600 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3601 // CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3602 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
3603 // CHECK: ret <1 x i64> [[VQRSHL_V2_I]]
3604 int64x1_t
test_vqrshl_s64(int64x1_t a
, int64x1_t b
) {
3605 return vqrshl_s64(a
, b
);
3608 // CHECK-LABEL: @test_vqrshl_u8(
3609 // CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %a, <8 x i8> %b)
3610 // CHECK: ret <8 x i8> [[VQRSHL_V_I]]
3611 uint8x8_t
test_vqrshl_u8(uint8x8_t a
, int8x8_t b
) {
3612 return vqrshl_u8(a
, b
);
3615 // CHECK-LABEL: @test_vqrshl_u16(
3616 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3617 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3618 // CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %a, <4 x i16> %b)
3619 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
3620 // CHECK: ret <4 x i16> [[VQRSHL_V2_I]]
3621 uint16x4_t
test_vqrshl_u16(uint16x4_t a
, int16x4_t b
) {
3622 return vqrshl_u16(a
, b
);
3625 // CHECK-LABEL: @test_vqrshl_u32(
3626 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3627 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3628 // CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %a, <2 x i32> %b)
3629 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
3630 // CHECK: ret <2 x i32> [[VQRSHL_V2_I]]
3631 uint32x2_t
test_vqrshl_u32(uint32x2_t a
, int32x2_t b
) {
3632 return vqrshl_u32(a
, b
);
3635 // CHECK-LABEL: @test_vqrshl_u64(
3636 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3637 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3638 // CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %a, <1 x i64> %b)
3639 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
3640 // CHECK: ret <1 x i64> [[VQRSHL_V2_I]]
3641 uint64x1_t
test_vqrshl_u64(uint64x1_t a
, int64x1_t b
) {
3642 return vqrshl_u64(a
, b
);
// CHECK-LABEL: @test_vqrshlq_s8(
// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
  return vqrshlq_s8(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]]
int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
  return vqrshlq_s16(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]]
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
  return vqrshlq_s32(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]]
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
  return vqrshlq_s64(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u8(
// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqrshlq_u8(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]]
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqrshlq_u16(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]]
uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqrshlq_u32(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]]
uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqrshlq_u64(a, b);
}
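
// Editorial note (not part of the original test): vqrshl/vqrshlq (sqrshl/uqrshl)
// combine the behaviours exercised above: the per-lane count may be negative,
// in which case the right shift rounds, and left shifts saturate to the element
// limits. Hypothetical example (illustrative only):
//   int8x8_t q = vqrshl_s8(vdup_n_s8(100), vdup_n_s8(1));  // 200 saturates to 127
//   int8x8_t r = vqrshl_s8(vdup_n_s8(5), vdup_n_s8(-1));   // (5 + 1) >> 1 = 3 (rounded)
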
// CHECK-LABEL: @test_vsli_n_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 0)
// CHECK: ret <1 x i64> [[VSLI_N2]]
poly64x1_t test_vsli_n_p64(poly64x1_t a, poly64x1_t b) {
  return vsli_n_p64(a, b, 0);
}

// CHECK-LABEL: @test_vsliq_n_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 0)
// CHECK: ret <2 x i64> [[VSLI_N2]]
poly64x2_t test_vsliq_n_p64(poly64x2_t a, poly64x2_t b) {
  return vsliq_n_p64(a, b, 0);
}
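
// Editorial note (not part of the original test): vsli_n/vsliq_n are the
// shift-left-and-insert (SLI) operations: b is shifted left by the immediate and
// the low bits of the result are taken from a. With an immediate of 0, as in the
// two tests above, the result is simply b, e.g. (hypothetical, illustrative only):
//   poly64x1_t p = vsli_n_p64(a, b, 0);  // bitwise equal to b when the shift is 0
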
3741 // CHECK-LABEL: @test_vmax_s8(
3742 // CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %a, <8 x i8> %b)
3743 // CHECK: ret <8 x i8> [[VMAX_I]]
3744 int8x8_t
test_vmax_s8(int8x8_t a
, int8x8_t b
) {
3745 return vmax_s8(a
, b
);
3748 // CHECK-LABEL: @test_vmax_s16(
3749 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3750 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3751 // CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %a, <4 x i16> %b)
3752 // CHECK: ret <4 x i16> [[VMAX2_I]]
3753 int16x4_t
test_vmax_s16(int16x4_t a
, int16x4_t b
) {
3754 return vmax_s16(a
, b
);
3757 // CHECK-LABEL: @test_vmax_s32(
3758 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3759 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3760 // CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %a, <2 x i32> %b)
3761 // CHECK: ret <2 x i32> [[VMAX2_I]]
3762 int32x2_t
test_vmax_s32(int32x2_t a
, int32x2_t b
) {
3763 return vmax_s32(a
, b
);
3766 // CHECK-LABEL: @test_vmax_u8(
3767 // CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %a, <8 x i8> %b)
3768 // CHECK: ret <8 x i8> [[VMAX_I]]
3769 uint8x8_t
test_vmax_u8(uint8x8_t a
, uint8x8_t b
) {
3770 return vmax_u8(a
, b
);
3773 // CHECK-LABEL: @test_vmax_u16(
3774 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3775 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3776 // CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %a, <4 x i16> %b)
3777 // CHECK: ret <4 x i16> [[VMAX2_I]]
3778 uint16x4_t
test_vmax_u16(uint16x4_t a
, uint16x4_t b
) {
3779 return vmax_u16(a
, b
);
3782 // CHECK-LABEL: @test_vmax_u32(
3783 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3784 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3785 // CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %a, <2 x i32> %b)
3786 // CHECK: ret <2 x i32> [[VMAX2_I]]
3787 uint32x2_t
test_vmax_u32(uint32x2_t a
, uint32x2_t b
) {
3788 return vmax_u32(a
, b
);
3791 // CHECK-LABEL: @test_vmax_f32(
3792 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
3793 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
3794 // CHECK: [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %a, <2 x float> %b)
3795 // CHECK: ret <2 x float> [[VMAX2_I]]
3796 float32x2_t
test_vmax_f32(float32x2_t a
, float32x2_t b
) {
3797 return vmax_f32(a
, b
);
3800 // CHECK-LABEL: @test_vmaxq_s8(
3801 // CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %a, <16 x i8> %b)
3802 // CHECK: ret <16 x i8> [[VMAX_I]]
3803 int8x16_t
test_vmaxq_s8(int8x16_t a
, int8x16_t b
) {
3804 return vmaxq_s8(a
, b
);
3807 // CHECK-LABEL: @test_vmaxq_s16(
3808 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3809 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3810 // CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %a, <8 x i16> %b)
3811 // CHECK: ret <8 x i16> [[VMAX2_I]]
3812 int16x8_t
test_vmaxq_s16(int16x8_t a
, int16x8_t b
) {
3813 return vmaxq_s16(a
, b
);
3816 // CHECK-LABEL: @test_vmaxq_s32(
3817 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3818 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3819 // CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
3820 // CHECK: ret <4 x i32> [[VMAX2_I]]
3821 int32x4_t
test_vmaxq_s32(int32x4_t a
, int32x4_t b
) {
3822 return vmaxq_s32(a
, b
);
3825 // CHECK-LABEL: @test_vmaxq_u8(
3826 // CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %a, <16 x i8> %b)
3827 // CHECK: ret <16 x i8> [[VMAX_I]]
3828 uint8x16_t
test_vmaxq_u8(uint8x16_t a
, uint8x16_t b
) {
3829 return vmaxq_u8(a
, b
);
3832 // CHECK-LABEL: @test_vmaxq_u16(
3833 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3834 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3835 // CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %a, <8 x i16> %b)
3836 // CHECK: ret <8 x i16> [[VMAX2_I]]
3837 uint16x8_t
test_vmaxq_u16(uint16x8_t a
, uint16x8_t b
) {
3838 return vmaxq_u16(a
, b
);
3841 // CHECK-LABEL: @test_vmaxq_u32(
3842 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3843 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3844 // CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %a, <4 x i32> %b)
3845 // CHECK: ret <4 x i32> [[VMAX2_I]]
3846 uint32x4_t
test_vmaxq_u32(uint32x4_t a
, uint32x4_t b
) {
3847 return vmaxq_u32(a
, b
);
3850 // CHECK-LABEL: @test_vmaxq_f32(
3851 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
3852 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
3853 // CHECK: [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %a, <4 x float> %b)
3854 // CHECK: ret <4 x float> [[VMAX2_I]]
3855 float32x4_t
test_vmaxq_f32(float32x4_t a
, float32x4_t b
) {
3856 return vmaxq_f32(a
, b
);
3859 // CHECK-LABEL: @test_vmaxq_f64(
3860 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
3861 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
3862 // CHECK: [[VMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %a, <2 x double> %b)
3863 // CHECK: ret <2 x double> [[VMAX2_I]]
3864 float64x2_t
test_vmaxq_f64(float64x2_t a
, float64x2_t b
) {
3865 return vmaxq_f64(a
, b
);
// CHECK-LABEL: @test_vmin_s8(
// CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VMIN_I]]
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
  return vmin_s8(a, b);
}

// CHECK-LABEL: @test_vmin_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i16> [[VMIN2_I]]
int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
  return vmin_s16(a, b);
}

// CHECK-LABEL: @test_vmin_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i32> [[VMIN2_I]]
int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
  return vmin_s32(a, b);
}

// CHECK-LABEL: @test_vmin_u8(
// CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VMIN_I]]
uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
  return vmin_u8(a, b);
}

// CHECK-LABEL: @test_vmin_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i16> [[VMIN2_I]]
uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
  return vmin_u16(a, b);
}

// CHECK-LABEL: @test_vmin_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i32> [[VMIN2_I]]
uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
  return vmin_u32(a, b);
}

// CHECK-LABEL: @test_vmin_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x float> [[VMIN2_I]]
float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
  return vmin_f32(a, b);
}

// CHECK-LABEL: @test_vminq_s8(
// CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VMIN_I]]
int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
  return vminq_s8(a, b);
}

// CHECK-LABEL: @test_vminq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i16> [[VMIN2_I]]
int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
  return vminq_s16(a, b);
}

// CHECK-LABEL: @test_vminq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: ret <4 x i32> [[VMIN2_I]]
int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
  return vminq_s32(a, b);
}

// CHECK-LABEL: @test_vminq_u8(
// CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VMIN_I]]
uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
  return vminq_u8(a, b);
}

// CHECK-LABEL: @test_vminq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i16> [[VMIN2_I]]
uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
  return vminq_u16(a, b);
}

// CHECK-LABEL: @test_vminq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: ret <4 x i32> [[VMIN2_I]]
uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
  return vminq_u32(a, b);
}

// CHECK-LABEL: @test_vminq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x float> [[VMIN2_I]]
float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
  return vminq_f32(a, b);
}

// CHECK-LABEL: @test_vminq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: ret <2 x double> [[VMIN2_I]]
float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) {
  return vminq_f64(a, b);
}
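
// The *nm variants below follow the IEEE 754-2008 maxNum/minNum rules (a quiet
// NaN in one operand yields the other operand) and lower to the
// aarch64.neon.fmaxnm/fminnm intrinsics rather than fmax/fmin.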

// CHECK-LABEL: @test_vmaxnm_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x float> [[VMAXNM2_I]]
float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) {
  return vmaxnm_f32(a, b);
}

// CHECK-LABEL: @test_vmaxnmq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x float> [[VMAXNM2_I]]
float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
  return vmaxnmq_f32(a, b);
}

// CHECK-LABEL: @test_vmaxnmq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: ret <2 x double> [[VMAXNM2_I]]
float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) {
  return vmaxnmq_f64(a, b);
}

// CHECK-LABEL: @test_vminnm_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x float> [[VMINNM2_I]]
float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) {
  return vminnm_f32(a, b);
}

// CHECK-LABEL: @test_vminnmq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x float> [[VMINNM2_I]]
float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) {
  return vminnmq_f32(a, b);
}

// CHECK-LABEL: @test_vminnmq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: ret <2 x double> [[VMINNM2_I]]
float64x2_t test_vminnmq_f64(float64x2_t a, float64x2_t b) {
  return vminnmq_f64(a, b);
}
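
// Pairwise maximum: each result lane is the maximum of one adjacent pair of
// lanes drawn from the concatenation of the two operands. These lower to the
// aarch64.neon.{s,u}maxp/fmaxp intrinsics.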

// CHECK-LABEL: @test_vpmax_s8(
// CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPMAX_I]]
int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
  return vpmax_s8(a, b);
}

// CHECK-LABEL: @test_vpmax_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i16> [[VPMAX2_I]]
int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
  return vpmax_s16(a, b);
}

// CHECK-LABEL: @test_vpmax_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i32> [[VPMAX2_I]]
int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
  return vpmax_s32(a, b);
}

// CHECK-LABEL: @test_vpmax_u8(
// CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPMAX_I]]
uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
  return vpmax_u8(a, b);
}

// CHECK-LABEL: @test_vpmax_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i16> [[VPMAX2_I]]
uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
  return vpmax_u16(a, b);
}

// CHECK-LABEL: @test_vpmax_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i32> [[VPMAX2_I]]
uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
  return vpmax_u32(a, b);
}

// CHECK-LABEL: @test_vpmax_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x float> [[VPMAX2_I]]
float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
  return vpmax_f32(a, b);
}

// CHECK-LABEL: @test_vpmaxq_s8(
// CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VPMAX_I]]
int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) {
  return vpmaxq_s8(a, b);
}

// CHECK-LABEL: @test_vpmaxq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i16> [[VPMAX2_I]]
int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) {
  return vpmaxq_s16(a, b);
}

// CHECK-LABEL: @test_vpmaxq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: ret <4 x i32> [[VPMAX2_I]]
int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) {
  return vpmaxq_s32(a, b);
}

// CHECK-LABEL: @test_vpmaxq_u8(
// CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VPMAX_I]]
uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vpmaxq_u8(a, b);
}

// CHECK-LABEL: @test_vpmaxq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i16> [[VPMAX2_I]]
uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vpmaxq_u16(a, b);
}

// CHECK-LABEL: @test_vpmaxq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: ret <4 x i32> [[VPMAX2_I]]
uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vpmaxq_u32(a, b);
}

// CHECK-LABEL: @test_vpmaxq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x float> [[VPMAX2_I]]
float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) {
  return vpmaxq_f32(a, b);
}

// CHECK-LABEL: @test_vpmaxq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: ret <2 x double> [[VPMAX2_I]]
float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) {
  return vpmaxq_f64(a, b);
}
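
// Pairwise minimum, lowering to the aarch64.neon.{s,u}minp/fminp intrinsics.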

// CHECK-LABEL: @test_vpmin_s8(
// CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPMIN_I]]
int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
  return vpmin_s8(a, b);
}

// CHECK-LABEL: @test_vpmin_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i16> [[VPMIN2_I]]
int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
  return vpmin_s16(a, b);
}

// CHECK-LABEL: @test_vpmin_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i32> [[VPMIN2_I]]
int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
  return vpmin_s32(a, b);
}

// CHECK-LABEL: @test_vpmin_u8(
// CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPMIN_I]]
uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
  return vpmin_u8(a, b);
}

// CHECK-LABEL: @test_vpmin_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i16> [[VPMIN2_I]]
uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
  return vpmin_u16(a, b);
}

// CHECK-LABEL: @test_vpmin_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i32> [[VPMIN2_I]]
uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
  return vpmin_u32(a, b);
}

// CHECK-LABEL: @test_vpmin_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x float> [[VPMIN2_I]]
float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
  return vpmin_f32(a, b);
}

// CHECK-LABEL: @test_vpminq_s8(
// CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VPMIN_I]]
int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) {
  return vpminq_s8(a, b);
}

// CHECK-LABEL: @test_vpminq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i16> [[VPMIN2_I]]
int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) {
  return vpminq_s16(a, b);
}

// CHECK-LABEL: @test_vpminq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: ret <4 x i32> [[VPMIN2_I]]
int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) {
  return vpminq_s32(a, b);
}

// CHECK-LABEL: @test_vpminq_u8(
// CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VPMIN_I]]
uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) {
  return vpminq_u8(a, b);
}

// CHECK-LABEL: @test_vpminq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i16> [[VPMIN2_I]]
uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) {
  return vpminq_u16(a, b);
}

// CHECK-LABEL: @test_vpminq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: ret <4 x i32> [[VPMIN2_I]]
uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) {
  return vpminq_u32(a, b);
}

// CHECK-LABEL: @test_vpminq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VPMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x float> [[VPMIN2_I]]
float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) {
  return vpminq_f32(a, b);
}

// CHECK-LABEL: @test_vpminq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: ret <2 x double> [[VPMIN2_I]]
float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) {
  return vpminq_f64(a, b);
}
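
// Pairwise maxNum/minNum: the NaN-suppressing pairwise reductions, lowering to
// the aarch64.neon.fmaxnmp/fminnmp intrinsics.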

// CHECK-LABEL: @test_vpmaxnm_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x float> [[VPMAXNM2_I]]
float32x2_t test_vpmaxnm_f32(float32x2_t a, float32x2_t b) {
  return vpmaxnm_f32(a, b);
}

// CHECK-LABEL: @test_vpmaxnmq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VPMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x float> [[VPMAXNM2_I]]
float32x4_t test_vpmaxnmq_f32(float32x4_t a, float32x4_t b) {
  return vpmaxnmq_f32(a, b);
}

// CHECK-LABEL: @test_vpmaxnmq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VPMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: ret <2 x double> [[VPMAXNM2_I]]
float64x2_t test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) {
  return vpmaxnmq_f64(a, b);
}

// CHECK-LABEL: @test_vpminnm_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x float> [[VPMINNM2_I]]
float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) {
  return vpminnm_f32(a, b);
}

// CHECK-LABEL: @test_vpminnmq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x float> [[VPMINNM2_I]]
float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) {
  return vpminnmq_f32(a, b);
}

// CHECK-LABEL: @test_vpminnmq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: ret <2 x double> [[VPMINNM2_I]]
float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) {
  return vpminnmq_f64(a, b);
}
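
// Pairwise add: each result lane is the sum of an adjacent lane pair from the
// concatenated operands, lowering to aarch64.neon.addp/faddp.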

// CHECK-LABEL: @test_vpadd_s8(
// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPADD_V_I]]
int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
  return vpadd_s8(a, b);
}

// CHECK-LABEL: @test_vpadd_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPADD_V2_I]]
int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
  return vpadd_s16(a, b);
}

// CHECK-LABEL: @test_vpadd_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPADD_V2_I]]
int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
  return vpadd_s32(a, b);
}

// CHECK-LABEL: @test_vpadd_u8(
// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPADD_V_I]]
uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
  return vpadd_u8(a, b);
}

// CHECK-LABEL: @test_vpadd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPADD_V2_I]]
uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
  return vpadd_u16(a, b);
}

// CHECK-LABEL: @test_vpadd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPADD_V2_I]]
uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
  return vpadd_u32(a, b);
}

// CHECK-LABEL: @test_vpadd_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VPADD_V2_I]]
float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
  return vpadd_f32(a, b);
}

// CHECK-LABEL: @test_vpaddq_s8(
// CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VPADDQ_V_I]]
int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) {
  return vpaddq_s8(a, b);
}

// CHECK-LABEL: @test_vpaddq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VPADDQ_V2_I]]
int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) {
  return vpaddq_s16(a, b);
}

// CHECK-LABEL: @test_vpaddq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VPADDQ_V2_I]]
int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) {
  return vpaddq_s32(a, b);
}

// CHECK-LABEL: @test_vpaddq_u8(
// CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VPADDQ_V_I]]
uint8x16_t test_vpaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vpaddq_u8(a, b);
}

// CHECK-LABEL: @test_vpaddq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VPADDQ_V2_I]]
uint16x8_t test_vpaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vpaddq_u16(a, b);
}

// CHECK-LABEL: @test_vpaddq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VPADDQ_V2_I]]
uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vpaddq_u32(a, b);
}

// CHECK-LABEL: @test_vpaddq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x float> [[VPADDQ_V2_I]]
float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
  return vpaddq_f32(a, b);
}

// CHECK-LABEL: @test_vpaddq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x double> [[VPADDQ_V2_I]]
float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) {
  return vpaddq_f64(a, b);
}
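
// vqdmulh returns the high half of the doubled, saturating product of each
// lane pair (roughly (2*a*b) >> element-bits, clamped), lowering to
// aarch64.neon.sqdmulh.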

// CHECK-LABEL: @test_vqdmulh_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQDMULH_V2_I]]
int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
  return vqdmulh_s16(a, b);
}

// CHECK-LABEL: @test_vqdmulh_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQDMULH_V2_I]]
int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
  return vqdmulh_s32(a, b);
}

// CHECK-LABEL: @test_vqdmulhq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]]
int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
  return vqdmulhq_s16(a, b);
}

// CHECK-LABEL: @test_vqdmulhq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]]
int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
  return vqdmulhq_s32(a, b);
}
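
// vqrdmulh is the rounding variant of vqdmulh: a rounding constant is added
// before the high half is taken. It lowers to aarch64.neon.sqrdmulh.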

// CHECK-LABEL: @test_vqrdmulh_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQRDMULH_V2_I]]
int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
  return vqrdmulh_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulh_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQRDMULH_V2_I]]
int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
  return vqrdmulh_s32(a, b);
}

// CHECK-LABEL: @test_vqrdmulhq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]]
int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
  return vqrdmulhq_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulhq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]]
int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
  return vqrdmulhq_s32(a, b);
}
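
// vmulx is the floating-point multiply-extended operation (FMULX): it behaves
// like an ordinary multiply except that 0 * infinity returns 2.0, and it
// lowers to aarch64.neon.fmulx.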

// CHECK-LABEL: @test_vmulx_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x float> [[VMULX2_I]]
float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) {
  return vmulx_f32(a, b);
}

// CHECK-LABEL: @test_vmulxq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x float> [[VMULX2_I]]
float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) {
  return vmulxq_f32(a, b);
}

// CHECK-LABEL: @test_vmulxq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: ret <2 x double> [[VMULX2_I]]
float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) {
  return vmulxq_f64(a, b);
}
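
// Shifts by an immediate are emitted as plain IR shift instructions with a
// splat shift amount rather than target intrinsics: shl for vshl_n, ashr for
// signed vshr_n, and lshr for unsigned vshr_n.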

// CHECK-LABEL: @test_vshl_n_s8(
// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, splat (i8 3)
// CHECK: ret <8 x i8> [[VSHL_N]]
int8x8_t test_vshl_n_s8(int8x8_t a) {
  return vshl_n_s8(a, 3);
}

// CHECK-LABEL: @test_vshl_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 3)
// CHECK: ret <4 x i16> [[VSHL_N]]
int16x4_t test_vshl_n_s16(int16x4_t a) {
  return vshl_n_s16(a, 3);
}

// CHECK-LABEL: @test_vshl_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 3)
// CHECK: ret <2 x i32> [[VSHL_N]]
int32x2_t test_vshl_n_s32(int32x2_t a) {
  return vshl_n_s32(a, 3);
}

// CHECK-LABEL: @test_vshlq_n_s8(
// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, splat (i8 3)
// CHECK: ret <16 x i8> [[VSHL_N]]
int8x16_t test_vshlq_n_s8(int8x16_t a) {
  return vshlq_n_s8(a, 3);
}

// CHECK-LABEL: @test_vshlq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 3)
// CHECK: ret <8 x i16> [[VSHL_N]]
int16x8_t test_vshlq_n_s16(int16x8_t a) {
  return vshlq_n_s16(a, 3);
}

// CHECK-LABEL: @test_vshlq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 3)
// CHECK: ret <4 x i32> [[VSHL_N]]
int32x4_t test_vshlq_n_s32(int32x4_t a) {
  return vshlq_n_s32(a, 3);
}

// CHECK-LABEL: @test_vshlq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 3)
// CHECK: ret <2 x i64> [[VSHL_N]]
int64x2_t test_vshlq_n_s64(int64x2_t a) {
  return vshlq_n_s64(a, 3);
}

// CHECK-LABEL: @test_vshl_n_u8(
// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, splat (i8 3)
// CHECK: ret <8 x i8> [[VSHL_N]]
uint8x8_t test_vshl_n_u8(uint8x8_t a) {
  return vshl_n_u8(a, 3);
}

// CHECK-LABEL: @test_vshl_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 3)
// CHECK: ret <4 x i16> [[VSHL_N]]
uint16x4_t test_vshl_n_u16(uint16x4_t a) {
  return vshl_n_u16(a, 3);
}

// CHECK-LABEL: @test_vshl_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 3)
// CHECK: ret <2 x i32> [[VSHL_N]]
uint32x2_t test_vshl_n_u32(uint32x2_t a) {
  return vshl_n_u32(a, 3);
}

// CHECK-LABEL: @test_vshlq_n_u8(
// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, splat (i8 3)
// CHECK: ret <16 x i8> [[VSHL_N]]
uint8x16_t test_vshlq_n_u8(uint8x16_t a) {
  return vshlq_n_u8(a, 3);
}

// CHECK-LABEL: @test_vshlq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 3)
// CHECK: ret <8 x i16> [[VSHL_N]]
uint16x8_t test_vshlq_n_u16(uint16x8_t a) {
  return vshlq_n_u16(a, 3);
}

// CHECK-LABEL: @test_vshlq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 3)
// CHECK: ret <4 x i32> [[VSHL_N]]
uint32x4_t test_vshlq_n_u32(uint32x4_t a) {
  return vshlq_n_u32(a, 3);
}

// CHECK-LABEL: @test_vshlq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 3)
// CHECK: ret <2 x i64> [[VSHL_N]]
uint64x2_t test_vshlq_n_u64(uint64x2_t a) {
  return vshlq_n_u64(a, 3);
}
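
// vshr_n: arithmetic (ashr) right shift for signed element types, logical
// (lshr) right shift for unsigned ones.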

// CHECK-LABEL: @test_vshr_n_s8(
// CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, splat (i8 3)
// CHECK: ret <8 x i8> [[VSHR_N]]
int8x8_t test_vshr_n_s8(int8x8_t a) {
  return vshr_n_s8(a, 3);
}

// CHECK-LABEL: @test_vshr_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], splat (i16 3)
// CHECK: ret <4 x i16> [[VSHR_N]]
int16x4_t test_vshr_n_s16(int16x4_t a) {
  return vshr_n_s16(a, 3);
}

// CHECK-LABEL: @test_vshr_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], splat (i32 3)
// CHECK: ret <2 x i32> [[VSHR_N]]
int32x2_t test_vshr_n_s32(int32x2_t a) {
  return vshr_n_s32(a, 3);
}

// CHECK-LABEL: @test_vshrq_n_s8(
// CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, splat (i8 3)
// CHECK: ret <16 x i8> [[VSHR_N]]
int8x16_t test_vshrq_n_s8(int8x16_t a) {
  return vshrq_n_s8(a, 3);
}

// CHECK-LABEL: @test_vshrq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 3)
// CHECK: ret <8 x i16> [[VSHR_N]]
int16x8_t test_vshrq_n_s16(int16x8_t a) {
  return vshrq_n_s16(a, 3);
}

// CHECK-LABEL: @test_vshrq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 3)
// CHECK: ret <4 x i32> [[VSHR_N]]
int32x4_t test_vshrq_n_s32(int32x4_t a) {
  return vshrq_n_s32(a, 3);
}

// CHECK-LABEL: @test_vshrq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 3)
// CHECK: ret <2 x i64> [[VSHR_N]]
int64x2_t test_vshrq_n_s64(int64x2_t a) {
  return vshrq_n_s64(a, 3);
}

// CHECK-LABEL: @test_vshr_n_u8(
// CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, splat (i8 3)
// CHECK: ret <8 x i8> [[VSHR_N]]
uint8x8_t test_vshr_n_u8(uint8x8_t a) {
  return vshr_n_u8(a, 3);
}

// CHECK-LABEL: @test_vshr_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], splat (i16 3)
// CHECK: ret <4 x i16> [[VSHR_N]]
uint16x4_t test_vshr_n_u16(uint16x4_t a) {
  return vshr_n_u16(a, 3);
}

// CHECK-LABEL: @test_vshr_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 3)
// CHECK: ret <2 x i32> [[VSHR_N]]
uint32x2_t test_vshr_n_u32(uint32x2_t a) {
  return vshr_n_u32(a, 3);
}

// CHECK-LABEL: @test_vshrq_n_u8(
// CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, splat (i8 3)
// CHECK: ret <16 x i8> [[VSHR_N]]
uint8x16_t test_vshrq_n_u8(uint8x16_t a) {
  return vshrq_n_u8(a, 3);
}

// CHECK-LABEL: @test_vshrq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 3)
// CHECK: ret <8 x i16> [[VSHR_N]]
uint16x8_t test_vshrq_n_u16(uint16x8_t a) {
  return vshrq_n_u16(a, 3);
}

// CHECK-LABEL: @test_vshrq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 3)
// CHECK: ret <4 x i32> [[VSHR_N]]
uint32x4_t test_vshrq_n_u32(uint32x4_t a) {
  return vshrq_n_u32(a, 3);
}

// CHECK-LABEL: @test_vshrq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 3)
// CHECK: ret <2 x i64> [[VSHR_N]]
uint64x2_t test_vshrq_n_u64(uint64x2_t a) {
  return vshrq_n_u64(a, 3);
}
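
// vsra_n: shift the second operand right by the immediate and accumulate into
// the first operand, i.e. a + (b >> n), so the IR is a shift followed by an
// add.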

// CHECK-LABEL: @test_vsra_n_s8(
// CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, splat (i8 3)
// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
  return vsra_n_s8(a, b, 3);
}

// CHECK-LABEL: @test_vsra_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], splat (i16 3)
// CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i16> [[TMP4]]
int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
  return vsra_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vsra_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], splat (i32 3)
// CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i32> [[TMP4]]
int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
  return vsra_n_s32(a, b, 3);
}

// CHECK-LABEL: @test_vsraq_n_s8(
// CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, splat (i8 3)
// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vsraq_n_s8(a, b, 3);
}

// CHECK-LABEL: @test_vsraq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], splat (i16 3)
// CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <8 x i16> [[TMP4]]
int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vsraq_n_s16(a, b, 3);
}

// CHECK-LABEL: @test_vsraq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], splat (i32 3)
// CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i32> [[TMP4]]
int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vsraq_n_s32(a, b, 3);
}

// CHECK-LABEL: @test_vsraq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], splat (i64 3)
// CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i64> [[TMP4]]
int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vsraq_n_s64(a, b, 3);
}

// CHECK-LABEL: @test_vsra_n_u8(
// CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, splat (i8 3)
// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsra_n_u8(a, b, 3);
}

// CHECK-LABEL: @test_vsra_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], splat (i16 3)
// CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i16> [[TMP4]]
uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsra_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vsra_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], splat (i32 3)
// CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i32> [[TMP4]]
uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsra_n_u32(a, b, 3);
}

// CHECK-LABEL: @test_vsraq_n_u8(
// CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, splat (i8 3)
// CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsraq_n_u8(a, b, 3);
}

// CHECK-LABEL: @test_vsraq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], splat (i16 3)
// CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
// CHECK: ret <8 x i16> [[TMP4]]
uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsraq_n_u16(a, b, 3);
}

// CHECK-LABEL: @test_vsraq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], splat (i32 3)
// CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
// CHECK: ret <4 x i32> [[TMP4]]
uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsraq_n_u32(a, b, 3);
}

// CHECK-LABEL: @test_vsraq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], splat (i64 3)
// CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <2 x i64> [[TMP4]]
uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsraq_n_u64(a, b, 3);
}
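
// Rounding right shifts by an immediate are expressed as srshl/urshl calls
// with a negated splat shift amount, since the rounding-shift intrinsic only
// has a left-shift form.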

// CHECK-LABEL: @test_vrshr_n_s8(
// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> splat (i8 -3))
// CHECK: ret <8 x i8> [[VRSHR_N]]
int8x8_t test_vrshr_n_s8(int8x8_t a) {
  return vrshr_n_s8(a, 3);
}

// CHECK-LABEL: @test_vrshr_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3))
// CHECK: ret <4 x i16> [[VRSHR_N1]]
int16x4_t test_vrshr_n_s16(int16x4_t a) {
  return vrshr_n_s16(a, 3);
}

// CHECK-LABEL: @test_vrshr_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3))
// CHECK: ret <2 x i32> [[VRSHR_N1]]
int32x2_t test_vrshr_n_s32(int32x2_t a) {
  return vrshr_n_s32(a, 3);
}

// CHECK-LABEL: @test_vrshrq_n_s8(
// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> splat (i8 -3))
// CHECK: ret <16 x i8> [[VRSHR_N]]
int8x16_t test_vrshrq_n_s8(int8x16_t a) {
  return vrshrq_n_s8(a, 3);
}

// CHECK-LABEL: @test_vrshrq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3))
// CHECK: ret <8 x i16> [[VRSHR_N1]]
int16x8_t test_vrshrq_n_s16(int16x8_t a) {
  return vrshrq_n_s16(a, 3);
}

// CHECK-LABEL: @test_vrshrq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3))
// CHECK: ret <4 x i32> [[VRSHR_N1]]
int32x4_t test_vrshrq_n_s32(int32x4_t a) {
  return vrshrq_n_s32(a, 3);
}

// CHECK-LABEL: @test_vrshrq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3))
// CHECK: ret <2 x i64> [[VRSHR_N1]]
int64x2_t test_vrshrq_n_s64(int64x2_t a) {
  return vrshrq_n_s64(a, 3);
}

// CHECK-LABEL: @test_vrshr_n_u8(
// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> splat (i8 -3))
// CHECK: ret <8 x i8> [[VRSHR_N]]
uint8x8_t test_vrshr_n_u8(uint8x8_t a) {
  return vrshr_n_u8(a, 3);
}

// CHECK-LABEL: @test_vrshr_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3))
// CHECK: ret <4 x i16> [[VRSHR_N1]]
uint16x4_t test_vrshr_n_u16(uint16x4_t a) {
  return vrshr_n_u16(a, 3);
}

// CHECK-LABEL: @test_vrshr_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3))
// CHECK: ret <2 x i32> [[VRSHR_N1]]
uint32x2_t test_vrshr_n_u32(uint32x2_t a) {
  return vrshr_n_u32(a, 3);
}

// CHECK-LABEL: @test_vrshrq_n_u8(
// CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> splat (i8 -3))
// CHECK: ret <16 x i8> [[VRSHR_N]]
uint8x16_t test_vrshrq_n_u8(uint8x16_t a) {
  return vrshrq_n_u8(a, 3);
}

// CHECK-LABEL: @test_vrshrq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3))
// CHECK: ret <8 x i16> [[VRSHR_N1]]
uint16x8_t test_vrshrq_n_u16(uint16x8_t a) {
  return vrshrq_n_u16(a, 3);
}

// CHECK-LABEL: @test_vrshrq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3))
// CHECK: ret <4 x i32> [[VRSHR_N1]]
uint32x4_t test_vrshrq_n_u32(uint32x4_t a) {
  return vrshrq_n_u32(a, 3);
}

// CHECK-LABEL: @test_vrshrq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3))
// CHECK: ret <2 x i64> [[VRSHR_N1]]
uint64x2_t test_vrshrq_n_u64(uint64x2_t a) {
  return vrshrq_n_u64(a, 3);
}
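
// vrsra_n combines the rounding right shift above with an accumulate into the
// first operand: the srshl/urshl result is added to a.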
5108 // CHECK-LABEL: @test_vrsra_n_s8(
5109 // CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> splat (i8 -3))
5110 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
5111 // CHECK: ret <8 x i8> [[TMP0]]
5112 int8x8_t
test_vrsra_n_s8(int8x8_t a
, int8x8_t b
) {
5113 return vrsra_n_s8(a
, b
, 3);
5116 // CHECK-LABEL: @test_vrsra_n_s16(
5117 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5118 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5119 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5120 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3))
5121 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5122 // CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
5123 // CHECK: ret <4 x i16> [[TMP3]]
5124 int16x4_t
test_vrsra_n_s16(int16x4_t a
, int16x4_t b
) {
5125 return vrsra_n_s16(a
, b
, 3);
5128 // CHECK-LABEL: @test_vrsra_n_s32(
5129 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5130 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5131 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5132 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3))
5133 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5134 // CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
5135 // CHECK: ret <2 x i32> [[TMP3]]
5136 int32x2_t
test_vrsra_n_s32(int32x2_t a
, int32x2_t b
) {
5137 return vrsra_n_s32(a
, b
, 3);
5140 // CHECK-LABEL: @test_vrsraq_n_s8(
5141 // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> splat (i8 -3))
5142 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
5143 // CHECK: ret <16 x i8> [[TMP0]]
5144 int8x16_t
test_vrsraq_n_s8(int8x16_t a
, int8x16_t b
) {
5145 return vrsraq_n_s8(a
, b
, 3);
5148 // CHECK-LABEL: @test_vrsraq_n_s16(
5149 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5150 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5151 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5152 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3))
5153 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5154 // CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
5155 // CHECK: ret <8 x i16> [[TMP3]]
5156 int16x8_t
test_vrsraq_n_s16(int16x8_t a
, int16x8_t b
) {
5157 return vrsraq_n_s16(a
, b
, 3);
5160 // CHECK-LABEL: @test_vrsraq_n_s32(
5161 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5162 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5163 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5164 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3))
5165 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5166 // CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
5167 // CHECK: ret <4 x i32> [[TMP3]]
5168 int32x4_t
test_vrsraq_n_s32(int32x4_t a
, int32x4_t b
) {
5169 return vrsraq_n_s32(a
, b
, 3);
5172 // CHECK-LABEL: @test_vrsraq_n_s64(
5173 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5174 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5175 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5176 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3))
5177 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5178 // CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
5179 // CHECK: ret <2 x i64> [[TMP3]]
5180 int64x2_t
test_vrsraq_n_s64(int64x2_t a
, int64x2_t b
) {
5181 return vrsraq_n_s64(a
, b
, 3);
5184 // CHECK-LABEL: @test_vrsra_n_u8(
5185 // CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> splat (i8 -3))
5186 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
5187 // CHECK: ret <8 x i8> [[TMP0]]
5188 uint8x8_t
test_vrsra_n_u8(uint8x8_t a
, uint8x8_t b
) {
5189 return vrsra_n_u8(a
, b
, 3);
5192 // CHECK-LABEL: @test_vrsra_n_u16(
5193 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5194 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5195 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5196 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3))
5197 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5198 // CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
5199 // CHECK: ret <4 x i16> [[TMP3]]
5200 uint16x4_t
test_vrsra_n_u16(uint16x4_t a
, uint16x4_t b
) {
5201 return vrsra_n_u16(a
, b
, 3);
5204 // CHECK-LABEL: @test_vrsra_n_u32(
5205 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5206 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5207 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5208 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3))
5209 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5210 // CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
5211 // CHECK: ret <2 x i32> [[TMP3]]
uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) {
  return vrsra_n_u32(a, b, 3);
}
5216 // CHECK-LABEL: @test_vrsraq_n_u8(
5217 // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> splat (i8 -3))
5218 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
5219 // CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vrsraq_n_u8(a, b, 3);
}
5224 // CHECK-LABEL: @test_vrsraq_n_u16(
5225 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5226 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5227 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5228 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3))
5229 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5230 // CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
5231 // CHECK: ret <8 x i16> [[TMP3]]
uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vrsraq_n_u16(a, b, 3);
}
5236 // CHECK-LABEL: @test_vrsraq_n_u32(
5237 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5238 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5239 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5240 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3))
5241 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5242 // CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
5243 // CHECK: ret <4 x i32> [[TMP3]]
uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vrsraq_n_u32(a, b, 3);
}
5248 // CHECK-LABEL: @test_vrsraq_n_u64(
5249 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5250 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5251 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5252 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3))
5253 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5254 // CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
5255 // CHECK: ret <2 x i64> [[TMP3]]
uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vrsraq_n_u64(a, b, 3);
}
5260 // CHECK-LABEL: @test_vsri_n_s8(
5261 // CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5262 // CHECK: ret <8 x i8> [[VSRI_N]]
int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
  return vsri_n_s8(a, b, 3);
}
5267 // CHECK-LABEL: @test_vsri_n_s16(
5268 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5269 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5270 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5271 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5272 // CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
5273 // CHECK: ret <4 x i16> [[VSRI_N2]]
int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
  return vsri_n_s16(a, b, 3);
}
5278 // CHECK-LABEL: @test_vsri_n_s32(
5279 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5280 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5281 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5282 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5283 // CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
5284 // CHECK: ret <2 x i32> [[VSRI_N2]]
int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
  return vsri_n_s32(a, b, 3);
}
5289 // CHECK-LABEL: @test_vsriq_n_s8(
5290 // CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5291 // CHECK: ret <16 x i8> [[VSRI_N]]
int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
  return vsriq_n_s8(a, b, 3);
}
5296 // CHECK-LABEL: @test_vsriq_n_s16(
5297 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5298 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5299 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5300 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5301 // CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
5302 // CHECK: ret <8 x i16> [[VSRI_N2]]
int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
  return vsriq_n_s16(a, b, 3);
}
5307 // CHECK-LABEL: @test_vsriq_n_s32(
5308 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5309 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5310 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5311 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5312 // CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
5313 // CHECK: ret <4 x i32> [[VSRI_N2]]
int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
  return vsriq_n_s32(a, b, 3);
}
5318 // CHECK-LABEL: @test_vsriq_n_s64(
5319 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5320 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5321 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5322 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5323 // CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
5324 // CHECK: ret <2 x i64> [[VSRI_N2]]
int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
  return vsriq_n_s64(a, b, 3);
}
5329 // CHECK-LABEL: @test_vsri_n_u8(
5330 // CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5331 // CHECK: ret <8 x i8> [[VSRI_N]]
uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsri_n_u8(a, b, 3);
}
5336 // CHECK-LABEL: @test_vsri_n_u16(
5337 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5338 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5339 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5340 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5341 // CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
5342 // CHECK: ret <4 x i16> [[VSRI_N2]]
uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsri_n_u16(a, b, 3);
}
5347 // CHECK-LABEL: @test_vsri_n_u32(
5348 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5349 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5350 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5351 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5352 // CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
5353 // CHECK: ret <2 x i32> [[VSRI_N2]]
uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsri_n_u32(a, b, 3);
}
5358 // CHECK-LABEL: @test_vsriq_n_u8(
5359 // CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5360 // CHECK: ret <16 x i8> [[VSRI_N]]
uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsriq_n_u8(a, b, 3);
}
5365 // CHECK-LABEL: @test_vsriq_n_u16(
5366 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5367 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5368 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5369 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5370 // CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
5371 // CHECK: ret <8 x i16> [[VSRI_N2]]
uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsriq_n_u16(a, b, 3);
}
5376 // CHECK-LABEL: @test_vsriq_n_u32(
5377 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5378 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5379 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5380 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5381 // CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
5382 // CHECK: ret <4 x i32> [[VSRI_N2]]
uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsriq_n_u32(a, b, 3);
}
5387 // CHECK-LABEL: @test_vsriq_n_u64(
5388 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5389 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5390 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5391 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5392 // CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
5393 // CHECK: ret <2 x i64> [[VSRI_N2]]
uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsriq_n_u64(a, b, 3);
}
5398 // CHECK-LABEL: @test_vsri_n_p8(
5399 // CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5400 // CHECK: ret <8 x i8> [[VSRI_N]]
poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsri_n_p8(a, b, 3);
}
5405 // CHECK-LABEL: @test_vsri_n_p16(
5406 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5407 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5408 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5409 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5410 // CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 15)
5411 // CHECK: ret <4 x i16> [[VSRI_N2]]
poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsri_n_p16(a, b, 15);
}
5416 // CHECK-LABEL: @test_vsriq_n_p8(
5417 // CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5418 // CHECK: ret <16 x i8> [[VSRI_N]]
poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsriq_n_p8(a, b, 3);
}
5423 // CHECK-LABEL: @test_vsriq_n_p16(
5424 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5425 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5426 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5427 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5428 // CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 15)
5429 // CHECK: ret <8 x i16> [[VSRI_N2]]
poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsriq_n_p16(a, b, 15);
}
5434 // CHECK-LABEL: @test_vsli_n_s8(
5435 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5436 // CHECK: ret <8 x i8> [[VSLI_N]]
int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
  return vsli_n_s8(a, b, 3);
}
5441 // CHECK-LABEL: @test_vsli_n_s16(
5442 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5443 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5444 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5445 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5446 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
5447 // CHECK: ret <4 x i16> [[VSLI_N2]]
int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
  return vsli_n_s16(a, b, 3);
}
5452 // CHECK-LABEL: @test_vsli_n_s32(
5453 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5454 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5455 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5456 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5457 // CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
5458 // CHECK: ret <2 x i32> [[VSLI_N2]]
int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
  return vsli_n_s32(a, b, 3);
}
5463 // CHECK-LABEL: @test_vsliq_n_s8(
5464 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5465 // CHECK: ret <16 x i8> [[VSLI_N]]
int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
  return vsliq_n_s8(a, b, 3);
}
5470 // CHECK-LABEL: @test_vsliq_n_s16(
5471 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5472 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5473 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5474 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5475 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
5476 // CHECK: ret <8 x i16> [[VSLI_N2]]
int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
  return vsliq_n_s16(a, b, 3);
}
5481 // CHECK-LABEL: @test_vsliq_n_s32(
5482 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5483 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5484 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5485 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5486 // CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
5487 // CHECK: ret <4 x i32> [[VSLI_N2]]
int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
  return vsliq_n_s32(a, b, 3);
}
5492 // CHECK-LABEL: @test_vsliq_n_s64(
5493 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5494 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5495 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5496 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5497 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
5498 // CHECK: ret <2 x i64> [[VSLI_N2]]
int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
  return vsliq_n_s64(a, b, 3);
}
5503 // CHECK-LABEL: @test_vsli_n_u8(
5504 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5505 // CHECK: ret <8 x i8> [[VSLI_N]]
uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsli_n_u8(a, b, 3);
}
5510 // CHECK-LABEL: @test_vsli_n_u16(
5511 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5512 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5513 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5514 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5515 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
5516 // CHECK: ret <4 x i16> [[VSLI_N2]]
uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsli_n_u16(a, b, 3);
}
5521 // CHECK-LABEL: @test_vsli_n_u32(
5522 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5523 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5524 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5525 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5526 // CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
5527 // CHECK: ret <2 x i32> [[VSLI_N2]]
uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsli_n_u32(a, b, 3);
}
5532 // CHECK-LABEL: @test_vsliq_n_u8(
5533 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5534 // CHECK: ret <16 x i8> [[VSLI_N]]
uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsliq_n_u8(a, b, 3);
}
5539 // CHECK-LABEL: @test_vsliq_n_u16(
5540 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5541 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5542 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5543 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5544 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
5545 // CHECK: ret <8 x i16> [[VSLI_N2]]
uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsliq_n_u16(a, b, 3);
}
5550 // CHECK-LABEL: @test_vsliq_n_u32(
5551 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5552 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5553 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5554 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5555 // CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
5556 // CHECK: ret <4 x i32> [[VSLI_N2]]
uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsliq_n_u32(a, b, 3);
}
5561 // CHECK-LABEL: @test_vsliq_n_u64(
5562 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5563 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5564 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5565 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5566 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
5567 // CHECK: ret <2 x i64> [[VSLI_N2]]
uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsliq_n_u64(a, b, 3);
}
5572 // CHECK-LABEL: @test_vsli_n_p8(
5573 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5574 // CHECK: ret <8 x i8> [[VSLI_N]]
poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsli_n_p8(a, b, 3);
}
5579 // CHECK-LABEL: @test_vsli_n_p16(
5580 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5581 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5582 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5583 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5584 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 15)
5585 // CHECK: ret <4 x i16> [[VSLI_N2]]
poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsli_n_p16(a, b, 15);
}
5590 // CHECK-LABEL: @test_vsliq_n_p8(
5591 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5592 // CHECK: ret <16 x i8> [[VSLI_N]]
poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsliq_n_p8(a, b, 3);
}
5597 // CHECK-LABEL: @test_vsliq_n_p16(
5598 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5599 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5600 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5601 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5602 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 15)
5603 // CHECK: ret <8 x i16> [[VSLI_N2]]
poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsliq_n_p16(a, b, 15);
}
5608 // CHECK-LABEL: @test_vqshlu_n_s8(
5609 // CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> splat (i8 3))
5610 // CHECK: ret <8 x i8> [[VQSHLU_N]]
uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
  return vqshlu_n_s8(a, 3);
}
5615 // CHECK-LABEL: @test_vqshlu_n_s16(
5616 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5617 // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5618 // CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> splat (i16 3))
5619 // CHECK: ret <4 x i16> [[VQSHLU_N1]]
uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
  return vqshlu_n_s16(a, 3);
}
5624 // CHECK-LABEL: @test_vqshlu_n_s32(
5625 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5626 // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5627 // CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> splat (i32 3))
5628 // CHECK: ret <2 x i32> [[VQSHLU_N1]]
uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
  return vqshlu_n_s32(a, 3);
}
5633 // CHECK-LABEL: @test_vqshluq_n_s8(
5634 // CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> splat (i8 3))
5635 // CHECK: ret <16 x i8> [[VQSHLU_N]]
uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
  return vqshluq_n_s8(a, 3);
}
5640 // CHECK-LABEL: @test_vqshluq_n_s16(
5641 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5642 // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5643 // CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> splat (i16 3))
5644 // CHECK: ret <8 x i16> [[VQSHLU_N1]]
uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
  return vqshluq_n_s16(a, 3);
}
5649 // CHECK-LABEL: @test_vqshluq_n_s32(
5650 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5651 // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5652 // CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> splat (i32 3))
5653 // CHECK: ret <4 x i32> [[VQSHLU_N1]]
uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
  return vqshluq_n_s32(a, 3);
}
5658 // CHECK-LABEL: @test_vqshluq_n_s64(
5659 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5660 // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5661 // CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> splat (i64 3))
5662 // CHECK: ret <2 x i64> [[VQSHLU_N1]]
uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
  return vqshluq_n_s64(a, 3);
}
5667 // CHECK-LABEL: @test_vshrn_n_s16(
5668 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5669 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5670 // CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 3)
5671 // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
5672 // CHECK: ret <8 x i8> [[VSHRN_N]]
int8x8_t test_vshrn_n_s16(int16x8_t a) {
  return vshrn_n_s16(a, 3);
}
5677 // CHECK-LABEL: @test_vshrn_n_s32(
5678 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5679 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5680 // CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 9)
5681 // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
5682 // CHECK: ret <4 x i16> [[VSHRN_N]]
int16x4_t test_vshrn_n_s32(int32x4_t a) {
  return vshrn_n_s32(a, 9);
}
5687 // CHECK-LABEL: @test_vshrn_n_s64(
5688 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5689 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5690 // CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 19)
5691 // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
5692 // CHECK: ret <2 x i32> [[VSHRN_N]]
int32x2_t test_vshrn_n_s64(int64x2_t a) {
  return vshrn_n_s64(a, 19);
}
5697 // CHECK-LABEL: @test_vshrn_n_u16(
5698 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5699 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5700 // CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 3)
5701 // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
5702 // CHECK: ret <8 x i8> [[VSHRN_N]]
uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
  return vshrn_n_u16(a, 3);
}
5707 // CHECK-LABEL: @test_vshrn_n_u32(
5708 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5709 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5710 // CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 9)
5711 // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
5712 // CHECK: ret <4 x i16> [[VSHRN_N]]
uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
  return vshrn_n_u32(a, 9);
}
5717 // CHECK-LABEL: @test_vshrn_n_u64(
5718 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5719 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5720 // CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 19)
5721 // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
5722 // CHECK: ret <2 x i32> [[VSHRN_N]]
uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
  return vshrn_n_u64(a, 19);
}
5727 // CHECK-LABEL: @test_vshrn_high_n_s16(
5728 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5729 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5730 // CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 3)
5731 // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
5732 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5733 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vshrn_high_n_s16(a, b, 3);
}
5738 // CHECK-LABEL: @test_vshrn_high_n_s32(
5739 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5740 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5741 // CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 9)
5742 // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
5743 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5744 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vshrn_high_n_s32(a, b, 9);
}
5749 // CHECK-LABEL: @test_vshrn_high_n_s64(
5750 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5751 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5752 // CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 19)
5753 // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
5754 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5755 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vshrn_high_n_s64(a, b, 19);
}
5760 // CHECK-LABEL: @test_vshrn_high_n_u16(
5761 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5762 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5763 // CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 3)
5764 // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
5765 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5766 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vshrn_high_n_u16(a, b, 3);
}
5771 // CHECK-LABEL: @test_vshrn_high_n_u32(
5772 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5773 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5774 // CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 9)
5775 // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
5776 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5777 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vshrn_high_n_u32(a, b, 9);
}
5782 // CHECK-LABEL: @test_vshrn_high_n_u64(
5783 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5784 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5785 // CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 19)
5786 // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
5787 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5788 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vshrn_high_n_u64(a, b, 19);
}
5793 // CHECK-LABEL: @test_vqshrun_n_s16(
5794 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5795 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5796 // CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
5797 // CHECK: ret <8 x i8> [[VQSHRUN_N1]]
uint8x8_t test_vqshrun_n_s16(int16x8_t a) {
  return vqshrun_n_s16(a, 3);
}
5802 // CHECK-LABEL: @test_vqshrun_n_s32(
5803 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5804 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5805 // CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
5806 // CHECK: ret <4 x i16> [[VQSHRUN_N1]]
uint16x4_t test_vqshrun_n_s32(int32x4_t a) {
  return vqshrun_n_s32(a, 9);
}
5811 // CHECK-LABEL: @test_vqshrun_n_s64(
5812 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5813 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5814 // CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
5815 // CHECK: ret <2 x i32> [[VQSHRUN_N1]]
uint32x2_t test_vqshrun_n_s64(int64x2_t a) {
  return vqshrun_n_s64(a, 19);
}
5820 // CHECK-LABEL: @test_vqshrun_high_n_s16(
5821 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5822 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5823 // CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
5824 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5825 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqshrun_high_n_s16(a, b, 3);
}
5830 // CHECK-LABEL: @test_vqshrun_high_n_s32(
5831 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5832 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5833 // CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
5834 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5835 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqshrun_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqshrun_high_n_s32(a, b, 9);
}
5840 // CHECK-LABEL: @test_vqshrun_high_n_s64(
5841 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5842 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5843 // CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
5844 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5845 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqshrun_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqshrun_high_n_s64(a, b, 19);
}
5850 // CHECK-LABEL: @test_vrshrn_n_s16(
5851 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5852 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5853 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
5854 // CHECK: ret <8 x i8> [[VRSHRN_N1]]
int8x8_t test_vrshrn_n_s16(int16x8_t a) {
  return vrshrn_n_s16(a, 3);
}
5859 // CHECK-LABEL: @test_vrshrn_n_s32(
5860 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5861 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5862 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
5863 // CHECK: ret <4 x i16> [[VRSHRN_N1]]
int16x4_t test_vrshrn_n_s32(int32x4_t a) {
  return vrshrn_n_s32(a, 9);
}
5868 // CHECK-LABEL: @test_vrshrn_n_s64(
5869 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5870 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5871 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
5872 // CHECK: ret <2 x i32> [[VRSHRN_N1]]
int32x2_t test_vrshrn_n_s64(int64x2_t a) {
  return vrshrn_n_s64(a, 19);
}
5877 // CHECK-LABEL: @test_vrshrn_n_u16(
5878 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5879 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5880 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
5881 // CHECK: ret <8 x i8> [[VRSHRN_N1]]
uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
  return vrshrn_n_u16(a, 3);
}
5886 // CHECK-LABEL: @test_vrshrn_n_u32(
5887 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5888 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5889 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
5890 // CHECK: ret <4 x i16> [[VRSHRN_N1]]
uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
  return vrshrn_n_u32(a, 9);
}
5895 // CHECK-LABEL: @test_vrshrn_n_u64(
5896 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5897 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5898 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
5899 // CHECK: ret <2 x i32> [[VRSHRN_N1]]
uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
  return vrshrn_n_u64(a, 19);
}
5904 // CHECK-LABEL: @test_vrshrn_high_n_s16(
5905 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5906 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5907 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
5908 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5909 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vrshrn_high_n_s16(a, b, 3);
}
5914 // CHECK-LABEL: @test_vrshrn_high_n_s32(
5915 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5916 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5917 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
5918 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5919 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vrshrn_high_n_s32(a, b, 9);
}
5924 // CHECK-LABEL: @test_vrshrn_high_n_s64(
5925 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5926 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5927 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
5928 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5929 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vrshrn_high_n_s64(a, b, 19);
}
5934 // CHECK-LABEL: @test_vrshrn_high_n_u16(
5935 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5936 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5937 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
5938 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5939 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vrshrn_high_n_u16(a, b, 3);
}
5944 // CHECK-LABEL: @test_vrshrn_high_n_u32(
5945 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5946 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5947 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
5948 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5949 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vrshrn_high_n_u32(a, b, 9);
}
5954 // CHECK-LABEL: @test_vrshrn_high_n_u64(
5955 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5956 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5957 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
5958 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5959 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vrshrn_high_n_u64(a, b, 19);
}
5964 // CHECK-LABEL: @test_vqrshrun_n_s16(
5965 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5966 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5967 // CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
5968 // CHECK: ret <8 x i8> [[VQRSHRUN_N1]]
uint8x8_t test_vqrshrun_n_s16(int16x8_t a) {
  return vqrshrun_n_s16(a, 3);
}
5973 // CHECK-LABEL: @test_vqrshrun_n_s32(
5974 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5975 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5976 // CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
5977 // CHECK: ret <4 x i16> [[VQRSHRUN_N1]]
uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
  return vqrshrun_n_s32(a, 9);
}
5982 // CHECK-LABEL: @test_vqrshrun_n_s64(
5983 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5984 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5985 // CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
5986 // CHECK: ret <2 x i32> [[VQRSHRUN_N1]]
uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
  return vqrshrun_n_s64(a, 19);
}
5991 // CHECK-LABEL: @test_vqrshrun_high_n_s16(
5992 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5993 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5994 // CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
5995 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5996 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqrshrun_high_n_s16(a, b, 3);
}
6001 // CHECK-LABEL: @test_vqrshrun_high_n_s32(
6002 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6003 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6004 // CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
6005 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6006 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqrshrun_high_n_s32(a, b, 9);
}
6011 // CHECK-LABEL: @test_vqrshrun_high_n_s64(
6012 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6013 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6014 // CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
6015 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6016 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqrshrun_high_n_s64(a, b, 19);
}
6021 // CHECK-LABEL: @test_vqshrn_n_s16(
6022 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6023 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6024 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
6025 // CHECK: ret <8 x i8> [[VQSHRN_N1]]
int8x8_t test_vqshrn_n_s16(int16x8_t a) {
  return vqshrn_n_s16(a, 3);
}
6030 // CHECK-LABEL: @test_vqshrn_n_s32(
6031 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6032 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6033 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
6034 // CHECK: ret <4 x i16> [[VQSHRN_N1]]
int16x4_t test_vqshrn_n_s32(int32x4_t a) {
  return vqshrn_n_s32(a, 9);
}
6039 // CHECK-LABEL: @test_vqshrn_n_s64(
6040 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6041 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6042 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
6043 // CHECK: ret <2 x i32> [[VQSHRN_N1]]
int32x2_t test_vqshrn_n_s64(int64x2_t a) {
  return vqshrn_n_s64(a, 19);
}
6048 // CHECK-LABEL: @test_vqshrn_n_u16(
6049 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6050 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6051 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
6052 // CHECK: ret <8 x i8> [[VQSHRN_N1]]
uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
  return vqshrn_n_u16(a, 3);
}
6057 // CHECK-LABEL: @test_vqshrn_n_u32(
6058 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6059 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6060 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
6061 // CHECK: ret <4 x i16> [[VQSHRN_N1]]
uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
  return vqshrn_n_u32(a, 9);
}
6066 // CHECK-LABEL: @test_vqshrn_n_u64(
6067 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6068 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6069 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
6070 // CHECK: ret <2 x i32> [[VQSHRN_N1]]
uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
  return vqshrn_n_u64(a, 19);
}
6075 // CHECK-LABEL: @test_vqshrn_high_n_s16(
6076 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6077 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6078 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
6079 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6080 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqshrn_high_n_s16(a, b, 3);
}
6085 // CHECK-LABEL: @test_vqshrn_high_n_s32(
6086 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6087 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6088 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
6089 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6090 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqshrn_high_n_s32(a, b, 9);
}
6095 // CHECK-LABEL: @test_vqshrn_high_n_s64(
6096 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6097 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6098 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
6099 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6100 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqshrn_high_n_s64(a, b, 19);
}
6105 // CHECK-LABEL: @test_vqshrn_high_n_u16(
6106 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6107 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6108 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
6109 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6110 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vqshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vqshrn_high_n_u16(a, b, 3);
}
6115 // CHECK-LABEL: @test_vqshrn_high_n_u32(
6116 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6117 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6118 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
6119 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6120 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vqshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vqshrn_high_n_u32(a, b, 9);
}
6125 // CHECK-LABEL: @test_vqshrn_high_n_u64(
6126 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6127 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6128 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
6129 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6130 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vqshrn_high_n_u64(a, b, 19);
}
6135 // CHECK-LABEL: @test_vqrshrn_n_s16(
6136 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6137 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6138 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
6139 // CHECK: ret <8 x i8> [[VQRSHRN_N1]]
int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
  return vqrshrn_n_s16(a, 3);
}
6144 // CHECK-LABEL: @test_vqrshrn_n_s32(
6145 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6146 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6147 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
6148 // CHECK: ret <4 x i16> [[VQRSHRN_N1]]
int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
  return vqrshrn_n_s32(a, 9);
}
6153 // CHECK-LABEL: @test_vqrshrn_n_s64(
6154 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6155 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6156 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
6157 // CHECK: ret <2 x i32> [[VQRSHRN_N1]]
int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
  return vqrshrn_n_s64(a, 19);
}
6162 // CHECK-LABEL: @test_vqrshrn_n_u16(
6163 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6164 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6165 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
6166 // CHECK: ret <8 x i8> [[VQRSHRN_N1]]
uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
  return vqrshrn_n_u16(a, 3);
}
6171 // CHECK-LABEL: @test_vqrshrn_n_u32(
6172 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6173 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6174 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
6175 // CHECK: ret <4 x i16> [[VQRSHRN_N1]]
uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
  return vqrshrn_n_u32(a, 9);
}
6180 // CHECK-LABEL: @test_vqrshrn_n_u64(
6181 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6182 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6183 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
6184 // CHECK: ret <2 x i32> [[VQRSHRN_N1]]
uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
  return vqrshrn_n_u64(a, 19);
}
6189 // CHECK-LABEL: @test_vqrshrn_high_n_s16(
6190 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6191 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6192 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
6193 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6194 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqrshrn_high_n_s16(a, b, 3);
}
6199 // CHECK-LABEL: @test_vqrshrn_high_n_s32(
6200 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6201 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6202 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
6203 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6204 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqrshrn_high_n_s32(a, b, 9);
}
6209 // CHECK-LABEL: @test_vqrshrn_high_n_s64(
6210 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6211 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6212 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
6213 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6214 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqrshrn_high_n_s64(a, b, 19);
}
6219 // CHECK-LABEL: @test_vqrshrn_high_n_u16(
6220 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6221 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6222 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
6223 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6224 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vqrshrn_high_n_u16(a, b, 3);
}
6229 // CHECK-LABEL: @test_vqrshrn_high_n_u32(
6230 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6231 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6232 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
6233 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6234 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vqrshrn_high_n_u32(a, b, 9);
}
6239 // CHECK-LABEL: @test_vqrshrn_high_n_u64(
6240 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6241 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6242 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
6243 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6244 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vqrshrn_high_n_u64(a, b, 19);
}
6249 // CHECK-LABEL: @test_vshll_n_s8(
6250 // CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
6251 // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3)
6252 // CHECK: ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_n_s8(int8x8_t a) {
  return vshll_n_s8(a, 3);
}
6257 // CHECK-LABEL: @test_vshll_n_s16(
6258 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6259 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6260 // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
6261 // CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9)
6262 // CHECK: ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_n_s16(int16x4_t a) {
  return vshll_n_s16(a, 9);
}
6267 // CHECK-LABEL: @test_vshll_n_s32(
6268 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6269 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6270 // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
6271 // CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19)
6272 // CHECK: ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_n_s32(int32x2_t a) {
  return vshll_n_s32(a, 19);
}
6277 // CHECK-LABEL: @test_vshll_n_u8(
6278 // CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
6279 // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3)
6280 // CHECK: ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_n_u8(uint8x8_t a) {
  return vshll_n_u8(a, 3);
}
6285 // CHECK-LABEL: @test_vshll_n_u16(
6286 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6287 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6288 // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
6289 // CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9)
6290 // CHECK: ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_n_u16(uint16x4_t a) {
  return vshll_n_u16(a, 9);
}
6295 // CHECK-LABEL: @test_vshll_n_u32(
6296 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6297 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6298 // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
6299 // CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19)
6300 // CHECK: ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_n_u32(uint32x2_t a) {
  return vshll_n_u32(a, 19);
}
// CHECK-LABEL: @test_vshll_high_n_s8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3)
// CHECK: ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_high_n_s8(int8x16_t a) {
  return vshll_high_n_s8(a, 3);
}

// CHECK-LABEL: @test_vshll_high_n_s16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9)
// CHECK: ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_high_n_s16(int16x8_t a) {
  return vshll_high_n_s16(a, 9);
}

// CHECK-LABEL: @test_vshll_high_n_s32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19)
// CHECK: ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_high_n_s32(int32x4_t a) {
  return vshll_high_n_s32(a, 19);
}

// CHECK-LABEL: @test_vshll_high_n_u8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3)
// CHECK: ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_high_n_u8(uint8x16_t a) {
  return vshll_high_n_u8(a, 3);
}

// CHECK-LABEL: @test_vshll_high_n_u16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9)
// CHECK: ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_high_n_u16(uint16x8_t a) {
  return vshll_high_n_u16(a, 9);
}

// CHECK-LABEL: @test_vshll_high_n_u32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19)
// CHECK: ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_high_n_u32(uint32x4_t a) {
  return vshll_high_n_u32(a, 19);
}

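// vmovl_*/vmovl_high_*: widening moves. The (low or high half of the) input
// vector is sign- or zero-extended to elements of twice the width.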
// CHECK-LABEL: @test_vmovl_s8(
// CHECK: [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I]]
int16x8_t test_vmovl_s8(int8x8_t a) {
  return vmovl_s8(a);
}

// CHECK-LABEL: @test_vmovl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VMOVL_I:%.*]] = sext <4 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I]]
int32x4_t test_vmovl_s16(int16x4_t a) {
  return vmovl_s16(a);
}

// CHECK-LABEL: @test_vmovl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMOVL_I:%.*]] = sext <2 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I]]
int64x2_t test_vmovl_s32(int32x2_t a) {
  return vmovl_s32(a);
}

// CHECK-LABEL: @test_vmovl_u8(
// CHECK: [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I]]
uint16x8_t test_vmovl_u8(uint8x8_t a) {
  return vmovl_u8(a);
}

// CHECK-LABEL: @test_vmovl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VMOVL_I:%.*]] = zext <4 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I]]
uint32x4_t test_vmovl_u16(uint16x4_t a) {
  return vmovl_u16(a);
}

// CHECK-LABEL: @test_vmovl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMOVL_I:%.*]] = zext <2 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I]]
uint64x2_t test_vmovl_u32(uint32x2_t a) {
  return vmovl_u32(a);
}

// CHECK-LABEL: @test_vmovl_high_s8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vmovl_high_s8(int8x16_t a) {
  return vmovl_high_s8(a);
}

// CHECK-LABEL: @test_vmovl_high_s16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP1]]
int32x4_t test_vmovl_high_s16(int16x8_t a) {
  return vmovl_high_s16(a);
}

// CHECK-LABEL: @test_vmovl_high_s32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP1]]
int64x2_t test_vmovl_high_s32(int32x4_t a) {
  return vmovl_high_s32(a);
}

// CHECK-LABEL: @test_vmovl_high_u8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vmovl_high_u8(uint8x16_t a) {
  return vmovl_high_u8(a);
}

// CHECK-LABEL: @test_vmovl_high_u16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP1]]
uint32x4_t test_vmovl_high_u16(uint16x8_t a) {
  return vmovl_high_u16(a);
}

// CHECK-LABEL: @test_vmovl_high_u32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP1]]
uint64x2_t test_vmovl_high_u32(uint32x4_t a) {
  return vmovl_high_u32(a);
}

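// vcvt_n_*: fixed-point conversions between integer and floating-point vectors;
// the immediate gives the number of fractional bits.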
// CHECK-LABEL: @test_vcvt_n_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
// CHECK: ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
  return vcvt_n_f32_s32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
// CHECK: ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
  return vcvtq_n_f32_s32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_f64_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
// CHECK: ret <2 x double> [[VCVT_N1]]
float64x2_t test_vcvtq_n_f64_s64(int64x2_t a) {
  return vcvtq_n_f64_s64(a, 50);
}

// CHECK-LABEL: @test_vcvt_n_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
// CHECK: ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
  return vcvt_n_f32_u32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
// CHECK: ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
  return vcvtq_n_f32_u32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_f64_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
// CHECK: ret <2 x double> [[VCVT_N1]]
float64x2_t test_vcvtq_n_f64_u64(uint64x2_t a) {
  return vcvtq_n_f64_u64(a, 50);
}

// CHECK-LABEL: @test_vcvt_n_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
// CHECK: ret <2 x i32> [[VCVT_N1]]
int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
  return vcvt_n_s32_f32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
// CHECK: ret <4 x i32> [[VCVT_N1]]
int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
  return vcvtq_n_s32_f32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
// CHECK: ret <2 x i64> [[VCVT_N1]]
int64x2_t test_vcvtq_n_s64_f64(float64x2_t a) {
  return vcvtq_n_s64_f64(a, 50);
}

// CHECK-LABEL: @test_vcvt_n_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
// CHECK: ret <2 x i32> [[VCVT_N1]]
uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
  return vcvt_n_u32_f32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
// CHECK: ret <4 x i32> [[VCVT_N1]]
uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
  return vcvtq_n_u32_f32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
// CHECK: ret <2 x i64> [[VCVT_N1]]
uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) {
  return vcvtq_n_u64_f64(a, 50);
}

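// vaddl/vaddw (and their _high forms): widening additions. vaddl extends both
// operands before adding; vaddw extends only the narrow operand.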
// CHECK-LABEL: @test_vaddl_s8(
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
  return vaddl_s8(a, b);
}

// CHECK-LABEL: @test_vaddl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
  return vaddl_s16(a, b);
}

// CHECK-LABEL: @test_vaddl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
  return vaddl_s32(a, b);
}

// CHECK-LABEL: @test_vaddl_u8(
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
  return vaddl_u8(a, b);
}

// CHECK-LABEL: @test_vaddl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
  return vaddl_u16(a, b);
}

// CHECK-LABEL: @test_vaddl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
  return vaddl_u32(a, b);
}

// CHECK-LABEL: @test_vaddl_high_s8(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) {
  return vaddl_high_s8(a, b);
}

// CHECK-LABEL: @test_vaddl_high_s16(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) {
  return vaddl_high_s16(a, b);
}

// CHECK-LABEL: @test_vaddl_high_s32(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) {
  return vaddl_high_s32(a, b);
}

// CHECK-LABEL: @test_vaddl_high_u8(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vaddl_high_u8(a, b);
}

// CHECK-LABEL: @test_vaddl_high_u16(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vaddl_high_u16(a, b);
}

// CHECK-LABEL: @test_vaddl_high_u32(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vaddl_high_u32(a, b);
}

// CHECK-LABEL: @test_vaddw_s8(
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
  return vaddw_s8(a, b);
}

// CHECK-LABEL: @test_vaddw_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
  return vaddw_s16(a, b);
}

// CHECK-LABEL: @test_vaddw_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
  return vaddw_s32(a, b);
}

// CHECK-LABEL: @test_vaddw_u8(
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
  return vaddw_u8(a, b);
}

// CHECK-LABEL: @test_vaddw_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
  return vaddw_u16(a, b);
}

// CHECK-LABEL: @test_vaddw_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
  return vaddw_u32(a, b);
}

// CHECK-LABEL: @test_vaddw_high_s8(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) {
  return vaddw_high_s8(a, b);
}

// CHECK-LABEL: @test_vaddw_high_s16(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) {
  return vaddw_high_s16(a, b);
}

// CHECK-LABEL: @test_vaddw_high_s32(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) {
  return vaddw_high_s32(a, b);
}

// CHECK-LABEL: @test_vaddw_high_u8(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) {
  return vaddw_high_u8(a, b);
}

// CHECK-LABEL: @test_vaddw_high_u16(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) {
  return vaddw_high_u16(a, b);
}

// CHECK-LABEL: @test_vaddw_high_u32(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) {
  return vaddw_high_u32(a, b);
}

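// vsubl/vsubw (and their _high forms): the widening subtractions, mirroring the
// vaddl/vaddw tests above.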
// CHECK-LABEL: @test_vsubl_s8(
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
  return vsubl_s8(a, b);
}

// CHECK-LABEL: @test_vsubl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
  return vsubl_s16(a, b);
}

// CHECK-LABEL: @test_vsubl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
  return vsubl_s32(a, b);
}

// CHECK-LABEL: @test_vsubl_u8(
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
  return vsubl_u8(a, b);
}

// CHECK-LABEL: @test_vsubl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
  return vsubl_u16(a, b);
}

// CHECK-LABEL: @test_vsubl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
  return vsubl_u32(a, b);
}

// CHECK-LABEL: @test_vsubl_high_s8(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) {
  return vsubl_high_s8(a, b);
}

// CHECK-LABEL: @test_vsubl_high_s16(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) {
  return vsubl_high_s16(a, b);
}

// CHECK-LABEL: @test_vsubl_high_s32(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) {
  return vsubl_high_s32(a, b);
}

// CHECK-LABEL: @test_vsubl_high_u8(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vsubl_high_u8(a, b);
}

// CHECK-LABEL: @test_vsubl_high_u16(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vsubl_high_u16(a, b);
}

// CHECK-LABEL: @test_vsubl_high_u32(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vsubl_high_u32(a, b);
}

// CHECK-LABEL: @test_vsubw_s8(
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
  return vsubw_s8(a, b);
}

// CHECK-LABEL: @test_vsubw_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
  return vsubw_s16(a, b);
}

// CHECK-LABEL: @test_vsubw_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
  return vsubw_s32(a, b);
}

// CHECK-LABEL: @test_vsubw_u8(
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
  return vsubw_u8(a, b);
}

// CHECK-LABEL: @test_vsubw_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
  return vsubw_u16(a, b);
}

// CHECK-LABEL: @test_vsubw_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
  return vsubw_u32(a, b);
}

// CHECK-LABEL: @test_vsubw_high_s8(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) {
  return vsubw_high_s8(a, b);
}

// CHECK-LABEL: @test_vsubw_high_s16(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) {
  return vsubw_high_s16(a, b);
}

// CHECK-LABEL: @test_vsubw_high_s32(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) {
  return vsubw_high_s32(a, b);
}

// CHECK-LABEL: @test_vsubw_high_u8(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) {
  return vsubw_high_u8(a, b);
}

// CHECK-LABEL: @test_vsubw_high_u16(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) {
  return vsubw_high_u16(a, b);
}

// CHECK-LABEL: @test_vsubw_high_u32(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) {
  return vsubw_high_u32(a, b);
}

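// vaddhn/vraddhn (and their _high forms): narrowing additions that keep the high
// half of each sum, without and with rounding respectively.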
// CHECK-LABEL: @test_vaddhn_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], splat (i16 8)
// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VADDHN2_I]]
int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
  return vaddhn_s16(a, b);
}

// CHECK-LABEL: @test_vaddhn_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], splat (i32 16)
// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VADDHN2_I]]
int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
  return vaddhn_s32(a, b);
}

// CHECK-LABEL: @test_vaddhn_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], splat (i64 32)
// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VADDHN2_I]]
int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
  return vaddhn_s64(a, b);
}

// CHECK-LABEL: @test_vaddhn_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], splat (i16 8)
// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VADDHN2_I]]
uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vaddhn_u16(a, b);
}

// CHECK-LABEL: @test_vaddhn_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], splat (i32 16)
// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VADDHN2_I]]
uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vaddhn_u32(a, b);
}

// CHECK-LABEL: @test_vaddhn_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], splat (i64 32)
// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VADDHN2_I]]
uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vaddhn_u64(a, b);
}

// CHECK-LABEL: @test_vaddhn_high_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], splat (i16 8)
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vaddhn_high_s16(r, a, b);
}

// CHECK-LABEL: @test_vaddhn_high_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], splat (i32 16)
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vaddhn_high_s32(r, a, b);
}

// CHECK-LABEL: @test_vaddhn_high_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], splat (i64 32)
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vaddhn_high_s64(r, a, b);
}

// CHECK-LABEL: @test_vaddhn_high_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], splat (i16 8)
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vaddhn_high_u16(r, a, b);
}

// CHECK-LABEL: @test_vaddhn_high_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], splat (i32 16)
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vaddhn_high_u32(r, a, b);
}

// CHECK-LABEL: @test_vaddhn_high_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b
// CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], splat (i64 32)
// CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vaddhn_high_u64(r, a, b);
}

// CHECK-LABEL: @test_vraddhn_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
  return vraddhn_s16(a, b);
}

// CHECK-LABEL: @test_vraddhn_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRADDHN_V2_I]]
int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
  return vraddhn_s32(a, b);
}

// CHECK-LABEL: @test_vraddhn_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRADDHN_V2_I]]
int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
  return vraddhn_s64(a, b);
}

// CHECK-LABEL: @test_vraddhn_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vraddhn_u16(a, b);
}

// CHECK-LABEL: @test_vraddhn_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRADDHN_V2_I]]
uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vraddhn_u32(a, b);
}

// CHECK-LABEL: @test_vraddhn_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRADDHN_V2_I]]
uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vraddhn_u64(a, b);
}

// CHECK-LABEL: @test_vraddhn_high_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vraddhn_high_s16(r, a, b);
}

// CHECK-LABEL: @test_vraddhn_high_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vraddhn_high_s32(r, a, b);
}

// CHECK-LABEL: @test_vraddhn_high_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vraddhn_high_s64(r, a, b);
}

// CHECK-LABEL: @test_vraddhn_high_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vraddhn_high_u16(r, a, b);
}

// CHECK-LABEL: @test_vraddhn_high_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vraddhn_high_u32(r, a, b);
}

// CHECK-LABEL: @test_vraddhn_high_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vraddhn_high_u64(r, a, b);
}

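// vsubhn/vrsubhn (and their _high forms): the narrowing subtractions, mirroring
// the vaddhn/vraddhn tests above.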
// CHECK-LABEL: @test_vsubhn_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], splat (i16 8)
// CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSUBHN2_I]]
int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
  return vsubhn_s16(a, b);
}

// CHECK-LABEL: @test_vsubhn_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], splat (i32 16)
// CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSUBHN2_I]]
int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
  return vsubhn_s32(a, b);
}

// CHECK-LABEL: @test_vsubhn_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], splat (i64 32)
// CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSUBHN2_I]]
int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
  return vsubhn_s64(a, b);
}

// CHECK-LABEL: @test_vsubhn_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
// CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], splat (i16 8)
// CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSUBHN2_I]]
uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vsubhn_u16(a, b);
}

// CHECK-LABEL: @test_vsubhn_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
// CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], splat (i32 16)
// CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSUBHN2_I]]
uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vsubhn_u32(a, b);
}

// CHECK-LABEL: @test_vsubhn_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
// CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], splat (i64 32)
// CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSUBHN2_I]]
uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vsubhn_u64(a, b);
}

7391 // CHECK-LABEL: @test_vsubhn_high_s16(
7392 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7393 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7394 // CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b
7395 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], splat (i16 8)
7396 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
7397 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7398 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
7399 int8x16_t
test_vsubhn_high_s16(int8x8_t r
, int16x8_t a
, int16x8_t b
) {
7400 return vsubhn_high_s16(r
, a
, b
);
7403 // CHECK-LABEL: @test_vsubhn_high_s32(
7404 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7405 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7406 // CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b
7407 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], splat (i32 16)
7408 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
7409 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7410 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
7411 int16x8_t
test_vsubhn_high_s32(int16x4_t r
, int32x4_t a
, int32x4_t b
) {
7412 return vsubhn_high_s32(r
, a
, b
);
7415 // CHECK-LABEL: @test_vsubhn_high_s64(
7416 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7417 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7418 // CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b
7419 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], splat (i64 32)
7420 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
7421 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7422 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
7423 int32x4_t
test_vsubhn_high_s64(int32x2_t r
, int64x2_t a
, int64x2_t b
) {
7424 return vsubhn_high_s64(r
, a
, b
);
7427 // CHECK-LABEL: @test_vsubhn_high_u16(
7428 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7429 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7430 // CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b
7431 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], splat (i16 8)
7432 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
7433 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7434 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vsubhn_high_u16(r, a, b);
}
7439 // CHECK-LABEL: @test_vsubhn_high_u32(
7440 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7441 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7442 // CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b
7443 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], splat (i32 16)
7444 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
7445 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7446 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vsubhn_high_u32(r, a, b);
}
7451 // CHECK-LABEL: @test_vsubhn_high_u64(
7452 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7453 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7454 // CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b
7455 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], splat (i64 32)
7456 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
7457 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7458 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vsubhn_high_u64(r, a, b);
}
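// vrsubhn_*: rounding subtract-high-narrow lowers to the @llvm.aarch64.neon.rsubhn.* intrinsics.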
7463 // CHECK-LABEL: @test_vrsubhn_s16(
7464 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7465 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7466 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7467 // CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
  return vrsubhn_s16(a, b);
}
7472 // CHECK-LABEL: @test_vrsubhn_s32(
7473 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7474 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7475 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
7476 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
7477 // CHECK: ret <4 x i16> [[VRSUBHN_V2_I]]
int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
  return vrsubhn_s32(a, b);
}
7482 // CHECK-LABEL: @test_vrsubhn_s64(
7483 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7484 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7485 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
7486 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
7487 // CHECK: ret <2 x i32> [[VRSUBHN_V2_I]]
int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
  return vrsubhn_s64(a, b);
}
7492 // CHECK-LABEL: @test_vrsubhn_u16(
7493 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7494 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7495 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7496 // CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vrsubhn_u16(a, b);
}
7501 // CHECK-LABEL: @test_vrsubhn_u32(
7502 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7503 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7504 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
7505 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
7506 // CHECK: ret <4 x i16> [[VRSUBHN_V2_I]]
uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vrsubhn_u32(a, b);
}
7511 // CHECK-LABEL: @test_vrsubhn_u64(
7512 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7513 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7514 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
7515 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
7516 // CHECK: ret <2 x i32> [[VRSUBHN_V2_I]]
uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vrsubhn_u64(a, b);
}
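// vrsubhn_high_*: the rsubhn result is concatenated onto r with a shufflevector.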
7521 // CHECK-LABEL: @test_vrsubhn_high_s16(
7522 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7523 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7524 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7525 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7526 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vrsubhn_high_s16(r, a, b);
}
7531 // CHECK-LABEL: @test_vrsubhn_high_s32(
7532 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7533 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7534 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
7535 // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
7536 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7537 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vrsubhn_high_s32(r, a, b);
}
7542 // CHECK-LABEL: @test_vrsubhn_high_s64(
7543 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7544 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7545 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
7546 // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
7547 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7548 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vrsubhn_high_s64(r, a, b);
}
7553 // CHECK-LABEL: @test_vrsubhn_high_u16(
7554 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7555 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7556 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7557 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7558 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vrsubhn_high_u16(r, a, b);
}
7563 // CHECK-LABEL: @test_vrsubhn_high_u32(
7564 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7565 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7566 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
7567 // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
7568 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7569 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vrsubhn_high_u32(r, a, b);
}
7574 // CHECK-LABEL: @test_vrsubhn_high_u64(
7575 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7576 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7577 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
7578 // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
7579 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7580 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vrsubhn_high_u64(r, a, b);
}
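// vabdl_*: absolute-difference-long lowers to @llvm.aarch64.neon.[su]abd followed by a zext to the doubled element width.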
7585 // CHECK-LABEL: @test_vabdl_s8(
7586 // CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b)
7587 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
7588 // CHECK: ret <8 x i16> [[VMOVL_I_I]]
int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
  return vabdl_s8(a, b);
}
7593 // CHECK-LABEL: @test_vabdl_s16(
7594 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7595 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7596 // CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b)
7597 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
7598 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32>
7599 // CHECK: ret <4 x i32> [[VMOVL_I_I]]
int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
  return vabdl_s16(a, b);
}
7604 // CHECK-LABEL: @test_vabdl_s32(
7605 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7606 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7607 // CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b)
7608 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
7609 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64>
7610 // CHECK: ret <2 x i64> [[VMOVL_I_I]]
int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
  return vabdl_s32(a, b);
}
7615 // CHECK-LABEL: @test_vabdl_u8(
7616 // CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b)
7617 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
7618 // CHECK: ret <8 x i16> [[VMOVL_I_I]]
uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
  return vabdl_u8(a, b);
}
7623 // CHECK-LABEL: @test_vabdl_u16(
7624 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7625 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7626 // CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b)
7627 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
7628 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32>
7629 // CHECK: ret <4 x i32> [[VMOVL_I_I]]
uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
  return vabdl_u16(a, b);
}
7634 // CHECK-LABEL: @test_vabdl_u32(
7635 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7636 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7637 // CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b)
7638 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
7639 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64>
7640 // CHECK: ret <2 x i64> [[VMOVL_I_I]]
uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
  return vabdl_u32(a, b);
}
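// vabal_*: the zero-extended absolute difference is added onto the accumulator a.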
7645 // CHECK-LABEL: @test_vabal_s8(
7646 // CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c)
7647 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
7648 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
7649 // CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vabal_s8(a, b, c);
}
7654 // CHECK-LABEL: @test_vabal_s16(
7655 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7656 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
7657 // CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c)
7658 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
7659 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
7660 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
7661 // CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vabal_s16(a, b, c);
}
7666 // CHECK-LABEL: @test_vabal_s32(
7667 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7668 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
7669 // CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c)
7670 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
7671 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
7672 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
7673 // CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vabal_s32(a, b, c);
}
7678 // CHECK-LABEL: @test_vabal_u8(
7679 // CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c)
7680 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
7681 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
7682 // CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vabal_u8(a, b, c);
}
7687 // CHECK-LABEL: @test_vabal_u16(
7688 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7689 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
7690 // CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c)
7691 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
7692 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
7693 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
7694 // CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vabal_u16(a, b, c);
}
7699 // CHECK-LABEL: @test_vabal_u32(
7700 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7701 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
7702 // CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c)
7703 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
7704 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
7705 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
7706 // CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vabal_u32(a, b, c);
}
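// vabdl_high_*: the upper halves of both operands are extracted with shufflevectors before the [su]abd + zext sequence.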
7711 // CHECK-LABEL: @test_vabdl_high_s8(
7712 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7713 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7714 // CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
7715 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
7716 // CHECK: ret <8 x i16> [[VMOVL_I_I_I]]
int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) {
  return vabdl_high_s8(a, b);
}
7721 // CHECK-LABEL: @test_vabdl_high_s16(
7722 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7723 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7724 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7725 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
7726 // CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
7727 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
7728 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
7729 // CHECK: ret <4 x i32> [[VMOVL_I_I_I]]
int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) {
  return vabdl_high_s16(a, b);
}
7734 // CHECK-LABEL: @test_vabdl_high_s32(
7735 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7736 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7737 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7738 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
7739 // CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
7740 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
7741 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
7742 // CHECK: ret <2 x i64> [[VMOVL_I_I_I]]
int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) {
  return vabdl_high_s32(a, b);
}
7747 // CHECK-LABEL: @test_vabdl_high_u8(
7748 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7749 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7750 // CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
7751 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
7752 // CHECK: ret <8 x i16> [[VMOVL_I_I_I]]
uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vabdl_high_u8(a, b);
}
7757 // CHECK-LABEL: @test_vabdl_high_u16(
7758 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7759 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7760 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7761 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
7762 // CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
7763 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
7764 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
7765 // CHECK: ret <4 x i32> [[VMOVL_I_I_I]]
uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vabdl_high_u16(a, b);
}
7770 // CHECK-LABEL: @test_vabdl_high_u32(
7771 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7772 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7773 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7774 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
7775 // CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
7776 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
7777 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
7778 // CHECK: ret <2 x i64> [[VMOVL_I_I_I]]
uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vabdl_high_u32(a, b);
}
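// vabal_high_*: same upper-half extraction as vabdl_high_*, with the widened difference added onto a.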
7783 // CHECK-LABEL: @test_vabal_high_s8(
7784 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7785 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7786 // CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
7787 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
7788 // CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
7789 // CHECK: ret <8 x i16> [[ADD_I_I]]
int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vabal_high_s8(a, b, c);
}
7794 // CHECK-LABEL: @test_vabal_high_s16(
7795 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7796 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7797 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7798 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
7799 // CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
7800 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
7801 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32>
7802 // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
7803 // CHECK: ret <4 x i32> [[ADD_I_I]]
int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vabal_high_s16(a, b, c);
}
7808 // CHECK-LABEL: @test_vabal_high_s32(
7809 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7810 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
7811 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7812 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
7813 // CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
7814 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
7815 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64>
7816 // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
7817 // CHECK: ret <2 x i64> [[ADD_I_I]]
int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vabal_high_s32(a, b, c);
}
7822 // CHECK-LABEL: @test_vabal_high_u8(
7823 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7824 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7825 // CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
7826 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
7827 // CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
7828 // CHECK: ret <8 x i16> [[ADD_I_I]]
uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vabal_high_u8(a, b, c);
}
7833 // CHECK-LABEL: @test_vabal_high_u16(
7834 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7835 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7836 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7837 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
7838 // CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
7839 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
7840 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32>
7841 // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
7842 // CHECK: ret <4 x i32> [[ADD_I_I]]
uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vabal_high_u16(a, b, c);
}
7847 // CHECK-LABEL: @test_vabal_high_u32(
7848 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7849 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
7850 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7851 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
7852 // CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
7853 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
7854 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64>
7855 // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
7856 // CHECK: ret <2 x i64> [[ADD_I_I]]
uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vabal_high_u32(a, b, c);
}
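// vmull_*: widening multiply lowers directly to @llvm.aarch64.neon.[su]mull.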
7861 // CHECK-LABEL: @test_vmull_s8(
7862 // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b)
7863 // CHECK: ret <8 x i16> [[VMULL_I]]
int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
  return vmull_s8(a, b);
}
7868 // CHECK-LABEL: @test_vmull_s16(
7869 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7870 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7871 // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b)
7872 // CHECK: ret <4 x i32> [[VMULL2_I]]
int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
  return vmull_s16(a, b);
}
7877 // CHECK-LABEL: @test_vmull_s32(
7878 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7879 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7880 // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b)
7881 // CHECK: ret <2 x i64> [[VMULL2_I]]
int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
  return vmull_s32(a, b);
}
7886 // CHECK-LABEL: @test_vmull_u8(
7887 // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b)
7888 // CHECK: ret <8 x i16> [[VMULL_I]]
uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
  return vmull_u8(a, b);
}
7893 // CHECK-LABEL: @test_vmull_u16(
7894 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7895 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7896 // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b)
7897 // CHECK: ret <4 x i32> [[VMULL2_I]]
uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
  return vmull_u16(a, b);
}
7902 // CHECK-LABEL: @test_vmull_u32(
7903 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7904 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7905 // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b)
7906 // CHECK: ret <2 x i64> [[VMULL2_I]]
uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
  return vmull_u32(a, b);
}
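// vmull_high_*: the upper halves are extracted with shufflevectors before the [su]mull call.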
7911 // CHECK-LABEL: @test_vmull_high_s8(
7912 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7913 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7914 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
7915 // CHECK: ret <8 x i16> [[VMULL_I_I]]
int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) {
  return vmull_high_s8(a, b);
}
7920 // CHECK-LABEL: @test_vmull_high_s16(
7921 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7922 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7923 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7924 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
7925 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
7926 // CHECK: ret <4 x i32> [[VMULL2_I_I]]
int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) {
  return vmull_high_s16(a, b);
}
7931 // CHECK-LABEL: @test_vmull_high_s32(
7932 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7933 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7934 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7935 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
7936 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
7937 // CHECK: ret <2 x i64> [[VMULL2_I_I]]
int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) {
  return vmull_high_s32(a, b);
}
7942 // CHECK-LABEL: @test_vmull_high_u8(
7943 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7944 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7945 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
7946 // CHECK: ret <8 x i16> [[VMULL_I_I]]
uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) {
  return vmull_high_u8(a, b);
}
7951 // CHECK-LABEL: @test_vmull_high_u16(
7952 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7953 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7954 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7955 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
7956 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
7957 // CHECK: ret <4 x i32> [[VMULL2_I_I]]
uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) {
  return vmull_high_u16(a, b);
}
7962 // CHECK-LABEL: @test_vmull_high_u32(
7963 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7964 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7965 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7966 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
7967 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
7968 // CHECK: ret <2 x i64> [[VMULL2_I_I]]
uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) {
  return vmull_high_u32(a, b);
}
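// vmlal_*: multiply-accumulate-long is the [su]mull call followed by an add onto a.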
7973 // CHECK-LABEL: @test_vmlal_s8(
7974 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
7975 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
7976 // CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlal_s8(a, b, c);
}
7981 // CHECK-LABEL: @test_vmlal_s16(
7982 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7983 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
7984 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
7985 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
7986 // CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_s16(a, b, c);
}
7991 // CHECK-LABEL: @test_vmlal_s32(
7992 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7993 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
7994 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
7995 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
7996 // CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_s32(a, b, c);
}
8001 // CHECK-LABEL: @test_vmlal_u8(
8002 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
8003 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
8004 // CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlal_u8(a, b, c);
}
8009 // CHECK-LABEL: @test_vmlal_u16(
8010 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8011 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8012 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
8013 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
8014 // CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_u16(a, b, c);
}
8019 // CHECK-LABEL: @test_vmlal_u32(
8020 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8021 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8022 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
8023 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
8024 // CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_u32(a, b, c);
}
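// vmlal_high_*: upper-half extraction, [su]mull, then an add onto a.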
8029 // CHECK-LABEL: @test_vmlal_high_s8(
8030 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8031 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8032 // CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
8033 // CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
8034 // CHECK: ret <8 x i16> [[ADD_I_I]]
int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vmlal_high_s8(a, b, c);
}
8039 // CHECK-LABEL: @test_vmlal_high_s16(
8040 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8041 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8042 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8043 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8044 // CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8045 // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
8046 // CHECK: ret <4 x i32> [[ADD_I_I]]
int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vmlal_high_s16(a, b, c);
}
8051 // CHECK-LABEL: @test_vmlal_high_s32(
8052 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8053 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8054 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8055 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8056 // CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8057 // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
8058 // CHECK: ret <2 x i64> [[ADD_I_I]]
int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vmlal_high_s32(a, b, c);
}
8063 // CHECK-LABEL: @test_vmlal_high_u8(
8064 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8065 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8066 // CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
8067 // CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
8068 // CHECK: ret <8 x i16> [[ADD_I_I]]
uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vmlal_high_u8(a, b, c);
}
8073 // CHECK-LABEL: @test_vmlal_high_u16(
8074 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8075 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8076 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8077 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8078 // CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8079 // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
8080 // CHECK: ret <4 x i32> [[ADD_I_I]]
uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlal_high_u16(a, b, c);
}
8085 // CHECK-LABEL: @test_vmlal_high_u32(
8086 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8087 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8088 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8089 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8090 // CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8091 // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
8092 // CHECK: ret <2 x i64> [[ADD_I_I]]
uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlal_high_u32(a, b, c);
}
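// vmlsl_*: multiply-subtract-long is the [su]mull call followed by a sub from a.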
8097 // CHECK-LABEL: @test_vmlsl_s8(
8098 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
8099 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
8100 // CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlsl_s8(a, b, c);
}
8105 // CHECK-LABEL: @test_vmlsl_s16(
8106 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8107 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8108 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
8109 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
8110 // CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_s16(a, b, c);
}
8115 // CHECK-LABEL: @test_vmlsl_s32(
8116 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8117 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8118 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
8119 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
8120 // CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_s32(a, b, c);
}
8125 // CHECK-LABEL: @test_vmlsl_u8(
8126 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
8127 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
8128 // CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlsl_u8(a, b, c);
}
8133 // CHECK-LABEL: @test_vmlsl_u16(
8134 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8135 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8136 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
8137 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
8138 // CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_u16(a, b, c);
}
8143 // CHECK-LABEL: @test_vmlsl_u32(
8144 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8145 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8146 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
8147 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
8148 // CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_u32(a, b, c);
}
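// vmlsl_high_*: upper-half extraction, [su]mull, then a sub from a.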
8153 // CHECK-LABEL: @test_vmlsl_high_s8(
8154 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8155 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8156 // CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
8157 // CHECK: [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
8158 // CHECK: ret <8 x i16> [[SUB_I_I]]
int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vmlsl_high_s8(a, b, c);
}
8163 // CHECK-LABEL: @test_vmlsl_high_s16(
8164 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8165 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8166 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8167 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8168 // CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8169 // CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
8170 // CHECK: ret <4 x i32> [[SUB_I_I]]
int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vmlsl_high_s16(a, b, c);
}
8175 // CHECK-LABEL: @test_vmlsl_high_s32(
8176 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8177 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8178 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8179 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8180 // CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8181 // CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
8182 // CHECK: ret <2 x i64> [[SUB_I_I]]
int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vmlsl_high_s32(a, b, c);
}
8187 // CHECK-LABEL: @test_vmlsl_high_u8(
8188 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8189 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8190 // CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
8191 // CHECK: [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
8192 // CHECK: ret <8 x i16> [[SUB_I_I]]
uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vmlsl_high_u8(a, b, c);
}
8197 // CHECK-LABEL: @test_vmlsl_high_u16(
8198 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8199 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8200 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8201 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8202 // CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8203 // CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
8204 // CHECK: ret <4 x i32> [[SUB_I_I]]
uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlsl_high_u16(a, b, c);
}
8209 // CHECK-LABEL: @test_vmlsl_high_u32(
8210 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8211 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8212 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8213 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8214 // CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8215 // CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
8216 // CHECK: ret <2 x i64> [[SUB_I_I]]
uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlsl_high_u32(a, b, c);
}
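// vqdmull_*: saturating doubling multiply-long lowers to @llvm.aarch64.neon.sqdmull.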
8221 // CHECK-LABEL: @test_vqdmull_s16(
8222 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8223 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8224 // CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
8225 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
8226 // CHECK: ret <4 x i32> [[VQDMULL_V2_I]]
int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
  return vqdmull_s16(a, b);
}
8231 // CHECK-LABEL: @test_vqdmull_s32(
8232 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8233 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8234 // CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
8235 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
8236 // CHECK: ret <2 x i64> [[VQDMULL_V2_I]]
int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
  return vqdmull_s32(a, b);
}
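// vqdmlal_* / vqdmlsl_*: sqdmull followed by a saturating sqadd/sqsub with the accumulator a.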
8241 // CHECK-LABEL: @test_vqdmlal_s16(
8242 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8243 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8244 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8245 // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
8246 // CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
8247 // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_s16(a, b, c);
}
8252 // CHECK-LABEL: @test_vqdmlal_s32(
8253 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8254 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8255 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8256 // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
8257 // CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
8258 // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlal_s32(a, b, c);
}
8263 // CHECK-LABEL: @test_vqdmlsl_s16(
8264 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8265 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8266 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8267 // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
8268 // CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
8269 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlsl_s16(a, b, c);
}
8274 // CHECK-LABEL: @test_vqdmlsl_s32(
8275 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8276 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8277 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8278 // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
8279 // CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
8280 // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlsl_s32(a, b, c);
}
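// The _high variants of the saturating doubling ops extract the upper halves first, then follow the same sqdmull (+ sqadd/sqsub) pattern.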
8285 // CHECK-LABEL: @test_vqdmull_high_s16(
8286 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8287 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8288 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8289 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8290 // CHECK: [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8291 // CHECK: [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8>
8292 // CHECK: ret <4 x i32> [[VQDMULL_V2_I_I]]
int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) {
  return vqdmull_high_s16(a, b);
}
8297 // CHECK-LABEL: @test_vqdmull_high_s32(
8298 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
8299 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8300 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8301 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8302 // CHECK: [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8303 // CHECK: [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8>
8304 // CHECK: ret <2 x i64> [[VQDMULL_V2_I_I]]
int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) {
  return vqdmull_high_s32(a, b);
}
8309 // CHECK-LABEL: @test_vqdmlal_high_s16(
8310 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8311 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8312 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8313 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8314 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8315 // CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8316 // CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]])
8317 // CHECK: ret <4 x i32> [[VQDMLAL_V3_I_I]]
int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vqdmlal_high_s16(a, b, c);
}
8322 // CHECK-LABEL: @test_vqdmlal_high_s32(
8323 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8324 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8325 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8326 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8327 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8328 // CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8329 // CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]])
8330 // CHECK: ret <2 x i64> [[VQDMLAL_V3_I_I]]
int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vqdmlal_high_s32(a, b, c);
}
8335 // CHECK-LABEL: @test_vqdmlsl_high_s16(
8336 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8337 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8338 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8339 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8340 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8341 // CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8342 // CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]])
8343 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I_I]]
int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vqdmlsl_high_s16(a, b, c);
}
8348 // CHECK-LABEL: @test_vqdmlsl_high_s32(
8349 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8350 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8351 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8352 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8353 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8354 // CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8355 // CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]])
8356 // CHECK: ret <2 x i64> [[VQDMLSL_V3_I_I]]
int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vqdmlsl_high_s32(a, b, c);
}
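// vmull_p8 / vmull_high_p8: polynomial multiply-long lowers to @llvm.aarch64.neon.pmull.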
8361 // CHECK-LABEL: @test_vmull_p8(
8362 // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b)
8363 // CHECK: ret <8 x i16> [[VMULL_I]]
poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
  return vmull_p8(a, b);
}
8368 // CHECK-LABEL: @test_vmull_high_p8(
8369 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8370 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8371 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
8372 // CHECK: ret <8 x i16> [[VMULL_I_I]]
poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) {
  return vmull_high_p8(a, b);
}
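// Scalar D-register arithmetic: vaddd/vsubd lower to plain i64 add/sub.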
8377 // CHECK-LABEL: @test_vaddd_s64(
8378 // CHECK: [[VADDD_I:%.*]] = add i64 %a, %b
8379 // CHECK: ret i64 [[VADDD_I]]
int64_t test_vaddd_s64(int64_t a, int64_t b) {
  return vaddd_s64(a, b);
}
8384 // CHECK-LABEL: @test_vaddd_u64(
8385 // CHECK: [[VADDD_I:%.*]] = add i64 %a, %b
8386 // CHECK: ret i64 [[VADDD_I]]
uint64_t test_vaddd_u64(uint64_t a, uint64_t b) {
  return vaddd_u64(a, b);
}
8391 // CHECK-LABEL: @test_vsubd_s64(
8392 // CHECK: [[VSUBD_I:%.*]] = sub i64 %a, %b
8393 // CHECK: ret i64 [[VSUBD_I]]
int64_t test_vsubd_s64(int64_t a, int64_t b) {
  return vsubd_s64(a, b);
}
8398 // CHECK-LABEL: @test_vsubd_u64(
8399 // CHECK: [[VSUBD_I:%.*]] = sub i64 %a, %b
8400 // CHECK: ret i64 [[VSUBD_I]]
uint64_t test_vsubd_u64(uint64_t a, uint64_t b) {
  return vsubd_u64(a, b);
}
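// Scalar saturating adds: the i8/i16 forms are widened through insertelement/extractelement around the v8i8/v4i16 [su]qadd intrinsics; the i32/i64 forms use the scalar intrinsics directly.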
8405 // CHECK-LABEL: @test_vqaddb_s8(
8406 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
8407 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
8408 // CHECK: [[VQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
8409 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_S8_I]], i64 0
8410 // CHECK: ret i8 [[TMP2]]
8411 int8_t test_vqaddb_s8(int8_t a, int8_t b) {
8412 return vqaddb_s8(a, b);
8413 }
8415 // CHECK-LABEL: @test_vqaddh_s16(
8416 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
8417 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
8418 // CHECK: [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
8419 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_S16_I]], i64 0
8420 // CHECK: ret i16 [[TMP2]]
8421 int16_t test_vqaddh_s16(int16_t a, int16_t b) {
8422 return vqaddh_s16(a, b);
8423 }
8425 // CHECK-LABEL: @test_vqadds_s32(
8426 // CHECK: [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %b)
8427 // CHECK: ret i32 [[VQADDS_S32_I]]
8428 int32_t test_vqadds_s32(int32_t a, int32_t b) {
8429 return vqadds_s32(a, b);
8430 }
8432 // CHECK-LABEL: @test_vqaddd_s64(
8433 // CHECK: [[VQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 %b)
8434 // CHECK: ret i64 [[VQADDD_S64_I]]
8435 int64_t test_vqaddd_s64(int64_t a, int64_t b) {
8436 return vqaddd_s64(a, b);
8437 }
8439 // CHECK-LABEL: @test_vqaddb_u8(
8440 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
8441 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
8442 // CHECK: [[VQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
8443 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_U8_I]], i64 0
8444 // CHECK: ret i8 [[TMP2]]
8445 uint8_t test_vqaddb_u8(uint8_t a, uint8_t b) {
8446 return vqaddb_u8(a, b);
8447 }
8449 // CHECK-LABEL: @test_vqaddh_u16(
8450 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
8451 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
8452 // CHECK: [[VQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
8453 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_U16_I]], i64 0
8454 // CHECK: ret i16 [[TMP2]]
8455 uint16_t test_vqaddh_u16(uint16_t a, uint16_t b) {
8456 return vqaddh_u16(a, b);
8457 }
8459 // CHECK-LABEL: @test_vqadds_u32(
8460 // CHECK: [[VQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqadd.i32(i32 %a, i32 %b)
8461 // CHECK: ret i32 [[VQADDS_U32_I]]
8462 uint32_t test_vqadds_u32(uint32_t a, uint32_t b) {
8463 return vqadds_u32(a, b);
8464 }
8466 // CHECK-LABEL: @test_vqaddd_u64(
8467 // CHECK: [[VQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqadd.i64(i64 %a, i64 %b)
8468 // CHECK: ret i64 [[VQADDD_U64_I]]
8469 uint64_t test_vqaddd_u64(uint64_t a, uint64_t b) {
8470 return vqaddd_u64(a, b);
8471 }
8473 // CHECK-LABEL: @test_vqsubb_s8(
8474 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
8475 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
8476 // CHECK: [[VQSUBB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
8477 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_S8_I]], i64 0
8478 // CHECK: ret i8 [[TMP2]]
8479 int8_t test_vqsubb_s8(int8_t a, int8_t b) {
8480 return vqsubb_s8(a, b);
8481 }
8483 // CHECK-LABEL: @test_vqsubh_s16(
8484 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
8485 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
8486 // CHECK: [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
8487 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_S16_I]], i64 0
8488 // CHECK: ret i16 [[TMP2]]
8489 int16_t test_vqsubh_s16(int16_t a, int16_t b) {
8490 return vqsubh_s16(a, b);
8491 }
8493 // CHECK-LABEL: @test_vqsubs_s32(
8494 // CHECK: [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 %b)
8495 // CHECK: ret i32 [[VQSUBS_S32_I]]
8496 int32_t test_vqsubs_s32(int32_t a, int32_t b) {
8497 return vqsubs_s32(a, b);
8498 }
8500 // CHECK-LABEL: @test_vqsubd_s64(
8501 // CHECK: [[VQSUBD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 %b)
8502 // CHECK: ret i64 [[VQSUBD_S64_I]]
8503 int64_t test_vqsubd_s64(int64_t a, int64_t b) {
8504 return vqsubd_s64(a, b);
8505 }
8507 // CHECK-LABEL: @test_vqsubb_u8(
8508 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
8509 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
8510 // CHECK: [[VQSUBB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
8511 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_U8_I]], i64 0
8512 // CHECK: ret i8 [[TMP2]]
8513 uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) {
8514 return vqsubb_u8(a, b);
8515 }
8517 // CHECK-LABEL: @test_vqsubh_u16(
8518 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
8519 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
8520 // CHECK: [[VQSUBH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
8521 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_U16_I]], i64 0
8522 // CHECK: ret i16 [[TMP2]]
8523 uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) {
8524 return vqsubh_u16(a, b);
8525 }
8527 // CHECK-LABEL: @test_vqsubs_u32(
8528 // CHECK: [[VQSUBS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqsub.i32(i32 %a, i32 %b)
8529 // CHECK: ret i32 [[VQSUBS_U32_I]]
8530 uint32_t test_vqsubs_u32(uint32_t a, uint32_t b) {
8531 return vqsubs_u32(a, b);
8532 }
8534 // CHECK-LABEL: @test_vqsubd_u64(
8535 // CHECK: [[VQSUBD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqsub.i64(i64 %a, i64 %b)
8536 // CHECK: ret i64 [[VQSUBD_U64_I]]
8537 uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) {
8538 return vqsubd_u64(a, b);
8539 }
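// Scalar shift tests. Note that the unsigned variants (vshld_u64, vqshl*_u*,
// vrshld_u64, vqrshl*_u*) still take a signed shift amount, as reflected in
// the int*_t second parameter of each signature below.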
8541 // CHECK-LABEL: @test_vshld_s64(
8542 // CHECK: [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 %a, i64 %b)
8543 // CHECK: ret i64 [[VSHLD_S64_I]]
8544 int64_t test_vshld_s64(int64_t a, int64_t b) {
8545 return vshld_s64(a, b);
8546 }
8548 // CHECK-LABEL: @test_vshld_u64(
8549 // CHECK: [[VSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 %a, i64 %b)
8550 // CHECK: ret i64 [[VSHLD_U64_I]]
8551 uint64_t test_vshld_u64(uint64_t a, int64_t b) {
8552 return vshld_u64(a, b);
8553 }
8555 // CHECK-LABEL: @test_vqshlb_s8(
8556 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
8557 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
8558 // CHECK: [[VQSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
8559 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_S8_I]], i64 0
8560 // CHECK: ret i8 [[TMP2]]
8561 int8_t test_vqshlb_s8(int8_t a, int8_t b) {
8562 return vqshlb_s8(a, b);
8563 }
8565 // CHECK-LABEL: @test_vqshlh_s16(
8566 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
8567 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
8568 // CHECK: [[VQSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
8569 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_S16_I]], i64 0
8570 // CHECK: ret i16 [[TMP2]]
8571 int16_t test_vqshlh_s16(int16_t a, int16_t b) {
8572 return vqshlh_s16(a, b);
8573 }
8575 // CHECK-LABEL: @test_vqshls_s32(
8576 // CHECK: [[VQSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 %b)
8577 // CHECK: ret i32 [[VQSHLS_S32_I]]
8578 int32_t test_vqshls_s32(int32_t a, int32_t b) {
8579 return vqshls_s32(a, b);
8580 }
8582 // CHECK-LABEL: @test_vqshld_s64(
8583 // CHECK: [[VQSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 %b)
8584 // CHECK: ret i64 [[VQSHLD_S64_I]]
8585 int64_t test_vqshld_s64(int64_t a, int64_t b) {
8586 return vqshld_s64(a, b);
8587 }
8589 // CHECK-LABEL: @test_vqshlb_u8(
8590 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
8591 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
8592 // CHECK: [[VQSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
8593 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_U8_I]], i64 0
8594 // CHECK: ret i8 [[TMP2]]
8595 uint8_t test_vqshlb_u8(uint8_t a, int8_t b) {
8596 return vqshlb_u8(a, b);
8597 }
8599 // CHECK-LABEL: @test_vqshlh_u16(
8600 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
8601 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
8602 // CHECK: [[VQSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
8603 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_U16_I]], i64 0
8604 // CHECK: ret i16 [[TMP2]]
8605 uint16_t test_vqshlh_u16(uint16_t a, int16_t b) {
8606 return vqshlh_u16(a, b);
8607 }
8609 // CHECK-LABEL: @test_vqshls_u32(
8610 // CHECK: [[VQSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 %b)
8611 // CHECK: ret i32 [[VQSHLS_U32_I]]
8612 uint32_t test_vqshls_u32(uint32_t a, int32_t b) {
8613 return vqshls_u32(a, b);
8614 }
8616 // CHECK-LABEL: @test_vqshld_u64(
8617 // CHECK: [[VQSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 %b)
8618 // CHECK: ret i64 [[VQSHLD_U64_I]]
8619 uint64_t test_vqshld_u64(uint64_t a, int64_t b) {
8620 return vqshld_u64(a, b);
8621 }
8623 // CHECK-LABEL: @test_vrshld_s64(
8624 // CHECK: [[VRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 %b)
8625 // CHECK: ret i64 [[VRSHLD_S64_I]]
8626 int64_t test_vrshld_s64(int64_t a, int64_t b) {
8627 return vrshld_s64(a, b);
8628 }
8630 // CHECK-LABEL: @test_vrshld_u64(
8631 // CHECK: [[VRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 %b)
8632 // CHECK: ret i64 [[VRSHLD_U64_I]]
8633 uint64_t test_vrshld_u64(uint64_t a, int64_t b) {
8634 return vrshld_u64(a, b);
8635 }
8637 // CHECK-LABEL: @test_vqrshlb_s8(
8638 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
8639 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
8640 // CHECK: [[VQRSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
8641 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_S8_I]], i64 0
8642 // CHECK: ret i8 [[TMP2]]
8643 int8_t test_vqrshlb_s8(int8_t a, int8_t b) {
8644 return vqrshlb_s8(a, b);
8645 }
8647 // CHECK-LABEL: @test_vqrshlh_s16(
8648 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
8649 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
8650 // CHECK: [[VQRSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
8651 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_S16_I]], i64 0
8652 // CHECK: ret i16 [[TMP2]]
8653 int16_t test_vqrshlh_s16(int16_t a, int16_t b) {
8654 return vqrshlh_s16(a, b);
8655 }
8657 // CHECK-LABEL: @test_vqrshls_s32(
8658 // CHECK: [[VQRSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrshl.i32(i32 %a, i32 %b)
8659 // CHECK: ret i32 [[VQRSHLS_S32_I]]
8660 int32_t test_vqrshls_s32(int32_t a, int32_t b) {
8661 return vqrshls_s32(a, b);
8662 }
8664 // CHECK-LABEL: @test_vqrshld_s64(
8665 // CHECK: [[VQRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %a, i64 %b)
8666 // CHECK: ret i64 [[VQRSHLD_S64_I]]
8667 int64_t test_vqrshld_s64(int64_t a, int64_t b) {
8668 return vqrshld_s64(a, b);
8669 }
8671 // CHECK-LABEL: @test_vqrshlb_u8(
8672 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
8673 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
8674 // CHECK: [[VQRSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
8675 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_U8_I]], i64 0
8676 // CHECK: ret i8 [[TMP2]]
8677 uint8_t test_vqrshlb_u8(uint8_t a, int8_t b) {
8678 return vqrshlb_u8(a, b);
8679 }
8681 // CHECK-LABEL: @test_vqrshlh_u16(
8682 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
8683 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
8684 // CHECK: [[VQRSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
8685 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_U16_I]], i64 0
8686 // CHECK: ret i16 [[TMP2]]
8687 uint16_t test_vqrshlh_u16(uint16_t a, int16_t b) {
8688 return vqrshlh_u16(a, b);
8689 }
8691 // CHECK-LABEL: @test_vqrshls_u32(
8692 // CHECK: [[VQRSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %a, i32 %b)
8693 // CHECK: ret i32 [[VQRSHLS_U32_I]]
8694 uint32_t test_vqrshls_u32(uint32_t a, int32_t b) {
8695 return vqrshls_u32(a, b);
8696 }
8698 // CHECK-LABEL: @test_vqrshld_u64(
8699 // CHECK: [[VQRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %a, i64 %b)
8700 // CHECK: ret i64 [[VQRSHLD_U64_I]]
8701 uint64_t test_vqrshld_u64(uint64_t a, int64_t b) {
8702 return vqrshld_u64(a, b);
8703 }
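// Pairwise reductions over two-element vectors follow: vpaddd_s64 is checked
// against @llvm.aarch64.neon.uaddv, the floating-point pairwise adds against
// two extractelements plus an fadd, and the max/min variants against the
// corresponding f{max,min}[nm]v reduction intrinsics.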
8705 // CHECK-LABEL: @test_vpaddd_s64(
8706 // CHECK: [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
8707 // CHECK: ret i64 [[VPADDD_S64_I]]
8708 int64_t test_vpaddd_s64(int64x2_t a) {
8709 return vpaddd_s64(a);
8710 }
8712 // CHECK-LABEL: @test_vpadds_f32(
8713 // CHECK: [[LANE0_I:%.*]] = extractelement <2 x float> %a, i64 0
8714 // CHECK: [[LANE1_I:%.*]] = extractelement <2 x float> %a, i64 1
8715 // CHECK: [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]]
8716 // CHECK: ret float [[VPADDD_I]]
8717 float32_t test_vpadds_f32(float32x2_t a) {
8718 return vpadds_f32(a);
8719 }
8721 // CHECK-LABEL: @test_vpaddd_f64(
8722 // CHECK: [[LANE0_I:%.*]] = extractelement <2 x double> %a, i64 0
8723 // CHECK: [[LANE1_I:%.*]] = extractelement <2 x double> %a, i64 1
8724 // CHECK: [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]]
8725 // CHECK: ret double [[VPADDD_I]]
8726 float64_t test_vpaddd_f64(float64x2_t a) {
8727 return vpaddd_f64(a);
8728 }
8730 // CHECK-LABEL: @test_vpmaxnms_f32(
8731 // CHECK: [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a)
8732 // CHECK: ret float [[VPMAXNMS_F32_I]]
8733 float32_t test_vpmaxnms_f32(float32x2_t a) {
8734 return vpmaxnms_f32(a);
8735 }
8737 // CHECK-LABEL: @test_vpmaxnmqd_f64(
8738 // CHECK: [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a)
8739 // CHECK: ret double [[VPMAXNMQD_F64_I]]
8740 float64_t test_vpmaxnmqd_f64(float64x2_t a) {
8741 return vpmaxnmqd_f64(a);
8742 }
8744 // CHECK-LABEL: @test_vpmaxs_f32(
8745 // CHECK: [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
8746 // CHECK: ret float [[VPMAXS_F32_I]]
8747 float32_t test_vpmaxs_f32(float32x2_t a) {
8748 return vpmaxs_f32(a);
8749 }
8751 // CHECK-LABEL: @test_vpmaxqd_f64(
8752 // CHECK: [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a)
8753 // CHECK: ret double [[VPMAXQD_F64_I]]
8754 float64_t test_vpmaxqd_f64(float64x2_t a) {
8755 return vpmaxqd_f64(a);
8756 }
8758 // CHECK-LABEL: @test_vpminnms_f32(
8759 // CHECK: [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a)
8760 // CHECK: ret float [[VPMINNMS_F32_I]]
8761 float32_t test_vpminnms_f32(float32x2_t a) {
8762 return vpminnms_f32(a);
8763 }
8765 // CHECK-LABEL: @test_vpminnmqd_f64(
8766 // CHECK: [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a)
8767 // CHECK: ret double [[VPMINNMQD_F64_I]]
8768 float64_t test_vpminnmqd_f64(float64x2_t a) {
8769 return vpminnmqd_f64(a);
8770 }
8772 // CHECK-LABEL: @test_vpmins_f32(
8773 // CHECK: [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
8774 // CHECK: ret float [[VPMINS_F32_I]]
8775 float32_t test_vpmins_f32(float32x2_t a) {
8776 return vpmins_f32(a);
8777 }
8779 // CHECK-LABEL: @test_vpminqd_f64(
8780 // CHECK: [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a)
8781 // CHECK: ret double [[VPMINQD_F64_I]]
8782 float64_t test_vpminqd_f64(float64x2_t a) {
8783 return vpminqd_f64(a);
8784 }
8786 // CHECK-LABEL: @test_vqdmulhh_s16(
8787 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
8788 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
8789 // CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
8790 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
8791 // CHECK: ret i16 [[TMP2]]
8792 int16_t test_vqdmulhh_s16(int16_t a, int16_t b) {
8793 return vqdmulhh_s16(a, b);
8794 }
8796 // CHECK-LABEL: @test_vqdmulhs_s32(
8797 // CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 %b)
8798 // CHECK: ret i32 [[VQDMULHS_S32_I]]
8799 int32_t test_vqdmulhs_s32(int32_t a, int32_t b) {
8800 return vqdmulhs_s32(a, b);
8801 }
8803 // CHECK-LABEL: @test_vqrdmulhh_s16(
8804 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
8805 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
8806 // CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
8807 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
8808 // CHECK: ret i16 [[TMP2]]
8809 int16_t test_vqrdmulhh_s16(int16_t a, int16_t b) {
8810 return vqrdmulhh_s16(a, b);
8811 }
8813 // CHECK-LABEL: @test_vqrdmulhs_s32(
8814 // CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 %b)
8815 // CHECK: ret i32 [[VQRDMULHS_S32_I]]
8816 int32_t test_vqrdmulhs_s32(int32_t a, int32_t b) {
8817 return vqrdmulhs_s32(a, b);
8818 }
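// The scalar floating-point tests below (vmulx, vrecps, vrsqrts, and later
// vrecpe/vrecpx/vrsqrte) are checked against the matching
// @llvm.aarch64.neon.f* intrinsics on float/double operands.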
8820 // CHECK-LABEL: @test_vmulxs_f32(
8821 // CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b)
8822 // CHECK: ret float [[VMULXS_F32_I]]
8823 float32_t test_vmulxs_f32(float32_t a, float32_t b) {
8824 return vmulxs_f32(a, b);
8825 }
8827 // CHECK-LABEL: @test_vmulxd_f64(
8828 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b)
8829 // CHECK: ret double [[VMULXD_F64_I]]
8830 float64_t test_vmulxd_f64(float64_t a, float64_t b) {
8831 return vmulxd_f64(a, b);
8832 }
8834 // CHECK-LABEL: @test_vmulx_f64(
8835 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
8836 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
8837 // CHECK: [[VMULX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmulx.v1f64(<1 x double> %a, <1 x double> %b)
8838 // CHECK: ret <1 x double> [[VMULX2_I]]
8839 float64x1_t test_vmulx_f64(float64x1_t a, float64x1_t b) {
8840 return vmulx_f64(a, b);
8841 }
8843 // CHECK-LABEL: @test_vrecpss_f32(
8844 // CHECK: [[VRECPS_I:%.*]] = call float @llvm.aarch64.neon.frecps.f32(float %a, float %b)
8845 // CHECK: ret float [[VRECPS_I]]
8846 float32_t test_vrecpss_f32(float32_t a, float32_t b) {
8847 return vrecpss_f32(a, b);
8848 }
8850 // CHECK-LABEL: @test_vrecpsd_f64(
8851 // CHECK: [[VRECPS_I:%.*]] = call double @llvm.aarch64.neon.frecps.f64(double %a, double %b)
8852 // CHECK: ret double [[VRECPS_I]]
8853 float64_t test_vrecpsd_f64(float64_t a, float64_t b) {
8854 return vrecpsd_f64(a, b);
8855 }
8857 // CHECK-LABEL: @test_vrsqrtss_f32(
8858 // CHECK: [[VRSQRTSS_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b)
8859 // CHECK: ret float [[VRSQRTSS_F32_I]]
8860 float32_t test_vrsqrtss_f32(float32_t a, float32_t b) {
8861 return vrsqrtss_f32(a, b);
8862 }
8864 // CHECK-LABEL: @test_vrsqrtsd_f64(
8865 // CHECK: [[VRSQRTSD_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b)
8866 // CHECK: ret double [[VRSQRTSD_F64_I]]
8867 float64_t test_vrsqrtsd_f64(float64_t a, float64_t b) {
8868 return vrsqrtsd_f64(a, b);
8869 }
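// The scalar integer-to-floating-point conversions below are expected to
// lower to plain sitofp/uitofp instructions rather than target intrinsics.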
8871 // CHECK-LABEL: @test_vcvts_f32_s32(
8872 // CHECK: [[TMP0:%.*]] = sitofp i32 %a to float
8873 // CHECK: ret float [[TMP0]]
8874 float32_t test_vcvts_f32_s32(int32_t a) {
8875 return vcvts_f32_s32(a);
8876 }
8878 // CHECK-LABEL: @test_vcvtd_f64_s64(
8879 // CHECK: [[TMP0:%.*]] = sitofp i64 %a to double
8880 // CHECK: ret double [[TMP0]]
8881 float64_t test_vcvtd_f64_s64(int64_t a) {
8882 return vcvtd_f64_s64(a);
8883 }
8885 // CHECK-LABEL: @test_vcvts_f32_u32(
8886 // CHECK: [[TMP0:%.*]] = uitofp i32 %a to float
8887 // CHECK: ret float [[TMP0]]
8888 float32_t test_vcvts_f32_u32(uint32_t a) {
8889 return vcvts_f32_u32(a);
8890 }
8892 // CHECK-LABEL: @test_vcvtd_f64_u64(
8893 // CHECK: [[TMP0:%.*]] = uitofp i64 %a to double
8894 // CHECK: ret double [[TMP0]]
8895 float64_t test_vcvtd_f64_u64(uint64_t a) {
8896 return vcvtd_f64_u64(a);
8897 }
8899 // CHECK-LABEL: @test_vrecpes_f32(
8900 // CHECK: [[VRECPES_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpe.f32(float %a)
8901 // CHECK: ret float [[VRECPES_F32_I]]
8902 float32_t test_vrecpes_f32(float32_t a) {
8903 return vrecpes_f32(a);
8904 }
8906 // CHECK-LABEL: @test_vrecped_f64(
8907 // CHECK: [[VRECPED_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpe.f64(double %a)
8908 // CHECK: ret double [[VRECPED_F64_I]]
8909 float64_t test_vrecped_f64(float64_t a) {
8910 return vrecped_f64(a);
8911 }
8913 // CHECK-LABEL: @test_vrecpxs_f32(
8914 // CHECK: [[VRECPXS_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpx.f32(float %a)
8915 // CHECK: ret float [[VRECPXS_F32_I]]
8916 float32_t test_vrecpxs_f32(float32_t a) {
8917 return vrecpxs_f32(a);
8918 }
8920 // CHECK-LABEL: @test_vrecpxd_f64(
8921 // CHECK: [[VRECPXD_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpx.f64(double %a)
8922 // CHECK: ret double [[VRECPXD_F64_I]]
8923 float64_t test_vrecpxd_f64(float64_t a) {
8924 return vrecpxd_f64(a);
8925 }
8927 // CHECK-LABEL: @test_vrsqrte_u32(
8928 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8929 // CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %a)
8930 // CHECK: ret <2 x i32> [[VRSQRTE_V1_I]]
8931 uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
8932 return vrsqrte_u32(a);
8933 }
8935 // CHECK-LABEL: @test_vrsqrteq_u32(
8936 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8937 // CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %a)
8938 // CHECK: ret <4 x i32> [[VRSQRTEQ_V1_I]]
8939 uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
8940 return vrsqrteq_u32(a);
8941 }
8943 // CHECK-LABEL: @test_vrsqrtes_f32(
8944 // CHECK: [[VRSQRTES_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrte.f32(float %a)
8945 // CHECK: ret float [[VRSQRTES_F32_I]]
8946 float32_t test_vrsqrtes_f32(float32_t a) {
8947 return vrsqrtes_f32(a);
8948 }
8950 // CHECK-LABEL: @test_vrsqrted_f64(
8951 // CHECK: [[VRSQRTED_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrte.f64(double %a)
8952 // CHECK: ret double [[VRSQRTED_F64_I]]
8953 float64_t test_vrsqrted_f64(float64_t a) {
8954 return vrsqrted_f64(a);
8955 }
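// vld1/vld1q tests: the loads are expected as ordinary vector loads carrying
// the element alignment (align 1/2/4/8). The *_void variants further down
// load through a void pointer and are therefore only checked for align 1.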
8957 // CHECK-LABEL: @test_vld1q_u8(
8958 // CHECK: [[TMP1:%.*]] = load <16 x i8>, ptr %a, align 1
8959 // CHECK: ret <16 x i8> [[TMP1]]
8960 uint8x16_t test_vld1q_u8(uint8_t const *a) {
8961 return vld1q_u8(a);
8962 }
8964 // CHECK-LABEL: @test_vld1q_u16(
8965 // CHECK: [[TMP2:%.*]] = load <8 x i16>, ptr %a, align 2
8966 // CHECK: ret <8 x i16> [[TMP2]]
8967 uint16x8_t test_vld1q_u16(uint16_t const *a) {
8968 return vld1q_u16(a);
8969 }
8971 // CHECK-LABEL: @test_vld1q_u32(
8972 // CHECK: [[TMP2:%.*]] = load <4 x i32>, ptr %a, align 4
8973 // CHECK: ret <4 x i32> [[TMP2]]
8974 uint32x4_t test_vld1q_u32(uint32_t const *a) {
8975 return vld1q_u32(a);
8976 }
8978 // CHECK-LABEL: @test_vld1q_u64(
8979 // CHECK: [[TMP2:%.*]] = load <2 x i64>, ptr %a, align 8
8980 // CHECK: ret <2 x i64> [[TMP2]]
8981 uint64x2_t test_vld1q_u64(uint64_t const *a) {
8982 return vld1q_u64(a);
8983 }
8985 // CHECK-LABEL: @test_vld1q_s8(
8986 // CHECK: [[TMP1:%.*]] = load <16 x i8>, ptr %a, align 1
8987 // CHECK: ret <16 x i8> [[TMP1]]
8988 int8x16_t test_vld1q_s8(int8_t const *a) {
8989 return vld1q_s8(a);
8990 }
8992 // CHECK-LABEL: @test_vld1q_s16(
8993 // CHECK: [[TMP2:%.*]] = load <8 x i16>, ptr %a, align 2
8994 // CHECK: ret <8 x i16> [[TMP2]]
8995 int16x8_t test_vld1q_s16(int16_t const *a) {
8996 return vld1q_s16(a);
8997 }
8999 // CHECK-LABEL: @test_vld1q_s32(
9000 // CHECK: [[TMP2:%.*]] = load <4 x i32>, ptr %a, align 4
9001 // CHECK: ret <4 x i32> [[TMP2]]
9002 int32x4_t test_vld1q_s32(int32_t const *a) {
9003 return vld1q_s32(a);
9004 }
9006 // CHECK-LABEL: @test_vld1q_s64(
9007 // CHECK: [[TMP2:%.*]] = load <2 x i64>, ptr %a, align 8
9008 // CHECK: ret <2 x i64> [[TMP2]]
9009 int64x2_t test_vld1q_s64(int64_t const *a) {
9010 return vld1q_s64(a);
9011 }
9013 // CHECK-LABEL: @test_vld1q_f16(
9014 // CHECK: [[TMP2:%.*]] = load <8 x half>, ptr %a, align 2
9015 // CHECK: ret <8 x half> [[TMP2]]
9016 float16x8_t test_vld1q_f16(float16_t const *a) {
9017 return vld1q_f16(a);
9018 }
9020 // CHECK-LABEL: @test_vld1q_f32(
9021 // CHECK: [[TMP2:%.*]] = load <4 x float>, ptr %a, align 4
9022 // CHECK: ret <4 x float> [[TMP2]]
9023 float32x4_t test_vld1q_f32(float32_t const *a) {
9024 return vld1q_f32(a);
9025 }
9027 // CHECK-LABEL: @test_vld1q_f64(
9028 // CHECK: [[TMP2:%.*]] = load <2 x double>, ptr %a, align 8
9029 // CHECK: ret <2 x double> [[TMP2]]
9030 float64x2_t test_vld1q_f64(float64_t const *a) {
9031 return vld1q_f64(a);
9032 }
9034 // CHECK-LABEL: @test_vld1q_p8(
9035 // CHECK: [[TMP1:%.*]] = load <16 x i8>, ptr %a, align 1
9036 // CHECK: ret <16 x i8> [[TMP1]]
9037 poly8x16_t test_vld1q_p8(poly8_t const *a) {
9038 return vld1q_p8(a);
9039 }
9041 // CHECK-LABEL: @test_vld1q_p16(
9042 // CHECK: [[TMP2:%.*]] = load <8 x i16>, ptr %a, align 2
9043 // CHECK: ret <8 x i16> [[TMP2]]
9044 poly16x8_t test_vld1q_p16(poly16_t const *a) {
9045 return vld1q_p16(a);
9046 }
9048 // CHECK-LABEL: @test_vld1_u8(
9049 // CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
9050 // CHECK: ret <8 x i8> [[TMP1]]
9051 uint8x8_t test_vld1_u8(uint8_t const *a) {
9052 return vld1_u8(a);
9053 }
9055 // CHECK-LABEL: @test_vld1_u16(
9056 // CHECK: [[TMP2:%.*]] = load <4 x i16>, ptr %a, align 2
9057 // CHECK: ret <4 x i16> [[TMP2]]
9058 uint16x4_t test_vld1_u16(uint16_t const *a) {
9059 return vld1_u16(a);
9060 }
9062 // CHECK-LABEL: @test_vld1_u32(
9063 // CHECK: [[TMP2:%.*]] = load <2 x i32>, ptr %a, align 4
9064 // CHECK: ret <2 x i32> [[TMP2]]
9065 uint32x2_t test_vld1_u32(uint32_t const *a) {
9066 return vld1_u32(a);
9067 }
9069 // CHECK-LABEL: @test_vld1_u64(
9070 // CHECK: [[TMP2:%.*]] = load <1 x i64>, ptr %a, align 8
9071 // CHECK: ret <1 x i64> [[TMP2]]
9072 uint64x1_t test_vld1_u64(uint64_t const *a) {
9073 return vld1_u64(a);
9074 }
9076 // CHECK-LABEL: @test_vld1_s8(
9077 // CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
9078 // CHECK: ret <8 x i8> [[TMP1]]
9079 int8x8_t test_vld1_s8(int8_t const *a) {
9080 return vld1_s8(a);
9081 }
9083 // CHECK-LABEL: @test_vld1_s16(
9084 // CHECK: [[TMP2:%.*]] = load <4 x i16>, ptr %a, align 2
9085 // CHECK: ret <4 x i16> [[TMP2]]
9086 int16x4_t test_vld1_s16(int16_t const *a) {
9087 return vld1_s16(a);
9088 }
9090 // CHECK-LABEL: @test_vld1_s32(
9091 // CHECK: [[TMP2:%.*]] = load <2 x i32>, ptr %a, align 4
9092 // CHECK: ret <2 x i32> [[TMP2]]
9093 int32x2_t test_vld1_s32(int32_t const *a) {
9094 return vld1_s32(a);
9095 }
9097 // CHECK-LABEL: @test_vld1_s64(
9098 // CHECK: [[TMP2:%.*]] = load <1 x i64>, ptr %a, align 8
9099 // CHECK: ret <1 x i64> [[TMP2]]
9100 int64x1_t test_vld1_s64(int64_t const *a) {
9101 return vld1_s64(a);
9102 }
9104 // CHECK-LABEL: @test_vld1_f16(
9105 // CHECK: [[TMP2:%.*]] = load <4 x half>, ptr %a, align 2
9106 // CHECK: ret <4 x half> [[TMP2]]
9107 float16x4_t test_vld1_f16(float16_t const *a) {
9108 return vld1_f16(a);
9109 }
9111 // CHECK-LABEL: @test_vld1_f32(
9112 // CHECK: [[TMP2:%.*]] = load <2 x float>, ptr %a, align 4
9113 // CHECK: ret <2 x float> [[TMP2]]
9114 float32x2_t test_vld1_f32(float32_t const *a) {
9115 return vld1_f32(a);
9116 }
9118 // CHECK-LABEL: @test_vld1_f64(
9119 // CHECK: [[TMP2:%.*]] = load <1 x double>, ptr %a, align 8
9120 // CHECK: ret <1 x double> [[TMP2]]
9121 float64x1_t test_vld1_f64(float64_t const *a) {
9122 return vld1_f64(a);
9123 }
9125 // CHECK-LABEL: @test_vld1_p8(
9126 // CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
9127 // CHECK: ret <8 x i8> [[TMP1]]
9128 poly8x8_t test_vld1_p8(poly8_t const *a) {
9129 return vld1_p8(a);
9130 }
9132 // CHECK-LABEL: @test_vld1_p16(
9133 // CHECK: [[TMP2:%.*]] = load <4 x i16>, ptr %a, align 2
9134 // CHECK: ret <4 x i16> [[TMP2]]
9135 poly16x4_t test_vld1_p16(poly16_t const *a) {
9136 return vld1_p16(a);
9137 }
9139 // CHECK-LABEL: @test_vld1_u8_void(
9140 // CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
9141 // CHECK: ret <8 x i8> [[TMP1]]
9142 uint8x8_t test_vld1_u8_void(void *a) {
9143 return vld1_u8(a);
9144 }
9146 // CHECK-LABEL: @test_vld1_u16_void(
9147 // CHECK: [[TMP1:%.*]] = load <4 x i16>, ptr %a, align 1
9148 // CHECK: ret <4 x i16> [[TMP1]]
9149 uint16x4_t test_vld1_u16_void(void *a) {
9150 return vld1_u16(a);
9151 }
9153 // CHECK-LABEL: @test_vld1_u32_void(
9154 // CHECK: [[TMP1:%.*]] = load <2 x i32>, ptr %a, align 1
9155 // CHECK: ret <2 x i32> [[TMP1]]
9156 uint32x2_t test_vld1_u32_void(void *a) {
9157 return vld1_u32(a);
9158 }
9160 // CHECK-LABEL: @test_vld1_u64_void(
9161 // CHECK: [[TMP1:%.*]] = load <1 x i64>, ptr %a, align 1
9162 // CHECK: ret <1 x i64> [[TMP1]]
9163 uint64x1_t test_vld1_u64_void(void *a) {
9164 return vld1_u64(a);
9165 }
9167 // CHECK-LABEL: @test_vld1_s8_void(
9168 // CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
9169 // CHECK: ret <8 x i8> [[TMP1]]
9170 int8x8_t test_vld1_s8_void(void *a) {
9171 return vld1_s8(a);
9172 }
9174 // CHECK-LABEL: @test_vld1_s16_void(
9175 // CHECK: [[TMP1:%.*]] = load <4 x i16>, ptr %a, align 1
9176 // CHECK: ret <4 x i16> [[TMP1]]
9177 int16x4_t test_vld1_s16_void(void *a) {
9178 return vld1_s16(a);
9179 }
9181 // CHECK-LABEL: @test_vld1_s32_void(
9182 // CHECK: [[TMP1:%.*]] = load <2 x i32>, ptr %a, align 1
9183 // CHECK: ret <2 x i32> [[TMP1]]
9184 int32x2_t test_vld1_s32_void(void *a) {
9185 return vld1_s32(a);
9186 }
9188 // CHECK-LABEL: @test_vld1_s64_void(
9189 // CHECK: [[TMP1:%.*]] = load <1 x i64>, ptr %a, align 1
9190 // CHECK: ret <1 x i64> [[TMP1]]
9191 int64x1_t test_vld1_s64_void(void *a) {
9192 return vld1_s64(a);
9193 }
9195 // CHECK-LABEL: @test_vld1_f16_void(
9196 // CHECK: [[TMP1:%.*]] = load <4 x half>, ptr %a, align 1
9197 // CHECK: ret <4 x half> [[TMP1]]
9198 float16x4_t test_vld1_f16_void(void *a) {
9199 return vld1_f16(a);
9200 }
9202 // CHECK-LABEL: @test_vld1_f32_void(
9203 // CHECK: [[TMP1:%.*]] = load <2 x float>, ptr %a, align 1
9204 // CHECK: ret <2 x float> [[TMP1]]
9205 float32x2_t test_vld1_f32_void(void *a) {
9206 return vld1_f32(a);
9207 }
9209 // CHECK-LABEL: @test_vld1_f64_void(
9210 // CHECK: [[TMP1:%.*]] = load <1 x double>, ptr %a, align 1
9211 // CHECK: ret <1 x double> [[TMP1]]
9212 float64x1_t test_vld1_f64_void(void *a) {
9213 return vld1_f64(a);
9214 }
9216 // CHECK-LABEL: @test_vld1_p8_void(
9217 // CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
9218 // CHECK: ret <8 x i8> [[TMP1]]
9219 poly8x8_t test_vld1_p8_void(void *a) {
9220 return vld1_p8(a);
9221 }
9223 // CHECK-LABEL: @test_vld1_p16_void(
9224 // CHECK: [[TMP1:%.*]] = load <4 x i16>, ptr %a, align 1
9225 // CHECK: ret <4 x i16> [[TMP1]]
9226 poly16x4_t test_vld1_p16_void(void *a) {
9227 return vld1_p16(a);
9228 }
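// vld2/vld3 structure-load tests: the CHECK lines expect a call to the
// @llvm.aarch64.neon.ld2/ld3 intrinsic, a store of the returned literal
// struct into a local alloca, and a memcpy into the NEON struct type that
// each wrapper returns by value.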
9230 // CHECK-LABEL: @test_vld2q_u8(
9231 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
9232 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
9233 // CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr %a)
9234 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], ptr [[__RET]]
9235 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
9236 // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL]], align 16
9237 // CHECK: ret %struct.uint8x16x2_t [[TMP5]]
9238 uint8x16x2_t test_vld2q_u8(uint8_t const *a) {
9239 return vld2q_u8(a);
9240 }
9242 // CHECK-LABEL: @test_vld2q_u16(
9243 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
9244 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
9245 // CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr %a)
9246 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], ptr [[__RET]]
9247 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
9248 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL]], align 16
9249 // CHECK: ret %struct.uint16x8x2_t [[TMP6]]
9250 uint16x8x2_t test_vld2q_u16(uint16_t const *a) {
9251 return vld2q_u16(a);
9252 }
9254 // CHECK-LABEL: @test_vld2q_u32(
9255 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
9256 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
9257 // CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %a)
9258 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], ptr [[__RET]]
9259 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
9260 // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL]], align 16
9261 // CHECK: ret %struct.uint32x4x2_t [[TMP6]]
9262 uint32x4x2_t test_vld2q_u32(uint32_t const *a) {
9263 return vld2q_u32(a);
9264 }
9266 // CHECK-LABEL: @test_vld2q_u64(
9267 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
9268 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
9269 // CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr %a)
9270 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], ptr [[__RET]]
9271 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
9272 // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x2_t, ptr [[RETVAL]], align 16
9273 // CHECK: ret %struct.uint64x2x2_t [[TMP6]]
9274 uint64x2x2_t test_vld2q_u64(uint64_t const *a) {
9275 return vld2q_u64(a);
9276 }
9278 // CHECK-LABEL: @test_vld2q_s8(
9279 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
9280 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
9281 // CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr %a)
9282 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], ptr [[__RET]]
9283 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
9284 // CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL]], align 16
9285 // CHECK: ret %struct.int8x16x2_t [[TMP5]]
9286 int8x16x2_t test_vld2q_s8(int8_t const *a) {
9287 return vld2q_s8(a);
9288 }
9290 // CHECK-LABEL: @test_vld2q_s16(
9291 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
9292 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
9293 // CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr %a)
9294 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], ptr [[__RET]]
9295 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
9296 // CHECK: [[TMP6:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL]], align 16
9297 // CHECK: ret %struct.int16x8x2_t [[TMP6]]
9298 int16x8x2_t test_vld2q_s16(int16_t const *a) {
9299 return vld2q_s16(a);
9300 }
9302 // CHECK-LABEL: @test_vld2q_s32(
9303 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
9304 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
9305 // CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %a)
9306 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], ptr [[__RET]]
9307 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
9308 // CHECK: [[TMP6:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL]], align 16
9309 // CHECK: ret %struct.int32x4x2_t [[TMP6]]
9310 int32x4x2_t test_vld2q_s32(int32_t const *a) {
9311 return vld2q_s32(a);
9312 }
9314 // CHECK-LABEL: @test_vld2q_s64(
9315 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
9316 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
9317 // CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr %a)
9318 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], ptr [[__RET]]
9319 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
9320 // CHECK: [[TMP6:%.*]] = load %struct.int64x2x2_t, ptr [[RETVAL]], align 16
9321 // CHECK: ret %struct.int64x2x2_t [[TMP6]]
9322 int64x2x2_t test_vld2q_s64(int64_t const *a) {
9323 return vld2q_s64(a);
9324 }
9326 // CHECK-LABEL: @test_vld2q_f16(
9327 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
9328 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
9329 // CHECK: [[VLD2:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2.v8f16.p0(ptr %a)
9330 // CHECK: store { <8 x half>, <8 x half> } [[VLD2]], ptr [[__RET]]
9331 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
9332 // CHECK: [[TMP6:%.*]] = load %struct.float16x8x2_t, ptr [[RETVAL]], align 16
9333 // CHECK: ret %struct.float16x8x2_t [[TMP6]]
9334 float16x8x2_t test_vld2q_f16(float16_t const *a) {
9335 return vld2q_f16(a);
9336 }
9338 // CHECK-LABEL: @test_vld2q_f32(
9339 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
9340 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
9341 // CHECK: [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr %a)
9342 // CHECK: store { <4 x float>, <4 x float> } [[VLD2]], ptr [[__RET]]
9343 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
9344 // CHECK: [[TMP6:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL]], align 16
9345 // CHECK: ret %struct.float32x4x2_t [[TMP6]]
9346 float32x4x2_t test_vld2q_f32(float32_t const *a) {
9347 return vld2q_f32(a);
9348 }
9350 // CHECK-LABEL: @test_vld2q_f64(
9351 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
9352 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
9353 // CHECK: [[VLD2:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0(ptr %a)
9354 // CHECK: store { <2 x double>, <2 x double> } [[VLD2]], ptr [[__RET]]
9355 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
9356 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, ptr [[RETVAL]], align 16
9357 // CHECK: ret %struct.float64x2x2_t [[TMP6]]
9358 float64x2x2_t test_vld2q_f64(float64_t const *a) {
9359 return vld2q_f64(a);
9360 }
9362 // CHECK-LABEL: @test_vld2q_p8(
9363 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
9364 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
9365 // CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr %a)
9366 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], ptr [[__RET]]
9367 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
9368 // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL]], align 16
9369 // CHECK: ret %struct.poly8x16x2_t [[TMP5]]
9370 poly8x16x2_t test_vld2q_p8(poly8_t const *a) {
9371 return vld2q_p8(a);
9372 }
9374 // CHECK-LABEL: @test_vld2q_p16(
9375 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
9376 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
9377 // CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr %a)
9378 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], ptr [[__RET]]
9379 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
9380 // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL]], align 16
9381 // CHECK: ret %struct.poly16x8x2_t [[TMP6]]
9382 poly16x8x2_t test_vld2q_p16(poly16_t const *a) {
9383 return vld2q_p16(a);
9384 }
9386 // CHECK-LABEL: @test_vld2_u8(
9387 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
9388 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
9389 // CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %a)
9390 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], ptr [[__RET]]
9391 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
9392 // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL]], align 8
9393 // CHECK: ret %struct.uint8x8x2_t [[TMP5]]
9394 uint8x8x2_t test_vld2_u8(uint8_t const *a) {
9395 return vld2_u8(a);
9396 }
9398 // CHECK-LABEL: @test_vld2_u16(
9399 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
9400 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
9401 // CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr %a)
9402 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], ptr [[__RET]]
9403 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
9404 // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL]], align 8
9405 // CHECK: ret %struct.uint16x4x2_t [[TMP6]]
9406 uint16x4x2_t test_vld2_u16(uint16_t const *a) {
9407 return vld2_u16(a);
9408 }
9410 // CHECK-LABEL: @test_vld2_u32(
9411 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
9412 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
9413 // CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr %a)
9414 // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], ptr [[__RET]]
9415 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
9416 // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL]], align 8
9417 // CHECK: ret %struct.uint32x2x2_t [[TMP6]]
9418 uint32x2x2_t test_vld2_u32(uint32_t const *a) {
9419 return vld2_u32(a);
9420 }
9422 // CHECK-LABEL: @test_vld2_u64(
9423 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
9424 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
9425 // CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr %a)
9426 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], ptr [[__RET]]
9427 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
9428 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x2_t, ptr [[RETVAL]], align 8
9429 // CHECK: ret %struct.uint64x1x2_t [[TMP6]]
9430 uint64x1x2_t test_vld2_u64(uint64_t const *a) {
9431 return vld2_u64(a);
9432 }
9434 // CHECK-LABEL: @test_vld2_s8(
9435 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
9436 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
9437 // CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %a)
9438 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], ptr [[__RET]]
9439 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
9440 // CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL]], align 8
9441 // CHECK: ret %struct.int8x8x2_t [[TMP5]]
9442 int8x8x2_t test_vld2_s8(int8_t const *a) {
9443 return vld2_s8(a);
9444 }
9446 // CHECK-LABEL: @test_vld2_s16(
9447 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
9448 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
9449 // CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr %a)
9450 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], ptr [[__RET]]
9451 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
9452 // CHECK: [[TMP6:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL]], align 8
9453 // CHECK: ret %struct.int16x4x2_t [[TMP6]]
9454 int16x4x2_t test_vld2_s16(int16_t const *a) {
9455 return vld2_s16(a);
9456 }
9458 // CHECK-LABEL: @test_vld2_s32(
9459 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
9460 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
9461 // CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr %a)
9462 // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], ptr [[__RET]]
9463 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
9464 // CHECK: [[TMP6:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL]], align 8
9465 // CHECK: ret %struct.int32x2x2_t [[TMP6]]
9466 int32x2x2_t test_vld2_s32(int32_t const *a) {
9467 return vld2_s32(a);
9468 }
9470 // CHECK-LABEL: @test_vld2_s64(
9471 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
9472 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
9473 // CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr %a)
9474 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], ptr [[__RET]]
9475 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
9476 // CHECK: [[TMP6:%.*]] = load %struct.int64x1x2_t, ptr [[RETVAL]], align 8
9477 // CHECK: ret %struct.int64x1x2_t [[TMP6]]
9478 int64x1x2_t test_vld2_s64(int64_t const *a) {
9479 return vld2_s64(a);
9480 }
9482 // CHECK-LABEL: @test_vld2_f16(
9483 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
9484 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
9485 // CHECK: [[VLD2:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2.v4f16.p0(ptr %a)
9486 // CHECK: store { <4 x half>, <4 x half> } [[VLD2]], ptr [[__RET]]
9487 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
9488 // CHECK: [[TMP6:%.*]] = load %struct.float16x4x2_t, ptr [[RETVAL]], align 8
9489 // CHECK: ret %struct.float16x4x2_t [[TMP6]]
9490 float16x4x2_t test_vld2_f16(float16_t const *a) {
9491 return vld2_f16(a);
9492 }
9494 // CHECK-LABEL: @test_vld2_f32(
9495 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
9496 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
9497 // CHECK: [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0(ptr %a)
9498 // CHECK: store { <2 x float>, <2 x float> } [[VLD2]], ptr [[__RET]]
9499 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
9500 // CHECK: [[TMP6:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL]], align 8
9501 // CHECK: ret %struct.float32x2x2_t [[TMP6]]
9502 float32x2x2_t test_vld2_f32(float32_t const *a) {
9503 return vld2_f32(a);
9504 }
9506 // CHECK-LABEL: @test_vld2_f64(
9507 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
9508 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
9509 // CHECK: [[VLD2:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0(ptr %a)
9510 // CHECK: store { <1 x double>, <1 x double> } [[VLD2]], ptr [[__RET]]
9511 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
9512 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, ptr [[RETVAL]], align 8
9513 // CHECK: ret %struct.float64x1x2_t [[TMP6]]
9514 float64x1x2_t test_vld2_f64(float64_t const *a) {
9515 return vld2_f64(a);
9516 }
9518 // CHECK-LABEL: @test_vld2_p8(
9519 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
9520 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
9521 // CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %a)
9522 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], ptr [[__RET]]
9523 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
9524 // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL]], align 8
9525 // CHECK: ret %struct.poly8x8x2_t [[TMP5]]
9526 poly8x8x2_t test_vld2_p8(poly8_t const *a) {
9527 return vld2_p8(a);
9528 }
9530 // CHECK-LABEL: @test_vld2_p16(
9531 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
9532 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
9533 // CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr %a)
9534 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], ptr [[__RET]]
9535 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
9536 // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL]], align 8
9537 // CHECK: ret %struct.poly16x4x2_t [[TMP6]]
9538 poly16x4x2_t test_vld2_p16(poly16_t const *a) {
9539 return vld2_p16(a);
9540 }
9542 // CHECK-LABEL: @test_vld3q_u8(
9543 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
9544 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
9545 // CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %a)
9546 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], ptr [[__RET]]
9547 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9548 // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x3_t, ptr [[RETVAL]], align 16
9549 // CHECK: ret %struct.uint8x16x3_t [[TMP5]]
9550 uint8x16x3_t test_vld3q_u8(uint8_t const *a) {
9551 return vld3q_u8(a);
9552 }
9554 // CHECK-LABEL: @test_vld3q_u16(
9555 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
9556 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
9557 // CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr %a)
9558 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], ptr [[__RET]]
9559 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9560 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x3_t, ptr [[RETVAL]], align 16
9561 // CHECK: ret %struct.uint16x8x3_t [[TMP6]]
9562 uint16x8x3_t test_vld3q_u16(uint16_t const *a) {
9563 return vld3q_u16(a);
9564 }
9566 // CHECK-LABEL: @test_vld3q_u32(
9567 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
9568 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
9569 // CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %a)
9570 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], ptr [[__RET]]
9571 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9572 // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x3_t, ptr [[RETVAL]], align 16
9573 // CHECK: ret %struct.uint32x4x3_t [[TMP6]]
9574 uint32x4x3_t test_vld3q_u32(uint32_t const *a) {
9575 return vld3q_u32(a);
9576 }
9578 // CHECK-LABEL: @test_vld3q_u64(
9579 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
9580 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
9581 // CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr %a)
9582 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], ptr [[__RET]]
9583 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9584 // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x3_t, ptr [[RETVAL]], align 16
9585 // CHECK: ret %struct.uint64x2x3_t [[TMP6]]
9586 uint64x2x3_t test_vld3q_u64(uint64_t const *a) {
9587 return vld3q_u64(a);
9588 }
9590 // CHECK-LABEL: @test_vld3q_s8(
9591 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
9592 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
9593 // CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %a)
9594 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], ptr [[__RET]]
9595 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9596 // CHECK: [[TMP5:%.*]] = load %struct.int8x16x3_t, ptr [[RETVAL]], align 16
9597 // CHECK: ret %struct.int8x16x3_t [[TMP5]]
9598 int8x16x3_t test_vld3q_s8(int8_t const *a) {
9599 return vld3q_s8(a);
9600 }
9602 // CHECK-LABEL: @test_vld3q_s16(
9603 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
9604 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
9605 // CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr %a)
9606 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], ptr [[__RET]]
9607 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9608 // CHECK: [[TMP6:%.*]] = load %struct.int16x8x3_t, ptr [[RETVAL]], align 16
9609 // CHECK: ret %struct.int16x8x3_t [[TMP6]]
9610 int16x8x3_t test_vld3q_s16(int16_t const *a) {
9611 return vld3q_s16(a);
9612 }
9614 // CHECK-LABEL: @test_vld3q_s32(
9615 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
9616 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
9617 // CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %a)
9618 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], ptr [[__RET]]
9619 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9620 // CHECK: [[TMP6:%.*]] = load %struct.int32x4x3_t, ptr [[RETVAL]], align 16
9621 // CHECK: ret %struct.int32x4x3_t [[TMP6]]
9622 int32x4x3_t test_vld3q_s32(int32_t const *a) {
9623 return vld3q_s32(a);
9624 }
9626 // CHECK-LABEL: @test_vld3q_s64(
9627 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
9628 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
9629 // CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr %a)
9630 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], ptr [[__RET]]
9631 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9632 // CHECK: [[TMP6:%.*]] = load %struct.int64x2x3_t, ptr [[RETVAL]], align 16
9633 // CHECK: ret %struct.int64x2x3_t [[TMP6]]
9634 int64x2x3_t test_vld3q_s64(int64_t const *a) {
9635 return vld3q_s64(a);
9636 }
9638 // CHECK-LABEL: @test_vld3q_f16(
9639 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
9640 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
9641 // CHECK: [[VLD3:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3.v8f16.p0(ptr %a)
9642 // CHECK: store { <8 x half>, <8 x half>, <8 x half> } [[VLD3]], ptr [[__RET]]
9643 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9644 // CHECK: [[TMP6:%.*]] = load %struct.float16x8x3_t, ptr [[RETVAL]], align 16
9645 // CHECK: ret %struct.float16x8x3_t [[TMP6]]
9646 float16x8x3_t test_vld3q_f16(float16_t const *a) {
9647 return vld3q_f16(a);
9648 }
9650 // CHECK-LABEL: @test_vld3q_f32(
9651 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
9652 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
9653 // CHECK: [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0(ptr %a)
9654 // CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], ptr [[__RET]]
9655 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9656 // CHECK: [[TMP6:%.*]] = load %struct.float32x4x3_t, ptr [[RETVAL]], align 16
9657 // CHECK: ret %struct.float32x4x3_t [[TMP6]]
9658 float32x4x3_t test_vld3q_f32(float32_t const *a) {
9659 return vld3q_f32(a);
9660 }
9662 // CHECK-LABEL: @test_vld3q_f64(
9663 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
9664 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
9665 // CHECK: [[VLD3:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0(ptr %a)
9666 // CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], ptr [[__RET]]
9667 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9668 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, ptr [[RETVAL]], align 16
9669 // CHECK: ret %struct.float64x2x3_t [[TMP6]]
9670 float64x2x3_t test_vld3q_f64(float64_t const *a) {
9671 return vld3q_f64(a);
9672 }
9674 // CHECK-LABEL: @test_vld3q_p8(
9675 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
9676 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
9677 // CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %a)
9678 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], ptr [[__RET]]
9679 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9680 // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x3_t, ptr [[RETVAL]], align 16
9681 // CHECK: ret %struct.poly8x16x3_t [[TMP5]]
9682 poly8x16x3_t test_vld3q_p8(poly8_t const *a) {
9683 return vld3q_p8(a);
9684 }
9686 // CHECK-LABEL: @test_vld3q_p16(
9687 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
9688 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
9689 // CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr %a)
9690 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], ptr [[__RET]]
9691 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9692 // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x3_t, ptr [[RETVAL]], align 16
9693 // CHECK: ret %struct.poly16x8x3_t [[TMP6]]
9694 poly16x8x3_t test_vld3q_p16(poly16_t const *a) {
9695 return vld3q_p16(a);
9696 }
9698 // CHECK-LABEL: @test_vld3_u8(
9699 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
9700 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
9701 // CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %a)
9702 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], ptr [[__RET]]
9703 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9704 // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x3_t, ptr [[RETVAL]], align 8
9705 // CHECK: ret %struct.uint8x8x3_t [[TMP5]]
9706 uint8x8x3_t test_vld3_u8(uint8_t const *a) {
  return vld3_u8(a);
}
9710 // CHECK-LABEL: @test_vld3_u16(
9711 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
9712 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
9713 // CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %a)
9714 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], ptr [[__RET]]
9715 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9716 // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x3_t, ptr [[RETVAL]], align 8
9717 // CHECK: ret %struct.uint16x4x3_t [[TMP6]]
9718 uint16x4x3_t test_vld3_u16(uint16_t const *a) {
  return vld3_u16(a);
}
9722 // CHECK-LABEL: @test_vld3_u32(
9723 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
9724 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
9725 // CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr %a)
9726 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], ptr [[__RET]]
9727 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9728 // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x3_t, ptr [[RETVAL]], align 8
9729 // CHECK: ret %struct.uint32x2x3_t [[TMP6]]
9730 uint32x2x3_t test_vld3_u32(uint32_t const *a) {
  return vld3_u32(a);
}
9734 // CHECK-LABEL: @test_vld3_u64(
9735 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
9736 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
9737 // CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr %a)
9738 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], ptr [[__RET]]
9739 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9740 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x3_t, ptr [[RETVAL]], align 8
9741 // CHECK: ret %struct.uint64x1x3_t [[TMP6]]
9742 uint64x1x3_t test_vld3_u64(uint64_t const *a) {
  return vld3_u64(a);
}
9746 // CHECK-LABEL: @test_vld3_s8(
9747 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
9748 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
9749 // CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %a)
9750 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], ptr [[__RET]]
9751 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9752 // CHECK: [[TMP5:%.*]] = load %struct.int8x8x3_t, ptr [[RETVAL]], align 8
9753 // CHECK: ret %struct.int8x8x3_t [[TMP5]]
9754 int8x8x3_t test_vld3_s8(int8_t const *a) {
  return vld3_s8(a);
}
9758 // CHECK-LABEL: @test_vld3_s16(
9759 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
9760 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
9761 // CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %a)
9762 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], ptr [[__RET]]
9763 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9764 // CHECK: [[TMP6:%.*]] = load %struct.int16x4x3_t, ptr [[RETVAL]], align 8
9765 // CHECK: ret %struct.int16x4x3_t [[TMP6]]
9766 int16x4x3_t test_vld3_s16(int16_t const *a) {
  return vld3_s16(a);
}
9770 // CHECK-LABEL: @test_vld3_s32(
9771 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
9772 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
9773 // CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr %a)
9774 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], ptr [[__RET]]
9775 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9776 // CHECK: [[TMP6:%.*]] = load %struct.int32x2x3_t, ptr [[RETVAL]], align 8
9777 // CHECK: ret %struct.int32x2x3_t [[TMP6]]
9778 int32x2x3_t test_vld3_s32(int32_t const *a) {
  return vld3_s32(a);
}
9782 // CHECK-LABEL: @test_vld3_s64(
9783 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
9784 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
9785 // CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr %a)
9786 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], ptr [[__RET]]
9787 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9788 // CHECK: [[TMP6:%.*]] = load %struct.int64x1x3_t, ptr [[RETVAL]], align 8
9789 // CHECK: ret %struct.int64x1x3_t [[TMP6]]
9790 int64x1x3_t test_vld3_s64(int64_t const *a) {
  return vld3_s64(a);
}
9794 // CHECK-LABEL: @test_vld3_f16(
9795 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
9796 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
9797 // CHECK: [[VLD3:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3.v4f16.p0(ptr %a)
9798 // CHECK: store { <4 x half>, <4 x half>, <4 x half> } [[VLD3]], ptr [[__RET]]
9799 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9800 // CHECK: [[TMP6:%.*]] = load %struct.float16x4x3_t, ptr [[RETVAL]], align 8
9801 // CHECK: ret %struct.float16x4x3_t [[TMP6]]
9802 float16x4x3_t test_vld3_f16(float16_t const *a) {
  return vld3_f16(a);
}
9806 // CHECK-LABEL: @test_vld3_f32(
9807 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
9808 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
9809 // CHECK: [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0(ptr %a)
9810 // CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], ptr [[__RET]]
9811 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9812 // CHECK: [[TMP6:%.*]] = load %struct.float32x2x3_t, ptr [[RETVAL]], align 8
9813 // CHECK: ret %struct.float32x2x3_t [[TMP6]]
9814 float32x2x3_t test_vld3_f32(float32_t const *a) {
  return vld3_f32(a);
}
9818 // CHECK-LABEL: @test_vld3_f64(
9819 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
9820 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
9821 // CHECK: [[VLD3:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0(ptr %a)
9822 // CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], ptr [[__RET]]
9823 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9824 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, ptr [[RETVAL]], align 8
9825 // CHECK: ret %struct.float64x1x3_t [[TMP6]]
9826 float64x1x3_t test_vld3_f64(float64_t const *a) {
  return vld3_f64(a);
}
9830 // CHECK-LABEL: @test_vld3_p8(
9831 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
9832 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
9833 // CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %a)
9834 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], ptr [[__RET]]
9835 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9836 // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x3_t, ptr [[RETVAL]], align 8
9837 // CHECK: ret %struct.poly8x8x3_t [[TMP5]]
9838 poly8x8x3_t test_vld3_p8(poly8_t const *a) {
  return vld3_p8(a);
}
9842 // CHECK-LABEL: @test_vld3_p16(
9843 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
9844 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
9845 // CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %a)
9846 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], ptr [[__RET]]
9847 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9848 // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x3_t, ptr [[RETVAL]], align 8
9849 // CHECK: ret %struct.poly16x4x3_t [[TMP6]]
9850 poly16x4x3_t test_vld3_p16(poly16_t const *a) {
  return vld3_p16(a);
}
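// vld4q: de-interleaving loads of four Q-register vectors via
// llvm.aarch64.neon.ld4.*; each test checks a 64-byte, 16-byte-aligned
// four-vector aggregate being stored, copied, and returned by value.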
9854 // CHECK-LABEL: @test_vld4q_u8(
9855 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
9856 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
9857 // CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %a)
9858 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], ptr [[__RET]]
9859 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9860 // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x4_t, ptr [[RETVAL]], align 16
9861 // CHECK: ret %struct.uint8x16x4_t [[TMP5]]
9862 uint8x16x4_t test_vld4q_u8(uint8_t const *a) {
  return vld4q_u8(a);
}
9866 // CHECK-LABEL: @test_vld4q_u16(
9867 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
9868 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
9869 // CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr %a)
9870 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], ptr [[__RET]]
9871 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9872 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x4_t, ptr [[RETVAL]], align 16
9873 // CHECK: ret %struct.uint16x8x4_t [[TMP6]]
9874 uint16x8x4_t test_vld4q_u16(uint16_t const *a) {
9875   return vld4q_u16(a);
}
9878 // CHECK-LABEL: @test_vld4q_u32(
9879 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
9880 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
9881 // CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr %a)
9882 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], ptr [[__RET]]
9883 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9884 // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x4_t, ptr [[RETVAL]], align 16
9885 // CHECK: ret %struct.uint32x4x4_t [[TMP6]]
9886 uint32x4x4_t test_vld4q_u32(uint32_t const *a) {
9887   return vld4q_u32(a);
}
9890 // CHECK-LABEL: @test_vld4q_u64(
9891 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
9892 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
9893 // CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr %a)
9894 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], ptr [[__RET]]
9895 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9896 // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x4_t, ptr [[RETVAL]], align 16
9897 // CHECK: ret %struct.uint64x2x4_t [[TMP6]]
9898 uint64x2x4_t test_vld4q_u64(uint64_t const *a) {
9899   return vld4q_u64(a);
}
9902 // CHECK-LABEL: @test_vld4q_s8(
9903 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
9904 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
9905 // CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %a)
9906 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], ptr [[__RET]]
9907 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9908 // CHECK: [[TMP5:%.*]] = load %struct.int8x16x4_t, ptr [[RETVAL]], align 16
9909 // CHECK: ret %struct.int8x16x4_t [[TMP5]]
9910 int8x16x4_t test_vld4q_s8(int8_t const *a) {
  return vld4q_s8(a);
}
9914 // CHECK-LABEL: @test_vld4q_s16(
9915 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
9916 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
9917 // CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr %a)
9918 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], ptr [[__RET]]
9919 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9920 // CHECK: [[TMP6:%.*]] = load %struct.int16x8x4_t, ptr [[RETVAL]], align 16
9921 // CHECK: ret %struct.int16x8x4_t [[TMP6]]
9922 int16x8x4_t test_vld4q_s16(int16_t const *a) {
9923   return vld4q_s16(a);
}
9926 // CHECK-LABEL: @test_vld4q_s32(
9927 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
9928 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
9929 // CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr %a)
9930 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], ptr [[__RET]]
9931 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9932 // CHECK: [[TMP6:%.*]] = load %struct.int32x4x4_t, ptr [[RETVAL]], align 16
9933 // CHECK: ret %struct.int32x4x4_t [[TMP6]]
9934 int32x4x4_t test_vld4q_s32(int32_t const *a) {
9935   return vld4q_s32(a);
}
9938 // CHECK-LABEL: @test_vld4q_s64(
9939 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
9940 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
9941 // CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr %a)
9942 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], ptr [[__RET]]
9943 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9944 // CHECK: [[TMP6:%.*]] = load %struct.int64x2x4_t, ptr [[RETVAL]], align 16
9945 // CHECK: ret %struct.int64x2x4_t [[TMP6]]
9946 int64x2x4_t test_vld4q_s64(int64_t const *a) {
9947   return vld4q_s64(a);
}
9950 // CHECK-LABEL: @test_vld4q_f16(
9951 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
9952 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
9953 // CHECK: [[VLD4:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4.v8f16.p0(ptr %a)
9954 // CHECK: store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4]], ptr [[__RET]]
9955 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9956 // CHECK: [[TMP6:%.*]] = load %struct.float16x8x4_t, ptr [[RETVAL]], align 16
9957 // CHECK: ret %struct.float16x8x4_t [[TMP6]]
9958 float16x8x4_t test_vld4q_f16(float16_t const *a) {
9959   return vld4q_f16(a);
}
9962 // CHECK-LABEL: @test_vld4q_f32(
9963 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
9964 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
9965 // CHECK: [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0(ptr %a)
9966 // CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], ptr [[__RET]]
9967 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9968 // CHECK: [[TMP6:%.*]] = load %struct.float32x4x4_t, ptr [[RETVAL]], align 16
9969 // CHECK: ret %struct.float32x4x4_t [[TMP6]]
9970 float32x4x4_t test_vld4q_f32(float32_t const *a) {
9971   return vld4q_f32(a);
}
9974 // CHECK-LABEL: @test_vld4q_f64(
9975 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
9976 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
9977 // CHECK: [[VLD4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0(ptr %a)
9978 // CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], ptr [[__RET]]
9979 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9980 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, ptr [[RETVAL]], align 16
9981 // CHECK: ret %struct.float64x2x4_t [[TMP6]]
9982 float64x2x4_t test_vld4q_f64(float64_t const *a) {
9983   return vld4q_f64(a);
}
9986 // CHECK-LABEL: @test_vld4q_p8(
9987 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
9988 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
9989 // CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %a)
9990 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], ptr [[__RET]]
9991 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9992 // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x4_t, ptr [[RETVAL]], align 16
9993 // CHECK: ret %struct.poly8x16x4_t [[TMP5]]
9994 poly8x16x4_t test_vld4q_p8(poly8_t const *a) {
  return vld4q_p8(a);
}
9998 // CHECK-LABEL: @test_vld4q_p16(
9999 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
10000 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
10001 // CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr %a)
10002 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], ptr [[__RET]]
10003 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
10004 // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x4_t, ptr [[RETVAL]], align 16
10005 // CHECK: ret %struct.poly16x8x4_t [[TMP6]]
10006 poly16x8x4_t test_vld4q_p16(poly16_t const *a) {
10007   return vld4q_p16(a);
}
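// The non-q vld4 variants return four D-register vectors: a 32-byte
// aggregate with 8-byte alignment.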
10010 // CHECK-LABEL: @test_vld4_u8(
10011 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
10012 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
10013 // CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr %a)
10014 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], ptr [[__RET]]
10015 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10016 // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x4_t, ptr [[RETVAL]], align 8
10017 // CHECK: ret %struct.uint8x8x4_t [[TMP5]]
10018 uint8x8x4_t test_vld4_u8(uint8_t const *a) {
  return vld4_u8(a);
}
10022 // CHECK-LABEL: @test_vld4_u16(
10023 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
10024 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
10025 // CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %a)
10026 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], ptr [[__RET]]
10027 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10028 // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x4_t, ptr [[RETVAL]], align 8
10029 // CHECK: ret %struct.uint16x4x4_t [[TMP6]]
10030 uint16x4x4_t test_vld4_u16(uint16_t const *a) {
10031   return vld4_u16(a);
}
10034 // CHECK-LABEL: @test_vld4_u32(
10035 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
10036 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
10037 // CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0(ptr %a)
10038 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], ptr [[__RET]]
10039 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10040 // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x4_t, ptr [[RETVAL]], align 8
10041 // CHECK: ret %struct.uint32x2x4_t [[TMP6]]
10042 uint32x2x4_t test_vld4_u32(uint32_t const *a) {
10043   return vld4_u32(a);
}
10046 // CHECK-LABEL: @test_vld4_u64(
10047 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
10048 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
10049 // CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr %a)
10050 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], ptr [[__RET]]
10051 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10052 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x4_t, ptr [[RETVAL]], align 8
10053 // CHECK: ret %struct.uint64x1x4_t [[TMP6]]
10054 uint64x1x4_t test_vld4_u64(uint64_t const *a) {
10055   return vld4_u64(a);
}
10058 // CHECK-LABEL: @test_vld4_s8(
10059 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
10060 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
10061 // CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr %a)
10062 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], ptr [[__RET]]
10063 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10064 // CHECK: [[TMP5:%.*]] = load %struct.int8x8x4_t, ptr [[RETVAL]], align 8
10065 // CHECK: ret %struct.int8x8x4_t [[TMP5]]
10066 int8x8x4_t test_vld4_s8(int8_t const *a) {
  return vld4_s8(a);
}
10070 // CHECK-LABEL: @test_vld4_s16(
10071 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
10072 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
10073 // CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %a)
10074 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], ptr [[__RET]]
10075 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10076 // CHECK: [[TMP6:%.*]] = load %struct.int16x4x4_t, ptr [[RETVAL]], align 8
10077 // CHECK: ret %struct.int16x4x4_t [[TMP6]]
10078 int16x4x4_t test_vld4_s16(int16_t const *a) {
10079   return vld4_s16(a);
}
10082 // CHECK-LABEL: @test_vld4_s32(
10083 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
10084 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
10085 // CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0(ptr %a)
10086 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], ptr [[__RET]]
10087 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10088 // CHECK: [[TMP6:%.*]] = load %struct.int32x2x4_t, ptr [[RETVAL]], align 8
10089 // CHECK: ret %struct.int32x2x4_t [[TMP6]]
10090 int32x2x4_t test_vld4_s32(int32_t const *a) {
10091   return vld4_s32(a);
}
10094 // CHECK-LABEL: @test_vld4_s64(
10095 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
10096 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
10097 // CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr %a)
10098 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], ptr [[__RET]]
10099 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10100 // CHECK: [[TMP6:%.*]] = load %struct.int64x1x4_t, ptr [[RETVAL]], align 8
10101 // CHECK: ret %struct.int64x1x4_t [[TMP6]]
10102 int64x1x4_t test_vld4_s64(int64_t const *a) {
10103   return vld4_s64(a);
}
10106 // CHECK-LABEL: @test_vld4_f16(
10107 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
10108 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
10109 // CHECK: [[VLD4:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4.v4f16.p0(ptr %a)
10110 // CHECK: store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4]], ptr [[__RET]]
10111 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10112 // CHECK: [[TMP6:%.*]] = load %struct.float16x4x4_t, ptr [[RETVAL]], align 8
10113 // CHECK: ret %struct.float16x4x4_t [[TMP6]]
10114 float16x4x4_t test_vld4_f16(float16_t const *a) {
10115   return vld4_f16(a);
}
10118 // CHECK-LABEL: @test_vld4_f32(
10119 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
10120 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
10121 // CHECK: [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0(ptr %a)
10122 // CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], ptr [[__RET]]
10123 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10124 // CHECK: [[TMP6:%.*]] = load %struct.float32x2x4_t, ptr [[RETVAL]], align 8
10125 // CHECK: ret %struct.float32x2x4_t [[TMP6]]
10126 float32x2x4_t test_vld4_f32(float32_t const *a) {
10127   return vld4_f32(a);
}
10130 // CHECK-LABEL: @test_vld4_f64(
10131 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
10132 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
10133 // CHECK: [[VLD4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0(ptr %a)
10134 // CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], ptr [[__RET]]
10135 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10136 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, ptr [[RETVAL]], align 8
10137 // CHECK: ret %struct.float64x1x4_t [[TMP6]]
10138 float64x1x4_t test_vld4_f64(float64_t const *a) {
10139   return vld4_f64(a);
}
10142 // CHECK-LABEL: @test_vld4_p8(
10143 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
10144 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
10145 // CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr %a)
10146 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], ptr [[__RET]]
10147 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10148 // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x4_t, ptr [[RETVAL]], align 8
10149 // CHECK: ret %struct.poly8x8x4_t [[TMP5]]
10150 poly8x8x4_t test_vld4_p8(poly8_t const *a) {
  return vld4_p8(a);
}
10154 // CHECK-LABEL: @test_vld4_p16(
10155 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
10156 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
10157 // CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %a)
10158 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], ptr [[__RET]]
10159 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10160 // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x4_t, ptr [[RETVAL]], align 8
10161 // CHECK: ret %struct.poly16x4x4_t [[TMP6]]
10162 poly16x4x4_t test_vld4_p16(poly16_t const *a) {
10163   return vld4_p16(a);
}
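// vst1/vst1q: contiguous stores of a single vector. Byte-element variants
// store the value directly; wider element types are round-tripped through
// <N x i8> bitcasts before the store, as the checks below show.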
10166 // CHECK-LABEL: @test_vst1q_u8(
10167 // CHECK: store <16 x i8> %b, ptr %a
10169 void test_vst1q_u8(uint8_t *a, uint8x16_t b) {
  vst1q_u8(a, b);
}
10173 // CHECK-LABEL: @test_vst1q_u16(
10174 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
10175 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
10176 // CHECK: store <8 x i16> [[TMP3]], ptr %a
10178 void test_vst1q_u16(uint16_t *a, uint16x8_t b) {
  vst1q_u16(a, b);
}
10182 // CHECK-LABEL: @test_vst1q_u32(
10183 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
10184 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
10185 // CHECK: store <4 x i32> [[TMP3]], ptr %a
10187 void test_vst1q_u32(uint32_t *a, uint32x4_t b) {
  vst1q_u32(a, b);
}
10191 // CHECK-LABEL: @test_vst1q_u64(
10192 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
10193 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
10194 // CHECK: store <2 x i64> [[TMP3]], ptr %a
10196 void test_vst1q_u64(uint64_t *a, uint64x2_t b) {
  vst1q_u64(a, b);
}
10200 // CHECK-LABEL: @test_vst1q_s8(
10201 // CHECK: store <16 x i8> %b, ptr %a
10203 void test_vst1q_s8(int8_t *a, int8x16_t b) {
  vst1q_s8(a, b);
}
10207 // CHECK-LABEL: @test_vst1q_s16(
10208 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
10209 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
10210 // CHECK: store <8 x i16> [[TMP3]], ptr %a
10212 void test_vst1q_s16(int16_t *a, int16x8_t b) {
  vst1q_s16(a, b);
}
10216 // CHECK-LABEL: @test_vst1q_s32(
10217 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
10218 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
10219 // CHECK: store <4 x i32> [[TMP3]], ptr %a
10221 void test_vst1q_s32(int32_t *a, int32x4_t b) {
  vst1q_s32(a, b);
}
10225 // CHECK-LABEL: @test_vst1q_s64(
10226 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
10227 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
10228 // CHECK: store <2 x i64> [[TMP3]], ptr %a
10230 void test_vst1q_s64(int64_t *a, int64x2_t b) {
  vst1q_s64(a, b);
}
10234 // CHECK-LABEL: @test_vst1q_f16(
10235 // CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
10236 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
10237 // CHECK: store <8 x half> [[TMP3]], ptr %a
10239 void test_vst1q_f16(float16_t *a, float16x8_t b) {
  vst1q_f16(a, b);
}
10243 // CHECK-LABEL: @test_vst1q_f32(
10244 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
10245 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
10246 // CHECK: store <4 x float> [[TMP3]], ptr %a
10248 void test_vst1q_f32(float32_t *a, float32x4_t b) {
  vst1q_f32(a, b);
}
10252 // CHECK-LABEL: @test_vst1q_f64(
10253 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
10254 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
10255 // CHECK: store <2 x double> [[TMP3]], ptr %a
10257 void test_vst1q_f64(float64_t *a, float64x2_t b) {
  vst1q_f64(a, b);
}
10261 // CHECK-LABEL: @test_vst1q_p8(
10262 // CHECK: store <16 x i8> %b, ptr %a
10264 void test_vst1q_p8(poly8_t *a, poly8x16_t b) {
  vst1q_p8(a, b);
}
10268 // CHECK-LABEL: @test_vst1q_p16(
10269 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
10270 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
10271 // CHECK: store <8 x i16> [[TMP3]], ptr %a
10273 void test_vst1q_p16(poly16_t *a, poly16x8_t b) {
  vst1q_p16(a, b);
}
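// The non-q vst1 variants store a single 64-bit vector; the same bitcast
// round-trip applies to non-byte element types.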
10277 // CHECK-LABEL: @test_vst1_u8(
10278 // CHECK: store <8 x i8> %b, ptr %a
10280 void test_vst1_u8(uint8_t *a, uint8x8_t b) {
  vst1_u8(a, b);
}
10284 // CHECK-LABEL: @test_vst1_u16(
10285 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
10286 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
10287 // CHECK: store <4 x i16> [[TMP3]], ptr %a
10289 void test_vst1_u16(uint16_t *a, uint16x4_t b) {
  vst1_u16(a, b);
}
10293 // CHECK-LABEL: @test_vst1_u32(
10294 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
10295 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
10296 // CHECK: store <2 x i32> [[TMP3]], ptr %a
10298 void test_vst1_u32(uint32_t *a, uint32x2_t b) {
  vst1_u32(a, b);
}
10302 // CHECK-LABEL: @test_vst1_u64(
10303 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
10304 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
10305 // CHECK: store <1 x i64> [[TMP3]], ptr %a
10307 void test_vst1_u64(uint64_t *a, uint64x1_t b) {
  vst1_u64(a, b);
}
10311 // CHECK-LABEL: @test_vst1_s8(
10312 // CHECK: store <8 x i8> %b, ptr %a
10314 void test_vst1_s8(int8_t *a, int8x8_t b) {
  vst1_s8(a, b);
}
10318 // CHECK-LABEL: @test_vst1_s16(
10319 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
10320 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
10321 // CHECK: store <4 x i16> [[TMP3]], ptr %a
10323 void test_vst1_s16(int16_t *a, int16x4_t b) {
  vst1_s16(a, b);
}
10327 // CHECK-LABEL: @test_vst1_s32(
10328 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
10329 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
10330 // CHECK: store <2 x i32> [[TMP3]], ptr %a
10332 void test_vst1_s32(int32_t *a, int32x2_t b) {
  vst1_s32(a, b);
}
10336 // CHECK-LABEL: @test_vst1_s64(
10337 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
10338 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
10339 // CHECK: store <1 x i64> [[TMP3]], ptr %a
10341 void test_vst1_s64(int64_t *a, int64x1_t b) {
  vst1_s64(a, b);
}
10345 // CHECK-LABEL: @test_vst1_f16(
10346 // CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
10347 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
10348 // CHECK: store <4 x half> [[TMP3]], ptr %a
10350 void test_vst1_f16(float16_t *a, float16x4_t b) {
  vst1_f16(a, b);
}
10354 // CHECK-LABEL: @test_vst1_f32(
10355 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
10356 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
10357 // CHECK: store <2 x float> [[TMP3]], ptr %a
10359 void test_vst1_f32(float32_t *a, float32x2_t b) {
  vst1_f32(a, b);
}
10363 // CHECK-LABEL: @test_vst1_f64(
10364 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
10365 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
10366 // CHECK: store <1 x double> [[TMP3]], ptr %a
10368 void test_vst1_f64(float64_t *a, float64x1_t b) {
  vst1_f64(a, b);
}
10372 // CHECK-LABEL: @test_vst1_p8(
10373 // CHECK: store <8 x i8> %b, ptr %a
10375 void test_vst1_p8(poly8_t *a, poly8x8_t b) {
  vst1_p8(a, b);
}
10379 // CHECK-LABEL: @test_vst1_p16(
10380 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
10381 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
10382 // CHECK: store <4 x i16> [[TMP3]], ptr %a
10384 void test_vst1_p16(poly16_t *a, poly16x4_t b) {
  vst1_p16(a, b);
}
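// vst2/vst2q: interleaving stores of a two-vector aggregate. The struct
// argument arrives as a coerced [2 x <...>] array, is spilled and memcpy'd
// into a local, and each element vector is reloaded before the call to
// llvm.aarch64.neon.st2.*.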
10388 // CHECK-LABEL: @test_vst2q_u8(
10389 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
10390 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
10391 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[B]], i32 0, i32 0
10392 // CHECK: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10393 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10394 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0
10395 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
10396 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
10397 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0
10398 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
10399 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
10400 // CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr %a)
10402 void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) {
  vst2q_u8(a, b);
}
10406 // CHECK-LABEL: @test_vst2q_u16(
10407 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
10408 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
10409 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[B]], i32 0, i32 0
10410 // CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10411 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10412 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
10413 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
10414 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
10415 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
10416 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
10417 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
10418 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
10419 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
10420 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
10421 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
10422 // CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], ptr %a)
10424 void test_vst2q_u16(uint16_t *a, uint16x8x2_t b) {
  vst2q_u16(a, b);
}
10428 // CHECK-LABEL: @test_vst2q_u32(
10429 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
10430 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
10431 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[B]], i32 0, i32 0
10432 // CHECK: store [2 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10433 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10434 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
10435 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
10436 // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
10437 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
10438 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
10439 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
10440 // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
10441 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
10442 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
10443 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
10444 // CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], ptr %a)
10446 void test_vst2q_u32(uint32_t *a, uint32x4x2_t b) {
  vst2q_u32(a, b);
}
10450 // CHECK-LABEL: @test_vst2q_u64(
10451 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
10452 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
10453 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[B]], i32 0, i32 0
10454 // CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10455 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10456 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[__S1]], i32 0, i32 0
10457 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
10458 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
10459 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
10460 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[__S1]], i32 0, i32 0
10461 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
10462 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
10463 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
10464 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
10465 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
10466 // CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %a)
10468 void test_vst2q_u64(uint64_t *a, uint64x2x2_t b) {
  vst2q_u64(a, b);
}
10472 // CHECK-LABEL: @test_vst2q_s8(
10473 // CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
10474 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
10475 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[B]], i32 0, i32 0
10476 // CHECK: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10477 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10478 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0
10479 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
10480 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
10481 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0
10482 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
10483 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
10484 // CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr %a)
10486 void test_vst2q_s8(int8_t *a, int8x16x2_t b) {
  vst2q_s8(a, b);
}
10490 // CHECK-LABEL: @test_vst2q_s16(
10491 // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
10492 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
10493 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[B]], i32 0, i32 0
10494 // CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10495 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10496 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
10497 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
10498 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
10499 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
10500 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
10501 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
10502 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
10503 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
10504 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
10505 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
10506 // CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], ptr %a)
10508 void test_vst2q_s16(int16_t *a, int16x8x2_t b) {
  vst2q_s16(a, b);
}
10512 // CHECK-LABEL: @test_vst2q_s32(
10513 // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
10514 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
10515 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[B]], i32 0, i32 0
10516 // CHECK: store [2 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10517 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10518 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
10519 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
10520 // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
10521 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
10522 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
10523 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
10524 // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
10525 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
10526 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
10527 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
10528 // CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], ptr %a)
10530 void test_vst2q_s32(int32_t *a, int32x4x2_t b) {
  vst2q_s32(a, b);
}
10534 // CHECK-LABEL: @test_vst2q_s64(
10535 // CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
10536 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
10537 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[B]], i32 0, i32 0
10538 // CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10539 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10540 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[__S1]], i32 0, i32 0
10541 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
10542 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
10543 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
10544 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[__S1]], i32 0, i32 0
10545 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
10546 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
10547 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
10548 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
10549 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
10550 // CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %a)
10552 void test_vst2q_s64(int64_t *a, int64x2x2_t b) {
  vst2q_s64(a, b);
}
10556 // CHECK-LABEL: @test_vst2q_f16(
10557 // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
10558 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
10559 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[B]], i32 0, i32 0
10560 // CHECK: store [2 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10561 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10562 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
10563 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL]], i64 0, i64 0
10564 // CHECK: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
10565 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
10566 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
10567 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL1]], i64 0, i64 1
10568 // CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
10569 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
10570 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
10571 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
10572 // CHECK: call void @llvm.aarch64.neon.st2.v8f16.p0(<8 x half> [[TMP7]], <8 x half> [[TMP8]], ptr %a)
10574 void test_vst2q_f16(float16_t *a, float16x8x2_t b) {
  vst2q_f16(a, b);
}
10578 // CHECK-LABEL: @test_vst2q_f32(
10579 // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
10580 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
10581 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[B]], i32 0, i32 0
10582 // CHECK: store [2 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10583 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10584 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
10585 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL]], i64 0, i64 0
10586 // CHECK: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
10587 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
10588 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
10589 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL1]], i64 0, i64 1
10590 // CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
10591 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
10592 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
10593 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
10594 // CHECK: call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> [[TMP7]], <4 x float> [[TMP8]], ptr %a)
10596 void test_vst2q_f32(float32_t *a, float32x4x2_t b) {
  vst2q_f32(a, b);
}
10600 // CHECK-LABEL: @test_vst2q_f64(
10601 // CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
10602 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
10603 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[B]], i32 0, i32 0
10604 // CHECK: store [2 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10605 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10606 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0
10607 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL]], i64 0, i64 0
10608 // CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
10609 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
10610 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0
10611 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
10612 // CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
10613 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
10614 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
10615 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
10616 // CHECK: call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> [[TMP7]], <2 x double> [[TMP8]], ptr %a)
10618 void test_vst2q_f64(float64_t *a, float64x2x2_t b) {
  vst2q_f64(a, b);
}
10622 // CHECK-LABEL: @test_vst2q_p8(
10623 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
10624 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
10625 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[B]], i32 0, i32 0
10626 // CHECK: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10627 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10628 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0
10629 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
10630 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
10631 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0
10632 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
10633 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
10634 // CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr %a)
10636 void test_vst2q_p8(poly8_t *a, poly8x16x2_t b) {
  vst2q_p8(a, b);
}
10640 // CHECK-LABEL: @test_vst2q_p16(
10641 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
10642 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
10643 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[B]], i32 0, i32 0
10644 // CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10645 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10646 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
10647 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
10648 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
10649 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
10650 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
10651 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
10652 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
10653 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
10654 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
10655 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
10656 // CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], ptr %a)
10658 void test_vst2q_p16(poly16_t *a, poly16x8x2_t b) {
  vst2q_p16(a, b);
}
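// The non-q vst2 variants take a 16-byte, 8-byte-aligned aggregate of two
// 64-bit vectors.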
10662 // CHECK-LABEL: @test_vst2_u8(
10663 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
10664 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
10665 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[B]], i32 0, i32 0
10666 // CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10667 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10668 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
10669 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
10670 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
10671 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
10672 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
10673 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
10674 // CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr %a)
10676 void test_vst2_u8(uint8_t *a, uint8x8x2_t b) {
  vst2_u8(a, b);
}
10680 // CHECK-LABEL: @test_vst2_u16(
10681 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
10682 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
10683 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[B]], i32 0, i32 0
10684 // CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10685 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10686 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
10687 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
10688 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
10689 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
10690 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
10691 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
10692 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
10693 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
10694 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
10695 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
10696 // CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], ptr %a)
10698 void test_vst2_u16(uint16_t *a, uint16x4x2_t b) {
  vst2_u16(a, b);
}
10702 // CHECK-LABEL: @test_vst2_u32(
10703 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
10704 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
10705 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[B]], i32 0, i32 0
10706 // CHECK: store [2 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10707 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10708 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
10709 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
10710 // CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
10711 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
10712 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
10713 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
10714 // CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
10715 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
10716 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
10717 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
10718 // CHECK: call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], ptr %a)
10720 void test_vst2_u32(uint32_t *a, uint32x2x2_t b) {
  vst2_u32(a, b);
}
10724 // CHECK-LABEL: @test_vst2_u64(
10725 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
10726 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
10727 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[B]], i32 0, i32 0
10728 // CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10729 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10730 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0
10731 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
10732 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
10733 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
10734 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0
10735 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
10736 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
10737 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
10738 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
10739 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
10740 // CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %a)
void test_vst2_u64(uint64_t *a, uint64x1x2_t b) {
  vst2_u64(a, b);
}

10746 // CHECK-LABEL: @test_vst2_s8(
10747 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
10748 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
10749 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[B]], i32 0, i32 0
10750 // CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10751 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10752 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
10753 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
10754 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
10755 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
10756 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
10757 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
10758 // CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr %a)
void test_vst2_s8(int8_t *a, int8x8x2_t b) {
  vst2_s8(a, b);
}

10764 // CHECK-LABEL: @test_vst2_s16(
10765 // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
10766 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
10767 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[B]], i32 0, i32 0
10768 // CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10769 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10770 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
10771 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
10772 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
10773 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
10774 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
10775 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
10776 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
10777 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
10778 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
10779 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
10780 // CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], ptr %a)
void test_vst2_s16(int16_t *a, int16x4x2_t b) {
  vst2_s16(a, b);
}

10786 // CHECK-LABEL: @test_vst2_s32(
10787 // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
10788 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
10789 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[B]], i32 0, i32 0
10790 // CHECK: store [2 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10791 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10792 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
10793 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
10794 // CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
10795 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
10796 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
10797 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
10798 // CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
10799 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
10800 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
10801 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
10802 // CHECK: call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], ptr %a)
void test_vst2_s32(int32_t *a, int32x2x2_t b) {
  vst2_s32(a, b);
}

10808 // CHECK-LABEL: @test_vst2_s64(
10809 // CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
10810 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
10811 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[B]], i32 0, i32 0
10812 // CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10813 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10814 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0
10815 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
10816 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
10817 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
10818 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0
10819 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
10820 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
10821 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
10822 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
10823 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
10824 // CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %a)
void test_vst2_s64(int64_t *a, int64x1x2_t b) {
  vst2_s64(a, b);
}

10830 // CHECK-LABEL: @test_vst2_f16(
10831 // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
10832 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
10833 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[B]], i32 0, i32 0
10834 // CHECK: store [2 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10835 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10836 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
10837 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL]], i64 0, i64 0
10838 // CHECK: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
10839 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
10840 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
10841 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL1]], i64 0, i64 1
10842 // CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
10843 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
10844 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
10845 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
10846 // CHECK: call void @llvm.aarch64.neon.st2.v4f16.p0(<4 x half> [[TMP7]], <4 x half> [[TMP8]], ptr %a)
void test_vst2_f16(float16_t *a, float16x4x2_t b) {
  vst2_f16(a, b);
}

10852 // CHECK-LABEL: @test_vst2_f32(
10853 // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
10854 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
10855 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[B]], i32 0, i32 0
10856 // CHECK: store [2 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10857 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10858 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
10859 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL]], i64 0, i64 0
10860 // CHECK: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
10861 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
10862 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
10863 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL1]], i64 0, i64 1
10864 // CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
10865 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
10866 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
10867 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
10868 // CHECK: call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> [[TMP7]], <2 x float> [[TMP8]], ptr %a)
void test_vst2_f32(float32_t *a, float32x2x2_t b) {
  vst2_f32(a, b);
}

10874 // CHECK-LABEL: @test_vst2_f64(
10875 // CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
10876 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
10877 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[B]], i32 0, i32 0
10878 // CHECK: store [2 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10879 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10880 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0
10881 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL]], i64 0, i64 0
10882 // CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
10883 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
10884 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0
10885 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
10886 // CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
10887 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
10888 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
10889 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
10890 // CHECK: call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> [[TMP7]], <1 x double> [[TMP8]], ptr %a)
void test_vst2_f64(float64_t *a, float64x1x2_t b) {
  vst2_f64(a, b);
}

10896 // CHECK-LABEL: @test_vst2_p8(
10897 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
10898 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
10899 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[B]], i32 0, i32 0
10900 // CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10901 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10902 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
10903 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
10904 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
10905 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
10906 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
10907 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
10908 // CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr %a)
void test_vst2_p8(poly8_t *a, poly8x8x2_t b) {
  vst2_p8(a, b);
}

10914 // CHECK-LABEL: @test_vst2_p16(
10915 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
10916 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
10917 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[B]], i32 0, i32 0
10918 // CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10919 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10920 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
10921 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
10922 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
10923 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
10924 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
10925 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
10926 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
10927 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
10928 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
10929 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
10930 // CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], ptr %a)
void test_vst2_p16(poly16_t *a, poly16x4x2_t b) {
  vst2_p16(a, b);
}

10936 // CHECK-LABEL: @test_vst3q_u8(
10937 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
10938 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
10939 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[B]], i32 0, i32 0
10940 // CHECK: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10941 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
10942 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
10943 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
10944 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
10945 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
10946 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
10947 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
10948 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
10949 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
10950 // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
10951 // CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr %a)
void test_vst3q_u8(uint8_t *a, uint8x16x3_t b) {
  vst3q_u8(a, b);
}

10957 // CHECK-LABEL: @test_vst3q_u16(
10958 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
10959 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
10960 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[B]], i32 0, i32 0
10961 // CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10962 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
10963 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
10964 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
10965 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
10966 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
10967 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
10968 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
10969 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
10970 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
10971 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
10972 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
10973 // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
10974 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
10975 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
10976 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
10977 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
10978 // CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], ptr %a)
void test_vst3q_u16(uint16_t *a, uint16x8x3_t b) {
  vst3q_u16(a, b);
}

10984 // CHECK-LABEL: @test_vst3q_u32(
10985 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
10986 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
10987 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[B]], i32 0, i32 0
10988 // CHECK: store [3 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10989 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
10990 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
10991 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
10992 // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
10993 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
10994 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
10995 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
10996 // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
10997 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
10998 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
10999 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
11000 // CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
11001 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
11002 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
11003 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
11004 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
11005 // CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], ptr %a)
void test_vst3q_u32(uint32_t *a, uint32x4x3_t b) {
  vst3q_u32(a, b);
}

11011 // CHECK-LABEL: @test_vst3q_u64(
11012 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
11013 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
11014 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[B]], i32 0, i32 0
11015 // CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11016 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
11017 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0
11018 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
11019 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
11020 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
11021 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0
11022 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
11023 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
11024 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
11025 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0
11026 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
11027 // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
11028 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
11029 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
11030 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
11031 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
11032 // CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %a)
void test_vst3q_u64(uint64_t *a, uint64x2x3_t b) {
  vst3q_u64(a, b);
}

11038 // CHECK-LABEL: @test_vst3q_s8(
11039 // CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
11040 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
11041 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[B]], i32 0, i32 0
11042 // CHECK: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11043 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
11044 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
11045 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
11046 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
11047 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
11048 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
11049 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
11050 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
11051 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
11052 // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
11053 // CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr %a)
void test_vst3q_s8(int8_t *a, int8x16x3_t b) {
  vst3q_s8(a, b);
}

11059 // CHECK-LABEL: @test_vst3q_s16(
11060 // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
11061 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
11062 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[B]], i32 0, i32 0
11063 // CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11064 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
11065 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
11066 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
11067 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
11068 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11069 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
11070 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
11071 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
11072 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11073 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
11074 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
11075 // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
11076 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
11077 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11078 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11079 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
11080 // CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], ptr %a)
void test_vst3q_s16(int16_t *a, int16x8x3_t b) {
  vst3q_s16(a, b);
}

11086 // CHECK-LABEL: @test_vst3q_s32(
11087 // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
11088 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
11089 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[B]], i32 0, i32 0
11090 // CHECK: store [3 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11091 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
11092 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
11093 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
11094 // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
11095 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
11096 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
11097 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
11098 // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
11099 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
11100 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
11101 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
11102 // CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
11103 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
11104 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
11105 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
11106 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
11107 // CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], ptr %a)
void test_vst3q_s32(int32_t *a, int32x4x3_t b) {
  vst3q_s32(a, b);
}

11113 // CHECK-LABEL: @test_vst3q_s64(
11114 // CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
11115 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
11116 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[B]], i32 0, i32 0
11117 // CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11118 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
11119 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0
11120 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
11121 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
11122 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
11123 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0
11124 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
11125 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
11126 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
11127 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0
11128 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
11129 // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
11130 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
11131 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
11132 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
11133 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
11134 // CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %a)
void test_vst3q_s64(int64_t *a, int64x2x3_t b) {
  vst3q_s64(a, b);
}

11140 // CHECK-LABEL: @test_vst3q_f16(
11141 // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
11142 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
11143 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[B]], i32 0, i32 0
11144 // CHECK: store [3 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11145 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
11146 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
11147 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL]], i64 0, i64 0
11148 // CHECK: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
11149 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
11150 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
11151 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL1]], i64 0, i64 1
11152 // CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
11153 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
11154 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
11155 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL3]], i64 0, i64 2
11156 // CHECK: [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
11157 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
11158 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
11159 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
11160 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
11161 // CHECK: call void @llvm.aarch64.neon.st3.v8f16.p0(<8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], ptr %a)
void test_vst3q_f16(float16_t *a, float16x8x3_t b) {
  vst3q_f16(a, b);
}

11167 // CHECK-LABEL: @test_vst3q_f32(
11168 // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
11169 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
11170 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[B]], i32 0, i32 0
11171 // CHECK: store [3 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11172 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
11173 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
11174 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL]], i64 0, i64 0
11175 // CHECK: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
11176 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
11177 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
11178 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL1]], i64 0, i64 1
11179 // CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
11180 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
11181 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
11182 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL3]], i64 0, i64 2
11183 // CHECK: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
11184 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
11185 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
11186 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
11187 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
11188 // CHECK: call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], ptr %a)
void test_vst3q_f32(float32_t *a, float32x4x3_t b) {
  vst3q_f32(a, b);
}

11194 // CHECK-LABEL: @test_vst3q_f64(
11195 // CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
11196 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
11197 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[B]], i32 0, i32 0
11198 // CHECK: store [3 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11199 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
11200 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
11201 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL]], i64 0, i64 0
11202 // CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
11203 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
11204 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
11205 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
11206 // CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
11207 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
11208 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
11209 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL3]], i64 0, i64 2
11210 // CHECK: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16
11211 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
11212 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
11213 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
11214 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
11215 // CHECK: call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], ptr %a)
void test_vst3q_f64(float64_t *a, float64x2x3_t b) {
  vst3q_f64(a, b);
}

11221 // CHECK-LABEL: @test_vst3q_p8(
11222 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
11223 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
11224 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[B]], i32 0, i32 0
11225 // CHECK: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11226 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
11227 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
11228 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
11229 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
11230 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
11231 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
11232 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
11233 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
11234 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
11235 // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
11236 // CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr %a)
void test_vst3q_p8(poly8_t *a, poly8x16x3_t b) {
  vst3q_p8(a, b);
}

11242 // CHECK-LABEL: @test_vst3q_p16(
11243 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
11244 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
11245 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[B]], i32 0, i32 0
11246 // CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11247 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
11248 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
11249 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
11250 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
11251 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11252 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
11253 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
11254 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
11255 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11256 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
11257 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
11258 // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
11259 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
11260 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11261 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11262 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
11263 // CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], ptr %a)
void test_vst3q_p16(poly16_t *a, poly16x8x3_t b) {
  vst3q_p16(a, b);
}

11269 // CHECK-LABEL: @test_vst3_u8(
11270 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
11271 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
11272 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[B]], i32 0, i32 0
11273 // CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11274 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11275 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
11276 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
11277 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
11278 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
11279 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
11280 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
11281 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
11282 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
11283 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
11284 // CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr %a)
void test_vst3_u8(uint8_t *a, uint8x8x3_t b) {
  vst3_u8(a, b);
}

11290 // CHECK-LABEL: @test_vst3_u16(
11291 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
11292 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
11293 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[B]], i32 0, i32 0
11294 // CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11295 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11296 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
11297 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
11298 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
11299 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
11300 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
11301 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
11302 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
11303 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
11304 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
11305 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
11306 // CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
11307 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
11308 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
11309 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
11310 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
11311 // CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], ptr %a)
void test_vst3_u16(uint16_t *a, uint16x4x3_t b) {
  vst3_u16(a, b);
}

11317 // CHECK-LABEL: @test_vst3_u32(
11318 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
11319 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
11320 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[B]], i32 0, i32 0
11321 // CHECK: store [3 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11322 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11323 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
11324 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
11325 // CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
11326 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
11327 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
11328 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
11329 // CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
11330 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
11331 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
11332 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
11333 // CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
11334 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
11335 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
11336 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
11337 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
11338 // CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], ptr %a)
void test_vst3_u32(uint32_t *a, uint32x2x3_t b) {
  vst3_u32(a, b);
}

11344 // CHECK-LABEL: @test_vst3_u64(
11345 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
11346 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
11347 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[B]], i32 0, i32 0
11348 // CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11349 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11350 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
11351 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
11352 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
11353 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
11354 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
11355 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
11356 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
11357 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
11358 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
11359 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
11360 // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
11361 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
11362 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
11363 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
11364 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
11365 // CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %a)
void test_vst3_u64(uint64_t *a, uint64x1x3_t b) {
  vst3_u64(a, b);
}

11371 // CHECK-LABEL: @test_vst3_s8(
11372 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
11373 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
11374 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[B]], i32 0, i32 0
11375 // CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11376 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11377 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
11378 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
11379 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
11380 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
11381 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
11382 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
11383 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
11384 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
11385 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
11386 // CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr %a)
void test_vst3_s8(int8_t *a, int8x8x3_t b) {
  vst3_s8(a, b);
}

11392 // CHECK-LABEL: @test_vst3_s16(
11393 // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
11394 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
11395 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[B]], i32 0, i32 0
11396 // CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11397 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11398 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
11399 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
11400 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
11401 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
11402 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
11403 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
11404 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
11405 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
11406 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
11407 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
11408 // CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
11409 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
11410 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
11411 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
11412 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
11413 // CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], ptr %a)
void test_vst3_s16(int16_t *a, int16x4x3_t b) {
  vst3_s16(a, b);
}

11419 // CHECK-LABEL: @test_vst3_s32(
11420 // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
11421 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
11422 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[B]], i32 0, i32 0
11423 // CHECK: store [3 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11424 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11425 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
11426 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
11427 // CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
11428 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
11429 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
11430 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
11431 // CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
11432 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
11433 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
11434 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
11435 // CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
11436 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
11437 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
11438 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
11439 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
11440 // CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], ptr %a)
void test_vst3_s32(int32_t *a, int32x2x3_t b) {
  vst3_s32(a, b);
}

11446 // CHECK-LABEL: @test_vst3_s64(
11447 // CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
11448 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
11449 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[B]], i32 0, i32 0
11450 // CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11451 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11452 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
11453 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
11454 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
11455 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
11456 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
11457 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
11458 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
11459 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
11460 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
11461 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
11462 // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
11463 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
11464 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
11465 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
11466 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
11467 // CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %a)
void test_vst3_s64(int64_t *a, int64x1x3_t b) {
  vst3_s64(a, b);
}

11473 // CHECK-LABEL: @test_vst3_f16(
11474 // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
11475 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
11476 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[B]], i32 0, i32 0
11477 // CHECK: store [3 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11478 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11479 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
11480 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL]], i64 0, i64 0
11481 // CHECK: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
11482 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
11483 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
11484 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL1]], i64 0, i64 1
11485 // CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
11486 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
11487 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
11488 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL3]], i64 0, i64 2
11489 // CHECK: [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
11490 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
11491 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
11492 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
11493 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
11494 // CHECK: call void @llvm.aarch64.neon.st3.v4f16.p0(<4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], ptr %a)
void test_vst3_f16(float16_t *a, float16x4x3_t b) {
  vst3_f16(a, b);
}

11500 // CHECK-LABEL: @test_vst3_f32(
11501 // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
11502 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
11503 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[B]], i32 0, i32 0
11504 // CHECK: store [3 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11505 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11506 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
11507 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL]], i64 0, i64 0
11508 // CHECK: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
11509 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
11510 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
11511 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL1]], i64 0, i64 1
11512 // CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
11513 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
11514 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
11515 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL3]], i64 0, i64 2
11516 // CHECK: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
11517 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
11518 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
11519 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
11520 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
11521 // CHECK: call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], ptr %a)
void test_vst3_f32(float32_t *a, float32x2x3_t b) {
  vst3_f32(a, b);
}

11527 // CHECK-LABEL: @test_vst3_f64(
11528 // CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
11529 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
11530 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[B]], i32 0, i32 0
11531 // CHECK: store [3 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11532 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11533 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
11534 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL]], i64 0, i64 0
11535 // CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
11536 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
11537 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
11538 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
11539 // CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
11540 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
11541 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
11542 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL3]], i64 0, i64 2
11543 // CHECK: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8
11544 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
11545 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
11546 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
11547 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
11548 // CHECK: call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], ptr %a)
11550 void test_vst3_f64(float64_t *a, float64x1x3_t b) {
  vst3_f64(a, b);
}
11554 // CHECK-LABEL: @test_vst3_p8(
11555 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
11556 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
11557 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[B]], i32 0, i32 0
11558 // CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11559 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11560 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
11561 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
11562 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
11563 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
11564 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
11565 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
11566 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
11567 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
11568 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
11569 // CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr %a)
11571 void test_vst3_p8(poly8_t *a, poly8x8x3_t b) {
  vst3_p8(a, b);
}
11575 // CHECK-LABEL: @test_vst3_p16(
11576 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
11577 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
11578 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[B]], i32 0, i32 0
11579 // CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11580 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11581 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
11582 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
11583 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
11584 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
11585 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
11586 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
11587 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
11588 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
11589 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
11590 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
11591 // CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
11592 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
11593 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
11594 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
11595 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
11596 // CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], ptr %a)
11598 void test_vst3_p16(poly16_t *a, poly16x4x3_t b) {
  vst3_p16(a, b);
}
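// The CHECK lines for the vst4q_* tests below verify the Q-register (128-bit)
// four-vector stores: the aggregate argument b arrives as a [4 x <N x T>]
// coerce value, is spilled to a 16-byte-aligned alloca, copied into the local
// __s1 with a 64-byte memcpy, and each of the four lanes is loaded (and, for
// non-i8 element types, round-tripped through <16 x i8> bitcasts) before the
// call to the corresponding @llvm.aarch64.neon.st4.* intrinsic on pointer %a.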
11602 // CHECK-LABEL: @test_vst4q_u8(
11603 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
11604 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
11605 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[B]], i32 0, i32 0
11606 // CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11607 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11608 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
11609 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
11610 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
11611 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
11612 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
11613 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
11614 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
11615 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
11616 // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
11617 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
11618 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3
11619 // CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
11620 // CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr %a)
11622 void test_vst4q_u8(uint8_t *a, uint8x16x4_t b) {
  vst4q_u8(a, b);
}
11626 // CHECK-LABEL: @test_vst4q_u16(
11627 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
11628 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
11629 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[B]], i32 0, i32 0
11630 // CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11631 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11632 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
11633 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
11634 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
11635 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11636 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
11637 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
11638 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
11639 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11640 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
11641 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
11642 // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
11643 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
11644 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
11645 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3
11646 // CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
11647 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
11648 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11649 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11650 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
11651 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
11652 // CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], ptr %a)
11654 void test_vst4q_u16(uint16_t *a, uint16x8x4_t b) {
  vst4q_u16(a, b);
}
11658 // CHECK-LABEL: @test_vst4q_u32(
11659 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
11660 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
11661 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[B]], i32 0, i32 0
11662 // CHECK: store [4 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11663 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11664 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
11665 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
11666 // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
11667 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
11668 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
11669 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
11670 // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
11671 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
11672 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
11673 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
11674 // CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
11675 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
11676 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
11677 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i64 0, i64 3
11678 // CHECK: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
11679 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
11680 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
11681 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
11682 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
11683 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
11684 // CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], ptr %a)
11686 void test_vst4q_u32(uint32_t *a, uint32x4x4_t b) {
  vst4q_u32(a, b);
}
11690 // CHECK-LABEL: @test_vst4q_u64(
11691 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
11692 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
11693 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[B]], i32 0, i32 0
11694 // CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11695 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11696 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
11697 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
11698 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
11699 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
11700 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
11701 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
11702 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
11703 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
11704 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
11705 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
11706 // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
11707 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
11708 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
11709 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3
11710 // CHECK: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16
11711 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
11712 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
11713 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
11714 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
11715 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
11716 // CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %a)
11718 void test_vst4q_u64(uint64_t *a, uint64x2x4_t b) {
  vst4q_u64(a, b);
}
11722 // CHECK-LABEL: @test_vst4q_s8(
11723 // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
11724 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
11725 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[B]], i32 0, i32 0
11726 // CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11727 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11728 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
11729 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
11730 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
11731 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
11732 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
11733 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
11734 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
11735 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
11736 // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
11737 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
11738 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3
11739 // CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
11740 // CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr %a)
11742 void test_vst4q_s8(int8_t *a, int8x16x4_t b) {
  vst4q_s8(a, b);
}
11746 // CHECK-LABEL: @test_vst4q_s16(
11747 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
11748 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
11749 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[B]], i32 0, i32 0
11750 // CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11751 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11752 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
11753 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
11754 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
11755 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11756 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
11757 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
11758 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
11759 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11760 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
11761 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
11762 // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
11763 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
11764 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
11765 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3
11766 // CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
11767 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
11768 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11769 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11770 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
11771 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
11772 // CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], ptr %a)
11774 void test_vst4q_s16(int16_t *a, int16x8x4_t b) {
  vst4q_s16(a, b);
}
11778 // CHECK-LABEL: @test_vst4q_s32(
11779 // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
11780 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
11781 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[B]], i32 0, i32 0
11782 // CHECK: store [4 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11783 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11784 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
11785 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
11786 // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
11787 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
11788 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
11789 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
11790 // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
11791 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
11792 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
11793 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
11794 // CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
11795 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
11796 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
11797 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i64 0, i64 3
11798 // CHECK: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
11799 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
11800 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
11801 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
11802 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
11803 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
11804 // CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], ptr %a)
11806 void test_vst4q_s32(int32_t *a, int32x4x4_t b) {
  vst4q_s32(a, b);
}
11810 // CHECK-LABEL: @test_vst4q_s64(
11811 // CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
11812 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
11813 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[B]], i32 0, i32 0
11814 // CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11815 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11816 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
11817 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
11818 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
11819 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
11820 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
11821 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
11822 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
11823 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
11824 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
11825 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
11826 // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
11827 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
11828 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
11829 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3
11830 // CHECK: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16
11831 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
11832 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
11833 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
11834 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
11835 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
11836 // CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %a)
11838 void test_vst4q_s64(int64_t *a, int64x2x4_t b) {
  vst4q_s64(a, b);
}
11842 // CHECK-LABEL: @test_vst4q_f16(
11843 // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
11844 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
11845 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[B]], i32 0, i32 0
11846 // CHECK: store [4 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11847 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11848 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
11849 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL]], i64 0, i64 0
11850 // CHECK: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
11851 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
11852 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
11853 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL1]], i64 0, i64 1
11854 // CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
11855 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
11856 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
11857 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL3]], i64 0, i64 2
11858 // CHECK: [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
11859 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
11860 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
11861 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL5]], i64 0, i64 3
11862 // CHECK: [[TMP9:%.*]] = load <8 x half>, ptr [[ARRAYIDX6]], align 16
11863 // CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
11864 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
11865 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
11866 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
11867 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
11868 // CHECK: call void @llvm.aarch64.neon.st4.v8f16.p0(<8 x half> [[TMP11]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], ptr %a)
11870 void test_vst4q_f16(float16_t *a, float16x8x4_t b) {
  vst4q_f16(a, b);
}
11874 // CHECK-LABEL: @test_vst4q_f32(
11875 // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
11876 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
11877 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[B]], i32 0, i32 0
11878 // CHECK: store [4 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11879 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11880 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
11881 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL]], i64 0, i64 0
11882 // CHECK: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
11883 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
11884 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
11885 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL1]], i64 0, i64 1
11886 // CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
11887 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
11888 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
11889 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL3]], i64 0, i64 2
11890 // CHECK: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
11891 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
11892 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
11893 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL5]], i64 0, i64 3
11894 // CHECK: [[TMP9:%.*]] = load <4 x float>, ptr [[ARRAYIDX6]], align 16
11895 // CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
11896 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
11897 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
11898 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
11899 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
11900 // CHECK: call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], ptr %a)
11902 void test_vst4q_f32(float32_t *a, float32x4x4_t b) {
  vst4q_f32(a, b);
}
11906 // CHECK-LABEL: @test_vst4q_f64(
11907 // CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
11908 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
11909 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[B]], i32 0, i32 0
11910 // CHECK: store [4 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11911 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11912 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
11913 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL]], i64 0, i64 0
11914 // CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
11915 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
11916 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
11917 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
11918 // CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
11919 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
11920 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
11921 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL3]], i64 0, i64 2
11922 // CHECK: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16
11923 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
11924 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
11925 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL5]], i64 0, i64 3
11926 // CHECK: [[TMP9:%.*]] = load <2 x double>, ptr [[ARRAYIDX6]], align 16
11927 // CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
11928 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
11929 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
11930 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
11931 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
11932 // CHECK: call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], ptr %a)
11934 void test_vst4q_f64(float64_t *a, float64x2x4_t b) {
  vst4q_f64(a, b);
}
11938 // CHECK-LABEL: @test_vst4q_p8(
11939 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
11940 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
11941 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[B]], i32 0, i32 0
11942 // CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11943 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11944 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
11945 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
11946 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
11947 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
11948 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
11949 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
11950 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
11951 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
11952 // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
11953 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
11954 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3
11955 // CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
11956 // CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr %a)
11958 void test_vst4q_p8(poly8_t *a, poly8x16x4_t b) {
  vst4q_p8(a, b);
}
11962 // CHECK-LABEL: @test_vst4q_p16(
11963 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
11964 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
11965 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[B]], i32 0, i32 0
11966 // CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11967 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11968 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
11969 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
11970 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
11971 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11972 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
11973 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
11974 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
11975 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11976 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
11977 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
11978 // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
11979 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
11980 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
11981 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3
11982 // CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
11983 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
11984 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11985 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11986 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
11987 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
11988 // CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], ptr %a)
11990 void test_vst4q_p16(poly16_t *a, poly16x8x4_t b) {
  vst4q_p16(a, b);
}
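// The remaining vst4_* tests exercise the 64-bit (D-register) forms: the
// aggregates are only 8-byte aligned, the memcpy into __s1 copies 32 bytes
// instead of 64, and the lanes are bitcast through <8 x i8> rather than
// <16 x i8> before the @llvm.aarch64.neon.st4.* call.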
11994 // CHECK-LABEL: @test_vst4_u8(
11995 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
11996 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
11997 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[B]], i32 0, i32 0
11998 // CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11999 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12000 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
12001 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
12002 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
12003 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
12004 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
12005 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
12006 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
12007 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
12008 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
12009 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
12010 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3
12011 // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
12012 // CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr %a)
12014 void test_vst4_u8(uint8_t *a, uint8x8x4_t b) {
  vst4_u8(a, b);
}
12018 // CHECK-LABEL: @test_vst4_u16(
12019 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
12020 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
12021 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[B]], i32 0, i32 0
12022 // CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12023 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12024 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
12025 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
12026 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
12027 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
12028 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
12029 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
12030 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
12031 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
12032 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
12033 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
12034 // CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
12035 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
12036 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
12037 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3
12038 // CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
12039 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
12040 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
12041 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
12042 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
12043 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
12044 // CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], ptr %a)
12046 void test_vst4_u16(uint16_t *a, uint16x4x4_t b) {
  vst4_u16(a, b);
}
12050 // CHECK-LABEL: @test_vst4_u32(
12051 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
12052 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
12053 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[B]], i32 0, i32 0
12054 // CHECK: store [4 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12055 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12056 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
12057 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
12058 // CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
12059 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
12060 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
12061 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
12062 // CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
12063 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
12064 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
12065 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
12066 // CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
12067 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
12068 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
12069 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i64 0, i64 3
12070 // CHECK: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
12071 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
12072 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
12073 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
12074 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
12075 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
12076 // CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], ptr %a)
12078 void test_vst4_u32(uint32_t *a, uint32x2x4_t b) {
  vst4_u32(a, b);
}
12082 // CHECK-LABEL: @test_vst4_u64(
12083 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
12084 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
12085 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[B]], i32 0, i32 0
12086 // CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12087 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12088 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
12089 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
12090 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
12091 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12092 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
12093 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
12094 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
12095 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12096 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
12097 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
12098 // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
12099 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12100 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
12101 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3
12102 // CHECK: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
12103 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
12104 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12105 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12106 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12107 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
12108 // CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %a)
12110 void test_vst4_u64(uint64_t *a, uint64x1x4_t b) {
  vst4_u64(a, b);
}
12114 // CHECK-LABEL: @test_vst4_s8(
12115 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
12116 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
12117 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[B]], i32 0, i32 0
12118 // CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12119 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12120 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
12121 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
12122 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
12123 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
12124 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
12125 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
12126 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
12127 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
12128 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
12129 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
12130 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3
12131 // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
12132 // CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr %a)
12134 void test_vst4_s8(int8_t *a, int8x8x4_t b) {
  vst4_s8(a, b);
}
12138 // CHECK-LABEL: @test_vst4_s16(
12139 // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
12140 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
12141 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[B]], i32 0, i32 0
12142 // CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12143 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12144 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
12145 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
12146 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
12147 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
12148 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
12149 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
12150 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
12151 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
12152 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
12153 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
12154 // CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
12155 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
12156 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
12157 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3
12158 // CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
12159 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
12160 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
12161 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
12162 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
12163 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
12164 // CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], ptr %a)
12166 void test_vst4_s16(int16_t *a, int16x4x4_t b) {
  vst4_s16(a, b);
}
12170 // CHECK-LABEL: @test_vst4_s32(
12171 // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
12172 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
12173 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[B]], i32 0, i32 0
12174 // CHECK: store [4 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12175 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12176 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
12177 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
12178 // CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
12179 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
12180 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
12181 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
12182 // CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
12183 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
12184 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
12185 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
12186 // CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
12187 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
12188 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
12189 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i64 0, i64 3
12190 // CHECK: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
12191 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
12192 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
12193 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
12194 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
12195 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
12196 // CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], ptr %a)
12198 void test_vst4_s32(int32_t *a, int32x2x4_t b) {
  vst4_s32(a, b);
}
12202 // CHECK-LABEL: @test_vst4_s64(
12203 // CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
12204 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
12205 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[B]], i32 0, i32 0
12206 // CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12207 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12208 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
12209 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
12210 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
12211 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12212 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
12213 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
12214 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
12215 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12216 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
12217 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
12218 // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
12219 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12220 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
12221 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3
12222 // CHECK: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
12223 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
12224 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12225 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12226 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12227 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
12228 // CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %a)
12230 void test_vst4_s64(int64_t *a, int64x1x4_t b) {
  vst4_s64(a, b);
}
12234 // CHECK-LABEL: @test_vst4_f16(
12235 // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
12236 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
12237 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[B]], i32 0, i32 0
12238 // CHECK: store [4 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12239 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12240 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
12241 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL]], i64 0, i64 0
12242 // CHECK: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
12243 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
12244 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
12245 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL1]], i64 0, i64 1
12246 // CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
12247 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
12248 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
12249 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL3]], i64 0, i64 2
12250 // CHECK: [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
12251 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
12252 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
12253 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL5]], i64 0, i64 3
12254 // CHECK: [[TMP9:%.*]] = load <4 x half>, ptr [[ARRAYIDX6]], align 8
12255 // CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
12256 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
12257 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
12258 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
12259 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
12260 // CHECK: call void @llvm.aarch64.neon.st4.v4f16.p0(<4 x half> [[TMP11]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], ptr %a)
12262 void test_vst4_f16(float16_t *a, float16x4x4_t b) {
  vst4_f16(a, b);
}
12266 // CHECK-LABEL: @test_vst4_f32(
12267 // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
12268 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
12269 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[B]], i32 0, i32 0
12270 // CHECK: store [4 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12271 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12272 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
12273 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL]], i64 0, i64 0
12274 // CHECK: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
12275 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
12276 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
12277 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL1]], i64 0, i64 1
12278 // CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
12279 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
12280 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
12281 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL3]], i64 0, i64 2
12282 // CHECK: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
12283 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
12284 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
12285 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL5]], i64 0, i64 3
12286 // CHECK: [[TMP9:%.*]] = load <2 x float>, ptr [[ARRAYIDX6]], align 8
12287 // CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
12288 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
12289 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
12290 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
12291 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
12292 // CHECK: call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], ptr %a)
12294 void test_vst4_f32(float32_t *a, float32x2x4_t b) {
  vst4_f32(a, b);
}
12298 // CHECK-LABEL: @test_vst4_f64(
12299 // CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
12300 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
12301 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[B]], i32 0, i32 0
12302 // CHECK: store [4 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12303 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12304 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
12305 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL]], i64 0, i64 0
12306 // CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
12307 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
12308 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
12309 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
12310 // CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
12311 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
12312 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
12313 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL3]], i64 0, i64 2
12314 // CHECK: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8
12315 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
12316 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
12317 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL5]], i64 0, i64 3
12318 // CHECK: [[TMP9:%.*]] = load <1 x double>, ptr [[ARRAYIDX6]], align 8
12319 // CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
12320 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
12321 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
12322 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
12323 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
12324 // CHECK: call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], ptr %a)
12326 void test_vst4_f64(float64_t *a, float64x1x4_t b) {
  vst4_f64(a, b);
}
12330 // CHECK-LABEL: @test_vst4_p8(
12331 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
12332 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
12333 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[B]], i32 0, i32 0
12334 // CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12335 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12336 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
12337 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
12338 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
12339 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
12340 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
12341 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
12342 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
12343 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
12344 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
12345 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
12346 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3
12347 // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
12348 // CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr %a)
12350 void test_vst4_p8(poly8_t *a, poly8x8x4_t b) {
  vst4_p8(a, b);
}
12354 // CHECK-LABEL: @test_vst4_p16(
12355 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
12356 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
12357 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[B]], i32 0, i32 0
12358 // CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12359 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12360 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
12361 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
12362 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
12363 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
12364 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
12365 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
12366 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
12367 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
12368 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
12369 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
12370 // CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
12371 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
12372 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
12373 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3
12374 // CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
12375 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
12376 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
12377 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
12378 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
12379 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
12380 // CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], ptr %a)
12382 void test_vst4_p16(poly16_t *a, poly16x4x4_t b) {
  vst4_p16(a, b);
}
12386 // CHECK-LABEL: @test_vld1q_f64_x2(
12387 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
12388 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
12389 // CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr %a)
12390 // CHECK: store { <2 x double>, <2 x double> } [[VLD1XN]], ptr [[__RET]]
12391 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
12392 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, ptr [[RETVAL]], align 16
12393 // CHECK: ret %struct.float64x2x2_t [[TMP6]]
12394 float64x2x2_t test_vld1q_f64_x2(float64_t const *a) {
12395   return vld1q_f64_x2(a);
}
12398 // CHECK-LABEL: @test_vld1q_p64_x2(
12399 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
12400 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
12401 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr %a)
12402 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]]
12403 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
12404 // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, ptr [[RETVAL]], align 16
12405 // CHECK: ret %struct.poly64x2x2_t [[TMP6]]
12406 poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) {
12407   return vld1q_p64_x2(a);
}
12410 // CHECK-LABEL: @test_vld1_f64_x2(
12411 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
12412 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
12413 // CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr %a)
12414 // CHECK: store { <1 x double>, <1 x double> } [[VLD1XN]], ptr [[__RET]]
12415 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
12416 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, ptr [[RETVAL]], align 8
12417 // CHECK: ret %struct.float64x1x2_t [[TMP6]]
12418 float64x1x2_t test_vld1_f64_x2(float64_t const *a) {
12419   return vld1_f64_x2(a);
}
12422 // CHECK-LABEL: @test_vld1_p64_x2(
12423 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
12424 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
12425 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr %a)
12426 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]]
12427 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
12428 // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x2_t, ptr [[RETVAL]], align 8
12429 // CHECK: ret %struct.poly64x1x2_t [[TMP6]]
12430 poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) {
12431   return vld1_p64_x2(a);
}
12434 // CHECK-LABEL: @test_vld1q_f64_x3(
12435 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
12436 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
12437 // CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr %a)
12438 // CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], ptr [[__RET]]
12439 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
12440 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, ptr [[RETVAL]], align 16
12441 // CHECK: ret %struct.float64x2x3_t [[TMP6]]
12442 float64x2x3_t test_vld1q_f64_x3(float64_t const *a) {
12443   return vld1q_f64_x3(a);
}
12446 // CHECK-LABEL: @test_vld1q_p64_x3(
12447 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
12448 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
12449 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr %a)
12450 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]]
12451 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
12452 // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x3_t, ptr [[RETVAL]], align 16
12453 // CHECK: ret %struct.poly64x2x3_t [[TMP6]]
12454 poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) {
12455   return vld1q_p64_x3(a);
}
12458 // CHECK-LABEL: @test_vld1_f64_x3(
12459 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
12460 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
12461 // CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr %a)
12462 // CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], ptr [[__RET]]
12463 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
12464 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, ptr [[RETVAL]], align 8
12465 // CHECK: ret %struct.float64x1x3_t [[TMP6]]
12466 float64x1x3_t test_vld1_f64_x3(float64_t const *a) {
12467   return vld1_f64_x3(a);
}
12470 // CHECK-LABEL: @test_vld1_p64_x3(
12471 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
12472 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
12473 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr %a)
12474 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]]
12475 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
12476 // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x3_t, ptr [[RETVAL]], align 8
12477 // CHECK: ret %struct.poly64x1x3_t [[TMP6]]
12478 poly64x1x3_t test_vld1_p64_x3(poly64_t const *a) {
12479   return vld1_p64_x3(a);
}
12482 // CHECK-LABEL: @test_vld1q_f64_x4(
12483 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
12484 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
12485 // CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr %a)
12486 // CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], ptr [[__RET]]
12487 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
12488 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, ptr [[RETVAL]], align 16
12489 // CHECK: ret %struct.float64x2x4_t [[TMP6]]
12490 float64x2x4_t test_vld1q_f64_x4(float64_t const *a) {
12491   return vld1q_f64_x4(a);
}
12494 // CHECK-LABEL: @test_vld1q_p64_x4(
12495 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
12496 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
12497 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr %a)
12498 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]]
12499 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
12500 // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x4_t, ptr [[RETVAL]], align 16
12501 // CHECK: ret %struct.poly64x2x4_t [[TMP6]]
12502 poly64x2x4_t test_vld1q_p64_x4(poly64_t const *a) {
12503   return vld1q_p64_x4(a);
}
12506 // CHECK-LABEL: @test_vld1_f64_x4(
12507 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
12508 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
12509 // CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr %a)
12510 // CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], ptr [[__RET]]
12511 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
12512 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, ptr [[RETVAL]], align 8
12513 // CHECK: ret %struct.float64x1x4_t [[TMP6]]
12514 float64x1x4_t test_vld1_f64_x4(float64_t const *a) {
12515   return vld1_f64_x4(a);
}
12518 // CHECK-LABEL: @test_vld1_p64_x4(
12519 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
12520 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
12521 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr %a)
12522 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]]
12523 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
12524 // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x4_t, ptr [[RETVAL]], align 8
12525 // CHECK: ret %struct.poly64x1x4_t [[TMP6]]
12526 poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) {
12527   return vld1_p64_x4(a);
}
12530 // CHECK-LABEL: @test_vst1q_f64_x2(
12531 // CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
12532 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
12533 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[B]], i32 0, i32 0
12534 // CHECK: store [2 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
12535 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
12536 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0
12537 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL]], i64 0, i64 0
12538 // CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
12539 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
12540 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0
12541 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
12542 // CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
12543 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
12544 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
12545 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
12546 // CHECK: call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> [[TMP7]], <2 x double> [[TMP8]], ptr %a)
12548 void test_vst1q_f64_x2(float64_t *a, float64x2x2_t b) {
12549   vst1q_f64_x2(a, b);
}
12552 // CHECK-LABEL: @test_vst1q_p64_x2(
12553 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16
12554 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
12555 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[B]], i32 0, i32 0
12556 // CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
12557 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
12558 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0
12559 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
12560 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
12561 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
12562 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0
12563 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
12564 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
12565 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
12566 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
12567 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
12568 // CHECK: call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %a)
12570 void test_vst1q_p64_x2(poly64_t *a, poly64x2x2_t b) {
12571   vst1q_p64_x2(a, b);
}
12574 // CHECK-LABEL: @test_vst1_f64_x2(
12575 // CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
12576 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
12577 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[B]], i32 0, i32 0
12578 // CHECK: store [2 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12579 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
12580 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0
12581 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL]], i64 0, i64 0
12582 // CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
12583 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
12584 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0
12585 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
12586 // CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
12587 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
12588 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
12589 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
12590 // CHECK: call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> [[TMP7]], <1 x double> [[TMP8]], ptr %a)
12592 void test_vst1_f64_x2(float64_t *a, float64x1x2_t b) {
  vst1_f64_x2(a, b);
}
12596 // CHECK-LABEL: @test_vst1_p64_x2(
12597 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8
12598 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
12599 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[B]], i32 0, i32 0
12600 // CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12601 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
12602 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0
12603 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
12604 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
12605 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12606 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0
12607 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
12608 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
12609 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12610 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12611 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12612 // CHECK: call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %a)
12614 void test_vst1_p64_x2(poly64_t *a, poly64x1x2_t b) {
  vst1_p64_x2(a, b);
}
12618 // CHECK-LABEL: @test_vst1q_f64_x3(
12619 // CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
12620 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
12621 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[B]], i32 0, i32 0
12622 // CHECK: store [3 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
12623 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
12624 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
12625 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL]], i64 0, i64 0
12626 // CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
12627 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
12628 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
12629 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
12630 // CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
12631 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
12632 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
12633 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL3]], i64 0, i64 2
12634 // CHECK: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16
12635 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
12636 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
12637 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
12638 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
12639 // CHECK: call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], ptr %a)
12641 void test_vst1q_f64_x3(float64_t *a, float64x2x3_t b) {
12642   vst1q_f64_x3(a, b);
}
12645 // CHECK-LABEL: @test_vst1q_p64_x3(
12646 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16
12647 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
12648 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[B]], i32 0, i32 0
12649 // CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
12650 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
12651 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
12652 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
12653 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
12654 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
12655 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
12656 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
12657 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
12658 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
12659 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
12660 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
12661 // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
12662 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
12663 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
12664 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
12665 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
12666 // CHECK: call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %a)
12668 void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) {
12669   vst1q_p64_x3(a, b);
}
12672 // CHECK-LABEL: @test_vst1_f64_x3(
12673 // CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
12674 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
12675 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[B]], i32 0, i32 0
12676 // CHECK: store [3 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12677 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
12678 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
12679 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL]], i64 0, i64 0
12680 // CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
12681 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
12682 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
12683 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
12684 // CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
12685 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
12686 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
12687 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL3]], i64 0, i64 2
12688 // CHECK: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8
12689 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
12690 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
12691 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
12692 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
12693 // CHECK: call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], ptr %a)
12695 void test_vst1_f64_x3(float64_t *a, float64x1x3_t b) {
  vst1_f64_x3(a, b);
}
12699 // CHECK-LABEL: @test_vst1_p64_x3(
12700 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
12701 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
12702 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[B]], i32 0, i32 0
12703 // CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12704 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
12705 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
12706 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
12707 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
12708 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12709 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
12710 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
12711 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
12712 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12713 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
12714 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
12715 // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
12716 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12717 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12718 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12719 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12720 // CHECK: call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %a)
12722 void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) {
  vst1_p64_x3(a, b);
}
12726 // CHECK-LABEL: @test_vst1q_f64_x4(
12727 // CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
12728 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
12729 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[B]], i32 0, i32 0
12730 // CHECK: store [4 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
12731 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
12732 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
12733 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL]], i64 0, i64 0
12734 // CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
12735 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
12736 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
12737 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
12738 // CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
12739 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
12740 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
12741 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL3]], i64 0, i64 2
12742 // CHECK: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16
12743 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
12744 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
12745 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL5]], i64 0, i64 3
12746 // CHECK: [[TMP9:%.*]] = load <2 x double>, ptr [[ARRAYIDX6]], align 16
12747 // CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
12748 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
12749 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
12750 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
12751 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
12752 // CHECK: call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], ptr %a)
12754 void test_vst1q_f64_x4(float64_t *a, float64x2x4_t b) {
12755   vst1q_f64_x4(a, b);
}
12758 // CHECK-LABEL: @test_vst1q_p64_x4(
12759 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16
12760 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
12761 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[B]], i32 0, i32 0
12762 // CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
12763 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
12764 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
12765 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
12766 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
12767 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
12768 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
12769 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
12770 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
12771 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
12772 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
12773 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
12774 // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
12775 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
12776 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
12777 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3
12778 // CHECK: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16
12779 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
12780 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
12781 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
12782 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
12783 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
12784 // CHECK: call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %a)
12786 void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) {
12787   vst1q_p64_x4(a, b);
}
12790 // CHECK-LABEL: @test_vst1_f64_x4(
12791 // CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
12792 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
12793 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[B]], i32 0, i32 0
12794 // CHECK: store [4 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12795 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12796 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
12797 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL]], i64 0, i64 0
12798 // CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
12799 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
12800 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
12801 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
12802 // CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
12803 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
12804 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
12805 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL3]], i64 0, i64 2
12806 // CHECK: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8
12807 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
12808 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
12809 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL5]], i64 0, i64 3
12810 // CHECK: [[TMP9:%.*]] = load <1 x double>, ptr [[ARRAYIDX6]], align 8
12811 // CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
12812 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
12813 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
12814 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
12815 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
12816 // CHECK: call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], ptr %a)
12818 void test_vst1_f64_x4(float64_t *a, float64x1x4_t b) {
  vst1_f64_x4(a, b);
}
12822 // CHECK-LABEL: @test_vst1_p64_x4(
12823 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
12824 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
12825 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[B]], i32 0, i32 0
12826 // CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12827 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12828 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
12829 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
12830 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
12831 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12832 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
12833 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
12834 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
12835 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12836 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
12837 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
12838 // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
12839 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12840 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
12841 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3
12842 // CHECK: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
12843 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
12844 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12845 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12846 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12847 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
12848 // CHECK: call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %a)
12850 void test_vst1_p64_x4(poly64_t *a, poly64x1x4_t b) {
  vst1_p64_x4(a, b);
}
12854 // CHECK-LABEL: @test_vceqd_s64(
12855 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, %b
12856 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
12857 // CHECK: ret i64 [[VCEQD_I]]
12858 uint64_t test_vceqd_s64(int64_t a, int64_t b) {
12859   return (uint64_t)vceqd_s64(a, b);
}
12862 // CHECK-LABEL: @test_vceqd_u64(
12863 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, %b
12864 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
12865 // CHECK: ret i64 [[VCEQD_I]]
12866 uint64_t test_vceqd_u64(uint64_t a, uint64_t b) {
12867   return (int64_t)vceqd_u64(a, b);
}
12870 // CHECK-LABEL: @test_vceqzd_s64(
12871 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, 0
12872 // CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
12873 // CHECK: ret i64 [[VCEQZ_I]]
12874 uint64_t test_vceqzd_s64(int64_t a) {
12875   return (uint64_t)vceqzd_s64(a);
}
12878 // CHECK-LABEL: @test_vceqzd_u64(
12879 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, 0
12880 // CHECK: [[VCEQZD_I:%.*]] = sext i1 [[TMP0]] to i64
12881 // CHECK: ret i64 [[VCEQZD_I]]
12882 int64_t test_vceqzd_u64(int64_t a) {
12883   return (int64_t)vceqzd_u64(a);
}
12886 // CHECK-LABEL: @test_vcged_s64(
12887 // CHECK: [[TMP0:%.*]] = icmp sge i64 %a, %b
12888 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
12889 // CHECK: ret i64 [[VCEQD_I]]
12890 uint64_t test_vcged_s64(int64_t a, int64_t b) {
12891   return (uint64_t)vcged_s64(a, b);
}
12894 // CHECK-LABEL: @test_vcged_u64(
12895 // CHECK: [[TMP0:%.*]] = icmp uge i64 %a, %b
12896 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
12897 // CHECK: ret i64 [[VCEQD_I]]
12898 uint64_t test_vcged_u64(uint64_t a, uint64_t b) {
12899   return (uint64_t)vcged_u64(a, b);
}
12902 // CHECK-LABEL: @test_vcgezd_s64(
12903 // CHECK: [[TMP0:%.*]] = icmp sge i64 %a, 0
12904 // CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
12905 // CHECK: ret i64 [[VCGEZ_I]]
12906 uint64_t test_vcgezd_s64(int64_t a) {
12907   return (uint64_t)vcgezd_s64(a);
}
12910 // CHECK-LABEL: @test_vcgtd_s64(
12911 // CHECK: [[TMP0:%.*]] = icmp sgt i64 %a, %b
12912 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
12913 // CHECK: ret i64 [[VCEQD_I]]
12914 uint64_t test_vcgtd_s64(int64_t a, int64_t b) {
12915   return (uint64_t)vcgtd_s64(a, b);
}
12918 // CHECK-LABEL: @test_vcgtd_u64(
12919 // CHECK: [[TMP0:%.*]] = icmp ugt i64 %a, %b
12920 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
12921 // CHECK: ret i64 [[VCEQD_I]]
12922 uint64_t test_vcgtd_u64(uint64_t a, uint64_t b) {
12923   return (uint64_t)vcgtd_u64(a, b);
}
12926 // CHECK-LABEL: @test_vcgtzd_s64(
12927 // CHECK: [[TMP0:%.*]] = icmp sgt i64 %a, 0
12928 // CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
12929 // CHECK: ret i64 [[VCGTZ_I]]
12930 uint64_t test_vcgtzd_s64(int64_t a) {
12931   return (uint64_t)vcgtzd_s64(a);
}
12934 // CHECK-LABEL: @test_vcled_s64(
12935 // CHECK: [[TMP0:%.*]] = icmp sle i64 %a, %b
12936 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
12937 // CHECK: ret i64 [[VCEQD_I]]
12938 uint64_t test_vcled_s64(int64_t a, int64_t b) {
12939   return (uint64_t)vcled_s64(a, b);
}
12942 // CHECK-LABEL: @test_vcled_u64(
12943 // CHECK: [[TMP0:%.*]] = icmp ule i64 %a, %b
12944 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
12945 // CHECK: ret i64 [[VCEQD_I]]
12946 uint64_t test_vcled_u64(uint64_t a, uint64_t b) {
12947   return (uint64_t)vcled_u64(a, b);
}
12950 // CHECK-LABEL: @test_vclezd_s64(
12951 // CHECK: [[TMP0:%.*]] = icmp sle i64 %a, 0
12952 // CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
12953 // CHECK: ret i64 [[VCLEZ_I]]
12954 uint64_t test_vclezd_s64(int64_t a) {
12955   return (uint64_t)vclezd_s64(a);
}
12958 // CHECK-LABEL: @test_vcltd_s64(
12959 // CHECK: [[TMP0:%.*]] = icmp slt i64 %a, %b
12960 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
12961 // CHECK: ret i64 [[VCEQD_I]]
12962 uint64_t test_vcltd_s64(int64_t a, int64_t b) {
12963   return (uint64_t)vcltd_s64(a, b);
}
12966 // CHECK-LABEL: @test_vcltd_u64(
12967 // CHECK: [[TMP0:%.*]] = icmp ult i64 %a, %b
12968 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
12969 // CHECK: ret i64 [[VCEQD_I]]
12970 uint64_t test_vcltd_u64(uint64_t a, uint64_t b) {
12971   return (uint64_t)vcltd_u64(a, b);
}
12974 // CHECK-LABEL: @test_vcltzd_s64(
12975 // CHECK: [[TMP0:%.*]] = icmp slt i64 %a, 0
12976 // CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
12977 // CHECK: ret i64 [[VCLTZ_I]]
12978 uint64_t test_vcltzd_s64(int64_t a) {
12979   return (uint64_t)vcltzd_s64(a);
}
12982 // CHECK-LABEL: @test_vtstd_s64(
12983 // CHECK: [[TMP0:%.*]] = and i64 %a, %b
12984 // CHECK: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
12985 // CHECK: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
12986 // CHECK: ret i64 [[VTSTD_I]]
12987 uint64_t test_vtstd_s64(int64_t a, int64_t b) {
12988   return (uint64_t)vtstd_s64(a, b);
}
12991 // CHECK-LABEL: @test_vtstd_u64(
12992 // CHECK: [[TMP0:%.*]] = and i64 %a, %b
12993 // CHECK: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
12994 // CHECK: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
12995 // CHECK: ret i64 [[VTSTD_I]]
12996 uint64_t test_vtstd_u64(uint64_t a, uint64_t b) {
12997   return (uint64_t)vtstd_u64(a, b);
}
13000 // CHECK-LABEL: @test_vabsd_s64(
13001 // CHECK: [[VABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.abs.i64(i64 %a)
13002 // CHECK: ret i64 [[VABSD_S64_I]]
13003 int64_t test_vabsd_s64(int64_t a) {
13004   return (int64_t)vabsd_s64(a);
}
13007 // CHECK-LABEL: @test_vqabsb_s8(
13008 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
13009 // CHECK: [[VQABSB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> [[TMP0]])
13010 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQABSB_S8_I]], i64 0
13011 // CHECK: ret i8 [[TMP1]]
13012 int8_t test_vqabsb_s8(int8_t a) {
13013   return (int8_t)vqabsb_s8(a);
}
13016 // CHECK-LABEL: @test_vqabsh_s16(
13017 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
13018 // CHECK: [[VQABSH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[TMP0]])
13019 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQABSH_S16_I]], i64 0
13020 // CHECK: ret i16 [[TMP1]]
13021 int16_t test_vqabsh_s16(int16_t a) {
13022   return (int16_t)vqabsh_s16(a);
}
13025 // CHECK-LABEL: @test_vqabss_s32(
13026 // CHECK: [[VQABSS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
13027 // CHECK: ret i32 [[VQABSS_S32_I]]
13028 int32_t test_vqabss_s32(int32_t a) {
13029   return (int32_t)vqabss_s32(a);
}
13032 // CHECK-LABEL: @test_vqabsd_s64(
13033 // CHECK: [[VQABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqabs.i64(i64 %a)
13034 // CHECK: ret i64 [[VQABSD_S64_I]]
13035 int64_t test_vqabsd_s64(int64_t a) {
13036   return (int64_t)vqabsd_s64(a);
}
13039 // CHECK-LABEL: @test_vnegd_s64(
13040 // CHECK: [[VNEGD_I:%.*]] = sub i64 0, %a
13041 // CHECK: ret i64 [[VNEGD_I]]
13042 int64_t test_vnegd_s64(int64_t a) {
13043   return (int64_t)vnegd_s64(a);
}
13046 // CHECK-LABEL: @test_vqnegb_s8(
13047 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
13048 // CHECK: [[VQNEGB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> [[TMP0]])
13049 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQNEGB_S8_I]], i64 0
13050 // CHECK: ret i8 [[TMP1]]
13051 int8_t test_vqnegb_s8(int8_t a) {
13052   return (int8_t)vqnegb_s8(a);
}
13055 // CHECK-LABEL: @test_vqnegh_s16(
13056 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
13057 // CHECK: [[VQNEGH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[TMP0]])
13058 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQNEGH_S16_I]], i64 0
13059 // CHECK: ret i16 [[TMP1]]
13060 int16_t test_vqnegh_s16(int16_t a) {
13061   return (int16_t)vqnegh_s16(a);
}
13064 // CHECK-LABEL: @test_vqnegs_s32(
13065 // CHECK: [[VQNEGS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqneg.i32(i32 %a)
13066 // CHECK: ret i32 [[VQNEGS_S32_I]]
13067 int32_t test_vqnegs_s32(int32_t a) {
13068   return (int32_t)vqnegs_s32(a);
}
13071 // CHECK-LABEL: @test_vqnegd_s64(
13072 // CHECK: [[VQNEGD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqneg.i64(i64 %a)
13073 // CHECK: ret i64 [[VQNEGD_S64_I]]
13074 int64_t test_vqnegd_s64(int64_t a) {
13075   return (int64_t)vqnegd_s64(a);
}
13078 // CHECK-LABEL: @test_vuqaddb_s8(
13079 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
13080 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
13081 // CHECK: [[VUQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
13082 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VUQADDB_S8_I]], i64 0
13083 // CHECK: ret i8 [[TMP2]]
13084 int8_t test_vuqaddb_s8(int8_t a, uint8_t b) {
13085   return (int8_t)vuqaddb_s8(a, b);
}
13088 // CHECK-LABEL: @test_vuqaddh_s16(
13089 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
13090 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
13091 // CHECK: [[VUQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13092 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VUQADDH_S16_I]], i64 0
13093 // CHECK: ret i16 [[TMP2]]
13094 int16_t test_vuqaddh_s16(int16_t a, uint16_t b) {
13095   return (int16_t)vuqaddh_s16(a, b);
}
13098 // CHECK-LABEL: @test_vuqadds_s32(
13099 // CHECK: [[VUQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %a, i32 %b)
13100 // CHECK: ret i32 [[VUQADDS_S32_I]]
13101 int32_t test_vuqadds_s32(int32_t a, uint32_t b) {
13102   return (int32_t)vuqadds_s32(a, b);
}
13105 // CHECK-LABEL: @test_vuqaddd_s64(
13106 // CHECK: [[VUQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %a, i64 %b)
13107 // CHECK: ret i64 [[VUQADDD_S64_I]]
13108 int64_t test_vuqaddd_s64(int64_t a, uint64_t b) {
13109   return (int64_t)vuqaddd_s64(a, b);
}
13112 // CHECK-LABEL: @test_vsqaddb_u8(
13113 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
13114 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
13115 // CHECK: [[VSQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
13116 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VSQADDB_U8_I]], i64 0
13117 // CHECK: ret i8 [[TMP2]]
13118 uint8_t test_vsqaddb_u8(uint8_t a, int8_t b) {
13119   return (uint8_t)vsqaddb_u8(a, b);
}
13122 // CHECK-LABEL: @test_vsqaddh_u16(
13123 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
13124 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
13125 // CHECK: [[VSQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13126 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VSQADDH_U16_I]], i64 0
13127 // CHECK: ret i16 [[TMP2]]
13128 uint16_t test_vsqaddh_u16(uint16_t a, int16_t b) {
13129   return (uint16_t)vsqaddh_u16(a, b);
}
13132 // CHECK-LABEL: @test_vsqadds_u32(
13133 // CHECK: [[VSQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %a, i32 %b)
13134 // CHECK: ret i32 [[VSQADDS_U32_I]]
13135 uint32_t test_vsqadds_u32(uint32_t a, int32_t b) {
13136   return (uint32_t)vsqadds_u32(a, b);
}
13139 // CHECK-LABEL: @test_vsqaddd_u64(
13140 // CHECK: [[VSQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %a, i64 %b)
13141 // CHECK: ret i64 [[VSQADDD_U64_I]]
13142 uint64_t test_vsqaddd_u64(uint64_t a, int64_t b) {
13143   return (uint64_t)vsqaddd_u64(a, b);
}
13146 // CHECK-LABEL: @test_vqdmlalh_s16(
13147 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
13148 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %c, i64 0
13149 // CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13150 // CHECK: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
13151 // CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0_I]])
13152 // CHECK: ret i32 [[VQDMLXL1_I]]
13153 int32_t test_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) {
13154   return (int32_t)vqdmlalh_s16(a, b, c);
}
13157 // CHECK-LABEL: @test_vqdmlals_s32(
13158 // CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c)
13159 // CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL_I]])
13160 // CHECK: ret i64 [[VQDMLXL1_I]]
13161 int64_t test_vqdmlals_s32(int64_t a, int32_t b, int32_t c) {
13162   return (int64_t)vqdmlals_s32(a, b, c);
}
13165 // CHECK-LABEL: @test_vqdmlslh_s16(
13166 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
13167 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %c, i64 0
13168 // CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13169 // CHECK: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
13170 // CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0_I]])
13171 // CHECK: ret i32 [[VQDMLXL1_I]]
13172 int32_t test_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) {
13173   return (int32_t)vqdmlslh_s16(a, b, c);
}
13176 // CHECK-LABEL: @test_vqdmlsls_s32(
13177 // CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c)
13178 // CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL_I]])
13179 // CHECK: ret i64 [[VQDMLXL1_I]]
13180 int64_t test_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) {
13181   return (int64_t)vqdmlsls_s32(a, b, c);
}
13184 // CHECK-LABEL: @test_vqdmullh_s16(
13185 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
13186 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
13187 // CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13188 // CHECK: [[TMP2:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
13189 // CHECK: ret i32 [[TMP2]]
13190 int32_t test_vqdmullh_s16(int16_t a, int16_t b) {
13191   return (int32_t)vqdmullh_s16(a, b);
}
13194 // CHECK-LABEL: @test_vqdmulls_s32(
13195 // CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 %b)
13196 // CHECK: ret i64 [[VQDMULLS_S32_I]]
13197 int64_t test_vqdmulls_s32(int32_t a, int32_t b) {
13198   return (int64_t)vqdmulls_s32(a, b);
}
13201 // CHECK-LABEL: @test_vqmovunh_s16(
13202 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
13203 // CHECK: [[VQMOVUNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[TMP0]])
13204 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVUNH_S16_I]], i64 0
13205 // CHECK: ret i8 [[TMP1]]
13206 uint8_t test_vqmovunh_s16(int16_t a) {
13207   return (uint8_t)vqmovunh_s16(a);
}
13210 // CHECK-LABEL: @test_vqmovuns_s32(
13211 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
13212 // CHECK: [[VQMOVUNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[TMP0]])
13213 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVUNS_S32_I]], i64 0
13214 // CHECK: ret i16 [[TMP1]]
13215 uint16_t test_vqmovuns_s32(int32_t a) {
13216   return (uint16_t)vqmovuns_s32(a);
}
13219 // CHECK-LABEL: @test_vqmovund_s64(
13220 // CHECK: [[VQMOVUND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %a)
13221 // CHECK: ret i32 [[VQMOVUND_S64_I]]
13222 uint32_t test_vqmovund_s64(int64_t a) {
13223   return (uint32_t)vqmovund_s64(a);
}
13226 // CHECK-LABEL: @test_vqmovnh_s16(
13227 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
13228 // CHECK: [[VQMOVNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[TMP0]])
13229 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_S16_I]], i64 0
13230 // CHECK: ret i8 [[TMP1]]
13231 int8_t test_vqmovnh_s16(int16_t a) {
13232   return (int8_t)vqmovnh_s16(a);
}
13235 // CHECK-LABEL: @test_vqmovns_s32(
13236 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
13237 // CHECK: [[VQMOVNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[TMP0]])
13238 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_S32_I]], i64 0
13239 // CHECK: ret i16 [[TMP1]]
13240 int16_t test_vqmovns_s32(int32_t a) {
13241   return (int16_t)vqmovns_s32(a);
}
13244 // CHECK-LABEL: @test_vqmovnd_s64(
13245 // CHECK: [[VQMOVND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %a)
13246 // CHECK: ret i32 [[VQMOVND_S64_I]]
13247 int32_t test_vqmovnd_s64(int64_t a) {
13248   return (int32_t)vqmovnd_s64(a);
}
13251 // CHECK-LABEL: @test_vqmovnh_u16(
13252 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
13253 // CHECK: [[VQMOVNH_U16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[TMP0]])
13254 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_U16_I]], i64 0
13255 // CHECK: ret i8 [[TMP1]]
13256 int8_t test_vqmovnh_u16(int16_t a) {
13257   return (int8_t)vqmovnh_u16(a);
}
13260 // CHECK-LABEL: @test_vqmovns_u32(
13261 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
13262 // CHECK: [[VQMOVNS_U32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[TMP0]])
13263 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_U32_I]], i64 0
13264 // CHECK: ret i16 [[TMP1]]
13265 int16_t test_vqmovns_u32(int32_t a) {
13266   return (int16_t)vqmovns_u32(a);
}
13269 // CHECK-LABEL: @test_vqmovnd_u64(
13270 // CHECK: [[VQMOVND_U64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %a)
13271 // CHECK: ret i32 [[VQMOVND_U64_I]]
13272 int32_t test_vqmovnd_u64(int64_t a) {
13273   return (int32_t)vqmovnd_u64(a);
}
13276 // CHECK-LABEL: @test_vceqs_f32(
13277 // CHECK: [[TMP0:%.*]] = fcmp oeq float %a, %b
13278 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
13279 // CHECK: ret i32 [[VCMPD_I]]
13280 uint32_t test_vceqs_f32(float32_t a, float32_t b) {
13281   return (uint32_t)vceqs_f32(a, b);
}
13284 // CHECK-LABEL: @test_vceqd_f64(
13285 // CHECK: [[TMP0:%.*]] = fcmp oeq double %a, %b
13286 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
13287 // CHECK: ret i64 [[VCMPD_I]]
13288 uint64_t test_vceqd_f64(float64_t a, float64_t b) {
13289   return (uint64_t)vceqd_f64(a, b);
}
13292 // CHECK-LABEL: @test_vceqzs_f32(
13293 // CHECK: [[TMP0:%.*]] = fcmp oeq float %a, 0.000000e+00
13294 // CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i32
13295 // CHECK: ret i32 [[VCEQZ_I]]
13296 uint32_t test_vceqzs_f32(float32_t a
) {
13297 return (uint32_t)vceqzs_f32(a
);
13300 // CHECK-LABEL: @test_vceqzd_f64(
13301 // CHECK: [[TMP0:%.*]] = fcmp oeq double %a, 0.000000e+00
13302 // CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
13303 // CHECK: ret i64 [[VCEQZ_I]]
13304 uint64_t test_vceqzd_f64(float64_t a
) {
13305 return (uint64_t)vceqzd_f64(a
);
13308 // CHECK-LABEL: @test_vcges_f32(
13309 // CHECK: [[TMP0:%.*]] = fcmp oge float %a, %b
13310 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
13311 // CHECK: ret i32 [[VCMPD_I]]
13312 uint32_t test_vcges_f32(float32_t a
, float32_t b
) {
13313 return (uint32_t)vcges_f32(a
, b
);
13316 // CHECK-LABEL: @test_vcged_f64(
13317 // CHECK: [[TMP0:%.*]] = fcmp oge double %a, %b
13318 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
13319 // CHECK: ret i64 [[VCMPD_I]]
13320 uint64_t test_vcged_f64(float64_t a
, float64_t b
) {
13321 return (uint64_t)vcged_f64(a
, b
);
13324 // CHECK-LABEL: @test_vcgezs_f32(
13325 // CHECK: [[TMP0:%.*]] = fcmp oge float %a, 0.000000e+00
13326 // CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i32
13327 // CHECK: ret i32 [[VCGEZ_I]]
13328 uint32_t test_vcgezs_f32(float32_t a
) {
13329 return (uint32_t)vcgezs_f32(a
);
13332 // CHECK-LABEL: @test_vcgezd_f64(
13333 // CHECK: [[TMP0:%.*]] = fcmp oge double %a, 0.000000e+00
13334 // CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
13335 // CHECK: ret i64 [[VCGEZ_I]]
13336 uint64_t test_vcgezd_f64(float64_t a
) {
13337 return (uint64_t)vcgezd_f64(a
);
13340 // CHECK-LABEL: @test_vcgts_f32(
13341 // CHECK: [[TMP0:%.*]] = fcmp ogt float %a, %b
13342 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
13343 // CHECK: ret i32 [[VCMPD_I]]
13344 uint32_t test_vcgts_f32(float32_t a
, float32_t b
) {
13345 return (uint32_t)vcgts_f32(a
, b
);
13348 // CHECK-LABEL: @test_vcgtd_f64(
13349 // CHECK: [[TMP0:%.*]] = fcmp ogt double %a, %b
13350 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
13351 // CHECK: ret i64 [[VCMPD_I]]
13352 uint64_t test_vcgtd_f64(float64_t a
, float64_t b
) {
13353 return (uint64_t)vcgtd_f64(a
, b
);
13356 // CHECK-LABEL: @test_vcgtzs_f32(
13357 // CHECK: [[TMP0:%.*]] = fcmp ogt float %a, 0.000000e+00
13358 // CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i32
13359 // CHECK: ret i32 [[VCGTZ_I]]
13360 uint32_t test_vcgtzs_f32(float32_t a
) {
13361 return (uint32_t)vcgtzs_f32(a
);
13364 // CHECK-LABEL: @test_vcgtzd_f64(
13365 // CHECK: [[TMP0:%.*]] = fcmp ogt double %a, 0.000000e+00
13366 // CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
13367 // CHECK: ret i64 [[VCGTZ_I]]
13368 uint64_t test_vcgtzd_f64(float64_t a
) {
13369 return (uint64_t)vcgtzd_f64(a
);
13372 // CHECK-LABEL: @test_vcles_f32(
13373 // CHECK: [[TMP0:%.*]] = fcmp ole float %a, %b
13374 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
13375 // CHECK: ret i32 [[VCMPD_I]]
13376 uint32_t test_vcles_f32(float32_t a
, float32_t b
) {
13377 return (uint32_t)vcles_f32(a
, b
);
13380 // CHECK-LABEL: @test_vcled_f64(
13381 // CHECK: [[TMP0:%.*]] = fcmp ole double %a, %b
13382 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
13383 // CHECK: ret i64 [[VCMPD_I]]
13384 uint64_t test_vcled_f64(float64_t a
, float64_t b
) {
13385 return (uint64_t)vcled_f64(a
, b
);
13388 // CHECK-LABEL: @test_vclezs_f32(
13389 // CHECK: [[TMP0:%.*]] = fcmp ole float %a, 0.000000e+00
13390 // CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i32
13391 // CHECK: ret i32 [[VCLEZ_I]]
13392 uint32_t test_vclezs_f32(float32_t a
) {
13393 return (uint32_t)vclezs_f32(a
);
13396 // CHECK-LABEL: @test_vclezd_f64(
13397 // CHECK: [[TMP0:%.*]] = fcmp ole double %a, 0.000000e+00
13398 // CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
13399 // CHECK: ret i64 [[VCLEZ_I]]
13400 uint64_t test_vclezd_f64(float64_t a
) {
13401 return (uint64_t)vclezd_f64(a
);
13404 // CHECK-LABEL: @test_vclts_f32(
13405 // CHECK: [[TMP0:%.*]] = fcmp olt float %a, %b
13406 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
13407 // CHECK: ret i32 [[VCMPD_I]]
13408 uint32_t test_vclts_f32(float32_t a
, float32_t b
) {
13409 return (uint32_t)vclts_f32(a
, b
);
13412 // CHECK-LABEL: @test_vcltd_f64(
13413 // CHECK: [[TMP0:%.*]] = fcmp olt double %a, %b
13414 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
13415 // CHECK: ret i64 [[VCMPD_I]]
13416 uint64_t test_vcltd_f64(float64_t a
, float64_t b
) {
13417 return (uint64_t)vcltd_f64(a
, b
);
13420 // CHECK-LABEL: @test_vcltzs_f32(
13421 // CHECK: [[TMP0:%.*]] = fcmp olt float %a, 0.000000e+00
13422 // CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i32
13423 // CHECK: ret i32 [[VCLTZ_I]]
13424 uint32_t test_vcltzs_f32(float32_t a
) {
13425 return (uint32_t)vcltzs_f32(a
);
13428 // CHECK-LABEL: @test_vcltzd_f64(
13429 // CHECK: [[TMP0:%.*]] = fcmp olt double %a, 0.000000e+00
13430 // CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
13431 // CHECK: ret i64 [[VCLTZ_I]]
13432 uint64_t test_vcltzd_f64(float64_t a
) {
13433 return (uint64_t)vcltzd_f64(a
);
13436 // CHECK-LABEL: @test_vcages_f32(
13437 // CHECK: [[VCAGES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %a, float %b)
13438 // CHECK: ret i32 [[VCAGES_F32_I]]
13439 uint32_t test_vcages_f32(float32_t a
, float32_t b
) {
13440 return (uint32_t)vcages_f32(a
, b
);
13443 // CHECK-LABEL: @test_vcaged_f64(
13444 // CHECK: [[VCAGED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %a, double %b)
13445 // CHECK: ret i64 [[VCAGED_F64_I]]
13446 uint64_t test_vcaged_f64(float64_t a
, float64_t b
) {
13447 return (uint64_t)vcaged_f64(a
, b
);
13450 // CHECK-LABEL: @test_vcagts_f32(
13451 // CHECK: [[VCAGTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %a, float %b)
13452 // CHECK: ret i32 [[VCAGTS_F32_I]]
13453 uint32_t test_vcagts_f32(float32_t a
, float32_t b
) {
13454 return (uint32_t)vcagts_f32(a
, b
);
13457 // CHECK-LABEL: @test_vcagtd_f64(
13458 // CHECK: [[VCAGTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %a, double %b)
13459 // CHECK: ret i64 [[VCAGTD_F64_I]]
13460 uint64_t test_vcagtd_f64(float64_t a
, float64_t b
) {
13461 return (uint64_t)vcagtd_f64(a
, b
);
13464 // CHECK-LABEL: @test_vcales_f32(
13465 // CHECK: [[VCALES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %b, float %a)
13466 // CHECK: ret i32 [[VCALES_F32_I]]
13467 uint32_t test_vcales_f32(float32_t a
, float32_t b
) {
13468 return (uint32_t)vcales_f32(a
, b
);
13471 // CHECK-LABEL: @test_vcaled_f64(
13472 // CHECK: [[VCALED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %b, double %a)
13473 // CHECK: ret i64 [[VCALED_F64_I]]
13474 uint64_t test_vcaled_f64(float64_t a
, float64_t b
) {
13475 return (uint64_t)vcaled_f64(a
, b
);
13478 // CHECK-LABEL: @test_vcalts_f32(
13479 // CHECK: [[VCALTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %b, float %a)
13480 // CHECK: ret i32 [[VCALTS_F32_I]]
13481 uint32_t test_vcalts_f32(float32_t a
, float32_t b
) {
13482 return (uint32_t)vcalts_f32(a
, b
);
13485 // CHECK-LABEL: @test_vcaltd_f64(
13486 // CHECK: [[VCALTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %b, double %a)
13487 // CHECK: ret i64 [[VCALTD_F64_I]]
13488 uint64_t test_vcaltd_f64(float64_t a
, float64_t b
) {
13489 return (uint64_t)vcaltd_f64(a
, b
);
13492 // CHECK-LABEL: @test_vshrd_n_s64(
13493 // CHECK: [[SHRD_N:%.*]] = ashr i64 %a, 1
13494 // CHECK: ret i64 [[SHRD_N]]
13495 int64_t test_vshrd_n_s64(int64_t a
) {
13496 return (int64_t)vshrd_n_s64(a
, 1);
13499 // CHECK-LABEL: @test_vshr_n_s64(
13500 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13501 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
13502 // CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], splat (i64 1)
13503 // CHECK: ret <1 x i64> [[VSHR_N]]
13504 int64x1_t
test_vshr_n_s64(int64x1_t a
) {
13505 return vshr_n_s64(a
, 1);
13508 // CHECK-LABEL: @test_vshrd_n_u64(
13509 // CHECK: ret i64 0
13510 uint64_t test_vshrd_n_u64(uint64_t a
) {
13511 return (uint64_t)vshrd_n_u64(a
, 64);
13514 // CHECK-LABEL: @test_vshrd_n_u64_2(
13515 // CHECK: ret i64 0
13516 uint64_t test_vshrd_n_u64_2() {
13517 uint64_t a
= UINT64_C(0xf000000000000000);
13518 return vshrd_n_u64(a
, 64);
13521 // CHECK-LABEL: @test_vshr_n_u64(
13522 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13523 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
13524 // CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], splat (i64 1)
13525 // CHECK: ret <1 x i64> [[VSHR_N]]
13526 uint64x1_t
test_vshr_n_u64(uint64x1_t a
) {
13527 return vshr_n_u64(a
, 1);
13530 // CHECK-LABEL: @test_vrshrd_n_s64(
13531 // CHECK: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 -63)
13532 // CHECK: ret i64 [[VRSHR_N]]
13533 int64_t test_vrshrd_n_s64(int64_t a
) {
13534 return (int64_t)vrshrd_n_s64(a
, 63);
13537 // CHECK-LABEL: @test_vrshr_n_s64(
13538 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13539 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
13540 // CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1))
13541 // CHECK: ret <1 x i64> [[VRSHR_N1]]
13542 int64x1_t
test_vrshr_n_s64(int64x1_t a
) {
13543 return vrshr_n_s64(a
, 1);
13546 // CHECK-LABEL: @test_vrshrd_n_u64(
13547 // CHECK: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 -63)
13548 // CHECK: ret i64 [[VRSHR_N]]
13549 uint64_t test_vrshrd_n_u64(uint64_t a
) {
13550 return (uint64_t)vrshrd_n_u64(a
, 63);
13553 // CHECK-LABEL: @test_vrshr_n_u64(
13554 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13555 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
13556 // CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1))
13557 // CHECK: ret <1 x i64> [[VRSHR_N1]]
13558 uint64x1_t
test_vrshr_n_u64(uint64x1_t a
) {
13559 return vrshr_n_u64(a
, 1);
13562 // CHECK-LABEL: @test_vsrad_n_s64(
13563 // CHECK: [[SHRD_N:%.*]] = ashr i64 %b, 63
13564 // CHECK: [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
13565 // CHECK: ret i64 [[TMP0]]
13566 int64_t test_vsrad_n_s64(int64_t a
, int64_t b
) {
13567 return (int64_t)vsrad_n_s64(a
, b
, 63);
13570 // CHECK-LABEL: @test_vsra_n_s64(
13571 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13572 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
13573 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
13574 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
13575 // CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], splat (i64 1)
13576 // CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
13577 // CHECK: ret <1 x i64> [[TMP4]]
13578 int64x1_t
test_vsra_n_s64(int64x1_t a
, int64x1_t b
) {
13579 return vsra_n_s64(a
, b
, 1);
13582 // CHECK-LABEL: @test_vsrad_n_u64(
13583 // CHECK: [[SHRD_N:%.*]] = lshr i64 %b, 63
13584 // CHECK: [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
13585 // CHECK: ret i64 [[TMP0]]
13586 uint64_t test_vsrad_n_u64(uint64_t a
, uint64_t b
) {
13587 return (uint64_t)vsrad_n_u64(a
, b
, 63);
13590 // CHECK-LABEL: @test_vsrad_n_u64_2(
13591 // CHECK: ret i64 %a
13592 uint64_t test_vsrad_n_u64_2(uint64_t a
, uint64_t b
) {
13593 return (uint64_t)vsrad_n_u64(a
, b
, 64);
13596 // CHECK-LABEL: @test_vsra_n_u64(
13597 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13598 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
13599 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
13600 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
13601 // CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], splat (i64 1)
13602 // CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
13603 // CHECK: ret <1 x i64> [[TMP4]]
13604 uint64x1_t
test_vsra_n_u64(uint64x1_t a
, uint64x1_t b
) {
13605 return vsra_n_u64(a
, b
, 1);
13608 // CHECK-LABEL: @test_vrsrad_n_s64(
13609 // CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %b, i64 -63)
13610 // CHECK: [[TMP1:%.*]] = add i64 %a, [[TMP0]]
13611 // CHECK: ret i64 [[TMP1]]
13612 int64_t test_vrsrad_n_s64(int64_t a
, int64_t b
) {
13613 return (int64_t)vrsrad_n_s64(a
, b
, 63);
13616 // CHECK-LABEL: @test_vrsra_n_s64(
13617 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13618 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
13619 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
13620 // CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1))
13621 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
13622 // CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
13623 // CHECK: ret <1 x i64> [[TMP3]]
13624 int64x1_t
test_vrsra_n_s64(int64x1_t a
, int64x1_t b
) {
13625 return vrsra_n_s64(a
, b
, 1);
13628 // CHECK-LABEL: @test_vrsrad_n_u64(
13629 // CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %b, i64 -63)
13630 // CHECK: [[TMP1:%.*]] = add i64 %a, [[TMP0]]
13631 // CHECK: ret i64 [[TMP1]]
13632 uint64_t test_vrsrad_n_u64(uint64_t a
, uint64_t b
) {
13633 return (uint64_t)vrsrad_n_u64(a
, b
, 63);
13636 // CHECK-LABEL: @test_vrsra_n_u64(
13637 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13638 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
13639 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
13640 // CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1))
13641 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
13642 // CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
13643 // CHECK: ret <1 x i64> [[TMP3]]
13644 uint64x1_t
test_vrsra_n_u64(uint64x1_t a
, uint64x1_t b
) {
13645 return vrsra_n_u64(a
, b
, 1);
13648 // CHECK-LABEL: @test_vshld_n_s64(
13649 // CHECK: [[SHLD_N:%.*]] = shl i64 %a, 1
13650 // CHECK: ret i64 [[SHLD_N]]
13651 int64_t test_vshld_n_s64(int64_t a
) {
13652 return (int64_t)vshld_n_s64(a
, 1);
13655 // CHECK-LABEL: @test_vshl_n_s64(
13656 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13657 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
13658 // CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1)
13659 // CHECK: ret <1 x i64> [[VSHL_N]]
13660 int64x1_t
test_vshl_n_s64(int64x1_t a
) {
13661 return vshl_n_s64(a
, 1);
13664 // CHECK-LABEL: @test_vshld_n_u64(
13665 // CHECK: [[SHLD_N:%.*]] = shl i64 %a, 63
13666 // CHECK: ret i64 [[SHLD_N]]
13667 uint64_t test_vshld_n_u64(uint64_t a
) {
13668 return (uint64_t)vshld_n_u64(a
, 63);
13671 // CHECK-LABEL: @test_vshl_n_u64(
13672 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13673 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
13674 // CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1)
13675 // CHECK: ret <1 x i64> [[VSHL_N]]
13676 uint64x1_t
test_vshl_n_u64(uint64x1_t a
) {
13677 return vshl_n_u64(a
, 1);
13680 // CHECK-LABEL: @test_vqshlb_n_s8(
13681 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
13682 // CHECK: [[VQSHLB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>)
13683 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_S8]], i64 0
13684 // CHECK: ret i8 [[TMP1]]
13685 int8_t test_vqshlb_n_s8(int8_t a
) {
13686 return (int8_t)vqshlb_n_s8(a
, 7);
13689 // CHECK-LABEL: @test_vqshlh_n_s16(
13690 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
13691 // CHECK: [[VQSHLH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 poison, i16 poison, i16 poison>)
13692 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_S16]], i64 0
13693 // CHECK: ret i16 [[TMP1]]
13694 int16_t test_vqshlh_n_s16(int16_t a
) {
13695 return (int16_t)vqshlh_n_s16(a
, 15);
13698 // CHECK-LABEL: @test_vqshls_n_s32(
13699 // CHECK: [[VQSHLS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 31)
13700 // CHECK: ret i32 [[VQSHLS_N_S32]]
13701 int32_t test_vqshls_n_s32(int32_t a
) {
13702 return (int32_t)vqshls_n_s32(a
, 31);
13705 // CHECK-LABEL: @test_vqshld_n_s64(
13706 // CHECK: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 63)
13707 // CHECK: ret i64 [[VQSHL_N]]
13708 int64_t test_vqshld_n_s64(int64_t a
) {
13709 return (int64_t)vqshld_n_s64(a
, 63);
13712 // CHECK-LABEL: @test_vqshl_n_s8(
13713 // CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
13714 // CHECK: ret <8 x i8> [[VQSHL_N]]
13715 int8x8_t
test_vqshl_n_s8(int8x8_t a
) {
13716 return vqshl_n_s8(a
, 0);
13719 // CHECK-LABEL: @test_vqshlq_n_s8(
13720 // CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
13721 // CHECK: ret <16 x i8> [[VQSHL_N]]
13722 int8x16_t
test_vqshlq_n_s8(int8x16_t a
) {
13723 return vqshlq_n_s8(a
, 0);
13726 // CHECK-LABEL: @test_vqshl_n_s16(
13727 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
13728 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
13729 // CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
13730 // CHECK: ret <4 x i16> [[VQSHL_N1]]
13731 int16x4_t
test_vqshl_n_s16(int16x4_t a
) {
13732 return vqshl_n_s16(a
, 0);
13735 // CHECK-LABEL: @test_vqshlq_n_s16(
13736 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
13737 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
13738 // CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
13739 // CHECK: ret <8 x i16> [[VQSHL_N1]]
13740 int16x8_t
test_vqshlq_n_s16(int16x8_t a
) {
13741 return vqshlq_n_s16(a
, 0);
13744 // CHECK-LABEL: @test_vqshl_n_s32(
13745 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
13746 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
13747 // CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
13748 // CHECK: ret <2 x i32> [[VQSHL_N1]]
13749 int32x2_t
test_vqshl_n_s32(int32x2_t a
) {
13750 return vqshl_n_s32(a
, 0);
13753 // CHECK-LABEL: @test_vqshlq_n_s32(
13754 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
13755 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
13756 // CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
13757 // CHECK: ret <4 x i32> [[VQSHL_N1]]
13758 int32x4_t
test_vqshlq_n_s32(int32x4_t a
) {
13759 return vqshlq_n_s32(a
, 0);
13762 // CHECK-LABEL: @test_vqshlq_n_s64(
13763 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
13764 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
13765 // CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
13766 // CHECK: ret <2 x i64> [[VQSHL_N1]]
13767 int64x2_t
test_vqshlq_n_s64(int64x2_t a
) {
13768 return vqshlq_n_s64(a
, 0);
13771 // CHECK-LABEL: @test_vqshl_n_u8(
13772 // CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
13773 // CHECK: ret <8 x i8> [[VQSHL_N]]
13774 uint8x8_t
test_vqshl_n_u8(uint8x8_t a
) {
13775 return vqshl_n_u8(a
, 0);
13778 // CHECK-LABEL: @test_vqshlq_n_u8(
13779 // CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
13780 // CHECK: ret <16 x i8> [[VQSHL_N]]
13781 uint8x16_t
test_vqshlq_n_u8(uint8x16_t a
) {
13782 return vqshlq_n_u8(a
, 0);
13785 // CHECK-LABEL: @test_vqshl_n_u16(
13786 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
13787 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
13788 // CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
13789 // CHECK: ret <4 x i16> [[VQSHL_N1]]
13790 uint16x4_t
test_vqshl_n_u16(uint16x4_t a
) {
13791 return vqshl_n_u16(a
, 0);
13794 // CHECK-LABEL: @test_vqshlq_n_u16(
13795 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
13796 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
13797 // CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
13798 // CHECK: ret <8 x i16> [[VQSHL_N1]]
13799 uint16x8_t
test_vqshlq_n_u16(uint16x8_t a
) {
13800 return vqshlq_n_u16(a
, 0);
13803 // CHECK-LABEL: @test_vqshl_n_u32(
13804 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
13805 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
13806 // CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
13807 // CHECK: ret <2 x i32> [[VQSHL_N1]]
13808 uint32x2_t
test_vqshl_n_u32(uint32x2_t a
) {
13809 return vqshl_n_u32(a
, 0);
13812 // CHECK-LABEL: @test_vqshlq_n_u32(
13813 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
13814 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
13815 // CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
13816 // CHECK: ret <4 x i32> [[VQSHL_N1]]
13817 uint32x4_t
test_vqshlq_n_u32(uint32x4_t a
) {
13818 return vqshlq_n_u32(a
, 0);
13821 // CHECK-LABEL: @test_vqshlq_n_u64(
13822 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
13823 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
13824 // CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
13825 // CHECK: ret <2 x i64> [[VQSHL_N1]]
13826 uint64x2_t
test_vqshlq_n_u64(uint64x2_t a
) {
13827 return vqshlq_n_u64(a
, 0);
13830 // CHECK-LABEL: @test_vqshl_n_s64(
13831 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13832 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
13833 // CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> splat (i64 1))
13834 // CHECK: ret <1 x i64> [[VQSHL_N1]]
13835 int64x1_t
test_vqshl_n_s64(int64x1_t a
) {
13836 return vqshl_n_s64(a
, 1);
13839 // CHECK-LABEL: @test_vqshlb_n_u8(
13840 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
13841 // CHECK: [[VQSHLB_N_U8:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>)
13842 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_U8]], i64 0
13843 // CHECK: ret i8 [[TMP1]]
13844 uint8_t test_vqshlb_n_u8(uint8_t a
) {
13845 return (uint8_t)vqshlb_n_u8(a
, 7);
13848 // CHECK-LABEL: @test_vqshlh_n_u16(
13849 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
13850 // CHECK: [[VQSHLH_N_U16:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 poison, i16 poison, i16 poison>)
13851 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_U16]], i64 0
13852 // CHECK: ret i16 [[TMP1]]
13853 uint16_t test_vqshlh_n_u16(uint16_t a
) {
13854 return (uint16_t)vqshlh_n_u16(a
, 15);
13857 // CHECK-LABEL: @test_vqshls_n_u32(
13858 // CHECK: [[VQSHLS_N_U32:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 31)
13859 // CHECK: ret i32 [[VQSHLS_N_U32]]
13860 uint32_t test_vqshls_n_u32(uint32_t a
) {
13861 return (uint32_t)vqshls_n_u32(a
, 31);
13864 // CHECK-LABEL: @test_vqshld_n_u64(
13865 // CHECK: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 63)
13866 // CHECK: ret i64 [[VQSHL_N]]
13867 uint64_t test_vqshld_n_u64(uint64_t a
) {
13868 return (uint64_t)vqshld_n_u64(a
, 63);
13871 // CHECK-LABEL: @test_vqshl_n_u64(
13872 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13873 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
13874 // CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> splat (i64 1))
13875 // CHECK: ret <1 x i64> [[VQSHL_N1]]
13876 uint64x1_t
test_vqshl_n_u64(uint64x1_t a
) {
13877 return vqshl_n_u64(a
, 1);
13880 // CHECK-LABEL: @test_vqshlub_n_s8(
13881 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
13882 // CHECK: [[VQSHLUB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>)
13883 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLUB_N_S8]], i64 0
13884 // CHECK: ret i8 [[TMP1]]
13885 int8_t test_vqshlub_n_s8(int8_t a
) {
13886 return (int8_t)vqshlub_n_s8(a
, 7);
13889 // CHECK-LABEL: @test_vqshluh_n_s16(
13890 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
13891 // CHECK: [[VQSHLUH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 poison, i16 poison, i16 poison>)
13892 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLUH_N_S16]], i64 0
13893 // CHECK: ret i16 [[TMP1]]
13894 int16_t test_vqshluh_n_s16(int16_t a
) {
13895 return (int16_t)vqshluh_n_s16(a
, 15);
13898 // CHECK-LABEL: @test_vqshlus_n_s32(
13899 // CHECK: [[VQSHLUS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %a, i32 31)
13900 // CHECK: ret i32 [[VQSHLUS_N_S32]]
13901 int32_t test_vqshlus_n_s32(int32_t a
) {
13902 return (int32_t)vqshlus_n_s32(a
, 31);
13905 // CHECK-LABEL: @test_vqshlud_n_s64(
13906 // CHECK: [[VQSHLU_N:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %a, i64 63)
13907 // CHECK: ret i64 [[VQSHLU_N]]
13908 int64_t test_vqshlud_n_s64(int64_t a
) {
13909 return (int64_t)vqshlud_n_s64(a
, 63);
13912 // CHECK-LABEL: @test_vqshlu_n_s64(
13913 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13914 // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
13915 // CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> splat (i64 1))
13916 // CHECK: ret <1 x i64> [[VQSHLU_N1]]
13917 uint64x1_t
test_vqshlu_n_s64(int64x1_t a
) {
13918 return vqshlu_n_s64(a
, 1);
13921 // CHECK-LABEL: @test_vsrid_n_s64(
13922 // CHECK: [[VSRID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
13923 // CHECK: [[VSRID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
13924 // CHECK: [[VSRID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_S64]], <1 x i64> [[VSRID_N_S641]], i32 63)
13925 // CHECK: [[VSRID_N_S643:%.*]] = bitcast <1 x i64> [[VSRID_N_S642]] to i64
13926 // CHECK: ret i64 [[VSRID_N_S643]]
13927 int64_t test_vsrid_n_s64(int64_t a
, int64_t b
) {
13928 return (int64_t)vsrid_n_s64(a
, b
, 63);
13931 // CHECK-LABEL: @test_vsri_n_s64(
13932 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13933 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
13934 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
13935 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
13936 // CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
13937 // CHECK: ret <1 x i64> [[VSRI_N2]]
13938 int64x1_t
test_vsri_n_s64(int64x1_t a
, int64x1_t b
) {
13939 return vsri_n_s64(a
, b
, 1);
13942 // CHECK-LABEL: @test_vsrid_n_u64(
13943 // CHECK: [[VSRID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
13944 // CHECK: [[VSRID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
13945 // CHECK: [[VSRID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_U64]], <1 x i64> [[VSRID_N_U641]], i32 63)
13946 // CHECK: [[VSRID_N_U643:%.*]] = bitcast <1 x i64> [[VSRID_N_U642]] to i64
13947 // CHECK: ret i64 [[VSRID_N_U643]]
13948 uint64_t test_vsrid_n_u64(uint64_t a
, uint64_t b
) {
13949 return (uint64_t)vsrid_n_u64(a
, b
, 63);
13952 // CHECK-LABEL: @test_vsri_n_u64(
13953 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13954 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
13955 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
13956 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
13957 // CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
13958 // CHECK: ret <1 x i64> [[VSRI_N2]]
13959 uint64x1_t
test_vsri_n_u64(uint64x1_t a
, uint64x1_t b
) {
13960 return vsri_n_u64(a
, b
, 1);
13963 // CHECK-LABEL: @test_vslid_n_s64(
13964 // CHECK: [[VSLID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
13965 // CHECK: [[VSLID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
13966 // CHECK: [[VSLID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_S64]], <1 x i64> [[VSLID_N_S641]], i32 63)
13967 // CHECK: [[VSLID_N_S643:%.*]] = bitcast <1 x i64> [[VSLID_N_S642]] to i64
13968 // CHECK: ret i64 [[VSLID_N_S643]]
13969 int64_t test_vslid_n_s64(int64_t a
, int64_t b
) {
13970 return (int64_t)vslid_n_s64(a
, b
, 63);
13973 // CHECK-LABEL: @test_vsli_n_s64(
13974 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13975 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
13976 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
13977 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
13978 // CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
13979 // CHECK: ret <1 x i64> [[VSLI_N2]]
13980 int64x1_t
test_vsli_n_s64(int64x1_t a
, int64x1_t b
) {
13981 return vsli_n_s64(a
, b
, 1);
13984 // CHECK-LABEL: @test_vslid_n_u64(
13985 // CHECK: [[VSLID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
13986 // CHECK: [[VSLID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
13987 // CHECK: [[VSLID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_U64]], <1 x i64> [[VSLID_N_U641]], i32 63)
13988 // CHECK: [[VSLID_N_U643:%.*]] = bitcast <1 x i64> [[VSLID_N_U642]] to i64
13989 // CHECK: ret i64 [[VSLID_N_U643]]
13990 uint64_t test_vslid_n_u64(uint64_t a
, uint64_t b
) {
13991 return (uint64_t)vslid_n_u64(a
, b
, 63);
13994 // CHECK-LABEL: @test_vsli_n_u64(
13995 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
13996 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
13997 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
13998 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
13999 // CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
14000 // CHECK: ret <1 x i64> [[VSLI_N2]]
14001 uint64x1_t
test_vsli_n_u64(uint64x1_t a
, uint64x1_t b
) {
14002 return vsli_n_u64(a
, b
, 1);
14005 // CHECK-LABEL: @test_vqshrnh_n_s16(
14006 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
14007 // CHECK: [[VQSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
14008 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_S16]], i64 0
14009 // CHECK: ret i8 [[TMP1]]
14010 int8_t test_vqshrnh_n_s16(int16_t a
) {
14011 return (int8_t)vqshrnh_n_s16(a
, 8);
14014 // CHECK-LABEL: @test_vqshrns_n_s32(
14015 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
14016 // CHECK: [[VQSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
14017 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_S32]], i64 0
14018 // CHECK: ret i16 [[TMP1]]
14019 int16_t test_vqshrns_n_s32(int32_t a
) {
14020 return (int16_t)vqshrns_n_s32(a
, 16);
14023 // CHECK-LABEL: @test_vqshrnd_n_s64(
14024 // CHECK: [[VQSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %a, i32 32)
14025 // CHECK: ret i32 [[VQSHRND_N_S64]]
14026 int32_t test_vqshrnd_n_s64(int64_t a
) {
14027 return (int32_t)vqshrnd_n_s64(a
, 32);
14030 // CHECK-LABEL: @test_vqshrnh_n_u16(
14031 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
14032 // CHECK: [[VQSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
14033 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_U16]], i64 0
14034 // CHECK: ret i8 [[TMP1]]
14035 uint8_t test_vqshrnh_n_u16(uint16_t a
) {
14036 return (uint8_t)vqshrnh_n_u16(a
, 8);
14039 // CHECK-LABEL: @test_vqshrns_n_u32(
14040 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
14041 // CHECK: [[VQSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
14042 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_U32]], i64 0
14043 // CHECK: ret i16 [[TMP1]]
14044 uint16_t test_vqshrns_n_u32(uint32_t a
) {
14045 return (uint16_t)vqshrns_n_u32(a
, 16);
14048 // CHECK-LABEL: @test_vqshrnd_n_u64(
14049 // CHECK: [[VQSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %a, i32 32)
14050 // CHECK: ret i32 [[VQSHRND_N_U64]]
14051 uint32_t test_vqshrnd_n_u64(uint64_t a
) {
14052 return (uint32_t)vqshrnd_n_u64(a
, 32);
14055 // CHECK-LABEL: @test_vqrshrnh_n_s16(
14056 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
14057 // CHECK: [[VQRSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
14058 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_S16]], i64 0
14059 // CHECK: ret i8 [[TMP1]]
14060 int8_t test_vqrshrnh_n_s16(int16_t a
) {
14061 return (int8_t)vqrshrnh_n_s16(a
, 8);
14064 // CHECK-LABEL: @test_vqrshrns_n_s32(
14065 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
14066 // CHECK: [[VQRSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
14067 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_S32]], i64 0
14068 // CHECK: ret i16 [[TMP1]]
14069 int16_t test_vqrshrns_n_s32(int32_t a
) {
14070 return (int16_t)vqrshrns_n_s32(a
, 16);
14073 // CHECK-LABEL: @test_vqrshrnd_n_s64(
14074 // CHECK: [[VQRSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %a, i32 32)
14075 // CHECK: ret i32 [[VQRSHRND_N_S64]]
14076 int32_t test_vqrshrnd_n_s64(int64_t a
) {
14077 return (int32_t)vqrshrnd_n_s64(a
, 32);
14080 // CHECK-LABEL: @test_vqrshrnh_n_u16(
14081 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
14082 // CHECK: [[VQRSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
14083 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_U16]], i64 0
14084 // CHECK: ret i8 [[TMP1]]
14085 uint8_t test_vqrshrnh_n_u16(uint16_t a
) {
14086 return (uint8_t)vqrshrnh_n_u16(a
, 8);
14089 // CHECK-LABEL: @test_vqrshrns_n_u32(
14090 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
14091 // CHECK: [[VQRSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
14092 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_U32]], i64 0
14093 // CHECK: ret i16 [[TMP1]]
14094 uint16_t test_vqrshrns_n_u32(uint32_t a
) {
14095 return (uint16_t)vqrshrns_n_u32(a
, 16);
14098 // CHECK-LABEL: @test_vqrshrnd_n_u64(
14099 // CHECK: [[VQRSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %a, i32 32)
14100 // CHECK: ret i32 [[VQRSHRND_N_U64]]
14101 uint32_t test_vqrshrnd_n_u64(uint64_t a
) {
14102 return (uint32_t)vqrshrnd_n_u64(a
, 32);
14105 // CHECK-LABEL: @test_vqshrunh_n_s16(
14106 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
14107 // CHECK: [[VQSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
14108 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRUNH_N_S16]], i64 0
14109 // CHECK: ret i8 [[TMP1]]
14110 int8_t test_vqshrunh_n_s16(int16_t a
) {
14111 return (int8_t)vqshrunh_n_s16(a
, 8);
14114 // CHECK-LABEL: @test_vqshruns_n_s32(
14115 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
14116 // CHECK: [[VQSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
14117 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRUNS_N_S32]], i64 0
14118 // CHECK: ret i16 [[TMP1]]
14119 int16_t test_vqshruns_n_s32(int32_t a
) {
14120 return (int16_t)vqshruns_n_s32(a
, 16);
14123 // CHECK-LABEL: @test_vqshrund_n_s64(
14124 // CHECK: [[VQSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %a, i32 32)
14125 // CHECK: ret i32 [[VQSHRUND_N_S64]]
14126 int32_t test_vqshrund_n_s64(int64_t a
) {
14127 return (int32_t)vqshrund_n_s64(a
, 32);
14130 // CHECK-LABEL: @test_vqrshrunh_n_s16(
14131 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
14132 // CHECK: [[VQRSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
14133 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRUNH_N_S16]], i64 0
14134 // CHECK: ret i8 [[TMP1]]
14135 uint8_t test_vqrshrunh_n_s16(int16_t a
) {
14136 return (uint8_t)vqrshrunh_n_s16(a
, 8);
14139 // CHECK-LABEL: @test_vqrshruns_n_s32(
14140 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
14141 // CHECK: [[VQRSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
14142 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRUNS_N_S32]], i64 0
14143 // CHECK: ret i16 [[TMP1]]
14144 uint16_t test_vqrshruns_n_s32(int32_t a
) {
14145 return (uint16_t)vqrshruns_n_s32(a
, 16);
14148 // CHECK-LABEL: @test_vqrshrund_n_s64(
14149 // CHECK: [[VQRSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %a, i32 32)
14150 // CHECK: ret i32 [[VQRSHRUND_N_S64]]
14151 uint32_t test_vqrshrund_n_s64(int64_t a
) {
14152 return (uint32_t)vqrshrund_n_s64(a
, 32);
14155 // CHECK-LABEL: @test_vcvts_n_f32_s32(
14156 // CHECK: [[VCVTS_N_F32_S32:%.*]] = call float @llvm.aarch64.neon.vcvtfxs2fp.f32.i32(i32 %a, i32 1)
14157 // CHECK: ret float [[VCVTS_N_F32_S32]]
14158 float32_t
test_vcvts_n_f32_s32(int32_t a
) {
14159 return vcvts_n_f32_s32(a
, 1);
14162 // CHECK-LABEL: @test_vcvtd_n_f64_s64(
14163 // CHECK: [[VCVTD_N_F64_S64:%.*]] = call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %a, i32 1)
14164 // CHECK: ret double [[VCVTD_N_F64_S64]]
14165 float64_t
test_vcvtd_n_f64_s64(int64_t a
) {
14166 return vcvtd_n_f64_s64(a
, 1);
14169 // CHECK-LABEL: @test_vcvts_n_f32_u32(
14170 // CHECK: [[VCVTS_N_F32_U32:%.*]] = call float @llvm.aarch64.neon.vcvtfxu2fp.f32.i32(i32 %a, i32 32)
14171 // CHECK: ret float [[VCVTS_N_F32_U32]]
14172 float32_t
test_vcvts_n_f32_u32(uint32_t a
) {
14173 return vcvts_n_f32_u32(a
, 32);
14176 // CHECK-LABEL: @test_vcvtd_n_f64_u64(
14177 // CHECK: [[VCVTD_N_F64_U64:%.*]] = call double @llvm.aarch64.neon.vcvtfxu2fp.f64.i64(i64 %a, i32 64)
14178 // CHECK: ret double [[VCVTD_N_F64_U64]]
14179 float64_t
test_vcvtd_n_f64_u64(uint64_t a
) {
14180 return vcvtd_n_f64_u64(a
, 64);
14183 // CHECK-LABEL: @test_vcvts_n_s32_f32(
14184 // CHECK: [[VCVTS_N_S32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f32(float %a, i32 1)
14185 // CHECK: ret i32 [[VCVTS_N_S32_F32]]
14186 int32_t test_vcvts_n_s32_f32(float32_t a
) {
14187 return (int32_t)vcvts_n_s32_f32(a
, 1);
14190 // CHECK-LABEL: @test_vcvtd_n_s64_f64(
14191 // CHECK: [[VCVTD_N_S64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f64(double %a, i32 1)
14192 // CHECK: ret i64 [[VCVTD_N_S64_F64]]
14193 int64_t test_vcvtd_n_s64_f64(float64_t a
) {
14194 return (int64_t)vcvtd_n_s64_f64(a
, 1);
14197 // CHECK-LABEL: @test_vcvts_n_u32_f32(
14198 // CHECK: [[VCVTS_N_U32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f32(float %a, i32 32)
14199 // CHECK: ret i32 [[VCVTS_N_U32_F32]]
14200 uint32_t test_vcvts_n_u32_f32(float32_t a
) {
14201 return (uint32_t)vcvts_n_u32_f32(a
, 32);
14204 // CHECK-LABEL: @test_vcvtd_n_u64_f64(
14205 // CHECK: [[VCVTD_N_U64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f64(double %a, i32 64)
14206 // CHECK: ret i64 [[VCVTD_N_U64_F64]]
14207 uint64_t test_vcvtd_n_u64_f64(float64_t a
) {
14208 return (uint64_t)vcvtd_n_u64_f64(a
, 64);
14211 // CHECK-LABEL: @test_vreinterpret_s8_s16(
14212 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
14213 // CHECK: ret <8 x i8> [[TMP0]]
14214 int8x8_t
test_vreinterpret_s8_s16(int16x4_t a
) {
14215 return vreinterpret_s8_s16(a
);
14218 // CHECK-LABEL: @test_vreinterpret_s8_s32(
14219 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
14220 // CHECK: ret <8 x i8> [[TMP0]]
14221 int8x8_t
test_vreinterpret_s8_s32(int32x2_t a
) {
14222 return vreinterpret_s8_s32(a
);
14225 // CHECK-LABEL: @test_vreinterpret_s8_s64(
14226 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14227 // CHECK: ret <8 x i8> [[TMP0]]
14228 int8x8_t
test_vreinterpret_s8_s64(int64x1_t a
) {
14229 return vreinterpret_s8_s64(a
);
14232 // CHECK-LABEL: @test_vreinterpret_s8_u8(
14233 // CHECK: ret <8 x i8> %a
14234 int8x8_t
test_vreinterpret_s8_u8(uint8x8_t a
) {
14235 return vreinterpret_s8_u8(a
);
14238 // CHECK-LABEL: @test_vreinterpret_s8_u16(
14239 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
14240 // CHECK: ret <8 x i8> [[TMP0]]
14241 int8x8_t
test_vreinterpret_s8_u16(uint16x4_t a
) {
14242 return vreinterpret_s8_u16(a
);
14245 // CHECK-LABEL: @test_vreinterpret_s8_u32(
14246 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
14247 // CHECK: ret <8 x i8> [[TMP0]]
14248 int8x8_t
test_vreinterpret_s8_u32(uint32x2_t a
) {
14249 return vreinterpret_s8_u32(a
);
14252 // CHECK-LABEL: @test_vreinterpret_s8_u64(
14253 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14254 // CHECK: ret <8 x i8> [[TMP0]]
14255 int8x8_t
test_vreinterpret_s8_u64(uint64x1_t a
) {
14256 return vreinterpret_s8_u64(a
);
14259 // CHECK-LABEL: @test_vreinterpret_s8_f16(
14260 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
14261 // CHECK: ret <8 x i8> [[TMP0]]
14262 int8x8_t
test_vreinterpret_s8_f16(float16x4_t a
) {
14263 return vreinterpret_s8_f16(a
);
14266 // CHECK-LABEL: @test_vreinterpret_s8_f32(
14267 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
14268 // CHECK: ret <8 x i8> [[TMP0]]
14269 int8x8_t
test_vreinterpret_s8_f32(float32x2_t a
) {
14270 return vreinterpret_s8_f32(a
);
14273 // CHECK-LABEL: @test_vreinterpret_s8_f64(
14274 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
14275 // CHECK: ret <8 x i8> [[TMP0]]
14276 int8x8_t
test_vreinterpret_s8_f64(float64x1_t a
) {
14277 return vreinterpret_s8_f64(a
);
14280 // CHECK-LABEL: @test_vreinterpret_s8_p8(
14281 // CHECK: ret <8 x i8> %a
14282 int8x8_t
test_vreinterpret_s8_p8(poly8x8_t a
) {
14283 return vreinterpret_s8_p8(a
);
14286 // CHECK-LABEL: @test_vreinterpret_s8_p16(
14287 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
14288 // CHECK: ret <8 x i8> [[TMP0]]
14289 int8x8_t
test_vreinterpret_s8_p16(poly16x4_t a
) {
14290 return vreinterpret_s8_p16(a
);
14293 // CHECK-LABEL: @test_vreinterpret_s8_p64(
14294 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14295 // CHECK: ret <8 x i8> [[TMP0]]
14296 int8x8_t
test_vreinterpret_s8_p64(poly64x1_t a
) {
14297 return vreinterpret_s8_p64(a
);
14300 // CHECK-LABEL: @test_vreinterpret_s16_s8(
14301 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
14302 // CHECK: ret <4 x i16> [[TMP0]]
14303 int16x4_t
test_vreinterpret_s16_s8(int8x8_t a
) {
14304 return vreinterpret_s16_s8(a
);
14307 // CHECK-LABEL: @test_vreinterpret_s16_s32(
14308 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
14309 // CHECK: ret <4 x i16> [[TMP0]]
14310 int16x4_t
test_vreinterpret_s16_s32(int32x2_t a
) {
14311 return vreinterpret_s16_s32(a
);
14314 // CHECK-LABEL: @test_vreinterpret_s16_s64(
14315 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
14316 // CHECK: ret <4 x i16> [[TMP0]]
14317 int16x4_t
test_vreinterpret_s16_s64(int64x1_t a
) {
14318 return vreinterpret_s16_s64(a
);
14321 // CHECK-LABEL: @test_vreinterpret_s16_u8(
14322 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
14323 // CHECK: ret <4 x i16> [[TMP0]]
14324 int16x4_t
test_vreinterpret_s16_u8(uint8x8_t a
) {
14325 return vreinterpret_s16_u8(a
);
14328 // CHECK-LABEL: @test_vreinterpret_s16_u16(
14329 // CHECK: ret <4 x i16> %a
14330 int16x4_t
test_vreinterpret_s16_u16(uint16x4_t a
) {
14331 return vreinterpret_s16_u16(a
);
14334 // CHECK-LABEL: @test_vreinterpret_s16_u32(
14335 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
14336 // CHECK: ret <4 x i16> [[TMP0]]
14337 int16x4_t
test_vreinterpret_s16_u32(uint32x2_t a
) {
14338 return vreinterpret_s16_u32(a
);
14341 // CHECK-LABEL: @test_vreinterpret_s16_u64(
14342 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
14343 // CHECK: ret <4 x i16> [[TMP0]]
14344 int16x4_t
test_vreinterpret_s16_u64(uint64x1_t a
) {
14345 return vreinterpret_s16_u64(a
);
14348 // CHECK-LABEL: @test_vreinterpret_s16_f16(
14349 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
14350 // CHECK: ret <4 x i16> [[TMP0]]
14351 int16x4_t
test_vreinterpret_s16_f16(float16x4_t a
) {
14352 return vreinterpret_s16_f16(a
);
14355 // CHECK-LABEL: @test_vreinterpret_s16_f32(
14356 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
14357 // CHECK: ret <4 x i16> [[TMP0]]
14358 int16x4_t
test_vreinterpret_s16_f32(float32x2_t a
) {
14359 return vreinterpret_s16_f32(a
);
14362 // CHECK-LABEL: @test_vreinterpret_s16_f64(
14363 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
14364 // CHECK: ret <4 x i16> [[TMP0]]
14365 int16x4_t
test_vreinterpret_s16_f64(float64x1_t a
) {
14366 return vreinterpret_s16_f64(a
);
14369 // CHECK-LABEL: @test_vreinterpret_s16_p8(
14370 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
14371 // CHECK: ret <4 x i16> [[TMP0]]
14372 int16x4_t
test_vreinterpret_s16_p8(poly8x8_t a
) {
14373 return vreinterpret_s16_p8(a
);
14376 // CHECK-LABEL: @test_vreinterpret_s16_p16(
14377 // CHECK: ret <4 x i16> %a
14378 int16x4_t
test_vreinterpret_s16_p16(poly16x4_t a
) {
14379 return vreinterpret_s16_p16(a
);
14382 // CHECK-LABEL: @test_vreinterpret_s16_p64(
14383 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
14384 // CHECK: ret <4 x i16> [[TMP0]]
14385 int16x4_t
test_vreinterpret_s16_p64(poly64x1_t a
) {
14386 return vreinterpret_s16_p64(a
);
14389 // CHECK-LABEL: @test_vreinterpret_s32_s8(
14390 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
14391 // CHECK: ret <2 x i32> [[TMP0]]
14392 int32x2_t
test_vreinterpret_s32_s8(int8x8_t a
) {
14393 return vreinterpret_s32_s8(a
);
14396 // CHECK-LABEL: @test_vreinterpret_s32_s16(
14397 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
14398 // CHECK: ret <2 x i32> [[TMP0]]
14399 int32x2_t
test_vreinterpret_s32_s16(int16x4_t a
) {
14400 return vreinterpret_s32_s16(a
);
14403 // CHECK-LABEL: @test_vreinterpret_s32_s64(
14404 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
14405 // CHECK: ret <2 x i32> [[TMP0]]
14406 int32x2_t
test_vreinterpret_s32_s64(int64x1_t a
) {
14407 return vreinterpret_s32_s64(a
);
14410 // CHECK-LABEL: @test_vreinterpret_s32_u8(
14411 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
14412 // CHECK: ret <2 x i32> [[TMP0]]
14413 int32x2_t
test_vreinterpret_s32_u8(uint8x8_t a
) {
14414 return vreinterpret_s32_u8(a
);
14417 // CHECK-LABEL: @test_vreinterpret_s32_u16(
14418 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
14419 // CHECK: ret <2 x i32> [[TMP0]]
14420 int32x2_t
test_vreinterpret_s32_u16(uint16x4_t a
) {
14421 return vreinterpret_s32_u16(a
);
14424 // CHECK-LABEL: @test_vreinterpret_s32_u32(
14425 // CHECK: ret <2 x i32> %a
14426 int32x2_t
test_vreinterpret_s32_u32(uint32x2_t a
) {
14427 return vreinterpret_s32_u32(a
);
14430 // CHECK-LABEL: @test_vreinterpret_s32_u64(
14431 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
14432 // CHECK: ret <2 x i32> [[TMP0]]
14433 int32x2_t
test_vreinterpret_s32_u64(uint64x1_t a
) {
14434 return vreinterpret_s32_u64(a
);
14437 // CHECK-LABEL: @test_vreinterpret_s32_f16(
14438 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
14439 // CHECK: ret <2 x i32> [[TMP0]]
14440 int32x2_t
test_vreinterpret_s32_f16(float16x4_t a
) {
14441 return vreinterpret_s32_f16(a
);
14444 // CHECK-LABEL: @test_vreinterpret_s32_f32(
14445 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
14446 // CHECK: ret <2 x i32> [[TMP0]]
14447 int32x2_t
test_vreinterpret_s32_f32(float32x2_t a
) {
14448 return vreinterpret_s32_f32(a
);
14451 // CHECK-LABEL: @test_vreinterpret_s32_f64(
14452 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
14453 // CHECK: ret <2 x i32> [[TMP0]]
14454 int32x2_t
test_vreinterpret_s32_f64(float64x1_t a
) {
14455 return vreinterpret_s32_f64(a
);
14458 // CHECK-LABEL: @test_vreinterpret_s32_p8(
14459 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
14460 // CHECK: ret <2 x i32> [[TMP0]]
14461 int32x2_t
test_vreinterpret_s32_p8(poly8x8_t a
) {
14462 return vreinterpret_s32_p8(a
);
14465 // CHECK-LABEL: @test_vreinterpret_s32_p16(
14466 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
14467 // CHECK: ret <2 x i32> [[TMP0]]
14468 int32x2_t
test_vreinterpret_s32_p16(poly16x4_t a
) {
14469 return vreinterpret_s32_p16(a
);
14472 // CHECK-LABEL: @test_vreinterpret_s32_p64(
14473 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
14474 // CHECK: ret <2 x i32> [[TMP0]]
14475 int32x2_t
test_vreinterpret_s32_p64(poly64x1_t a
) {
14476 return vreinterpret_s32_p64(a
);
14479 // CHECK-LABEL: @test_vreinterpret_s64_s8(
14480 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
14481 // CHECK: ret <1 x i64> [[TMP0]]
14482 int64x1_t
test_vreinterpret_s64_s8(int8x8_t a
) {
14483 return vreinterpret_s64_s8(a
);
14486 // CHECK-LABEL: @test_vreinterpret_s64_s16(
14487 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
14488 // CHECK: ret <1 x i64> [[TMP0]]
14489 int64x1_t
test_vreinterpret_s64_s16(int16x4_t a
) {
14490 return vreinterpret_s64_s16(a
);
14493 // CHECK-LABEL: @test_vreinterpret_s64_s32(
14494 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
14495 // CHECK: ret <1 x i64> [[TMP0]]
14496 int64x1_t
test_vreinterpret_s64_s32(int32x2_t a
) {
14497 return vreinterpret_s64_s32(a
);
14500 // CHECK-LABEL: @test_vreinterpret_s64_u8(
14501 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
14502 // CHECK: ret <1 x i64> [[TMP0]]
14503 int64x1_t
test_vreinterpret_s64_u8(uint8x8_t a
) {
14504 return vreinterpret_s64_u8(a
);
14507 // CHECK-LABEL: @test_vreinterpret_s64_u16(
14508 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
14509 // CHECK: ret <1 x i64> [[TMP0]]
14510 int64x1_t
test_vreinterpret_s64_u16(uint16x4_t a
) {
14511 return vreinterpret_s64_u16(a
);
14514 // CHECK-LABEL: @test_vreinterpret_s64_u32(
14515 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
14516 // CHECK: ret <1 x i64> [[TMP0]]
14517 int64x1_t
test_vreinterpret_s64_u32(uint32x2_t a
) {
14518 return vreinterpret_s64_u32(a
);
14521 // CHECK-LABEL: @test_vreinterpret_s64_u64(
14522 // CHECK: ret <1 x i64> %a
14523 int64x1_t
test_vreinterpret_s64_u64(uint64x1_t a
) {
14524 return vreinterpret_s64_u64(a
);
14527 // CHECK-LABEL: @test_vreinterpret_s64_f16(
14528 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
14529 // CHECK: ret <1 x i64> [[TMP0]]
14530 int64x1_t
test_vreinterpret_s64_f16(float16x4_t a
) {
14531 return vreinterpret_s64_f16(a
);
14534 // CHECK-LABEL: @test_vreinterpret_s64_f32(
14535 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
14536 // CHECK: ret <1 x i64> [[TMP0]]
14537 int64x1_t
test_vreinterpret_s64_f32(float32x2_t a
) {
14538 return vreinterpret_s64_f32(a
);
14541 // CHECK-LABEL: @test_vreinterpret_s64_f64(
14542 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
14543 // CHECK: ret <1 x i64> [[TMP0]]
14544 int64x1_t
test_vreinterpret_s64_f64(float64x1_t a
) {
14545 return vreinterpret_s64_f64(a
);
14548 // CHECK-LABEL: @test_vreinterpret_s64_p8(
14549 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
14550 // CHECK: ret <1 x i64> [[TMP0]]
14551 int64x1_t
test_vreinterpret_s64_p8(poly8x8_t a
) {
14552 return vreinterpret_s64_p8(a
);
14555 // CHECK-LABEL: @test_vreinterpret_s64_p16(
14556 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
14557 // CHECK: ret <1 x i64> [[TMP0]]
14558 int64x1_t
test_vreinterpret_s64_p16(poly16x4_t a
) {
14559 return vreinterpret_s64_p16(a
);
14562 // CHECK-LABEL: @test_vreinterpret_s64_p64(
14563 // CHECK: ret <1 x i64> %a
14564 int64x1_t
test_vreinterpret_s64_p64(poly64x1_t a
) {
14565 return vreinterpret_s64_p64(a
);
14568 // CHECK-LABEL: @test_vreinterpret_u8_s8(
14569 // CHECK: ret <8 x i8> %a
14570 uint8x8_t
test_vreinterpret_u8_s8(int8x8_t a
) {
14571 return vreinterpret_u8_s8(a
);
14574 // CHECK-LABEL: @test_vreinterpret_u8_s16(
14575 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
14576 // CHECK: ret <8 x i8> [[TMP0]]
14577 uint8x8_t
test_vreinterpret_u8_s16(int16x4_t a
) {
14578 return vreinterpret_u8_s16(a
);
14581 // CHECK-LABEL: @test_vreinterpret_u8_s32(
14582 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
14583 // CHECK: ret <8 x i8> [[TMP0]]
14584 uint8x8_t
test_vreinterpret_u8_s32(int32x2_t a
) {
14585 return vreinterpret_u8_s32(a
);
14588 // CHECK-LABEL: @test_vreinterpret_u8_s64(
14589 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14590 // CHECK: ret <8 x i8> [[TMP0]]
14591 uint8x8_t
test_vreinterpret_u8_s64(int64x1_t a
) {
14592 return vreinterpret_u8_s64(a
);
14595 // CHECK-LABEL: @test_vreinterpret_u8_u16(
14596 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
14597 // CHECK: ret <8 x i8> [[TMP0]]
14598 uint8x8_t
test_vreinterpret_u8_u16(uint16x4_t a
) {
14599 return vreinterpret_u8_u16(a
);
14602 // CHECK-LABEL: @test_vreinterpret_u8_u32(
14603 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
14604 // CHECK: ret <8 x i8> [[TMP0]]
14605 uint8x8_t
test_vreinterpret_u8_u32(uint32x2_t a
) {
14606 return vreinterpret_u8_u32(a
);
14609 // CHECK-LABEL: @test_vreinterpret_u8_u64(
14610 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14611 // CHECK: ret <8 x i8> [[TMP0]]
14612 uint8x8_t
test_vreinterpret_u8_u64(uint64x1_t a
) {
14613 return vreinterpret_u8_u64(a
);
14616 // CHECK-LABEL: @test_vreinterpret_u8_f16(
14617 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
14618 // CHECK: ret <8 x i8> [[TMP0]]
14619 uint8x8_t
test_vreinterpret_u8_f16(float16x4_t a
) {
14620 return vreinterpret_u8_f16(a
);
14623 // CHECK-LABEL: @test_vreinterpret_u8_f32(
14624 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
14625 // CHECK: ret <8 x i8> [[TMP0]]
14626 uint8x8_t
test_vreinterpret_u8_f32(float32x2_t a
) {
14627 return vreinterpret_u8_f32(a
);
14630 // CHECK-LABEL: @test_vreinterpret_u8_f64(
14631 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
14632 // CHECK: ret <8 x i8> [[TMP0]]
14633 uint8x8_t
test_vreinterpret_u8_f64(float64x1_t a
) {
14634 return vreinterpret_u8_f64(a
);
14637 // CHECK-LABEL: @test_vreinterpret_u8_p8(
14638 // CHECK: ret <8 x i8> %a
14639 uint8x8_t
test_vreinterpret_u8_p8(poly8x8_t a
) {
14640 return vreinterpret_u8_p8(a
);
14643 // CHECK-LABEL: @test_vreinterpret_u8_p16(
14644 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
14645 // CHECK: ret <8 x i8> [[TMP0]]
14646 uint8x8_t
test_vreinterpret_u8_p16(poly16x4_t a
) {
14647 return vreinterpret_u8_p16(a
);
14650 // CHECK-LABEL: @test_vreinterpret_u8_p64(
14651 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14652 // CHECK: ret <8 x i8> [[TMP0]]
14653 uint8x8_t
test_vreinterpret_u8_p64(poly64x1_t a
) {
14654 return vreinterpret_u8_p64(a
);
// CHECK-LABEL: @test_vreinterpret_u16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) {
  return vreinterpret_u16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s16(
// CHECK: ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) {
  return vreinterpret_u16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) {
  return vreinterpret_u16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) {
  return vreinterpret_u16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) {
  return vreinterpret_u16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) {
  return vreinterpret_u16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
  return vreinterpret_u16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
  return vreinterpret_u16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) {
  return vreinterpret_u16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f64(float64x1_t a) {
  return vreinterpret_u16_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) {
  return vreinterpret_u16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_p16(
// CHECK: ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) {
  return vreinterpret_u16_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p64(poly64x1_t a) {
  return vreinterpret_u16_p64(a);
}
// CHECK-LABEL: @test_vreinterpret_u32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) {
  return vreinterpret_u32_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) {
  return vreinterpret_u32_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s32(
// CHECK: ret <2 x i32> %a
uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
  return vreinterpret_u32_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
  return vreinterpret_u32_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
  return vreinterpret_u32_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
  return vreinterpret_u32_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
  return vreinterpret_u32_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
  return vreinterpret_u32_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
  return vreinterpret_u32_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f64(float64x1_t a) {
  return vreinterpret_u32_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
  return vreinterpret_u32_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
  return vreinterpret_u32_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p64(poly64x1_t a) {
  return vreinterpret_u32_p64(a);
}
// CHECK-LABEL: @test_vreinterpret_u64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
  return vreinterpret_u64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
  return vreinterpret_u64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
  return vreinterpret_u64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s64(
// CHECK: ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
  return vreinterpret_u64_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
  return vreinterpret_u64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
  return vreinterpret_u64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
  return vreinterpret_u64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
  return vreinterpret_u64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
  return vreinterpret_u64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f64(float64x1_t a) {
  return vreinterpret_u64_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
  return vreinterpret_u64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
  return vreinterpret_u64_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_p64(
// CHECK: ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_p64(poly64x1_t a) {
  return vreinterpret_u64_p64(a);
}
// CHECK-LABEL: @test_vreinterpret_f16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
  return vreinterpret_f16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
  return vreinterpret_f16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
  return vreinterpret_f16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
  return vreinterpret_f16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
  return vreinterpret_f16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
  return vreinterpret_f16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
  return vreinterpret_f16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
  return vreinterpret_f16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
  return vreinterpret_f16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f64(float64x1_t a) {
  return vreinterpret_f16_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
  return vreinterpret_f16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
  return vreinterpret_f16_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p64(poly64x1_t a) {
  return vreinterpret_f16_p64(a);
}
// CHECK-LABEL: @test_vreinterpret_f32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
  return vreinterpret_f32_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
  return vreinterpret_f32_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
  return vreinterpret_f32_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
  return vreinterpret_f32_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
  return vreinterpret_f32_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
  return vreinterpret_f32_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
  return vreinterpret_f32_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
  return vreinterpret_f32_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
  return vreinterpret_f32_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f64(float64x1_t a) {
  return vreinterpret_f32_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
  return vreinterpret_f32_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
  return vreinterpret_f32_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p64(poly64x1_t a) {
  return vreinterpret_f32_p64(a);
}
// CHECK-LABEL: @test_vreinterpret_f64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s8(int8x8_t a) {
  return vreinterpret_f64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s16(int16x4_t a) {
  return vreinterpret_f64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s32(int32x2_t a) {
  return vreinterpret_f64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_s64(int64x1_t a) {
  return vreinterpret_f64_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u8(uint8x8_t a) {
  return vreinterpret_f64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u16(uint16x4_t a) {
  return vreinterpret_f64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u32(uint32x2_t a) {
  return vreinterpret_f64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_u64(uint64x1_t a) {
  return vreinterpret_f64_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f16(float16x4_t a) {
  return vreinterpret_f64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_f32(float32x2_t a) {
  return vreinterpret_f64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p8(poly8x8_t a) {
  return vreinterpret_f64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p16(poly16x4_t a) {
  return vreinterpret_f64_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_f64_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[TMP0]]
float64x1_t test_vreinterpret_f64_p64(poly64x1_t a) {
  return vreinterpret_f64_p64(a);
}
// CHECK-LABEL: @test_vreinterpret_p8_s8(
// CHECK: ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
  return vreinterpret_p8_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
  return vreinterpret_p8_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
  return vreinterpret_p8_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
  return vreinterpret_p8_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u8(
// CHECK: ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
  return vreinterpret_p8_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
  return vreinterpret_p8_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
  return vreinterpret_p8_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
  return vreinterpret_p8_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
  return vreinterpret_p8_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
  return vreinterpret_p8_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f64(float64x1_t a) {
  return vreinterpret_p8_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
  return vreinterpret_p8_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p64(poly64x1_t a) {
  return vreinterpret_p8_p64(a);
}
// CHECK-LABEL: @test_vreinterpret_p16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
  return vreinterpret_p16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s16(
// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
  return vreinterpret_p16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
  return vreinterpret_p16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
  return vreinterpret_p16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
  return vreinterpret_p16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u16(
// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
  return vreinterpret_p16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
  return vreinterpret_p16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
  return vreinterpret_p16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
  return vreinterpret_p16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
  return vreinterpret_p16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f64(float64x1_t a) {
  return vreinterpret_p16_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
  return vreinterpret_p16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p64(poly64x1_t a) {
  return vreinterpret_p16_p64(a);
}
// CHECK-LABEL: @test_vreinterpret_p64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s8(int8x8_t a) {
  return vreinterpret_p64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s16(int16x4_t a) {
  return vreinterpret_p64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s32(int32x2_t a) {
  return vreinterpret_p64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_s64(
// CHECK: ret <1 x i64> %a
poly64x1_t test_vreinterpret_p64_s64(int64x1_t a) {
  return vreinterpret_p64_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u8(uint8x8_t a) {
  return vreinterpret_p64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u16(uint16x4_t a) {
  return vreinterpret_p64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u32(uint32x2_t a) {
  return vreinterpret_p64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u64(
// CHECK: ret <1 x i64> %a
poly64x1_t test_vreinterpret_p64_u64(uint64x1_t a) {
  return vreinterpret_p64_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f16(float16x4_t a) {
  return vreinterpret_p64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f32(float32x2_t a) {
  return vreinterpret_p64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f64(float64x1_t a) {
  return vreinterpret_p64_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_p8(poly8x8_t a) {
  return vreinterpret_p64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_p16(poly16x4_t a) {
  return vreinterpret_p64_p16(a);
}
// CHECK-LABEL: @test_vreinterpretq_s8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
  return vreinterpretq_s8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
  return vreinterpretq_s8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
  return vreinterpretq_s8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u8(
// CHECK: ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
  return vreinterpretq_s8_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
  return vreinterpretq_s8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
  return vreinterpretq_s8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
  return vreinterpretq_s8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
  return vreinterpretq_s8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
  return vreinterpretq_s8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f64(float64x2_t a) {
  return vreinterpretq_s8_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_p8(
// CHECK: ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
  return vreinterpretq_s8_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
  return vreinterpretq_s8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p64(poly64x2_t a) {
  return vreinterpretq_s8_p64(a);
}
// CHECK-LABEL: @test_vreinterpretq_s16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
  return vreinterpretq_s16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
  return vreinterpretq_s16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
  return vreinterpretq_s16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
  return vreinterpretq_s16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u16(
// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
  return vreinterpretq_s16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
  return vreinterpretq_s16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
  return vreinterpretq_s16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
  return vreinterpretq_s16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
  return vreinterpretq_s16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f64(float64x2_t a) {
  return vreinterpretq_s16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
  return vreinterpretq_s16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_p16(
// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
  return vreinterpretq_s16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p64(poly64x2_t a) {
  return vreinterpretq_s16_p64(a);
}
// CHECK-LABEL: @test_vreinterpretq_s32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
  return vreinterpretq_s32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
  return vreinterpretq_s32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
  return vreinterpretq_s32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
  return vreinterpretq_s32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
  return vreinterpretq_s32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u32(
// CHECK: ret <4 x i32> %a
int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
  return vreinterpretq_s32_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
  return vreinterpretq_s32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
  return vreinterpretq_s32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
  return vreinterpretq_s32_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f64(float64x2_t a) {
  return vreinterpretq_s32_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
  return vreinterpretq_s32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
  return vreinterpretq_s32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p64(poly64x2_t a) {
  return vreinterpretq_s32_p64(a);
}
// CHECK-LABEL: @test_vreinterpretq_s64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
  return vreinterpretq_s64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
  return vreinterpretq_s64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
  return vreinterpretq_s64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
  return vreinterpretq_s64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
  return vreinterpretq_s64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
  return vreinterpretq_s64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u64(
// CHECK: ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
  return vreinterpretq_s64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
  return vreinterpretq_s64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
  return vreinterpretq_s64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f64(float64x2_t a) {
  return vreinterpretq_s64_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
  return vreinterpretq_s64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
  return vreinterpretq_s64_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p64(
// CHECK: ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_p64(poly64x2_t a) {
  return vreinterpretq_s64_p64(a);
}
// CHECK-LABEL: @test_vreinterpretq_u8_s8(
// CHECK: ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
  return vreinterpretq_u8_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
  return vreinterpretq_u8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
  return vreinterpretq_u8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
  return vreinterpretq_u8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
  return vreinterpretq_u8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
  return vreinterpretq_u8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
  return vreinterpretq_u8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
  return vreinterpretq_u8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
  return vreinterpretq_u8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f64(float64x2_t a) {
  return vreinterpretq_u8_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p8(
// CHECK: ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
  return vreinterpretq_u8_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
  return vreinterpretq_u8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p64(poly64x2_t a) {
  return vreinterpretq_u8_p64(a);
}
// CHECK-LABEL: @test_vreinterpretq_u16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
  return vreinterpretq_u16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s16(
// CHECK: ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
  return vreinterpretq_u16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
  return vreinterpretq_u16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
  return vreinterpretq_u16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
  return vreinterpretq_u16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
  return vreinterpretq_u16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
  return vreinterpretq_u16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
  return vreinterpretq_u16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
  return vreinterpretq_u16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f64(float64x2_t a) {
  return vreinterpretq_u16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
  return vreinterpretq_u16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p16(
// CHECK: ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
  return vreinterpretq_u16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p64(poly64x2_t a) {
  return vreinterpretq_u16_p64(a);
}
// CHECK-LABEL: @test_vreinterpretq_u32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
  return vreinterpretq_u32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
  return vreinterpretq_u32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s32(
// CHECK: ret <4 x i32> %a
uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
  return vreinterpretq_u32_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
  return vreinterpretq_u32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
  return vreinterpretq_u32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
  return vreinterpretq_u32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
  return vreinterpretq_u32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
  return vreinterpretq_u32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
  return vreinterpretq_u32_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f64(float64x2_t a) {
  return vreinterpretq_u32_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
  return vreinterpretq_u32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
  return vreinterpretq_u32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p64(poly64x2_t a) {
  return vreinterpretq_u32_p64(a);
}
// CHECK-LABEL: @test_vreinterpretq_u64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
  return vreinterpretq_u64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
  return vreinterpretq_u64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
  return vreinterpretq_u64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s64(
// CHECK: ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
  return vreinterpretq_u64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
  return vreinterpretq_u64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
  return vreinterpretq_u64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
  return vreinterpretq_u64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
  return vreinterpretq_u64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
  return vreinterpretq_u64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f64(float64x2_t a) {
  return vreinterpretq_u64_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
  return vreinterpretq_u64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
  return vreinterpretq_u64_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p64(
// CHECK: ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_p64(poly64x2_t a) {
  return vreinterpretq_u64_p64(a);
}
// CHECK-LABEL: @test_vreinterpretq_f16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
  return vreinterpretq_f16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
  return vreinterpretq_f16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
  return vreinterpretq_f16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
  return vreinterpretq_f16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
  return vreinterpretq_f16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
  return vreinterpretq_f16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
  return vreinterpretq_f16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
  return vreinterpretq_f16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
  return vreinterpretq_f16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f64(float64x2_t a) {
  return vreinterpretq_f16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
  return vreinterpretq_f16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
  return vreinterpretq_f16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p64(poly64x2_t a) {
  return vreinterpretq_f16_p64(a);
}
// CHECK-LABEL: @test_vreinterpretq_f32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
  return vreinterpretq_f32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
  return vreinterpretq_f32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
  return vreinterpretq_f32_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
  return vreinterpretq_f32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
  return vreinterpretq_f32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
  return vreinterpretq_f32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
  return vreinterpretq_f32_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
  return vreinterpretq_f32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
  return vreinterpretq_f32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f64(float64x2_t a) {
  return vreinterpretq_f32_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
  return vreinterpretq_f32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
  return vreinterpretq_f32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p64(poly64x2_t a) {
  return vreinterpretq_f32_p64(a);
}
// CHECK-LABEL: @test_vreinterpretq_f64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s8(int8x16_t a) {
  return vreinterpretq_f64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s16(int16x8_t a) {
  return vreinterpretq_f64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s32(int32x4_t a) {
  return vreinterpretq_f64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s64(int64x2_t a) {
  return vreinterpretq_f64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u8(uint8x16_t a) {
  return vreinterpretq_f64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u16(uint16x8_t a) {
  return vreinterpretq_f64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u32(uint32x4_t a) {
  return vreinterpretq_f64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u64(uint64x2_t a) {
  return vreinterpretq_f64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_f16(float16x8_t a) {
  return vreinterpretq_f64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_f32(float32x4_t a) {
  return vreinterpretq_f64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p8(poly8x16_t a) {
  return vreinterpretq_f64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p16(poly16x8_t a) {
  return vreinterpretq_f64_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p64(poly64x2_t a) {
  return vreinterpretq_f64_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s8(
// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
  return vreinterpretq_p8_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
  return vreinterpretq_p8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
  return vreinterpretq_p8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
  return vreinterpretq_p8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u8(
// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
  return vreinterpretq_p8_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
  return vreinterpretq_p8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
  return vreinterpretq_p8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
  return vreinterpretq_p8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
  return vreinterpretq_p8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
  return vreinterpretq_p8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f64(float64x2_t a) {
  return vreinterpretq_p8_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
  return vreinterpretq_p8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p64(poly64x2_t a) {
  return vreinterpretq_p8_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
  return vreinterpretq_p16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s16(
// CHECK: ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
  return vreinterpretq_p16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
  return vreinterpretq_p16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
  return vreinterpretq_p16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
  return vreinterpretq_p16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u16(
// CHECK: ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
  return vreinterpretq_p16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
  return vreinterpretq_p16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
  return vreinterpretq_p16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
  return vreinterpretq_p16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
  return vreinterpretq_p16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f64(float64x2_t a) {
  return vreinterpretq_p16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
  return vreinterpretq_p16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p64(poly64x2_t a) {
  return vreinterpretq_p16_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s8(int8x16_t a) {
  return vreinterpretq_p64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s16(int16x8_t a) {
  return vreinterpretq_p64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s32(int32x4_t a) {
  return vreinterpretq_p64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s64(
// CHECK: ret <2 x i64> %a
poly64x2_t test_vreinterpretq_p64_s64(int64x2_t a) {
  return vreinterpretq_p64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u8(uint8x16_t a) {
  return vreinterpretq_p64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u16(uint16x8_t a) {
  return vreinterpretq_p64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u32(uint32x4_t a) {
  return vreinterpretq_p64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u64(
// CHECK: ret <2 x i64> %a
poly64x2_t test_vreinterpretq_p64_u64(uint64x2_t a) {
  return vreinterpretq_p64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f16(float16x8_t a) {
  return vreinterpretq_p64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f32(float32x4_t a) {
  return vreinterpretq_p64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f64(float64x2_t a) {
  return vreinterpretq_p64_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p8(poly8x16_t a) {
  return vreinterpretq_p64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p16(poly16x8_t a) {
  return vreinterpretq_p64_p16(a);
}

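// The two scalar tests that follow exercise the floating-point absolute-difference
// intrinsics; as their CHECK lines show, they lower to the AArch64 scalar
// @llvm.aarch64.sisd.fabd.* intrinsics rather than a vector form.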
// CHECK-LABEL: @test_vabds_f32(
// CHECK: [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b)
// CHECK: ret float [[VABDS_F32_I]]
float32_t test_vabds_f32(float32_t a, float32_t b) {
  return vabds_f32(a, b);
}

// CHECK-LABEL: @test_vabdd_f64(
// CHECK: [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b)
// CHECK: ret double [[VABDD_F64_I]]
float64_t test_vabdd_f64(float64_t a, float64_t b) {
  return vabdd_f64(a, b);
}

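// The vuqadd*/vsqadd* tests below mix operand signedness: suqadd saturating-adds an
// unsigned vector into a signed accumulator, while usqadd accumulates a signed vector
// into an unsigned one, matching the C signatures of the intrinsics.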
// CHECK-LABEL: @test_vuqaddq_s8(
// CHECK-NEXT: [[V:%.*]] = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK-NEXT: ret <16 x i8> [[V]]
int8x16_t test_vuqaddq_s8(int8x16_t a, uint8x16_t b) {
  return vuqaddq_s8(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s32(
// CHECK: [[V:%.*]] = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK-NEXT: ret <4 x i32> [[V]]
int32x4_t test_vuqaddq_s32(int32x4_t a, uint32x4_t b) {
  return vuqaddq_s32(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s64(
// CHECK: [[V:%.*]] = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK-NEXT: ret <2 x i64> [[V]]
int64x2_t test_vuqaddq_s64(int64x2_t a, uint64x2_t b) {
  return vuqaddq_s64(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s16(
// CHECK: [[V:%.*]] = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK-NEXT: ret <8 x i16> [[V]]
int16x8_t test_vuqaddq_s16(int16x8_t a, uint16x8_t b) {
  return vuqaddq_s16(a, b);
}

// CHECK-LABEL: @test_vuqadd_s8(
// CHECK-NEXT: [[V:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK-NEXT: ret <8 x i8> [[V]]
int8x8_t test_vuqadd_s8(int8x8_t a, uint8x8_t b) {
  return vuqadd_s8(a, b);
}

// CHECK-LABEL: @test_vuqadd_s32(
// CHECK: [[V:%.*]] = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK-NEXT: ret <2 x i32> [[V]]
int32x2_t test_vuqadd_s32(int32x2_t a, uint32x2_t b) {
  return vuqadd_s32(a, b);
}

// CHECK-LABEL: @test_vuqadd_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VUQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: ret <1 x i64> [[VUQADD2_I]]
int64x1_t test_vuqadd_s64(int64x1_t a, uint64x1_t b) {
  return vuqadd_s64(a, b);
}

// CHECK-LABEL: @test_vuqadd_s16(
// CHECK: [[V:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK-NEXT: ret <4 x i16> [[V]]
int16x4_t test_vuqadd_s16(int16x4_t a, uint16x4_t b) {
  return vuqadd_s16(a, b);
}

// CHECK-LABEL: @test_vsqadd_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: ret <1 x i64> [[VSQADD2_I]]
uint64x1_t test_vsqadd_u64(uint64x1_t a, int64x1_t b) {
  return vsqadd_u64(a, b);
}

// CHECK-LABEL: @test_vsqadd_u8(
// CHECK: [[VSQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VSQADD_I]]
uint8x8_t test_vsqadd_u8(uint8x8_t a, int8x8_t b) {
  return vsqadd_u8(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u8(
// CHECK: [[VSQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VSQADD_I]]
uint8x16_t test_vsqaddq_u8(uint8x16_t a, int8x16_t b) {
  return vsqaddq_u8(a, b);
}

// CHECK-LABEL: @test_vsqadd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i16> [[VSQADD2_I]]
uint16x4_t test_vsqadd_u16(uint16x4_t a, int16x4_t b) {
  return vsqadd_u16(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i16> [[VSQADD2_I]]
uint16x8_t test_vsqaddq_u16(uint16x8_t a, int16x8_t b) {
  return vsqaddq_u16(a, b);
}

// CHECK-LABEL: @test_vsqadd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i32> [[VSQADD2_I]]
uint32x2_t test_vsqadd_u32(uint32x2_t a, int32x2_t b) {
  return vsqadd_u32(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: ret <4 x i32> [[VSQADD2_I]]
uint32x4_t test_vsqaddq_u32(uint32x4_t a, int32x4_t b) {
  return vsqaddq_u32(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: ret <2 x i64> [[VSQADD2_I]]
uint64x2_t test_vsqaddq_u64(uint64x2_t a, int64x2_t b) {
  return vsqaddq_u64(a, b);
}

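// The next group covers the 64-bit single-lane (<1 x i64>) absolute value and negation
// intrinsics: vabs maps to @llvm.aarch64.neon.abs.v1i64, vqabs/vqneg to the saturating
// sqabs/sqneg intrinsics, and vneg to a plain subtraction from zero.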
// CHECK-LABEL: @test_vabs_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VABS1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> %a)
// CHECK: ret <1 x i64> [[VABS1_I]]
int64x1_t test_vabs_s64(int64x1_t a) {
  return vabs_s64(a);
}

// CHECK-LABEL: @test_vqabs_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQABS_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqabs.v1i64(<1 x i64> %a)
// CHECK: [[VQABS_V2_I:%.*]] = bitcast <1 x i64> [[VQABS_V1_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQABS_V1_I]]
int64x1_t test_vqabs_s64(int64x1_t a) {
  return vqabs_s64(a);
}

// CHECK-LABEL: @test_vqneg_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQNEG_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqneg.v1i64(<1 x i64> %a)
// CHECK: [[VQNEG_V2_I:%.*]] = bitcast <1 x i64> [[VQNEG_V1_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQNEG_V1_I]]
int64x1_t test_vqneg_s64(int64x1_t a) {
  return vqneg_s64(a);
}

// CHECK-LABEL: @test_vneg_s64(
// CHECK: [[SUB_I:%.*]] = sub <1 x i64> zeroinitializer, %a
// CHECK: ret <1 x i64> [[SUB_I]]
int64x1_t test_vneg_s64(int64x1_t a) {
  return vneg_s64(a);
}

// CHECK-LABEL: @test_vaddv_f32(
// CHECK: [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VADDV_F32_I]]
float32_t test_vaddv_f32(float32x2_t a) {
  return vaddv_f32(a);
}

// CHECK-LABEL: @test_vaddvq_f32(
// CHECK: [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a)
// CHECK: ret float [[VADDVQ_F32_I]]
float32_t test_vaddvq_f32(float32x4_t a) {
  return vaddvq_f32(a);
}

// CHECK-LABEL: @test_vaddvq_f64(
// CHECK: [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VADDVQ_F64_I]]
float64_t test_vaddvq_f64(float64x2_t a) {
  return vaddvq_f64(a);
}

// CHECK-LABEL: @test_vmaxv_f32(
// CHECK: [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VMAXV_F32_I]]
float32_t test_vmaxv_f32(float32x2_t a) {
  return vmaxv_f32(a);
}

// CHECK-LABEL: @test_vmaxvq_f64(
// CHECK: [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VMAXVQ_F64_I]]
float64_t test_vmaxvq_f64(float64x2_t a) {
  return vmaxvq_f64(a);
}

// CHECK-LABEL: @test_vminv_f32(
// CHECK: [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VMINV_F32_I]]
float32_t test_vminv_f32(float32x2_t a) {
  return vminv_f32(a);
}

// CHECK-LABEL: @test_vminvq_f64(
// CHECK: [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VMINVQ_F64_I]]
float64_t test_vminvq_f64(float64x2_t a) {
  return vminvq_f64(a);
}

// CHECK-LABEL: @test_vmaxnmvq_f64(
// CHECK: [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VMAXNMVQ_F64_I]]
float64_t test_vmaxnmvq_f64(float64x2_t a) {
  return vmaxnmvq_f64(a);
}

// CHECK-LABEL: @test_vmaxnmv_f32(
// CHECK: [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VMAXNMV_F32_I]]
float32_t test_vmaxnmv_f32(float32x2_t a) {
  return vmaxnmv_f32(a);
}

// CHECK-LABEL: @test_vminnmvq_f64(
// CHECK: [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VMINNMVQ_F64_I]]
float64_t test_vminnmvq_f64(float64x2_t a) {
  return vminnmvq_f64(a);
}

// CHECK-LABEL: @test_vminnmv_f32(
// CHECK: [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VMINNMV_F32_I]]
float32_t test_vminnmv_f32(float32x2_t a) {
  return vminnmv_f32(a);
}

// CHECK-LABEL: @test_vpaddq_s64(
// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VPADDQ_V2_I]]
int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) {
  return vpaddq_s64(a, b);
}

// CHECK-LABEL: @test_vpaddq_u64(
// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VPADDQ_V2_I]]
uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vpaddq_u64(a, b);
}

// CHECK-LABEL: @test_vpaddd_u64(
// CHECK: [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
// CHECK: ret i64 [[VPADDD_U64_I]]
uint64_t test_vpaddd_u64(uint64x2_t a) {
  return vpaddd_u64(a);
}

// CHECK-LABEL: @test_vaddvq_s64(
// CHECK: [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> %a)
// CHECK: ret i64 [[VADDVQ_S64_I]]
int64_t test_vaddvq_s64(int64x2_t a) {
  return vaddvq_s64(a);
}

// CHECK-LABEL: @test_vaddvq_u64(
// CHECK: [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
// CHECK: ret i64 [[VADDVQ_U64_I]]
uint64_t test_vaddvq_u64(uint64x2_t a) {
  return vaddvq_u64(a);
}

// CHECK-LABEL: @test_vadd_f64(
// CHECK: [[ADD_I:%.*]] = fadd <1 x double> %a, %b
// CHECK: ret <1 x double> [[ADD_I]]
float64x1_t test_vadd_f64(float64x1_t a, float64x1_t b) {
  return vadd_f64(a, b);
}

// CHECK-LABEL: @test_vmul_f64(
// CHECK: [[MUL_I:%.*]] = fmul <1 x double> %a, %b
// CHECK: ret <1 x double> [[MUL_I]]
float64x1_t test_vmul_f64(float64x1_t a, float64x1_t b) {
  return vmul_f64(a, b);
}

// CHECK-LABEL: @test_vdiv_f64(
// CHECK: [[DIV_I:%.*]] = fdiv <1 x double> %a, %b
// CHECK: ret <1 x double> [[DIV_I]]
float64x1_t test_vdiv_f64(float64x1_t a, float64x1_t b) {
  return vdiv_f64(a, b);
}

// CHECK-LABEL: @test_vmla_f64(
// CHECK: [[MUL_I:%.*]] = fmul <1 x double> %b, %c
// CHECK: [[ADD_I:%.*]] = fadd <1 x double> %a, [[MUL_I]]
// CHECK: ret <1 x double> [[ADD_I]]
float64x1_t test_vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vmla_f64(a, b, c);
}

// CHECK-LABEL: @test_vmls_f64(
// CHECK: [[MUL_I:%.*]] = fmul <1 x double> %b, %c
// CHECK: [[SUB_I:%.*]] = fsub <1 x double> %a, [[MUL_I]]
// CHECK: ret <1 x double> [[SUB_I]]
float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vmls_f64(a, b, c);
}

// CHECK-LABEL: @test_vfma_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a)
// CHECK: ret <1 x double> [[TMP3]]
float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vfma_f64(a, b, c);
}

// CHECK-LABEL: @test_vfms_f64(
// CHECK: [[SUB_I:%.*]] = fneg <1 x double> %b
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> %c, <1 x double> %a)
// CHECK: ret <1 x double> [[TMP3]]
float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vfms_f64(a, b, c);
}

// CHECK-LABEL: @test_vsub_f64(
// CHECK: [[SUB_I:%.*]] = fsub <1 x double> %a, %b
// CHECK: ret <1 x double> [[SUB_I]]
float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) {
  return vsub_f64(a, b);
}

// CHECK-LABEL: @test_vabd_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VABD2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK: ret <1 x double> [[VABD2_I]]
float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) {
  return vabd_f64(a, b);
}

// CHECK-LABEL: @test_vmax_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK: ret <1 x double> [[VMAX2_I]]
float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) {
  return vmax_f64(a, b);
}

// CHECK-LABEL: @test_vmin_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK: ret <1 x double> [[VMIN2_I]]
float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) {
  return vmin_f64(a, b);
}

// CHECK-LABEL: @test_vmaxnm_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VMAXNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK: ret <1 x double> [[VMAXNM2_I]]
float64x1_t test_vmaxnm_f64(float64x1_t a, float64x1_t b) {
  return vmaxnm_f64(a, b);
}

// CHECK-LABEL: @test_vminnm_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VMINNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK: ret <1 x double> [[VMINNM2_I]]
float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) {
  return vminnm_f64(a, b);
}

// CHECK-LABEL: @test_vabs_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VABS1_I:%.*]] = call <1 x double> @llvm.fabs.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VABS1_I]]
float64x1_t test_vabs_f64(float64x1_t a) {
  return vabs_f64(a);
}

// CHECK-LABEL: @test_vneg_f64(
// CHECK: [[SUB_I:%.*]] = fneg <1 x double> %a
// CHECK: ret <1 x double> [[SUB_I]]
float64x1_t test_vneg_f64(float64x1_t a) {
  return vneg_f64(a);
}

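// The conversion tests below cover float64x1_t <-> 64-bit integer lanes: the plain
// vcvt variants use fcvtzs/fcvtzu or sitofp/uitofp, the rounding-mode variants
// (vcvtn/vcvtp/vcvtm/vcvta) map to the fcvt[npma][su] intrinsics, and the _n_
// fixed-point variants pass the fraction-bit count as an i32 immediate to the
// vcvtfp2fx*/vcvtfx*2fp intrinsics.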
// CHECK-LABEL: @test_vcvt_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[TMP1]]
int64x1_t test_vcvt_s64_f64(float64x1_t a) {
  return vcvt_s64_f64(a);
}

// CHECK-LABEL: @test_vcvt_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[TMP1]]
uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
  return vcvt_u64_f64(a);
}

// CHECK-LABEL: @test_vcvtn_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[VCVTN1_I]]
int64x1_t test_vcvtn_s64_f64(float64x1_t a) {
  return vcvtn_s64_f64(a);
}

// CHECK-LABEL: @test_vcvtn_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[VCVTN1_I]]
uint64x1_t test_vcvtn_u64_f64(float64x1_t a) {
  return vcvtn_u64_f64(a);
}

// CHECK-LABEL: @test_vcvtp_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[VCVTP1_I]]
int64x1_t test_vcvtp_s64_f64(float64x1_t a) {
  return vcvtp_s64_f64(a);
}

// CHECK-LABEL: @test_vcvtp_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[VCVTP1_I]]
uint64x1_t test_vcvtp_u64_f64(float64x1_t a) {
  return vcvtp_u64_f64(a);
}

// CHECK-LABEL: @test_vcvtm_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[VCVTM1_I]]
int64x1_t test_vcvtm_s64_f64(float64x1_t a) {
  return vcvtm_s64_f64(a);
}

// CHECK-LABEL: @test_vcvtm_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[VCVTM1_I]]
uint64x1_t test_vcvtm_u64_f64(float64x1_t a) {
  return vcvtm_u64_f64(a);
}

// CHECK-LABEL: @test_vcvta_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[VCVTA1_I]]
int64x1_t test_vcvta_s64_f64(float64x1_t a) {
  return vcvta_s64_f64(a);
}

// CHECK-LABEL: @test_vcvta_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[VCVTA1_I]]
uint64x1_t test_vcvta_u64_f64(float64x1_t a) {
  return vcvta_u64_f64(a);
}

// CHECK-LABEL: @test_vcvt_f64_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VCVT_I:%.*]] = sitofp <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[VCVT_I]]
float64x1_t test_vcvt_f64_s64(int64x1_t a) {
  return vcvt_f64_s64(a);
}

// CHECK-LABEL: @test_vcvt_f64_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VCVT_I:%.*]] = uitofp <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[VCVT_I]]
float64x1_t test_vcvt_f64_u64(uint64x1_t a) {
  return vcvt_f64_u64(a);
}

// CHECK-LABEL: @test_vcvt_n_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
// CHECK: ret <1 x i64> [[VCVT_N1]]
int64x1_t test_vcvt_n_s64_f64(float64x1_t a) {
  return vcvt_n_s64_f64(a, 64);
}

// CHECK-LABEL: @test_vcvt_n_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
// CHECK: ret <1 x i64> [[VCVT_N1]]
uint64x1_t test_vcvt_n_u64_f64(float64x1_t a) {
  return vcvt_n_u64_f64(a, 64);
}

// CHECK-LABEL: @test_vcvt_n_f64_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
// CHECK: ret <1 x double> [[VCVT_N1]]
float64x1_t test_vcvt_n_f64_s64(int64x1_t a) {
  return vcvt_n_f64_s64(a, 64);
}

// CHECK-LABEL: @test_vcvt_n_f64_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
// CHECK: ret <1 x double> [[VCVT_N1]]
float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) {
  return vcvt_n_f64_u64(a, 64);
}

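// The vrnd* tests check that each rounding mode lowers to the corresponding generic
// LLVM intrinsic on <1 x double>: roundeven (vrndn), round (vrnda), ceil (vrndp),
// floor (vrndm), rint (vrndx), trunc (vrnd) and nearbyint (vrndi).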
// CHECK-LABEL: @test_vrndn_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VRNDN1_I]]
float64x1_t test_vrndn_f64(float64x1_t a) {
  return vrndn_f64(a);
}

// CHECK-LABEL: @test_vrnda_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VRNDA1_I]]
float64x1_t test_vrnda_f64(float64x1_t a) {
  return vrnda_f64(a);
}

// CHECK-LABEL: @test_vrndp_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VRNDP1_I]]
float64x1_t test_vrndp_f64(float64x1_t a) {
  return vrndp_f64(a);
}

// CHECK-LABEL: @test_vrndm_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VRNDM1_I]]
float64x1_t test_vrndm_f64(float64x1_t a) {
  return vrndm_f64(a);
}

// CHECK-LABEL: @test_vrndx_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VRNDX1_I]]
float64x1_t test_vrndx_f64(float64x1_t a) {
  return vrndx_f64(a);
}

// CHECK-LABEL: @test_vrnd_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VRNDZ1_I]]
float64x1_t test_vrnd_f64(float64x1_t a) {
  return vrnd_f64(a);
}

// CHECK-LABEL: @test_vrndi_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDI1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VRNDI1_I]]
float64x1_t test_vrndi_f64(float64x1_t a) {
  return vrndi_f64(a);
}

// CHECK-LABEL: @test_vrsqrte_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRSQRTE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrte.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VRSQRTE_V1_I]]
float64x1_t test_vrsqrte_f64(float64x1_t a) {
  return vrsqrte_f64(a);
}

// CHECK-LABEL: @test_vrecpe_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRECPE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecpe.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VRECPE_V1_I]]
float64x1_t test_vrecpe_f64(float64x1_t a) {
  return vrecpe_f64(a);
}

// CHECK-LABEL: @test_vsqrt_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VSQRT_I]]
float64x1_t test_vsqrt_f64(float64x1_t a) {
  return vsqrt_f64(a);
}

// CHECK-LABEL: @test_vrecps_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VRECPS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecps.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK: ret <1 x double> [[VRECPS_V2_I]]
float64x1_t test_vrecps_f64(float64x1_t a, float64x1_t b) {
  return vrecps_f64(a, b);
}

// CHECK-LABEL: @test_vrsqrts_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VRSQRTS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrts.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <1 x double> [[VRSQRTS_V2_I]] to <8 x i8>
// CHECK: ret <1 x double> [[VRSQRTS_V2_I]]
float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) {
  return vrsqrts_f64(a, b);
}

// CHECK-LABEL: @test_vminv_s32(
// CHECK: [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a)
// CHECK: ret i32 [[VMINV_S32_I]]
int32_t test_vminv_s32(int32x2_t a) {
  return vminv_s32(a);
}

// CHECK-LABEL: @test_vminv_u32(
// CHECK: [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> %a)
// CHECK: ret i32 [[VMINV_U32_I]]
uint32_t test_vminv_u32(uint32x2_t a) {
  return vminv_u32(a);
}

// CHECK-LABEL: @test_vmaxv_s32(
// CHECK: [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a)
// CHECK: ret i32 [[VMAXV_S32_I]]
int32_t test_vmaxv_s32(int32x2_t a) {
  return vmaxv_s32(a);
}

// CHECK-LABEL: @test_vmaxv_u32(
// CHECK: [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> %a)
// CHECK: ret i32 [[VMAXV_U32_I]]
uint32_t test_vmaxv_u32(uint32x2_t a) {
  return vmaxv_u32(a);
}

// CHECK-LABEL: @test_vaddv_s32(
// CHECK: [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a)
// CHECK: ret i32 [[VADDV_S32_I]]
int32_t test_vaddv_s32(int32x2_t a) {
  return vaddv_s32(a);
}

// CHECK-LABEL: @test_vaddv_u32(
// CHECK: [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a)
// CHECK: ret i32 [[VADDV_U32_I]]
uint32_t test_vaddv_u32(uint32x2_t a) {
  return vaddv_u32(a);
}

// CHECK-LABEL: @test_vaddlv_s32(
// CHECK: [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> %a)
// CHECK: ret i64 [[VADDLV_S32_I]]
int64_t test_vaddlv_s32(int32x2_t a) {
  return vaddlv_s32(a);
}

// CHECK-LABEL: @test_vaddlv_u32(
// CHECK: [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> %a)
// CHECK: ret i64 [[VADDLV_U32_I]]
uint64_t test_vaddlv_u32(uint32x2_t a) {
  return vaddlv_u32(a);
}