// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu\
// RUN: -target-cpu swift \
// RUN: -target-feature +fullfp16 -ffreestanding \
// RUN: -flax-vector-conversions=none \
// RUN: -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -passes=mem2reg | FileCheck %s

// REQUIRES: aarch64-registered-target || arm-registered-target

#include <arm_neon.h>
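
// Each test_* function below wraps one NEON intrinsic, and the CHECK lines
// pin down the LLVM IR Clang emits for it. The RUN line pipes the -O0 output
// through mem2reg so the checks match clean SSA values rather than stack
// traffic.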
// CHECK-LABEL: @test_vaba_s8(
// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c)
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_V_I_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vaba_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vaba_s8(a, b, c);
}

// CHECK-LABEL: @test_vaba_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[VABD_V2_I_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vaba_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vaba_s16(a, b, c);
}

// CHECK-LABEL: @test_vaba_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[VABD_V2_I_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vaba_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vaba_s32(a, b, c);
}

// CHECK-LABEL: @test_vaba_u8(
// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c)
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_V_I_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vaba_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vaba_u8(a, b, c);
}

// CHECK-LABEL: @test_vaba_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[VABD_V2_I_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vaba_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vaba_u16(a, b, c);
}

// CHECK-LABEL: @test_vaba_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[VABD_V2_I_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vaba_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vaba_u32(a, b, c);
}

// CHECK-LABEL: @test_vabaq_s8(
// CHECK: [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %b, <16 x i8> %c)
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[VABDQ_V_I_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vabaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
  return vabaq_s8(a, b, c);
}

// CHECK-LABEL: @test_vabaq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK: [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %b, <8 x i16> %c)
// CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I_I]] to <16 x i8>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VABDQ_V2_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vabaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  return vabaq_s16(a, b, c);
}

// CHECK-LABEL: @test_vabaq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK: [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %b, <4 x i32> %c)
// CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I_I]] to <16 x i8>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VABDQ_V2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vabaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  return vabaq_s32(a, b, c);
}

// CHECK-LABEL: @test_vabaq_u8(
// CHECK: [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %b, <16 x i8> %c)
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[VABDQ_V_I_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vabaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vabaq_u8(a, b, c);
}

// CHECK-LABEL: @test_vabaq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK: [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %b, <8 x i16> %c)
// CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I_I]] to <16 x i8>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VABDQ_V2_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vabaq_u16(a, b, c);
}

// CHECK-LABEL: @test_vabaq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK: [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %b, <4 x i32> %c)
// CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I_I]] to <16 x i8>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VABDQ_V2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vabaq_u32(a, b, c);
}
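
// vabal_*: widening absolute difference and accumulate. The narrow
// vabds/vabdu result is non-negative, so both signed and unsigned variants
// widen it with zext before the add.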
// CHECK-LABEL: @test_vabal_s8(
// CHECK: [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c)
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vabal_s8(a, b, c);
}

// CHECK-LABEL: @test_vabal_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I_I]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vabal_s16(a, b, c);
}

// CHECK-LABEL: @test_vabal_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I_I]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vabal_s32(a, b, c);
}

// CHECK-LABEL: @test_vabal_u8(
// CHECK: [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c)
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vabal_u8(a, b, c);
}

// CHECK-LABEL: @test_vabal_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I_I]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vabal_u16(a, b, c);
}

// CHECK-LABEL: @test_vabal_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I_I]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vabal_u32(a, b, c);
}
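
// vabd_*/vabdq_*: absolute difference, lowered to the vabds (signed/float)
// or vabdu (unsigned) target intrinsics.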
// CHECK-LABEL: @test_vabd_s8(
// CHECK: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VABD_V_I]]
int8x8_t test_vabd_s8(int8x8_t a, int8x8_t b) {
  return vabd_s8(a, b);
}

// CHECK-LABEL: @test_vabd_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VABD_V2_I]]
int16x4_t test_vabd_s16(int16x4_t a, int16x4_t b) {
  return vabd_s16(a, b);
}

// CHECK-LABEL: @test_vabd_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VABD_V2_I]]
int32x2_t test_vabd_s32(int32x2_t a, int32x2_t b) {
  return vabd_s32(a, b);
}

// CHECK-LABEL: @test_vabd_u8(
// CHECK: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VABD_V_I]]
uint8x8_t test_vabd_u8(uint8x8_t a, uint8x8_t b) {
  return vabd_u8(a, b);
}

// CHECK-LABEL: @test_vabd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VABD_V2_I]]
uint16x4_t test_vabd_u16(uint16x4_t a, uint16x4_t b) {
  return vabd_u16(a, b);
}

// CHECK-LABEL: @test_vabd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VABD_V2_I]]
uint32x2_t test_vabd_u32(uint32x2_t a, uint32x2_t b) {
  return vabd_u32(a, b);
}

// CHECK-LABEL: @test_vabd_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VABD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x float> [[VABD_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VABD_V2_I]]
float32x2_t test_vabd_f32(float32x2_t a, float32x2_t b) {
  return vabd_f32(a, b);
}

// CHECK-LABEL: @test_vabdq_s8(
// CHECK: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VABDQ_V_I]]
int8x16_t test_vabdq_s8(int8x16_t a, int8x16_t b) {
  return vabdq_s8(a, b);
}

// CHECK-LABEL: @test_vabdq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VABDQ_V2_I]]
int16x8_t test_vabdq_s16(int16x8_t a, int16x8_t b) {
  return vabdq_s16(a, b);
}

// CHECK-LABEL: @test_vabdq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VABDQ_V2_I]]
int32x4_t test_vabdq_s32(int32x4_t a, int32x4_t b) {
  return vabdq_s32(a, b);
}

// CHECK-LABEL: @test_vabdq_u8(
// CHECK: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VABDQ_V_I]]
uint8x16_t test_vabdq_u8(uint8x16_t a, uint8x16_t b) {
  return vabdq_u8(a, b);
}

// CHECK-LABEL: @test_vabdq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VABDQ_V2_I]]
uint16x8_t test_vabdq_u16(uint16x8_t a, uint16x8_t b) {
  return vabdq_u16(a, b);
}

// CHECK-LABEL: @test_vabdq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VABDQ_V2_I]]
uint32x4_t test_vabdq_u32(uint32x4_t a, uint32x4_t b) {
  return vabdq_u32(a, b);
}

// CHECK-LABEL: @test_vabdq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x float> [[VABDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x float> [[VABDQ_V2_I]]
float32x4_t test_vabdq_f32(float32x4_t a, float32x4_t b) {
  return vabdq_f32(a, b);
}
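
// vabdl_*: absolute difference widened to double-width lanes; as with
// vabal, the widening is a zext for both signednesses.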
// CHECK-LABEL: @test_vabdl_s8(
// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I_I]]
int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
  return vabdl_s8(a, b);
}

// CHECK-LABEL: @test_vabdl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I_I]]
int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
  return vabdl_s16(a, b);
}

// CHECK-LABEL: @test_vabdl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I_I]]
int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
  return vabdl_s32(a, b);
}

// CHECK-LABEL: @test_vabdl_u8(
// CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I_I]]
uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
  return vabdl_u8(a, b);
}

// CHECK-LABEL: @test_vabdl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I_I]]
uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
  return vabdl_u16(a, b);
}

// CHECK-LABEL: @test_vabdl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I_I]]
uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
  return vabdl_u32(a, b);
}
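
// vabs_*/vabsq_*: absolute value. Integer lanes use @llvm.arm.neon.vabs,
// float lanes the generic @llvm.fabs intrinsic.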
// CHECK-LABEL: @test_vabs_s8(
// CHECK: [[VABS_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %a)
// CHECK: ret <8 x i8> [[VABS_I]]
int8x8_t test_vabs_s8(int8x8_t a) {
  return vabs_s8(a);
}

// CHECK-LABEL: @test_vabs_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VABS1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %a)
// CHECK: ret <4 x i16> [[VABS1_I]]
int16x4_t test_vabs_s16(int16x4_t a) {
  return vabs_s16(a);
}

// CHECK-LABEL: @test_vabs_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VABS1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %a)
// CHECK: ret <2 x i32> [[VABS1_I]]
int32x2_t test_vabs_s32(int32x2_t a) {
  return vabs_s32(a);
}

// CHECK-LABEL: @test_vabs_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
// CHECK: ret <2 x float> [[VABS1_I]]
float32x2_t test_vabs_f32(float32x2_t a) {
  return vabs_f32(a);
}

// CHECK-LABEL: @test_vabsq_s8(
// CHECK: [[VABS_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %a)
// CHECK: ret <16 x i8> [[VABS_I]]
int8x16_t test_vabsq_s8(int8x16_t a) {
  return vabsq_s8(a);
}

// CHECK-LABEL: @test_vabsq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VABS1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %a)
// CHECK: ret <8 x i16> [[VABS1_I]]
int16x8_t test_vabsq_s16(int16x8_t a) {
  return vabsq_s16(a);
}

// CHECK-LABEL: @test_vabsq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VABS1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %a)
// CHECK: ret <4 x i32> [[VABS1_I]]
int32x4_t test_vabsq_s32(int32x4_t a) {
  return vabsq_s32(a);
}

// CHECK-LABEL: @test_vabsq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
// CHECK: ret <4 x float> [[VABS1_I]]
float32x4_t test_vabsq_f32(float32x4_t a) {
  return vabsq_f32(a);
}
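
// vadd_*/vaddq_*: lane-wise addition, emitted as plain add/fadd IR rather
// than a target intrinsic.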
// CHECK-LABEL: @test_vadd_s8(
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vadd_s8(int8x8_t a, int8x8_t b) {
  return vadd_s8(a, b);
}

// CHECK-LABEL: @test_vadd_s16(
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vadd_s16(int16x4_t a, int16x4_t b) {
  return vadd_s16(a, b);
}

// CHECK-LABEL: @test_vadd_s32(
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vadd_s32(int32x2_t a, int32x2_t b) {
  return vadd_s32(a, b);
}

// CHECK-LABEL: @test_vadd_s64(
// CHECK: [[ADD_I:%.*]] = add <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vadd_s64(int64x1_t a, int64x1_t b) {
  return vadd_s64(a, b);
}

// CHECK-LABEL: @test_vadd_f32(
// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, %b
// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vadd_f32(float32x2_t a, float32x2_t b) {
  return vadd_f32(a, b);
}

// CHECK-LABEL: @test_vadd_u8(
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vadd_u8(uint8x8_t a, uint8x8_t b) {
  return vadd_u8(a, b);
}

// CHECK-LABEL: @test_vadd_u16(
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vadd_u16(uint16x4_t a, uint16x4_t b) {
  return vadd_u16(a, b);
}

// CHECK-LABEL: @test_vadd_u32(
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vadd_u32(uint32x2_t a, uint32x2_t b) {
  return vadd_u32(a, b);
}

// CHECK-LABEL: @test_vadd_u64(
// CHECK: [[ADD_I:%.*]] = add <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[ADD_I]]
uint64x1_t test_vadd_u64(uint64x1_t a, uint64x1_t b) {
  return vadd_u64(a, b);
}

// CHECK-LABEL: @test_vaddq_s8(
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vaddq_s8(int8x16_t a, int8x16_t b) {
  return vaddq_s8(a, b);
}

// CHECK-LABEL: @test_vaddq_s16(
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddq_s16(int16x8_t a, int16x8_t b) {
  return vaddq_s16(a, b);
}

// CHECK-LABEL: @test_vaddq_s32(
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddq_s32(int32x4_t a, int32x4_t b) {
  return vaddq_s32(a, b);
}

// CHECK-LABEL: @test_vaddq_s64(
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddq_s64(int64x2_t a, int64x2_t b) {
  return vaddq_s64(a, b);
}

// CHECK-LABEL: @test_vaddq_f32(
// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, %b
// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vaddq_f32(float32x4_t a, float32x4_t b) {
  return vaddq_f32(a, b);
}

// CHECK-LABEL: @test_vaddq_u8(
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vaddq_u8(a, b);
}

// CHECK-LABEL: @test_vaddq_u16(
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vaddq_u16(a, b);
}

// CHECK-LABEL: @test_vaddq_u32(
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vaddq_u32(a, b);
}

// CHECK-LABEL: @test_vaddq_u64(
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vaddq_u64(a, b);
}
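
// vaddhn_*: add, then narrow by keeping the high half of each lane
// (add + lshr by half the lane width + trunc).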
// CHECK-LABEL: @test_vaddhn_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VADDHN2_I]]
int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
  return vaddhn_s16(a, b);
}

// CHECK-LABEL: @test_vaddhn_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VADDHN2_I]]
int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
  return vaddhn_s32(a, b);
}

// CHECK-LABEL: @test_vaddhn_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VADDHN2_I]]
int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
  return vaddhn_s64(a, b);
}

// CHECK-LABEL: @test_vaddhn_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
// CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[VADDHN2_I]]
uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vaddhn_u16(a, b);
}

// CHECK-LABEL: @test_vaddhn_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
// CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[VADDHN2_I]]
uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vaddhn_u32(a, b);
}

// CHECK-LABEL: @test_vaddhn_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
// CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[VADDHN2_I]]
uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vaddhn_u64(a, b);
}
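
// vaddl_*: both narrow operands are sign-/zero-extended (VMOVL) to double
// width before the add.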
// CHECK-LABEL: @test_vaddl_s8(
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
  return vaddl_s8(a, b);
}

// CHECK-LABEL: @test_vaddl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
  return vaddl_s16(a, b);
}

// CHECK-LABEL: @test_vaddl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
  return vaddl_s32(a, b);
}

// CHECK-LABEL: @test_vaddl_u8(
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
  return vaddl_u8(a, b);
}

// CHECK-LABEL: @test_vaddl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
  return vaddl_u16(a, b);
}

// CHECK-LABEL: @test_vaddl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
  return vaddl_u32(a, b);
}
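
// vaddw_*: wide + narrow addition; only the second operand needs extending.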
// CHECK-LABEL: @test_vaddw_s8(
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
  return vaddw_s8(a, b);
}

// CHECK-LABEL: @test_vaddw_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
  return vaddw_s16(a, b);
}

// CHECK-LABEL: @test_vaddw_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
  return vaddw_s32(a, b);
}

// CHECK-LABEL: @test_vaddw_u8(
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
  return vaddw_u8(a, b);
}

// CHECK-LABEL: @test_vaddw_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
  return vaddw_u16(a, b);
}

// CHECK-LABEL: @test_vaddw_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
  return vaddw_u32(a, b);
}
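
// vand_*/vandq_*: bitwise AND, a plain and instruction.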
// CHECK-LABEL: @test_vand_s8(
// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[AND_I]]
int8x8_t test_vand_s8(int8x8_t a, int8x8_t b) {
  return vand_s8(a, b);
}

// CHECK-LABEL: @test_vand_s16(
// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[AND_I]]
int16x4_t test_vand_s16(int16x4_t a, int16x4_t b) {
  return vand_s16(a, b);
}

// CHECK-LABEL: @test_vand_s32(
// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[AND_I]]
int32x2_t test_vand_s32(int32x2_t a, int32x2_t b) {
  return vand_s32(a, b);
}

// CHECK-LABEL: @test_vand_s64(
// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[AND_I]]
int64x1_t test_vand_s64(int64x1_t a, int64x1_t b) {
  return vand_s64(a, b);
}

// CHECK-LABEL: @test_vand_u8(
// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[AND_I]]
uint8x8_t test_vand_u8(uint8x8_t a, uint8x8_t b) {
  return vand_u8(a, b);
}

// CHECK-LABEL: @test_vand_u16(
// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[AND_I]]
uint16x4_t test_vand_u16(uint16x4_t a, uint16x4_t b) {
  return vand_u16(a, b);
}

// CHECK-LABEL: @test_vand_u32(
// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[AND_I]]
uint32x2_t test_vand_u32(uint32x2_t a, uint32x2_t b) {
  return vand_u32(a, b);
}

// CHECK-LABEL: @test_vand_u64(
// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[AND_I]]
uint64x1_t test_vand_u64(uint64x1_t a, uint64x1_t b) {
  return vand_u64(a, b);
}

// CHECK-LABEL: @test_vandq_s8(
// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[AND_I]]
int8x16_t test_vandq_s8(int8x16_t a, int8x16_t b) {
  return vandq_s8(a, b);
}

// CHECK-LABEL: @test_vandq_s16(
// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[AND_I]]
int16x8_t test_vandq_s16(int16x8_t a, int16x8_t b) {
  return vandq_s16(a, b);
}

// CHECK-LABEL: @test_vandq_s32(
// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[AND_I]]
int32x4_t test_vandq_s32(int32x4_t a, int32x4_t b) {
  return vandq_s32(a, b);
}

// CHECK-LABEL: @test_vandq_s64(
// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[AND_I]]
int64x2_t test_vandq_s64(int64x2_t a, int64x2_t b) {
  return vandq_s64(a, b);
}

// CHECK-LABEL: @test_vandq_u8(
// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[AND_I]]
uint8x16_t test_vandq_u8(uint8x16_t a, uint8x16_t b) {
  return vandq_u8(a, b);
}

// CHECK-LABEL: @test_vandq_u16(
// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[AND_I]]
uint16x8_t test_vandq_u16(uint16x8_t a, uint16x8_t b) {
  return vandq_u16(a, b);
}

// CHECK-LABEL: @test_vandq_u32(
// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[AND_I]]
uint32x4_t test_vandq_u32(uint32x4_t a, uint32x4_t b) {
  return vandq_u32(a, b);
}

// CHECK-LABEL: @test_vandq_u64(
// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[AND_I]]
uint64x2_t test_vandq_u64(uint64x2_t a, uint64x2_t b) {
  return vandq_u64(a, b);
}
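
// vbic_*/vbicq_*: bit clear, a & ~b, emitted as xor with all-ones followed
// by and.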
// CHECK-LABEL: @test_vbic_s8(
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]]
// CHECK: ret <8 x i8> [[AND_I]]
int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) {
  return vbic_s8(a, b);
}

// CHECK-LABEL: @test_vbic_s16(
// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]]
// CHECK: ret <4 x i16> [[AND_I]]
int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) {
  return vbic_s16(a, b);
}

// CHECK-LABEL: @test_vbic_s32(
// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]]
// CHECK: ret <2 x i32> [[AND_I]]
int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) {
  return vbic_s32(a, b);
}

// CHECK-LABEL: @test_vbic_s64(
// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]]
// CHECK: ret <1 x i64> [[AND_I]]
int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) {
  return vbic_s64(a, b);
}

// CHECK-LABEL: @test_vbic_u8(
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]]
// CHECK: ret <8 x i8> [[AND_I]]
uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) {
  return vbic_u8(a, b);
}

// CHECK-LABEL: @test_vbic_u16(
// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]]
// CHECK: ret <4 x i16> [[AND_I]]
uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) {
  return vbic_u16(a, b);
}

// CHECK-LABEL: @test_vbic_u32(
// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
// CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]]
// CHECK: ret <2 x i32> [[AND_I]]
uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) {
  return vbic_u32(a, b);
}

// CHECK-LABEL: @test_vbic_u64(
// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
// CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]]
// CHECK: ret <1 x i64> [[AND_I]]
uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) {
  return vbic_u64(a, b);
}

// CHECK-LABEL: @test_vbicq_s8(
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
// CHECK: ret <16 x i8> [[AND_I]]
int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) {
  return vbicq_s8(a, b);
}

// CHECK-LABEL: @test_vbicq_s16(
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
// CHECK: ret <8 x i16> [[AND_I]]
int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) {
  return vbicq_s16(a, b);
}

// CHECK-LABEL: @test_vbicq_s32(
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
// CHECK: ret <4 x i32> [[AND_I]]
int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) {
  return vbicq_s32(a, b);
}

// CHECK-LABEL: @test_vbicq_s64(
// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
// CHECK: ret <2 x i64> [[AND_I]]
int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) {
  return vbicq_s64(a, b);
}

// CHECK-LABEL: @test_vbicq_u8(
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
// CHECK: ret <16 x i8> [[AND_I]]
uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) {
  return vbicq_u8(a, b);
}

// CHECK-LABEL: @test_vbicq_u16(
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
// CHECK: ret <8 x i16> [[AND_I]]
uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) {
  return vbicq_u16(a, b);
}

// CHECK-LABEL: @test_vbicq_u32(
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
// CHECK: ret <4 x i32> [[AND_I]]
uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) {
  return vbicq_u32(a, b);
}

// CHECK-LABEL: @test_vbicq_u64(
// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
// CHECK: ret <2 x i64> [[AND_I]]
uint64x2_t test_vbicq_u64(uint64x2_t a, uint64x2_t b) {
  return vbicq_u64(a, b);
}
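
// vbsl_*/vbslq_*: bitwise select through @llvm.arm.neon.vbsl on <8 x i8> /
// <16 x i8>, with bitcasts in and out for wider element types.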
// CHECK-LABEL: @test_vbsl_s8(
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK: ret <8 x i8> [[VBSL_V_I]]
int8x8_t test_vbsl_s8(uint8x8_t a, int8x8_t b, int8x8_t c) {
  return vbsl_s8(a, b, c);
}

// CHECK-LABEL: @test_vbsl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP3]]
int16x4_t test_vbsl_s16(uint16x4_t a, int16x4_t b, int16x4_t c) {
  return vbsl_s16(a, b, c);
}

// CHECK-LABEL: @test_vbsl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP3]]
int32x2_t test_vbsl_s32(uint32x2_t a, int32x2_t b, int32x2_t c) {
  return vbsl_s32(a, b, c);
}

// CHECK-LABEL: @test_vbsl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP3]]
int64x1_t test_vbsl_s64(uint64x1_t a, int64x1_t b, int64x1_t c) {
  return vbsl_s64(a, b, c);
}

// CHECK-LABEL: @test_vbsl_u8(
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK: ret <8 x i8> [[VBSL_V_I]]
uint8x8_t test_vbsl_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vbsl_u8(a, b, c);
}

// CHECK-LABEL: @test_vbsl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP3]]
uint16x4_t test_vbsl_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vbsl_u16(a, b, c);
}

// CHECK-LABEL: @test_vbsl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[TMP3]]
uint32x2_t test_vbsl_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vbsl_u32(a, b, c);
}

// CHECK-LABEL: @test_vbsl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <1 x i64>
// CHECK: ret <1 x i64> [[TMP3]]
uint64x1_t test_vbsl_u64(uint64x1_t a, uint64x1_t b, uint64x1_t c) {
  return vbsl_u64(a, b, c);
}

// CHECK-LABEL: @test_vbsl_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP3]]
float32x2_t test_vbsl_f32(uint32x2_t a, float32x2_t b, float32x2_t c) {
  return vbsl_f32(a, b, c);
}

// CHECK-LABEL: @test_vbsl_p8(
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK: ret <8 x i8> [[VBSL_V_I]]
poly8x8_t test_vbsl_p8(uint8x8_t a, poly8x8_t b, poly8x8_t c) {
  return vbsl_p8(a, b, c);
}

// CHECK-LABEL: @test_vbsl_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP3]]
poly16x4_t test_vbsl_p16(uint16x4_t a, poly16x4_t b, poly16x4_t c) {
  return vbsl_p16(a, b, c);
}

// CHECK-LABEL: @test_vbslq_s8(
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
// CHECK: ret <16 x i8> [[VBSLQ_V_I]]
int8x16_t test_vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c) {
  return vbslq_s8(a, b, c);
}

// CHECK-LABEL: @test_vbslq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP3]]
int16x8_t test_vbslq_s16(uint16x8_t a, int16x8_t b, int16x8_t c) {
  return vbslq_s16(a, b, c);
}

// CHECK-LABEL: @test_vbslq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP3]]
int32x4_t test_vbslq_s32(uint32x4_t a, int32x4_t b, int32x4_t c) {
  return vbslq_s32(a, b, c);
}

// CHECK-LABEL: @test_vbslq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP3]]
int64x2_t test_vbslq_s64(uint64x2_t a, int64x2_t b, int64x2_t c) {
  return vbslq_s64(a, b, c);
}

// CHECK-LABEL: @test_vbslq_u8(
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
// CHECK: ret <16 x i8> [[VBSLQ_V_I]]
uint8x16_t test_vbslq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vbslq_u8(a, b, c);
}

// CHECK-LABEL: @test_vbslq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP3]]
uint16x8_t test_vbslq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vbslq_u16(a, b, c);
}

// CHECK-LABEL: @test_vbslq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[TMP3]]
uint32x4_t test_vbslq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vbslq_u32(a, b, c);
}

// CHECK-LABEL: @test_vbslq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[TMP3]]
uint64x2_t test_vbslq_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) {
  return vbslq_u64(a, b, c);
}

// CHECK-LABEL: @test_vbslq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP3]]
float32x4_t test_vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c) {
  return vbslq_f32(a, b, c);
}

// CHECK-LABEL: @test_vbslq_p8(
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
// CHECK: ret <16 x i8> [[VBSLQ_V_I]]
poly8x16_t test_vbslq_p8(uint8x16_t a, poly8x16_t b, poly8x16_t c) {
  return vbslq_p8(a, b, c);
}

// CHECK-LABEL: @test_vbslq_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[TMP3]]
poly16x8_t test_vbslq_p16(uint16x8_t a, poly16x8_t b, poly16x8_t c) {
  return vbslq_p16(a, b, c);
}
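
// vcage/vcagt/vcale/vcalt: absolute compares (|a| vs |b|). Only vacge and
// vacgt exist as intrinsics, so the le/lt forms swap the operands.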
// CHECK-LABEL: @test_vcage_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x i32> [[VCAGE_V2_I]]
uint32x2_t test_vcage_f32(float32x2_t a, float32x2_t b) {
  return vcage_f32(a, b);
}

// CHECK-LABEL: @test_vcageq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x i32> [[VCAGEQ_V2_I]]
uint32x4_t test_vcageq_f32(float32x4_t a, float32x4_t b) {
  return vcageq_f32(a, b);
}

// CHECK-LABEL: @test_vcagt_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x i32> [[VCAGT_V2_I]]
uint32x2_t test_vcagt_f32(float32x2_t a, float32x2_t b) {
  return vcagt_f32(a, b);
}

// CHECK-LABEL: @test_vcagtq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x i32> [[VCAGTQ_V2_I]]
uint32x4_t test_vcagtq_f32(float32x4_t a, float32x4_t b) {
  return vcagtq_f32(a, b);
}

// CHECK-LABEL: @test_vcale_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %b, <2 x float> %a)
// CHECK: ret <2 x i32> [[VCALE_V2_I]]
uint32x2_t test_vcale_f32(float32x2_t a, float32x2_t b) {
  return vcale_f32(a, b);
}

// CHECK-LABEL: @test_vcaleq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %b, <4 x float> %a)
// CHECK: ret <4 x i32> [[VCALEQ_V2_I]]
uint32x4_t test_vcaleq_f32(float32x4_t a, float32x4_t b) {
  return vcaleq_f32(a, b);
}

// CHECK-LABEL: @test_vcalt_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %b, <2 x float> %a)
// CHECK: ret <2 x i32> [[VCALT_V2_I]]
uint32x2_t test_vcalt_f32(float32x2_t a, float32x2_t b) {
  return vcalt_f32(a, b);
}

// CHECK-LABEL: @test_vcaltq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %b, <4 x float> %a)
// CHECK: ret <4 x i32> [[VCALTQ_V2_I]]
uint32x4_t test_vcaltq_f32(float32x4_t a, float32x4_t b) {
  return vcaltq_f32(a, b);
}
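
// vceq_*/vceqq_*: lane-wise equality; the <N x i1> compare result is
// sign-extended to all-ones/all-zeros lanes.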

// CHECK-LABEL: @test_vceq_s8(
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_s8(int8x8_t a, int8x8_t b) {
  return vceq_s8(a, b);
}

// CHECK-LABEL: @test_vceq_s16(
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_s16(int16x4_t a, int16x4_t b) {
  return vceq_s16(a, b);
}

// CHECK-LABEL: @test_vceq_s32(
// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_s32(int32x2_t a, int32x2_t b) {
  return vceq_s32(a, b);
}

// CHECK-LABEL: @test_vceq_f32(
// CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_f32(float32x2_t a, float32x2_t b) {
  return vceq_f32(a, b);
}

// CHECK-LABEL: @test_vceq_u8(
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_u8(uint8x8_t a, uint8x8_t b) {
  return vceq_u8(a, b);
}

// CHECK-LABEL: @test_vceq_u16(
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_u16(uint16x4_t a, uint16x4_t b) {
  return vceq_u16(a, b);
}

// CHECK-LABEL: @test_vceq_u32(
// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_u32(uint32x2_t a, uint32x2_t b) {
  return vceq_u32(a, b);
}

// CHECK-LABEL: @test_vceq_p8(
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_p8(poly8x8_t a, poly8x8_t b) {
  return vceq_p8(a, b);
}

// CHECK-LABEL: @test_vceqq_s8(
// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_s8(int8x16_t a, int8x16_t b) {
  return vceqq_s8(a, b);
}

// CHECK-LABEL: @test_vceqq_s16(
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_s16(int16x8_t a, int16x8_t b) {
  return vceqq_s16(a, b);
}

// CHECK-LABEL: @test_vceqq_s32(
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_s32(int32x4_t a, int32x4_t b) {
  return vceqq_s32(a, b);
}

// CHECK-LABEL: @test_vceqq_f32(
// CHECK: [[CMP_I:%.*]] = fcmp oeq <4 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_f32(float32x4_t a, float32x4_t b) {
  return vceqq_f32(a, b);
}

// CHECK-LABEL: @test_vceqq_u8(
// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_u8(uint8x16_t a, uint8x16_t b) {
  return vceqq_u8(a, b);
}

// CHECK-LABEL: @test_vceqq_u16(
// CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_u16(uint16x8_t a, uint16x8_t b) {
  return vceqq_u16(a, b);
}

// CHECK-LABEL: @test_vceqq_u32(
// CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_u32(uint32x4_t a, uint32x4_t b) {
  return vceqq_u32(a, b);
}

// CHECK-LABEL: @test_vceqq_p8(
// CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_p8(poly8x16_t a, poly8x16_t b) {
  return vceqq_p8(a, b);
}
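
// Note: the lane-wise compares need no target intrinsic: each is an
// icmp/fcmp whose <N x i1> result is sign-extended to all-ones (true) or
// all-zeros (false) lanes. The vcge/vcgt/vcle/vclt tests below follow the
// same pattern with signed, unsigned, or ordered floating-point predicates.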

// CHECK-LABEL: @test_vcge_s8(
// CHECK: [[CMP_I:%.*]] = icmp sge <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_s8(int8x8_t a, int8x8_t b) {
  return vcge_s8(a, b);
}

// CHECK-LABEL: @test_vcge_s16(
// CHECK: [[CMP_I:%.*]] = icmp sge <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_s16(int16x4_t a, int16x4_t b) {
  return vcge_s16(a, b);
}

// CHECK-LABEL: @test_vcge_s32(
// CHECK: [[CMP_I:%.*]] = icmp sge <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_s32(int32x2_t a, int32x2_t b) {
  return vcge_s32(a, b);
}

// CHECK-LABEL: @test_vcge_f32(
// CHECK: [[CMP_I:%.*]] = fcmp oge <2 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_f32(float32x2_t a, float32x2_t b) {
  return vcge_f32(a, b);
}

// CHECK-LABEL: @test_vcge_u8(
// CHECK: [[CMP_I:%.*]] = icmp uge <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_u8(uint8x8_t a, uint8x8_t b) {
  return vcge_u8(a, b);
}

// CHECK-LABEL: @test_vcge_u16(
// CHECK: [[CMP_I:%.*]] = icmp uge <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_u16(uint16x4_t a, uint16x4_t b) {
  return vcge_u16(a, b);
}

// CHECK-LABEL: @test_vcge_u32(
// CHECK: [[CMP_I:%.*]] = icmp uge <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_u32(uint32x2_t a, uint32x2_t b) {
  return vcge_u32(a, b);
}

// CHECK-LABEL: @test_vcgeq_s8(
// CHECK: [[CMP_I:%.*]] = icmp sge <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_s8(int8x16_t a, int8x16_t b) {
  return vcgeq_s8(a, b);
}

// CHECK-LABEL: @test_vcgeq_s16(
// CHECK: [[CMP_I:%.*]] = icmp sge <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_s16(int16x8_t a, int16x8_t b) {
  return vcgeq_s16(a, b);
}

// CHECK-LABEL: @test_vcgeq_s32(
// CHECK: [[CMP_I:%.*]] = icmp sge <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_s32(int32x4_t a, int32x4_t b) {
  return vcgeq_s32(a, b);
}

// CHECK-LABEL: @test_vcgeq_f32(
// CHECK: [[CMP_I:%.*]] = fcmp oge <4 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_f32(float32x4_t a, float32x4_t b) {
  return vcgeq_f32(a, b);
}

// CHECK-LABEL: @test_vcgeq_u8(
// CHECK: [[CMP_I:%.*]] = icmp uge <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_u8(uint8x16_t a, uint8x16_t b) {
  return vcgeq_u8(a, b);
}

// CHECK-LABEL: @test_vcgeq_u16(
// CHECK: [[CMP_I:%.*]] = icmp uge <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_u16(uint16x8_t a, uint16x8_t b) {
  return vcgeq_u16(a, b);
}

// CHECK-LABEL: @test_vcgeq_u32(
// CHECK: [[CMP_I:%.*]] = icmp uge <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_u32(uint32x4_t a, uint32x4_t b) {
  return vcgeq_u32(a, b);
}

// CHECK-LABEL: @test_vcgt_s8(
// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_s8(int8x8_t a, int8x8_t b) {
  return vcgt_s8(a, b);
}

// CHECK-LABEL: @test_vcgt_s16(
// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_s16(int16x4_t a, int16x4_t b) {
  return vcgt_s16(a, b);
}

// CHECK-LABEL: @test_vcgt_s32(
// CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_s32(int32x2_t a, int32x2_t b) {
  return vcgt_s32(a, b);
}

// CHECK-LABEL: @test_vcgt_f32(
// CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_f32(float32x2_t a, float32x2_t b) {
  return vcgt_f32(a, b);
}

// CHECK-LABEL: @test_vcgt_u8(
// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_u8(uint8x8_t a, uint8x8_t b) {
  return vcgt_u8(a, b);
}

// CHECK-LABEL: @test_vcgt_u16(
// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_u16(uint16x4_t a, uint16x4_t b) {
  return vcgt_u16(a, b);
}

// CHECK-LABEL: @test_vcgt_u32(
// CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_u32(uint32x2_t a, uint32x2_t b) {
  return vcgt_u32(a, b);
}

// CHECK-LABEL: @test_vcgtq_s8(
// CHECK: [[CMP_I:%.*]] = icmp sgt <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_s8(int8x16_t a, int8x16_t b) {
  return vcgtq_s8(a, b);
}

// CHECK-LABEL: @test_vcgtq_s16(
// CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_s16(int16x8_t a, int16x8_t b) {
  return vcgtq_s16(a, b);
}

// CHECK-LABEL: @test_vcgtq_s32(
// CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_s32(int32x4_t a, int32x4_t b) {
  return vcgtq_s32(a, b);
}

// CHECK-LABEL: @test_vcgtq_f32(
// CHECK: [[CMP_I:%.*]] = fcmp ogt <4 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_f32(float32x4_t a, float32x4_t b) {
  return vcgtq_f32(a, b);
}

// CHECK-LABEL: @test_vcgtq_u8(
// CHECK: [[CMP_I:%.*]] = icmp ugt <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_u8(uint8x16_t a, uint8x16_t b) {
  return vcgtq_u8(a, b);
}

// CHECK-LABEL: @test_vcgtq_u16(
// CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_u16(uint16x8_t a, uint16x8_t b) {
  return vcgtq_u16(a, b);
}

// CHECK-LABEL: @test_vcgtq_u32(
// CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_u32(uint32x4_t a, uint32x4_t b) {
  return vcgtq_u32(a, b);
}

// CHECK-LABEL: @test_vcle_s8(
// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_s8(int8x8_t a, int8x8_t b) {
  return vcle_s8(a, b);
}

// CHECK-LABEL: @test_vcle_s16(
// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_s16(int16x4_t a, int16x4_t b) {
  return vcle_s16(a, b);
}

// CHECK-LABEL: @test_vcle_s32(
// CHECK: [[CMP_I:%.*]] = icmp sle <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_s32(int32x2_t a, int32x2_t b) {
  return vcle_s32(a, b);
}

// CHECK-LABEL: @test_vcle_f32(
// CHECK: [[CMP_I:%.*]] = fcmp ole <2 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_f32(float32x2_t a, float32x2_t b) {
  return vcle_f32(a, b);
}

// CHECK-LABEL: @test_vcle_u8(
// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_u8(uint8x8_t a, uint8x8_t b) {
  return vcle_u8(a, b);
}

// CHECK-LABEL: @test_vcle_u16(
// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_u16(uint16x4_t a, uint16x4_t b) {
  return vcle_u16(a, b);
}

// CHECK-LABEL: @test_vcle_u32(
// CHECK: [[CMP_I:%.*]] = icmp ule <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_u32(uint32x2_t a, uint32x2_t b) {
  return vcle_u32(a, b);
}

// CHECK-LABEL: @test_vcleq_s8(
// CHECK: [[CMP_I:%.*]] = icmp sle <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_s8(int8x16_t a, int8x16_t b) {
  return vcleq_s8(a, b);
}

// CHECK-LABEL: @test_vcleq_s16(
// CHECK: [[CMP_I:%.*]] = icmp sle <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_s16(int16x8_t a, int16x8_t b) {
  return vcleq_s16(a, b);
}

// CHECK-LABEL: @test_vcleq_s32(
// CHECK: [[CMP_I:%.*]] = icmp sle <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_s32(int32x4_t a, int32x4_t b) {
  return vcleq_s32(a, b);
}

// CHECK-LABEL: @test_vcleq_f32(
// CHECK: [[CMP_I:%.*]] = fcmp ole <4 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_f32(float32x4_t a, float32x4_t b) {
  return vcleq_f32(a, b);
}

// CHECK-LABEL: @test_vcleq_u8(
// CHECK: [[CMP_I:%.*]] = icmp ule <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_u8(uint8x16_t a, uint8x16_t b) {
  return vcleq_u8(a, b);
}

// CHECK-LABEL: @test_vcleq_u16(
// CHECK: [[CMP_I:%.*]] = icmp ule <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_u16(uint16x8_t a, uint16x8_t b) {
  return vcleq_u16(a, b);
}

// CHECK-LABEL: @test_vcleq_u32(
// CHECK: [[CMP_I:%.*]] = icmp ule <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_u32(uint32x4_t a, uint32x4_t b) {
  return vcleq_u32(a, b);
}

// CHECK-LABEL: @test_vcls_s8(
// CHECK: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a)
// CHECK: ret <8 x i8> [[VCLS_V_I]]
int8x8_t test_vcls_s8(int8x8_t a) {
  return vcls_s8(a);
}

// CHECK-LABEL: @test_vcls_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %a)
// CHECK: [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VCLS_V1_I]]
int16x4_t test_vcls_s16(int16x4_t a) {
  return vcls_s16(a);
}

// CHECK-LABEL: @test_vcls_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %a)
// CHECK: [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VCLS_V1_I]]
int32x2_t test_vcls_s32(int32x2_t a) {
  return vcls_s32(a);
}

// CHECK-LABEL: @test_vcls_u8(
// CHECK: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a)
// CHECK: ret <8 x i8> [[VCLS_V_I]]
int8x8_t test_vcls_u8(uint8x8_t a) {
  return vcls_u8(a);
}

// CHECK-LABEL: @test_vcls_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %a)
// CHECK: [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VCLS_V1_I]]
int16x4_t test_vcls_u16(uint16x4_t a) {
  return vcls_u16(a);
}

// CHECK-LABEL: @test_vcls_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %a)
// CHECK: [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VCLS_V1_I]]
int32x2_t test_vcls_u32(uint32x2_t a) {
  return vcls_u32(a);
}

// CHECK-LABEL: @test_vclsq_s8(
// CHECK: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a)
// CHECK: ret <16 x i8> [[VCLSQ_V_I]]
int8x16_t test_vclsq_s8(int8x16_t a) {
  return vclsq_s8(a);
}

// CHECK-LABEL: @test_vclsq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %a)
// CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VCLSQ_V1_I]]
int16x8_t test_vclsq_s16(int16x8_t a) {
  return vclsq_s16(a);
}

// CHECK-LABEL: @test_vclsq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %a)
// CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VCLSQ_V1_I]]
int32x4_t test_vclsq_s32(int32x4_t a) {
  return vclsq_s32(a);
}

// CHECK-LABEL: @test_vclsq_u8(
// CHECK: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a)
// CHECK: ret <16 x i8> [[VCLSQ_V_I]]
int8x16_t test_vclsq_u8(uint8x16_t a) {
  return vclsq_u8(a);
}

// CHECK-LABEL: @test_vclsq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %a)
// CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VCLSQ_V1_I]]
int16x8_t test_vclsq_u16(uint16x8_t a) {
  return vclsq_u16(a);
}

// CHECK-LABEL: @test_vclsq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %a)
// CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VCLSQ_V1_I]]
int32x4_t test_vclsq_u32(uint32x4_t a) {
  return vclsq_u32(a);
}
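
// Note: vcls (count leading sign bits) maps to the target intrinsic
// llvm.arm.neon.vcls; as the CHECK lines show, the _u variants lower to the
// same intrinsic as the signed ones.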

// CHECK-LABEL: @test_vclt_s8(
// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_s8(int8x8_t a, int8x8_t b) {
  return vclt_s8(a, b);
}

// CHECK-LABEL: @test_vclt_s16(
// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_s16(int16x4_t a, int16x4_t b) {
  return vclt_s16(a, b);
}

// CHECK-LABEL: @test_vclt_s32(
// CHECK: [[CMP_I:%.*]] = icmp slt <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_s32(int32x2_t a, int32x2_t b) {
  return vclt_s32(a, b);
}

// CHECK-LABEL: @test_vclt_f32(
// CHECK: [[CMP_I:%.*]] = fcmp olt <2 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_f32(float32x2_t a, float32x2_t b) {
  return vclt_f32(a, b);
}

// CHECK-LABEL: @test_vclt_u8(
// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_u8(uint8x8_t a, uint8x8_t b) {
  return vclt_u8(a, b);
}

// CHECK-LABEL: @test_vclt_u16(
// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_u16(uint16x4_t a, uint16x4_t b) {
  return vclt_u16(a, b);
}

// CHECK-LABEL: @test_vclt_u32(
// CHECK: [[CMP_I:%.*]] = icmp ult <2 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_u32(uint32x2_t a, uint32x2_t b) {
  return vclt_u32(a, b);
}

// CHECK-LABEL: @test_vcltq_s8(
// CHECK: [[CMP_I:%.*]] = icmp slt <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_s8(int8x16_t a, int8x16_t b) {
  return vcltq_s8(a, b);
}

// CHECK-LABEL: @test_vcltq_s16(
// CHECK: [[CMP_I:%.*]] = icmp slt <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_s16(int16x8_t a, int16x8_t b) {
  return vcltq_s16(a, b);
}

// CHECK-LABEL: @test_vcltq_s32(
// CHECK: [[CMP_I:%.*]] = icmp slt <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_s32(int32x4_t a, int32x4_t b) {
  return vcltq_s32(a, b);
}

// CHECK-LABEL: @test_vcltq_f32(
// CHECK: [[CMP_I:%.*]] = fcmp olt <4 x float> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_f32(float32x4_t a, float32x4_t b) {
  return vcltq_f32(a, b);
}

// CHECK-LABEL: @test_vcltq_u8(
// CHECK: [[CMP_I:%.*]] = icmp ult <16 x i8> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_u8(uint8x16_t a, uint8x16_t b) {
  return vcltq_u8(a, b);
}

// CHECK-LABEL: @test_vcltq_u16(
// CHECK: [[CMP_I:%.*]] = icmp ult <8 x i16> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_u16(uint16x8_t a, uint16x8_t b) {
  return vcltq_u16(a, b);
}

// CHECK-LABEL: @test_vcltq_u32(
// CHECK: [[CMP_I:%.*]] = icmp ult <4 x i32> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_u32(uint32x4_t a, uint32x4_t b) {
  return vcltq_u32(a, b);
}

// CHECK-LABEL: @test_vclz_s8(
// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false)
// CHECK: ret <8 x i8> [[VCLZ_V_I]]
int8x8_t test_vclz_s8(int8x8_t a) {
  return vclz_s8(a);
}

// CHECK-LABEL: @test_vclz_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false)
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VCLZ_V1_I]]
int16x4_t test_vclz_s16(int16x4_t a) {
  return vclz_s16(a);
}

// CHECK-LABEL: @test_vclz_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VCLZ_V1_I]]
int32x2_t test_vclz_s32(int32x2_t a) {
  return vclz_s32(a);
}

// CHECK-LABEL: @test_vclz_u8(
// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false)
// CHECK: ret <8 x i8> [[VCLZ_V_I]]
uint8x8_t test_vclz_u8(uint8x8_t a) {
  return vclz_u8(a);
}

// CHECK-LABEL: @test_vclz_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false)
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VCLZ_V1_I]]
uint16x4_t test_vclz_u16(uint16x4_t a) {
  return vclz_u16(a);
}

// CHECK-LABEL: @test_vclz_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VCLZ_V1_I]]
uint32x2_t test_vclz_u32(uint32x2_t a) {
  return vclz_u32(a);
}

// CHECK-LABEL: @test_vclzq_s8(
// CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
// CHECK: ret <16 x i8> [[VCLZQ_V_I]]
int8x16_t test_vclzq_s8(int8x16_t a) {
  return vclzq_s8(a);
}

// CHECK-LABEL: @test_vclzq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VCLZQ_V1_I]]
int16x8_t test_vclzq_s16(int16x8_t a) {
  return vclzq_s16(a);
}

// CHECK-LABEL: @test_vclzq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VCLZQ_V1_I]]
int32x4_t test_vclzq_s32(int32x4_t a) {
  return vclzq_s32(a);
}

// CHECK-LABEL: @test_vclzq_u8(
// CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
// CHECK: ret <16 x i8> [[VCLZQ_V_I]]
uint8x16_t test_vclzq_u8(uint8x16_t a) {
  return vclzq_u8(a);
}

// CHECK-LABEL: @test_vclzq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VCLZQ_V1_I]]
uint16x8_t test_vclzq_u16(uint16x8_t a) {
  return vclzq_u16(a);
}

// CHECK-LABEL: @test_vclzq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
// CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VCLZQ_V1_I]]
uint32x4_t test_vclzq_u32(uint32x4_t a) {
  return vclzq_u32(a);
}
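
// Note: vclz lowers to the generic llvm.ctlz intrinsic; the "i1 false"
// argument requests a defined result (the element width) for a zero input,
// matching the behaviour of the ARM CLZ instruction.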

// CHECK-LABEL: @test_vcnt_u8(
// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
// CHECK: ret <8 x i8> [[VCNT_V_I]]
uint8x8_t test_vcnt_u8(uint8x8_t a) {
  return vcnt_u8(a);
}

// CHECK-LABEL: @test_vcnt_s8(
// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
// CHECK: ret <8 x i8> [[VCNT_V_I]]
int8x8_t test_vcnt_s8(int8x8_t a) {
  return vcnt_s8(a);
}

// CHECK-LABEL: @test_vcnt_p8(
// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
// CHECK: ret <8 x i8> [[VCNT_V_I]]
poly8x8_t test_vcnt_p8(poly8x8_t a) {
  return vcnt_p8(a);
}

// CHECK-LABEL: @test_vcntq_u8(
// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
// CHECK: ret <16 x i8> [[VCNTQ_V_I]]
uint8x16_t test_vcntq_u8(uint8x16_t a) {
  return vcntq_u8(a);
}

// CHECK-LABEL: @test_vcntq_s8(
// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
// CHECK: ret <16 x i8> [[VCNTQ_V_I]]
int8x16_t test_vcntq_s8(int8x16_t a) {
  return vcntq_s8(a);
}

// CHECK-LABEL: @test_vcntq_p8(
// CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
// CHECK: ret <16 x i8> [[VCNTQ_V_I]]
poly8x16_t test_vcntq_p8(poly8x16_t a) {
  return vcntq_p8(a);
}
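
// Note: vcnt (per-byte population count) lowers to the generic llvm.ctpop
// intrinsic rather than a target-specific one, for all element flavours.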

// CHECK-LABEL: @test_vcombine_s8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vcombine_s8(int8x8_t a, int8x8_t b) {
  return vcombine_s8(a, b);
}

// CHECK-LABEL: @test_vcombine_s16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vcombine_s16(int16x4_t a, int16x4_t b) {
  return vcombine_s16(a, b);
}

// CHECK-LABEL: @test_vcombine_s32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vcombine_s32(int32x2_t a, int32x2_t b) {
  return vcombine_s32(a, b);
}

// CHECK-LABEL: @test_vcombine_s64(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
int64x2_t test_vcombine_s64(int64x1_t a, int64x1_t b) {
  return vcombine_s64(a, b);
}

// CHECK-LABEL: @test_vcombine_f16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x half> [[SHUFFLE_I]]
float16x8_t test_vcombine_f16(float16x4_t a, float16x4_t b) {
  return vcombine_f16(a, b);
}

// CHECK-LABEL: @test_vcombine_f32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x float> [[SHUFFLE_I]]
float32x4_t test_vcombine_f32(float32x2_t a, float32x2_t b) {
  return vcombine_f32(a, b);
}

// CHECK-LABEL: @test_vcombine_u8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vcombine_u8(uint8x8_t a, uint8x8_t b) {
  return vcombine_u8(a, b);
}

// CHECK-LABEL: @test_vcombine_u16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vcombine_u16(uint16x4_t a, uint16x4_t b) {
  return vcombine_u16(a, b);
}

// CHECK-LABEL: @test_vcombine_u32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vcombine_u32(uint32x2_t a, uint32x2_t b) {
  return vcombine_u32(a, b);
}

// CHECK-LABEL: @test_vcombine_u64(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
uint64x2_t test_vcombine_u64(uint64x1_t a, uint64x1_t b) {
  return vcombine_u64(a, b);
}

// CHECK-LABEL: @test_vcombine_p8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vcombine_p8(poly8x8_t a, poly8x8_t b) {
  return vcombine_p8(a, b);
}

// CHECK-LABEL: @test_vcombine_p16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vcombine_p16(poly16x4_t a, poly16x4_t b) {
  return vcombine_p16(a, b);
}
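
// Note: vcombine needs no intrinsic at all; concatenating two D registers
// into a Q register is expressed as a shufflevector that selects every lane
// of both operands in order.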

// CHECK-LABEL: @test_vcreate_s8(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false)
// CHECK: ret <8 x i8> [[VCLZ_V_I]]
int8x8_t test_vcreate_s8(uint64_t a) {
  return vclz_s8(vcreate_s8(a));
}

// CHECK-LABEL: @test_vcreate_imm
// CHECK: [[RES:%.*]] = bitcast i64 0 to <4 x i16>
// CHECK: ret <4 x i16> [[RES]]
int16x4_t test_vcreate_imm(void) {
  return vcreate_s16(0);
}

// CHECK-LABEL: @test_vcreate_s16(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[TMP0]], i1 false)
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VCLZ_V1_I]]
int16x4_t test_vcreate_s16(uint64_t a) {
  return vclz_s16(vcreate_s16(a));
}

// CHECK-LABEL: @test_vcreate_s32(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP0]], i1 false)
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VCLZ_V1_I]]
int32x2_t test_vcreate_s32(uint64_t a) {
  return vclz_s32(vcreate_s32(a));
}

// CHECK-LABEL: @test_vcreate_f16(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vcreate_f16(uint64_t a) {
  return vcreate_f16(a);
}

// CHECK-LABEL: @test_vcreate_f32(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vcreate_f32(uint64_t a) {
  return vcreate_f32(a);
}

// CHECK-LABEL: @test_vcreate_u8(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
// CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false)
// CHECK: ret <8 x i8> [[VCLZ_V_I]]
int8x8_t test_vcreate_u8(uint64_t a) {
  return vclz_s8((int8x8_t)vcreate_u8(a));
}

// CHECK-LABEL: @test_vcreate_u16(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[TMP0]], i1 false)
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VCLZ_V1_I]]
int16x4_t test_vcreate_u16(uint64_t a) {
  return vclz_s16((int16x4_t)vcreate_u16(a));
}

// CHECK-LABEL: @test_vcreate_u32(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP0]], i1 false)
// CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VCLZ_V1_I]]
int32x2_t test_vcreate_u32(uint64_t a) {
  return vclz_s32((int32x2_t)vcreate_u32(a));
}

// CHECK-LABEL: @test_vcreate_u64(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]]
// CHECK: ret <1 x i64> [[ADD_I]]
uint64x1_t test_vcreate_u64(uint64_t a) {
  uint64x1_t tmp = vcreate_u64(a);
  return vadd_u64(tmp, tmp);
}

// CHECK-LABEL: @test_vcreate_p8(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
// CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> [[TMP0]])
// CHECK: ret <8 x i8> [[VCNT_V_I]]
poly8x8_t test_vcreate_p8(uint64_t a) {
  return vcnt_p8(vcreate_p8(a));
}

// CHECK-LABEL: @test_vcreate_p16(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]])
// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK: ret <4 x i16> [[TMP4]]
poly16x4_t test_vcreate_p16(uint64_t a) {
  poly16x4_t tmp = vcreate_p16(a);
  return vbsl_p16((uint16x4_t)tmp, tmp, tmp);
}

// CHECK-LABEL: @test_vcreate_s64(
// CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]]
// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vcreate_s64(uint64_t a) {
  int64x1_t tmp = vcreate_s64(a);
  return vadd_s64(tmp, tmp);
}
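
// Note: vcreate is just a bitcast of the i64 argument to the requested
// vector type. Most of these tests feed the result through another
// intrinsic (vclz, vcnt, vadd, vbsl), presumably so the bitcast has a use
// and the surrounding lowering stays visible in the output.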

// CHECK-LABEL: @test_vcvt_f16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %a)
// CHECK: [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x half>
// CHECK: ret <4 x half> [[TMP1]]
float16x4_t test_vcvt_f16_f32(float32x4_t a) {
  return vcvt_f16_f32(a);
}

// CHECK-LABEL: @test_vcvt_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCVT_I:%.*]] = sitofp <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[VCVT_I]]
float32x2_t test_vcvt_f32_s32(int32x2_t a) {
  return vcvt_f32_s32(a);
}

// CHECK-LABEL: @test_vcvt_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCVT_I:%.*]] = uitofp <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[VCVT_I]]
float32x2_t test_vcvt_f32_u32(uint32x2_t a) {
  return vcvt_f32_u32(a);
}

// CHECK-LABEL: @test_vcvtq_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCVT_I:%.*]] = sitofp <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[VCVT_I]]
float32x4_t test_vcvtq_f32_s32(int32x4_t a) {
  return vcvtq_f32_s32(a);
}

// CHECK-LABEL: @test_vcvtq_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCVT_I:%.*]] = uitofp <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[VCVT_I]]
float32x4_t test_vcvtq_f32_u32(uint32x4_t a) {
  return vcvtq_f32_u32(a);
}

// CHECK-LABEL: @test_vcvt_f32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]])
// CHECK: [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I]] to <16 x i8>
// CHECK: ret <4 x float> [[VCVT_F32_F161_I]]
float32x4_t test_vcvt_f32_f16(float16x4_t a) {
  return vcvt_f32_f16(a);
}

// CHECK-LABEL: @test_vcvt_n_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1)
// CHECK: ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
  return vcvt_n_f32_s32(a, 1);
}

// CHECK-LABEL: @test_vcvt_n_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1)
// CHECK: ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
  return vcvt_n_f32_u32(a, 1);
}

// CHECK-LABEL: @test_vcvtq_n_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3)
// CHECK: ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
  return vcvtq_n_f32_s32(a, 3);
}

// CHECK-LABEL: @test_vcvtq_n_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3)
// CHECK: ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
  return vcvtq_n_f32_u32(a, 3);
}

// CHECK-LABEL: @test_vcvt_n_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1)
// CHECK: ret <2 x i32> [[VCVT_N1]]
int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
  return vcvt_n_s32_f32(a, 1);
}

// CHECK-LABEL: @test_vcvtq_n_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3)
// CHECK: ret <4 x i32> [[VCVT_N1]]
int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
  return vcvtq_n_s32_f32(a, 3);
}

// CHECK-LABEL: @test_vcvt_n_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1)
// CHECK: ret <2 x i32> [[VCVT_N1]]
uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
  return vcvt_n_u32_f32(a, 1);
}

// CHECK-LABEL: @test_vcvtq_n_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3)
// CHECK: ret <4 x i32> [[VCVT_N1]]
uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
  return vcvtq_n_u32_f32(a, 3);
}

// CHECK-LABEL: @test_vcvt_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_I:%.*]] = fptosi <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[VCVT_I]]
int32x2_t test_vcvt_s32_f32(float32x2_t a) {
  return vcvt_s32_f32(a);
}

// CHECK-LABEL: @test_vcvtq_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_I:%.*]] = fptosi <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[VCVT_I]]
int32x4_t test_vcvtq_s32_f32(float32x4_t a) {
  return vcvtq_s32_f32(a);
}

// CHECK-LABEL: @test_vcvt_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_I:%.*]] = fptoui <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[VCVT_I]]
uint32x2_t test_vcvt_u32_f32(float32x2_t a) {
  return vcvt_u32_f32(a);
}

// CHECK-LABEL: @test_vcvtq_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_I:%.*]] = fptoui <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[VCVT_I]]
uint32x4_t test_vcvtq_u32_f32(float32x4_t a) {
  return vcvtq_u32_f32(a);
}
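
// Note: the plain integer<->float conversions above lower to the generic
// sitofp/uitofp/fptosi/fptoui instructions; only the fixed-point _n_
// variants (which carry a fraction-bits immediate) and the half-precision
// conversions need ARM-specific intrinsics.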

// CHECK-LABEL: @test_vdup_lane_u8(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE]]
uint8x8_t test_vdup_lane_u8(uint8x8_t a) {
  return vdup_lane_u8(a, 7);
}

// CHECK-LABEL: @test_vdup_lane_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <4 x i16> [[LANE]]
uint16x4_t test_vdup_lane_u16(uint16x4_t a) {
  return vdup_lane_u16(a, 3);
}

// CHECK-LABEL: @test_vdup_lane_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: ret <2 x i32> [[LANE]]
uint32x2_t test_vdup_lane_u32(uint32x2_t a) {
  return vdup_lane_u32(a, 1);
}

// CHECK-LABEL: @test_vdup_lane_s8(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE]]
int8x8_t test_vdup_lane_s8(int8x8_t a) {
  return vdup_lane_s8(a, 7);
}

// CHECK-LABEL: @test_vdup_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <4 x i16> [[LANE]]
int16x4_t test_vdup_lane_s16(int16x4_t a) {
  return vdup_lane_s16(a, 3);
}

// CHECK-LABEL: @test_vdup_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: ret <2 x i32> [[LANE]]
int32x2_t test_vdup_lane_s32(int32x2_t a) {
  return vdup_lane_s32(a, 1);
}

// CHECK-LABEL: @test_vdup_lane_p8(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <8 x i8> [[SHUFFLE]]
poly8x8_t test_vdup_lane_p8(poly8x8_t a) {
  return vdup_lane_p8(a, 7);
}

// CHECK-LABEL: @test_vdup_lane_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <4 x i16> [[LANE]]
poly16x4_t test_vdup_lane_p16(poly16x4_t a) {
  return vdup_lane_p16(a, 3);
}

// CHECK-LABEL: @test_vdup_lane_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: ret <2 x float> [[LANE]]
float32x2_t test_vdup_lane_f32(float32x2_t a) {
  return vdup_lane_f32(a, 1);
}

// CHECK-LABEL: @test_vdupq_lane_u8(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <16 x i8> [[SHUFFLE]]
uint8x16_t test_vdupq_lane_u8(uint8x8_t a) {
  return vdupq_lane_u8(a, 7);
}

// CHECK-LABEL: @test_vdupq_lane_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <8 x i16> [[LANE]]
uint16x8_t test_vdupq_lane_u16(uint16x4_t a) {
  return vdupq_lane_u16(a, 3);
}

// CHECK-LABEL: @test_vdupq_lane_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[LANE]]
uint32x4_t test_vdupq_lane_u32(uint32x2_t a) {
  return vdupq_lane_u32(a, 1);
}

// CHECK-LABEL: @test_vdupq_lane_s8(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <16 x i8> [[SHUFFLE]]
int8x16_t test_vdupq_lane_s8(int8x8_t a) {
  return vdupq_lane_s8(a, 7);
}

// CHECK-LABEL: @test_vdupq_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <8 x i16> [[LANE]]
int16x8_t test_vdupq_lane_s16(int16x4_t a) {
  return vdupq_lane_s16(a, 3);
}

// CHECK-LABEL: @test_vdupq_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[LANE]]
int32x4_t test_vdupq_lane_s32(int32x2_t a) {
  return vdupq_lane_s32(a, 1);
}

// CHECK-LABEL: @test_vdupq_lane_p8(
// CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: ret <16 x i8> [[SHUFFLE]]
poly8x16_t test_vdupq_lane_p8(poly8x8_t a) {
  return vdupq_lane_p8(a, 7);
}

// CHECK-LABEL: @test_vdupq_lane_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: ret <8 x i16> [[LANE]]
poly16x8_t test_vdupq_lane_p16(poly16x4_t a) {
  return vdupq_lane_p16(a, 3);
}

// CHECK-LABEL: @test_vdupq_lane_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x float> [[LANE]]
float32x4_t test_vdupq_lane_f32(float32x2_t a) {
  return vdupq_lane_f32(a, 1);
}

// CHECK-LABEL: @test_vdup_lane_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[LANE]]
int64x1_t test_vdup_lane_s64(int64x1_t a) {
  return vdup_lane_s64(a, 0);
}

// CHECK-LABEL: @test_vdup_lane_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[LANE]]
uint64x1_t test_vdup_lane_u64(uint64x1_t a) {
  return vdup_lane_u64(a, 0);
}

// CHECK-LABEL: @test_vdupq_lane_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[LANE]]
int64x2_t test_vdupq_lane_s64(int64x1_t a) {
  return vdupq_lane_s64(a, 0);
}

// CHECK-LABEL: @test_vdupq_lane_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[LANE]]
uint64x2_t test_vdupq_lane_u64(uint64x1_t a) {
  return vdupq_lane_u64(a, 0);
}
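
// Note: vdup_lane/vdupq_lane lower to a shufflevector whose constant mask
// repeats the selected lane; no intrinsic is involved, and the lane index
// must be a compile-time constant.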
2652 // CHECK-LABEL: @test_vdup_n_u8(
2653 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
2654 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
2655 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
2656 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
2657 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
2658 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
2659 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
2660 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
2661 // CHECK: ret <8 x i8> [[VECINIT7_I]]
2662 uint8x8_t test_vdup_n_u8(uint8_t a) {
2663 return vdup_n_u8(a);
2666 // CHECK-LABEL: @test_vdup_n_u16(
2667 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
2668 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
2669 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
2670 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
2671 // CHECK: ret <4 x i16> [[VECINIT3_I]]
2672 uint16x4_t test_vdup_n_u16(uint16_t a) {
2673 return vdup_n_u16(a);
2676 // CHECK-LABEL: @test_vdup_n_u32(
2677 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
2678 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
2679 // CHECK: ret <2 x i32> [[VECINIT1_I]]
2680 uint32x2_t test_vdup_n_u32(uint32_t a) {
2681 return vdup_n_u32(a);
2684 // CHECK-LABEL: @test_vdup_n_s8(
2685 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
2686 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
2687 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
2688 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
2689 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
2690 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
2691 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
2692 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
2693 // CHECK: ret <8 x i8> [[VECINIT7_I]]
2694 int8x8_t test_vdup_n_s8(int8_t a) {
2695 return vdup_n_s8(a);
2698 // CHECK-LABEL: @test_vdup_n_s16(
2699 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
2700 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
2701 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
2702 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
2703 // CHECK: ret <4 x i16> [[VECINIT3_I]]
2704 int16x4_t test_vdup_n_s16(int16_t a) {
2705 return vdup_n_s16(a);
2708 // CHECK-LABEL: @test_vdup_n_s32(
2709 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
2710 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
2711 // CHECK: ret <2 x i32> [[VECINIT1_I]]
2712 int32x2_t test_vdup_n_s32(int32_t a) {
2713 return vdup_n_s32(a);
2716 // CHECK-LABEL: @test_vdup_n_p8(
2717 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
2718 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
2719 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
2720 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
2721 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
2722 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
2723 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
2724 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
2725 // CHECK: ret <8 x i8> [[VECINIT7_I]]
2726 poly8x8_t test_vdup_n_p8(poly8_t a) {
2727 return vdup_n_p8(a);
2728 }
2730 // CHECK-LABEL: @test_vdup_n_p16(
2731 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
2732 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
2733 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
2734 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
2735 // CHECK: ret <4 x i16> [[VECINIT3_I]]
2736 poly16x4_t test_vdup_n_p16(poly16_t a) {
2737 return vdup_n_p16(a);
2738 }
2740 // CHECK-LABEL: @test_vdup_n_f16(
2741 // CHECK: [[TMP0:%.*]] = load half, ptr %a, align 2
2742 // CHECK: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0
2743 // CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
2744 // CHECK: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2
2745 // CHECK: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3
2746 // CHECK: ret <4 x half> [[VECINIT3]]
2747 float16x4_t test_vdup_n_f16(float16_t *a) {
2748 return vdup_n_f16(*a);
2749 }
2751 // CHECK-LABEL: @test_vdup_n_f32(
2752 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %a, i32 0
2753 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %a, i32 1
2754 // CHECK: ret <2 x float> [[VECINIT1_I]]
2755 float32x2_t test_vdup_n_f32(float32_t a) {
2756 return vdup_n_f32(a);
2757 }
2759 // CHECK-LABEL: @test_vdupq_n_u8(
2760 // CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
2761 // CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
2762 // CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
2763 // CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
2764 // CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
2765 // CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
2766 // CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
2767 // CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
2768 // CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
2769 // CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
2770 // CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
2771 // CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
2772 // CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
2773 // CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
2774 // CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
2775 // CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
2776 // CHECK: ret <16 x i8> [[VECINIT15_I]]
2777 uint8x16_t test_vdupq_n_u8(uint8_t a) {
2778 return vdupq_n_u8(a);
2779 }
2781 // CHECK-LABEL: @test_vdupq_n_u16(
2782 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
2783 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
2784 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
2785 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
2786 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
2787 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
2788 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
2789 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
2790 // CHECK: ret <8 x i16> [[VECINIT7_I]]
2791 uint16x8_t test_vdupq_n_u16(uint16_t a) {
2792 return vdupq_n_u16(a);
2793 }
2795 // CHECK-LABEL: @test_vdupq_n_u32(
2796 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
2797 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
2798 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
2799 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
2800 // CHECK: ret <4 x i32> [[VECINIT3_I]]
2801 uint32x4_t test_vdupq_n_u32(uint32_t a) {
2802 return vdupq_n_u32(a);
2803 }
2805 // CHECK-LABEL: @test_vdupq_n_s8(
2806 // CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
2807 // CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
2808 // CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
2809 // CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
2810 // CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
2811 // CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
2812 // CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
2813 // CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
2814 // CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
2815 // CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
2816 // CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
2817 // CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
2818 // CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
2819 // CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
2820 // CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
2821 // CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
2822 // CHECK: ret <16 x i8> [[VECINIT15_I]]
2823 int8x16_t test_vdupq_n_s8(int8_t a) {
2824 return vdupq_n_s8(a);
2825 }
2827 // CHECK-LABEL: @test_vdupq_n_s16(
2828 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
2829 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
2830 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
2831 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
2832 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
2833 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
2834 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
2835 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
2836 // CHECK: ret <8 x i16> [[VECINIT7_I]]
2837 int16x8_t test_vdupq_n_s16(int16_t a) {
2838 return vdupq_n_s16(a);
2839 }
2841 // CHECK-LABEL: @test_vdupq_n_s32(
2842 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
2843 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
2844 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
2845 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
2846 // CHECK: ret <4 x i32> [[VECINIT3_I]]
2847 int32x4_t test_vdupq_n_s32(int32_t a) {
2848 return vdupq_n_s32(a);
2849 }
2851 // CHECK-LABEL: @test_vdupq_n_p8(
2852 // CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
2853 // CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
2854 // CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
2855 // CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
2856 // CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
2857 // CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
2858 // CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
2859 // CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
2860 // CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
2861 // CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
2862 // CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
2863 // CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
2864 // CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
2865 // CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
2866 // CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
2867 // CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
2868 // CHECK: ret <16 x i8> [[VECINIT15_I]]
2869 poly8x16_t test_vdupq_n_p8(poly8_t a) {
2870 return vdupq_n_p8(a);
2871 }
2873 // CHECK-LABEL: @test_vdupq_n_p16(
2874 // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
2875 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
2876 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
2877 // CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
2878 // CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
2879 // CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
2880 // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
2881 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
2882 // CHECK: ret <8 x i16> [[VECINIT7_I]]
2883 poly16x8_t test_vdupq_n_p16(poly16_t a) {
2884 return vdupq_n_p16(a);
2885 }
2887 // CHECK-LABEL: @test_vdupq_n_f16(
2888 // CHECK: [[TMP0:%.*]] = load half, ptr %a, align 2
2889 // CHECK: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[TMP0]], i32 0
2890 // CHECK: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
2891 // CHECK: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2
2892 // CHECK: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3
2893 // CHECK: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4
2894 // CHECK: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5
2895 // CHECK: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6
2896 // CHECK: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7
2897 // CHECK: ret <8 x half> [[VECINIT7]]
2898 float16x8_t test_vdupq_n_f16(float16_t *a) {
2899 return vdupq_n_f16(*a);
2900 }
2902 // CHECK-LABEL: @test_vdupq_n_f32(
2903 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %a, i32 0
2904 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %a, i32 1
2905 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %a, i32 2
2906 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %a, i32 3
2907 // CHECK: ret <4 x float> [[VECINIT3_I]]
2908 float32x4_t test_vdupq_n_f32(float32_t a) {
2909 return vdupq_n_f32(a);
2910 }
2912 // CHECK-LABEL: @test_vdup_n_s64(
2913 // CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
2914 // CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
2915 // CHECK: ret <1 x i64> [[ADD_I]]
2916 int64x1_t test_vdup_n_s64(int64_t a) {
2917 int64x1_t tmp = vdup_n_s64(a);
2918 return vadd_s64(tmp, tmp);
2919 }
2921 // CHECK-LABEL: @test_vdup_n_u64(
2922 // CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
2923 // CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
2924 // CHECK: ret <1 x i64> [[ADD_I]]
2925 int64x1_t test_vdup_n_u64(uint64_t a) {
2926 int64x1_t tmp = (int64x1_t)vdup_n_u64(a);
2927 return vadd_s64(tmp, tmp);
2928 }
2930 // CHECK-LABEL: @test_vdupq_n_s64(
2931 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
2932 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
2933 // CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]]
2934 // CHECK: ret <2 x i64> [[ADD_I]]
2935 int64x2_t test_vdupq_n_s64(int64_t a) {
2936 int64x2_t tmp = vdupq_n_s64(a);
2937 return vaddq_s64(tmp, tmp);
2938 }
2940 // CHECK-LABEL: @test_vdupq_n_u64(
2941 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
2942 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
2943 // CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]]
2944 // CHECK: ret <2 x i64> [[ADD_I]]
2945 uint64x2_t test_vdupq_n_u64(uint64_t a) {
2946 uint64x2_t tmp = vdupq_n_u64(a);
2947 return vaddq_u64(tmp, tmp);
2948 }
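// Usage sketch, not part of the checked tests: vdup_n_* splats one scalar
// across every lane of a d register (vdupq_n_* fills a q register), the usual
// way to feed a constant into a lane-wise operation. The 64-bit splats above
// are run through vadd so the splatted value is actually used in the IR.
// Hypothetical helper:
static inline uint8x8_t add_bias_u8(uint8x8_t v) {
  uint8x8_t bias = vdup_n_u8(3); // <3,3,3,3,3,3,3,3>
  return vadd_u8(v, bias);       // lane-wise add of the splatted constant
}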
2950 // CHECK-LABEL: @test_veor_s8(
2951 // CHECK: [[XOR_I:%.*]] = xor <8 x i8> %a, %b
2952 // CHECK: ret <8 x i8> [[XOR_I]]
2953 int8x8_t test_veor_s8(int8x8_t a, int8x8_t b) {
2954 return veor_s8(a, b);
2955 }
2957 // CHECK-LABEL: @test_veor_s16(
2958 // CHECK: [[XOR_I:%.*]] = xor <4 x i16> %a, %b
2959 // CHECK: ret <4 x i16> [[XOR_I]]
2960 int16x4_t test_veor_s16(int16x4_t a, int16x4_t b) {
2961 return veor_s16(a, b);
2962 }
2964 // CHECK-LABEL: @test_veor_s32(
2965 // CHECK: [[XOR_I:%.*]] = xor <2 x i32> %a, %b
2966 // CHECK: ret <2 x i32> [[XOR_I]]
2967 int32x2_t test_veor_s32(int32x2_t a, int32x2_t b) {
2968 return veor_s32(a, b);
2969 }
2971 // CHECK-LABEL: @test_veor_s64(
2972 // CHECK: [[XOR_I:%.*]] = xor <1 x i64> %a, %b
2973 // CHECK: ret <1 x i64> [[XOR_I]]
2974 int64x1_t test_veor_s64(int64x1_t a, int64x1_t b) {
2975 return veor_s64(a, b);
2976 }
2978 // CHECK-LABEL: @test_veor_u8(
2979 // CHECK: [[XOR_I:%.*]] = xor <8 x i8> %a, %b
2980 // CHECK: ret <8 x i8> [[XOR_I]]
2981 uint8x8_t test_veor_u8(uint8x8_t a, uint8x8_t b) {
2982 return veor_u8(a, b);
2983 }
2985 // CHECK-LABEL: @test_veor_u16(
2986 // CHECK: [[XOR_I:%.*]] = xor <4 x i16> %a, %b
2987 // CHECK: ret <4 x i16> [[XOR_I]]
2988 uint16x4_t test_veor_u16(uint16x4_t a, uint16x4_t b) {
2989 return veor_u16(a, b);
2990 }
2992 // CHECK-LABEL: @test_veor_u32(
2993 // CHECK: [[XOR_I:%.*]] = xor <2 x i32> %a, %b
2994 // CHECK: ret <2 x i32> [[XOR_I]]
2995 uint32x2_t test_veor_u32(uint32x2_t a, uint32x2_t b) {
2996 return veor_u32(a, b);
2997 }
2999 // CHECK-LABEL: @test_veor_u64(
3000 // CHECK: [[XOR_I:%.*]] = xor <1 x i64> %a, %b
3001 // CHECK: ret <1 x i64> [[XOR_I]]
3002 uint64x1_t test_veor_u64(uint64x1_t a, uint64x1_t b) {
3003 return veor_u64(a, b);
3004 }
3006 // CHECK-LABEL: @test_veorq_s8(
3007 // CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b
3008 // CHECK: ret <16 x i8> [[XOR_I]]
3009 int8x16_t test_veorq_s8(int8x16_t a, int8x16_t b) {
3010 return veorq_s8(a, b);
3011 }
3013 // CHECK-LABEL: @test_veorq_s16(
3014 // CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b
3015 // CHECK: ret <8 x i16> [[XOR_I]]
3016 int16x8_t test_veorq_s16(int16x8_t a, int16x8_t b) {
3017 return veorq_s16(a, b);
3018 }
3020 // CHECK-LABEL: @test_veorq_s32(
3021 // CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b
3022 // CHECK: ret <4 x i32> [[XOR_I]]
3023 int32x4_t test_veorq_s32(int32x4_t a, int32x4_t b) {
3024 return veorq_s32(a, b);
3025 }
3027 // CHECK-LABEL: @test_veorq_s64(
3028 // CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b
3029 // CHECK: ret <2 x i64> [[XOR_I]]
3030 int64x2_t test_veorq_s64(int64x2_t a, int64x2_t b) {
3031 return veorq_s64(a, b);
3032 }
3034 // CHECK-LABEL: @test_veorq_u8(
3035 // CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b
3036 // CHECK: ret <16 x i8> [[XOR_I]]
3037 uint8x16_t test_veorq_u8(uint8x16_t a, uint8x16_t b) {
3038 return veorq_u8(a, b);
3039 }
3041 // CHECK-LABEL: @test_veorq_u16(
3042 // CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b
3043 // CHECK: ret <8 x i16> [[XOR_I]]
3044 uint16x8_t test_veorq_u16(uint16x8_t a, uint16x8_t b) {
3045 return veorq_u16(a, b);
3046 }
3048 // CHECK-LABEL: @test_veorq_u32(
3049 // CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b
3050 // CHECK: ret <4 x i32> [[XOR_I]]
3051 uint32x4_t test_veorq_u32(uint32x4_t a, uint32x4_t b) {
3052 return veorq_u32(a, b);
3053 }
3055 // CHECK-LABEL: @test_veorq_u64(
3056 // CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b
3057 // CHECK: ret <2 x i64> [[XOR_I]]
3058 uint64x2_t test_veorq_u64(uint64x2_t a, uint64x2_t b) {
3059 return veorq_u64(a, b);
3060 }
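// Usage sketch, not part of the checked tests: veor_*/veorq_* are plain
// lane-wise XORs, so applying the same mask twice round-trips the input.
// Hypothetical helper:
static inline uint32x2_t toggle_u32(uint32x2_t v, uint32x2_t mask) {
  return veor_u32(v, mask); // flips exactly the bits set in mask
}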
3062 // CHECK-LABEL: @test_vext_s8(
3063 // CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
3064 // CHECK: ret <8 x i8> [[VEXT]]
3065 int8x8_t test_vext_s8(int8x8_t a, int8x8_t b) {
3066 return vext_s8(a, b, 7);
3067 }
3069 // CHECK-LABEL: @test_vext_u8(
3070 // CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
3071 // CHECK: ret <8 x i8> [[VEXT]]
3072 uint8x8_t test_vext_u8(uint8x8_t a, uint8x8_t b) {
3073 return vext_u8(a, b, 7);
3074 }
3076 // CHECK-LABEL: @test_vext_p8(
3077 // CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
3078 // CHECK: ret <8 x i8> [[VEXT]]
3079 poly8x8_t test_vext_p8(poly8x8_t a, poly8x8_t b) {
3080 return vext_p8(a, b, 7);
3081 }
3083 // CHECK-LABEL: @test_vext_s16(
3084 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3085 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3086 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3087 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3088 // CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3089 // CHECK: ret <4 x i16> [[VEXT]]
3090 int16x4_t test_vext_s16(int16x4_t a, int16x4_t b) {
3091 return vext_s16(a, b, 3);
3092 }
3094 // CHECK-LABEL: @test_vext_u16(
3095 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3096 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3097 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3098 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3099 // CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3100 // CHECK: ret <4 x i16> [[VEXT]]
3101 uint16x4_t test_vext_u16(uint16x4_t a, uint16x4_t b) {
3102 return vext_u16(a, b, 3);
3103 }
3105 // CHECK-LABEL: @test_vext_p16(
3106 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3107 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3108 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
3109 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
3110 // CHECK: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3111 // CHECK: ret <4 x i16> [[VEXT]]
3112 poly16x4_t test_vext_p16(poly16x4_t a, poly16x4_t b) {
3113 return vext_p16(a, b, 3);
3114 }
3116 // CHECK-LABEL: @test_vext_s32(
3117 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3118 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3119 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3120 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3121 // CHECK: [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
3122 // CHECK: ret <2 x i32> [[VEXT]]
3123 int32x2_t test_vext_s32(int32x2_t a, int32x2_t b) {
3124 return vext_s32(a, b, 1);
3125 }
3127 // CHECK-LABEL: @test_vext_u32(
3128 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3129 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3130 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
3131 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
3132 // CHECK: [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
3133 // CHECK: ret <2 x i32> [[VEXT]]
3134 uint32x2_t test_vext_u32(uint32x2_t a, uint32x2_t b) {
3135 return vext_u32(a, b, 1);
3136 }
3138 // CHECK-LABEL: @test_vext_s64(
3139 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3140 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3141 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
3142 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
3143 // CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
3144 // CHECK: ret <1 x i64> [[VEXT]]
3145 int64x1_t test_vext_s64(int64x1_t a, int64x1_t b) {
3146 return vext_s64(a, b, 0);
3147 }
3149 // CHECK-LABEL: @test_vext_u64(
3150 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
3151 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
3152 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
3153 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
3154 // CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
3155 // CHECK: ret <1 x i64> [[VEXT]]
3156 uint64x1_t test_vext_u64(uint64x1_t a, uint64x1_t b) {
3157 return vext_u64(a, b, 0);
3158 }
3160 // CHECK-LABEL: @test_vext_f32(
3161 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
3162 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
3163 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
3164 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
3165 // CHECK: [[VEXT:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 2>
3166 // CHECK: ret <2 x float> [[VEXT]]
3167 float32x2_t test_vext_f32(float32x2_t a, float32x2_t b) {
3168 return vext_f32(a, b, 1);
3169 }
3171 // CHECK-LABEL: @test_vextq_s8(
3172 // CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
3173 // CHECK: ret <16 x i8> [[VEXT]]
3174 int8x16_t test_vextq_s8(int8x16_t a, int8x16_t b) {
3175 return vextq_s8(a, b, 15);
3176 }
3178 // CHECK-LABEL: @test_vextq_u8(
3179 // CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
3180 // CHECK: ret <16 x i8> [[VEXT]]
3181 uint8x16_t test_vextq_u8(uint8x16_t a, uint8x16_t b) {
3182 return vextq_u8(a, b, 15);
3183 }
3185 // CHECK-LABEL: @test_vextq_p8(
3186 // CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
3187 // CHECK: ret <16 x i8> [[VEXT]]
3188 poly8x16_t test_vextq_p8(poly8x16_t a, poly8x16_t b) {
3189 return vextq_p8(a, b, 15);
3190 }
3192 // CHECK-LABEL: @test_vextq_s16(
3193 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3194 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3195 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
3196 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
3197 // CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
3198 // CHECK: ret <8 x i16> [[VEXT]]
3199 int16x8_t test_vextq_s16(int16x8_t a, int16x8_t b) {
3200 return vextq_s16(a, b, 7);
3201 }
3203 // CHECK-LABEL: @test_vextq_u16(
3204 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3205 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3206 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
3207 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
3208 // CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
3209 // CHECK: ret <8 x i16> [[VEXT]]
3210 uint16x8_t test_vextq_u16(uint16x8_t a, uint16x8_t b) {
3211 return vextq_u16(a, b, 7);
3212 }
3214 // CHECK-LABEL: @test_vextq_p16(
3215 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3216 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3217 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
3218 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
3219 // CHECK: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
3220 // CHECK: ret <8 x i16> [[VEXT]]
3221 poly16x8_t test_vextq_p16(poly16x8_t a, poly16x8_t b) {
3222 return vextq_p16(a, b, 7);
3223 }
3225 // CHECK-LABEL: @test_vextq_s32(
3226 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3227 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3228 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
3229 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
3230 // CHECK: [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3231 // CHECK: ret <4 x i32> [[VEXT]]
3232 int32x4_t test_vextq_s32(int32x4_t a, int32x4_t b) {
3233 return vextq_s32(a, b, 3);
3234 }
3236 // CHECK-LABEL: @test_vextq_u32(
3237 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3238 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3239 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
3240 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
3241 // CHECK: [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3242 // CHECK: ret <4 x i32> [[VEXT]]
3243 uint32x4_t test_vextq_u32(uint32x4_t a, uint32x4_t b) {
3244 return vextq_u32(a, b, 3);
3245 }
3247 // CHECK-LABEL: @test_vextq_s64(
3248 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3249 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3250 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
3251 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
3252 // CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
3253 // CHECK: ret <2 x i64> [[VEXT]]
3254 int64x2_t test_vextq_s64(int64x2_t a, int64x2_t b) {
3255 return vextq_s64(a, b, 1);
3256 }
3258 // CHECK-LABEL: @test_vextq_u64(
3259 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
3260 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
3261 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
3262 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
3263 // CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
3264 // CHECK: ret <2 x i64> [[VEXT]]
3265 uint64x2_t test_vextq_u64(uint64x2_t a, uint64x2_t b) {
3266 return vextq_u64(a, b, 1);
3267 }
3269 // CHECK-LABEL: @test_vextq_f32(
3270 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
3271 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
3272 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
3273 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
3274 // CHECK: [[VEXT:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3275 // CHECK: ret <4 x float> [[VEXT]]
3276 float32x4_t test_vextq_f32(float32x4_t a, float32x4_t b) {
3277 return vextq_f32(a, b, 3);
3278 }
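// Usage sketch, not part of the checked tests: vext_*(a, b, n) reads the
// concatenation a:b starting at lane n, i.e. the tail of a followed by the
// head of b -- the standard NEON idiom for a sliding window across two
// adjacent loads. The index must be a compile-time constant, as in the tests
// above. Hypothetical helper:
static inline uint8x8_t window3_u8(uint8x8_t lo, uint8x8_t hi) {
  return vext_u8(lo, hi, 3); // lanes 3..7 of lo, then lanes 0..2 of hi
}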
3280 // CHECK-LABEL: @test_vfma_f32(
3281 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
3282 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
3283 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
3284 // CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %c, <2 x float> %a)
3285 // CHECK: ret <2 x float> [[TMP3]]
3286 float32x2_t test_vfma_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
3287 return vfma_f32(a, b, c);
3288 }
3290 // CHECK-LABEL: @test_vfmaq_f32(
3291 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
3292 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
3293 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
3294 // CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %c, <4 x float> %a)
3295 // CHECK: ret <4 x float> [[TMP3]]
3296 float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
3297 return vfmaq_f32(a, b, c);
3298 }
3300 // CHECK-LABEL: @test_vfms_f32(
3301 // CHECK: [[SUB_I:%.*]] = fneg <2 x float> %b
3302 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
3303 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
3304 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
3305 // CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %c, <2 x float> %a)
3306 // CHECK: ret <2 x float> [[TMP3]]
3307 float32x2_t test_vfms_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
3308 return vfms_f32(a, b, c);
3309 }
3311 // CHECK-LABEL: @test_vfmsq_f32(
3312 // CHECK: [[SUB_I:%.*]] = fneg <4 x float> %b
3313 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
3314 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
3315 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
3316 // CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %c, <4 x float> %a)
3317 // CHECK: ret <4 x float> [[TMP3]]
3318 float32x4_t test_vfmsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
3319 return vfmsq_f32(a, b, c);
3320 }
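// Usage sketch, not part of the checked tests: vfma_f32(a, b, c) computes
// a + b*c with a single rounding (it lowers to @llvm.fma above), and
// vfms_f32 negates b first, giving a - b*c. A fused linear interpolation,
// with a hypothetical helper name:
static inline float32x2_t lerp2_f32(float32x2_t a, float32x2_t b, float32x2_t t) {
  return vfma_f32(a, t, vsub_f32(b, a)); // a + t*(b - a), multiply-add fused
}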
3322 // CHECK-LABEL: @test_vget_high_s8(
3323 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
3324 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
3325 int8x8_t test_vget_high_s8(int8x16_t a) {
3326 return vget_high_s8(a);
3327 }
3329 // CHECK-LABEL: @test_vget_high_s16(
3330 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3331 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
3332 int16x4_t test_vget_high_s16(int16x8_t a) {
3333 return vget_high_s16(a);
3334 }
3336 // CHECK-LABEL: @test_vget_high_s32(
3337 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
3338 // CHECK: ret <2 x i32> [[SHUFFLE_I]]
3339 int32x2_t test_vget_high_s32(int32x4_t a) {
3340 return vget_high_s32(a);
3341 }
3343 // CHECK-LABEL: @test_vget_high_s64(
3344 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
3345 // CHECK: ret <1 x i64> [[SHUFFLE_I]]
3346 int64x1_t test_vget_high_s64(int64x2_t a) {
3347 return vget_high_s64(a);
3348 }
3350 // CHECK-LABEL: @test_vget_high_f16(
3351 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3352 // CHECK: ret <4 x half> [[SHUFFLE_I]]
3353 float16x4_t test_vget_high_f16(float16x8_t a) {
3354 return vget_high_f16(a);
3355 }
3357 // CHECK-LABEL: @test_vget_high_f32(
3358 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 2, i32 3>
3359 // CHECK: ret <2 x float> [[SHUFFLE_I]]
3360 float32x2_t test_vget_high_f32(float32x4_t a) {
3361 return vget_high_f32(a);
3362 }
3364 // CHECK-LABEL: @test_vget_high_u8(
3365 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
3366 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
3367 uint8x8_t test_vget_high_u8(uint8x16_t a) {
3368 return vget_high_u8(a);
3369 }
3371 // CHECK-LABEL: @test_vget_high_u16(
3372 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3373 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
3374 uint16x4_t test_vget_high_u16(uint16x8_t a) {
3375 return vget_high_u16(a);
3376 }
3378 // CHECK-LABEL: @test_vget_high_u32(
3379 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
3380 // CHECK: ret <2 x i32> [[SHUFFLE_I]]
3381 uint32x2_t test_vget_high_u32(uint32x4_t a) {
3382 return vget_high_u32(a);
3383 }
3385 // CHECK-LABEL: @test_vget_high_u64(
3386 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
3387 // CHECK: ret <1 x i64> [[SHUFFLE_I]]
3388 uint64x1_t test_vget_high_u64(uint64x2_t a) {
3389 return vget_high_u64(a);
3390 }
3392 // CHECK-LABEL: @test_vget_high_p8(
3393 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
3394 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
3395 poly8x8_t test_vget_high_p8(poly8x16_t a) {
3396 return vget_high_p8(a);
3397 }
3399 // CHECK-LABEL: @test_vget_high_p16(
3400 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3401 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
3402 poly16x4_t test_vget_high_p16(poly16x8_t a) {
3403 return vget_high_p16(a);
3404 }
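// Usage sketch, not part of the checked tests: vget_high_* selects the upper
// half of a q register (a shufflevector in the IR above; on 32-bit ARM it is
// just the upper d register of the pair), pairing with vget_low_* to split a
// 128-bit vector for 64-bit-wide processing. Hypothetical helper:
static inline int16x4_t upper_half_s16(int16x8_t v) {
  return vget_high_s16(v); // lanes 4..7
}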
3406 // CHECK-LABEL: @test_vget_lane_u8(
3407 // CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
3408 // CHECK: ret i8 [[VGET_LANE]]
3409 uint8_t test_vget_lane_u8(uint8x8_t a) {
3410 return vget_lane_u8(a, 7);
3411 }
3413 // CHECK-LABEL: @test_vget_lane_u16(
3414 // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
3415 // CHECK: ret i16 [[VGET_LANE]]
3416 uint16_t test_vget_lane_u16(uint16x4_t a) {
3417 return vget_lane_u16(a, 3);
3418 }
3420 // CHECK-LABEL: @test_vget_lane_u32(
3421 // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
3422 // CHECK: ret i32 [[VGET_LANE]]
3423 uint32_t test_vget_lane_u32(uint32x2_t a) {
3424 return vget_lane_u32(a, 1);
3425 }
3427 // CHECK-LABEL: @test_vget_lane_s8(
3428 // CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
3429 // CHECK: ret i8 [[VGET_LANE]]
3430 int8_t test_vget_lane_s8(int8x8_t a) {
3431 return vget_lane_s8(a, 7);
3432 }
3434 // CHECK-LABEL: @test_vget_lane_s16(
3435 // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
3436 // CHECK: ret i16 [[VGET_LANE]]
3437 int16_t test_vget_lane_s16(int16x4_t a) {
3438 return vget_lane_s16(a, 3);
3439 }
3441 // CHECK-LABEL: @test_vget_lane_s32(
3442 // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
3443 // CHECK: ret i32 [[VGET_LANE]]
3444 int32_t test_vget_lane_s32(int32x2_t a) {
3445 return vget_lane_s32(a, 1);
3446 }
3448 // CHECK-LABEL: @test_vget_lane_p8(
3449 // CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
3450 // CHECK: ret i8 [[VGET_LANE]]
3451 poly8_t test_vget_lane_p8(poly8x8_t a) {
3452 return vget_lane_p8(a, 7);
3453 }
3455 // CHECK-LABEL: @test_vget_lane_p16(
3456 // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
3457 // CHECK: ret i16 [[VGET_LANE]]
3458 poly16_t test_vget_lane_p16(poly16x4_t a) {
3459 return vget_lane_p16(a, 3);
3460 }
3462 // CHECK-LABEL: @test_vget_lane_f32(
3463 // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> %a, i32 1
3464 // CHECK: ret float [[VGET_LANE]]
3465 float32_t test_vget_lane_f32(float32x2_t a) {
3466 return vget_lane_f32(a, 1);
3467 }
3469 // CHECK-LABEL: @test_vget_lane_f16(
3470 // CHECK: [[__REINT_242:%.*]] = alloca <4 x half>, align 8
3471 // CHECK: [[__REINT1_242:%.*]] = alloca i16, align 2
3472 // CHECK: store <4 x half> %a, ptr [[__REINT_242]], align 8
3473 // CHECK: [[TMP1:%.*]] = load <4 x i16>, ptr [[__REINT_242]], align 8
3474 // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 1
3475 // CHECK: store i16 [[VGET_LANE]], ptr [[__REINT1_242]], align 2
3476 // CHECK: [[TMP5:%.*]] = load half, ptr [[__REINT1_242]], align 2
3477 // CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float
3478 // CHECK: ret float [[CONV]]
3479 float32_t test_vget_lane_f16(float16x4_t a) {
3480 return vget_lane_f16(a, 1);
3481 }
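// Usage sketch, not part of the checked tests: vget_lane_* extracts one lane
// to a scalar; the lane index must be a constant in range. Note above that
// vget_lane_f16 round-trips through memory and an fpext, returning the half
// lane widened to float. Hypothetical helper:
static inline int32_t sum2_s32(int32x2_t v) {
  return vget_lane_s32(v, 0) + vget_lane_s32(v, 1);
}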
3483 // CHECK-LABEL: @test_vgetq_lane_u8(
3484 // CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
3485 // CHECK: ret i8 [[VGET_LANE]]
3486 uint8_t test_vgetq_lane_u8(uint8x16_t a) {
3487 return vgetq_lane_u8(a, 15);
3488 }
3490 // CHECK-LABEL: @test_vgetq_lane_u16(
3491 // CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
3492 // CHECK: ret i16 [[VGET_LANE]]
3493 uint16_t test_vgetq_lane_u16(uint16x8_t a) {
3494 return vgetq_lane_u16(a, 7);
3495 }
3497 // CHECK-LABEL: @test_vgetq_lane_u32(
3498 // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
3499 // CHECK: ret i32 [[VGET_LANE]]
3500 uint32_t test_vgetq_lane_u32(uint32x4_t a) {
3501 return vgetq_lane_u32(a, 3);
3502 }
3504 // CHECK-LABEL: @test_vgetq_lane_s8(
3505 // CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
3506 // CHECK: ret i8 [[VGET_LANE]]
3507 int8_t test_vgetq_lane_s8(int8x16_t a) {
3508 return vgetq_lane_s8(a, 15);
3509 }
3511 // CHECK-LABEL: @test_vgetq_lane_s16(
3512 // CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
3513 // CHECK: ret i16 [[VGET_LANE]]
3514 int16_t test_vgetq_lane_s16(int16x8_t a) {
3515 return vgetq_lane_s16(a, 7);
3516 }
3518 // CHECK-LABEL: @test_vgetq_lane_s32(
3519 // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
3520 // CHECK: ret i32 [[VGET_LANE]]
3521 int32_t test_vgetq_lane_s32(int32x4_t a) {
3522 return vgetq_lane_s32(a, 3);
3523 }
3525 // CHECK-LABEL: @test_vgetq_lane_p8(
3526 // CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
3527 // CHECK: ret i8 [[VGET_LANE]]
3528 poly8_t test_vgetq_lane_p8(poly8x16_t a) {
3529 return vgetq_lane_p8(a, 15);
3530 }
3532 // CHECK-LABEL: @test_vgetq_lane_p16(
3533 // CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
3534 // CHECK: ret i16 [[VGET_LANE]]
3535 poly16_t test_vgetq_lane_p16(poly16x8_t a) {
3536 return vgetq_lane_p16(a, 7);
3537 }
3539 // CHECK-LABEL: @test_vgetq_lane_f32(
3540 // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x float> %a, i32 3
3541 // CHECK: ret float [[VGET_LANE]]
3542 float32_t test_vgetq_lane_f32(float32x4_t a) {
3543 return vgetq_lane_f32(a, 3);
3544 }
3546 // CHECK-LABEL: @test_vgetq_lane_f16(
3547 // CHECK: [[__REINT_244:%.*]] = alloca <8 x half>, align 16
3548 // CHECK: [[__REINT1_244:%.*]] = alloca i16, align 2
3549 // CHECK: store <8 x half> %a, ptr [[__REINT_244]], align 16
3550 // CHECK: [[TMP1:%.*]] = load <8 x i16>, ptr [[__REINT_244]], align 16
3551 // CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 3
3552 // CHECK: store i16 [[VGET_LANE]], ptr [[__REINT1_244]], align 2
3553 // CHECK: [[TMP5:%.*]] = load half, ptr [[__REINT1_244]], align 2
3554 // CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float
3555 // CHECK: ret float [[CONV]]
3556 float32_t test_vgetq_lane_f16(float16x8_t a) {
3557 return vgetq_lane_f16(a, 3);
3558 }
3560 // CHECK-LABEL: @test_vget_lane_s64(
3561 // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
3562 // CHECK: ret i64 [[VGET_LANE]]
3563 int64_t test_vget_lane_s64(int64x1_t a) {
3564 return vget_lane_s64(a, 0);
3565 }
3567 // CHECK-LABEL: @test_vget_lane_u64(
3568 // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
3569 // CHECK: ret i64 [[VGET_LANE]]
3570 uint64_t test_vget_lane_u64(uint64x1_t a) {
3571 return vget_lane_u64(a, 0);
3572 }
3574 // CHECK-LABEL: @test_vgetq_lane_s64(
3575 // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
3576 // CHECK: ret i64 [[VGET_LANE]]
3577 int64_t test_vgetq_lane_s64(int64x2_t a) {
3578 return vgetq_lane_s64(a, 1);
3579 }
3581 // CHECK-LABEL: @test_vgetq_lane_u64(
3582 // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
3583 // CHECK: ret i64 [[VGET_LANE]]
3584 uint64_t test_vgetq_lane_u64(uint64x2_t a) {
3585 return vgetq_lane_u64(a, 1);
3586 }
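// Usage sketch, not part of the checked tests: the q-register lane reads take
// indices up to 15/7/3/1 depending on element width, and the 64-bit forms are
// the usual way to move a whole i64 lane back to core registers.
// Hypothetical helper:
static inline uint64_t upper_u64(uint64x2_t v) {
  return vgetq_lane_u64(v, 1);
}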
3588 // CHECK-LABEL: @test_vget_low_s8(
3589 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3590 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
3591 int8x8_t test_vget_low_s8(int8x16_t a) {
3592 return vget_low_s8(a);
3593 }
3595 // CHECK-LABEL: @test_vget_low_s16(
3596 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3597 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
3598 int16x4_t test_vget_low_s16(int16x8_t a) {
3599 return vget_low_s16(a);
3600 }
3602 // CHECK-LABEL: @test_vget_low_s32(
3603 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1>
3604 // CHECK: ret <2 x i32> [[SHUFFLE_I]]
3605 int32x2_t test_vget_low_s32(int32x4_t a) {
3606 return vget_low_s32(a);
3607 }
3609 // CHECK-LABEL: @test_vget_low_s64(
3610 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer
3611 // CHECK: ret <1 x i64> [[SHUFFLE_I]]
3612 int64x1_t test_vget_low_s64(int64x2_t a) {
3613 return vget_low_s64(a);
3614 }
3616 // CHECK-LABEL: @test_vget_low_f16(
3617 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3618 // CHECK: ret <4 x half> [[SHUFFLE_I]]
3619 float16x4_t test_vget_low_f16(float16x8_t a) {
3620 return vget_low_f16(a);
3621 }
3623 // CHECK-LABEL: @test_vget_low_f32(
3624 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 0, i32 1>
3625 // CHECK: ret <2 x float> [[SHUFFLE_I]]
3626 float32x2_t test_vget_low_f32(float32x4_t a) {
3627 return vget_low_f32(a);
3628 }
3630 // CHECK-LABEL: @test_vget_low_u8(
3631 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3632 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
3633 uint8x8_t test_vget_low_u8(uint8x16_t a) {
3634 return vget_low_u8(a);
3635 }
3637 // CHECK-LABEL: @test_vget_low_u16(
3638 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3639 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
3640 uint16x4_t test_vget_low_u16(uint16x8_t a) {
3641 return vget_low_u16(a);
3642 }
3644 // CHECK-LABEL: @test_vget_low_u32(
3645 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1>
3646 // CHECK: ret <2 x i32> [[SHUFFLE_I]]
3647 uint32x2_t test_vget_low_u32(uint32x4_t a) {
3648 return vget_low_u32(a);
3649 }
3651 // CHECK-LABEL: @test_vget_low_u64(
3652 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer
3653 // CHECK: ret <1 x i64> [[SHUFFLE_I]]
3654 uint64x1_t test_vget_low_u64(uint64x2_t a) {
3655 return vget_low_u64(a);
3656 }
3658 // CHECK-LABEL: @test_vget_low_p8(
3659 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3660 // CHECK: ret <8 x i8> [[SHUFFLE_I]]
3661 poly8x8_t test_vget_low_p8(poly8x16_t a) {
3662 return vget_low_p8(a);
3663 }
3665 // CHECK-LABEL: @test_vget_low_p16(
3666 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3667 // CHECK: ret <4 x i16> [[SHUFFLE_I]]
3668 poly16x4_t test_vget_low_p16(poly16x8_t a) {
3669 return vget_low_p16(a);
3670 }
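// Usage sketch, not part of the checked tests: vget_low_* plus a widening op
// is the standard pattern for processing a q vector at double element width.
// Hypothetical helper:
static inline uint16x8_t widen_low_u8(uint8x16_t v) {
  return vmovl_u8(vget_low_u8(v)); // zero-extend lanes 0..7 to 16 bits
}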
3672 // CHECK-LABEL: @test_vhadd_s8(
3673 // CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %a, <8 x i8> %b)
3674 // CHECK: ret <8 x i8> [[VHADD_V_I]]
3675 int8x8_t test_vhadd_s8(int8x8_t a, int8x8_t b) {
3676 return vhadd_s8(a, b);
3677 }
3679 // CHECK-LABEL: @test_vhadd_s16(
3680 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3681 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3682 // CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %a, <4 x i16> %b)
3683 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
3684 // CHECK: ret <4 x i16> [[VHADD_V2_I]]
3685 int16x4_t test_vhadd_s16(int16x4_t a, int16x4_t b) {
3686 return vhadd_s16(a, b);
3687 }
3689 // CHECK-LABEL: @test_vhadd_s32(
3690 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3691 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3692 // CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %a, <2 x i32> %b)
3693 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
3694 // CHECK: ret <2 x i32> [[VHADD_V2_I]]
3695 int32x2_t test_vhadd_s32(int32x2_t a, int32x2_t b) {
3696 return vhadd_s32(a, b);
3697 }
3699 // CHECK-LABEL: @test_vhadd_u8(
3700 // CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %a, <8 x i8> %b)
3701 // CHECK: ret <8 x i8> [[VHADD_V_I]]
3702 uint8x8_t test_vhadd_u8(uint8x8_t a, uint8x8_t b) {
3703 return vhadd_u8(a, b);
3704 }
3706 // CHECK-LABEL: @test_vhadd_u16(
3707 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3708 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3709 // CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %a, <4 x i16> %b)
3710 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
3711 // CHECK: ret <4 x i16> [[VHADD_V2_I]]
3712 uint16x4_t test_vhadd_u16(uint16x4_t a, uint16x4_t b) {
3713 return vhadd_u16(a, b);
3714 }
3716 // CHECK-LABEL: @test_vhadd_u32(
3717 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3718 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3719 // CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %a, <2 x i32> %b)
3720 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
3721 // CHECK: ret <2 x i32> [[VHADD_V2_I]]
3722 uint32x2_t test_vhadd_u32(uint32x2_t a, uint32x2_t b) {
3723 return vhadd_u32(a, b);
3724 }
3726 // CHECK-LABEL: @test_vhaddq_s8(
3727 // CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %a, <16 x i8> %b)
3728 // CHECK: ret <16 x i8> [[VHADDQ_V_I]]
3729 int8x16_t test_vhaddq_s8(int8x16_t a, int8x16_t b) {
3730 return vhaddq_s8(a, b);
3731 }
3733 // CHECK-LABEL: @test_vhaddq_s16(
3734 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3735 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3736 // CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %a, <8 x i16> %b)
3737 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
3738 // CHECK: ret <8 x i16> [[VHADDQ_V2_I]]
3739 int16x8_t test_vhaddq_s16(int16x8_t a, int16x8_t b) {
3740 return vhaddq_s16(a, b);
3741 }
3743 // CHECK-LABEL: @test_vhaddq_s32(
3744 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3745 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3746 // CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %a, <4 x i32> %b)
3747 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
3748 // CHECK: ret <4 x i32> [[VHADDQ_V2_I]]
3749 int32x4_t test_vhaddq_s32(int32x4_t a, int32x4_t b) {
3750 return vhaddq_s32(a, b);
3751 }
3753 // CHECK-LABEL: @test_vhaddq_u8(
3754 // CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %a, <16 x i8> %b)
3755 // CHECK: ret <16 x i8> [[VHADDQ_V_I]]
3756 uint8x16_t test_vhaddq_u8(uint8x16_t a, uint8x16_t b) {
3757 return vhaddq_u8(a, b);
3758 }
3760 // CHECK-LABEL: @test_vhaddq_u16(
3761 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3762 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3763 // CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %a, <8 x i16> %b)
3764 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
3765 // CHECK: ret <8 x i16> [[VHADDQ_V2_I]]
3766 uint16x8_t test_vhaddq_u16(uint16x8_t a, uint16x8_t b) {
3767 return vhaddq_u16(a, b);
3768 }
3770 // CHECK-LABEL: @test_vhaddq_u32(
3771 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3772 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3773 // CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %a, <4 x i32> %b)
3774 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
3775 // CHECK: ret <4 x i32> [[VHADDQ_V2_I]]
3776 uint32x4_t test_vhaddq_u32(uint32x4_t a, uint32x4_t b) {
3777 return vhaddq_u32(a, b);
3778 }
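// Usage sketch, not part of the checked tests: vhadd computes (a + b) >> 1
// per lane in a widened intermediate, so it cannot overflow; it truncates,
// where vrhadd would round. Signedness selects vhadds/vhaddu above.
// Hypothetical helper:
static inline uint8x8_t avg_floor_u8(uint8x8_t a, uint8x8_t b) {
  return vhadd_u8(a, b);
}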
3780 // CHECK-LABEL: @test_vhsub_s8(
3781 // CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %a, <8 x i8> %b)
3782 // CHECK: ret <8 x i8> [[VHSUB_V_I]]
3783 int8x8_t test_vhsub_s8(int8x8_t a, int8x8_t b) {
3784 return vhsub_s8(a, b);
3785 }
3787 // CHECK-LABEL: @test_vhsub_s16(
3788 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3789 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3790 // CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %a, <4 x i16> %b)
3791 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
3792 // CHECK: ret <4 x i16> [[VHSUB_V2_I]]
3793 int16x4_t test_vhsub_s16(int16x4_t a, int16x4_t b) {
3794 return vhsub_s16(a, b);
3795 }
3797 // CHECK-LABEL: @test_vhsub_s32(
3798 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3799 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3800 // CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %a, <2 x i32> %b)
3801 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
3802 // CHECK: ret <2 x i32> [[VHSUB_V2_I]]
3803 int32x2_t test_vhsub_s32(int32x2_t a, int32x2_t b) {
3804 return vhsub_s32(a, b);
3805 }
3807 // CHECK-LABEL: @test_vhsub_u8(
3808 // CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %a, <8 x i8> %b)
3809 // CHECK: ret <8 x i8> [[VHSUB_V_I]]
3810 uint8x8_t test_vhsub_u8(uint8x8_t a, uint8x8_t b) {
3811 return vhsub_u8(a, b);
3812 }
3814 // CHECK-LABEL: @test_vhsub_u16(
3815 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
3816 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
3817 // CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %a, <4 x i16> %b)
3818 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
3819 // CHECK: ret <4 x i16> [[VHSUB_V2_I]]
3820 uint16x4_t test_vhsub_u16(uint16x4_t a, uint16x4_t b) {
3821 return vhsub_u16(a, b);
3822 }
3824 // CHECK-LABEL: @test_vhsub_u32(
3825 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
3826 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
3827 // CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %a, <2 x i32> %b)
3828 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
3829 // CHECK: ret <2 x i32> [[VHSUB_V2_I]]
3830 uint32x2_t test_vhsub_u32(uint32x2_t a, uint32x2_t b) {
3831 return vhsub_u32(a, b);
3832 }
3834 // CHECK-LABEL: @test_vhsubq_s8(
3835 // CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %a, <16 x i8> %b)
3836 // CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
3837 int8x16_t test_vhsubq_s8(int8x16_t a, int8x16_t b) {
3838 return vhsubq_s8(a, b);
3839 }
3841 // CHECK-LABEL: @test_vhsubq_s16(
3842 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3843 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3844 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %a, <8 x i16> %b)
3845 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
3846 // CHECK: ret <8 x i16> [[VHSUBQ_V2_I]]
3847 int16x8_t test_vhsubq_s16(int16x8_t a, int16x8_t b) {
3848 return vhsubq_s16(a, b);
3849 }
3851 // CHECK-LABEL: @test_vhsubq_s32(
3852 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3853 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3854 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %a, <4 x i32> %b)
3855 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
3856 // CHECK: ret <4 x i32> [[VHSUBQ_V2_I]]
3857 int32x4_t test_vhsubq_s32(int32x4_t a, int32x4_t b) {
3858 return vhsubq_s32(a, b);
3859 }
3861 // CHECK-LABEL: @test_vhsubq_u8(
3862 // CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %a, <16 x i8> %b)
3863 // CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
3864 uint8x16_t test_vhsubq_u8(uint8x16_t a, uint8x16_t b) {
3865 return vhsubq_u8(a, b);
3866 }
3868 // CHECK-LABEL: @test_vhsubq_u16(
3869 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
3870 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
3871 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %a, <8 x i16> %b)
3872 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
3873 // CHECK: ret <8 x i16> [[VHSUBQ_V2_I]]
3874 uint16x8_t test_vhsubq_u16(uint16x8_t a, uint16x8_t b) {
3875 return vhsubq_u16(a, b);
3876 }
3878 // CHECK-LABEL: @test_vhsubq_u32(
3879 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
3880 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
3881 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %a, <4 x i32> %b)
3882 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
3883 // CHECK: ret <4 x i32> [[VHSUBQ_V2_I]]
3884 uint32x4_t test_vhsubq_u32(uint32x4_t a, uint32x4_t b) {
3885 return vhsubq_u32(a, b);
3886 }
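// Usage sketch, not part of the checked tests: vhsub likewise computes
// (a - b) >> 1 per lane without intermediate overflow. Hypothetical helper:
static inline int16x4_t half_diff_s16(int16x4_t a, int16x4_t b) {
  return vhsub_s16(a, b);
}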
3888 // CHECK-LABEL: @test_vld1q_u8(
3889 // CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0(ptr %a, i32 1)
3890 // CHECK: ret <16 x i8> [[VLD1]]
3891 uint8x16_t test_vld1q_u8(uint8_t const * a) {
3892 return vld1q_u8(a);
3893 }
3895 // CHECK-LABEL: @test_vld1q_u16(
3896 // CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %a, i32 2)
3897 // CHECK: ret <8 x i16> [[VLD1]]
3898 uint16x8_t test_vld1q_u16(uint16_t const * a) {
3899 return vld1q_u16(a);
3900 }
3902 // CHECK-LABEL: @test_vld1q_u32(
3903 // CHECK: [[VLD1:%.*]] = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0(ptr %a, i32 4)
3904 // CHECK: ret <4 x i32> [[VLD1]]
3905 uint32x4_t test_vld1q_u32(uint32_t const * a) {
3906 return vld1q_u32(a);
3907 }
3909 // CHECK-LABEL: @test_vld1q_u64(
3910 // CHECK: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr %a, i32 4)
3911 // CHECK: ret <2 x i64> [[VLD1]]
3912 uint64x2_t test_vld1q_u64(uint64_t const * a) {
3913 return vld1q_u64(a);
3914 }
3916 // CHECK-LABEL: @test_vld1q_s8(
3917 // CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0(ptr %a, i32 1)
3918 // CHECK: ret <16 x i8> [[VLD1]]
3919 int8x16_t test_vld1q_s8(int8_t const * a) {
3920 return vld1q_s8(a);
3921 }
3923 // CHECK-LABEL: @test_vld1q_s16(
3924 // CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %a, i32 2)
3925 // CHECK: ret <8 x i16> [[VLD1]]
3926 int16x8_t test_vld1q_s16(int16_t const * a) {
3927 return vld1q_s16(a);
3928 }
3930 // CHECK-LABEL: @test_vld1q_s32(
3931 // CHECK: [[VLD1:%.*]] = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0(ptr %a, i32 4)
3932 // CHECK: ret <4 x i32> [[VLD1]]
3933 int32x4_t test_vld1q_s32(int32_t const * a) {
3934 return vld1q_s32(a);
3935 }
3937 // CHECK-LABEL: @test_vld1q_s64(
3938 // CHECK: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr %a, i32 4)
3939 // CHECK: ret <2 x i64> [[VLD1]]
3940 int64x2_t test_vld1q_s64(int64_t const * a) {
3941 return vld1q_s64(a);
3942 }
3944 // CHECK-LABEL: @test_vld1q_f16(
3945 // CHECK: [[VLD1:%.*]] = call <8 x half> @llvm.arm.neon.vld1.v8f16.p0(ptr %a, i32 2)
3946 // CHECK: ret <8 x half> [[VLD1]]
3947 float16x8_t test_vld1q_f16(float16_t const * a) {
3948 return vld1q_f16(a);
3949 }
3951 // CHECK-LABEL: @test_vld1q_f32(
3952 // CHECK: [[VLD1:%.*]] = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0(ptr %a, i32 4)
3953 // CHECK: ret <4 x float> [[VLD1]]
3954 float32x4_t test_vld1q_f32(float32_t const * a) {
3955 return vld1q_f32(a);
3956 }
3958 // CHECK-LABEL: @test_vld1q_p8(
3959 // CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0(ptr %a, i32 1)
3960 // CHECK: ret <16 x i8> [[VLD1]]
3961 poly8x16_t test_vld1q_p8(poly8_t const * a) {
3962 return vld1q_p8(a);
3963 }
3965 // CHECK-LABEL: @test_vld1q_p16(
3966 // CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %a, i32 2)
3967 // CHECK: ret <8 x i16> [[VLD1]]
3968 poly16x8_t test_vld1q_p16(poly16_t const * a) {
3969 return vld1q_p16(a);
3970 }
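// Usage sketch, not part of the checked tests: vld1q_* is a full 128-bit
// load; the trailing i32 in the IR above is the alignment clang inferred from
// the element type (apparently capped at 4 even for 64-bit elements under
// this apcs-gnu target). Hypothetical helper:
static inline uint32x4_t load4_u32(const uint32_t *p) {
  return vld1q_u32(p);
}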
3972 // CHECK-LABEL: @test_vld1_u8(
3973 // CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0(ptr %a, i32 1)
3974 // CHECK: ret <8 x i8> [[VLD1]]
3975 uint8x8_t test_vld1_u8(uint8_t const * a) {
3976 return vld1_u8(a);
3977 }
3979 // CHECK-LABEL: @test_vld1_u16(
3980 // CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0(ptr %a, i32 2)
3981 // CHECK: ret <4 x i16> [[VLD1]]
3982 uint16x4_t test_vld1_u16(uint16_t const * a) {
3983 return vld1_u16(a);
3984 }
3986 // CHECK-LABEL: @test_vld1_u32(
3987 // CHECK: [[VLD1:%.*]] = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0(ptr %a, i32 4)
3988 // CHECK: ret <2 x i32> [[VLD1]]
3989 uint32x2_t test_vld1_u32(uint32_t const * a) {
3990 return vld1_u32(a);
3991 }
3993 // CHECK-LABEL: @test_vld1_u64(
3994 // CHECK: [[VLD1:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0(ptr %a, i32 4)
3995 // CHECK: ret <1 x i64> [[VLD1]]
3996 uint64x1_t test_vld1_u64(uint64_t const * a) {
3997 return vld1_u64(a);
4000 // CHECK-LABEL: @test_vld1_s8(
4001 // CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0(ptr %a, i32 1)
4002 // CHECK: ret <8 x i8> [[VLD1]]
4003 int8x8_t test_vld1_s8(int8_t const * a) {
4004 return vld1_s8(a);
4007 // CHECK-LABEL: @test_vld1_s16(
4008 // CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0(ptr %a, i32 2)
4009 // CHECK: ret <4 x i16> [[VLD1]]
4010 int16x4_t test_vld1_s16(int16_t const * a) {
4011 return vld1_s16(a);
4014 // CHECK-LABEL: @test_vld1_s32(
4015 // CHECK: [[VLD1:%.*]] = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0(ptr %a, i32 4)
4016 // CHECK: ret <2 x i32> [[VLD1]]
4017 int32x2_t test_vld1_s32(int32_t const * a) {
4018 return vld1_s32(a);
4021 // CHECK-LABEL: @test_vld1_s64(
4022 // CHECK: [[VLD1:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0(ptr %a, i32 4)
4023 // CHECK: ret <1 x i64> [[VLD1]]
4024 int64x1_t test_vld1_s64(int64_t const * a) {
4025 return vld1_s64(a);
4028 // CHECK-LABEL: @test_vld1_f16(
4029 // CHECK: [[VLD1:%.*]] = call <4 x half> @llvm.arm.neon.vld1.v4f16.p0(ptr %a, i32 2)
4030 // CHECK: ret <4 x half> [[VLD1]]
4031 float16x4_t test_vld1_f16(float16_t const * a) {
4032 return vld1_f16(a);
4035 // CHECK-LABEL: @test_vld1_f32(
4036 // CHECK: [[VLD1:%.*]] = call <2 x float> @llvm.arm.neon.vld1.v2f32.p0(ptr %a, i32 4)
4037 // CHECK: ret <2 x float> [[VLD1]]
4038 float32x2_t test_vld1_f32(float32_t const * a) {
4039 return vld1_f32(a);
4042 // CHECK-LABEL: @test_vld1_p8(
4043 // CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0(ptr %a, i32 1)
4044 // CHECK: ret <8 x i8> [[VLD1]]
4045 poly8x8_t test_vld1_p8(poly8_t const * a) {
4046 return vld1_p8(a);
4049 // CHECK-LABEL: @test_vld1_p16(
4050 // CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0(ptr %a, i32 2)
4051 // CHECK: ret <4 x i16> [[VLD1]]
4052 poly16x4_t test_vld1_p16(poly16_t const * a) {
4053 return vld1_p16(a);
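// vld1[q]_dup: a scalar load of one element, an insertelement into lane 0,
// and a zero-mask shufflevector splat across all lanes.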
// CHECK-LABEL: @test_vld1q_dup_u8(
// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
// CHECK: ret <16 x i8> [[LANE]]
uint8x16_t test_vld1q_dup_u8(uint8_t const * a) {
  return vld1q_dup_u8(a);
}

// CHECK-LABEL: @test_vld1q_dup_u16(
// CHECK: [[TMP2:%.*]] = load i16, ptr %a, align 2
// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i16> [[LANE]]
uint16x8_t test_vld1q_dup_u16(uint16_t const * a) {
  return vld1q_dup_u16(a);
}

// CHECK-LABEL: @test_vld1q_dup_u32(
// CHECK: [[TMP2:%.*]] = load i32, ptr %a, align 4
// CHECK: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x i32> [[LANE]]
uint32x4_t test_vld1q_dup_u32(uint32_t const * a) {
  return vld1q_dup_u32(a);
}

// CHECK-LABEL: @test_vld1q_dup_u64(
// CHECK: [[TMP2:%.*]] = load i64, ptr %a, align 4
// CHECK: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[LANE]]
uint64x2_t test_vld1q_dup_u64(uint64_t const * a) {
  return vld1q_dup_u64(a);
}

// CHECK-LABEL: @test_vld1q_dup_s8(
// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
// CHECK: ret <16 x i8> [[LANE]]
int8x16_t test_vld1q_dup_s8(int8_t const * a) {
  return vld1q_dup_s8(a);
}

// CHECK-LABEL: @test_vld1q_dup_s16(
// CHECK: [[TMP2:%.*]] = load i16, ptr %a, align 2
// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i16> [[LANE]]
int16x8_t test_vld1q_dup_s16(int16_t const * a) {
  return vld1q_dup_s16(a);
}

// CHECK-LABEL: @test_vld1q_dup_s32(
// CHECK: [[TMP2:%.*]] = load i32, ptr %a, align 4
// CHECK: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x i32> [[LANE]]
int32x4_t test_vld1q_dup_s32(int32_t const * a) {
  return vld1q_dup_s32(a);
}

// CHECK-LABEL: @test_vld1q_dup_s64(
// CHECK: [[TMP2:%.*]] = load i64, ptr %a, align 4
// CHECK: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[LANE]]
int64x2_t test_vld1q_dup_s64(int64_t const * a) {
  return vld1q_dup_s64(a);
}

// CHECK-LABEL: @test_vld1q_dup_f16(
// CHECK: [[TMP2:%.*]] = load half, ptr %a, align 2
// CHECK: [[TMP3:%.*]] = insertelement <8 x half> poison, half [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x half> [[TMP3]], <8 x half> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: ret <8 x half> [[LANE]]
float16x8_t test_vld1q_dup_f16(float16_t const * a) {
  return vld1q_dup_f16(a);
}

// CHECK-LABEL: @test_vld1q_dup_f32(
// CHECK: [[TMP2:%.*]] = load float, ptr %a, align 4
// CHECK: [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x float> [[LANE]]
float32x4_t test_vld1q_dup_f32(float32_t const * a) {
  return vld1q_dup_f32(a);
}

// CHECK-LABEL: @test_vld1q_dup_p8(
// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
// CHECK: ret <16 x i8> [[LANE]]
poly8x16_t test_vld1q_dup_p8(poly8_t const * a) {
  return vld1q_dup_p8(a);
}

// CHECK-LABEL: @test_vld1q_dup_p16(
// CHECK: [[TMP2:%.*]] = load i16, ptr %a, align 2
// CHECK: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i16> [[LANE]]
poly16x8_t test_vld1q_dup_p16(poly16_t const * a) {
  return vld1q_dup_p16(a);
}

// CHECK-LABEL: @test_vld1_dup_u8(
// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i8> [[LANE]]
uint8x8_t test_vld1_dup_u8(uint8_t const * a) {
  return vld1_dup_u8(a);
}

// CHECK-LABEL: @test_vld1_dup_u16(
// CHECK: [[TMP2:%.*]] = load i16, ptr %a, align 2
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x i16> [[LANE]]
uint16x4_t test_vld1_dup_u16(uint16_t const * a) {
  return vld1_dup_u16(a);
}

// CHECK-LABEL: @test_vld1_dup_u32(
// CHECK: [[TMP2:%.*]] = load i32, ptr %a, align 4
// CHECK: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i32> [[LANE]]
uint32x2_t test_vld1_dup_u32(uint32_t const * a) {
  return vld1_dup_u32(a);
}

// CHECK-LABEL: @test_vld1_dup_u64(
// CHECK: [[TMP2:%.*]] = load i64, ptr %a, align 4
// CHECK: [[TMP3:%.*]] = insertelement <1 x i64> poison, i64 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[LANE]]
uint64x1_t test_vld1_dup_u64(uint64_t const * a) {
  return vld1_dup_u64(a);
}

// CHECK-LABEL: @test_vld1_dup_s8(
// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i8> [[LANE]]
int8x8_t test_vld1_dup_s8(int8_t const * a) {
  return vld1_dup_s8(a);
}

// CHECK-LABEL: @test_vld1_dup_s16(
// CHECK: [[TMP2:%.*]] = load i16, ptr %a, align 2
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x i16> [[LANE]]
int16x4_t test_vld1_dup_s16(int16_t const * a) {
  return vld1_dup_s16(a);
}

// CHECK-LABEL: @test_vld1_dup_s32(
// CHECK: [[TMP2:%.*]] = load i32, ptr %a, align 4
// CHECK: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer
// CHECK: ret <2 x i32> [[LANE]]
int32x2_t test_vld1_dup_s32(int32_t const * a) {
  return vld1_dup_s32(a);
}

// CHECK-LABEL: @test_vld1_dup_s64(
// CHECK: [[TMP2:%.*]] = load i64, ptr %a, align 4
// CHECK: [[TMP3:%.*]] = insertelement <1 x i64> poison, i64 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK: ret <1 x i64> [[LANE]]
int64x1_t test_vld1_dup_s64(int64_t const * a) {
  return vld1_dup_s64(a);
}

// CHECK-LABEL: @test_vld1_dup_f16(
// CHECK: [[TMP2:%.*]] = load half, ptr %a, align 2
// CHECK: [[TMP3:%.*]] = insertelement <4 x half> poison, half [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x half> [[LANE]]
float16x4_t test_vld1_dup_f16(float16_t const * a) {
  return vld1_dup_f16(a);
}

// CHECK-LABEL: @test_vld1_dup_f32(
// CHECK: [[TMP2:%.*]] = load float, ptr %a, align 4
// CHECK: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer
// CHECK: ret <2 x float> [[LANE]]
float32x2_t test_vld1_dup_f32(float32_t const * a) {
  return vld1_dup_f32(a);
}

// CHECK-LABEL: @test_vld1_dup_p8(
// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK: ret <8 x i8> [[LANE]]
poly8x8_t test_vld1_dup_p8(poly8_t const * a) {
  return vld1_dup_p8(a);
}

// CHECK-LABEL: @test_vld1_dup_p16(
// CHECK: [[TMP2:%.*]] = load i16, ptr %a, align 2
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i32 0
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
// CHECK: ret <4 x i16> [[LANE]]
poly16x4_t test_vld1_dup_p16(poly16_t const * a) {
  return vld1_dup_p16(a);
}

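// vld1[q]_lane: the loaded scalar is inserted into the requested lane of %b;
// the 64-bit q variants instead combine a shufflevector of %b with a
// @llvm.arm.neon.vld1 of the new element.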
// CHECK-LABEL: @test_vld1q_lane_u8(
// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
// CHECK: ret <16 x i8> [[VLD1_LANE]]
uint8x16_t test_vld1q_lane_u8(uint8_t const * a, uint8x16_t b) {
  return vld1q_lane_u8(a, b, 15);
}

// CHECK-LABEL: @test_vld1q_lane_u16(
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP4:%.*]] = load i16, ptr %a, align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
// CHECK: ret <8 x i16> [[VLD1_LANE]]
uint16x8_t test_vld1q_lane_u16(uint16_t const * a, uint16x8_t b) {
  return vld1q_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vld1q_lane_u32(
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP4:%.*]] = load i32, ptr %a, align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3
// CHECK: ret <4 x i32> [[VLD1_LANE]]
uint32x4_t test_vld1q_lane_u32(uint32_t const * a, uint32x4_t b) {
  return vld1q_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vld1q_lane_u64(
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0(ptr %a, i32 4)
// CHECK: [[VLD1Q_LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[VLD1Q_LANE]]
uint64x2_t test_vld1q_lane_u64(uint64_t const * a, uint64x2_t b) {
  return vld1q_lane_u64(a, b, 1);
}

// CHECK-LABEL: @test_vld1q_lane_s8(
// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
// CHECK: ret <16 x i8> [[VLD1_LANE]]
int8x16_t test_vld1q_lane_s8(int8_t const * a, int8x16_t b) {
  return vld1q_lane_s8(a, b, 15);
}

// CHECK-LABEL: @test_vld1q_lane_s16(
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP4:%.*]] = load i16, ptr %a, align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
// CHECK: ret <8 x i16> [[VLD1_LANE]]
int16x8_t test_vld1q_lane_s16(int16_t const * a, int16x8_t b) {
  return vld1q_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vld1q_lane_s32(
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP4:%.*]] = load i32, ptr %a, align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3
// CHECK: ret <4 x i32> [[VLD1_LANE]]
int32x4_t test_vld1q_lane_s32(int32_t const * a, int32x4_t b) {
  return vld1q_lane_s32(a, b, 3);
}

// CHECK-LABEL: @test_vld1q_lane_s64(
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> zeroinitializer
// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0(ptr %a, i32 4)
// CHECK: [[VLD1Q_LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[VLD1Q_LANE]]
int64x2_t test_vld1q_lane_s64(int64_t const * a, int64x2_t b) {
  return vld1q_lane_s64(a, b, 1);
}

// CHECK-LABEL: @test_vld1q_lane_f16(
// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK: [[TMP4:%.*]] = load half, ptr %a, align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x half> [[TMP2]], half [[TMP4]], i32 7
// CHECK: ret <8 x half> [[VLD1_LANE]]
float16x8_t test_vld1q_lane_f16(float16_t const * a, float16x8_t b) {
  return vld1q_lane_f16(a, b, 7);
}

// CHECK-LABEL: @test_vld1q_lane_f32(
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: [[TMP4:%.*]] = load float, ptr %a, align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP4]], i32 3
// CHECK: ret <4 x float> [[VLD1_LANE]]
float32x4_t test_vld1q_lane_f32(float32_t const * a, float32x4_t b) {
  return vld1q_lane_f32(a, b, 3);
}

// CHECK-LABEL: @test_vld1q_lane_p8(
// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
// CHECK: ret <16 x i8> [[VLD1_LANE]]
poly8x16_t test_vld1q_lane_p8(poly8_t const * a, poly8x16_t b) {
  return vld1q_lane_p8(a, b, 15);
}

// CHECK-LABEL: @test_vld1q_lane_p16(
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP4:%.*]] = load i16, ptr %a, align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
// CHECK: ret <8 x i16> [[VLD1_LANE]]
poly16x8_t test_vld1q_lane_p16(poly16_t const * a, poly16x8_t b) {
  return vld1q_lane_p16(a, b, 7);
}

// CHECK-LABEL: @test_vld1_lane_u8(
// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
// CHECK: ret <8 x i8> [[VLD1_LANE]]
uint8x8_t test_vld1_lane_u8(uint8_t const * a, uint8x8_t b) {
  return vld1_lane_u8(a, b, 7);
}

// CHECK-LABEL: @test_vld1_lane_u16(
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP4:%.*]] = load i16, ptr %a, align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK: ret <4 x i16> [[VLD1_LANE]]
uint16x4_t test_vld1_lane_u16(uint16_t const * a, uint16x4_t b) {
  return vld1_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vld1_lane_u32(
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP4:%.*]] = load i32, ptr %a, align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
// CHECK: ret <2 x i32> [[VLD1_LANE]]
uint32x2_t test_vld1_lane_u32(uint32_t const * a, uint32x2_t b) {
  return vld1_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vld1_lane_u64(
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP4:%.*]] = load i64, ptr %a, align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
// CHECK: ret <1 x i64> [[VLD1_LANE]]
uint64x1_t test_vld1_lane_u64(uint64_t const * a, uint64x1_t b) {
  return vld1_lane_u64(a, b, 0);
}

// CHECK-LABEL: @test_vld1_lane_s8(
// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
// CHECK: ret <8 x i8> [[VLD1_LANE]]
int8x8_t test_vld1_lane_s8(int8_t const * a, int8x8_t b) {
  return vld1_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vld1_lane_s16(
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP4:%.*]] = load i16, ptr %a, align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK: ret <4 x i16> [[VLD1_LANE]]
int16x4_t test_vld1_lane_s16(int16_t const * a, int16x4_t b) {
  return vld1_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vld1_lane_s32(
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP4:%.*]] = load i32, ptr %a, align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
// CHECK: ret <2 x i32> [[VLD1_LANE]]
int32x2_t test_vld1_lane_s32(int32_t const * a, int32x2_t b) {
  return vld1_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vld1_lane_s64(
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP4:%.*]] = load i64, ptr %a, align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
// CHECK: ret <1 x i64> [[VLD1_LANE]]
int64x1_t test_vld1_lane_s64(int64_t const * a, int64x1_t b) {
  return vld1_lane_s64(a, b, 0);
}

// CHECK-LABEL: @test_vld1_lane_f16(
// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// CHECK: [[TMP4:%.*]] = load half, ptr %a, align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x half> [[TMP2]], half [[TMP4]], i32 3
// CHECK: ret <4 x half> [[VLD1_LANE]]
float16x4_t test_vld1_lane_f16(float16_t const * a, float16x4_t b) {
  return vld1_lane_f16(a, b, 3);
}

// CHECK-LABEL: @test_vld1_lane_f32(
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK: [[TMP4:%.*]] = load float, ptr %a, align 4
// CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP4]], i32 1
// CHECK: ret <2 x float> [[VLD1_LANE]]
float32x2_t test_vld1_lane_f32(float32_t const * a, float32x2_t b) {
  return vld1_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vld1_lane_p8(
// CHECK: [[TMP0:%.*]] = load i8, ptr %a, align 1
// CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
// CHECK: ret <8 x i8> [[VLD1_LANE]]
poly8x8_t test_vld1_lane_p8(poly8_t const * a, poly8x8_t b) {
  return vld1_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vld1_lane_p16(
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP4:%.*]] = load i16, ptr %a, align 2
// CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK: ret <4 x i16> [[VLD1_LANE]]
poly16x4_t test_vld1_lane_p16(poly16_t const * a, poly16x4_t b) {
  return vld1_lane_p16(a, b, 3);
}

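// vld2[q]: the two-vector result is returned through a struct alloca; only
// the alloca and the start of the intrinsic call are checked here.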
// CHECK-LABEL: @test_vld2q_u8(
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8>
uint8x16x2_t test_vld2q_u8(uint8_t const * a) {
  return vld2q_u8(a);
}

// CHECK-LABEL: @test_vld2q_u16(
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
uint16x8x2_t test_vld2q_u16(uint16_t const * a) {
  return vld2q_u16(a);
}

// CHECK-LABEL: @test_vld2q_u32(
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32>
uint32x4x2_t test_vld2q_u32(uint32_t const * a) {
  return vld2q_u32(a);
}

// CHECK-LABEL: @test_vld2q_s8(
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8>
int8x16x2_t test_vld2q_s8(int8_t const * a) {
  return vld2q_s8(a);
}

// CHECK-LABEL: @test_vld2q_s16(
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
int16x8x2_t test_vld2q_s16(int16_t const * a) {
  return vld2q_s16(a);
}

// CHECK-LABEL: @test_vld2q_s32(
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32>
int32x4x2_t test_vld2q_s32(int32_t const * a) {
  return vld2q_s32(a);
}

// CHECK-LABEL: @test_vld2q_f16(
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[VLD2Q_V:%.*]] = call { <8 x half>, <8 x half>
float16x8x2_t test_vld2q_f16(float16_t const * a) {
  return vld2q_f16(a);
}

// CHECK-LABEL: @test_vld2q_f32(
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[VLD2Q_V:%.*]] = call { <4 x float>, <4 x float>
float32x4x2_t test_vld2q_f32(float32_t const * a) {
  return vld2q_f32(a);
}

// CHECK-LABEL: @test_vld2q_p8(
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8>
poly8x16x2_t test_vld2q_p8(poly8_t const * a) {
  return vld2q_p8(a);
}

// CHECK-LABEL: @test_vld2q_p16(
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
poly16x8x2_t test_vld2q_p16(poly16_t const * a) {
  return vld2q_p16(a);
}

// CHECK-LABEL: @test_vld2_u8(
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8>
uint8x8x2_t test_vld2_u8(uint8_t const * a) {
  return vld2_u8(a);
}

// CHECK-LABEL: @test_vld2_u16(
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16>
uint16x4x2_t test_vld2_u16(uint16_t const * a) {
  return vld2_u16(a);
}

// CHECK-LABEL: @test_vld2_u32(
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32>
uint32x2x2_t test_vld2_u32(uint32_t const * a) {
  return vld2_u32(a);
}

// CHECK-LABEL: @test_vld2_u64(
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64>
uint64x1x2_t test_vld2_u64(uint64_t const * a) {
  return vld2_u64(a);
}

// CHECK-LABEL: @test_vld2_s8(
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8>
int8x8x2_t test_vld2_s8(int8_t const * a) {
  return vld2_s8(a);
}

// CHECK-LABEL: @test_vld2_s16(
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16>
int16x4x2_t test_vld2_s16(int16_t const * a) {
  return vld2_s16(a);
}

// CHECK-LABEL: @test_vld2_s32(
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32>
int32x2x2_t test_vld2_s32(int32_t const * a) {
  return vld2_s32(a);
}

// CHECK-LABEL: @test_vld2_s64(
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64>
int64x1x2_t test_vld2_s64(int64_t const * a) {
  return vld2_s64(a);
}

// CHECK-LABEL: @test_vld2_f16(
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[VLD2_V:%.*]] = call { <4 x half>, <4 x half>
float16x4x2_t test_vld2_f16(float16_t const * a) {
  return vld2_f16(a);
}

// CHECK-LABEL: @test_vld2_f32(
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[VLD2_V:%.*]] = call { <2 x float>, <2 x float>
float32x2x2_t test_vld2_f32(float32_t const * a) {
  return vld2_f32(a);
}

// CHECK-LABEL: @test_vld2_p8(
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8>
poly8x8x2_t test_vld2_p8(poly8_t const * a) {
  return vld2_p8(a);
}

// CHECK-LABEL: @test_vld2_p16(
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16>
poly16x4x2_t test_vld2_p16(poly16_t const * a) {
  return vld2_p16(a);
}

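// vld2[q]_lane: the struct argument arrives as an [N x i64] coerce value; it
// is spilled, memcpy'd into a local copy, and each element is reloaded (and
// bitcast for non-8-bit types) before the lane-load intrinsic call.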
// CHECK-LABEL: @test_vld2q_lane_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>
uint16x8x2_t test_vld2q_lane_u16(uint16_t const * a, uint16x8x2_t b) {
  return vld2q_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vld2q_lane_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>
uint32x4x2_t test_vld2q_lane_u32(uint32_t const * a, uint32x4x2_t b) {
  return vld2q_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vld2q_lane_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>
int16x8x2_t test_vld2q_lane_s16(int16_t const * a, int16x8x2_t b) {
  return vld2q_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vld2q_lane_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>
int32x4x2_t test_vld2q_lane_s32(int32_t const * a, int32x4x2_t b) {
  return vld2q_lane_s32(a, b, 3);
}

// CHECK-LABEL: @test_vld2q_lane_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>
float16x8x2_t test_vld2q_lane_f16(float16_t const * a, float16x8x2_t b) {
  return vld2q_lane_f16(a, b, 7);
}

// CHECK-LABEL: @test_vld2q_lane_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>
float32x4x2_t test_vld2q_lane_f32(float32_t const * a, float32x4x2_t b) {
  return vld2q_lane_f32(a, b, 3);
}

// CHECK-LABEL: @test_vld2q_lane_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>
poly16x8x2_t test_vld2q_lane_p16(poly16_t const * a, poly16x8x2_t b) {
  return vld2q_lane_p16(a, b, 7);
}

// CHECK-LABEL: @test_vld2_lane_u8(
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>
uint8x8x2_t test_vld2_lane_u8(uint8_t const * a, uint8x8x2_t b) {
  return vld2_lane_u8(a, b, 7);
}

// CHECK-LABEL: @test_vld2_lane_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>
uint16x4x2_t test_vld2_lane_u16(uint16_t const * a, uint16x4x2_t b) {
  return vld2_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vld2_lane_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>
uint32x2x2_t test_vld2_lane_u32(uint32_t const * a, uint32x2x2_t b) {
  return vld2_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vld2_lane_s8(
// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>
int8x8x2_t test_vld2_lane_s8(int8_t const * a, int8x8x2_t b) {
  return vld2_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vld2_lane_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>
int16x4x2_t test_vld2_lane_s16(int16_t const * a, int16x4x2_t b) {
  return vld2_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vld2_lane_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>
int32x2x2_t test_vld2_lane_s32(int32_t const * a, int32x2x2_t b) {
  return vld2_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vld2_lane_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x half>, <4 x half>
float16x4x2_t test_vld2_lane_f16(float16_t const * a, float16x4x2_t b) {
  return vld2_lane_f16(a, b, 3);
}

// CHECK-LABEL: @test_vld2_lane_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x float>, <2 x float>
float32x2x2_t test_vld2_lane_f32(float32_t const * a, float32x2x2_t b) {
  return vld2_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vld2_lane_p8(
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>
poly8x8x2_t test_vld2_lane_p8(poly8_t const * a, poly8x8x2_t b) {
  return vld2_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vld2_lane_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>
poly16x4x2_t test_vld2_lane_p16(poly16_t const * a, poly16x4x2_t b) {
  return vld2_lane_p16(a, b, 3);
}

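// vld3[q]: same struct-alloca return pattern as vld2, with a three-vector
// aggregate.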
5018 // CHECK-LABEL: @test_vld3q_u8(
5019 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
5020 // CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>
5021 uint8x16x3_t test_vld3q_u8(uint8_t const * a) {
5022 return vld3q_u8(a);
5025 // CHECK-LABEL: @test_vld3q_u16(
5026 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
5027 // CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
5028 uint16x8x3_t test_vld3q_u16(uint16_t const * a) {
5029 return vld3q_u16(a);
5032 // CHECK-LABEL: @test_vld3q_u32(
5033 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
5034 // CHECK: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
5035 uint32x4x3_t test_vld3q_u32(uint32_t const * a) {
5036 return vld3q_u32(a);
5039 // CHECK-LABEL: @test_vld3q_s8(
5040 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
5041 // CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>
5042 int8x16x3_t test_vld3q_s8(int8_t const * a) {
5043 return vld3q_s8(a);
5046 // CHECK-LABEL: @test_vld3q_s16(
5047 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
5048 // CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
5049 int16x8x3_t test_vld3q_s16(int16_t const * a) {
5050 return vld3q_s16(a);
5053 // CHECK-LABEL: @test_vld3q_s32(
5054 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
5055 // CHECK: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
5056 int32x4x3_t test_vld3q_s32(int32_t const * a) {
5057 return vld3q_s32(a);
5060 // CHECK-LABEL: @test_vld3q_f16(
5061 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
5062 // CHECK: [[VLD3Q_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>
5063 float16x8x3_t test_vld3q_f16(float16_t const * a) {
5064 return vld3q_f16(a);
5067 // CHECK-LABEL: @test_vld3q_f32(
5068 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
5069 // CHECK: [[VLD3Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>
5070 float32x4x3_t test_vld3q_f32(float32_t const * a) {
5071 return vld3q_f32(a);
5074 // CHECK-LABEL: @test_vld3q_p8(
5075 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
5076 // CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>
5077 poly8x16x3_t test_vld3q_p8(poly8_t const * a) {
5078 return vld3q_p8(a);
5081 // CHECK-LABEL: @test_vld3q_p16(
5082 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
5083 // CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
5084 poly16x8x3_t test_vld3q_p16(poly16_t const * a) {
5085 return vld3q_p16(a);
// CHECK-LABEL: @test_vld3_u8(
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
uint8x8x3_t test_vld3_u8(uint8_t const * a) {
  return vld3_u8(a);
}

// CHECK-LABEL: @test_vld3_u16(
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
uint16x4x3_t test_vld3_u16(uint16_t const * a) {
  return vld3_u16(a);
}

// CHECK-LABEL: @test_vld3_u32(
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
uint32x2x3_t test_vld3_u32(uint32_t const * a) {
  return vld3_u32(a);
}

// CHECK-LABEL: @test_vld3_u64(
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK: [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>
uint64x1x3_t test_vld3_u64(uint64_t const * a) {
  return vld3_u64(a);
}

// CHECK-LABEL: @test_vld3_s8(
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
int8x8x3_t test_vld3_s8(int8_t const * a) {
  return vld3_s8(a);
}

// CHECK-LABEL: @test_vld3_s16(
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
int16x4x3_t test_vld3_s16(int16_t const * a) {
  return vld3_s16(a);
}

// CHECK-LABEL: @test_vld3_s32(
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
int32x2x3_t test_vld3_s32(int32_t const * a) {
  return vld3_s32(a);
}

// CHECK-LABEL: @test_vld3_s64(
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>
int64x1x3_t test_vld3_s64(int64_t const * a) {
  return vld3_s64(a);
}

// CHECK-LABEL: @test_vld3_f16(
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[VLD3_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>
float16x4x3_t test_vld3_f16(float16_t const * a) {
  return vld3_f16(a);
}

// CHECK-LABEL: @test_vld3_f32(
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[VLD3_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>
float32x2x3_t test_vld3_f32(float32_t const * a) {
  return vld3_f32(a);
}

// CHECK-LABEL: @test_vld3_p8(
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
poly8x8x3_t test_vld3_p8(poly8_t const * a) {
  return vld3_p8(a);
}

// CHECK-LABEL: @test_vld3_p16(
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
poly16x4x3_t test_vld3_p16(poly16_t const * a) {
  return vld3_p16(a);
}
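
// The plain vld3 forms load into 64-bit D registers; note that the 64-bit
// element variants (vld3_u64/vld3_s64 above) exist only in this form, with
// no q or lane counterpart. A hedged sketch of three-channel float
// de-interleaving (the helper is illustrative, not part of the test):
static inline float32x2_t example_sum_xyz(const float *xyz) {
  float32x2x3_t v = vld3_f32(xyz); // x, y and z coordinates of two points
  return vadd_f32(vadd_f32(v.val[0], v.val[1]), v.val[2]);
}
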
// CHECK-LABEL: @test_vld3q_lane_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
uint16x8x3_t test_vld3q_lane_u16(uint16_t const * a, uint16x8x3_t b) {
  return vld3q_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vld3q_lane_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
uint32x4x3_t test_vld3q_lane_u32(uint32_t const * a, uint32x4x3_t b) {
  return vld3q_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vld3q_lane_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
int16x8x3_t test_vld3q_lane_s16(int16_t const * a, int16x8x3_t b) {
  return vld3q_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vld3q_lane_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
int32x4x3_t test_vld3q_lane_s32(int32_t const * a, int32x4x3_t b) {
  return vld3q_lane_s32(a, b, 3);
}

// CHECK-LABEL: @test_vld3q_lane_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>
float16x8x3_t test_vld3q_lane_f16(float16_t const * a, float16x8x3_t b) {
  return vld3q_lane_f16(a, b, 7);
}

// CHECK-LABEL: @test_vld3q_lane_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>
float32x4x3_t test_vld3q_lane_f32(float32_t const * a, float32x4x3_t b) {
  return vld3q_lane_f32(a, b, 3);
}

// CHECK-LABEL: @test_vld3q_lane_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
poly16x8x3_t test_vld3q_lane_p16(poly16_t const * a, poly16x8x3_t b) {
  return vld3q_lane_p16(a, b, 7);
}
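
// vld3q_lane loads exactly three elements from memory into lane n of the
// three supplied vectors, leaving all other lanes unchanged; the lane index
// must be a compile-time constant in range (0-7 for 16-bit elements, 0-3
// for 32-bit), and no 8-bit q-form lane variant exists. A sketch under
// those assumptions (helper name hypothetical):
static inline uint16x8x3_t example_patch_lane0(const uint16_t *p,
                                               uint16x8x3_t acc) {
  return vld3q_lane_u16(p, acc, 0); // overwrite lane 0 of all three vectors
}
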
// CHECK-LABEL: @test_vld3_lane_u8(
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
uint8x8x3_t test_vld3_lane_u8(uint8_t const * a, uint8x8x3_t b) {
  return vld3_lane_u8(a, b, 7);
}

// CHECK-LABEL: @test_vld3_lane_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
uint16x4x3_t test_vld3_lane_u16(uint16_t const * a, uint16x4x3_t b) {
  return vld3_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vld3_lane_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
uint32x2x3_t test_vld3_lane_u32(uint32_t const * a, uint32x2x3_t b) {
  return vld3_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vld3_lane_s8(
// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
int8x8x3_t test_vld3_lane_s8(int8_t const * a, int8x8x3_t b) {
  return vld3_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vld3_lane_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
int16x4x3_t test_vld3_lane_s16(int16_t const * a, int16x4x3_t b) {
  return vld3_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vld3_lane_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
int32x2x3_t test_vld3_lane_s32(int32_t const * a, int32x2x3_t b) {
  return vld3_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vld3_lane_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>
float16x4x3_t test_vld3_lane_f16(float16_t const * a, float16x4x3_t b) {
  return vld3_lane_f16(a, b, 3);
}

// CHECK-LABEL: @test_vld3_lane_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>
float32x2x3_t test_vld3_lane_f32(float32_t const * a, float32x2x3_t b) {
  return vld3_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vld3_lane_p8(
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
poly8x8x3_t test_vld3_lane_p8(poly8_t const * a, poly8x8x3_t b) {
  return vld3_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vld3_lane_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
poly16x4x3_t test_vld3_lane_p16(poly16_t const * a, poly16x4x3_t b) {
  return vld3_lane_p16(a, b, 3);
}
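
// Unlike the q forms, the D-register lane loads also cover 8-bit elements
// (vld3_lane_u8/s8/p8 above). An illustrative sketch gathering one packed
// RGB pixel into lane 2 of three channel vectors (names hypothetical):
static inline uint8x8x3_t example_gather_pixel(const uint8_t *pixel,
                                               uint8x8x3_t channels) {
  return vld3_lane_u8(pixel, channels, 2);
}
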
// CHECK-LABEL: @test_vld4q_u8(
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>
uint8x16x4_t test_vld4q_u8(uint8_t const * a) {
  return vld4q_u8(a);
}

// CHECK-LABEL: @test_vld4q_u16(
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
uint16x8x4_t test_vld4q_u16(uint16_t const * a) {
  return vld4q_u16(a);
}

// CHECK-LABEL: @test_vld4q_u32(
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
uint32x4x4_t test_vld4q_u32(uint32_t const * a) {
  return vld4q_u32(a);
}

// CHECK-LABEL: @test_vld4q_s8(
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>
int8x16x4_t test_vld4q_s8(int8_t const * a) {
  return vld4q_s8(a);
}

// CHECK-LABEL: @test_vld4q_s16(
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
int16x8x4_t test_vld4q_s16(int16_t const * a) {
  return vld4q_s16(a);
}

// CHECK-LABEL: @test_vld4q_s32(
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
int32x4x4_t test_vld4q_s32(int32_t const * a) {
  return vld4q_s32(a);
}

// CHECK-LABEL: @test_vld4q_f16(
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[VLD4Q_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half>
float16x8x4_t test_vld4q_f16(float16_t const * a) {
  return vld4q_f16(a);
}

// CHECK-LABEL: @test_vld4q_f32(
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[VLD4Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float>
float32x4x4_t test_vld4q_f32(float32_t const * a) {
  return vld4q_f32(a);
}

// CHECK-LABEL: @test_vld4q_p8(
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>
poly8x16x4_t test_vld4q_p8(poly8_t const * a) {
  return vld4q_p8(a);
}

// CHECK-LABEL: @test_vld4q_p16(
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
poly16x8x4_t test_vld4q_p16(poly16_t const * a) {
  return vld4q_p16(a);
}
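
// vld4q extends the same de-interleaving scheme to four-way packed data
// such as RGBA, returning a x4 struct of Q registers. A minimal sketch
// (illustrative only; the helper is not part of the test):
static inline uint8x16_t example_alpha_channel(const uint8_t *rgba) {
  uint8x16x4_t pix = vld4q_u8(rgba); // 64 bytes -> 4 channel vectors
  return pix.val[3];                 // the A component of 16 pixels
}
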
// CHECK-LABEL: @test_vld4_u8(
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
uint8x8x4_t test_vld4_u8(uint8_t const * a) {
  return vld4_u8(a);
}

// CHECK-LABEL: @test_vld4_u16(
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
uint16x4x4_t test_vld4_u16(uint16_t const * a) {
  return vld4_u16(a);
}

// CHECK-LABEL: @test_vld4_u32(
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
uint32x2x4_t test_vld4_u32(uint32_t const * a) {
  return vld4_u32(a);
}

// CHECK-LABEL: @test_vld4_u64(
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>
uint64x1x4_t test_vld4_u64(uint64_t const * a) {
  return vld4_u64(a);
}

// CHECK-LABEL: @test_vld4_s8(
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
int8x8x4_t test_vld4_s8(int8_t const * a) {
  return vld4_s8(a);
}

// CHECK-LABEL: @test_vld4_s16(
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
int16x4x4_t test_vld4_s16(int16_t const * a) {
  return vld4_s16(a);
}

// CHECK-LABEL: @test_vld4_s32(
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
int32x2x4_t test_vld4_s32(int32_t const * a) {
  return vld4_s32(a);
}

// CHECK-LABEL: @test_vld4_s64(
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>
int64x1x4_t test_vld4_s64(int64_t const * a) {
  return vld4_s64(a);
}

// CHECK-LABEL: @test_vld4_f16(
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[VLD4_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half>
float16x4x4_t test_vld4_f16(float16_t const * a) {
  return vld4_f16(a);
}

// CHECK-LABEL: @test_vld4_f32(
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[VLD4_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float>
float32x2x4_t test_vld4_f32(float32_t const * a) {
  return vld4_f32(a);
}

// CHECK-LABEL: @test_vld4_p8(
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
poly8x8x4_t test_vld4_p8(poly8_t const * a) {
  return vld4_p8(a);
}

// CHECK-LABEL: @test_vld4_p16(
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
poly16x4x4_t test_vld4_p16(poly16_t const * a) {
  return vld4_p16(a);
}
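
// As with vld3, the plain vld4 forms target D registers and include 64-bit
// element variants with no lane counterpart. A hedged sketch summing four
// interleaved two-float streams (helper name hypothetical):
static inline float32x2_t example_sum_quad(const float *p) {
  float32x2x4_t v = vld4_f32(p);
  return vadd_f32(vadd_f32(v.val[0], v.val[1]),
                  vadd_f32(v.val[2], v.val[3]));
}
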
5767 // CHECK-LABEL: @test_vld4q_lane_u16(
5768 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
5769 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
5770 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
5771 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[B]], i32 0, i32 0
5772 // CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
5773 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
5774 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
5775 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
5776 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
5777 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
5778 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
5779 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
5780 // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
5781 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
5782 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
5783 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
5784 // CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
5785 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
5786 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
5787 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i32 0, i32 3
5788 // CHECK: [[TMP11:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
5789 // CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
5790 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
5791 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
5792 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
5793 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
5794 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
5795 uint16x8x4_t test_vld4q_lane_u16(uint16_t const * a, uint16x8x4_t b) {
5796 return vld4q_lane_u16(a, b, 7);
5799 // CHECK-LABEL: @test_vld4q_lane_u32(
5800 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
5801 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
5802 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
5803 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[B]], i32 0, i32 0
5804 // CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
5805 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
5806 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
5807 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
5808 // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
5809 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
5810 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
5811 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
5812 // CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
5813 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
5814 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
5815 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
5816 // CHECK: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
5817 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
5818 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
5819 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i32 0, i32 3
5820 // CHECK: [[TMP11:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
5821 // CHECK: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <16 x i8>
5822 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
5823 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
5824 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
5825 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x i32>
5826 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
5827 uint32x4x4_t test_vld4q_lane_u32(uint32_t const * a, uint32x4x4_t b) {
5828 return vld4q_lane_u32(a, b, 3);
5831 // CHECK-LABEL: @test_vld4q_lane_s16(
5832 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
5833 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
5834 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
5835 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[B]], i32 0, i32 0
5836 // CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
5837 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
5838 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
5839 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
5840 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
5841 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
5842 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
5843 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
5844 // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
5845 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
5846 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
5847 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
5848 // CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
5849 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
5850 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
5851 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i32 0, i32 3
5852 // CHECK: [[TMP11:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
5853 // CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
5854 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
5855 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
5856 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
5857 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
5858 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
5859 int16x8x4_t test_vld4q_lane_s16(int16_t const * a, int16x8x4_t b) {
5860 return vld4q_lane_s16(a, b, 7);
5863 // CHECK-LABEL: @test_vld4q_lane_s32(
5864 // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
5865 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
5866 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
5867 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[B]], i32 0, i32 0
5868 // CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
5869 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
5870 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
5871 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
5872 // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
5873 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
5874 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
5875 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
5876 // CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
5877 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
5878 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
5879 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
5880 // CHECK: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
5881 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
5882 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
5883 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i32 0, i32 3
5884 // CHECK: [[TMP11:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
5885 // CHECK: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <16 x i8>
5886 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
5887 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
5888 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
5889 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x i32>
5890 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
5891 int32x4x4_t test_vld4q_lane_s32(int32_t const * a, int32x4x4_t b) {
5892 return vld4q_lane_s32(a, b, 3);
5895 // CHECK-LABEL: @test_vld4q_lane_f16(
5896 // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
5897 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
5898 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
5899 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[B]], i32 0, i32 0
5900 // CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
5901 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
5902 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
5903 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL]], i32 0, i32 0
5904 // CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
5905 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
5906 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
5907 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL1]], i32 0, i32 1
5908 // CHECK: [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
5909 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
5910 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
5911 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL3]], i32 0, i32 2
5912 // CHECK: [[TMP9:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
5913 // CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
5914 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
5915 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL5]], i32 0, i32 3
5916 // CHECK: [[TMP11:%.*]] = load <8 x half>, ptr [[ARRAYIDX6]], align 16
5917 // CHECK: [[TMP12:%.*]] = bitcast <8 x half> [[TMP11]] to <16 x i8>
5918 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
5919 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
5920 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
5921 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x half>
5922 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half>
5923 float16x8x4_t test_vld4q_lane_f16(float16_t const * a, float16x8x4_t b) {
5924 return vld4q_lane_f16(a, b, 7);
5927 // CHECK-LABEL: @test_vld4q_lane_f32(
5928 // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
5929 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
5930 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
5931 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[B]], i32 0, i32 0
5932 // CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
5933 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
5934 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
5935 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL]], i32 0, i32 0
5936 // CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
5937 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
5938 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
5939 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL1]], i32 0, i32 1
5940 // CHECK: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
5941 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
5942 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
5943 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL3]], i32 0, i32 2
5944 // CHECK: [[TMP9:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
5945 // CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
5946 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
5947 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL5]], i32 0, i32 3
5948 // CHECK: [[TMP11:%.*]] = load <4 x float>, ptr [[ARRAYIDX6]], align 16
5949 // CHECK: [[TMP12:%.*]] = bitcast <4 x float> [[TMP11]] to <16 x i8>
5950 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
5951 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
5952 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
5953 // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x float>
5954 // CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float>
5955 float32x4x4_t test_vld4q_lane_f32(float32_t const * a, float32x4x4_t b) {
5956 return vld4q_lane_f32(a, b, 3);
// CHECK-LABEL: @test_vld4q_lane_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
// CHECK: [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
poly16x8x4_t test_vld4q_lane_p16(poly16_t const * a, poly16x8x4_t b) {
  return vld4q_lane_p16(a, b, 7);
}

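// The vld4_lane tests below cover the 64-bit (d-register) variants: the
// 32-byte struct argument arrives coerced to [4 x i64], is spilled and copied
// with memcpy, and the four vectors are then loaded back out element by
// element before the lane load.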
// CHECK-LABEL: @test_vld4_lane_u8(
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
uint8x8x4_t test_vld4_lane_u8(uint8_t const * a, uint8x8x4_t b) {
  return vld4_lane_u8(a, b, 7);
}

// CHECK-LABEL: @test_vld4_lane_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
uint16x4x4_t test_vld4_lane_u16(uint16_t const * a, uint16x4x4_t b) {
  return vld4_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vld4_lane_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x i32>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
uint32x2x4_t test_vld4_lane_u32(uint32_t const * a, uint32x2x4_t b) {
  return vld4_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vld4_lane_s8(
// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
int8x8x4_t test_vld4_lane_s8(int8_t const * a, int8x8x4_t b) {
  return vld4_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vld4_lane_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
int16x4x4_t test_vld4_lane_s16(int16_t const * a, int16x4x4_t b) {
  return vld4_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vld4_lane_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x i32>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
int32x2x4_t test_vld4_lane_s32(int32_t const * a, int32x2x4_t b) {
  return vld4_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vld4_lane_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <4 x half>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <4 x half> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x half>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half>
float16x4x4_t test_vld4_lane_f16(float16_t const * a, float16x4x4_t b) {
  return vld4_lane_f16(a, b, 3);
}

// CHECK-LABEL: @test_vld4_lane_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <2 x float>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <2 x float> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x float>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float>
float32x2x4_t test_vld4_lane_f32(float32_t const * a, float32x2x4_t b) {
  return vld4_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vld4_lane_p8(
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
poly8x8x4_t test_vld4_lane_p8(poly8_t const * a, poly8x8x4_t b) {
  return vld4_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vld4_lane_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP11:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
poly16x4x4_t test_vld4_lane_p16(poly16_t const * a, poly16x4x4_t b) {
  return vld4_lane_p16(a, b, 3);
}

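// vmax/vmin lower straight to the signed/unsigned NEON intrinsics
// (@llvm.arm.neon.vmaxs/vmaxu/vmins/vminu). For non-i8 element types the
// surrounding bitcasts come from the generic builtin lowering and carry no
// semantics of their own.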
// CHECK-LABEL: @test_vmax_s8(
// CHECK: [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VMAX_V_I]]
int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
  return vmax_s8(a, b);
}

// CHECK-LABEL: @test_vmax_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VMAX_V2_I]]
int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
  return vmax_s16(a, b);
}

// CHECK-LABEL: @test_vmax_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VMAX_V2_I]]
int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
  return vmax_s32(a, b);
}

// CHECK-LABEL: @test_vmax_u8(
// CHECK: [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VMAX_V_I]]
uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
  return vmax_u8(a, b);
}

// CHECK-LABEL: @test_vmax_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VMAX_V2_I]]
uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
  return vmax_u16(a, b);
}

// CHECK-LABEL: @test_vmax_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VMAX_V2_I]]
uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
  return vmax_u32(a, b);
}

// CHECK-LABEL: @test_vmax_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x float> [[VMAX_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VMAX_V2_I]]
float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
  return vmax_f32(a, b);
}

// CHECK-LABEL: @test_vmaxq_s8(
// CHECK: [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VMAXQ_V_I]]
int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
  return vmaxq_s8(a, b);
}

// CHECK-LABEL: @test_vmaxq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VMAXQ_V2_I]]
int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
  return vmaxq_s16(a, b);
}

// CHECK-LABEL: @test_vmaxq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VMAXQ_V2_I]]
int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
  return vmaxq_s32(a, b);
}

// CHECK-LABEL: @test_vmaxq_u8(
// CHECK: [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VMAXQ_V_I]]
uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vmaxq_u8(a, b);
}

// CHECK-LABEL: @test_vmaxq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VMAXQ_V2_I]]
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vmaxq_u16(a, b);
}

// CHECK-LABEL: @test_vmaxq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VMAXQ_V2_I]]
uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vmaxq_u32(a, b);
}

// CHECK-LABEL: @test_vmaxq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x float> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x float> [[VMAXQ_V2_I]]
float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
  return vmaxq_f32(a, b);
}

// CHECK-LABEL: @test_vmin_s8(
// CHECK: [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VMIN_V_I]]
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
  return vmin_s8(a, b);
}

// CHECK-LABEL: @test_vmin_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VMIN_V2_I]]
int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
  return vmin_s16(a, b);
}

// CHECK-LABEL: @test_vmin_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VMIN_V2_I]]
int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
  return vmin_s32(a, b);
}

// CHECK-LABEL: @test_vmin_u8(
// CHECK: [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VMIN_V_I]]
uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
  return vmin_u8(a, b);
}

// CHECK-LABEL: @test_vmin_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VMIN_V2_I]]
uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
  return vmin_u16(a, b);
}

// CHECK-LABEL: @test_vmin_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VMIN_V2_I]]
uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
  return vmin_u32(a, b);
}

// CHECK-LABEL: @test_vmin_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x float> [[VMIN_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VMIN_V2_I]]
float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
  return vmin_f32(a, b);
}

// CHECK-LABEL: @test_vminq_s8(
// CHECK: [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VMINQ_V_I]]
int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
  return vminq_s8(a, b);
}

// CHECK-LABEL: @test_vminq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VMINQ_V2_I]]
int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
  return vminq_s16(a, b);
}

// CHECK-LABEL: @test_vminq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VMINQ_V2_I]]
int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
  return vminq_s32(a, b);
}

// CHECK-LABEL: @test_vminq_u8(
// CHECK: [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VMINQ_V_I]]
uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
  return vminq_u8(a, b);
}

// CHECK-LABEL: @test_vminq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VMINQ_V2_I]]
uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
  return vminq_u16(a, b);
}

// CHECK-LABEL: @test_vminq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VMINQ_V2_I]]
uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
  return vminq_u32(a, b);
}

// CHECK-LABEL: @test_vminq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x float> [[VMINQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x float> [[VMINQ_V2_I]]
float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
  return vminq_f32(a, b);
}

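// vmla/vmlaq need no target intrinsic: they expand to a plain IR mul (fmul
// for float) followed by an add (fadd).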
// CHECK-LABEL: @test_vmla_s8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[MUL_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vmla_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vmla_s8(a, b, c);
}

// CHECK-LABEL: @test_vmla_s16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vmla_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmla_s16(a, b, c);
}

// CHECK-LABEL: @test_vmla_s32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmla_s32(a, b, c);
}

// CHECK-LABEL: @test_vmla_f32(
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, %c
// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmla_f32(a, b, c);
}

// CHECK-LABEL: @test_vmla_u8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[MUL_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vmla_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmla_u8(a, b, c);
}

// CHECK-LABEL: @test_vmla_u16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmla_u16(a, b, c);
}

// CHECK-LABEL: @test_vmla_u32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmla_u32(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_s8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[MUL_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vmlaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
  return vmlaq_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_s16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  return vmlaq_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_s32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  return vmlaq_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_f32(
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, %c
// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vmlaq_f32(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_u8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[MUL_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vmlaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vmlaq_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_u16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vmlaq_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_u32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vmlaq_u32(a, b, c);
}

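// The widening vmlal forms multiply through @llvm.arm.neon.vmulls/vmullu and
// then accumulate with a plain IR add in the wider type.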
// CHECK-LABEL: @test_vmlal_s8(
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlal_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlal_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlal_u8(
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlal_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlal_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_u32(a, b, c);
}

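// The _lane variants splat the selected lane of the last operand with a
// shufflevector before the multiply.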
// CHECK-LABEL: @test_vmlal_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
// CHECK: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK: ret <4 x i32> [[ADD]]
int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlal_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
// CHECK: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK: ret <2 x i64> [[ADD]]
int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlal_lane_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
// CHECK: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK: ret <4 x i32> [[ADD]]
uint32x4_t test_vmlal_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlal_lane_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
// CHECK: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK: ret <2 x i64> [[ADD]]
uint64x2_t test_vmlal_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_lane_u32(a, b, c, 1);
}

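// The _n variants (here and in vmla_n/vmlaq_n below) build the scalar splat
// with a chain of insertelement instructions into an undef vector.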
// CHECK-LABEL: @test_vmlal_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vmlal_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vmlal_n_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlal_n_u16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
  return vmlal_n_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_n_u32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
  return vmlal_n_u32(a, b, c);
}

// CHECK-LABEL: @test_vmla_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
// CHECK: [[ADD:%.*]] = add <4 x i16> [[A:%.*]], [[MUL]]
// CHECK: ret <4 x i16> [[ADD]]
int16x4_t test_vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmla_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmla_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
// CHECK: [[ADD:%.*]] = add <2 x i32> [[A:%.*]], [[MUL]]
// CHECK: ret <2 x i32> [[ADD]]
int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmla_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmla_lane_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
// CHECK: [[ADD:%.*]] = add <4 x i16> [[A:%.*]], [[MUL]]
// CHECK: ret <4 x i16> [[ADD]]
uint16x4_t test_vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmla_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmla_lane_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
// CHECK: [[ADD:%.*]] = add <2 x i32> [[A:%.*]], [[MUL]]
// CHECK: ret <2 x i32> [[ADD]]
uint32x2_t test_vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmla_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmla_lane_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
// CHECK: [[ADD:%.*]] = fadd <2 x float> [[A:%.*]], [[MUL]]
// CHECK: ret <2 x float> [[ADD]]
float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmla_lane_f32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlaq_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
// CHECK: [[ADD:%.*]] = add <8 x i16> [[A:%.*]], [[MUL]]
// CHECK: ret <8 x i16> [[ADD]]
int16x8_t test_vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
  return vmlaq_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlaq_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
// CHECK: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[MUL]]
// CHECK: ret <4 x i32> [[ADD]]
int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
  return vmlaq_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlaq_lane_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
// CHECK: [[ADD:%.*]] = add <8 x i16> [[A:%.*]], [[MUL]]
// CHECK: ret <8 x i16> [[ADD]]
uint16x8_t test_vmlaq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) {
  return vmlaq_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlaq_lane_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
// CHECK: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[MUL]]
// CHECK: ret <4 x i32> [[ADD]]
uint32x4_t test_vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) {
  return vmlaq_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlaq_lane_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
// CHECK: [[ADD:%.*]] = fadd <4 x float> [[A:%.*]], [[MUL]]
// CHECK: ret <4 x float> [[ADD]]
float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) {
  return vmlaq_lane_f32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmla_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
  return vmla_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vmla_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
  return vmla_n_s32(a, b, c);
}

// CHECK-LABEL: @test_vmla_n_u16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
  return vmla_n_u16(a, b, c);
}

// CHECK-LABEL: @test_vmla_n_u32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
  return vmla_n_u32(a, b, c);
}

// CHECK-LABEL: @test_vmla_n_f32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
  return vmla_n_f32(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
  return vmlaq_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
  return vmlaq_n_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_n_u16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
  return vmlaq_n_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_n_u32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
  return vmlaq_n_u32(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_n_f32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
  return vmlaq_n_f32(a, b, c);
}

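// vmls/vmlsq: non-widening multiply-subtract, lane-wise r[i] = a[i] - b[i] * c[i].
// Codegen is a plain mul/sub (fmul/fsub for float); no target intrinsic is involved.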
// CHECK-LABEL: @test_vmls_s8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, [[MUL_I]]
// CHECK: ret <8 x i8> [[SUB_I]]
int8x8_t test_vmls_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vmls_s8(a, b, c);
}

// CHECK-LABEL: @test_vmls_s16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[SUB_I]]
int16x4_t test_vmls_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmls_s16(a, b, c);
}

// CHECK-LABEL: @test_vmls_s32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vmls_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmls_s32(a, b, c);
}

// CHECK-LABEL: @test_vmls_f32(
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, %c
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmls_f32(a, b, c);
}

// CHECK-LABEL: @test_vmls_u8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, [[MUL_I]]
// CHECK: ret <8 x i8> [[SUB_I]]
uint8x8_t test_vmls_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmls_u8(a, b, c);
}

// CHECK-LABEL: @test_vmls_u16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[SUB_I]]
uint16x4_t test_vmls_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmls_u16(a, b, c);
}

// CHECK-LABEL: @test_vmls_u32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
uint32x2_t test_vmls_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmls_u32(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_s8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, [[MUL_I]]
// CHECK: ret <16 x i8> [[SUB_I]]
int8x16_t test_vmlsq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
  return vmlsq_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_s16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  return vmlsq_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_s32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  return vmlsq_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_f32(
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, %c
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]]
// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vmlsq_f32(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_u8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, [[MUL_I]]
// CHECK: ret <16 x i8> [[SUB_I]]
uint8x16_t test_vmlsq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vmlsq_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_u16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vmlsq_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_u32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vmlsq_u32(a, b, c);
}

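// vmlsl: widening multiply-subtract, r[i] = a[i] - widen(b[i]) * widen(c[i]).
// The widening multiply lowers to @llvm.arm.neon.vmulls (signed) or
// @llvm.arm.neon.vmullu (unsigned), followed by an ordinary sub.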
// CHECK-LABEL: @test_vmlsl_s8(
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlsl_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_u8(
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlsl_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_u32(a, b, c);
}

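// vmlsl_lane: as vmlsl, but the second operand is a single lane of c,
// splatted with a shufflevector before the widening multiply.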
// CHECK-LABEL: @test_vmlsl_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
// CHECK: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK: ret <4 x i32> [[SUB]]
int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlsl_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
// CHECK: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK: ret <2 x i64> [[SUB]]
int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlsl_lane_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
// CHECK: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK: ret <4 x i32> [[SUB]]
uint32x4_t test_vmlsl_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlsl_lane_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
// CHECK: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK: ret <2 x i64> [[SUB]]
uint64x2_t test_vmlsl_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_lane_u32(a, b, c, 1);
}

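// vmlsl_n: as vmlsl, but the second operand is a scalar, splatted with an
// insertelement chain before the widening multiply.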
// CHECK-LABEL: @test_vmlsl_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vmlsl_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vmlsl_n_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_n_u16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
  return vmlsl_n_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_n_u32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
  return vmlsl_n_u32(a, b, c);
}

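// vmls_lane/vmlsq_lane: non-widening multiply-subtract against one lane of c;
// the lane is splatted with a shufflevector and the arithmetic stays a plain
// mul/sub (fmul/fsub for float).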
// CHECK-LABEL: @test_vmls_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
// CHECK: [[SUB:%.*]] = sub <4 x i16> [[A:%.*]], [[MUL]]
// CHECK: ret <4 x i16> [[SUB]]
int16x4_t test_vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmls_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmls_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
// CHECK: [[SUB:%.*]] = sub <2 x i32> [[A:%.*]], [[MUL]]
// CHECK: ret <2 x i32> [[SUB]]
int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmls_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmls_lane_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
// CHECK: [[SUB:%.*]] = sub <4 x i16> [[A:%.*]], [[MUL]]
// CHECK: ret <4 x i16> [[SUB]]
uint16x4_t test_vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmls_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmls_lane_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
// CHECK: [[SUB:%.*]] = sub <2 x i32> [[A:%.*]], [[MUL]]
// CHECK: ret <2 x i32> [[SUB]]
uint32x2_t test_vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmls_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmls_lane_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
// CHECK: [[SUB:%.*]] = fsub <2 x float> [[A:%.*]], [[MUL]]
// CHECK: ret <2 x float> [[SUB]]
float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmls_lane_f32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlsq_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
// CHECK: [[SUB:%.*]] = sub <8 x i16> [[A:%.*]], [[MUL]]
// CHECK: ret <8 x i16> [[SUB]]
int16x8_t test_vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
  return vmlsq_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlsq_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
// CHECK: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[MUL]]
// CHECK: ret <4 x i32> [[SUB]]
int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
  return vmlsq_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlsq_lane_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
// CHECK: [[SUB:%.*]] = sub <8 x i16> [[A:%.*]], [[MUL]]
// CHECK: ret <8 x i16> [[SUB]]
uint16x8_t test_vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) {
  return vmlsq_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlsq_lane_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
// CHECK: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[MUL]]
// CHECK: ret <4 x i32> [[SUB]]
uint32x4_t test_vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) {
  return vmlsq_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlsq_lane_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
// CHECK: [[SUB:%.*]] = fsub <4 x float> [[A:%.*]], [[MUL]]
// CHECK: ret <4 x float> [[SUB]]
float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) {
  return vmlsq_lane_f32(a, b, c, 1);
}

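// vmls_n/vmlsq_n: non-widening multiply-subtract against a scalar, splatted
// with an insertelement chain.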
// CHECK-LABEL: @test_vmls_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[SUB_I]]
int16x4_t test_vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
  return vmls_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vmls_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
  return vmls_n_s32(a, b, c);
}

// CHECK-LABEL: @test_vmls_n_u16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
// CHECK: ret <4 x i16> [[SUB_I]]
uint16x4_t test_vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
  return vmls_n_u16(a, b, c);
}

// CHECK-LABEL: @test_vmls_n_u32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
uint32x2_t test_vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
  return vmls_n_u32(a, b, c);
}

// CHECK-LABEL: @test_vmls_n_f32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
  return vmls_n_f32(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
  return vmlsq_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
  return vmlsq_n_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_n_u16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
  return vmlsq_n_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_n_u32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
  return vmlsq_n_u32(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_n_f32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]]
// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
  return vmlsq_n_f32(a, b, c);
}

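// vmovl: lengthen each lane to twice its width; this lowers to a bare sext
// (signed) or zext (unsigned) with no intrinsic call.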
// CHECK-LABEL: @test_vmovl_s8(
// CHECK: [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I]]
int16x8_t test_vmovl_s8(int8x8_t a) {
  return vmovl_s8(a);
}

// CHECK-LABEL: @test_vmovl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VMOVL_I:%.*]] = sext <4 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I]]
int32x4_t test_vmovl_s16(int16x4_t a) {
  return vmovl_s16(a);
}

// CHECK-LABEL: @test_vmovl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMOVL_I:%.*]] = sext <2 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I]]
int64x2_t test_vmovl_s32(int32x2_t a) {
  return vmovl_s32(a);
}

// CHECK-LABEL: @test_vmovl_u8(
// CHECK: [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I]]
uint16x8_t test_vmovl_u8(uint8x8_t a) {
  return vmovl_u8(a);
}

// CHECK-LABEL: @test_vmovl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VMOVL_I:%.*]] = zext <4 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I]]
uint32x4_t test_vmovl_u16(uint16x4_t a) {
  return vmovl_u16(a);
}

// CHECK-LABEL: @test_vmovl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMOVL_I:%.*]] = zext <2 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I]]
uint64x2_t test_vmovl_u32(uint32x2_t a) {
  return vmovl_u32(a);
}

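// vmovn: narrow each lane to half its width; this lowers to a bare trunc.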
// CHECK-LABEL: @test_vmovn_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[VMOVN_I]]
int8x8_t test_vmovn_s16(int16x8_t a) {
  return vmovn_s16(a);
}

// CHECK-LABEL: @test_vmovn_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[VMOVN_I]]
int16x4_t test_vmovn_s32(int32x4_t a) {
  return vmovn_s32(a);
}

// CHECK-LABEL: @test_vmovn_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[VMOVN_I]]
int32x2_t test_vmovn_s64(int64x2_t a) {
  return vmovn_s64(a);
}

// CHECK-LABEL: @test_vmovn_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[VMOVN_I]]
uint8x8_t test_vmovn_u16(uint16x8_t a) {
  return vmovn_u16(a);
}

// CHECK-LABEL: @test_vmovn_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[VMOVN_I]]
uint16x4_t test_vmovn_u32(uint32x4_t a) {
  return vmovn_u32(a);
}

// CHECK-LABEL: @test_vmovn_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[VMOVN_I]]
uint32x2_t test_vmovn_u64(uint64x2_t a) {
  return vmovn_u64(a);
}

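// vmov_n/vmovq_n: broadcast a scalar into every lane via an insertelement
// chain. The f16 variants take the scalar through a pointer, presumably to
// sidestep __fp16 argument-passing restrictions.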
// CHECK-LABEL: @test_vmov_n_u8(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: ret <8 x i8> [[VECINIT7_I]]
uint8x8_t test_vmov_n_u8(uint8_t a) {
  return vmov_n_u8(a);
}

// CHECK-LABEL: @test_vmov_n_u16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VECINIT3_I]]
uint16x4_t test_vmov_n_u16(uint16_t a) {
  return vmov_n_u16(a);
}

// CHECK-LABEL: @test_vmov_n_u32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: ret <2 x i32> [[VECINIT1_I]]
uint32x2_t test_vmov_n_u32(uint32_t a) {
  return vmov_n_u32(a);
}

// CHECK-LABEL: @test_vmov_n_s8(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: ret <8 x i8> [[VECINIT7_I]]
int8x8_t test_vmov_n_s8(int8_t a) {
  return vmov_n_s8(a);
}

// CHECK-LABEL: @test_vmov_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VECINIT3_I]]
int16x4_t test_vmov_n_s16(int16_t a) {
  return vmov_n_s16(a);
}

// CHECK-LABEL: @test_vmov_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: ret <2 x i32> [[VECINIT1_I]]
int32x2_t test_vmov_n_s32(int32_t a) {
  return vmov_n_s32(a);
}

// CHECK-LABEL: @test_vmov_n_p8(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: ret <8 x i8> [[VECINIT7_I]]
poly8x8_t test_vmov_n_p8(poly8_t a) {
  return vmov_n_p8(a);
}

// CHECK-LABEL: @test_vmov_n_p16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: ret <4 x i16> [[VECINIT3_I]]
poly16x4_t test_vmov_n_p16(poly16_t a) {
  return vmov_n_p16(a);
}

// CHECK-LABEL: @test_vmov_n_f16(
// CHECK: [[TMP0:%.*]] = load half, ptr %a, align 2
// CHECK: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0
// CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
// CHECK: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2
// CHECK: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3
// CHECK: ret <4 x half> [[VECINIT3]]
float16x4_t test_vmov_n_f16(float16_t *a) {
  return vmov_n_f16(*a);
}

// CHECK-LABEL: @test_vmov_n_f32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %a, i32 1
// CHECK: ret <2 x float> [[VECINIT1_I]]
float32x2_t test_vmov_n_f32(float32_t a) {
  return vmov_n_f32(a);
}

// CHECK-LABEL: @test_vmovq_n_u8(
// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK: ret <16 x i8> [[VECINIT15_I]]
uint8x16_t test_vmovq_n_u8(uint8_t a) {
  return vmovq_n_u8(a);
}

// CHECK-LABEL: @test_vmovq_n_u16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VECINIT7_I]]
uint16x8_t test_vmovq_n_u16(uint16_t a) {
  return vmovq_n_u16(a);
}

// CHECK-LABEL: @test_vmovq_n_u32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
// CHECK: ret <4 x i32> [[VECINIT3_I]]
uint32x4_t test_vmovq_n_u32(uint32_t a) {
  return vmovq_n_u32(a);
}

// CHECK-LABEL: @test_vmovq_n_s8(
// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK: ret <16 x i8> [[VECINIT15_I]]
int8x16_t test_vmovq_n_s8(int8_t a) {
  return vmovq_n_s8(a);
}

// CHECK-LABEL: @test_vmovq_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VECINIT7_I]]
int16x8_t test_vmovq_n_s16(int16_t a) {
  return vmovq_n_s16(a);
}

// CHECK-LABEL: @test_vmovq_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
// CHECK: ret <4 x i32> [[VECINIT3_I]]
int32x4_t test_vmovq_n_s32(int32_t a) {
  return vmovq_n_s32(a);
}

// CHECK-LABEL: @test_vmovq_n_p8(
// CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK: [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK: [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK: [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK: [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK: [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK: [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK: [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK: [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK: ret <16 x i8> [[VECINIT15_I]]
poly8x16_t test_vmovq_n_p8(poly8_t a) {
  return vmovq_n_p8(a);
}

// CHECK-LABEL: @test_vmovq_n_p16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK: ret <8 x i16> [[VECINIT7_I]]
poly16x8_t test_vmovq_n_p16(poly16_t a) {
  return vmovq_n_p16(a);
}

// CHECK-LABEL: @test_vmovq_n_f16(
// CHECK: [[TMP0:%.*]] = load half, ptr %a, align 2
// CHECK: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[TMP0]], i32 0
// CHECK: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
// CHECK: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2
// CHECK: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3
// CHECK: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4
// CHECK: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5
// CHECK: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6
// CHECK: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7
// CHECK: ret <8 x half> [[VECINIT7]]
float16x8_t test_vmovq_n_f16(float16_t *a) {
  return vmovq_n_f16(*a);
}

// CHECK-LABEL: @test_vmovq_n_f32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %a, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %a, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %a, i32 3
// CHECK: ret <4 x float> [[VECINIT3_I]]
float32x4_t test_vmovq_n_f32(float32_t a) {
  return vmovq_n_f32(a);
}

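// The 64-bit vmov_n tests feed the <1 x i64> splat through vadd before
// returning, presumably so the insertelement survives as a distinct value in
// the checked IR instead of folding into the return.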
// CHECK-LABEL: @test_vmov_n_s64(
// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vmov_n_s64(int64_t a) {
  int64x1_t tmp = vmov_n_s64(a);
  return vadd_s64(tmp, tmp);
}

// CHECK-LABEL: @test_vmov_n_u64(
// CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
// CHECK: ret <1 x i64> [[ADD_I]]
uint64x1_t test_vmov_n_u64(uint64_t a) {
  uint64x1_t tmp = vmov_n_u64(a);
  return vadd_u64(tmp, tmp);
}

// CHECK-LABEL: @test_vmovq_n_s64(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VECINIT1_I]]
int64x2_t test_vmovq_n_s64(int64_t a) {
  return vmovq_n_s64(a);
}

// CHECK-LABEL: @test_vmovq_n_u64(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VECINIT1_I]]
uint64x2_t test_vmovq_n_u64(uint64_t a) {
  return vmovq_n_u64(a);
}

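// vmul/vmulq: plain lane-wise multiply, r[i] = a[i] * b[i]; a single mul
// (fmul for float) instruction in IR.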
// CHECK-LABEL: @test_vmul_s8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[MUL_I]]
int8x8_t test_vmul_s8(int8x8_t a, int8x8_t b) {
  return vmul_s8(a, b);
}

// CHECK-LABEL: @test_vmul_s16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[MUL_I]]
int16x4_t test_vmul_s16(int16x4_t a, int16x4_t b) {
  return vmul_s16(a, b);
}

// CHECK-LABEL: @test_vmul_s32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[MUL_I]]
int32x2_t test_vmul_s32(int32x2_t a, int32x2_t b) {
  return vmul_s32(a, b);
}

// CHECK-LABEL: @test_vmul_f32(
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %a, %b
// CHECK: ret <2 x float> [[MUL_I]]
float32x2_t test_vmul_f32(float32x2_t a, float32x2_t b) {
  return vmul_f32(a, b);
}

// CHECK-LABEL: @test_vmul_u8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[MUL_I]]
uint8x8_t test_vmul_u8(uint8x8_t a, uint8x8_t b) {
  return vmul_u8(a, b);
}

// CHECK-LABEL: @test_vmul_u16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[MUL_I]]
uint16x4_t test_vmul_u16(uint16x4_t a, uint16x4_t b) {
  return vmul_u16(a, b);
}

// CHECK-LABEL: @test_vmul_u32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[MUL_I]]
uint32x2_t test_vmul_u32(uint32x2_t a, uint32x2_t b) {
  return vmul_u32(a, b);
}

// CHECK-LABEL: @test_vmulq_s8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[MUL_I]]
int8x16_t test_vmulq_s8(int8x16_t a, int8x16_t b) {
  return vmulq_s8(a, b);
}

// CHECK-LABEL: @test_vmulq_s16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[MUL_I]]
int16x8_t test_vmulq_s16(int16x8_t a, int16x8_t b) {
  return vmulq_s16(a, b);
}

// CHECK-LABEL: @test_vmulq_s32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[MUL_I]]
int32x4_t test_vmulq_s32(int32x4_t a, int32x4_t b) {
  return vmulq_s32(a, b);
}

// CHECK-LABEL: @test_vmulq_f32(
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %a, %b
// CHECK: ret <4 x float> [[MUL_I]]
float32x4_t test_vmulq_f32(float32x4_t a, float32x4_t b) {
  return vmulq_f32(a, b);
}

// CHECK-LABEL: @test_vmulq_u8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[MUL_I]]
uint8x16_t test_vmulq_u8(uint8x16_t a, uint8x16_t b) {
  return vmulq_u8(a, b);
}

// CHECK-LABEL: @test_vmulq_u16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[MUL_I]]
uint16x8_t test_vmulq_u16(uint16x8_t a, uint16x8_t b) {
  return vmulq_u16(a, b);
}

// CHECK-LABEL: @test_vmulq_u32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[MUL_I]]
uint32x4_t test_vmulq_u32(uint32x4_t a, uint32x4_t b) {
  return vmulq_u32(a, b);
}

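// vmull: widening multiply, r[i] = widen(a[i]) * widen(b[i]); lowers to
// @llvm.arm.neon.vmulls / @llvm.arm.neon.vmullu, and the polynomial form to
// @llvm.arm.neon.vmullp.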
// CHECK-LABEL: @test_vmull_s8(
// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i16> [[VMULL_I]]
int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
  return vmull_s8(a, b);
}

// CHECK-LABEL: @test_vmull_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i32> [[VMULL2_I]]
int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
  return vmull_s16(a, b);
}

// CHECK-LABEL: @test_vmull_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i64> [[VMULL2_I]]
int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
  return vmull_s32(a, b);
}

// CHECK-LABEL: @test_vmull_u8(
// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i16> [[VMULL_I]]
uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
  return vmull_u8(a, b);
}

// CHECK-LABEL: @test_vmull_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i32> [[VMULL2_I]]
uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
  return vmull_u16(a, b);
}

// CHECK-LABEL: @test_vmull_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i64> [[VMULL2_I]]
uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
  return vmull_u32(a, b);
}

// CHECK-LABEL: @test_vmull_p8(
// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i16> [[VMULL_I]]
poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
  return vmull_p8(a, b);
}

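// vmull_lane: widening multiply against one lane of b, splatted with a
// shufflevector before the intrinsic call.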
// CHECK-LABEL: @test_vmull_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
// CHECK: ret <4 x i32> [[VMULL2_I]]
int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t b) {
  return vmull_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vmull_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
// CHECK: ret <2 x i64> [[VMULL2_I]]
int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t b) {
  return vmull_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vmull_lane_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
// CHECK: ret <4 x i32> [[VMULL2_I]]
uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t b) {
  return vmull_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vmull_lane_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
// CHECK: ret <2 x i64> [[VMULL2_I]]
uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t b) {
  return vmull_lane_u32(a, b, 1);
}

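// vmull_n: widening multiply against a scalar, splatted with an
// insertelement chain before the intrinsic call.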
8142 // CHECK-LABEL: @test_vmull_n_s16(
8143 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
8144 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
8145 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
8146 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
8147 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8148 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
8149 // CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
8150 // CHECK: ret <4 x i32> [[VMULL5_I]]
8151 int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) {
8152 return vmull_n_s16(a, b);
8155 // CHECK-LABEL: @test_vmull_n_s32(
8156 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
8157 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
8158 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8159 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
8160 // CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
8161 // CHECK: ret <2 x i64> [[VMULL3_I]]
8162 int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) {
8163 return vmull_n_s32(a, b);
8166 // CHECK-LABEL: @test_vmull_n_u16(
8167 // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
8168 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
8169 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
8170 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
8171 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8172 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
8173 // CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
8174 // CHECK: ret <4 x i32> [[VMULL5_I]]
8175 uint32x4_t test_vmull_n_u16(uint16x4_t a, uint16_t b) {
8176 return vmull_n_u16(a, b);
8179 // CHECK-LABEL: @test_vmull_n_u32(
8180 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
8181 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
8182 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8183 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
8184 // CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
8185 // CHECK: ret <2 x i64> [[VMULL3_I]]
8186 uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) {
8187 return vmull_n_u32(a, b);
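// Polynomial multiplies map to the ARM-specific vmulp intrinsic.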
// CHECK-LABEL: @test_vmul_p8(
// CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VMUL_V_I]]
poly8x8_t test_vmul_p8(poly8x8_t a, poly8x8_t b) {
  return vmul_p8(a, b);
}

// CHECK-LABEL: @test_vmulq_p8(
// CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VMULQ_V_I]]
poly8x16_t test_vmulq_p8(poly8x16_t a, poly8x16_t b) {
  return vmulq_p8(a, b);
}

// CHECK-LABEL: @test_vmul_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <4 x i16> [[A:%.*]], [[LANE]]
// CHECK: ret <4 x i16> [[MUL]]
int16x4_t test_vmul_lane_s16(int16x4_t a, int16x4_t b) {
  return vmul_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vmul_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <2 x i32> [[A:%.*]], [[LANE]]
// CHECK: ret <2 x i32> [[MUL]]
int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t b) {
  return vmul_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vmul_lane_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <2 x float> [[A:%.*]], [[LANE]]
// CHECK: ret <2 x float> [[MUL]]
float32x2_t test_vmul_lane_f32(float32x2_t a, float32x2_t b) {
  return vmul_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vmul_lane_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <4 x i16> [[A:%.*]], [[LANE]]
// CHECK: ret <4 x i16> [[MUL]]
uint16x4_t test_vmul_lane_u16(uint16x4_t a, uint16x4_t b) {
  return vmul_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vmul_lane_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <2 x i32> [[A:%.*]], [[LANE]]
// CHECK: ret <2 x i32> [[MUL]]
uint32x2_t test_vmul_lane_u32(uint32x2_t a, uint32x2_t b) {
  return vmul_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vmulq_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <8 x i16> [[A:%.*]], [[LANE]]
// CHECK: ret <8 x i16> [[MUL]]
int16x8_t test_vmulq_lane_s16(int16x8_t a, int16x4_t b) {
  return vmulq_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vmulq_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <4 x i32> [[A:%.*]], [[LANE]]
// CHECK: ret <4 x i32> [[MUL]]
int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t b) {
  return vmulq_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vmulq_lane_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <4 x float> [[A:%.*]], [[LANE]]
// CHECK: ret <4 x float> [[MUL]]
float32x4_t test_vmulq_lane_f32(float32x4_t a, float32x2_t b) {
  return vmulq_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vmulq_lane_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = mul <8 x i16> [[A:%.*]], [[LANE]]
// CHECK: ret <8 x i16> [[MUL]]
uint16x8_t test_vmulq_lane_u16(uint16x8_t a, uint16x4_t b) {
  return vmulq_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vmulq_lane_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = mul <4 x i32> [[A:%.*]], [[LANE]]
// CHECK: ret <4 x i32> [[MUL]]
uint32x4_t test_vmulq_lane_u32(uint32x4_t a, uint32x2_t b) {
  return vmulq_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vmul_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]]
// CHECK: ret <4 x i16> [[MUL_I]]
int16x4_t test_vmul_n_s16(int16x4_t a, int16_t b) {
  return vmul_n_s16(a, b);
}

// CHECK-LABEL: @test_vmul_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]]
// CHECK: ret <2 x i32> [[MUL_I]]
int32x2_t test_vmul_n_s32(int32x2_t a, int32_t b) {
  return vmul_n_s32(a, b);
}

// CHECK-LABEL: @test_vmul_n_f32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %b, i32 1
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %a, [[VECINIT1_I]]
// CHECK: ret <2 x float> [[MUL_I]]
float32x2_t test_vmul_n_f32(float32x2_t a, float32_t b) {
  return vmul_n_f32(a, b);
}

// CHECK-LABEL: @test_vmul_n_u16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]]
// CHECK: ret <4 x i16> [[MUL_I]]
uint16x4_t test_vmul_n_u16(uint16x4_t a, uint16_t b) {
  return vmul_n_u16(a, b);
}

// CHECK-LABEL: @test_vmul_n_u32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]]
// CHECK: ret <2 x i32> [[MUL_I]]
uint32x2_t test_vmul_n_u32(uint32x2_t a, uint32_t b) {
  return vmul_n_u32(a, b);
}

// CHECK-LABEL: @test_vmulq_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]]
// CHECK: ret <8 x i16> [[MUL_I]]
int16x8_t test_vmulq_n_s16(int16x8_t a, int16_t b) {
  return vmulq_n_s16(a, b);
}

// CHECK-LABEL: @test_vmulq_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]]
// CHECK: ret <4 x i32> [[MUL_I]]
int32x4_t test_vmulq_n_s32(int32x4_t a, int32_t b) {
  return vmulq_n_s32(a, b);
}

// CHECK-LABEL: @test_vmulq_n_f32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %b, i32 3
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %a, [[VECINIT3_I]]
// CHECK: ret <4 x float> [[MUL_I]]
float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) {
  return vmulq_n_f32(a, b);
}

// CHECK-LABEL: @test_vmulq_n_u16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]]
// CHECK: ret <8 x i16> [[MUL_I]]
uint16x8_t test_vmulq_n_u16(uint16x8_t a, uint16_t b) {
  return vmulq_n_u16(a, b);
}

// CHECK-LABEL: @test_vmulq_n_u32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]]
// CHECK: ret <4 x i32> [[MUL_I]]
uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) {
  return vmulq_n_u32(a, b);
}

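// Bitwise NOT (vmvn) needs no ARM intrinsic: it is an xor with an all-ones splat.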
// CHECK-LABEL: @test_vmvn_s8(
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: ret <8 x i8> [[NEG_I]]
int8x8_t test_vmvn_s8(int8x8_t a) {
  return vmvn_s8(a);
}

// CHECK-LABEL: @test_vmvn_s16(
// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: ret <4 x i16> [[NEG_I]]
int16x4_t test_vmvn_s16(int16x4_t a) {
  return vmvn_s16(a);
}

// CHECK-LABEL: @test_vmvn_s32(
// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1>
// CHECK: ret <2 x i32> [[NEG_I]]
int32x2_t test_vmvn_s32(int32x2_t a) {
  return vmvn_s32(a);
}

// CHECK-LABEL: @test_vmvn_u8(
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: ret <8 x i8> [[NEG_I]]
uint8x8_t test_vmvn_u8(uint8x8_t a) {
  return vmvn_u8(a);
}

// CHECK-LABEL: @test_vmvn_u16(
// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: ret <4 x i16> [[NEG_I]]
uint16x4_t test_vmvn_u16(uint16x4_t a) {
  return vmvn_u16(a);
}

// CHECK-LABEL: @test_vmvn_u32(
// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1>
// CHECK: ret <2 x i32> [[NEG_I]]
uint32x2_t test_vmvn_u32(uint32x2_t a) {
  return vmvn_u32(a);
}

// CHECK-LABEL: @test_vmvn_p8(
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: ret <8 x i8> [[NEG_I]]
poly8x8_t test_vmvn_p8(poly8x8_t a) {
  return vmvn_p8(a);
}

// CHECK-LABEL: @test_vmvnq_s8(
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: ret <16 x i8> [[NEG_I]]
int8x16_t test_vmvnq_s8(int8x16_t a) {
  return vmvnq_s8(a);
}

// CHECK-LABEL: @test_vmvnq_s16(
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: ret <8 x i16> [[NEG_I]]
int16x8_t test_vmvnq_s16(int16x8_t a) {
  return vmvnq_s16(a);
}

// CHECK-LABEL: @test_vmvnq_s32(
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: ret <4 x i32> [[NEG_I]]
int32x4_t test_vmvnq_s32(int32x4_t a) {
  return vmvnq_s32(a);
}

// CHECK-LABEL: @test_vmvnq_u8(
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: ret <16 x i8> [[NEG_I]]
uint8x16_t test_vmvnq_u8(uint8x16_t a) {
  return vmvnq_u8(a);
}

// CHECK-LABEL: @test_vmvnq_u16(
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: ret <8 x i16> [[NEG_I]]
uint16x8_t test_vmvnq_u16(uint16x8_t a) {
  return vmvnq_u16(a);
}

// CHECK-LABEL: @test_vmvnq_u32(
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: ret <4 x i32> [[NEG_I]]
uint32x4_t test_vmvnq_u32(uint32x4_t a) {
  return vmvnq_u32(a);
}

// CHECK-LABEL: @test_vmvnq_p8(
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: ret <16 x i8> [[NEG_I]]
poly8x16_t test_vmvnq_p8(poly8x16_t a) {
  return vmvnq_p8(a);
}

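// Negation is a subtract-from-zero for integers and an fneg for floats.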
// CHECK-LABEL: @test_vneg_s8(
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, %a
// CHECK: ret <8 x i8> [[SUB_I]]
int8x8_t test_vneg_s8(int8x8_t a) {
  return vneg_s8(a);
}

// CHECK-LABEL: @test_vneg_s16(
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, %a
// CHECK: ret <4 x i16> [[SUB_I]]
int16x4_t test_vneg_s16(int16x4_t a) {
  return vneg_s16(a);
}

// CHECK-LABEL: @test_vneg_s32(
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, %a
// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vneg_s32(int32x2_t a) {
  return vneg_s32(a);
}

// CHECK-LABEL: @test_vneg_f32(
// CHECK: [[SUB_I:%.*]] = fneg <2 x float> %a
// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vneg_f32(float32x2_t a) {
  return vneg_f32(a);
}

// CHECK-LABEL: @test_vnegq_s8(
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, %a
// CHECK: ret <16 x i8> [[SUB_I]]
int8x16_t test_vnegq_s8(int8x16_t a) {
  return vnegq_s8(a);
}

// CHECK-LABEL: @test_vnegq_s16(
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, %a
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vnegq_s16(int16x8_t a) {
  return vnegq_s16(a);
}

// CHECK-LABEL: @test_vnegq_s32(
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, %a
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vnegq_s32(int32x4_t a) {
  return vnegq_s32(a);
}

// CHECK-LABEL: @test_vnegq_f32(
// CHECK: [[SUB_I:%.*]] = fneg <4 x float> %a
// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vnegq_f32(float32x4_t a) {
  return vnegq_f32(a);
}

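// vorn computes a | ~b; the NOT appears as an xor with all-ones feeding the or.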
// CHECK-LABEL: @test_vorn_s8(
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]]
// CHECK: ret <8 x i8> [[OR_I]]
int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) {
  return vorn_s8(a, b);
}

// CHECK-LABEL: @test_vorn_s16(
// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]]
// CHECK: ret <4 x i16> [[OR_I]]
int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) {
  return vorn_s16(a, b);
}

// CHECK-LABEL: @test_vorn_s32(
// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]]
// CHECK: ret <2 x i32> [[OR_I]]
int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) {
  return vorn_s32(a, b);
}

// CHECK-LABEL: @test_vorn_s64(
// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]]
// CHECK: ret <1 x i64> [[OR_I]]
int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) {
  return vorn_s64(a, b);
}

// CHECK-LABEL: @test_vorn_u8(
// CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]]
// CHECK: ret <8 x i8> [[OR_I]]
uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) {
  return vorn_u8(a, b);
}

// CHECK-LABEL: @test_vorn_u16(
// CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]]
// CHECK: ret <4 x i16> [[OR_I]]
uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) {
  return vorn_u16(a, b);
}

// CHECK-LABEL: @test_vorn_u32(
// CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]]
// CHECK: ret <2 x i32> [[OR_I]]
uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) {
  return vorn_u32(a, b);
}

// CHECK-LABEL: @test_vorn_u64(
// CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]]
// CHECK: ret <1 x i64> [[OR_I]]
uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) {
  return vorn_u64(a, b);
}

// CHECK-LABEL: @test_vornq_s8(
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
// CHECK: ret <16 x i8> [[OR_I]]
int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) {
  return vornq_s8(a, b);
}

// CHECK-LABEL: @test_vornq_s16(
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
// CHECK: ret <8 x i16> [[OR_I]]
int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) {
  return vornq_s16(a, b);
}

// CHECK-LABEL: @test_vornq_s32(
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
// CHECK: ret <4 x i32> [[OR_I]]
int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) {
  return vornq_s32(a, b);
}

// CHECK-LABEL: @test_vornq_s64(
// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
// CHECK: ret <2 x i64> [[OR_I]]
int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) {
  return vornq_s64(a, b);
}

// CHECK-LABEL: @test_vornq_u8(
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
// CHECK: ret <16 x i8> [[OR_I]]
uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) {
  return vornq_u8(a, b);
}

// CHECK-LABEL: @test_vornq_u16(
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
// CHECK: ret <8 x i16> [[OR_I]]
uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) {
  return vornq_u16(a, b);
}

// CHECK-LABEL: @test_vornq_u32(
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
// CHECK: ret <4 x i32> [[OR_I]]
uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) {
  return vornq_u32(a, b);
}

// CHECK-LABEL: @test_vornq_u64(
// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
// CHECK: ret <2 x i64> [[OR_I]]
uint64x2_t test_vornq_u64(uint64x2_t a, uint64x2_t b) {
  return vornq_u64(a, b);
}

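// vorr is a plain vector or.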
// CHECK-LABEL: @test_vorr_s8(
// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[OR_I]]
int8x8_t test_vorr_s8(int8x8_t a, int8x8_t b) {
  return vorr_s8(a, b);
}

// CHECK-LABEL: @test_vorr_s16(
// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[OR_I]]
int16x4_t test_vorr_s16(int16x4_t a, int16x4_t b) {
  return vorr_s16(a, b);
}

// CHECK-LABEL: @test_vorr_s32(
// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[OR_I]]
int32x2_t test_vorr_s32(int32x2_t a, int32x2_t b) {
  return vorr_s32(a, b);
}

// CHECK-LABEL: @test_vorr_s64(
// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[OR_I]]
int64x1_t test_vorr_s64(int64x1_t a, int64x1_t b) {
  return vorr_s64(a, b);
}

// CHECK-LABEL: @test_vorr_u8(
// CHECK: [[OR_I:%.*]] = or <8 x i8> %a, %b
// CHECK: ret <8 x i8> [[OR_I]]
uint8x8_t test_vorr_u8(uint8x8_t a, uint8x8_t b) {
  return vorr_u8(a, b);
}

// CHECK-LABEL: @test_vorr_u16(
// CHECK: [[OR_I:%.*]] = or <4 x i16> %a, %b
// CHECK: ret <4 x i16> [[OR_I]]
uint16x4_t test_vorr_u16(uint16x4_t a, uint16x4_t b) {
  return vorr_u16(a, b);
}

// CHECK-LABEL: @test_vorr_u32(
// CHECK: [[OR_I:%.*]] = or <2 x i32> %a, %b
// CHECK: ret <2 x i32> [[OR_I]]
uint32x2_t test_vorr_u32(uint32x2_t a, uint32x2_t b) {
  return vorr_u32(a, b);
}

// CHECK-LABEL: @test_vorr_u64(
// CHECK: [[OR_I:%.*]] = or <1 x i64> %a, %b
// CHECK: ret <1 x i64> [[OR_I]]
uint64x1_t test_vorr_u64(uint64x1_t a, uint64x1_t b) {
  return vorr_u64(a, b);
}

// CHECK-LABEL: @test_vorrq_s8(
// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[OR_I]]
int8x16_t test_vorrq_s8(int8x16_t a, int8x16_t b) {
  return vorrq_s8(a, b);
}

// CHECK-LABEL: @test_vorrq_s16(
// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[OR_I]]
int16x8_t test_vorrq_s16(int16x8_t a, int16x8_t b) {
  return vorrq_s16(a, b);
}

// CHECK-LABEL: @test_vorrq_s32(
// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[OR_I]]
int32x4_t test_vorrq_s32(int32x4_t a, int32x4_t b) {
  return vorrq_s32(a, b);
}

// CHECK-LABEL: @test_vorrq_s64(
// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[OR_I]]
int64x2_t test_vorrq_s64(int64x2_t a, int64x2_t b) {
  return vorrq_s64(a, b);
}

// CHECK-LABEL: @test_vorrq_u8(
// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[OR_I]]
uint8x16_t test_vorrq_u8(uint8x16_t a, uint8x16_t b) {
  return vorrq_u8(a, b);
}

// CHECK-LABEL: @test_vorrq_u16(
// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[OR_I]]
uint16x8_t test_vorrq_u16(uint16x8_t a, uint16x8_t b) {
  return vorrq_u16(a, b);
}

// CHECK-LABEL: @test_vorrq_u32(
// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[OR_I]]
uint32x4_t test_vorrq_u32(uint32x4_t a, uint32x4_t b) {
  return vorrq_u32(a, b);
}

// CHECK-LABEL: @test_vorrq_u64(
// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[OR_I]]
uint64x2_t test_vorrq_u64(uint64x2_t a, uint64x2_t b) {
  return vorrq_u64(a, b);
}

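// Pairwise add-and-accumulate keeps the ARM-specific vpadal intrinsics.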
// CHECK-LABEL: @test_vpadal_s8(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b)
// CHECK: ret <4 x i16> [[VPADAL_V1_I]]
int16x4_t test_vpadal_s8(int16x4_t a, int8x8_t b) {
  return vpadal_s8(a, b);
}

// CHECK-LABEL: @test_vpadal_s16(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b)
// CHECK: ret <2 x i32> [[VPADAL_V2_I]]
int32x2_t test_vpadal_s16(int32x2_t a, int16x4_t b) {
  return vpadal_s16(a, b);
}

// CHECK-LABEL: @test_vpadal_s32(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b)
// CHECK: ret <1 x i64> [[VPADAL_V2_I]]
int64x1_t test_vpadal_s32(int64x1_t a, int32x2_t b) {
  return vpadal_s32(a, b);
}

// CHECK-LABEL: @test_vpadal_u8(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b)
// CHECK: ret <4 x i16> [[VPADAL_V1_I]]
uint16x4_t test_vpadal_u8(uint16x4_t a, uint8x8_t b) {
  return vpadal_u8(a, b);
}

// CHECK-LABEL: @test_vpadal_u16(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b)
// CHECK: ret <2 x i32> [[VPADAL_V2_I]]
uint32x2_t test_vpadal_u16(uint32x2_t a, uint16x4_t b) {
  return vpadal_u16(a, b);
}

// CHECK-LABEL: @test_vpadal_u32(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b)
// CHECK: ret <1 x i64> [[VPADAL_V2_I]]
uint64x1_t test_vpadal_u32(uint64x1_t a, uint32x2_t b) {
  return vpadal_u32(a, b);
}

// CHECK-LABEL: @test_vpadalq_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b)
// CHECK: ret <8 x i16> [[VPADALQ_V1_I]]
int16x8_t test_vpadalq_s8(int16x8_t a, int8x16_t b) {
  return vpadalq_s8(a, b);
}

// CHECK-LABEL: @test_vpadalq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b)
// CHECK: ret <4 x i32> [[VPADALQ_V2_I]]
int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) {
  return vpadalq_s16(a, b);
}

// CHECK-LABEL: @test_vpadalq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b)
// CHECK: ret <2 x i64> [[VPADALQ_V2_I]]
int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) {
  return vpadalq_s32(a, b);
}

// CHECK-LABEL: @test_vpadalq_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b)
// CHECK: ret <8 x i16> [[VPADALQ_V1_I]]
uint16x8_t test_vpadalq_u8(uint16x8_t a, uint8x16_t b) {
  return vpadalq_u8(a, b);
}

// CHECK-LABEL: @test_vpadalq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b)
// CHECK: ret <4 x i32> [[VPADALQ_V2_I]]
uint32x4_t test_vpadalq_u16(uint32x4_t a, uint16x8_t b) {
  return vpadalq_u16(a, b);
}

// CHECK-LABEL: @test_vpadalq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b)
// CHECK: ret <2 x i64> [[VPADALQ_V2_I]]
uint64x2_t test_vpadalq_u32(uint64x2_t a, uint32x4_t b) {
  return vpadalq_u32(a, b);
}

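// Pairwise add (vpadd) and pairwise long add (vpaddl) also stay ARM-specific.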
// CHECK-LABEL: @test_vpadd_s8(
// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPADD_V_I]]
int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
  return vpadd_s8(a, b);
}

// CHECK-LABEL: @test_vpadd_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPADD_V2_I]]
int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
  return vpadd_s16(a, b);
}

// CHECK-LABEL: @test_vpadd_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPADD_V2_I]]
int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
  return vpadd_s32(a, b);
}

// CHECK-LABEL: @test_vpadd_u8(
// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPADD_V_I]]
uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
  return vpadd_u8(a, b);
}

// CHECK-LABEL: @test_vpadd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPADD_V2_I]]
uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
  return vpadd_u16(a, b);
}

// CHECK-LABEL: @test_vpadd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPADD_V2_I]]
uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
  return vpadd_u32(a, b);
}

// CHECK-LABEL: @test_vpadd_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VPADD_V2_I]]
float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
  return vpadd_f32(a, b);
}

// CHECK-LABEL: @test_vpaddl_s8(
// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %a)
// CHECK: ret <4 x i16> [[VPADDL_I]]
int16x4_t test_vpaddl_s8(int8x8_t a) {
  return vpaddl_s8(a);
}

// CHECK-LABEL: @test_vpaddl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %a)
// CHECK: ret <2 x i32> [[VPADDL1_I]]
int32x2_t test_vpaddl_s16(int16x4_t a) {
  return vpaddl_s16(a);
}

// CHECK-LABEL: @test_vpaddl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %a)
// CHECK: ret <1 x i64> [[VPADDL1_I]]
int64x1_t test_vpaddl_s32(int32x2_t a) {
  return vpaddl_s32(a);
}

// CHECK-LABEL: @test_vpaddl_u8(
// CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %a)
// CHECK: ret <4 x i16> [[VPADDL_I]]
uint16x4_t test_vpaddl_u8(uint8x8_t a) {
  return vpaddl_u8(a);
}

// CHECK-LABEL: @test_vpaddl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %a)
// CHECK: ret <2 x i32> [[VPADDL1_I]]
uint32x2_t test_vpaddl_u16(uint16x4_t a) {
  return vpaddl_u16(a);
}

// CHECK-LABEL: @test_vpaddl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %a)
// CHECK: ret <1 x i64> [[VPADDL1_I]]
uint64x1_t test_vpaddl_u32(uint32x2_t a) {
  return vpaddl_u32(a);
}

// CHECK-LABEL: @test_vpaddlq_s8(
// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %a)
// CHECK: ret <8 x i16> [[VPADDL_I]]
int16x8_t test_vpaddlq_s8(int8x16_t a) {
  return vpaddlq_s8(a);
}

// CHECK-LABEL: @test_vpaddlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %a)
// CHECK: ret <4 x i32> [[VPADDL1_I]]
int32x4_t test_vpaddlq_s16(int16x8_t a) {
  return vpaddlq_s16(a);
}

// CHECK-LABEL: @test_vpaddlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %a)
// CHECK: ret <2 x i64> [[VPADDL1_I]]
int64x2_t test_vpaddlq_s32(int32x4_t a) {
  return vpaddlq_s32(a);
}

// CHECK-LABEL: @test_vpaddlq_u8(
// CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %a)
// CHECK: ret <8 x i16> [[VPADDL_I]]
uint16x8_t test_vpaddlq_u8(uint8x16_t a) {
  return vpaddlq_u8(a);
}

// CHECK-LABEL: @test_vpaddlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %a)
// CHECK: ret <4 x i32> [[VPADDL1_I]]
uint32x4_t test_vpaddlq_u16(uint16x8_t a) {
  return vpaddlq_u16(a);
}

// CHECK-LABEL: @test_vpaddlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %a)
// CHECK: ret <2 x i64> [[VPADDL1_I]]
uint64x2_t test_vpaddlq_u32(uint32x4_t a) {
  return vpaddlq_u32(a);
}

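// Pairwise max/min use vpmaxs/vpmins for signed and float types, vpmaxu/vpminu for unsigned.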
// CHECK-LABEL: @test_vpmax_s8(
// CHECK: [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPMAX_V_I]]
int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
  return vpmax_s8(a, b);
}

// CHECK-LABEL: @test_vpmax_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPMAX_V2_I]]
int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
  return vpmax_s16(a, b);
}

// CHECK-LABEL: @test_vpmax_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPMAX_V2_I]]
int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
  return vpmax_s32(a, b);
}

// CHECK-LABEL: @test_vpmax_u8(
// CHECK: [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPMAX_V_I]]
uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
  return vpmax_u8(a, b);
}

// CHECK-LABEL: @test_vpmax_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPMAX_V2_I]]
uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
  return vpmax_u16(a, b);
}

// CHECK-LABEL: @test_vpmax_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPMAX_V2_I]]
uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
  return vpmax_u32(a, b);
}

// CHECK-LABEL: @test_vpmax_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x float> [[VPMAX_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VPMAX_V2_I]]
float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
  return vpmax_f32(a, b);
}

// CHECK-LABEL: @test_vpmin_s8(
// CHECK: [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPMIN_V_I]]
int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
  return vpmin_s8(a, b);
}

// CHECK-LABEL: @test_vpmin_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPMIN_V2_I]]
int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
  return vpmin_s16(a, b);
}

// CHECK-LABEL: @test_vpmin_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPMIN_V2_I]]
int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
  return vpmin_s32(a, b);
}

// CHECK-LABEL: @test_vpmin_u8(
// CHECK: [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPMIN_V_I]]
uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
  return vpmin_u8(a, b);
}

// CHECK-LABEL: @test_vpmin_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPMIN_V2_I]]
uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
  return vpmin_u16(a, b);
}

// CHECK-LABEL: @test_vpmin_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPMIN_V2_I]]
uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
  return vpmin_u32(a, b);
}

// CHECK-LABEL: @test_vpmin_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x float> [[VPMIN_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VPMIN_V2_I]]
float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
  return vpmin_f32(a, b);
}

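// Saturating absolute value keeps the ARM vqabs intrinsic.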
// CHECK-LABEL: @test_vqabs_s8(
// CHECK: [[VQABS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %a)
// CHECK: ret <8 x i8> [[VQABS_V_I]]
int8x8_t test_vqabs_s8(int8x8_t a) {
  return vqabs_s8(a);
}

// CHECK-LABEL: @test_vqabs_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %a)
// CHECK: [[VQABS_V2_I:%.*]] = bitcast <4 x i16> [[VQABS_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQABS_V1_I]]
int16x4_t test_vqabs_s16(int16x4_t a) {
  return vqabs_s16(a);
}

// CHECK-LABEL: @test_vqabs_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %a)
// CHECK: [[VQABS_V2_I:%.*]] = bitcast <2 x i32> [[VQABS_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQABS_V1_I]]
int32x2_t test_vqabs_s32(int32x2_t a) {
  return vqabs_s32(a);
}

// CHECK-LABEL: @test_vqabsq_s8(
// CHECK: [[VQABSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %a)
// CHECK: ret <16 x i8> [[VQABSQ_V_I]]
int8x16_t test_vqabsq_s8(int8x16_t a) {
  return vqabsq_s8(a);
}

// CHECK-LABEL: @test_vqabsq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %a)
// CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <8 x i16> [[VQABSQ_V1_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQABSQ_V1_I]]
int16x8_t test_vqabsq_s16(int16x8_t a) {
  return vqabsq_s16(a);
}

// CHECK-LABEL: @test_vqabsq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %a)
// CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <4 x i32> [[VQABSQ_V1_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQABSQ_V1_I]]
int32x4_t test_vqabsq_s32(int32x4_t a) {
  return vqabsq_s32(a);
}

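// Saturating adds lower to the generic @llvm.sadd.sat/@llvm.uadd.sat intrinsics rather than ARM-specific ones.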
// CHECK-LABEL: @test_vqadd_s8(
// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQADD_V_I]]
int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
  return vqadd_s8(a, b);
}

// CHECK-LABEL: @test_vqadd_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQADD_V2_I]]
int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
  return vqadd_s16(a, b);
}

// CHECK-LABEL: @test_vqadd_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQADD_V2_I]]
int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
  return vqadd_s32(a, b);
}

// CHECK-LABEL: @test_vqadd_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.sadd.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQADD_V2_I]]
int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
  return vqadd_s64(a, b);
}

// CHECK-LABEL: @test_vqadd_u8(
// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQADD_V_I]]
uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
  return vqadd_u8(a, b);
}

// CHECK-LABEL: @test_vqadd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQADD_V2_I]]
uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
  return vqadd_u16(a, b);
}

// CHECK-LABEL: @test_vqadd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQADD_V2_I]]
uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
  return vqadd_u32(a, b);
}

// CHECK-LABEL: @test_vqadd_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.uadd.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQADD_V2_I]]
uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
  return vqadd_u64(a, b);
}

// CHECK-LABEL: @test_vqaddq_s8(
// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
  return vqaddq_s8(a, b);
}

// CHECK-LABEL: @test_vqaddq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQADDQ_V2_I]]
int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
  return vqaddq_s16(a, b);
}

// CHECK-LABEL: @test_vqaddq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQADDQ_V2_I]]
int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
  return vqaddq_s32(a, b);
}

// CHECK-LABEL: @test_vqaddq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQADDQ_V2_I]]
int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
  return vqaddq_s64(a, b);
}

// CHECK-LABEL: @test_vqaddq_u8(
// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vqaddq_u8(a, b);
}

// CHECK-LABEL: @test_vqaddq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQADDQ_V2_I]]
uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vqaddq_u16(a, b);
}

// CHECK-LABEL: @test_vqaddq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQADDQ_V2_I]]
uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vqaddq_u32(a, b);
}

// CHECK-LABEL: @test_vqaddq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQADDQ_V2_I]]
uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vqaddq_u64(a, b);
}

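// vqdmlal/vqdmlsl expand to a vqdmull followed by a saturating add/subtract with the accumulator.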
9396 // CHECK-LABEL: @test_vqdmlal_s16(
9397 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
9398 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
9399 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
9400 // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
9401 // CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
9402 // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
9403 int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
9404 return vqdmlal_s16(a, b, c);
9407 // CHECK-LABEL: @test_vqdmlal_s32(
9408 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
9409 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
9410 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
9411 // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
9412 // CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
9413 // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
9414 int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
9415 return vqdmlal_s32(a, b, c);
9418 // CHECK-LABEL: @test_vqdmlal_lane_s16(
9419 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
9420 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
9421 // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
9422 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
9423 // CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
9424 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
9425 // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
9426 // CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
9427 // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
9428 int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
9429 return vqdmlal_lane_s16(a, b, c, 3);
9432 // CHECK-LABEL: @test_vqdmlal_lane_s32(
9433 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
9434 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
9435 // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
9436 // CHECK: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
9437 // CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
9438 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
9439 // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
9440 // CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
9441 // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
9442 int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
9443 return vqdmlal_lane_s32(a, b, c, 1);
// CHECK-LABEL: @test_vqdmlal_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
// CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]])
// CHECK: ret <4 x i32> [[VQDMLAL_V6_I]]
int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vqdmlal_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlal_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
// CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]])
// CHECK: ret <2 x i64> [[VQDMLAL_V4_I]]
int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vqdmlal_n_s32(a, b, c);
}

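// The _n forms splat the scalar operand through a chain of insertelement
// instructions into an undef vector before the widening multiply; the
// arithmetic is otherwise identical to vqdmlal_s16/_s32.
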
// CHECK-LABEL: @test_vqdmlsl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlsl_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlsl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlsl_s32(a, b, c);
}

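// vqdmlsl mirrors vqdmlal with subtraction: sat(a[i] - sat(2 * b[i] * c[i])),
// so the only change in the IR is @llvm.ssub.sat in place of @llvm.sadd.sat.
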
// CHECK-LABEL: @test_vqdmlsl_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
// CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlsl_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vqdmlsl_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
// CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlsl_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vqdmlsl_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
// CHECK: [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]])
// CHECK: ret <4 x i32> [[VQDMLSL_V6_I]]
int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vqdmlsl_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlsl_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
// CHECK: [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]])
// CHECK: ret <2 x i64> [[VQDMLSL_V4_I]]
int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vqdmlsl_n_s32(a, b, c);
}

// CHECK-LABEL: @test_vqdmulh_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQDMULH_V2_I]]
int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
  return vqdmulh_s16(a, b);
}

// CHECK-LABEL: @test_vqdmulh_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQDMULH_V2_I]]
int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
  return vqdmulh_s32(a, b);
}

// CHECK-LABEL: @test_vqdmulhq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]]
int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
  return vqdmulhq_s16(a, b);
}

// CHECK-LABEL: @test_vqdmulhq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]]
int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
  return vqdmulhq_s32(a, b);
}

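// vqdmulh returns the high half of the saturating doubling product,
// (2 * a[i] * b[i]) >> lane_bits; it saturates only when both inputs are
// the minimum negative lane value. A sketch of the classic fixed-point use,
// under the same hypothetical-helper caveat as above:
static inline int16x4_t q15_mul_sketch(int16x4_t x, int16x4_t y) {
  // Q15 * Q15 -> Q15: the doubling cancels the redundant sign bit of the
  // 32-bit product, so the high half is already in Q15 format.
  return vqdmulh_s16(x, y);
}
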
// CHECK-LABEL: @test_vqdmulh_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> [[A]], <4 x i16> [[LANE]])
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQDMULH_V2_I]]
int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t b) {
  return vqdmulh_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqdmulh_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> [[A]], <2 x i32> [[LANE]])
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQDMULH_V2_I]]
int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t b) {
  return vqdmulh_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vqdmulhq_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i16> [[LANE]] to <16 x i8>
// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> [[A]], <8 x i16> [[LANE]])
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]]
int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t b) {
  return vqdmulhq_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqdmulhq_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> [[A]], <4 x i32> [[LANE]])
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]]
int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t b) {
  return vqdmulhq_lane_s32(a, b, 1);
}

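// For the q-register _lane forms the splat also widens: one lane of the
// 64-bit source vector is broadcast to all eight (or four) lanes of the
// 128-bit operand, as the <8 x i32> and <4 x i32> shuffle masks above show.
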
// CHECK-LABEL: @test_vqdmulh_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VQDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
// CHECK: [[VQDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V5_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQDMULH_V5_I]]
int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) {
  return vqdmulh_n_s16(a, b);
}

// CHECK-LABEL: @test_vqdmulh_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VQDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
// CHECK: [[VQDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V3_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQDMULH_V3_I]]
int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) {
  return vqdmulh_n_s32(a, b);
}

// CHECK-LABEL: @test_vqdmulhq_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
// CHECK: [[VQDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]])
// CHECK: [[VQDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V9_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQDMULHQ_V9_I]]
int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) {
  return vqdmulhq_n_s16(a, b);
}

// CHECK-LABEL: @test_vqdmulhq_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
// CHECK: [[VQDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]])
// CHECK: [[VQDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V5_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQDMULHQ_V5_I]]
int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) {
  return vqdmulhq_n_s32(a, b);
}

// CHECK-LABEL: @test_vqdmull_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQDMULL_V2_I]]
int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
  return vqdmull_s16(a, b);
}

// CHECK-LABEL: @test_vqdmull_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQDMULL_V2_I]]
int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
  return vqdmull_s32(a, b);
}

// CHECK-LABEL: @test_vqdmull_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQDMULL_V2_I]]
int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t b) {
  return vqdmull_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqdmull_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
// CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQDMULL_V2_I]]
int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t b) {
  return vqdmull_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vqdmull_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VQDMULL_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
// CHECK: [[VQDMULL_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V5_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQDMULL_V5_I]]
int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) {
  return vqdmull_n_s16(a, b);
}

// CHECK-LABEL: @test_vqdmull_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VQDMULL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
// CHECK: [[VQDMULL_V4_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V3_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQDMULL_V3_I]]
int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) {
  return vqdmull_n_s32(a, b);
}

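// vqdmull is the widening primitive underlying the vqdmlal/vqdmlsl tests
// above: sat(2 * a[i] * b[i]) produced into double-width lanes via
// @llvm.arm.neon.vqdmull. Only the multiply saturates here; there is no
// accumulate step.
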
// CHECK-LABEL: @test_vqmovn_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %a)
// CHECK: ret <8 x i8> [[VQMOVN_V1_I]]
int8x8_t test_vqmovn_s16(int16x8_t a) {
  return vqmovn_s16(a);
}

// CHECK-LABEL: @test_vqmovn_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %a)
// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQMOVN_V1_I]]
int16x4_t test_vqmovn_s32(int32x4_t a) {
  return vqmovn_s32(a);
}

// CHECK-LABEL: @test_vqmovn_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %a)
// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQMOVN_V1_I]]
int32x2_t test_vqmovn_s64(int64x2_t a) {
  return vqmovn_s64(a);
}

// CHECK-LABEL: @test_vqmovn_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %a)
// CHECK: ret <8 x i8> [[VQMOVN_V1_I]]
uint8x8_t test_vqmovn_u16(uint16x8_t a) {
  return vqmovn_u16(a);
}

// CHECK-LABEL: @test_vqmovn_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %a)
// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQMOVN_V1_I]]
uint16x4_t test_vqmovn_u32(uint32x4_t a) {
  return vqmovn_u32(a);
}

// CHECK-LABEL: @test_vqmovn_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %a)
// CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQMOVN_V1_I]]
uint32x2_t test_vqmovn_u64(uint64x2_t a) {
  return vqmovn_u64(a);
}

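// vqmovn narrows each lane to half width with saturation, selecting the
// signed (vqmovns) or unsigned (vqmovnu) clamp from the element type.
// A sketch, with the same hypothetical-helper caveat as above:
static inline int8x8_t narrow_q15_to_q7_sketch(int16x8_t x) {
  // Saturating narrow: lanes outside [-128, 127] clamp to the endpoints.
  return vqmovn_s16(x);
}
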
// CHECK-LABEL: @test_vqmovun_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %a)
// CHECK: ret <8 x i8> [[VQMOVUN_V1_I]]
uint8x8_t test_vqmovun_s16(int16x8_t a) {
  return vqmovun_s16(a);
}

// CHECK-LABEL: @test_vqmovun_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %a)
// CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQMOVUN_V1_I]]
uint16x4_t test_vqmovun_s32(int32x4_t a) {
  return vqmovun_s32(a);
}

// CHECK-LABEL: @test_vqmovun_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %a)
// CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQMOVUN_V1_I]]
uint32x2_t test_vqmovun_s64(int64x2_t a) {
  return vqmovun_s64(a);
}

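// vqmovun takes signed input but clamps to the unsigned range of the
// narrower type (negative lanes become 0), hence the mixed-signedness
// @llvm.arm.neon.vqmovnsu intrinsic.
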
// CHECK-LABEL: @test_vqneg_s8(
// CHECK: [[VQNEG_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %a)
// CHECK: ret <8 x i8> [[VQNEG_V_I]]
int8x8_t test_vqneg_s8(int8x8_t a) {
  return vqneg_s8(a);
}

// CHECK-LABEL: @test_vqneg_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %a)
// CHECK: [[VQNEG_V2_I:%.*]] = bitcast <4 x i16> [[VQNEG_V1_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQNEG_V1_I]]
int16x4_t test_vqneg_s16(int16x4_t a) {
  return vqneg_s16(a);
}

// CHECK-LABEL: @test_vqneg_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %a)
// CHECK: [[VQNEG_V2_I:%.*]] = bitcast <2 x i32> [[VQNEG_V1_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQNEG_V1_I]]
int32x2_t test_vqneg_s32(int32x2_t a) {
  return vqneg_s32(a);
}

// CHECK-LABEL: @test_vqnegq_s8(
// CHECK: [[VQNEGQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %a)
// CHECK: ret <16 x i8> [[VQNEGQ_V_I]]
int8x16_t test_vqnegq_s8(int8x16_t a) {
  return vqnegq_s8(a);
}

// CHECK-LABEL: @test_vqnegq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %a)
// CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <8 x i16> [[VQNEGQ_V1_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQNEGQ_V1_I]]
int16x8_t test_vqnegq_s16(int16x8_t a) {
  return vqnegq_s16(a);
}

// CHECK-LABEL: @test_vqnegq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %a)
// CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <4 x i32> [[VQNEGQ_V1_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQNEGQ_V1_I]]
int32x4_t test_vqnegq_s32(int32x4_t a) {
  return vqnegq_s32(a);
}

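// vqneg is a saturating negate: it differs from plain negation only for the
// most negative lane value, e.g. INT8_MIN maps to INT8_MAX instead of
// wrapping back to INT8_MIN.
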
// CHECK-LABEL: @test_vqrdmulh_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQRDMULH_V2_I]]
int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
  return vqrdmulh_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulh_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQRDMULH_V2_I]]
int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
  return vqrdmulh_s32(a, b);
}

// CHECK-LABEL: @test_vqrdmulhq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]]
int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
  return vqrdmulhq_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulhq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]]
int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
  return vqrdmulhq_s32(a, b);
}

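// vqrdmulh is vqdmulh with rounding: a rounding constant of
// 1 << (lane_bits - 1) is added to the doubled product before the high half
// is taken, which halves the truncation bias in long fixed-point chains.
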
// CHECK-LABEL: @test_vqrdmulh_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[A]], <4 x i16> [[LANE]])
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQRDMULH_V2_I]]
int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t b) {
  return vqrdmulh_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqrdmulh_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[A]], <2 x i32> [[LANE]])
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQRDMULH_V2_I]]
int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t b) {
  return vqrdmulh_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vqrdmulhq_lane_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i16> [[LANE]] to <16 x i8>
// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[A]], <8 x i16> [[LANE]])
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]]
int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t b) {
  return vqrdmulhq_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqrdmulhq_lane_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[A]], <4 x i32> [[LANE]])
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]]
int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t b) {
  return vqrdmulhq_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vqrdmulh_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK: [[VQRDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
// CHECK: [[VQRDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V5_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQRDMULH_V5_I]]
int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) {
  return vqrdmulh_n_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulh_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK: [[VQRDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
// CHECK: [[VQRDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V3_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQRDMULH_V3_I]]
int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) {
  return vqrdmulh_n_s32(a, b);
}

// CHECK-LABEL: @test_vqrdmulhq_n_s16(
// CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
// CHECK: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
// CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
// CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
// CHECK: [[VQRDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]])
// CHECK: [[VQRDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V9_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQRDMULHQ_V9_I]]
int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) {
  return vqrdmulhq_n_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulhq_n_s32(
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
// CHECK: [[VQRDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]])
// CHECK: [[VQRDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V5_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQRDMULHQ_V5_I]]
int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) {
  return vqrdmulhq_n_s32(a, b);
}

// CHECK-LABEL: @test_vqrshl_s8(
// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
  return vqrshl_s8(a, b);
}

// CHECK-LABEL: @test_vqrshl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQRSHL_V2_I]]
int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
  return vqrshl_s16(a, b);
}

// CHECK-LABEL: @test_vqrshl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQRSHL_V2_I]]
int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
  return vqrshl_s32(a, b);
}

// CHECK-LABEL: @test_vqrshl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQRSHL_V2_I]]
int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
  return vqrshl_s64(a, b);
}

// CHECK-LABEL: @test_vqrshl_u8(
// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
  return vqrshl_u8(a, b);
}

// CHECK-LABEL: @test_vqrshl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQRSHL_V2_I]]
uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
  return vqrshl_u16(a, b);
}

// CHECK-LABEL: @test_vqrshl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQRSHL_V2_I]]
uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
  return vqrshl_u32(a, b);
}

// CHECK-LABEL: @test_vqrshl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQRSHL_V2_I]]
uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
  return vqrshl_u64(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s8(
// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
  return vqrshlq_s8(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]]
int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
  return vqrshlq_s16(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]]
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
  return vqrshlq_s32(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]]
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
  return vqrshlq_s64(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u8(
// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqrshlq_u8(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]]
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqrshlq_u16(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]]
uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqrshlq_u32(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]]
uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqrshlq_u64(a, b);
}

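// vqrshl shifts left by a per-lane *signed* count taken from the second
// operand (negative counts shift right), rounding and saturating the result.
// The value's signedness selects vqrshifts vs. vqrshiftu; the shift vector is
// always signed, which is why the unsigned variants still take a signed b.
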
// CHECK-LABEL: @test_vqrshrn_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQRSHRN_N1]]
int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
  return vqrshrn_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQRSHRN_N1]]
int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
  return vqrshrn_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQRSHRN_N1]]
int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
  return vqrshrn_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQRSHRN_N1]]
uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
  return vqrshrn_n_u16(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQRSHRN_N1]]
uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
  return vqrshrn_n_u32(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQRSHRN_N1]]
uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
  return vqrshrn_n_u64(a, 1);
}

// CHECK-LABEL: @test_vqrshrun_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> [[VQRSHRUN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQRSHRUN_N1]]
uint8x8_t test_vqrshrun_n_s16(int16x8_t a) {
  return vqrshrun_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqrshrun_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> [[VQRSHRUN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQRSHRUN_N1]]
uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
  return vqrshrun_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqrshrun_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> [[VQRSHRUN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQRSHRUN_N1]]
uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
  return vqrshrun_n_s64(a, 1);
}

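// For the _n narrowing shifts the immediate is folded into the intrinsic as
// a splatted *negative* constant (shift right by 1 becomes <i16 -1, ...>),
// reusing the signed-shift-count convention of the vqrshiftn* intrinsics.
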
// CHECK-LABEL: @test_vqshl_s8(
// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQSHL_V_I]]
int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
  return vqshl_s8(a, b);
}

// CHECK-LABEL: @test_vqshl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQSHL_V2_I]]
int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
  return vqshl_s16(a, b);
}

// CHECK-LABEL: @test_vqshl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQSHL_V2_I]]
int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
  return vqshl_s32(a, b);
}

// CHECK-LABEL: @test_vqshl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQSHL_V2_I]]
int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
  return vqshl_s64(a, b);
}

// CHECK-LABEL: @test_vqshl_u8(
// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQSHL_V_I]]
uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
  return vqshl_u8(a, b);
}

// CHECK-LABEL: @test_vqshl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQSHL_V2_I]]
uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
  return vqshl_u16(a, b);
}

// CHECK-LABEL: @test_vqshl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQSHL_V2_I]]
uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
  return vqshl_u32(a, b);
}

// CHECK-LABEL: @test_vqshl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQSHL_V2_I]]
uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
  return vqshl_u64(a, b);
}

// CHECK-LABEL: @test_vqshlq_s8(
// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
  return vqshlq_s8(a, b);
}

// CHECK-LABEL: @test_vqshlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQSHLQ_V2_I]]
int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
  return vqshlq_s16(a, b);
}

// CHECK-LABEL: @test_vqshlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQSHLQ_V2_I]]
int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
  return vqshlq_s32(a, b);
}

// CHECK-LABEL: @test_vqshlq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQSHLQ_V2_I]]
int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
  return vqshlq_s64(a, b);
}

// CHECK-LABEL: @test_vqshlq_u8(
// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqshlq_u8(a, b);
}

// CHECK-LABEL: @test_vqshlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQSHLQ_V2_I]]
uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqshlq_u16(a, b);
}

// CHECK-LABEL: @test_vqshlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQSHLQ_V2_I]]
uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqshlq_u32(a, b);
}

// CHECK-LABEL: @test_vqshlq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQSHLQ_V2_I]]
uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqshlq_u64(a, b);
}

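// vqshl is the non-rounding counterpart of vqrshl above: the same saturating
// shift-by-signed-vector semantics, lowered to vqshifts/vqshiftu instead of
// vqrshifts/vqrshiftu.
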
// CHECK-LABEL: @test_vqshlu_n_s8(
// CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VQSHLU_N]]
uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
  return vqshlu_n_s8(a, 1);
}

// CHECK-LABEL: @test_vqshlu_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VQSHLU_N1]]
uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
  return vqshlu_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshlu_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VQSHLU_N1]]
uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
  return vqshlu_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshlu_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHLU_N1]]
uint64x1_t test_vqshlu_n_s64(int64x1_t a) {
  return vqshlu_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqshluq_n_s8(
// CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VQSHLU_N]]
uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
  return vqshluq_n_s8(a, 1);
}

// CHECK-LABEL: @test_vqshluq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VQSHLU_N1]]
uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
  return vqshluq_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshluq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VQSHLU_N1]]
uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
  return vqshluq_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshluq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 1, i64 1>)
// CHECK: ret <2 x i64> [[VQSHLU_N1]]
uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
  return vqshluq_n_s64(a, 1);
}

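// vqshlu_n left-shifts signed input by an immediate but saturates to the
// *unsigned* range of the same width (negative lanes clamp to 0), via the
// mixed-signedness vqshiftsu intrinsic. Left-shift immediates stay positive
// in the splat, in contrast to the negated right-shift immediates above.
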
// CHECK-LABEL: @test_vqshl_n_s8(
// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VQSHL_N]]
int8x8_t test_vqshl_n_s8(int8x8_t a) {
  return vqshl_n_s8(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VQSHL_N1]]
int16x4_t test_vqshl_n_s16(int16x4_t a) {
  return vqshl_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VQSHL_N1]]
int32x2_t test_vqshl_n_s32(int32x2_t a) {
  return vqshl_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHL_N1]]
int64x1_t test_vqshl_n_s64(int64x1_t a) {
  return vqshl_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_u8(
// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VQSHL_N]]
uint8x8_t test_vqshl_n_u8(uint8x8_t a) {
  return vqshl_n_u8(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VQSHL_N1]]
uint16x4_t test_vqshl_n_u16(uint16x4_t a) {
  return vqshl_n_u16(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VQSHL_N1]]
uint32x2_t test_vqshl_n_u32(uint32x2_t a) {
  return vqshl_n_u32(a, 1);
}

// CHECK-LABEL: @test_vqshl_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHL_N1]]
uint64x1_t test_vqshl_n_u64(uint64x1_t a) {
  return vqshl_n_u64(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_s8(
// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VQSHL_N]]
int8x16_t test_vqshlq_n_s8(int8x16_t a) {
  return vqshlq_n_s8(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VQSHL_N1]]
int16x8_t test_vqshlq_n_s16(int16x8_t a) {
  return vqshlq_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VQSHL_N1]]
int32x4_t test_vqshlq_n_s32(int32x4_t a) {
  return vqshlq_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> <i64 1, i64 1>)
// CHECK: ret <2 x i64> [[VQSHL_N1]]
int64x2_t test_vqshlq_n_s64(int64x2_t a) {
  return vqshlq_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_u8(
// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <16 x i8> [[VQSHL_N]]
uint8x16_t test_vqshlq_n_u8(uint8x16_t a) {
  return vqshlq_n_u8(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <8 x i16> [[VQSHL_N1]]
uint16x8_t test_vqshlq_n_u16(uint16x8_t a) {
  return vqshlq_n_u16(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK: ret <4 x i32> [[VQSHL_N1]]
uint32x4_t test_vqshlq_n_u32(uint32x4_t a) {
  return vqshlq_n_u32(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> <i64 1, i64 1>)
// CHECK: ret <2 x i64> [[VQSHL_N1]]
uint64x2_t test_vqshlq_n_u64(uint64x2_t a) {
  return vqshlq_n_u64(a, 1);
}

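// In the narrowing right-shift tests that follow, the immediate is carried
// as a splat of its negation (a shift by 1 becomes a splat of -1) in the
// second operand of @llvm.arm.neon.vqshiftns/vqshiftnu/vqshiftnsu.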
// CHECK-LABEL: @test_vqshrn_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQSHRN_N1]]
int8x8_t test_vqshrn_n_s16(int16x8_t a) {
  return vqshrn_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshrn_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQSHRN_N1]]
int16x4_t test_vqshrn_n_s32(int32x4_t a) {
  return vqshrn_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshrn_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQSHRN_N1]]
int32x2_t test_vqshrn_n_s64(int64x2_t a) {
  return vqshrn_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqshrn_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQSHRN_N1]]
uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
  return vqshrn_n_u16(a, 1);
}

// CHECK-LABEL: @test_vqshrn_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQSHRN_N1]]
uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
  return vqshrn_n_u32(a, 1);
}

// CHECK-LABEL: @test_vqshrn_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQSHRN_N1]]
uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
  return vqshrn_n_u64(a, 1);
}

// CHECK-LABEL: @test_vqshrun_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> [[VQSHRUN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VQSHRUN_N1]]
uint8x8_t test_vqshrun_n_s16(int16x8_t a) {
  return vqshrun_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshrun_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> [[VQSHRUN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VQSHRUN_N1]]
uint16x4_t test_vqshrun_n_s32(int32x4_t a) {
  return vqshrun_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshrun_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> [[VQSHRUN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VQSHRUN_N1]]
uint32x2_t test_vqshrun_n_s64(int64x2_t a) {
  return vqshrun_n_s64(a, 1);
}

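// Saturating subtraction lowers to the target-independent @llvm.ssub.sat /
// @llvm.usub.sat intrinsics rather than an ARM-specific one; the bitcasts
// matched alongside them are not consumed by the saturating call itself.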
// CHECK-LABEL: @test_vqsub_s8(
// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQSUB_V_I]]
int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
  return vqsub_s8(a, b);
}

// CHECK-LABEL: @test_vqsub_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQSUB_V2_I]]
int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
  return vqsub_s16(a, b);
}

// CHECK-LABEL: @test_vqsub_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQSUB_V2_I]]
int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
  return vqsub_s32(a, b);
}

// CHECK-LABEL: @test_vqsub_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.ssub.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQSUB_V2_I]]
int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
  return vqsub_s64(a, b);
}

// CHECK-LABEL: @test_vqsub_u8(
// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQSUB_V_I]]
uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
  return vqsub_u8(a, b);
}

// CHECK-LABEL: @test_vqsub_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQSUB_V2_I]]
uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
  return vqsub_u16(a, b);
}

// CHECK-LABEL: @test_vqsub_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQSUB_V2_I]]
uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
  return vqsub_u32(a, b);
}

// CHECK-LABEL: @test_vqsub_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.usub.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQSUB_V2_I]]
uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
  return vqsub_u64(a, b);
}

// CHECK-LABEL: @test_vqsubq_s8(
// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
  return vqsubq_s8(a, b);
}

// CHECK-LABEL: @test_vqsubq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQSUBQ_V2_I]]
int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
  return vqsubq_s16(a, b);
}

// CHECK-LABEL: @test_vqsubq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQSUBQ_V2_I]]
int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
  return vqsubq_s32(a, b);
}

// CHECK-LABEL: @test_vqsubq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQSUBQ_V2_I]]
int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
  return vqsubq_s64(a, b);
}

// CHECK-LABEL: @test_vqsubq_u8(
// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vqsubq_u8(a, b);
}

// CHECK-LABEL: @test_vqsubq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQSUBQ_V2_I]]
uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vqsubq_u16(a, b);
}

// CHECK-LABEL: @test_vqsubq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQSUBQ_V2_I]]
uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vqsubq_u32(a, b);
}

// CHECK-LABEL: @test_vqsubq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQSUBQ_V2_I]]
uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
  return vqsubq_u64(a, b);
}

// CHECK-LABEL: @test_vraddhn_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
  return vraddhn_s16(a, b);
}

// CHECK-LABEL: @test_vraddhn_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRADDHN_V2_I]]
int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
  return vraddhn_s32(a, b);
}

// CHECK-LABEL: @test_vraddhn_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRADDHN_V2_I]]
int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
  return vraddhn_s64(a, b);
}

// CHECK-LABEL: @test_vraddhn_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vraddhn_u16(a, b);
}

// CHECK-LABEL: @test_vraddhn_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRADDHN_V2_I]]
uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vraddhn_u32(a, b);
}

// CHECK-LABEL: @test_vraddhn_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRADDHN_V2_I]]
uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vraddhn_u64(a, b);
}

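// vrecpe (reciprocal estimate) keeps ARM-specific intrinsics for both the
// floating-point and u32 variants; vrecps is the floating-point refinement
// step used for Newton-Raphson iteration.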
// CHECK-LABEL: @test_vrecpe_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %a)
// CHECK: ret <2 x float> [[VRECPE_V1_I]]
float32x2_t test_vrecpe_f32(float32x2_t a) {
  return vrecpe_f32(a);
}

// CHECK-LABEL: @test_vrecpe_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %a)
// CHECK: ret <2 x i32> [[VRECPE_V1_I]]
uint32x2_t test_vrecpe_u32(uint32x2_t a) {
  return vrecpe_u32(a);
}

// CHECK-LABEL: @test_vrecpeq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %a)
// CHECK: ret <4 x float> [[VRECPEQ_V1_I]]
float32x4_t test_vrecpeq_f32(float32x4_t a) {
  return vrecpeq_f32(a);
}

// CHECK-LABEL: @test_vrecpeq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %a)
// CHECK: ret <4 x i32> [[VRECPEQ_V1_I]]
uint32x4_t test_vrecpeq_u32(uint32x4_t a) {
  return vrecpeq_u32(a);
}

// CHECK-LABEL: @test_vrecps_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: [[VRECPS_V3_I:%.*]] = bitcast <2 x float> [[VRECPS_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VRECPS_V2_I]]
float32x2_t test_vrecps_f32(float32x2_t a, float32x2_t b) {
  return vrecps_f32(a, b);
}

// CHECK-LABEL: @test_vrecpsq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x float> [[VRECPSQ_V2_I]]
float32x4_t test_vrecpsq_f32(float32x4_t a, float32x4_t b) {
  return vrecpsq_f32(a, b);
}

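// The vreinterpret tests enumerate every source element type. A reinterpret
// between distinct IR vector types is a single bitcast; when source and
// destination share an IR type (e.g. s8 <-> u8), no instruction is emitted
// and %a is returned directly.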
// CHECK-LABEL: @test_vreinterpret_s8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s16(int16x4_t a) {
  return vreinterpret_s8_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s32(int32x2_t a) {
  return vreinterpret_s8_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s64(int64x1_t a) {
  return vreinterpret_s8_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u8(
// CHECK: ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) {
  return vreinterpret_s8_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) {
  return vreinterpret_s8_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) {
  return vreinterpret_s8_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) {
  return vreinterpret_s8_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f16(float16x4_t a) {
  return vreinterpret_s8_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f32(float32x2_t a) {
  return vreinterpret_s8_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_p8(
// CHECK: ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) {
  return vreinterpret_s8_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) {
  return vreinterpret_s8_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s8(int8x8_t a) {
  return vreinterpret_s16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s32(int32x2_t a) {
  return vreinterpret_s16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s64(int64x1_t a) {
  return vreinterpret_s16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) {
  return vreinterpret_s16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u16(
// CHECK: ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) {
  return vreinterpret_s16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) {
  return vreinterpret_s16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) {
  return vreinterpret_s16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f16(float16x4_t a) {
  return vreinterpret_s16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f32(float32x2_t a) {
  return vreinterpret_s16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) {
  return vreinterpret_s16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_p16(
// CHECK: ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) {
  return vreinterpret_s16_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s8(int8x8_t a) {
  return vreinterpret_s32_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s16(int16x4_t a) {
  return vreinterpret_s32_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s64(int64x1_t a) {
  return vreinterpret_s32_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) {
  return vreinterpret_s32_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) {
  return vreinterpret_s32_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u32(
// CHECK: ret <2 x i32> %a
int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) {
  return vreinterpret_s32_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) {
  return vreinterpret_s32_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f16(float16x4_t a) {
  return vreinterpret_s32_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f32(float32x2_t a) {
  return vreinterpret_s32_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) {
  return vreinterpret_s32_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) {
  return vreinterpret_s32_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s8(int8x8_t a) {
  return vreinterpret_s64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s16(int16x4_t a) {
  return vreinterpret_s64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s32(int32x2_t a) {
  return vreinterpret_s64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) {
  return vreinterpret_s64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) {
  return vreinterpret_s64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) {
  return vreinterpret_s64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u64(
// CHECK: ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) {
  return vreinterpret_s64_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f16(float16x4_t a) {
  return vreinterpret_s64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f32(float32x2_t a) {
  return vreinterpret_s64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) {
  return vreinterpret_s64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) {
  return vreinterpret_s64_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s8(
// CHECK: ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) {
  return vreinterpret_u8_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) {
  return vreinterpret_u8_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) {
  return vreinterpret_u8_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) {
  return vreinterpret_u8_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) {
  return vreinterpret_u8_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) {
  return vreinterpret_u8_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) {
  return vreinterpret_u8_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) {
  return vreinterpret_u8_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) {
  return vreinterpret_u8_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_p8(
// CHECK: ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) {
  return vreinterpret_u8_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) {
  return vreinterpret_u8_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) {
  return vreinterpret_u16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s16(
// CHECK: ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) {
  return vreinterpret_u16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) {
  return vreinterpret_u16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) {
  return vreinterpret_u16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) {
  return vreinterpret_u16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) {
  return vreinterpret_u16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
  return vreinterpret_u16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
  return vreinterpret_u16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) {
  return vreinterpret_u16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) {
  return vreinterpret_u16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_p16(
// CHECK: ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) {
  return vreinterpret_u16_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) {
  return vreinterpret_u32_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) {
  return vreinterpret_u32_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s32(
// CHECK: ret <2 x i32> %a
uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
  return vreinterpret_u32_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
  return vreinterpret_u32_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
  return vreinterpret_u32_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
  return vreinterpret_u32_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
  return vreinterpret_u32_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
  return vreinterpret_u32_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
  return vreinterpret_u32_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
  return vreinterpret_u32_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
  return vreinterpret_u32_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
  return vreinterpret_u64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
  return vreinterpret_u64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
  return vreinterpret_u64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_s64(
// CHECK: ret <1 x i64> %a
uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
  return vreinterpret_u64_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
  return vreinterpret_u64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
  return vreinterpret_u64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
  return vreinterpret_u64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
  return vreinterpret_u64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
  return vreinterpret_u64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
  return vreinterpret_u64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
  return vreinterpret_u64_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
  return vreinterpret_f16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
  return vreinterpret_f16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
  return vreinterpret_f16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
  return vreinterpret_f16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
  return vreinterpret_f16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
  return vreinterpret_f16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
  return vreinterpret_f16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
  return vreinterpret_f16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
  return vreinterpret_f16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
  return vreinterpret_f16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_f16_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
// CHECK: ret <4 x half> [[TMP0]]
float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
  return vreinterpret_f16_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
  return vreinterpret_f32_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
  return vreinterpret_f32_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
  return vreinterpret_f32_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
  return vreinterpret_f32_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
  return vreinterpret_f32_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
  return vreinterpret_f32_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
  return vreinterpret_f32_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
  return vreinterpret_f32_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
  return vreinterpret_f32_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
  return vreinterpret_f32_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_f32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
// CHECK: ret <2 x float> [[TMP0]]
float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
  return vreinterpret_f32_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s8(
// CHECK: ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
  return vreinterpret_p8_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
  return vreinterpret_p8_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
  return vreinterpret_p8_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
  return vreinterpret_p8_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u8(
// CHECK: ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
  return vreinterpret_p8_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
  return vreinterpret_p8_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
  return vreinterpret_p8_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
  return vreinterpret_p8_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
  return vreinterpret_p8_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
  return vreinterpret_p8_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
  return vreinterpret_p8_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
  return vreinterpret_p16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s16(
// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
  return vreinterpret_p16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
  return vreinterpret_p16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
  return vreinterpret_p16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
  return vreinterpret_p16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u16(
// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
  return vreinterpret_p16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
  return vreinterpret_p16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
  return vreinterpret_p16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
  return vreinterpret_p16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
  return vreinterpret_p16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
  return vreinterpret_p16_p8(a);
}

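// The vreinterpretq tests repeat the same pattern for the 128-bit q-register
// types.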
// CHECK-LABEL: @test_vreinterpretq_s8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
  return vreinterpretq_s8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
  return vreinterpretq_s8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
  return vreinterpretq_s8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u8(
// CHECK: ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
  return vreinterpretq_s8_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
  return vreinterpretq_s8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
  return vreinterpretq_s8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
  return vreinterpretq_s8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
  return vreinterpretq_s8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
  return vreinterpretq_s8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_p8(
// CHECK: ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
  return vreinterpretq_s8_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
  return vreinterpretq_s8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
  return vreinterpretq_s16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
  return vreinterpretq_s16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
  return vreinterpretq_s16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
  return vreinterpretq_s16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u16(
// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
  return vreinterpretq_s16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
  return vreinterpretq_s16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
  return vreinterpretq_s16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
  return vreinterpretq_s16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
  return vreinterpretq_s16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
  return vreinterpretq_s16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_p16(
// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
  return vreinterpretq_s16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
  return vreinterpretq_s32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
  return vreinterpretq_s32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
  return vreinterpretq_s32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
  return vreinterpretq_s32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
  return vreinterpretq_s32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u32(
// CHECK: ret <4 x i32> %a
int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
12068 return vreinterpretq_s32_u32(a);
12071 // CHECK-LABEL: @test_vreinterpretq_s32_u64(
12072 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
12073 // CHECK: ret <4 x i32> [[TMP0]]
12074 int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
12075 return vreinterpretq_s32_u64(a);
12078 // CHECK-LABEL: @test_vreinterpretq_s32_f16(
12079 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
12080 // CHECK: ret <4 x i32> [[TMP0]]
12081 int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
12082 return vreinterpretq_s32_f16(a);
12085 // CHECK-LABEL: @test_vreinterpretq_s32_f32(
12086 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
12087 // CHECK: ret <4 x i32> [[TMP0]]
12088 int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
12089 return vreinterpretq_s32_f32(a);
12092 // CHECK-LABEL: @test_vreinterpretq_s32_p8(
12093 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
12094 // CHECK: ret <4 x i32> [[TMP0]]
12095 int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
12096 return vreinterpretq_s32_p8(a);
12099 // CHECK-LABEL: @test_vreinterpretq_s32_p16(
12100 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
12101 // CHECK: ret <4 x i32> [[TMP0]]
12102 int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
12103 return vreinterpretq_s32_p16(a);
12106 // CHECK-LABEL: @test_vreinterpretq_s64_s8(
12107 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
12108 // CHECK: ret <2 x i64> [[TMP0]]
12109 int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
12110 return vreinterpretq_s64_s8(a);
12113 // CHECK-LABEL: @test_vreinterpretq_s64_s16(
12114 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
12115 // CHECK: ret <2 x i64> [[TMP0]]
12116 int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
12117 return vreinterpretq_s64_s16(a);
12120 // CHECK-LABEL: @test_vreinterpretq_s64_s32(
12121 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
12122 // CHECK: ret <2 x i64> [[TMP0]]
12123 int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
12124 return vreinterpretq_s64_s32(a);
12127 // CHECK-LABEL: @test_vreinterpretq_s64_u8(
12128 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
12129 // CHECK: ret <2 x i64> [[TMP0]]
12130 int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
12131 return vreinterpretq_s64_u8(a);
12134 // CHECK-LABEL: @test_vreinterpretq_s64_u16(
12135 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
12136 // CHECK: ret <2 x i64> [[TMP0]]
12137 int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
12138 return vreinterpretq_s64_u16(a);
12141 // CHECK-LABEL: @test_vreinterpretq_s64_u32(
12142 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
12143 // CHECK: ret <2 x i64> [[TMP0]]
12144 int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
12145 return vreinterpretq_s64_u32(a);
12148 // CHECK-LABEL: @test_vreinterpretq_s64_u64(
12149 // CHECK: ret <2 x i64> %a
12150 int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
12151 return vreinterpretq_s64_u64(a);
12154 // CHECK-LABEL: @test_vreinterpretq_s64_f16(
12155 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
12156 // CHECK: ret <2 x i64> [[TMP0]]
12157 int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
12158 return vreinterpretq_s64_f16(a);
12161 // CHECK-LABEL: @test_vreinterpretq_s64_f32(
12162 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
12163 // CHECK: ret <2 x i64> [[TMP0]]
12164 int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
12165 return vreinterpretq_s64_f32(a);
12168 // CHECK-LABEL: @test_vreinterpretq_s64_p8(
12169 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
12170 // CHECK: ret <2 x i64> [[TMP0]]
12171 int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
12172 return vreinterpretq_s64_p8(a);
12175 // CHECK-LABEL: @test_vreinterpretq_s64_p16(
12176 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
12177 // CHECK: ret <2 x i64> [[TMP0]]
12178 int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
12179 return vreinterpretq_s64_p16(a);
12182 // CHECK-LABEL: @test_vreinterpretq_u8_s8(
12183 // CHECK: ret <16 x i8> %a
12184 uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
12185 return vreinterpretq_u8_s8(a);
12188 // CHECK-LABEL: @test_vreinterpretq_u8_s16(
12189 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
12190 // CHECK: ret <16 x i8> [[TMP0]]
12191 uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
12192 return vreinterpretq_u8_s16(a);
12195 // CHECK-LABEL: @test_vreinterpretq_u8_s32(
12196 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
12197 // CHECK: ret <16 x i8> [[TMP0]]
12198 uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
12199 return vreinterpretq_u8_s32(a);
12202 // CHECK-LABEL: @test_vreinterpretq_u8_s64(
12203 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
12204 // CHECK: ret <16 x i8> [[TMP0]]
12205 uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
12206 return vreinterpretq_u8_s64(a);
12209 // CHECK-LABEL: @test_vreinterpretq_u8_u16(
12210 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
12211 // CHECK: ret <16 x i8> [[TMP0]]
12212 uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
12213 return vreinterpretq_u8_u16(a);
12216 // CHECK-LABEL: @test_vreinterpretq_u8_u32(
12217 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
12218 // CHECK: ret <16 x i8> [[TMP0]]
12219 uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
12220 return vreinterpretq_u8_u32(a);
12223 // CHECK-LABEL: @test_vreinterpretq_u8_u64(
12224 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
12225 // CHECK: ret <16 x i8> [[TMP0]]
12226 uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
12227 return vreinterpretq_u8_u64(a);
12230 // CHECK-LABEL: @test_vreinterpretq_u8_f16(
12231 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
12232 // CHECK: ret <16 x i8> [[TMP0]]
12233 uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
12234 return vreinterpretq_u8_f16(a);
12237 // CHECK-LABEL: @test_vreinterpretq_u8_f32(
12238 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
12239 // CHECK: ret <16 x i8> [[TMP0]]
12240 uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
12241 return vreinterpretq_u8_f32(a);
12244 // CHECK-LABEL: @test_vreinterpretq_u8_p8(
12245 // CHECK: ret <16 x i8> %a
12246 uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
12247 return vreinterpretq_u8_p8(a);
12250 // CHECK-LABEL: @test_vreinterpretq_u8_p16(
12251 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
12252 // CHECK: ret <16 x i8> [[TMP0]]
12253 uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
12254 return vreinterpretq_u8_p16(a);
12257 // CHECK-LABEL: @test_vreinterpretq_u16_s8(
12258 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
12259 // CHECK: ret <8 x i16> [[TMP0]]
12260 uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
12261 return vreinterpretq_u16_s8(a);
12264 // CHECK-LABEL: @test_vreinterpretq_u16_s16(
12265 // CHECK: ret <8 x i16> %a
12266 uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
12267 return vreinterpretq_u16_s16(a);
12270 // CHECK-LABEL: @test_vreinterpretq_u16_s32(
12271 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
12272 // CHECK: ret <8 x i16> [[TMP0]]
12273 uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
12274 return vreinterpretq_u16_s32(a);
12277 // CHECK-LABEL: @test_vreinterpretq_u16_s64(
12278 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
12279 // CHECK: ret <8 x i16> [[TMP0]]
12280 uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
12281 return vreinterpretq_u16_s64(a);
12284 // CHECK-LABEL: @test_vreinterpretq_u16_u8(
12285 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
12286 // CHECK: ret <8 x i16> [[TMP0]]
12287 uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
12288 return vreinterpretq_u16_u8(a);
12291 // CHECK-LABEL: @test_vreinterpretq_u16_u32(
12292 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
12293 // CHECK: ret <8 x i16> [[TMP0]]
12294 uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
12295 return vreinterpretq_u16_u32(a);
12298 // CHECK-LABEL: @test_vreinterpretq_u16_u64(
12299 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
12300 // CHECK: ret <8 x i16> [[TMP0]]
12301 uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
12302 return vreinterpretq_u16_u64(a);
12305 // CHECK-LABEL: @test_vreinterpretq_u16_f16(
12306 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
12307 // CHECK: ret <8 x i16> [[TMP0]]
12308 uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
12309 return vreinterpretq_u16_f16(a);
12312 // CHECK-LABEL: @test_vreinterpretq_u16_f32(
12313 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
12314 // CHECK: ret <8 x i16> [[TMP0]]
12315 uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
12316 return vreinterpretq_u16_f32(a);
12319 // CHECK-LABEL: @test_vreinterpretq_u16_p8(
12320 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
12321 // CHECK: ret <8 x i16> [[TMP0]]
12322 uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
12323 return vreinterpretq_u16_p8(a);
12326 // CHECK-LABEL: @test_vreinterpretq_u16_p16(
12327 // CHECK: ret <8 x i16> %a
12328 uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
12329 return vreinterpretq_u16_p16(a);
12332 // CHECK-LABEL: @test_vreinterpretq_u32_s8(
12333 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
12334 // CHECK: ret <4 x i32> [[TMP0]]
12335 uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
12336 return vreinterpretq_u32_s8(a);
12339 // CHECK-LABEL: @test_vreinterpretq_u32_s16(
12340 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
12341 // CHECK: ret <4 x i32> [[TMP0]]
12342 uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
12343 return vreinterpretq_u32_s16(a);
12346 // CHECK-LABEL: @test_vreinterpretq_u32_s32(
12347 // CHECK: ret <4 x i32> %a
12348 uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
12349 return vreinterpretq_u32_s32(a);
12352 // CHECK-LABEL: @test_vreinterpretq_u32_s64(
12353 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
12354 // CHECK: ret <4 x i32> [[TMP0]]
12355 uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
12356 return vreinterpretq_u32_s64(a);
12359 // CHECK-LABEL: @test_vreinterpretq_u32_u8(
12360 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
12361 // CHECK: ret <4 x i32> [[TMP0]]
12362 uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
12363 return vreinterpretq_u32_u8(a);
12366 // CHECK-LABEL: @test_vreinterpretq_u32_u16(
12367 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
12368 // CHECK: ret <4 x i32> [[TMP0]]
12369 uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
12370 return vreinterpretq_u32_u16(a);
12373 // CHECK-LABEL: @test_vreinterpretq_u32_u64(
12374 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
12375 // CHECK: ret <4 x i32> [[TMP0]]
12376 uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
12377 return vreinterpretq_u32_u64(a);
12380 // CHECK-LABEL: @test_vreinterpretq_u32_f16(
12381 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
12382 // CHECK: ret <4 x i32> [[TMP0]]
12383 uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
12384 return vreinterpretq_u32_f16(a);
12387 // CHECK-LABEL: @test_vreinterpretq_u32_f32(
12388 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
12389 // CHECK: ret <4 x i32> [[TMP0]]
12390 uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
12391 return vreinterpretq_u32_f32(a);
12394 // CHECK-LABEL: @test_vreinterpretq_u32_p8(
12395 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
12396 // CHECK: ret <4 x i32> [[TMP0]]
12397 uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
12398 return vreinterpretq_u32_p8(a);
12401 // CHECK-LABEL: @test_vreinterpretq_u32_p16(
12402 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
12403 // CHECK: ret <4 x i32> [[TMP0]]
12404 uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
12405 return vreinterpretq_u32_p16(a);
12408 // CHECK-LABEL: @test_vreinterpretq_u64_s8(
12409 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
12410 // CHECK: ret <2 x i64> [[TMP0]]
12411 uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
12412 return vreinterpretq_u64_s8(a);
12415 // CHECK-LABEL: @test_vreinterpretq_u64_s16(
12416 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
12417 // CHECK: ret <2 x i64> [[TMP0]]
12418 uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
12419 return vreinterpretq_u64_s16(a);
12422 // CHECK-LABEL: @test_vreinterpretq_u64_s32(
12423 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
12424 // CHECK: ret <2 x i64> [[TMP0]]
12425 uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
12426 return vreinterpretq_u64_s32(a);
12429 // CHECK-LABEL: @test_vreinterpretq_u64_s64(
12430 // CHECK: ret <2 x i64> %a
12431 uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
12432 return vreinterpretq_u64_s64(a);
12435 // CHECK-LABEL: @test_vreinterpretq_u64_u8(
12436 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
12437 // CHECK: ret <2 x i64> [[TMP0]]
12438 uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
12439 return vreinterpretq_u64_u8(a);
12442 // CHECK-LABEL: @test_vreinterpretq_u64_u16(
12443 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
12444 // CHECK: ret <2 x i64> [[TMP0]]
12445 uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
12446 return vreinterpretq_u64_u16(a);
12449 // CHECK-LABEL: @test_vreinterpretq_u64_u32(
12450 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
12451 // CHECK: ret <2 x i64> [[TMP0]]
12452 uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
12453 return vreinterpretq_u64_u32(a);
12456 // CHECK-LABEL: @test_vreinterpretq_u64_f16(
12457 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
12458 // CHECK: ret <2 x i64> [[TMP0]]
12459 uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
12460 return vreinterpretq_u64_f16(a);
12463 // CHECK-LABEL: @test_vreinterpretq_u64_f32(
12464 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
12465 // CHECK: ret <2 x i64> [[TMP0]]
12466 uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
12467 return vreinterpretq_u64_f32(a);
12470 // CHECK-LABEL: @test_vreinterpretq_u64_p8(
12471 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
12472 // CHECK: ret <2 x i64> [[TMP0]]
12473 uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
12474 return vreinterpretq_u64_p8(a);
12477 // CHECK-LABEL: @test_vreinterpretq_u64_p16(
12478 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
12479 // CHECK: ret <2 x i64> [[TMP0]]
12480 uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
12481 return vreinterpretq_u64_p16(a);
12484 // CHECK-LABEL: @test_vreinterpretq_f16_s8(
12485 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
12486 // CHECK: ret <8 x half> [[TMP0]]
12487 float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
12488 return vreinterpretq_f16_s8(a);
12491 // CHECK-LABEL: @test_vreinterpretq_f16_s16(
12492 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
12493 // CHECK: ret <8 x half> [[TMP0]]
12494 float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
12495 return vreinterpretq_f16_s16(a);
12498 // CHECK-LABEL: @test_vreinterpretq_f16_s32(
12499 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
12500 // CHECK: ret <8 x half> [[TMP0]]
12501 float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
12502 return vreinterpretq_f16_s32(a);
12505 // CHECK-LABEL: @test_vreinterpretq_f16_s64(
12506 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
12507 // CHECK: ret <8 x half> [[TMP0]]
12508 float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
12509 return vreinterpretq_f16_s64(a);
12512 // CHECK-LABEL: @test_vreinterpretq_f16_u8(
12513 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
12514 // CHECK: ret <8 x half> [[TMP0]]
12515 float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
12516 return vreinterpretq_f16_u8(a);
12519 // CHECK-LABEL: @test_vreinterpretq_f16_u16(
12520 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
12521 // CHECK: ret <8 x half> [[TMP0]]
12522 float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
12523 return vreinterpretq_f16_u16(a);
12526 // CHECK-LABEL: @test_vreinterpretq_f16_u32(
12527 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
12528 // CHECK: ret <8 x half> [[TMP0]]
12529 float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
12530 return vreinterpretq_f16_u32(a);
12533 // CHECK-LABEL: @test_vreinterpretq_f16_u64(
12534 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
12535 // CHECK: ret <8 x half> [[TMP0]]
12536 float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
12537 return vreinterpretq_f16_u64(a);
12540 // CHECK-LABEL: @test_vreinterpretq_f16_f32(
12541 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
12542 // CHECK: ret <8 x half> [[TMP0]]
12543 float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
12544 return vreinterpretq_f16_f32(a);
12547 // CHECK-LABEL: @test_vreinterpretq_f16_p8(
12548 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
12549 // CHECK: ret <8 x half> [[TMP0]]
12550 float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
12551 return vreinterpretq_f16_p8(a);
12554 // CHECK-LABEL: @test_vreinterpretq_f16_p16(
12555 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
12556 // CHECK: ret <8 x half> [[TMP0]]
12557 float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
12558 return vreinterpretq_f16_p16(a);
12561 // CHECK-LABEL: @test_vreinterpretq_f32_s8(
12562 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
12563 // CHECK: ret <4 x float> [[TMP0]]
12564 float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
12565 return vreinterpretq_f32_s8(a);
12568 // CHECK-LABEL: @test_vreinterpretq_f32_s16(
12569 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
12570 // CHECK: ret <4 x float> [[TMP0]]
12571 float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
12572 return vreinterpretq_f32_s16(a);
12575 // CHECK-LABEL: @test_vreinterpretq_f32_s32(
12576 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
12577 // CHECK: ret <4 x float> [[TMP0]]
12578 float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
12579 return vreinterpretq_f32_s32(a);
12582 // CHECK-LABEL: @test_vreinterpretq_f32_s64(
12583 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
12584 // CHECK: ret <4 x float> [[TMP0]]
12585 float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
12586 return vreinterpretq_f32_s64(a);
12589 // CHECK-LABEL: @test_vreinterpretq_f32_u8(
12590 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
12591 // CHECK: ret <4 x float> [[TMP0]]
12592 float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
12593 return vreinterpretq_f32_u8(a);
12596 // CHECK-LABEL: @test_vreinterpretq_f32_u16(
12597 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
12598 // CHECK: ret <4 x float> [[TMP0]]
12599 float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
12600 return vreinterpretq_f32_u16(a);
12603 // CHECK-LABEL: @test_vreinterpretq_f32_u32(
12604 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
12605 // CHECK: ret <4 x float> [[TMP0]]
12606 float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
12607 return vreinterpretq_f32_u32(a);
12610 // CHECK-LABEL: @test_vreinterpretq_f32_u64(
12611 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
12612 // CHECK: ret <4 x float> [[TMP0]]
12613 float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
12614 return vreinterpretq_f32_u64(a);
12617 // CHECK-LABEL: @test_vreinterpretq_f32_f16(
12618 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
12619 // CHECK: ret <4 x float> [[TMP0]]
12620 float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
12621 return vreinterpretq_f32_f16(a);
12624 // CHECK-LABEL: @test_vreinterpretq_f32_p8(
12625 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
12626 // CHECK: ret <4 x float> [[TMP0]]
12627 float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
12628 return vreinterpretq_f32_p8(a);
12631 // CHECK-LABEL: @test_vreinterpretq_f32_p16(
12632 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
12633 // CHECK: ret <4 x float> [[TMP0]]
12634 float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
12635 return vreinterpretq_f32_p16(a);
12638 // CHECK-LABEL: @test_vreinterpretq_p8_s8(
12639 // CHECK: ret <16 x i8> %a
12640 poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
12641 return vreinterpretq_p8_s8(a);
12644 // CHECK-LABEL: @test_vreinterpretq_p8_s16(
12645 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
12646 // CHECK: ret <16 x i8> [[TMP0]]
12647 poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
12648 return vreinterpretq_p8_s16(a);
12651 // CHECK-LABEL: @test_vreinterpretq_p8_s32(
12652 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
12653 // CHECK: ret <16 x i8> [[TMP0]]
12654 poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
12655 return vreinterpretq_p8_s32(a);
12658 // CHECK-LABEL: @test_vreinterpretq_p8_s64(
12659 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
12660 // CHECK: ret <16 x i8> [[TMP0]]
12661 poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
12662 return vreinterpretq_p8_s64(a);
12665 // CHECK-LABEL: @test_vreinterpretq_p8_u8(
12666 // CHECK: ret <16 x i8> %a
12667 poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
12668 return vreinterpretq_p8_u8(a);
12671 // CHECK-LABEL: @test_vreinterpretq_p8_u16(
12672 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
12673 // CHECK: ret <16 x i8> [[TMP0]]
12674 poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
12675 return vreinterpretq_p8_u16(a);
12678 // CHECK-LABEL: @test_vreinterpretq_p8_u32(
12679 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
12680 // CHECK: ret <16 x i8> [[TMP0]]
12681 poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
12682 return vreinterpretq_p8_u32(a);
12685 // CHECK-LABEL: @test_vreinterpretq_p8_u64(
12686 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
12687 // CHECK: ret <16 x i8> [[TMP0]]
12688 poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
12689 return vreinterpretq_p8_u64(a);
12692 // CHECK-LABEL: @test_vreinterpretq_p8_f16(
12693 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
12694 // CHECK: ret <16 x i8> [[TMP0]]
12695 poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
12696 return vreinterpretq_p8_f16(a);
12699 // CHECK-LABEL: @test_vreinterpretq_p8_f32(
12700 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
12701 // CHECK: ret <16 x i8> [[TMP0]]
12702 poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
12703 return vreinterpretq_p8_f32(a);
12706 // CHECK-LABEL: @test_vreinterpretq_p8_p16(
12707 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
12708 // CHECK: ret <16 x i8> [[TMP0]]
12709 poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
12710 return vreinterpretq_p8_p16(a);
12713 // CHECK-LABEL: @test_vreinterpretq_p16_s8(
12714 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
12715 // CHECK: ret <8 x i16> [[TMP0]]
12716 poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
12717 return vreinterpretq_p16_s8(a);
12720 // CHECK-LABEL: @test_vreinterpretq_p16_s16(
12721 // CHECK: ret <8 x i16> %a
12722 poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
12723 return vreinterpretq_p16_s16(a);
12726 // CHECK-LABEL: @test_vreinterpretq_p16_s32(
12727 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
12728 // CHECK: ret <8 x i16> [[TMP0]]
12729 poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
12730 return vreinterpretq_p16_s32(a);
12733 // CHECK-LABEL: @test_vreinterpretq_p16_s64(
12734 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
12735 // CHECK: ret <8 x i16> [[TMP0]]
12736 poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
12737 return vreinterpretq_p16_s64(a);
12740 // CHECK-LABEL: @test_vreinterpretq_p16_u8(
12741 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
12742 // CHECK: ret <8 x i16> [[TMP0]]
12743 poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
12744 return vreinterpretq_p16_u8(a);
12747 // CHECK-LABEL: @test_vreinterpretq_p16_u16(
12748 // CHECK: ret <8 x i16> %a
12749 poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
12750 return vreinterpretq_p16_u16(a);
12753 // CHECK-LABEL: @test_vreinterpretq_p16_u32(
12754 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
12755 // CHECK: ret <8 x i16> [[TMP0]]
12756 poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
12757 return vreinterpretq_p16_u32(a);
12760 // CHECK-LABEL: @test_vreinterpretq_p16_u64(
12761 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
12762 // CHECK: ret <8 x i16> [[TMP0]]
12763 poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
12764 return vreinterpretq_p16_u64(a);
12767 // CHECK-LABEL: @test_vreinterpretq_p16_f16(
12768 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
12769 // CHECK: ret <8 x i16> [[TMP0]]
12770 poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
12771 return vreinterpretq_p16_f16(a);
12774 // CHECK-LABEL: @test_vreinterpretq_p16_f32(
12775 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
12776 // CHECK: ret <8 x i16> [[TMP0]]
12777 poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
12778 return vreinterpretq_p16_f32(a);
12781 // CHECK-LABEL: @test_vreinterpretq_p16_p8(
12782 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
12783 // CHECK: ret <8 x i16> [[TMP0]]
12784 poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
12785 return vreinterpretq_p16_p8(a);
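
// The vrev16/vrev32/vrev64 tests below check that each lane-reversal
// intrinsic folds to a single shufflevector whose mask reverses element
// order within every 16-, 32-, or 64-bit chunk of the vector.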

// CHECK-LABEL: @test_vrev16_s8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev16_s8(int8x8_t a) {
  return vrev16_s8(a);
}

// CHECK-LABEL: @test_vrev16_u8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev16_u8(uint8x8_t a) {
  return vrev16_u8(a);
}

// CHECK-LABEL: @test_vrev16_p8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev16_p8(poly8x8_t a) {
  return vrev16_p8(a);
}

// CHECK-LABEL: @test_vrev16q_s8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev16q_s8(int8x16_t a) {
  return vrev16q_s8(a);
}

// CHECK-LABEL: @test_vrev16q_u8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev16q_u8(uint8x16_t a) {
  return vrev16q_u8(a);
}

// CHECK-LABEL: @test_vrev16q_p8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev16q_p8(poly8x16_t a) {
  return vrev16q_p8(a);
}

// CHECK-LABEL: @test_vrev32_s8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev32_s8(int8x8_t a) {
  return vrev32_s8(a);
}

// CHECK-LABEL: @test_vrev32_s16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vrev32_s16(int16x4_t a) {
  return vrev32_s16(a);
}

// CHECK-LABEL: @test_vrev32_u8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev32_u8(uint8x8_t a) {
  return vrev32_u8(a);
}

// CHECK-LABEL: @test_vrev32_u16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vrev32_u16(uint16x4_t a) {
  return vrev32_u16(a);
}

// CHECK-LABEL: @test_vrev32_p8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev32_p8(poly8x8_t a) {
  return vrev32_p8(a);
}

// CHECK-LABEL: @test_vrev32_p16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vrev32_p16(poly16x4_t a) {
  return vrev32_p16(a);
}

// CHECK-LABEL: @test_vrev32q_s8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev32q_s8(int8x16_t a) {
  return vrev32q_s8(a);
}

// CHECK-LABEL: @test_vrev32q_s16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrev32q_s16(int16x8_t a) {
  return vrev32q_s16(a);
}

// CHECK-LABEL: @test_vrev32q_u8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev32q_u8(uint8x16_t a) {
  return vrev32q_u8(a);
}

// CHECK-LABEL: @test_vrev32q_u16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrev32q_u16(uint16x8_t a) {
  return vrev32q_u16(a);
}

// CHECK-LABEL: @test_vrev32q_p8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev32q_p8(poly8x16_t a) {
  return vrev32q_p8(a);
}

// CHECK-LABEL: @test_vrev32q_p16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vrev32q_p16(poly16x8_t a) {
  return vrev32q_p16(a);
}

// CHECK-LABEL: @test_vrev64_s8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev64_s8(int8x8_t a) {
  return vrev64_s8(a);
}

// CHECK-LABEL: @test_vrev64_s16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vrev64_s16(int16x4_t a) {
  return vrev64_s16(a);
}

// CHECK-LABEL: @test_vrev64_s32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vrev64_s32(int32x2_t a) {
  return vrev64_s32(a);
}

// CHECK-LABEL: @test_vrev64_u8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev64_u8(uint8x8_t a) {
  return vrev64_u8(a);
}

// CHECK-LABEL: @test_vrev64_u16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vrev64_u16(uint16x4_t a) {
  return vrev64_u16(a);
}

// CHECK-LABEL: @test_vrev64_u32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
// CHECK: ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vrev64_u32(uint32x2_t a) {
  return vrev64_u32(a);
}

// CHECK-LABEL: @test_vrev64_p8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev64_p8(poly8x8_t a) {
  return vrev64_p8(a);
}

// CHECK-LABEL: @test_vrev64_p16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK: ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vrev64_p16(poly16x4_t a) {
  return vrev64_p16(a);
}

// CHECK-LABEL: @test_vrev64_f32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <2 x i32> <i32 1, i32 0>
// CHECK: ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vrev64_f32(float32x2_t a) {
  return vrev64_f32(a);
}

// CHECK-LABEL: @test_vrev64q_s8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev64q_s8(int8x16_t a) {
  return vrev64q_s8(a);
}

// CHECK-LABEL: @test_vrev64q_s16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrev64q_s16(int16x8_t a) {
  return vrev64q_s16(a);
}

// CHECK-LABEL: @test_vrev64q_s32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vrev64q_s32(int32x4_t a) {
  return vrev64q_s32(a);
}

// CHECK-LABEL: @test_vrev64q_u8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev64q_u8(uint8x16_t a) {
  return vrev64q_u8(a);
}

// CHECK-LABEL: @test_vrev64q_u16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrev64q_u16(uint16x8_t a) {
  return vrev64q_u16(a);
}

// CHECK-LABEL: @test_vrev64q_u32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vrev64q_u32(uint32x4_t a) {
  return vrev64q_u32(a);
}

// CHECK-LABEL: @test_vrev64q_p8(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
// CHECK: ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev64q_p8(poly8x16_t a) {
  return vrev64q_p8(a);
}

// CHECK-LABEL: @test_vrev64q_p16(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vrev64q_p16(poly16x8_t a) {
  return vrev64q_p16(a);
}

// CHECK-LABEL: @test_vrev64q_f32(
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: ret <4 x float> [[SHUFFLE_I]]
float32x4_t test_vrev64q_f32(float32x4_t a) {
  return vrev64q_f32(a);
}
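
// vrhadd computes the rounding halving add (a + b + 1) >> 1 per lane; it
// has no generic IR equivalent, so it is expected to survive as a call to
// @llvm.arm.neon.vrhadds (signed) or @llvm.arm.neon.vrhaddu (unsigned).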

// CHECK-LABEL: @test_vrhadd_s8(
// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VRHADD_V_I]]
int8x8_t test_vrhadd_s8(int8x8_t a, int8x8_t b) {
  return vrhadd_s8(a, b);
}

// CHECK-LABEL: @test_vrhadd_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRHADD_V2_I]]
int16x4_t test_vrhadd_s16(int16x4_t a, int16x4_t b) {
  return vrhadd_s16(a, b);
}

// CHECK-LABEL: @test_vrhadd_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRHADD_V2_I]]
int32x2_t test_vrhadd_s32(int32x2_t a, int32x2_t b) {
  return vrhadd_s32(a, b);
}

// CHECK-LABEL: @test_vrhadd_u8(
// CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VRHADD_V_I]]
uint8x8_t test_vrhadd_u8(uint8x8_t a, uint8x8_t b) {
  return vrhadd_u8(a, b);
}

// CHECK-LABEL: @test_vrhadd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRHADD_V2_I]]
uint16x4_t test_vrhadd_u16(uint16x4_t a, uint16x4_t b) {
  return vrhadd_u16(a, b);
}

// CHECK-LABEL: @test_vrhadd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRHADD_V2_I]]
uint32x2_t test_vrhadd_u32(uint32x2_t a, uint32x2_t b) {
  return vrhadd_u32(a, b);
}

// CHECK-LABEL: @test_vrhaddq_s8(
// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
int8x16_t test_vrhaddq_s8(int8x16_t a, int8x16_t b) {
  return vrhaddq_s8(a, b);
}

// CHECK-LABEL: @test_vrhaddq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VRHADDQ_V2_I]]
int16x8_t test_vrhaddq_s16(int16x8_t a, int16x8_t b) {
  return vrhaddq_s16(a, b);
}

// CHECK-LABEL: @test_vrhaddq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VRHADDQ_V2_I]]
int32x4_t test_vrhaddq_s32(int32x4_t a, int32x4_t b) {
  return vrhaddq_s32(a, b);
}

// CHECK-LABEL: @test_vrhaddq_u8(
// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
uint8x16_t test_vrhaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vrhaddq_u8(a, b);
}

// CHECK-LABEL: @test_vrhaddq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VRHADDQ_V2_I]]
uint16x8_t test_vrhaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vrhaddq_u16(a, b);
}

// CHECK-LABEL: @test_vrhaddq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VRHADDQ_V2_I]]
uint32x4_t test_vrhaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vrhaddq_u32(a, b);
}
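
// vrshl takes a signed per-lane shift count (negative counts shift right,
// with rounding) and lowers to @llvm.arm.neon.vrshifts/vrshiftu.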

// CHECK-LABEL: @test_vrshl_s8(
// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VRSHL_V_I]]
int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
  return vrshl_s8(a, b);
}

// CHECK-LABEL: @test_vrshl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRSHL_V2_I]]
int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
  return vrshl_s16(a, b);
}

// CHECK-LABEL: @test_vrshl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRSHL_V2_I]]
int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
  return vrshl_s32(a, b);
}

// CHECK-LABEL: @test_vrshl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VRSHL_V2_I]]
int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
  return vrshl_s64(a, b);
}

// CHECK-LABEL: @test_vrshl_u8(
// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VRSHL_V_I]]
uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
  return vrshl_u8(a, b);
}

// CHECK-LABEL: @test_vrshl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRSHL_V2_I]]
uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
  return vrshl_u16(a, b);
}

// CHECK-LABEL: @test_vrshl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRSHL_V2_I]]
uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
  return vrshl_u32(a, b);
}

// CHECK-LABEL: @test_vrshl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VRSHL_V2_I]]
uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
  return vrshl_u64(a, b);
}

// CHECK-LABEL: @test_vrshlq_s8(
// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
  return vrshlq_s8(a, b);
}

// CHECK-LABEL: @test_vrshlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VRSHLQ_V2_I]]
int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
  return vrshlq_s16(a, b);
}

// CHECK-LABEL: @test_vrshlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VRSHLQ_V2_I]]
int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
  return vrshlq_s32(a, b);
}

// CHECK-LABEL: @test_vrshlq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VRSHLQ_V2_I]]
int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
  return vrshlq_s64(a, b);
}

// CHECK-LABEL: @test_vrshlq_u8(
// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vrshlq_u8(a, b);
}

// CHECK-LABEL: @test_vrshlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VRSHLQ_V2_I]]
uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vrshlq_u16(a, b);
}

// CHECK-LABEL: @test_vrshlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VRSHLQ_V2_I]]
uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vrshlq_u32(a, b);
}

// CHECK-LABEL: @test_vrshlq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VRSHLQ_V2_I]]
uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vrshlq_u64(a, b);
}
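
// vrshrn_n narrows after a rounding right shift by an immediate; the shift
// amount is encoded as a splatted negative count (a right shift by 1 shows
// up as a vector of -1) passed to @llvm.arm.neon.vrshiftn.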

// CHECK-LABEL: @test_vrshrn_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VRSHRN_N1]]
int8x8_t test_vrshrn_n_s16(int16x8_t a) {
  return vrshrn_n_s16(a, 1);
}

// CHECK-LABEL: @test_vrshrn_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VRSHRN_N1]]
int16x4_t test_vrshrn_n_s32(int32x4_t a) {
  return vrshrn_n_s32(a, 1);
}

// CHECK-LABEL: @test_vrshrn_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VRSHRN_N1]]
int32x2_t test_vrshrn_n_s64(int64x2_t a) {
  return vrshrn_n_s64(a, 1);
}

// CHECK-LABEL: @test_vrshrn_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <8 x i8> [[VRSHRN_N1]]
uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
  return vrshrn_n_u16(a, 1);
}

// CHECK-LABEL: @test_vrshrn_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: ret <4 x i16> [[VRSHRN_N1]]
uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
  return vrshrn_n_u32(a, 1);
}

// CHECK-LABEL: @test_vrshrn_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: ret <2 x i32> [[VRSHRN_N1]]
uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
  return vrshrn_n_u64(a, 1);
}
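
// vrshr_n and vrshrq_n reuse the vrshifts/vrshiftu intrinsics with the
// same negated-immediate encoding, so a right shift by 1 appears as a
// splat of -1 in the second operand.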

// CHECK-LABEL: @test_vrshr_n_s8(
// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <8 x i8> [[VRSHR_N]]
int8x8_t test_vrshr_n_s8(int8x8_t a) {
  return vrshr_n_s8(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <4 x i16> [[VRSHR_N1]]
int16x4_t test_vrshr_n_s16(int16x4_t a) {
  return vrshr_n_s16(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -1, i32 -1>)
// CHECK: ret <2 x i32> [[VRSHR_N1]]
int32x2_t test_vrshr_n_s32(int32x2_t a) {
  return vrshr_n_s32(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VRSHR_N1]]
int64x1_t test_vrshr_n_s64(int64x1_t a) {
  return vrshr_n_s64(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_u8(
// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: ret <8 x i8> [[VRSHR_N]]
uint8x8_t test_vrshr_n_u8(uint8x8_t a) {
  return vrshr_n_u8(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: ret <4 x i16> [[VRSHR_N1]]
uint16x4_t test_vrshr_n_u16(uint16x4_t a) {
  return vrshr_n_u16(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -1, i32 -1>)
// CHECK: ret <2 x i32> [[VRSHR_N1]]
uint32x2_t test_vrshr_n_u32(uint32x2_t a) {
  return vrshr_n_u32(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VRSHR_N1]]
uint64x1_t test_vrshr_n_u64(uint64x1_t a) {
  return vrshr_n_u64(a, 1);
}
13418 // CHECK-LABEL: @test_vrshrq_n_s8(
13419 // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
13420 // CHECK: ret <16 x i8> [[VRSHR_N]]
13421 int8x16_t test_vrshrq_n_s8(int8x16_t a) {
13422 return vrshrq_n_s8(a, 1);
13425 // CHECK-LABEL: @test_vrshrq_n_s16(
13426 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
13427 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
13428 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
13429 // CHECK: ret <8 x i16> [[VRSHR_N1]]
13430 int16x8_t test_vrshrq_n_s16(int16x8_t a) {
13431 return vrshrq_n_s16(a, 1);
13434 // CHECK-LABEL: @test_vrshrq_n_s32(
13435 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
13436 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
13437 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
13438 // CHECK: ret <4 x i32> [[VRSHR_N1]]
13439 int32x4_t test_vrshrq_n_s32(int32x4_t a) {
13440 return vrshrq_n_s32(a, 1);
13443 // CHECK-LABEL: @test_vrshrq_n_s64(
13444 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
13445 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
13446 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -1, i64 -1>)
13447 // CHECK: ret <2 x i64> [[VRSHR_N1]]
13448 int64x2_t test_vrshrq_n_s64(int64x2_t a) {
13449 return vrshrq_n_s64(a, 1);
13452 // CHECK-LABEL: @test_vrshrq_n_u8(
13453 // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
13454 // CHECK: ret <16 x i8> [[VRSHR_N]]
13455 uint8x16_t test_vrshrq_n_u8(uint8x16_t a) {
13456 return vrshrq_n_u8(a, 1);
13459 // CHECK-LABEL: @test_vrshrq_n_u16(
13460 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
13461 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
13462 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
13463 // CHECK: ret <8 x i16> [[VRSHR_N1]]
13464 uint16x8_t test_vrshrq_n_u16(uint16x8_t a) {
13465 return vrshrq_n_u16(a, 1);
13468 // CHECK-LABEL: @test_vrshrq_n_u32(
13469 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
13470 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
13471 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
13472 // CHECK: ret <4 x i32> [[VRSHR_N1]]
13473 uint32x4_t test_vrshrq_n_u32(uint32x4_t a) {
13474 return vrshrq_n_u32(a, 1);
13477 // CHECK-LABEL: @test_vrshrq_n_u64(
13478 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
13479 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
13480 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -1, i64 -1>)
13481 // CHECK: ret <2 x i64> [[VRSHR_N1]]
13482 uint64x2_t test_vrshrq_n_u64(uint64x2_t a) {
13483 return vrshrq_n_u64(a, 1);
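
// vrshr_n_*/vrshrq_n_* round before shifting: each lane becomes
// (x + (1 << (n-1))) >> n, e.g. for x = -3 and n = 1, (-3 + 1) >> 1 = -1.
// As with the narrowing forms, the shift amount is encoded as a negative
// left-shift count, so vrshifts (signed) and vrshiftu (unsigned) receive
// splats of -n rather than n.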

// CHECK-LABEL: @test_vrsqrte_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %a)
// CHECK: ret <2 x float> [[VRSQRTE_V1_I]]
float32x2_t test_vrsqrte_f32(float32x2_t a) {
  return vrsqrte_f32(a);
}

// CHECK-LABEL: @test_vrsqrte_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %a)
// CHECK: ret <2 x i32> [[VRSQRTE_V1_I]]
uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
  return vrsqrte_u32(a);
}

// CHECK-LABEL: @test_vrsqrteq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %a)
// CHECK: ret <4 x float> [[VRSQRTEQ_V1_I]]
float32x4_t test_vrsqrteq_f32(float32x4_t a) {
  return vrsqrteq_f32(a);
}

// CHECK-LABEL: @test_vrsqrteq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %a)
// CHECK: ret <4 x i32> [[VRSQRTEQ_V1_I]]
uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
  return vrsqrteq_u32(a);
}
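
// vrsqrte_*/vrsqrteq_* map straight onto llvm.arm.neon.vrsqrte and produce
// a low-precision per-lane estimate of 1/sqrt(x) (the u32 forms estimate on
// the integer value). The unused leading bitcasts appear to survive only
// because the test builds with -disable-O0-optnone and is cleaned up with
// nothing stronger than mem2reg.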

// CHECK-LABEL: @test_vrsqrts_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VRSQRTS_V2_I]]
float32x2_t test_vrsqrts_f32(float32x2_t a, float32x2_t b) {
  return vrsqrts_f32(a, b);
}

// CHECK-LABEL: @test_vrsqrtsq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x float> [[VRSQRTSQ_V2_I]]
float32x4_t test_vrsqrtsq_f32(float32x4_t a, float32x4_t b) {
  return vrsqrtsq_f32(a, b);
}
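
// vrsqrts_*/vrsqrtsq_* compute the Newton-Raphson step (3 - a*b) / 2 per
// lane; one or more of these steps refines a vrsqrte estimate toward
// 1/sqrt(x).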

// CHECK-LABEL: @test_vrsra_n_s8(
// CHECK: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]]
// CHECK: ret <8 x i8> [[VRSRA_N]]
int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
  return vrsra_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[TMP3]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]]
// CHECK: ret <4 x i16> [[VRSRA_N]]
int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
  return vrsra_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[TMP3]], <2 x i32> <i32 -1, i32 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]]
// CHECK: ret <2 x i32> [[VRSRA_N]]
int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
  return vrsra_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[TMP3]], <1 x i64> <i64 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]]
// CHECK: ret <1 x i64> [[VRSRA_N]]
int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
  return vrsra_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_u8(
// CHECK: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]]
// CHECK: ret <8 x i8> [[VRSRA_N]]
uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) {
  return vrsra_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[TMP3]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]]
// CHECK: ret <4 x i16> [[VRSRA_N]]
uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) {
  return vrsra_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[TMP3]], <2 x i32> <i32 -1, i32 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]]
// CHECK: ret <2 x i32> [[VRSRA_N]]
uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) {
  return vrsra_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[TMP3]], <1 x i64> <i64 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]]
// CHECK: ret <1 x i64> [[VRSRA_N]]
uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vrsra_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_s8(
// CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]]
// CHECK: ret <16 x i8> [[VRSRA_N]]
int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vrsraq_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[TMP3]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]]
// CHECK: ret <8 x i16> [[VRSRA_N]]
int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vrsraq_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[TMP3]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
// CHECK: ret <4 x i32> [[VRSRA_N]]
int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vrsraq_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[TMP3]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
// CHECK: ret <2 x i64> [[VRSRA_N]]
int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vrsraq_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_u8(
// CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]]
// CHECK: ret <16 x i8> [[VRSRA_N]]
uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vrsraq_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[TMP3]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]]
// CHECK: ret <8 x i16> [[VRSRA_N]]
uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vrsraq_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[TMP3]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
// CHECK: ret <4 x i32> [[VRSRA_N]]
uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vrsraq_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[TMP3]], <2 x i64> <i64 -1, i64 -1>)
// CHECK: [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
// CHECK: ret <2 x i64> [[VRSRA_N]]
uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vrsraq_n_u64(a, b, 1);
}
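
// vrsra_n_*/vrsraq_n_* have no dedicated intrinsic of their own: the IR is
// the rounding right shift (vrshifts/vrshiftu with a negated count) of the
// second operand followed by a plain add into the accumulator, i.e.
// a + vrshr_n(b, n).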

// CHECK-LABEL: @test_vrsubhn_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
  return vrsubhn_s16(a, b);
}

// CHECK-LABEL: @test_vrsubhn_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRSUBHN_V2_I]]
int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
  return vrsubhn_s32(a, b);
}

// CHECK-LABEL: @test_vrsubhn_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRSUBHN_V2_I]]
int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
  return vrsubhn_s64(a, b);
}

// CHECK-LABEL: @test_vrsubhn_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vrsubhn_u16(a, b);
}

// CHECK-LABEL: @test_vrsubhn_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRSUBHN_V2_I]]
uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vrsubhn_u32(a, b);
}

// CHECK-LABEL: @test_vrsubhn_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRSUBHN_V2_I]]
uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vrsubhn_u64(a, b);
}
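
// vrsubhn_* subtracts, rounds, and keeps the high half of each lane: for
// lanes of width w the result is roughly (a - b + (1 << (w/2 - 1))) >> (w/2),
// truncated to the half-width element type.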

// CHECK-LABEL: @test_vset_lane_u8(
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK: ret <8 x i8> [[VSET_LANE]]
uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) {
  return vset_lane_u8(a, b, 7);
}

// CHECK-LABEL: @test_vset_lane_u16(
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
  return vset_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vset_lane_u32(
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
// CHECK: ret <2 x i32> [[VSET_LANE]]
uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
  return vset_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vset_lane_s8(
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK: ret <8 x i8> [[VSET_LANE]]
int8x8_t test_vset_lane_s8(int8_t a, int8x8_t b) {
  return vset_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vset_lane_s16(
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) {
  return vset_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vset_lane_s32(
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
// CHECK: ret <2 x i32> [[VSET_LANE]]
int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) {
  return vset_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vset_lane_p8(
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK: ret <8 x i8> [[VSET_LANE]]
poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t b) {
  return vset_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vset_lane_p16(
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) {
  return vset_lane_p16(a, b, 3);
}

// CHECK-LABEL: @test_vset_lane_f32(
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x float> %b, float %a, i32 1
// CHECK: ret <2 x float> [[VSET_LANE]]
float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
  return vset_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vset_lane_f16(
// CHECK: [[__REINT_246:%.*]] = alloca half, align 2
// CHECK: [[__REINT1_246:%.*]] = alloca <4 x half>, align 8
// CHECK: [[__REINT2_246:%.*]] = alloca <4 x i16>, align 8
// CHECK: [[TMP0:%.*]] = load half, ptr %a, align 2
// CHECK: store half [[TMP0]], ptr [[__REINT_246]], align 2
// CHECK: store <4 x half> %b, ptr [[__REINT1_246]], align 8
// CHECK: [[TMP2:%.*]] = load i16, ptr [[__REINT_246]], align 2
// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT1_246]], align 8
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[TMP2]], i32 1
// CHECK: store <4 x i16> [[VSET_LANE]], ptr [[__REINT2_246]], align 8
// CHECK: [[TMP8:%.*]] = load <4 x half>, ptr [[__REINT2_246]], align 8
// CHECK: ret <4 x half> [[TMP8]]
float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) {
  return vset_lane_f16(*a, b, 1);
}
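
// The f16 variant does not insert a half lane directly: the scalar and the
// vector are spilled to temporaries and reloaded as i16 and <4 x i16> before
// the insertelement, then stored and reloaded once more as <4 x half>.
// Taking the scalar by pointer here is presumably a test-local workaround
// for passing a bare half argument under this ABI.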

// CHECK-LABEL: @test_vsetq_lane_u8(
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
  return vsetq_lane_u8(a, b, 15);
}

// CHECK-LABEL: @test_vsetq_lane_u16(
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
  return vsetq_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vsetq_lane_u32(
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
// CHECK: ret <4 x i32> [[VSET_LANE]]
uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
  return vsetq_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vsetq_lane_s8(
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) {
  return vsetq_lane_s8(a, b, 15);
}

// CHECK-LABEL: @test_vsetq_lane_s16(
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) {
  return vsetq_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vsetq_lane_s32(
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
// CHECK: ret <4 x i32> [[VSET_LANE]]
int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
  return vsetq_lane_s32(a, b, 3);
}

// CHECK-LABEL: @test_vsetq_lane_p8(
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) {
  return vsetq_lane_p8(a, b, 15);
}

// CHECK-LABEL: @test_vsetq_lane_p16(
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) {
  return vsetq_lane_p16(a, b, 7);
}

// CHECK-LABEL: @test_vsetq_lane_f32(
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> %b, float %a, i32 3
// CHECK: ret <4 x float> [[VSET_LANE]]
float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
  return vsetq_lane_f32(a, b, 3);
}

// CHECK-LABEL: @test_vsetq_lane_f16(
// CHECK: [[__REINT_248:%.*]] = alloca half, align 2
// CHECK: [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
// CHECK: [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
// CHECK: [[TMP0:%.*]] = load half, ptr %a, align 2
// CHECK: store half [[TMP0]], ptr [[__REINT_248]], align 2
// CHECK: store <8 x half> %b, ptr [[__REINT1_248]], align 16
// CHECK: [[TMP2:%.*]] = load i16, ptr [[__REINT_248]], align 2
// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT1_248]], align 16
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[TMP2]], i32 3
// CHECK: store <8 x i16> [[VSET_LANE]], ptr [[__REINT2_248]], align 16
// CHECK: [[TMP8:%.*]] = load <8 x half>, ptr [[__REINT2_248]], align 16
// CHECK: ret <8 x half> [[TMP8]]
float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) {
  return vsetq_lane_f16(*a, b, 3);
}

// CHECK-LABEL: @test_vset_lane_s64(
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
// CHECK: ret <1 x i64> [[VSET_LANE]]
int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) {
  return vset_lane_s64(a, b, 0);
}

// CHECK-LABEL: @test_vset_lane_u64(
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
// CHECK: ret <1 x i64> [[VSET_LANE]]
uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
  return vset_lane_u64(a, b, 0);
}

// CHECK-LABEL: @test_vsetq_lane_s64(
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
  return vsetq_lane_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsetq_lane_u64(
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
  return vsetq_lane_u64(a, b, 1);
}
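
// Every integer, float, and poly vset_lane/vsetq_lane variant lowers to a
// single insertelement with a constant lane index; the index is a
// compile-time immediate checked against the vector length, so no bounds
// logic appears in the IR.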

// CHECK-LABEL: @test_vshl_s8(
// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VSHL_V_I]]
int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
  return vshl_s8(a, b);
}

// CHECK-LABEL: @test_vshl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VSHL_V2_I]]
int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
  return vshl_s16(a, b);
}

// CHECK-LABEL: @test_vshl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VSHL_V2_I]]
int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
  return vshl_s32(a, b);
}

// CHECK-LABEL: @test_vshl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VSHL_V2_I]]
int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
  return vshl_s64(a, b);
}

// CHECK-LABEL: @test_vshl_u8(
// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VSHL_V_I]]
uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
  return vshl_u8(a, b);
}

// CHECK-LABEL: @test_vshl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VSHL_V2_I]]
uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
  return vshl_u16(a, b);
}

// CHECK-LABEL: @test_vshl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VSHL_V2_I]]
uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
  return vshl_u32(a, b);
}

// CHECK-LABEL: @test_vshl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VSHL_V2_I]]
uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
  return vshl_u64(a, b);
}

// CHECK-LABEL: @test_vshlq_s8(
// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
  return vshlq_s8(a, b);
}

// CHECK-LABEL: @test_vshlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VSHLQ_V2_I]]
int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
  return vshlq_s16(a, b);
}

// CHECK-LABEL: @test_vshlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VSHLQ_V2_I]]
int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
  return vshlq_s32(a, b);
}

// CHECK-LABEL: @test_vshlq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VSHLQ_V2_I]]
int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
  return vshlq_s64(a, b);
}

// CHECK-LABEL: @test_vshlq_u8(
// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
  return vshlq_u8(a, b);
}

// CHECK-LABEL: @test_vshlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VSHLQ_V2_I]]
uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
  return vshlq_u16(a, b);
}

// CHECK-LABEL: @test_vshlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VSHLQ_V2_I]]
uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
  return vshlq_u32(a, b);
}

// CHECK-LABEL: @test_vshlq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VSHLQ_V2_I]]
uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
  return vshlq_u64(a, b);
}
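
// The register forms vshl_*/vshlq_* shift each lane of a left by the signed
// per-lane count in b, and negative counts shift right. That is why the
// unsigned variants still take a signed vector for b, and why separate
// vshifts/vshiftu intrinsics exist even for a "left" shift: signedness only
// matters once a negative count turns it into a right shift.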

// CHECK-LABEL: @test_vshll_n_s8(
// CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_n_s8(int8x8_t a) {
  return vshll_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshll_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_n_s16(int16x4_t a) {
  return vshll_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshll_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_n_s32(int32x2_t a) {
  return vshll_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshll_n_u8(
// CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_n_u8(uint8x8_t a) {
  return vshll_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshll_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_n_u16(uint16x4_t a) {
  return vshll_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshll_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_n_u32(uint32x2_t a) {
  return vshll_n_u32(a, 1);
}
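
// vshll_n_* widens first and shifts second: a sext (signed) or zext
// (unsigned) to double-width lanes followed by an ordinary IR shl, so no
// target intrinsic is needed for these shift amounts.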

// CHECK-LABEL: @test_vshl_n_s8(
// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <8 x i8> [[VSHL_N]]
int8x8_t test_vshl_n_s8(int8x8_t a) {
  return vshl_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshl_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <4 x i16> [[VSHL_N]]
int16x4_t test_vshl_n_s16(int16x4_t a) {
  return vshl_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshl_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK: ret <2 x i32> [[VSHL_N]]
int32x2_t test_vshl_n_s32(int32x2_t a) {
  return vshl_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshl_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHL_N]]
int64x1_t test_vshl_n_s64(int64x1_t a) {
  return vshl_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshl_n_u8(
// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <8 x i8> [[VSHL_N]]
uint8x8_t test_vshl_n_u8(uint8x8_t a) {
  return vshl_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshl_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <4 x i16> [[VSHL_N]]
uint16x4_t test_vshl_n_u16(uint16x4_t a) {
  return vshl_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshl_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK: ret <2 x i32> [[VSHL_N]]
uint32x2_t test_vshl_n_u32(uint32x2_t a) {
  return vshl_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshl_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHL_N]]
uint64x1_t test_vshl_n_u64(uint64x1_t a) {
  return vshl_n_u64(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_s8(
// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <16 x i8> [[VSHL_N]]
int8x16_t test_vshlq_n_s8(int8x16_t a) {
  return vshlq_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHL_N]]
int16x8_t test_vshlq_n_s16(int16x8_t a) {
  return vshlq_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHL_N]]
int32x4_t test_vshlq_n_s32(int32x4_t a) {
  return vshlq_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHL_N]]
int64x2_t test_vshlq_n_s64(int64x2_t a) {
  return vshlq_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_u8(
// CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <16 x i8> [[VSHL_N]]
uint8x16_t test_vshlq_n_u8(uint8x16_t a) {
  return vshlq_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHL_N]]
uint16x8_t test_vshlq_n_u16(uint16x8_t a) {
  return vshlq_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHL_N]]
uint32x4_t test_vshlq_n_u32(uint32x4_t a) {
  return vshlq_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHL_N]]
uint64x2_t test_vshlq_n_u64(uint64x2_t a) {
  return vshlq_n_u64(a, 1);
}
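
// Immediate left shifts need no NEON intrinsic at all: vshl_n_*/vshlq_n_*
// lower to a plain IR shl by a splat constant, and the surrounding bitcasts
// merely reconcile the builtin's byte-vector view with the element type.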

// CHECK-LABEL: @test_vshrn_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSHRN_N]]
int8x8_t test_vshrn_n_s16(int16x8_t a) {
  return vshrn_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSHRN_N]]
int16x4_t test_vshrn_n_s32(int32x4_t a) {
  return vshrn_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSHRN_N]]
int32x2_t test_vshrn_n_s64(int64x2_t a) {
  return vshrn_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK: ret <8 x i8> [[VSHRN_N]]
uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
  return vshrn_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK: ret <4 x i16> [[VSHRN_N]]
uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
  return vshrn_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK: ret <2 x i32> [[VSHRN_N]]
uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
  return vshrn_n_u64(a, 1);
}
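
// The non-rounding narrowing shift vshrn_n_* is likewise plain IR: ashr
// (signed) or lshr (unsigned) by a splat constant, then trunc to the
// half-width vector.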

// CHECK-LABEL: @test_vshr_n_s8(
// CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <8 x i8> [[VSHR_N]]
int8x8_t test_vshr_n_s8(int8x8_t a) {
  return vshr_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshr_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <4 x i16> [[VSHR_N]]
int16x4_t test_vshr_n_s16(int16x4_t a) {
  return vshr_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshr_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK: ret <2 x i32> [[VSHR_N]]
int32x2_t test_vshr_n_s32(int32x2_t a) {
  return vshr_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshr_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHR_N]]
int64x1_t test_vshr_n_s64(int64x1_t a) {
  return vshr_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshr_n_u8(
// CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <8 x i8> [[VSHR_N]]
uint8x8_t test_vshr_n_u8(uint8x8_t a) {
  return vshr_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshr_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <4 x i16> [[VSHR_N]]
uint16x4_t test_vshr_n_u16(uint16x4_t a) {
  return vshr_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshr_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK: ret <2 x i32> [[VSHR_N]]
uint32x2_t test_vshr_n_u32(uint32x2_t a) {
  return vshr_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshr_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHR_N]]
uint64x1_t test_vshr_n_u64(uint64x1_t a) {
  return vshr_n_u64(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_s8(
// CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <16 x i8> [[VSHR_N]]
int8x16_t test_vshrq_n_s8(int8x16_t a) {
  return vshrq_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHR_N]]
int16x8_t test_vshrq_n_s16(int16x8_t a) {
  return vshrq_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHR_N]]
int32x4_t test_vshrq_n_s32(int32x4_t a) {
  return vshrq_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHR_N]]
int64x2_t test_vshrq_n_s64(int64x2_t a) {
  return vshrq_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_u8(
// CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK: ret <16 x i8> [[VSHR_N]]
uint8x16_t test_vshrq_n_u8(uint8x16_t a) {
  return vshrq_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK: ret <8 x i16> [[VSHR_N]]
uint16x8_t test_vshrq_n_u16(uint16x8_t a) {
  return vshrq_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK: ret <4 x i32> [[VSHR_N]]
uint32x4_t test_vshrq_n_u32(uint32x4_t a) {
  return vshrq_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK: ret <2 x i64> [[VSHR_N]]
uint64x2_t test_vshrq_n_u64(uint64x2_t a) {
  return vshrq_n_u64(a, 1);
}
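
// vshr_n_*/vshrq_n_* select ashr for signed and lshr for unsigned element
// types; unlike the rounding forms no bias is added, so ordinary IR shifts
// suffice here as well.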

// CHECK-LABEL: @test_vsli_n_s8(
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
  return vsli_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
  return vsli_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VSLI_N2]]
int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
  return vsli_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VSLI_N2]]
int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) {
  return vsli_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_u8(
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsli_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsli_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 1, i32 1>)
// CHECK: ret <2 x i32> [[VSLI_N2]]
uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsli_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VSLI_N2]]
uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsli_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_p8(
// CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK: ret <8 x i8> [[VSLI_N]]
poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsli_n_p8(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK: ret <4 x i16> [[VSLI_N2]]
poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsli_n_p16(a, b, 1);
}
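
// vsli_n_* is shift-left-and-insert: each result lane is
// (b << n) | (a & ((1 << n) - 1)), keeping the low n bits of a. It lowers
// to llvm.arm.neon.vshiftins with a positive splat count; the q-suffixed
// variants below are the same operation at twice the width.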
14594 // CHECK-LABEL: @test_vsliq_n_s8(
14595 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
14596 // CHECK: ret <16 x i8> [[VSLI_N]]
14597 int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
14598 return vsliq_n_s8(a, b, 1);
14599 }
14601 // CHECK-LABEL: @test_vsliq_n_s16(
14602 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
14603 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
14604 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
14605 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
14606 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
14607 // CHECK: ret <8 x i16> [[VSLI_N2]]
14608 int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
14609 return vsliq_n_s16(a, b, 1);
14610 }
14612 // CHECK-LABEL: @test_vsliq_n_s32(
14613 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
14614 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
14615 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
14616 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
14617 // CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
14618 // CHECK: ret <4 x i32> [[VSLI_N2]]
14619 int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
14620 return vsliq_n_s32(a, b, 1);
14621 }
14623 // CHECK-LABEL: @test_vsliq_n_s64(
14624 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
14625 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
14626 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
14627 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
14628 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 1, i64 1>)
14629 // CHECK: ret <2 x i64> [[VSLI_N2]]
14630 int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
14631 return vsliq_n_s64(a, b, 1);
14632 }
14634 // CHECK-LABEL: @test_vsliq_n_u8(
14635 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
14636 // CHECK: ret <16 x i8> [[VSLI_N]]
14637 uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
14638 return vsliq_n_u8(a, b, 1);
14639 }
14641 // CHECK-LABEL: @test_vsliq_n_u16(
14642 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
14643 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
14644 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
14645 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
14646 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
14647 // CHECK: ret <8 x i16> [[VSLI_N2]]
14648 uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
14649 return vsliq_n_u16(a, b, 1);
14650 }
14652 // CHECK-LABEL: @test_vsliq_n_u32(
14653 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
14654 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
14655 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
14656 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
14657 // CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
14658 // CHECK: ret <4 x i32> [[VSLI_N2]]
14659 uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
14660 return vsliq_n_u32(a, b, 1);
14661 }
14663 // CHECK-LABEL: @test_vsliq_n_u64(
14664 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
14665 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
14666 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
14667 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
14668 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 1, i64 1>)
14669 // CHECK: ret <2 x i64> [[VSLI_N2]]
14670 uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
14671 return vsliq_n_u64(a, b, 1);
14672 }
14674 // CHECK-LABEL: @test_vsliq_n_p8(
14675 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
14676 // CHECK: ret <16 x i8> [[VSLI_N]]
14677 poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
14678 return vsliq_n_p8(a, b, 1);
14679 }
14681 // CHECK-LABEL: @test_vsliq_n_p16(
14682 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
14683 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
14684 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
14685 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
14686 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
14687 // CHECK: ret <8 x i16> [[VSLI_N2]]
14688 poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
14689 return vsliq_n_p16(a, b, 1);
14690 }
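// vsra_n/vsraq_n (shift right and accumulate) need no NEON intrinsic at all:
// with a constant shift amount, Clang emits a plain IR shift ('ashr' for the
// signed forms, 'lshr' for the unsigned forms) followed by an 'add', as the
// checks below verify. In effect, vsra_n_s8(a, b, 1) computes the same value
// as vadd_s8(a, vshr_n_s8(b, 1)).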
14692 // CHECK-LABEL: @test_vsra_n_s8(
14693 // CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
14694 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
14695 // CHECK: ret <8 x i8> [[TMP0]]
14696 int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
14697 return vsra_n_s8(a, b, 1);
14698 }
14700 // CHECK-LABEL: @test_vsra_n_s16(
14701 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
14702 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
14703 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
14704 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
14705 // CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
14706 // CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
14707 // CHECK: ret <4 x i16> [[TMP4]]
14708 int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
14709 return vsra_n_s16(a, b, 1);
14710 }
14712 // CHECK-LABEL: @test_vsra_n_s32(
14713 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
14714 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
14715 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
14716 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
14717 // CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], <i32 1, i32 1>
14718 // CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
14719 // CHECK: ret <2 x i32> [[TMP4]]
14720 int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
14721 return vsra_n_s32(a, b, 1);
14722 }
14724 // CHECK-LABEL: @test_vsra_n_s64(
14725 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14726 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
14727 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14728 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
14729 // CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], <i64 1>
14730 // CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
14731 // CHECK: ret <1 x i64> [[TMP4]]
14732 int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) {
14733 return vsra_n_s64(a, b, 1);
14734 }
14736 // CHECK-LABEL: @test_vsra_n_u8(
14737 // CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
14738 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
14739 // CHECK: ret <8 x i8> [[TMP0]]
14740 uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) {
14741 return vsra_n_u8(a, b, 1);
14742 }
14744 // CHECK-LABEL: @test_vsra_n_u16(
14745 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
14746 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
14747 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
14748 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
14749 // CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
14750 // CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
14751 // CHECK: ret <4 x i16> [[TMP4]]
14752 uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) {
14753 return vsra_n_u16(a, b, 1);
14754 }
14756 // CHECK-LABEL: @test_vsra_n_u32(
14757 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
14758 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
14759 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
14760 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
14761 // CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], <i32 1, i32 1>
14762 // CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
14763 // CHECK: ret <2 x i32> [[TMP4]]
14764 uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) {
14765 return vsra_n_u32(a, b, 1);
14766 }
14768 // CHECK-LABEL: @test_vsra_n_u64(
14769 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14770 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
14771 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14772 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
14773 // CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], <i64 1>
14774 // CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
14775 // CHECK: ret <1 x i64> [[TMP4]]
14776 uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
14777 return vsra_n_u64(a, b, 1);
14778 }
14780 // CHECK-LABEL: @test_vsraq_n_s8(
14781 // CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
14782 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
14783 // CHECK: ret <16 x i8> [[TMP0]]
14784 int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
14785 return vsraq_n_s8(a, b, 1);
14786 }
14788 // CHECK-LABEL: @test_vsraq_n_s16(
14789 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
14790 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
14791 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
14792 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
14793 // CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
14794 // CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
14795 // CHECK: ret <8 x i16> [[TMP4]]
14796 int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
14797 return vsraq_n_s16(a, b, 1);
14798 }
14800 // CHECK-LABEL: @test_vsraq_n_s32(
14801 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
14802 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
14803 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
14804 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
14805 // CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], <i32 1, i32 1, i32 1, i32 1>
14806 // CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
14807 // CHECK: ret <4 x i32> [[TMP4]]
14808 int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
14809 return vsraq_n_s32(a, b, 1);
14810 }
14812 // CHECK-LABEL: @test_vsraq_n_s64(
14813 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
14814 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
14815 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
14816 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
14817 // CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], <i64 1, i64 1>
14818 // CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
14819 // CHECK: ret <2 x i64> [[TMP4]]
14820 int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
14821 return vsraq_n_s64(a, b, 1);
14822 }
14824 // CHECK-LABEL: @test_vsraq_n_u8(
14825 // CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
14826 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
14827 // CHECK: ret <16 x i8> [[TMP0]]
14828 uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) {
14829 return vsraq_n_u8(a, b, 1);
14830 }
14832 // CHECK-LABEL: @test_vsraq_n_u16(
14833 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
14834 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
14835 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
14836 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
14837 // CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
14838 // CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
14839 // CHECK: ret <8 x i16> [[TMP4]]
14840 uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) {
14841 return vsraq_n_u16(a, b, 1);
14842 }
14844 // CHECK-LABEL: @test_vsraq_n_u32(
14845 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
14846 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
14847 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
14848 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
14849 // CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], <i32 1, i32 1, i32 1, i32 1>
14850 // CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
14851 // CHECK: ret <4 x i32> [[TMP4]]
14852 uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) {
14853 return vsraq_n_u32(a, b, 1);
14854 }
14856 // CHECK-LABEL: @test_vsraq_n_u64(
14857 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
14858 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
14859 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
14860 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
14861 // CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], <i64 1, i64 1>
14862 // CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
14863 // CHECK: ret <2 x i64> [[TMP4]]
14864 uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) {
14865 return vsraq_n_u64(a, b, 1);
14866 }
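// vsri_n/vsriq_n (shift right and insert) reuse the same
// @llvm.arm.neon.vshiftins intrinsic as the vsli tests above; the shift
// direction is encoded in the sign of the constant vector, so a right shift
// by 1 appears as a splat of -1 in the checks below. Per element, the top n
// bits of the first operand are preserved and the remaining bits are filled
// from the second operand shifted right by n.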
14868 // CHECK-LABEL: @test_vsri_n_s8(
14869 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
14870 // CHECK: ret <8 x i8> [[VSLI_N]]
14871 int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
14872 return vsri_n_s8(a, b, 1);
14873 }
14875 // CHECK-LABEL: @test_vsri_n_s16(
14876 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
14877 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
14878 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
14879 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
14880 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
14881 // CHECK: ret <4 x i16> [[VSLI_N2]]
14882 int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
14883 return vsri_n_s16(a, b, 1);
14884 }
14886 // CHECK-LABEL: @test_vsri_n_s32(
14887 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
14888 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
14889 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
14890 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
14891 // CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 -1, i32 -1>)
14892 // CHECK: ret <2 x i32> [[VSLI_N2]]
14893 int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
14894 return vsri_n_s32(a, b, 1);
14895 }
14897 // CHECK-LABEL: @test_vsri_n_s64(
14898 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14899 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
14900 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14901 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
14902 // CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 -1>)
14903 // CHECK: ret <1 x i64> [[VSLI_N2]]
14904 int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) {
14905 return vsri_n_s64(a, b, 1);
14906 }
14908 // CHECK-LABEL: @test_vsri_n_u8(
14909 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
14910 // CHECK: ret <8 x i8> [[VSLI_N]]
14911 uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) {
14912 return vsri_n_u8(a, b, 1);
14913 }
14915 // CHECK-LABEL: @test_vsri_n_u16(
14916 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
14917 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
14918 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
14919 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
14920 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
14921 // CHECK: ret <4 x i16> [[VSLI_N2]]
14922 uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) {
14923 return vsri_n_u16(a, b, 1);
14924 }
14926 // CHECK-LABEL: @test_vsri_n_u32(
14927 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
14928 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
14929 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
14930 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
14931 // CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 -1, i32 -1>)
14932 // CHECK: ret <2 x i32> [[VSLI_N2]]
14933 uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) {
14934 return vsri_n_u32(a, b, 1);
14935 }
14937 // CHECK-LABEL: @test_vsri_n_u64(
14938 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14939 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
14940 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
14941 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
14942 // CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 -1>)
14943 // CHECK: ret <1 x i64> [[VSLI_N2]]
14944 uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) {
14945 return vsri_n_u64(a, b, 1);
14946 }
14948 // CHECK-LABEL: @test_vsri_n_p8(
14949 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
14950 // CHECK: ret <8 x i8> [[VSLI_N]]
14951 poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
14952 return vsri_n_p8(a, b, 1);
14953 }
14955 // CHECK-LABEL: @test_vsri_n_p16(
14956 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
14957 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
14958 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
14959 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
14960 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
14961 // CHECK: ret <4 x i16> [[VSLI_N2]]
14962 poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
14963 return vsri_n_p16(a, b, 1);
14964 }
14966 // CHECK-LABEL: @test_vsriq_n_s8(
14967 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
14968 // CHECK: ret <16 x i8> [[VSLI_N]]
14969 int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
14970 return vsriq_n_s8(a, b, 1);
14971 }
14973 // CHECK-LABEL: @test_vsriq_n_s16(
14974 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
14975 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
14976 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
14977 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
14978 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
14979 // CHECK: ret <8 x i16> [[VSLI_N2]]
14980 int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
14981 return vsriq_n_s16(a, b, 1);
14982 }
14984 // CHECK-LABEL: @test_vsriq_n_s32(
14985 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
14986 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
14987 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
14988 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
14989 // CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
14990 // CHECK: ret <4 x i32> [[VSLI_N2]]
14991 int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
14992 return vsriq_n_s32(a, b, 1);
14993 }
14995 // CHECK-LABEL: @test_vsriq_n_s64(
14996 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
14997 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
14998 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
14999 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
15000 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 -1, i64 -1>)
15001 // CHECK: ret <2 x i64> [[VSLI_N2]]
15002 int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
15003 return vsriq_n_s64(a, b, 1);
15004 }
15006 // CHECK-LABEL: @test_vsriq_n_u8(
15007 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
15008 // CHECK: ret <16 x i8> [[VSLI_N]]
15009 uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) {
15010 return vsriq_n_u8(a, b, 1);
15011 }
15013 // CHECK-LABEL: @test_vsriq_n_u16(
15014 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
15015 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15016 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
15017 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15018 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
15019 // CHECK: ret <8 x i16> [[VSLI_N2]]
15020 uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) {
15021 return vsriq_n_u16(a, b, 1);
15022 }
15024 // CHECK-LABEL: @test_vsriq_n_u32(
15025 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
15026 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
15027 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
15028 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
15029 // CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
15030 // CHECK: ret <4 x i32> [[VSLI_N2]]
15031 uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) {
15032 return vsriq_n_u32(a, b, 1);
15033 }
15035 // CHECK-LABEL: @test_vsriq_n_u64(
15036 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
15037 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
15038 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
15039 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
15040 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 -1, i64 -1>)
15041 // CHECK: ret <2 x i64> [[VSLI_N2]]
15042 uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) {
15043 return vsriq_n_u64(a, b, 1);
15044 }
15046 // CHECK-LABEL: @test_vsriq_n_p8(
15047 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
15048 // CHECK: ret <16 x i8> [[VSLI_N]]
15049 poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
15050 return vsriq_n_p8(a, b, 1);
15051 }
15053 // CHECK-LABEL: @test_vsriq_n_p16(
15054 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
15055 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15056 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
15057 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15058 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
15059 // CHECK: ret <8 x i16> [[VSLI_N2]]
15060 poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
15061 return vsriq_n_p16(a, b, 1);
15062 }
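// The vst1/vst1q tests check full-vector stores: each lowers to a call to
// @llvm.arm.neon.vst1 taking the destination pointer, the (possibly bitcast)
// vector, and a trailing i32 alignment in bytes -- 1 for 8-bit, 2 for
// 16-bit, and 4 for 32- and 64-bit element types in this configuration.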
15064 // CHECK-LABEL: @test_vst1q_u8(
15065 // CHECK: call void @llvm.arm.neon.vst1.p0.v16i8(ptr %a, <16 x i8> %b, i32 1)
15066 // CHECK: ret void
15067 void test_vst1q_u8(uint8_t * a, uint8x16_t b) {
15068 vst1q_u8(a, b);
15069 }
15071 // CHECK-LABEL: @test_vst1q_u16(
15072 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15073 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15074 // CHECK: call void @llvm.arm.neon.vst1.p0.v8i16(ptr %a, <8 x i16> [[TMP2]], i32 2)
15075 // CHECK: ret void
15076 void test_vst1q_u16(uint16_t * a, uint16x8_t b) {
15077 vst1q_u16(a, b);
15078 }
15080 // CHECK-LABEL: @test_vst1q_u32(
15081 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
15082 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
15083 // CHECK: call void @llvm.arm.neon.vst1.p0.v4i32(ptr %a, <4 x i32> [[TMP2]], i32 4)
15084 // CHECK: ret void
15085 void test_vst1q_u32(uint32_t * a, uint32x4_t b) {
15086 vst1q_u32(a, b);
15087 }
15089 // CHECK-LABEL: @test_vst1q_u64(
15090 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
15091 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
15092 // CHECK: call void @llvm.arm.neon.vst1.p0.v2i64(ptr %a, <2 x i64> [[TMP2]], i32 4)
15093 // CHECK: ret void
15094 void test_vst1q_u64(uint64_t * a, uint64x2_t b) {
15095 vst1q_u64(a, b);
15096 }
15098 // CHECK-LABEL: @test_vst1q_s8(
15099 // CHECK: call void @llvm.arm.neon.vst1.p0.v16i8(ptr %a, <16 x i8> %b, i32 1)
15100 // CHECK: ret void
15101 void test_vst1q_s8(int8_t * a, int8x16_t b) {
15102 vst1q_s8(a, b);
15103 }
15105 // CHECK-LABEL: @test_vst1q_s16(
15106 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15107 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15108 // CHECK: call void @llvm.arm.neon.vst1.p0.v8i16(ptr %a, <8 x i16> [[TMP2]], i32 2)
15109 // CHECK: ret void
15110 void test_vst1q_s16(int16_t * a, int16x8_t b) {
15111 vst1q_s16(a, b);
15112 }
15114 // CHECK-LABEL: @test_vst1q_s32(
15115 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
15116 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
15117 // CHECK: call void @llvm.arm.neon.vst1.p0.v4i32(ptr %a, <4 x i32> [[TMP2]], i32 4)
15118 // CHECK: ret void
15119 void test_vst1q_s32(int32_t * a, int32x4_t b) {
15120 vst1q_s32(a, b);
15121 }
15123 // CHECK-LABEL: @test_vst1q_s64(
15124 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
15125 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
15126 // CHECK: call void @llvm.arm.neon.vst1.p0.v2i64(ptr %a, <2 x i64> [[TMP2]], i32 4)
15127 // CHECK: ret void
15128 void test_vst1q_s64(int64_t * a, int64x2_t b) {
15129 vst1q_s64(a, b);
15130 }
15132 // CHECK-LABEL: @test_vst1q_f16(
15133 // CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
15134 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
15135 // CHECK: call void @llvm.arm.neon.vst1.p0.v8f16(ptr %a, <8 x half> [[TMP2]], i32 2)
15136 // CHECK: ret void
15137 void test_vst1q_f16(float16_t * a, float16x8_t b) {
15138 vst1q_f16(a, b);
15139 }
15141 // CHECK-LABEL: @test_vst1q_f32(
15142 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
15143 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
15144 // CHECK: call void @llvm.arm.neon.vst1.p0.v4f32(ptr %a, <4 x float> [[TMP2]], i32 4)
15145 // CHECK: ret void
15146 void test_vst1q_f32(float32_t * a, float32x4_t b) {
15147 vst1q_f32(a, b);
15148 }
15150 // CHECK-LABEL: @test_vst1q_p8(
15151 // CHECK: call void @llvm.arm.neon.vst1.p0.v16i8(ptr %a, <16 x i8> %b, i32 1)
15152 // CHECK: ret void
15153 void test_vst1q_p8(poly8_t * a, poly8x16_t b) {
15154 vst1q_p8(a, b);
15155 }
15157 // CHECK-LABEL: @test_vst1q_p16(
15158 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15159 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15160 // CHECK: call void @llvm.arm.neon.vst1.p0.v8i16(ptr %a, <8 x i16> [[TMP2]], i32 2)
15161 // CHECK: ret void
15162 void test_vst1q_p16(poly16_t * a, poly16x8_t b) {
15163 vst1q_p16(a, b);
15164 }
15166 // CHECK-LABEL: @test_vst1_u8(
15167 // CHECK: call void @llvm.arm.neon.vst1.p0.v8i8(ptr %a, <8 x i8> %b, i32 1)
15168 // CHECK: ret void
15169 void test_vst1_u8(uint8_t * a, uint8x8_t b) {
15170 vst1_u8(a, b);
15171 }
15173 // CHECK-LABEL: @test_vst1_u16(
15174 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
15175 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
15176 // CHECK: call void @llvm.arm.neon.vst1.p0.v4i16(ptr %a, <4 x i16> [[TMP2]], i32 2)
15177 // CHECK: ret void
15178 void test_vst1_u16(uint16_t * a, uint16x4_t b) {
15179 vst1_u16(a, b);
15180 }
15182 // CHECK-LABEL: @test_vst1_u32(
15183 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
15184 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
15185 // CHECK: call void @llvm.arm.neon.vst1.p0.v2i32(ptr %a, <2 x i32> [[TMP2]], i32 4)
15186 // CHECK: ret void
15187 void test_vst1_u32(uint32_t * a, uint32x2_t b) {
15188 vst1_u32(a, b);
15189 }
15191 // CHECK-LABEL: @test_vst1_u64(
15192 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
15193 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
15194 // CHECK: call void @llvm.arm.neon.vst1.p0.v1i64(ptr %a, <1 x i64> [[TMP2]], i32 4)
15195 // CHECK: ret void
15196 void test_vst1_u64(uint64_t * a, uint64x1_t b) {
15197 vst1_u64(a, b);
15198 }
15200 // CHECK-LABEL: @test_vst1_s8(
15201 // CHECK: call void @llvm.arm.neon.vst1.p0.v8i8(ptr %a, <8 x i8> %b, i32 1)
15202 // CHECK: ret void
15203 void test_vst1_s8(int8_t * a, int8x8_t b) {
15204 vst1_s8(a, b);
15205 }
15207 // CHECK-LABEL: @test_vst1_s16(
15208 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
15209 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
15210 // CHECK: call void @llvm.arm.neon.vst1.p0.v4i16(ptr %a, <4 x i16> [[TMP2]], i32 2)
15211 // CHECK: ret void
15212 void test_vst1_s16(int16_t * a, int16x4_t b) {
15213 vst1_s16(a, b);
15214 }
15216 // CHECK-LABEL: @test_vst1_s32(
15217 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
15218 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
15219 // CHECK: call void @llvm.arm.neon.vst1.p0.v2i32(ptr %a, <2 x i32> [[TMP2]], i32 4)
15220 // CHECK: ret void
15221 void test_vst1_s32(int32_t * a, int32x2_t b) {
15222 vst1_s32(a, b);
15223 }
15225 // CHECK-LABEL: @test_vst1_s64(
15226 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
15227 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
15228 // CHECK: call void @llvm.arm.neon.vst1.p0.v1i64(ptr %a, <1 x i64> [[TMP2]], i32 4)
15229 // CHECK: ret void
15230 void test_vst1_s64(int64_t * a, int64x1_t b) {
15231 vst1_s64(a, b);
15232 }
15234 // CHECK-LABEL: @test_vst1_f16(
15235 // CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
15236 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
15237 // CHECK: call void @llvm.arm.neon.vst1.p0.v4f16(ptr %a, <4 x half> [[TMP2]], i32 2)
15238 // CHECK: ret void
15239 void test_vst1_f16(float16_t * a, float16x4_t b) {
15240 vst1_f16(a, b);
15241 }
15243 // CHECK-LABEL: @test_vst1_f32(
15244 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
15245 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
15246 // CHECK: call void @llvm.arm.neon.vst1.p0.v2f32(ptr %a, <2 x float> [[TMP2]], i32 4)
15247 // CHECK: ret void
15248 void test_vst1_f32(float32_t * a, float32x2_t b) {
15249 vst1_f32(a, b);
15250 }
15252 // CHECK-LABEL: @test_vst1_p8(
15253 // CHECK: call void @llvm.arm.neon.vst1.p0.v8i8(ptr %a, <8 x i8> %b, i32 1)
15254 // CHECK: ret void
15255 void test_vst1_p8(poly8_t * a, poly8x8_t b) {
15256 vst1_p8(a, b);
15257 }
15259 // CHECK-LABEL: @test_vst1_p16(
15260 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
15261 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
15262 // CHECK: call void @llvm.arm.neon.vst1.p0.v4i16(ptr %a, <4 x i16> [[TMP2]], i32 2)
15263 // CHECK: ret void
15264 void test_vst1_p16(poly16_t * a, poly16x4_t b) {
15265 vst1_p16(a, b);
15266 }
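// Lane stores: for 8-, 16-, and 32-bit elements, vst1_lane/vst1q_lane lower
// to a plain 'extractelement' of the requested lane followed by an ordinary
// scalar store at the element's natural alignment, with no NEON intrinsic
// involved. The 64-bit q-register forms are the one exception, noted below.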
15268 // CHECK-LABEL: @test_vst1q_lane_u8(
15269 // CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
15270 // CHECK: store i8 [[TMP0]], ptr %a, align 1
15271 // CHECK: ret void
15272 void test_vst1q_lane_u8(uint8_t * a, uint8x16_t b) {
15273 vst1q_lane_u8(a, b, 15);
15274 }
15276 // CHECK-LABEL: @test_vst1q_lane_u16(
15277 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15278 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15279 // CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
15280 // CHECK: store i16 [[TMP3]], ptr %a, align 2
15281 // CHECK: ret void
15282 void test_vst1q_lane_u16(uint16_t * a, uint16x8_t b) {
15283 vst1q_lane_u16(a, b, 7);
15284 }
15286 // CHECK-LABEL: @test_vst1q_lane_u32(
15287 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
15288 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
15289 // CHECK: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
15290 // CHECK: store i32 [[TMP3]], ptr %a, align 4
15291 // CHECK: ret void
15292 void test_vst1q_lane_u32(uint32_t * a, uint32x4_t b) {
15293 vst1q_lane_u32(a, b, 3);
15294 }
15296 // CHECK-LABEL: @test_vst1q_lane_u64(
15297 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
15298 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
15299 // CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> <i32 1>
15300 // CHECK: call void @llvm.arm.neon.vst1.p0.v1i64(ptr %a, <1 x i64> [[TMP3]], i32 4)
15301 // CHECK: ret void
15302 void test_vst1q_lane_u64(uint64_t * a, uint64x2_t b) {
15303 vst1q_lane_u64(a, b, 1);
15304 }
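// As checked above (and for the s64 variant below), vst1q_lane on 64-bit
// elements does not extract a scalar: it selects the lane with a
// shufflevector down to <1 x i64> and stores it through
// @llvm.arm.neon.vst1.p0.v1i64 instead.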
15306 // CHECK-LABEL: @test_vst1q_lane_s8(
15307 // CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
15308 // CHECK: store i8 [[TMP0]], ptr %a, align 1
15309 // CHECK: ret void
15310 void test_vst1q_lane_s8(int8_t * a, int8x16_t b) {
15311 vst1q_lane_s8(a, b, 15);
15312 }
15314 // CHECK-LABEL: @test_vst1q_lane_s16(
15315 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15316 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15317 // CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
15318 // CHECK: store i16 [[TMP3]], ptr %a, align 2
15319 // CHECK: ret void
15320 void test_vst1q_lane_s16(int16_t * a, int16x8_t b) {
15321 vst1q_lane_s16(a, b, 7);
15322 }
15324 // CHECK-LABEL: @test_vst1q_lane_s32(
15325 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
15326 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
15327 // CHECK: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
15328 // CHECK: store i32 [[TMP3]], ptr %a, align 4
15329 // CHECK: ret void
15330 void test_vst1q_lane_s32(int32_t * a, int32x4_t b) {
15331 vst1q_lane_s32(a, b, 3);
15332 }
15334 // CHECK-LABEL: @test_vst1q_lane_s64(
15335 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
15336 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
15337 // CHECK: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> <i32 1>
15338 // CHECK: call void @llvm.arm.neon.vst1.p0.v1i64(ptr %a, <1 x i64> [[TMP3]], i32 4)
15339 // CHECK: ret void
15340 void test_vst1q_lane_s64(int64_t * a, int64x2_t b) {
15341 vst1q_lane_s64(a, b, 1);
15342 }
15344 // CHECK-LABEL: @test_vst1q_lane_f16(
15345 // CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
15346 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
15347 // CHECK: [[TMP3:%.*]] = extractelement <8 x half> [[TMP2]], i32 7
15348 // CHECK: store half [[TMP3]], ptr %a, align 2
15349 // CHECK: ret void
15350 void test_vst1q_lane_f16(float16_t * a, float16x8_t b) {
15351 vst1q_lane_f16(a, b, 7);
15352 }
15354 // CHECK-LABEL: @test_vst1q_lane_f32(
15355 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
15356 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
15357 // CHECK: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
15358 // CHECK: store float [[TMP3]], ptr %a, align 4
15359 // CHECK: ret void
15360 void test_vst1q_lane_f32(float32_t * a, float32x4_t b) {
15361 vst1q_lane_f32(a, b, 3);
15362 }
15364 // CHECK-LABEL: @test_vst1q_lane_p8(
15365 // CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
15366 // CHECK: store i8 [[TMP0]], ptr %a, align 1
15367 // CHECK: ret void
15368 void test_vst1q_lane_p8(poly8_t * a, poly8x16_t b) {
15369 vst1q_lane_p8(a, b, 15);
15370 }
15372 // CHECK-LABEL: @test_vst1q_lane_p16(
15373 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
15374 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
15375 // CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
15376 // CHECK: store i16 [[TMP3]], ptr %a, align 2
15377 // CHECK: ret void
15378 void test_vst1q_lane_p16(poly16_t * a, poly16x8_t b) {
15379 vst1q_lane_p16(a, b, 7);
15380 }
15382 // CHECK-LABEL: @test_vst1_lane_u8(
15383 // CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
15384 // CHECK: store i8 [[TMP0]], ptr %a, align 1
15385 // CHECK: ret void
15386 void test_vst1_lane_u8(uint8_t * a, uint8x8_t b) {
15387 vst1_lane_u8(a, b, 7);
15388 }
15390 // CHECK-LABEL: @test_vst1_lane_u16(
15391 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
15392 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
15393 // CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
15394 // CHECK: store i16 [[TMP3]], ptr %a, align 2
15395 // CHECK: ret void
15396 void test_vst1_lane_u16(uint16_t * a, uint16x4_t b) {
15397 vst1_lane_u16(a, b, 3);
15398 }
15400 // CHECK-LABEL: @test_vst1_lane_u32(
15401 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
15402 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
15403 // CHECK: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
15404 // CHECK: store i32 [[TMP3]], ptr %a, align 4
15405 // CHECK: ret void
15406 void test_vst1_lane_u32(uint32_t * a, uint32x2_t b) {
15407 vst1_lane_u32(a, b, 1);
15408 }
15410 // CHECK-LABEL: @test_vst1_lane_u64(
15411 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
15412 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
15413 // CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
15414 // CHECK: store i64 [[TMP3]], ptr %a, align 4
15415 // CHECK: ret void
15416 void test_vst1_lane_u64(uint64_t * a, uint64x1_t b) {
15417 vst1_lane_u64(a, b, 0);
15418 }
15420 // CHECK-LABEL: @test_vst1_lane_s8(
15421 // CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
15422 // CHECK: store i8 [[TMP0]], ptr %a, align 1
15423 // CHECK: ret void
15424 void test_vst1_lane_s8(int8_t * a, int8x8_t b) {
15425 vst1_lane_s8(a, b, 7);
15426 }
15428 // CHECK-LABEL: @test_vst1_lane_s16(
15429 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
15430 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
15431 // CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
15432 // CHECK: store i16 [[TMP3]], ptr %a, align 2
15433 // CHECK: ret void
15434 void test_vst1_lane_s16(int16_t * a, int16x4_t b) {
15435 vst1_lane_s16(a, b, 3);
15436 }
15438 // CHECK-LABEL: @test_vst1_lane_s32(
15439 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
15440 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
15441 // CHECK: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
15442 // CHECK: store i32 [[TMP3]], ptr %a, align 4
15443 // CHECK: ret void
15444 void test_vst1_lane_s32(int32_t * a, int32x2_t b) {
15445 vst1_lane_s32(a, b, 1);
15446 }
15448 // CHECK-LABEL: @test_vst1_lane_s64(
15449 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
15450 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
15451 // CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
15452 // CHECK: store i64 [[TMP3]], ptr %a, align 4
15453 // CHECK: ret void
15454 void test_vst1_lane_s64(int64_t * a, int64x1_t b) {
15455 vst1_lane_s64(a, b, 0);
15456 }
15458 // CHECK-LABEL: @test_vst1_lane_f16(
15459 // CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
15460 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
15461 // CHECK: [[TMP3:%.*]] = extractelement <4 x half> [[TMP2]], i32 3
15462 // CHECK: store half [[TMP3]], ptr %a, align 2
15463 // CHECK: ret void
15464 void test_vst1_lane_f16(float16_t * a, float16x4_t b) {
15465 vst1_lane_f16(a, b, 3);
15466 }
15468 // CHECK-LABEL: @test_vst1_lane_f32(
15469 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
15470 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
15471 // CHECK: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
15472 // CHECK: store float [[TMP3]], ptr %a, align 4
15473 // CHECK: ret void
15474 void test_vst1_lane_f32(float32_t * a, float32x2_t b) {
15475 vst1_lane_f32(a, b, 1);
15476 }
15478 // CHECK-LABEL: @test_vst1_lane_p8(
15479 // CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
15480 // CHECK: store i8 [[TMP0]], ptr %a, align 1
15481 // CHECK: ret void
15482 void test_vst1_lane_p8(poly8_t * a, poly8x8_t b) {
15483 vst1_lane_p8(a, b, 7);
15484 }
15486 // CHECK-LABEL: @test_vst1_lane_p16(
15487 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
15488 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
15489 // CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
15490 // CHECK: store i16 [[TMP3]], ptr %a, align 2
15491 // CHECK: ret void
15492 void test_vst1_lane_p16(poly16_t * a, poly16x4_t b) {
15493 vst1_lane_p16(a, b, 3);
15494 }
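// The vst2 tests change shape because the argument is a structure of two
// vectors: the ABI coerces it to a [4 x i64] array ([2 x i64] for the
// 64-bit d-register forms), so the IR first spills the coerced argument to
// an alloca and memcpys it into the local __s1 copy, then loads the two
// member vectors and passes them to @llvm.arm.neon.vst2, which stores them
// interleaved. A usage sketch (hypothetical caller, not part of the test):
//   uint8x16x2_t pair = { { v0, v1 } }; // pair.val[0] = v0, pair.val[1] = v1
//   vst2q_u8(buf, pair);                // memory: v0[0], v1[0], v0[1], v1[1], ...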
15496 // CHECK-LABEL: @test_vst2q_u8(
15497 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
15498 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
15499 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, ptr [[B]], i32 0, i32 0
15500 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
15501 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
15502 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0
15503 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i32 0, i32 0
15504 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
15505 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0
15506 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i32 0, i32 1
15507 // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
15508 // CHECK: call void @llvm.arm.neon.vst2.p0.v16i8(ptr %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
15509 // CHECK: ret void
15510 void test_vst2q_u8(uint8_t * a, uint8x16x2_t b) {
15511 vst2q_u8(a, b);
15512 }
15514 // CHECK-LABEL: @test_vst2q_u16(
15515 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
15516 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
15517 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, ptr [[B]], i32 0, i32 0
15518 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
15519 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
15520 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
15521 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
15522 // CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
15523 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
15524 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
15525 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
15526 // CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
15527 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
15528 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
15529 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
15530 // CHECK: call void @llvm.arm.neon.vst2.p0.v8i16(ptr %a, <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
15531 // CHECK: ret void
15532 void test_vst2q_u16(uint16_t * a, uint16x8x2_t b) {
15533 vst2q_u16(a, b);
15534 }
15536 // CHECK-LABEL: @test_vst2q_u32(
15537 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
15538 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
15539 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, ptr [[B]], i32 0, i32 0
15540 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
15541 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
15542 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
15543 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
15544 // CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
15545 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
15546 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
15547 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
15548 // CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
15549 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
15550 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
15551 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
15552 // CHECK: call void @llvm.arm.neon.vst2.p0.v4i32(ptr %a, <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 4)
15553 // CHECK: ret void
15554 void test_vst2q_u32(uint32_t * a, uint32x4x2_t b) {
15555 vst2q_u32(a, b);
15556 }
15558 // CHECK-LABEL: @test_vst2q_s8(
15559 // CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
15560 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
15561 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, ptr [[B]], i32 0, i32 0
15562 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
15563 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
15564 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0
15565 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i32 0, i32 0
15566 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
15567 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0
15568 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i32 0, i32 1
15569 // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
15570 // CHECK: call void @llvm.arm.neon.vst2.p0.v16i8(ptr %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
15571 // CHECK: ret void
15572 void test_vst2q_s8(int8_t * a, int8x16x2_t b) {
15573 vst2q_s8(a, b);
15574 }
15576 // CHECK-LABEL: @test_vst2q_s16(
15577 // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
15578 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
15579 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, ptr [[B]], i32 0, i32 0
15580 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
15581 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
15582 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
15583 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
15584 // CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
15585 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
15586 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
15587 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
15588 // CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
15589 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
15590 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
15591 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
15592 // CHECK: call void @llvm.arm.neon.vst2.p0.v8i16(ptr %a, <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
15593 // CHECK: ret void
15594 void test_vst2q_s16(int16_t * a, int16x8x2_t b) {
15595 vst2q_s16(a, b);
15596 }
15598 // CHECK-LABEL: @test_vst2q_s32(
15599 // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
15600 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
15601 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, ptr [[B]], i32 0, i32 0
15602 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
15603 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
15604 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
15605 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
15606 // CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
15607 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
15608 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
15609 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
15610 // CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
15611 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
15612 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
15613 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
15614 // CHECK: call void @llvm.arm.neon.vst2.p0.v4i32(ptr %a, <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 4)
15615 // CHECK: ret void
15616 void test_vst2q_s32(int32_t * a, int32x4x2_t b) {
15617 vst2q_s32(a, b);
15618 }
15620 // CHECK-LABEL: @test_vst2q_f16(
15621 // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
15622 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
15623 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, ptr [[B]], i32 0, i32 0
15624 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
15625 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
15626 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
15627 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL]], i32 0, i32 0
15628 // CHECK: [[TMP4:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
15629 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
15630 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
15631 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL1]], i32 0, i32 1
15632 // CHECK: [[TMP6:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
15633 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
15634 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
15635 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
15636 // CHECK: call void @llvm.arm.neon.vst2.p0.v8f16(ptr %a, <8 x half> [[TMP8]], <8 x half> [[TMP9]], i32 2)
15637 // CHECK: ret void
15638 void test_vst2q_f16(float16_t * a, float16x8x2_t b) {
15639 vst2q_f16(a, b);
15640 }
15642 // CHECK-LABEL: @test_vst2q_f32(
15643 // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
15644 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
15645 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, ptr [[B]], i32 0, i32 0
15646 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
15647 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
15648 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
15649 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL]], i32 0, i32 0
15650 // CHECK: [[TMP4:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
15651 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
15652 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
15653 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL1]], i32 0, i32 1
15654 // CHECK: [[TMP6:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
15655 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
15656 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
15657 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
15658 // CHECK: call void @llvm.arm.neon.vst2.p0.v4f32(ptr %a, <4 x float> [[TMP8]], <4 x float> [[TMP9]], i32 4)
15659 // CHECK: ret void
15660 void test_vst2q_f32(float32_t * a, float32x4x2_t b) {
15661 vst2q_f32(a, b);
15662 }
15664 // CHECK-LABEL: @test_vst2q_p8(
15665 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
15666 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
15667 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, ptr [[B]], i32 0, i32 0
15668 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
15669 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
15670 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0
15671 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i32 0, i32 0
15672 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
15673 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0
15674 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i32 0, i32 1
15675 // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
15676 // CHECK: call void @llvm.arm.neon.vst2.p0.v16i8(ptr %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
15677 // CHECK: ret void
15678 void test_vst2q_p8(poly8_t * a, poly8x16x2_t b) {
15679 vst2q_p8(a, b);
15680 }
15682 // CHECK-LABEL: @test_vst2q_p16(
15683 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
15684 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
15685 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, ptr [[B]], i32 0, i32 0
15686 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
15687 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
15688 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
15689 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
15690 // CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
15691 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
15692 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
15693 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
15694 // CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
15695 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
15696 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
15697 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
15698 // CHECK: call void @llvm.arm.neon.vst2.p0.v8i16(ptr %a, <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
15699 // CHECK: ret void
15700 void test_vst2q_p16(poly16_t * a, poly16x8x2_t b) {
15701 vst2q_p16(a, b);
15702 }
15704 // CHECK-LABEL: @test_vst2_u8(
15705 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
15706 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
15707 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr [[B]], i32 0, i32 0
15708 // CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
15709 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
15710 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
15711 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
15712 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
15713 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
15714 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
15715 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
15716 // CHECK: call void @llvm.arm.neon.vst2.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1)
15717 // CHECK: ret void
15718 void test_vst2_u8(uint8_t * a, uint8x8x2_t b) {
15719 vst2_u8(a, b);
15720 }
15722 // CHECK-LABEL: @test_vst2_u16(
15723 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
15724 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
15725 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, ptr [[B]], i32 0, i32 0
15726 // CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
15727 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
15728 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
15729 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
15730 // CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
15731 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
15732 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
15733 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
15734 // CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
15735 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
15736 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
15737 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
15738 // CHECK: call void @llvm.arm.neon.vst2.p0.v4i16(ptr %a, <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
15739 // CHECK: ret void
15740 void test_vst2_u16(uint16_t * a, uint16x4x2_t b) {
15741 vst2_u16(a, b);
15742 }
15744 // CHECK-LABEL: @test_vst2_u32(
15745 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
15746 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
15747 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, ptr [[B]], i32 0, i32 0
15748 // CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
15749 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
15750 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
15751 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
15752 // CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
15753 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
15754 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
15755 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
15756 // CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
15757 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
15758 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
15759 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
15760 // CHECK: call void @llvm.arm.neon.vst2.p0.v2i32(ptr %a, <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 4)
15761 // CHECK: ret void
15762 void test_vst2_u32(uint32_t * a, uint32x2x2_t b) {
15763 vst2_u32(a, b);
15764 }
15766 // CHECK-LABEL: @test_vst2_u64(
15767 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
15768 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
15769 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, ptr [[B]], i32 0, i32 0
15770 // CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
15771 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
15772 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0
15773 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i32 0, i32 0
15774 // CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
15775 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
15776 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0
15777 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i32 0, i32 1
15778 // CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
15779 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
15780 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
15781 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
15782 // CHECK: call void @llvm.arm.neon.vst2.p0.v1i64(ptr %a, <1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i32 4)
15783 // CHECK: ret void
15784 void test_vst2_u64(uint64_t * a, uint64x1x2_t b) {
15785 vst2_u64(a, b);
15786 }
15788 // CHECK-LABEL: @test_vst2_s8(
15789 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
15790 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
15791 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr [[B]], i32 0, i32 0
15792 // CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
15793 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
15794 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
15795 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
15796 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
15797 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
15798 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
15799 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
15800 // CHECK: call void @llvm.arm.neon.vst2.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1)
15801 // CHECK: ret void
15802 void test_vst2_s8(int8_t * a, int8x8x2_t b) {
15803 vst2_s8(a, b);
15804 }
15806 // CHECK-LABEL: @test_vst2_s16(
15807 // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
15808 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
15809 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, ptr [[B]], i32 0, i32 0
15810 // CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
15811 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
15812 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
15813 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
15814 // CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
15815 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
15816 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
15817 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
15818 // CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
15819 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
15820 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
15821 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
15822 // CHECK: call void @llvm.arm.neon.vst2.p0.v4i16(ptr %a, <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
15823 // CHECK: ret void
15824 void test_vst2_s16(int16_t * a, int16x4x2_t b) {
15825 vst2_s16(a, b);
15826 }
15828 // CHECK-LABEL: @test_vst2_s32(
15829 // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
15830 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
15831 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, ptr [[B]], i32 0, i32 0
15832 // CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
15833 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
15834 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
15835 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
15836 // CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
15837 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
15838 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
15839 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
15840 // CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
15841 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
15842 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
15843 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
15844 // CHECK: call void @llvm.arm.neon.vst2.p0.v2i32(ptr %a, <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 4)
15845 // CHECK: ret void
15846 void test_vst2_s32(int32_t * a, int32x2x2_t b) {
15847 vst2_s32(a, b);
15848 }
15850 // CHECK-LABEL: @test_vst2_s64(
15851 // CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
15852 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
15853 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, ptr [[B]], i32 0, i32 0
15854 // CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
15855 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
15856 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0
15857 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i32 0, i32 0
15858 // CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
15859 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
15860 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0
15861 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i32 0, i32 1
15862 // CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
15863 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
15864 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
15865 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
15866 // CHECK: call void @llvm.arm.neon.vst2.p0.v1i64(ptr %a, <1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i32 4)
15867 // CHECK: ret void
15868 void test_vst2_s64(int64_t * a, int64x1x2_t b) {
15869 vst2_s64(a, b);
15870 }
15872 // CHECK-LABEL: @test_vst2_f16(
15873 // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
15874 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
15875 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, ptr [[B]], i32 0, i32 0
15876 // CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
15877 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
15878 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
15879 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL]], i32 0, i32 0
15880 // CHECK: [[TMP4:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
15881 // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
15882 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
15883 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL1]], i32 0, i32 1
15884 // CHECK: [[TMP6:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
15885 // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
15886 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
15887 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
15888 // CHECK: call void @llvm.arm.neon.vst2.p0.v4f16(ptr %a, <4 x half> [[TMP8]], <4 x half> [[TMP9]], i32 2)
15889 // CHECK: ret void
15890 void test_vst2_f16(float16_t * a, float16x4x2_t b) {
15891 vst2_f16(a, b);
15892 }
15894 // CHECK-LABEL: @test_vst2_f32(
15895 // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
15896 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
15897 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, ptr [[B]], i32 0, i32 0
15898 // CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
15899 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
15900 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
15901 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL]], i32 0, i32 0
15902 // CHECK: [[TMP4:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
15903 // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
15904 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
15905 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL1]], i32 0, i32 1
15906 // CHECK: [[TMP6:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
15907 // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
15908 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
15909 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
15910 // CHECK: call void @llvm.arm.neon.vst2.p0.v2f32(ptr %a, <2 x float> [[TMP8]], <2 x float> [[TMP9]], i32 4)
15911 // CHECK: ret void
15912 void test_vst2_f32(float32_t * a, float32x2x2_t b) {
15913 vst2_f32(a, b);
15914 }
15916 // CHECK-LABEL: @test_vst2_p8(
15917 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
15918 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
15919 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr [[B]], i32 0, i32 0
15920 // CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
15921 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
15922 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
15923 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
15924 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
15925 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
15926 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
15927 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
15928 // CHECK: call void @llvm.arm.neon.vst2.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1)
15929 // CHECK: ret void
15930 void test_vst2_p8(poly8_t * a, poly8x8x2_t b) {
15931 vst2_p8(a, b);
15932 }
15934 // CHECK-LABEL: @test_vst2_p16(
15935 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
15936 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
15937 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, ptr [[B]], i32 0, i32 0
15938 // CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
15939 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
15940 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
15941 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
15942 // CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
15943 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
15944 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
15945 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
15946 // CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
15947 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
15948 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
15949 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
15950 // CHECK: call void @llvm.arm.neon.vst2.p0.v4i16(ptr %a, <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
15951 // CHECK: ret void
15952 void test_vst2_p16(poly16_t * a, poly16x4x2_t b) {
15953 vst2_p16(a, b);
15954 }
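// The *_lane tests that follow exercise llvm.arm.neon.vst2lane rather than
// the full-structure store: the call carries the constant lane index as an
// extra operand before the alignment (e.g. lane 7 for <8 x i16>, lane 3 for
// <4 x i32>), matching the immediate passed to the intrinsic in source.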
15956 // CHECK-LABEL: @test_vst2q_lane_u16(
15957 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
15958 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
15959 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, ptr [[B]], i32 0, i32 0
15960 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
15961 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
15962 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
15963 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
15964 // CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
15965 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
15966 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
15967 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
15968 // CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
15969 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
15970 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
15971 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
15972 // CHECK: call void @llvm.arm.neon.vst2lane.p0.v8i16(ptr %a, <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
15973 // CHECK: ret void
15974 void test_vst2q_lane_u16(uint16_t * a, uint16x8x2_t b) {
15975 vst2q_lane_u16(a, b, 7);
15976 }
15978 // CHECK-LABEL: @test_vst2q_lane_u32(
15979 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
15980 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
15981 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, ptr [[B]], i32 0, i32 0
15982 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
15983 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
15984 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
15985 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
15986 // CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
15987 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
15988 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
15989 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
15990 // CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
15991 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
15992 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
15993 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
15994 // CHECK: call void @llvm.arm.neon.vst2lane.p0.v4i32(ptr %a, <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 3, i32 4)
15995 // CHECK: ret void
15996 void test_vst2q_lane_u32(uint32_t * a, uint32x4x2_t b) {
15997 vst2q_lane_u32(a, b, 3);
15998 }
16000 // CHECK-LABEL: @test_vst2q_lane_s16(
16001 // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
16002 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
16003 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, ptr [[B]], i32 0, i32 0
16004 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
16005 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
16006 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
16007 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
16008 // CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
16009 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
16010 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
16011 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
16012 // CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
16013 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
16014 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
16015 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
16016 // CHECK: call void @llvm.arm.neon.vst2lane.p0.v8i16(ptr %a, <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
16017 // CHECK: ret void
16018 void test_vst2q_lane_s16(int16_t * a, int16x8x2_t b) {
16019 vst2q_lane_s16(a, b, 7);
16020 }
16022 // CHECK-LABEL: @test_vst2q_lane_s32(
16023 // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
16024 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
16025 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, ptr [[B]], i32 0, i32 0
16026 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
16027 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
16028 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
16029 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
16030 // CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
16031 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
16032 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
16033 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
16034 // CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
16035 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
16036 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
16037 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
16038 // CHECK: call void @llvm.arm.neon.vst2lane.p0.v4i32(ptr %a, <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 3, i32 4)
16039 // CHECK: ret void
16040 void test_vst2q_lane_s32(int32_t * a, int32x4x2_t b) {
16041 vst2q_lane_s32(a, b, 3);
16042 }
16044 // CHECK-LABEL: @test_vst2q_lane_f16(
16045 // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
16046 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
16047 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, ptr [[B]], i32 0, i32 0
16048 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
16049 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
16050 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
16051 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL]], i32 0, i32 0
16052 // CHECK: [[TMP4:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
16053 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
16054 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
16055 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL1]], i32 0, i32 1
16056 // CHECK: [[TMP6:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
16057 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
16058 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
16059 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
16060 // CHECK: call void @llvm.arm.neon.vst2lane.p0.v8f16(ptr %a, <8 x half> [[TMP8]], <8 x half> [[TMP9]], i32 7, i32 2)
16061 // CHECK: ret void
16062 void test_vst2q_lane_f16(float16_t * a, float16x8x2_t b) {
16063 vst2q_lane_f16(a, b, 7);
16064 }
16066 // CHECK-LABEL: @test_vst2q_lane_f32(
16067 // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
16068 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
16069 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, ptr [[B]], i32 0, i32 0
16070 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
16071 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
16072 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
16073 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL]], i32 0, i32 0
16074 // CHECK: [[TMP4:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
16075 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
16076 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
16077 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL1]], i32 0, i32 1
16078 // CHECK: [[TMP6:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
16079 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
16080 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
16081 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
16082 // CHECK: call void @llvm.arm.neon.vst2lane.p0.v4f32(ptr %a, <4 x float> [[TMP8]], <4 x float> [[TMP9]], i32 3, i32 4)
16083 // CHECK: ret void
16084 void test_vst2q_lane_f32(float32_t * a, float32x4x2_t b) {
16085 vst2q_lane_f32(a, b, 3);
16086 }
16088 // CHECK-LABEL: @test_vst2q_lane_p16(
16089 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
16090 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
16091 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, ptr [[B]], i32 0, i32 0
16092 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
16093 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 32, i1 false)
16094 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
16095 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
16096 // CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
16097 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
16098 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
16099 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
16100 // CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
16101 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
16102 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
16103 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
16104 // CHECK: call void @llvm.arm.neon.vst2lane.p0.v8i16(ptr %a, <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
16105 // CHECK: ret void
16106 void test_vst2q_lane_p16(poly16_t * a, poly16x8x2_t b) {
16107 vst2q_lane_p16(a, b, 7);
16108 }
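// The 64-bit vst2_lane variants below mirror the q-register ones with
// narrower vectors, so the valid lane range shrinks accordingly (7 for
// <8 x i8>, 3 for <4 x i16>/<4 x half>, 1 for <2 x i32>/<2 x float>).
// A minimal usage sketch (illustrative only, not a CHECK'd test; the helper
// name is hypothetical): storing one interleaved lane pair.
static inline void store_lane3_pair(uint16_t *p, uint16x4x2_t v) {
  // Writes v.val[0][3] then v.val[1][3] contiguously starting at p.
  vst2_lane_u16(p, v, 3);
}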
16110 // CHECK-LABEL: @test_vst2_lane_u8(
16111 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
16112 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
16113 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr [[B]], i32 0, i32 0
16114 // CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
16115 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
16116 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
16117 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
16118 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
16119 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
16120 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
16121 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
16122 // CHECK: call void @llvm.arm.neon.vst2lane.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
16123 // CHECK: ret void
16124 void test_vst2_lane_u8(uint8_t * a, uint8x8x2_t b) {
16125 vst2_lane_u8(a, b, 7);
16126 }
16128 // CHECK-LABEL: @test_vst2_lane_u16(
16129 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
16130 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
16131 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, ptr [[B]], i32 0, i32 0
16132 // CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
16133 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
16134 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
16135 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
16136 // CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
16137 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
16138 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
16139 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
16140 // CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
16141 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
16142 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
16143 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
16144 // CHECK: call void @llvm.arm.neon.vst2lane.p0.v4i16(ptr %a, <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
16145 // CHECK: ret void
16146 void test_vst2_lane_u16(uint16_t * a, uint16x4x2_t b) {
16147 vst2_lane_u16(a, b, 3);
16148 }
16150 // CHECK-LABEL: @test_vst2_lane_u32(
16151 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
16152 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
16153 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, ptr [[B]], i32 0, i32 0
16154 // CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
16155 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
16156 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
16157 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
16158 // CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
16159 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
16160 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
16161 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
16162 // CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
16163 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
16164 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
16165 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
16166 // CHECK: call void @llvm.arm.neon.vst2lane.p0.v2i32(ptr %a, <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 1, i32 4)
16167 // CHECK: ret void
16168 void test_vst2_lane_u32(uint32_t * a, uint32x2x2_t b) {
16169 vst2_lane_u32(a, b, 1);
16170 }
16172 // CHECK-LABEL: @test_vst2_lane_s8(
16173 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
16174 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
16175 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr [[B]], i32 0, i32 0
16176 // CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
16177 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
16178 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
16179 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
16180 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
16181 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
16182 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
16183 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
16184 // CHECK: call void @llvm.arm.neon.vst2lane.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
16185 // CHECK: ret void
16186 void test_vst2_lane_s8(int8_t * a, int8x8x2_t b) {
16187 vst2_lane_s8(a, b, 7);
16188 }
16190 // CHECK-LABEL: @test_vst2_lane_s16(
16191 // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
16192 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
16193 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, ptr [[B]], i32 0, i32 0
16194 // CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
16195 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
16196 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
16197 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
16198 // CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
16199 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
16200 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
16201 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
16202 // CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
16203 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
16204 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
16205 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
16206 // CHECK: call void @llvm.arm.neon.vst2lane.p0.v4i16(ptr %a, <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
16207 // CHECK: ret void
16208 void test_vst2_lane_s16(int16_t * a, int16x4x2_t b) {
16209 vst2_lane_s16(a, b, 3);
16210 }
16212 // CHECK-LABEL: @test_vst2_lane_s32(
16213 // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
16214 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
16215 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, ptr [[B]], i32 0, i32 0
16216 // CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
16217 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
16218 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
16219 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
16220 // CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
16221 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
16222 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
16223 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
16224 // CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
16225 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
16226 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
16227 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
16228 // CHECK: call void @llvm.arm.neon.vst2lane.p0.v2i32(ptr %a, <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 1, i32 4)
16229 // CHECK: ret void
16230 void test_vst2_lane_s32(int32_t * a, int32x2x2_t b) {
16231 vst2_lane_s32(a, b, 1);
16232 }
16234 // CHECK-LABEL: @test_vst2_lane_f16(
16235 // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
16236 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
16237 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, ptr [[B]], i32 0, i32 0
16238 // CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
16239 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
16240 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
16241 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL]], i32 0, i32 0
16242 // CHECK: [[TMP4:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
16243 // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
16244 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
16245 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL1]], i32 0, i32 1
16246 // CHECK: [[TMP6:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
16247 // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
16248 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
16249 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
16250 // CHECK: call void @llvm.arm.neon.vst2lane.p0.v4f16(ptr %a, <4 x half> [[TMP8]], <4 x half> [[TMP9]], i32 3, i32 2)
16251 // CHECK: ret void
16252 void test_vst2_lane_f16(float16_t * a, float16x4x2_t b) {
16253 vst2_lane_f16(a, b, 3);
16254 }
16256 // CHECK-LABEL: @test_vst2_lane_f32(
16257 // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
16258 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
16259 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, ptr [[B]], i32 0, i32 0
16260 // CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
16261 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
16262 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
16263 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL]], i32 0, i32 0
16264 // CHECK: [[TMP4:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
16265 // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
16266 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
16267 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL1]], i32 0, i32 1
16268 // CHECK: [[TMP6:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
16269 // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
16270 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
16271 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
16272 // CHECK: call void @llvm.arm.neon.vst2lane.p0.v2f32(ptr %a, <2 x float> [[TMP8]], <2 x float> [[TMP9]], i32 1, i32 4)
16273 // CHECK: ret void
16274 void test_vst2_lane_f32(float32_t * a, float32x2x2_t b) {
16275 vst2_lane_f32(a, b, 1);
16276 }
16278 // CHECK-LABEL: @test_vst2_lane_p8(
16279 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
16280 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
16281 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr [[B]], i32 0, i32 0
16282 // CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
16283 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
16284 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
16285 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
16286 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
16287 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
16288 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
16289 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
16290 // CHECK: call void @llvm.arm.neon.vst2lane.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
16291 // CHECK: ret void
16292 void test_vst2_lane_p8(poly8_t * a, poly8x8x2_t b) {
16293 vst2_lane_p8(a, b, 7);
16294 }
16296 // CHECK-LABEL: @test_vst2_lane_p16(
16297 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
16298 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
16299 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, ptr [[B]], i32 0, i32 0
16300 // CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
16301 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 16, i1 false)
16302 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
16303 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
16304 // CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
16305 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
16306 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
16307 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
16308 // CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
16309 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
16310 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
16311 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
16312 // CHECK: call void @llvm.arm.neon.vst2lane.p0.v4i16(ptr %a, <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
16313 // CHECK: ret void
16314 void test_vst2_lane_p16(poly16_t * a, poly16x4x2_t b) {
16315 vst2_lane_p16(a, b, 3);
16316 }
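// The vst3 tests below follow the same lowering shape with three vectors per
// structure: the struct is coerced as [6 x i64] (48 bytes) and
// llvm.arm.neon.vst3 takes three vector operands plus the alignment.
// Illustrative sketch (hypothetical helper, not part of the checked tests):
// a classic vst3 use is writing three de-interleaved planes back as packed
// RGB bytes.
static inline void store_rgb48(uint8_t *dst, uint8x16x3_t rgb) {
  // Emits R0,G0,B0,R1,G1,B1,... for 48 bytes starting at dst.
  vst3q_u8(dst, rgb);
}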
16318 // CHECK-LABEL: @test_vst3q_u8(
16319 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
16320 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
16321 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, ptr [[B]], i32 0, i32 0
16322 // CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
16323 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
16324 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
16325 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i32 0, i32 0
16326 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
16327 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
16328 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i32 0, i32 1
16329 // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
16330 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
16331 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i32 0, i32 2
16332 // CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
16333 // CHECK: call void @llvm.arm.neon.vst3.p0.v16i8(ptr %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
16334 // CHECK: ret void
16335 void test_vst3q_u8(uint8_t * a, uint8x16x3_t b) {
16336 vst3q_u8(a, b);
16337 }
16339 // CHECK-LABEL: @test_vst3q_u16(
16340 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
16341 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
16342 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[B]], i32 0, i32 0
16343 // CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
16344 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
16345 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
16346 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
16347 // CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
16348 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
16349 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
16350 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
16351 // CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
16352 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
16353 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
16354 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
16355 // CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
16356 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
16357 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
16358 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
16359 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
16360 // CHECK: call void @llvm.arm.neon.vst3.p0.v8i16(ptr %a, <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
16361 // CHECK: ret void
16362 void test_vst3q_u16(uint16_t * a, uint16x8x3_t b) {
16363 vst3q_u16(a, b);
16364 }
16366 // CHECK-LABEL: @test_vst3q_u32(
16367 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
16368 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
16369 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[B]], i32 0, i32 0
16370 // CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
16371 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
16372 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
16373 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
16374 // CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
16375 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
16376 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
16377 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
16378 // CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
16379 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
16380 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
16381 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
16382 // CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
16383 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
16384 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
16385 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
16386 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
16387 // CHECK: call void @llvm.arm.neon.vst3.p0.v4i32(ptr %a, <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 4)
16388 // CHECK: ret void
16389 void test_vst3q_u32(uint32_t * a, uint32x4x3_t b) {
16390 vst3q_u32(a, b);
16391 }
16393 // CHECK-LABEL: @test_vst3q_s8(
16394 // CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
16395 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
16396 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, ptr [[B]], i32 0, i32 0
16397 // CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
16398 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
16399 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
16400 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i32 0, i32 0
16401 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
16402 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
16403 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i32 0, i32 1
16404 // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
16405 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
16406 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i32 0, i32 2
16407 // CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
16408 // CHECK: call void @llvm.arm.neon.vst3.p0.v16i8(ptr %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
16409 // CHECK: ret void
16410 void test_vst3q_s8(int8_t * a, int8x16x3_t b) {
16411 vst3q_s8(a, b);
16412 }
16414 // CHECK-LABEL: @test_vst3q_s16(
16415 // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
16416 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
16417 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[B]], i32 0, i32 0
16418 // CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
16419 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
16420 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
16421 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
16422 // CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
16423 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
16424 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
16425 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
16426 // CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
16427 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
16428 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
16429 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
16430 // CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
16431 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
16432 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
16433 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
16434 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
16435 // CHECK: call void @llvm.arm.neon.vst3.p0.v8i16(ptr %a, <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
16436 // CHECK: ret void
16437 void test_vst3q_s16(int16_t * a, int16x8x3_t b) {
16438 vst3q_s16(a, b);
16439 }
16441 // CHECK-LABEL: @test_vst3q_s32(
16442 // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
16443 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
16444 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[B]], i32 0, i32 0
16445 // CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
16446 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
16447 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
16448 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
16449 // CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
16450 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
16451 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
16452 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
16453 // CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
16454 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
16455 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
16456 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
16457 // CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
16458 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
16459 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
16460 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
16461 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
16462 // CHECK: call void @llvm.arm.neon.vst3.p0.v4i32(ptr %a, <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 4)
16463 // CHECK: ret void
16464 void test_vst3q_s32(int32_t * a, int32x4x3_t b) {
16465 vst3q_s32(a, b);
16466 }
16468 // CHECK-LABEL: @test_vst3q_f16(
16469 // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
16470 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
16471 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[B]], i32 0, i32 0
16472 // CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
16473 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
16474 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
16475 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL]], i32 0, i32 0
16476 // CHECK: [[TMP4:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
16477 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
16478 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
16479 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL1]], i32 0, i32 1
16480 // CHECK: [[TMP6:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
16481 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
16482 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
16483 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL3]], i32 0, i32 2
16484 // CHECK: [[TMP8:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
16485 // CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
16486 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
16487 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
16488 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
16489 // CHECK: call void @llvm.arm.neon.vst3.p0.v8f16(ptr %a, <8 x half> [[TMP10]], <8 x half> [[TMP11]], <8 x half> [[TMP12]], i32 2)
16490 // CHECK: ret void
16491 void test_vst3q_f16(float16_t * a, float16x8x3_t b) {
16492 vst3q_f16(a, b);
16493 }
// CHECK-LABEL: @test_vst3q_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
// CHECK: call void @llvm.arm.neon.vst3.p0.v4f32(ptr %a, <4 x float> [[TMP10]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], i32 4)
// CHECK: ret void
void test_vst3q_f32(float32_t * a, float32x4x3_t b) {
  vst3q_f32(a, b);
}

// CHECK-LABEL: @test_vst3q_p8(
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
// CHECK: call void @llvm.arm.neon.vst3.p0.v16i8(ptr %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
// CHECK: ret void
void test_vst3q_p8(poly8_t * a, poly8x16x3_t b) {
  vst3q_p8(a, b);
}

// CHECK-LABEL: @test_vst3q_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst3.p0.v8i16(ptr %a, <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
// CHECK: ret void
void test_vst3q_p16(poly16_t * a, poly16x8x3_t b) {
  vst3q_p16(a, b);
}
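
// The vst3 tests below use the 64-bit (d-register) types: the three-vector
// structs are 24 bytes, passed as [3 x i64] and copied with an 8-byte-aligned
// 24-byte memcpy instead of the 48-byte, 16-byte-aligned copy used above.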
// CHECK-LABEL: @test_vst3_u8(
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.arm.neon.vst3.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
// CHECK: ret void
void test_vst3_u8(uint8_t * a, uint8x8x3_t b) {
  vst3_u8(a, b);
}

// CHECK-LABEL: @test_vst3_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst3.p0.v4i16(ptr %a, <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
// CHECK: ret void
void test_vst3_u16(uint16_t * a, uint16x4x3_t b) {
  vst3_u16(a, b);
}

// CHECK-LABEL: @test_vst3_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst3.p0.v2i32(ptr %a, <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 4)
// CHECK: ret void
void test_vst3_u32(uint32_t * a, uint32x2x3_t b) {
  vst3_u32(a, b);
}

// CHECK-LABEL: @test_vst3_u64(
// CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
// CHECK: call void @llvm.arm.neon.vst3.p0.v1i64(ptr %a, <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i32 4)
// CHECK: ret void
void test_vst3_u64(uint64_t * a, uint64x1x3_t b) {
  vst3_u64(a, b);
}

// CHECK-LABEL: @test_vst3_s8(
// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.arm.neon.vst3.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
// CHECK: ret void
void test_vst3_s8(int8_t * a, int8x8x3_t b) {
  vst3_s8(a, b);
}

// CHECK-LABEL: @test_vst3_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst3.p0.v4i16(ptr %a, <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
// CHECK: ret void
void test_vst3_s16(int16_t * a, int16x4x3_t b) {
  vst3_s16(a, b);
}

// CHECK-LABEL: @test_vst3_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst3.p0.v2i32(ptr %a, <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 4)
// CHECK: ret void
void test_vst3_s32(int32_t * a, int32x2x3_t b) {
  vst3_s32(a, b);
}

// CHECK-LABEL: @test_vst3_s64(
// CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
// CHECK: call void @llvm.arm.neon.vst3.p0.v1i64(ptr %a, <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i32 4)
// CHECK: ret void
void test_vst3_s64(int64_t * a, int64x1x3_t b) {
  vst3_s64(a, b);
}

// CHECK-LABEL: @test_vst3_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
// CHECK: call void @llvm.arm.neon.vst3.p0.v4f16(ptr %a, <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x half> [[TMP12]], i32 2)
// CHECK: ret void
void test_vst3_f16(float16_t * a, float16x4x3_t b) {
  vst3_f16(a, b);
}

// CHECK-LABEL: @test_vst3_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
// CHECK: call void @llvm.arm.neon.vst3.p0.v2f32(ptr %a, <2 x float> [[TMP10]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], i32 4)
// CHECK: ret void
void test_vst3_f32(float32_t * a, float32x2x3_t b) {
  vst3_f32(a, b);
}

// CHECK-LABEL: @test_vst3_p8(
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.arm.neon.vst3.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
// CHECK: ret void
void test_vst3_p8(poly8_t * a, poly8x8x3_t b) {
  vst3_p8(a, b);
}

// CHECK-LABEL: @test_vst3_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst3.p0.v4i16(ptr %a, <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
// CHECK: ret void
void test_vst3_p16(poly16_t * a, poly16x4x3_t b) {
  vst3_p16(a, b);
}
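
// The *_lane tests store a single lane from each of the three source vectors.
// In the @llvm.arm.neon.vst3lane calls checked below, the lane index is the
// second-to-last i32 operand and the alignment argument is the last.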
// CHECK-LABEL: @test_vst3q_lane_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst3lane.p0.v8i16(ptr %a, <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
// CHECK: ret void
void test_vst3q_lane_u16(uint16_t * a, uint16x8x3_t b) {
  vst3q_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vst3q_lane_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst3lane.p0.v4i32(ptr %a, <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 3, i32 4)
// CHECK: ret void
void test_vst3q_lane_u32(uint32_t * a, uint32x4x3_t b) {
  vst3q_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vst3q_lane_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst3lane.p0.v8i16(ptr %a, <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
// CHECK: ret void
void test_vst3q_lane_s16(int16_t * a, int16x8x3_t b) {
  vst3q_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vst3q_lane_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst3lane.p0.v4i32(ptr %a, <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 3, i32 4)
// CHECK: ret void
void test_vst3q_lane_s32(int32_t * a, int32x4x3_t b) {
  vst3q_lane_s32(a, b, 3);
}

// CHECK-LABEL: @test_vst3q_lane_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
// CHECK: call void @llvm.arm.neon.vst3lane.p0.v8f16(ptr %a, <8 x half> [[TMP10]], <8 x half> [[TMP11]], <8 x half> [[TMP12]], i32 7, i32 2)
// CHECK: ret void
void test_vst3q_lane_f16(float16_t * a, float16x8x3_t b) {
  vst3q_lane_f16(a, b, 7);
}

// CHECK-LABEL: @test_vst3q_lane_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
// CHECK: call void @llvm.arm.neon.vst3lane.p0.v4f32(ptr %a, <4 x float> [[TMP10]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], i32 3, i32 4)
// CHECK: ret void
void test_vst3q_lane_f32(float32_t * a, float32x4x3_t b) {
  vst3q_lane_f32(a, b, 3);
}

// CHECK-LABEL: @test_vst3q_lane_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [6 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 48, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst3lane.p0.v8i16(ptr %a, <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
// CHECK: ret void
void test_vst3q_lane_p16(poly16_t * a, poly16x8x3_t b) {
  vst3q_lane_p16(a, b, 7);
}

// CHECK-LABEL: @test_vst3_lane_u8(
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.arm.neon.vst3lane.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
// CHECK: ret void
void test_vst3_lane_u8(uint8_t * a, uint8x8x3_t b) {
  vst3_lane_u8(a, b, 7);
}

// CHECK-LABEL: @test_vst3_lane_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst3lane.p0.v4i16(ptr %a, <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
// CHECK: ret void
void test_vst3_lane_u16(uint16_t * a, uint16x4x3_t b) {
  vst3_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vst3_lane_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst3lane.p0.v2i32(ptr %a, <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 1, i32 4)
// CHECK: ret void
void test_vst3_lane_u32(uint32_t * a, uint32x2x3_t b) {
  vst3_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vst3_lane_s8(
// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.arm.neon.vst3lane.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
// CHECK: ret void
void test_vst3_lane_s8(int8_t * a, int8x8x3_t b) {
  vst3_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vst3_lane_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst3lane.p0.v4i16(ptr %a, <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
// CHECK: ret void
void test_vst3_lane_s16(int16_t * a, int16x4x3_t b) {
  vst3_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vst3_lane_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst3lane.p0.v2i32(ptr %a, <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 1, i32 4)
// CHECK: ret void
void test_vst3_lane_s32(int32_t * a, int32x2x3_t b) {
  vst3_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vst3_lane_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
// CHECK: call void @llvm.arm.neon.vst3lane.p0.v4f16(ptr %a, <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x half> [[TMP12]], i32 3, i32 2)
// CHECK: ret void
void test_vst3_lane_f16(float16_t * a, float16x4x3_t b) {
  vst3_lane_f16(a, b, 3);
}

// CHECK-LABEL: @test_vst3_lane_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
// CHECK: call void @llvm.arm.neon.vst3lane.p0.v2f32(ptr %a, <2 x float> [[TMP10]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], i32 1, i32 4)
// CHECK: ret void
void test_vst3_lane_f32(float32_t * a, float32x2x3_t b) {
  vst3_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vst3_lane_p8(
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.arm.neon.vst3lane.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
// CHECK: ret void
void test_vst3_lane_p8(poly8_t * a, poly8x8x3_t b) {
  vst3_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vst3_lane_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 24, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst3lane.p0.v4i16(ptr %a, <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
// CHECK: ret void
void test_vst3_lane_p16(poly16_t * a, poly16x4x3_t b) {
  vst3_lane_p16(a, b, 3);
}
// CHECK-LABEL: @test_vst4q_u8(
// CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.arm.neon.vst4.p0.v16i8(ptr %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4q_u8(uint8_t * a, uint8x16x4_t b) {
  vst4q_u8(a, b);
}

// CHECK-LABEL: @test_vst4q_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0.v8i16(ptr %a, <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4q_u16(uint16_t * a, uint16x8x4_t b) {
  vst4q_u16(a, b);
}

// CHECK-LABEL: @test_vst4q_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst4.p0.v4i32(ptr %a, <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4q_u32(uint32_t * a, uint32x4x4_t b) {
  vst4q_u32(a, b);
}

// CHECK-LABEL: @test_vst4q_s8(
// CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.arm.neon.vst4.p0.v16i8(ptr %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4q_s8(int8_t * a, int8x16x4_t b) {
  vst4q_s8(a, b);
}

// CHECK-LABEL: @test_vst4q_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0.v8i16(ptr %a, <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4q_s16(int16_t * a, int16x8x4_t b) {
  vst4q_s16(a, b);
}

// CHECK-LABEL: @test_vst4q_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst4.p0.v4i32(ptr %a, <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4q_s32(int32_t * a, int32x4x4_t b) {
  vst4q_s32(a, b);
}

// CHECK-LABEL: @test_vst4q_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x half>, ptr [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x half>
// CHECK: call void @llvm.arm.neon.vst4.p0.v8f16(ptr %a, <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], <8 x half> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4q_f16(float16_t * a, float16x8x4_t b) {
  vst4q_f16(a, b);
}

// CHECK-LABEL: @test_vst4q_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x float>, ptr [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x float>
// CHECK: call void @llvm.arm.neon.vst4.p0.v4f32(ptr %a, <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4q_f32(float32_t * a, float32x4x4_t b) {
  vst4q_f32(a, b);
}

// CHECK-LABEL: @test_vst4q_p8(
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.arm.neon.vst4.p0.v16i8(ptr %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4q_p8(poly8_t * a, poly8x16x4_t b) {
  vst4q_p8(a, b);
}

// CHECK-LABEL: @test_vst4q_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0.v8i16(ptr %a, <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4q_p16(poly16_t * a, poly16x8x4_t b) {
  vst4q_p16(a, b);
}

// CHECK-LABEL: @test_vst4_u8(
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.arm.neon.vst4.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4_u8(uint8_t * a, uint8x8x4_t b) {
  vst4_u8(a, b);
}

// CHECK-LABEL: @test_vst4_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0.v4i16(ptr %a, <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4_u16(uint16_t * a, uint16x4x4_t b) {
  vst4_u16(a, b);
}

// CHECK-LABEL: @test_vst4_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst4.p0.v2i32(ptr %a, <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4_u32(uint32_t * a, uint32x2x4_t b) {
  vst4_u32(a, b);
}

// CHECK-LABEL: @test_vst4_u64(
// CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
// CHECK: call void @llvm.arm.neon.vst4.p0.v1i64(ptr %a, <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4_u64(uint64_t * a, uint64x1x4_t b) {
  vst4_u64(a, b);
}

// CHECK-LABEL: @test_vst4_s8(
// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.arm.neon.vst4.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4_s8(int8_t * a, int8x8x4_t b) {
  vst4_s8(a, b);
}

// CHECK-LABEL: @test_vst4_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0.v4i16(ptr %a, <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4_s16(int16_t * a, int16x4x4_t b) {
  vst4_s16(a, b);
}

// CHECK-LABEL: @test_vst4_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
// CHECK: call void @llvm.arm.neon.vst4.p0.v2i32(ptr %a, <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4_s32(int32_t * a, int32x2x4_t b) {
  vst4_s32(a, b);
}

// CHECK-LABEL: @test_vst4_s64(
// CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
// CHECK: call void @llvm.arm.neon.vst4.p0.v1i64(ptr %a, <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4_s64(int64_t * a, int64x1x4_t b) {
  vst4_s64(a, b);
}

// CHECK-LABEL: @test_vst4_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x half>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x half>
// CHECK: call void @llvm.arm.neon.vst4.p0.v4f16(ptr %a, <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], <4 x half> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4_f16(float16_t * a, float16x4x4_t b) {
  vst4_f16(a, b);
}

// CHECK-LABEL: @test_vst4_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <2 x float>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <2 x float> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
// CHECK: call void @llvm.arm.neon.vst4.p0.v2f32(ptr %a, <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], i32 4)
// CHECK: ret void
void test_vst4_f32(float32_t * a, float32x2x4_t b) {
  vst4_f32(a, b);
}

// CHECK-LABEL: @test_vst4_p8(
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.arm.neon.vst4.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
// CHECK: ret void
void test_vst4_p8(poly8_t * a, poly8x8x4_t b) {
  vst4_p8(a, b);
}

// CHECK-LABEL: @test_vst4_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK: call void @llvm.arm.neon.vst4.p0.v4i16(ptr %a, <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
// CHECK: ret void
void test_vst4_p16(poly16_t * a, poly16x4x4_t b) {
  vst4_p16(a, b);
}

// CHECK-LABEL: @test_vst4q_lane_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK: call void @llvm.arm.neon.vst4lane.p0.v8i16(ptr %a, <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
// CHECK: ret void
void test_vst4q_lane_u16(uint16_t * a, uint16x8x4_t b) {
  vst4q_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vst4q_lane_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
// CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
// CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
// CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i32 0, i32 3
// CHECK: [[TMP10:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
// CHECK: call void @llvm.arm.neon.vst4lane.p0.v4i32(ptr %a, <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 3, i32 4)
// CHECK: ret void
void test_vst4q_lane_u32(uint32_t * a, uint32x4x4_t b) {
  vst4q_lane_u32(a, b, 3);
}

18037 // CHECK-LABEL: @test_vst4q_lane_s16(
18038 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
18039 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
18040 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[B]], i32 0, i32 0
18041 // CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
18042 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
18043 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
18044 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
18045 // CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
18046 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
18047 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
18048 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
18049 // CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
18050 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
18051 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
18052 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
18053 // CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
18054 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
18055 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
18056 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i32 0, i32 3
18057 // CHECK: [[TMP10:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
18058 // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
18059 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
18060 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
18061 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
18062 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
18063 // CHECK: call void @llvm.arm.neon.vst4lane.p0.v8i16(ptr %a, <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
18064 // CHECK: ret void
18065 void test_vst4q_lane_s16(int16_t * a, int16x8x4_t b) {
18066 vst4q_lane_s16(a, b, 7);
18067 }
18069 // CHECK-LABEL: @test_vst4q_lane_s32(
18070 // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
18071 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
18072 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[B]], i32 0, i32 0
18073 // CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
18074 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
18075 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
18076 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i32 0, i32 0
18077 // CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
18078 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
18079 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
18080 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i32 0, i32 1
18081 // CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
18082 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
18083 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
18084 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i32 0, i32 2
18085 // CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
18086 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
18087 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
18088 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i32 0, i32 3
18089 // CHECK: [[TMP10:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
18090 // CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
18091 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
18092 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
18093 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
18094 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
18095 // CHECK: call void @llvm.arm.neon.vst4lane.p0.v4i32(ptr %a, <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 3, i32 4)
18096 // CHECK: ret void
18097 void test_vst4q_lane_s32(int32_t * a, int32x4x4_t b) {
18098 vst4q_lane_s32(a, b, 3);
18099 }
18101 // CHECK-LABEL: @test_vst4q_lane_f16(
18102 // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
18103 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
18104 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[B]], i32 0, i32 0
18105 // CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
18106 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
18107 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
18108 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL]], i32 0, i32 0
18109 // CHECK: [[TMP4:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
18110 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
18111 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
18112 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL1]], i32 0, i32 1
18113 // CHECK: [[TMP6:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
18114 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
18115 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
18116 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL3]], i32 0, i32 2
18117 // CHECK: [[TMP8:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
18118 // CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
18119 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
18120 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL5]], i32 0, i32 3
18121 // CHECK: [[TMP10:%.*]] = load <8 x half>, ptr [[ARRAYIDX6]], align 16
18122 // CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
18123 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
18124 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
18125 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
18126 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x half>
18127 // CHECK: call void @llvm.arm.neon.vst4lane.p0.v8f16(ptr %a, <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], <8 x half> [[TMP15]], i32 7, i32 2)
18128 // CHECK: ret void
18129 void test_vst4q_lane_f16(float16_t * a, float16x8x4_t b) {
18130 vst4q_lane_f16(a, b, 7);
18131 }
18133 // CHECK-LABEL: @test_vst4q_lane_f32(
18134 // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
18135 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
18136 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[B]], i32 0, i32 0
18137 // CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
18138 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
18139 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
18140 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL]], i32 0, i32 0
18141 // CHECK: [[TMP4:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
18142 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
18143 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
18144 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL1]], i32 0, i32 1
18145 // CHECK: [[TMP6:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
18146 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
18147 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
18148 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL3]], i32 0, i32 2
18149 // CHECK: [[TMP8:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
18150 // CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
18151 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
18152 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL5]], i32 0, i32 3
18153 // CHECK: [[TMP10:%.*]] = load <4 x float>, ptr [[ARRAYIDX6]], align 16
18154 // CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8>
18155 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
18156 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
18157 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
18158 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x float>
18159 // CHECK: call void @llvm.arm.neon.vst4lane.p0.v4f32(ptr %a, <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], i32 3, i32 4)
18160 // CHECK: ret void
18161 void test_vst4q_lane_f32(float32_t * a, float32x4x4_t b) {
18162 vst4q_lane_f32(a, b, 3);
18163 }
18165 // CHECK-LABEL: @test_vst4q_lane_p16(
18166 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
18167 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
18168 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[B]], i32 0, i32 0
18169 // CHECK: store [8 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
18170 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[__S1]], ptr align 16 [[B]], i32 64, i1 false)
18171 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
18172 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i32 0, i32 0
18173 // CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
18174 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
18175 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
18176 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i32 0, i32 1
18177 // CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
18178 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
18179 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
18180 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i32 0, i32 2
18181 // CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
18182 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
18183 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
18184 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i32 0, i32 3
18185 // CHECK: [[TMP10:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
18186 // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
18187 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
18188 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
18189 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
18190 // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
18191 // CHECK: call void @llvm.arm.neon.vst4lane.p0.v8i16(ptr %a, <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
18192 // CHECK: ret void
18193 void test_vst4q_lane_p16(poly16_t * a, poly16x8x4_t b) {
18194 vst4q_lane_p16(a, b, 7);
18195 }
18197 // CHECK-LABEL: @test_vst4_lane_u8(
18198 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
18199 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
18200 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[B]], i32 0, i32 0
18201 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
18202 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
18203 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
18204 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
18205 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
18206 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
18207 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
18208 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
18209 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
18210 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
18211 // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
18212 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
18213 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i32 0, i32 3
18214 // CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
18215 // CHECK: call void @llvm.arm.neon.vst4lane.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
18216 // CHECK: ret void
18217 void test_vst4_lane_u8(uint8_t * a, uint8x8x4_t b) {
18218 vst4_lane_u8(a, b, 7);
18219 }
18221 // CHECK-LABEL: @test_vst4_lane_u16(
18222 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
18223 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
18224 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[B]], i32 0, i32 0
18225 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
18226 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
18227 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
18228 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
18229 // CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
18230 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
18231 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
18232 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
18233 // CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
18234 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
18235 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
18236 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
18237 // CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
18238 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
18239 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
18240 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i32 0, i32 3
18241 // CHECK: [[TMP10:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
18242 // CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
18243 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
18244 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
18245 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
18246 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
18247 // CHECK: call void @llvm.arm.neon.vst4lane.p0.v4i16(ptr %a, <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
18248 // CHECK: ret void
18249 void test_vst4_lane_u16(uint16_t * a, uint16x4x4_t b) {
18250 vst4_lane_u16(a, b, 3);
18251 }
18253 // CHECK-LABEL: @test_vst4_lane_u32(
18254 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
18255 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
18256 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[B]], i32 0, i32 0
18257 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
18258 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
18259 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
18260 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
18261 // CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
18262 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
18263 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
18264 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
18265 // CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
18266 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
18267 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
18268 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
18269 // CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
18270 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
18271 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
18272 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i32 0, i32 3
18273 // CHECK: [[TMP10:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
18274 // CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
18275 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
18276 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
18277 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
18278 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
18279 // CHECK: call void @llvm.arm.neon.vst4lane.p0.v2i32(ptr %a, <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 1, i32 4)
18280 // CHECK: ret void
18281 void test_vst4_lane_u32(uint32_t * a, uint32x2x4_t b) {
18282 vst4_lane_u32(a, b, 1);
18283 }
18285 // CHECK-LABEL: @test_vst4_lane_s8(
18286 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
18287 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
18288 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[B]], i32 0, i32 0
18289 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
18290 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
18291 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
18292 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
18293 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
18294 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
18295 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
18296 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
18297 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
18298 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
18299 // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
18300 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
18301 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i32 0, i32 3
18302 // CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
18303 // CHECK: call void @llvm.arm.neon.vst4lane.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
18304 // CHECK: ret void
18305 void test_vst4_lane_s8(int8_t * a, int8x8x4_t b) {
18306 vst4_lane_s8(a, b, 7);
18307 }
18309 // CHECK-LABEL: @test_vst4_lane_s16(
18310 // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
18311 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
18312 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[B]], i32 0, i32 0
18313 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
18314 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
18315 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
18316 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
18317 // CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
18318 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
18319 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
18320 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
18321 // CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
18322 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
18323 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
18324 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
18325 // CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
18326 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
18327 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
18328 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i32 0, i32 3
18329 // CHECK: [[TMP10:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
18330 // CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
18331 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
18332 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
18333 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
18334 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
18335 // CHECK: call void @llvm.arm.neon.vst4lane.p0.v4i16(ptr %a, <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
18336 // CHECK: ret void
18337 void test_vst4_lane_s16(int16_t * a, int16x4x4_t b) {
18338 vst4_lane_s16(a, b, 3);
18339 }
18341 // CHECK-LABEL: @test_vst4_lane_s32(
18342 // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
18343 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
18344 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[B]], i32 0, i32 0
18345 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
18346 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
18347 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
18348 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i32 0, i32 0
18349 // CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
18350 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
18351 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
18352 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i32 0, i32 1
18353 // CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
18354 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
18355 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
18356 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i32 0, i32 2
18357 // CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
18358 // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
18359 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
18360 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i32 0, i32 3
18361 // CHECK: [[TMP10:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
18362 // CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
18363 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
18364 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
18365 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
18366 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
18367 // CHECK: call void @llvm.arm.neon.vst4lane.p0.v2i32(ptr %a, <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 1, i32 4)
18368 // CHECK: ret void
18369 void test_vst4_lane_s32(int32_t * a, int32x2x4_t b) {
18370 vst4_lane_s32(a, b, 1);
18371 }
18373 // CHECK-LABEL: @test_vst4_lane_f16(
18374 // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
18375 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
18376 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[B]], i32 0, i32 0
18377 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
18378 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
18379 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
18380 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL]], i32 0, i32 0
18381 // CHECK: [[TMP4:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
18382 // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
18383 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
18384 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL1]], i32 0, i32 1
18385 // CHECK: [[TMP6:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
18386 // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
18387 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
18388 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL3]], i32 0, i32 2
18389 // CHECK: [[TMP8:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
18390 // CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
18391 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
18392 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL5]], i32 0, i32 3
18393 // CHECK: [[TMP10:%.*]] = load <4 x half>, ptr [[ARRAYIDX6]], align 8
18394 // CHECK: [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8>
18395 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
18396 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
18397 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
18398 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x half>
18399 // CHECK: call void @llvm.arm.neon.vst4lane.p0.v4f16(ptr %a, <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], <4 x half> [[TMP15]], i32 3, i32 2)
18400 // CHECK: ret void
18401 void test_vst4_lane_f16(float16_t * a, float16x4x4_t b) {
18402 vst4_lane_f16(a, b, 3);
18403 }
18405 // CHECK-LABEL: @test_vst4_lane_f32(
18406 // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
18407 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
18408 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[B]], i32 0, i32 0
18409 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
18410 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
18411 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
18412 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL]], i32 0, i32 0
18413 // CHECK: [[TMP4:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
18414 // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
18415 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
18416 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL1]], i32 0, i32 1
18417 // CHECK: [[TMP6:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
18418 // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
18419 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
18420 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL3]], i32 0, i32 2
18421 // CHECK: [[TMP8:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
18422 // CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
18423 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
18424 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL5]], i32 0, i32 3
18425 // CHECK: [[TMP10:%.*]] = load <2 x float>, ptr [[ARRAYIDX6]], align 8
18426 // CHECK: [[TMP11:%.*]] = bitcast <2 x float> [[TMP10]] to <8 x i8>
18427 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
18428 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
18429 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
18430 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
18431 // CHECK: call void @llvm.arm.neon.vst4lane.p0.v2f32(ptr %a, <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], i32 1, i32 4)
18432 // CHECK: ret void
18433 void test_vst4_lane_f32(float32_t * a, float32x2x4_t b) {
18434 vst4_lane_f32(a, b, 1);
18435 }
18437 // CHECK-LABEL: @test_vst4_lane_p8(
18438 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
18439 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
18440 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[B]], i32 0, i32 0
18441 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
18442 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
18443 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
18444 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i32 0, i32 0
18445 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
18446 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
18447 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i32 0, i32 1
18448 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
18449 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
18450 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i32 0, i32 2
18451 // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
18452 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
18453 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i32 0, i32 3
18454 // CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
18455 // CHECK: call void @llvm.arm.neon.vst4lane.p0.v8i8(ptr %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
18456 // CHECK: ret void
18457 void test_vst4_lane_p8(poly8_t * a, poly8x8x4_t b) {
18458 vst4_lane_p8(a, b, 7);
18459 }
18461 // CHECK-LABEL: @test_vst4_lane_p16(
18462 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
18463 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
18464 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[B]], i32 0, i32 0
18465 // CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
18466 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[__S1]], ptr align 8 [[B]], i32 32, i1 false)
18467 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
18468 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i32 0, i32 0
18469 // CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
18470 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
18471 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
18472 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i32 0, i32 1
18473 // CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
18474 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
18475 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
18476 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i32 0, i32 2
18477 // CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
18478 // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
18479 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
18480 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i32 0, i32 3
18481 // CHECK: [[TMP10:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
18482 // CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
18483 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
18484 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
18485 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
18486 // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
18487 // CHECK: call void @llvm.arm.neon.vst4lane.p0.v4i16(ptr %a, <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
18488 // CHECK: ret void
18489 void test_vst4_lane_p16(poly16_t * a, poly16x4x4_t b) {
18490 vst4_lane_p16(a, b, 3);
18491 }
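// The vsub/vsubq tests below check that plain vector subtraction lowers to a single IR sub (fsub for float vectors) with no intrinsic call.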
18493 // CHECK-LABEL: @test_vsub_s8(
18494 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, %b
18495 // CHECK: ret <8 x i8> [[SUB_I]]
18496 int8x8_t test_vsub_s8(int8x8_t a, int8x8_t b) {
18497 return vsub_s8(a, b);
18498 }
18500 // CHECK-LABEL: @test_vsub_s16(
18501 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, %b
18502 // CHECK: ret <4 x i16> [[SUB_I]]
18503 int16x4_t test_vsub_s16(int16x4_t a, int16x4_t b) {
18504 return vsub_s16(a, b);
18505 }
18507 // CHECK-LABEL: @test_vsub_s32(
18508 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, %b
18509 // CHECK: ret <2 x i32> [[SUB_I]]
18510 int32x2_t test_vsub_s32(int32x2_t a, int32x2_t b) {
18511 return vsub_s32(a, b);
18512 }
18514 // CHECK-LABEL: @test_vsub_s64(
18515 // CHECK: [[SUB_I:%.*]] = sub <1 x i64> %a, %b
18516 // CHECK: ret <1 x i64> [[SUB_I]]
18517 int64x1_t test_vsub_s64(int64x1_t a, int64x1_t b) {
18518 return vsub_s64(a, b);
18519 }
18521 // CHECK-LABEL: @test_vsub_f32(
18522 // CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, %b
18523 // CHECK: ret <2 x float> [[SUB_I]]
18524 float32x2_t test_vsub_f32(float32x2_t a, float32x2_t b) {
18525 return vsub_f32(a, b);
18526 }
18528 // CHECK-LABEL: @test_vsub_u8(
18529 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, %b
18530 // CHECK: ret <8 x i8> [[SUB_I]]
18531 uint8x8_t test_vsub_u8(uint8x8_t a, uint8x8_t b) {
18532 return vsub_u8(a, b);
18533 }
18535 // CHECK-LABEL: @test_vsub_u16(
18536 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, %b
18537 // CHECK: ret <4 x i16> [[SUB_I]]
18538 uint16x4_t test_vsub_u16(uint16x4_t a, uint16x4_t b) {
18539 return vsub_u16(a, b);
18540 }
18542 // CHECK-LABEL: @test_vsub_u32(
18543 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, %b
18544 // CHECK: ret <2 x i32> [[SUB_I]]
18545 uint32x2_t test_vsub_u32(uint32x2_t a, uint32x2_t b) {
18546 return vsub_u32(a, b);
18547 }
18549 // CHECK-LABEL: @test_vsub_u64(
18550 // CHECK: [[SUB_I:%.*]] = sub <1 x i64> %a, %b
18551 // CHECK: ret <1 x i64> [[SUB_I]]
18552 uint64x1_t test_vsub_u64(uint64x1_t a, uint64x1_t b) {
18553 return vsub_u64(a, b);
18554 }
18556 // CHECK-LABEL: @test_vsubq_s8(
18557 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, %b
18558 // CHECK: ret <16 x i8> [[SUB_I]]
18559 int8x16_t test_vsubq_s8(int8x16_t a, int8x16_t b) {
18560 return vsubq_s8(a, b);
18561 }
18563 // CHECK-LABEL: @test_vsubq_s16(
18564 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, %b
18565 // CHECK: ret <8 x i16> [[SUB_I]]
18566 int16x8_t test_vsubq_s16(int16x8_t a, int16x8_t b) {
18567 return vsubq_s16(a, b);
18568 }
18570 // CHECK-LABEL: @test_vsubq_s32(
18571 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, %b
18572 // CHECK: ret <4 x i32> [[SUB_I]]
18573 int32x4_t test_vsubq_s32(int32x4_t a, int32x4_t b) {
18574 return vsubq_s32(a, b);
18575 }
18577 // CHECK-LABEL: @test_vsubq_s64(
18578 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, %b
18579 // CHECK: ret <2 x i64> [[SUB_I]]
18580 int64x2_t test_vsubq_s64(int64x2_t a, int64x2_t b) {
18581 return vsubq_s64(a, b);
18582 }
18584 // CHECK-LABEL: @test_vsubq_f32(
18585 // CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, %b
18586 // CHECK: ret <4 x float> [[SUB_I]]
18587 float32x4_t test_vsubq_f32(float32x4_t a, float32x4_t b) {
18588 return vsubq_f32(a, b);
18589 }
18591 // CHECK-LABEL: @test_vsubq_u8(
18592 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, %b
18593 // CHECK: ret <16 x i8> [[SUB_I]]
18594 uint8x16_t test_vsubq_u8(uint8x16_t a, uint8x16_t b) {
18595 return vsubq_u8(a, b);
18596 }
18598 // CHECK-LABEL: @test_vsubq_u16(
18599 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, %b
18600 // CHECK: ret <8 x i16> [[SUB_I]]
18601 uint16x8_t test_vsubq_u16(uint16x8_t a, uint16x8_t b) {
18602 return vsubq_u16(a, b);
18603 }
18605 // CHECK-LABEL: @test_vsubq_u32(
18606 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, %b
18607 // CHECK: ret <4 x i32> [[SUB_I]]
18608 uint32x4_t test_vsubq_u32(uint32x4_t a, uint32x4_t b) {
18609 return vsubq_u32(a, b);
18610 }
18612 // CHECK-LABEL: @test_vsubq_u64(
18613 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, %b
18614 // CHECK: ret <2 x i64> [[SUB_I]]
18615 uint64x2_t test_vsubq_u64(uint64x2_t a, uint64x2_t b) {
18616 return vsubq_u64(a, b);
18617 }
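// vsubhn: high-half narrowing subtract. The expected pattern is sub, lshr by half the element width, then trunc to the narrow type.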
18619 // CHECK-LABEL: @test_vsubhn_s16(
18620 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
18621 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
18622 // CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
18623 // CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
18624 // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
18625 // CHECK: ret <8 x i8> [[VSUBHN2_I]]
18626 int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
18627 return vsubhn_s16(a, b);
18628 }
18630 // CHECK-LABEL: @test_vsubhn_s32(
18631 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
18632 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
18633 // CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
18634 // CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
18635 // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
18636 // CHECK: ret <4 x i16> [[VSUBHN2_I]]
18637 int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
18638 return vsubhn_s32(a, b);
18639 }
18641 // CHECK-LABEL: @test_vsubhn_s64(
18642 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
18643 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
18644 // CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
18645 // CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
18646 // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
18647 // CHECK: ret <2 x i32> [[VSUBHN2_I]]
18648 int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
18649 return vsubhn_s64(a, b);
18650 }
18652 // CHECK-LABEL: @test_vsubhn_u16(
18653 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
18654 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
18655 // CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
18656 // CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
18657 // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
18658 // CHECK: ret <8 x i8> [[VSUBHN2_I]]
18659 uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
18660 return vsubhn_u16(a, b);
18661 }
18663 // CHECK-LABEL: @test_vsubhn_u32(
18664 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
18665 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
18666 // CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
18667 // CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
18668 // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
18669 // CHECK: ret <4 x i16> [[VSUBHN2_I]]
18670 uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
18671 return vsubhn_u32(a, b);
18672 }
18674 // CHECK-LABEL: @test_vsubhn_u64(
18675 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
18676 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
18677 // CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
18678 // CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
18679 // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
18680 // CHECK: ret <2 x i32> [[VSUBHN2_I]]
18681 uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
18682 return vsubhn_u64(a, b);
18683 }
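// vsubl: long subtract. Both operands are sign- or zero-extended to double width before the sub.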
18685 // CHECK-LABEL: @test_vsubl_s8(
18686 // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
18687 // CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
18688 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
18689 // CHECK: ret <8 x i16> [[SUB_I]]
18690 int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
18691 return vsubl_s8(a, b);
18692 }
18694 // CHECK-LABEL: @test_vsubl_s16(
18695 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
18696 // CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
18697 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
18698 // CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
18699 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
18700 // CHECK: ret <4 x i32> [[SUB_I]]
18701 int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
18702 return vsubl_s16(a, b);
18703 }
18705 // CHECK-LABEL: @test_vsubl_s32(
18706 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
18707 // CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
18708 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
18709 // CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
18710 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
18711 // CHECK: ret <2 x i64> [[SUB_I]]
18712 int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
18713 return vsubl_s32(a, b);
18714 }
18716 // CHECK-LABEL: @test_vsubl_u8(
18717 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
18718 // CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
18719 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
18720 // CHECK: ret <8 x i16> [[SUB_I]]
18721 uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
18722 return vsubl_u8(a, b);
18723 }
18725 // CHECK-LABEL: @test_vsubl_u16(
18726 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
18727 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
18728 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
18729 // CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
18730 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
18731 // CHECK: ret <4 x i32> [[SUB_I]]
18732 uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
18733 return vsubl_u16(a, b);
18734 }
18736 // CHECK-LABEL: @test_vsubl_u32(
18737 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
18738 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
18739 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
18740 // CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
18741 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
18742 // CHECK: ret <2 x i64> [[SUB_I]]
18743 uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
18744 return vsubl_u32(a, b);
18745 }
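// vsubw: wide subtract. Only the second (narrow) operand is extended; the first already has the wide type.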
18747 // CHECK-LABEL: @test_vsubw_s8(
18748 // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
18749 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
18750 // CHECK: ret <8 x i16> [[SUB_I]]
18751 int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
18752 return vsubw_s8(a, b);
18753 }
18755 // CHECK-LABEL: @test_vsubw_s16(
18756 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
18757 // CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
18758 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
18759 // CHECK: ret <4 x i32> [[SUB_I]]
18760 int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
18761 return vsubw_s16(a, b);
18762 }
18764 // CHECK-LABEL: @test_vsubw_s32(
18765 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
18766 // CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
18767 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
18768 // CHECK: ret <2 x i64> [[SUB_I]]
18769 int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
18770 return vsubw_s32(a, b);
18771 }
18773 // CHECK-LABEL: @test_vsubw_u8(
18774 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
18775 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
18776 // CHECK: ret <8 x i16> [[SUB_I]]
18777 uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
18778 return vsubw_u8(a, b);
18779 }
18781 // CHECK-LABEL: @test_vsubw_u16(
18782 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
18783 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
18784 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
18785 // CHECK: ret <4 x i32> [[SUB_I]]
18786 uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
18787 return vsubw_u16(a, b);
18788 }
18790 // CHECK-LABEL: @test_vsubw_u32(
18791 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
18792 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
18793 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
18794 // CHECK: ret <2 x i64> [[SUB_I]]
18795 uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
18796 return vsubw_u32(a, b);
18797 }
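// vtbl1-vtbl4: byte table lookups via the llvm.arm.neon.vtblN intrinsics. Multi-register tables arrive as NEON struct types and are unpacked through allocas.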
18799 // CHECK-LABEL: @test_vtbl1_u8(
18800 // CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b)
18801 // CHECK: ret <8 x i8> [[VTBL1_I]]
18802 uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
18803 return vtbl1_u8(a, b);
18804 }
18806 // CHECK-LABEL: @test_vtbl1_s8(
18807 // CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b)
18808 // CHECK: ret <8 x i8> [[VTBL1_I]]
18809 int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
18810 return vtbl1_s8(a, b);
18811 }
18813 // CHECK-LABEL: @test_vtbl1_p8(
18814 // CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b)
18815 // CHECK: ret <8 x i8> [[VTBL1_I]]
18816 poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) {
18817 return vtbl1_p8(a, b);
18818 }
18820 // CHECK-LABEL: @test_vtbl2_u8(
18821 // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
18822 // CHECK: [[A:%.*]] = alloca %struct.uint8x8x2_t, align 8
18823 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr [[A]], i32 0, i32 0
18824 // CHECK: store [2 x i64] [[A]].coerce, ptr [[COERCE_DIVE]], align 8
18825 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr [[A]], i32 0, i32 0
18826 // CHECK: [[TMP2:%.*]] = load [2 x i64], ptr [[COERCE_DIVE1]], align 8
18827 // CHECK: store [2 x i64] [[TMP2]], ptr [[__P0_I]], align 8
18828 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
18829 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 1
18830 // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
18831 // CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b)
18832 // CHECK: ret <8 x i8> [[VTBL2_I]]
18833 uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
18834 return vtbl2_u8(a, b);
18835 }
18837 // CHECK-LABEL: @test_vtbl2_s8(
18838 // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x2_t, align 8
18839 // CHECK: [[A:%.*]] = alloca %struct.int8x8x2_t, align 8
18840 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr [[A]], i32 0, i32 0
18841 // CHECK: store [2 x i64] [[A]].coerce, ptr [[COERCE_DIVE]], align 8
18842 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr [[A]], i32 0, i32 0
18843 // CHECK: [[TMP2:%.*]] = load [2 x i64], ptr [[COERCE_DIVE1]], align 8
18844 // CHECK: store [2 x i64] [[TMP2]], ptr [[__P0_I]], align 8
18845 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
18846 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 1
18847 // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
18848 // CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b)
18849 // CHECK: ret <8 x i8> [[VTBL2_I]]
18850 int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
18851 return vtbl2_s8(a, b);
18852 }
18854 // CHECK-LABEL: @test_vtbl2_p8(
18855 // CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
18856 // CHECK: [[A:%.*]] = alloca %struct.poly8x8x2_t, align 8
18857 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr [[A]], i32 0, i32 0
18858 // CHECK: store [2 x i64] [[A]].coerce, ptr [[COERCE_DIVE]], align 8
18859 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr [[A]], i32 0, i32 0
18860 // CHECK: [[TMP2:%.*]] = load [2 x i64], ptr [[COERCE_DIVE1]], align 8
18861 // CHECK: store [2 x i64] [[TMP2]], ptr [[__P0_I]], align 8
18862 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
18863 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 1
18864 // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
18865 // CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b)
18866 // CHECK: ret <8 x i8> [[VTBL2_I]]
18867 poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
18868 return vtbl2_p8(a, b);
18869 }
18871 // CHECK-LABEL: @test_vtbl3_u8(
18872 // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x3_t, align 8
18873 // CHECK: [[A:%.*]] = alloca %struct.uint8x8x3_t, align 8
18874 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[A]], i32 0, i32 0
18875 // CHECK: store [3 x i64] [[A]].coerce, ptr [[COERCE_DIVE]], align 8
18876 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[A]], i32 0, i32 0
18877 // CHECK: [[TMP2:%.*]] = load [3 x i64], ptr [[COERCE_DIVE1]], align 8
18878 // CHECK: store [3 x i64] [[TMP2]], ptr [[__P0_I]], align 8
18879 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
18880 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 1
18881 // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
18882 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 2
18883 // CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
18884 // CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b)
18885 // CHECK: ret <8 x i8> [[VTBL3_I]]
18886 uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
18887 return vtbl3_u8(a, b);
18888 }
18890 // CHECK-LABEL: @test_vtbl3_s8(
18891 // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x3_t, align 8
18892 // CHECK: [[A:%.*]] = alloca %struct.int8x8x3_t, align 8
18893 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[A]], i32 0, i32 0
18894 // CHECK: store [3 x i64] [[A]].coerce, ptr [[COERCE_DIVE]], align 8
18895 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[A]], i32 0, i32 0
18896 // CHECK: [[TMP2:%.*]] = load [3 x i64], ptr [[COERCE_DIVE1]], align 8
18897 // CHECK: store [3 x i64] [[TMP2]], ptr [[__P0_I]], align 8
18898 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
18899 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 1
18900 // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
18901 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 2
18902 // CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
18903 // CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b)
18904 // CHECK: ret <8 x i8> [[VTBL3_I]]
18905 int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
18906 return vtbl3_s8(a, b);
18907 }
18909 // CHECK-LABEL: @test_vtbl3_p8(
18910 // CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x3_t, align 8
18911 // CHECK: [[A:%.*]] = alloca %struct.poly8x8x3_t, align 8
18912 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[A]], i32 0, i32 0
18913 // CHECK: store [3 x i64] [[A]].coerce, ptr [[COERCE_DIVE]], align 8
18914 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[A]], i32 0, i32 0
18915 // CHECK: [[TMP2:%.*]] = load [3 x i64], ptr [[COERCE_DIVE1]], align 8
18916 // CHECK: store [3 x i64] [[TMP2]], ptr [[__P0_I]], align 8
18917 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
18918 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 1
18919 // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
18920 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 2
18921 // CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
18922 // CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b)
18923 // CHECK: ret <8 x i8> [[VTBL3_I]]
18924 poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
18925 return vtbl3_p8(a, b);
18926 }
18928 // CHECK-LABEL: @test_vtbl4_u8(
18929 // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x4_t, align 8
18930 // CHECK: [[A:%.*]] = alloca %struct.uint8x8x4_t, align 8
18931 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[A]], i32 0, i32 0
18932 // CHECK: store [4 x i64] [[A]].coerce, ptr [[COERCE_DIVE]], align 8
18933 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[A]], i32 0, i32 0
18934 // CHECK: [[TMP2:%.*]] = load [4 x i64], ptr [[COERCE_DIVE1]], align 8
18935 // CHECK: store [4 x i64] [[TMP2]], ptr [[__P0_I]], align 8
18936 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
18937 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 1
18938 // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
18939 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 2
18940 // CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
18941 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 3
18942 // CHECK: [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
18943 // CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b)
18944 // CHECK: ret <8 x i8> [[VTBL4_I]]
18945 uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
18946 return vtbl4_u8(a, b);
18947 }
18949 // CHECK-LABEL: @test_vtbl4_s8(
18950 // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x4_t, align 8
18951 // CHECK: [[A:%.*]] = alloca %struct.int8x8x4_t, align 8
18952 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[A]], i32 0, i32 0
18953 // CHECK: store [4 x i64] [[A]].coerce, ptr [[COERCE_DIVE]], align 8
18954 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[A]], i32 0, i32 0
18955 // CHECK: [[TMP2:%.*]] = load [4 x i64], ptr [[COERCE_DIVE1]], align 8
18956 // CHECK: store [4 x i64] [[TMP2]], ptr [[__P0_I]], align 8
18957 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
18958 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 1
18959 // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
18960 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 2
18961 // CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
18962 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 3
18963 // CHECK: [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
18964 // CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b)
18965 // CHECK: ret <8 x i8> [[VTBL4_I]]
18966 int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
18967 return vtbl4_s8(a, b);
18970 // CHECK-LABEL: @test_vtbl4_p8(
18971 // CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x4_t, align 8
18972 // CHECK: [[A:%.*]] = alloca %struct.poly8x8x4_t, align 8
18973 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[A]], i32 0, i32 0
18974 // CHECK: store [4 x i64] [[A]].coerce, ptr [[COERCE_DIVE]], align 8
18975 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[A]], i32 0, i32 0
18976 // CHECK: [[TMP2:%.*]] = load [4 x i64], ptr [[COERCE_DIVE1]], align 8
18977 // CHECK: store [4 x i64] [[TMP2]], ptr [[__P0_I]], align 8
18978 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P0_I]], align 8
18979 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 1
18980 // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
18981 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 2
18982 // CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
18983 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P0_I]], i32 0, i32 3
18984 // CHECK: [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
18985 // CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b)
18986 // CHECK: ret <8 x i8> [[VTBL4_I]]
18987 poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
18988 return vtbl4_p8(a, b);
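// vtbx differs from vtbl only in how out-of-range indices behave: instead of
// producing 0, the corresponding lane of the accumulator a passes through
// unchanged. For the one-register form, roughly (a sketch, not checked output):
//   d[i] = (c[i] < 8) ? b[c[i]] : a[i]
// The two-, three-, and four-register forms below widen the in-range window
// to 16, 24, and 32 bytes respectively.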
// CHECK-LABEL: @test_vtbx1_u8(
// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK: ret <8 x i8> [[VTBX1_I]]
uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vtbx1_u8(a, b, c);
}

// CHECK-LABEL: @test_vtbx1_s8(
// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK: ret <8 x i8> [[VTBX1_I]]
int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vtbx1_s8(a, b, c);
}

// CHECK-LABEL: @test_vtbx1_p8(
// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK: ret <8 x i8> [[VTBX1_I]]
poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) {
  return vtbx1_p8(a, b, c);
}

// CHECK-LABEL: @test_vtbx2_u8(
// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr [[B]], i32 0, i32 0
// CHECK: [[TMP2:%.*]] = load [2 x i64], ptr [[COERCE_DIVE1]], align 8
// CHECK: store [2 x i64] [[TMP2]], ptr [[__P1_I]], align 8
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c)
// CHECK: ret <8 x i8> [[VTBX2_I]]
uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
  return vtbx2_u8(a, b, c);
}

// CHECK-LABEL: @test_vtbx2_s8(
// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr [[B]], i32 0, i32 0
// CHECK: [[TMP2:%.*]] = load [2 x i64], ptr [[COERCE_DIVE1]], align 8
// CHECK: store [2 x i64] [[TMP2]], ptr [[__P1_I]], align 8
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c)
// CHECK: ret <8 x i8> [[VTBX2_I]]
int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
  return vtbx2_s8(a, b, c);
}

// CHECK-LABEL: @test_vtbx2_p8(
// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [2 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr [[B]], i32 0, i32 0
// CHECK: [[TMP2:%.*]] = load [2 x i64], ptr [[COERCE_DIVE1]], align 8
// CHECK: store [2 x i64] [[TMP2]], ptr [[__P1_I]], align 8
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c)
// CHECK: ret <8 x i8> [[VTBX2_I]]
poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
  return vtbx2_p8(a, b, c);
}

// CHECK-LABEL: @test_vtbx3_u8(
// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: [[TMP2:%.*]] = load [3 x i64], ptr [[COERCE_DIVE1]], align 8
// CHECK: store [3 x i64] [[TMP2]], ptr [[__P1_I]], align 8
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c)
// CHECK: ret <8 x i8> [[VTBX3_I]]
uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) {
  return vtbx3_u8(a, b, c);
}

// CHECK-LABEL: @test_vtbx3_s8(
// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: [[TMP2:%.*]] = load [3 x i64], ptr [[COERCE_DIVE1]], align 8
// CHECK: store [3 x i64] [[TMP2]], ptr [[__P1_I]], align 8
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c)
// CHECK: ret <8 x i8> [[VTBX3_I]]
int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) {
  return vtbx3_s8(a, b, c);
}

// CHECK-LABEL: @test_vtbx3_p8(
// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: store [3 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[B]], i32 0, i32 0
// CHECK: [[TMP2:%.*]] = load [3 x i64], ptr [[COERCE_DIVE1]], align 8
// CHECK: store [3 x i64] [[TMP2]], ptr [[__P1_I]], align 8
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c)
// CHECK: ret <8 x i8> [[VTBX3_I]]
poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) {
  return vtbx3_p8(a, b, c);
}

// CHECK-LABEL: @test_vtbx4_u8(
// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: [[TMP2:%.*]] = load [4 x i64], ptr [[COERCE_DIVE1]], align 8
// CHECK: store [4 x i64] [[TMP2]], ptr [[__P1_I]], align 8
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c)
// CHECK: ret <8 x i8> [[VTBX4_I]]
uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) {
  return vtbx4_u8(a, b, c);
}

// CHECK-LABEL: @test_vtbx4_s8(
// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: [[TMP2:%.*]] = load [4 x i64], ptr [[COERCE_DIVE1]], align 8
// CHECK: store [4 x i64] [[TMP2]], ptr [[__P1_I]], align 8
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c)
// CHECK: ret <8 x i8> [[VTBX4_I]]
int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) {
  return vtbx4_s8(a, b, c);
}

// CHECK-LABEL: @test_vtbx4_p8(
// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x i64] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: [[TMP2:%.*]] = load [4 x i64], ptr [[COERCE_DIVE1]], align 8
// CHECK: store [4 x i64] [[TMP2]], ptr [[__P1_I]], align 8
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[__P1_I]], align 8
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 1
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2_I]], align 8
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 2
// CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4_I]], align 8
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[__P1_I]], i32 0, i32 3
// CHECK: [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6_I]], align 8
// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c)
// CHECK: ret <8 x i8> [[VTBX4_I]]
poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) {
  return vtbx4_p8(a, b, c);
}
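// The vtrn tests return a two-vector struct, so the functions are lowered with
// an sret pointer and the checks match the two stores into it rather than a
// returned value; the !alias.scope metadata on those stores comes from
// inlining the intrinsic wrapper, whose sret parameter is noalias. vtrn itself
// transposes lane pairs: val[0] interleaves the even lanes of a and b, val[1]
// the odd lanes, as the shuffle masks spell out.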
// CHECK: @test_vtrn_s8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: store <8 x i8> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: store <8 x i8> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope
// CHECK: ret void
int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
  return vtrn_s8(a, b);
}

// CHECK: @test_vtrn_s16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: store <4 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: store <4 x i16> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
  return vtrn_s16(a, b);
}

// CHECK: @test_vtrn_s32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x i32> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x i32> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
  return vtrn_s32(a, b);
}

// CHECK: @test_vtrn_u8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: store <8 x i8> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: store <8 x i8> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope
// CHECK: ret void
uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
  return vtrn_u8(a, b);
}

// CHECK: @test_vtrn_u16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: store <4 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: store <4 x i16> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
  return vtrn_u16(a, b);
}

// CHECK: @test_vtrn_u32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x i32> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x i32> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
  return vtrn_u32(a, b);
}

// CHECK: @test_vtrn_f32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x float> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x float> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
  return vtrn_f32(a, b);
}

// CHECK: @test_vtrn_p8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: store <8 x i8> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: store <8 x i8> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope
// CHECK: ret void
poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
  return vtrn_p8(a, b);
}

// CHECK: @test_vtrn_p16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: store <4 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: store <4 x i16> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
  return vtrn_p16(a, b);
}

// CHECK: @test_vtrnq_s8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
// CHECK: store <16 x i8> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
// CHECK: store <16 x i8> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope
// CHECK: ret void
int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
  return vtrnq_s8(a, b);
}

// CHECK: @test_vtrnq_s16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: store <8 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: store <8 x i16> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
  return vtrnq_s16(a, b);
}

// CHECK: @test_vtrnq_s32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: store <4 x i32> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: store <4 x i32> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
  return vtrnq_s32(a, b);
}

// CHECK: @test_vtrnq_u8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
// CHECK: store <16 x i8> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
// CHECK: store <16 x i8> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope
// CHECK: ret void
uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
  return vtrnq_u8(a, b);
}

// CHECK: @test_vtrnq_u16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: store <8 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: store <8 x i16> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
  return vtrnq_u16(a, b);
}

// CHECK: @test_vtrnq_u32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: store <4 x i32> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: store <4 x i32> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
  return vtrnq_u32(a, b);
}

// CHECK: @test_vtrnq_f32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
// CHECK: store <4 x float> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
// CHECK: store <4 x float> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) {
  return vtrnq_f32(a, b);
}

// CHECK: @test_vtrnq_p8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
// CHECK: store <16 x i8> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
// CHECK: store <16 x i8> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope
// CHECK: ret void
poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
  return vtrnq_p8(a, b);
}

// CHECK: @test_vtrnq_p16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
// CHECK: store <8 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
// CHECK: store <8 x i16> [[VTRN1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) {
  return vtrnq_p16(a, b);
}
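// vtst needs no target intrinsic at the IR level: each lane computes
// (a & b) != 0 and sign-extends the i1 result, so lanes where the two inputs
// share any set bit become all-ones and the rest become zero:
//   d[i] = (a[i] & b[i]) != 0 ? ~0 : 0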
// CHECK-LABEL: @test_vtst_s8(
// CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_s8(int8x8_t a, int8x8_t b) {
  return vtst_s8(a, b);
}

// CHECK-LABEL: @test_vtst_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = and <4 x i16> %a, %b
// CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_s16(int16x4_t a, int16x4_t b) {
  return vtst_s16(a, b);
}

// CHECK-LABEL: @test_vtst_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = and <2 x i32> %a, %b
// CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
// CHECK: ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_s32(int32x2_t a, int32x2_t b) {
  return vtst_s32(a, b);
}

// CHECK-LABEL: @test_vtst_u8(
// CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_u8(uint8x8_t a, uint8x8_t b) {
  return vtst_u8(a, b);
}

// CHECK-LABEL: @test_vtst_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = and <4 x i16> %a, %b
// CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_u16(uint16x4_t a, uint16x4_t b) {
  return vtst_u16(a, b);
}

// CHECK-LABEL: @test_vtst_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = and <2 x i32> %a, %b
// CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
// CHECK: ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_u32(uint32x2_t a, uint32x2_t b) {
  return vtst_u32(a, b);
}

// CHECK-LABEL: @test_vtst_p8(
// CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
// CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_p8(poly8x8_t a, poly8x8_t b) {
  return vtst_p8(a, b);
}

// CHECK-LABEL: @test_vtst_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = and <4 x i16> %a, %b
// CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
// CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_p16(poly16x4_t a, poly16x4_t b) {
  return vtst_p16(a, b);
}

// CHECK-LABEL: @test_vtstq_s8(
// CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_s8(int8x16_t a, int8x16_t b) {
  return vtstq_s8(a, b);
}

// CHECK-LABEL: @test_vtstq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = and <8 x i16> %a, %b
// CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
// CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_s16(int16x8_t a, int16x8_t b) {
  return vtstq_s16(a, b);
}

// CHECK-LABEL: @test_vtstq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = and <4 x i32> %a, %b
// CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
// CHECK: ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_s32(int32x4_t a, int32x4_t b) {
  return vtstq_s32(a, b);
}

// CHECK-LABEL: @test_vtstq_u8(
// CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_u8(uint8x16_t a, uint8x16_t b) {
  return vtstq_u8(a, b);
}

// CHECK-LABEL: @test_vtstq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = and <8 x i16> %a, %b
// CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
// CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_u16(uint16x8_t a, uint16x8_t b) {
  return vtstq_u16(a, b);
}

// CHECK-LABEL: @test_vtstq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = and <4 x i32> %a, %b
// CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
// CHECK: ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_u32(uint32x4_t a, uint32x4_t b) {
  return vtstq_u32(a, b);
}

// CHECK-LABEL: @test_vtstq_p8(
// CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b
// CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
// CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_p8(poly8x16_t a, poly8x16_t b) {
  return vtstq_p8(a, b);
}

// CHECK-LABEL: @test_vtstq_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = and <8 x i16> %a, %b
// CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
// CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_p16(poly16x8_t a, poly16x8_t b) {
  return vtstq_p16(a, b);
}
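// vuzp de-interleaves: val[0] collects the even-indexed elements of the
// concatenation of a and b, val[1] the odd-indexed ones. As with vtrn, the
// pair comes back through sret, so the checks again match two stores.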
// CHECK: @test_vuzp_s8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i8> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i8> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope
// CHECK: ret void
int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) {
  return vuzp_s8(a, b);
}

// CHECK: @test_vuzp_s16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x i16> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) {
  return vuzp_s16(a, b);
}

// CHECK: @test_vuzp_s32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x i32> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x i32> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {
  return vuzp_s32(a, b);
}

// CHECK: @test_vuzp_u8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i8> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i8> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope
// CHECK: ret void
uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {
  return vuzp_u8(a, b);
}

// CHECK: @test_vuzp_u16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x i16> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {
  return vuzp_u16(a, b);
}

// CHECK: @test_vuzp_u32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x i32> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x i32> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) {
  return vuzp_u32(a, b);
}

// CHECK: @test_vuzp_f32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x float> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x float> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {
  return vuzp_f32(a, b);
}

// CHECK: @test_vuzp_p8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i8> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i8> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope
// CHECK: ret void
poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) {
  return vuzp_p8(a, b);
}

// CHECK: @test_vuzp_p16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x i16> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) {
  return vuzp_p16(a, b);
}

// CHECK: @test_vuzpq_s8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK: store <16 x i8> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
// CHECK: store <16 x i8> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope
// CHECK: ret void
int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {
  return vuzpq_s8(a, b);
}

// CHECK: @test_vuzpq_s16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i16> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {
  return vuzpq_s16(a, b);
}

// CHECK: @test_vuzpq_s32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x i32> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x i32> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {
  return vuzpq_s32(a, b);
}

// CHECK: @test_vuzpq_u8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK: store <16 x i8> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
// CHECK: store <16 x i8> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope
// CHECK: ret void
uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {
  return vuzpq_u8(a, b);
}

// CHECK: @test_vuzpq_u16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i16> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {
  return vuzpq_u16(a, b);
}

// CHECK: @test_vuzpq_u32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x i32> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x i32> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {
  return vuzpq_u32(a, b);
}

// CHECK: @test_vuzpq_f32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK: store <4 x float> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK: store <4 x float> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {
  return vuzpq_f32(a, b);
}

// CHECK: @test_vuzpq_p8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK: store <16 x i8> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
// CHECK: store <16 x i8> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope
// CHECK: ret void
poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {
  return vuzpq_p8(a, b);
}

// CHECK: @test_vuzpq_p16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK: store <8 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK: store <8 x i16> [[VUZP1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {
  return vuzpq_p16(a, b);
}
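// vzip is the inverse: it interleaves a and b lane by lane, val[0] zipping the
// low halves of the two inputs and val[1] the high halves.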
19791 // CHECK: @test_vzip_s8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
19792 // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
19793 // CHECK: store <8 x i8> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
19794 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
19795 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
19796 // CHECK: store <8 x i8> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope
19797 // CHECK: ret void
19798 int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) {
19799 return vzip_s8(a, b);
19802 // CHECK: @test_vzip_s16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
19803 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
19804 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
19805 // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
19806 // CHECK: store <4 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
19807 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
19808 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
19809 // CHECK: store <4 x i16> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
19810 // CHECK: ret void
19811 int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {
19812 return vzip_s16(a, b);
19815 // CHECK: @test_vzip_s32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
19816 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
19817 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
19818 // CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
19819 // CHECK: store <2 x i32> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
19820 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[AGG_RESULT]], i32 1
19821 // CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
19822 // CHECK: store <2 x i32> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
19823 // CHECK: ret void
19824 int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
19825 return vzip_s32(a, b);
19828 // CHECK: @test_vzip_u8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
19829 // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
19830 // CHECK: store <8 x i8> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
19831 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
19832 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
19833 // CHECK: store <8 x i8> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope
19834 // CHECK: ret void
19835 uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
19836 return vzip_u8(a, b);
19839 // CHECK: @test_vzip_u16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
19840 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
19841 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
19842 // CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
19843 // CHECK: store <4 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
19844 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
19845 // CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
19846 // CHECK: store <4 x i16> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
19847 // CHECK: ret void
19848 uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
19849 return vzip_u16(a, b);
19852 // CHECK: @test_vzip_u32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
19853 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
19854 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
19855 // CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
19856 // CHECK: store <2 x i32> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
19857 // CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[AGG_RESULT]], i32 1
19858 // CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
19859 // CHECK: store <2 x i32> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
19860 // CHECK: ret void
19861 uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
19862 return vzip_u32(a, b);
// CHECK: @test_vzip_f32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
// CHECK: store <2 x float> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
// CHECK: store <2 x float> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
  return vzip_f32(a, b);
}

// CHECK: @test_vzip_p8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: store <8 x i8> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: store <8 x i8> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope
// CHECK: ret void
poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
  return vzip_p8(a, b);
}

// CHECK: @test_vzip_p16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: store <4 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: store <4 x i16> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
  return vzip_p16(a, b);
}

// CHECK: @test_vzipq_s8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
// CHECK: store <16 x i8> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
// CHECK: store <16 x i8> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope
// CHECK: ret void
int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
  return vzipq_s8(a, b);
}

// CHECK: @test_vzipq_s16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: store <8 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: store <8 x i16> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
  return vzipq_s16(a, b);
}

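// The q-register forms follow the same shape on 128-bit vectors: val[0] zips
// the low halves of the operands and val[1] zips the high halves, matching
// the two shuffle masks verified above. A hypothetical model of vzipq_s16,
// again for illustration only and not part of the test corpus:
static int16x8x2_t vzipq_s16_model(int16x8_t a, int16x8_t b) {
  int16x8x2_t r;
  r.val[0] = __builtin_shufflevector(a, b, 0, 8, 1, 9, 2, 10, 3, 11);   // low halves interleaved
  r.val[1] = __builtin_shufflevector(a, b, 4, 12, 5, 13, 6, 14, 7, 15); // high halves interleaved
  return r;
}
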
// CHECK: @test_vzipq_s32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: store <4 x i32> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: store <4 x i32> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
  return vzipq_s32(a, b);
}

// CHECK: @test_vzipq_u8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
// CHECK: store <16 x i8> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
// CHECK: store <16 x i8> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope
// CHECK: ret void
uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
  return vzipq_u8(a, b);
}

// CHECK: @test_vzipq_u16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: store <8 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: store <8 x i16> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
  return vzipq_u16(a, b);
}

// CHECK: @test_vzipq_u32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: store <4 x i32> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: store <4 x i32> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
  return vzipq_u32(a, b);
}

// CHECK: @test_vzipq_f32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK: store <4 x float> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK: store <4 x float> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
  return vzipq_f32(a, b);
}

// CHECK: @test_vzipq_p8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
// CHECK: store <16 x i8> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
// CHECK: store <16 x i8> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope
// CHECK: ret void
poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
  return vzipq_p8(a, b);
}

// CHECK: @test_vzipq_p16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK: store <8 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope
// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1
// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK: store <8 x i16> [[VZIP1_I]], ptr [[TMP4]], align 4, !alias.scope
// CHECK: ret void
poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) {
  return vzipq_p16(a, b);
}
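
// Note: every vzip/vzipq test returns a two-vector aggregate, so the result
// comes back indirectly through the sret pointer captured as AGG_RESULT in
// the patterns above: the first half is stored at offset 0, the second at
// the next vector-sized slot, and the function itself returns void, which is
// why each test verifies two stores followed by a void return.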