Revert "[llvm] Improve llvm.objectsize computation by computing GEP, alloca and mallo...
[llvm-project.git] / clang / test / CodeGen / arm-neon-vld.c
blob4ea44777345e37728ae19fdad96c5084d55d4130
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | \
// RUN: FileCheck -check-prefixes=CHECK,CHECK-A64 %s
// RUN: %clang_cc1 -triple armv8-none-linux-gnueabi -target-feature +neon \
// RUN: -target-feature +fp16 -disable-O0-optnone -emit-llvm -o - %s | \
// RUN: opt -S -passes=mem2reg | FileCheck -check-prefixes=CHECK,CHECK-A32 %s

// REQUIRES: aarch64-registered-target || arm-registered-target

#include <arm_neon.h>
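
// The tests below exercise the vld1[q]_<type>_x{2,3,4} intrinsics: each call is
// expected to lower to the matching llvm.aarch64.neon.ld1x{2,3,4} (AArch64) or
// llvm.arm.neon.vld1x{2,3,4} (AArch32) intrinsic, whose struct-of-vectors result
// is stored to a local aggregate and memcpy'd into the returned value (directly
// on AArch64, via an sret pointer on AArch32).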

// CHECK-LABEL: @test_vld1_f16_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.float16x4x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <4 x [[HALF:half|i16]]>, <4 x [[HALF]]> } @llvm.{{aarch64.neon.ld1x2.v4f16.p0|arm.neon.vld1x2.v4i16.p0}}(ptr %a)
// CHECK: store { <4 x [[HALF]]>, <4 x [[HALF]]> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.float16x4x2_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.float16x4x2_t [[TMP6]]
// CHECK-A32: ret void
float16x4x2_t test_vld1_f16_x2(float16_t const *a) {
  return vld1_f16_x2(a);
}

// CHECK-LABEL: @test_vld1_f16_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.float16x4x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <4 x [[HALF:half|i16]]>, <4 x [[HALF]]>, <4 x [[HALF]]> } @llvm.{{aarch64.neon.ld1x3.v4f16.p0|arm.neon.vld1x3.v4i16.p0}}(ptr %a)
// CHECK: store { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.float16x4x3_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.float16x4x3_t [[TMP6]]
// CHECK-A32: ret void
float16x4x3_t test_vld1_f16_x3(float16_t const *a) {
  return vld1_f16_x3(a);
}

// CHECK-LABEL: @test_vld1_f16_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.float16x4x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <4 x [[HALF:half|i16]]>, <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> } @llvm.{{aarch64.neon.ld1x4.v4f16.p0|arm.neon.vld1x4.v4i16.p0}}(ptr %a)
// CHECK: store { <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.float16x4x4_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.float16x4x4_t [[TMP6]]
// CHECK-A32: ret void
float16x4x4_t test_vld1_f16_x4(float16_t const *a) {
  return vld1_f16_x4(a);
}

// CHECK-LABEL: @test_vld1_f32_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.float32x2x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <2 x float>, <2 x float> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v2f32.p0(ptr %a)
// CHECK: store { <2 x float>, <2 x float> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.float32x2x2_t [[TMP6]]
// CHECK-A32: ret void
float32x2x2_t test_vld1_f32_x2(float32_t const *a) {
  return vld1_f32_x2(a);
}

// CHECK-LABEL: @test_vld1_f32_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.float32x2x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v2f32.p0(ptr %a)
// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.float32x2x3_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.float32x2x3_t [[TMP6]]
float32x2x3_t test_vld1_f32_x3(float32_t const *a) {
  return vld1_f32_x3(a);
}

// CHECK-LABEL: @test_vld1_f32_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.float32x2x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v2f32.p0(ptr %a)
// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.float32x2x4_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.float32x2x4_t [[TMP6]]
// CHECK-A32: ret void
float32x2x4_t test_vld1_f32_x4(float32_t const *a) {
  return vld1_f32_x4(a);
}

// CHECK-LABEL: @test_vld1_p16_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.poly16x4x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v4i16.p0(ptr %a)
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.poly16x4x2_t [[TMP6]]
// CHECK-A32: ret void
poly16x4x2_t test_vld1_p16_x2(poly16_t const *a) {
  return vld1_p16_x2(a);
}

// CHECK-LABEL: @test_vld1_p16_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.poly16x4x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v4i16.p0(ptr %a)
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.poly16x4x3_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.poly16x4x3_t [[TMP6]]
// CHECK-A32: ret void
poly16x4x3_t test_vld1_p16_x3(poly16_t const *a) {
  return vld1_p16_x3(a);
}

// CHECK-LABEL: @test_vld1_p16_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.poly16x4x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v4i16.p0(ptr %a)
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.poly16x4x4_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.poly16x4x4_t [[TMP6]]
// CHECK-A32: ret void
poly16x4x4_t test_vld1_p16_x4(poly16_t const *a) {
  return vld1_p16_x4(a);
}

// CHECK-LABEL: @test_vld1_p8_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.poly8x8x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v8i8.p0(ptr %a)
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.poly8x8x2_t [[TMP4]]
// CHECK-A32: ret void
poly8x8x2_t test_vld1_p8_x2(poly8_t const *a) {
  return vld1_p8_x2(a);
}

// CHECK-LABEL: @test_vld1_p8_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.poly8x8x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v8i8.p0(ptr %a)
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.poly8x8x3_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.poly8x8x3_t [[TMP4]]
// CHECK-A32: ret void
poly8x8x3_t test_vld1_p8_x3(poly8_t const *a) {
  return vld1_p8_x3(a);
}

// CHECK-LABEL: @test_vld1_p8_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.poly8x8x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v8i8.p0(ptr %a)
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.poly8x8x4_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.poly8x8x4_t [[TMP4]]
// CHECK-A32: ret void
poly8x8x4_t test_vld1_p8_x4(poly8_t const *a) {
  return vld1_p8_x4(a);
}

// CHECK-LABEL: @test_vld1_s16_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int16x4x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v4i16.p0(ptr %a)
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.int16x4x2_t [[TMP6]]
// CHECK-A32: ret void
int16x4x2_t test_vld1_s16_x2(int16_t const *a) {
  return vld1_s16_x2(a);
}

// CHECK-LABEL: @test_vld1_s16_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int16x4x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v4i16.p0(ptr %a)
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int16x4x3_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.int16x4x3_t [[TMP6]]
// CHECK-A32: ret void
int16x4x3_t test_vld1_s16_x3(int16_t const *a) {
  return vld1_s16_x3(a);
}

// CHECK-LABEL: @test_vld1_s16_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int16x4x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v4i16.p0(ptr %a)
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int16x4x4_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.int16x4x4_t [[TMP6]]
// CHECK-A32: ret void
int16x4x4_t test_vld1_s16_x4(int16_t const *a) {
  return vld1_s16_x4(a);
}

// CHECK-LABEL: @test_vld1_s32_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int32x2x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v2i32.p0(ptr %a)
// CHECK: store { <2 x i32>, <2 x i32> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.int32x2x2_t [[TMP6]]
// CHECK-A32: ret void
int32x2x2_t test_vld1_s32_x2(int32_t const *a) {
  return vld1_s32_x2(a);
}

// CHECK-LABEL: @test_vld1_s32_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int32x2x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v2i32.p0(ptr %a)
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int32x2x3_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.int32x2x3_t [[TMP6]]
// CHECK-A32: ret void
int32x2x3_t test_vld1_s32_x3(int32_t const *a) {
  return vld1_s32_x3(a);
}

// CHECK-LABEL: @test_vld1_s32_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int32x2x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v2i32.p0(ptr %a)
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int32x2x4_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.int32x2x4_t [[TMP6]]
// CHECK-A32: ret void
int32x2x4_t test_vld1_s32_x4(int32_t const *a) {
  return vld1_s32_x4(a);
}

// CHECK-LABEL: @test_vld1_s64_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int64x1x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v1i64.p0(ptr %a)
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int64x1x2_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.int64x1x2_t [[TMP6]]
// CHECK-A32: ret void
int64x1x2_t test_vld1_s64_x2(int64_t const *a) {
  return vld1_s64_x2(a);
}

// CHECK-LABEL: @test_vld1_s64_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int64x1x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v1i64.p0(ptr %a)
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int64x1x3_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.int64x1x3_t [[TMP6]]
// CHECK-A32: ret void
int64x1x3_t test_vld1_s64_x3(int64_t const *a) {
  return vld1_s64_x3(a);
}

// CHECK-LABEL: @test_vld1_s64_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int64x1x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v1i64.p0(ptr %a)
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int64x1x4_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.int64x1x4_t [[TMP6]]
// CHECK-A32: ret void
int64x1x4_t test_vld1_s64_x4(int64_t const *a) {
  return vld1_s64_x4(a);
}

// CHECK-LABEL: @test_vld1_s8_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int8x8x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v8i8.p0(ptr %a)
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.int8x8x2_t [[TMP4]]
// CHECK-A32: ret void
int8x8x2_t test_vld1_s8_x2(int8_t const *a) {
  return vld1_s8_x2(a);
}

// CHECK-LABEL: @test_vld1_s8_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int8x8x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v8i8.p0(ptr %a)
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.int8x8x3_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.int8x8x3_t [[TMP4]]
// CHECK-A32: ret void
int8x8x3_t test_vld1_s8_x3(int8_t const *a) {
  return vld1_s8_x3(a);
}

// CHECK-LABEL: @test_vld1_s8_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int8x8x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v8i8.p0(ptr %a)
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.int8x8x4_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.int8x8x4_t [[TMP4]]
// CHECK-A32: ret void
int8x8x4_t test_vld1_s8_x4(int8_t const *a) {
  return vld1_s8_x4(a);
}

// CHECK-LABEL: @test_vld1_u16_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint16x4x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v4i16.p0(ptr %a)
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint16x4x2_t [[TMP6]]
// CHECK-A32: ret void
uint16x4x2_t test_vld1_u16_x2(uint16_t const *a) {
  return vld1_u16_x2(a);
}

// CHECK-LABEL: @test_vld1_u16_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint16x4x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v4i16.p0(ptr %a)
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.uint16x4x3_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint16x4x3_t [[TMP6]]
// CHECK-A32: ret void
uint16x4x3_t test_vld1_u16_x3(uint16_t const *a) {
  return vld1_u16_x3(a);
}

// CHECK-LABEL: @test_vld1_u16_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint16x4x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v4i16.p0(ptr %a)
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.uint16x4x4_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint16x4x4_t [[TMP6]]
// CHECK-A32: ret void
uint16x4x4_t test_vld1_u16_x4(uint16_t const *a) {
  return vld1_u16_x4(a);
}

// CHECK-LABEL: @test_vld1_u32_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint32x2x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v2i32.p0(ptr %a)
// CHECK: store { <2 x i32>, <2 x i32> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint32x2x2_t [[TMP6]]
// CHECK-A32: ret void
uint32x2x2_t test_vld1_u32_x2(uint32_t const *a) {
  return vld1_u32_x2(a);
}

// CHECK-LABEL: @test_vld1_u32_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint32x2x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v2i32.p0(ptr %a)
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.uint32x2x3_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint32x2x3_t [[TMP6]]
// CHECK-A32: ret void
uint32x2x3_t test_vld1_u32_x3(uint32_t const *a) {
  return vld1_u32_x3(a);
}

// CHECK-LABEL: @test_vld1_u32_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint32x2x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v2i32.p0(ptr %a)
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.uint32x2x4_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint32x2x4_t [[TMP6]]
// CHECK-A32: ret void
uint32x2x4_t test_vld1_u32_x4(uint32_t const *a) {
  return vld1_u32_x4(a);
}

// CHECK-LABEL: @test_vld1_u64_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint64x1x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v1i64.p0(ptr %a)
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.uint64x1x2_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint64x1x2_t [[TMP6]]
// CHECK-A32: ret void
uint64x1x2_t test_vld1_u64_x2(uint64_t const *a) {
  return vld1_u64_x2(a);
}

// CHECK-LABEL: @test_vld1_u64_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint64x1x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v1i64.p0(ptr %a)
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.uint64x1x3_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint64x1x3_t [[TMP6]]
// CHECK-A32: ret void
uint64x1x3_t test_vld1_u64_x3(uint64_t const *a) {
  return vld1_u64_x3(a);
}

// CHECK-LABEL: @test_vld1_u64_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint64x1x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v1i64.p0(ptr %a)
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.uint64x1x4_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint64x1x4_t [[TMP6]]
// CHECK-A32: ret void
uint64x1x4_t test_vld1_u64_x4(uint64_t const *a) {
  return vld1_u64_x4(a);
}

// CHECK-LABEL: @test_vld1_u8_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint8x8x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v8i8.p0(ptr %a)
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint8x8x2_t [[TMP4]]
// CHECK-A32: ret void
uint8x8x2_t test_vld1_u8_x2(uint8_t const *a) {
  return vld1_u8_x2(a);
}

// CHECK-LABEL: @test_vld1_u8_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint8x8x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v8i8.p0(ptr %a)
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.uint8x8x3_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint8x8x3_t [[TMP4]]
// CHECK-A32: ret void
uint8x8x3_t test_vld1_u8_x3(uint8_t const *a) {
  return vld1_u8_x3(a);
}

// CHECK-LABEL: @test_vld1_u8_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint8x8x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v8i8.p0(ptr %a)
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.uint8x8x4_t, ptr [[RETVAL]], align 8
// CHECK-A64: ret %struct.uint8x8x4_t [[TMP4]]
// CHECK-A32: ret void
uint8x8x4_t test_vld1_u8_x4(uint8_t const *a) {
  return vld1_u8_x4(a);
}

// CHECK-LABEL: @test_vld1q_f16_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.float16x8x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <8 x [[HALF:half|i16]]>, <8 x [[HALF]]> } @llvm.{{aarch64.neon.ld1x2.v8f16.p0|arm.neon.vld1x2.v8i16.p0}}(ptr %a)
// CHECK: store { <8 x [[HALF]]>, <8 x [[HALF]]> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.float16x8x2_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.float16x8x2_t [[TMP6]]
// CHECK-A32: ret void
float16x8x2_t test_vld1q_f16_x2(float16_t const *a) {
  return vld1q_f16_x2(a);
}

// CHECK-LABEL: @test_vld1q_f16_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.float16x8x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <8 x [[HALF:half|i16]]>, <8 x [[HALF]]>, <8 x [[HALF]]> } @llvm.{{aarch64.neon.ld1x3.v8f16.p0|arm.neon.vld1x3.v8i16.p0}}(ptr %a)
// CHECK: store { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 48, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.float16x8x3_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.float16x8x3_t [[TMP6]]
// CHECK-A32: ret void
float16x8x3_t test_vld1q_f16_x3(float16_t const *a) {
  return vld1q_f16_x3(a);
}

// CHECK-LABEL: @test_vld1q_f16_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.float16x8x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <8 x [[HALF:half|i16]]>, <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> } @llvm.{{aarch64.neon.ld1x4.v8f16.p0|arm.neon.vld1x4.v8i16.p0}}(ptr %a)
// CHECK: store { <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 64, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.float16x8x4_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.float16x8x4_t [[TMP6]]
// CHECK-A32: ret void
float16x8x4_t test_vld1q_f16_x4(float16_t const *a) {
  return vld1q_f16_x4(a);
}

// CHECK-LABEL: @test_vld1q_f32_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.float32x4x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <4 x float>, <4 x float> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v4f32.p0(ptr %a)
// CHECK: store { <4 x float>, <4 x float> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.float32x4x2_t [[TMP6]]
// CHECK-A32: ret void
float32x4x2_t test_vld1q_f32_x2(float32_t const *a) {
  return vld1q_f32_x2(a);
}

// CHECK-LABEL: @test_vld1q_f32_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.float32x4x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v4f32.p0(ptr %a)
// CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 48, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.float32x4x3_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.float32x4x3_t [[TMP6]]
// CHECK-A32: ret void
float32x4x3_t test_vld1q_f32_x3(float32_t const *a) {
  return vld1q_f32_x3(a);
}

// CHECK-LABEL: @test_vld1q_f32_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.float32x4x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v4f32.p0(ptr %a)
// CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 64, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.float32x4x4_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.float32x4x4_t [[TMP6]]
// CHECK-A32: ret void
float32x4x4_t test_vld1q_f32_x4(float32_t const *a) {
  return vld1q_f32_x4(a);
}

// CHECK-LABEL: @test_vld1q_p16_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.poly16x8x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v8i16.p0(ptr %a)
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.poly16x8x2_t [[TMP6]]
// CHECK-A32: ret void
poly16x8x2_t test_vld1q_p16_x2(poly16_t const *a) {
  return vld1q_p16_x2(a);
}

// CHECK-LABEL: @test_vld1q_p16_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.poly16x8x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v8i16.p0(ptr %a)
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 48, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.poly16x8x3_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.poly16x8x3_t [[TMP6]]
// CHECK-A32: ret void
poly16x8x3_t test_vld1q_p16_x3(poly16_t const *a) {
  return vld1q_p16_x3(a);
}

// CHECK-LABEL: @test_vld1q_p16_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.poly16x8x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v8i16.p0(ptr %a)
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 64, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.poly16x8x4_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.poly16x8x4_t [[TMP6]]
// CHECK-A32: ret void
poly16x8x4_t test_vld1q_p16_x4(poly16_t const *a) {
  return vld1q_p16_x4(a);
}

// CHECK-LABEL: @test_vld1q_p8_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.poly8x16x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v16i8.p0(ptr %a)
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.poly8x16x2_t [[TMP4]]
// CHECK-A32: ret void
poly8x16x2_t test_vld1q_p8_x2(poly8_t const *a) {
  return vld1q_p8_x2(a);
}

// CHECK-LABEL: @test_vld1q_p8_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.poly8x16x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v16i8.p0(ptr %a)
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 48, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.poly8x16x3_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.poly8x16x3_t [[TMP4]]
// CHECK-A32: ret void
poly8x16x3_t test_vld1q_p8_x3(poly8_t const *a) {
  return vld1q_p8_x3(a);
}

// CHECK-LABEL: @test_vld1q_p8_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.poly8x16x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v16i8.p0(ptr %a)
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 64, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.poly8x16x4_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.poly8x16x4_t [[TMP4]]
// CHECK-A32: ret void
poly8x16x4_t test_vld1q_p8_x4(poly8_t const *a) {
  return vld1q_p8_x4(a);
}

// CHECK-LABEL: @test_vld1q_s16_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int16x8x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v8i16.p0(ptr %a)
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.int16x8x2_t [[TMP6]]
// CHECK-A32: ret void
int16x8x2_t test_vld1q_s16_x2(int16_t const *a) {
  return vld1q_s16_x2(a);
}

// CHECK-LABEL: @test_vld1q_s16_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int16x8x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v8i16.p0(ptr %a)
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 48, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int16x8x3_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.int16x8x3_t [[TMP6]]
// CHECK-A32: ret void
int16x8x3_t test_vld1q_s16_x3(int16_t const *a) {
  return vld1q_s16_x3(a);
}

// CHECK-LABEL: @test_vld1q_s16_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int16x8x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v8i16.p0(ptr %a)
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 64, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int16x8x4_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.int16x8x4_t [[TMP6]]
// CHECK-A32: ret void
int16x8x4_t test_vld1q_s16_x4(int16_t const *a) {
  return vld1q_s16_x4(a);
}

// CHECK-LABEL: @test_vld1q_s32_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int32x4x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v4i32.p0(ptr %a)
// CHECK: store { <4 x i32>, <4 x i32> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.int32x4x2_t [[TMP6]]
// CHECK-A32: ret void
int32x4x2_t test_vld1q_s32_x2(int32_t const *a) {
  return vld1q_s32_x2(a);
}

// CHECK-LABEL: @test_vld1q_s32_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int32x4x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v4i32.p0(ptr %a)
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 48, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int32x4x3_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.int32x4x3_t [[TMP6]]
// CHECK-A32: ret void
int32x4x3_t test_vld1q_s32_x3(int32_t const *a) {
  return vld1q_s32_x3(a);
}

// CHECK-LABEL: @test_vld1q_s32_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int32x4x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v4i32.p0(ptr %a)
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 64, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int32x4x4_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.int32x4x4_t [[TMP6]]
// CHECK-A32: ret void
int32x4x4_t test_vld1q_s32_x4(int32_t const *a) {
  return vld1q_s32_x4(a);
}

// CHECK-LABEL: @test_vld1q_s64_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int64x2x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v2i64.p0(ptr %a)
// CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int64x2x2_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.int64x2x2_t [[TMP6]]
// CHECK-A32: ret void
int64x2x2_t test_vld1q_s64_x2(int64_t const *a) {
  return vld1q_s64_x2(a);
}

// CHECK-LABEL: @test_vld1q_s64_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int64x2x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v2i64.p0(ptr %a)
// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 48, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int64x2x3_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.int64x2x3_t [[TMP6]]
// CHECK-A32: ret void
int64x2x3_t test_vld1q_s64_x3(int64_t const *a) {
  return vld1q_s64_x3(a);
}

// CHECK-LABEL: @test_vld1q_s64_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int64x2x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v2i64.p0(ptr %a)
// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 64, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.int64x2x4_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.int64x2x4_t [[TMP6]]
// CHECK-A32: ret void
int64x2x4_t test_vld1q_s64_x4(int64_t const *a) {
  return vld1q_s64_x4(a);
}

// CHECK-LABEL: @test_vld1q_s8_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int8x16x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v16i8.p0(ptr %a)
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.int8x16x2_t [[TMP4]]
// CHECK-A32: ret void
int8x16x2_t test_vld1q_s8_x2(int8_t const *a) {
  return vld1q_s8_x2(a);
}

// CHECK-LABEL: @test_vld1q_s8_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int8x16x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v16i8.p0(ptr %a)
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 48, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.int8x16x3_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.int8x16x3_t [[TMP4]]
// CHECK-A32: ret void
int8x16x3_t test_vld1q_s8_x3(int8_t const *a) {
  return vld1q_s8_x3(a);
}

// CHECK-LABEL: @test_vld1q_s8_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.int8x16x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v16i8.p0(ptr %a)
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 64, i1 false)
// CHECK-A64: [[TMP4:%.*]] = load %struct.int8x16x4_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.int8x16x4_t [[TMP4]]
// CHECK-A32: ret void
int8x16x4_t test_vld1q_s8_x4(int8_t const *a) {
  return vld1q_s8_x4(a);
}

// CHECK-LABEL: @test_vld1q_u16_x2(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint16x8x2_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v8i16.p0(ptr %a)
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 32, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.uint16x8x2_t [[TMP6]]
// CHECK-A32: ret void
uint16x8x2_t test_vld1q_u16_x2(uint16_t const *a) {
  return vld1q_u16_x2(a);
}

// CHECK-LABEL: @test_vld1q_u16_x3(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint16x8x3_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v8i16.p0(ptr %a)
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 48, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.uint16x8x3_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.uint16x8x3_t [[TMP6]]
// CHECK-A32: ret void
uint16x8x3_t test_vld1q_u16_x3(uint16_t const *a) {
  return vld1q_u16_x3(a);
}

// CHECK-LABEL: @test_vld1q_u16_x4(
// CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint16x8x4_t) align 8 [[RETVAL:%.*]],
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align {{16|8}}
// CHECK: [[VLD1XN:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v8i16.p0(ptr %a)
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 64, i1 false)
// CHECK-A64: [[TMP6:%.*]] = load %struct.uint16x8x4_t, ptr [[RETVAL]], align 16
// CHECK-A64: ret %struct.uint16x8x4_t [[TMP6]]
// CHECK-A32: ret void
uint16x8x4_t test_vld1q_u16_x4(uint16_t const *a) {
  return vld1q_u16_x4(a);
}
893 // CHECK-LABEL: @test_vld1q_u32_x2(
894 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
895 // CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint32x4x2_t) align 8 [[RETVAL:%.*]],
896 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align {{16|8}}
897 // CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v4i32.p0(ptr %a)
898 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD1XN]], ptr [[__RET]]
899 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 32, i1 false)
900 // CHECK-A64: [[TMP6:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL]], align 16
901 // CHECK-A64: ret %struct.uint32x4x2_t [[TMP6]]
902 // CHECK-A32: ret void
903 uint32x4x2_t test_vld1q_u32_x2(uint32_t const *a) {
904 return vld1q_u32_x2(a);
905 }
907 // CHECK-LABEL: @test_vld1q_u32_x3(
908 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
909 // CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint32x4x3_t) align 8 [[RETVAL:%.*]],
910 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align {{16|8}}
911 // CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v4i32.p0(ptr %a)
912 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], ptr [[__RET]]
913 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 48, i1 false)
914 // CHECK-A64: [[TMP6:%.*]] = load %struct.uint32x4x3_t, ptr [[RETVAL]], align 16
915 // CHECK-A64: ret %struct.uint32x4x3_t [[TMP6]]
916 // CHECK-A32: ret void
917 uint32x4x3_t test_vld1q_u32_x3(uint32_t const *a) {
918 return vld1q_u32_x3(a);
919 }
921 // CHECK-LABEL: @test_vld1q_u32_x4(
922 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
923 // CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint32x4x4_t) align 8 [[RETVAL:%.*]],
924 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align {{16|8}}
925 // CHECK: [[VLD1XN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v4i32.p0(ptr %a)
926 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD1XN]], ptr [[__RET]]
927 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 64, i1 false)
928 // CHECK-A64: [[TMP6:%.*]] = load %struct.uint32x4x4_t, ptr [[RETVAL]], align 16
929 // CHECK-A64: ret %struct.uint32x4x4_t [[TMP6]]
930 // CHECK-A32: ret void
931 uint32x4x4_t test_vld1q_u32_x4(uint32_t const *a) {
932 return vld1q_u32_x4(a);
933 }
935 // CHECK-LABEL: @test_vld1q_u64_x2(
936 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
937 // CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint64x2x2_t) align 8 [[RETVAL:%.*]],
938 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align {{16|8}}
939 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v2i64.p0(ptr %a)
940 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]]
941 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 32, i1 false)
942 // CHECK-A64: [[TMP6:%.*]] = load %struct.uint64x2x2_t, ptr [[RETVAL]], align 16
943 // CHECK-A64: ret %struct.uint64x2x2_t [[TMP6]]
944 // CHECK-A32: ret void
945 uint64x2x2_t test_vld1q_u64_x2(uint64_t const *a) {
946 return vld1q_u64_x2(a);
947 }
949 // CHECK-LABEL: @test_vld1q_u64_x3(
950 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
951 // CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint64x2x3_t) align 8 [[RETVAL:%.*]],
952 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align {{16|8}}
953 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v2i64.p0(ptr %a)
954 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]]
955 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 48, i1 false)
956 // CHECK-A64: [[TMP6:%.*]] = load %struct.uint64x2x3_t, ptr [[RETVAL]], align 16
957 // CHECK-A64: ret %struct.uint64x2x3_t [[TMP6]]
958 // CHECK-A32: ret void
959 uint64x2x3_t test_vld1q_u64_x3(uint64_t const *a) {
960 return vld1q_u64_x3(a);
961 }
963 // CHECK-LABEL: @test_vld1q_u64_x4(
964 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
965 // CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint64x2x4_t) align 8 [[RETVAL:%.*]],
966 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align {{16|8}}
967 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v2i64.p0(ptr %a)
968 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]]
969 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 64, i1 false)
970 // CHECK-A64: [[TMP6:%.*]] = load %struct.uint64x2x4_t, ptr [[RETVAL]], align 16
971 // CHECK-A64: ret %struct.uint64x2x4_t [[TMP6]]
972 // CHECK-A32: ret void
973 uint64x2x4_t test_vld1q_u64_x4(uint64_t const *a) {
974 return vld1q_u64_x4(a);
975 }
977 // CHECK-LABEL: @test_vld1q_u8_x2(
978 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
979 // CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint8x16x2_t) align 8 [[RETVAL:%.*]],
980 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align {{16|8}}
981 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v16i8.p0(ptr %a)
982 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD1XN]], ptr [[__RET]]
983 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 32, i1 false)
984 // CHECK-A64: [[TMP4:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL]], align 16
985 // CHECK-A64: ret %struct.uint8x16x2_t [[TMP4]]
986 // CHECK-A32: ret void
987 uint8x16x2_t test_vld1q_u8_x2(uint8_t const *a) {
988 return vld1q_u8_x2(a);
989 }
991 // CHECK-LABEL: @test_vld1q_u8_x3(
992 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
993 // CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint8x16x3_t) align 8 [[RETVAL:%.*]],
994 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align {{16|8}}
995 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v16i8.p0(ptr %a)
996 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], ptr [[__RET]]
997 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 48, i1 false)
998 // CHECK-A64: [[TMP4:%.*]] = load %struct.uint8x16x3_t, ptr [[RETVAL]], align 16
999 // CHECK-A64: ret %struct.uint8x16x3_t [[TMP4]]
1000 // CHECK-A32: ret void
1001 uint8x16x3_t test_vld1q_u8_x3(uint8_t const *a) {
1002 return vld1q_u8_x3(a);
1003 }
1005 // CHECK-LABEL: @test_vld1q_u8_x4(
1006 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
1007 // CHECK-A32: ptr dead_on_unwind noalias writable sret(%struct.uint8x16x4_t) align 8 [[RETVAL:%.*]],
1008 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align {{16|8}}
1009 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v16i8.p0(ptr %a)
1010 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], ptr [[__RET]]
1011 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} [[RETVAL]], ptr align {{16|8}} [[__RET]], {{i64|i32}} 64, i1 false)
1012 // CHECK-A64: [[TMP4:%.*]] = load %struct.uint8x16x4_t, ptr [[RETVAL]], align 16
1013 // CHECK-A64: ret %struct.uint8x16x4_t [[TMP4]]
1014 // CHECK-A32: ret void
1015 uint8x16x4_t test_vld1q_u8_x4(uint8_t const *a) {
1016 return vld1q_u8_x4(a);
1017 }
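// The tests below cover the vldN_dup (load-and-replicate) forms, which load one
// element per register and duplicate it into every lane. As the checks show,
// AArch64 lowers these to @llvm.aarch64.neon.ldNr, while AArch32 uses
// @llvm.arm.neon.vldNdup with a trailing i32 operand (the element alignment,
// judging by the values 1, 2, 4 and 8 used below).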
1019 // CHECK-LABEL: @test_vld2_dup_f16(
1020 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
1021 // CHECK-A64: [[VLD2:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2r.v4f16.p0(ptr %src)
1022 // CHECK-A32: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2dup.v4i16.p0(ptr %src, i32 2)
1023 // CHECK: store { <4 x [[HALF:half|i16]]>, <4 x [[HALF]]> } [[VLD2]], ptr [[__RET]]
1024 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
1025 // CHECK: ret void
1026 void test_vld2_dup_f16(float16x4x2_t *dest, const float16_t *src) {
1027 *dest = vld2_dup_f16(src);
1028 }
1030 // CHECK-LABEL: @test_vld2_dup_f32(
1031 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
1032 // CHECK-A64: [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0(ptr %src)
1033 // CHECK-A32: [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2dup.v2f32.p0(ptr %src, i32 4)
1034 // CHECK: store { <2 x float>, <2 x float> } [[VLD2]], ptr [[__RET]]
1035 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
1036 // CHECK: ret void
1037 void test_vld2_dup_f32(float32x2x2_t *dest, const float32_t *src) {
1038 *dest = vld2_dup_f32(src);
1039 }
1041 // CHECK-LABEL: @test_vld2_dup_p16(
1042 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
1043 // CHECK-A64: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0(ptr %src)
1044 // CHECK-A32: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2dup.v4i16.p0(ptr %src, i32 2)
1045 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], ptr [[__RET]]
1046 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
1047 // CHECK: ret void
1048 void test_vld2_dup_p16(poly16x4x2_t *dest, const poly16_t *src) {
1049 *dest = vld2_dup_p16(src);
1050 }
1052 // CHECK-LABEL: @test_vld2_dup_p8(
1053 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
1054 // CHECK-A64: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0(ptr %src)
1055 // CHECK-A32: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2dup.v8i8.p0(ptr %src, i32 1)
1056 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], ptr [[__RET]]
1057 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
1058 // CHECK: ret void
1059 void test_vld2_dup_p8(poly8x8x2_t *dest, poly8_t *src) {
1060 *dest = vld2_dup_p8(src);
1061 }
1063 // CHECK-LABEL: @test_vld2_dup_s16(
1064 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
1065 // CHECK-A64: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0(ptr %src)
1066 // CHECK-A32: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2dup.v4i16.p0(ptr %src, i32 2)
1067 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], ptr [[__RET]]
1068 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
1069 // CHECK: ret void
1070 void test_vld2_dup_s16(int16x4x2_t *dest, const int16_t *src) {
1071 *dest = vld2_dup_s16(src);
1072 }
1074 // CHECK-LABEL: @test_vld2_dup_s32(
1075 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
1076 // CHECK-A64: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0(ptr %src)
1077 // CHECK-A32: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2dup.v2i32.p0(ptr %src, i32 4)
1078 // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], ptr [[__RET]]
1079 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
1080 // CHECK: ret void
1081 void test_vld2_dup_s32(int32x2x2_t *dest, const int32_t *src) {
1082 *dest = vld2_dup_s32(src);
1083 }
1085 // CHECK-LABEL: @test_vld2_dup_s8(
1086 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
1087 // CHECK-A64: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0(ptr %src)
1088 // CHECK-A32: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2dup.v8i8.p0(ptr %src, i32 1)
1089 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], ptr [[__RET]]
1090 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
1091 // CHECK: ret void
1092 void test_vld2_dup_s8(int8x8x2_t *dest, int8_t *src) {
1093 *dest = vld2_dup_s8(src);
1094 }
1096 // CHECK-LABEL: @test_vld2_dup_u16(
1097 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
1098 // CHECK-A64: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0(ptr %src)
1099 // CHECK-A32: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2dup.v4i16.p0(ptr %src, i32 2)
1100 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], ptr [[__RET]]
1101 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
1102 // CHECK: ret void
1103 void test_vld2_dup_u16(uint16x4x2_t *dest, const uint16_t *src) {
1104 *dest = vld2_dup_u16(src);
1105 }
1107 // CHECK-LABEL: @test_vld2_dup_u32(
1108 // CHECK: entry:
1109 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
1110 // CHECK-A64: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0(ptr %src)
1111 // CHECK-A32: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2dup.v2i32.p0(ptr %src, i32 4)
1112 // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], ptr [[__RET]]
1113 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
1114 // CHECK: ret void
1115 void test_vld2_dup_u32(uint32x2x2_t *dest, const uint32_t *src) {
1116 *dest = vld2_dup_u32(src);
1117 }
1119 // CHECK-LABEL: @test_vld2_dup_s64(
1120 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
1121 // CHECK-A64: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0(ptr %src)
1122 // CHECK-A32: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2dup.v1i64.p0(ptr %src, i32 8)
1123 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], ptr [[__RET]]
1124 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
1125 // CHECK: ret void
1126 void test_vld2_dup_s64(int64x1x2_t *dest, const int64_t *src) {
1127 *dest = vld2_dup_s64(src);
1128 }
1130 // CHECK-LABEL: @test_vld2_dup_u64(
1131 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
1132 // CHECK-A64: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0(ptr %src)
1133 // CHECK-A32: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2dup.v1i64.p0(ptr %src, i32 8)
1134 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], ptr [[__RET]]
1135 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
1136 // CHECK: ret void
1137 void test_vld2_dup_u64(uint64x1x2_t *dest, const uint64_t *src) {
1138 *dest = vld2_dup_u64(src);
1139 }
1141 // CHECK-LABEL: @test_vld2_dup_u8(
1142 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
1143 // CHECK-A64: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0(ptr %src)
1144 // CHECK-A32: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2dup.v8i8.p0(ptr %src, i32 1)
1145 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], ptr [[__RET]]
1146 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 16, i1 false)
1147 // CHECK: ret void
1148 void test_vld2_dup_u8(uint8x8x2_t *dest, const uint8_t *src) {
1149 *dest = vld2_dup_u8(src);
1150 }
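// vld3_dup variants: same pattern with three D registers per structure, so the
// 24-byte result struct is copied back to *dest.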
1152 // CHECK-LABEL: @test_vld3_dup_f16(
1153 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
1154 // CHECK-A64: [[VLD3:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3r.v4f16.p0(ptr %src)
1155 // CHECK-A32: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3dup.v4i16.p0(ptr %src, i32 2)
1156 // CHECK: store { <4 x [[HALF:half|i16]]>, <4 x [[HALF]]>, <4 x [[HALF]]> } [[VLD3]], ptr [[__RET]]
1157 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
1158 // CHECK: ret void
1159 void test_vld3_dup_f16(float16x4x3_t *dest, float16_t *src) {
1160 *dest = vld3_dup_f16(src);
1161 }
1163 // CHECK-LABEL: @test_vld3_dup_f32(
1164 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
1165 // CHECK-A64: [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0(ptr %src)
1166 // CHECK-A32: [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3dup.v2f32.p0(ptr %src, i32 4)
1167 // CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], ptr [[__RET]]
1168 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
1169 // CHECK: ret void
1170 void test_vld3_dup_f32(float32x2x3_t *dest, const float32_t *src) {
1171 *dest = vld3_dup_f32(src);
1172 }
1174 // CHECK-LABEL: @test_vld3_dup_p16(
1175 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
1176 // CHECK-A64: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0(ptr %src)
1177 // CHECK-A32: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3dup.v4i16.p0(ptr %src, i32 2)
1178 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], ptr [[__RET]]
1179 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
1180 // CHECK: ret void
1181 void test_vld3_dup_p16(poly16x4x3_t *dest, const poly16_t *src) {
1182 *dest = vld3_dup_p16(src);
1183 }
1185 // CHECK-LABEL: @test_vld3_dup_p8(
1186 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
1187 // CHECK-A64: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0(ptr %src)
1188 // CHECK-A32: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3dup.v8i8.p0(ptr %src, i32 1)
1189 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], ptr [[__RET]]
1190 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
1191 // CHECK: ret void
1192 void test_vld3_dup_p8(poly8x8x3_t *dest, const poly8_t *src) {
1193 *dest = vld3_dup_p8(src);
1194 }
1196 // CHECK-LABEL: @test_vld3_dup_s16(
1197 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
1198 // CHECK-A64: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0(ptr %src)
1199 // CHECK-A32: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3dup.v4i16.p0(ptr %src, i32 2)
1200 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], ptr [[__RET]]
1201 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
1202 // CHECK: ret void
1203 void test_vld3_dup_s16(int16x4x3_t *dest, const int16_t *src) {
1204 *dest = vld3_dup_s16(src);
1205 }
1207 // CHECK-LABEL: @test_vld3_dup_s32(
1208 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
1209 // CHECK-A64: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0(ptr %src)
1210 // CHECK-A32: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3dup.v2i32.p0(ptr %src, i32 4)
1211 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], ptr [[__RET]]
1212 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
1213 // CHECK: ret void
1214 void test_vld3_dup_s32(int32x2x3_t *dest, const int32_t *src) {
1215 *dest = vld3_dup_s32(src);
1216 }
1218 // CHECK-LABEL: @test_vld3_dup_s8(
1219 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
1220 // CHECK-A64: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0(ptr %src)
1221 // CHECK-A32: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3dup.v8i8.p0(ptr %src, i32 1)
1222 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], ptr [[__RET]]
1223 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
1224 // CHECK: ret void
1225 void test_vld3_dup_s8(int8x8x3_t *dest, const int8_t *src) {
1226 *dest = vld3_dup_s8(src);
1227 }
1229 // CHECK-LABEL: @test_vld3_dup_u16(
1230 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
1231 // CHECK-A64: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0(ptr %src)
1232 // CHECK-A32: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3dup.v4i16.p0(ptr %src, i32 2)
1233 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], ptr [[__RET]]
1234 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
1235 // CHECK: ret void
1236 void test_vld3_dup_u16(uint16x4x3_t *dest, const uint16_t *src) {
1237 *dest = vld3_dup_u16(src);
1238 }
1240 // CHECK-LABEL: @test_vld3_dup_u32(
1241 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
1242 // CHECK-A64: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0(ptr %src)
1243 // CHECK-A32: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3dup.v2i32.p0(ptr %src, i32 4)
1244 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], ptr [[__RET]]
1245 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
1246 // CHECK: ret void
1247 void test_vld3_dup_u32(uint32x2x3_t *dest, const uint32_t *src) {
1248 *dest = vld3_dup_u32(src);
1249 }
1251 // CHECK-LABEL: @test_vld3_dup_u8(
1252 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
1253 // CHECK-A64: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0(ptr %src)
1254 // CHECK-A32: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3dup.v8i8.p0(ptr %src, i32 1)
1255 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], ptr [[__RET]]
1256 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
1257 // CHECK: ret void
1258 void test_vld3_dup_u8(uint8x8x3_t *dest, const uint8_t *src) {
1259 *dest = vld3_dup_u8(src);
1260 }
1262 // CHECK-LABEL: @test_vld3_dup_s64(
1263 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
1264 // CHECK-A64: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0(ptr %src)
1265 // CHECK-A32: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3dup.v1i64.p0(ptr %src, i32 8)
1266 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], ptr [[__RET]]
1267 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
1268 // CHECK: ret void
1269 void test_vld3_dup_s64(int64x1x3_t *dest, const int64_t *src) {
1270 *dest = vld3_dup_s64(src);
1271 }
1273 // CHECK-LABEL: @test_vld3_dup_u64(
1274 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
1275 // CHECK-A64: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0(ptr %src)
1276 // CHECK-A32: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3dup.v1i64.p0(ptr %src, i32 8)
1277 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], ptr [[__RET]]
1278 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 24, i1 false)
1279 // CHECK: ret void
1280 void test_vld3_dup_u64(uint64x1x3_t *dest, const uint64_t *src) {
1281 *dest = vld3_dup_u64(src);
1282 }
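// vld4_dup variants: four D registers per structure, 32-byte copy back to *dest.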
1284 // CHECK-LABEL: @test_vld4_dup_f16(
1285 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
1286 // CHECK-A64: [[VLD4:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4r.v4f16.p0(ptr %src)
1287 // CHECK-A32: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4dup.v4i16.p0(ptr %src, i32 2)
1288 // CHECK: store { <4 x [[HALF:half|i16]]>, <4 x [[HALF]]>, <4 x [[HALF]]>, <4 x [[HALF]]> } [[VLD4]], ptr [[__RET]]
1289 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
1290 // CHECK: ret void
1291 void test_vld4_dup_f16(float16x4x4_t *dest, const float16_t *src) {
1292 *dest = vld4_dup_f16(src);
1293 }
1295 // CHECK-LABEL: @test_vld4_dup_f32(
1296 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
1297 // CHECK-A64: [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0(ptr %src)
1298 // CHECK-A32: [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4dup.v2f32.p0(ptr %src, i32 4)
1299 // CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], ptr [[__RET]]
1300 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
1301 // CHECK: ret void
1302 void test_vld4_dup_f32(float32x2x4_t *dest, const float32_t *src) {
1303 *dest = vld4_dup_f32(src);
1304 }
1306 // CHECK-LABEL: @test_vld4_dup_p16(
1307 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
1308 // CHECK-A64: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0(ptr %src)
1309 // CHECK-A32: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4dup.v4i16.p0(ptr %src, i32 2)
1310 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], ptr [[__RET]]
1311 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
1312 // CHECK: ret void
1313 void test_vld4_dup_p16(poly16x4x4_t *dest, const poly16_t *src) {
1314 *dest = vld4_dup_p16(src);
1315 }
1317 // CHECK-LABEL: @test_vld4_dup_p8(
1318 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
1319 // CHECK-A64: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0(ptr %src)
1320 // CHECK-A32: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4dup.v8i8.p0(ptr %src, i32 1)
1321 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], ptr [[__RET]]
1322 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
1323 // CHECK: ret void
1324 void test_vld4_dup_p8(poly8x8x4_t *dest, const poly8_t *src) {
1325 *dest = vld4_dup_p8(src);
1326 }
1328 // CHECK-LABEL: @test_vld4_dup_s16(
1329 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
1330 // CHECK-A64: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0(ptr %src)
1331 // CHECK-A32: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4dup.v4i16.p0(ptr %src, i32 2)
1332 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], ptr [[__RET]]
1333 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
1334 // CHECK: ret void
1335 void test_vld4_dup_s16(int16x4x4_t *dest, const int16_t *src) {
1336 *dest = vld4_dup_s16(src);
1337 }
1339 // CHECK-LABEL: @test_vld4_dup_s32(
1340 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
1341 // CHECK-A64: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0(ptr %src)
1342 // CHECK-A32: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4dup.v2i32.p0(ptr %src, i32 4)
1343 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], ptr [[__RET]]
1344 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
1345 // CHECK: ret void
1346 void test_vld4_dup_s32(int32x2x4_t *dest, const int32_t *src) {
1347 *dest = vld4_dup_s32(src);
1348 }
1350 // CHECK-LABEL: @test_vld4_dup_s8(
1351 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
1352 // CHECK-A64: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0(ptr %src)
1353 // CHECK-A32: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4dup.v8i8.p0(ptr %src, i32 1)
1354 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], ptr [[__RET]]
1355 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
1356 // CHECK: ret void
1357 void test_vld4_dup_s8(int8x8x4_t *dest, const int8_t *src) {
1358 *dest = vld4_dup_s8(src);
1359 }
1361 // CHECK-LABEL: @test_vld4_dup_u16(
1362 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
1363 // CHECK-A64: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0(ptr %src)
1364 // CHECK-A32: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4dup.v4i16.p0(ptr %src, i32 2)
1365 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], ptr [[__RET]]
1366 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
1367 // CHECK: ret void
1368 void test_vld4_dup_u16(uint16x4x4_t *dest, const uint16_t *src) {
1369 *dest = vld4_dup_u16(src);
1370 }
1372 // CHECK-LABEL: @test_vld4_dup_u32(
1373 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
1374 // CHECK-A64: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0(ptr %src)
1375 // CHECK-A32: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4dup.v2i32.p0(ptr %src, i32 4)
1376 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], ptr [[__RET]]
1377 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
1378 // CHECK: ret void
1379 void test_vld4_dup_u32(uint32x2x4_t *dest, const uint32_t *src) {
1380 *dest = vld4_dup_u32(src);
1381 }
1383 // CHECK-LABEL: @test_vld4_dup_u8(
1384 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
1385 // CHECK-A64: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0(ptr %src)
1386 // CHECK-A32: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4dup.v8i8.p0(ptr %src, i32 1)
1387 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], ptr [[__RET]]
1388 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
1389 // CHECK: ret void
1390 void test_vld4_dup_u8(uint8x8x4_t *dest, const uint8_t *src) {
1391 *dest = vld4_dup_u8(src);
1392 }
1394 // CHECK-LABEL: @test_vld4_dup_s64(
1395 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
1396 // CHECK-A64: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0(ptr %src)
1397 // CHECK-A32: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4dup.v1i64.p0(ptr %src, i32 8)
1398 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], ptr [[__RET]]
1399 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
1400 // CHECK: ret void
1401 void test_vld4_dup_s64(int64x1x4_t *dest, const int64_t *src) {
1402 *dest = vld4_dup_s64(src);
1403 }
1405 // CHECK-LABEL: @test_vld4_dup_u64(
1406 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
1407 // CHECK-A64: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0(ptr %src)
1408 // CHECK-A32: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4dup.v1i64.p0(ptr %src, i32 8)
1409 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], ptr [[__RET]]
1410 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align 8 %dest, ptr align 8 [[__RET]], {{i64|i32}} 32, i1 false)
1411 // CHECK: ret void
1412 void test_vld4_dup_u64(uint64x1x4_t *dest, const uint64_t *src) {
1413 *dest = vld4_dup_u64(src);
1414 }
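// Q-register (128-bit) dup loads start here. Note the expected alignment of the
// result alloca and memcpy: 16 bytes on AArch64 but 8 on AArch32, hence the
// {{16|8}} patterns in the checks below.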
1416 // CHECK-LABEL: @test_vld2q_dup_f16(
1417 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align {{16|8}}
1418 // CHECK-A64: [[VLD2:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2r.v8f16.p0(ptr %src)
1419 // CHECK-A32: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2dup.v8i16.p0(ptr %src, i32 2)
1420 // CHECK: store { <8 x [[HALF:half|i16]]>, <8 x [[HALF]]> } [[VLD2]], ptr [[__RET]]
1421 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 32, i1 false)
1422 // CHECK: ret void
1423 void test_vld2q_dup_f16(float16x8x2_t *dest, const float16_t *src) {
1424 *dest = vld2q_dup_f16(src);
1425 }
1427 // CHECK-LABEL: @test_vld2q_dup_f32(
1428 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align {{16|8}}
1429 // CHECK-A64: [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0(ptr %src)
1430 // CHECK-A32: [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2dup.v4f32.p0(ptr %src, i32 4)
1431 // CHECK: store { <4 x float>, <4 x float> } [[VLD2]], ptr [[__RET]]
1432 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 32, i1 false)
1433 // CHECK: ret void
1434 void test_vld2q_dup_f32(float32x4x2_t *dest, const float32_t *src) {
1435 *dest = vld2q_dup_f32(src);
1436 }
1438 // CHECK-LABEL: @test_vld2q_dup_p16(
1439 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align {{16|8}}
1440 // CHECK-A64: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0(ptr %src)
1441 // CHECK-A32: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2dup.v8i16.p0(ptr %src, i32 2)
1442 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], ptr [[__RET]]
1443 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 32, i1 false)
1444 // CHECK: ret void
1445 void test_vld2q_dup_p16(poly16x8x2_t *dest, const poly16_t *src) {
1446 *dest = vld2q_dup_p16(src);
1447 }
1449 // CHECK-LABEL: @test_vld2q_dup_p8(
1450 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align {{16|8}}
1451 // CHECK-A64: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0(ptr %src)
1452 // CHECK-A32: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2dup.v16i8.p0(ptr %src, i32 1)
1453 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], ptr [[__RET]]
1454 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 32, i1 false)
1455 // CHECK: ret void
1456 void test_vld2q_dup_p8(poly8x16x2_t *dest, const poly8_t *src) {
1457 *dest = vld2q_dup_p8(src);
1458 }
1460 // CHECK-LABEL: @test_vld2q_dup_s16(
1461 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align {{16|8}}
1462 // CHECK-A64: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0(ptr %src)
1463 // CHECK-A32: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2dup.v8i16.p0(ptr %src, i32 2)
1464 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], ptr [[__RET]]
1465 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 32, i1 false)
1466 // CHECK: ret void
1467 void test_vld2q_dup_s16(int16x8x2_t *dest, const int16_t *src) {
1468 *dest = vld2q_dup_s16(src);
1469 }
1471 // CHECK-LABEL: @test_vld2q_dup_s32(
1472 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align {{16|8}}
1473 // CHECK-A64: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0(ptr %src)
1474 // CHECK-A32: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2dup.v4i32.p0(ptr %src, i32 4)
1475 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], ptr [[__RET]]
1476 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 32, i1 false)
1477 // CHECK: ret void
1478 void test_vld2q_dup_s32(int32x4x2_t *dest, const int32_t *src) {
1479 *dest = vld2q_dup_s32(src);
1480 }
1482 // CHECK-LABEL: @test_vld2q_dup_s8(
1483 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align {{16|8}}
1484 // CHECK-A64: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0(ptr %src)
1485 // CHECK-A32: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2dup.v16i8.p0(ptr %src, i32 1)
1486 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], ptr [[__RET]]
1487 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 32, i1 false)
1488 // CHECK: ret void
1489 void test_vld2q_dup_s8(int8x16x2_t *dest, const int8_t *src) {
1490 *dest = vld2q_dup_s8(src);
1491 }
1493 // CHECK-LABEL: @test_vld2q_dup_u16(
1494 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align {{16|8}}
1495 // CHECK-A64: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0(ptr %src)
1496 // CHECK-A32: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2dup.v8i16.p0(ptr %src, i32 2)
1497 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], ptr [[__RET]]
1498 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 32, i1 false)
1499 // CHECK: ret void
1500 void test_vld2q_dup_u16(uint16x8x2_t *dest, const uint16_t *src) {
1501 *dest = vld2q_dup_u16(src);
1502 }
1504 // CHECK-LABEL: @test_vld2q_dup_u32(
1505 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align {{16|8}}
1506 // CHECK-A64: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0(ptr %src)
1507 // CHECK-A32: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2dup.v4i32.p0(ptr %src, i32 4)
1508 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], ptr [[__RET]]
1509 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 32, i1 false)
1510 // CHECK: ret void
1511 void test_vld2q_dup_u32(uint32x4x2_t *dest, const uint32_t *src) {
1512 *dest = vld2q_dup_u32(src);
1513 }
1515 // CHECK-LABEL: @test_vld2q_dup_u8(
1516 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align {{16|8}}
1517 // CHECK-A64: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0(ptr %src)
1518 // CHECK-A32: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2dup.v16i8.p0(ptr %src, i32 1)
1519 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], ptr [[__RET]]
1520 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 32, i1 false)
1521 // CHECK: ret void
1522 void test_vld2q_dup_u8(uint8x16x2_t *dest, const uint8_t *src) {
1523 *dest = vld2q_dup_u8(src);
1524 }
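// vld3q_dup variants: three Q registers per structure, 48-byte copy back to *dest.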
1526 // CHECK-LABEL: @test_vld3q_dup_f16(
1527 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align {{16|8}}
1528 // CHECK-A64: [[VLD3:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3r.v8f16.p0(ptr %src)
1529 // CHECK-A32: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3dup.v8i16.p0(ptr %src, i32 2)
1530 // CHECK: store { <8 x [[HALF:half|i16]]>, <8 x [[HALF]]>, <8 x [[HALF]]> } [[VLD3]], ptr [[__RET]]
1531 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 48, i1 false)
1532 // CHECK: ret void
1533 void test_vld3q_dup_f16(float16x8x3_t *dest, const float16_t *src) {
1534 *dest = vld3q_dup_f16(src);
1535 }
1537 // CHECK-LABEL: @test_vld3q_dup_f32(
1538 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align {{16|8}}
1539 // CHECK-A64: [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0(ptr %src)
1540 // CHECK-A32: [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3dup.v4f32.p0(ptr %src, i32 4)
1541 // CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], ptr [[__RET]]
1542 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 48, i1 false)
1543 // CHECK: ret void
1544 void test_vld3q_dup_f32(float32x4x3_t *dest, const float32_t *src) {
1545 *dest = vld3q_dup_f32(src);
1546 }
1548 // CHECK-LABEL: @test_vld3q_dup_p16(
1549 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align {{16|8}}
1550 // CHECK-A64: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0(ptr %src)
1551 // CHECK-A32: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3dup.v8i16.p0(ptr %src, i32 2)
1552 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], ptr [[__RET]]
1553 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 48, i1 false)
1554 // CHECK: ret void
1555 void test_vld3q_dup_p16(poly16x8x3_t *dest, const poly16_t *src) {
1556 *dest = vld3q_dup_p16(src);
1557 }
1559 // CHECK-LABEL: @test_vld3q_dup_p8(
1560 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align {{16|8}}
1561 // CHECK-A64: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0(ptr %src)
1562 // CHECK-A32: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3dup.v16i8.p0(ptr %src, i32 1)
1563 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], ptr [[__RET]]
1564 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 48, i1 false)
1565 // CHECK: ret void
1566 void test_vld3q_dup_p8(poly8x16x3_t *dest, const poly8_t *src) {
1567 *dest = vld3q_dup_p8(src);
1568 }
1570 // CHECK-LABEL: @test_vld3q_dup_s16(
1571 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align {{16|8}}
1572 // CHECK-A64: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0(ptr %src)
1573 // CHECK-A32: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3dup.v8i16.p0(ptr %src, i32 2)
1574 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], ptr [[__RET]]
1575 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 48, i1 false)
1576 // CHECK: ret void
1577 void test_vld3q_dup_s16(int16x8x3_t *dest, const int16_t *src) {
1578 *dest = vld3q_dup_s16(src);
1579 }
1581 // CHECK-LABEL: @test_vld3q_dup_s32(
1582 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align {{16|8}}
1583 // CHECK-A64: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0(ptr %src)
1584 // CHECK-A32: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3dup.v4i32.p0(ptr %src, i32 4)
1585 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], ptr [[__RET]]
1586 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 48, i1 false)
1587 // CHECK: ret void
1588 void test_vld3q_dup_s32(int32x4x3_t *dest, const int32_t *src) {
1589 *dest = vld3q_dup_s32(src);
1590 }
1592 // CHECK-LABEL: @test_vld3q_dup_s8(
1593 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align {{16|8}}
1594 // CHECK-A64: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0(ptr %src)
1595 // CHECK-A32: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3dup.v16i8.p0(ptr %src, i32 1)
1596 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], ptr [[__RET]]
1597 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 48, i1 false)
1598 // CHECK: ret void
1599 void test_vld3q_dup_s8(int8x16x3_t *dest, const int8_t *src) {
1600 *dest = vld3q_dup_s8(src);
1601 }
1603 // CHECK-LABEL: @test_vld3q_dup_u16(
1604 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align {{16|8}}
1605 // CHECK-A64: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0(ptr %src)
1606 // CHECK-A32: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3dup.v8i16.p0(ptr %src, i32 2)
1607 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], ptr [[__RET]]
1608 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 48, i1 false)
1609 // CHECK: ret void
1610 void test_vld3q_dup_u16(uint16x8x3_t *dest, const uint16_t *src) {
1611 *dest = vld3q_dup_u16(src);
1612 }
1614 // CHECK-LABEL: @test_vld3q_dup_u32(
1615 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align {{16|8}}
1616 // CHECK-A64: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0(ptr %src)
1617 // CHECK-A32: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3dup.v4i32.p0(ptr %src, i32 4)
1618 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], ptr [[__RET]]
1619 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 48, i1 false)
1620 // CHECK: ret void
1621 void test_vld3q_dup_u32(uint32x4x3_t *dest, const uint32_t *src) {
1622 *dest = vld3q_dup_u32(src);
1623 }
1625 // CHECK-LABEL: @test_vld3q_dup_u8(
1626 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align {{16|8}}
1627 // CHECK-A64: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0(ptr %src)
1628 // CHECK-A32: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3dup.v16i8.p0(ptr %src, i32 1)
1629 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], ptr [[__RET]]
1630 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 48, i1 false)
1631 // CHECK: ret void
1632 void test_vld3q_dup_u8(uint8x16x3_t *dest, const uint8_t *src) {
1633 *dest = vld3q_dup_u8(src);
1634 }
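// vld4q_dup variants: four Q registers per structure, 64-byte copy back to *dest.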
1636 // CHECK-LABEL: @test_vld4q_dup_f16(
1637 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align {{16|8}}
1638 // CHECK-A64: [[VLD4:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4r.v8f16.p0(ptr %src)
1639 // CHECK-A32: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4dup.v8i16.p0(ptr %src, i32 2)
1640 // CHECK: store { <8 x [[HALF:half|i16]]>, <8 x [[HALF]]>, <8 x [[HALF]]>, <8 x [[HALF]]> } [[VLD4]], ptr [[__RET]]
1641 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 64, i1 false)
1642 // CHECK: ret void
1643 void test_vld4q_dup_f16(float16x8x4_t *dest, const float16_t *src) {
1644 *dest = vld4q_dup_f16(src);
1645 }
1647 // CHECK-LABEL: @test_vld4q_dup_f32(
1648 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align {{16|8}}
1649 // CHECK-A64: [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0(ptr %src)
1650 // CHECK-A32: [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4dup.v4f32.p0(ptr %src, i32 4)
1651 // CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], ptr [[__RET]]
1652 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 64, i1 false)
1653 // CHECK: ret void
1654 void test_vld4q_dup_f32(float32x4x4_t *dest, const float32_t *src) {
1655 *dest = vld4q_dup_f32(src);
1656 }
1658 // CHECK-LABEL: @test_vld4q_dup_p16(
1659 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align {{16|8}}
1660 // CHECK-A64: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0(ptr %src)
1661 // CHECK-A32: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4dup.v8i16.p0(ptr %src, i32 2)
1662 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], ptr [[__RET]]
1663 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 64, i1 false)
1664 // CHECK: ret void
1665 void test_vld4q_dup_p16(poly16x8x4_t *dest, const poly16_t *src) {
1666 *dest = vld4q_dup_p16(src);
1667 }
1669 // CHECK-LABEL: @test_vld4q_dup_p8(
1670 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align {{16|8}}
1671 // CHECK-A64: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0(ptr %src)
1672 // CHECK-A32: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4dup.v16i8.p0(ptr %src, i32 1)
1673 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], ptr [[__RET]]
1674 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 64, i1 false)
1675 // CHECK: ret void
1676 void test_vld4q_dup_p8(poly8x16x4_t *dest, const poly8_t *src) {
1677 *dest = vld4q_dup_p8(src);
1678 }
1680 // CHECK-LABEL: @test_vld4q_dup_s16(
1681 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align {{16|8}}
1682 // CHECK-A64: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0(ptr %src)
1683 // CHECK-A32: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4dup.v8i16.p0(ptr %src, i32 2)
1684 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], ptr [[__RET]]
1685 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 64, i1 false)
1686 // CHECK: ret void
1687 void test_vld4q_dup_s16(int16x8x4_t *dest, const int16_t *src) {
1688 *dest = vld4q_dup_s16(src);
1689 }
1691 // CHECK-LABEL: @test_vld4q_dup_s32(
1692 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align {{16|8}}
1693 // CHECK-A64: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0(ptr %src)
1694 // CHECK-A32: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4dup.v4i32.p0(ptr %src, i32 4)
1695 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], ptr [[__RET]]
1696 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 64, i1 false)
1697 // CHECK: ret void
1698 void test_vld4q_dup_s32(int32x4x4_t *dest, const int32_t *src) {
1699 *dest = vld4q_dup_s32(src);
1700 }
1702 // CHECK-LABEL: @test_vld4q_dup_s8(
1703 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align {{16|8}}
1704 // CHECK-A64: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0(ptr %src)
1705 // CHECK-A32: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4dup.v16i8.p0(ptr %src, i32 1)
1706 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], ptr [[__RET]]
1707 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 64, i1 false)
1708 // CHECK: ret void
1709 void test_vld4q_dup_s8(int8x16x4_t *dest, const int8_t *src) {
1710 *dest = vld4q_dup_s8(src);
1711 }
1713 // CHECK-LABEL: @test_vld4q_dup_u16(
1714 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align {{16|8}}
1715 // CHECK-A64: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0(ptr %src)
1716 // CHECK-A32: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4dup.v8i16.p0(ptr %src, i32 2)
1717 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], ptr [[__RET]]
1718 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 64, i1 false)
1719 // CHECK: ret void
1720 void test_vld4q_dup_u16(uint16x8x4_t *dest, const uint16_t *src) {
1721 *dest = vld4q_dup_u16(src);
1722 }
1724 // CHECK-LABEL: @test_vld4q_dup_u32(
1725 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align {{16|8}}
1726 // CHECK-A64: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0(ptr %src)
1727 // CHECK-A32: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4dup.v4i32.p0(ptr %src, i32 4)
1728 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], ptr [[__RET]]
1729 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 64, i1 false)
1730 // CHECK: ret void
1731 void test_vld4q_dup_u32(uint32x4x4_t *dest, const uint32_t *src) {
1732 *dest = vld4q_dup_u32(src);
1733 }
1735 // CHECK-LABEL: @test_vld4q_dup_u8(
1736 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align {{16|8}}
1737 // CHECK-A64: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0(ptr %src)
1738 // CHECK-A32: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4dup.v16i8.p0(ptr %src, i32 1)
1739 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], ptr [[__RET]]
1740 // CHECK: call void @llvm.memcpy.p0.p0.{{i64|i32}}(ptr align {{16|8}} %dest, ptr align {{16|8}} [[__RET]], {{i64|i32}} 64, i1 false)
1741 // CHECK: ret void
1742 void test_vld4q_dup_u8(uint8x16x4_t *dest, const uint8_t *src) {
1743 *dest = vld4q_dup_u8(src);
1744 }