1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi --float-abi=hard -verify-machineinstrs \
3 ; RUN: -asm-verbose=false | FileCheck %s
; Aggregate types returned by the vldNdup intrinsics used in this file.
; Each struct holds 2, 3 or 4 vectors of one and the same vector type.
; First group: 64-bit vectors (<4 x i16>, <2 x i32>, <1 x i64>, <8 x i8>).
5 %struct.uint16x4x2_t = type { <4 x i16>, <4 x i16> }
6 %struct.uint16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
7 %struct.uint16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
9 %struct.uint32x2x2_t = type { <2 x i32>, <2 x i32> }
10 %struct.uint32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
11 %struct.uint32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
13 %struct.uint64x1x2_t = type { <1 x i64>, <1 x i64> }
14 %struct.uint64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
15 %struct.uint64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }
17 %struct.uint8x8x2_t = type { <8 x i8>, <8 x i8> }
18 %struct.uint8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
19 %struct.uint8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
; Second group: 128-bit vectors (<8 x i16>, <4 x i32>, <16 x i8>).
21 %struct.uint16x8x2_t = type { <8 x i16>, <8 x i16> }
22 %struct.uint16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
23 %struct.uint16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
25 %struct.uint32x4x2_t = type { <4 x i32>, <4 x i32> }
26 %struct.uint32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
27 %struct.uint32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
29 %struct.uint8x16x2_t = type { <16 x i8>, <16 x i8> }
30 %struct.uint8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
31 %struct.uint8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }
; Declarations of the ARM NEON vld2dup/vld3dup/vld4dup intrinsics under test.
; Every one takes a pointer plus an i32 and returns a struct of 2, 3 or 4
; vectors. NOTE(review): the i32 operand is presumably the alignment of the
; access in bytes -- confirm against the LLVM ARM intrinsic definitions.
; Variants returning 64-bit vectors:
33 declare %struct.uint8x8x2_t @llvm.arm.neon.vld2dup.v8i8.p0(ptr, i32)
34 declare %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0(ptr, i32)
35 declare %struct.uint32x2x2_t @llvm.arm.neon.vld2dup.v2i32.p0(ptr, i32)
36 declare %struct.uint64x1x2_t @llvm.arm.neon.vld2dup.v1i64.p0(ptr, i32)
38 declare %struct.uint8x8x3_t @llvm.arm.neon.vld3dup.v8i8.p0(ptr, i32)
39 declare %struct.uint16x4x3_t @llvm.arm.neon.vld3dup.v4i16.p0(ptr, i32)
40 declare %struct.uint32x2x3_t @llvm.arm.neon.vld3dup.v2i32.p0(ptr, i32)
41 declare %struct.uint64x1x3_t @llvm.arm.neon.vld3dup.v1i64.p0(ptr, i32)
43 declare %struct.uint8x8x4_t @llvm.arm.neon.vld4dup.v8i8.p0(ptr, i32)
44 declare %struct.uint16x4x4_t @llvm.arm.neon.vld4dup.v4i16.p0(ptr, i32)
45 declare %struct.uint32x2x4_t @llvm.arm.neon.vld4dup.v2i32.p0(ptr, i32)
46 declare %struct.uint64x1x4_t @llvm.arm.neon.vld4dup.v1i64.p0(ptr, i32)
; Variants returning 128-bit vectors (no i64 element forms are declared):
48 declare %struct.uint8x16x2_t @llvm.arm.neon.vld2dup.v16i8.p0(ptr, i32)
49 declare %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0(ptr, i32)
50 declare %struct.uint32x4x2_t @llvm.arm.neon.vld2dup.v4i32.p0(ptr, i32)
52 declare %struct.uint8x16x3_t @llvm.arm.neon.vld3dup.v16i8.p0(ptr, i32)
53 declare %struct.uint16x8x3_t @llvm.arm.neon.vld3dup.v8i16.p0(ptr, i32)
54 declare %struct.uint32x4x3_t @llvm.arm.neon.vld3dup.v4i32.p0(ptr, i32)
56 declare %struct.uint8x16x4_t @llvm.arm.neon.vld4dup.v16i8.p0(ptr, i32)
57 declare %struct.uint16x8x4_t @llvm.arm.neon.vld4dup.v8i16.p0(ptr, i32)
58 declare %struct.uint32x4x4_t @llvm.arm.neon.vld4dup.v4i32.p0(ptr, i32)
; vld2dup of <4 x i16> from %src, result stored to %dest, with %src advanced
; by 4 bytes (two 16-bit elements). The expected vld2.16 all-lanes load
; absorbs that increment as base-register writeback (`[r1]!`).
60 define ptr @test_vld2_dup_u16_update(ptr %dest, ptr %src) {
61 ; CHECK-LABEL: test_vld2_dup_u16_update:
62 ; CHECK: vld2.16 {d16[], d17[]}, [r1]!
63 ; CHECK-NEXT: vst1.16 {d16}, [r0:64]!
64 ; CHECK-NEXT: vstr d17, [r0]
65 ; CHECK-NEXT: mov r0, r1
68 %tmp = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0(ptr %src, i32 2)
69 store %struct.uint16x4x2_t %tmp, ptr %dest, align 8
70 %updated_src = getelementptr inbounds i8, ptr %src, i32 4
; Same load/store as the fixed-increment u16 test, but %src is advanced by the
; runtime value %inc. The expected vld2.16 uses the register post-index form
; (`[r1], r2`).
74 define ptr @test_vld2_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
75 ; CHECK-LABEL: test_vld2_dup_u16_update_reg:
76 ; CHECK: vld2.16 {d16[], d17[]}, [r1], r2
77 ; CHECK-NEXT: vst1.16 {d16}, [r0:64]!
78 ; CHECK-NEXT: vstr d17, [r0]
79 ; CHECK-NEXT: mov r0, r1
82 %tmp = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0(ptr %src, i32 2)
83 store %struct.uint16x4x2_t %tmp, ptr %dest, align 8
84 %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
; vld2dup of <2 x i32> from %src (the 32-bit element case, although the test
; name carries no element-size tag). %src is advanced by 8 bytes (two 32-bit
; elements); the expected vld2.32 all-lanes load uses writeback (`[r1]!`).
88 define ptr @test_vld2_dup_update(ptr %dest, ptr %src) {
89 ; CHECK-LABEL: test_vld2_dup_update:
90 ; CHECK: vld2.32 {d16[], d17[]}, [r1]!
91 ; CHECK-NEXT: vst1.32 {d16}, [r0:64]!
92 ; CHECK-NEXT: vstr d17, [r0]
93 ; CHECK-NEXT: mov r0, r1
96 %tmp = tail call %struct.uint32x2x2_t @llvm.arm.neon.vld2dup.v2i32.p0(ptr %src, i32 4)
97 store %struct.uint32x2x2_t %tmp, ptr %dest, align 8
98 %updated_src = getelementptr inbounds i8, ptr %src, i32 8
; vld2dup of <2 x i32> from %src with %src advanced by the runtime value %inc.
; The expected vld2.32 all-lanes load uses the register post-index form
; (`[r1], r2`).
102 define ptr @test_vld2_dup_update_reg(ptr %dest, ptr %src, i32 %inc) {
103 ; CHECK-LABEL: test_vld2_dup_update_reg:
104 ; CHECK: vld2.32 {d16[], d17[]}, [r1], r2
105 ; CHECK-NEXT: vst1.32 {d16}, [r0:64]!
106 ; CHECK-NEXT: vstr d17, [r0]
107 ; CHECK-NEXT: mov r0, r1
110 %tmp = tail call %struct.uint32x2x2_t @llvm.arm.neon.vld2dup.v2i32.p0(ptr %src, i32 4)
111 store %struct.uint32x2x2_t %tmp, ptr %dest, align 8
112 %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
; vld2dup of <1 x i64> from %src with %src advanced by 16 bytes (two 64-bit
; elements). The expected code is a plain vld1.64 of two D registers with a
; `:64` address qualifier and writeback, not a vld2 all-lanes instruction.
116 define ptr @test_vld2_dup_u64_update(ptr %dest, ptr %src) {
117 ; CHECK-LABEL: test_vld2_dup_u64_update:
118 ; CHECK: vld1.64 {d16, d17}, [r1:64]!
119 ; CHECK-NEXT: vst1.64 {d16}, [r0:64]!
120 ; CHECK-NEXT: vstr d17, [r0]
121 ; CHECK-NEXT: mov r0, r1
124 %tmp = tail call %struct.uint64x1x2_t @llvm.arm.neon.vld2dup.v1i64.p0(ptr %src, i32 8)
125 store %struct.uint64x1x2_t %tmp, ptr %dest, align 8
126 %updated_src = getelementptr inbounds i8, ptr %src, i32 16
; vld2dup of <1 x i64> from %src with %src advanced by the runtime value %inc.
; The expected code is a plain vld1.64 of two D registers using the register
; post-index form (`[r1:64], r2`).
130 define ptr @test_vld2_dup_u64_update_reg(ptr %dest, ptr %src, i32 %inc) {
131 ; CHECK-LABEL: test_vld2_dup_u64_update_reg:
132 ; CHECK: vld1.64 {d16, d17}, [r1:64], r2
133 ; CHECK-NEXT: vst1.64 {d16}, [r0:64]!
134 ; CHECK-NEXT: vstr d17, [r0]
135 ; CHECK-NEXT: mov r0, r1
138 %tmp = tail call %struct.uint64x1x2_t @llvm.arm.neon.vld2dup.v1i64.p0(ptr %src, i32 8)
139 store %struct.uint64x1x2_t %tmp, ptr %dest, align 8
140 %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
; vld2dup of <8 x i8> from %src with %src advanced by 2 bytes (two 8-bit
; elements). The expected vld2.8 all-lanes load uses writeback (`[r1]!`).
144 define ptr @test_vld2_dup_u8_update(ptr %dest, ptr %src) {
145 ; CHECK-LABEL: test_vld2_dup_u8_update:
146 ; CHECK: vld2.8 {d16[], d17[]}, [r1]!
147 ; CHECK-NEXT: vst1.8 {d16}, [r0:64]!
148 ; CHECK-NEXT: vstr d17, [r0]
149 ; CHECK-NEXT: mov r0, r1
152 %tmp = tail call %struct.uint8x8x2_t @llvm.arm.neon.vld2dup.v8i8.p0(ptr %src, i32 1)
153 store %struct.uint8x8x2_t %tmp, ptr %dest, align 8
154 %updated_src = getelementptr inbounds i8, ptr %src, i32 2
; vld2dup of <8 x i8> from %src with %src advanced by the runtime value %inc.
; The expected vld2.8 all-lanes load uses the register post-index form
; (`[r1], r2`).
158 define ptr @test_vld2_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
159 ; CHECK-LABEL: test_vld2_dup_u8_update_reg:
160 ; CHECK: vld2.8 {d16[], d17[]}, [r1], r2
161 ; CHECK-NEXT: vst1.8 {d16}, [r0:64]!
162 ; CHECK-NEXT: vstr d17, [r0]
163 ; CHECK-NEXT: mov r0, r1
166 %tmp = tail call %struct.uint8x8x2_t @llvm.arm.neon.vld2dup.v8i8.p0(ptr %src, i32 1)
167 store %struct.uint8x8x2_t %tmp, ptr %dest, align 8
168 %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
; vld3dup of <4 x i16> from %src with %src advanced by 6 bytes (three 16-bit
; elements). The expected vld3.16 all-lanes load uses writeback (`[r1]!`).
172 define ptr @test_vld3_dup_u16_update(ptr %dest, ptr %src) {
173 ; CHECK-LABEL: test_vld3_dup_u16_update:
174 ; CHECK: vld3.16 {d16[], d17[], d18[]}, [r1]!
175 ; CHECK-NEXT: vst1.16 {d16}, [r0:64]!
176 ; CHECK-NEXT: vst1.16 {d17}, [r0:64]!
177 ; CHECK-NEXT: vstr d18, [r0]
178 ; CHECK-NEXT: mov r0, r1
181 %tmp = tail call %struct.uint16x4x3_t @llvm.arm.neon.vld3dup.v4i16.p0(ptr %src, i32 2)
182 store %struct.uint16x4x3_t %tmp, ptr %dest, align 8
183 %updated_src = getelementptr inbounds i8, ptr %src, i32 6
; vld3dup of <4 x i16> from %src with %src advanced by the runtime value %inc.
; The expected vld3.16 all-lanes load uses the register post-index form
; (`[r1], r2`).
187 define ptr @test_vld3_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
188 ; CHECK-LABEL: test_vld3_dup_u16_update_reg:
189 ; CHECK: vld3.16 {d16[], d17[], d18[]}, [r1], r2
190 ; CHECK-NEXT: vst1.16 {d16}, [r0:64]!
191 ; CHECK-NEXT: vst1.16 {d17}, [r0:64]!
192 ; CHECK-NEXT: vstr d18, [r0]
193 ; CHECK-NEXT: mov r0, r1
196 %tmp = tail call %struct.uint16x4x3_t @llvm.arm.neon.vld3dup.v4i16.p0(ptr %src, i32 2)
197 store %struct.uint16x4x3_t %tmp, ptr %dest, align 8
198 %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
; vld3dup of <2 x i32> from %src with %src advanced by 12 bytes (three 32-bit
; elements). The expected vld3.32 all-lanes load uses writeback (`[r1]!`).
202 define ptr @test_vld3_dup_u32_update(ptr %dest, ptr %src) {
203 ; CHECK-LABEL: test_vld3_dup_u32_update:
204 ; CHECK: vld3.32 {d16[], d17[], d18[]}, [r1]!
205 ; CHECK-NEXT: vst1.32 {d16}, [r0:64]!
206 ; CHECK-NEXT: vst1.32 {d17}, [r0:64]!
207 ; CHECK-NEXT: vstr d18, [r0]
208 ; CHECK-NEXT: mov r0, r1
211 %tmp = tail call %struct.uint32x2x3_t @llvm.arm.neon.vld3dup.v2i32.p0(ptr %src, i32 4)
212 store %struct.uint32x2x3_t %tmp, ptr %dest, align 8
213 %updated_src = getelementptr inbounds i8, ptr %src, i32 12
; vld3dup of <2 x i32> from %src with %src advanced by the runtime value %inc.
; The expected vld3.32 all-lanes load uses the register post-index form
; (`[r1], r2`).
217 define ptr @test_vld3_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
218 ; CHECK-LABEL: test_vld3_dup_u32_update_reg:
219 ; CHECK: vld3.32 {d16[], d17[], d18[]}, [r1], r2
220 ; CHECK-NEXT: vst1.32 {d16}, [r0:64]!
221 ; CHECK-NEXT: vst1.32 {d17}, [r0:64]!
222 ; CHECK-NEXT: vstr d18, [r0]
223 ; CHECK-NEXT: mov r0, r1
226 %tmp = tail call %struct.uint32x2x3_t @llvm.arm.neon.vld3dup.v2i32.p0(ptr %src, i32 4)
227 store %struct.uint32x2x3_t %tmp, ptr %dest, align 8
228 %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
; vld3dup of <1 x i64> from %src with %src advanced by 24 bytes (three 64-bit
; elements). The expected code is a plain vld1.64 of three D registers with
; writeback. Unlike the 2- and 4-vector u64 tests in this file, the expected
; address operand carries no `:64` qualifier.
232 define ptr @test_vld3_dup_u64_update(ptr %dest, ptr %src) {
233 ; CHECK-LABEL: test_vld3_dup_u64_update:
234 ; CHECK: vld1.64 {d16, d17, d18}, [r1]!
235 ; CHECK-NEXT: vst1.64 {d16}, [r0:64]!
236 ; CHECK-NEXT: vst1.64 {d17}, [r0:64]!
237 ; CHECK-NEXT: vstr d18, [r0]
238 ; CHECK-NEXT: mov r0, r1
241 %tmp = tail call %struct.uint64x1x3_t @llvm.arm.neon.vld3dup.v1i64.p0(ptr %src, i32 8)
242 store %struct.uint64x1x3_t %tmp, ptr %dest, align 8
243 %updated_src = getelementptr inbounds i8, ptr %src, i32 24
; vld3dup of <1 x i64> from %src with %src advanced by the runtime value %inc.
; The expected code is a plain vld1.64 of three D registers using the register
; post-index form (`[r1], r2`), again without a `:64` address qualifier.
247 define ptr @test_vld3_dup_u64_update_reg(ptr %dest, ptr %src, i32 %inc) {
248 ; CHECK-LABEL: test_vld3_dup_u64_update_reg:
249 ; CHECK: vld1.64 {d16, d17, d18}, [r1], r2
250 ; CHECK-NEXT: vst1.64 {d16}, [r0:64]!
251 ; CHECK-NEXT: vst1.64 {d17}, [r0:64]!
252 ; CHECK-NEXT: vstr d18, [r0]
253 ; CHECK-NEXT: mov r0, r1
256 %tmp = tail call %struct.uint64x1x3_t @llvm.arm.neon.vld3dup.v1i64.p0(ptr %src, i32 8)
257 store %struct.uint64x1x3_t %tmp, ptr %dest, align 8
258 %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
; vld3dup of <8 x i8> from %src with %src advanced by 3 bytes (three 8-bit
; elements). The expected vld3.8 all-lanes load uses writeback (`[r1]!`).
262 define ptr @test_vld3_dup_u8_update(ptr %dest, ptr %src) {
263 ; CHECK-LABEL: test_vld3_dup_u8_update:
264 ; CHECK: vld3.8 {d16[], d17[], d18[]}, [r1]!
265 ; CHECK-NEXT: vst1.8 {d16}, [r0:64]!
266 ; CHECK-NEXT: vst1.8 {d17}, [r0:64]!
267 ; CHECK-NEXT: vstr d18, [r0]
268 ; CHECK-NEXT: mov r0, r1
271 %tmp = tail call %struct.uint8x8x3_t @llvm.arm.neon.vld3dup.v8i8.p0(ptr %src, i32 1)
272 store %struct.uint8x8x3_t %tmp, ptr %dest, align 8
273 %updated_src = getelementptr inbounds i8, ptr %src, i32 3
; vld3dup of <8 x i8> from %src with %src advanced by the runtime value %inc.
; The expected vld3.8 all-lanes load uses the register post-index form
; (`[r1], r2`).
277 define ptr @test_vld3_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
278 ; CHECK-LABEL: test_vld3_dup_u8_update_reg:
279 ; CHECK: vld3.8 {d16[], d17[], d18[]}, [r1], r2
280 ; CHECK-NEXT: vst1.8 {d16}, [r0:64]!
281 ; CHECK-NEXT: vst1.8 {d17}, [r0:64]!
282 ; CHECK-NEXT: vstr d18, [r0]
283 ; CHECK-NEXT: mov r0, r1
286 %tmp = tail call %struct.uint8x8x3_t @llvm.arm.neon.vld3dup.v8i8.p0(ptr %src, i32 1)
287 store %struct.uint8x8x3_t %tmp, ptr %dest, align 8
288 %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
; vld4dup of <4 x i16> from %src with %src advanced by 8 bytes (four 16-bit
; elements). The expected vld4.16 all-lanes load uses writeback (`[r1]!`).
292 define ptr @test_vld4_dup_u16_update(ptr %dest, ptr %src) {
293 ; CHECK-LABEL: test_vld4_dup_u16_update:
294 ; CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1]!
295 ; CHECK-NEXT: vst1.16 {d16}, [r0:64]!
296 ; CHECK-NEXT: vst1.16 {d17}, [r0:64]!
297 ; CHECK-NEXT: vst1.16 {d18}, [r0:64]!
298 ; CHECK-NEXT: vstr d19, [r0]
299 ; CHECK-NEXT: mov r0, r1
302 %tmp = tail call %struct.uint16x4x4_t @llvm.arm.neon.vld4dup.v4i16.p0(ptr %src, i32 2)
303 store %struct.uint16x4x4_t %tmp, ptr %dest, align 8
304 %updated_src = getelementptr inbounds i8, ptr %src, i32 8
; vld4dup of <4 x i16> from %src with %src advanced by the runtime value %inc.
; The expected vld4.16 all-lanes load uses the register post-index form
; (`[r1], r2`).
308 define ptr @test_vld4_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
309 ; CHECK-LABEL: test_vld4_dup_u16_update_reg:
310 ; CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1], r2
311 ; CHECK-NEXT: vst1.16 {d16}, [r0:64]!
312 ; CHECK-NEXT: vst1.16 {d17}, [r0:64]!
313 ; CHECK-NEXT: vst1.16 {d18}, [r0:64]!
314 ; CHECK-NEXT: vstr d19, [r0]
315 ; CHECK-NEXT: mov r0, r1
318 %tmp = tail call %struct.uint16x4x4_t @llvm.arm.neon.vld4dup.v4i16.p0(ptr %src, i32 2)
319 store %struct.uint16x4x4_t %tmp, ptr %dest, align 8
320 %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
; vld4dup of <2 x i32> from %src with %src advanced by 16 bytes (four 32-bit
; elements). The expected vld4.32 all-lanes load uses writeback (`[r1]!`).
324 define ptr @test_vld4_dup_u32_update(ptr %dest, ptr %src) {
325 ; CHECK-LABEL: test_vld4_dup_u32_update:
326 ; CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r1]!
327 ; CHECK-NEXT: vst1.32 {d16}, [r0:64]!
328 ; CHECK-NEXT: vst1.32 {d17}, [r0:64]!
329 ; CHECK-NEXT: vst1.32 {d18}, [r0:64]!
330 ; CHECK-NEXT: vstr d19, [r0]
331 ; CHECK-NEXT: mov r0, r1
334 %tmp = tail call %struct.uint32x2x4_t @llvm.arm.neon.vld4dup.v2i32.p0(ptr %src, i32 4)
335 store %struct.uint32x2x4_t %tmp, ptr %dest, align 8
336 %updated_src = getelementptr inbounds i8, ptr %src, i32 16
; vld4dup of <2 x i32> from %src with %src advanced by the runtime value %inc.
; The expected vld4.32 all-lanes load uses the register post-index form
; (`[r1], r2`).
340 define ptr @test_vld4_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
341 ; CHECK-LABEL: test_vld4_dup_u32_update_reg:
342 ; CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r1], r2
343 ; CHECK-NEXT: vst1.32 {d16}, [r0:64]!
344 ; CHECK-NEXT: vst1.32 {d17}, [r0:64]!
345 ; CHECK-NEXT: vst1.32 {d18}, [r0:64]!
346 ; CHECK-NEXT: vstr d19, [r0]
347 ; CHECK-NEXT: mov r0, r1
350 %tmp = tail call %struct.uint32x2x4_t @llvm.arm.neon.vld4dup.v2i32.p0(ptr %src, i32 4)
351 store %struct.uint32x2x4_t %tmp, ptr %dest, align 8
352 %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
; vld4dup of <1 x i64> from %src with %src advanced by 32 bytes (four 64-bit
; elements). The expected code is a plain vld1.64 of four D registers with a
; `:64` address qualifier and writeback, not a vld4 all-lanes instruction.
356 define ptr @test_vld4_dup_u64_update(ptr %dest, ptr %src) {
357 ; CHECK-LABEL: test_vld4_dup_u64_update:
358 ; CHECK: vld1.64 {d16, d17, d18, d19}, [r1:64]!
359 ; CHECK-NEXT: vst1.64 {d16}, [r0:64]!
360 ; CHECK-NEXT: vst1.64 {d17}, [r0:64]!
361 ; CHECK-NEXT: vst1.64 {d18}, [r0:64]!
362 ; CHECK-NEXT: vstr d19, [r0]
363 ; CHECK-NEXT: mov r0, r1
366 %tmp = tail call %struct.uint64x1x4_t @llvm.arm.neon.vld4dup.v1i64.p0(ptr %src, i32 8)
367 store %struct.uint64x1x4_t %tmp, ptr %dest, align 8
368 %updated_src = getelementptr inbounds i8, ptr %src, i32 32
; vld4dup of <1 x i64> from %src with %src advanced by the runtime value %inc.
; The expected code is a plain vld1.64 of four D registers using the register
; post-index form (`[r1:64], r2`).
372 define ptr @test_vld4_dup_u64_update_reg(ptr %dest, ptr %src, i32 %inc) {
373 ; CHECK-LABEL: test_vld4_dup_u64_update_reg:
374 ; CHECK: vld1.64 {d16, d17, d18, d19}, [r1:64], r2
375 ; CHECK-NEXT: vst1.64 {d16}, [r0:64]!
376 ; CHECK-NEXT: vst1.64 {d17}, [r0:64]!
377 ; CHECK-NEXT: vst1.64 {d18}, [r0:64]!
378 ; CHECK-NEXT: vstr d19, [r0]
379 ; CHECK-NEXT: mov r0, r1
382 %tmp = tail call %struct.uint64x1x4_t @llvm.arm.neon.vld4dup.v1i64.p0(ptr %src, i32 8)
383 store %struct.uint64x1x4_t %tmp, ptr %dest, align 8
384 %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
; vld4dup of <8 x i8> from %src with %src advanced by 4 bytes (four 8-bit
; elements). The expected vld4.8 all-lanes load uses writeback (`[r1]!`).
388 define ptr @test_vld4_dup_u8_update(ptr %dest, ptr %src) {
389 ; CHECK-LABEL: test_vld4_dup_u8_update:
390 ; CHECK: vld4.8 {d16[], d17[], d18[], d19[]}, [r1]!
391 ; CHECK-NEXT: vst1.8 {d16}, [r0:64]!
392 ; CHECK-NEXT: vst1.8 {d17}, [r0:64]!
393 ; CHECK-NEXT: vst1.8 {d18}, [r0:64]!
394 ; CHECK-NEXT: vstr d19, [r0]
395 ; CHECK-NEXT: mov r0, r1
398 %tmp = tail call %struct.uint8x8x4_t @llvm.arm.neon.vld4dup.v8i8.p0(ptr %src, i32 1)
399 store %struct.uint8x8x4_t %tmp, ptr %dest, align 8
400 %updated_src = getelementptr inbounds i8, ptr %src, i32 4
; vld4dup of <8 x i8> from %src with %src advanced by the runtime value %inc.
; The expected vld4.8 all-lanes load uses the register post-index form
; (`[r1], r2`).
404 define ptr @test_vld4_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
405 ; CHECK-LABEL: test_vld4_dup_u8_update_reg:
406 ; CHECK: vld4.8 {d16[], d17[], d18[], d19[]}, [r1], r2
407 ; CHECK-NEXT: vst1.8 {d16}, [r0:64]!
408 ; CHECK-NEXT: vst1.8 {d17}, [r0:64]!
409 ; CHECK-NEXT: vst1.8 {d18}, [r0:64]!
410 ; CHECK-NEXT: vstr d19, [r0]
411 ; CHECK-NEXT: mov r0, r1
414 %tmp = tail call %struct.uint8x8x4_t @llvm.arm.neon.vld4dup.v8i8.p0(ptr %src, i32 1)
415 store %struct.uint8x8x4_t %tmp, ptr %dest, align 8
416 %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
; 128-bit variant: vld2dup of <8 x i16> from %src with %src advanced by
; 4 bytes. Two vld2.16 all-lanes loads are expected: the first fills the even
; D registers with no writeback, the second fills the odd D registers and
; carries the pointer update (`[r1]!`).
; NOTE(review): %dest0 is not referenced by any instruction visible in this
; function -- confirm whether the extra parameter is intentional.
420 define ptr @test_vld2q_dup_u16_update(ptr %dest, ptr %src, ptr %dest0) {
421 ; CHECK-LABEL: test_vld2q_dup_u16_update:
422 ; CHECK: vld2.16 {d16[], d18[]}, [r1]
423 ; CHECK-NEXT: vld2.16 {d17[], d19[]}, [r1]!
424 ; CHECK-NEXT: vst1.16 {d16, d17}, [r0]!
425 ; CHECK-NEXT: vst1.64 {d18, d19}, [r0]
426 ; CHECK-NEXT: mov r0, r1
429 %tmp = tail call %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0(ptr %src, i32 2)
430 store %struct.uint16x8x2_t %tmp, ptr %dest, align 8
431 %updated_src = getelementptr inbounds i8, ptr %src, i32 4
; 128-bit variant: vld2dup of <8 x i16> from %src with %src advanced by the
; runtime value %inc. Two vld2.16 all-lanes loads are expected; only the
; second (odd D registers) uses the register post-index form (`[r1], r2`).
435 define ptr @test_vld2q_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
436 ; CHECK-LABEL: test_vld2q_dup_u16_update_reg:
437 ; CHECK: vld2.16 {d16[], d18[]}, [r1]
438 ; CHECK-NEXT: vld2.16 {d17[], d19[]}, [r1], r2
439 ; CHECK-NEXT: vst1.16 {d16, d17}, [r0]!
440 ; CHECK-NEXT: vst1.64 {d18, d19}, [r0]
441 ; CHECK-NEXT: mov r0, r1
444 %tmp = tail call %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0(ptr %src, i32 2)
445 store %struct.uint16x8x2_t %tmp, ptr %dest, align 8
446 %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
; 128-bit variant: vld2dup of <4 x i32> from %src with %src advanced by
; 8 bytes. Two vld2.32 all-lanes loads are expected; only the second (odd
; D registers) carries the writeback (`[r1]!`).
450 define ptr @test_vld2q_dup_u32_update(ptr %dest, ptr %src) {
451 ; CHECK-LABEL: test_vld2q_dup_u32_update:
452 ; CHECK: vld2.32 {d16[], d18[]}, [r1]
453 ; CHECK-NEXT: vld2.32 {d17[], d19[]}, [r1]!
454 ; CHECK-NEXT: vst1.32 {d16, d17}, [r0]!
455 ; CHECK-NEXT: vst1.64 {d18, d19}, [r0]
456 ; CHECK-NEXT: mov r0, r1
459 %tmp = tail call %struct.uint32x4x2_t @llvm.arm.neon.vld2dup.v4i32.p0(ptr %src, i32 4)
460 store %struct.uint32x4x2_t %tmp, ptr %dest, align 8
461 %updated_src = getelementptr inbounds i8, ptr %src, i32 8
; 128-bit variant: vld2dup of <4 x i32> from %src with %src advanced by the
; runtime value %inc. Two vld2.32 all-lanes loads are expected; only the
; second (odd D registers) uses the register post-index form (`[r1], r2`).
465 define ptr @test_vld2q_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
466 ; CHECK-LABEL: test_vld2q_dup_u32_update_reg:
467 ; CHECK: vld2.32 {d16[], d18[]}, [r1]
468 ; CHECK-NEXT: vld2.32 {d17[], d19[]}, [r1], r2
469 ; CHECK-NEXT: vst1.32 {d16, d17}, [r0]!
470 ; CHECK-NEXT: vst1.64 {d18, d19}, [r0]
471 ; CHECK-NEXT: mov r0, r1
474 %tmp = tail call %struct.uint32x4x2_t @llvm.arm.neon.vld2dup.v4i32.p0(ptr %src, i32 4)
475 store %struct.uint32x4x2_t %tmp, ptr %dest, align 8
476 %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
; 128-bit variant: vld2dup of <16 x i8> from %src with %src advanced by
; 2 bytes. Two vld2.8 all-lanes loads are expected; only the second (odd
; D registers) carries the writeback (`[r1]!`).
480 define ptr @test_vld2q_dup_u8_update(ptr %dest, ptr %src) {
481 ; CHECK-LABEL: test_vld2q_dup_u8_update:
482 ; CHECK: vld2.8 {d16[], d18[]}, [r1]
483 ; CHECK-NEXT: vld2.8 {d17[], d19[]}, [r1]!
484 ; CHECK-NEXT: vst1.8 {d16, d17}, [r0]!
485 ; CHECK-NEXT: vst1.64 {d18, d19}, [r0]
486 ; CHECK-NEXT: mov r0, r1
489 %tmp = tail call %struct.uint8x16x2_t @llvm.arm.neon.vld2dup.v16i8.p0(ptr %src, i32 1)
490 store %struct.uint8x16x2_t %tmp, ptr %dest, align 8
491 %updated_src = getelementptr inbounds i8, ptr %src, i32 2
; 128-bit variant: vld2dup of <16 x i8> from %src with %src advanced by the
; runtime value %inc. Two vld2.8 all-lanes loads are expected; only the
; second (odd D registers) uses the register post-index form (`[r1], r2`).
495 define ptr @test_vld2q_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
496 ; CHECK-LABEL: test_vld2q_dup_u8_update_reg:
497 ; CHECK: vld2.8 {d16[], d18[]}, [r1]
498 ; CHECK-NEXT: vld2.8 {d17[], d19[]}, [r1], r2
499 ; CHECK-NEXT: vst1.8 {d16, d17}, [r0]!
500 ; CHECK-NEXT: vst1.64 {d18, d19}, [r0]
501 ; CHECK-NEXT: mov r0, r1
504 %tmp = tail call %struct.uint8x16x2_t @llvm.arm.neon.vld2dup.v16i8.p0(ptr %src, i32 1)
505 store %struct.uint8x16x2_t %tmp, ptr %dest, align 8
506 %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
; 128-bit variant: vld3dup of <8 x i16> from %src with %src advanced by
; 6 bytes. Two vld3.16 all-lanes loads are expected; only the second (odd
; D registers) carries the writeback (`[r1]!`).
510 define ptr @test_vld3q_dup_u16_update(ptr %dest, ptr %src) {
511 ; CHECK-LABEL: test_vld3q_dup_u16_update:
512 ; CHECK: vld3.16 {d16[], d18[], d20[]}, [r1]
513 ; CHECK-NEXT: vld3.16 {d17[], d19[], d21[]}, [r1]!
514 ; CHECK-NEXT: vst1.16 {d16, d17}, [r0]!
515 ; CHECK-NEXT: vst1.16 {d18, d19}, [r0]!
516 ; CHECK-NEXT: vst1.64 {d20, d21}, [r0]
517 ; CHECK-NEXT: mov r0, r1
520 %tmp = tail call %struct.uint16x8x3_t @llvm.arm.neon.vld3dup.v8i16.p0(ptr %src, i32 2)
521 store %struct.uint16x8x3_t %tmp, ptr %dest, align 8
522 %updated_src = getelementptr inbounds i8, ptr %src, i32 6
; 128-bit variant: vld3dup of <8 x i16> from %src with %src advanced by the
; runtime value %inc. Two vld3.16 all-lanes loads are expected; only the
; second (odd D registers) uses the register post-index form (`[r1], r2`).
526 define ptr @test_vld3q_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
527 ; CHECK-LABEL: test_vld3q_dup_u16_update_reg:
528 ; CHECK: vld3.16 {d16[], d18[], d20[]}, [r1]
529 ; CHECK-NEXT: vld3.16 {d17[], d19[], d21[]}, [r1], r2
530 ; CHECK-NEXT: vst1.16 {d16, d17}, [r0]!
531 ; CHECK-NEXT: vst1.16 {d18, d19}, [r0]!
532 ; CHECK-NEXT: vst1.64 {d20, d21}, [r0]
533 ; CHECK-NEXT: mov r0, r1
536 %tmp = tail call %struct.uint16x8x3_t @llvm.arm.neon.vld3dup.v8i16.p0(ptr %src, i32 2)
537 store %struct.uint16x8x3_t %tmp, ptr %dest, align 8
538 %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
; 128-bit variant: vld3dup of <4 x i32> from %src with %src advanced by
; 12 bytes. Two vld3.32 all-lanes loads are expected; only the second (odd
; D registers) carries the writeback (`[r1]!`).
542 define ptr @test_vld3q_dup_u32_update(ptr %dest, ptr %src) {
543 ; CHECK-LABEL: test_vld3q_dup_u32_update:
544 ; CHECK: vld3.32 {d16[], d18[], d20[]}, [r1]
545 ; CHECK-NEXT: vld3.32 {d17[], d19[], d21[]}, [r1]!
546 ; CHECK-NEXT: vst1.32 {d16, d17}, [r0]!
547 ; CHECK-NEXT: vst1.32 {d18, d19}, [r0]!
548 ; CHECK-NEXT: vst1.64 {d20, d21}, [r0]
549 ; CHECK-NEXT: mov r0, r1
552 %tmp = tail call %struct.uint32x4x3_t @llvm.arm.neon.vld3dup.v4i32.p0(ptr %src, i32 4)
553 store %struct.uint32x4x3_t %tmp, ptr %dest, align 8
554 %updated_src = getelementptr inbounds i8, ptr %src, i32 12
; 128-bit variant: vld3dup of <4 x i32> from %src with %src advanced by the
; runtime value %inc. Two vld3.32 all-lanes loads are expected; only the
; second (odd D registers) uses the register post-index form (`[r1], r2`).
558 define ptr @test_vld3q_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
559 ; CHECK-LABEL: test_vld3q_dup_u32_update_reg:
560 ; CHECK: vld3.32 {d16[], d18[], d20[]}, [r1]
561 ; CHECK-NEXT: vld3.32 {d17[], d19[], d21[]}, [r1], r2
562 ; CHECK-NEXT: vst1.32 {d16, d17}, [r0]!
563 ; CHECK-NEXT: vst1.32 {d18, d19}, [r0]!
564 ; CHECK-NEXT: vst1.64 {d20, d21}, [r0]
565 ; CHECK-NEXT: mov r0, r1
568 %tmp = tail call %struct.uint32x4x3_t @llvm.arm.neon.vld3dup.v4i32.p0(ptr %src, i32 4)
569 store %struct.uint32x4x3_t %tmp, ptr %dest, align 8
570 %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
; 128-bit variant: vld3dup of <16 x i8> from %src with %src advanced by
; 3 bytes. Two vld3.8 all-lanes loads are expected; only the second (odd
; D registers) carries the writeback (`[r1]!`).
574 define ptr @test_vld3q_dup_u8_update(ptr %dest, ptr %src) {
575 ; CHECK-LABEL: test_vld3q_dup_u8_update:
576 ; CHECK: vld3.8 {d16[], d18[], d20[]}, [r1]
577 ; CHECK-NEXT: vld3.8 {d17[], d19[], d21[]}, [r1]!
578 ; CHECK-NEXT: vst1.8 {d16, d17}, [r0]!
579 ; CHECK-NEXT: vst1.8 {d18, d19}, [r0]!
580 ; CHECK-NEXT: vst1.64 {d20, d21}, [r0]
581 ; CHECK-NEXT: mov r0, r1
584 %tmp = tail call %struct.uint8x16x3_t @llvm.arm.neon.vld3dup.v16i8.p0(ptr %src, i32 1)
585 store %struct.uint8x16x3_t %tmp, ptr %dest, align 8
586 %updated_src = getelementptr inbounds i8, ptr %src, i32 3
; 128-bit variant: vld3dup of <16 x i8> from %src with %src advanced by the
; runtime value %inc. Two vld3.8 all-lanes loads are expected; only the
; second (odd D registers) uses the register post-index form (`[r1], r2`).
590 define ptr @test_vld3q_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
591 ; CHECK-LABEL: test_vld3q_dup_u8_update_reg:
592 ; CHECK: vld3.8 {d16[], d18[], d20[]}, [r1]
593 ; CHECK-NEXT: vld3.8 {d17[], d19[], d21[]}, [r1], r2
594 ; CHECK-NEXT: vst1.8 {d16, d17}, [r0]!
595 ; CHECK-NEXT: vst1.8 {d18, d19}, [r0]!
596 ; CHECK-NEXT: vst1.64 {d20, d21}, [r0]
597 ; CHECK-NEXT: mov r0, r1
600 %tmp = tail call %struct.uint8x16x3_t @llvm.arm.neon.vld3dup.v16i8.p0(ptr %src, i32 1)
601 store %struct.uint8x16x3_t %tmp, ptr %dest, align 8
602 %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
; 128-bit variant: vld4dup of <8 x i16> from %src with %src advanced by
; 8 bytes. Two vld4.16 all-lanes loads are expected; only the second (odd
; D registers) carries the writeback (`[r1]!`).
606 define ptr @test_vld4q_dup_u16_update(ptr %dest, ptr %src) {
607 ; CHECK-LABEL: test_vld4q_dup_u16_update:
608 ; CHECK: vld4.16 {d16[], d18[], d20[], d22[]}, [r1]
609 ; CHECK-NEXT: vld4.16 {d17[], d19[], d21[], d23[]}, [r1]!
610 ; CHECK-NEXT: vst1.16 {d16, d17}, [r0]!
611 ; CHECK-NEXT: vst1.16 {d18, d19}, [r0]!
612 ; CHECK-NEXT: vst1.16 {d20, d21}, [r0]!
613 ; CHECK-NEXT: vst1.64 {d22, d23}, [r0]
614 ; CHECK-NEXT: mov r0, r1
617 %tmp = tail call %struct.uint16x8x4_t @llvm.arm.neon.vld4dup.v8i16.p0(ptr %src, i32 2)
618 store %struct.uint16x8x4_t %tmp, ptr %dest, align 8
619 %updated_src = getelementptr inbounds i8, ptr %src, i32 8
; 128-bit variant: vld4dup of <8 x i16> from %src with %src advanced by the
; runtime value %inc. Two vld4.16 all-lanes loads are expected; only the
; second (odd D registers) uses the register post-index form (`[r1], r2`).
623 define ptr @test_vld4q_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
624 ; CHECK-LABEL: test_vld4q_dup_u16_update_reg:
625 ; CHECK: vld4.16 {d16[], d18[], d20[], d22[]}, [r1]
626 ; CHECK-NEXT: vld4.16 {d17[], d19[], d21[], d23[]}, [r1], r2
627 ; CHECK-NEXT: vst1.16 {d16, d17}, [r0]!
628 ; CHECK-NEXT: vst1.16 {d18, d19}, [r0]!
629 ; CHECK-NEXT: vst1.16 {d20, d21}, [r0]!
630 ; CHECK-NEXT: vst1.64 {d22, d23}, [r0]
631 ; CHECK-NEXT: mov r0, r1
634 %tmp = tail call %struct.uint16x8x4_t @llvm.arm.neon.vld4dup.v8i16.p0(ptr %src, i32 2)
635 store %struct.uint16x8x4_t %tmp, ptr %dest, align 8
636 %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
; 128-bit variant: vld4dup of <4 x i32> from %src with %src advanced by
; 16 bytes. Two vld4.32 all-lanes loads are expected; only the second (odd
; D registers) carries the writeback (`[r1]!`).
640 define ptr @test_vld4q_dup_u32_update(ptr %dest, ptr %src) {
641 ; CHECK-LABEL: test_vld4q_dup_u32_update:
642 ; CHECK: vld4.32 {d16[], d18[], d20[], d22[]}, [r1]
643 ; CHECK-NEXT: vld4.32 {d17[], d19[], d21[], d23[]}, [r1]!
644 ; CHECK-NEXT: vst1.32 {d16, d17}, [r0]!
645 ; CHECK-NEXT: vst1.32 {d18, d19}, [r0]!
646 ; CHECK-NEXT: vst1.32 {d20, d21}, [r0]!
647 ; CHECK-NEXT: vst1.64 {d22, d23}, [r0]
648 ; CHECK-NEXT: mov r0, r1
651 %tmp = tail call %struct.uint32x4x4_t @llvm.arm.neon.vld4dup.v4i32.p0(ptr %src, i32 4)
652 store %struct.uint32x4x4_t %tmp, ptr %dest, align 8
653 %updated_src = getelementptr inbounds i8, ptr %src, i32 16
; 128-bit variant: vld4dup of <4 x i32> from %src with %src advanced by the
; runtime value %inc. Two vld4.32 all-lanes loads are expected; only the
; second (odd D registers) uses the register post-index form (`[r1], r2`).
657 define ptr @test_vld4q_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
658 ; CHECK-LABEL: test_vld4q_dup_u32_update_reg:
659 ; CHECK: vld4.32 {d16[], d18[], d20[], d22[]}, [r1]
660 ; CHECK-NEXT: vld4.32 {d17[], d19[], d21[], d23[]}, [r1], r2
661 ; CHECK-NEXT: vst1.32 {d16, d17}, [r0]!
662 ; CHECK-NEXT: vst1.32 {d18, d19}, [r0]!
663 ; CHECK-NEXT: vst1.32 {d20, d21}, [r0]!
664 ; CHECK-NEXT: vst1.64 {d22, d23}, [r0]
665 ; CHECK-NEXT: mov r0, r1
668 %tmp = tail call %struct.uint32x4x4_t @llvm.arm.neon.vld4dup.v4i32.p0(ptr %src, i32 4)
669 store %struct.uint32x4x4_t %tmp, ptr %dest, align 8
670 %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
; 128-bit variant: vld4dup of <16 x i8> from %src with %src advanced by
; 4 bytes. Two vld4.8 all-lanes loads are expected; only the second (odd
; D registers) carries the writeback (`[r1]!`).
674 define ptr @test_vld4q_dup_u8_update(ptr %dest, ptr %src) {
675 ; CHECK-LABEL: test_vld4q_dup_u8_update:
676 ; CHECK: vld4.8 {d16[], d18[], d20[], d22[]}, [r1]
677 ; CHECK-NEXT: vld4.8 {d17[], d19[], d21[], d23[]}, [r1]!
678 ; CHECK-NEXT: vst1.8 {d16, d17}, [r0]!
679 ; CHECK-NEXT: vst1.8 {d18, d19}, [r0]!
680 ; CHECK-NEXT: vst1.8 {d20, d21}, [r0]!
681 ; CHECK-NEXT: vst1.64 {d22, d23}, [r0]
682 ; CHECK-NEXT: mov r0, r1
685 %tmp = tail call %struct.uint8x16x4_t @llvm.arm.neon.vld4dup.v16i8.p0(ptr %src, i32 1)
686 store %struct.uint8x16x4_t %tmp, ptr %dest, align 8
687 %updated_src = getelementptr inbounds i8, ptr %src, i32 4
; 128-bit variant: vld4dup of <16 x i8> from %src with %src advanced by the
; runtime value %inc. Two vld4.8 all-lanes loads are expected; only the
; second (odd D registers) uses the register post-index form (`[r1], r2`).
691 define ptr @test_vld4q_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
692 ; CHECK-LABEL: test_vld4q_dup_u8_update_reg:
693 ; CHECK: vld4.8 {d16[], d18[], d20[], d22[]}, [r1]
694 ; CHECK-NEXT: vld4.8 {d17[], d19[], d21[], d23[]}, [r1], r2
695 ; CHECK-NEXT: vst1.8 {d16, d17}, [r0]!
696 ; CHECK-NEXT: vst1.8 {d18, d19}, [r0]!
697 ; CHECK-NEXT: vst1.8 {d20, d21}, [r0]!
698 ; CHECK-NEXT: vst1.64 {d22, d23}, [r0]
699 ; CHECK-NEXT: mov r0, r1
702 %tmp = tail call %struct.uint8x16x4_t @llvm.arm.neon.vld4dup.v16i8.p0(ptr %src, i32 1)
703 store %struct.uint8x16x4_t %tmp, ptr %dest, align 8
704 %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc