1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve %s -o - | FileCheck %s
; Copies a <4 x i32> vector from %x to %y and also forms %z = %x + 4 bytes.
; The checks expect a plain vldrw.u32 from [r0] with a separate adds r0, #4.
4 define i8* @post_ldrwu32_4(i8* %x, i8* %y) {
5 ; CHECK-LABEL: post_ldrwu32_4:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vldrw.u32 q0, [r0]
8 ; CHECK-NEXT: adds r0, #4
9 ; CHECK-NEXT: vstrw.32 q0, [r1]
12 %z = getelementptr inbounds i8, i8* %x, i32 4
13 %0 = bitcast i8* %x to <4 x i32>*
14 %1 = load <4 x i32>, <4 x i32>* %0, align 8
15 %2 = bitcast i8* %y to <4 x i32>*
16 store <4 x i32> %1, <4 x i32>* %2, align 8
; Same <4 x i32> copy from %x to %y, but %z = %x + 3 bytes (not a multiple of the
; 4-byte element size). The checks expect vldrw.u32 plus a separate adds r0, #3.
20 define i8* @post_ldrwu32_3(i8* %x, i8* %y) {
21 ; CHECK-LABEL: post_ldrwu32_3:
22 ; CHECK: @ %bb.0: @ %entry
23 ; CHECK-NEXT: vldrw.u32 q0, [r0]
24 ; CHECK-NEXT: adds r0, #3
25 ; CHECK-NEXT: vstrw.32 q0, [r1]
28 %z = getelementptr inbounds i8, i8* %x, i32 3
29 %0 = bitcast i8* %x to <4 x i32>*
30 %1 = load <4 x i32>, <4 x i32>* %0, align 8
31 %2 = bitcast i8* %y to <4 x i32>*
32 store <4 x i32> %1, <4 x i32>* %2, align 8
; <4 x i32> copy from %x to %y with a negative offset: %z = %x - 4 bytes.
; The checks expect vldrw.u32 from [r0] and a separate subs r0, #4.
36 define i8* @post_ldrwu32_m4(i8* %x, i8* %y) {
37 ; CHECK-LABEL: post_ldrwu32_m4:
38 ; CHECK: @ %bb.0: @ %entry
39 ; CHECK-NEXT: vldrw.u32 q0, [r0]
40 ; CHECK-NEXT: subs r0, #4
41 ; CHECK-NEXT: vstrw.32 q0, [r1]
44 %z = getelementptr inbounds i8, i8* %x, i32 -4
45 %0 = bitcast i8* %x to <4 x i32>*
46 %1 = load <4 x i32>, <4 x i32>* %0, align 8
47 %2 = bitcast i8* %y to <4 x i32>*
48 store <4 x i32> %1, <4 x i32>* %2, align 8
; <4 x i32> copy from %x to %y with %z = %x + 508 bytes.
; The checks expect vldrw.u32 from [r0] and a separate add.w r0, r0, #508.
; NOTE(review): 508 presumably sits at the edge of a scaled immediate range - confirm.
52 define i8* @post_ldrwu32_508(i8* %x, i8* %y) {
53 ; CHECK-LABEL: post_ldrwu32_508:
54 ; CHECK: @ %bb.0: @ %entry
55 ; CHECK-NEXT: vldrw.u32 q0, [r0]
56 ; CHECK-NEXT: add.w r0, r0, #508
57 ; CHECK-NEXT: vstrw.32 q0, [r1]
60 %z = getelementptr inbounds i8, i8* %x, i32 508
61 %0 = bitcast i8* %x to <4 x i32>*
62 %1 = load <4 x i32>, <4 x i32>* %0, align 8
63 %2 = bitcast i8* %y to <4 x i32>*
64 store <4 x i32> %1, <4 x i32>* %2, align 8
; <4 x i32> copy from %x to %y with %z = %x + 512 bytes.
; The checks expect vldrw.u32 from [r0] and a separate add.w r0, r0, #512.
; NOTE(review): 512 presumably lies just past a scaled immediate range - confirm.
68 define i8* @post_ldrwu32_512(i8* %x, i8* %y) {
69 ; CHECK-LABEL: post_ldrwu32_512:
70 ; CHECK: @ %bb.0: @ %entry
71 ; CHECK-NEXT: vldrw.u32 q0, [r0]
72 ; CHECK-NEXT: add.w r0, r0, #512
73 ; CHECK-NEXT: vstrw.32 q0, [r1]
76 %z = getelementptr inbounds i8, i8* %x, i32 512
77 %0 = bitcast i8* %x to <4 x i32>*
78 %1 = load <4 x i32>, <4 x i32>* %0, align 8
79 %2 = bitcast i8* %y to <4 x i32>*
80 store <4 x i32> %1, <4 x i32>* %2, align 8
; <4 x i32> copy from %x to %y with %z = %x - 508 bytes.
; The checks expect vldrw.u32 from [r0] and a separate sub.w r0, r0, #508.
; NOTE(review): -508 presumably sits at the edge of a scaled immediate range - confirm.
84 define i8* @post_ldrwu32_m508(i8* %x, i8* %y) {
85 ; CHECK-LABEL: post_ldrwu32_m508:
86 ; CHECK: @ %bb.0: @ %entry
87 ; CHECK-NEXT: vldrw.u32 q0, [r0]
88 ; CHECK-NEXT: sub.w r0, r0, #508
89 ; CHECK-NEXT: vstrw.32 q0, [r1]
92 %z = getelementptr inbounds i8, i8* %x, i32 -508
93 %0 = bitcast i8* %x to <4 x i32>*
94 %1 = load <4 x i32>, <4 x i32>* %0, align 8
95 %2 = bitcast i8* %y to <4 x i32>*
96 store <4 x i32> %1, <4 x i32>* %2, align 8
; <4 x i32> copy from %x to %y with %z = %x - 512 bytes.
; The checks expect vldrw.u32 from [r0] and a separate sub.w r0, r0, #512.
; NOTE(review): -512 presumably lies just past a scaled immediate range - confirm.
100 define i8* @post_ldrwu32_m512(i8* %x, i8* %y) {
101 ; CHECK-LABEL: post_ldrwu32_m512:
102 ; CHECK: @ %bb.0: @ %entry
103 ; CHECK-NEXT: vldrw.u32 q0, [r0]
104 ; CHECK-NEXT: sub.w r0, r0, #512
105 ; CHECK-NEXT: vstrw.32 q0, [r1]
108 %z = getelementptr inbounds i8, i8* %x, i32 -512
109 %0 = bitcast i8* %x to <4 x i32>*
110 %1 = load <4 x i32>, <4 x i32>* %0, align 8
111 %2 = bitcast i8* %y to <4 x i32>*
112 store <4 x i32> %1, <4 x i32>* %2, align 8
; Loads <4 x i16> from %x, zero-extends to <4 x i32>, stores to %y; %z = %x + 4 bytes.
; The checks expect a widening vldrh.u32 from [r0] and a separate adds r0, #4.
117 define i8* @post_ldrhu32_4(i8* %x, i8* %y) {
118 ; CHECK-LABEL: post_ldrhu32_4:
119 ; CHECK: @ %bb.0: @ %entry
120 ; CHECK-NEXT: vldrh.u32 q0, [r0]
121 ; CHECK-NEXT: adds r0, #4
122 ; CHECK-NEXT: vstrw.32 q0, [r1]
125 %z = getelementptr inbounds i8, i8* %x, i32 4
126 %0 = bitcast i8* %x to <4 x i16>*
127 %1 = load <4 x i16>, <4 x i16>* %0, align 8
128 %2 = zext <4 x i16> %1 to <4 x i32>
129 %3 = bitcast i8* %y to <4 x i32>*
130 store <4 x i32> %2, <4 x i32>* %3, align 8
; Zero-extending <4 x i16> -> <4 x i32> load from %x, stored to %y; %z = %x + 3 bytes
; (an odd offset, not a multiple of the 2-byte element size).
; The checks expect vldrh.u32 from [r0] and a separate adds r0, #3.
134 define i8* @post_ldrhu32_3(i8* %x, i8* %y) {
135 ; CHECK-LABEL: post_ldrhu32_3:
136 ; CHECK: @ %bb.0: @ %entry
137 ; CHECK-NEXT: vldrh.u32 q0, [r0]
138 ; CHECK-NEXT: adds r0, #3
139 ; CHECK-NEXT: vstrw.32 q0, [r1]
142 %z = getelementptr inbounds i8, i8* %x, i32 3
143 %0 = bitcast i8* %x to <4 x i16>*
144 %1 = load <4 x i16>, <4 x i16>* %0, align 8
145 %2 = zext <4 x i16> %1 to <4 x i32>
146 %3 = bitcast i8* %y to <4 x i32>*
147 store <4 x i32> %2, <4 x i32>* %3, align 8
; Zero-extending <4 x i16> -> <4 x i32> load from %x, stored to %y; %z = %x + 2 bytes.
; The checks expect vldrh.u32 from [r0] and a separate adds r0, #2.
151 define i8* @post_ldrhu32_2(i8* %x, i8* %y) {
152 ; CHECK-LABEL: post_ldrhu32_2:
153 ; CHECK: @ %bb.0: @ %entry
154 ; CHECK-NEXT: vldrh.u32 q0, [r0]
155 ; CHECK-NEXT: adds r0, #2
156 ; CHECK-NEXT: vstrw.32 q0, [r1]
159 %z = getelementptr inbounds i8, i8* %x, i32 2
160 %0 = bitcast i8* %x to <4 x i16>*
161 %1 = load <4 x i16>, <4 x i16>* %0, align 8
162 %2 = zext <4 x i16> %1 to <4 x i32>
163 %3 = bitcast i8* %y to <4 x i32>*
164 store <4 x i32> %2, <4 x i32>* %3, align 8
; Zero-extending <4 x i16> -> <4 x i32> load from %x, stored to %y; %z = %x + 254 bytes.
; The checks expect vldrh.u32 from [r0] and a separate adds r0, #254.
; NOTE(review): 254 presumably sits at the edge of a scaled immediate range - confirm.
168 define i8* @post_ldrhu32_254(i8* %x, i8* %y) {
169 ; CHECK-LABEL: post_ldrhu32_254:
170 ; CHECK: @ %bb.0: @ %entry
171 ; CHECK-NEXT: vldrh.u32 q0, [r0]
172 ; CHECK-NEXT: adds r0, #254
173 ; CHECK-NEXT: vstrw.32 q0, [r1]
176 %z = getelementptr inbounds i8, i8* %x, i32 254
177 %0 = bitcast i8* %x to <4 x i16>*
178 %1 = load <4 x i16>, <4 x i16>* %0, align 8
179 %2 = zext <4 x i16> %1 to <4 x i32>
180 %3 = bitcast i8* %y to <4 x i32>*
181 store <4 x i32> %2, <4 x i32>* %3, align 8
; Zero-extending <4 x i16> -> <4 x i32> load from %x, stored to %y; %z = %x + 256 bytes.
; The checks expect vldrh.u32 from [r0] and a separate add.w r0, r0, #256.
; NOTE(review): 256 presumably lies just past a scaled immediate range - confirm.
185 define i8* @post_ldrhu32_256(i8* %x, i8* %y) {
186 ; CHECK-LABEL: post_ldrhu32_256:
187 ; CHECK: @ %bb.0: @ %entry
188 ; CHECK-NEXT: vldrh.u32 q0, [r0]
189 ; CHECK-NEXT: add.w r0, r0, #256
190 ; CHECK-NEXT: vstrw.32 q0, [r1]
193 %z = getelementptr inbounds i8, i8* %x, i32 256
194 %0 = bitcast i8* %x to <4 x i16>*
195 %1 = load <4 x i16>, <4 x i16>* %0, align 8
196 %2 = zext <4 x i16> %1 to <4 x i32>
197 %3 = bitcast i8* %y to <4 x i32>*
198 store <4 x i32> %2, <4 x i32>* %3, align 8
; Loads <4 x i16> from %x, sign-extends to <4 x i32>, stores to %y; %z = %x + 4 bytes.
; The checks expect a widening vldrh.s32 from [r0] and a separate adds r0, #4.
203 define i8* @post_ldrhs32_4(i8* %x, i8* %y) {
204 ; CHECK-LABEL: post_ldrhs32_4:
205 ; CHECK: @ %bb.0: @ %entry
206 ; CHECK-NEXT: vldrh.s32 q0, [r0]
207 ; CHECK-NEXT: adds r0, #4
208 ; CHECK-NEXT: vstrw.32 q0, [r1]
211 %z = getelementptr inbounds i8, i8* %x, i32 4
212 %0 = bitcast i8* %x to <4 x i16>*
213 %1 = load <4 x i16>, <4 x i16>* %0, align 8
214 %2 = sext <4 x i16> %1 to <4 x i32>
215 %3 = bitcast i8* %y to <4 x i32>*
216 store <4 x i32> %2, <4 x i32>* %3, align 8
; Sign-extending <4 x i16> -> <4 x i32> load from %x, stored to %y; %z = %x + 3 bytes
; (an odd offset, not a multiple of the 2-byte element size).
; The checks expect vldrh.s32 from [r0] and a separate adds r0, #3.
220 define i8* @post_ldrhs32_3(i8* %x, i8* %y) {
221 ; CHECK-LABEL: post_ldrhs32_3:
222 ; CHECK: @ %bb.0: @ %entry
223 ; CHECK-NEXT: vldrh.s32 q0, [r0]
224 ; CHECK-NEXT: adds r0, #3
225 ; CHECK-NEXT: vstrw.32 q0, [r1]
228 %z = getelementptr inbounds i8, i8* %x, i32 3
229 %0 = bitcast i8* %x to <4 x i16>*
230 %1 = load <4 x i16>, <4 x i16>* %0, align 8
231 %2 = sext <4 x i16> %1 to <4 x i32>
232 %3 = bitcast i8* %y to <4 x i32>*
233 store <4 x i32> %2, <4 x i32>* %3, align 8
; Sign-extending <4 x i16> -> <4 x i32> load from %x, stored to %y; %z = %x + 2 bytes.
; The checks expect vldrh.s32 from [r0] and a separate adds r0, #2.
237 define i8* @post_ldrhs32_2(i8* %x, i8* %y) {
238 ; CHECK-LABEL: post_ldrhs32_2:
239 ; CHECK: @ %bb.0: @ %entry
240 ; CHECK-NEXT: vldrh.s32 q0, [r0]
241 ; CHECK-NEXT: adds r0, #2
242 ; CHECK-NEXT: vstrw.32 q0, [r1]
245 %z = getelementptr inbounds i8, i8* %x, i32 2
246 %0 = bitcast i8* %x to <4 x i16>*
247 %1 = load <4 x i16>, <4 x i16>* %0, align 8
248 %2 = sext <4 x i16> %1 to <4 x i32>
249 %3 = bitcast i8* %y to <4 x i32>*
250 store <4 x i32> %2, <4 x i32>* %3, align 8
; Sign-extending <4 x i16> -> <4 x i32> load from %x, stored to %y; %z = %x + 254 bytes.
; The checks expect vldrh.s32 from [r0] and a separate adds r0, #254.
; NOTE(review): 254 presumably sits at the edge of a scaled immediate range - confirm.
254 define i8* @post_ldrhs32_254(i8* %x, i8* %y) {
255 ; CHECK-LABEL: post_ldrhs32_254:
256 ; CHECK: @ %bb.0: @ %entry
257 ; CHECK-NEXT: vldrh.s32 q0, [r0]
258 ; CHECK-NEXT: adds r0, #254
259 ; CHECK-NEXT: vstrw.32 q0, [r1]
262 %z = getelementptr inbounds i8, i8* %x, i32 254
263 %0 = bitcast i8* %x to <4 x i16>*
264 %1 = load <4 x i16>, <4 x i16>* %0, align 8
265 %2 = sext <4 x i16> %1 to <4 x i32>
266 %3 = bitcast i8* %y to <4 x i32>*
267 store <4 x i32> %2, <4 x i32>* %3, align 8
; Sign-extending <4 x i16> -> <4 x i32> load from %x, stored to %y; %z = %x + 256 bytes.
; The checks expect vldrh.s32 from [r0] and a separate add.w r0, r0, #256.
; NOTE(review): 256 presumably lies just past a scaled immediate range - confirm.
271 define i8* @post_ldrhs32_256(i8* %x, i8* %y) {
272 ; CHECK-LABEL: post_ldrhs32_256:
273 ; CHECK: @ %bb.0: @ %entry
274 ; CHECK-NEXT: vldrh.s32 q0, [r0]
275 ; CHECK-NEXT: add.w r0, r0, #256
276 ; CHECK-NEXT: vstrw.32 q0, [r1]
279 %z = getelementptr inbounds i8, i8* %x, i32 256
280 %0 = bitcast i8* %x to <4 x i16>*
281 %1 = load <4 x i16>, <4 x i16>* %0, align 8
282 %2 = sext <4 x i16> %1 to <4 x i32>
283 %3 = bitcast i8* %y to <4 x i32>*
284 store <4 x i32> %2, <4 x i32>* %3, align 8
; Copies a full <8 x i16> vector from %x to %y; %z = %x + 4 bytes.
; The checks expect the copy as vldrw.u32 / vstrw.32 with a separate adds r0, #4.
289 define i8* @post_ldrhu16_4(i8* %x, i8* %y) {
290 ; CHECK-LABEL: post_ldrhu16_4:
291 ; CHECK: @ %bb.0: @ %entry
292 ; CHECK-NEXT: vldrw.u32 q0, [r0]
293 ; CHECK-NEXT: adds r0, #4
294 ; CHECK-NEXT: vstrw.32 q0, [r1]
297 %z = getelementptr inbounds i8, i8* %x, i32 4
298 %0 = bitcast i8* %x to <8 x i16>*
299 %1 = load <8 x i16>, <8 x i16>* %0, align 8
300 %2 = bitcast i8* %y to <8 x i16>*
301 store <8 x i16> %1, <8 x i16>* %2, align 8
; <8 x i16> copy from %x to %y; %z = %x + 3 bytes (an odd byte offset).
; The checks expect vldrw.u32 / vstrw.32 with a separate adds r0, #3.
305 define i8* @post_ldrhu16_3(i8* %x, i8* %y) {
306 ; CHECK-LABEL: post_ldrhu16_3:
307 ; CHECK: @ %bb.0: @ %entry
308 ; CHECK-NEXT: vldrw.u32 q0, [r0]
309 ; CHECK-NEXT: adds r0, #3
310 ; CHECK-NEXT: vstrw.32 q0, [r1]
313 %z = getelementptr inbounds i8, i8* %x, i32 3
314 %0 = bitcast i8* %x to <8 x i16>*
315 %1 = load <8 x i16>, <8 x i16>* %0, align 8
316 %2 = bitcast i8* %y to <8 x i16>*
317 store <8 x i16> %1, <8 x i16>* %2, align 8
; <8 x i16> copy from %x to %y; %z = %x + 2 bytes.
; The checks expect vldrw.u32 / vstrw.32 with a separate adds r0, #2.
321 define i8* @post_ldrhu16_2(i8* %x, i8* %y) {
322 ; CHECK-LABEL: post_ldrhu16_2:
323 ; CHECK: @ %bb.0: @ %entry
324 ; CHECK-NEXT: vldrw.u32 q0, [r0]
325 ; CHECK-NEXT: adds r0, #2
326 ; CHECK-NEXT: vstrw.32 q0, [r1]
329 %z = getelementptr inbounds i8, i8* %x, i32 2
330 %0 = bitcast i8* %x to <8 x i16>*
331 %1 = load <8 x i16>, <8 x i16>* %0, align 8
332 %2 = bitcast i8* %y to <8 x i16>*
333 store <8 x i16> %1, <8 x i16>* %2, align 8
; <8 x i16> copy from %x to %y; %z = %x + 254 bytes.
; The checks expect vldrw.u32 / vstrw.32 with a separate adds r0, #254.
; NOTE(review): 254 presumably sits at the edge of a scaled immediate range - confirm.
337 define i8* @post_ldrhu16_254(i8* %x, i8* %y) {
338 ; CHECK-LABEL: post_ldrhu16_254:
339 ; CHECK: @ %bb.0: @ %entry
340 ; CHECK-NEXT: vldrw.u32 q0, [r0]
341 ; CHECK-NEXT: adds r0, #254
342 ; CHECK-NEXT: vstrw.32 q0, [r1]
345 %z = getelementptr inbounds i8, i8* %x, i32 254
346 %0 = bitcast i8* %x to <8 x i16>*
347 %1 = load <8 x i16>, <8 x i16>* %0, align 8
348 %2 = bitcast i8* %y to <8 x i16>*
349 store <8 x i16> %1, <8 x i16>* %2, align 8
; <8 x i16> copy from %x to %y; %z = %x + 256 bytes.
; The checks expect vldrw.u32 / vstrw.32 with a separate add.w r0, r0, #256.
; NOTE(review): 256 presumably lies just past a scaled immediate range - confirm.
353 define i8* @post_ldrhu16_256(i8* %x, i8* %y) {
354 ; CHECK-LABEL: post_ldrhu16_256:
355 ; CHECK: @ %bb.0: @ %entry
356 ; CHECK-NEXT: vldrw.u32 q0, [r0]
357 ; CHECK-NEXT: add.w r0, r0, #256
358 ; CHECK-NEXT: vstrw.32 q0, [r1]
361 %z = getelementptr inbounds i8, i8* %x, i32 256
362 %0 = bitcast i8* %x to <8 x i16>*
363 %1 = load <8 x i16>, <8 x i16>* %0, align 8
364 %2 = bitcast i8* %y to <8 x i16>*
365 store <8 x i16> %1, <8 x i16>* %2, align 8
; Loads <4 x i8> from %x, zero-extends to <4 x i32>, stores to %y; %z = %x + 4 bytes.
; The checks expect a widening vldrb.u32 from [r0] and a separate adds r0, #4.
370 define i8* @post_ldrbu32_4(i8* %x, i8* %y) {
371 ; CHECK-LABEL: post_ldrbu32_4:
372 ; CHECK: @ %bb.0: @ %entry
373 ; CHECK-NEXT: vldrb.u32 q0, [r0]
374 ; CHECK-NEXT: adds r0, #4
375 ; CHECK-NEXT: vstrw.32 q0, [r1]
378 %z = getelementptr inbounds i8, i8* %x, i32 4
379 %0 = bitcast i8* %x to <4 x i8>*
380 %1 = load <4 x i8>, <4 x i8>* %0, align 8
381 %2 = zext <4 x i8> %1 to <4 x i32>
382 %3 = bitcast i8* %y to <4 x i32>*
383 store <4 x i32> %2, <4 x i32>* %3, align 8
; Zero-extending <4 x i8> -> <4 x i32> load from %x, stored to %y; %z = %x + 3 bytes.
; The checks expect vldrb.u32 from [r0] and a separate adds r0, #3.
387 define i8* @post_ldrbu32_3(i8* %x, i8* %y) {
388 ; CHECK-LABEL: post_ldrbu32_3:
389 ; CHECK: @ %bb.0: @ %entry
390 ; CHECK-NEXT: vldrb.u32 q0, [r0]
391 ; CHECK-NEXT: adds r0, #3
392 ; CHECK-NEXT: vstrw.32 q0, [r1]
395 %z = getelementptr inbounds i8, i8* %x, i32 3
396 %0 = bitcast i8* %x to <4 x i8>*
397 %1 = load <4 x i8>, <4 x i8>* %0, align 8
398 %2 = zext <4 x i8> %1 to <4 x i32>
399 %3 = bitcast i8* %y to <4 x i32>*
400 store <4 x i32> %2, <4 x i32>* %3, align 8
; Zero-extending <4 x i8> -> <4 x i32> load from %x, stored to %y; %z = %x + 127 bytes.
; The checks expect vldrb.u32 from [r0] and a separate adds r0, #127.
; NOTE(review): 127 presumably sits at the edge of an immediate range - confirm.
404 define i8* @post_ldrbu32_127(i8* %x, i8* %y) {
405 ; CHECK-LABEL: post_ldrbu32_127:
406 ; CHECK: @ %bb.0: @ %entry
407 ; CHECK-NEXT: vldrb.u32 q0, [r0]
408 ; CHECK-NEXT: adds r0, #127
409 ; CHECK-NEXT: vstrw.32 q0, [r1]
412 %z = getelementptr inbounds i8, i8* %x, i32 127
413 %0 = bitcast i8* %x to <4 x i8>*
414 %1 = load <4 x i8>, <4 x i8>* %0, align 8
415 %2 = zext <4 x i8> %1 to <4 x i32>
416 %3 = bitcast i8* %y to <4 x i32>*
417 store <4 x i32> %2, <4 x i32>* %3, align 8
; Zero-extending <4 x i8> -> <4 x i32> load from %x, stored to %y; %z = %x + 128 bytes.
; The checks expect vldrb.u32 from [r0] and a separate adds r0, #128.
; NOTE(review): 128 presumably lies just past an immediate range - confirm.
421 define i8* @post_ldrbu32_128(i8* %x, i8* %y) {
422 ; CHECK-LABEL: post_ldrbu32_128:
423 ; CHECK: @ %bb.0: @ %entry
424 ; CHECK-NEXT: vldrb.u32 q0, [r0]
425 ; CHECK-NEXT: adds r0, #128
426 ; CHECK-NEXT: vstrw.32 q0, [r1]
429 %z = getelementptr inbounds i8, i8* %x, i32 128
430 %0 = bitcast i8* %x to <4 x i8>*
431 %1 = load <4 x i8>, <4 x i8>* %0, align 8
432 %2 = zext <4 x i8> %1 to <4 x i32>
433 %3 = bitcast i8* %y to <4 x i32>*
434 store <4 x i32> %2, <4 x i32>* %3, align 8
; Loads <4 x i8> from %x, sign-extends to <4 x i32>, stores to %y; %z = %x + 4 bytes.
; The checks expect a widening vldrb.s32 from [r0] and a separate adds r0, #4.
439 define i8* @post_ldrbs32_4(i8* %x, i8* %y) {
440 ; CHECK-LABEL: post_ldrbs32_4:
441 ; CHECK: @ %bb.0: @ %entry
442 ; CHECK-NEXT: vldrb.s32 q0, [r0]
443 ; CHECK-NEXT: adds r0, #4
444 ; CHECK-NEXT: vstrw.32 q0, [r1]
447 %z = getelementptr inbounds i8, i8* %x, i32 4
448 %0 = bitcast i8* %x to <4 x i8>*
449 %1 = load <4 x i8>, <4 x i8>* %0, align 8
450 %2 = sext <4 x i8> %1 to <4 x i32>
451 %3 = bitcast i8* %y to <4 x i32>*
452 store <4 x i32> %2, <4 x i32>* %3, align 8
; Sign-extending <4 x i8> -> <4 x i32> load from %x, stored to %y; %z = %x + 3 bytes.
; The checks expect vldrb.s32 from [r0] and a separate adds r0, #3.
456 define i8* @post_ldrbs32_3(i8* %x, i8* %y) {
457 ; CHECK-LABEL: post_ldrbs32_3:
458 ; CHECK: @ %bb.0: @ %entry
459 ; CHECK-NEXT: vldrb.s32 q0, [r0]
460 ; CHECK-NEXT: adds r0, #3
461 ; CHECK-NEXT: vstrw.32 q0, [r1]
464 %z = getelementptr inbounds i8, i8* %x, i32 3
465 %0 = bitcast i8* %x to <4 x i8>*
466 %1 = load <4 x i8>, <4 x i8>* %0, align 8
467 %2 = sext <4 x i8> %1 to <4 x i32>
468 %3 = bitcast i8* %y to <4 x i32>*
469 store <4 x i32> %2, <4 x i32>* %3, align 8
; Sign-extending <4 x i8> -> <4 x i32> load from %x, stored to %y; %z = %x + 127 bytes.
; The checks expect vldrb.s32 from [r0] and a separate adds r0, #127.
; NOTE(review): 127 presumably sits at the edge of an immediate range - confirm.
473 define i8* @post_ldrbs32_127(i8* %x, i8* %y) {
474 ; CHECK-LABEL: post_ldrbs32_127:
475 ; CHECK: @ %bb.0: @ %entry
476 ; CHECK-NEXT: vldrb.s32 q0, [r0]
477 ; CHECK-NEXT: adds r0, #127
478 ; CHECK-NEXT: vstrw.32 q0, [r1]
481 %z = getelementptr inbounds i8, i8* %x, i32 127
482 %0 = bitcast i8* %x to <4 x i8>*
483 %1 = load <4 x i8>, <4 x i8>* %0, align 8
484 %2 = sext <4 x i8> %1 to <4 x i32>
485 %3 = bitcast i8* %y to <4 x i32>*
486 store <4 x i32> %2, <4 x i32>* %3, align 8
; Sign-extending <4 x i8> -> <4 x i32> load from %x, stored to %y; %z = %x + 128 bytes.
; The checks expect vldrb.s32 from [r0] and a separate adds r0, #128.
; NOTE(review): 128 presumably lies just past an immediate range - confirm.
490 define i8* @post_ldrbs32_128(i8* %x, i8* %y) {
491 ; CHECK-LABEL: post_ldrbs32_128:
492 ; CHECK: @ %bb.0: @ %entry
493 ; CHECK-NEXT: vldrb.s32 q0, [r0]
494 ; CHECK-NEXT: adds r0, #128
495 ; CHECK-NEXT: vstrw.32 q0, [r1]
498 %z = getelementptr inbounds i8, i8* %x, i32 128
499 %0 = bitcast i8* %x to <4 x i8>*
500 %1 = load <4 x i8>, <4 x i8>* %0, align 8
501 %2 = sext <4 x i8> %1 to <4 x i32>
502 %3 = bitcast i8* %y to <4 x i32>*
503 store <4 x i32> %2, <4 x i32>* %3, align 8
; Loads <8 x i8> from %x, zero-extends to <8 x i16>, stores to %y; %z = %x + 4 bytes.
; The checks expect a widening vldrb.u16 from [r0], a separate adds r0, #4, and the
; <8 x i16> store emitted as vstrw.32.
508 define i8* @post_ldrbu16_4(i8* %x, i8* %y) {
509 ; CHECK-LABEL: post_ldrbu16_4:
510 ; CHECK: @ %bb.0: @ %entry
511 ; CHECK-NEXT: vldrb.u16 q0, [r0]
512 ; CHECK-NEXT: adds r0, #4
513 ; CHECK-NEXT: vstrw.32 q0, [r1]
516 %z = getelementptr inbounds i8, i8* %x, i32 4
517 %0 = bitcast i8* %x to <8 x i8>*
518 %1 = load <8 x i8>, <8 x i8>* %0, align 8
519 %2 = zext <8 x i8> %1 to <8 x i16>
520 %3 = bitcast i8* %y to <8 x i16>*
521 store <8 x i16> %2, <8 x i16>* %3, align 8
; Zero-extending <8 x i8> -> <8 x i16> load from %x, stored to %y; %z = %x + 3 bytes.
; The checks expect vldrb.u16 from [r0] and a separate adds r0, #3.
525 define i8* @post_ldrbu16_3(i8* %x, i8* %y) {
526 ; CHECK-LABEL: post_ldrbu16_3:
527 ; CHECK: @ %bb.0: @ %entry
528 ; CHECK-NEXT: vldrb.u16 q0, [r0]
529 ; CHECK-NEXT: adds r0, #3
530 ; CHECK-NEXT: vstrw.32 q0, [r1]
533 %z = getelementptr inbounds i8, i8* %x, i32 3
534 %0 = bitcast i8* %x to <8 x i8>*
535 %1 = load <8 x i8>, <8 x i8>* %0, align 8
536 %2 = zext <8 x i8> %1 to <8 x i16>
537 %3 = bitcast i8* %y to <8 x i16>*
538 store <8 x i16> %2, <8 x i16>* %3, align 8
; Zero-extending <8 x i8> -> <8 x i16> load from %x, stored to %y; %z = %x + 127 bytes.
; The checks expect vldrb.u16 from [r0] and a separate adds r0, #127.
; NOTE(review): 127 presumably sits at the edge of an immediate range - confirm.
542 define i8* @post_ldrbu16_127(i8* %x, i8* %y) {
543 ; CHECK-LABEL: post_ldrbu16_127:
544 ; CHECK: @ %bb.0: @ %entry
545 ; CHECK-NEXT: vldrb.u16 q0, [r0]
546 ; CHECK-NEXT: adds r0, #127
547 ; CHECK-NEXT: vstrw.32 q0, [r1]
550 %z = getelementptr inbounds i8, i8* %x, i32 127
551 %0 = bitcast i8* %x to <8 x i8>*
552 %1 = load <8 x i8>, <8 x i8>* %0, align 8
553 %2 = zext <8 x i8> %1 to <8 x i16>
554 %3 = bitcast i8* %y to <8 x i16>*
555 store <8 x i16> %2, <8 x i16>* %3, align 8
; Zero-extending <8 x i8> -> <8 x i16> load from %x, stored to %y; %z = %x + 128 bytes.
; The checks expect vldrb.u16 from [r0] and a separate adds r0, #128.
; NOTE(review): 128 presumably lies just past an immediate range - confirm.
559 define i8* @post_ldrbu16_128(i8* %x, i8* %y) {
560 ; CHECK-LABEL: post_ldrbu16_128:
561 ; CHECK: @ %bb.0: @ %entry
562 ; CHECK-NEXT: vldrb.u16 q0, [r0]
563 ; CHECK-NEXT: adds r0, #128
564 ; CHECK-NEXT: vstrw.32 q0, [r1]
567 %z = getelementptr inbounds i8, i8* %x, i32 128
568 %0 = bitcast i8* %x to <8 x i8>*
569 %1 = load <8 x i8>, <8 x i8>* %0, align 8
570 %2 = zext <8 x i8> %1 to <8 x i16>
571 %3 = bitcast i8* %y to <8 x i16>*
572 store <8 x i16> %2, <8 x i16>* %3, align 8
; Loads <8 x i8> from %x, sign-extends to <8 x i16>, stores to %y; %z = %x + 4 bytes.
; The checks expect a widening vldrb.s16 from [r0], a separate adds r0, #4, and the
; <8 x i16> store emitted as vstrw.32.
577 define i8* @post_ldrbs16_4(i8* %x, i8* %y) {
578 ; CHECK-LABEL: post_ldrbs16_4:
579 ; CHECK: @ %bb.0: @ %entry
580 ; CHECK-NEXT: vldrb.s16 q0, [r0]
581 ; CHECK-NEXT: adds r0, #4
582 ; CHECK-NEXT: vstrw.32 q0, [r1]
585 %z = getelementptr inbounds i8, i8* %x, i32 4
586 %0 = bitcast i8* %x to <8 x i8>*
587 %1 = load <8 x i8>, <8 x i8>* %0, align 8
588 %2 = sext <8 x i8> %1 to <8 x i16>
589 %3 = bitcast i8* %y to <8 x i16>*
590 store <8 x i16> %2, <8 x i16>* %3, align 8
; Sign-extending <8 x i8> -> <8 x i16> load from %x, stored to %y; %z = %x + 3 bytes.
; The checks expect vldrb.s16 from [r0] and a separate adds r0, #3.
594 define i8* @post_ldrbs16_3(i8* %x, i8* %y) {
595 ; CHECK-LABEL: post_ldrbs16_3:
596 ; CHECK: @ %bb.0: @ %entry
597 ; CHECK-NEXT: vldrb.s16 q0, [r0]
598 ; CHECK-NEXT: adds r0, #3
599 ; CHECK-NEXT: vstrw.32 q0, [r1]
602 %z = getelementptr inbounds i8, i8* %x, i32 3
603 %0 = bitcast i8* %x to <8 x i8>*
604 %1 = load <8 x i8>, <8 x i8>* %0, align 8
605 %2 = sext <8 x i8> %1 to <8 x i16>
606 %3 = bitcast i8* %y to <8 x i16>*
607 store <8 x i16> %2, <8 x i16>* %3, align 8
; Sign-extending <8 x i8> -> <8 x i16> load from %x, stored to %y; %z = %x + 127 bytes.
; The checks expect vldrb.s16 from [r0] and a separate adds r0, #127.
; NOTE(review): 127 presumably sits at the edge of an immediate range - confirm.
611 define i8* @post_ldrbs16_127(i8* %x, i8* %y) {
612 ; CHECK-LABEL: post_ldrbs16_127:
613 ; CHECK: @ %bb.0: @ %entry
614 ; CHECK-NEXT: vldrb.s16 q0, [r0]
615 ; CHECK-NEXT: adds r0, #127
616 ; CHECK-NEXT: vstrw.32 q0, [r1]
619 %z = getelementptr inbounds i8, i8* %x, i32 127
620 %0 = bitcast i8* %x to <8 x i8>*
621 %1 = load <8 x i8>, <8 x i8>* %0, align 8
622 %2 = sext <8 x i8> %1 to <8 x i16>
623 %3 = bitcast i8* %y to <8 x i16>*
624 store <8 x i16> %2, <8 x i16>* %3, align 8
; Sign-extending <8 x i8> -> <8 x i16> load from %x, stored to %y; %z = %x + 128 bytes.
; The checks expect vldrb.s16 from [r0] and a separate adds r0, #128.
; NOTE(review): 128 presumably lies just past an immediate range - confirm.
628 define i8* @post_ldrbs16_128(i8* %x, i8* %y) {
629 ; CHECK-LABEL: post_ldrbs16_128:
630 ; CHECK: @ %bb.0: @ %entry
631 ; CHECK-NEXT: vldrb.s16 q0, [r0]
632 ; CHECK-NEXT: adds r0, #128
633 ; CHECK-NEXT: vstrw.32 q0, [r1]
636 %z = getelementptr inbounds i8, i8* %x, i32 128
637 %0 = bitcast i8* %x to <8 x i8>*
638 %1 = load <8 x i8>, <8 x i8>* %0, align 8
639 %2 = sext <8 x i8> %1 to <8 x i16>
640 %3 = bitcast i8* %y to <8 x i16>*
641 store <8 x i16> %2, <8 x i16>* %3, align 8
; Copies a full <16 x i8> vector from %x to %y; %z = %x + 4 bytes.
; The checks expect the copy as vldrw.u32 / vstrw.32 with a separate adds r0, #4.
646 define i8* @post_ldrbu8_4(i8* %x, i8* %y) {
647 ; CHECK-LABEL: post_ldrbu8_4:
648 ; CHECK: @ %bb.0: @ %entry
649 ; CHECK-NEXT: vldrw.u32 q0, [r0]
650 ; CHECK-NEXT: adds r0, #4
651 ; CHECK-NEXT: vstrw.32 q0, [r1]
654 %z = getelementptr inbounds i8, i8* %x, i32 4
655 %0 = bitcast i8* %x to <16 x i8>*
656 %1 = load <16 x i8>, <16 x i8>* %0, align 8
657 %2 = bitcast i8* %y to <16 x i8>*
658 store <16 x i8> %1, <16 x i8>* %2, align 8
; <16 x i8> copy from %x to %y; %z = %x + 3 bytes.
; The checks expect vldrw.u32 / vstrw.32 with a separate adds r0, #3.
662 define i8* @post_ldrbu8_3(i8* %x, i8* %y) {
663 ; CHECK-LABEL: post_ldrbu8_3:
664 ; CHECK: @ %bb.0: @ %entry
665 ; CHECK-NEXT: vldrw.u32 q0, [r0]
666 ; CHECK-NEXT: adds r0, #3
667 ; CHECK-NEXT: vstrw.32 q0, [r1]
670 %z = getelementptr inbounds i8, i8* %x, i32 3
671 %0 = bitcast i8* %x to <16 x i8>*
672 %1 = load <16 x i8>, <16 x i8>* %0, align 8
673 %2 = bitcast i8* %y to <16 x i8>*
674 store <16 x i8> %1, <16 x i8>* %2, align 8
; <16 x i8> copy from %x to %y; %z = %x + 127 bytes.
; The checks expect vldrw.u32 / vstrw.32 with a separate adds r0, #127.
; NOTE(review): 127 presumably sits at the edge of an immediate range - confirm.
678 define i8* @post_ldrbu8_127(i8* %x, i8* %y) {
679 ; CHECK-LABEL: post_ldrbu8_127:
680 ; CHECK: @ %bb.0: @ %entry
681 ; CHECK-NEXT: vldrw.u32 q0, [r0]
682 ; CHECK-NEXT: adds r0, #127
683 ; CHECK-NEXT: vstrw.32 q0, [r1]
686 %z = getelementptr inbounds i8, i8* %x, i32 127
687 %0 = bitcast i8* %x to <16 x i8>*
688 %1 = load <16 x i8>, <16 x i8>* %0, align 8
689 %2 = bitcast i8* %y to <16 x i8>*
690 store <16 x i8> %1, <16 x i8>* %2, align 8
; <16 x i8> copy from %x to %y; %z = %x + 128 bytes.
; The checks expect vldrw.u32 / vstrw.32 with a separate adds r0, #128.
; NOTE(review): 128 presumably lies just past an immediate range - confirm.
694 define i8* @post_ldrbu8_128(i8* %x, i8* %y) {
695 ; CHECK-LABEL: post_ldrbu8_128:
696 ; CHECK: @ %bb.0: @ %entry
697 ; CHECK-NEXT: vldrw.u32 q0, [r0]
698 ; CHECK-NEXT: adds r0, #128
699 ; CHECK-NEXT: vstrw.32 q0, [r1]
702 %z = getelementptr inbounds i8, i8* %x, i32 128
703 %0 = bitcast i8* %x to <16 x i8>*
704 %1 = load <16 x i8>, <16 x i8>* %0, align 8
705 %2 = bitcast i8* %y to <16 x i8>*
706 store <16 x i8> %1, <16 x i8>* %2, align 8
; Floating-point variant: copies <4 x float> from %x to %y; %z = %x + 4 bytes.
; The checks expect vldrw.u32 / vstrw.32 with a separate adds r0, #4.
710 define i8* @post_ldrwf32_4(i8* %x, i8* %y) {
711 ; CHECK-LABEL: post_ldrwf32_4:
712 ; CHECK: @ %bb.0: @ %entry
713 ; CHECK-NEXT: vldrw.u32 q0, [r0]
714 ; CHECK-NEXT: adds r0, #4
715 ; CHECK-NEXT: vstrw.32 q0, [r1]
718 %z = getelementptr inbounds i8, i8* %x, i32 4
719 %0 = bitcast i8* %x to <4 x float>*
720 %1 = load <4 x float>, <4 x float>* %0, align 8
721 %2 = bitcast i8* %y to <4 x float>*
722 store <4 x float> %1, <4 x float>* %2, align 8
; Half-precision variant: copies <8 x half> from %x to %y; %z = %x + 4 bytes.
; The checks expect vldrw.u32 / vstrw.32 with a separate adds r0, #4.
726 define i8* @post_ldrwf16_4(i8* %x, i8* %y) {
727 ; CHECK-LABEL: post_ldrwf16_4:
728 ; CHECK: @ %bb.0: @ %entry
729 ; CHECK-NEXT: vldrw.u32 q0, [r0]
730 ; CHECK-NEXT: adds r0, #4
731 ; CHECK-NEXT: vstrw.32 q0, [r1]
734 %z = getelementptr inbounds i8, i8* %x, i32 4
735 %0 = bitcast i8* %x to <8 x half>*
736 %1 = load <8 x half>, <8 x half>* %0, align 8
737 %2 = bitcast i8* %y to <8 x half>*
738 store <8 x half> %1, <8 x half>* %2, align 8
; Store-side variant (arguments are %y, %x): copies <4 x i32> from %x to %y and forms
; %z = %y + 4 bytes from the store address.
; The checks expect vldrw.u32 [r1], vstrw.32 [r0], then a separate adds r0, #4.
746 define i8* @post_strw32_4(i8* %y, i8* %x) {
747 ; CHECK-LABEL: post_strw32_4:
748 ; CHECK: @ %bb.0: @ %entry
749 ; CHECK-NEXT: vldrw.u32 q0, [r1]
750 ; CHECK-NEXT: vstrw.32 q0, [r0]
751 ; CHECK-NEXT: adds r0, #4
754 %z = getelementptr inbounds i8, i8* %y, i32 4
755 %0 = bitcast i8* %x to <4 x i32>*
756 %1 = load <4 x i32>, <4 x i32>* %0, align 8
757 %2 = bitcast i8* %y to <4 x i32>*
758 store <4 x i32> %1, <4 x i32>* %2, align 8
; <4 x i32> copy from %x to %y; %z = %y + 3 bytes (not a multiple of the 4-byte
; element size). The checks expect vstrw.32 [r0] followed by a separate adds r0, #3.
762 define i8* @post_strw32_3(i8* %y, i8* %x) {
763 ; CHECK-LABEL: post_strw32_3:
764 ; CHECK: @ %bb.0: @ %entry
765 ; CHECK-NEXT: vldrw.u32 q0, [r1]
766 ; CHECK-NEXT: vstrw.32 q0, [r0]
767 ; CHECK-NEXT: adds r0, #3
770 %z = getelementptr inbounds i8, i8* %y, i32 3
771 %0 = bitcast i8* %x to <4 x i32>*
772 %1 = load <4 x i32>, <4 x i32>* %0, align 8
773 %2 = bitcast i8* %y to <4 x i32>*
774 store <4 x i32> %1, <4 x i32>* %2, align 8
; <4 x i32> copy from %x to %y with a negative offset: %z = %y - 4 bytes.
; The checks expect vstrw.32 [r0] followed by a separate subs r0, #4.
778 define i8* @post_strw32_m4(i8* %y, i8* %x) {
779 ; CHECK-LABEL: post_strw32_m4:
780 ; CHECK: @ %bb.0: @ %entry
781 ; CHECK-NEXT: vldrw.u32 q0, [r1]
782 ; CHECK-NEXT: vstrw.32 q0, [r0]
783 ; CHECK-NEXT: subs r0, #4
786 %z = getelementptr inbounds i8, i8* %y, i32 -4
787 %0 = bitcast i8* %x to <4 x i32>*
788 %1 = load <4 x i32>, <4 x i32>* %0, align 8
789 %2 = bitcast i8* %y to <4 x i32>*
790 store <4 x i32> %1, <4 x i32>* %2, align 8
; <4 x i32> copy from %x to %y; %z = %y + 508 bytes.
; The checks expect vstrw.32 [r0] followed by a separate add.w r0, r0, #508.
; NOTE(review): 508 presumably sits at the edge of a scaled immediate range - confirm.
794 define i8* @post_strw32_508(i8* %y, i8* %x) {
795 ; CHECK-LABEL: post_strw32_508:
796 ; CHECK: @ %bb.0: @ %entry
797 ; CHECK-NEXT: vldrw.u32 q0, [r1]
798 ; CHECK-NEXT: vstrw.32 q0, [r0]
799 ; CHECK-NEXT: add.w r0, r0, #508
802 %z = getelementptr inbounds i8, i8* %y, i32 508
803 %0 = bitcast i8* %x to <4 x i32>*
804 %1 = load <4 x i32>, <4 x i32>* %0, align 8
805 %2 = bitcast i8* %y to <4 x i32>*
806 store <4 x i32> %1, <4 x i32>* %2, align 8
; <4 x i32> copy from %x to %y; %z = %y + 512 bytes.
; The checks expect vstrw.32 [r0] followed by a separate add.w r0, r0, #512.
; NOTE(review): 512 presumably lies just past a scaled immediate range - confirm.
810 define i8* @post_strw32_512(i8* %y, i8* %x) {
811 ; CHECK-LABEL: post_strw32_512:
812 ; CHECK: @ %bb.0: @ %entry
813 ; CHECK-NEXT: vldrw.u32 q0, [r1]
814 ; CHECK-NEXT: vstrw.32 q0, [r0]
815 ; CHECK-NEXT: add.w r0, r0, #512
818 %z = getelementptr inbounds i8, i8* %y, i32 512
819 %0 = bitcast i8* %x to <4 x i32>*
820 %1 = load <4 x i32>, <4 x i32>* %0, align 8
821 %2 = bitcast i8* %y to <4 x i32>*
822 store <4 x i32> %1, <4 x i32>* %2, align 8
; <4 x i32> copy from %x to %y; %z = %y - 508 bytes.
; The checks expect vstrw.32 [r0] followed by a separate sub.w r0, r0, #508.
; NOTE(review): -508 presumably sits at the edge of a scaled immediate range - confirm.
826 define i8* @post_strw32_m508(i8* %y, i8* %x) {
827 ; CHECK-LABEL: post_strw32_m508:
828 ; CHECK: @ %bb.0: @ %entry
829 ; CHECK-NEXT: vldrw.u32 q0, [r1]
830 ; CHECK-NEXT: vstrw.32 q0, [r0]
831 ; CHECK-NEXT: sub.w r0, r0, #508
834 %z = getelementptr inbounds i8, i8* %y, i32 -508
835 %0 = bitcast i8* %x to <4 x i32>*
836 %1 = load <4 x i32>, <4 x i32>* %0, align 8
837 %2 = bitcast i8* %y to <4 x i32>*
838 store <4 x i32> %1, <4 x i32>* %2, align 8
; <4 x i32> copy from %x to %y; %z = %y - 512 bytes.
; The checks expect vstrw.32 [r0] followed by a separate sub.w r0, r0, #512.
; NOTE(review): -512 presumably lies just past a scaled immediate range - confirm.
842 define i8* @post_strw32_m512(i8* %y, i8* %x) {
843 ; CHECK-LABEL: post_strw32_m512:
844 ; CHECK: @ %bb.0: @ %entry
845 ; CHECK-NEXT: vldrw.u32 q0, [r1]
846 ; CHECK-NEXT: vstrw.32 q0, [r0]
847 ; CHECK-NEXT: sub.w r0, r0, #512
850 %z = getelementptr inbounds i8, i8* %y, i32 -512
851 %0 = bitcast i8* %x to <4 x i32>*
852 %1 = load <4 x i32>, <4 x i32>* %0, align 8
853 %2 = bitcast i8* %y to <4 x i32>*
854 store <4 x i32> %1, <4 x i32>* %2, align 8
; Copies a narrow <4 x i16> vector from %x to %y; %z = %y + 4 bytes.
; The checks expect vldrh.u32 [r1], a narrowing vstrh.32 [r0], then adds r0, #4.
859 define i8* @post_strh32_4(i8* %y, i8* %x) {
860 ; CHECK-LABEL: post_strh32_4:
861 ; CHECK: @ %bb.0: @ %entry
862 ; CHECK-NEXT: vldrh.u32 q0, [r1]
863 ; CHECK-NEXT: vstrh.32 q0, [r0]
864 ; CHECK-NEXT: adds r0, #4
867 %z = getelementptr inbounds i8, i8* %y, i32 4
868 %0 = bitcast i8* %x to <4 x i16>*
869 %1 = load <4 x i16>, <4 x i16>* %0, align 8
870 %2 = bitcast i8* %y to <4 x i16>*
871 store <4 x i16> %1, <4 x i16>* %2, align 8
; <4 x i16> copy from %x to %y; %z = %y + 3 bytes (an odd offset, not a multiple of
; the 2-byte element size). The checks expect vstrh.32 [r0] then adds r0, #3.
875 define i8* @post_strh32_3(i8* %y, i8* %x) {
876 ; CHECK-LABEL: post_strh32_3:
877 ; CHECK: @ %bb.0: @ %entry
878 ; CHECK-NEXT: vldrh.u32 q0, [r1]
879 ; CHECK-NEXT: vstrh.32 q0, [r0]
880 ; CHECK-NEXT: adds r0, #3
883 %z = getelementptr inbounds i8, i8* %y, i32 3
884 %0 = bitcast i8* %x to <4 x i16>*
885 %1 = load <4 x i16>, <4 x i16>* %0, align 8
886 %2 = bitcast i8* %y to <4 x i16>*
887 store <4 x i16> %1, <4 x i16>* %2, align 8
; <4 x i16> copy from %x to %y; %z = %y + 2 bytes.
; The checks expect vldrh.u32 [r1], vstrh.32 [r0], then a separate adds r0, #2.
891 define i8* @post_strh32_2(i8* %y, i8* %x) {
892 ; CHECK-LABEL: post_strh32_2:
893 ; CHECK: @ %bb.0: @ %entry
894 ; CHECK-NEXT: vldrh.u32 q0, [r1]
895 ; CHECK-NEXT: vstrh.32 q0, [r0]
896 ; CHECK-NEXT: adds r0, #2
899 %z = getelementptr inbounds i8, i8* %y, i32 2
900 %0 = bitcast i8* %x to <4 x i16>*
901 %1 = load <4 x i16>, <4 x i16>* %0, align 8
902 %2 = bitcast i8* %y to <4 x i16>*
903 store <4 x i16> %1, <4 x i16>* %2, align 8
; <4 x i16> copy from %x to %y; %z = %y + 254 bytes.
; The checks expect vldrh.u32 [r1], vstrh.32 [r0], then a separate adds r0, #254.
; NOTE(review): 254 presumably sits at the edge of a scaled immediate range - confirm.
907 define i8* @post_strh32_254(i8* %y, i8* %x) {
908 ; CHECK-LABEL: post_strh32_254:
909 ; CHECK: @ %bb.0: @ %entry
910 ; CHECK-NEXT: vldrh.u32 q0, [r1]
911 ; CHECK-NEXT: vstrh.32 q0, [r0]
912 ; CHECK-NEXT: adds r0, #254
915 %z = getelementptr inbounds i8, i8* %y, i32 254
916 %0 = bitcast i8* %x to <4 x i16>*
917 %1 = load <4 x i16>, <4 x i16>* %0, align 8
918 %2 = bitcast i8* %y to <4 x i16>*
919 store <4 x i16> %1, <4 x i16>* %2, align 8
; <4 x i16> copy from %x to %y; %z = %y + 256 bytes.
; The checks expect vldrh.u32 [r1], vstrh.32 [r0], then add.w r0, r0, #256.
; NOTE(review): 256 presumably lies just past a scaled immediate range - confirm.
923 define i8* @post_strh32_256(i8* %y, i8* %x) {
924 ; CHECK-LABEL: post_strh32_256:
925 ; CHECK: @ %bb.0: @ %entry
926 ; CHECK-NEXT: vldrh.u32 q0, [r1]
927 ; CHECK-NEXT: vstrh.32 q0, [r0]
928 ; CHECK-NEXT: add.w r0, r0, #256
931 %z = getelementptr inbounds i8, i8* %y, i32 256
932 %0 = bitcast i8* %x to <4 x i16>*
933 %1 = load <4 x i16>, <4 x i16>* %0, align 8
934 %2 = bitcast i8* %y to <4 x i16>*
935 store <4 x i16> %1, <4 x i16>* %2, align 8
; Copies a full <8 x i16> vector from %x to %y; %z = %y + 4 bytes.
; The checks expect vldrw.u32 [r1], vstrw.32 [r0], then a separate adds r0, #4.
940 define i8* @post_strh16_4(i8* %y, i8* %x) {
941 ; CHECK-LABEL: post_strh16_4:
942 ; CHECK: @ %bb.0: @ %entry
943 ; CHECK-NEXT: vldrw.u32 q0, [r1]
944 ; CHECK-NEXT: vstrw.32 q0, [r0]
945 ; CHECK-NEXT: adds r0, #4
948 %z = getelementptr inbounds i8, i8* %y, i32 4
949 %0 = bitcast i8* %x to <8 x i16>*
950 %1 = load <8 x i16>, <8 x i16>* %0, align 8
951 %2 = bitcast i8* %y to <8 x i16>*
952 store <8 x i16> %1, <8 x i16>* %2, align 8
; <8 x i16> copy from %x to %y; %z = %y + 3 bytes (an odd byte offset).
; The checks expect vldrw.u32 [r1], vstrw.32 [r0], then a separate adds r0, #3.
956 define i8* @post_strh16_3(i8* %y, i8* %x) {
957 ; CHECK-LABEL: post_strh16_3:
958 ; CHECK: @ %bb.0: @ %entry
959 ; CHECK-NEXT: vldrw.u32 q0, [r1]
960 ; CHECK-NEXT: vstrw.32 q0, [r0]
961 ; CHECK-NEXT: adds r0, #3
964 %z = getelementptr inbounds i8, i8* %y, i32 3
965 %0 = bitcast i8* %x to <8 x i16>*
966 %1 = load <8 x i16>, <8 x i16>* %0, align 8
967 %2 = bitcast i8* %y to <8 x i16>*
968 store <8 x i16> %1, <8 x i16>* %2, align 8
; <8 x i16> copy from %x to %y; %z = %y + 2 bytes.
; The checks expect vldrw.u32 [r1], vstrw.32 [r0], then a separate adds r0, #2.
972 define i8* @post_strh16_2(i8* %y, i8* %x) {
973 ; CHECK-LABEL: post_strh16_2:
974 ; CHECK: @ %bb.0: @ %entry
975 ; CHECK-NEXT: vldrw.u32 q0, [r1]
976 ; CHECK-NEXT: vstrw.32 q0, [r0]
977 ; CHECK-NEXT: adds r0, #2
980 %z = getelementptr inbounds i8, i8* %y, i32 2
981 %0 = bitcast i8* %x to <8 x i16>*
982 %1 = load <8 x i16>, <8 x i16>* %0, align 8
983 %2 = bitcast i8* %y to <8 x i16>*
984 store <8 x i16> %1, <8 x i16>* %2, align 8
; <8 x i16> copy from %x to %y; %z = %y + 254 bytes.
; The checks expect vldrw.u32 [r1], vstrw.32 [r0], then a separate adds r0, #254.
; NOTE(review): 254 presumably sits at the edge of a scaled immediate range - confirm.
988 define i8* @post_strh16_254(i8* %y, i8* %x) {
989 ; CHECK-LABEL: post_strh16_254:
990 ; CHECK: @ %bb.0: @ %entry
991 ; CHECK-NEXT: vldrw.u32 q0, [r1]
992 ; CHECK-NEXT: vstrw.32 q0, [r0]
993 ; CHECK-NEXT: adds r0, #254
996 %z = getelementptr inbounds i8, i8* %y, i32 254
997 %0 = bitcast i8* %x to <8 x i16>*
998 %1 = load <8 x i16>, <8 x i16>* %0, align 8
999 %2 = bitcast i8* %y to <8 x i16>*
1000 store <8 x i16> %1, <8 x i16>* %2, align 8
; <8 x i16> copy from %x to %y; %z = %y + 256 bytes.
; The checks expect vldrw.u32 [r1], vstrw.32 [r0], then add.w r0, r0, #256.
; NOTE(review): 256 presumably lies just past a scaled immediate range - confirm.
1004 define i8* @post_strh16_256(i8* %y, i8* %x) {
1005 ; CHECK-LABEL: post_strh16_256:
1006 ; CHECK: @ %bb.0: @ %entry
1007 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1008 ; CHECK-NEXT: vstrw.32 q0, [r0]
1009 ; CHECK-NEXT: add.w r0, r0, #256
1012 %z = getelementptr inbounds i8, i8* %y, i32 256
1013 %0 = bitcast i8* %x to <8 x i16>*
1014 %1 = load <8 x i16>, <8 x i16>* %0, align 8
1015 %2 = bitcast i8* %y to <8 x i16>*
1016 store <8 x i16> %1, <8 x i16>* %2, align 8
; Copies a narrow <4 x i8> vector from %x to %y; %z = %y + 4 bytes.
; The checks expect vldrb.u32 [r1], a narrowing vstrb.32 [r0], then adds r0, #4.
1021 define i8* @post_strb32_4(i8* %y, i8* %x) {
1022 ; CHECK-LABEL: post_strb32_4:
1023 ; CHECK: @ %bb.0: @ %entry
1024 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1025 ; CHECK-NEXT: vstrb.32 q0, [r0]
1026 ; CHECK-NEXT: adds r0, #4
1029 %z = getelementptr inbounds i8, i8* %y, i32 4
1030 %0 = bitcast i8* %x to <4 x i8>*
1031 %1 = load <4 x i8>, <4 x i8>* %0, align 8
1032 %2 = bitcast i8* %y to <4 x i8>*
1033 store <4 x i8> %1, <4 x i8>* %2, align 8
; <4 x i8> copy from %x to %y; %z = %y + 3 bytes.
; The checks expect vldrb.u32 [r1], vstrb.32 [r0], then a separate adds r0, #3.
1037 define i8* @post_strb32_3(i8* %y, i8* %x) {
1038 ; CHECK-LABEL: post_strb32_3:
1039 ; CHECK: @ %bb.0: @ %entry
1040 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1041 ; CHECK-NEXT: vstrb.32 q0, [r0]
1042 ; CHECK-NEXT: adds r0, #3
1045 %z = getelementptr inbounds i8, i8* %y, i32 3
1046 %0 = bitcast i8* %x to <4 x i8>*
1047 %1 = load <4 x i8>, <4 x i8>* %0, align 8
1048 %2 = bitcast i8* %y to <4 x i8>*
1049 store <4 x i8> %1, <4 x i8>* %2, align 8
; <4 x i8> copy from %x to %y; %z = %y + 127 bytes.
; The checks expect vldrb.u32 [r1], vstrb.32 [r0], then a separate adds r0, #127.
; NOTE(review): 127 presumably sits at the edge of an immediate range - confirm.
1053 define i8* @post_strb32_127(i8* %y, i8* %x) {
1054 ; CHECK-LABEL: post_strb32_127:
1055 ; CHECK: @ %bb.0: @ %entry
1056 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1057 ; CHECK-NEXT: vstrb.32 q0, [r0]
1058 ; CHECK-NEXT: adds r0, #127
1061 %z = getelementptr inbounds i8, i8* %y, i32 127
1062 %0 = bitcast i8* %x to <4 x i8>*
1063 %1 = load <4 x i8>, <4 x i8>* %0, align 8
1064 %2 = bitcast i8* %y to <4 x i8>*
1065 store <4 x i8> %1, <4 x i8>* %2, align 8
; <4 x i8> copy from %x to %y; %z = %y + 128 bytes.
; The checks expect vldrb.u32 [r1], vstrb.32 [r0], then a separate adds r0, #128.
; NOTE(review): 128 presumably lies just past an immediate range - confirm.
1069 define i8* @post_strb32_128(i8* %y, i8* %x) {
1070 ; CHECK-LABEL: post_strb32_128:
1071 ; CHECK: @ %bb.0: @ %entry
1072 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1073 ; CHECK-NEXT: vstrb.32 q0, [r0]
1074 ; CHECK-NEXT: adds r0, #128
1077 %z = getelementptr inbounds i8, i8* %y, i32 128
1078 %0 = bitcast i8* %x to <4 x i8>*
1079 %1 = load <4 x i8>, <4 x i8>* %0, align 8
1080 %2 = bitcast i8* %y to <4 x i8>*
1081 store <4 x i8> %1, <4 x i8>* %2, align 8
; Copies a narrow <8 x i8> vector from %x to %y; %z = %y + 4 bytes.
; The checks expect vldrb.u16 [r1], a narrowing vstrb.16 [r0], then adds r0, #4.
1086 define i8* @post_strb16_4(i8* %y, i8* %x) {
1087 ; CHECK-LABEL: post_strb16_4:
1088 ; CHECK: @ %bb.0: @ %entry
1089 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1090 ; CHECK-NEXT: vstrb.16 q0, [r0]
1091 ; CHECK-NEXT: adds r0, #4
1094 %z = getelementptr inbounds i8, i8* %y, i32 4
1095 %0 = bitcast i8* %x to <8 x i8>*
1096 %1 = load <8 x i8>, <8 x i8>* %0, align 8
1097 %2 = bitcast i8* %y to <8 x i8>*
1098 store <8 x i8> %1, <8 x i8>* %2, align 8
; <8 x i8> copy from %x to %y; %z = %y + 3 bytes.
; The checks expect vldrb.u16 [r1], vstrb.16 [r0], then a separate adds r0, #3.
1102 define i8* @post_strb16_3(i8* %y, i8* %x) {
1103 ; CHECK-LABEL: post_strb16_3:
1104 ; CHECK: @ %bb.0: @ %entry
1105 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1106 ; CHECK-NEXT: vstrb.16 q0, [r0]
1107 ; CHECK-NEXT: adds r0, #3
1110 %z = getelementptr inbounds i8, i8* %y, i32 3
1111 %0 = bitcast i8* %x to <8 x i8>*
1112 %1 = load <8 x i8>, <8 x i8>* %0, align 8
1113 %2 = bitcast i8* %y to <8 x i8>*
1114 store <8 x i8> %1, <8 x i8>* %2, align 8
; <8 x i8> copy from %x to %y; %z = %y + 127 bytes.
; The checks expect vldrb.u16 [r1], vstrb.16 [r0], then a separate adds r0, #127.
; NOTE(review): 127 presumably sits at the edge of an immediate range - confirm.
1118 define i8* @post_strb16_127(i8* %y, i8* %x) {
1119 ; CHECK-LABEL: post_strb16_127:
1120 ; CHECK: @ %bb.0: @ %entry
1121 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1122 ; CHECK-NEXT: vstrb.16 q0, [r0]
1123 ; CHECK-NEXT: adds r0, #127
1126 %z = getelementptr inbounds i8, i8* %y, i32 127
1127 %0 = bitcast i8* %x to <8 x i8>*
1128 %1 = load <8 x i8>, <8 x i8>* %0, align 8
1129 %2 = bitcast i8* %y to <8 x i8>*
1130 store <8 x i8> %1, <8 x i8>* %2, align 8
; <8 x i8> copy from %x to %y; %z = %y + 128 bytes.
; The checks expect vldrb.u16 [r1], vstrb.16 [r0], then a separate adds r0, #128.
; NOTE(review): 128 presumably lies just past an immediate range - confirm.
1134 define i8* @post_strb16_128(i8* %y, i8* %x) {
1135 ; CHECK-LABEL: post_strb16_128:
1136 ; CHECK: @ %bb.0: @ %entry
1137 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1138 ; CHECK-NEXT: vstrb.16 q0, [r0]
1139 ; CHECK-NEXT: adds r0, #128
1142 %z = getelementptr inbounds i8, i8* %y, i32 128
1143 %0 = bitcast i8* %x to <8 x i8>*
1144 %1 = load <8 x i8>, <8 x i8>* %0, align 8
1145 %2 = bitcast i8* %y to <8 x i8>*
1146 store <8 x i8> %1, <8 x i8>* %2, align 8
; Copies a full <16 x i8> vector from %x to %y; %z = %y + 4 bytes.
; The checks expect vldrw.u32 [r1], vstrw.32 [r0], then a separate adds r0, #4.
1151 define i8* @post_strb8_4(i8* %y, i8* %x) {
1152 ; CHECK-LABEL: post_strb8_4:
1153 ; CHECK: @ %bb.0: @ %entry
1154 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1155 ; CHECK-NEXT: vstrw.32 q0, [r0]
1156 ; CHECK-NEXT: adds r0, #4
1159 %z = getelementptr inbounds i8, i8* %y, i32 4
1160 %0 = bitcast i8* %x to <16 x i8>*
1161 %1 = load <16 x i8>, <16 x i8>* %0, align 8
1162 %2 = bitcast i8* %y to <16 x i8>*
1163 store <16 x i8> %1, <16 x i8>* %2, align 8
; <16 x i8> copy from %x to %y; %z = %y + 3 bytes.
; The checks expect vldrw.u32 [r1], vstrw.32 [r0], then a separate adds r0, #3.
1167 define i8* @post_strb8_3(i8* %y, i8* %x) {
1168 ; CHECK-LABEL: post_strb8_3:
1169 ; CHECK: @ %bb.0: @ %entry
1170 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1171 ; CHECK-NEXT: vstrw.32 q0, [r0]
1172 ; CHECK-NEXT: adds r0, #3
1175 %z = getelementptr inbounds i8, i8* %y, i32 3
1176 %0 = bitcast i8* %x to <16 x i8>*
1177 %1 = load <16 x i8>, <16 x i8>* %0, align 8
1178 %2 = bitcast i8* %y to <16 x i8>*
1179 store <16 x i8> %1, <16 x i8>* %2, align 8
; <16 x i8> copy from %x to %y; %z = %y + 127 bytes.
; The checks expect vldrw.u32 [r1], vstrw.32 [r0], then a separate adds r0, #127.
; NOTE(review): 127 presumably sits at the edge of an immediate range - confirm.
1183 define i8* @post_strb8_127(i8* %y, i8* %x) {
1184 ; CHECK-LABEL: post_strb8_127:
1185 ; CHECK: @ %bb.0: @ %entry
1186 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1187 ; CHECK-NEXT: vstrw.32 q0, [r0]
1188 ; CHECK-NEXT: adds r0, #127
1191 %z = getelementptr inbounds i8, i8* %y, i32 127
1192 %0 = bitcast i8* %x to <16 x i8>*
1193 %1 = load <16 x i8>, <16 x i8>* %0, align 8
1194 %2 = bitcast i8* %y to <16 x i8>*
1195 store <16 x i8> %1, <16 x i8>* %2, align 8
; <16 x i8> copy from %x to %y; %z = %y + 128 bytes.
; The checks expect vldrw.u32 [r1], vstrw.32 [r0], then a separate adds r0, #128.
; NOTE(review): 128 presumably lies just past an immediate range - confirm.
1199 define i8* @post_strb8_128(i8* %y, i8* %x) {
1200 ; CHECK-LABEL: post_strb8_128:
1201 ; CHECK: @ %bb.0: @ %entry
1202 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1203 ; CHECK-NEXT: vstrw.32 q0, [r0]
1204 ; CHECK-NEXT: adds r0, #128
1207 %z = getelementptr inbounds i8, i8* %y, i32 128
1208 %0 = bitcast i8* %x to <16 x i8>*
1209 %1 = load <16 x i8>, <16 x i8>* %0, align 8
1210 %2 = bitcast i8* %y to <16 x i8>*
1211 store <16 x i8> %1, <16 x i8>* %2, align 8
; Floating-point store variant: copies <4 x float> from %x to %y; %z = %y + 4 bytes.
; The checks expect vldrw.u32 [r1], vstrw.32 [r0], then a separate adds r0, #4.
1215 define i8* @post_strf32_4(i8* %y, i8* %x) {
1216 ; CHECK-LABEL: post_strf32_4:
1217 ; CHECK: @ %bb.0: @ %entry
1218 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1219 ; CHECK-NEXT: vstrw.32 q0, [r0]
1220 ; CHECK-NEXT: adds r0, #4
1223 %z = getelementptr inbounds i8, i8* %y, i32 4
1224 %0 = bitcast i8* %x to <4 x float>*
1225 %1 = load <4 x float>, <4 x float>* %0, align 8
1226 %2 = bitcast i8* %y to <4 x float>*
1227 store <4 x float> %1, <4 x float>* %2, align 8
; Half-precision store variant: copies <8 x half> from %x to %y; %z = %y + 4 bytes.
; The checks expect vldrw.u32 [r1], vstrw.32 [r0], then a separate adds r0, #4.
1231 define i8* @post_strf16_4(i8* %y, i8* %x) {
1232 ; CHECK-LABEL: post_strf16_4:
1233 ; CHECK: @ %bb.0: @ %entry
1234 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1235 ; CHECK-NEXT: vstrw.32 q0, [r0]
1236 ; CHECK-NEXT: adds r0, #4
1239 %z = getelementptr inbounds i8, i8* %y, i32 4
1240 %0 = bitcast i8* %x to <8 x half>*
1241 %1 = load <8 x half>, <8 x half>* %0, align 8
1242 %2 = bitcast i8* %y to <8 x half>*
1243 store <8 x half> %1, <8 x half>* %2, align 8