1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
; Load <4 x i32> (align 4) from %x + 4 and store it to %y.
; Expected: offset folded into the load, vldrw.u32 q0, [r0, #4].
4 define i8* @ldrwu32_4(i8* %x, i8* %y) {
5 ; CHECK-LABEL: ldrwu32_4:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vldrw.u32 q0, [r0, #4]
8 ; CHECK-NEXT: vstrw.32 q0, [r1]
11 %z = getelementptr inbounds i8, i8* %x, i32 4
12 %0 = bitcast i8* %z to <4 x i32>*
13 %1 = load <4 x i32>, <4 x i32>* %0, align 4
14 %2 = bitcast i8* %y to <4 x i32>*
15 store <4 x i32> %1, <4 x i32>* %2, align 4
; Load <4 x i32> (align 4) from %x + 3 and store it to %y.
; Expected: address computed separately (adds r2, r0, #3), then an un-offset vldrw.u32.
19 define i8* @ldrwu32_3(i8* %x, i8* %y) {
20 ; CHECK-LABEL: ldrwu32_3:
21 ; CHECK: @ %bb.0: @ %entry
22 ; CHECK-NEXT: adds r2, r0, #3
23 ; CHECK-NEXT: vldrw.u32 q0, [r2]
24 ; CHECK-NEXT: vstrw.32 q0, [r1]
27 %z = getelementptr inbounds i8, i8* %x, i32 3
28 %0 = bitcast i8* %z to <4 x i32>*
29 %1 = load <4 x i32>, <4 x i32>* %0, align 4
30 %2 = bitcast i8* %y to <4 x i32>*
31 store <4 x i32> %1, <4 x i32>* %2, align 4
; Load <4 x i32> (align 4) from %x - 4 and store it to %y.
; Expected: negative offset folded into the load, vldrw.u32 q0, [r0, #-4].
35 define i8* @ldrwu32_m4(i8* %x, i8* %y) {
36 ; CHECK-LABEL: ldrwu32_m4:
37 ; CHECK: @ %bb.0: @ %entry
38 ; CHECK-NEXT: vldrw.u32 q0, [r0, #-4]
39 ; CHECK-NEXT: vstrw.32 q0, [r1]
42 %z = getelementptr inbounds i8, i8* %x, i32 -4
43 %0 = bitcast i8* %z to <4 x i32>*
44 %1 = load <4 x i32>, <4 x i32>* %0, align 4
45 %2 = bitcast i8* %y to <4 x i32>*
46 store <4 x i32> %1, <4 x i32>* %2, align 4
; Load <4 x i32> (align 4) from %x + 508 and store it to %y.
; Expected: address computed separately (add.w r2, r0, #508), then an un-offset vldrw.u32.
50 define i8* @ldrwu32_508(i8* %x, i8* %y) {
51 ; CHECK-LABEL: ldrwu32_508:
52 ; CHECK: @ %bb.0: @ %entry
53 ; CHECK-NEXT: add.w r2, r0, #508
54 ; CHECK-NEXT: vldrw.u32 q0, [r2]
55 ; CHECK-NEXT: vstrw.32 q0, [r1]
58 %z = getelementptr inbounds i8, i8* %x, i32 508
59 %0 = bitcast i8* %z to <4 x i32>*
60 %1 = load <4 x i32>, <4 x i32>* %0, align 4
61 %2 = bitcast i8* %y to <4 x i32>*
62 store <4 x i32> %1, <4 x i32>* %2, align 4
; Load <4 x i32> (align 4) from %x + 512 and store it to %y.
; Expected: address computed separately (add.w r2, r0, #512), then an un-offset vldrw.u32.
66 define i8* @ldrwu32_512(i8* %x, i8* %y) {
67 ; CHECK-LABEL: ldrwu32_512:
68 ; CHECK: @ %bb.0: @ %entry
69 ; CHECK-NEXT: add.w r2, r0, #512
70 ; CHECK-NEXT: vldrw.u32 q0, [r2]
71 ; CHECK-NEXT: vstrw.32 q0, [r1]
74 %z = getelementptr inbounds i8, i8* %x, i32 512
75 %0 = bitcast i8* %z to <4 x i32>*
76 %1 = load <4 x i32>, <4 x i32>* %0, align 4
77 %2 = bitcast i8* %y to <4 x i32>*
78 store <4 x i32> %1, <4 x i32>* %2, align 4
; Load <4 x i32> (align 4) from %x - 508 and store it to %y.
; Expected: address computed separately (sub.w r2, r0, #508), then an un-offset vldrw.u32.
82 define i8* @ldrwu32_m508(i8* %x, i8* %y) {
83 ; CHECK-LABEL: ldrwu32_m508:
84 ; CHECK: @ %bb.0: @ %entry
85 ; CHECK-NEXT: sub.w r2, r0, #508
86 ; CHECK-NEXT: vldrw.u32 q0, [r2]
87 ; CHECK-NEXT: vstrw.32 q0, [r1]
90 %z = getelementptr inbounds i8, i8* %x, i32 -508
91 %0 = bitcast i8* %z to <4 x i32>*
92 %1 = load <4 x i32>, <4 x i32>* %0, align 4
93 %2 = bitcast i8* %y to <4 x i32>*
94 store <4 x i32> %1, <4 x i32>* %2, align 4
; Load <4 x i32> (align 4) from %x - 512 and store it to %y.
; Expected: address computed separately (sub.w r2, r0, #512), then an un-offset vldrw.u32.
98 define i8* @ldrwu32_m512(i8* %x, i8* %y) {
99 ; CHECK-LABEL: ldrwu32_m512:
100 ; CHECK: @ %bb.0: @ %entry
101 ; CHECK-NEXT: sub.w r2, r0, #512
102 ; CHECK-NEXT: vldrw.u32 q0, [r2]
103 ; CHECK-NEXT: vstrw.32 q0, [r1]
106 %z = getelementptr inbounds i8, i8* %x, i32 -512
107 %0 = bitcast i8* %z to <4 x i32>*
108 %1 = load <4 x i32>, <4 x i32>* %0, align 4
109 %2 = bitcast i8* %y to <4 x i32>*
110 store <4 x i32> %1, <4 x i32>* %2, align 4
; Load <4 x i16> (align 2) from %x + 4, zero-extend to <4 x i32>, store to %y.
; Expected: offset folded into the widening load, vldrh.u32 q0, [r0, #4].
115 define i8* @ldrhu32_4(i8* %x, i8* %y) {
116 ; CHECK-LABEL: ldrhu32_4:
117 ; CHECK: @ %bb.0: @ %entry
118 ; CHECK-NEXT: vldrh.u32 q0, [r0, #4]
119 ; CHECK-NEXT: vstrw.32 q0, [r1]
122 %z = getelementptr inbounds i8, i8* %x, i32 4
123 %0 = bitcast i8* %z to <4 x i16>*
124 %1 = load <4 x i16>, <4 x i16>* %0, align 2
125 %2 = zext <4 x i16> %1 to <4 x i32>
126 %3 = bitcast i8* %y to <4 x i32>*
127 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i16> (align 2) from %x + 3, zero-extend to <4 x i32>, store to %y.
; Expected: address computed separately (adds r2, r0, #3), then an un-offset vldrh.u32.
131 define i8* @ldrhu32_3(i8* %x, i8* %y) {
132 ; CHECK-LABEL: ldrhu32_3:
133 ; CHECK: @ %bb.0: @ %entry
134 ; CHECK-NEXT: adds r2, r0, #3
135 ; CHECK-NEXT: vldrh.u32 q0, [r2]
136 ; CHECK-NEXT: vstrw.32 q0, [r1]
139 %z = getelementptr inbounds i8, i8* %x, i32 3
140 %0 = bitcast i8* %z to <4 x i16>*
141 %1 = load <4 x i16>, <4 x i16>* %0, align 2
142 %2 = zext <4 x i16> %1 to <4 x i32>
143 %3 = bitcast i8* %y to <4 x i32>*
144 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i16> (align 2) from %x + 2, zero-extend to <4 x i32>, store to %y.
; Expected: offset folded into the widening load, vldrh.u32 q0, [r0, #2].
148 define i8* @ldrhu32_2(i8* %x, i8* %y) {
149 ; CHECK-LABEL: ldrhu32_2:
150 ; CHECK: @ %bb.0: @ %entry
151 ; CHECK-NEXT: vldrh.u32 q0, [r0, #2]
152 ; CHECK-NEXT: vstrw.32 q0, [r1]
155 %z = getelementptr inbounds i8, i8* %x, i32 2
156 %0 = bitcast i8* %z to <4 x i16>*
157 %1 = load <4 x i16>, <4 x i16>* %0, align 2
158 %2 = zext <4 x i16> %1 to <4 x i32>
159 %3 = bitcast i8* %y to <4 x i32>*
160 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i16> (align 2) from %x + 254, zero-extend to <4 x i32>, store to %y.
; Expected: address computed separately (add.w r2, r0, #254), then an un-offset vldrh.u32.
164 define i8* @ldrhu32_254(i8* %x, i8* %y) {
165 ; CHECK-LABEL: ldrhu32_254:
166 ; CHECK: @ %bb.0: @ %entry
167 ; CHECK-NEXT: add.w r2, r0, #254
168 ; CHECK-NEXT: vldrh.u32 q0, [r2]
169 ; CHECK-NEXT: vstrw.32 q0, [r1]
172 %z = getelementptr inbounds i8, i8* %x, i32 254
173 %0 = bitcast i8* %z to <4 x i16>*
174 %1 = load <4 x i16>, <4 x i16>* %0, align 2
175 %2 = zext <4 x i16> %1 to <4 x i32>
176 %3 = bitcast i8* %y to <4 x i32>*
177 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i16> (align 2) from %x + 256, zero-extend to <4 x i32>, store to %y.
; Expected: address computed separately (add.w r2, r0, #256), then an un-offset vldrh.u32.
181 define i8* @ldrhu32_256(i8* %x, i8* %y) {
182 ; CHECK-LABEL: ldrhu32_256:
183 ; CHECK: @ %bb.0: @ %entry
184 ; CHECK-NEXT: add.w r2, r0, #256
185 ; CHECK-NEXT: vldrh.u32 q0, [r2]
186 ; CHECK-NEXT: vstrw.32 q0, [r1]
189 %z = getelementptr inbounds i8, i8* %x, i32 256
190 %0 = bitcast i8* %z to <4 x i16>*
191 %1 = load <4 x i16>, <4 x i16>* %0, align 2
192 %2 = zext <4 x i16> %1 to <4 x i32>
193 %3 = bitcast i8* %y to <4 x i32>*
194 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i16> (align 2) from %x + 4, sign-extend to <4 x i32>, store to %y.
; Expected: offset folded into the widening load, vldrh.s32 q0, [r0, #4].
199 define i8* @ldrhs32_4(i8* %x, i8* %y) {
200 ; CHECK-LABEL: ldrhs32_4:
201 ; CHECK: @ %bb.0: @ %entry
202 ; CHECK-NEXT: vldrh.s32 q0, [r0, #4]
203 ; CHECK-NEXT: vstrw.32 q0, [r1]
206 %z = getelementptr inbounds i8, i8* %x, i32 4
207 %0 = bitcast i8* %z to <4 x i16>*
208 %1 = load <4 x i16>, <4 x i16>* %0, align 2
209 %2 = sext <4 x i16> %1 to <4 x i32>
210 %3 = bitcast i8* %y to <4 x i32>*
211 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i16> (align 2) from %x + 3, sign-extend to <4 x i32>, store to %y.
; Expected: address computed separately (adds r2, r0, #3), then an un-offset vldrh.s32.
215 define i8* @ldrhs32_3(i8* %x, i8* %y) {
216 ; CHECK-LABEL: ldrhs32_3:
217 ; CHECK: @ %bb.0: @ %entry
218 ; CHECK-NEXT: adds r2, r0, #3
219 ; CHECK-NEXT: vldrh.s32 q0, [r2]
220 ; CHECK-NEXT: vstrw.32 q0, [r1]
223 %z = getelementptr inbounds i8, i8* %x, i32 3
224 %0 = bitcast i8* %z to <4 x i16>*
225 %1 = load <4 x i16>, <4 x i16>* %0, align 2
226 %2 = sext <4 x i16> %1 to <4 x i32>
227 %3 = bitcast i8* %y to <4 x i32>*
228 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i16> (align 2) from %x + 2, sign-extend to <4 x i32>, store to %y.
; Expected: offset folded into the widening load, vldrh.s32 q0, [r0, #2].
232 define i8* @ldrhs32_2(i8* %x, i8* %y) {
233 ; CHECK-LABEL: ldrhs32_2:
234 ; CHECK: @ %bb.0: @ %entry
235 ; CHECK-NEXT: vldrh.s32 q0, [r0, #2]
236 ; CHECK-NEXT: vstrw.32 q0, [r1]
239 %z = getelementptr inbounds i8, i8* %x, i32 2
240 %0 = bitcast i8* %z to <4 x i16>*
241 %1 = load <4 x i16>, <4 x i16>* %0, align 2
242 %2 = sext <4 x i16> %1 to <4 x i32>
243 %3 = bitcast i8* %y to <4 x i32>*
244 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i16> (align 2) from %x + 254, sign-extend to <4 x i32>, store to %y.
; Expected: address computed separately (add.w r2, r0, #254), then an un-offset vldrh.s32.
248 define i8* @ldrhs32_254(i8* %x, i8* %y) {
249 ; CHECK-LABEL: ldrhs32_254:
250 ; CHECK: @ %bb.0: @ %entry
251 ; CHECK-NEXT: add.w r2, r0, #254
252 ; CHECK-NEXT: vldrh.s32 q0, [r2]
253 ; CHECK-NEXT: vstrw.32 q0, [r1]
256 %z = getelementptr inbounds i8, i8* %x, i32 254
257 %0 = bitcast i8* %z to <4 x i16>*
258 %1 = load <4 x i16>, <4 x i16>* %0, align 2
259 %2 = sext <4 x i16> %1 to <4 x i32>
260 %3 = bitcast i8* %y to <4 x i32>*
261 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i16> (align 2) from %x + 256, sign-extend to <4 x i32>, store to %y.
; Expected: address computed separately (add.w r2, r0, #256), then an un-offset vldrh.s32.
265 define i8* @ldrhs32_256(i8* %x, i8* %y) {
266 ; CHECK-LABEL: ldrhs32_256:
267 ; CHECK: @ %bb.0: @ %entry
268 ; CHECK-NEXT: add.w r2, r0, #256
269 ; CHECK-NEXT: vldrh.s32 q0, [r2]
270 ; CHECK-NEXT: vstrw.32 q0, [r1]
273 %z = getelementptr inbounds i8, i8* %x, i32 256
274 %0 = bitcast i8* %z to <4 x i16>*
275 %1 = load <4 x i16>, <4 x i16>* %0, align 2
276 %2 = sext <4 x i16> %1 to <4 x i32>
277 %3 = bitcast i8* %y to <4 x i32>*
278 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <8 x i16> (align 2) from %x + 4 and store it to %y.
; Expected: offset folded into the load, vldrh.u16 q0, [r0, #4].
283 define i8* @ldrhu16_4(i8* %x, i8* %y) {
284 ; CHECK-LABEL: ldrhu16_4:
285 ; CHECK: @ %bb.0: @ %entry
286 ; CHECK-NEXT: vldrh.u16 q0, [r0, #4]
287 ; CHECK-NEXT: vstrh.16 q0, [r1]
290 %z = getelementptr inbounds i8, i8* %x, i32 4
291 %0 = bitcast i8* %z to <8 x i16>*
292 %1 = load <8 x i16>, <8 x i16>* %0, align 2
293 %2 = bitcast i8* %y to <8 x i16>*
294 store <8 x i16> %1, <8 x i16>* %2, align 2
; Load <8 x i16> (align 2) from %x + 3 and store it to %y.
; Expected: address computed separately (adds r2, r0, #3), then an un-offset vldrh.u16.
298 define i8* @ldrhu16_3(i8* %x, i8* %y) {
299 ; CHECK-LABEL: ldrhu16_3:
300 ; CHECK: @ %bb.0: @ %entry
301 ; CHECK-NEXT: adds r2, r0, #3
302 ; CHECK-NEXT: vldrh.u16 q0, [r2]
303 ; CHECK-NEXT: vstrh.16 q0, [r1]
306 %z = getelementptr inbounds i8, i8* %x, i32 3
307 %0 = bitcast i8* %z to <8 x i16>*
308 %1 = load <8 x i16>, <8 x i16>* %0, align 2
309 %2 = bitcast i8* %y to <8 x i16>*
310 store <8 x i16> %1, <8 x i16>* %2, align 2
; Load <8 x i16> (align 2) from %x + 2 and store it to %y.
; Expected: offset folded into the load, vldrh.u16 q0, [r0, #2].
314 define i8* @ldrhu16_2(i8* %x, i8* %y) {
315 ; CHECK-LABEL: ldrhu16_2:
316 ; CHECK: @ %bb.0: @ %entry
317 ; CHECK-NEXT: vldrh.u16 q0, [r0, #2]
318 ; CHECK-NEXT: vstrh.16 q0, [r1]
321 %z = getelementptr inbounds i8, i8* %x, i32 2
322 %0 = bitcast i8* %z to <8 x i16>*
323 %1 = load <8 x i16>, <8 x i16>* %0, align 2
324 %2 = bitcast i8* %y to <8 x i16>*
325 store <8 x i16> %1, <8 x i16>* %2, align 2
; Load <8 x i16> (align 2) from %x + 254 and store it to %y.
; Expected: address computed separately (add.w r2, r0, #254), then an un-offset vldrh.u16.
329 define i8* @ldrhu16_254(i8* %x, i8* %y) {
330 ; CHECK-LABEL: ldrhu16_254:
331 ; CHECK: @ %bb.0: @ %entry
332 ; CHECK-NEXT: add.w r2, r0, #254
333 ; CHECK-NEXT: vldrh.u16 q0, [r2]
334 ; CHECK-NEXT: vstrh.16 q0, [r1]
337 %z = getelementptr inbounds i8, i8* %x, i32 254
338 %0 = bitcast i8* %z to <8 x i16>*
339 %1 = load <8 x i16>, <8 x i16>* %0, align 2
340 %2 = bitcast i8* %y to <8 x i16>*
341 store <8 x i16> %1, <8 x i16>* %2, align 2
; Load <8 x i16> (align 2) from %x + 256 and store it to %y.
; Expected: address computed separately (add.w r2, r0, #256), then an un-offset vldrh.u16.
345 define i8* @ldrhu16_256(i8* %x, i8* %y) {
346 ; CHECK-LABEL: ldrhu16_256:
347 ; CHECK: @ %bb.0: @ %entry
348 ; CHECK-NEXT: add.w r2, r0, #256
349 ; CHECK-NEXT: vldrh.u16 q0, [r2]
350 ; CHECK-NEXT: vstrh.16 q0, [r1]
353 %z = getelementptr inbounds i8, i8* %x, i32 256
354 %0 = bitcast i8* %z to <8 x i16>*
355 %1 = load <8 x i16>, <8 x i16>* %0, align 2
356 %2 = bitcast i8* %y to <8 x i16>*
357 store <8 x i16> %1, <8 x i16>* %2, align 2
; Load <4 x i8> (align 1) from %x + 4, zero-extend to <4 x i32>, store to %y.
; Expected: offset folded into the widening load, vldrb.u32 q0, [r0, #4].
362 define i8* @ldrbu32_4(i8* %x, i8* %y) {
363 ; CHECK-LABEL: ldrbu32_4:
364 ; CHECK: @ %bb.0: @ %entry
365 ; CHECK-NEXT: vldrb.u32 q0, [r0, #4]
366 ; CHECK-NEXT: vstrw.32 q0, [r1]
369 %z = getelementptr inbounds i8, i8* %x, i32 4
370 %0 = bitcast i8* %z to <4 x i8>*
371 %1 = load <4 x i8>, <4 x i8>* %0, align 1
372 %2 = zext <4 x i8> %1 to <4 x i32>
373 %3 = bitcast i8* %y to <4 x i32>*
374 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i8> (align 1) from %x + 3, zero-extend to <4 x i32>, store to %y.
; Expected: offset folded into the widening load, vldrb.u32 q0, [r0, #3].
378 define i8* @ldrbu32_3(i8* %x, i8* %y) {
379 ; CHECK-LABEL: ldrbu32_3:
380 ; CHECK: @ %bb.0: @ %entry
381 ; CHECK-NEXT: vldrb.u32 q0, [r0, #3]
382 ; CHECK-NEXT: vstrw.32 q0, [r1]
385 %z = getelementptr inbounds i8, i8* %x, i32 3
386 %0 = bitcast i8* %z to <4 x i8>*
387 %1 = load <4 x i8>, <4 x i8>* %0, align 1
388 %2 = zext <4 x i8> %1 to <4 x i32>
389 %3 = bitcast i8* %y to <4 x i32>*
390 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i8> (align 1) from %x + 127, zero-extend to <4 x i32>, store to %y.
; Expected: address computed separately (add.w r2, r0, #127), then an un-offset vldrb.u32.
394 define i8* @ldrbu32_127(i8* %x, i8* %y) {
395 ; CHECK-LABEL: ldrbu32_127:
396 ; CHECK: @ %bb.0: @ %entry
397 ; CHECK-NEXT: add.w r2, r0, #127
398 ; CHECK-NEXT: vldrb.u32 q0, [r2]
399 ; CHECK-NEXT: vstrw.32 q0, [r1]
402 %z = getelementptr inbounds i8, i8* %x, i32 127
403 %0 = bitcast i8* %z to <4 x i8>*
404 %1 = load <4 x i8>, <4 x i8>* %0, align 1
405 %2 = zext <4 x i8> %1 to <4 x i32>
406 %3 = bitcast i8* %y to <4 x i32>*
407 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i8> (align 1) from %x + 128, zero-extend to <4 x i32>, store to %y.
; Expected: address computed separately (add.w r2, r0, #128), then an un-offset vldrb.u32.
411 define i8* @ldrbu32_128(i8* %x, i8* %y) {
412 ; CHECK-LABEL: ldrbu32_128:
413 ; CHECK: @ %bb.0: @ %entry
414 ; CHECK-NEXT: add.w r2, r0, #128
415 ; CHECK-NEXT: vldrb.u32 q0, [r2]
416 ; CHECK-NEXT: vstrw.32 q0, [r1]
419 %z = getelementptr inbounds i8, i8* %x, i32 128
420 %0 = bitcast i8* %z to <4 x i8>*
421 %1 = load <4 x i8>, <4 x i8>* %0, align 1
422 %2 = zext <4 x i8> %1 to <4 x i32>
423 %3 = bitcast i8* %y to <4 x i32>*
424 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i8> (align 1) from %x + 4, sign-extend to <4 x i32>, store to %y.
; Expected: offset folded into the widening load, vldrb.s32 q0, [r0, #4].
429 define i8* @ldrbs32_4(i8* %x, i8* %y) {
430 ; CHECK-LABEL: ldrbs32_4:
431 ; CHECK: @ %bb.0: @ %entry
432 ; CHECK-NEXT: vldrb.s32 q0, [r0, #4]
433 ; CHECK-NEXT: vstrw.32 q0, [r1]
436 %z = getelementptr inbounds i8, i8* %x, i32 4
437 %0 = bitcast i8* %z to <4 x i8>*
438 %1 = load <4 x i8>, <4 x i8>* %0, align 1
439 %2 = sext <4 x i8> %1 to <4 x i32>
440 %3 = bitcast i8* %y to <4 x i32>*
441 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i8> (align 1) from %x + 3, sign-extend to <4 x i32>, store to %y.
; Expected: offset folded into the widening load, vldrb.s32 q0, [r0, #3].
445 define i8* @ldrbs32_3(i8* %x, i8* %y) {
446 ; CHECK-LABEL: ldrbs32_3:
447 ; CHECK: @ %bb.0: @ %entry
448 ; CHECK-NEXT: vldrb.s32 q0, [r0, #3]
449 ; CHECK-NEXT: vstrw.32 q0, [r1]
452 %z = getelementptr inbounds i8, i8* %x, i32 3
453 %0 = bitcast i8* %z to <4 x i8>*
454 %1 = load <4 x i8>, <4 x i8>* %0, align 1
455 %2 = sext <4 x i8> %1 to <4 x i32>
456 %3 = bitcast i8* %y to <4 x i32>*
457 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i8> (align 1) from %x + 127, sign-extend to <4 x i32>, store to %y.
; Expected: address computed separately (add.w r2, r0, #127), then an un-offset vldrb.s32.
461 define i8* @ldrbs32_127(i8* %x, i8* %y) {
462 ; CHECK-LABEL: ldrbs32_127:
463 ; CHECK: @ %bb.0: @ %entry
464 ; CHECK-NEXT: add.w r2, r0, #127
465 ; CHECK-NEXT: vldrb.s32 q0, [r2]
466 ; CHECK-NEXT: vstrw.32 q0, [r1]
469 %z = getelementptr inbounds i8, i8* %x, i32 127
470 %0 = bitcast i8* %z to <4 x i8>*
471 %1 = load <4 x i8>, <4 x i8>* %0, align 1
472 %2 = sext <4 x i8> %1 to <4 x i32>
473 %3 = bitcast i8* %y to <4 x i32>*
474 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <4 x i8> (align 1) from %x + 128, sign-extend to <4 x i32>, store to %y.
; Expected: address computed separately (add.w r2, r0, #128), then an un-offset vldrb.s32.
478 define i8* @ldrbs32_128(i8* %x, i8* %y) {
479 ; CHECK-LABEL: ldrbs32_128:
480 ; CHECK: @ %bb.0: @ %entry
481 ; CHECK-NEXT: add.w r2, r0, #128
482 ; CHECK-NEXT: vldrb.s32 q0, [r2]
483 ; CHECK-NEXT: vstrw.32 q0, [r1]
486 %z = getelementptr inbounds i8, i8* %x, i32 128
487 %0 = bitcast i8* %z to <4 x i8>*
488 %1 = load <4 x i8>, <4 x i8>* %0, align 1
489 %2 = sext <4 x i8> %1 to <4 x i32>
490 %3 = bitcast i8* %y to <4 x i32>*
491 store <4 x i32> %2, <4 x i32>* %3, align 4
; Load <8 x i8> (align 1) from %x + 4, zero-extend to <8 x i16>, store to %y.
; Expected: offset folded into the widening load, vldrb.u16 q0, [r0, #4].
496 define i8* @ldrbu16_4(i8* %x, i8* %y) {
497 ; CHECK-LABEL: ldrbu16_4:
498 ; CHECK: @ %bb.0: @ %entry
499 ; CHECK-NEXT: vldrb.u16 q0, [r0, #4]
500 ; CHECK-NEXT: vstrh.16 q0, [r1]
503 %z = getelementptr inbounds i8, i8* %x, i32 4
504 %0 = bitcast i8* %z to <8 x i8>*
505 %1 = load <8 x i8>, <8 x i8>* %0, align 1
506 %2 = zext <8 x i8> %1 to <8 x i16>
507 %3 = bitcast i8* %y to <8 x i16>*
508 store <8 x i16> %2, <8 x i16>* %3, align 2
; Load <8 x i8> (align 1) from %x + 3, zero-extend to <8 x i16>, store to %y.
; Expected: offset folded into the widening load, vldrb.u16 q0, [r0, #3].
512 define i8* @ldrbu16_3(i8* %x, i8* %y) {
513 ; CHECK-LABEL: ldrbu16_3:
514 ; CHECK: @ %bb.0: @ %entry
515 ; CHECK-NEXT: vldrb.u16 q0, [r0, #3]
516 ; CHECK-NEXT: vstrh.16 q0, [r1]
519 %z = getelementptr inbounds i8, i8* %x, i32 3
520 %0 = bitcast i8* %z to <8 x i8>*
521 %1 = load <8 x i8>, <8 x i8>* %0, align 1
522 %2 = zext <8 x i8> %1 to <8 x i16>
523 %3 = bitcast i8* %y to <8 x i16>*
524 store <8 x i16> %2, <8 x i16>* %3, align 2
; Load <8 x i8> (align 1) from %x + 127, zero-extend to <8 x i16>, store to %y.
; Expected: address computed separately (add.w r2, r0, #127), then an un-offset vldrb.u16.
528 define i8* @ldrbu16_127(i8* %x, i8* %y) {
529 ; CHECK-LABEL: ldrbu16_127:
530 ; CHECK: @ %bb.0: @ %entry
531 ; CHECK-NEXT: add.w r2, r0, #127
532 ; CHECK-NEXT: vldrb.u16 q0, [r2]
533 ; CHECK-NEXT: vstrh.16 q0, [r1]
536 %z = getelementptr inbounds i8, i8* %x, i32 127
537 %0 = bitcast i8* %z to <8 x i8>*
538 %1 = load <8 x i8>, <8 x i8>* %0, align 1
539 %2 = zext <8 x i8> %1 to <8 x i16>
540 %3 = bitcast i8* %y to <8 x i16>*
541 store <8 x i16> %2, <8 x i16>* %3, align 2
; Load <8 x i8> (align 1) from %x + 128, zero-extend to <8 x i16>, store to %y.
; Expected: address computed separately (add.w r2, r0, #128), then an un-offset vldrb.u16.
545 define i8* @ldrbu16_128(i8* %x, i8* %y) {
546 ; CHECK-LABEL: ldrbu16_128:
547 ; CHECK: @ %bb.0: @ %entry
548 ; CHECK-NEXT: add.w r2, r0, #128
549 ; CHECK-NEXT: vldrb.u16 q0, [r2]
550 ; CHECK-NEXT: vstrh.16 q0, [r1]
553 %z = getelementptr inbounds i8, i8* %x, i32 128
554 %0 = bitcast i8* %z to <8 x i8>*
555 %1 = load <8 x i8>, <8 x i8>* %0, align 1
556 %2 = zext <8 x i8> %1 to <8 x i16>
557 %3 = bitcast i8* %y to <8 x i16>*
558 store <8 x i16> %2, <8 x i16>* %3, align 2
; Load <8 x i8> (align 1) from %x + 4, sign-extend to <8 x i16>, store to %y.
; Expected: offset folded into the widening load, vldrb.s16 q0, [r0, #4].
563 define i8* @ldrbs16_4(i8* %x, i8* %y) {
564 ; CHECK-LABEL: ldrbs16_4:
565 ; CHECK: @ %bb.0: @ %entry
566 ; CHECK-NEXT: vldrb.s16 q0, [r0, #4]
567 ; CHECK-NEXT: vstrh.16 q0, [r1]
570 %z = getelementptr inbounds i8, i8* %x, i32 4
571 %0 = bitcast i8* %z to <8 x i8>*
572 %1 = load <8 x i8>, <8 x i8>* %0, align 1
573 %2 = sext <8 x i8> %1 to <8 x i16>
574 %3 = bitcast i8* %y to <8 x i16>*
575 store <8 x i16> %2, <8 x i16>* %3, align 2
; Load <8 x i8> (align 1) from %x + 3, sign-extend to <8 x i16>, store to %y.
; Expected: offset folded into the widening load, vldrb.s16 q0, [r0, #3].
579 define i8* @ldrbs16_3(i8* %x, i8* %y) {
580 ; CHECK-LABEL: ldrbs16_3:
581 ; CHECK: @ %bb.0: @ %entry
582 ; CHECK-NEXT: vldrb.s16 q0, [r0, #3]
583 ; CHECK-NEXT: vstrh.16 q0, [r1]
586 %z = getelementptr inbounds i8, i8* %x, i32 3
587 %0 = bitcast i8* %z to <8 x i8>*
588 %1 = load <8 x i8>, <8 x i8>* %0, align 1
589 %2 = sext <8 x i8> %1 to <8 x i16>
590 %3 = bitcast i8* %y to <8 x i16>*
591 store <8 x i16> %2, <8 x i16>* %3, align 2
; Load <8 x i8> (align 1) from %x + 127, sign-extend to <8 x i16>, store to %y.
; Expected: address computed separately (add.w r2, r0, #127), then an un-offset vldrb.s16.
595 define i8* @ldrbs16_127(i8* %x, i8* %y) {
596 ; CHECK-LABEL: ldrbs16_127:
597 ; CHECK: @ %bb.0: @ %entry
598 ; CHECK-NEXT: add.w r2, r0, #127
599 ; CHECK-NEXT: vldrb.s16 q0, [r2]
600 ; CHECK-NEXT: vstrh.16 q0, [r1]
603 %z = getelementptr inbounds i8, i8* %x, i32 127
604 %0 = bitcast i8* %z to <8 x i8>*
605 %1 = load <8 x i8>, <8 x i8>* %0, align 1
606 %2 = sext <8 x i8> %1 to <8 x i16>
607 %3 = bitcast i8* %y to <8 x i16>*
608 store <8 x i16> %2, <8 x i16>* %3, align 2
; Load <8 x i8> (align 1) from %x + 128, sign-extend to <8 x i16>, store to %y.
; Expected: address computed separately (add.w r2, r0, #128), then an un-offset vldrb.s16.
612 define i8* @ldrbs16_128(i8* %x, i8* %y) {
613 ; CHECK-LABEL: ldrbs16_128:
614 ; CHECK: @ %bb.0: @ %entry
615 ; CHECK-NEXT: add.w r2, r0, #128
616 ; CHECK-NEXT: vldrb.s16 q0, [r2]
617 ; CHECK-NEXT: vstrh.16 q0, [r1]
620 %z = getelementptr inbounds i8, i8* %x, i32 128
621 %0 = bitcast i8* %z to <8 x i8>*
622 %1 = load <8 x i8>, <8 x i8>* %0, align 1
623 %2 = sext <8 x i8> %1 to <8 x i16>
624 %3 = bitcast i8* %y to <8 x i16>*
625 store <8 x i16> %2, <8 x i16>* %3, align 2
; Load <16 x i8> (align 1) from %x + 4 and store it to %y.
; Expected: offset folded into the load, vldrb.u8 q0, [r0, #4].
630 define i8* @ldrbu8_4(i8* %x, i8* %y) {
631 ; CHECK-LABEL: ldrbu8_4:
632 ; CHECK: @ %bb.0: @ %entry
633 ; CHECK-NEXT: vldrb.u8 q0, [r0, #4]
634 ; CHECK-NEXT: vstrb.8 q0, [r1]
637 %z = getelementptr inbounds i8, i8* %x, i32 4
638 %0 = bitcast i8* %z to <16 x i8>*
639 %1 = load <16 x i8>, <16 x i8>* %0, align 1
640 %2 = bitcast i8* %y to <16 x i8>*
641 store <16 x i8> %1, <16 x i8>* %2, align 1
; Load <16 x i8> (align 1) from %x + 3 and store it to %y.
; Expected: offset folded into the load, vldrb.u8 q0, [r0, #3].
645 define i8* @ldrbu8_3(i8* %x, i8* %y) {
646 ; CHECK-LABEL: ldrbu8_3:
647 ; CHECK: @ %bb.0: @ %entry
648 ; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
649 ; CHECK-NEXT: vstrb.8 q0, [r1]
652 %z = getelementptr inbounds i8, i8* %x, i32 3
653 %0 = bitcast i8* %z to <16 x i8>*
654 %1 = load <16 x i8>, <16 x i8>* %0, align 1
655 %2 = bitcast i8* %y to <16 x i8>*
656 store <16 x i8> %1, <16 x i8>* %2, align 1
; Load <16 x i8> (align 1) from %x + 127 and store it to %y.
; Expected: address computed separately (add.w r2, r0, #127), then an un-offset vldrb.u8.
660 define i8* @ldrbu8_127(i8* %x, i8* %y) {
661 ; CHECK-LABEL: ldrbu8_127:
662 ; CHECK: @ %bb.0: @ %entry
663 ; CHECK-NEXT: add.w r2, r0, #127
664 ; CHECK-NEXT: vldrb.u8 q0, [r2]
665 ; CHECK-NEXT: vstrb.8 q0, [r1]
668 %z = getelementptr inbounds i8, i8* %x, i32 127
669 %0 = bitcast i8* %z to <16 x i8>*
670 %1 = load <16 x i8>, <16 x i8>* %0, align 1
671 %2 = bitcast i8* %y to <16 x i8>*
672 store <16 x i8> %1, <16 x i8>* %2, align 1
; Load <16 x i8> (align 1) from %x + 128 and store it to %y.
; Expected: address computed separately (add.w r2, r0, #128), then an un-offset vldrb.u8.
676 define i8* @ldrbu8_128(i8* %x, i8* %y) {
677 ; CHECK-LABEL: ldrbu8_128:
678 ; CHECK: @ %bb.0: @ %entry
679 ; CHECK-NEXT: add.w r2, r0, #128
680 ; CHECK-NEXT: vldrb.u8 q0, [r2]
681 ; CHECK-NEXT: vstrb.8 q0, [r1]
684 %z = getelementptr inbounds i8, i8* %x, i32 128
685 %0 = bitcast i8* %z to <16 x i8>*
686 %1 = load <16 x i8>, <16 x i8>* %0, align 1
687 %2 = bitcast i8* %y to <16 x i8>*
688 store <16 x i8> %1, <16 x i8>* %2, align 1
; Load <4 x float> (align 4) from %x + 4 and store it to %y.
; Expected: same code as the integer case, vldrw.u32 q0, [r0, #4] then vstrw.32.
692 define i8* @ldrwf32_4(i8* %x, i8* %y) {
693 ; CHECK-LABEL: ldrwf32_4:
694 ; CHECK: @ %bb.0: @ %entry
695 ; CHECK-NEXT: vldrw.u32 q0, [r0, #4]
696 ; CHECK-NEXT: vstrw.32 q0, [r1]
699 %z = getelementptr inbounds i8, i8* %x, i32 4
700 %0 = bitcast i8* %z to <4 x float>*
701 %1 = load <4 x float>, <4 x float>* %0, align 4
702 %2 = bitcast i8* %y to <4 x float>*
703 store <4 x float> %1, <4 x float>* %2, align 4
; Load <8 x half> (align 2) from %x + 4 and store it to %y.
; Expected: halfword vector ops with the offset folded, vldrh.u16 q0, [r0, #4] then vstrh.16.
707 define i8* @ldrwf16_4(i8* %x, i8* %y) {
708 ; CHECK-LABEL: ldrwf16_4:
709 ; CHECK: @ %bb.0: @ %entry
710 ; CHECK-NEXT: vldrh.u16 q0, [r0, #4]
711 ; CHECK-NEXT: vstrh.16 q0, [r1]
714 %z = getelementptr inbounds i8, i8* %x, i32 4
715 %0 = bitcast i8* %z to <8 x half>*
716 %1 = load <8 x half>, <8 x half>* %0, align 2
717 %2 = bitcast i8* %y to <8 x half>*
718 store <8 x half> %1, <8 x half>* %2, align 2
; Load <4 x i32> with only align 1 from %x + 3 and store it (align 4) to %y.
; Expected: the under-aligned load becomes a byte load with folded offset,
; vldrb.u8 q0, [r0, #3], followed by vstrw.32.
722 define i8* @ldrwi32_align1(i8* %x, i8* %y) {
723 ; CHECK-LABEL: ldrwi32_align1:
724 ; CHECK: @ %bb.0: @ %entry
725 ; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
726 ; CHECK-NEXT: vstrw.32 q0, [r1]
729 %z = getelementptr inbounds i8, i8* %x, i32 3
730 %0 = bitcast i8* %z to <4 x i32>*
731 %1 = load <4 x i32>, <4 x i32>* %0, align 1
732 %2 = bitcast i8* %y to <4 x i32>*
733 store <4 x i32> %1, <4 x i32>* %2, align 4
; Load <8 x i16> with only align 1 from %x + 3 and store it (align 2) to %y.
; Expected: the under-aligned load becomes a byte load with folded offset,
; vldrb.u8 q0, [r0, #3], followed by vstrh.16.
737 define i8* @ldrhi16_align1(i8* %x, i8* %y) {
738 ; CHECK-LABEL: ldrhi16_align1:
739 ; CHECK: @ %bb.0: @ %entry
740 ; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
741 ; CHECK-NEXT: vstrh.16 q0, [r1]
744 %z = getelementptr inbounds i8, i8* %x, i32 3
745 %0 = bitcast i8* %z to <8 x i16>*
746 %1 = load <8 x i16>, <8 x i16>* %0, align 1
747 %2 = bitcast i8* %y to <8 x i16>*
748 store <8 x i16> %1, <8 x i16>* %2, align 2
; Load <4 x i16> with only align 1 from %x + 3, sign-extend to <4 x i32>, store to %y.
; Expected: the 8 source bytes are read with two scalar ldr.w (offsets #3 and #7),
; spilled to an 8-byte stack slot with strd, and then loaded from sp with vldrh.s32.
752 define i8* @ldrhi32_align1(i8* %x, i8* %y) {
753 ; CHECK-LABEL: ldrhi32_align1:
754 ; CHECK: @ %bb.0: @ %entry
755 ; CHECK-NEXT: .pad #8
756 ; CHECK-NEXT: sub sp, #8
757 ; CHECK-NEXT: ldr.w r3, [r0, #7]
758 ; CHECK-NEXT: ldr.w r2, [r0, #3]
759 ; CHECK-NEXT: strd r2, r3, [sp]
760 ; CHECK-NEXT: mov r2, sp
761 ; CHECK-NEXT: vldrh.s32 q0, [r2]
762 ; CHECK-NEXT: vstrw.32 q0, [r1]
763 ; CHECK-NEXT: add sp, #8
766 %z = getelementptr inbounds i8, i8* %x, i32 3
767 %0 = bitcast i8* %z to <4 x i16>*
768 %1 = load <4 x i16>, <4 x i16>* %0, align 1
769 %2 = bitcast i8* %y to <4 x i32>*
770 %3 = sext <4 x i16> %1 to <4 x i32>
771 store <4 x i32> %3, <4 x i32>* %2, align 4
; Load <4 x float> with only align 1 from %x + 3 and store it (align 4) to %y.
; Expected: the under-aligned load becomes a byte load with folded offset,
; vldrb.u8 q0, [r0, #3], followed by vstrw.32.
775 define i8* @ldrwf32_align1(i8* %x, i8* %y) {
776 ; CHECK-LABEL: ldrwf32_align1:
777 ; CHECK: @ %bb.0: @ %entry
778 ; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
779 ; CHECK-NEXT: vstrw.32 q0, [r1]
782 %z = getelementptr inbounds i8, i8* %x, i32 3
783 %0 = bitcast i8* %z to <4 x float>*
784 %1 = load <4 x float>, <4 x float>* %0, align 1
785 %2 = bitcast i8* %y to <4 x float>*
786 store <4 x float> %1, <4 x float>* %2, align 4
; Load <8 x half> with only align 1 from %x + 3 and store it (align 2) to %y.
; Expected: the under-aligned load becomes a byte load with folded offset,
; vldrb.u8 q0, [r0, #3], followed by vstrh.16.
790 define i8* @ldrwf16_align1(i8* %x, i8* %y) {
791 ; CHECK-LABEL: ldrwf16_align1:
792 ; CHECK: @ %bb.0: @ %entry
793 ; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
794 ; CHECK-NEXT: vstrh.16 q0, [r1]
797 %z = getelementptr inbounds i8, i8* %x, i32 3
798 %0 = bitcast i8* %z to <8 x half>*
799 %1 = load <8 x half>, <8 x half>* %0, align 1
800 %2 = bitcast i8* %y to <8 x half>*
801 store <8 x half> %1, <8 x half>* %2, align 2
; Load <4 x i32> (align 4) from %x and store it to %y + 4.
; Expected: offset folded into the store, vstrw.32 q0, [r0, #4].
809 define i8* @strw32_4(i8* %y, i8* %x) {
810 ; CHECK-LABEL: strw32_4:
811 ; CHECK: @ %bb.0: @ %entry
812 ; CHECK-NEXT: vldrw.u32 q0, [r1]
813 ; CHECK-NEXT: vstrw.32 q0, [r0, #4]
816 %z = getelementptr inbounds i8, i8* %y, i32 4
817 %0 = bitcast i8* %x to <4 x i32>*
818 %1 = load <4 x i32>, <4 x i32>* %0, align 4
819 %2 = bitcast i8* %z to <4 x i32>*
820 store <4 x i32> %1, <4 x i32>* %2, align 4
; Load <4 x i32> (align 4) from %x and store it to %y + 3.
; Expected: address computed separately (adds r1, r0, #3), then an un-offset vstrw.32.
824 define i8* @strw32_3(i8* %y, i8* %x) {
825 ; CHECK-LABEL: strw32_3:
826 ; CHECK: @ %bb.0: @ %entry
827 ; CHECK-NEXT: vldrw.u32 q0, [r1]
828 ; CHECK-NEXT: adds r1, r0, #3
829 ; CHECK-NEXT: vstrw.32 q0, [r1]
832 %z = getelementptr inbounds i8, i8* %y, i32 3
833 %0 = bitcast i8* %x to <4 x i32>*
834 %1 = load <4 x i32>, <4 x i32>* %0, align 4
835 %2 = bitcast i8* %z to <4 x i32>*
836 store <4 x i32> %1, <4 x i32>* %2, align 4
; Load <4 x i32> (align 4) from %x and store it to %y - 4.
; Expected: negative offset folded into the store, vstrw.32 q0, [r0, #-4].
840 define i8* @strw32_m4(i8* %y, i8* %x) {
841 ; CHECK-LABEL: strw32_m4:
842 ; CHECK: @ %bb.0: @ %entry
843 ; CHECK-NEXT: vldrw.u32 q0, [r1]
844 ; CHECK-NEXT: vstrw.32 q0, [r0, #-4]
847 %z = getelementptr inbounds i8, i8* %y, i32 -4
848 %0 = bitcast i8* %x to <4 x i32>*
849 %1 = load <4 x i32>, <4 x i32>* %0, align 4
850 %2 = bitcast i8* %z to <4 x i32>*
851 store <4 x i32> %1, <4 x i32>* %2, align 4
; Load <4 x i32> (align 4) from %x and store it to %y + 508.
; Expected: address computed separately (add.w r1, r0, #508), then an un-offset vstrw.32.
855 define i8* @strw32_508(i8* %y, i8* %x) {
856 ; CHECK-LABEL: strw32_508:
857 ; CHECK: @ %bb.0: @ %entry
858 ; CHECK-NEXT: vldrw.u32 q0, [r1]
859 ; CHECK-NEXT: add.w r1, r0, #508
860 ; CHECK-NEXT: vstrw.32 q0, [r1]
863 %z = getelementptr inbounds i8, i8* %y, i32 508
864 %0 = bitcast i8* %x to <4 x i32>*
865 %1 = load <4 x i32>, <4 x i32>* %0, align 4
866 %2 = bitcast i8* %z to <4 x i32>*
867 store <4 x i32> %1, <4 x i32>* %2, align 4
; Load <4 x i32> (align 4) from %x and store it to %y + 512.
; Expected: address computed separately (add.w r1, r0, #512), then an un-offset vstrw.32.
871 define i8* @strw32_512(i8* %y, i8* %x) {
872 ; CHECK-LABEL: strw32_512:
873 ; CHECK: @ %bb.0: @ %entry
874 ; CHECK-NEXT: vldrw.u32 q0, [r1]
875 ; CHECK-NEXT: add.w r1, r0, #512
876 ; CHECK-NEXT: vstrw.32 q0, [r1]
879 %z = getelementptr inbounds i8, i8* %y, i32 512
880 %0 = bitcast i8* %x to <4 x i32>*
881 %1 = load <4 x i32>, <4 x i32>* %0, align 4
882 %2 = bitcast i8* %z to <4 x i32>*
883 store <4 x i32> %1, <4 x i32>* %2, align 4
; Load <4 x i32> (align 4) from %x and store it to %y - 508.
; Expected: address computed separately (sub.w r1, r0, #508), then an un-offset vstrw.32.
887 define i8* @strw32_m508(i8* %y, i8* %x) {
888 ; CHECK-LABEL: strw32_m508:
889 ; CHECK: @ %bb.0: @ %entry
890 ; CHECK-NEXT: vldrw.u32 q0, [r1]
891 ; CHECK-NEXT: sub.w r1, r0, #508
892 ; CHECK-NEXT: vstrw.32 q0, [r1]
895 %z = getelementptr inbounds i8, i8* %y, i32 -508
896 %0 = bitcast i8* %x to <4 x i32>*
897 %1 = load <4 x i32>, <4 x i32>* %0, align 4
898 %2 = bitcast i8* %z to <4 x i32>*
899 store <4 x i32> %1, <4 x i32>* %2, align 4
; Load <4 x i32> (align 4) from %x and store it to %y - 512.
; Expected: address computed separately (sub.w r1, r0, #512), then an un-offset vstrw.32.
903 define i8* @strw32_m512(i8* %y, i8* %x) {
904 ; CHECK-LABEL: strw32_m512:
905 ; CHECK: @ %bb.0: @ %entry
906 ; CHECK-NEXT: vldrw.u32 q0, [r1]
907 ; CHECK-NEXT: sub.w r1, r0, #512
908 ; CHECK-NEXT: vstrw.32 q0, [r1]
911 %z = getelementptr inbounds i8, i8* %y, i32 -512
912 %0 = bitcast i8* %x to <4 x i32>*
913 %1 = load <4 x i32>, <4 x i32>* %0, align 4
914 %2 = bitcast i8* %z to <4 x i32>*
915 store <4 x i32> %1, <4 x i32>* %2, align 4
; Load <4 x i16> (align 2) from %x and store it (align 2) to %y + 4.
; Expected: vldrh.u32 load, then offset folded into the store, vstrh.32 q0, [r0, #4].
920 define i8* @strh32_4(i8* %y, i8* %x) {
921 ; CHECK-LABEL: strh32_4:
922 ; CHECK: @ %bb.0: @ %entry
923 ; CHECK-NEXT: vldrh.u32 q0, [r1]
924 ; CHECK-NEXT: vstrh.32 q0, [r0, #4]
927 %z = getelementptr inbounds i8, i8* %y, i32 4
928 %0 = bitcast i8* %x to <4 x i16>*
929 %1 = load <4 x i16>, <4 x i16>* %0, align 2
930 %2 = bitcast i8* %z to <4 x i16>*
931 store <4 x i16> %1, <4 x i16>* %2, align 2
; Load <4 x i16> (align 2) from %x and store it (align 2) to %y + 3.
; Expected: address computed separately (adds r1, r0, #3), then an un-offset vstrh.32.
935 define i8* @strh32_3(i8* %y, i8* %x) {
936 ; CHECK-LABEL: strh32_3:
937 ; CHECK: @ %bb.0: @ %entry
938 ; CHECK-NEXT: vldrh.u32 q0, [r1]
939 ; CHECK-NEXT: adds r1, r0, #3
940 ; CHECK-NEXT: vstrh.32 q0, [r1]
943 %z = getelementptr inbounds i8, i8* %y, i32 3
944 %0 = bitcast i8* %x to <4 x i16>*
945 %1 = load <4 x i16>, <4 x i16>* %0, align 2
946 %2 = bitcast i8* %z to <4 x i16>*
947 store <4 x i16> %1, <4 x i16>* %2, align 2
; Load <4 x i16> (align 2) from %x and store it (align 2) to %y + 2.
; Expected: vldrh.u32 load, then offset folded into the store, vstrh.32 q0, [r0, #2].
951 define i8* @strh32_2(i8* %y, i8* %x) {
952 ; CHECK-LABEL: strh32_2:
953 ; CHECK: @ %bb.0: @ %entry
954 ; CHECK-NEXT: vldrh.u32 q0, [r1]
955 ; CHECK-NEXT: vstrh.32 q0, [r0, #2]
958 %z = getelementptr inbounds i8, i8* %y, i32 2
959 %0 = bitcast i8* %x to <4 x i16>*
960 %1 = load <4 x i16>, <4 x i16>* %0, align 2
961 %2 = bitcast i8* %z to <4 x i16>*
962 store <4 x i16> %1, <4 x i16>* %2, align 2
; Load <4 x i16> (align 2) from %x and store it (align 2) to %y + 254.
; Expected: address computed separately (add.w r1, r0, #254), then an un-offset vstrh.32.
966 define i8* @strh32_254(i8* %y, i8* %x) {
967 ; CHECK-LABEL: strh32_254:
968 ; CHECK: @ %bb.0: @ %entry
969 ; CHECK-NEXT: vldrh.u32 q0, [r1]
970 ; CHECK-NEXT: add.w r1, r0, #254
971 ; CHECK-NEXT: vstrh.32 q0, [r1]
974 %z = getelementptr inbounds i8, i8* %y, i32 254
975 %0 = bitcast i8* %x to <4 x i16>*
976 %1 = load <4 x i16>, <4 x i16>* %0, align 2
977 %2 = bitcast i8* %z to <4 x i16>*
978 store <4 x i16> %1, <4 x i16>* %2, align 2
; Load <4 x i16> (align 2) from %x and store it (align 2) to %y + 256.
; Expected: address computed separately (add.w r1, r0, #256), then an un-offset vstrh.32.
982 define i8* @strh32_256(i8* %y, i8* %x) {
983 ; CHECK-LABEL: strh32_256:
984 ; CHECK: @ %bb.0: @ %entry
985 ; CHECK-NEXT: vldrh.u32 q0, [r1]
986 ; CHECK-NEXT: add.w r1, r0, #256
987 ; CHECK-NEXT: vstrh.32 q0, [r1]
990 %z = getelementptr inbounds i8, i8* %y, i32 256
991 %0 = bitcast i8* %x to <4 x i16>*
992 %1 = load <4 x i16>, <4 x i16>* %0, align 2
993 %2 = bitcast i8* %z to <4 x i16>*
994 store <4 x i16> %1, <4 x i16>* %2, align 2
; Load <8 x i16> (align 2) from %x and store it to %y + 4.
; Expected: offset folded into the store, vstrh.16 q0, [r0, #4].
999 define i8* @strh16_4(i8* %y, i8* %x) {
1000 ; CHECK-LABEL: strh16_4:
1001 ; CHECK: @ %bb.0: @ %entry
1002 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1003 ; CHECK-NEXT: vstrh.16 q0, [r0, #4]
1006 %z = getelementptr inbounds i8, i8* %y, i32 4
1007 %0 = bitcast i8* %x to <8 x i16>*
1008 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1009 %2 = bitcast i8* %z to <8 x i16>*
1010 store <8 x i16> %1, <8 x i16>* %2, align 2
; Load <8 x i16> (align 2) from %x and store it to %y + 3.
; Expected: address computed separately (adds r1, r0, #3), then an un-offset vstrh.16.
1014 define i8* @strh16_3(i8* %y, i8* %x) {
1015 ; CHECK-LABEL: strh16_3:
1016 ; CHECK: @ %bb.0: @ %entry
1017 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1018 ; CHECK-NEXT: adds r1, r0, #3
1019 ; CHECK-NEXT: vstrh.16 q0, [r1]
1022 %z = getelementptr inbounds i8, i8* %y, i32 3
1023 %0 = bitcast i8* %x to <8 x i16>*
1024 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1025 %2 = bitcast i8* %z to <8 x i16>*
1026 store <8 x i16> %1, <8 x i16>* %2, align 2
; Load <8 x i16> (align 2) from %x and store it to %y + 2.
; Expected: offset folded into the store, vstrh.16 q0, [r0, #2].
1030 define i8* @strh16_2(i8* %y, i8* %x) {
1031 ; CHECK-LABEL: strh16_2:
1032 ; CHECK: @ %bb.0: @ %entry
1033 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1034 ; CHECK-NEXT: vstrh.16 q0, [r0, #2]
1037 %z = getelementptr inbounds i8, i8* %y, i32 2
1038 %0 = bitcast i8* %x to <8 x i16>*
1039 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1040 %2 = bitcast i8* %z to <8 x i16>*
1041 store <8 x i16> %1, <8 x i16>* %2, align 2
; Load <8 x i16> (align 2) from %x and store it to %y + 254.
; Expected: address computed separately (add.w r1, r0, #254), then an un-offset vstrh.16.
1045 define i8* @strh16_254(i8* %y, i8* %x) {
1046 ; CHECK-LABEL: strh16_254:
1047 ; CHECK: @ %bb.0: @ %entry
1048 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1049 ; CHECK-NEXT: add.w r1, r0, #254
1050 ; CHECK-NEXT: vstrh.16 q0, [r1]
1053 %z = getelementptr inbounds i8, i8* %y, i32 254
1054 %0 = bitcast i8* %x to <8 x i16>*
1055 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1056 %2 = bitcast i8* %z to <8 x i16>*
1057 store <8 x i16> %1, <8 x i16>* %2, align 2
; Load <8 x i16> (align 2) from %x and store it to %y + 256.
; Expected: address computed separately (add.w r1, r0, #256), then an un-offset vstrh.16.
1061 define i8* @strh16_256(i8* %y, i8* %x) {
1062 ; CHECK-LABEL: strh16_256:
1063 ; CHECK: @ %bb.0: @ %entry
1064 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1065 ; CHECK-NEXT: add.w r1, r0, #256
1066 ; CHECK-NEXT: vstrh.16 q0, [r1]
1069 %z = getelementptr inbounds i8, i8* %y, i32 256
1070 %0 = bitcast i8* %x to <8 x i16>*
1071 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1072 %2 = bitcast i8* %z to <8 x i16>*
1073 store <8 x i16> %1, <8 x i16>* %2, align 2
; Load <4 x i8> (align 1) from %x and store it (align 1) to %y + 4.
; Expected: vldrb.u32 load, then offset folded into the store, vstrb.32 q0, [r0, #4].
1078 define i8* @strb32_4(i8* %y, i8* %x) {
1079 ; CHECK-LABEL: strb32_4:
1080 ; CHECK: @ %bb.0: @ %entry
1081 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1082 ; CHECK-NEXT: vstrb.32 q0, [r0, #4]
1085 %z = getelementptr inbounds i8, i8* %y, i32 4
1086 %0 = bitcast i8* %x to <4 x i8>*
1087 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1088 %2 = bitcast i8* %z to <4 x i8>*
1089 store <4 x i8> %1, <4 x i8>* %2, align 1
; Load <4 x i8> (align 1) from %x and store it (align 1) to %y + 3.
; Expected: vldrb.u32 load, then offset folded into the store, vstrb.32 q0, [r0, #3].
1093 define i8* @strb32_3(i8* %y, i8* %x) {
1094 ; CHECK-LABEL: strb32_3:
1095 ; CHECK: @ %bb.0: @ %entry
1096 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1097 ; CHECK-NEXT: vstrb.32 q0, [r0, #3]
1100 %z = getelementptr inbounds i8, i8* %y, i32 3
1101 %0 = bitcast i8* %x to <4 x i8>*
1102 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1103 %2 = bitcast i8* %z to <4 x i8>*
1104 store <4 x i8> %1, <4 x i8>* %2, align 1
; Load <4 x i8> (align 1) from %x and store it (align 1) to %y + 127.
; Expected: address computed separately (add.w r1, r0, #127), then an un-offset vstrb.32.
1108 define i8* @strb32_127(i8* %y, i8* %x) {
1109 ; CHECK-LABEL: strb32_127:
1110 ; CHECK: @ %bb.0: @ %entry
1111 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1112 ; CHECK-NEXT: add.w r1, r0, #127
1113 ; CHECK-NEXT: vstrb.32 q0, [r1]
1116 %z = getelementptr inbounds i8, i8* %y, i32 127
1117 %0 = bitcast i8* %x to <4 x i8>*
1118 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1119 %2 = bitcast i8* %z to <4 x i8>*
1120 store <4 x i8> %1, <4 x i8>* %2, align 1
; Load <4 x i8> (align 1) from %x and store it (align 1) to %y + 128.
; Expected: address computed separately (add.w r1, r0, #128), then an un-offset vstrb.32.
1124 define i8* @strb32_128(i8* %y, i8* %x) {
1125 ; CHECK-LABEL: strb32_128:
1126 ; CHECK: @ %bb.0: @ %entry
1127 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1128 ; CHECK-NEXT: add.w r1, r0, #128
1129 ; CHECK-NEXT: vstrb.32 q0, [r1]
1132 %z = getelementptr inbounds i8, i8* %y, i32 128
1133 %0 = bitcast i8* %x to <4 x i8>*
1134 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1135 %2 = bitcast i8* %z to <4 x i8>*
1136 store <4 x i8> %1, <4 x i8>* %2, align 1
; Load <8 x i8> (align 1) from %x and store it (align 1) to %y + 4.
; Expected: vldrb.u16 load, then offset folded into the store, vstrb.16 q0, [r0, #4].
1141 define i8* @strb16_4(i8* %y, i8* %x) {
1142 ; CHECK-LABEL: strb16_4:
1143 ; CHECK: @ %bb.0: @ %entry
1144 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1145 ; CHECK-NEXT: vstrb.16 q0, [r0, #4]
1148 %z = getelementptr inbounds i8, i8* %y, i32 4
1149 %0 = bitcast i8* %x to <8 x i8>*
1150 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1151 %2 = bitcast i8* %z to <8 x i8>*
1152 store <8 x i8> %1, <8 x i8>* %2, align 1
; Load <8 x i8> (align 1) from %x and store it (align 1) to %y + 3.
; Expected: vldrb.u16 load, then offset folded into the store, vstrb.16 q0, [r0, #3].
1156 define i8* @strb16_3(i8* %y, i8* %x) {
1157 ; CHECK-LABEL: strb16_3:
1158 ; CHECK: @ %bb.0: @ %entry
1159 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1160 ; CHECK-NEXT: vstrb.16 q0, [r0, #3]
1163 %z = getelementptr inbounds i8, i8* %y, i32 3
1164 %0 = bitcast i8* %x to <8 x i8>*
1165 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1166 %2 = bitcast i8* %z to <8 x i8>*
1167 store <8 x i8> %1, <8 x i8>* %2, align 1
; Load <8 x i8> (align 1) from %x and store it (align 1) to %y + 127.
; Expected: address computed separately (add.w r1, r0, #127), then an un-offset vstrb.16.
1171 define i8* @strb16_127(i8* %y, i8* %x) {
1172 ; CHECK-LABEL: strb16_127:
1173 ; CHECK: @ %bb.0: @ %entry
1174 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1175 ; CHECK-NEXT: add.w r1, r0, #127
1176 ; CHECK-NEXT: vstrb.16 q0, [r1]
1179 %z = getelementptr inbounds i8, i8* %y, i32 127
1180 %0 = bitcast i8* %x to <8 x i8>*
1181 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1182 %2 = bitcast i8* %z to <8 x i8>*
1183 store <8 x i8> %1, <8 x i8>* %2, align 1
; strb16_128: load a <8 x i8> vector from %x (align 1) and store it at
; %y + 128 (align 1). The expected output loads with vldrb.u16, forms the
; store address with add.w r1, r0, #128, and stores with vstrb.16 using no
; immediate offset.
1187 define i8* @strb16_128(i8* %y, i8* %x) {
1188 ; CHECK-LABEL: strb16_128:
1189 ; CHECK: @ %bb.0: @ %entry
1190 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1191 ; CHECK-NEXT: add.w r1, r0, #128
1192 ; CHECK-NEXT: vstrb.16 q0, [r1]
1195 %z = getelementptr inbounds i8, i8* %y, i32 128
1196 %0 = bitcast i8* %x to <8 x i8>*
1197 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1198 %2 = bitcast i8* %z to <8 x i8>*
1199 store <8 x i8> %1, <8 x i8>* %2, align 1
; strb8_4: load a <16 x i8> vector from %x (align 1) and store it at
; %y + 4 (align 1). The expected output loads with vldrb.u8 and stores with
; vstrb.8 using the immediate offset #4 directly on r0.
1204 define i8* @strb8_4(i8* %y, i8* %x) {
1205 ; CHECK-LABEL: strb8_4:
1206 ; CHECK: @ %bb.0: @ %entry
1207 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1208 ; CHECK-NEXT: vstrb.8 q0, [r0, #4]
1211 %z = getelementptr inbounds i8, i8* %y, i32 4
1212 %0 = bitcast i8* %x to <16 x i8>*
1213 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1214 %2 = bitcast i8* %z to <16 x i8>*
1215 store <16 x i8> %1, <16 x i8>* %2, align 1
; strb8_3: load a <16 x i8> vector from %x (align 1) and store it at
; %y + 3 (align 1). The expected output loads with vldrb.u8 and stores with
; vstrb.8 using the immediate offset #3 directly on r0.
1219 define i8* @strb8_3(i8* %y, i8* %x) {
1220 ; CHECK-LABEL: strb8_3:
1221 ; CHECK: @ %bb.0: @ %entry
1222 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1223 ; CHECK-NEXT: vstrb.8 q0, [r0, #3]
1226 %z = getelementptr inbounds i8, i8* %y, i32 3
1227 %0 = bitcast i8* %x to <16 x i8>*
1228 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1229 %2 = bitcast i8* %z to <16 x i8>*
1230 store <16 x i8> %1, <16 x i8>* %2, align 1
; strb8_127: load a <16 x i8> vector from %x (align 1) and store it at
; %y + 127 (align 1). The expected output loads with vldrb.u8, forms the
; store address with add.w r1, r0, #127, and stores with vstrb.8 using no
; immediate offset.
; NOTE(review): the offset is not folded into the store here; whether 127 is
; out of range for the selected addressing mode is not visible in this file -
; confirm against the backend.
1234 define i8* @strb8_127(i8* %y, i8* %x) {
1235 ; CHECK-LABEL: strb8_127:
1236 ; CHECK: @ %bb.0: @ %entry
1237 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1238 ; CHECK-NEXT: add.w r1, r0, #127
1239 ; CHECK-NEXT: vstrb.8 q0, [r1]
1242 %z = getelementptr inbounds i8, i8* %y, i32 127
1243 %0 = bitcast i8* %x to <16 x i8>*
1244 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1245 %2 = bitcast i8* %z to <16 x i8>*
1246 store <16 x i8> %1, <16 x i8>* %2, align 1
; strb8_128: load a <16 x i8> vector from %x (align 1) and store it at
; %y + 128 (align 1). The expected output loads with vldrb.u8, forms the
; store address with add.w r1, r0, #128, and stores with vstrb.8 using no
; immediate offset.
1250 define i8* @strb8_128(i8* %y, i8* %x) {
1251 ; CHECK-LABEL: strb8_128:
1252 ; CHECK: @ %bb.0: @ %entry
1253 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1254 ; CHECK-NEXT: add.w r1, r0, #128
1255 ; CHECK-NEXT: vstrb.8 q0, [r1]
1258 %z = getelementptr inbounds i8, i8* %y, i32 128
1259 %0 = bitcast i8* %x to <16 x i8>*
1260 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1261 %2 = bitcast i8* %z to <16 x i8>*
1262 store <16 x i8> %1, <16 x i8>* %2, align 1
; strf32_4: load a <4 x float> vector from %x (align 4) and store it at
; %y + 4 (align 4). The expected output loads with vldrw.u32 and stores with
; vstrw.32 using the immediate offset #4 directly on r0.
1266 define i8* @strf32_4(i8* %y, i8* %x) {
1267 ; CHECK-LABEL: strf32_4:
1268 ; CHECK: @ %bb.0: @ %entry
1269 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1270 ; CHECK-NEXT: vstrw.32 q0, [r0, #4]
1273 %z = getelementptr inbounds i8, i8* %y, i32 4
1274 %0 = bitcast i8* %x to <4 x float>*
1275 %1 = load <4 x float>, <4 x float>* %0, align 4
1276 %2 = bitcast i8* %z to <4 x float>*
1277 store <4 x float> %1, <4 x float>* %2, align 4
; strf16_4: load a <8 x half> vector from %x (align 2) and store it at
; %y + 4 (align 2). The expected output loads with vldrh.u16 and stores with
; vstrh.16 using the immediate offset #4 directly on r0.
1281 define i8* @strf16_4(i8* %y, i8* %x) {
1282 ; CHECK-LABEL: strf16_4:
1283 ; CHECK: @ %bb.0: @ %entry
1284 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1285 ; CHECK-NEXT: vstrh.16 q0, [r0, #4]
1288 %z = getelementptr inbounds i8, i8* %y, i32 4
1289 %0 = bitcast i8* %x to <8 x half>*
1290 %1 = load <8 x half>, <8 x half>* %0, align 2
1291 %2 = bitcast i8* %z to <8 x half>*
1292 store <8 x half> %1, <8 x half>* %2, align 2
; strwi32_align1: load a <4 x i32> vector from %x (align 4) and store it at
; %y + 3 with only byte alignment (align 1). The expected output loads with
; vldrw.u32 and stores with the byte-element form vstrb.8 at [r0, #3].
1296 define i8* @strwi32_align1(i8* %y, i8* %x) {
1297 ; CHECK-LABEL: strwi32_align1:
1298 ; CHECK: @ %bb.0: @ %entry
1299 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1300 ; CHECK-NEXT: vstrb.8 q0, [r0, #3]
1303 %z = getelementptr inbounds i8, i8* %y, i32 3
1304 %0 = bitcast i8* %x to <4 x i32>*
1305 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1306 %2 = bitcast i8* %z to <4 x i32>*
1307 store <4 x i32> %1, <4 x i32>* %2, align 1
; strhi16_align1: load a <8 x i16> vector from %x (align 2) and store it at
; %y + 3 with only byte alignment (align 1). The expected output loads with
; vldrh.u16 and stores with the byte-element form vstrb.8 at [r0, #3].
1311 define i8* @strhi16_align1(i8* %y, i8* %x) {
1312 ; CHECK-LABEL: strhi16_align1:
1313 ; CHECK: @ %bb.0: @ %entry
1314 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1315 ; CHECK-NEXT: vstrb.8 q0, [r0, #3]
1318 %z = getelementptr inbounds i8, i8* %y, i32 3
1319 %0 = bitcast i8* %x to <8 x i16>*
1320 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1321 %2 = bitcast i8* %z to <8 x i16>*
1322 store <8 x i16> %1, <8 x i16>* %2, align 1
; strhi32_align1: load a <4 x i32> vector from %x (align 4), truncate it to
; <4 x i16>, and store the 8-byte result at %y + 3 with only byte alignment
; (align 1). The expected output does not store the vector to the
; destination directly: it reserves 8 bytes of stack, writes the narrowed
; vector there with vstrh.32, reloads it as two words with ldrd, and writes
; those words with str.w at offsets #3 and #7 from r0 before releasing the
; stack.
1326 define i8* @strhi32_align1(i8* %y, i8* %x) {
1327 ; CHECK-LABEL: strhi32_align1:
1328 ; CHECK: @ %bb.0: @ %entry
1329 ; CHECK-NEXT: .pad #8
1330 ; CHECK-NEXT: sub sp, #8
1331 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1332 ; CHECK-NEXT: mov r1, sp
1333 ; CHECK-NEXT: vstrh.32 q0, [r1]
1334 ; CHECK-NEXT: ldrd r1, r2, [sp]
1335 ; CHECK-NEXT: str.w r1, [r0, #3]
1336 ; CHECK-NEXT: str.w r2, [r0, #7]
1337 ; CHECK-NEXT: add sp, #8
1340 %z = getelementptr inbounds i8, i8* %y, i32 3
1341 %0 = bitcast i8* %x to <4 x i32>*
1342 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1343 %2 = bitcast i8* %z to <4 x i16>*
1344 %3 = trunc <4 x i32> %1 to <4 x i16>
1345 store <4 x i16> %3, <4 x i16>* %2, align 1
; strf32_align1: load a <4 x float> vector from %x (align 4) and store it at
; %y + 3 with only byte alignment (align 1). The expected output loads with
; vldrw.u32 and stores with the byte-element form vstrb.8 at [r0, #3].
1349 define i8* @strf32_align1(i8* %y, i8* %x) {
1350 ; CHECK-LABEL: strf32_align1:
1351 ; CHECK: @ %bb.0: @ %entry
1352 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1353 ; CHECK-NEXT: vstrb.8 q0, [r0, #3]
1356 %z = getelementptr inbounds i8, i8* %y, i32 3
1357 %0 = bitcast i8* %x to <4 x float>*
1358 %1 = load <4 x float>, <4 x float>* %0, align 4
1359 %2 = bitcast i8* %z to <4 x float>*
1360 store <4 x float> %1, <4 x float>* %2, align 1
; strf16_align1: load a <8 x half> vector from %x (align 2) and store it at
; %y + 3 with only byte alignment (align 1). The expected output loads with
; vldrh.u16 and stores with the byte-element form vstrb.8 at [r0, #3].
1364 define i8* @strf16_align1(i8* %y, i8* %x) {
1365 ; CHECK-LABEL: strf16_align1:
1366 ; CHECK: @ %bb.0: @ %entry
1367 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1368 ; CHECK-NEXT: vstrb.8 q0, [r0, #3]
1371 %z = getelementptr inbounds i8, i8* %y, i32 3
1372 %0 = bitcast i8* %x to <8 x half>*
1373 %1 = load <8 x half>, <8 x half>* %0, align 2
1374 %2 = bitcast i8* %z to <8 x half>*
1375 store <8 x half> %1, <8 x half>* %2, align 1