1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
3 ; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
; Loads <4 x i32> (align 4) from %x + 4 and stores it to %y (align 4).
; Expected codegen folds the +4 offset into a pre-indexed writeback load
; (vldrw.u32 with [r0, #4]!).
5 define i8* @ldrwu32_4(i8* %x, i8* %y) {
6 ; CHECK-LABEL: ldrwu32_4:
7 ; CHECK: @ %bb.0: @ %entry
8 ; CHECK-NEXT: vldrw.u32 q0, [r0, #4]!
9 ; CHECK-NEXT: vstrw.32 q0, [r1]
12 %z = getelementptr inbounds i8, i8* %x, i32 4
13 %0 = bitcast i8* %z to <4 x i32>*
14 %1 = load <4 x i32>, <4 x i32>* %0, align 4
15 %2 = bitcast i8* %y to <4 x i32>*
16 store <4 x i32> %1, <4 x i32>* %2, align 4
; Loads <4 x i32> (align 4) from %x + 3 and stores it to %y (align 4).
; The LE run expects a byte-wise writeback load (vldrb.u8 with [r0, #3]!);
; the BE run expects a separate adds followed by an unindexed vldrw.u32.
20 define i8* @ldrwu32_3(i8* %x, i8* %y) {
21 ; CHECK-LE-LABEL: ldrwu32_3:
22 ; CHECK-LE: @ %bb.0: @ %entry
23 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
24 ; CHECK-LE-NEXT: vstrw.32 q0, [r1]
25 ; CHECK-LE-NEXT: bx lr
27 ; CHECK-BE-LABEL: ldrwu32_3:
28 ; CHECK-BE: @ %bb.0: @ %entry
29 ; CHECK-BE-NEXT: adds r0, #3
30 ; CHECK-BE-NEXT: vldrw.u32 q0, [r0]
31 ; CHECK-BE-NEXT: vstrw.32 q0, [r1]
32 ; CHECK-BE-NEXT: bx lr
34 %z = getelementptr inbounds i8, i8* %x, i32 3
35 %0 = bitcast i8* %z to <4 x i32>*
36 %1 = load <4 x i32>, <4 x i32>* %0, align 4
37 %2 = bitcast i8* %y to <4 x i32>*
38 store <4 x i32> %1, <4 x i32>* %2, align 4
; Loads <4 x i32> (align 4) from %x - 4 and stores it to %y (align 4).
; Expected codegen folds the negative offset into a pre-indexed writeback
; load (vldrw.u32 with [r0, #-4]!).
42 define i8* @ldrwu32_m4(i8* %x, i8* %y) {
43 ; CHECK-LABEL: ldrwu32_m4:
44 ; CHECK: @ %bb.0: @ %entry
45 ; CHECK-NEXT: vldrw.u32 q0, [r0, #-4]!
46 ; CHECK-NEXT: vstrw.32 q0, [r1]
49 %z = getelementptr inbounds i8, i8* %x, i32 -4
50 %0 = bitcast i8* %z to <4 x i32>*
51 %1 = load <4 x i32>, <4 x i32>* %0, align 4
52 %2 = bitcast i8* %y to <4 x i32>*
53 store <4 x i32> %1, <4 x i32>* %2, align 4
; Loads <4 x i32> (align 4) from %x + 508 and stores it to %y (align 4).
; Expected codegen still uses the pre-indexed writeback form
; (vldrw.u32 with [r0, #508]!); compare with ldrwu32_512.
57 define i8* @ldrwu32_508(i8* %x, i8* %y) {
58 ; CHECK-LABEL: ldrwu32_508:
59 ; CHECK: @ %bb.0: @ %entry
60 ; CHECK-NEXT: vldrw.u32 q0, [r0, #508]!
61 ; CHECK-NEXT: vstrw.32 q0, [r1]
64 %z = getelementptr inbounds i8, i8* %x, i32 508
65 %0 = bitcast i8* %z to <4 x i32>*
66 %1 = load <4 x i32>, <4 x i32>* %0, align 4
67 %2 = bitcast i8* %y to <4 x i32>*
68 store <4 x i32> %1, <4 x i32>* %2, align 4
; Loads <4 x i32> (align 4) from %x + 512 and stores it to %y (align 4).
; Expected codegen does not use writeback here. The address is computed with
; a separate add.w and the load is an unindexed vldrw.u32
; (presumably 512 is beyond the encodable offset - confirm against the ISA).
72 define i8* @ldrwu32_512(i8* %x, i8* %y) {
73 ; CHECK-LABEL: ldrwu32_512:
74 ; CHECK: @ %bb.0: @ %entry
75 ; CHECK-NEXT: add.w r0, r0, #512
76 ; CHECK-NEXT: vldrw.u32 q0, [r0]
77 ; CHECK-NEXT: vstrw.32 q0, [r1]
80 %z = getelementptr inbounds i8, i8* %x, i32 512
81 %0 = bitcast i8* %z to <4 x i32>*
82 %1 = load <4 x i32>, <4 x i32>* %0, align 4
83 %2 = bitcast i8* %y to <4 x i32>*
84 store <4 x i32> %1, <4 x i32>* %2, align 4
; Loads <4 x i32> (align 4) from %x - 508 and stores it to %y (align 4).
; Expected codegen still uses the pre-indexed writeback form
; (vldrw.u32 with [r0, #-508]!); compare with ldrwu32_m512.
88 define i8* @ldrwu32_m508(i8* %x, i8* %y) {
89 ; CHECK-LABEL: ldrwu32_m508:
90 ; CHECK: @ %bb.0: @ %entry
91 ; CHECK-NEXT: vldrw.u32 q0, [r0, #-508]!
92 ; CHECK-NEXT: vstrw.32 q0, [r1]
95 %z = getelementptr inbounds i8, i8* %x, i32 -508
96 %0 = bitcast i8* %z to <4 x i32>*
97 %1 = load <4 x i32>, <4 x i32>* %0, align 4
98 %2 = bitcast i8* %y to <4 x i32>*
99 store <4 x i32> %1, <4 x i32>* %2, align 4
; Loads <4 x i32> (align 4) from %x - 512 and stores it to %y (align 4).
; Expected codegen does not use writeback here. The address is computed with
; a separate sub.w and the load is an unindexed vldrw.u32.
103 define i8* @ldrwu32_m512(i8* %x, i8* %y) {
104 ; CHECK-LABEL: ldrwu32_m512:
105 ; CHECK: @ %bb.0: @ %entry
106 ; CHECK-NEXT: sub.w r0, r0, #512
107 ; CHECK-NEXT: vldrw.u32 q0, [r0]
108 ; CHECK-NEXT: vstrw.32 q0, [r1]
111 %z = getelementptr inbounds i8, i8* %x, i32 -512
112 %0 = bitcast i8* %z to <4 x i32>*
113 %1 = load <4 x i32>, <4 x i32>* %0, align 4
114 %2 = bitcast i8* %y to <4 x i32>*
115 store <4 x i32> %1, <4 x i32>* %2, align 4
; Loads <4 x i16> (align 2) from %x + 4, zero-extends to <4 x i32> and stores
; the result to %y. Expected codegen is a widening writeback load
; (vldrh.u32 with [r0, #4]!).
120 define i8* @ldrhu32_4(i8* %x, i8* %y) {
121 ; CHECK-LABEL: ldrhu32_4:
122 ; CHECK: @ %bb.0: @ %entry
123 ; CHECK-NEXT: vldrh.u32 q0, [r0, #4]!
124 ; CHECK-NEXT: vstrw.32 q0, [r1]
127 %z = getelementptr inbounds i8, i8* %x, i32 4
128 %0 = bitcast i8* %z to <4 x i16>*
129 %1 = load <4 x i16>, <4 x i16>* %0, align 2
130 %2 = zext <4 x i16> %1 to <4 x i32>
131 %3 = bitcast i8* %y to <4 x i32>*
132 store <4 x i32> %2, <4 x i32>* %3, align 4
; Loads <4 x i16> (align 2) from %x + 3, zero-extends to <4 x i32> and stores
; the result to %y. Expected codegen does not fold the odd offset. It uses a
; separate adds followed by an unindexed vldrh.u32.
136 define i8* @ldrhu32_3(i8* %x, i8* %y) {
137 ; CHECK-LABEL: ldrhu32_3:
138 ; CHECK: @ %bb.0: @ %entry
139 ; CHECK-NEXT: adds r0, #3
140 ; CHECK-NEXT: vldrh.u32 q0, [r0]
141 ; CHECK-NEXT: vstrw.32 q0, [r1]
144 %z = getelementptr inbounds i8, i8* %x, i32 3
145 %0 = bitcast i8* %z to <4 x i16>*
146 %1 = load <4 x i16>, <4 x i16>* %0, align 2
147 %2 = zext <4 x i16> %1 to <4 x i32>
148 %3 = bitcast i8* %y to <4 x i32>*
149 store <4 x i32> %2, <4 x i32>* %3, align 4
; Loads <4 x i16> (align 2) from %x + 2, zero-extends to <4 x i32> and stores
; the result to %y. Expected codegen is a widening writeback load
; (vldrh.u32 with [r0, #2]!).
153 define i8* @ldrhu32_2(i8* %x, i8* %y) {
154 ; CHECK-LABEL: ldrhu32_2:
155 ; CHECK: @ %bb.0: @ %entry
156 ; CHECK-NEXT: vldrh.u32 q0, [r0, #2]!
157 ; CHECK-NEXT: vstrw.32 q0, [r1]
160 %z = getelementptr inbounds i8, i8* %x, i32 2
161 %0 = bitcast i8* %z to <4 x i16>*
162 %1 = load <4 x i16>, <4 x i16>* %0, align 2
163 %2 = zext <4 x i16> %1 to <4 x i32>
164 %3 = bitcast i8* %y to <4 x i32>*
165 store <4 x i32> %2, <4 x i32>* %3, align 4
; Loads <4 x i16> (align 2) from %x + 254, zero-extends to <4 x i32> and
; stores the result to %y. Expected codegen still uses the writeback form
; (vldrh.u32 with [r0, #254]!); compare with ldrhu32_256.
169 define i8* @ldrhu32_254(i8* %x, i8* %y) {
170 ; CHECK-LABEL: ldrhu32_254:
171 ; CHECK: @ %bb.0: @ %entry
172 ; CHECK-NEXT: vldrh.u32 q0, [r0, #254]!
173 ; CHECK-NEXT: vstrw.32 q0, [r1]
176 %z = getelementptr inbounds i8, i8* %x, i32 254
177 %0 = bitcast i8* %z to <4 x i16>*
178 %1 = load <4 x i16>, <4 x i16>* %0, align 2
179 %2 = zext <4 x i16> %1 to <4 x i32>
180 %3 = bitcast i8* %y to <4 x i32>*
181 store <4 x i32> %2, <4 x i32>* %3, align 4
; Loads <4 x i16> (align 2) from %x + 256, zero-extends to <4 x i32> and
; stores the result to %y. Expected codegen does not use writeback here. It
; uses a separate add.w followed by an unindexed vldrh.u32.
185 define i8* @ldrhu32_256(i8* %x, i8* %y) {
186 ; CHECK-LABEL: ldrhu32_256:
187 ; CHECK: @ %bb.0: @ %entry
188 ; CHECK-NEXT: add.w r0, r0, #256
189 ; CHECK-NEXT: vldrh.u32 q0, [r0]
190 ; CHECK-NEXT: vstrw.32 q0, [r1]
193 %z = getelementptr inbounds i8, i8* %x, i32 256
194 %0 = bitcast i8* %z to <4 x i16>*
195 %1 = load <4 x i16>, <4 x i16>* %0, align 2
196 %2 = zext <4 x i16> %1 to <4 x i32>
197 %3 = bitcast i8* %y to <4 x i32>*
198 store <4 x i32> %2, <4 x i32>* %3, align 4
; Loads <4 x i16> (align 2) from %x + 4, sign-extends to <4 x i32> and stores
; the result to %y. Expected codegen is a sign-extending writeback load
; (vldrh.s32 with [r0, #4]!).
203 define i8* @ldrhs32_4(i8* %x, i8* %y) {
204 ; CHECK-LABEL: ldrhs32_4:
205 ; CHECK: @ %bb.0: @ %entry
206 ; CHECK-NEXT: vldrh.s32 q0, [r0, #4]!
207 ; CHECK-NEXT: vstrw.32 q0, [r1]
210 %z = getelementptr inbounds i8, i8* %x, i32 4
211 %0 = bitcast i8* %z to <4 x i16>*
212 %1 = load <4 x i16>, <4 x i16>* %0, align 2
213 %2 = sext <4 x i16> %1 to <4 x i32>
214 %3 = bitcast i8* %y to <4 x i32>*
215 store <4 x i32> %2, <4 x i32>* %3, align 4
; Loads <4 x i16> (align 2) from %x + 3, sign-extends to <4 x i32> and stores
; the result to %y. Expected codegen does not fold the odd offset. It uses a
; separate adds followed by an unindexed vldrh.s32.
219 define i8* @ldrhs32_3(i8* %x, i8* %y) {
220 ; CHECK-LABEL: ldrhs32_3:
221 ; CHECK: @ %bb.0: @ %entry
222 ; CHECK-NEXT: adds r0, #3
223 ; CHECK-NEXT: vldrh.s32 q0, [r0]
224 ; CHECK-NEXT: vstrw.32 q0, [r1]
227 %z = getelementptr inbounds i8, i8* %x, i32 3
228 %0 = bitcast i8* %z to <4 x i16>*
229 %1 = load <4 x i16>, <4 x i16>* %0, align 2
230 %2 = sext <4 x i16> %1 to <4 x i32>
231 %3 = bitcast i8* %y to <4 x i32>*
232 store <4 x i32> %2, <4 x i32>* %3, align 4
; Loads <4 x i16> (align 2) from %x + 2, sign-extends to <4 x i32> and stores
; the result to %y. Expected codegen is a sign-extending writeback load
; (vldrh.s32 with [r0, #2]!).
236 define i8* @ldrhs32_2(i8* %x, i8* %y) {
237 ; CHECK-LABEL: ldrhs32_2:
238 ; CHECK: @ %bb.0: @ %entry
239 ; CHECK-NEXT: vldrh.s32 q0, [r0, #2]!
240 ; CHECK-NEXT: vstrw.32 q0, [r1]
243 %z = getelementptr inbounds i8, i8* %x, i32 2
244 %0 = bitcast i8* %z to <4 x i16>*
245 %1 = load <4 x i16>, <4 x i16>* %0, align 2
246 %2 = sext <4 x i16> %1 to <4 x i32>
247 %3 = bitcast i8* %y to <4 x i32>*
248 store <4 x i32> %2, <4 x i32>* %3, align 4
; Loads <4 x i16> (align 2) from %x + 254, sign-extends to <4 x i32> and
; stores the result to %y. Expected codegen still uses the writeback form
; (vldrh.s32 with [r0, #254]!); compare with ldrhs32_256.
252 define i8* @ldrhs32_254(i8* %x, i8* %y) {
253 ; CHECK-LABEL: ldrhs32_254:
254 ; CHECK: @ %bb.0: @ %entry
255 ; CHECK-NEXT: vldrh.s32 q0, [r0, #254]!
256 ; CHECK-NEXT: vstrw.32 q0, [r1]
259 %z = getelementptr inbounds i8, i8* %x, i32 254
260 %0 = bitcast i8* %z to <4 x i16>*
261 %1 = load <4 x i16>, <4 x i16>* %0, align 2
262 %2 = sext <4 x i16> %1 to <4 x i32>
263 %3 = bitcast i8* %y to <4 x i32>*
264 store <4 x i32> %2, <4 x i32>* %3, align 4
; Loads <4 x i16> (align 2) from %x + 256, sign-extends to <4 x i32> and
; stores the result to %y. Expected codegen does not use writeback here. It
; uses a separate add.w followed by an unindexed vldrh.s32.
268 define i8* @ldrhs32_256(i8* %x, i8* %y) {
269 ; CHECK-LABEL: ldrhs32_256:
270 ; CHECK: @ %bb.0: @ %entry
271 ; CHECK-NEXT: add.w r0, r0, #256
272 ; CHECK-NEXT: vldrh.s32 q0, [r0]
273 ; CHECK-NEXT: vstrw.32 q0, [r1]
276 %z = getelementptr inbounds i8, i8* %x, i32 256
277 %0 = bitcast i8* %z to <4 x i16>*
278 %1 = load <4 x i16>, <4 x i16>* %0, align 2
279 %2 = sext <4 x i16> %1 to <4 x i32>
280 %3 = bitcast i8* %y to <4 x i32>*
281 store <4 x i32> %2, <4 x i32>* %3, align 4
; Loads <8 x i16> (align 2) from %x + 4 and stores it to %y (align 2).
; Expected codegen is a writeback load (vldrh.u16 with [r0, #4]!).
286 define i8* @ldrhu16_4(i8* %x, i8* %y) {
287 ; CHECK-LABEL: ldrhu16_4:
288 ; CHECK: @ %bb.0: @ %entry
289 ; CHECK-NEXT: vldrh.u16 q0, [r0, #4]!
290 ; CHECK-NEXT: vstrh.16 q0, [r1]
293 %z = getelementptr inbounds i8, i8* %x, i32 4
294 %0 = bitcast i8* %z to <8 x i16>*
295 %1 = load <8 x i16>, <8 x i16>* %0, align 2
296 %2 = bitcast i8* %y to <8 x i16>*
297 store <8 x i16> %1, <8 x i16>* %2, align 2
; Loads <8 x i16> (align 2) from %x + 3 and stores it to %y (align 2).
; The LE run expects a byte-wise writeback load (vldrb.u8 with [r0, #3]!);
; the BE run expects a separate adds followed by an unindexed vldrh.u16.
301 define i8* @ldrhu16_3(i8* %x, i8* %y) {
302 ; CHECK-LE-LABEL: ldrhu16_3:
303 ; CHECK-LE: @ %bb.0: @ %entry
304 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
305 ; CHECK-LE-NEXT: vstrh.16 q0, [r1]
306 ; CHECK-LE-NEXT: bx lr
308 ; CHECK-BE-LABEL: ldrhu16_3:
309 ; CHECK-BE: @ %bb.0: @ %entry
310 ; CHECK-BE-NEXT: adds r0, #3
311 ; CHECK-BE-NEXT: vldrh.u16 q0, [r0]
312 ; CHECK-BE-NEXT: vstrh.16 q0, [r1]
313 ; CHECK-BE-NEXT: bx lr
315 %z = getelementptr inbounds i8, i8* %x, i32 3
316 %0 = bitcast i8* %z to <8 x i16>*
317 %1 = load <8 x i16>, <8 x i16>* %0, align 2
318 %2 = bitcast i8* %y to <8 x i16>*
319 store <8 x i16> %1, <8 x i16>* %2, align 2
; Loads <8 x i16> (align 2) from %x + 2 and stores it to %y (align 2).
; Expected codegen is a writeback load (vldrh.u16 with [r0, #2]!).
323 define i8* @ldrhu16_2(i8* %x, i8* %y) {
324 ; CHECK-LABEL: ldrhu16_2:
325 ; CHECK: @ %bb.0: @ %entry
326 ; CHECK-NEXT: vldrh.u16 q0, [r0, #2]!
327 ; CHECK-NEXT: vstrh.16 q0, [r1]
330 %z = getelementptr inbounds i8, i8* %x, i32 2
331 %0 = bitcast i8* %z to <8 x i16>*
332 %1 = load <8 x i16>, <8 x i16>* %0, align 2
333 %2 = bitcast i8* %y to <8 x i16>*
334 store <8 x i16> %1, <8 x i16>* %2, align 2
; Loads <8 x i16> (align 2) from %x + 254 and stores it to %y (align 2).
; Expected codegen still uses the writeback form
; (vldrh.u16 with [r0, #254]!); compare with ldrhu16_256.
338 define i8* @ldrhu16_254(i8* %x, i8* %y) {
339 ; CHECK-LABEL: ldrhu16_254:
340 ; CHECK: @ %bb.0: @ %entry
341 ; CHECK-NEXT: vldrh.u16 q0, [r0, #254]!
342 ; CHECK-NEXT: vstrh.16 q0, [r1]
345 %z = getelementptr inbounds i8, i8* %x, i32 254
346 %0 = bitcast i8* %z to <8 x i16>*
347 %1 = load <8 x i16>, <8 x i16>* %0, align 2
348 %2 = bitcast i8* %y to <8 x i16>*
349 store <8 x i16> %1, <8 x i16>* %2, align 2
; Loads <8 x i16> (align 2) from %x + 256 and stores it to %y (align 2).
; Expected codegen does not use writeback here. It uses a separate add.w
; followed by an unindexed vldrh.u16.
353 define i8* @ldrhu16_256(i8* %x, i8* %y) {
354 ; CHECK-LABEL: ldrhu16_256:
355 ; CHECK: @ %bb.0: @ %entry
356 ; CHECK-NEXT: add.w r0, r0, #256
357 ; CHECK-NEXT: vldrh.u16 q0, [r0]
358 ; CHECK-NEXT: vstrh.16 q0, [r1]
361 %z = getelementptr inbounds i8, i8* %x, i32 256
362 %0 = bitcast i8* %z to <8 x i16>*
363 %1 = load <8 x i16>, <8 x i16>* %0, align 2
364 %2 = bitcast i8* %y to <8 x i16>*
365 store <8 x i16> %1, <8 x i16>* %2, align 2
; Loads <4 x i8> (align 1) from %x + 4, zero-extends to <4 x i32> and stores
; the result to %y. Expected codegen is a widening writeback load
; (vldrb.u32 with [r0, #4]!).
370 define i8* @ldrbu32_4(i8* %x, i8* %y) {
371 ; CHECK-LABEL: ldrbu32_4:
372 ; CHECK: @ %bb.0: @ %entry
373 ; CHECK-NEXT: vldrb.u32 q0, [r0, #4]!
374 ; CHECK-NEXT: vstrw.32 q0, [r1]
377 %z = getelementptr inbounds i8, i8* %x, i32 4
378 %0 = bitcast i8* %z to <4 x i8>*
379 %1 = load <4 x i8>, <4 x i8>* %0, align 1
380 %2 = zext <4 x i8> %1 to <4 x i32>
381 %3 = bitcast i8* %y to <4 x i32>*
382 store <4 x i32> %2, <4 x i32>* %3, align 4
; Loads <4 x i8> (align 1) from %x + 3, zero-extends to <4 x i32> and stores
; the result to %y. Expected codegen folds the odd offset into a writeback
; load (vldrb.u32 with [r0, #3]!).
386 define i8* @ldrbu32_3(i8* %x, i8* %y) {
387 ; CHECK-LABEL: ldrbu32_3:
388 ; CHECK: @ %bb.0: @ %entry
389 ; CHECK-NEXT: vldrb.u32 q0, [r0, #3]!
390 ; CHECK-NEXT: vstrw.32 q0, [r1]
393 %z = getelementptr inbounds i8, i8* %x, i32 3
394 %0 = bitcast i8* %z to <4 x i8>*
395 %1 = load <4 x i8>, <4 x i8>* %0, align 1
396 %2 = zext <4 x i8> %1 to <4 x i32>
397 %3 = bitcast i8* %y to <4 x i32>*
398 store <4 x i32> %2, <4 x i32>* %3, align 4
; Loads <4 x i8> (align 1) from %x + 127, zero-extends to <4 x i32> and
; stores the result to %y. Expected codegen still uses the writeback form
; (vldrb.u32 with [r0, #127]!); compare with ldrbu32_128.
402 define i8* @ldrbu32_127(i8* %x, i8* %y) {
403 ; CHECK-LABEL: ldrbu32_127:
404 ; CHECK: @ %bb.0: @ %entry
405 ; CHECK-NEXT: vldrb.u32 q0, [r0, #127]!
406 ; CHECK-NEXT: vstrw.32 q0, [r1]
409 %z = getelementptr inbounds i8, i8* %x, i32 127
410 %0 = bitcast i8* %z to <4 x i8>*
411 %1 = load <4 x i8>, <4 x i8>* %0, align 1
412 %2 = zext <4 x i8> %1 to <4 x i32>
413 %3 = bitcast i8* %y to <4 x i32>*
414 store <4 x i32> %2, <4 x i32>* %3, align 4
; Loads <4 x i8> (align 1) from %x + 128, zero-extends to <4 x i32> and
; stores the result to %y. Expected codegen does not use writeback here. It
; uses a separate adds followed by an unindexed vldrb.u32.
418 define i8* @ldrbu32_128(i8* %x, i8* %y) {
419 ; CHECK-LABEL: ldrbu32_128:
420 ; CHECK: @ %bb.0: @ %entry
421 ; CHECK-NEXT: adds r0, #128
422 ; CHECK-NEXT: vldrb.u32 q0, [r0]
423 ; CHECK-NEXT: vstrw.32 q0, [r1]
426 %z = getelementptr inbounds i8, i8* %x, i32 128
427 %0 = bitcast i8* %z to <4 x i8>*
428 %1 = load <4 x i8>, <4 x i8>* %0, align 1
429 %2 = zext <4 x i8> %1 to <4 x i32>
430 %3 = bitcast i8* %y to <4 x i32>*
431 store <4 x i32> %2, <4 x i32>* %3, align 4
; Loads <4 x i8> (align 1) from %x + 4, sign-extends to <4 x i32> and stores
; the result to %y. Expected codegen is a sign-extending writeback load
; (vldrb.s32 with [r0, #4]!).
436 define i8* @ldrbs32_4(i8* %x, i8* %y) {
437 ; CHECK-LABEL: ldrbs32_4:
438 ; CHECK: @ %bb.0: @ %entry
439 ; CHECK-NEXT: vldrb.s32 q0, [r0, #4]!
440 ; CHECK-NEXT: vstrw.32 q0, [r1]
443 %z = getelementptr inbounds i8, i8* %x, i32 4
444 %0 = bitcast i8* %z to <4 x i8>*
445 %1 = load <4 x i8>, <4 x i8>* %0, align 1
446 %2 = sext <4 x i8> %1 to <4 x i32>
447 %3 = bitcast i8* %y to <4 x i32>*
448 store <4 x i32> %2, <4 x i32>* %3, align 4
; Loads <4 x i8> (align 1) from %x + 3, sign-extends to <4 x i32> and stores
; the result to %y. Expected codegen folds the odd offset into a writeback
; load (vldrb.s32 with [r0, #3]!).
452 define i8* @ldrbs32_3(i8* %x, i8* %y) {
453 ; CHECK-LABEL: ldrbs32_3:
454 ; CHECK: @ %bb.0: @ %entry
455 ; CHECK-NEXT: vldrb.s32 q0, [r0, #3]!
456 ; CHECK-NEXT: vstrw.32 q0, [r1]
459 %z = getelementptr inbounds i8, i8* %x, i32 3
460 %0 = bitcast i8* %z to <4 x i8>*
461 %1 = load <4 x i8>, <4 x i8>* %0, align 1
462 %2 = sext <4 x i8> %1 to <4 x i32>
463 %3 = bitcast i8* %y to <4 x i32>*
464 store <4 x i32> %2, <4 x i32>* %3, align 4
; Loads <4 x i8> (align 1) from %x + 127, sign-extends to <4 x i32> and
; stores the result to %y. Expected codegen still uses the writeback form
; (vldrb.s32 with [r0, #127]!); compare with ldrbs32_128.
468 define i8* @ldrbs32_127(i8* %x, i8* %y) {
469 ; CHECK-LABEL: ldrbs32_127:
470 ; CHECK: @ %bb.0: @ %entry
471 ; CHECK-NEXT: vldrb.s32 q0, [r0, #127]!
472 ; CHECK-NEXT: vstrw.32 q0, [r1]
475 %z = getelementptr inbounds i8, i8* %x, i32 127
476 %0 = bitcast i8* %z to <4 x i8>*
477 %1 = load <4 x i8>, <4 x i8>* %0, align 1
478 %2 = sext <4 x i8> %1 to <4 x i32>
479 %3 = bitcast i8* %y to <4 x i32>*
480 store <4 x i32> %2, <4 x i32>* %3, align 4
; Loads <4 x i8> (align 1) from %x + 128, sign-extends to <4 x i32> and
; stores the result to %y. Expected codegen does not use writeback here. It
; uses a separate adds followed by an unindexed vldrb.s32.
484 define i8* @ldrbs32_128(i8* %x, i8* %y) {
485 ; CHECK-LABEL: ldrbs32_128:
486 ; CHECK: @ %bb.0: @ %entry
487 ; CHECK-NEXT: adds r0, #128
488 ; CHECK-NEXT: vldrb.s32 q0, [r0]
489 ; CHECK-NEXT: vstrw.32 q0, [r1]
492 %z = getelementptr inbounds i8, i8* %x, i32 128
493 %0 = bitcast i8* %z to <4 x i8>*
494 %1 = load <4 x i8>, <4 x i8>* %0, align 1
495 %2 = sext <4 x i8> %1 to <4 x i32>
496 %3 = bitcast i8* %y to <4 x i32>*
497 store <4 x i32> %2, <4 x i32>* %3, align 4
; Loads <8 x i8> (align 1) from %x + 4, zero-extends to <8 x i16> and stores
; the result to %y. Expected codegen is a widening writeback load
; (vldrb.u16 with [r0, #4]!).
502 define i8* @ldrbu16_4(i8* %x, i8* %y) {
503 ; CHECK-LABEL: ldrbu16_4:
504 ; CHECK: @ %bb.0: @ %entry
505 ; CHECK-NEXT: vldrb.u16 q0, [r0, #4]!
506 ; CHECK-NEXT: vstrh.16 q0, [r1]
509 %z = getelementptr inbounds i8, i8* %x, i32 4
510 %0 = bitcast i8* %z to <8 x i8>*
511 %1 = load <8 x i8>, <8 x i8>* %0, align 1
512 %2 = zext <8 x i8> %1 to <8 x i16>
513 %3 = bitcast i8* %y to <8 x i16>*
514 store <8 x i16> %2, <8 x i16>* %3, align 2
; Loads <8 x i8> (align 1) from %x + 3, zero-extends to <8 x i16> and stores
; the result to %y. Expected codegen folds the odd offset into a writeback
; load (vldrb.u16 with [r0, #3]!).
518 define i8* @ldrbu16_3(i8* %x, i8* %y) {
519 ; CHECK-LABEL: ldrbu16_3:
520 ; CHECK: @ %bb.0: @ %entry
521 ; CHECK-NEXT: vldrb.u16 q0, [r0, #3]!
522 ; CHECK-NEXT: vstrh.16 q0, [r1]
525 %z = getelementptr inbounds i8, i8* %x, i32 3
526 %0 = bitcast i8* %z to <8 x i8>*
527 %1 = load <8 x i8>, <8 x i8>* %0, align 1
528 %2 = zext <8 x i8> %1 to <8 x i16>
529 %3 = bitcast i8* %y to <8 x i16>*
530 store <8 x i16> %2, <8 x i16>* %3, align 2
; Loads <8 x i8> (align 1) from %x + 127, zero-extends to <8 x i16> and
; stores the result to %y. Expected codegen still uses the writeback form
; (vldrb.u16 with [r0, #127]!); compare with ldrbu16_128.
534 define i8* @ldrbu16_127(i8* %x, i8* %y) {
535 ; CHECK-LABEL: ldrbu16_127:
536 ; CHECK: @ %bb.0: @ %entry
537 ; CHECK-NEXT: vldrb.u16 q0, [r0, #127]!
538 ; CHECK-NEXT: vstrh.16 q0, [r1]
541 %z = getelementptr inbounds i8, i8* %x, i32 127
542 %0 = bitcast i8* %z to <8 x i8>*
543 %1 = load <8 x i8>, <8 x i8>* %0, align 1
544 %2 = zext <8 x i8> %1 to <8 x i16>
545 %3 = bitcast i8* %y to <8 x i16>*
546 store <8 x i16> %2, <8 x i16>* %3, align 2
; Loads <8 x i8> (align 1) from %x + 128, zero-extends to <8 x i16> and
; stores the result to %y. Expected codegen does not use writeback here. It
; uses a separate adds followed by an unindexed vldrb.u16.
550 define i8* @ldrbu16_128(i8* %x, i8* %y) {
551 ; CHECK-LABEL: ldrbu16_128:
552 ; CHECK: @ %bb.0: @ %entry
553 ; CHECK-NEXT: adds r0, #128
554 ; CHECK-NEXT: vldrb.u16 q0, [r0]
555 ; CHECK-NEXT: vstrh.16 q0, [r1]
558 %z = getelementptr inbounds i8, i8* %x, i32 128
559 %0 = bitcast i8* %z to <8 x i8>*
560 %1 = load <8 x i8>, <8 x i8>* %0, align 1
561 %2 = zext <8 x i8> %1 to <8 x i16>
562 %3 = bitcast i8* %y to <8 x i16>*
563 store <8 x i16> %2, <8 x i16>* %3, align 2
; Loads <8 x i8> (align 1) from %x + 4, sign-extends to <8 x i16> and stores
; the result to %y. Expected codegen is a sign-extending writeback load
; (vldrb.s16 with [r0, #4]!).
568 define i8* @ldrbs16_4(i8* %x, i8* %y) {
569 ; CHECK-LABEL: ldrbs16_4:
570 ; CHECK: @ %bb.0: @ %entry
571 ; CHECK-NEXT: vldrb.s16 q0, [r0, #4]!
572 ; CHECK-NEXT: vstrh.16 q0, [r1]
575 %z = getelementptr inbounds i8, i8* %x, i32 4
576 %0 = bitcast i8* %z to <8 x i8>*
577 %1 = load <8 x i8>, <8 x i8>* %0, align 1
578 %2 = sext <8 x i8> %1 to <8 x i16>
579 %3 = bitcast i8* %y to <8 x i16>*
580 store <8 x i16> %2, <8 x i16>* %3, align 2
; Loads <8 x i8> (align 1) from %x + 3, sign-extends to <8 x i16> and stores
; the result to %y. Expected codegen folds the odd offset into a writeback
; load (vldrb.s16 with [r0, #3]!).
584 define i8* @ldrbs16_3(i8* %x, i8* %y) {
585 ; CHECK-LABEL: ldrbs16_3:
586 ; CHECK: @ %bb.0: @ %entry
587 ; CHECK-NEXT: vldrb.s16 q0, [r0, #3]!
588 ; CHECK-NEXT: vstrh.16 q0, [r1]
591 %z = getelementptr inbounds i8, i8* %x, i32 3
592 %0 = bitcast i8* %z to <8 x i8>*
593 %1 = load <8 x i8>, <8 x i8>* %0, align 1
594 %2 = sext <8 x i8> %1 to <8 x i16>
595 %3 = bitcast i8* %y to <8 x i16>*
596 store <8 x i16> %2, <8 x i16>* %3, align 2
; Loads <8 x i8> (align 1) from %x + 127, sign-extends to <8 x i16> and
; stores the result to %y. Expected codegen still uses the writeback form
; (vldrb.s16 with [r0, #127]!); compare with ldrbs16_128.
600 define i8* @ldrbs16_127(i8* %x, i8* %y) {
601 ; CHECK-LABEL: ldrbs16_127:
602 ; CHECK: @ %bb.0: @ %entry
603 ; CHECK-NEXT: vldrb.s16 q0, [r0, #127]!
604 ; CHECK-NEXT: vstrh.16 q0, [r1]
607 %z = getelementptr inbounds i8, i8* %x, i32 127
608 %0 = bitcast i8* %z to <8 x i8>*
609 %1 = load <8 x i8>, <8 x i8>* %0, align 1
610 %2 = sext <8 x i8> %1 to <8 x i16>
611 %3 = bitcast i8* %y to <8 x i16>*
612 store <8 x i16> %2, <8 x i16>* %3, align 2
; Loads <8 x i8> (align 1) from %x + 128, sign-extends to <8 x i16> and
; stores the result to %y. Expected codegen does not use writeback here. It
; uses a separate adds followed by an unindexed vldrb.s16.
616 define i8* @ldrbs16_128(i8* %x, i8* %y) {
617 ; CHECK-LABEL: ldrbs16_128:
618 ; CHECK: @ %bb.0: @ %entry
619 ; CHECK-NEXT: adds r0, #128
620 ; CHECK-NEXT: vldrb.s16 q0, [r0]
621 ; CHECK-NEXT: vstrh.16 q0, [r1]
624 %z = getelementptr inbounds i8, i8* %x, i32 128
625 %0 = bitcast i8* %z to <8 x i8>*
626 %1 = load <8 x i8>, <8 x i8>* %0, align 1
627 %2 = sext <8 x i8> %1 to <8 x i16>
628 %3 = bitcast i8* %y to <8 x i16>*
629 store <8 x i16> %2, <8 x i16>* %3, align 2
; Loads <16 x i8> (align 1) from %x + 4 and stores it to %y (align 1).
; Expected codegen is a writeback load (vldrb.u8 with [r0, #4]!).
634 define i8* @ldrbu8_4(i8* %x, i8* %y) {
635 ; CHECK-LABEL: ldrbu8_4:
636 ; CHECK: @ %bb.0: @ %entry
637 ; CHECK-NEXT: vldrb.u8 q0, [r0, #4]!
638 ; CHECK-NEXT: vstrb.8 q0, [r1]
641 %z = getelementptr inbounds i8, i8* %x, i32 4
642 %0 = bitcast i8* %z to <16 x i8>*
643 %1 = load <16 x i8>, <16 x i8>* %0, align 1
644 %2 = bitcast i8* %y to <16 x i8>*
645 store <16 x i8> %1, <16 x i8>* %2, align 1
; Loads <16 x i8> (align 1) from %x + 3 and stores it to %y (align 1).
; Expected codegen folds the odd offset into a writeback load
; (vldrb.u8 with [r0, #3]!).
649 define i8* @ldrbu8_3(i8* %x, i8* %y) {
650 ; CHECK-LABEL: ldrbu8_3:
651 ; CHECK: @ %bb.0: @ %entry
652 ; CHECK-NEXT: vldrb.u8 q0, [r0, #3]!
653 ; CHECK-NEXT: vstrb.8 q0, [r1]
656 %z = getelementptr inbounds i8, i8* %x, i32 3
657 %0 = bitcast i8* %z to <16 x i8>*
658 %1 = load <16 x i8>, <16 x i8>* %0, align 1
659 %2 = bitcast i8* %y to <16 x i8>*
660 store <16 x i8> %1, <16 x i8>* %2, align 1
; Loads <16 x i8> (align 1) from %x + 127 and stores it to %y (align 1).
; Expected codegen still uses the writeback form
; (vldrb.u8 with [r0, #127]!); compare with ldrbu8_128.
664 define i8* @ldrbu8_127(i8* %x, i8* %y) {
665 ; CHECK-LABEL: ldrbu8_127:
666 ; CHECK: @ %bb.0: @ %entry
667 ; CHECK-NEXT: vldrb.u8 q0, [r0, #127]!
668 ; CHECK-NEXT: vstrb.8 q0, [r1]
671 %z = getelementptr inbounds i8, i8* %x, i32 127
672 %0 = bitcast i8* %z to <16 x i8>*
673 %1 = load <16 x i8>, <16 x i8>* %0, align 1
674 %2 = bitcast i8* %y to <16 x i8>*
675 store <16 x i8> %1, <16 x i8>* %2, align 1
; Loads <16 x i8> (align 1) from %x + 128 and stores it to %y (align 1).
; Expected codegen does not use writeback here. It uses a separate adds
; followed by an unindexed vldrb.u8.
679 define i8* @ldrbu8_128(i8* %x, i8* %y) {
680 ; CHECK-LABEL: ldrbu8_128:
681 ; CHECK: @ %bb.0: @ %entry
682 ; CHECK-NEXT: adds r0, #128
683 ; CHECK-NEXT: vldrb.u8 q0, [r0]
684 ; CHECK-NEXT: vstrb.8 q0, [r1]
687 %z = getelementptr inbounds i8, i8* %x, i32 128
688 %0 = bitcast i8* %z to <16 x i8>*
689 %1 = load <16 x i8>, <16 x i8>* %0, align 1
690 %2 = bitcast i8* %y to <16 x i8>*
691 store <16 x i8> %1, <16 x i8>* %2, align 1
; Floating-point variant. Loads <4 x float> (align 4) from %x + 4 and stores
; it to %y (align 4). Expected codegen matches the integer case, a writeback
; vldrw.u32 with [r0, #4]!.
695 define i8* @ldrwf32_4(i8* %x, i8* %y) {
696 ; CHECK-LABEL: ldrwf32_4:
697 ; CHECK: @ %bb.0: @ %entry
698 ; CHECK-NEXT: vldrw.u32 q0, [r0, #4]!
699 ; CHECK-NEXT: vstrw.32 q0, [r1]
702 %z = getelementptr inbounds i8, i8* %x, i32 4
703 %0 = bitcast i8* %z to <4 x float>*
704 %1 = load <4 x float>, <4 x float>* %0, align 4
705 %2 = bitcast i8* %y to <4 x float>*
706 store <4 x float> %1, <4 x float>* %2, align 4
; Half-precision variant. Loads <8 x half> (align 2) from %x + 4 and stores
; it to %y (align 2). Expected codegen is a writeback vldrh.u16 with
; [r0, #4]!.
710 define i8* @ldrwf16_4(i8* %x, i8* %y) {
711 ; CHECK-LABEL: ldrwf16_4:
712 ; CHECK: @ %bb.0: @ %entry
713 ; CHECK-NEXT: vldrh.u16 q0, [r0, #4]!
714 ; CHECK-NEXT: vstrh.16 q0, [r1]
717 %z = getelementptr inbounds i8, i8* %x, i32 4
718 %0 = bitcast i8* %z to <8 x half>*
719 %1 = load <8 x half>, <8 x half>* %0, align 2
720 %2 = bitcast i8* %y to <8 x half>*
721 store <8 x half> %1, <8 x half>* %2, align 2
; Under-aligned case. Loads <4 x i32> with align 1 from %x + 3 and stores it
; to %y (align 4).
; The LE run expects a byte-wise writeback load (vldrb.u8 with [r0, #3]!).
; The BE run expects an offset (non-writeback) vldrb.u8, a separate adds to
; update r0, and a vrev32.8 before the store.
725 define i8* @ldrwi32_align1(i8* %x, i8* %y) {
726 ; CHECK-LE-LABEL: ldrwi32_align1:
727 ; CHECK-LE: @ %bb.0: @ %entry
728 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
729 ; CHECK-LE-NEXT: vstrw.32 q0, [r1]
730 ; CHECK-LE-NEXT: bx lr
732 ; CHECK-BE-LABEL: ldrwi32_align1:
733 ; CHECK-BE: @ %bb.0: @ %entry
734 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
735 ; CHECK-BE-NEXT: adds r0, #3
736 ; CHECK-BE-NEXT: vrev32.8 q0, q0
737 ; CHECK-BE-NEXT: vstrw.32 q0, [r1]
738 ; CHECK-BE-NEXT: bx lr
740 %z = getelementptr inbounds i8, i8* %x, i32 3
741 %0 = bitcast i8* %z to <4 x i32>*
742 %1 = load <4 x i32>, <4 x i32>* %0, align 1
743 %2 = bitcast i8* %y to <4 x i32>*
744 store <4 x i32> %1, <4 x i32>* %2, align 4
; Under-aligned case. Loads <8 x i16> with align 1 from %x + 3 and stores it
; to %y (align 2).
; The LE run expects a byte-wise writeback load (vldrb.u8 with [r0, #3]!).
; The BE run expects an offset (non-writeback) vldrb.u8, a separate adds to
; update r0, and a vrev16.8 before the store.
748 define i8* @ldrhi16_align1(i8* %x, i8* %y) {
749 ; CHECK-LE-LABEL: ldrhi16_align1:
750 ; CHECK-LE: @ %bb.0: @ %entry
751 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
752 ; CHECK-LE-NEXT: vstrh.16 q0, [r1]
753 ; CHECK-LE-NEXT: bx lr
755 ; CHECK-BE-LABEL: ldrhi16_align1:
756 ; CHECK-BE: @ %bb.0: @ %entry
757 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
758 ; CHECK-BE-NEXT: adds r0, #3
759 ; CHECK-BE-NEXT: vrev16.8 q0, q0
760 ; CHECK-BE-NEXT: vstrh.16 q0, [r1]
761 ; CHECK-BE-NEXT: bx lr
763 %z = getelementptr inbounds i8, i8* %x, i32 3
764 %0 = bitcast i8* %z to <8 x i16>*
765 %1 = load <8 x i16>, <8 x i16>* %0, align 1
766 %2 = bitcast i8* %y to <8 x i16>*
767 store <8 x i16> %1, <8 x i16>* %2, align 2
; Under-aligned widening case. Loads <4 x i16> with align 1 from %x + 3,
; sign-extends to <4 x i32> and stores the result to %y (align 4).
; Expected codegen reserves 8 bytes of stack and copies the data there with
; two scalar ldr/str pairs (the first ldr uses writeback [r0, #3]!). It then
; performs the widening vldrh.s32 from the stack slot.
771 define i8* @ldrhi32_align1(i8* %x, i8* %y) {
772 ; CHECK-LABEL: ldrhi32_align1:
773 ; CHECK: @ %bb.0: @ %entry
774 ; CHECK-NEXT: .pad #8
775 ; CHECK-NEXT: sub sp, #8
776 ; CHECK-NEXT: ldr r2, [r0, #3]!
777 ; CHECK-NEXT: str r2, [sp]
778 ; CHECK-NEXT: ldr r2, [r0, #4]
779 ; CHECK-NEXT: str r2, [sp, #4]
780 ; CHECK-NEXT: mov r2, sp
781 ; CHECK-NEXT: vldrh.s32 q0, [r2]
782 ; CHECK-NEXT: vstrw.32 q0, [r1]
783 ; CHECK-NEXT: add sp, #8
786 %z = getelementptr inbounds i8, i8* %x, i32 3
787 %0 = bitcast i8* %z to <4 x i16>*
788 %1 = load <4 x i16>, <4 x i16>* %0, align 1
789 %2 = bitcast i8* %y to <4 x i32>*
790 %3 = sext <4 x i16> %1 to <4 x i32>
791 store <4 x i32> %3, <4 x i32>* %2, align 4
; Under-aligned floating-point case. Loads <4 x float> with align 1 from
; %x + 3 and stores it to %y (align 4).
; The LE run expects a byte-wise writeback load (vldrb.u8 with [r0, #3]!).
; The BE run expects an offset (non-writeback) vldrb.u8, a separate adds to
; update r0, and a vrev32.8 before the store.
795 define i8* @ldrf32_align1(i8* %x, i8* %y) {
796 ; CHECK-LE-LABEL: ldrf32_align1:
797 ; CHECK-LE: @ %bb.0: @ %entry
798 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
799 ; CHECK-LE-NEXT: vstrw.32 q0, [r1]
800 ; CHECK-LE-NEXT: bx lr
802 ; CHECK-BE-LABEL: ldrf32_align1:
803 ; CHECK-BE: @ %bb.0: @ %entry
804 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
805 ; CHECK-BE-NEXT: adds r0, #3
806 ; CHECK-BE-NEXT: vrev32.8 q0, q0
807 ; CHECK-BE-NEXT: vstrw.32 q0, [r1]
808 ; CHECK-BE-NEXT: bx lr
810 %z = getelementptr inbounds i8, i8* %x, i32 3
811 %0 = bitcast i8* %z to <4 x float>*
812 %1 = load <4 x float>, <4 x float>* %0, align 1
813 %2 = bitcast i8* %y to <4 x float>*
814 store <4 x float> %1, <4 x float>* %2, align 4
; Under-aligned half-precision case. Loads <8 x half> with align 1 from
; %x + 3 and stores it to %y (align 2).
; The LE run expects a byte-wise writeback load (vldrb.u8 with [r0, #3]!).
; The BE run expects an offset (non-writeback) vldrb.u8, a separate adds to
; update r0, and a vrev16.8 before the store.
818 define i8* @ldrf16_align1(i8* %x, i8* %y) {
819 ; CHECK-LE-LABEL: ldrf16_align1:
820 ; CHECK-LE: @ %bb.0: @ %entry
821 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]!
822 ; CHECK-LE-NEXT: vstrh.16 q0, [r1]
823 ; CHECK-LE-NEXT: bx lr
825 ; CHECK-BE-LABEL: ldrf16_align1:
826 ; CHECK-BE: @ %bb.0: @ %entry
827 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
828 ; CHECK-BE-NEXT: adds r0, #3
829 ; CHECK-BE-NEXT: vrev16.8 q0, q0
830 ; CHECK-BE-NEXT: vstrh.16 q0, [r1]
831 ; CHECK-BE-NEXT: bx lr
833 %z = getelementptr inbounds i8, i8* %x, i32 3
834 %0 = bitcast i8* %z to <8 x half>*
835 %1 = load <8 x half>, <8 x half>* %0, align 1
836 %2 = bitcast i8* %y to <8 x half>*
837 store <8 x half> %1, <8 x half>* %2, align 2
; Over-aligned case. Loads <8 x i16> with align 8 from %x + 4 and stores it
; to %y (align 2).
; The LE run expects a word-wise writeback load (vldrw.u32 with [r0, #4]!);
; the BE run expects a writeback vldrh.u16 with [r0, #4]!.
841 define i8* @ldrh16_align8(i8* %x, i8* %y) {
842 ; CHECK-LE-LABEL: ldrh16_align8:
843 ; CHECK-LE: @ %bb.0: @ %entry
844 ; CHECK-LE-NEXT: vldrw.u32 q0, [r0, #4]!
845 ; CHECK-LE-NEXT: vstrh.16 q0, [r1]
846 ; CHECK-LE-NEXT: bx lr
848 ; CHECK-BE-LABEL: ldrh16_align8:
849 ; CHECK-BE: @ %bb.0: @ %entry
850 ; CHECK-BE-NEXT: vldrh.u16 q0, [r0, #4]!
851 ; CHECK-BE-NEXT: vstrh.16 q0, [r1]
852 ; CHECK-BE-NEXT: bx lr
854 %z = getelementptr inbounds i8, i8* %x, i32 4
855 %0 = bitcast i8* %z to <8 x i16>*
856 %1 = load <8 x i16>, <8 x i16>* %0, align 8
857 %2 = bitcast i8* %y to <8 x i16>*
858 store <8 x i16> %1, <8 x i16>* %2, align 2
; Store variant. Note the parameter order is (%y, %x), so the store base is
; in r0. Loads <4 x i32> (align 4) from %x and stores it to %y + 4 (align 4).
; The LE run expects a byte-wise writeback store (vstrb.8 with [r0, #4]!);
; the BE run expects a writeback vstrw.32 with [r0, #4]!.
866 define i8* @strw32_4(i8* %y, i8* %x) {
867 ; CHECK-LE-LABEL: strw32_4:
868 ; CHECK-LE: @ %bb.0: @ %entry
869 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
870 ; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]!
871 ; CHECK-LE-NEXT: bx lr
873 ; CHECK-BE-LABEL: strw32_4:
874 ; CHECK-BE: @ %bb.0: @ %entry
875 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
876 ; CHECK-BE-NEXT: vstrw.32 q0, [r0, #4]!
877 ; CHECK-BE-NEXT: bx lr
879 %z = getelementptr inbounds i8, i8* %y, i32 4
880 %0 = bitcast i8* %x to <4 x i32>*
881 %1 = load <4 x i32>, <4 x i32>* %0, align 4
882 %2 = bitcast i8* %z to <4 x i32>*
883 store <4 x i32> %1, <4 x i32>* %2, align 4
; Loads <4 x i32> (align 4) from %x and stores it to %y + 3 (align 4).
; The LE run expects a byte-wise writeback store (vstrb.8 with [r0, #3]!);
; the BE run expects a separate adds followed by an unindexed vstrw.32.
887 define i8* @strw32_3(i8* %y, i8* %x) {
888 ; CHECK-LE-LABEL: strw32_3:
889 ; CHECK-LE: @ %bb.0: @ %entry
890 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
891 ; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]!
892 ; CHECK-LE-NEXT: bx lr
894 ; CHECK-BE-LABEL: strw32_3:
895 ; CHECK-BE: @ %bb.0: @ %entry
896 ; CHECK-BE-NEXT: adds r0, #3
897 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
898 ; CHECK-BE-NEXT: vstrw.32 q0, [r0]
899 ; CHECK-BE-NEXT: bx lr
901 %z = getelementptr inbounds i8, i8* %y, i32 3
902 %0 = bitcast i8* %x to <4 x i32>*
903 %1 = load <4 x i32>, <4 x i32>* %0, align 4
904 %2 = bitcast i8* %z to <4 x i32>*
905 store <4 x i32> %1, <4 x i32>* %2, align 4
; Loads <4 x i32> (align 4) from %x and stores it to %y - 4 (align 4).
; The LE run expects a byte-wise writeback store (vstrb.8 with [r0, #-4]!);
; the BE run expects a writeback vstrw.32 with [r0, #-4]!.
909 define i8* @strw32_m4(i8* %y, i8* %x) {
910 ; CHECK-LE-LABEL: strw32_m4:
911 ; CHECK-LE: @ %bb.0: @ %entry
912 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
913 ; CHECK-LE-NEXT: vstrb.8 q0, [r0, #-4]!
914 ; CHECK-LE-NEXT: bx lr
916 ; CHECK-BE-LABEL: strw32_m4:
917 ; CHECK-BE: @ %bb.0: @ %entry
918 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
919 ; CHECK-BE-NEXT: vstrw.32 q0, [r0, #-4]!
920 ; CHECK-BE-NEXT: bx lr
922 %z = getelementptr inbounds i8, i8* %y, i32 -4
923 %0 = bitcast i8* %x to <4 x i32>*
924 %1 = load <4 x i32>, <4 x i32>* %0, align 4
925 %2 = bitcast i8* %z to <4 x i32>*
926 store <4 x i32> %1, <4 x i32>* %2, align 4
; Loads <4 x i32> (align 4) from %x and stores it to %y + 508 (align 4).
; Expected codegen uses a writeback store (vstrw.32 with [r0, #508]!);
; compare with strw32_512.
930 define i8* @strw32_508(i8* %y, i8* %x) {
931 ; CHECK-LABEL: strw32_508:
932 ; CHECK: @ %bb.0: @ %entry
933 ; CHECK-NEXT: vldrw.u32 q0, [r1]
934 ; CHECK-NEXT: vstrw.32 q0, [r0, #508]!
937 %z = getelementptr inbounds i8, i8* %y, i32 508
938 %0 = bitcast i8* %x to <4 x i32>*
939 %1 = load <4 x i32>, <4 x i32>* %0, align 4
940 %2 = bitcast i8* %z to <4 x i32>*
941 store <4 x i32> %1, <4 x i32>* %2, align 4
; Loads <4 x i32> (align 4) from %x and stores it to %y + 512 (align 4).
; Expected codegen does not use writeback here. It uses a separate add.w
; followed by an unindexed vstrw.32.
945 define i8* @strw32_512(i8* %y, i8* %x) {
946 ; CHECK-LABEL: strw32_512:
947 ; CHECK: @ %bb.0: @ %entry
948 ; CHECK-NEXT: add.w r0, r0, #512
949 ; CHECK-NEXT: vldrw.u32 q0, [r1]
950 ; CHECK-NEXT: vstrw.32 q0, [r0]
953 %z = getelementptr inbounds i8, i8* %y, i32 512
954 %0 = bitcast i8* %x to <4 x i32>*
955 %1 = load <4 x i32>, <4 x i32>* %0, align 4
956 %2 = bitcast i8* %z to <4 x i32>*
957 store <4 x i32> %1, <4 x i32>* %2, align 4
; Loads <4 x i32> (align 4) from %x and stores it to %y - 508 (align 4).
; Expected codegen uses a writeback store (vstrw.32 with [r0, #-508]!);
; compare with strw32_m512.
961 define i8* @strw32_m508(i8* %y, i8* %x) {
962 ; CHECK-LABEL: strw32_m508:
963 ; CHECK: @ %bb.0: @ %entry
964 ; CHECK-NEXT: vldrw.u32 q0, [r1]
965 ; CHECK-NEXT: vstrw.32 q0, [r0, #-508]!
968 %z = getelementptr inbounds i8, i8* %y, i32 -508
969 %0 = bitcast i8* %x to <4 x i32>*
970 %1 = load <4 x i32>, <4 x i32>* %0, align 4
971 %2 = bitcast i8* %z to <4 x i32>*
972 store <4 x i32> %1, <4 x i32>* %2, align 4
; Loads <4 x i32> (align 4) from %x and stores it to %y - 512 (align 4).
; Expected codegen does not use writeback here. It uses a separate sub.w
; followed by an unindexed vstrw.32.
976 define i8* @strw32_m512(i8* %y, i8* %x) {
977 ; CHECK-LABEL: strw32_m512:
978 ; CHECK: @ %bb.0: @ %entry
979 ; CHECK-NEXT: sub.w r0, r0, #512
980 ; CHECK-NEXT: vldrw.u32 q0, [r1]
981 ; CHECK-NEXT: vstrw.32 q0, [r0]
984 %z = getelementptr inbounds i8, i8* %y, i32 -512
985 %0 = bitcast i8* %x to <4 x i32>*
986 %1 = load <4 x i32>, <4 x i32>* %0, align 4
987 %2 = bitcast i8* %z to <4 x i32>*
988 store <4 x i32> %1, <4 x i32>* %2, align 4
; Loads <4 x i16> (align 2) from %x and stores it to %y + 4 (align 2).
; Expected codegen is a vldrh.u32 load followed by a writeback store
; (vstrh.32 with [r0, #4]!).
993 define i8* @strh32_4(i8* %y, i8* %x) {
994 ; CHECK-LABEL: strh32_4:
995 ; CHECK: @ %bb.0: @ %entry
996 ; CHECK-NEXT: vldrh.u32 q0, [r1]
997 ; CHECK-NEXT: vstrh.32 q0, [r0, #4]!
1000 %z = getelementptr inbounds i8, i8* %y, i32 4
1001 %0 = bitcast i8* %x to <4 x i16>*
1002 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1003 %2 = bitcast i8* %z to <4 x i16>*
1004 store <4 x i16> %1, <4 x i16>* %2, align 2
; Loads <4 x i16> (align 2) from %x and stores it to %y + 3 (align 2).
; Expected codegen does not fold the odd offset. It uses a separate adds
; followed by an unindexed vstrh.32.
1008 define i8* @strh32_3(i8* %y, i8* %x) {
1009 ; CHECK-LABEL: strh32_3:
1010 ; CHECK: @ %bb.0: @ %entry
1011 ; CHECK-NEXT: adds r0, #3
1012 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1013 ; CHECK-NEXT: vstrh.32 q0, [r0]
1016 %z = getelementptr inbounds i8, i8* %y, i32 3
1017 %0 = bitcast i8* %x to <4 x i16>*
1018 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1019 %2 = bitcast i8* %z to <4 x i16>*
1020 store <4 x i16> %1, <4 x i16>* %2, align 2
; Loads <4 x i16> (align 2) from %x and stores it to %y + 2 (align 2).
; Expected codegen is a vldrh.u32 load followed by a writeback store
; (vstrh.32 with [r0, #2]!).
1024 define i8* @strh32_2(i8* %y, i8* %x) {
1025 ; CHECK-LABEL: strh32_2:
1026 ; CHECK: @ %bb.0: @ %entry
1027 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1028 ; CHECK-NEXT: vstrh.32 q0, [r0, #2]!
1031 %z = getelementptr inbounds i8, i8* %y, i32 2
1032 %0 = bitcast i8* %x to <4 x i16>*
1033 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1034 %2 = bitcast i8* %z to <4 x i16>*
1035 store <4 x i16> %1, <4 x i16>* %2, align 2
; Loads <4 x i16> (align 2) from %x and stores it to %y + 254 (align 2).
; Expected codegen still uses a writeback store
; (vstrh.32 with [r0, #254]!); compare with strh32_256.
1039 define i8* @strh32_254(i8* %y, i8* %x) {
1040 ; CHECK-LABEL: strh32_254:
1041 ; CHECK: @ %bb.0: @ %entry
1042 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1043 ; CHECK-NEXT: vstrh.32 q0, [r0, #254]!
1046 %z = getelementptr inbounds i8, i8* %y, i32 254
1047 %0 = bitcast i8* %x to <4 x i16>*
1048 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1049 %2 = bitcast i8* %z to <4 x i16>*
1050 store <4 x i16> %1, <4 x i16>* %2, align 2
; Loads <4 x i16> (align 2) from %x and stores it to %y + 256 (align 2).
; Expected codegen does not use writeback here. It uses a separate add.w
; followed by an unindexed vstrh.32.
1054 define i8* @strh32_256(i8* %y, i8* %x) {
1055 ; CHECK-LABEL: strh32_256:
1056 ; CHECK: @ %bb.0: @ %entry
1057 ; CHECK-NEXT: add.w r0, r0, #256
1058 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1059 ; CHECK-NEXT: vstrh.32 q0, [r0]
1062 %z = getelementptr inbounds i8, i8* %y, i32 256
1063 %0 = bitcast i8* %x to <4 x i16>*
1064 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1065 %2 = bitcast i8* %z to <4 x i16>*
1066 store <4 x i16> %1, <4 x i16>* %2, align 2
; Loads <8 x i16> (align 2) from %x and stores it to %y + 4 (align 2).
; The LE run expects a byte-wise writeback store (vstrb.8 with [r0, #4]!);
; the BE run expects a writeback vstrh.16 with [r0, #4]!.
1071 define i8* @strh16_4(i8* %y, i8* %x) {
1072 ; CHECK-LE-LABEL: strh16_4:
1073 ; CHECK-LE: @ %bb.0: @ %entry
1074 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1075 ; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]!
1076 ; CHECK-LE-NEXT: bx lr
1078 ; CHECK-BE-LABEL: strh16_4:
1079 ; CHECK-BE: @ %bb.0: @ %entry
1080 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1081 ; CHECK-BE-NEXT: vstrh.16 q0, [r0, #4]!
1082 ; CHECK-BE-NEXT: bx lr
1084 %z = getelementptr inbounds i8, i8* %y, i32 4
1085 %0 = bitcast i8* %x to <8 x i16>*
1086 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1087 %2 = bitcast i8* %z to <8 x i16>*
1088 store <8 x i16> %1, <8 x i16>* %2, align 2
; Loads <8 x i16> (align 2) from %x and stores it to %y + 3 (align 2).
; The LE run expects a byte-wise writeback store (vstrb.8 with [r0, #3]!);
; the BE run expects a separate adds followed by an unindexed vstrh.16.
1092 define i8* @strh16_3(i8* %y, i8* %x) {
1093 ; CHECK-LE-LABEL: strh16_3:
1094 ; CHECK-LE: @ %bb.0: @ %entry
1095 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1096 ; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]!
1097 ; CHECK-LE-NEXT: bx lr
1099 ; CHECK-BE-LABEL: strh16_3:
1100 ; CHECK-BE: @ %bb.0: @ %entry
1101 ; CHECK-BE-NEXT: adds r0, #3
1102 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1103 ; CHECK-BE-NEXT: vstrh.16 q0, [r0]
1104 ; CHECK-BE-NEXT: bx lr
1106 %z = getelementptr inbounds i8, i8* %y, i32 3
1107 %0 = bitcast i8* %x to <8 x i16>*
1108 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1109 %2 = bitcast i8* %z to <8 x i16>*
1110 store <8 x i16> %1, <8 x i16>* %2, align 2
; Loads <8 x i16> (align 2) from %x and stores it to %y + 2 (align 2).
; The LE run expects a byte-wise writeback store (vstrb.8 with [r0, #2]!);
; the BE run expects a writeback vstrh.16 with [r0, #2]!.
1114 define i8* @strh16_2(i8* %y, i8* %x) {
1115 ; CHECK-LE-LABEL: strh16_2:
1116 ; CHECK-LE: @ %bb.0: @ %entry
1117 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1118 ; CHECK-LE-NEXT: vstrb.8 q0, [r0, #2]!
1119 ; CHECK-LE-NEXT: bx lr
1121 ; CHECK-BE-LABEL: strh16_2:
1122 ; CHECK-BE: @ %bb.0: @ %entry
1123 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1124 ; CHECK-BE-NEXT: vstrh.16 q0, [r0, #2]!
1125 ; CHECK-BE-NEXT: bx lr
1127 %z = getelementptr inbounds i8, i8* %y, i32 2
1128 %0 = bitcast i8* %x to <8 x i16>*
1129 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1130 %2 = bitcast i8* %z to <8 x i16>*
1131 store <8 x i16> %1, <8 x i16>* %2, align 2
; Loads <8 x i16> (align 2) from %x and stores it to %y + 254 (align 2).
; Expected codegen uses a writeback store (vstrh.16 with [r0, #254]!);
; compare with strh16_256.
1135 define i8* @strh16_254(i8* %y, i8* %x) {
1136 ; CHECK-LABEL: strh16_254:
1137 ; CHECK: @ %bb.0: @ %entry
1138 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1139 ; CHECK-NEXT: vstrh.16 q0, [r0, #254]!
1142 %z = getelementptr inbounds i8, i8* %y, i32 254
1143 %0 = bitcast i8* %x to <8 x i16>*
1144 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1145 %2 = bitcast i8* %z to <8 x i16>*
1146 store <8 x i16> %1, <8 x i16>* %2, align 2
; strh16_256: loads <8 x i16> from %x (align 2) and stores it at %y + 256 bytes
; (align 2). Both endiannesses share one set of assertions: the offset is NOT
; folded; a separate `add.w r0, r0, #256` is expected before an un-indexed
; vstrh.16.
; NOTE(review): presumably 256 is outside the pre-indexed immediate range --
; confirm against the MVE encoding.
1150 define i8* @strh16_256(i8* %y, i8* %x) {
1151 ; CHECK-LABEL: strh16_256:
1152 ; CHECK: @ %bb.0: @ %entry
1153 ; CHECK-NEXT: add.w r0, r0, #256
1154 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1155 ; CHECK-NEXT: vstrh.16 q0, [r0]
1158 %z = getelementptr inbounds i8, i8* %y, i32 256
1159 %0 = bitcast i8* %x to <8 x i16>*
1160 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1161 %2 = bitcast i8* %z to <8 x i16>*
1162 store <8 x i16> %1, <8 x i16>* %2, align 2
; strb32_4: loads <4 x i8> from %x (align 1) and stores it at %y + 4 bytes
; (align 1). Both endiannesses share one set of assertions: a vldrb.u32 load
; followed by a pre-indexed vstrb.32 with the #4 offset and writeback.
1167 define i8* @strb32_4(i8* %y, i8* %x) {
1168 ; CHECK-LABEL: strb32_4:
1169 ; CHECK: @ %bb.0: @ %entry
1170 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1171 ; CHECK-NEXT: vstrb.32 q0, [r0, #4]!
1174 %z = getelementptr inbounds i8, i8* %y, i32 4
1175 %0 = bitcast i8* %x to <4 x i8>*
1176 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1177 %2 = bitcast i8* %z to <4 x i8>*
1178 store <4 x i8> %1, <4 x i8>* %2, align 1
; strb32_3: loads <4 x i8> from %x (align 1) and stores it at %y + 3 bytes
; (align 1). Both endiannesses share one set of assertions: the odd offset is
; still expected to fold into a pre-indexed vstrb.32 with writeback.
1182 define i8* @strb32_3(i8* %y, i8* %x) {
1183 ; CHECK-LABEL: strb32_3:
1184 ; CHECK: @ %bb.0: @ %entry
1185 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1186 ; CHECK-NEXT: vstrb.32 q0, [r0, #3]!
1189 %z = getelementptr inbounds i8, i8* %y, i32 3
1190 %0 = bitcast i8* %x to <4 x i8>*
1191 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1192 %2 = bitcast i8* %z to <4 x i8>*
1193 store <4 x i8> %1, <4 x i8>* %2, align 1
; strb32_127: loads <4 x i8> from %x (align 1) and stores it at %y + 127 bytes
; (align 1). Both endiannesses share one set of assertions: the offset is
; expected to fold into a pre-indexed vstrb.32 with writeback.
; NOTE(review): 127 looks like the upper bound of the foldable byte offset
; (the +128 sibling test is not folded) -- confirm against the MVE encoding.
1197 define i8* @strb32_127(i8* %y, i8* %x) {
1198 ; CHECK-LABEL: strb32_127:
1199 ; CHECK: @ %bb.0: @ %entry
1200 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1201 ; CHECK-NEXT: vstrb.32 q0, [r0, #127]!
1204 %z = getelementptr inbounds i8, i8* %y, i32 127
1205 %0 = bitcast i8* %x to <4 x i8>*
1206 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1207 %2 = bitcast i8* %z to <4 x i8>*
1208 store <4 x i8> %1, <4 x i8>* %2, align 1
; strb32_128: loads <4 x i8> from %x (align 1) and stores it at %y + 128 bytes
; (align 1). Both endiannesses share one set of assertions: the offset is NOT
; folded; a separate `adds r0, #128` is expected before an un-indexed vstrb.32.
; NOTE(review): presumably 128 is outside the pre-indexed immediate range --
; confirm against the MVE encoding.
1212 define i8* @strb32_128(i8* %y, i8* %x) {
1213 ; CHECK-LABEL: strb32_128:
1214 ; CHECK: @ %bb.0: @ %entry
1215 ; CHECK-NEXT: adds r0, #128
1216 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1217 ; CHECK-NEXT: vstrb.32 q0, [r0]
1220 %z = getelementptr inbounds i8, i8* %y, i32 128
1221 %0 = bitcast i8* %x to <4 x i8>*
1222 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1223 %2 = bitcast i8* %z to <4 x i8>*
1224 store <4 x i8> %1, <4 x i8>* %2, align 1
; strb16_4: loads <8 x i8> from %x (align 1) and stores it at %y + 4 bytes
; (align 1). Both endiannesses share one set of assertions: a vldrb.u16 load
; followed by a pre-indexed vstrb.16 with the #4 offset and writeback.
1229 define i8* @strb16_4(i8* %y, i8* %x) {
1230 ; CHECK-LABEL: strb16_4:
1231 ; CHECK: @ %bb.0: @ %entry
1232 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1233 ; CHECK-NEXT: vstrb.16 q0, [r0, #4]!
1236 %z = getelementptr inbounds i8, i8* %y, i32 4
1237 %0 = bitcast i8* %x to <8 x i8>*
1238 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1239 %2 = bitcast i8* %z to <8 x i8>*
1240 store <8 x i8> %1, <8 x i8>* %2, align 1
; strb16_3: loads <8 x i8> from %x (align 1) and stores it at %y + 3 bytes
; (align 1). Both endiannesses share one set of assertions: the odd offset is
; still expected to fold into a pre-indexed vstrb.16 with writeback.
1244 define i8* @strb16_3(i8* %y, i8* %x) {
1245 ; CHECK-LABEL: strb16_3:
1246 ; CHECK: @ %bb.0: @ %entry
1247 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1248 ; CHECK-NEXT: vstrb.16 q0, [r0, #3]!
1251 %z = getelementptr inbounds i8, i8* %y, i32 3
1252 %0 = bitcast i8* %x to <8 x i8>*
1253 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1254 %2 = bitcast i8* %z to <8 x i8>*
1255 store <8 x i8> %1, <8 x i8>* %2, align 1
; strb16_127: loads <8 x i8> from %x (align 1) and stores it at %y + 127 bytes
; (align 1). Both endiannesses share one set of assertions: the offset is
; expected to fold into a pre-indexed vstrb.16 with writeback.
; NOTE(review): 127 looks like the upper bound of the foldable byte offset
; (the +128 sibling test is not folded) -- confirm against the MVE encoding.
1259 define i8* @strb16_127(i8* %y, i8* %x) {
1260 ; CHECK-LABEL: strb16_127:
1261 ; CHECK: @ %bb.0: @ %entry
1262 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1263 ; CHECK-NEXT: vstrb.16 q0, [r0, #127]!
1266 %z = getelementptr inbounds i8, i8* %y, i32 127
1267 %0 = bitcast i8* %x to <8 x i8>*
1268 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1269 %2 = bitcast i8* %z to <8 x i8>*
1270 store <8 x i8> %1, <8 x i8>* %2, align 1
; strb16_128: loads <8 x i8> from %x (align 1) and stores it at %y + 128 bytes
; (align 1). Both endiannesses share one set of assertions: the offset is NOT
; folded; a separate `adds r0, #128` is expected before an un-indexed vstrb.16.
; NOTE(review): presumably 128 is outside the pre-indexed immediate range --
; confirm against the MVE encoding.
1274 define i8* @strb16_128(i8* %y, i8* %x) {
1275 ; CHECK-LABEL: strb16_128:
1276 ; CHECK: @ %bb.0: @ %entry
1277 ; CHECK-NEXT: adds r0, #128
1278 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1279 ; CHECK-NEXT: vstrb.16 q0, [r0]
1282 %z = getelementptr inbounds i8, i8* %y, i32 128
1283 %0 = bitcast i8* %x to <8 x i8>*
1284 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1285 %2 = bitcast i8* %z to <8 x i8>*
1286 store <8 x i8> %1, <8 x i8>* %2, align 1
; strb8_4: loads <16 x i8> from %x (align 1) and stores it at %y + 4 bytes
; (align 1). Both endiannesses share one set of assertions: a vldrb.u8 load
; followed by a pre-indexed vstrb.8 with the #4 offset and writeback.
1291 define i8* @strb8_4(i8* %y, i8* %x) {
1292 ; CHECK-LABEL: strb8_4:
1293 ; CHECK: @ %bb.0: @ %entry
1294 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1295 ; CHECK-NEXT: vstrb.8 q0, [r0, #4]!
1298 %z = getelementptr inbounds i8, i8* %y, i32 4
1299 %0 = bitcast i8* %x to <16 x i8>*
1300 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1301 %2 = bitcast i8* %z to <16 x i8>*
1302 store <16 x i8> %1, <16 x i8>* %2, align 1
; strb8_3: loads <16 x i8> from %x (align 1) and stores it at %y + 3 bytes
; (align 1). Both endiannesses share one set of assertions: the odd offset is
; still expected to fold into a pre-indexed vstrb.8 with writeback.
1306 define i8* @strb8_3(i8* %y, i8* %x) {
1307 ; CHECK-LABEL: strb8_3:
1308 ; CHECK: @ %bb.0: @ %entry
1309 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1310 ; CHECK-NEXT: vstrb.8 q0, [r0, #3]!
1313 %z = getelementptr inbounds i8, i8* %y, i32 3
1314 %0 = bitcast i8* %x to <16 x i8>*
1315 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1316 %2 = bitcast i8* %z to <16 x i8>*
1317 store <16 x i8> %1, <16 x i8>* %2, align 1
; strb8_127: loads <16 x i8> from %x (align 1) and stores it at %y + 127 bytes
; (align 1). Both endiannesses share one set of assertions: the offset is
; expected to fold into a pre-indexed vstrb.8 with writeback.
; NOTE(review): 127 looks like the upper bound of the foldable byte offset
; (the +128 sibling test is not folded) -- confirm against the MVE encoding.
1321 define i8* @strb8_127(i8* %y, i8* %x) {
1322 ; CHECK-LABEL: strb8_127:
1323 ; CHECK: @ %bb.0: @ %entry
1324 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1325 ; CHECK-NEXT: vstrb.8 q0, [r0, #127]!
1328 %z = getelementptr inbounds i8, i8* %y, i32 127
1329 %0 = bitcast i8* %x to <16 x i8>*
1330 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1331 %2 = bitcast i8* %z to <16 x i8>*
1332 store <16 x i8> %1, <16 x i8>* %2, align 1
; strb8_128: loads <16 x i8> from %x (align 1) and stores it at %y + 128 bytes
; (align 1). Both endiannesses share one set of assertions: the offset is NOT
; folded; a separate `adds r0, #128` is expected before an un-indexed vstrb.8.
; NOTE(review): presumably 128 is outside the pre-indexed immediate range --
; confirm against the MVE encoding.
1336 define i8* @strb8_128(i8* %y, i8* %x) {
1337 ; CHECK-LABEL: strb8_128:
1338 ; CHECK: @ %bb.0: @ %entry
1339 ; CHECK-NEXT: adds r0, #128
1340 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1341 ; CHECK-NEXT: vstrb.8 q0, [r0]
1344 %z = getelementptr inbounds i8, i8* %y, i32 128
1345 %0 = bitcast i8* %x to <16 x i8>*
1346 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1347 %2 = bitcast i8* %z to <16 x i8>*
1348 store <16 x i8> %1, <16 x i8>* %2, align 1
; strf32_4: loads <4 x float> from %x (align 4) and stores it at %y + 4 bytes
; (align 4). Both runs expect a vldrw.u32 load and the offset folded into a
; pre-indexed store with writeback: vstrb.8 for little-endian, vstrw.32 for
; big-endian.
1352 define i8* @strf32_4(i8* %y, i8* %x) {
1353 ; CHECK-LE-LABEL: strf32_4:
1354 ; CHECK-LE: @ %bb.0: @ %entry
1355 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
1356 ; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]!
1357 ; CHECK-LE-NEXT: bx lr
1359 ; CHECK-BE-LABEL: strf32_4:
1360 ; CHECK-BE: @ %bb.0: @ %entry
1361 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
1362 ; CHECK-BE-NEXT: vstrw.32 q0, [r0, #4]!
1363 ; CHECK-BE-NEXT: bx lr
1365 %z = getelementptr inbounds i8, i8* %y, i32 4
1366 %0 = bitcast i8* %x to <4 x float>*
1367 %1 = load <4 x float>, <4 x float>* %0, align 4
1368 %2 = bitcast i8* %z to <4 x float>*
1369 store <4 x float> %1, <4 x float>* %2, align 4
; strf16_4: loads <8 x half> from %x (align 2) and stores it at %y + 4 bytes
; (align 2). Both runs expect a vldrh.u16 load and the offset folded into a
; pre-indexed store with writeback: vstrb.8 for little-endian, vstrh.16 for
; big-endian.
1373 define i8* @strf16_4(i8* %y, i8* %x) {
1374 ; CHECK-LE-LABEL: strf16_4:
1375 ; CHECK-LE: @ %bb.0: @ %entry
1376 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1377 ; CHECK-LE-NEXT: vstrb.8 q0, [r0, #4]!
1378 ; CHECK-LE-NEXT: bx lr
1380 ; CHECK-BE-LABEL: strf16_4:
1381 ; CHECK-BE: @ %bb.0: @ %entry
1382 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1383 ; CHECK-BE-NEXT: vstrh.16 q0, [r0, #4]!
1384 ; CHECK-BE-NEXT: bx lr
1386 %z = getelementptr inbounds i8, i8* %y, i32 4
1387 %0 = bitcast i8* %x to <8 x half>*
1388 %1 = load <8 x half>, <8 x half>* %0, align 2
1389 %2 = bitcast i8* %z to <8 x half>*
1390 store <8 x half> %1, <8 x half>* %2, align 2
; strwi32_align1: loads <4 x i32> from %x (align 4) and stores it at %y + 3
; bytes with only byte alignment (align 1). The little-endian run expects a
; single pre-indexed vstrb.8 with writeback. The big-endian run expects a
; vrev32.8 of the data, a vstrb.8 with a plain #3 offset (no writeback), and
; then a separate `adds r0, #3`.
1394 define i8* @strwi32_align1(i8* %y, i8* %x) {
1395 ; CHECK-LE-LABEL: strwi32_align1:
1396 ; CHECK-LE: @ %bb.0: @ %entry
1397 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
1398 ; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]!
1399 ; CHECK-LE-NEXT: bx lr
1401 ; CHECK-BE-LABEL: strwi32_align1:
1402 ; CHECK-BE: @ %bb.0: @ %entry
1403 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
1404 ; CHECK-BE-NEXT: vrev32.8 q0, q0
1405 ; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
1406 ; CHECK-BE-NEXT: adds r0, #3
1407 ; CHECK-BE-NEXT: bx lr
1409 %z = getelementptr inbounds i8, i8* %y, i32 3
1410 %0 = bitcast i8* %x to <4 x i32>*
1411 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1412 %2 = bitcast i8* %z to <4 x i32>*
1413 store <4 x i32> %1, <4 x i32>* %2, align 1
; strhi16_align1: loads <8 x i16> from %x (align 2) and stores it at %y + 3
; bytes with only byte alignment (align 1). The little-endian run expects a
; single pre-indexed vstrb.8 with writeback. The big-endian run expects a
; vrev16.8 of the data, a vstrb.8 with a plain #3 offset (no writeback), and
; then a separate `adds r0, #3`.
1417 define i8* @strhi16_align1(i8* %y, i8* %x) {
1418 ; CHECK-LE-LABEL: strhi16_align1:
1419 ; CHECK-LE: @ %bb.0: @ %entry
1420 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1421 ; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]!
1422 ; CHECK-LE-NEXT: bx lr
1424 ; CHECK-BE-LABEL: strhi16_align1:
1425 ; CHECK-BE: @ %bb.0: @ %entry
1426 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1427 ; CHECK-BE-NEXT: vrev16.8 q0, q0
1428 ; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
1429 ; CHECK-BE-NEXT: adds r0, #3
1430 ; CHECK-BE-NEXT: bx lr
1432 %z = getelementptr inbounds i8, i8* %y, i32 3
1433 %0 = bitcast i8* %x to <8 x i16>*
1434 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1435 %2 = bitcast i8* %z to <8 x i16>*
1436 store <8 x i16> %1, <8 x i16>* %2, align 1
; strhi32_align1: loads <4 x i32> from %x (align 4), truncates it to <4 x i16>,
; and stores the 8-byte result at %y + 3 bytes with only byte alignment
; (align 1). Both endiannesses share one set of assertions: the narrowed vector
; is expected to be spilled with vstrh.32 into an 8-byte stack slot, reloaded
; into r1/r2 with ldrd, and written out with two scalar `str` instructions. The
; first `str` is pre-indexed by #3 with writeback and the second uses offset #4
; from the updated r0.
1440 define i8* @strhi32_align1(i8* %y, i8* %x) {
1441 ; CHECK-LABEL: strhi32_align1:
1442 ; CHECK: @ %bb.0: @ %entry
1443 ; CHECK-NEXT: .pad #8
1444 ; CHECK-NEXT: sub sp, #8
1445 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1446 ; CHECK-NEXT: mov r1, sp
1447 ; CHECK-NEXT: vstrh.32 q0, [r1]
1448 ; CHECK-NEXT: ldrd r1, r2, [sp]
1449 ; CHECK-NEXT: str r1, [r0, #3]!
1450 ; CHECK-NEXT: str r2, [r0, #4]
1451 ; CHECK-NEXT: add sp, #8
1454 %z = getelementptr inbounds i8, i8* %y, i32 3
1455 %0 = bitcast i8* %x to <4 x i32>*
1456 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1457 %2 = bitcast i8* %z to <4 x i16>*
1458 %3 = trunc <4 x i32> %1 to <4 x i16>
1459 store <4 x i16> %3, <4 x i16>* %2, align 1
; strf32_align1: loads <4 x float> from %x (align 4) and stores it at %y + 3
; bytes with only byte alignment (align 1). The little-endian run expects a
; single pre-indexed vstrb.8 with writeback. The big-endian run expects a
; vrev32.8 of the data, a vstrb.8 with a plain #3 offset (no writeback), and
; then a separate `adds r0, #3`.
1463 define i8* @strf32_align1(i8* %y, i8* %x) {
1464 ; CHECK-LE-LABEL: strf32_align1:
1465 ; CHECK-LE: @ %bb.0: @ %entry
1466 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
1467 ; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]!
1468 ; CHECK-LE-NEXT: bx lr
1470 ; CHECK-BE-LABEL: strf32_align1:
1471 ; CHECK-BE: @ %bb.0: @ %entry
1472 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
1473 ; CHECK-BE-NEXT: vrev32.8 q0, q0
1474 ; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
1475 ; CHECK-BE-NEXT: adds r0, #3
1476 ; CHECK-BE-NEXT: bx lr
1478 %z = getelementptr inbounds i8, i8* %y, i32 3
1479 %0 = bitcast i8* %x to <4 x float>*
1480 %1 = load <4 x float>, <4 x float>* %0, align 4
1481 %2 = bitcast i8* %z to <4 x float>*
1482 store <4 x float> %1, <4 x float>* %2, align 1
; strf16_align1: loads <8 x half> from %x (align 2) and stores it at %y + 3
; bytes with only byte alignment (align 1). The little-endian run expects a
; single pre-indexed vstrb.8 with writeback. The big-endian run expects a
; vrev16.8 of the data, a vstrb.8 with a plain #3 offset (no writeback), and
; then a separate `adds r0, #3`.
1486 define i8* @strf16_align1(i8* %y, i8* %x) {
1487 ; CHECK-LE-LABEL: strf16_align1:
1488 ; CHECK-LE: @ %bb.0: @ %entry
1489 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1490 ; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]!
1491 ; CHECK-LE-NEXT: bx lr
1493 ; CHECK-BE-LABEL: strf16_align1:
1494 ; CHECK-BE: @ %bb.0: @ %entry
1495 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1496 ; CHECK-BE-NEXT: vrev16.8 q0, q0
1497 ; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
1498 ; CHECK-BE-NEXT: adds r0, #3
1499 ; CHECK-BE-NEXT: bx lr
1501 %z = getelementptr inbounds i8, i8* %y, i32 3
1502 %0 = bitcast i8* %x to <8 x half>*
1503 %1 = load <8 x half>, <8 x half>* %0, align 2
1504 %2 = bitcast i8* %z to <8 x half>*
1505 store <8 x half> %1, <8 x half>* %2, align 1
; strf16_align8: loads an 8 x 16-bit vector from %x (align 2) and stores it at
; %y + 16 bytes with an over-aligned store (align 8). Both runs expect the
; offset folded into a pre-indexed store with writeback: vstrb.8 for
; little-endian, vstrh.16 for big-endian.
; NOTE(review): the name says f16 but the IR below uses <8 x i16>, not
; <8 x half> -- confirm whether that is intentional before changing it, since
; the autogenerated assertions would need regenerating.
1509 define i8* @strf16_align8(i8* %y, i8* %x) {
1510 ; CHECK-LE-LABEL: strf16_align8:
1511 ; CHECK-LE: @ %bb.0: @ %entry
1512 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1513 ; CHECK-LE-NEXT: vstrb.8 q0, [r0, #16]!
1514 ; CHECK-LE-NEXT: bx lr
1516 ; CHECK-BE-LABEL: strf16_align8:
1517 ; CHECK-BE: @ %bb.0: @ %entry
1518 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1519 ; CHECK-BE-NEXT: vstrh.16 q0, [r0, #16]!
1520 ; CHECK-BE-NEXT: bx lr
1522 %z = getelementptr inbounds i8, i8* %y, i32 16
1523 %0 = bitcast i8* %x to <8 x i16>*
1524 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1525 %2 = bitcast i8* %z to <8 x i16>*
1526 store <8 x i16> %1, <8 x i16>* %2, align 8