1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
3 ; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
; <4 x i32> load from %x (align 4) stored to %y, with %z = %x + 4 bytes.
; Expected output folds the +4 into a post-indexed vldrw.u32.
5 define i8* @ldrwu32_4(i8* %x, i8* %y) {
6 ; CHECK-LABEL: ldrwu32_4:
7 ; CHECK: @ %bb.0: @ %entry
8 ; CHECK-NEXT: vldrw.u32 q0, [r0], #4
9 ; CHECK-NEXT: vstrw.32 q0, [r1]
12 %z = getelementptr inbounds i8, i8* %x, i32 4
13 %0 = bitcast i8* %x to <4 x i32>*
14 %1 = load <4 x i32>, <4 x i32>* %0, align 4
15 %2 = bitcast i8* %y to <4 x i32>*
16 store <4 x i32> %1, <4 x i32>* %2, align 4
; <4 x i32> load from %x (align 4) stored to %y, with %z = %x + 3 bytes.
; The little-endian run expects a byte load (vldrb.u8) post-indexed by #3;
; the big-endian run expects an un-indexed vldrw.u32 plus a separate adds.
20 define i8* @ldrwu32_3(i8* %x, i8* %y) {
21 ; CHECK-LE-LABEL: ldrwu32_3:
22 ; CHECK-LE: @ %bb.0: @ %entry
23 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
24 ; CHECK-LE-NEXT: vstrw.32 q0, [r1]
25 ; CHECK-LE-NEXT: bx lr
27 ; CHECK-BE-LABEL: ldrwu32_3:
28 ; CHECK-BE: @ %bb.0: @ %entry
29 ; CHECK-BE-NEXT: vldrw.u32 q0, [r0]
30 ; CHECK-BE-NEXT: adds r0, #3
31 ; CHECK-BE-NEXT: vstrw.32 q0, [r1]
32 ; CHECK-BE-NEXT: bx lr
34 %z = getelementptr inbounds i8, i8* %x, i32 3
35 %0 = bitcast i8* %x to <4 x i32>*
36 %1 = load <4 x i32>, <4 x i32>* %0, align 4
37 %2 = bitcast i8* %y to <4 x i32>*
38 store <4 x i32> %1, <4 x i32>* %2, align 4
; <4 x i32> load from %x (align 4) stored to %y, with %z = %x - 4 bytes.
; Expected output folds the -4 into a post-indexed vldrw.u32.
42 define i8* @ldrwu32_m4(i8* %x, i8* %y) {
43 ; CHECK-LABEL: ldrwu32_m4:
44 ; CHECK: @ %bb.0: @ %entry
45 ; CHECK-NEXT: vldrw.u32 q0, [r0], #-4
46 ; CHECK-NEXT: vstrw.32 q0, [r1]
49 %z = getelementptr inbounds i8, i8* %x, i32 -4
50 %0 = bitcast i8* %x to <4 x i32>*
51 %1 = load <4 x i32>, <4 x i32>* %0, align 4
52 %2 = bitcast i8* %y to <4 x i32>*
53 store <4 x i32> %1, <4 x i32>* %2, align 4
; <4 x i32> load from %x (align 4) stored to %y, with %z = %x + 508 bytes.
; Expected output still folds +508 into a post-indexed vldrw.u32.
57 define i8* @ldrwu32_508(i8* %x, i8* %y) {
58 ; CHECK-LABEL: ldrwu32_508:
59 ; CHECK: @ %bb.0: @ %entry
60 ; CHECK-NEXT: vldrw.u32 q0, [r0], #508
61 ; CHECK-NEXT: vstrw.32 q0, [r1]
64 %z = getelementptr inbounds i8, i8* %x, i32 508
65 %0 = bitcast i8* %x to <4 x i32>*
66 %1 = load <4 x i32>, <4 x i32>* %0, align 4
67 %2 = bitcast i8* %y to <4 x i32>*
68 store <4 x i32> %1, <4 x i32>* %2, align 4
; <4 x i32> load from %x (align 4) stored to %y, with %z = %x + 512 bytes.
; Expected output does not fold +512: the load is un-indexed and r0 is
; advanced by a separate add.w.
72 define i8* @ldrwu32_512(i8* %x, i8* %y) {
73 ; CHECK-LABEL: ldrwu32_512:
74 ; CHECK: @ %bb.0: @ %entry
75 ; CHECK-NEXT: vldrw.u32 q0, [r0]
76 ; CHECK-NEXT: add.w r0, r0, #512
77 ; CHECK-NEXT: vstrw.32 q0, [r1]
80 %z = getelementptr inbounds i8, i8* %x, i32 512
81 %0 = bitcast i8* %x to <4 x i32>*
82 %1 = load <4 x i32>, <4 x i32>* %0, align 4
83 %2 = bitcast i8* %y to <4 x i32>*
84 store <4 x i32> %1, <4 x i32>* %2, align 4
; <4 x i32> load from %x (align 4) stored to %y, with %z = %x - 508 bytes.
; Expected output still folds -508 into a post-indexed vldrw.u32.
88 define i8* @ldrwu32_m508(i8* %x, i8* %y) {
89 ; CHECK-LABEL: ldrwu32_m508:
90 ; CHECK: @ %bb.0: @ %entry
91 ; CHECK-NEXT: vldrw.u32 q0, [r0], #-508
92 ; CHECK-NEXT: vstrw.32 q0, [r1]
95 %z = getelementptr inbounds i8, i8* %x, i32 -508
96 %0 = bitcast i8* %x to <4 x i32>*
97 %1 = load <4 x i32>, <4 x i32>* %0, align 4
98 %2 = bitcast i8* %y to <4 x i32>*
99 store <4 x i32> %1, <4 x i32>* %2, align 4
; <4 x i32> load from %x (align 4) stored to %y, with %z = %x - 512 bytes.
; Expected output does not fold -512: the load is un-indexed and r0 is
; adjusted by a separate sub.w.
103 define i8* @ldrwu32_m512(i8* %x, i8* %y) {
104 ; CHECK-LABEL: ldrwu32_m512:
105 ; CHECK: @ %bb.0: @ %entry
106 ; CHECK-NEXT: vldrw.u32 q0, [r0]
107 ; CHECK-NEXT: sub.w r0, r0, #512
108 ; CHECK-NEXT: vstrw.32 q0, [r1]
111 %z = getelementptr inbounds i8, i8* %x, i32 -512
112 %0 = bitcast i8* %x to <4 x i32>*
113 %1 = load <4 x i32>, <4 x i32>* %0, align 4
114 %2 = bitcast i8* %y to <4 x i32>*
115 store <4 x i32> %1, <4 x i32>* %2, align 4
; <4 x i16> load from %x (align 2), zero-extended to <4 x i32> and stored to %y;
; %z = %x + 4 bytes. Expected output folds the +4 into a post-indexed vldrh.u32.
120 define i8* @ldrhu32_4(i8* %x, i8* %y) {
121 ; CHECK-LABEL: ldrhu32_4:
122 ; CHECK: @ %bb.0: @ %entry
123 ; CHECK-NEXT: vldrh.u32 q0, [r0], #4
124 ; CHECK-NEXT: vstrw.32 q0, [r1]
127 %z = getelementptr inbounds i8, i8* %x, i32 4
128 %0 = bitcast i8* %x to <4 x i16>*
129 %1 = load <4 x i16>, <4 x i16>* %0, align 2
130 %2 = zext <4 x i16> %1 to <4 x i32>
131 %3 = bitcast i8* %y to <4 x i32>*
132 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load from %x (align 2), zero-extended to <4 x i32> and stored to %y;
; %z = %x + 3 bytes. Expected output does not fold the odd offset: the load is
; un-indexed and r0 is advanced by a separate adds.
136 define i8* @ldrhu32_3(i8* %x, i8* %y) {
137 ; CHECK-LABEL: ldrhu32_3:
138 ; CHECK: @ %bb.0: @ %entry
139 ; CHECK-NEXT: vldrh.u32 q0, [r0]
140 ; CHECK-NEXT: adds r0, #3
141 ; CHECK-NEXT: vstrw.32 q0, [r1]
144 %z = getelementptr inbounds i8, i8* %x, i32 3
145 %0 = bitcast i8* %x to <4 x i16>*
146 %1 = load <4 x i16>, <4 x i16>* %0, align 2
147 %2 = zext <4 x i16> %1 to <4 x i32>
148 %3 = bitcast i8* %y to <4 x i32>*
149 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load from %x (align 2), zero-extended to <4 x i32> and stored to %y;
; %z = %x + 2 bytes. Expected output folds the +2 into a post-indexed vldrh.u32.
153 define i8* @ldrhu32_2(i8* %x, i8* %y) {
154 ; CHECK-LABEL: ldrhu32_2:
155 ; CHECK: @ %bb.0: @ %entry
156 ; CHECK-NEXT: vldrh.u32 q0, [r0], #2
157 ; CHECK-NEXT: vstrw.32 q0, [r1]
160 %z = getelementptr inbounds i8, i8* %x, i32 2
161 %0 = bitcast i8* %x to <4 x i16>*
162 %1 = load <4 x i16>, <4 x i16>* %0, align 2
163 %2 = zext <4 x i16> %1 to <4 x i32>
164 %3 = bitcast i8* %y to <4 x i32>*
165 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load from %x (align 2), zero-extended to <4 x i32> and stored to %y;
; %z = %x + 254 bytes. Expected output still folds +254 into a post-indexed
; vldrh.u32.
169 define i8* @ldrhu32_254(i8* %x, i8* %y) {
170 ; CHECK-LABEL: ldrhu32_254:
171 ; CHECK: @ %bb.0: @ %entry
172 ; CHECK-NEXT: vldrh.u32 q0, [r0], #254
173 ; CHECK-NEXT: vstrw.32 q0, [r1]
176 %z = getelementptr inbounds i8, i8* %x, i32 254
177 %0 = bitcast i8* %x to <4 x i16>*
178 %1 = load <4 x i16>, <4 x i16>* %0, align 2
179 %2 = zext <4 x i16> %1 to <4 x i32>
180 %3 = bitcast i8* %y to <4 x i32>*
181 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load from %x (align 2), zero-extended to <4 x i32> and stored to %y;
; %z = %x + 256 bytes. Expected output does not fold +256: un-indexed load plus
; a separate add.w on r0.
185 define i8* @ldrhu32_256(i8* %x, i8* %y) {
186 ; CHECK-LABEL: ldrhu32_256:
187 ; CHECK: @ %bb.0: @ %entry
188 ; CHECK-NEXT: vldrh.u32 q0, [r0]
189 ; CHECK-NEXT: add.w r0, r0, #256
190 ; CHECK-NEXT: vstrw.32 q0, [r1]
193 %z = getelementptr inbounds i8, i8* %x, i32 256
194 %0 = bitcast i8* %x to <4 x i16>*
195 %1 = load <4 x i16>, <4 x i16>* %0, align 2
196 %2 = zext <4 x i16> %1 to <4 x i32>
197 %3 = bitcast i8* %y to <4 x i32>*
198 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load from %x (align 2), zero-extended to <4 x i32> and stored to %y;
; %z = %x - 254 bytes. Expected output still folds -254 into a post-indexed
; vldrh.u32.
202 define i8* @ldrhu32_m254(i8* %x, i8* %y) {
203 ; CHECK-LABEL: ldrhu32_m254:
204 ; CHECK: @ %bb.0: @ %entry
205 ; CHECK-NEXT: vldrh.u32 q0, [r0], #-254
206 ; CHECK-NEXT: vstrw.32 q0, [r1]
209 %z = getelementptr inbounds i8, i8* %x, i32 -254
210 %0 = bitcast i8* %x to <4 x i16>*
211 %1 = load <4 x i16>, <4 x i16>* %0, align 2
212 %2 = zext <4 x i16> %1 to <4 x i32>
213 %3 = bitcast i8* %y to <4 x i32>*
214 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load from %x (align 2), zero-extended to <4 x i32> and stored to %y;
; %z = %x - 256 bytes. Expected output does not fold -256: un-indexed load plus
; a separate sub.w on r0.
218 define i8* @ldrhu32_m256(i8* %x, i8* %y) {
219 ; CHECK-LABEL: ldrhu32_m256:
220 ; CHECK: @ %bb.0: @ %entry
221 ; CHECK-NEXT: vldrh.u32 q0, [r0]
222 ; CHECK-NEXT: sub.w r0, r0, #256
223 ; CHECK-NEXT: vstrw.32 q0, [r1]
226 %z = getelementptr inbounds i8, i8* %x, i32 -256
227 %0 = bitcast i8* %x to <4 x i16>*
228 %1 = load <4 x i16>, <4 x i16>* %0, align 2
229 %2 = zext <4 x i16> %1 to <4 x i32>
230 %3 = bitcast i8* %y to <4 x i32>*
231 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load from %x (align 2), sign-extended to <4 x i32> and stored to %y;
; %z = %x + 4 bytes. Expected output folds the +4 into a post-indexed vldrh.s32.
236 define i8* @ldrhs32_4(i8* %x, i8* %y) {
237 ; CHECK-LABEL: ldrhs32_4:
238 ; CHECK: @ %bb.0: @ %entry
239 ; CHECK-NEXT: vldrh.s32 q0, [r0], #4
240 ; CHECK-NEXT: vstrw.32 q0, [r1]
243 %z = getelementptr inbounds i8, i8* %x, i32 4
244 %0 = bitcast i8* %x to <4 x i16>*
245 %1 = load <4 x i16>, <4 x i16>* %0, align 2
246 %2 = sext <4 x i16> %1 to <4 x i32>
247 %3 = bitcast i8* %y to <4 x i32>*
248 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load from %x (align 2), sign-extended to <4 x i32> and stored to %y;
; %z = %x + 3 bytes. Expected output does not fold the odd offset: un-indexed
; load plus a separate adds on r0.
252 define i8* @ldrhs32_3(i8* %x, i8* %y) {
253 ; CHECK-LABEL: ldrhs32_3:
254 ; CHECK: @ %bb.0: @ %entry
255 ; CHECK-NEXT: vldrh.s32 q0, [r0]
256 ; CHECK-NEXT: adds r0, #3
257 ; CHECK-NEXT: vstrw.32 q0, [r1]
260 %z = getelementptr inbounds i8, i8* %x, i32 3
261 %0 = bitcast i8* %x to <4 x i16>*
262 %1 = load <4 x i16>, <4 x i16>* %0, align 2
263 %2 = sext <4 x i16> %1 to <4 x i32>
264 %3 = bitcast i8* %y to <4 x i32>*
265 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load from %x (align 2), sign-extended to <4 x i32> and stored to %y;
; %z = %x + 2 bytes. Expected output folds the +2 into a post-indexed vldrh.s32.
269 define i8* @ldrhs32_2(i8* %x, i8* %y) {
270 ; CHECK-LABEL: ldrhs32_2:
271 ; CHECK: @ %bb.0: @ %entry
272 ; CHECK-NEXT: vldrh.s32 q0, [r0], #2
273 ; CHECK-NEXT: vstrw.32 q0, [r1]
276 %z = getelementptr inbounds i8, i8* %x, i32 2
277 %0 = bitcast i8* %x to <4 x i16>*
278 %1 = load <4 x i16>, <4 x i16>* %0, align 2
279 %2 = sext <4 x i16> %1 to <4 x i32>
280 %3 = bitcast i8* %y to <4 x i32>*
281 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load from %x (align 2), sign-extended to <4 x i32> and stored to %y;
; %z = %x + 254 bytes. Expected output still folds +254 into a post-indexed
; vldrh.s32.
285 define i8* @ldrhs32_254(i8* %x, i8* %y) {
286 ; CHECK-LABEL: ldrhs32_254:
287 ; CHECK: @ %bb.0: @ %entry
288 ; CHECK-NEXT: vldrh.s32 q0, [r0], #254
289 ; CHECK-NEXT: vstrw.32 q0, [r1]
292 %z = getelementptr inbounds i8, i8* %x, i32 254
293 %0 = bitcast i8* %x to <4 x i16>*
294 %1 = load <4 x i16>, <4 x i16>* %0, align 2
295 %2 = sext <4 x i16> %1 to <4 x i32>
296 %3 = bitcast i8* %y to <4 x i32>*
297 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load from %x (align 2), sign-extended to <4 x i32> and stored to %y;
; %z = %x + 256 bytes. Expected output does not fold +256: un-indexed load plus
; a separate add.w on r0.
301 define i8* @ldrhs32_256(i8* %x, i8* %y) {
302 ; CHECK-LABEL: ldrhs32_256:
303 ; CHECK: @ %bb.0: @ %entry
304 ; CHECK-NEXT: vldrh.s32 q0, [r0]
305 ; CHECK-NEXT: add.w r0, r0, #256
306 ; CHECK-NEXT: vstrw.32 q0, [r1]
309 %z = getelementptr inbounds i8, i8* %x, i32 256
310 %0 = bitcast i8* %x to <4 x i16>*
311 %1 = load <4 x i16>, <4 x i16>* %0, align 2
312 %2 = sext <4 x i16> %1 to <4 x i32>
313 %3 = bitcast i8* %y to <4 x i32>*
314 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load from %x (align 2), sign-extended to <4 x i32> and stored to %y;
; %z = %x - 254 bytes. Expected output still folds -254 into a post-indexed
; vldrh.s32.
318 define i8* @ldrhs32_m254(i8* %x, i8* %y) {
319 ; CHECK-LABEL: ldrhs32_m254:
320 ; CHECK: @ %bb.0: @ %entry
321 ; CHECK-NEXT: vldrh.s32 q0, [r0], #-254
322 ; CHECK-NEXT: vstrw.32 q0, [r1]
325 %z = getelementptr inbounds i8, i8* %x, i32 -254
326 %0 = bitcast i8* %x to <4 x i16>*
327 %1 = load <4 x i16>, <4 x i16>* %0, align 2
328 %2 = sext <4 x i16> %1 to <4 x i32>
329 %3 = bitcast i8* %y to <4 x i32>*
330 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i16> load from %x (align 2), sign-extended to <4 x i32> and stored to %y;
; %z = %x - 256 bytes. Expected output does not fold -256: un-indexed load plus
; a separate sub.w on r0.
334 define i8* @ldrhs32_m256(i8* %x, i8* %y) {
335 ; CHECK-LABEL: ldrhs32_m256:
336 ; CHECK: @ %bb.0: @ %entry
337 ; CHECK-NEXT: vldrh.s32 q0, [r0]
338 ; CHECK-NEXT: sub.w r0, r0, #256
339 ; CHECK-NEXT: vstrw.32 q0, [r1]
342 %z = getelementptr inbounds i8, i8* %x, i32 -256
343 %0 = bitcast i8* %x to <4 x i16>*
344 %1 = load <4 x i16>, <4 x i16>* %0, align 2
345 %2 = sext <4 x i16> %1 to <4 x i32>
346 %3 = bitcast i8* %y to <4 x i32>*
347 store <4 x i32> %2, <4 x i32>* %3, align 4
; <8 x i16> load from %x (align 2) stored to %y, with %z = %x + 4 bytes.
; Expected output folds the +4 into a post-indexed vldrh.u16.
352 define i8* @ldrhu16_4(i8* %x, i8* %y) {
353 ; CHECK-LABEL: ldrhu16_4:
354 ; CHECK: @ %bb.0: @ %entry
355 ; CHECK-NEXT: vldrh.u16 q0, [r0], #4
356 ; CHECK-NEXT: vstrh.16 q0, [r1]
359 %z = getelementptr inbounds i8, i8* %x, i32 4
360 %0 = bitcast i8* %x to <8 x i16>*
361 %1 = load <8 x i16>, <8 x i16>* %0, align 2
362 %2 = bitcast i8* %y to <8 x i16>*
363 store <8 x i16> %1, <8 x i16>* %2, align 2
; <8 x i16> load from %x (align 2) stored to %y, with %z = %x + 3 bytes.
; The little-endian run expects a byte load (vldrb.u8) post-indexed by #3;
; the big-endian run expects an un-indexed vldrh.u16 plus a separate adds.
367 define i8* @ldrhu16_3(i8* %x, i8* %y) {
368 ; CHECK-LE-LABEL: ldrhu16_3:
369 ; CHECK-LE: @ %bb.0: @ %entry
370 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
371 ; CHECK-LE-NEXT: vstrh.16 q0, [r1]
372 ; CHECK-LE-NEXT: bx lr
374 ; CHECK-BE-LABEL: ldrhu16_3:
375 ; CHECK-BE: @ %bb.0: @ %entry
376 ; CHECK-BE-NEXT: vldrh.u16 q0, [r0]
377 ; CHECK-BE-NEXT: adds r0, #3
378 ; CHECK-BE-NEXT: vstrh.16 q0, [r1]
379 ; CHECK-BE-NEXT: bx lr
381 %z = getelementptr inbounds i8, i8* %x, i32 3
382 %0 = bitcast i8* %x to <8 x i16>*
383 %1 = load <8 x i16>, <8 x i16>* %0, align 2
384 %2 = bitcast i8* %y to <8 x i16>*
385 store <8 x i16> %1, <8 x i16>* %2, align 2
; <8 x i16> load from %x (align 2) stored to %y, with %z = %x + 2 bytes.
; Expected output folds the +2 into a post-indexed vldrh.u16.
389 define i8* @ldrhu16_2(i8* %x, i8* %y) {
390 ; CHECK-LABEL: ldrhu16_2:
391 ; CHECK: @ %bb.0: @ %entry
392 ; CHECK-NEXT: vldrh.u16 q0, [r0], #2
393 ; CHECK-NEXT: vstrh.16 q0, [r1]
396 %z = getelementptr inbounds i8, i8* %x, i32 2
397 %0 = bitcast i8* %x to <8 x i16>*
398 %1 = load <8 x i16>, <8 x i16>* %0, align 2
399 %2 = bitcast i8* %y to <8 x i16>*
400 store <8 x i16> %1, <8 x i16>* %2, align 2
; <8 x i16> load from %x (align 2) stored to %y, with %z = %x + 254 bytes.
; Expected output still folds +254 into a post-indexed vldrh.u16.
404 define i8* @ldrhu16_254(i8* %x, i8* %y) {
405 ; CHECK-LABEL: ldrhu16_254:
406 ; CHECK: @ %bb.0: @ %entry
407 ; CHECK-NEXT: vldrh.u16 q0, [r0], #254
408 ; CHECK-NEXT: vstrh.16 q0, [r1]
411 %z = getelementptr inbounds i8, i8* %x, i32 254
412 %0 = bitcast i8* %x to <8 x i16>*
413 %1 = load <8 x i16>, <8 x i16>* %0, align 2
414 %2 = bitcast i8* %y to <8 x i16>*
415 store <8 x i16> %1, <8 x i16>* %2, align 2
; <8 x i16> load from %x (align 2) stored to %y, with %z = %x + 256 bytes.
; Expected output does not fold +256: un-indexed load plus a separate add.w
; on r0.
419 define i8* @ldrhu16_256(i8* %x, i8* %y) {
420 ; CHECK-LABEL: ldrhu16_256:
421 ; CHECK: @ %bb.0: @ %entry
422 ; CHECK-NEXT: vldrh.u16 q0, [r0]
423 ; CHECK-NEXT: add.w r0, r0, #256
424 ; CHECK-NEXT: vstrh.16 q0, [r1]
427 %z = getelementptr inbounds i8, i8* %x, i32 256
428 %0 = bitcast i8* %x to <8 x i16>*
429 %1 = load <8 x i16>, <8 x i16>* %0, align 2
430 %2 = bitcast i8* %y to <8 x i16>*
431 store <8 x i16> %1, <8 x i16>* %2, align 2
; <8 x i16> load from %x (align 2) stored to %y, with %z = %x - 254 bytes.
; Expected output still folds -254 into a post-indexed vldrh.u16.
435 define i8* @ldrhu16_m254(i8* %x, i8* %y) {
436 ; CHECK-LABEL: ldrhu16_m254:
437 ; CHECK: @ %bb.0: @ %entry
438 ; CHECK-NEXT: vldrh.u16 q0, [r0], #-254
439 ; CHECK-NEXT: vstrh.16 q0, [r1]
442 %z = getelementptr inbounds i8, i8* %x, i32 -254
443 %0 = bitcast i8* %x to <8 x i16>*
444 %1 = load <8 x i16>, <8 x i16>* %0, align 2
445 %2 = bitcast i8* %y to <8 x i16>*
446 store <8 x i16> %1, <8 x i16>* %2, align 2
; <8 x i16> load from %x (align 2) stored to %y, with %z = %x - 256 bytes.
; Expected output does not fold -256: un-indexed load plus a separate sub.w
; on r0.
450 define i8* @ldrhu16_m256(i8* %x, i8* %y) {
451 ; CHECK-LABEL: ldrhu16_m256:
452 ; CHECK: @ %bb.0: @ %entry
453 ; CHECK-NEXT: vldrh.u16 q0, [r0]
454 ; CHECK-NEXT: sub.w r0, r0, #256
455 ; CHECK-NEXT: vstrh.16 q0, [r1]
458 %z = getelementptr inbounds i8, i8* %x, i32 -256
459 %0 = bitcast i8* %x to <8 x i16>*
460 %1 = load <8 x i16>, <8 x i16>* %0, align 2
461 %2 = bitcast i8* %y to <8 x i16>*
462 store <8 x i16> %1, <8 x i16>* %2, align 2
; <4 x i8> load from %x (align 1), zero-extended to <4 x i32> and stored to %y;
; %z = %x + 4 bytes. Expected output folds the +4 into a post-indexed vldrb.u32.
467 define i8* @ldrbu32_4(i8* %x, i8* %y) {
468 ; CHECK-LABEL: ldrbu32_4:
469 ; CHECK: @ %bb.0: @ %entry
470 ; CHECK-NEXT: vldrb.u32 q0, [r0], #4
471 ; CHECK-NEXT: vstrw.32 q0, [r1]
474 %z = getelementptr inbounds i8, i8* %x, i32 4
475 %0 = bitcast i8* %x to <4 x i8>*
476 %1 = load <4 x i8>, <4 x i8>* %0, align 1
477 %2 = zext <4 x i8> %1 to <4 x i32>
478 %3 = bitcast i8* %y to <4 x i32>*
479 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i8> load from %x (align 1), zero-extended to <4 x i32> and stored to %y;
; %z = %x + 3 bytes. Expected output folds the odd +3 into a post-indexed
; vldrb.u32.
483 define i8* @ldrbu32_3(i8* %x, i8* %y) {
484 ; CHECK-LABEL: ldrbu32_3:
485 ; CHECK: @ %bb.0: @ %entry
486 ; CHECK-NEXT: vldrb.u32 q0, [r0], #3
487 ; CHECK-NEXT: vstrw.32 q0, [r1]
490 %z = getelementptr inbounds i8, i8* %x, i32 3
491 %0 = bitcast i8* %x to <4 x i8>*
492 %1 = load <4 x i8>, <4 x i8>* %0, align 1
493 %2 = zext <4 x i8> %1 to <4 x i32>
494 %3 = bitcast i8* %y to <4 x i32>*
495 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i8> load from %x (align 1), zero-extended to <4 x i32> and stored to %y;
; %z = %x + 127 bytes. Expected output still folds +127 into a post-indexed
; vldrb.u32.
499 define i8* @ldrbu32_127(i8* %x, i8* %y) {
500 ; CHECK-LABEL: ldrbu32_127:
501 ; CHECK: @ %bb.0: @ %entry
502 ; CHECK-NEXT: vldrb.u32 q0, [r0], #127
503 ; CHECK-NEXT: vstrw.32 q0, [r1]
506 %z = getelementptr inbounds i8, i8* %x, i32 127
507 %0 = bitcast i8* %x to <4 x i8>*
508 %1 = load <4 x i8>, <4 x i8>* %0, align 1
509 %2 = zext <4 x i8> %1 to <4 x i32>
510 %3 = bitcast i8* %y to <4 x i32>*
511 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i8> load from %x (align 1), zero-extended to <4 x i32> and stored to %y;
; %z = %x + 128 bytes. Expected output does not fold +128: un-indexed load plus
; a separate adds on r0.
515 define i8* @ldrbu32_128(i8* %x, i8* %y) {
516 ; CHECK-LABEL: ldrbu32_128:
517 ; CHECK: @ %bb.0: @ %entry
518 ; CHECK-NEXT: vldrb.u32 q0, [r0]
519 ; CHECK-NEXT: adds r0, #128
520 ; CHECK-NEXT: vstrw.32 q0, [r1]
523 %z = getelementptr inbounds i8, i8* %x, i32 128
524 %0 = bitcast i8* %x to <4 x i8>*
525 %1 = load <4 x i8>, <4 x i8>* %0, align 1
526 %2 = zext <4 x i8> %1 to <4 x i32>
527 %3 = bitcast i8* %y to <4 x i32>*
528 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i8> load from %x (align 1), zero-extended to <4 x i32> and stored to %y;
; %z = %x - 127 bytes. Expected output still folds -127 into a post-indexed
; vldrb.u32.
532 define i8* @ldrbu32_m127(i8* %x, i8* %y) {
533 ; CHECK-LABEL: ldrbu32_m127:
534 ; CHECK: @ %bb.0: @ %entry
535 ; CHECK-NEXT: vldrb.u32 q0, [r0], #-127
536 ; CHECK-NEXT: vstrw.32 q0, [r1]
539 %z = getelementptr inbounds i8, i8* %x, i32 -127
540 %0 = bitcast i8* %x to <4 x i8>*
541 %1 = load <4 x i8>, <4 x i8>* %0, align 1
542 %2 = zext <4 x i8> %1 to <4 x i32>
543 %3 = bitcast i8* %y to <4 x i32>*
544 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i8> load from %x (align 1), zero-extended to <4 x i32> and stored to %y;
; %z = %x - 128 bytes. Expected output does not fold -128: un-indexed load plus
; a separate subs on r0.
548 define i8* @ldrbu32_m128(i8* %x, i8* %y) {
549 ; CHECK-LABEL: ldrbu32_m128:
550 ; CHECK: @ %bb.0: @ %entry
551 ; CHECK-NEXT: vldrb.u32 q0, [r0]
552 ; CHECK-NEXT: subs r0, #128
553 ; CHECK-NEXT: vstrw.32 q0, [r1]
556 %z = getelementptr inbounds i8, i8* %x, i32 -128
557 %0 = bitcast i8* %x to <4 x i8>*
558 %1 = load <4 x i8>, <4 x i8>* %0, align 1
559 %2 = zext <4 x i8> %1 to <4 x i32>
560 %3 = bitcast i8* %y to <4 x i32>*
561 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i8> load from %x (align 1), sign-extended to <4 x i32> and stored to %y;
; %z = %x + 4 bytes. Expected output folds the +4 into a post-indexed vldrb.s32.
566 define i8* @ldrbs32_4(i8* %x, i8* %y) {
567 ; CHECK-LABEL: ldrbs32_4:
568 ; CHECK: @ %bb.0: @ %entry
569 ; CHECK-NEXT: vldrb.s32 q0, [r0], #4
570 ; CHECK-NEXT: vstrw.32 q0, [r1]
573 %z = getelementptr inbounds i8, i8* %x, i32 4
574 %0 = bitcast i8* %x to <4 x i8>*
575 %1 = load <4 x i8>, <4 x i8>* %0, align 1
576 %2 = sext <4 x i8> %1 to <4 x i32>
577 %3 = bitcast i8* %y to <4 x i32>*
578 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i8> load from %x (align 1), sign-extended to <4 x i32> and stored to %y;
; %z = %x + 3 bytes. Expected output folds the odd +3 into a post-indexed
; vldrb.s32.
582 define i8* @ldrbs32_3(i8* %x, i8* %y) {
583 ; CHECK-LABEL: ldrbs32_3:
584 ; CHECK: @ %bb.0: @ %entry
585 ; CHECK-NEXT: vldrb.s32 q0, [r0], #3
586 ; CHECK-NEXT: vstrw.32 q0, [r1]
589 %z = getelementptr inbounds i8, i8* %x, i32 3
590 %0 = bitcast i8* %x to <4 x i8>*
591 %1 = load <4 x i8>, <4 x i8>* %0, align 1
592 %2 = sext <4 x i8> %1 to <4 x i32>
593 %3 = bitcast i8* %y to <4 x i32>*
594 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i8> load from %x (align 1), sign-extended to <4 x i32> and stored to %y;
; %z = %x + 127 bytes. Expected output still folds +127 into a post-indexed
; vldrb.s32.
598 define i8* @ldrbs32_127(i8* %x, i8* %y) {
599 ; CHECK-LABEL: ldrbs32_127:
600 ; CHECK: @ %bb.0: @ %entry
601 ; CHECK-NEXT: vldrb.s32 q0, [r0], #127
602 ; CHECK-NEXT: vstrw.32 q0, [r1]
605 %z = getelementptr inbounds i8, i8* %x, i32 127
606 %0 = bitcast i8* %x to <4 x i8>*
607 %1 = load <4 x i8>, <4 x i8>* %0, align 1
608 %2 = sext <4 x i8> %1 to <4 x i32>
609 %3 = bitcast i8* %y to <4 x i32>*
610 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i8> load from %x (align 1), sign-extended to <4 x i32> and stored to %y;
; %z = %x + 128 bytes. Expected output does not fold +128: un-indexed load plus
; a separate adds on r0.
614 define i8* @ldrbs32_128(i8* %x, i8* %y) {
615 ; CHECK-LABEL: ldrbs32_128:
616 ; CHECK: @ %bb.0: @ %entry
617 ; CHECK-NEXT: vldrb.s32 q0, [r0]
618 ; CHECK-NEXT: adds r0, #128
619 ; CHECK-NEXT: vstrw.32 q0, [r1]
622 %z = getelementptr inbounds i8, i8* %x, i32 128
623 %0 = bitcast i8* %x to <4 x i8>*
624 %1 = load <4 x i8>, <4 x i8>* %0, align 1
625 %2 = sext <4 x i8> %1 to <4 x i32>
626 %3 = bitcast i8* %y to <4 x i32>*
627 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i8> load from %x (align 1), sign-extended to <4 x i32> and stored to %y;
; %z = %x - 127 bytes. Expected output still folds -127 into a post-indexed
; vldrb.s32.
631 define i8* @ldrbs32_m127(i8* %x, i8* %y) {
632 ; CHECK-LABEL: ldrbs32_m127:
633 ; CHECK: @ %bb.0: @ %entry
634 ; CHECK-NEXT: vldrb.s32 q0, [r0], #-127
635 ; CHECK-NEXT: vstrw.32 q0, [r1]
638 %z = getelementptr inbounds i8, i8* %x, i32 -127
639 %0 = bitcast i8* %x to <4 x i8>*
640 %1 = load <4 x i8>, <4 x i8>* %0, align 1
641 %2 = sext <4 x i8> %1 to <4 x i32>
642 %3 = bitcast i8* %y to <4 x i32>*
643 store <4 x i32> %2, <4 x i32>* %3, align 4
; <4 x i8> load from %x (align 1), sign-extended to <4 x i32> and stored to %y;
; %z = %x - 128 bytes. Expected output does not fold -128: un-indexed load plus
; a separate subs on r0.
647 define i8* @ldrbs32_m128(i8* %x, i8* %y) {
648 ; CHECK-LABEL: ldrbs32_m128:
649 ; CHECK: @ %bb.0: @ %entry
650 ; CHECK-NEXT: vldrb.s32 q0, [r0]
651 ; CHECK-NEXT: subs r0, #128
652 ; CHECK-NEXT: vstrw.32 q0, [r1]
655 %z = getelementptr inbounds i8, i8* %x, i32 -128
656 %0 = bitcast i8* %x to <4 x i8>*
657 %1 = load <4 x i8>, <4 x i8>* %0, align 1
658 %2 = sext <4 x i8> %1 to <4 x i32>
659 %3 = bitcast i8* %y to <4 x i32>*
660 store <4 x i32> %2, <4 x i32>* %3, align 4
; <8 x i8> load from %x (align 1), zero-extended to <8 x i16> and stored to %y;
; %z = %x + 4 bytes. Expected output folds the +4 into a post-indexed vldrb.u16.
665 define i8* @ldrbu16_4(i8* %x, i8* %y) {
666 ; CHECK-LABEL: ldrbu16_4:
667 ; CHECK: @ %bb.0: @ %entry
668 ; CHECK-NEXT: vldrb.u16 q0, [r0], #4
669 ; CHECK-NEXT: vstrh.16 q0, [r1]
672 %z = getelementptr inbounds i8, i8* %x, i32 4
673 %0 = bitcast i8* %x to <8 x i8>*
674 %1 = load <8 x i8>, <8 x i8>* %0, align 1
675 %2 = zext <8 x i8> %1 to <8 x i16>
676 %3 = bitcast i8* %y to <8 x i16>*
677 store <8 x i16> %2, <8 x i16>* %3, align 2
; <8 x i8> load from %x (align 1), zero-extended to <8 x i16> and stored to %y;
; %z = %x + 3 bytes. Expected output folds the odd +3 into a post-indexed
; vldrb.u16.
681 define i8* @ldrbu16_3(i8* %x, i8* %y) {
682 ; CHECK-LABEL: ldrbu16_3:
683 ; CHECK: @ %bb.0: @ %entry
684 ; CHECK-NEXT: vldrb.u16 q0, [r0], #3
685 ; CHECK-NEXT: vstrh.16 q0, [r1]
688 %z = getelementptr inbounds i8, i8* %x, i32 3
689 %0 = bitcast i8* %x to <8 x i8>*
690 %1 = load <8 x i8>, <8 x i8>* %0, align 1
691 %2 = zext <8 x i8> %1 to <8 x i16>
692 %3 = bitcast i8* %y to <8 x i16>*
693 store <8 x i16> %2, <8 x i16>* %3, align 2
; <8 x i8> load from %x (align 1), zero-extended to <8 x i16> and stored to %y;
; %z = %x + 127 bytes. Expected output still folds +127 into a post-indexed
; vldrb.u16.
697 define i8* @ldrbu16_127(i8* %x, i8* %y) {
698 ; CHECK-LABEL: ldrbu16_127:
699 ; CHECK: @ %bb.0: @ %entry
700 ; CHECK-NEXT: vldrb.u16 q0, [r0], #127
701 ; CHECK-NEXT: vstrh.16 q0, [r1]
704 %z = getelementptr inbounds i8, i8* %x, i32 127
705 %0 = bitcast i8* %x to <8 x i8>*
706 %1 = load <8 x i8>, <8 x i8>* %0, align 1
707 %2 = zext <8 x i8> %1 to <8 x i16>
708 %3 = bitcast i8* %y to <8 x i16>*
709 store <8 x i16> %2, <8 x i16>* %3, align 2
; <8 x i8> load from %x (align 1), zero-extended to <8 x i16> and stored to %y;
; %z = %x + 128 bytes. Expected output does not fold +128: un-indexed load plus
; a separate adds on r0.
713 define i8* @ldrbu16_128(i8* %x, i8* %y) {
714 ; CHECK-LABEL: ldrbu16_128:
715 ; CHECK: @ %bb.0: @ %entry
716 ; CHECK-NEXT: vldrb.u16 q0, [r0]
717 ; CHECK-NEXT: adds r0, #128
718 ; CHECK-NEXT: vstrh.16 q0, [r1]
721 %z = getelementptr inbounds i8, i8* %x, i32 128
722 %0 = bitcast i8* %x to <8 x i8>*
723 %1 = load <8 x i8>, <8 x i8>* %0, align 1
724 %2 = zext <8 x i8> %1 to <8 x i16>
725 %3 = bitcast i8* %y to <8 x i16>*
726 store <8 x i16> %2, <8 x i16>* %3, align 2
; <8 x i8> load from %x (align 1), zero-extended to <8 x i16> and stored to %y;
; %z = %x - 127 bytes. Expected output still folds -127 into a post-indexed
; vldrb.u16.
730 define i8* @ldrbu16_m127(i8* %x, i8* %y) {
731 ; CHECK-LABEL: ldrbu16_m127:
732 ; CHECK: @ %bb.0: @ %entry
733 ; CHECK-NEXT: vldrb.u16 q0, [r0], #-127
734 ; CHECK-NEXT: vstrh.16 q0, [r1]
737 %z = getelementptr inbounds i8, i8* %x, i32 -127
738 %0 = bitcast i8* %x to <8 x i8>*
739 %1 = load <8 x i8>, <8 x i8>* %0, align 1
740 %2 = zext <8 x i8> %1 to <8 x i16>
741 %3 = bitcast i8* %y to <8 x i16>*
742 store <8 x i16> %2, <8 x i16>* %3, align 2
; <8 x i8> load from %x (align 1), zero-extended to <8 x i16> and stored to %y;
; %z = %x - 128 bytes. Expected output does not fold -128: un-indexed load plus
; a separate subs on r0.
746 define i8* @ldrbu16_m128(i8* %x, i8* %y) {
747 ; CHECK-LABEL: ldrbu16_m128:
748 ; CHECK: @ %bb.0: @ %entry
749 ; CHECK-NEXT: vldrb.u16 q0, [r0]
750 ; CHECK-NEXT: subs r0, #128
751 ; CHECK-NEXT: vstrh.16 q0, [r1]
754 %z = getelementptr inbounds i8, i8* %x, i32 -128
755 %0 = bitcast i8* %x to <8 x i8>*
756 %1 = load <8 x i8>, <8 x i8>* %0, align 1
757 %2 = zext <8 x i8> %1 to <8 x i16>
758 %3 = bitcast i8* %y to <8 x i16>*
759 store <8 x i16> %2, <8 x i16>* %3, align 2
; <8 x i8> load from %x (align 1), sign-extended to <8 x i16> and stored to %y;
; %z = %x + 4 bytes. Expected output folds the +4 into a post-indexed vldrb.s16.
764 define i8* @ldrbs16_4(i8* %x, i8* %y) {
765 ; CHECK-LABEL: ldrbs16_4:
766 ; CHECK: @ %bb.0: @ %entry
767 ; CHECK-NEXT: vldrb.s16 q0, [r0], #4
768 ; CHECK-NEXT: vstrh.16 q0, [r1]
771 %z = getelementptr inbounds i8, i8* %x, i32 4
772 %0 = bitcast i8* %x to <8 x i8>*
773 %1 = load <8 x i8>, <8 x i8>* %0, align 1
774 %2 = sext <8 x i8> %1 to <8 x i16>
775 %3 = bitcast i8* %y to <8 x i16>*
776 store <8 x i16> %2, <8 x i16>* %3, align 2
; <8 x i8> load from %x (align 1), sign-extended to <8 x i16> and stored to %y;
; %z = %x + 3 bytes. Expected output folds the odd +3 into a post-indexed
; vldrb.s16.
780 define i8* @ldrbs16_3(i8* %x, i8* %y) {
781 ; CHECK-LABEL: ldrbs16_3:
782 ; CHECK: @ %bb.0: @ %entry
783 ; CHECK-NEXT: vldrb.s16 q0, [r0], #3
784 ; CHECK-NEXT: vstrh.16 q0, [r1]
787 %z = getelementptr inbounds i8, i8* %x, i32 3
788 %0 = bitcast i8* %x to <8 x i8>*
789 %1 = load <8 x i8>, <8 x i8>* %0, align 1
790 %2 = sext <8 x i8> %1 to <8 x i16>
791 %3 = bitcast i8* %y to <8 x i16>*
792 store <8 x i16> %2, <8 x i16>* %3, align 2
; <8 x i8> load from %x (align 1), sign-extended to <8 x i16> and stored to %y;
; %z = %x + 127 bytes. Expected output still folds +127 into a post-indexed
; vldrb.s16.
796 define i8* @ldrbs16_127(i8* %x, i8* %y) {
797 ; CHECK-LABEL: ldrbs16_127:
798 ; CHECK: @ %bb.0: @ %entry
799 ; CHECK-NEXT: vldrb.s16 q0, [r0], #127
800 ; CHECK-NEXT: vstrh.16 q0, [r1]
803 %z = getelementptr inbounds i8, i8* %x, i32 127
804 %0 = bitcast i8* %x to <8 x i8>*
805 %1 = load <8 x i8>, <8 x i8>* %0, align 1
806 %2 = sext <8 x i8> %1 to <8 x i16>
807 %3 = bitcast i8* %y to <8 x i16>*
808 store <8 x i16> %2, <8 x i16>* %3, align 2
; <8 x i8> load from %x (align 1), sign-extended to <8 x i16> and stored to %y;
; %z = %x + 128 bytes. Expected output does not fold +128: un-indexed load plus
; a separate adds on r0.
812 define i8* @ldrbs16_128(i8* %x, i8* %y) {
813 ; CHECK-LABEL: ldrbs16_128:
814 ; CHECK: @ %bb.0: @ %entry
815 ; CHECK-NEXT: vldrb.s16 q0, [r0]
816 ; CHECK-NEXT: adds r0, #128
817 ; CHECK-NEXT: vstrh.16 q0, [r1]
820 %z = getelementptr inbounds i8, i8* %x, i32 128
821 %0 = bitcast i8* %x to <8 x i8>*
822 %1 = load <8 x i8>, <8 x i8>* %0, align 1
823 %2 = sext <8 x i8> %1 to <8 x i16>
824 %3 = bitcast i8* %y to <8 x i16>*
825 store <8 x i16> %2, <8 x i16>* %3, align 2
; <8 x i8> load from %x (align 1), sign-extended to <8 x i16> and stored to %y;
; %z = %x - 127 bytes. Expected output still folds -127 into a post-indexed
; vldrb.s16.
829 define i8* @ldrbs16_m127(i8* %x, i8* %y) {
830 ; CHECK-LABEL: ldrbs16_m127:
831 ; CHECK: @ %bb.0: @ %entry
832 ; CHECK-NEXT: vldrb.s16 q0, [r0], #-127
833 ; CHECK-NEXT: vstrh.16 q0, [r1]
836 %z = getelementptr inbounds i8, i8* %x, i32 -127
837 %0 = bitcast i8* %x to <8 x i8>*
838 %1 = load <8 x i8>, <8 x i8>* %0, align 1
839 %2 = sext <8 x i8> %1 to <8 x i16>
840 %3 = bitcast i8* %y to <8 x i16>*
841 store <8 x i16> %2, <8 x i16>* %3, align 2
; <8 x i8> load from %x (align 1), sign-extended to <8 x i16> and stored to %y;
; %z = %x - 128 bytes. Expected output does not fold -128: un-indexed load plus
; a separate subs on r0.
845 define i8* @ldrbs16_m128(i8* %x, i8* %y) {
846 ; CHECK-LABEL: ldrbs16_m128:
847 ; CHECK: @ %bb.0: @ %entry
848 ; CHECK-NEXT: vldrb.s16 q0, [r0]
849 ; CHECK-NEXT: subs r0, #128
850 ; CHECK-NEXT: vstrh.16 q0, [r1]
853 %z = getelementptr inbounds i8, i8* %x, i32 -128
854 %0 = bitcast i8* %x to <8 x i8>*
855 %1 = load <8 x i8>, <8 x i8>* %0, align 1
856 %2 = sext <8 x i8> %1 to <8 x i16>
857 %3 = bitcast i8* %y to <8 x i16>*
858 store <8 x i16> %2, <8 x i16>* %3, align 2
; <16 x i8> load from %x (align 1) stored to %y, with %z = %x + 4 bytes.
; Expected output folds the +4 into a post-indexed vldrb.u8.
863 define i8* @ldrbu8_4(i8* %x, i8* %y) {
864 ; CHECK-LABEL: ldrbu8_4:
865 ; CHECK: @ %bb.0: @ %entry
866 ; CHECK-NEXT: vldrb.u8 q0, [r0], #4
867 ; CHECK-NEXT: vstrb.8 q0, [r1]
870 %z = getelementptr inbounds i8, i8* %x, i32 4
871 %0 = bitcast i8* %x to <16 x i8>*
872 %1 = load <16 x i8>, <16 x i8>* %0, align 1
873 %2 = bitcast i8* %y to <16 x i8>*
874 store <16 x i8> %1, <16 x i8>* %2, align 1
; <16 x i8> load from %x (align 1) stored to %y, with %z = %x + 3 bytes.
; Expected output folds the odd +3 into a post-indexed vldrb.u8.
878 define i8* @ldrbu8_3(i8* %x, i8* %y) {
879 ; CHECK-LABEL: ldrbu8_3:
880 ; CHECK: @ %bb.0: @ %entry
881 ; CHECK-NEXT: vldrb.u8 q0, [r0], #3
882 ; CHECK-NEXT: vstrb.8 q0, [r1]
885 %z = getelementptr inbounds i8, i8* %x, i32 3
886 %0 = bitcast i8* %x to <16 x i8>*
887 %1 = load <16 x i8>, <16 x i8>* %0, align 1
888 %2 = bitcast i8* %y to <16 x i8>*
889 store <16 x i8> %1, <16 x i8>* %2, align 1
; <16 x i8> load from %x (align 1) stored to %y, with %z = %x + 127 bytes.
; Expected output still folds +127 into a post-indexed vldrb.u8.
893 define i8* @ldrbu8_127(i8* %x, i8* %y) {
894 ; CHECK-LABEL: ldrbu8_127:
895 ; CHECK: @ %bb.0: @ %entry
896 ; CHECK-NEXT: vldrb.u8 q0, [r0], #127
897 ; CHECK-NEXT: vstrb.8 q0, [r1]
900 %z = getelementptr inbounds i8, i8* %x, i32 127
901 %0 = bitcast i8* %x to <16 x i8>*
902 %1 = load <16 x i8>, <16 x i8>* %0, align 1
903 %2 = bitcast i8* %y to <16 x i8>*
904 store <16 x i8> %1, <16 x i8>* %2, align 1
; <16 x i8> load from %x (align 1) stored to %y, with %z = %x + 128 bytes.
; Expected output does not fold +128: un-indexed load plus a separate adds
; on r0.
908 define i8* @ldrbu8_128(i8* %x, i8* %y) {
909 ; CHECK-LABEL: ldrbu8_128:
910 ; CHECK: @ %bb.0: @ %entry
911 ; CHECK-NEXT: vldrb.u8 q0, [r0]
912 ; CHECK-NEXT: adds r0, #128
913 ; CHECK-NEXT: vstrb.8 q0, [r1]
916 %z = getelementptr inbounds i8, i8* %x, i32 128
917 %0 = bitcast i8* %x to <16 x i8>*
918 %1 = load <16 x i8>, <16 x i8>* %0, align 1
919 %2 = bitcast i8* %y to <16 x i8>*
920 store <16 x i8> %1, <16 x i8>* %2, align 1
; <16 x i8> load from %x (align 1) stored to %y, with %z = %x - 127 bytes.
; Expected output still folds -127 into a post-indexed vldrb.u8.
924 define i8* @ldrbu8_m127(i8* %x, i8* %y) {
925 ; CHECK-LABEL: ldrbu8_m127:
926 ; CHECK: @ %bb.0: @ %entry
927 ; CHECK-NEXT: vldrb.u8 q0, [r0], #-127
928 ; CHECK-NEXT: vstrb.8 q0, [r1]
931 %z = getelementptr inbounds i8, i8* %x, i32 -127
932 %0 = bitcast i8* %x to <16 x i8>*
933 %1 = load <16 x i8>, <16 x i8>* %0, align 1
934 %2 = bitcast i8* %y to <16 x i8>*
935 store <16 x i8> %1, <16 x i8>* %2, align 1
; <16 x i8> load from %x (align 1) stored to %y, with %z = %x - 128 bytes.
; Expected output does not fold -128: un-indexed load plus a separate subs
; on r0.
939 define i8* @ldrbu8_m128(i8* %x, i8* %y) {
940 ; CHECK-LABEL: ldrbu8_m128:
941 ; CHECK: @ %bb.0: @ %entry
942 ; CHECK-NEXT: vldrb.u8 q0, [r0]
943 ; CHECK-NEXT: subs r0, #128
944 ; CHECK-NEXT: vstrb.8 q0, [r1]
947 %z = getelementptr inbounds i8, i8* %x, i32 -128
948 %0 = bitcast i8* %x to <16 x i8>*
949 %1 = load <16 x i8>, <16 x i8>* %0, align 1
950 %2 = bitcast i8* %y to <16 x i8>*
951 store <16 x i8> %1, <16 x i8>* %2, align 1
; <4 x float> load from %x (align 4) stored to %y, with %z = %x + 4 bytes.
; Expected output folds the +4 into a post-indexed vldrw.u32.
956 define i8* @ldrwf32_4(i8* %x, i8* %y) {
957 ; CHECK-LABEL: ldrwf32_4:
958 ; CHECK: @ %bb.0: @ %entry
959 ; CHECK-NEXT: vldrw.u32 q0, [r0], #4
960 ; CHECK-NEXT: vstrw.32 q0, [r1]
963 %z = getelementptr inbounds i8, i8* %x, i32 4
964 %0 = bitcast i8* %x to <4 x float>*
965 %1 = load <4 x float>, <4 x float>* %0, align 4
966 %2 = bitcast i8* %y to <4 x float>*
967 store <4 x float> %1, <4 x float>* %2, align 4
; <8 x half> load from %x (align 2) stored to %y, with %z = %x + 4 bytes.
; Expected output folds the +4 into a post-indexed vldrh.u16 (a halfword
; load, despite the "ldrw" in the function name).
971 define i8* @ldrwf16_4(i8* %x, i8* %y) {
972 ; CHECK-LABEL: ldrwf16_4:
973 ; CHECK: @ %bb.0: @ %entry
974 ; CHECK-NEXT: vldrh.u16 q0, [r0], #4
975 ; CHECK-NEXT: vstrh.16 q0, [r1]
978 %z = getelementptr inbounds i8, i8* %x, i32 4
979 %0 = bitcast i8* %x to <8 x half>*
980 %1 = load <8 x half>, <8 x half>* %0, align 2
981 %2 = bitcast i8* %y to <8 x half>*
982 store <8 x half> %1, <8 x half>* %2, align 2
; <4 x i32> load from %x with only align 1, stored to %y (align 4);
; %z = %x + 3 bytes. Both runs expect a byte load (vldrb.u8) post-indexed by #3;
; the big-endian run additionally expects a vrev32.8 before the store.
986 define i8* @ldrwi32_align1(i8* %x, i8* %y) {
987 ; CHECK-LE-LABEL: ldrwi32_align1:
988 ; CHECK-LE: @ %bb.0: @ %entry
989 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
990 ; CHECK-LE-NEXT: vstrw.32 q0, [r1]
991 ; CHECK-LE-NEXT: bx lr
993 ; CHECK-BE-LABEL: ldrwi32_align1:
994 ; CHECK-BE: @ %bb.0: @ %entry
995 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0], #3
996 ; CHECK-BE-NEXT: vrev32.8 q0, q0
997 ; CHECK-BE-NEXT: vstrw.32 q0, [r1]
998 ; CHECK-BE-NEXT: bx lr
1000 %z = getelementptr inbounds i8, i8* %x, i32 3
1001 %0 = bitcast i8* %x to <4 x i32>*
1002 %1 = load <4 x i32>, <4 x i32>* %0, align 1
1003 %2 = bitcast i8* %y to <4 x i32>*
1004 store <4 x i32> %1, <4 x i32>* %2, align 4
; <8 x i16> load from %x with only align 1, stored to %y (align 2);
; %z = %x + 3 bytes. Both runs expect a byte load (vldrb.u8) post-indexed by #3;
; the big-endian run additionally expects a vrev16.8 before the store.
1008 define i8* @ldrhi16_align1(i8* %x, i8* %y) {
1009 ; CHECK-LE-LABEL: ldrhi16_align1:
1010 ; CHECK-LE: @ %bb.0: @ %entry
1011 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
1012 ; CHECK-LE-NEXT: vstrh.16 q0, [r1]
1013 ; CHECK-LE-NEXT: bx lr
1015 ; CHECK-BE-LABEL: ldrhi16_align1:
1016 ; CHECK-BE: @ %bb.0: @ %entry
1017 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0], #3
1018 ; CHECK-BE-NEXT: vrev16.8 q0, q0
1019 ; CHECK-BE-NEXT: vstrh.16 q0, [r1]
1020 ; CHECK-BE-NEXT: bx lr
1022 %z = getelementptr inbounds i8, i8* %x, i32 3
1023 %0 = bitcast i8* %x to <8 x i16>*
1024 %1 = load <8 x i16>, <8 x i16>* %0, align 1
1025 %2 = bitcast i8* %y to <8 x i16>*
1026 store <8 x i16> %1, <8 x i16>* %2, align 2
; <4 x i16> load from %x with only align 1, sign-extended to <4 x i32> and
; stored to %y; %z = %x + 3 bytes. Expected output does not use a post-indexed
; vector load: the 8 bytes are copied to an 8-byte stack slot with two scalar
; ldr and a strd, then widened with vldrh.s32 from the stack, and r0 is
; advanced by a separate adds.
1030 define i8* @ldrhi32_align1(i8* %x, i8* %y) {
1031 ; CHECK-LABEL: ldrhi32_align1:
1032 ; CHECK: @ %bb.0: @ %entry
1033 ; CHECK-NEXT: .pad #8
1034 ; CHECK-NEXT: sub sp, #8
1035 ; CHECK-NEXT: ldr r3, [r0, #4]
1036 ; CHECK-NEXT: ldr r2, [r0]
1037 ; CHECK-NEXT: adds r0, #3
1038 ; CHECK-NEXT: strd r2, r3, [sp]
1039 ; CHECK-NEXT: mov r2, sp
1040 ; CHECK-NEXT: vldrh.s32 q0, [r2]
1041 ; CHECK-NEXT: vstrw.32 q0, [r1]
1042 ; CHECK-NEXT: add sp, #8
1045 %z = getelementptr inbounds i8, i8* %x, i32 3
1046 %0 = bitcast i8* %x to <4 x i16>*
1047 %1 = load <4 x i16>, <4 x i16>* %0, align 1
1048 %2 = bitcast i8* %y to <4 x i32>*
1049 %3 = sext <4 x i16> %1 to <4 x i32>
1050 store <4 x i32> %3, <4 x i32>* %2, align 4
; <4 x float> load from %x with only align 1, stored to %y (align 4);
; %z = %x + 3 bytes. Both runs expect a byte load (vldrb.u8) post-indexed by #3;
; the big-endian run additionally expects a vrev32.8 before the store.
1054 define i8* @ldrf32_align1(i8* %x, i8* %y) {
1055 ; CHECK-LE-LABEL: ldrf32_align1:
1056 ; CHECK-LE: @ %bb.0: @ %entry
1057 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
1058 ; CHECK-LE-NEXT: vstrw.32 q0, [r1]
1059 ; CHECK-LE-NEXT: bx lr
1061 ; CHECK-BE-LABEL: ldrf32_align1:
1062 ; CHECK-BE: @ %bb.0: @ %entry
1063 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0], #3
1064 ; CHECK-BE-NEXT: vrev32.8 q0, q0
1065 ; CHECK-BE-NEXT: vstrw.32 q0, [r1]
1066 ; CHECK-BE-NEXT: bx lr
1068 %z = getelementptr inbounds i8, i8* %x, i32 3
1069 %0 = bitcast i8* %x to <4 x float>*
1070 %1 = load <4 x float>, <4 x float>* %0, align 1
1071 %2 = bitcast i8* %y to <4 x float>*
1072 store <4 x float> %1, <4 x float>* %2, align 4
; <8 x half> load from %x with only align 1, stored to %y (align 2);
; %z = %x + 3 bytes. Both runs expect a byte load (vldrb.u8) post-indexed by #3;
; the big-endian run additionally expects a vrev16.8 before the store.
1076 define i8* @ldrf16_align1(i8* %x, i8* %y) {
1077 ; CHECK-LE-LABEL: ldrf16_align1:
1078 ; CHECK-LE: @ %bb.0: @ %entry
1079 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0], #3
1080 ; CHECK-LE-NEXT: vstrh.16 q0, [r1]
1081 ; CHECK-LE-NEXT: bx lr
1083 ; CHECK-BE-LABEL: ldrf16_align1:
1084 ; CHECK-BE: @ %bb.0: @ %entry
1085 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0], #3
1086 ; CHECK-BE-NEXT: vrev16.8 q0, q0
1087 ; CHECK-BE-NEXT: vstrh.16 q0, [r1]
1088 ; CHECK-BE-NEXT: bx lr
1090 %z = getelementptr inbounds i8, i8* %x, i32 3
1091 %0 = bitcast i8* %x to <8 x half>*
1092 %1 = load <8 x half>, <8 x half>* %0, align 1
1093 %2 = bitcast i8* %y to <8 x half>*
1094 store <8 x half> %1, <8 x half>* %2, align 2
; <8 x i16> load from %x with align 8, stored to %y (align 2);
; %z = %x + 4 bytes. The little-endian run expects a word load (vldrw.u32)
; post-indexed by #4; the big-endian run expects vldrh.u16 post-indexed by #4.
1098 define i8* @ldrh16_align8(i8* %x, i8* %y) {
1099 ; CHECK-LE-LABEL: ldrh16_align8:
1100 ; CHECK-LE: @ %bb.0: @ %entry
1101 ; CHECK-LE-NEXT: vldrw.u32 q0, [r0], #4
1102 ; CHECK-LE-NEXT: vstrh.16 q0, [r1]
1103 ; CHECK-LE-NEXT: bx lr
1105 ; CHECK-BE-LABEL: ldrh16_align8:
1106 ; CHECK-BE: @ %bb.0: @ %entry
1107 ; CHECK-BE-NEXT: vldrh.u16 q0, [r0], #4
1108 ; CHECK-BE-NEXT: vstrh.16 q0, [r1]
1109 ; CHECK-BE-NEXT: bx lr
1111 %z = getelementptr inbounds i8, i8* %x, i32 4
1112 %0 = bitcast i8* %x to <8 x i16>*
1113 %1 = load <8 x i16>, <8 x i16>* %0, align 8
1114 %2 = bitcast i8* %y to <8 x i16>*
1115 store <8 x i16> %1, <8 x i16>* %2, align 2
; Store variant: <4 x i32> loaded from %x (align 4) and stored to %y (align 4),
; with %z = %y + 4 bytes. Note the parameter order is (%y, %x).
; The little-endian run expects the +4 folded into a post-indexed vstrb.8;
; the big-endian run expects it folded into a post-indexed vstrw.32.
1123 define i8* @strw32_4(i8* %y, i8* %x) {
1124 ; CHECK-LE-LABEL: strw32_4:
1125 ; CHECK-LE: @ %bb.0: @ %entry
1126 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
1127 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4
1128 ; CHECK-LE-NEXT: bx lr
1130 ; CHECK-BE-LABEL: strw32_4:
1131 ; CHECK-BE: @ %bb.0: @ %entry
1132 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
1133 ; CHECK-BE-NEXT: vstrw.32 q0, [r0], #4
1134 ; CHECK-BE-NEXT: bx lr
1136 %z = getelementptr inbounds i8, i8* %y, i32 4
1137 %0 = bitcast i8* %x to <4 x i32>*
1138 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1139 %2 = bitcast i8* %y to <4 x i32>*
1140 store <4 x i32> %1, <4 x i32>* %2, align 4
; Post-increment store test: <4 x i32> store to %y (align 4) with a +3 byte pointer step (%z).
; Little-endian checks expect the step folded into a byte store (#3); big-endian checks expect a plain vstrw.32 plus a separate adds.
1144 define i8* @strw32_3(i8* %y, i8* %x) {
1145 ; CHECK-LE-LABEL: strw32_3:
1146 ; CHECK-LE: @ %bb.0: @ %entry
1147 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
1148 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
1149 ; CHECK-LE-NEXT: bx lr
1151 ; CHECK-BE-LABEL: strw32_3:
1152 ; CHECK-BE: @ %bb.0: @ %entry
1153 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
1154 ; CHECK-BE-NEXT: vstrw.32 q0, [r0]
1155 ; CHECK-BE-NEXT: adds r0, #3
1156 ; CHECK-BE-NEXT: bx lr
1158 %z = getelementptr inbounds i8, i8* %y, i32 3
1159 %0 = bitcast i8* %x to <4 x i32>*
1160 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1161 %2 = bitcast i8* %y to <4 x i32>*
1162 store <4 x i32> %1, <4 x i32>* %2, align 4
; Post-increment store test: <4 x i32> store to %y (align 4) with a -4 byte pointer step (%z).
; Both endiannesses expect the negative step folded into the store: vstrb.8 #-4 for little-endian, vstrw.32 #-4 for big-endian.
1166 define i8* @strw32_m4(i8* %y, i8* %x) {
1167 ; CHECK-LE-LABEL: strw32_m4:
1168 ; CHECK-LE: @ %bb.0: @ %entry
1169 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
1170 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #-4
1171 ; CHECK-LE-NEXT: bx lr
1173 ; CHECK-BE-LABEL: strw32_m4:
1174 ; CHECK-BE: @ %bb.0: @ %entry
1175 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
1176 ; CHECK-BE-NEXT: vstrw.32 q0, [r0], #-4
1177 ; CHECK-BE-NEXT: bx lr
1179 %z = getelementptr inbounds i8, i8* %y, i32 -4
1180 %0 = bitcast i8* %x to <4 x i32>*
1181 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1182 %2 = bitcast i8* %y to <4 x i32>*
1183 store <4 x i32> %1, <4 x i32>* %2, align 4
; Post-increment store test: <4 x i32> store to %y (align 4) with a +508 byte pointer step (%z).
; The shared checks expect the step folded into a post-indexed vstrw.32 (#508).
1187 define i8* @strw32_508(i8* %y, i8* %x) {
1188 ; CHECK-LABEL: strw32_508:
1189 ; CHECK: @ %bb.0: @ %entry
1190 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1191 ; CHECK-NEXT: vstrw.32 q0, [r0], #508
1194 %z = getelementptr inbounds i8, i8* %y, i32 508
1195 %0 = bitcast i8* %x to <4 x i32>*
1196 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1197 %2 = bitcast i8* %y to <4 x i32>*
1198 store <4 x i32> %1, <4 x i32>* %2, align 4
; Post-increment store test: <4 x i32> store to %y (align 4) with a +512 byte pointer step (%z).
; The shared checks expect no folding here: a plain vstrw.32 followed by a separate add.w of #512.
1202 define i8* @strw32_512(i8* %y, i8* %x) {
1203 ; CHECK-LABEL: strw32_512:
1204 ; CHECK: @ %bb.0: @ %entry
1205 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1206 ; CHECK-NEXT: vstrw.32 q0, [r0]
1207 ; CHECK-NEXT: add.w r0, r0, #512
1210 %z = getelementptr inbounds i8, i8* %y, i32 512
1211 %0 = bitcast i8* %x to <4 x i32>*
1212 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1213 %2 = bitcast i8* %y to <4 x i32>*
1214 store <4 x i32> %1, <4 x i32>* %2, align 4
; Post-increment store test: <4 x i32> store to %y (align 4) with a -508 byte pointer step (%z).
; The shared checks expect the step folded into a post-indexed vstrw.32 (#-508).
1218 define i8* @strw32_m508(i8* %y, i8* %x) {
1219 ; CHECK-LABEL: strw32_m508:
1220 ; CHECK: @ %bb.0: @ %entry
1221 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1222 ; CHECK-NEXT: vstrw.32 q0, [r0], #-508
1225 %z = getelementptr inbounds i8, i8* %y, i32 -508
1226 %0 = bitcast i8* %x to <4 x i32>*
1227 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1228 %2 = bitcast i8* %y to <4 x i32>*
1229 store <4 x i32> %1, <4 x i32>* %2, align 4
; Post-increment store test: <4 x i32> store to %y (align 4) with a -512 byte pointer step (%z).
; The shared checks expect no folding here: a plain vstrw.32 followed by a separate sub.w of #512.
1233 define i8* @strw32_m512(i8* %y, i8* %x) {
1234 ; CHECK-LABEL: strw32_m512:
1235 ; CHECK: @ %bb.0: @ %entry
1236 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1237 ; CHECK-NEXT: vstrw.32 q0, [r0]
1238 ; CHECK-NEXT: sub.w r0, r0, #512
1241 %z = getelementptr inbounds i8, i8* %y, i32 -512
1242 %0 = bitcast i8* %x to <4 x i32>*
1243 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1244 %2 = bitcast i8* %y to <4 x i32>*
1245 store <4 x i32> %1, <4 x i32>* %2, align 4
; Post-increment store test: <4 x i16> store to %y (align 2) with a +4 byte pointer step (%z).
; The shared checks expect the step folded into a post-indexed vstrh.32 (#4).
1250 define i8* @strh32_4(i8* %y, i8* %x) {
1251 ; CHECK-LABEL: strh32_4:
1252 ; CHECK: @ %bb.0: @ %entry
1253 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1254 ; CHECK-NEXT: vstrh.32 q0, [r0], #4
1257 %z = getelementptr inbounds i8, i8* %y, i32 4
1258 %0 = bitcast i8* %x to <4 x i16>*
1259 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1260 %2 = bitcast i8* %y to <4 x i16>*
1261 store <4 x i16> %1, <4 x i16>* %2, align 2
; Post-increment store test: <4 x i16> store to %y (align 2) with an odd +3 byte pointer step (%z).
; The shared checks expect no folding here: a plain vstrh.32 followed by a separate adds of #3.
1265 define i8* @strh32_3(i8* %y, i8* %x) {
1266 ; CHECK-LABEL: strh32_3:
1267 ; CHECK: @ %bb.0: @ %entry
1268 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1269 ; CHECK-NEXT: vstrh.32 q0, [r0]
1270 ; CHECK-NEXT: adds r0, #3
1273 %z = getelementptr inbounds i8, i8* %y, i32 3
1274 %0 = bitcast i8* %x to <4 x i16>*
1275 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1276 %2 = bitcast i8* %y to <4 x i16>*
1277 store <4 x i16> %1, <4 x i16>* %2, align 2
; Post-increment store test: <4 x i16> store to %y (align 2) with a +2 byte pointer step (%z).
; The shared checks expect the step folded into a post-indexed vstrh.32 (#2).
1281 define i8* @strh32_2(i8* %y, i8* %x) {
1282 ; CHECK-LABEL: strh32_2:
1283 ; CHECK: @ %bb.0: @ %entry
1284 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1285 ; CHECK-NEXT: vstrh.32 q0, [r0], #2
1288 %z = getelementptr inbounds i8, i8* %y, i32 2
1289 %0 = bitcast i8* %x to <4 x i16>*
1290 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1291 %2 = bitcast i8* %y to <4 x i16>*
1292 store <4 x i16> %1, <4 x i16>* %2, align 2
; Post-increment store test: <4 x i16> store to %y (align 2) with a +254 byte pointer step (%z).
; The shared checks expect the step folded into a post-indexed vstrh.32 (#254).
1296 define i8* @strh32_254(i8* %y, i8* %x) {
1297 ; CHECK-LABEL: strh32_254:
1298 ; CHECK: @ %bb.0: @ %entry
1299 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1300 ; CHECK-NEXT: vstrh.32 q0, [r0], #254
1303 %z = getelementptr inbounds i8, i8* %y, i32 254
1304 %0 = bitcast i8* %x to <4 x i16>*
1305 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1306 %2 = bitcast i8* %y to <4 x i16>*
1307 store <4 x i16> %1, <4 x i16>* %2, align 2
; Post-increment store test: <4 x i16> store to %y (align 2) with a +256 byte pointer step (%z).
; The shared checks expect no folding here: a plain vstrh.32 followed by a separate add.w of #256.
1311 define i8* @strh32_256(i8* %y, i8* %x) {
1312 ; CHECK-LABEL: strh32_256:
1313 ; CHECK: @ %bb.0: @ %entry
1314 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1315 ; CHECK-NEXT: vstrh.32 q0, [r0]
1316 ; CHECK-NEXT: add.w r0, r0, #256
1319 %z = getelementptr inbounds i8, i8* %y, i32 256
1320 %0 = bitcast i8* %x to <4 x i16>*
1321 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1322 %2 = bitcast i8* %y to <4 x i16>*
1323 store <4 x i16> %1, <4 x i16>* %2, align 2
; Post-increment store test: <4 x i16> store to %y (align 2) with a -254 byte pointer step (%z).
; The shared checks expect the step folded into a post-indexed vstrh.32 (#-254).
1327 define i8* @strh32_m254(i8* %y, i8* %x) {
1328 ; CHECK-LABEL: strh32_m254:
1329 ; CHECK: @ %bb.0: @ %entry
1330 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1331 ; CHECK-NEXT: vstrh.32 q0, [r0], #-254
1334 %z = getelementptr inbounds i8, i8* %y, i32 -254
1335 %0 = bitcast i8* %x to <4 x i16>*
1336 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1337 %2 = bitcast i8* %y to <4 x i16>*
1338 store <4 x i16> %1, <4 x i16>* %2, align 2
; Post-increment store test: <4 x i16> store to %y (align 2) with a -256 byte pointer step (%z).
; The shared checks expect no folding here: a plain vstrh.32 followed by a separate sub.w of #256.
1342 define i8* @strh32_m256(i8* %y, i8* %x) {
1343 ; CHECK-LABEL: strh32_m256:
1344 ; CHECK: @ %bb.0: @ %entry
1345 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1346 ; CHECK-NEXT: vstrh.32 q0, [r0]
1347 ; CHECK-NEXT: sub.w r0, r0, #256
1350 %z = getelementptr inbounds i8, i8* %y, i32 -256
1351 %0 = bitcast i8* %x to <4 x i16>*
1352 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1353 %2 = bitcast i8* %y to <4 x i16>*
1354 store <4 x i16> %1, <4 x i16>* %2, align 2
; Post-increment store test: <8 x i16> store to %y (align 2) with a +4 byte pointer step (%z).
; Both endiannesses expect the step folded into the store: vstrb.8 #4 for little-endian, vstrh.16 #4 for big-endian.
1359 define i8* @strh16_4(i8* %y, i8* %x) {
1360 ; CHECK-LE-LABEL: strh16_4:
1361 ; CHECK-LE: @ %bb.0: @ %entry
1362 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1363 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4
1364 ; CHECK-LE-NEXT: bx lr
1366 ; CHECK-BE-LABEL: strh16_4:
1367 ; CHECK-BE: @ %bb.0: @ %entry
1368 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1369 ; CHECK-BE-NEXT: vstrh.16 q0, [r0], #4
1370 ; CHECK-BE-NEXT: bx lr
1372 %z = getelementptr inbounds i8, i8* %y, i32 4
1373 %0 = bitcast i8* %x to <8 x i16>*
1374 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1375 %2 = bitcast i8* %y to <8 x i16>*
1376 store <8 x i16> %1, <8 x i16>* %2, align 2
; Post-increment store test: <8 x i16> store to %y (align 2) with an odd +3 byte pointer step (%z).
; Little-endian checks expect the step folded into a byte store (#3); big-endian checks expect a plain vstrh.16 plus a separate adds.
1380 define i8* @strh16_3(i8* %y, i8* %x) {
1381 ; CHECK-LE-LABEL: strh16_3:
1382 ; CHECK-LE: @ %bb.0: @ %entry
1383 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1384 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
1385 ; CHECK-LE-NEXT: bx lr
1387 ; CHECK-BE-LABEL: strh16_3:
1388 ; CHECK-BE: @ %bb.0: @ %entry
1389 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1390 ; CHECK-BE-NEXT: vstrh.16 q0, [r0]
1391 ; CHECK-BE-NEXT: adds r0, #3
1392 ; CHECK-BE-NEXT: bx lr
1394 %z = getelementptr inbounds i8, i8* %y, i32 3
1395 %0 = bitcast i8* %x to <8 x i16>*
1396 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1397 %2 = bitcast i8* %y to <8 x i16>*
1398 store <8 x i16> %1, <8 x i16>* %2, align 2
; Post-increment store test: <8 x i16> store to %y (align 2) with a +2 byte pointer step (%z).
; Both endiannesses expect the step folded into the store: vstrb.8 #2 for little-endian, vstrh.16 #2 for big-endian.
1402 define i8* @strh16_2(i8* %y, i8* %x) {
1403 ; CHECK-LE-LABEL: strh16_2:
1404 ; CHECK-LE: @ %bb.0: @ %entry
1405 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1406 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #2
1407 ; CHECK-LE-NEXT: bx lr
1409 ; CHECK-BE-LABEL: strh16_2:
1410 ; CHECK-BE: @ %bb.0: @ %entry
1411 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1412 ; CHECK-BE-NEXT: vstrh.16 q0, [r0], #2
1413 ; CHECK-BE-NEXT: bx lr
1415 %z = getelementptr inbounds i8, i8* %y, i32 2
1416 %0 = bitcast i8* %x to <8 x i16>*
1417 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1418 %2 = bitcast i8* %y to <8 x i16>*
1419 store <8 x i16> %1, <8 x i16>* %2, align 2
; Post-increment store test: <8 x i16> store to %y (align 2) with a +254 byte pointer step (%z).
; The shared checks expect the step folded into a post-indexed vstrh.16 (#254).
1423 define i8* @strh16_254(i8* %y, i8* %x) {
1424 ; CHECK-LABEL: strh16_254:
1425 ; CHECK: @ %bb.0: @ %entry
1426 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1427 ; CHECK-NEXT: vstrh.16 q0, [r0], #254
1430 %z = getelementptr inbounds i8, i8* %y, i32 254
1431 %0 = bitcast i8* %x to <8 x i16>*
1432 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1433 %2 = bitcast i8* %y to <8 x i16>*
1434 store <8 x i16> %1, <8 x i16>* %2, align 2
; Post-increment store test: <8 x i16> store to %y (align 2) with a +256 byte pointer step (%z).
; The shared checks expect no folding here: a plain vstrh.16 followed by a separate add.w of #256.
1438 define i8* @strh16_256(i8* %y, i8* %x) {
1439 ; CHECK-LABEL: strh16_256:
1440 ; CHECK: @ %bb.0: @ %entry
1441 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1442 ; CHECK-NEXT: vstrh.16 q0, [r0]
1443 ; CHECK-NEXT: add.w r0, r0, #256
1446 %z = getelementptr inbounds i8, i8* %y, i32 256
1447 %0 = bitcast i8* %x to <8 x i16>*
1448 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1449 %2 = bitcast i8* %y to <8 x i16>*
1450 store <8 x i16> %1, <8 x i16>* %2, align 2
; Post-increment store test: <8 x i16> store to %y (align 2) with a -254 byte pointer step (%z).
; The shared checks expect the step folded into a post-indexed vstrh.16 (#-254).
1454 define i8* @strh16_m254(i8* %y, i8* %x) {
1455 ; CHECK-LABEL: strh16_m254:
1456 ; CHECK: @ %bb.0: @ %entry
1457 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1458 ; CHECK-NEXT: vstrh.16 q0, [r0], #-254
1461 %z = getelementptr inbounds i8, i8* %y, i32 -254
1462 %0 = bitcast i8* %x to <8 x i16>*
1463 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1464 %2 = bitcast i8* %y to <8 x i16>*
1465 store <8 x i16> %1, <8 x i16>* %2, align 2
; Post-increment store test: <8 x i16> store to %y (align 2) with a -256 byte pointer step (%z).
; The shared checks expect no folding here: a plain vstrh.16 followed by a separate sub.w of #256.
1469 define i8* @strh16_m256(i8* %y, i8* %x) {
1470 ; CHECK-LABEL: strh16_m256:
1471 ; CHECK: @ %bb.0: @ %entry
1472 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1473 ; CHECK-NEXT: vstrh.16 q0, [r0]
1474 ; CHECK-NEXT: sub.w r0, r0, #256
1477 %z = getelementptr inbounds i8, i8* %y, i32 -256
1478 %0 = bitcast i8* %x to <8 x i16>*
1479 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1480 %2 = bitcast i8* %y to <8 x i16>*
1481 store <8 x i16> %1, <8 x i16>* %2, align 2
; Post-increment store test: <4 x i8> store to %y (align 1) with a +4 byte pointer step (%z).
; The shared checks expect the step folded into a post-indexed vstrb.32 (#4).
1486 define i8* @strb32_4(i8* %y, i8* %x) {
1487 ; CHECK-LABEL: strb32_4:
1488 ; CHECK: @ %bb.0: @ %entry
1489 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1490 ; CHECK-NEXT: vstrb.32 q0, [r0], #4
1493 %z = getelementptr inbounds i8, i8* %y, i32 4
1494 %0 = bitcast i8* %x to <4 x i8>*
1495 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1496 %2 = bitcast i8* %y to <4 x i8>*
1497 store <4 x i8> %1, <4 x i8>* %2, align 1
; Post-increment store test: <4 x i8> store to %y (align 1) with a +3 byte pointer step (%z).
; The shared checks expect the step folded into a post-indexed vstrb.32 (#3).
1501 define i8* @strb32_3(i8* %y, i8* %x) {
1502 ; CHECK-LABEL: strb32_3:
1503 ; CHECK: @ %bb.0: @ %entry
1504 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1505 ; CHECK-NEXT: vstrb.32 q0, [r0], #3
1508 %z = getelementptr inbounds i8, i8* %y, i32 3
1509 %0 = bitcast i8* %x to <4 x i8>*
1510 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1511 %2 = bitcast i8* %y to <4 x i8>*
1512 store <4 x i8> %1, <4 x i8>* %2, align 1
; Post-increment store test: <4 x i8> store to %y (align 1) with a +127 byte pointer step (%z).
; The shared checks expect the step folded into a post-indexed vstrb.32 (#127).
1516 define i8* @strb32_127(i8* %y, i8* %x) {
1517 ; CHECK-LABEL: strb32_127:
1518 ; CHECK: @ %bb.0: @ %entry
1519 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1520 ; CHECK-NEXT: vstrb.32 q0, [r0], #127
1523 %z = getelementptr inbounds i8, i8* %y, i32 127
1524 %0 = bitcast i8* %x to <4 x i8>*
1525 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1526 %2 = bitcast i8* %y to <4 x i8>*
1527 store <4 x i8> %1, <4 x i8>* %2, align 1
; Post-increment store test: <4 x i8> store to %y (align 1) with a +128 byte pointer step (%z).
; The shared checks expect no folding here: a plain vstrb.32 followed by a separate adds of #128.
1531 define i8* @strb32_128(i8* %y, i8* %x) {
1532 ; CHECK-LABEL: strb32_128:
1533 ; CHECK: @ %bb.0: @ %entry
1534 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1535 ; CHECK-NEXT: vstrb.32 q0, [r0]
1536 ; CHECK-NEXT: adds r0, #128
1539 %z = getelementptr inbounds i8, i8* %y, i32 128
1540 %0 = bitcast i8* %x to <4 x i8>*
1541 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1542 %2 = bitcast i8* %y to <4 x i8>*
1543 store <4 x i8> %1, <4 x i8>* %2, align 1
; Post-increment store test: <4 x i8> store to %y (align 1) with a -127 byte pointer step (%z).
; The shared checks expect the step folded into a post-indexed vstrb.32 (#-127).
1547 define i8* @strb32_m127(i8* %y, i8* %x) {
1548 ; CHECK-LABEL: strb32_m127:
1549 ; CHECK: @ %bb.0: @ %entry
1550 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1551 ; CHECK-NEXT: vstrb.32 q0, [r0], #-127
1554 %z = getelementptr inbounds i8, i8* %y, i32 -127
1555 %0 = bitcast i8* %x to <4 x i8>*
1556 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1557 %2 = bitcast i8* %y to <4 x i8>*
1558 store <4 x i8> %1, <4 x i8>* %2, align 1
; Post-increment store test: <4 x i8> store to %y (align 1) with a -128 byte pointer step (%z).
; The shared checks expect no folding here: a plain vstrb.32 followed by a separate subs of #128.
1562 define i8* @strb32_m128(i8* %y, i8* %x) {
1563 ; CHECK-LABEL: strb32_m128:
1564 ; CHECK: @ %bb.0: @ %entry
1565 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1566 ; CHECK-NEXT: vstrb.32 q0, [r0]
1567 ; CHECK-NEXT: subs r0, #128
1570 %z = getelementptr inbounds i8, i8* %y, i32 -128
1571 %0 = bitcast i8* %x to <4 x i8>*
1572 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1573 %2 = bitcast i8* %y to <4 x i8>*
1574 store <4 x i8> %1, <4 x i8>* %2, align 1
; Post-increment store test: <8 x i8> store to %y (align 1) with a +4 byte pointer step (%z).
; The shared checks expect the step folded into a post-indexed vstrb.16 (#4).
1579 define i8* @strb16_4(i8* %y, i8* %x) {
1580 ; CHECK-LABEL: strb16_4:
1581 ; CHECK: @ %bb.0: @ %entry
1582 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1583 ; CHECK-NEXT: vstrb.16 q0, [r0], #4
1586 %z = getelementptr inbounds i8, i8* %y, i32 4
1587 %0 = bitcast i8* %x to <8 x i8>*
1588 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1589 %2 = bitcast i8* %y to <8 x i8>*
1590 store <8 x i8> %1, <8 x i8>* %2, align 1
; Post-increment store test: <8 x i8> store to %y (align 1) with a +3 byte pointer step (%z).
; The shared checks expect the step folded into a post-indexed vstrb.16 (#3).
1594 define i8* @strb16_3(i8* %y, i8* %x) {
1595 ; CHECK-LABEL: strb16_3:
1596 ; CHECK: @ %bb.0: @ %entry
1597 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1598 ; CHECK-NEXT: vstrb.16 q0, [r0], #3
1601 %z = getelementptr inbounds i8, i8* %y, i32 3
1602 %0 = bitcast i8* %x to <8 x i8>*
1603 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1604 %2 = bitcast i8* %y to <8 x i8>*
1605 store <8 x i8> %1, <8 x i8>* %2, align 1
; Post-increment store test: <8 x i8> store to %y (align 1) with a +127 byte pointer step (%z).
; The shared checks expect the step folded into a post-indexed vstrb.16 (#127).
1609 define i8* @strb16_127(i8* %y, i8* %x) {
1610 ; CHECK-LABEL: strb16_127:
1611 ; CHECK: @ %bb.0: @ %entry
1612 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1613 ; CHECK-NEXT: vstrb.16 q0, [r0], #127
1616 %z = getelementptr inbounds i8, i8* %y, i32 127
1617 %0 = bitcast i8* %x to <8 x i8>*
1618 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1619 %2 = bitcast i8* %y to <8 x i8>*
1620 store <8 x i8> %1, <8 x i8>* %2, align 1
; Post-increment store test: <8 x i8> store to %y (align 1) with a +128 byte pointer step (%z).
; The shared checks expect no folding here: a plain vstrb.16 followed by a separate adds of #128.
1624 define i8* @strb16_128(i8* %y, i8* %x) {
1625 ; CHECK-LABEL: strb16_128:
1626 ; CHECK: @ %bb.0: @ %entry
1627 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1628 ; CHECK-NEXT: vstrb.16 q0, [r0]
1629 ; CHECK-NEXT: adds r0, #128
1632 %z = getelementptr inbounds i8, i8* %y, i32 128
1633 %0 = bitcast i8* %x to <8 x i8>*
1634 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1635 %2 = bitcast i8* %y to <8 x i8>*
1636 store <8 x i8> %1, <8 x i8>* %2, align 1
; Post-increment store test: <8 x i8> store to %y (align 1) with a -127 byte pointer step (%z).
; The shared checks expect the step folded into a post-indexed vstrb.16 (#-127).
1640 define i8* @strb16_m127(i8* %y, i8* %x) {
1641 ; CHECK-LABEL: strb16_m127:
1642 ; CHECK: @ %bb.0: @ %entry
1643 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1644 ; CHECK-NEXT: vstrb.16 q0, [r0], #-127
1647 %z = getelementptr inbounds i8, i8* %y, i32 -127
1648 %0 = bitcast i8* %x to <8 x i8>*
1649 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1650 %2 = bitcast i8* %y to <8 x i8>*
1651 store <8 x i8> %1, <8 x i8>* %2, align 1
; Post-increment store test: <8 x i8> store to %y (align 1) with a -128 byte pointer step (%z).
; The shared checks expect no folding here: a plain vstrb.16 followed by a separate subs of #128.
1655 define i8* @strb16_m128(i8* %y, i8* %x) {
1656 ; CHECK-LABEL: strb16_m128:
1657 ; CHECK: @ %bb.0: @ %entry
1658 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1659 ; CHECK-NEXT: vstrb.16 q0, [r0]
1660 ; CHECK-NEXT: subs r0, #128
1663 %z = getelementptr inbounds i8, i8* %y, i32 -128
1664 %0 = bitcast i8* %x to <8 x i8>*
1665 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1666 %2 = bitcast i8* %y to <8 x i8>*
1667 store <8 x i8> %1, <8 x i8>* %2, align 1
; Post-increment store test: <16 x i8> store to %y (align 1) with a +4 byte pointer step (%z).
; The shared checks expect the step folded into a post-indexed vstrb.8 (#4).
1672 define i8* @strb8_4(i8* %y, i8* %x) {
1673 ; CHECK-LABEL: strb8_4:
1674 ; CHECK: @ %bb.0: @ %entry
1675 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1676 ; CHECK-NEXT: vstrb.8 q0, [r0], #4
1679 %z = getelementptr inbounds i8, i8* %y, i32 4
1680 %0 = bitcast i8* %x to <16 x i8>*
1681 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1682 %2 = bitcast i8* %y to <16 x i8>*
1683 store <16 x i8> %1, <16 x i8>* %2, align 1
; Post-increment store test: <16 x i8> store to %y (align 1) with a +3 byte pointer step (%z).
; The shared checks expect the step folded into a post-indexed vstrb.8 (#3).
1687 define i8* @strb8_3(i8* %y, i8* %x) {
1688 ; CHECK-LABEL: strb8_3:
1689 ; CHECK: @ %bb.0: @ %entry
1690 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1691 ; CHECK-NEXT: vstrb.8 q0, [r0], #3
1694 %z = getelementptr inbounds i8, i8* %y, i32 3
1695 %0 = bitcast i8* %x to <16 x i8>*
1696 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1697 %2 = bitcast i8* %y to <16 x i8>*
1698 store <16 x i8> %1, <16 x i8>* %2, align 1
; Post-increment store test: <16 x i8> store to %y (align 1) with a +127 byte pointer step (%z).
; The shared checks expect the step folded into a post-indexed vstrb.8 (#127).
1702 define i8* @strb8_127(i8* %y, i8* %x) {
1703 ; CHECK-LABEL: strb8_127:
1704 ; CHECK: @ %bb.0: @ %entry
1705 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1706 ; CHECK-NEXT: vstrb.8 q0, [r0], #127
1709 %z = getelementptr inbounds i8, i8* %y, i32 127
1710 %0 = bitcast i8* %x to <16 x i8>*
1711 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1712 %2 = bitcast i8* %y to <16 x i8>*
1713 store <16 x i8> %1, <16 x i8>* %2, align 1
; Post-increment store test: <16 x i8> store to %y (align 1) with a +128 byte pointer step (%z).
; The shared checks expect no folding here: a plain vstrb.8 followed by a separate adds of #128.
1717 define i8* @strb8_128(i8* %y, i8* %x) {
1718 ; CHECK-LABEL: strb8_128:
1719 ; CHECK: @ %bb.0: @ %entry
1720 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1721 ; CHECK-NEXT: vstrb.8 q0, [r0]
1722 ; CHECK-NEXT: adds r0, #128
1725 %z = getelementptr inbounds i8, i8* %y, i32 128
1726 %0 = bitcast i8* %x to <16 x i8>*
1727 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1728 %2 = bitcast i8* %y to <16 x i8>*
1729 store <16 x i8> %1, <16 x i8>* %2, align 1
; Post-increment store test: <16 x i8> store to %y (align 1) with a -127 byte pointer step (%z).
; The shared checks expect the step folded into a post-indexed vstrb.8 (#-127).
1733 define i8* @strb8_m127(i8* %y, i8* %x) {
1734 ; CHECK-LABEL: strb8_m127:
1735 ; CHECK: @ %bb.0: @ %entry
1736 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1737 ; CHECK-NEXT: vstrb.8 q0, [r0], #-127
1740 %z = getelementptr inbounds i8, i8* %y, i32 -127
1741 %0 = bitcast i8* %x to <16 x i8>*
1742 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1743 %2 = bitcast i8* %y to <16 x i8>*
1744 store <16 x i8> %1, <16 x i8>* %2, align 1
; Post-increment store test: <16 x i8> store to %y (align 1) with a -128 byte pointer step (%z).
; The shared checks expect no folding here: a plain vstrb.8 followed by a separate subs of #128.
1748 define i8* @strb8_m128(i8* %y, i8* %x) {
1749 ; CHECK-LABEL: strb8_m128:
1750 ; CHECK: @ %bb.0: @ %entry
1751 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1752 ; CHECK-NEXT: vstrb.8 q0, [r0]
1753 ; CHECK-NEXT: subs r0, #128
1756 %z = getelementptr inbounds i8, i8* %y, i32 -128
1757 %0 = bitcast i8* %x to <16 x i8>*
1758 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1759 %2 = bitcast i8* %y to <16 x i8>*
1760 store <16 x i8> %1, <16 x i8>* %2, align 1
; Post-increment store test: <4 x float> store to %y (align 4) with a +4 byte pointer step (%z).
; Both endiannesses expect the step folded into the store: vstrb.8 #4 for little-endian, vstrw.32 #4 for big-endian.
1765 define i8* @strf32_4(i8* %y, i8* %x) {
1766 ; CHECK-LE-LABEL: strf32_4:
1767 ; CHECK-LE: @ %bb.0: @ %entry
1768 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
1769 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4
1770 ; CHECK-LE-NEXT: bx lr
1772 ; CHECK-BE-LABEL: strf32_4:
1773 ; CHECK-BE: @ %bb.0: @ %entry
1774 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
1775 ; CHECK-BE-NEXT: vstrw.32 q0, [r0], #4
1776 ; CHECK-BE-NEXT: bx lr
1778 %z = getelementptr inbounds i8, i8* %y, i32 4
1779 %0 = bitcast i8* %x to <4 x float>*
1780 %1 = load <4 x float>, <4 x float>* %0, align 4
1781 %2 = bitcast i8* %y to <4 x float>*
1782 store <4 x float> %1, <4 x float>* %2, align 4
; Post-increment store test: <8 x half> store to %y (align 2) with a +4 byte pointer step (%z).
; Both endiannesses expect the step folded into the store: vstrb.8 #4 for little-endian, vstrh.16 #4 for big-endian.
1786 define i8* @strf16_4(i8* %y, i8* %x) {
1787 ; CHECK-LE-LABEL: strf16_4:
1788 ; CHECK-LE: @ %bb.0: @ %entry
1789 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1790 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #4
1791 ; CHECK-LE-NEXT: bx lr
1793 ; CHECK-BE-LABEL: strf16_4:
1794 ; CHECK-BE: @ %bb.0: @ %entry
1795 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1796 ; CHECK-BE-NEXT: vstrh.16 q0, [r0], #4
1797 ; CHECK-BE-NEXT: bx lr
1799 %z = getelementptr inbounds i8, i8* %y, i32 4
1800 %0 = bitcast i8* %x to <8 x half>*
1801 %1 = load <8 x half>, <8 x half>* %0, align 2
1802 %2 = bitcast i8* %y to <8 x half>*
1803 store <8 x half> %1, <8 x half>* %2, align 2
; Post-increment store test: <4 x i32> store to %y with byte alignment (align 1) and a +3 byte pointer step (%z).
; Both endiannesses expect a post-indexed byte store (#3); the big-endian checks additionally expect a vrev32.8 before it.
1807 define i8* @strwi32_align1(i8* %y, i8* %x) {
1808 ; CHECK-LE-LABEL: strwi32_align1:
1809 ; CHECK-LE: @ %bb.0: @ %entry
1810 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
1811 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
1812 ; CHECK-LE-NEXT: bx lr
1814 ; CHECK-BE-LABEL: strwi32_align1:
1815 ; CHECK-BE: @ %bb.0: @ %entry
1816 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
1817 ; CHECK-BE-NEXT: vrev32.8 q0, q0
1818 ; CHECK-BE-NEXT: vstrb.8 q0, [r0], #3
1819 ; CHECK-BE-NEXT: bx lr
1821 %z = getelementptr inbounds i8, i8* %y, i32 3
1822 %0 = bitcast i8* %x to <4 x i32>*
1823 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1824 %2 = bitcast i8* %y to <4 x i32>*
1825 store <4 x i32> %1, <4 x i32>* %2, align 1
; Post-increment store test: <8 x i16> store to %y with byte alignment (align 1) and a +3 byte pointer step (%z).
; Both endiannesses expect a post-indexed byte store (#3); the big-endian checks additionally expect a vrev16.8 before it.
1829 define i8* @strhi16_align1(i8* %y, i8* %x) {
1830 ; CHECK-LE-LABEL: strhi16_align1:
1831 ; CHECK-LE: @ %bb.0: @ %entry
1832 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1833 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
1834 ; CHECK-LE-NEXT: bx lr
1836 ; CHECK-BE-LABEL: strhi16_align1:
1837 ; CHECK-BE: @ %bb.0: @ %entry
1838 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1839 ; CHECK-BE-NEXT: vrev16.8 q0, q0
1840 ; CHECK-BE-NEXT: vstrb.8 q0, [r0], #3
1841 ; CHECK-BE-NEXT: bx lr
1843 %z = getelementptr inbounds i8, i8* %y, i32 3
1844 %0 = bitcast i8* %x to <8 x i16>*
1845 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1846 %2 = bitcast i8* %y to <8 x i16>*
1847 store <8 x i16> %1, <8 x i16>* %2, align 1
; Post-increment truncating store test: <4 x i32> is truncated to <4 x i16> and stored to %y with align 1, with a +3 byte pointer step (%z).
; The shared checks expect the vector to be spilled to an 8-byte stack slot with vstrh.32, copied out with ldrd plus two str, and the step done by a separate adds.
1851 define i8* @strhi32_align1(i8* %y, i8* %x) {
1852 ; CHECK-LABEL: strhi32_align1:
1853 ; CHECK: @ %bb.0: @ %entry
1854 ; CHECK-NEXT: .pad #8
1855 ; CHECK-NEXT: sub sp, #8
1856 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1857 ; CHECK-NEXT: mov r1, sp
1858 ; CHECK-NEXT: vstrh.32 q0, [r1]
1859 ; CHECK-NEXT: ldrd r1, r2, [sp]
1860 ; CHECK-NEXT: str r1, [r0]
1861 ; CHECK-NEXT: str r2, [r0, #4]
1862 ; CHECK-NEXT: adds r0, #3
1863 ; CHECK-NEXT: add sp, #8
1866 %z = getelementptr inbounds i8, i8* %y, i32 3
1867 %0 = bitcast i8* %x to <4 x i32>*
1868 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1869 %2 = bitcast i8* %y to <4 x i16>*
1870 %3 = trunc <4 x i32> %1 to <4 x i16>
1871 store <4 x i16> %3, <4 x i16>* %2, align 1
; Post-increment store test: <4 x float> store to %y with byte alignment (align 1) and a +3 byte pointer step (%z).
; Both endiannesses expect a post-indexed byte store (#3); the big-endian checks additionally expect a vrev32.8 before it.
1875 define i8* @strf32_align1(i8* %y, i8* %x) {
1876 ; CHECK-LE-LABEL: strf32_align1:
1877 ; CHECK-LE: @ %bb.0: @ %entry
1878 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
1879 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
1880 ; CHECK-LE-NEXT: bx lr
1882 ; CHECK-BE-LABEL: strf32_align1:
1883 ; CHECK-BE: @ %bb.0: @ %entry
1884 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
1885 ; CHECK-BE-NEXT: vrev32.8 q0, q0
1886 ; CHECK-BE-NEXT: vstrb.8 q0, [r0], #3
1887 ; CHECK-BE-NEXT: bx lr
1889 %z = getelementptr inbounds i8, i8* %y, i32 3
1890 %0 = bitcast i8* %x to <4 x float>*
1891 %1 = load <4 x float>, <4 x float>* %0, align 4
1892 %2 = bitcast i8* %y to <4 x float>*
1893 store <4 x float> %1, <4 x float>* %2, align 1
; Post-increment store test: <8 x half> store to %y with byte alignment (align 1) and a +3 byte pointer step (%z).
; Both endiannesses expect a post-indexed byte store (#3); the big-endian checks additionally expect a vrev16.8 before it.
1897 define i8* @strf16_align1(i8* %y, i8* %x) {
1898 ; CHECK-LE-LABEL: strf16_align1:
1899 ; CHECK-LE: @ %bb.0: @ %entry
1900 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1901 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #3
1902 ; CHECK-LE-NEXT: bx lr
1904 ; CHECK-BE-LABEL: strf16_align1:
1905 ; CHECK-BE: @ %bb.0: @ %entry
1906 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1907 ; CHECK-BE-NEXT: vrev16.8 q0, q0
1908 ; CHECK-BE-NEXT: vstrb.8 q0, [r0], #3
1909 ; CHECK-BE-NEXT: bx lr
1911 %z = getelementptr inbounds i8, i8* %y, i32 3
1912 %0 = bitcast i8* %x to <8 x half>*
1913 %1 = load <8 x half>, <8 x half>* %0, align 2
1914 %2 = bitcast i8* %y to <8 x half>*
1915 store <8 x half> %1, <8 x half>* %2, align 1
; Post-increment store test: <8 x i16> store to %y with align 8 and a +16 byte pointer step (%z).
; Both endiannesses expect the step folded into the store: vstrb.8 #16 for little-endian, vstrh.16 #16 for big-endian.
; NOTE(review): the name says "f16" but the IR operates on <8 x i16>, not <8 x half> - confirm whether that is intentional.
1919 define i8* @strf16_align8(i8* %y, i8* %x) {
1920 ; CHECK-LE-LABEL: strf16_align8:
1921 ; CHECK-LE: @ %bb.0: @ %entry
1922 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1923 ; CHECK-LE-NEXT: vstrb.8 q0, [r0], #16
1924 ; CHECK-LE-NEXT: bx lr
1926 ; CHECK-BE-LABEL: strf16_align8:
1927 ; CHECK-BE: @ %bb.0: @ %entry
1928 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1929 ; CHECK-BE-NEXT: vstrh.16 q0, [r0], #16
1930 ; CHECK-BE-NEXT: bx lr
1932 %z = getelementptr inbounds i8, i8* %y, i32 16
1933 %0 = bitcast i8* %x to <8 x i16>*
1934 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1935 %2 = bitcast i8* %y to <8 x i16>*
1936 store <8 x i16> %1, <8 x i16>* %2, align 8