1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
3 ; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
; ldrwu32_4: <4 x i32> load (align 4) from %x + 4, stored to %y.
; Expected codegen folds the +4 offset into the vldrw.u32 immediate.
5 define i8* @ldrwu32_4(i8* %x, i8* %y) {
6 ; CHECK-LABEL: ldrwu32_4:
7 ; CHECK: @ %bb.0: @ %entry
8 ; CHECK-NEXT: vldrw.u32 q0, [r0, #4]
9 ; CHECK-NEXT: vstrw.32 q0, [r1]
12 %z = getelementptr inbounds i8, i8* %x, i32 4
13 %0 = bitcast i8* %z to <4 x i32>*
14 %1 = load <4 x i32>, <4 x i32>* %0, align 4
15 %2 = bitcast i8* %y to <4 x i32>*
16 store <4 x i32> %1, <4 x i32>* %2, align 4
; ldrwu32_3: <4 x i32> load (align 4) from %x + 3, stored to %y.
; Expected codegen does NOT fold the offset: the address is formed with a
; separate adds and vldrw.u32 uses no immediate.
20 define i8* @ldrwu32_3(i8* %x, i8* %y) {
21 ; CHECK-LABEL: ldrwu32_3:
22 ; CHECK: @ %bb.0: @ %entry
23 ; CHECK-NEXT: adds r2, r0, #3
24 ; CHECK-NEXT: vldrw.u32 q0, [r2]
25 ; CHECK-NEXT: vstrw.32 q0, [r1]
28 %z = getelementptr inbounds i8, i8* %x, i32 3
29 %0 = bitcast i8* %z to <4 x i32>*
30 %1 = load <4 x i32>, <4 x i32>* %0, align 4
31 %2 = bitcast i8* %y to <4 x i32>*
32 store <4 x i32> %1, <4 x i32>* %2, align 4
; ldrwu32_m4: <4 x i32> load (align 4) from %x - 4, stored to %y.
; Expected codegen folds the -4 offset into the vldrw.u32 immediate.
36 define i8* @ldrwu32_m4(i8* %x, i8* %y) {
37 ; CHECK-LABEL: ldrwu32_m4:
38 ; CHECK: @ %bb.0: @ %entry
39 ; CHECK-NEXT: vldrw.u32 q0, [r0, #-4]
40 ; CHECK-NEXT: vstrw.32 q0, [r1]
43 %z = getelementptr inbounds i8, i8* %x, i32 -4
44 %0 = bitcast i8* %z to <4 x i32>*
45 %1 = load <4 x i32>, <4 x i32>* %0, align 4
46 %2 = bitcast i8* %y to <4 x i32>*
47 store <4 x i32> %1, <4 x i32>* %2, align 4
; ldrwu32_508: <4 x i32> load (align 4) from %x + 508, stored to %y.
; Expected codegen folds the +508 offset into the vldrw.u32 immediate.
51 define i8* @ldrwu32_508(i8* %x, i8* %y) {
52 ; CHECK-LABEL: ldrwu32_508:
53 ; CHECK: @ %bb.0: @ %entry
54 ; CHECK-NEXT: vldrw.u32 q0, [r0, #508]
55 ; CHECK-NEXT: vstrw.32 q0, [r1]
58 %z = getelementptr inbounds i8, i8* %x, i32 508
59 %0 = bitcast i8* %z to <4 x i32>*
60 %1 = load <4 x i32>, <4 x i32>* %0, align 4
61 %2 = bitcast i8* %y to <4 x i32>*
62 store <4 x i32> %1, <4 x i32>* %2, align 4
; ldrwu32_512: <4 x i32> load (align 4) from %x + 512, stored to %y.
; Expected codegen does NOT fold the offset: the address is formed with a
; separate add.w and vldrw.u32 uses no immediate.
66 define i8* @ldrwu32_512(i8* %x, i8* %y) {
67 ; CHECK-LABEL: ldrwu32_512:
68 ; CHECK: @ %bb.0: @ %entry
69 ; CHECK-NEXT: add.w r2, r0, #512
70 ; CHECK-NEXT: vldrw.u32 q0, [r2]
71 ; CHECK-NEXT: vstrw.32 q0, [r1]
74 %z = getelementptr inbounds i8, i8* %x, i32 512
75 %0 = bitcast i8* %z to <4 x i32>*
76 %1 = load <4 x i32>, <4 x i32>* %0, align 4
77 %2 = bitcast i8* %y to <4 x i32>*
78 store <4 x i32> %1, <4 x i32>* %2, align 4
; ldrwu32_m508: <4 x i32> load (align 4) from %x - 508, stored to %y.
; Expected codegen folds the -508 offset into the vldrw.u32 immediate.
82 define i8* @ldrwu32_m508(i8* %x, i8* %y) {
83 ; CHECK-LABEL: ldrwu32_m508:
84 ; CHECK: @ %bb.0: @ %entry
85 ; CHECK-NEXT: vldrw.u32 q0, [r0, #-508]
86 ; CHECK-NEXT: vstrw.32 q0, [r1]
89 %z = getelementptr inbounds i8, i8* %x, i32 -508
90 %0 = bitcast i8* %z to <4 x i32>*
91 %1 = load <4 x i32>, <4 x i32>* %0, align 4
92 %2 = bitcast i8* %y to <4 x i32>*
93 store <4 x i32> %1, <4 x i32>* %2, align 4
; ldrwu32_m512: <4 x i32> load (align 4) from %x - 512, stored to %y.
; Expected codegen does NOT fold the offset: the address is formed with a
; separate sub.w and vldrw.u32 uses no immediate.
97 define i8* @ldrwu32_m512(i8* %x, i8* %y) {
98 ; CHECK-LABEL: ldrwu32_m512:
99 ; CHECK: @ %bb.0: @ %entry
100 ; CHECK-NEXT: sub.w r2, r0, #512
101 ; CHECK-NEXT: vldrw.u32 q0, [r2]
102 ; CHECK-NEXT: vstrw.32 q0, [r1]
105 %z = getelementptr inbounds i8, i8* %x, i32 -512
106 %0 = bitcast i8* %z to <4 x i32>*
107 %1 = load <4 x i32>, <4 x i32>* %0, align 4
108 %2 = bitcast i8* %y to <4 x i32>*
109 store <4 x i32> %1, <4 x i32>* %2, align 4
; ldrhu32_4: <4 x i16> load (align 2) from %x + 4, zero-extended to
; <4 x i32> and stored to %y.
; Expected codegen is a widening vldrh.u32 with the +4 offset folded in.
114 define i8* @ldrhu32_4(i8* %x, i8* %y) {
115 ; CHECK-LABEL: ldrhu32_4:
116 ; CHECK: @ %bb.0: @ %entry
117 ; CHECK-NEXT: vldrh.u32 q0, [r0, #4]
118 ; CHECK-NEXT: vstrw.32 q0, [r1]
121 %z = getelementptr inbounds i8, i8* %x, i32 4
122 %0 = bitcast i8* %z to <4 x i16>*
123 %1 = load <4 x i16>, <4 x i16>* %0, align 2
124 %2 = zext <4 x i16> %1 to <4 x i32>
125 %3 = bitcast i8* %y to <4 x i32>*
126 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhu32_3: <4 x i16> load (align 2) from %x + 3, zero-extended to
; <4 x i32> and stored to %y.
; Expected codegen does NOT fold the offset: a separate adds forms the
; address and vldrh.u32 uses no immediate.
130 define i8* @ldrhu32_3(i8* %x, i8* %y) {
131 ; CHECK-LABEL: ldrhu32_3:
132 ; CHECK: @ %bb.0: @ %entry
133 ; CHECK-NEXT: adds r2, r0, #3
134 ; CHECK-NEXT: vldrh.u32 q0, [r2]
135 ; CHECK-NEXT: vstrw.32 q0, [r1]
138 %z = getelementptr inbounds i8, i8* %x, i32 3
139 %0 = bitcast i8* %z to <4 x i16>*
140 %1 = load <4 x i16>, <4 x i16>* %0, align 2
141 %2 = zext <4 x i16> %1 to <4 x i32>
142 %3 = bitcast i8* %y to <4 x i32>*
143 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhu32_2: <4 x i16> load (align 2) from %x + 2, zero-extended to
; <4 x i32> and stored to %y.
; Expected codegen is a widening vldrh.u32 with the +2 offset folded in.
147 define i8* @ldrhu32_2(i8* %x, i8* %y) {
148 ; CHECK-LABEL: ldrhu32_2:
149 ; CHECK: @ %bb.0: @ %entry
150 ; CHECK-NEXT: vldrh.u32 q0, [r0, #2]
151 ; CHECK-NEXT: vstrw.32 q0, [r1]
154 %z = getelementptr inbounds i8, i8* %x, i32 2
155 %0 = bitcast i8* %z to <4 x i16>*
156 %1 = load <4 x i16>, <4 x i16>* %0, align 2
157 %2 = zext <4 x i16> %1 to <4 x i32>
158 %3 = bitcast i8* %y to <4 x i32>*
159 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhu32_254: <4 x i16> load (align 2) from %x + 254, zero-extended to
; <4 x i32> and stored to %y.
; Expected codegen is a widening vldrh.u32 with the +254 offset folded in.
163 define i8* @ldrhu32_254(i8* %x, i8* %y) {
164 ; CHECK-LABEL: ldrhu32_254:
165 ; CHECK: @ %bb.0: @ %entry
166 ; CHECK-NEXT: vldrh.u32 q0, [r0, #254]
167 ; CHECK-NEXT: vstrw.32 q0, [r1]
170 %z = getelementptr inbounds i8, i8* %x, i32 254
171 %0 = bitcast i8* %z to <4 x i16>*
172 %1 = load <4 x i16>, <4 x i16>* %0, align 2
173 %2 = zext <4 x i16> %1 to <4 x i32>
174 %3 = bitcast i8* %y to <4 x i32>*
175 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhu32_256: <4 x i16> load (align 2) from %x + 256, zero-extended to
; <4 x i32> and stored to %y.
; Expected codegen does NOT fold the offset: a separate add.w forms the
; address and vldrh.u32 uses no immediate.
179 define i8* @ldrhu32_256(i8* %x, i8* %y) {
180 ; CHECK-LABEL: ldrhu32_256:
181 ; CHECK: @ %bb.0: @ %entry
182 ; CHECK-NEXT: add.w r2, r0, #256
183 ; CHECK-NEXT: vldrh.u32 q0, [r2]
184 ; CHECK-NEXT: vstrw.32 q0, [r1]
187 %z = getelementptr inbounds i8, i8* %x, i32 256
188 %0 = bitcast i8* %z to <4 x i16>*
189 %1 = load <4 x i16>, <4 x i16>* %0, align 2
190 %2 = zext <4 x i16> %1 to <4 x i32>
191 %3 = bitcast i8* %y to <4 x i32>*
192 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhu32_m254: <4 x i16> load (align 2) from %x - 254, zero-extended to
; <4 x i32> and stored to %y.
; Expected codegen is a widening vldrh.u32 with the -254 offset folded in.
196 define i8* @ldrhu32_m254(i8* %x, i8* %y) {
197 ; CHECK-LABEL: ldrhu32_m254:
198 ; CHECK: @ %bb.0: @ %entry
199 ; CHECK-NEXT: vldrh.u32 q0, [r0, #-254]
200 ; CHECK-NEXT: vstrw.32 q0, [r1]
203 %z = getelementptr inbounds i8, i8* %x, i32 -254
204 %0 = bitcast i8* %z to <4 x i16>*
205 %1 = load <4 x i16>, <4 x i16>* %0, align 2
206 %2 = zext <4 x i16> %1 to <4 x i32>
207 %3 = bitcast i8* %y to <4 x i32>*
208 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhu32_m256: <4 x i16> load (align 2) from %x - 256, zero-extended to
; <4 x i32> and stored to %y.
; Expected codegen does NOT fold the offset: a separate sub.w forms the
; address and vldrh.u32 uses no immediate.
212 define i8* @ldrhu32_m256(i8* %x, i8* %y) {
213 ; CHECK-LABEL: ldrhu32_m256:
214 ; CHECK: @ %bb.0: @ %entry
215 ; CHECK-NEXT: sub.w r2, r0, #256
216 ; CHECK-NEXT: vldrh.u32 q0, [r2]
217 ; CHECK-NEXT: vstrw.32 q0, [r1]
220 %z = getelementptr inbounds i8, i8* %x, i32 -256
221 %0 = bitcast i8* %z to <4 x i16>*
222 %1 = load <4 x i16>, <4 x i16>* %0, align 2
223 %2 = zext <4 x i16> %1 to <4 x i32>
224 %3 = bitcast i8* %y to <4 x i32>*
225 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhs32_4: <4 x i16> load (align 2) from %x + 4, sign-extended to
; <4 x i32> and stored to %y.
; Expected codegen is a widening vldrh.s32 with the +4 offset folded in.
230 define i8* @ldrhs32_4(i8* %x, i8* %y) {
231 ; CHECK-LABEL: ldrhs32_4:
232 ; CHECK: @ %bb.0: @ %entry
233 ; CHECK-NEXT: vldrh.s32 q0, [r0, #4]
234 ; CHECK-NEXT: vstrw.32 q0, [r1]
237 %z = getelementptr inbounds i8, i8* %x, i32 4
238 %0 = bitcast i8* %z to <4 x i16>*
239 %1 = load <4 x i16>, <4 x i16>* %0, align 2
240 %2 = sext <4 x i16> %1 to <4 x i32>
241 %3 = bitcast i8* %y to <4 x i32>*
242 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhs32_3: <4 x i16> load (align 2) from %x + 3, sign-extended to
; <4 x i32> and stored to %y.
; Expected codegen does NOT fold the offset: a separate adds forms the
; address and vldrh.s32 uses no immediate.
246 define i8* @ldrhs32_3(i8* %x, i8* %y) {
247 ; CHECK-LABEL: ldrhs32_3:
248 ; CHECK: @ %bb.0: @ %entry
249 ; CHECK-NEXT: adds r2, r0, #3
250 ; CHECK-NEXT: vldrh.s32 q0, [r2]
251 ; CHECK-NEXT: vstrw.32 q0, [r1]
254 %z = getelementptr inbounds i8, i8* %x, i32 3
255 %0 = bitcast i8* %z to <4 x i16>*
256 %1 = load <4 x i16>, <4 x i16>* %0, align 2
257 %2 = sext <4 x i16> %1 to <4 x i32>
258 %3 = bitcast i8* %y to <4 x i32>*
259 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhs32_2: <4 x i16> load (align 2) from %x + 2, sign-extended to
; <4 x i32> and stored to %y.
; Expected codegen is a widening vldrh.s32 with the +2 offset folded in.
263 define i8* @ldrhs32_2(i8* %x, i8* %y) {
264 ; CHECK-LABEL: ldrhs32_2:
265 ; CHECK: @ %bb.0: @ %entry
266 ; CHECK-NEXT: vldrh.s32 q0, [r0, #2]
267 ; CHECK-NEXT: vstrw.32 q0, [r1]
270 %z = getelementptr inbounds i8, i8* %x, i32 2
271 %0 = bitcast i8* %z to <4 x i16>*
272 %1 = load <4 x i16>, <4 x i16>* %0, align 2
273 %2 = sext <4 x i16> %1 to <4 x i32>
274 %3 = bitcast i8* %y to <4 x i32>*
275 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhs32_254: <4 x i16> load (align 2) from %x + 254, sign-extended to
; <4 x i32> and stored to %y.
; Expected codegen is a widening vldrh.s32 with the +254 offset folded in.
279 define i8* @ldrhs32_254(i8* %x, i8* %y) {
280 ; CHECK-LABEL: ldrhs32_254:
281 ; CHECK: @ %bb.0: @ %entry
282 ; CHECK-NEXT: vldrh.s32 q0, [r0, #254]
283 ; CHECK-NEXT: vstrw.32 q0, [r1]
286 %z = getelementptr inbounds i8, i8* %x, i32 254
287 %0 = bitcast i8* %z to <4 x i16>*
288 %1 = load <4 x i16>, <4 x i16>* %0, align 2
289 %2 = sext <4 x i16> %1 to <4 x i32>
290 %3 = bitcast i8* %y to <4 x i32>*
291 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhs32_256: <4 x i16> load (align 2) from %x + 256, sign-extended to
; <4 x i32> and stored to %y.
; Expected codegen does NOT fold the offset: a separate add.w forms the
; address and vldrh.s32 uses no immediate.
295 define i8* @ldrhs32_256(i8* %x, i8* %y) {
296 ; CHECK-LABEL: ldrhs32_256:
297 ; CHECK: @ %bb.0: @ %entry
298 ; CHECK-NEXT: add.w r2, r0, #256
299 ; CHECK-NEXT: vldrh.s32 q0, [r2]
300 ; CHECK-NEXT: vstrw.32 q0, [r1]
303 %z = getelementptr inbounds i8, i8* %x, i32 256
304 %0 = bitcast i8* %z to <4 x i16>*
305 %1 = load <4 x i16>, <4 x i16>* %0, align 2
306 %2 = sext <4 x i16> %1 to <4 x i32>
307 %3 = bitcast i8* %y to <4 x i32>*
308 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhs32_m254: <4 x i16> load (align 2) from %x - 254, sign-extended to
; <4 x i32> and stored to %y.
; Expected codegen is a widening vldrh.s32 with the -254 offset folded in.
312 define i8* @ldrhs32_m254(i8* %x, i8* %y) {
313 ; CHECK-LABEL: ldrhs32_m254:
314 ; CHECK: @ %bb.0: @ %entry
315 ; CHECK-NEXT: vldrh.s32 q0, [r0, #-254]
316 ; CHECK-NEXT: vstrw.32 q0, [r1]
319 %z = getelementptr inbounds i8, i8* %x, i32 -254
320 %0 = bitcast i8* %z to <4 x i16>*
321 %1 = load <4 x i16>, <4 x i16>* %0, align 2
322 %2 = sext <4 x i16> %1 to <4 x i32>
323 %3 = bitcast i8* %y to <4 x i32>*
324 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhs32_m256: <4 x i16> load (align 2) from %x - 256, sign-extended to
; <4 x i32> and stored to %y.
; Expected codegen does NOT fold the offset: a separate sub.w forms the
; address and vldrh.s32 uses no immediate.
328 define i8* @ldrhs32_m256(i8* %x, i8* %y) {
329 ; CHECK-LABEL: ldrhs32_m256:
330 ; CHECK: @ %bb.0: @ %entry
331 ; CHECK-NEXT: sub.w r2, r0, #256
332 ; CHECK-NEXT: vldrh.s32 q0, [r2]
333 ; CHECK-NEXT: vstrw.32 q0, [r1]
336 %z = getelementptr inbounds i8, i8* %x, i32 -256
337 %0 = bitcast i8* %z to <4 x i16>*
338 %1 = load <4 x i16>, <4 x i16>* %0, align 2
339 %2 = sext <4 x i16> %1 to <4 x i32>
340 %3 = bitcast i8* %y to <4 x i32>*
341 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhu16_4: <8 x i16> load (align 2) from %x + 4, stored to %y (align 2).
; Expected codegen folds the +4 offset into the vldrh.u16 immediate.
346 define i8* @ldrhu16_4(i8* %x, i8* %y) {
347 ; CHECK-LABEL: ldrhu16_4:
348 ; CHECK: @ %bb.0: @ %entry
349 ; CHECK-NEXT: vldrh.u16 q0, [r0, #4]
350 ; CHECK-NEXT: vstrh.16 q0, [r1]
353 %z = getelementptr inbounds i8, i8* %x, i32 4
354 %0 = bitcast i8* %z to <8 x i16>*
355 %1 = load <8 x i16>, <8 x i16>* %0, align 2
356 %2 = bitcast i8* %y to <8 x i16>*
357 store <8 x i16> %1, <8 x i16>* %2, align 2
; ldrhu16_3: <8 x i16> load (align 2) from %x + 3, stored to %y (align 2).
; Expected codegen does NOT fold the offset: a separate adds forms the
; address and vldrh.u16 uses no immediate.
361 define i8* @ldrhu16_3(i8* %x, i8* %y) {
362 ; CHECK-LABEL: ldrhu16_3:
363 ; CHECK: @ %bb.0: @ %entry
364 ; CHECK-NEXT: adds r2, r0, #3
365 ; CHECK-NEXT: vldrh.u16 q0, [r2]
366 ; CHECK-NEXT: vstrh.16 q0, [r1]
369 %z = getelementptr inbounds i8, i8* %x, i32 3
370 %0 = bitcast i8* %z to <8 x i16>*
371 %1 = load <8 x i16>, <8 x i16>* %0, align 2
372 %2 = bitcast i8* %y to <8 x i16>*
373 store <8 x i16> %1, <8 x i16>* %2, align 2
; ldrhu16_2: <8 x i16> load (align 2) from %x + 2, stored to %y (align 2).
; Expected codegen folds the +2 offset into the vldrh.u16 immediate.
377 define i8* @ldrhu16_2(i8* %x, i8* %y) {
378 ; CHECK-LABEL: ldrhu16_2:
379 ; CHECK: @ %bb.0: @ %entry
380 ; CHECK-NEXT: vldrh.u16 q0, [r0, #2]
381 ; CHECK-NEXT: vstrh.16 q0, [r1]
384 %z = getelementptr inbounds i8, i8* %x, i32 2
385 %0 = bitcast i8* %z to <8 x i16>*
386 %1 = load <8 x i16>, <8 x i16>* %0, align 2
387 %2 = bitcast i8* %y to <8 x i16>*
388 store <8 x i16> %1, <8 x i16>* %2, align 2
; ldrhu16_254: <8 x i16> load (align 2) from %x + 254, stored to %y (align 2).
; Expected codegen folds the +254 offset into the vldrh.u16 immediate.
392 define i8* @ldrhu16_254(i8* %x, i8* %y) {
393 ; CHECK-LABEL: ldrhu16_254:
394 ; CHECK: @ %bb.0: @ %entry
395 ; CHECK-NEXT: vldrh.u16 q0, [r0, #254]
396 ; CHECK-NEXT: vstrh.16 q0, [r1]
399 %z = getelementptr inbounds i8, i8* %x, i32 254
400 %0 = bitcast i8* %z to <8 x i16>*
401 %1 = load <8 x i16>, <8 x i16>* %0, align 2
402 %2 = bitcast i8* %y to <8 x i16>*
403 store <8 x i16> %1, <8 x i16>* %2, align 2
; ldrhu16_256: <8 x i16> load (align 2) from %x + 256, stored to %y (align 2).
; Expected codegen does NOT fold the offset: a separate add.w forms the
; address and vldrh.u16 uses no immediate.
407 define i8* @ldrhu16_256(i8* %x, i8* %y) {
408 ; CHECK-LABEL: ldrhu16_256:
409 ; CHECK: @ %bb.0: @ %entry
410 ; CHECK-NEXT: add.w r2, r0, #256
411 ; CHECK-NEXT: vldrh.u16 q0, [r2]
412 ; CHECK-NEXT: vstrh.16 q0, [r1]
415 %z = getelementptr inbounds i8, i8* %x, i32 256
416 %0 = bitcast i8* %z to <8 x i16>*
417 %1 = load <8 x i16>, <8 x i16>* %0, align 2
418 %2 = bitcast i8* %y to <8 x i16>*
419 store <8 x i16> %1, <8 x i16>* %2, align 2
; ldrhu16_m254: <8 x i16> load (align 2) from %x - 254, stored to %y (align 2).
; Expected codegen folds the -254 offset into the vldrh.u16 immediate.
423 define i8* @ldrhu16_m254(i8* %x, i8* %y) {
424 ; CHECK-LABEL: ldrhu16_m254:
425 ; CHECK: @ %bb.0: @ %entry
426 ; CHECK-NEXT: vldrh.u16 q0, [r0, #-254]
427 ; CHECK-NEXT: vstrh.16 q0, [r1]
430 %z = getelementptr inbounds i8, i8* %x, i32 -254
431 %0 = bitcast i8* %z to <8 x i16>*
432 %1 = load <8 x i16>, <8 x i16>* %0, align 2
433 %2 = bitcast i8* %y to <8 x i16>*
434 store <8 x i16> %1, <8 x i16>* %2, align 2
; ldrhu16_m256: <8 x i16> load (align 2) from %x - 256, stored to %y (align 2).
; Expected codegen does NOT fold the offset: a separate sub.w forms the
; address and vldrh.u16 uses no immediate.
438 define i8* @ldrhu16_m256(i8* %x, i8* %y) {
439 ; CHECK-LABEL: ldrhu16_m256:
440 ; CHECK: @ %bb.0: @ %entry
441 ; CHECK-NEXT: sub.w r2, r0, #256
442 ; CHECK-NEXT: vldrh.u16 q0, [r2]
443 ; CHECK-NEXT: vstrh.16 q0, [r1]
446 %z = getelementptr inbounds i8, i8* %x, i32 -256
447 %0 = bitcast i8* %z to <8 x i16>*
448 %1 = load <8 x i16>, <8 x i16>* %0, align 2
449 %2 = bitcast i8* %y to <8 x i16>*
450 store <8 x i16> %1, <8 x i16>* %2, align 2
; ldrbu32_4: <4 x i8> load (align 1) from %x + 4, zero-extended to
; <4 x i32> and stored to %y.
; Expected codegen is a widening vldrb.u32 with the +4 offset folded in.
455 define i8* @ldrbu32_4(i8* %x, i8* %y) {
456 ; CHECK-LABEL: ldrbu32_4:
457 ; CHECK: @ %bb.0: @ %entry
458 ; CHECK-NEXT: vldrb.u32 q0, [r0, #4]
459 ; CHECK-NEXT: vstrw.32 q0, [r1]
462 %z = getelementptr inbounds i8, i8* %x, i32 4
463 %0 = bitcast i8* %z to <4 x i8>*
464 %1 = load <4 x i8>, <4 x i8>* %0, align 1
465 %2 = zext <4 x i8> %1 to <4 x i32>
466 %3 = bitcast i8* %y to <4 x i32>*
467 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrbu32_3: <4 x i8> load (align 1) from %x + 3, zero-extended to
; <4 x i32> and stored to %y.
; Expected codegen is a widening vldrb.u32 with the +3 offset folded in.
471 define i8* @ldrbu32_3(i8* %x, i8* %y) {
472 ; CHECK-LABEL: ldrbu32_3:
473 ; CHECK: @ %bb.0: @ %entry
474 ; CHECK-NEXT: vldrb.u32 q0, [r0, #3]
475 ; CHECK-NEXT: vstrw.32 q0, [r1]
478 %z = getelementptr inbounds i8, i8* %x, i32 3
479 %0 = bitcast i8* %z to <4 x i8>*
480 %1 = load <4 x i8>, <4 x i8>* %0, align 1
481 %2 = zext <4 x i8> %1 to <4 x i32>
482 %3 = bitcast i8* %y to <4 x i32>*
483 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrbu32_127: <4 x i8> load (align 1) from %x + 127, zero-extended to
; <4 x i32> and stored to %y.
; Expected codegen is a widening vldrb.u32 with the +127 offset folded in.
487 define i8* @ldrbu32_127(i8* %x, i8* %y) {
488 ; CHECK-LABEL: ldrbu32_127:
489 ; CHECK: @ %bb.0: @ %entry
490 ; CHECK-NEXT: vldrb.u32 q0, [r0, #127]
491 ; CHECK-NEXT: vstrw.32 q0, [r1]
494 %z = getelementptr inbounds i8, i8* %x, i32 127
495 %0 = bitcast i8* %z to <4 x i8>*
496 %1 = load <4 x i8>, <4 x i8>* %0, align 1
497 %2 = zext <4 x i8> %1 to <4 x i32>
498 %3 = bitcast i8* %y to <4 x i32>*
499 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrbu32_128: <4 x i8> load (align 1) from %x + 128, zero-extended to
; <4 x i32> and stored to %y.
; Expected codegen does NOT fold the offset: a separate add.w forms the
; address and vldrb.u32 uses no immediate.
503 define i8* @ldrbu32_128(i8* %x, i8* %y) {
504 ; CHECK-LABEL: ldrbu32_128:
505 ; CHECK: @ %bb.0: @ %entry
506 ; CHECK-NEXT: add.w r2, r0, #128
507 ; CHECK-NEXT: vldrb.u32 q0, [r2]
508 ; CHECK-NEXT: vstrw.32 q0, [r1]
511 %z = getelementptr inbounds i8, i8* %x, i32 128
512 %0 = bitcast i8* %z to <4 x i8>*
513 %1 = load <4 x i8>, <4 x i8>* %0, align 1
514 %2 = zext <4 x i8> %1 to <4 x i32>
515 %3 = bitcast i8* %y to <4 x i32>*
516 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrbu32_m127: <4 x i8> load (align 1) from %x - 127, zero-extended to
; <4 x i32> and stored to %y.
; Expected codegen is a widening vldrb.u32 with the -127 offset folded in.
520 define i8* @ldrbu32_m127(i8* %x, i8* %y) {
521 ; CHECK-LABEL: ldrbu32_m127:
522 ; CHECK: @ %bb.0: @ %entry
523 ; CHECK-NEXT: vldrb.u32 q0, [r0, #-127]
524 ; CHECK-NEXT: vstrw.32 q0, [r1]
527 %z = getelementptr inbounds i8, i8* %x, i32 -127
528 %0 = bitcast i8* %z to <4 x i8>*
529 %1 = load <4 x i8>, <4 x i8>* %0, align 1
530 %2 = zext <4 x i8> %1 to <4 x i32>
531 %3 = bitcast i8* %y to <4 x i32>*
532 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrbu32_m128: <4 x i8> load (align 1) from %x - 128, zero-extended to
; <4 x i32> and stored to %y.
; Expected codegen does NOT fold the offset: a separate sub.w forms the
; address and vldrb.u32 uses no immediate.
536 define i8* @ldrbu32_m128(i8* %x, i8* %y) {
537 ; CHECK-LABEL: ldrbu32_m128:
538 ; CHECK: @ %bb.0: @ %entry
539 ; CHECK-NEXT: sub.w r2, r0, #128
540 ; CHECK-NEXT: vldrb.u32 q0, [r2]
541 ; CHECK-NEXT: vstrw.32 q0, [r1]
544 %z = getelementptr inbounds i8, i8* %x, i32 -128
545 %0 = bitcast i8* %z to <4 x i8>*
546 %1 = load <4 x i8>, <4 x i8>* %0, align 1
547 %2 = zext <4 x i8> %1 to <4 x i32>
548 %3 = bitcast i8* %y to <4 x i32>*
549 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrbs32_4: <4 x i8> load (align 1) from %x + 4, sign-extended to
; <4 x i32> and stored to %y.
; Expected codegen is a widening vldrb.s32 with the +4 offset folded in.
554 define i8* @ldrbs32_4(i8* %x, i8* %y) {
555 ; CHECK-LABEL: ldrbs32_4:
556 ; CHECK: @ %bb.0: @ %entry
557 ; CHECK-NEXT: vldrb.s32 q0, [r0, #4]
558 ; CHECK-NEXT: vstrw.32 q0, [r1]
561 %z = getelementptr inbounds i8, i8* %x, i32 4
562 %0 = bitcast i8* %z to <4 x i8>*
563 %1 = load <4 x i8>, <4 x i8>* %0, align 1
564 %2 = sext <4 x i8> %1 to <4 x i32>
565 %3 = bitcast i8* %y to <4 x i32>*
566 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrbs32_3: <4 x i8> load (align 1) from %x + 3, sign-extended to
; <4 x i32> and stored to %y.
; Expected codegen is a widening vldrb.s32 with the +3 offset folded in.
570 define i8* @ldrbs32_3(i8* %x, i8* %y) {
571 ; CHECK-LABEL: ldrbs32_3:
572 ; CHECK: @ %bb.0: @ %entry
573 ; CHECK-NEXT: vldrb.s32 q0, [r0, #3]
574 ; CHECK-NEXT: vstrw.32 q0, [r1]
577 %z = getelementptr inbounds i8, i8* %x, i32 3
578 %0 = bitcast i8* %z to <4 x i8>*
579 %1 = load <4 x i8>, <4 x i8>* %0, align 1
580 %2 = sext <4 x i8> %1 to <4 x i32>
581 %3 = bitcast i8* %y to <4 x i32>*
582 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrbs32_127: <4 x i8> load (align 1) from %x + 127, sign-extended to
; <4 x i32> and stored to %y.
; Expected codegen is a widening vldrb.s32 with the +127 offset folded in.
586 define i8* @ldrbs32_127(i8* %x, i8* %y) {
587 ; CHECK-LABEL: ldrbs32_127:
588 ; CHECK: @ %bb.0: @ %entry
589 ; CHECK-NEXT: vldrb.s32 q0, [r0, #127]
590 ; CHECK-NEXT: vstrw.32 q0, [r1]
593 %z = getelementptr inbounds i8, i8* %x, i32 127
594 %0 = bitcast i8* %z to <4 x i8>*
595 %1 = load <4 x i8>, <4 x i8>* %0, align 1
596 %2 = sext <4 x i8> %1 to <4 x i32>
597 %3 = bitcast i8* %y to <4 x i32>*
598 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrbs32_128: <4 x i8> load (align 1) from %x + 128, sign-extended to
; <4 x i32> and stored to %y.
; Expected codegen does NOT fold the offset: a separate add.w forms the
; address and vldrb.s32 uses no immediate.
602 define i8* @ldrbs32_128(i8* %x, i8* %y) {
603 ; CHECK-LABEL: ldrbs32_128:
604 ; CHECK: @ %bb.0: @ %entry
605 ; CHECK-NEXT: add.w r2, r0, #128
606 ; CHECK-NEXT: vldrb.s32 q0, [r2]
607 ; CHECK-NEXT: vstrw.32 q0, [r1]
610 %z = getelementptr inbounds i8, i8* %x, i32 128
611 %0 = bitcast i8* %z to <4 x i8>*
612 %1 = load <4 x i8>, <4 x i8>* %0, align 1
613 %2 = sext <4 x i8> %1 to <4 x i32>
614 %3 = bitcast i8* %y to <4 x i32>*
615 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrbs32_m127: <4 x i8> load (align 1) from %x - 127, sign-extended to
; <4 x i32> and stored to %y.
; Expected codegen is a widening vldrb.s32 with the -127 offset folded in.
619 define i8* @ldrbs32_m127(i8* %x, i8* %y) {
620 ; CHECK-LABEL: ldrbs32_m127:
621 ; CHECK: @ %bb.0: @ %entry
622 ; CHECK-NEXT: vldrb.s32 q0, [r0, #-127]
623 ; CHECK-NEXT: vstrw.32 q0, [r1]
626 %z = getelementptr inbounds i8, i8* %x, i32 -127
627 %0 = bitcast i8* %z to <4 x i8>*
628 %1 = load <4 x i8>, <4 x i8>* %0, align 1
629 %2 = sext <4 x i8> %1 to <4 x i32>
630 %3 = bitcast i8* %y to <4 x i32>*
631 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrbs32_m128: <4 x i8> load (align 1) from %x - 128, sign-extended to
; <4 x i32> and stored to %y.
; Expected codegen does NOT fold the offset: a separate sub.w forms the
; address and vldrb.s32 uses no immediate.
635 define i8* @ldrbs32_m128(i8* %x, i8* %y) {
636 ; CHECK-LABEL: ldrbs32_m128:
637 ; CHECK: @ %bb.0: @ %entry
638 ; CHECK-NEXT: sub.w r2, r0, #128
639 ; CHECK-NEXT: vldrb.s32 q0, [r2]
640 ; CHECK-NEXT: vstrw.32 q0, [r1]
643 %z = getelementptr inbounds i8, i8* %x, i32 -128
644 %0 = bitcast i8* %z to <4 x i8>*
645 %1 = load <4 x i8>, <4 x i8>* %0, align 1
646 %2 = sext <4 x i8> %1 to <4 x i32>
647 %3 = bitcast i8* %y to <4 x i32>*
648 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrbu16_4: <8 x i8> load (align 1) from %x + 4, zero-extended to
; <8 x i16> and stored to %y.
; Expected codegen is a widening vldrb.u16 with the +4 offset folded in.
653 define i8* @ldrbu16_4(i8* %x, i8* %y) {
654 ; CHECK-LABEL: ldrbu16_4:
655 ; CHECK: @ %bb.0: @ %entry
656 ; CHECK-NEXT: vldrb.u16 q0, [r0, #4]
657 ; CHECK-NEXT: vstrh.16 q0, [r1]
660 %z = getelementptr inbounds i8, i8* %x, i32 4
661 %0 = bitcast i8* %z to <8 x i8>*
662 %1 = load <8 x i8>, <8 x i8>* %0, align 1
663 %2 = zext <8 x i8> %1 to <8 x i16>
664 %3 = bitcast i8* %y to <8 x i16>*
665 store <8 x i16> %2, <8 x i16>* %3, align 2
; ldrbu16_3: <8 x i8> load (align 1) from %x + 3, zero-extended to
; <8 x i16> and stored to %y.
; Expected codegen is a widening vldrb.u16 with the +3 offset folded in.
669 define i8* @ldrbu16_3(i8* %x, i8* %y) {
670 ; CHECK-LABEL: ldrbu16_3:
671 ; CHECK: @ %bb.0: @ %entry
672 ; CHECK-NEXT: vldrb.u16 q0, [r0, #3]
673 ; CHECK-NEXT: vstrh.16 q0, [r1]
676 %z = getelementptr inbounds i8, i8* %x, i32 3
677 %0 = bitcast i8* %z to <8 x i8>*
678 %1 = load <8 x i8>, <8 x i8>* %0, align 1
679 %2 = zext <8 x i8> %1 to <8 x i16>
680 %3 = bitcast i8* %y to <8 x i16>*
681 store <8 x i16> %2, <8 x i16>* %3, align 2
; ldrbu16_127: <8 x i8> load (align 1) from %x + 127, zero-extended to
; <8 x i16> and stored to %y.
; Expected codegen is a widening vldrb.u16 with the +127 offset folded in.
685 define i8* @ldrbu16_127(i8* %x, i8* %y) {
686 ; CHECK-LABEL: ldrbu16_127:
687 ; CHECK: @ %bb.0: @ %entry
688 ; CHECK-NEXT: vldrb.u16 q0, [r0, #127]
689 ; CHECK-NEXT: vstrh.16 q0, [r1]
692 %z = getelementptr inbounds i8, i8* %x, i32 127
693 %0 = bitcast i8* %z to <8 x i8>*
694 %1 = load <8 x i8>, <8 x i8>* %0, align 1
695 %2 = zext <8 x i8> %1 to <8 x i16>
696 %3 = bitcast i8* %y to <8 x i16>*
697 store <8 x i16> %2, <8 x i16>* %3, align 2
; ldrbu16_128: <8 x i8> load (align 1) from %x + 128, zero-extended to
; <8 x i16> and stored to %y.
; Expected codegen does NOT fold the offset: a separate add.w forms the
; address and vldrb.u16 uses no immediate.
701 define i8* @ldrbu16_128(i8* %x, i8* %y) {
702 ; CHECK-LABEL: ldrbu16_128:
703 ; CHECK: @ %bb.0: @ %entry
704 ; CHECK-NEXT: add.w r2, r0, #128
705 ; CHECK-NEXT: vldrb.u16 q0, [r2]
706 ; CHECK-NEXT: vstrh.16 q0, [r1]
709 %z = getelementptr inbounds i8, i8* %x, i32 128
710 %0 = bitcast i8* %z to <8 x i8>*
711 %1 = load <8 x i8>, <8 x i8>* %0, align 1
712 %2 = zext <8 x i8> %1 to <8 x i16>
713 %3 = bitcast i8* %y to <8 x i16>*
714 store <8 x i16> %2, <8 x i16>* %3, align 2
; ldrbu16_m127: <8 x i8> load (align 1) from %x - 127, zero-extended to
; <8 x i16> and stored to %y.
; Expected codegen is a widening vldrb.u16 with the -127 offset folded in.
718 define i8* @ldrbu16_m127(i8* %x, i8* %y) {
719 ; CHECK-LABEL: ldrbu16_m127:
720 ; CHECK: @ %bb.0: @ %entry
721 ; CHECK-NEXT: vldrb.u16 q0, [r0, #-127]
722 ; CHECK-NEXT: vstrh.16 q0, [r1]
725 %z = getelementptr inbounds i8, i8* %x, i32 -127
726 %0 = bitcast i8* %z to <8 x i8>*
727 %1 = load <8 x i8>, <8 x i8>* %0, align 1
728 %2 = zext <8 x i8> %1 to <8 x i16>
729 %3 = bitcast i8* %y to <8 x i16>*
730 store <8 x i16> %2, <8 x i16>* %3, align 2
; ldrbu16_m128: <8 x i8> load (align 1) from %x - 128, zero-extended to
; <8 x i16> and stored to %y.
; Expected codegen does NOT fold the offset: a separate sub.w forms the
; address and vldrb.u16 uses no immediate.
734 define i8* @ldrbu16_m128(i8* %x, i8* %y) {
735 ; CHECK-LABEL: ldrbu16_m128:
736 ; CHECK: @ %bb.0: @ %entry
737 ; CHECK-NEXT: sub.w r2, r0, #128
738 ; CHECK-NEXT: vldrb.u16 q0, [r2]
739 ; CHECK-NEXT: vstrh.16 q0, [r1]
742 %z = getelementptr inbounds i8, i8* %x, i32 -128
743 %0 = bitcast i8* %z to <8 x i8>*
744 %1 = load <8 x i8>, <8 x i8>* %0, align 1
745 %2 = zext <8 x i8> %1 to <8 x i16>
746 %3 = bitcast i8* %y to <8 x i16>*
747 store <8 x i16> %2, <8 x i16>* %3, align 2
; ldrbs16_4: <8 x i8> load (align 1) from %x + 4, sign-extended to
; <8 x i16> and stored to %y.
; Expected codegen is a widening vldrb.s16 with the +4 offset folded in.
752 define i8* @ldrbs16_4(i8* %x, i8* %y) {
753 ; CHECK-LABEL: ldrbs16_4:
754 ; CHECK: @ %bb.0: @ %entry
755 ; CHECK-NEXT: vldrb.s16 q0, [r0, #4]
756 ; CHECK-NEXT: vstrh.16 q0, [r1]
759 %z = getelementptr inbounds i8, i8* %x, i32 4
760 %0 = bitcast i8* %z to <8 x i8>*
761 %1 = load <8 x i8>, <8 x i8>* %0, align 1
762 %2 = sext <8 x i8> %1 to <8 x i16>
763 %3 = bitcast i8* %y to <8 x i16>*
764 store <8 x i16> %2, <8 x i16>* %3, align 2
; ldrbs16_3: <8 x i8> load (align 1) from %x + 3, sign-extended to
; <8 x i16> and stored to %y.
; Expected codegen is a widening vldrb.s16 with the +3 offset folded in.
768 define i8* @ldrbs16_3(i8* %x, i8* %y) {
769 ; CHECK-LABEL: ldrbs16_3:
770 ; CHECK: @ %bb.0: @ %entry
771 ; CHECK-NEXT: vldrb.s16 q0, [r0, #3]
772 ; CHECK-NEXT: vstrh.16 q0, [r1]
775 %z = getelementptr inbounds i8, i8* %x, i32 3
776 %0 = bitcast i8* %z to <8 x i8>*
777 %1 = load <8 x i8>, <8 x i8>* %0, align 1
778 %2 = sext <8 x i8> %1 to <8 x i16>
779 %3 = bitcast i8* %y to <8 x i16>*
780 store <8 x i16> %2, <8 x i16>* %3, align 2
; ldrbs16_127: <8 x i8> load (align 1) from %x + 127, sign-extended to
; <8 x i16> and stored to %y.
; Expected codegen is a widening vldrb.s16 with the +127 offset folded in.
784 define i8* @ldrbs16_127(i8* %x, i8* %y) {
785 ; CHECK-LABEL: ldrbs16_127:
786 ; CHECK: @ %bb.0: @ %entry
787 ; CHECK-NEXT: vldrb.s16 q0, [r0, #127]
788 ; CHECK-NEXT: vstrh.16 q0, [r1]
791 %z = getelementptr inbounds i8, i8* %x, i32 127
792 %0 = bitcast i8* %z to <8 x i8>*
793 %1 = load <8 x i8>, <8 x i8>* %0, align 1
794 %2 = sext <8 x i8> %1 to <8 x i16>
795 %3 = bitcast i8* %y to <8 x i16>*
796 store <8 x i16> %2, <8 x i16>* %3, align 2
; ldrbs16_128: <8 x i8> load (align 1) from %x + 128, sign-extended to
; <8 x i16> and stored to %y.
; Expected codegen does NOT fold the offset: a separate add.w forms the
; address and vldrb.s16 uses no immediate.
800 define i8* @ldrbs16_128(i8* %x, i8* %y) {
801 ; CHECK-LABEL: ldrbs16_128:
802 ; CHECK: @ %bb.0: @ %entry
803 ; CHECK-NEXT: add.w r2, r0, #128
804 ; CHECK-NEXT: vldrb.s16 q0, [r2]
805 ; CHECK-NEXT: vstrh.16 q0, [r1]
808 %z = getelementptr inbounds i8, i8* %x, i32 128
809 %0 = bitcast i8* %z to <8 x i8>*
810 %1 = load <8 x i8>, <8 x i8>* %0, align 1
811 %2 = sext <8 x i8> %1 to <8 x i16>
812 %3 = bitcast i8* %y to <8 x i16>*
813 store <8 x i16> %2, <8 x i16>* %3, align 2
; ldrbs16_m127: <8 x i8> load (align 1) from %x - 127, sign-extended to
; <8 x i16> and stored to %y.
; Expected codegen is a widening vldrb.s16 with the -127 offset folded in.
817 define i8* @ldrbs16_m127(i8* %x, i8* %y) {
818 ; CHECK-LABEL: ldrbs16_m127:
819 ; CHECK: @ %bb.0: @ %entry
820 ; CHECK-NEXT: vldrb.s16 q0, [r0, #-127]
821 ; CHECK-NEXT: vstrh.16 q0, [r1]
824 %z = getelementptr inbounds i8, i8* %x, i32 -127
825 %0 = bitcast i8* %z to <8 x i8>*
826 %1 = load <8 x i8>, <8 x i8>* %0, align 1
827 %2 = sext <8 x i8> %1 to <8 x i16>
828 %3 = bitcast i8* %y to <8 x i16>*
829 store <8 x i16> %2, <8 x i16>* %3, align 2
; ldrbs16_m128: <8 x i8> load (align 1) from %x - 128, sign-extended to
; <8 x i16> and stored to %y.
; Expected codegen does NOT fold the offset: a separate sub.w forms the
; address and vldrb.s16 uses no immediate.
833 define i8* @ldrbs16_m128(i8* %x, i8* %y) {
834 ; CHECK-LABEL: ldrbs16_m128:
835 ; CHECK: @ %bb.0: @ %entry
836 ; CHECK-NEXT: sub.w r2, r0, #128
837 ; CHECK-NEXT: vldrb.s16 q0, [r2]
838 ; CHECK-NEXT: vstrh.16 q0, [r1]
841 %z = getelementptr inbounds i8, i8* %x, i32 -128
842 %0 = bitcast i8* %z to <8 x i8>*
843 %1 = load <8 x i8>, <8 x i8>* %0, align 1
844 %2 = sext <8 x i8> %1 to <8 x i16>
845 %3 = bitcast i8* %y to <8 x i16>*
846 store <8 x i16> %2, <8 x i16>* %3, align 2
; ldrbu8_4: <16 x i8> load (align 1) from %x + 4, stored to %y (align 1).
; Expected codegen folds the +4 offset into the vldrb.u8 immediate.
851 define i8* @ldrbu8_4(i8* %x, i8* %y) {
852 ; CHECK-LABEL: ldrbu8_4:
853 ; CHECK: @ %bb.0: @ %entry
854 ; CHECK-NEXT: vldrb.u8 q0, [r0, #4]
855 ; CHECK-NEXT: vstrb.8 q0, [r1]
858 %z = getelementptr inbounds i8, i8* %x, i32 4
859 %0 = bitcast i8* %z to <16 x i8>*
860 %1 = load <16 x i8>, <16 x i8>* %0, align 1
861 %2 = bitcast i8* %y to <16 x i8>*
862 store <16 x i8> %1, <16 x i8>* %2, align 1
; ldrbu8_3: <16 x i8> load (align 1) from %x + 3, stored to %y (align 1).
; Expected codegen folds the +3 offset into the vldrb.u8 immediate.
866 define i8* @ldrbu8_3(i8* %x, i8* %y) {
867 ; CHECK-LABEL: ldrbu8_3:
868 ; CHECK: @ %bb.0: @ %entry
869 ; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
870 ; CHECK-NEXT: vstrb.8 q0, [r1]
873 %z = getelementptr inbounds i8, i8* %x, i32 3
874 %0 = bitcast i8* %z to <16 x i8>*
875 %1 = load <16 x i8>, <16 x i8>* %0, align 1
876 %2 = bitcast i8* %y to <16 x i8>*
877 store <16 x i8> %1, <16 x i8>* %2, align 1
; ldrbu8_127: <16 x i8> load (align 1) from %x + 127, stored to %y (align 1).
; Expected codegen folds the +127 offset into the vldrb.u8 immediate.
881 define i8* @ldrbu8_127(i8* %x, i8* %y) {
882 ; CHECK-LABEL: ldrbu8_127:
883 ; CHECK: @ %bb.0: @ %entry
884 ; CHECK-NEXT: vldrb.u8 q0, [r0, #127]
885 ; CHECK-NEXT: vstrb.8 q0, [r1]
888 %z = getelementptr inbounds i8, i8* %x, i32 127
889 %0 = bitcast i8* %z to <16 x i8>*
890 %1 = load <16 x i8>, <16 x i8>* %0, align 1
891 %2 = bitcast i8* %y to <16 x i8>*
892 store <16 x i8> %1, <16 x i8>* %2, align 1
; ldrbu8_128: <16 x i8> load (align 1) from %x + 128, stored to %y (align 1).
; Expected codegen does NOT fold the offset: a separate add.w forms the
; address and vldrb.u8 uses no immediate.
896 define i8* @ldrbu8_128(i8* %x, i8* %y) {
897 ; CHECK-LABEL: ldrbu8_128:
898 ; CHECK: @ %bb.0: @ %entry
899 ; CHECK-NEXT: add.w r2, r0, #128
900 ; CHECK-NEXT: vldrb.u8 q0, [r2]
901 ; CHECK-NEXT: vstrb.8 q0, [r1]
904 %z = getelementptr inbounds i8, i8* %x, i32 128
905 %0 = bitcast i8* %z to <16 x i8>*
906 %1 = load <16 x i8>, <16 x i8>* %0, align 1
907 %2 = bitcast i8* %y to <16 x i8>*
908 store <16 x i8> %1, <16 x i8>* %2, align 1
; ldrbu8_m127: <16 x i8> load (align 1) from %x - 127, stored to %y (align 1).
; Expected codegen folds the -127 offset into the vldrb.u8 immediate.
912 define i8* @ldrbu8_m127(i8* %x, i8* %y) {
913 ; CHECK-LABEL: ldrbu8_m127:
914 ; CHECK: @ %bb.0: @ %entry
915 ; CHECK-NEXT: vldrb.u8 q0, [r0, #-127]
916 ; CHECK-NEXT: vstrb.8 q0, [r1]
919 %z = getelementptr inbounds i8, i8* %x, i32 -127
920 %0 = bitcast i8* %z to <16 x i8>*
921 %1 = load <16 x i8>, <16 x i8>* %0, align 1
922 %2 = bitcast i8* %y to <16 x i8>*
923 store <16 x i8> %1, <16 x i8>* %2, align 1
; ldrbu8_m128: <16 x i8> load (align 1) from %x - 128, stored to %y (align 1).
; Expected codegen does NOT fold the offset: a separate sub.w forms the
; address and vldrb.u8 uses no immediate.
927 define i8* @ldrbu8_m128(i8* %x, i8* %y) {
928 ; CHECK-LABEL: ldrbu8_m128:
929 ; CHECK: @ %bb.0: @ %entry
930 ; CHECK-NEXT: sub.w r2, r0, #128
931 ; CHECK-NEXT: vldrb.u8 q0, [r2]
932 ; CHECK-NEXT: vstrb.8 q0, [r1]
935 %z = getelementptr inbounds i8, i8* %x, i32 -128
936 %0 = bitcast i8* %z to <16 x i8>*
937 %1 = load <16 x i8>, <16 x i8>* %0, align 1
938 %2 = bitcast i8* %y to <16 x i8>*
939 store <16 x i8> %1, <16 x i8>* %2, align 1
; ldrwf32_4: <4 x float> load (align 4) from %x + 4, stored to %y.
; Expected codegen uses the same vldrw.u32/vstrw.32 pair as the integer
; case, with the +4 offset folded into the load immediate.
944 define i8* @ldrwf32_4(i8* %x, i8* %y) {
945 ; CHECK-LABEL: ldrwf32_4:
946 ; CHECK: @ %bb.0: @ %entry
947 ; CHECK-NEXT: vldrw.u32 q0, [r0, #4]
948 ; CHECK-NEXT: vstrw.32 q0, [r1]
951 %z = getelementptr inbounds i8, i8* %x, i32 4
952 %0 = bitcast i8* %z to <4 x float>*
953 %1 = load <4 x float>, <4 x float>* %0, align 4
954 %2 = bitcast i8* %y to <4 x float>*
955 store <4 x float> %1, <4 x float>* %2, align 4
; ldrwf16_4: <8 x half> load (align 2) from %x + 4, stored to %y (align 2).
; Despite the "ldrw" in the name, the expected codegen is a halfword
; vldrh.u16/vstrh.16 pair with the +4 offset folded into the load immediate.
959 define i8* @ldrwf16_4(i8* %x, i8* %y) {
960 ; CHECK-LABEL: ldrwf16_4:
961 ; CHECK: @ %bb.0: @ %entry
962 ; CHECK-NEXT: vldrh.u16 q0, [r0, #4]
963 ; CHECK-NEXT: vstrh.16 q0, [r1]
966 %z = getelementptr inbounds i8, i8* %x, i32 4
967 %0 = bitcast i8* %z to <8 x half>*
968 %1 = load <8 x half>, <8 x half>* %0, align 2
969 %2 = bitcast i8* %y to <8 x half>*
970 store <8 x half> %1, <8 x half>* %2, align 2
; ldrwi32_align1: <4 x i32> load from %x + 3 with only align 1, stored to
; %y with align 4.
; Expected codegen: both endiannesses load with a byte-wise vldrb.u8 (offset
; #3 folded); the big-endian run additionally expects a vrev32.8 between the
; load and the vstrw.32.
974 define i8* @ldrwi32_align1(i8* %x, i8* %y) {
975 ; CHECK-LE-LABEL: ldrwi32_align1:
976 ; CHECK-LE: @ %bb.0: @ %entry
977 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]
978 ; CHECK-LE-NEXT: vstrw.32 q0, [r1]
979 ; CHECK-LE-NEXT: bx lr
981 ; CHECK-BE-LABEL: ldrwi32_align1:
982 ; CHECK-BE: @ %bb.0: @ %entry
983 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
984 ; CHECK-BE-NEXT: vrev32.8 q0, q0
985 ; CHECK-BE-NEXT: vstrw.32 q0, [r1]
986 ; CHECK-BE-NEXT: bx lr
988 %z = getelementptr inbounds i8, i8* %x, i32 3
989 %0 = bitcast i8* %z to <4 x i32>*
990 %1 = load <4 x i32>, <4 x i32>* %0, align 1
991 %2 = bitcast i8* %y to <4 x i32>*
992 store <4 x i32> %1, <4 x i32>* %2, align 4
; ldrhi16_align1: <8 x i16> load from %x + 3 with only align 1, stored to
; %y with align 2.
; Expected codegen: both endiannesses load with a byte-wise vldrb.u8 (offset
; #3 folded); the big-endian run additionally expects a vrev16.8 between the
; load and the vstrh.16.
996 define i8* @ldrhi16_align1(i8* %x, i8* %y) {
997 ; CHECK-LE-LABEL: ldrhi16_align1:
998 ; CHECK-LE: @ %bb.0: @ %entry
999 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]
1000 ; CHECK-LE-NEXT: vstrh.16 q0, [r1]
1001 ; CHECK-LE-NEXT: bx lr
1003 ; CHECK-BE-LABEL: ldrhi16_align1:
1004 ; CHECK-BE: @ %bb.0: @ %entry
1005 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
1006 ; CHECK-BE-NEXT: vrev16.8 q0, q0
1007 ; CHECK-BE-NEXT: vstrh.16 q0, [r1]
1008 ; CHECK-BE-NEXT: bx lr
1010 %z = getelementptr inbounds i8, i8* %x, i32 3
1011 %0 = bitcast i8* %z to <8 x i16>*
1012 %1 = load <8 x i16>, <8 x i16>* %0, align 1
1013 %2 = bitcast i8* %y to <8 x i16>*
1014 store <8 x i16> %1, <8 x i16>* %2, align 2
; ldrhi32_align1: <4 x i16> load from %x + 3 with only align 1,
; sign-extended to <4 x i32> and stored to %y.
; Expected codegen (same for both endiannesses): the 8 source bytes are read
; with two scalar ldr.w loads (offsets #3 and #7), written to an 8-byte stack
; slot with strd, and then loaded from sp with a widening vldrh.s32.
1018 define i8* @ldrhi32_align1(i8* %x, i8* %y) {
1019 ; CHECK-LABEL: ldrhi32_align1:
1020 ; CHECK: @ %bb.0: @ %entry
1021 ; CHECK-NEXT: .pad #8
1022 ; CHECK-NEXT: sub sp, #8
1023 ; CHECK-NEXT: ldr.w r3, [r0, #7]
1024 ; CHECK-NEXT: ldr.w r2, [r0, #3]
1025 ; CHECK-NEXT: strd r2, r3, [sp]
1026 ; CHECK-NEXT: mov r2, sp
1027 ; CHECK-NEXT: vldrh.s32 q0, [r2]
1028 ; CHECK-NEXT: vstrw.32 q0, [r1]
1029 ; CHECK-NEXT: add sp, #8
1032 %z = getelementptr inbounds i8, i8* %x, i32 3
1033 %0 = bitcast i8* %z to <4 x i16>*
1034 %1 = load <4 x i16>, <4 x i16>* %0, align 1
1035 %2 = bitcast i8* %y to <4 x i32>*
1036 %3 = sext <4 x i16> %1 to <4 x i32>
1037 store <4 x i32> %3, <4 x i32>* %2, align 4
; ldrf32_align1: <4 x float> load from %x + 3 with only align 1, stored to
; %y with align 4.
; Expected codegen: both endiannesses load with a byte-wise vldrb.u8 (offset
; #3 folded); the big-endian run additionally expects a vrev32.8 between the
; load and the vstrw.32.
1041 define i8* @ldrf32_align1(i8* %x, i8* %y) {
1042 ; CHECK-LE-LABEL: ldrf32_align1:
1043 ; CHECK-LE: @ %bb.0: @ %entry
1044 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]
1045 ; CHECK-LE-NEXT: vstrw.32 q0, [r1]
1046 ; CHECK-LE-NEXT: bx lr
1048 ; CHECK-BE-LABEL: ldrf32_align1:
1049 ; CHECK-BE: @ %bb.0: @ %entry
1050 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
1051 ; CHECK-BE-NEXT: vrev32.8 q0, q0
1052 ; CHECK-BE-NEXT: vstrw.32 q0, [r1]
1053 ; CHECK-BE-NEXT: bx lr
1055 %z = getelementptr inbounds i8, i8* %x, i32 3
1056 %0 = bitcast i8* %z to <4 x float>*
1057 %1 = load <4 x float>, <4 x float>* %0, align 1
1058 %2 = bitcast i8* %y to <4 x float>*
1059 store <4 x float> %1, <4 x float>* %2, align 4
; ldrf16_align1: <8 x half> load from %x + 3 with only align 1, stored to
; %y with align 2.
; Expected codegen: both endiannesses load with a byte-wise vldrb.u8 (offset
; #3 folded); the big-endian run additionally expects a vrev16.8 between the
; load and the vstrh.16.
1063 define i8* @ldrf16_align1(i8* %x, i8* %y) {
1064 ; CHECK-LE-LABEL: ldrf16_align1:
1065 ; CHECK-LE: @ %bb.0: @ %entry
1066 ; CHECK-LE-NEXT: vldrb.u8 q0, [r0, #3]
1067 ; CHECK-LE-NEXT: vstrh.16 q0, [r1]
1068 ; CHECK-LE-NEXT: bx lr
1070 ; CHECK-BE-LABEL: ldrf16_align1:
1071 ; CHECK-BE: @ %bb.0: @ %entry
1072 ; CHECK-BE-NEXT: vldrb.u8 q0, [r0, #3]
1073 ; CHECK-BE-NEXT: vrev16.8 q0, q0
1074 ; CHECK-BE-NEXT: vstrh.16 q0, [r1]
1075 ; CHECK-BE-NEXT: bx lr
1077 %z = getelementptr inbounds i8, i8* %x, i32 3
1078 %0 = bitcast i8* %z to <8 x half>*
1079 %1 = load <8 x half>, <8 x half>* %0, align 1
1080 %2 = bitcast i8* %y to <8 x half>*
1081 store <8 x half> %1, <8 x half>* %2, align 2
; ldrh16_align8: <8 x i16> load from %x + 4 with align 8 (more than the
; element size), stored to %y with align 2.
; Expected codegen differs by endianness: the little-endian run expects a
; word load vldrw.u32 with #4 folded, while the big-endian run expects a
; halfword load vldrh.u16 with #4 folded. Both store with vstrh.16.
1085 define i8* @ldrh16_align8(i8* %x, i8* %y) {
1086 ; CHECK-LE-LABEL: ldrh16_align8:
1087 ; CHECK-LE: @ %bb.0: @ %entry
1088 ; CHECK-LE-NEXT: vldrw.u32 q0, [r0, #4]
1089 ; CHECK-LE-NEXT: vstrh.16 q0, [r1]
1090 ; CHECK-LE-NEXT: bx lr
1092 ; CHECK-BE-LABEL: ldrh16_align8:
1093 ; CHECK-BE: @ %bb.0: @ %entry
1094 ; CHECK-BE-NEXT: vldrh.u16 q0, [r0, #4]
1095 ; CHECK-BE-NEXT: vstrh.16 q0, [r1]
1096 ; CHECK-BE-NEXT: bx lr
1098 %z = getelementptr inbounds i8, i8* %x, i32 4
1099 %0 = bitcast i8* %z to <8 x i16>*
1100 %1 = load <8 x i16>, <8 x i16>* %0, align 8
1101 %2 = bitcast i8* %y to <8 x i16>*
1102 store <8 x i16> %1, <8 x i16>* %2, align 2
; strw32_4: <4 x i32> loaded from %x (align 4) and stored to %y + 4.
; Note the parameter order is (%y, %x) here, so the store base is r0.
; Expected codegen folds the +4 offset into the vstrw.32 immediate.
1110 define i8* @strw32_4(i8* %y, i8* %x) {
1111 ; CHECK-LABEL: strw32_4:
1112 ; CHECK: @ %bb.0: @ %entry
1113 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1114 ; CHECK-NEXT: vstrw.32 q0, [r0, #4]
1117 %z = getelementptr inbounds i8, i8* %y, i32 4
1118 %0 = bitcast i8* %x to <4 x i32>*
1119 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1120 %2 = bitcast i8* %z to <4 x i32>*
1121 store <4 x i32> %1, <4 x i32>* %2, align 4
; strw32_3: <4 x i32> loaded from %x (align 4) and stored to %y + 3.
; Expected codegen does NOT fold the offset: a separate adds forms the
; store address (reusing r1 after the load) and vstrw.32 uses no immediate.
1125 define i8* @strw32_3(i8* %y, i8* %x) {
1126 ; CHECK-LABEL: strw32_3:
1127 ; CHECK: @ %bb.0: @ %entry
1128 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1129 ; CHECK-NEXT: adds r1, r0, #3
1130 ; CHECK-NEXT: vstrw.32 q0, [r1]
1133 %z = getelementptr inbounds i8, i8* %y, i32 3
1134 %0 = bitcast i8* %x to <4 x i32>*
1135 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1136 %2 = bitcast i8* %z to <4 x i32>*
1137 store <4 x i32> %1, <4 x i32>* %2, align 4
; strw32_m4: <4 x i32> loaded from %x (align 4) and stored to %y - 4.
; Expected codegen folds the -4 offset into the vstrw.32 immediate.
1141 define i8* @strw32_m4(i8* %y, i8* %x) {
1142 ; CHECK-LABEL: strw32_m4:
1143 ; CHECK: @ %bb.0: @ %entry
1144 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1145 ; CHECK-NEXT: vstrw.32 q0, [r0, #-4]
1148 %z = getelementptr inbounds i8, i8* %y, i32 -4
1149 %0 = bitcast i8* %x to <4 x i32>*
1150 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1151 %2 = bitcast i8* %z to <4 x i32>*
1152 store <4 x i32> %1, <4 x i32>* %2, align 4
; Test: copy a <4 x i32> from %x to byte address %y + 508 (align 4).
; Expected codegen: 508 is still used directly as the vstrw.32 immediate.
1156 define i8* @strw32_508(i8* %y, i8* %x) {
1157 ; CHECK-LABEL: strw32_508:
1158 ; CHECK: @ %bb.0: @ %entry
1159 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1160 ; CHECK-NEXT: vstrw.32 q0, [r0, #508]
1163 %z = getelementptr inbounds i8, i8* %y, i32 508
1164 %0 = bitcast i8* %x to <4 x i32>*
1165 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1166 %2 = bitcast i8* %z to <4 x i32>*
1167 store <4 x i32> %1, <4 x i32>* %2, align 4
; Test: copy a <4 x i32> from %x to byte address %y + 512 (align 4).
; Expected codegen: 512 is not used as a store immediate; a separate add.w computes the address.
1171 define i8* @strw32_512(i8* %y, i8* %x) {
1172 ; CHECK-LABEL: strw32_512:
1173 ; CHECK: @ %bb.0: @ %entry
1174 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1175 ; CHECK-NEXT: add.w r1, r0, #512
1176 ; CHECK-NEXT: vstrw.32 q0, [r1]
1179 %z = getelementptr inbounds i8, i8* %y, i32 512
1180 %0 = bitcast i8* %x to <4 x i32>*
1181 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1182 %2 = bitcast i8* %z to <4 x i32>*
1183 store <4 x i32> %1, <4 x i32>* %2, align 4
; Test: copy a <4 x i32> from %x to byte address %y - 508 (align 4).
; Expected codegen: -508 is used directly as the vstrw.32 immediate.
1187 define i8* @strw32_m508(i8* %y, i8* %x) {
1188 ; CHECK-LABEL: strw32_m508:
1189 ; CHECK: @ %bb.0: @ %entry
1190 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1191 ; CHECK-NEXT: vstrw.32 q0, [r0, #-508]
1194 %z = getelementptr inbounds i8, i8* %y, i32 -508
1195 %0 = bitcast i8* %x to <4 x i32>*
1196 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1197 %2 = bitcast i8* %z to <4 x i32>*
1198 store <4 x i32> %1, <4 x i32>* %2, align 4
; Test: copy a <4 x i32> from %x to byte address %y - 512 (align 4).
; Expected codegen: -512 is not used as a store immediate; a separate sub.w computes the address.
1202 define i8* @strw32_m512(i8* %y, i8* %x) {
1203 ; CHECK-LABEL: strw32_m512:
1204 ; CHECK: @ %bb.0: @ %entry
1205 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1206 ; CHECK-NEXT: sub.w r1, r0, #512
1207 ; CHECK-NEXT: vstrw.32 q0, [r1]
1210 %z = getelementptr inbounds i8, i8* %y, i32 -512
1211 %0 = bitcast i8* %x to <4 x i32>*
1212 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1213 %2 = bitcast i8* %z to <4 x i32>*
1214 store <4 x i32> %1, <4 x i32>* %2, align 4
; Test: copy a <4 x i16> from %x to byte address %y + 4 (load and store both align 2).
; Expected codegen: vldrh.u32 load, then vstrh.32 with the #4 immediate.
1219 define i8* @strh32_4(i8* %y, i8* %x) {
1220 ; CHECK-LABEL: strh32_4:
1221 ; CHECK: @ %bb.0: @ %entry
1222 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1223 ; CHECK-NEXT: vstrh.32 q0, [r0, #4]
1226 %z = getelementptr inbounds i8, i8* %y, i32 4
1227 %0 = bitcast i8* %x to <4 x i16>*
1228 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1229 %2 = bitcast i8* %z to <4 x i16>*
1230 store <4 x i16> %1, <4 x i16>* %2, align 2
; Test: copy a <4 x i16> from %x to byte address %y + 3 (store still annotated align 2).
; Expected codegen: the address is formed by a separate adds and vstrh.32 takes no immediate.
1234 define i8* @strh32_3(i8* %y, i8* %x) {
1235 ; CHECK-LABEL: strh32_3:
1236 ; CHECK: @ %bb.0: @ %entry
1237 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1238 ; CHECK-NEXT: adds r1, r0, #3
1239 ; CHECK-NEXT: vstrh.32 q0, [r1]
1242 %z = getelementptr inbounds i8, i8* %y, i32 3
1243 %0 = bitcast i8* %x to <4 x i16>*
1244 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1245 %2 = bitcast i8* %z to <4 x i16>*
1246 store <4 x i16> %1, <4 x i16>* %2, align 2
; Test: copy a <4 x i16> from %x to byte address %y + 2 (align 2).
; Expected codegen: the +2 displacement appears as the immediate of vstrh.32.
1250 define i8* @strh32_2(i8* %y, i8* %x) {
1251 ; CHECK-LABEL: strh32_2:
1252 ; CHECK: @ %bb.0: @ %entry
1253 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1254 ; CHECK-NEXT: vstrh.32 q0, [r0, #2]
1257 %z = getelementptr inbounds i8, i8* %y, i32 2
1258 %0 = bitcast i8* %x to <4 x i16>*
1259 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1260 %2 = bitcast i8* %z to <4 x i16>*
1261 store <4 x i16> %1, <4 x i16>* %2, align 2
; Test: copy a <4 x i16> from %x to byte address %y + 254 (align 2).
; Expected codegen: 254 is still used directly as the vstrh.32 immediate.
1265 define i8* @strh32_254(i8* %y, i8* %x) {
1266 ; CHECK-LABEL: strh32_254:
1267 ; CHECK: @ %bb.0: @ %entry
1268 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1269 ; CHECK-NEXT: vstrh.32 q0, [r0, #254]
1272 %z = getelementptr inbounds i8, i8* %y, i32 254
1273 %0 = bitcast i8* %x to <4 x i16>*
1274 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1275 %2 = bitcast i8* %z to <4 x i16>*
1276 store <4 x i16> %1, <4 x i16>* %2, align 2
; Test: copy a <4 x i16> from %x to byte address %y + 256 (align 2).
; Expected codegen: 256 is not used as a store immediate; a separate add.w computes the address.
1280 define i8* @strh32_256(i8* %y, i8* %x) {
1281 ; CHECK-LABEL: strh32_256:
1282 ; CHECK: @ %bb.0: @ %entry
1283 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1284 ; CHECK-NEXT: add.w r1, r0, #256
1285 ; CHECK-NEXT: vstrh.32 q0, [r1]
1288 %z = getelementptr inbounds i8, i8* %y, i32 256
1289 %0 = bitcast i8* %x to <4 x i16>*
1290 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1291 %2 = bitcast i8* %z to <4 x i16>*
1292 store <4 x i16> %1, <4 x i16>* %2, align 2
; Test: copy a <4 x i16> from %x to byte address %y - 254 (align 2).
; Expected codegen: -254 is used directly as the vstrh.32 immediate.
1296 define i8* @strh32_m254(i8* %y, i8* %x) {
1297 ; CHECK-LABEL: strh32_m254:
1298 ; CHECK: @ %bb.0: @ %entry
1299 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1300 ; CHECK-NEXT: vstrh.32 q0, [r0, #-254]
1303 %z = getelementptr inbounds i8, i8* %y, i32 -254
1304 %0 = bitcast i8* %x to <4 x i16>*
1305 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1306 %2 = bitcast i8* %z to <4 x i16>*
1307 store <4 x i16> %1, <4 x i16>* %2, align 2
; Test: copy a <4 x i16> from %x to byte address %y - 256 (align 2).
; Expected codegen: -256 is not used as a store immediate; a separate sub.w computes the address.
1311 define i8* @strh32_m256(i8* %y, i8* %x) {
1312 ; CHECK-LABEL: strh32_m256:
1313 ; CHECK: @ %bb.0: @ %entry
1314 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1315 ; CHECK-NEXT: sub.w r1, r0, #256
1316 ; CHECK-NEXT: vstrh.32 q0, [r1]
1319 %z = getelementptr inbounds i8, i8* %y, i32 -256
1320 %0 = bitcast i8* %x to <4 x i16>*
1321 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1322 %2 = bitcast i8* %z to <4 x i16>*
1323 store <4 x i16> %1, <4 x i16>* %2, align 2
; Test: copy a <8 x i16> from %x to byte address %y + 4 (load and store both align 2).
; Expected codegen: vldrh.u16 load, then vstrh.16 with the #4 immediate.
1328 define i8* @strh16_4(i8* %y, i8* %x) {
1329 ; CHECK-LABEL: strh16_4:
1330 ; CHECK: @ %bb.0: @ %entry
1331 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1332 ; CHECK-NEXT: vstrh.16 q0, [r0, #4]
1335 %z = getelementptr inbounds i8, i8* %y, i32 4
1336 %0 = bitcast i8* %x to <8 x i16>*
1337 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1338 %2 = bitcast i8* %z to <8 x i16>*
1339 store <8 x i16> %1, <8 x i16>* %2, align 2
; Test: copy a <8 x i16> from %x to byte address %y + 3 (store still annotated align 2).
; Expected codegen: the address is formed by a separate adds and vstrh.16 takes no immediate.
1343 define i8* @strh16_3(i8* %y, i8* %x) {
1344 ; CHECK-LABEL: strh16_3:
1345 ; CHECK: @ %bb.0: @ %entry
1346 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1347 ; CHECK-NEXT: adds r1, r0, #3
1348 ; CHECK-NEXT: vstrh.16 q0, [r1]
1351 %z = getelementptr inbounds i8, i8* %y, i32 3
1352 %0 = bitcast i8* %x to <8 x i16>*
1353 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1354 %2 = bitcast i8* %z to <8 x i16>*
1355 store <8 x i16> %1, <8 x i16>* %2, align 2
; Test: copy a <8 x i16> from %x to byte address %y + 2 (align 2).
; Expected codegen: the +2 displacement appears as the immediate of vstrh.16.
1359 define i8* @strh16_2(i8* %y, i8* %x) {
1360 ; CHECK-LABEL: strh16_2:
1361 ; CHECK: @ %bb.0: @ %entry
1362 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1363 ; CHECK-NEXT: vstrh.16 q0, [r0, #2]
1366 %z = getelementptr inbounds i8, i8* %y, i32 2
1367 %0 = bitcast i8* %x to <8 x i16>*
1368 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1369 %2 = bitcast i8* %z to <8 x i16>*
1370 store <8 x i16> %1, <8 x i16>* %2, align 2
; Test: copy a <8 x i16> from %x to byte address %y + 254 (align 2).
; Expected codegen: 254 is still used directly as the vstrh.16 immediate.
1374 define i8* @strh16_254(i8* %y, i8* %x) {
1375 ; CHECK-LABEL: strh16_254:
1376 ; CHECK: @ %bb.0: @ %entry
1377 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1378 ; CHECK-NEXT: vstrh.16 q0, [r0, #254]
1381 %z = getelementptr inbounds i8, i8* %y, i32 254
1382 %0 = bitcast i8* %x to <8 x i16>*
1383 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1384 %2 = bitcast i8* %z to <8 x i16>*
1385 store <8 x i16> %1, <8 x i16>* %2, align 2
; Test: copy a <8 x i16> from %x to byte address %y + 256 (align 2).
; Expected codegen: 256 is not used as a store immediate; a separate add.w computes the address.
1389 define i8* @strh16_256(i8* %y, i8* %x) {
1390 ; CHECK-LABEL: strh16_256:
1391 ; CHECK: @ %bb.0: @ %entry
1392 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1393 ; CHECK-NEXT: add.w r1, r0, #256
1394 ; CHECK-NEXT: vstrh.16 q0, [r1]
1397 %z = getelementptr inbounds i8, i8* %y, i32 256
1398 %0 = bitcast i8* %x to <8 x i16>*
1399 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1400 %2 = bitcast i8* %z to <8 x i16>*
1401 store <8 x i16> %1, <8 x i16>* %2, align 2
; Test: copy a <8 x i16> from %x to byte address %y - 254 (align 2).
; Expected codegen: -254 is used directly as the vstrh.16 immediate.
1405 define i8* @strh16_m254(i8* %y, i8* %x) {
1406 ; CHECK-LABEL: strh16_m254:
1407 ; CHECK: @ %bb.0: @ %entry
1408 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1409 ; CHECK-NEXT: vstrh.16 q0, [r0, #-254]
1412 %z = getelementptr inbounds i8, i8* %y, i32 -254
1413 %0 = bitcast i8* %x to <8 x i16>*
1414 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1415 %2 = bitcast i8* %z to <8 x i16>*
1416 store <8 x i16> %1, <8 x i16>* %2, align 2
; Test: copy a <8 x i16> from %x to byte address %y - 256 (align 2).
; Expected codegen: -256 is not used as a store immediate; a separate sub.w computes the address.
1420 define i8* @strh16_m256(i8* %y, i8* %x) {
1421 ; CHECK-LABEL: strh16_m256:
1422 ; CHECK: @ %bb.0: @ %entry
1423 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1424 ; CHECK-NEXT: sub.w r1, r0, #256
1425 ; CHECK-NEXT: vstrh.16 q0, [r1]
1428 %z = getelementptr inbounds i8, i8* %y, i32 -256
1429 %0 = bitcast i8* %x to <8 x i16>*
1430 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1431 %2 = bitcast i8* %z to <8 x i16>*
1432 store <8 x i16> %1, <8 x i16>* %2, align 2
; Test: copy a <4 x i8> from %x to byte address %y + 4 (load and store both align 1).
; Expected codegen: vldrb.u32 load, then vstrb.32 with the #4 immediate.
1437 define i8* @strb32_4(i8* %y, i8* %x) {
1438 ; CHECK-LABEL: strb32_4:
1439 ; CHECK: @ %bb.0: @ %entry
1440 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1441 ; CHECK-NEXT: vstrb.32 q0, [r0, #4]
1444 %z = getelementptr inbounds i8, i8* %y, i32 4
1445 %0 = bitcast i8* %x to <4 x i8>*
1446 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1447 %2 = bitcast i8* %z to <4 x i8>*
1448 store <4 x i8> %1, <4 x i8>* %2, align 1
; Test: copy a <4 x i8> from %x to byte address %y + 3 (align 1).
; Expected codegen: the odd displacement 3 is used directly as the vstrb.32 immediate.
1452 define i8* @strb32_3(i8* %y, i8* %x) {
1453 ; CHECK-LABEL: strb32_3:
1454 ; CHECK: @ %bb.0: @ %entry
1455 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1456 ; CHECK-NEXT: vstrb.32 q0, [r0, #3]
1459 %z = getelementptr inbounds i8, i8* %y, i32 3
1460 %0 = bitcast i8* %x to <4 x i8>*
1461 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1462 %2 = bitcast i8* %z to <4 x i8>*
1463 store <4 x i8> %1, <4 x i8>* %2, align 1
; Test: copy a <4 x i8> from %x to byte address %y + 127 (align 1).
; Expected codegen: 127 is still used directly as the vstrb.32 immediate.
1467 define i8* @strb32_127(i8* %y, i8* %x) {
1468 ; CHECK-LABEL: strb32_127:
1469 ; CHECK: @ %bb.0: @ %entry
1470 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1471 ; CHECK-NEXT: vstrb.32 q0, [r0, #127]
1474 %z = getelementptr inbounds i8, i8* %y, i32 127
1475 %0 = bitcast i8* %x to <4 x i8>*
1476 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1477 %2 = bitcast i8* %z to <4 x i8>*
1478 store <4 x i8> %1, <4 x i8>* %2, align 1
; Test: copy a <4 x i8> from %x to byte address %y + 128 (align 1).
; Expected codegen: 128 is not used as a store immediate; a separate add.w computes the address.
1482 define i8* @strb32_128(i8* %y, i8* %x) {
1483 ; CHECK-LABEL: strb32_128:
1484 ; CHECK: @ %bb.0: @ %entry
1485 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1486 ; CHECK-NEXT: add.w r1, r0, #128
1487 ; CHECK-NEXT: vstrb.32 q0, [r1]
1490 %z = getelementptr inbounds i8, i8* %y, i32 128
1491 %0 = bitcast i8* %x to <4 x i8>*
1492 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1493 %2 = bitcast i8* %z to <4 x i8>*
1494 store <4 x i8> %1, <4 x i8>* %2, align 1
; Test: copy a <4 x i8> from %x to byte address %y - 127 (align 1).
; Expected codegen: -127 is used directly as the vstrb.32 immediate.
1498 define i8* @strb32_m127(i8* %y, i8* %x) {
1499 ; CHECK-LABEL: strb32_m127:
1500 ; CHECK: @ %bb.0: @ %entry
1501 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1502 ; CHECK-NEXT: vstrb.32 q0, [r0, #-127]
1505 %z = getelementptr inbounds i8, i8* %y, i32 -127
1506 %0 = bitcast i8* %x to <4 x i8>*
1507 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1508 %2 = bitcast i8* %z to <4 x i8>*
1509 store <4 x i8> %1, <4 x i8>* %2, align 1
; Test: copy a <4 x i8> from %x to byte address %y - 128 (align 1).
; Expected codegen: -128 is not used as a store immediate; a separate sub.w computes the address.
1513 define i8* @strb32_m128(i8* %y, i8* %x) {
1514 ; CHECK-LABEL: strb32_m128:
1515 ; CHECK: @ %bb.0: @ %entry
1516 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1517 ; CHECK-NEXT: sub.w r1, r0, #128
1518 ; CHECK-NEXT: vstrb.32 q0, [r1]
1521 %z = getelementptr inbounds i8, i8* %y, i32 -128
1522 %0 = bitcast i8* %x to <4 x i8>*
1523 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1524 %2 = bitcast i8* %z to <4 x i8>*
1525 store <4 x i8> %1, <4 x i8>* %2, align 1
; Test: copy a <8 x i8> from %x to byte address %y + 4 (load and store both align 1).
; Expected codegen: vldrb.u16 load, then vstrb.16 with the #4 immediate.
1530 define i8* @strb16_4(i8* %y, i8* %x) {
1531 ; CHECK-LABEL: strb16_4:
1532 ; CHECK: @ %bb.0: @ %entry
1533 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1534 ; CHECK-NEXT: vstrb.16 q0, [r0, #4]
1537 %z = getelementptr inbounds i8, i8* %y, i32 4
1538 %0 = bitcast i8* %x to <8 x i8>*
1539 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1540 %2 = bitcast i8* %z to <8 x i8>*
1541 store <8 x i8> %1, <8 x i8>* %2, align 1
; Test: copy a <8 x i8> from %x to byte address %y + 3 (align 1).
; Expected codegen: the odd displacement 3 is used directly as the vstrb.16 immediate.
1545 define i8* @strb16_3(i8* %y, i8* %x) {
1546 ; CHECK-LABEL: strb16_3:
1547 ; CHECK: @ %bb.0: @ %entry
1548 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1549 ; CHECK-NEXT: vstrb.16 q0, [r0, #3]
1552 %z = getelementptr inbounds i8, i8* %y, i32 3
1553 %0 = bitcast i8* %x to <8 x i8>*
1554 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1555 %2 = bitcast i8* %z to <8 x i8>*
1556 store <8 x i8> %1, <8 x i8>* %2, align 1
; Test: copy a <8 x i8> from %x to byte address %y + 127 (align 1).
; Expected codegen: 127 is still used directly as the vstrb.16 immediate.
1560 define i8* @strb16_127(i8* %y, i8* %x) {
1561 ; CHECK-LABEL: strb16_127:
1562 ; CHECK: @ %bb.0: @ %entry
1563 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1564 ; CHECK-NEXT: vstrb.16 q0, [r0, #127]
1567 %z = getelementptr inbounds i8, i8* %y, i32 127
1568 %0 = bitcast i8* %x to <8 x i8>*
1569 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1570 %2 = bitcast i8* %z to <8 x i8>*
1571 store <8 x i8> %1, <8 x i8>* %2, align 1
; Test: copy a <8 x i8> from %x to byte address %y + 128 (align 1).
; Expected codegen: 128 is not used as a store immediate; a separate add.w computes the address.
1575 define i8* @strb16_128(i8* %y, i8* %x) {
1576 ; CHECK-LABEL: strb16_128:
1577 ; CHECK: @ %bb.0: @ %entry
1578 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1579 ; CHECK-NEXT: add.w r1, r0, #128
1580 ; CHECK-NEXT: vstrb.16 q0, [r1]
1583 %z = getelementptr inbounds i8, i8* %y, i32 128
1584 %0 = bitcast i8* %x to <8 x i8>*
1585 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1586 %2 = bitcast i8* %z to <8 x i8>*
1587 store <8 x i8> %1, <8 x i8>* %2, align 1
; Test: copy a <8 x i8> from %x to byte address %y - 127 (align 1).
; Expected codegen: -127 is used directly as the vstrb.16 immediate.
1591 define i8* @strb16_m127(i8* %y, i8* %x) {
1592 ; CHECK-LABEL: strb16_m127:
1593 ; CHECK: @ %bb.0: @ %entry
1594 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1595 ; CHECK-NEXT: vstrb.16 q0, [r0, #-127]
1598 %z = getelementptr inbounds i8, i8* %y, i32 -127
1599 %0 = bitcast i8* %x to <8 x i8>*
1600 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1601 %2 = bitcast i8* %z to <8 x i8>*
1602 store <8 x i8> %1, <8 x i8>* %2, align 1
; Test: copy a <8 x i8> from %x to byte address %y - 128 (align 1).
; Expected codegen: -128 is not used as a store immediate; a separate sub.w computes the address.
1606 define i8* @strb16_m128(i8* %y, i8* %x) {
1607 ; CHECK-LABEL: strb16_m128:
1608 ; CHECK: @ %bb.0: @ %entry
1609 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1610 ; CHECK-NEXT: sub.w r1, r0, #128
1611 ; CHECK-NEXT: vstrb.16 q0, [r1]
1614 %z = getelementptr inbounds i8, i8* %y, i32 -128
1615 %0 = bitcast i8* %x to <8 x i8>*
1616 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1617 %2 = bitcast i8* %z to <8 x i8>*
1618 store <8 x i8> %1, <8 x i8>* %2, align 1
; Test: copy a <16 x i8> from %x to byte address %y + 4 (load and store both align 1).
; Expected codegen: vldrb.u8 load, then vstrb.8 with the #4 immediate.
1623 define i8* @strb8_4(i8* %y, i8* %x) {
1624 ; CHECK-LABEL: strb8_4:
1625 ; CHECK: @ %bb.0: @ %entry
1626 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1627 ; CHECK-NEXT: vstrb.8 q0, [r0, #4]
1630 %z = getelementptr inbounds i8, i8* %y, i32 4
1631 %0 = bitcast i8* %x to <16 x i8>*
1632 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1633 %2 = bitcast i8* %z to <16 x i8>*
1634 store <16 x i8> %1, <16 x i8>* %2, align 1
; Test: copy a <16 x i8> from %x to byte address %y + 3 (align 1).
; Expected codegen: the odd displacement 3 is used directly as the vstrb.8 immediate.
1638 define i8* @strb8_3(i8* %y, i8* %x) {
1639 ; CHECK-LABEL: strb8_3:
1640 ; CHECK: @ %bb.0: @ %entry
1641 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1642 ; CHECK-NEXT: vstrb.8 q0, [r0, #3]
1645 %z = getelementptr inbounds i8, i8* %y, i32 3
1646 %0 = bitcast i8* %x to <16 x i8>*
1647 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1648 %2 = bitcast i8* %z to <16 x i8>*
1649 store <16 x i8> %1, <16 x i8>* %2, align 1
; Test: copy a <16 x i8> from %x to byte address %y + 127 (align 1).
; Expected codegen: 127 is still used directly as the vstrb.8 immediate.
1653 define i8* @strb8_127(i8* %y, i8* %x) {
1654 ; CHECK-LABEL: strb8_127:
1655 ; CHECK: @ %bb.0: @ %entry
1656 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1657 ; CHECK-NEXT: vstrb.8 q0, [r0, #127]
1660 %z = getelementptr inbounds i8, i8* %y, i32 127
1661 %0 = bitcast i8* %x to <16 x i8>*
1662 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1663 %2 = bitcast i8* %z to <16 x i8>*
1664 store <16 x i8> %1, <16 x i8>* %2, align 1
; Test: copy a <16 x i8> from %x to byte address %y + 128 (align 1).
; Expected codegen: 128 is not used as a store immediate; a separate add.w computes the address.
1668 define i8* @strb8_128(i8* %y, i8* %x) {
1669 ; CHECK-LABEL: strb8_128:
1670 ; CHECK: @ %bb.0: @ %entry
1671 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1672 ; CHECK-NEXT: add.w r1, r0, #128
1673 ; CHECK-NEXT: vstrb.8 q0, [r1]
1676 %z = getelementptr inbounds i8, i8* %y, i32 128
1677 %0 = bitcast i8* %x to <16 x i8>*
1678 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1679 %2 = bitcast i8* %z to <16 x i8>*
1680 store <16 x i8> %1, <16 x i8>* %2, align 1
; Test: copy a <16 x i8> from %x to byte address %y - 127 (align 1).
; Expected codegen: -127 is used directly as the vstrb.8 immediate.
1684 define i8* @strb8_m127(i8* %y, i8* %x) {
1685 ; CHECK-LABEL: strb8_m127:
1686 ; CHECK: @ %bb.0: @ %entry
1687 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1688 ; CHECK-NEXT: vstrb.8 q0, [r0, #-127]
1691 %z = getelementptr inbounds i8, i8* %y, i32 -127
1692 %0 = bitcast i8* %x to <16 x i8>*
1693 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1694 %2 = bitcast i8* %z to <16 x i8>*
1695 store <16 x i8> %1, <16 x i8>* %2, align 1
; Test: copy a <16 x i8> from %x to byte address %y - 128 (align 1).
; Expected codegen: -128 is not used as a store immediate; a separate sub.w computes the address.
1699 define i8* @strb8_m128(i8* %y, i8* %x) {
1700 ; CHECK-LABEL: strb8_m128:
1701 ; CHECK: @ %bb.0: @ %entry
1702 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1703 ; CHECK-NEXT: sub.w r1, r0, #128
1704 ; CHECK-NEXT: vstrb.8 q0, [r1]
1707 %z = getelementptr inbounds i8, i8* %y, i32 -128
1708 %0 = bitcast i8* %x to <16 x i8>*
1709 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1710 %2 = bitcast i8* %z to <16 x i8>*
1711 store <16 x i8> %1, <16 x i8>* %2, align 1
; Test: copy a <4 x float> from %x to byte address %y + 4 (load and store both align 4).
; Expected codegen: vldrw.u32 load, then vstrw.32 with the #4 immediate.
1716 define i8* @strf32_4(i8* %y, i8* %x) {
1717 ; CHECK-LABEL: strf32_4:
1718 ; CHECK: @ %bb.0: @ %entry
1719 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1720 ; CHECK-NEXT: vstrw.32 q0, [r0, #4]
1723 %z = getelementptr inbounds i8, i8* %y, i32 4
1724 %0 = bitcast i8* %x to <4 x float>*
1725 %1 = load <4 x float>, <4 x float>* %0, align 4
1726 %2 = bitcast i8* %z to <4 x float>*
1727 store <4 x float> %1, <4 x float>* %2, align 4
; Test: copy a <8 x half> from %x to byte address %y + 4 (load and store both align 2).
; Expected codegen: vldrh.u16 load, then vstrh.16 with the #4 immediate.
1731 define i8* @strf16_4(i8* %y, i8* %x) {
1732 ; CHECK-LABEL: strf16_4:
1733 ; CHECK: @ %bb.0: @ %entry
1734 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1735 ; CHECK-NEXT: vstrh.16 q0, [r0, #4]
1738 %z = getelementptr inbounds i8, i8* %y, i32 4
1739 %0 = bitcast i8* %x to <8 x half>*
1740 %1 = load <8 x half>, <8 x half>* %0, align 2
1741 %2 = bitcast i8* %z to <8 x half>*
1742 store <8 x half> %1, <8 x half>* %2, align 2
; Test: store a <4 x i32> (loaded from %x with align 4) to byte address %y + 3 with align 1.
; Expected codegen: little-endian stores with vstrb.8 at #3; big-endian inserts a vrev32.8
; before the same vstrb.8 store.
1746 define i8* @strwi32_align1(i8* %y, i8* %x) {
1747 ; CHECK-LE-LABEL: strwi32_align1:
1748 ; CHECK-LE: @ %bb.0: @ %entry
1749 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
1750 ; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]
1751 ; CHECK-LE-NEXT: bx lr
1753 ; CHECK-BE-LABEL: strwi32_align1:
1754 ; CHECK-BE: @ %bb.0: @ %entry
1755 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
1756 ; CHECK-BE-NEXT: vrev32.8 q0, q0
1757 ; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
1758 ; CHECK-BE-NEXT: bx lr
1760 %z = getelementptr inbounds i8, i8* %y, i32 3
1761 %0 = bitcast i8* %x to <4 x i32>*
1762 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1763 %2 = bitcast i8* %z to <4 x i32>*
1764 store <4 x i32> %1, <4 x i32>* %2, align 1
; Test: store a <8 x i16> (loaded from %x with align 2) to byte address %y + 3 with align 1.
; Expected codegen: little-endian stores with vstrb.8 at #3; big-endian inserts a vrev16.8
; before the same vstrb.8 store.
1768 define i8* @strhi16_align1(i8* %y, i8* %x) {
1769 ; CHECK-LE-LABEL: strhi16_align1:
1770 ; CHECK-LE: @ %bb.0: @ %entry
1771 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1772 ; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]
1773 ; CHECK-LE-NEXT: bx lr
1775 ; CHECK-BE-LABEL: strhi16_align1:
1776 ; CHECK-BE: @ %bb.0: @ %entry
1777 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1778 ; CHECK-BE-NEXT: vrev16.8 q0, q0
1779 ; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
1780 ; CHECK-BE-NEXT: bx lr
1782 %z = getelementptr inbounds i8, i8* %y, i32 3
1783 %0 = bitcast i8* %x to <8 x i16>*
1784 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1785 %2 = bitcast i8* %z to <8 x i16>*
1786 store <8 x i16> %1, <8 x i16>* %2, align 1
; Test: load a <4 x i32> from %x, truncate it to <4 x i16>, and store the result to byte
; address %y + 3 with align 1.
; Expected codegen (same for both endiannesses): the vector is written with vstrh.32 to an
; 8-byte stack slot, read back with ldrd, and emitted as two str.w stores at #3 and #7.
1790 define i8* @strhi32_align1(i8* %y, i8* %x) {
1791 ; CHECK-LABEL: strhi32_align1:
1792 ; CHECK: @ %bb.0: @ %entry
1793 ; CHECK-NEXT: .pad #8
1794 ; CHECK-NEXT: sub sp, #8
1795 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1796 ; CHECK-NEXT: mov r1, sp
1797 ; CHECK-NEXT: vstrh.32 q0, [r1]
1798 ; CHECK-NEXT: ldrd r1, r2, [sp]
1799 ; CHECK-NEXT: str.w r1, [r0, #3]
1800 ; CHECK-NEXT: str.w r2, [r0, #7]
1801 ; CHECK-NEXT: add sp, #8
1804 %z = getelementptr inbounds i8, i8* %y, i32 3
1805 %0 = bitcast i8* %x to <4 x i32>*
1806 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1807 %2 = bitcast i8* %z to <4 x i16>*
1808 %3 = trunc <4 x i32> %1 to <4 x i16>
1809 store <4 x i16> %3, <4 x i16>* %2, align 1
; Test: store a <4 x float> (loaded from %x with align 4) to byte address %y + 3 with align 1.
; Expected codegen: little-endian stores with vstrb.8 at #3; big-endian inserts a vrev32.8
; before the same vstrb.8 store.
1813 define i8* @strf32_align1(i8* %y, i8* %x) {
1814 ; CHECK-LE-LABEL: strf32_align1:
1815 ; CHECK-LE: @ %bb.0: @ %entry
1816 ; CHECK-LE-NEXT: vldrw.u32 q0, [r1]
1817 ; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]
1818 ; CHECK-LE-NEXT: bx lr
1820 ; CHECK-BE-LABEL: strf32_align1:
1821 ; CHECK-BE: @ %bb.0: @ %entry
1822 ; CHECK-BE-NEXT: vldrw.u32 q0, [r1]
1823 ; CHECK-BE-NEXT: vrev32.8 q0, q0
1824 ; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
1825 ; CHECK-BE-NEXT: bx lr
1827 %z = getelementptr inbounds i8, i8* %y, i32 3
1828 %0 = bitcast i8* %x to <4 x float>*
1829 %1 = load <4 x float>, <4 x float>* %0, align 4
1830 %2 = bitcast i8* %z to <4 x float>*
1831 store <4 x float> %1, <4 x float>* %2, align 1
; Test: store a <8 x half> (loaded from %x with align 2) to byte address %y + 3 with align 1.
; Expected codegen: little-endian stores with vstrb.8 at #3; big-endian inserts a vrev16.8
; before the same vstrb.8 store.
1835 define i8* @strf16_align1(i8* %y, i8* %x) {
1836 ; CHECK-LE-LABEL: strf16_align1:
1837 ; CHECK-LE: @ %bb.0: @ %entry
1838 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1839 ; CHECK-LE-NEXT: vstrb.8 q0, [r0, #3]
1840 ; CHECK-LE-NEXT: bx lr
1842 ; CHECK-BE-LABEL: strf16_align1:
1843 ; CHECK-BE: @ %bb.0: @ %entry
1844 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1845 ; CHECK-BE-NEXT: vrev16.8 q0, q0
1846 ; CHECK-BE-NEXT: vstrb.8 q0, [r0, #3]
1847 ; CHECK-BE-NEXT: bx lr
1849 %z = getelementptr inbounds i8, i8* %y, i32 3
1850 %0 = bitcast i8* %x to <8 x half>*
1851 %1 = load <8 x half>, <8 x half>* %0, align 2
1852 %2 = bitcast i8* %z to <8 x half>*
1853 store <8 x half> %1, <8 x half>* %2, align 1
; Test: store a <8 x i16> (loaded from %x with align 2) to byte address %y + 16 with align 8.
; Expected codegen: little-endian stores with vstrw.32 at #16, big-endian stores with
; vstrh.16 at #16.
; NOTE(review): the function name says "f16" but the IR operates on <8 x i16>, not
; <8 x half> - confirm whether that is intentional before relying on the name.
1857 define i8* @strf16_align8(i8* %y, i8* %x) {
1858 ; CHECK-LE-LABEL: strf16_align8:
1859 ; CHECK-LE: @ %bb.0: @ %entry
1860 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
1861 ; CHECK-LE-NEXT: vstrw.32 q0, [r0, #16]
1862 ; CHECK-LE-NEXT: bx lr
1864 ; CHECK-BE-LABEL: strf16_align8:
1865 ; CHECK-BE: @ %bb.0: @ %entry
1866 ; CHECK-BE-NEXT: vldrh.u16 q0, [r1]
1867 ; CHECK-BE-NEXT: vstrh.16 q0, [r0, #16]
1868 ; CHECK-BE-NEXT: bx lr
1870 %z = getelementptr inbounds i8, i8* %y, i32 16
1871 %0 = bitcast i8* %x to <8 x i16>*
1872 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1873 %2 = bitcast i8* %z to <8 x i16>*
1874 store <8 x i16> %1, <8 x i16>* %2, align 8