1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
; ldrwu32_4: loads <4 x i32> (align 4) from %x + 4 bytes and stores it to %y.
; Expected output keeps the offset in the load's immediate ([r0, #4]) and then
; adds 4 to r0 with a separate adds.
4 define i8* @ldrwu32_4(i8* %x, i8* %y) {
5 ; CHECK-LABEL: ldrwu32_4:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vldrw.u32 q0, [r0, #4]
8 ; CHECK-NEXT: adds r0, #4
9 ; CHECK-NEXT: vstrw.32 q0, [r1]
12 %z = getelementptr inbounds i8, i8* %x, i32 4
13 %0 = bitcast i8* %z to <4 x i32>*
14 %1 = load <4 x i32>, <4 x i32>* %0, align 4
15 %2 = bitcast i8* %y to <4 x i32>*
16 store <4 x i32> %1, <4 x i32>* %2, align 4
; ldrwu32_3: loads <4 x i32> (align 4) from %x + 3 bytes and stores it to %y.
; Expected output adds 3 to r0 first and then loads from [r0] with no immediate
; offset.
20 define i8* @ldrwu32_3(i8* %x, i8* %y) {
21 ; CHECK-LABEL: ldrwu32_3:
22 ; CHECK: @ %bb.0: @ %entry
23 ; CHECK-NEXT: adds r0, #3
24 ; CHECK-NEXT: vldrw.u32 q0, [r0]
25 ; CHECK-NEXT: vstrw.32 q0, [r1]
28 %z = getelementptr inbounds i8, i8* %x, i32 3
29 %0 = bitcast i8* %z to <4 x i32>*
30 %1 = load <4 x i32>, <4 x i32>* %0, align 4
31 %2 = bitcast i8* %y to <4 x i32>*
32 store <4 x i32> %1, <4 x i32>* %2, align 4
; ldrwu32_m4: loads <4 x i32> (align 4) from %x - 4 bytes and stores it to %y.
; Expected output keeps the negative offset in the load's immediate
; ([r0, #-4]) and then subtracts 4 from r0 with a separate subs.
36 define i8* @ldrwu32_m4(i8* %x, i8* %y) {
37 ; CHECK-LABEL: ldrwu32_m4:
38 ; CHECK: @ %bb.0: @ %entry
39 ; CHECK-NEXT: vldrw.u32 q0, [r0, #-4]
40 ; CHECK-NEXT: subs r0, #4
41 ; CHECK-NEXT: vstrw.32 q0, [r1]
44 %z = getelementptr inbounds i8, i8* %x, i32 -4
45 %0 = bitcast i8* %z to <4 x i32>*
46 %1 = load <4 x i32>, <4 x i32>* %0, align 4
47 %2 = bitcast i8* %y to <4 x i32>*
48 store <4 x i32> %1, <4 x i32>* %2, align 4
; ldrwu32_508: loads <4 x i32> (align 4) from %x + 508 bytes and stores it to %y.
; Expected output adds 508 to r0 with add.w first and then loads from [r0]
; with no immediate offset.
52 define i8* @ldrwu32_508(i8* %x, i8* %y) {
53 ; CHECK-LABEL: ldrwu32_508:
54 ; CHECK: @ %bb.0: @ %entry
55 ; CHECK-NEXT: add.w r0, r0, #508
56 ; CHECK-NEXT: vldrw.u32 q0, [r0]
57 ; CHECK-NEXT: vstrw.32 q0, [r1]
60 %z = getelementptr inbounds i8, i8* %x, i32 508
61 %0 = bitcast i8* %z to <4 x i32>*
62 %1 = load <4 x i32>, <4 x i32>* %0, align 4
63 %2 = bitcast i8* %y to <4 x i32>*
64 store <4 x i32> %1, <4 x i32>* %2, align 4
; ldrwu32_512: loads <4 x i32> (align 4) from %x + 512 bytes and stores it to %y.
; Expected output adds 512 to r0 with add.w first and then loads from [r0]
; with no immediate offset.
68 define i8* @ldrwu32_512(i8* %x, i8* %y) {
69 ; CHECK-LABEL: ldrwu32_512:
70 ; CHECK: @ %bb.0: @ %entry
71 ; CHECK-NEXT: add.w r0, r0, #512
72 ; CHECK-NEXT: vldrw.u32 q0, [r0]
73 ; CHECK-NEXT: vstrw.32 q0, [r1]
76 %z = getelementptr inbounds i8, i8* %x, i32 512
77 %0 = bitcast i8* %z to <4 x i32>*
78 %1 = load <4 x i32>, <4 x i32>* %0, align 4
79 %2 = bitcast i8* %y to <4 x i32>*
80 store <4 x i32> %1, <4 x i32>* %2, align 4
; ldrwu32_m508: loads <4 x i32> (align 4) from %x - 508 bytes and stores it to %y.
; Expected output subtracts 508 from r0 with sub.w first and then loads from
; [r0] with no immediate offset.
84 define i8* @ldrwu32_m508(i8* %x, i8* %y) {
85 ; CHECK-LABEL: ldrwu32_m508:
86 ; CHECK: @ %bb.0: @ %entry
87 ; CHECK-NEXT: sub.w r0, r0, #508
88 ; CHECK-NEXT: vldrw.u32 q0, [r0]
89 ; CHECK-NEXT: vstrw.32 q0, [r1]
92 %z = getelementptr inbounds i8, i8* %x, i32 -508
93 %0 = bitcast i8* %z to <4 x i32>*
94 %1 = load <4 x i32>, <4 x i32>* %0, align 4
95 %2 = bitcast i8* %y to <4 x i32>*
96 store <4 x i32> %1, <4 x i32>* %2, align 4
; ldrwu32_m512: loads <4 x i32> (align 4) from %x - 512 bytes and stores it to %y.
; Expected output subtracts 512 from r0 with sub.w first and then loads from
; [r0] with no immediate offset.
100 define i8* @ldrwu32_m512(i8* %x, i8* %y) {
101 ; CHECK-LABEL: ldrwu32_m512:
102 ; CHECK: @ %bb.0: @ %entry
103 ; CHECK-NEXT: sub.w r0, r0, #512
104 ; CHECK-NEXT: vldrw.u32 q0, [r0]
105 ; CHECK-NEXT: vstrw.32 q0, [r1]
108 %z = getelementptr inbounds i8, i8* %x, i32 -512
109 %0 = bitcast i8* %z to <4 x i32>*
110 %1 = load <4 x i32>, <4 x i32>* %0, align 4
111 %2 = bitcast i8* %y to <4 x i32>*
112 store <4 x i32> %1, <4 x i32>* %2, align 4
; ldrhu32_4: loads <4 x i16> (align 2) from %x + 4 bytes, zero-extends it to
; <4 x i32> and stores the result to %y.
; Expected output is a widening vldrh.u32 with the offset in its immediate
; ([r0, #4]), followed by a separate adds on r0.
117 define i8* @ldrhu32_4(i8* %x, i8* %y) {
118 ; CHECK-LABEL: ldrhu32_4:
119 ; CHECK: @ %bb.0: @ %entry
120 ; CHECK-NEXT: vldrh.u32 q0, [r0, #4]
121 ; CHECK-NEXT: adds r0, #4
122 ; CHECK-NEXT: vstrw.32 q0, [r1]
125 %z = getelementptr inbounds i8, i8* %x, i32 4
126 %0 = bitcast i8* %z to <4 x i16>*
127 %1 = load <4 x i16>, <4 x i16>* %0, align 2
128 %2 = zext <4 x i16> %1 to <4 x i32>
129 %3 = bitcast i8* %y to <4 x i32>*
130 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhu32_3: loads <4 x i16> (align 2) from %x + 3 bytes, zero-extends it to
; <4 x i32> and stores the result to %y.
; Expected output adds 3 to r0 first and then uses vldrh.u32 on [r0] with no
; immediate offset.
134 define i8* @ldrhu32_3(i8* %x, i8* %y) {
135 ; CHECK-LABEL: ldrhu32_3:
136 ; CHECK: @ %bb.0: @ %entry
137 ; CHECK-NEXT: adds r0, #3
138 ; CHECK-NEXT: vldrh.u32 q0, [r0]
139 ; CHECK-NEXT: vstrw.32 q0, [r1]
142 %z = getelementptr inbounds i8, i8* %x, i32 3
143 %0 = bitcast i8* %z to <4 x i16>*
144 %1 = load <4 x i16>, <4 x i16>* %0, align 2
145 %2 = zext <4 x i16> %1 to <4 x i32>
146 %3 = bitcast i8* %y to <4 x i32>*
147 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhu32_2: loads <4 x i16> (align 2) from %x + 2 bytes, zero-extends it to
; <4 x i32> and stores the result to %y.
; Expected output is a widening vldrh.u32 with the offset in its immediate
; ([r0, #2]), followed by a separate adds on r0.
151 define i8* @ldrhu32_2(i8* %x, i8* %y) {
152 ; CHECK-LABEL: ldrhu32_2:
153 ; CHECK: @ %bb.0: @ %entry
154 ; CHECK-NEXT: vldrh.u32 q0, [r0, #2]
155 ; CHECK-NEXT: adds r0, #2
156 ; CHECK-NEXT: vstrw.32 q0, [r1]
159 %z = getelementptr inbounds i8, i8* %x, i32 2
160 %0 = bitcast i8* %z to <4 x i16>*
161 %1 = load <4 x i16>, <4 x i16>* %0, align 2
162 %2 = zext <4 x i16> %1 to <4 x i32>
163 %3 = bitcast i8* %y to <4 x i32>*
164 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhu32_254: loads <4 x i16> (align 2) from %x + 254 bytes, zero-extends it
; to <4 x i32> and stores the result to %y.
; Expected output adds 254 to r0 first and then uses vldrh.u32 on [r0] with no
; immediate offset.
168 define i8* @ldrhu32_254(i8* %x, i8* %y) {
169 ; CHECK-LABEL: ldrhu32_254:
170 ; CHECK: @ %bb.0: @ %entry
171 ; CHECK-NEXT: adds r0, #254
172 ; CHECK-NEXT: vldrh.u32 q0, [r0]
173 ; CHECK-NEXT: vstrw.32 q0, [r1]
176 %z = getelementptr inbounds i8, i8* %x, i32 254
177 %0 = bitcast i8* %z to <4 x i16>*
178 %1 = load <4 x i16>, <4 x i16>* %0, align 2
179 %2 = zext <4 x i16> %1 to <4 x i32>
180 %3 = bitcast i8* %y to <4 x i32>*
181 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhu32_256: loads <4 x i16> (align 2) from %x + 256 bytes, zero-extends it
; to <4 x i32> and stores the result to %y.
; Expected output adds 256 to r0 with add.w first and then uses vldrh.u32 on
; [r0] with no immediate offset.
185 define i8* @ldrhu32_256(i8* %x, i8* %y) {
186 ; CHECK-LABEL: ldrhu32_256:
187 ; CHECK: @ %bb.0: @ %entry
188 ; CHECK-NEXT: add.w r0, r0, #256
189 ; CHECK-NEXT: vldrh.u32 q0, [r0]
190 ; CHECK-NEXT: vstrw.32 q0, [r1]
193 %z = getelementptr inbounds i8, i8* %x, i32 256
194 %0 = bitcast i8* %z to <4 x i16>*
195 %1 = load <4 x i16>, <4 x i16>* %0, align 2
196 %2 = zext <4 x i16> %1 to <4 x i32>
197 %3 = bitcast i8* %y to <4 x i32>*
198 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhs32_4: loads <4 x i16> (align 2) from %x + 4 bytes, sign-extends it to
; <4 x i32> and stores the result to %y.
; Expected output is a widening vldrh.s32 with the offset in its immediate
; ([r0, #4]), followed by a separate adds on r0.
203 define i8* @ldrhs32_4(i8* %x, i8* %y) {
204 ; CHECK-LABEL: ldrhs32_4:
205 ; CHECK: @ %bb.0: @ %entry
206 ; CHECK-NEXT: vldrh.s32 q0, [r0, #4]
207 ; CHECK-NEXT: adds r0, #4
208 ; CHECK-NEXT: vstrw.32 q0, [r1]
211 %z = getelementptr inbounds i8, i8* %x, i32 4
212 %0 = bitcast i8* %z to <4 x i16>*
213 %1 = load <4 x i16>, <4 x i16>* %0, align 2
214 %2 = sext <4 x i16> %1 to <4 x i32>
215 %3 = bitcast i8* %y to <4 x i32>*
216 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhs32_3: loads <4 x i16> (align 2) from %x + 3 bytes, sign-extends it to
; <4 x i32> and stores the result to %y.
; Expected output adds 3 to r0 first and then uses vldrh.s32 on [r0] with no
; immediate offset.
220 define i8* @ldrhs32_3(i8* %x, i8* %y) {
221 ; CHECK-LABEL: ldrhs32_3:
222 ; CHECK: @ %bb.0: @ %entry
223 ; CHECK-NEXT: adds r0, #3
224 ; CHECK-NEXT: vldrh.s32 q0, [r0]
225 ; CHECK-NEXT: vstrw.32 q0, [r1]
228 %z = getelementptr inbounds i8, i8* %x, i32 3
229 %0 = bitcast i8* %z to <4 x i16>*
230 %1 = load <4 x i16>, <4 x i16>* %0, align 2
231 %2 = sext <4 x i16> %1 to <4 x i32>
232 %3 = bitcast i8* %y to <4 x i32>*
233 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhs32_2: loads <4 x i16> (align 2) from %x + 2 bytes, sign-extends it to
; <4 x i32> and stores the result to %y.
; Expected output is a widening vldrh.s32 with the offset in its immediate
; ([r0, #2]), followed by a separate adds on r0.
237 define i8* @ldrhs32_2(i8* %x, i8* %y) {
238 ; CHECK-LABEL: ldrhs32_2:
239 ; CHECK: @ %bb.0: @ %entry
240 ; CHECK-NEXT: vldrh.s32 q0, [r0, #2]
241 ; CHECK-NEXT: adds r0, #2
242 ; CHECK-NEXT: vstrw.32 q0, [r1]
245 %z = getelementptr inbounds i8, i8* %x, i32 2
246 %0 = bitcast i8* %z to <4 x i16>*
247 %1 = load <4 x i16>, <4 x i16>* %0, align 2
248 %2 = sext <4 x i16> %1 to <4 x i32>
249 %3 = bitcast i8* %y to <4 x i32>*
250 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhs32_254: loads <4 x i16> (align 2) from %x + 254 bytes, sign-extends it
; to <4 x i32> and stores the result to %y.
; Expected output adds 254 to r0 first and then uses vldrh.s32 on [r0] with no
; immediate offset.
254 define i8* @ldrhs32_254(i8* %x, i8* %y) {
255 ; CHECK-LABEL: ldrhs32_254:
256 ; CHECK: @ %bb.0: @ %entry
257 ; CHECK-NEXT: adds r0, #254
258 ; CHECK-NEXT: vldrh.s32 q0, [r0]
259 ; CHECK-NEXT: vstrw.32 q0, [r1]
262 %z = getelementptr inbounds i8, i8* %x, i32 254
263 %0 = bitcast i8* %z to <4 x i16>*
264 %1 = load <4 x i16>, <4 x i16>* %0, align 2
265 %2 = sext <4 x i16> %1 to <4 x i32>
266 %3 = bitcast i8* %y to <4 x i32>*
267 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhs32_256: loads <4 x i16> (align 2) from %x + 256 bytes, sign-extends it
; to <4 x i32> and stores the result to %y.
; Expected output adds 256 to r0 with add.w first and then uses vldrh.s32 on
; [r0] with no immediate offset.
271 define i8* @ldrhs32_256(i8* %x, i8* %y) {
272 ; CHECK-LABEL: ldrhs32_256:
273 ; CHECK: @ %bb.0: @ %entry
274 ; CHECK-NEXT: add.w r0, r0, #256
275 ; CHECK-NEXT: vldrh.s32 q0, [r0]
276 ; CHECK-NEXT: vstrw.32 q0, [r1]
279 %z = getelementptr inbounds i8, i8* %x, i32 256
280 %0 = bitcast i8* %z to <4 x i16>*
281 %1 = load <4 x i16>, <4 x i16>* %0, align 2
282 %2 = sext <4 x i16> %1 to <4 x i32>
283 %3 = bitcast i8* %y to <4 x i32>*
284 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrhu16_4: loads <8 x i16> (align 2) from %x + 4 bytes and stores it to %y.
; Expected output keeps the offset in the vldrh.u16 immediate ([r0, #4]) and
; then adds 4 to r0 with a separate adds.
289 define i8* @ldrhu16_4(i8* %x, i8* %y) {
290 ; CHECK-LABEL: ldrhu16_4:
291 ; CHECK: @ %bb.0: @ %entry
292 ; CHECK-NEXT: vldrh.u16 q0, [r0, #4]
293 ; CHECK-NEXT: adds r0, #4
294 ; CHECK-NEXT: vstrh.16 q0, [r1]
297 %z = getelementptr inbounds i8, i8* %x, i32 4
298 %0 = bitcast i8* %z to <8 x i16>*
299 %1 = load <8 x i16>, <8 x i16>* %0, align 2
300 %2 = bitcast i8* %y to <8 x i16>*
301 store <8 x i16> %1, <8 x i16>* %2, align 2
; ldrhu16_3: loads <8 x i16> (align 2) from %x + 3 bytes and stores it to %y.
; Expected output adds 3 to r0 first and then uses vldrh.u16 on [r0] with no
; immediate offset.
305 define i8* @ldrhu16_3(i8* %x, i8* %y) {
306 ; CHECK-LABEL: ldrhu16_3:
307 ; CHECK: @ %bb.0: @ %entry
308 ; CHECK-NEXT: adds r0, #3
309 ; CHECK-NEXT: vldrh.u16 q0, [r0]
310 ; CHECK-NEXT: vstrh.16 q0, [r1]
313 %z = getelementptr inbounds i8, i8* %x, i32 3
314 %0 = bitcast i8* %z to <8 x i16>*
315 %1 = load <8 x i16>, <8 x i16>* %0, align 2
316 %2 = bitcast i8* %y to <8 x i16>*
317 store <8 x i16> %1, <8 x i16>* %2, align 2
; ldrhu16_2: loads <8 x i16> (align 2) from %x + 2 bytes and stores it to %y.
; Expected output keeps the offset in the vldrh.u16 immediate ([r0, #2]) and
; then adds 2 to r0 with a separate adds.
321 define i8* @ldrhu16_2(i8* %x, i8* %y) {
322 ; CHECK-LABEL: ldrhu16_2:
323 ; CHECK: @ %bb.0: @ %entry
324 ; CHECK-NEXT: vldrh.u16 q0, [r0, #2]
325 ; CHECK-NEXT: adds r0, #2
326 ; CHECK-NEXT: vstrh.16 q0, [r1]
329 %z = getelementptr inbounds i8, i8* %x, i32 2
330 %0 = bitcast i8* %z to <8 x i16>*
331 %1 = load <8 x i16>, <8 x i16>* %0, align 2
332 %2 = bitcast i8* %y to <8 x i16>*
333 store <8 x i16> %1, <8 x i16>* %2, align 2
; ldrhu16_254: loads <8 x i16> (align 2) from %x + 254 bytes and stores it to %y.
; Expected output adds 254 to r0 first and then uses vldrh.u16 on [r0] with no
; immediate offset.
337 define i8* @ldrhu16_254(i8* %x, i8* %y) {
338 ; CHECK-LABEL: ldrhu16_254:
339 ; CHECK: @ %bb.0: @ %entry
340 ; CHECK-NEXT: adds r0, #254
341 ; CHECK-NEXT: vldrh.u16 q0, [r0]
342 ; CHECK-NEXT: vstrh.16 q0, [r1]
345 %z = getelementptr inbounds i8, i8* %x, i32 254
346 %0 = bitcast i8* %z to <8 x i16>*
347 %1 = load <8 x i16>, <8 x i16>* %0, align 2
348 %2 = bitcast i8* %y to <8 x i16>*
349 store <8 x i16> %1, <8 x i16>* %2, align 2
; ldrhu16_256: loads <8 x i16> (align 2) from %x + 256 bytes and stores it to %y.
; Expected output adds 256 to r0 with add.w first and then uses vldrh.u16 on
; [r0] with no immediate offset.
353 define i8* @ldrhu16_256(i8* %x, i8* %y) {
354 ; CHECK-LABEL: ldrhu16_256:
355 ; CHECK: @ %bb.0: @ %entry
356 ; CHECK-NEXT: add.w r0, r0, #256
357 ; CHECK-NEXT: vldrh.u16 q0, [r0]
358 ; CHECK-NEXT: vstrh.16 q0, [r1]
361 %z = getelementptr inbounds i8, i8* %x, i32 256
362 %0 = bitcast i8* %z to <8 x i16>*
363 %1 = load <8 x i16>, <8 x i16>* %0, align 2
364 %2 = bitcast i8* %y to <8 x i16>*
365 store <8 x i16> %1, <8 x i16>* %2, align 2
; ldrbu32_4: loads <4 x i8> (align 1) from %x + 4 bytes, zero-extends it to
; <4 x i32> and stores the result to %y.
; Expected output is a widening vldrb.u32 with the offset in its immediate
; ([r0, #4]), followed by a separate adds on r0.
370 define i8* @ldrbu32_4(i8* %x, i8* %y) {
371 ; CHECK-LABEL: ldrbu32_4:
372 ; CHECK: @ %bb.0: @ %entry
373 ; CHECK-NEXT: vldrb.u32 q0, [r0, #4]
374 ; CHECK-NEXT: adds r0, #4
375 ; CHECK-NEXT: vstrw.32 q0, [r1]
378 %z = getelementptr inbounds i8, i8* %x, i32 4
379 %0 = bitcast i8* %z to <4 x i8>*
380 %1 = load <4 x i8>, <4 x i8>* %0, align 1
381 %2 = zext <4 x i8> %1 to <4 x i32>
382 %3 = bitcast i8* %y to <4 x i32>*
383 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrbu32_3: loads <4 x i8> (align 1) from %x + 3 bytes, zero-extends it to
; <4 x i32> and stores the result to %y.
; Expected output is a widening vldrb.u32 with the offset in its immediate
; ([r0, #3]), followed by a separate adds on r0.
387 define i8* @ldrbu32_3(i8* %x, i8* %y) {
388 ; CHECK-LABEL: ldrbu32_3:
389 ; CHECK: @ %bb.0: @ %entry
390 ; CHECK-NEXT: vldrb.u32 q0, [r0, #3]
391 ; CHECK-NEXT: adds r0, #3
392 ; CHECK-NEXT: vstrw.32 q0, [r1]
395 %z = getelementptr inbounds i8, i8* %x, i32 3
396 %0 = bitcast i8* %z to <4 x i8>*
397 %1 = load <4 x i8>, <4 x i8>* %0, align 1
398 %2 = zext <4 x i8> %1 to <4 x i32>
399 %3 = bitcast i8* %y to <4 x i32>*
400 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrbu32_127: loads <4 x i8> (align 1) from %x + 127 bytes, zero-extends it to
; <4 x i32> and stores the result to %y.
; Expected output adds 127 to r0 first and then uses vldrb.u32 on [r0] with no
; immediate offset.
404 define i8* @ldrbu32_127(i8* %x, i8* %y) {
405 ; CHECK-LABEL: ldrbu32_127:
406 ; CHECK: @ %bb.0: @ %entry
407 ; CHECK-NEXT: adds r0, #127
408 ; CHECK-NEXT: vldrb.u32 q0, [r0]
409 ; CHECK-NEXT: vstrw.32 q0, [r1]
412 %z = getelementptr inbounds i8, i8* %x, i32 127
413 %0 = bitcast i8* %z to <4 x i8>*
414 %1 = load <4 x i8>, <4 x i8>* %0, align 1
415 %2 = zext <4 x i8> %1 to <4 x i32>
416 %3 = bitcast i8* %y to <4 x i32>*
417 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrbu32_128: loads <4 x i8> (align 1) from %x + 128 bytes, zero-extends it to
; <4 x i32> and stores the result to %y.
; Expected output adds 128 to r0 first and then uses vldrb.u32 on [r0] with no
; immediate offset.
421 define i8* @ldrbu32_128(i8* %x, i8* %y) {
422 ; CHECK-LABEL: ldrbu32_128:
423 ; CHECK: @ %bb.0: @ %entry
424 ; CHECK-NEXT: adds r0, #128
425 ; CHECK-NEXT: vldrb.u32 q0, [r0]
426 ; CHECK-NEXT: vstrw.32 q0, [r1]
429 %z = getelementptr inbounds i8, i8* %x, i32 128
430 %0 = bitcast i8* %z to <4 x i8>*
431 %1 = load <4 x i8>, <4 x i8>* %0, align 1
432 %2 = zext <4 x i8> %1 to <4 x i32>
433 %3 = bitcast i8* %y to <4 x i32>*
434 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrbs32_4: loads <4 x i8> (align 1) from %x + 4 bytes, sign-extends it to
; <4 x i32> and stores the result to %y.
; Expected output is a widening vldrb.s32 with the offset in its immediate
; ([r0, #4]), followed by a separate adds on r0.
439 define i8* @ldrbs32_4(i8* %x, i8* %y) {
440 ; CHECK-LABEL: ldrbs32_4:
441 ; CHECK: @ %bb.0: @ %entry
442 ; CHECK-NEXT: vldrb.s32 q0, [r0, #4]
443 ; CHECK-NEXT: adds r0, #4
444 ; CHECK-NEXT: vstrw.32 q0, [r1]
447 %z = getelementptr inbounds i8, i8* %x, i32 4
448 %0 = bitcast i8* %z to <4 x i8>*
449 %1 = load <4 x i8>, <4 x i8>* %0, align 1
450 %2 = sext <4 x i8> %1 to <4 x i32>
451 %3 = bitcast i8* %y to <4 x i32>*
452 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrbs32_3: loads <4 x i8> (align 1) from %x + 3 bytes, sign-extends it to
; <4 x i32> and stores the result to %y.
; Expected output is a widening vldrb.s32 with the offset in its immediate
; ([r0, #3]), followed by a separate adds on r0.
456 define i8* @ldrbs32_3(i8* %x, i8* %y) {
457 ; CHECK-LABEL: ldrbs32_3:
458 ; CHECK: @ %bb.0: @ %entry
459 ; CHECK-NEXT: vldrb.s32 q0, [r0, #3]
460 ; CHECK-NEXT: adds r0, #3
461 ; CHECK-NEXT: vstrw.32 q0, [r1]
464 %z = getelementptr inbounds i8, i8* %x, i32 3
465 %0 = bitcast i8* %z to <4 x i8>*
466 %1 = load <4 x i8>, <4 x i8>* %0, align 1
467 %2 = sext <4 x i8> %1 to <4 x i32>
468 %3 = bitcast i8* %y to <4 x i32>*
469 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrbs32_127: loads <4 x i8> (align 1) from %x + 127 bytes, sign-extends it to
; <4 x i32> and stores the result to %y.
; Expected output adds 127 to r0 first and then uses vldrb.s32 on [r0] with no
; immediate offset.
473 define i8* @ldrbs32_127(i8* %x, i8* %y) {
474 ; CHECK-LABEL: ldrbs32_127:
475 ; CHECK: @ %bb.0: @ %entry
476 ; CHECK-NEXT: adds r0, #127
477 ; CHECK-NEXT: vldrb.s32 q0, [r0]
478 ; CHECK-NEXT: vstrw.32 q0, [r1]
481 %z = getelementptr inbounds i8, i8* %x, i32 127
482 %0 = bitcast i8* %z to <4 x i8>*
483 %1 = load <4 x i8>, <4 x i8>* %0, align 1
484 %2 = sext <4 x i8> %1 to <4 x i32>
485 %3 = bitcast i8* %y to <4 x i32>*
486 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrbs32_128: loads <4 x i8> (align 1) from %x + 128 bytes, sign-extends it to
; <4 x i32> and stores the result to %y.
; Expected output adds 128 to r0 first and then uses vldrb.s32 on [r0] with no
; immediate offset.
490 define i8* @ldrbs32_128(i8* %x, i8* %y) {
491 ; CHECK-LABEL: ldrbs32_128:
492 ; CHECK: @ %bb.0: @ %entry
493 ; CHECK-NEXT: adds r0, #128
494 ; CHECK-NEXT: vldrb.s32 q0, [r0]
495 ; CHECK-NEXT: vstrw.32 q0, [r1]
498 %z = getelementptr inbounds i8, i8* %x, i32 128
499 %0 = bitcast i8* %z to <4 x i8>*
500 %1 = load <4 x i8>, <4 x i8>* %0, align 1
501 %2 = sext <4 x i8> %1 to <4 x i32>
502 %3 = bitcast i8* %y to <4 x i32>*
503 store <4 x i32> %2, <4 x i32>* %3, align 4
; ldrbu16_4: loads <8 x i8> (align 1) from %x + 4 bytes, zero-extends it to
; <8 x i16> and stores the result to %y.
; Expected output is a widening vldrb.u16 with the offset in its immediate
; ([r0, #4]), followed by a separate adds on r0.
508 define i8* @ldrbu16_4(i8* %x, i8* %y) {
509 ; CHECK-LABEL: ldrbu16_4:
510 ; CHECK: @ %bb.0: @ %entry
511 ; CHECK-NEXT: vldrb.u16 q0, [r0, #4]
512 ; CHECK-NEXT: adds r0, #4
513 ; CHECK-NEXT: vstrh.16 q0, [r1]
516 %z = getelementptr inbounds i8, i8* %x, i32 4
517 %0 = bitcast i8* %z to <8 x i8>*
518 %1 = load <8 x i8>, <8 x i8>* %0, align 1
519 %2 = zext <8 x i8> %1 to <8 x i16>
520 %3 = bitcast i8* %y to <8 x i16>*
521 store <8 x i16> %2, <8 x i16>* %3, align 2
; ldrbu16_3: loads <8 x i8> (align 1) from %x + 3 bytes, zero-extends it to
; <8 x i16> and stores the result to %y.
; Expected output is a widening vldrb.u16 with the offset in its immediate
; ([r0, #3]), followed by a separate adds on r0.
525 define i8* @ldrbu16_3(i8* %x, i8* %y) {
526 ; CHECK-LABEL: ldrbu16_3:
527 ; CHECK: @ %bb.0: @ %entry
528 ; CHECK-NEXT: vldrb.u16 q0, [r0, #3]
529 ; CHECK-NEXT: adds r0, #3
530 ; CHECK-NEXT: vstrh.16 q0, [r1]
533 %z = getelementptr inbounds i8, i8* %x, i32 3
534 %0 = bitcast i8* %z to <8 x i8>*
535 %1 = load <8 x i8>, <8 x i8>* %0, align 1
536 %2 = zext <8 x i8> %1 to <8 x i16>
537 %3 = bitcast i8* %y to <8 x i16>*
538 store <8 x i16> %2, <8 x i16>* %3, align 2
; ldrbu16_127: loads <8 x i8> (align 1) from %x + 127 bytes, zero-extends it to
; <8 x i16> and stores the result to %y.
; Expected output adds 127 to r0 first and then uses vldrb.u16 on [r0] with no
; immediate offset.
542 define i8* @ldrbu16_127(i8* %x, i8* %y) {
543 ; CHECK-LABEL: ldrbu16_127:
544 ; CHECK: @ %bb.0: @ %entry
545 ; CHECK-NEXT: adds r0, #127
546 ; CHECK-NEXT: vldrb.u16 q0, [r0]
547 ; CHECK-NEXT: vstrh.16 q0, [r1]
550 %z = getelementptr inbounds i8, i8* %x, i32 127
551 %0 = bitcast i8* %z to <8 x i8>*
552 %1 = load <8 x i8>, <8 x i8>* %0, align 1
553 %2 = zext <8 x i8> %1 to <8 x i16>
554 %3 = bitcast i8* %y to <8 x i16>*
555 store <8 x i16> %2, <8 x i16>* %3, align 2
; ldrbu16_128: loads <8 x i8> (align 1) from %x + 128 bytes, zero-extends it to
; <8 x i16> and stores the result to %y.
; Expected output adds 128 to r0 first and then uses vldrb.u16 on [r0] with no
; immediate offset.
559 define i8* @ldrbu16_128(i8* %x, i8* %y) {
560 ; CHECK-LABEL: ldrbu16_128:
561 ; CHECK: @ %bb.0: @ %entry
562 ; CHECK-NEXT: adds r0, #128
563 ; CHECK-NEXT: vldrb.u16 q0, [r0]
564 ; CHECK-NEXT: vstrh.16 q0, [r1]
567 %z = getelementptr inbounds i8, i8* %x, i32 128
568 %0 = bitcast i8* %z to <8 x i8>*
569 %1 = load <8 x i8>, <8 x i8>* %0, align 1
570 %2 = zext <8 x i8> %1 to <8 x i16>
571 %3 = bitcast i8* %y to <8 x i16>*
572 store <8 x i16> %2, <8 x i16>* %3, align 2
; ldrbs16_4: loads <8 x i8> (align 1) from %x + 4 bytes, sign-extends it to
; <8 x i16> and stores the result to %y.
; Expected output is a widening vldrb.s16 with the offset in its immediate
; ([r0, #4]), followed by a separate adds on r0.
577 define i8* @ldrbs16_4(i8* %x, i8* %y) {
578 ; CHECK-LABEL: ldrbs16_4:
579 ; CHECK: @ %bb.0: @ %entry
580 ; CHECK-NEXT: vldrb.s16 q0, [r0, #4]
581 ; CHECK-NEXT: adds r0, #4
582 ; CHECK-NEXT: vstrh.16 q0, [r1]
585 %z = getelementptr inbounds i8, i8* %x, i32 4
586 %0 = bitcast i8* %z to <8 x i8>*
587 %1 = load <8 x i8>, <8 x i8>* %0, align 1
588 %2 = sext <8 x i8> %1 to <8 x i16>
589 %3 = bitcast i8* %y to <8 x i16>*
590 store <8 x i16> %2, <8 x i16>* %3, align 2
; ldrbs16_3: loads <8 x i8> (align 1) from %x + 3 bytes, sign-extends it to
; <8 x i16> and stores the result to %y.
; Expected output is a widening vldrb.s16 with the offset in its immediate
; ([r0, #3]), followed by a separate adds on r0.
594 define i8* @ldrbs16_3(i8* %x, i8* %y) {
595 ; CHECK-LABEL: ldrbs16_3:
596 ; CHECK: @ %bb.0: @ %entry
597 ; CHECK-NEXT: vldrb.s16 q0, [r0, #3]
598 ; CHECK-NEXT: adds r0, #3
599 ; CHECK-NEXT: vstrh.16 q0, [r1]
602 %z = getelementptr inbounds i8, i8* %x, i32 3
603 %0 = bitcast i8* %z to <8 x i8>*
604 %1 = load <8 x i8>, <8 x i8>* %0, align 1
605 %2 = sext <8 x i8> %1 to <8 x i16>
606 %3 = bitcast i8* %y to <8 x i16>*
607 store <8 x i16> %2, <8 x i16>* %3, align 2
; ldrbs16_127: loads <8 x i8> (align 1) from %x + 127 bytes, sign-extends it to
; <8 x i16> and stores the result to %y.
; Expected output adds 127 to r0 first and then uses vldrb.s16 on [r0] with no
; immediate offset.
611 define i8* @ldrbs16_127(i8* %x, i8* %y) {
612 ; CHECK-LABEL: ldrbs16_127:
613 ; CHECK: @ %bb.0: @ %entry
614 ; CHECK-NEXT: adds r0, #127
615 ; CHECK-NEXT: vldrb.s16 q0, [r0]
616 ; CHECK-NEXT: vstrh.16 q0, [r1]
619 %z = getelementptr inbounds i8, i8* %x, i32 127
620 %0 = bitcast i8* %z to <8 x i8>*
621 %1 = load <8 x i8>, <8 x i8>* %0, align 1
622 %2 = sext <8 x i8> %1 to <8 x i16>
623 %3 = bitcast i8* %y to <8 x i16>*
624 store <8 x i16> %2, <8 x i16>* %3, align 2
; ldrbs16_128: loads <8 x i8> (align 1) from %x + 128 bytes, sign-extends it to
; <8 x i16> and stores the result to %y.
; Expected output adds 128 to r0 first and then uses vldrb.s16 on [r0] with no
; immediate offset.
628 define i8* @ldrbs16_128(i8* %x, i8* %y) {
629 ; CHECK-LABEL: ldrbs16_128:
630 ; CHECK: @ %bb.0: @ %entry
631 ; CHECK-NEXT: adds r0, #128
632 ; CHECK-NEXT: vldrb.s16 q0, [r0]
633 ; CHECK-NEXT: vstrh.16 q0, [r1]
636 %z = getelementptr inbounds i8, i8* %x, i32 128
637 %0 = bitcast i8* %z to <8 x i8>*
638 %1 = load <8 x i8>, <8 x i8>* %0, align 1
639 %2 = sext <8 x i8> %1 to <8 x i16>
640 %3 = bitcast i8* %y to <8 x i16>*
641 store <8 x i16> %2, <8 x i16>* %3, align 2
; ldrbu8_4: loads <16 x i8> (align 1) from %x + 4 bytes and stores it to %y.
; Expected output keeps the offset in the vldrb.u8 immediate ([r0, #4]) and
; then adds 4 to r0 with a separate adds.
646 define i8* @ldrbu8_4(i8* %x, i8* %y) {
647 ; CHECK-LABEL: ldrbu8_4:
648 ; CHECK: @ %bb.0: @ %entry
649 ; CHECK-NEXT: vldrb.u8 q0, [r0, #4]
650 ; CHECK-NEXT: adds r0, #4
651 ; CHECK-NEXT: vstrb.8 q0, [r1]
654 %z = getelementptr inbounds i8, i8* %x, i32 4
655 %0 = bitcast i8* %z to <16 x i8>*
656 %1 = load <16 x i8>, <16 x i8>* %0, align 1
657 %2 = bitcast i8* %y to <16 x i8>*
658 store <16 x i8> %1, <16 x i8>* %2, align 1
; ldrbu8_3: loads <16 x i8> (align 1) from %x + 3 bytes and stores it to %y.
; Expected output keeps the offset in the vldrb.u8 immediate ([r0, #3]) and
; then adds 3 to r0 with a separate adds.
662 define i8* @ldrbu8_3(i8* %x, i8* %y) {
663 ; CHECK-LABEL: ldrbu8_3:
664 ; CHECK: @ %bb.0: @ %entry
665 ; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
666 ; CHECK-NEXT: adds r0, #3
667 ; CHECK-NEXT: vstrb.8 q0, [r1]
670 %z = getelementptr inbounds i8, i8* %x, i32 3
671 %0 = bitcast i8* %z to <16 x i8>*
672 %1 = load <16 x i8>, <16 x i8>* %0, align 1
673 %2 = bitcast i8* %y to <16 x i8>*
674 store <16 x i8> %1, <16 x i8>* %2, align 1
; ldrbu8_127: loads <16 x i8> (align 1) from %x + 127 bytes and stores it to %y.
; Expected output adds 127 to r0 first and then uses vldrb.u8 on [r0] with no
; immediate offset.
678 define i8* @ldrbu8_127(i8* %x, i8* %y) {
679 ; CHECK-LABEL: ldrbu8_127:
680 ; CHECK: @ %bb.0: @ %entry
681 ; CHECK-NEXT: adds r0, #127
682 ; CHECK-NEXT: vldrb.u8 q0, [r0]
683 ; CHECK-NEXT: vstrb.8 q0, [r1]
686 %z = getelementptr inbounds i8, i8* %x, i32 127
687 %0 = bitcast i8* %z to <16 x i8>*
688 %1 = load <16 x i8>, <16 x i8>* %0, align 1
689 %2 = bitcast i8* %y to <16 x i8>*
690 store <16 x i8> %1, <16 x i8>* %2, align 1
; ldrbu8_128: loads <16 x i8> (align 1) from %x + 128 bytes and stores it to %y.
; Expected output adds 128 to r0 first and then uses vldrb.u8 on [r0] with no
; immediate offset.
694 define i8* @ldrbu8_128(i8* %x, i8* %y) {
695 ; CHECK-LABEL: ldrbu8_128:
696 ; CHECK: @ %bb.0: @ %entry
697 ; CHECK-NEXT: adds r0, #128
698 ; CHECK-NEXT: vldrb.u8 q0, [r0]
699 ; CHECK-NEXT: vstrb.8 q0, [r1]
702 %z = getelementptr inbounds i8, i8* %x, i32 128
703 %0 = bitcast i8* %z to <16 x i8>*
704 %1 = load <16 x i8>, <16 x i8>* %0, align 1
705 %2 = bitcast i8* %y to <16 x i8>*
706 store <16 x i8> %1, <16 x i8>* %2, align 1
; ldrwf32_4: floating-point variant; loads <4 x float> (align 4) from
; %x + 4 bytes and stores it to %y.
; Expected output uses the same word-element instructions as the <4 x i32>
; case: vldrw.u32 with the offset in its immediate, a separate adds on r0, and
; vstrw.32.
710 define i8* @ldrwf32_4(i8* %x, i8* %y) {
711 ; CHECK-LABEL: ldrwf32_4:
712 ; CHECK: @ %bb.0: @ %entry
713 ; CHECK-NEXT: vldrw.u32 q0, [r0, #4]
714 ; CHECK-NEXT: adds r0, #4
715 ; CHECK-NEXT: vstrw.32 q0, [r1]
718 %z = getelementptr inbounds i8, i8* %x, i32 4
719 %0 = bitcast i8* %z to <4 x float>*
720 %1 = load <4 x float>, <4 x float>* %0, align 4
721 %2 = bitcast i8* %y to <4 x float>*
722 store <4 x float> %1, <4 x float>* %2, align 4
; ldrwf16_4: half-precision variant; loads <8 x half> (align 2) from
; %x + 4 bytes and stores it to %y.
; Note that, despite the "w" in the function name, the expected output uses
; halfword-element instructions: vldrh.u16 with the offset in its immediate, a
; separate adds on r0, and vstrh.16.
726 define i8* @ldrwf16_4(i8* %x, i8* %y) {
727 ; CHECK-LABEL: ldrwf16_4:
728 ; CHECK: @ %bb.0: @ %entry
729 ; CHECK-NEXT: vldrh.u16 q0, [r0, #4]
730 ; CHECK-NEXT: adds r0, #4
731 ; CHECK-NEXT: vstrh.16 q0, [r1]
734 %z = getelementptr inbounds i8, i8* %x, i32 4
735 %0 = bitcast i8* %z to <8 x half>*
736 %1 = load <8 x half>, <8 x half>* %0, align 2
737 %2 = bitcast i8* %y to <8 x half>*
738 store <8 x half> %1, <8 x half>* %2, align 2
; ldrwi32_align1: under-aligned variant; loads <4 x i32> with only align 1
; from %x + 3 bytes and stores it to %y with align 4.
; Expected output performs the load with a byte-element vldrb.u8 (offset in
; the immediate, [r0, #3]), adds 3 to r0 separately, and stores with vstrw.32.
742 define i8* @ldrwi32_align1(i8* %x, i8* %y) {
743 ; CHECK-LABEL: ldrwi32_align1:
744 ; CHECK: @ %bb.0: @ %entry
745 ; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
746 ; CHECK-NEXT: adds r0, #3
747 ; CHECK-NEXT: vstrw.32 q0, [r1]
750 %z = getelementptr inbounds i8, i8* %x, i32 3
751 %0 = bitcast i8* %z to <4 x i32>*
752 %1 = load <4 x i32>, <4 x i32>* %0, align 1
753 %2 = bitcast i8* %y to <4 x i32>*
754 store <4 x i32> %1, <4 x i32>* %2, align 4
; ldrhi16_align1: under-aligned variant; loads <8 x i16> with only align 1
; from %x + 3 bytes and stores it to %y with align 2.
; Expected output performs the load with a byte-element vldrb.u8 (offset in
; the immediate, [r0, #3]), adds 3 to r0 separately, and stores with vstrh.16.
758 define i8* @ldrhi16_align1(i8* %x, i8* %y) {
759 ; CHECK-LABEL: ldrhi16_align1:
760 ; CHECK: @ %bb.0: @ %entry
761 ; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
762 ; CHECK-NEXT: adds r0, #3
763 ; CHECK-NEXT: vstrh.16 q0, [r1]
766 %z = getelementptr inbounds i8, i8* %x, i32 3
767 %0 = bitcast i8* %z to <8 x i16>*
768 %1 = load <8 x i16>, <8 x i16>* %0, align 1
769 %2 = bitcast i8* %y to <8 x i16>*
770 store <8 x i16> %1, <8 x i16>* %2, align 2
; ldrhi32_align1: under-aligned widening variant; loads <4 x i16> with only
; align 1 from %x + 3 bytes, sign-extends it to <4 x i32> and stores the
; result to %y with align 4.
; Expected output does not load the vector directly from the source address.
; It reserves 8 bytes of stack, copies two words there with scalar ldr/str
; (the first ldr uses pre-indexed writeback, [r0, #3]!), and then runs the
; widening vldrh.s32 on the stack copy before releasing the stack space.
774 define i8* @ldrhi32_align1(i8* %x, i8* %y) {
775 ; CHECK-LABEL: ldrhi32_align1:
776 ; CHECK: @ %bb.0: @ %entry
777 ; CHECK-NEXT: .pad #8
778 ; CHECK-NEXT: sub sp, #8
779 ; CHECK-NEXT: ldr r2, [r0, #3]!
780 ; CHECK-NEXT: str r2, [sp]
781 ; CHECK-NEXT: ldr r2, [r0, #4]
782 ; CHECK-NEXT: str r2, [sp, #4]
783 ; CHECK-NEXT: mov r2, sp
784 ; CHECK-NEXT: vldrh.s32 q0, [r2]
785 ; CHECK-NEXT: vstrw.32 q0, [r1]
786 ; CHECK-NEXT: add sp, #8
789 %z = getelementptr inbounds i8, i8* %x, i32 3
790 %0 = bitcast i8* %z to <4 x i16>*
791 %1 = load <4 x i16>, <4 x i16>* %0, align 1
792 %2 = bitcast i8* %y to <4 x i32>*
793 %3 = sext <4 x i16> %1 to <4 x i32>
794 store <4 x i32> %3, <4 x i32>* %2, align 4
; ldrf32_align1: under-aligned floating-point variant; loads <4 x float> with
; only align 1 from %x + 3 bytes and stores it to %y with align 4.
; Expected output performs the load with a byte-element vldrb.u8 (offset in
; the immediate, [r0, #3]), adds 3 to r0 separately, and stores with vstrw.32.
798 define i8* @ldrf32_align1(i8* %x, i8* %y) {
799 ; CHECK-LABEL: ldrf32_align1:
800 ; CHECK: @ %bb.0: @ %entry
801 ; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
802 ; CHECK-NEXT: adds r0, #3
803 ; CHECK-NEXT: vstrw.32 q0, [r1]
806 %z = getelementptr inbounds i8, i8* %x, i32 3
807 %0 = bitcast i8* %z to <4 x float>*
808 %1 = load <4 x float>, <4 x float>* %0, align 1
809 %2 = bitcast i8* %y to <4 x float>*
810 store <4 x float> %1, <4 x float>* %2, align 4
; ldrf16_align1: under-aligned half-precision variant; loads <8 x half> with
; only align 1 from %x + 3 bytes and stores it to %y with align 2.
; Expected output performs the load with a byte-element vldrb.u8 (offset in
; the immediate, [r0, #3]), adds 3 to r0 separately, and stores with vstrh.16.
814 define i8* @ldrf16_align1(i8* %x, i8* %y) {
815 ; CHECK-LABEL: ldrf16_align1:
816 ; CHECK: @ %bb.0: @ %entry
817 ; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
818 ; CHECK-NEXT: adds r0, #3
819 ; CHECK-NEXT: vstrh.16 q0, [r1]
822 %z = getelementptr inbounds i8, i8* %x, i32 3
823 %0 = bitcast i8* %z to <8 x half>*
824 %1 = load <8 x half>, <8 x half>* %0, align 1
825 %2 = bitcast i8* %y to <8 x half>*
826 store <8 x half> %1, <8 x half>* %2, align 2
; strw32_4: store-side test (note the parameter order is %y, %x); loads
; <4 x i32> (align 4) from %x and stores it to %y + 4 bytes.
; Expected output keeps the offset in the store's immediate ([r0, #4]) and
; then adds 4 to r0 with a separate adds.
834 define i8* @strw32_4(i8* %y, i8* %x) {
835 ; CHECK-LABEL: strw32_4:
836 ; CHECK: @ %bb.0: @ %entry
837 ; CHECK-NEXT: vldrw.u32 q0, [r1]
838 ; CHECK-NEXT: vstrw.32 q0, [r0, #4]
839 ; CHECK-NEXT: adds r0, #4
842 %z = getelementptr inbounds i8, i8* %y, i32 4
843 %0 = bitcast i8* %x to <4 x i32>*
844 %1 = load <4 x i32>, <4 x i32>* %0, align 4
845 %2 = bitcast i8* %z to <4 x i32>*
846 store <4 x i32> %1, <4 x i32>* %2, align 4
; strw32_3: loads <4 x i32> (align 4) from %x and stores it to %y + 3 bytes.
; Expected output adds 3 to r0 first and then stores to [r0] with no immediate
; offset.
850 define i8* @strw32_3(i8* %y, i8* %x) {
851 ; CHECK-LABEL: strw32_3:
852 ; CHECK: @ %bb.0: @ %entry
853 ; CHECK-NEXT: adds r0, #3
854 ; CHECK-NEXT: vldrw.u32 q0, [r1]
855 ; CHECK-NEXT: vstrw.32 q0, [r0]
858 %z = getelementptr inbounds i8, i8* %y, i32 3
859 %0 = bitcast i8* %x to <4 x i32>*
860 %1 = load <4 x i32>, <4 x i32>* %0, align 4
861 %2 = bitcast i8* %z to <4 x i32>*
862 store <4 x i32> %1, <4 x i32>* %2, align 4
; strw32_m4: loads <4 x i32> (align 4) from %x and stores it to %y - 4 bytes.
; Expected output keeps the negative offset in the store's immediate
; ([r0, #-4]) and then subtracts 4 from r0 with a separate subs.
866 define i8* @strw32_m4(i8* %y, i8* %x) {
867 ; CHECK-LABEL: strw32_m4:
868 ; CHECK: @ %bb.0: @ %entry
869 ; CHECK-NEXT: vldrw.u32 q0, [r1]
870 ; CHECK-NEXT: vstrw.32 q0, [r0, #-4]
871 ; CHECK-NEXT: subs r0, #4
874 %z = getelementptr inbounds i8, i8* %y, i32 -4
875 %0 = bitcast i8* %x to <4 x i32>*
876 %1 = load <4 x i32>, <4 x i32>* %0, align 4
877 %2 = bitcast i8* %z to <4 x i32>*
878 store <4 x i32> %1, <4 x i32>* %2, align 4
; strw32_508: loads <4 x i32> (align 4) from %x and stores it to %y + 508 bytes.
; Expected output adds 508 to r0 with add.w first and then stores to [r0] with
; no immediate offset.
882 define i8* @strw32_508(i8* %y, i8* %x) {
883 ; CHECK-LABEL: strw32_508:
884 ; CHECK: @ %bb.0: @ %entry
885 ; CHECK-NEXT: add.w r0, r0, #508
886 ; CHECK-NEXT: vldrw.u32 q0, [r1]
887 ; CHECK-NEXT: vstrw.32 q0, [r0]
890 %z = getelementptr inbounds i8, i8* %y, i32 508
891 %0 = bitcast i8* %x to <4 x i32>*
892 %1 = load <4 x i32>, <4 x i32>* %0, align 4
893 %2 = bitcast i8* %z to <4 x i32>*
894 store <4 x i32> %1, <4 x i32>* %2, align 4
; strw32_512: loads <4 x i32> (align 4) from %x and stores it to %y + 512 bytes.
; Expected output adds 512 to r0 with add.w first and then stores to [r0] with
; no immediate offset.
898 define i8* @strw32_512(i8* %y, i8* %x) {
899 ; CHECK-LABEL: strw32_512:
900 ; CHECK: @ %bb.0: @ %entry
901 ; CHECK-NEXT: add.w r0, r0, #512
902 ; CHECK-NEXT: vldrw.u32 q0, [r1]
903 ; CHECK-NEXT: vstrw.32 q0, [r0]
906 %z = getelementptr inbounds i8, i8* %y, i32 512
907 %0 = bitcast i8* %x to <4 x i32>*
908 %1 = load <4 x i32>, <4 x i32>* %0, align 4
909 %2 = bitcast i8* %z to <4 x i32>*
910 store <4 x i32> %1, <4 x i32>* %2, align 4
; strw32_m508: loads <4 x i32> (align 4) from %x and stores it to %y - 508 bytes.
; Expected output subtracts 508 from r0 with sub.w first and then stores to
; [r0] with no immediate offset.
914 define i8* @strw32_m508(i8* %y, i8* %x) {
915 ; CHECK-LABEL: strw32_m508:
916 ; CHECK: @ %bb.0: @ %entry
917 ; CHECK-NEXT: sub.w r0, r0, #508
918 ; CHECK-NEXT: vldrw.u32 q0, [r1]
919 ; CHECK-NEXT: vstrw.32 q0, [r0]
922 %z = getelementptr inbounds i8, i8* %y, i32 -508
923 %0 = bitcast i8* %x to <4 x i32>*
924 %1 = load <4 x i32>, <4 x i32>* %0, align 4
925 %2 = bitcast i8* %z to <4 x i32>*
926 store <4 x i32> %1, <4 x i32>* %2, align 4
; strw32_m512: loads <4 x i32> (align 4) from %x and stores it to %y - 512 bytes.
; Expected output subtracts 512 from r0 with sub.w first and then stores to
; [r0] with no immediate offset.
930 define i8* @strw32_m512(i8* %y, i8* %x) {
931 ; CHECK-LABEL: strw32_m512:
932 ; CHECK: @ %bb.0: @ %entry
933 ; CHECK-NEXT: sub.w r0, r0, #512
934 ; CHECK-NEXT: vldrw.u32 q0, [r1]
935 ; CHECK-NEXT: vstrw.32 q0, [r0]
938 %z = getelementptr inbounds i8, i8* %y, i32 -512
939 %0 = bitcast i8* %x to <4 x i32>*
940 %1 = load <4 x i32>, <4 x i32>* %0, align 4
941 %2 = bitcast i8* %z to <4 x i32>*
942 store <4 x i32> %1, <4 x i32>* %2, align 4
; strh32_4: loads <4 x i16> (align 2) from %x and stores it, still as
; <4 x i16>, to %y + 4 bytes.
; Expected output pairs vldrh.u32 with vstrh.32, keeps the offset in the
; store's immediate ([r0, #4]) and then adds 4 to r0 separately.
947 define i8* @strh32_4(i8* %y, i8* %x) {
948 ; CHECK-LABEL: strh32_4:
949 ; CHECK: @ %bb.0: @ %entry
950 ; CHECK-NEXT: vldrh.u32 q0, [r1]
951 ; CHECK-NEXT: vstrh.32 q0, [r0, #4]
952 ; CHECK-NEXT: adds r0, #4
955 %z = getelementptr inbounds i8, i8* %y, i32 4
956 %0 = bitcast i8* %x to <4 x i16>*
957 %1 = load <4 x i16>, <4 x i16>* %0, align 2
958 %2 = bitcast i8* %z to <4 x i16>*
959 store <4 x i16> %1, <4 x i16>* %2, align 2
; strh32_3: loads <4 x i16> (align 2) from %x and stores it, still as
; <4 x i16>, to %y + 3 bytes.
; Expected output adds 3 to r0 first and then uses vstrh.32 on [r0] with no
; immediate offset.
963 define i8* @strh32_3(i8* %y, i8* %x) {
964 ; CHECK-LABEL: strh32_3:
965 ; CHECK: @ %bb.0: @ %entry
966 ; CHECK-NEXT: adds r0, #3
967 ; CHECK-NEXT: vldrh.u32 q0, [r1]
968 ; CHECK-NEXT: vstrh.32 q0, [r0]
971 %z = getelementptr inbounds i8, i8* %y, i32 3
972 %0 = bitcast i8* %x to <4 x i16>*
973 %1 = load <4 x i16>, <4 x i16>* %0, align 2
974 %2 = bitcast i8* %z to <4 x i16>*
975 store <4 x i16> %1, <4 x i16>* %2, align 2
; strh32_2: loads <4 x i16> (align 2) from %x and stores it, still as
; <4 x i16>, to %y + 2 bytes.
; Expected output pairs vldrh.u32 with vstrh.32, keeps the offset in the
; store's immediate ([r0, #2]) and then adds 2 to r0 separately.
979 define i8* @strh32_2(i8* %y, i8* %x) {
980 ; CHECK-LABEL: strh32_2:
981 ; CHECK: @ %bb.0: @ %entry
982 ; CHECK-NEXT: vldrh.u32 q0, [r1]
983 ; CHECK-NEXT: vstrh.32 q0, [r0, #2]
984 ; CHECK-NEXT: adds r0, #2
987 %z = getelementptr inbounds i8, i8* %y, i32 2
988 %0 = bitcast i8* %x to <4 x i16>*
989 %1 = load <4 x i16>, <4 x i16>* %0, align 2
990 %2 = bitcast i8* %z to <4 x i16>*
991 store <4 x i16> %1, <4 x i16>* %2, align 2
; strh32_254: loads <4 x i16> (align 2) from %x and stores it, still as
; <4 x i16>, to %y + 254 bytes.
; Expected output adds 254 to r0 first and then uses vstrh.32 on [r0] with no
; immediate offset.
995 define i8* @strh32_254(i8* %y, i8* %x) {
996 ; CHECK-LABEL: strh32_254:
997 ; CHECK: @ %bb.0: @ %entry
998 ; CHECK-NEXT: adds r0, #254
999 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1000 ; CHECK-NEXT: vstrh.32 q0, [r0]
1003 %z = getelementptr inbounds i8, i8* %y, i32 254
1004 %0 = bitcast i8* %x to <4 x i16>*
1005 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1006 %2 = bitcast i8* %z to <4 x i16>*
1007 store <4 x i16> %1, <4 x i16>* %2, align 2
; strh32_256: loads <4 x i16> (align 2) from %x and stores it, still as
; <4 x i16>, to %y + 256 bytes.
; Expected output adds 256 to r0 with add.w first and then uses vstrh.32 on
; [r0] with no immediate offset.
1011 define i8* @strh32_256(i8* %y, i8* %x) {
1012 ; CHECK-LABEL: strh32_256:
1013 ; CHECK: @ %bb.0: @ %entry
1014 ; CHECK-NEXT: add.w r0, r0, #256
1015 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1016 ; CHECK-NEXT: vstrh.32 q0, [r0]
1019 %z = getelementptr inbounds i8, i8* %y, i32 256
1020 %0 = bitcast i8* %x to <4 x i16>*
1021 %1 = load <4 x i16>, <4 x i16>* %0, align 2
1022 %2 = bitcast i8* %z to <4 x i16>*
1023 store <4 x i16> %1, <4 x i16>* %2, align 2
; strh16_4: loads <8 x i16> (align 2) from %x and stores it to %y + 4 bytes.
; Expected output keeps the offset in the vstrh.16 immediate ([r0, #4]) and
; then adds 4 to r0 with a separate adds.
1028 define i8* @strh16_4(i8* %y, i8* %x) {
1029 ; CHECK-LABEL: strh16_4:
1030 ; CHECK: @ %bb.0: @ %entry
1031 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1032 ; CHECK-NEXT: vstrh.16 q0, [r0, #4]
1033 ; CHECK-NEXT: adds r0, #4
1036 %z = getelementptr inbounds i8, i8* %y, i32 4
1037 %0 = bitcast i8* %x to <8 x i16>*
1038 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1039 %2 = bitcast i8* %z to <8 x i16>*
1040 store <8 x i16> %1, <8 x i16>* %2, align 2
; strh16_3: loads <8 x i16> (align 2) from %x and stores it to %y + 3 bytes.
; Expected output adds 3 to r0 first and then uses vstrh.16 on [r0] with no
; immediate offset.
1044 define i8* @strh16_3(i8* %y, i8* %x) {
1045 ; CHECK-LABEL: strh16_3:
1046 ; CHECK: @ %bb.0: @ %entry
1047 ; CHECK-NEXT: adds r0, #3
1048 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1049 ; CHECK-NEXT: vstrh.16 q0, [r0]
1052 %z = getelementptr inbounds i8, i8* %y, i32 3
1053 %0 = bitcast i8* %x to <8 x i16>*
1054 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1055 %2 = bitcast i8* %z to <8 x i16>*
1056 store <8 x i16> %1, <8 x i16>* %2, align 2
; strh16_2: loads <8 x i16> (align 2) from %x and stores it to %y + 2 bytes.
; Expected output keeps the offset in the vstrh.16 immediate ([r0, #2]) and
; then adds 2 to r0 with a separate adds.
1060 define i8* @strh16_2(i8* %y, i8* %x) {
1061 ; CHECK-LABEL: strh16_2:
1062 ; CHECK: @ %bb.0: @ %entry
1063 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1064 ; CHECK-NEXT: vstrh.16 q0, [r0, #2]
1065 ; CHECK-NEXT: adds r0, #2
1068 %z = getelementptr inbounds i8, i8* %y, i32 2
1069 %0 = bitcast i8* %x to <8 x i16>*
1070 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1071 %2 = bitcast i8* %z to <8 x i16>*
1072 store <8 x i16> %1, <8 x i16>* %2, align 2
; strh16_254: loads <8 x i16> (align 2) from %x and stores it to %y + 254 bytes.
; Expected output adds 254 to r0 first and then uses vstrh.16 on [r0] with no
; immediate offset.
1076 define i8* @strh16_254(i8* %y, i8* %x) {
1077 ; CHECK-LABEL: strh16_254:
1078 ; CHECK: @ %bb.0: @ %entry
1079 ; CHECK-NEXT: adds r0, #254
1080 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1081 ; CHECK-NEXT: vstrh.16 q0, [r0]
1084 %z = getelementptr inbounds i8, i8* %y, i32 254
1085 %0 = bitcast i8* %x to <8 x i16>*
1086 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1087 %2 = bitcast i8* %z to <8 x i16>*
1088 store <8 x i16> %1, <8 x i16>* %2, align 2
; strh16_256: loads <8 x i16> (align 2) from %x and stores it to %y + 256 bytes.
; Expected output adds 256 to r0 with add.w first and then uses vstrh.16 on
; [r0] with no immediate offset.
1092 define i8* @strh16_256(i8* %y, i8* %x) {
1093 ; CHECK-LABEL: strh16_256:
1094 ; CHECK: @ %bb.0: @ %entry
1095 ; CHECK-NEXT: add.w r0, r0, #256
1096 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1097 ; CHECK-NEXT: vstrh.16 q0, [r0]
1100 %z = getelementptr inbounds i8, i8* %y, i32 256
1101 %0 = bitcast i8* %x to <8 x i16>*
1102 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1103 %2 = bitcast i8* %z to <8 x i16>*
1104 store <8 x i16> %1, <8 x i16>* %2, align 2
; strb32_4: loads <4 x i8> (align 1) from %x and stores it, still as
; <4 x i8>, to %y + 4 bytes.
; Expected output pairs vldrb.u32 with vstrb.32, keeps the offset in the
; store's immediate ([r0, #4]) and then adds 4 to r0 separately.
1109 define i8* @strb32_4(i8* %y, i8* %x) {
1110 ; CHECK-LABEL: strb32_4:
1111 ; CHECK: @ %bb.0: @ %entry
1112 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1113 ; CHECK-NEXT: vstrb.32 q0, [r0, #4]
1114 ; CHECK-NEXT: adds r0, #4
1117 %z = getelementptr inbounds i8, i8* %y, i32 4
1118 %0 = bitcast i8* %x to <4 x i8>*
1119 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1120 %2 = bitcast i8* %z to <4 x i8>*
1121 store <4 x i8> %1, <4 x i8>* %2, align 1
; strb32_3: loads <4 x i8> (align 1) from %x and stores it, still as
; <4 x i8>, to %y + 3 bytes.
; Expected output pairs vldrb.u32 with vstrb.32, keeps the offset in the
; store's immediate ([r0, #3]) and then adds 3 to r0 separately.
1125 define i8* @strb32_3(i8* %y, i8* %x) {
1126 ; CHECK-LABEL: strb32_3:
1127 ; CHECK: @ %bb.0: @ %entry
1128 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1129 ; CHECK-NEXT: vstrb.32 q0, [r0, #3]
1130 ; CHECK-NEXT: adds r0, #3
1133 %z = getelementptr inbounds i8, i8* %y, i32 3
1134 %0 = bitcast i8* %x to <4 x i8>*
1135 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1136 %2 = bitcast i8* %z to <4 x i8>*
1137 store <4 x i8> %1, <4 x i8>* %2, align 1
; strb32_127: loads <4 x i8> (align 1) from %x and stores it, still as
; <4 x i8>, to %y + 127 bytes.
; Expected output adds 127 to r0 first and then uses vstrb.32 on [r0] with no
; immediate offset.
1141 define i8* @strb32_127(i8* %y, i8* %x) {
1142 ; CHECK-LABEL: strb32_127:
1143 ; CHECK: @ %bb.0: @ %entry
1144 ; CHECK-NEXT: adds r0, #127
1145 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1146 ; CHECK-NEXT: vstrb.32 q0, [r0]
1149 %z = getelementptr inbounds i8, i8* %y, i32 127
1150 %0 = bitcast i8* %x to <4 x i8>*
1151 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1152 %2 = bitcast i8* %z to <4 x i8>*
1153 store <4 x i8> %1, <4 x i8>* %2, align 1
; strb32_128: loads <4 x i8> (align 1) from %x and stores it, still as
; <4 x i8>, to %y + 128 bytes.
; Expected output adds 128 to r0 first and then uses vstrb.32 on [r0] with no
; immediate offset.
1157 define i8* @strb32_128(i8* %y, i8* %x) {
1158 ; CHECK-LABEL: strb32_128:
1159 ; CHECK: @ %bb.0: @ %entry
1160 ; CHECK-NEXT: adds r0, #128
1161 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1162 ; CHECK-NEXT: vstrb.32 q0, [r0]
1165 %z = getelementptr inbounds i8, i8* %y, i32 128
1166 %0 = bitcast i8* %x to <4 x i8>*
1167 %1 = load <4 x i8>, <4 x i8>* %0, align 1
1168 %2 = bitcast i8* %z to <4 x i8>*
1169 store <4 x i8> %1, <4 x i8>* %2, align 1
; strb16_4: loads <8 x i8> (align 1) from %x and stores it, still as
; <8 x i8>, to %y + 4 bytes.
; Expected output pairs vldrb.u16 with vstrb.16, keeps the offset in the
; store's immediate ([r0, #4]) and then adds 4 to r0 separately.
1174 define i8* @strb16_4(i8* %y, i8* %x) {
1175 ; CHECK-LABEL: strb16_4:
1176 ; CHECK: @ %bb.0: @ %entry
1177 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1178 ; CHECK-NEXT: vstrb.16 q0, [r0, #4]
1179 ; CHECK-NEXT: adds r0, #4
1182 %z = getelementptr inbounds i8, i8* %y, i32 4
1183 %0 = bitcast i8* %x to <8 x i8>*
1184 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1185 %2 = bitcast i8* %z to <8 x i8>*
1186 store <8 x i8> %1, <8 x i8>* %2, align 1
; strb16_3: loads an <8 x i8> vector from %x (align 1) and stores it to
; %y + 3 bytes (align 1), i.e. an odd byte offset.
; Expected output: the offset is folded into the store as an immediate
; (vstrb.16 to [r0, #3]) and r0 is advanced afterwards by a separate adds.
; NOTE(review): the ret is outside this excerpt; the r0 update suggests the
; offset pointer %z is returned -- confirm.
1190 define i8* @strb16_3(i8* %y, i8* %x) {
1191 ; CHECK-LABEL: strb16_3:
1192 ; CHECK: @ %bb.0: @ %entry
1193 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1194 ; CHECK-NEXT: vstrb.16 q0, [r0, #3]
1195 ; CHECK-NEXT: adds r0, #3
1198 %z = getelementptr inbounds i8, i8* %y, i32 3
1199 %0 = bitcast i8* %x to <8 x i8>*
1200 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1201 %2 = bitcast i8* %z to <8 x i8>*
1202 store <8 x i8> %1, <8 x i8>* %2, align 1
; strb16_127: loads an <8 x i8> vector from %x (align 1) and stores it to
; %y + 127 bytes (align 1).
; Expected output: unlike the #3/#4 variants, r0 is advanced by 127 first
; and the store then addresses [r0] with no immediate offset.
; NOTE(review): presumably a boundary probe for the store's immediate
; range; the ret is outside this excerpt -- confirm what is returned.
1206 define i8* @strb16_127(i8* %y, i8* %x) {
1207 ; CHECK-LABEL: strb16_127:
1208 ; CHECK: @ %bb.0: @ %entry
1209 ; CHECK-NEXT: adds r0, #127
1210 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1211 ; CHECK-NEXT: vstrb.16 q0, [r0]
1214 %z = getelementptr inbounds i8, i8* %y, i32 127
1215 %0 = bitcast i8* %x to <8 x i8>*
1216 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1217 %2 = bitcast i8* %z to <8 x i8>*
1218 store <8 x i8> %1, <8 x i8>* %2, align 1
; strb16_128: loads an <8 x i8> vector from %x (align 1) and stores it to
; %y + 128 bytes (align 1).
; Expected output: r0 is advanced by 128 first and the store then addresses
; [r0] with no immediate offset.
; NOTE(review): presumably a boundary probe for the store's immediate
; range; the ret is outside this excerpt -- confirm what is returned.
1222 define i8* @strb16_128(i8* %y, i8* %x) {
1223 ; CHECK-LABEL: strb16_128:
1224 ; CHECK: @ %bb.0: @ %entry
1225 ; CHECK-NEXT: adds r0, #128
1226 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1227 ; CHECK-NEXT: vstrb.16 q0, [r0]
1230 %z = getelementptr inbounds i8, i8* %y, i32 128
1231 %0 = bitcast i8* %x to <8 x i8>*
1232 %1 = load <8 x i8>, <8 x i8>* %0, align 1
1233 %2 = bitcast i8* %z to <8 x i8>*
1234 store <8 x i8> %1, <8 x i8>* %2, align 1
; strb8_4: loads a <16 x i8> vector from %x (align 1) and stores it to
; %y + 4 bytes (align 1).
; Expected output: the offset is folded into the store as an immediate
; (vstrb.8 to [r0, #4]) and r0 is advanced afterwards by a separate adds.
; NOTE(review): the ret is outside this excerpt; the r0 update suggests the
; offset pointer %z is returned -- confirm.
1239 define i8* @strb8_4(i8* %y, i8* %x) {
1240 ; CHECK-LABEL: strb8_4:
1241 ; CHECK: @ %bb.0: @ %entry
1242 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1243 ; CHECK-NEXT: vstrb.8 q0, [r0, #4]
1244 ; CHECK-NEXT: adds r0, #4
1247 %z = getelementptr inbounds i8, i8* %y, i32 4
1248 %0 = bitcast i8* %x to <16 x i8>*
1249 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1250 %2 = bitcast i8* %z to <16 x i8>*
1251 store <16 x i8> %1, <16 x i8>* %2, align 1
; strb8_3: loads a <16 x i8> vector from %x (align 1) and stores it to
; %y + 3 bytes (align 1), i.e. an odd byte offset.
; Expected output: the offset is folded into the store as an immediate
; (vstrb.8 to [r0, #3]) and r0 is advanced afterwards by a separate adds.
; NOTE(review): the ret is outside this excerpt; the r0 update suggests the
; offset pointer %z is returned -- confirm.
1255 define i8* @strb8_3(i8* %y, i8* %x) {
1256 ; CHECK-LABEL: strb8_3:
1257 ; CHECK: @ %bb.0: @ %entry
1258 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1259 ; CHECK-NEXT: vstrb.8 q0, [r0, #3]
1260 ; CHECK-NEXT: adds r0, #3
1263 %z = getelementptr inbounds i8, i8* %y, i32 3
1264 %0 = bitcast i8* %x to <16 x i8>*
1265 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1266 %2 = bitcast i8* %z to <16 x i8>*
1267 store <16 x i8> %1, <16 x i8>* %2, align 1
; strb8_127: loads a <16 x i8> vector from %x (align 1) and stores it to
; %y + 127 bytes (align 1).
; Expected output: unlike the #3/#4 variants, r0 is advanced by 127 first
; and the store then addresses [r0] with no immediate offset.
; NOTE(review): presumably a boundary probe for the store's immediate
; range; the ret is outside this excerpt -- confirm what is returned.
1271 define i8* @strb8_127(i8* %y, i8* %x) {
1272 ; CHECK-LABEL: strb8_127:
1273 ; CHECK: @ %bb.0: @ %entry
1274 ; CHECK-NEXT: adds r0, #127
1275 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1276 ; CHECK-NEXT: vstrb.8 q0, [r0]
1279 %z = getelementptr inbounds i8, i8* %y, i32 127
1280 %0 = bitcast i8* %x to <16 x i8>*
1281 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1282 %2 = bitcast i8* %z to <16 x i8>*
1283 store <16 x i8> %1, <16 x i8>* %2, align 1
; strb8_128: loads a <16 x i8> vector from %x (align 1) and stores it to
; %y + 128 bytes (align 1).
; Expected output: r0 is advanced by 128 first and the store then addresses
; [r0] with no immediate offset.
; NOTE(review): presumably a boundary probe for the store's immediate
; range; the ret is outside this excerpt -- confirm what is returned.
1287 define i8* @strb8_128(i8* %y, i8* %x) {
1288 ; CHECK-LABEL: strb8_128:
1289 ; CHECK: @ %bb.0: @ %entry
1290 ; CHECK-NEXT: adds r0, #128
1291 ; CHECK-NEXT: vldrb.u8 q0, [r1]
1292 ; CHECK-NEXT: vstrb.8 q0, [r0]
1295 %z = getelementptr inbounds i8, i8* %y, i32 128
1296 %0 = bitcast i8* %x to <16 x i8>*
1297 %1 = load <16 x i8>, <16 x i8>* %0, align 1
1298 %2 = bitcast i8* %z to <16 x i8>*
1299 store <16 x i8> %1, <16 x i8>* %2, align 1
; strf32_4: floating-point variant -- loads a <4 x float> vector from %x
; (align 4) and stores it to %y + 4 bytes (align 4).
; Expected output: word-sized vector load/store (vldrw.u32 / vstrw.32) with
; the offset folded into the store as [r0, #4], followed by a separate adds.
; NOTE(review): the ret is outside this excerpt; the r0 update suggests the
; offset pointer %z is returned -- confirm.
1303 define i8* @strf32_4(i8* %y, i8* %x) {
1304 ; CHECK-LABEL: strf32_4:
1305 ; CHECK: @ %bb.0: @ %entry
1306 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1307 ; CHECK-NEXT: vstrw.32 q0, [r0, #4]
1308 ; CHECK-NEXT: adds r0, #4
1311 %z = getelementptr inbounds i8, i8* %y, i32 4
1312 %0 = bitcast i8* %x to <4 x float>*
1313 %1 = load <4 x float>, <4 x float>* %0, align 4
1314 %2 = bitcast i8* %z to <4 x float>*
1315 store <4 x float> %1, <4 x float>* %2, align 4
; strf16_4: half-precision variant -- loads an <8 x half> vector from %x
; (align 2) and stores it to %y + 4 bytes (align 2).
; Expected output: halfword-sized vector load/store (vldrh.u16 / vstrh.16)
; with the offset folded into the store as [r0, #4], followed by a separate
; adds.
; NOTE(review): the ret is outside this excerpt; the r0 update suggests the
; offset pointer %z is returned -- confirm.
1319 define i8* @strf16_4(i8* %y, i8* %x) {
1320 ; CHECK-LABEL: strf16_4:
1321 ; CHECK: @ %bb.0: @ %entry
1322 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1323 ; CHECK-NEXT: vstrh.16 q0, [r0, #4]
1324 ; CHECK-NEXT: adds r0, #4
1327 %z = getelementptr inbounds i8, i8* %y, i32 4
1328 %0 = bitcast i8* %x to <8 x half>*
1329 %1 = load <8 x half>, <8 x half>* %0, align 2
1330 %2 = bitcast i8* %z to <8 x half>*
1331 store <8 x half> %1, <8 x half>* %2, align 2
; strwi32_align1: under-aligned store -- loads a <4 x i32> vector from %x
; with align 4, then stores it to %y + 3 bytes with only align 1.
; Expected output: the load stays word-sized (vldrw.u32) but the store is
; emitted as a byte store (vstrb.8 to [r0, #3]), followed by a separate adds.
; NOTE(review): the ret is outside this excerpt; the r0 update suggests the
; offset pointer %z is returned -- confirm.
1335 define i8* @strwi32_align1(i8* %y, i8* %x) {
1336 ; CHECK-LABEL: strwi32_align1:
1337 ; CHECK: @ %bb.0: @ %entry
1338 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1339 ; CHECK-NEXT: vstrb.8 q0, [r0, #3]
1340 ; CHECK-NEXT: adds r0, #3
1343 %z = getelementptr inbounds i8, i8* %y, i32 3
1344 %0 = bitcast i8* %x to <4 x i32>*
1345 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1346 %2 = bitcast i8* %z to <4 x i32>*
1347 store <4 x i32> %1, <4 x i32>* %2, align 1
; strhi16_align1: under-aligned store -- loads an <8 x i16> vector from %x
; with align 2, then stores it to %y + 3 bytes with only align 1.
; Expected output: the load stays halfword-sized (vldrh.u16) but the store
; is emitted as a byte store (vstrb.8 to [r0, #3]), followed by a separate
; adds.
; NOTE(review): the ret is outside this excerpt; the r0 update suggests the
; offset pointer %z is returned -- confirm.
1351 define i8* @strhi16_align1(i8* %y, i8* %x) {
1352 ; CHECK-LABEL: strhi16_align1:
1353 ; CHECK: @ %bb.0: @ %entry
1354 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1355 ; CHECK-NEXT: vstrb.8 q0, [r0, #3]
1356 ; CHECK-NEXT: adds r0, #3
1359 %z = getelementptr inbounds i8, i8* %y, i32 3
1360 %0 = bitcast i8* %x to <8 x i16>*
1361 %1 = load <8 x i16>, <8 x i16>* %0, align 2
1362 %2 = bitcast i8* %z to <8 x i16>*
1363 store <8 x i16> %1, <8 x i16>* %2, align 1
; strhi32_align1: under-aligned truncating store -- loads a <4 x i32> vector
; from %x (align 4), truncates it to <4 x i16>, and stores the 8-byte result
; to %y + 3 bytes with only align 1.
; Expected output: no single vector store is used. The narrowed vector is
; written to an 8-byte stack slot with vstrh.32, read back as two words with
; ldrd, and written to the destination with two scalar str instructions; the
; first str uses writeback ([r0, #3]!) so r0 ends up at the offset address.
; NOTE(review): the ret is outside this excerpt -- confirm what is returned.
1367 define i8* @strhi32_align1(i8* %y, i8* %x) {
1368 ; CHECK-LABEL: strhi32_align1:
1369 ; CHECK: @ %bb.0: @ %entry
1370 ; CHECK-NEXT: .pad #8
1371 ; CHECK-NEXT: sub sp, #8
1372 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1373 ; CHECK-NEXT: mov r1, sp
1374 ; CHECK-NEXT: vstrh.32 q0, [r1]
1375 ; CHECK-NEXT: ldrd r1, r2, [sp]
1376 ; CHECK-NEXT: str r1, [r0, #3]!
1377 ; CHECK-NEXT: str r2, [r0, #4]
1378 ; CHECK-NEXT: add sp, #8
1381 %z = getelementptr inbounds i8, i8* %y, i32 3
1382 %0 = bitcast i8* %x to <4 x i32>*
1383 %1 = load <4 x i32>, <4 x i32>* %0, align 4
1384 %2 = bitcast i8* %z to <4 x i16>*
1385 %3 = trunc <4 x i32> %1 to <4 x i16>
1386 store <4 x i16> %3, <4 x i16>* %2, align 1
; strf32_align1: under-aligned floating-point store -- loads a <4 x float>
; vector from %x with align 4, then stores it to %y + 3 bytes with only
; align 1.
; Expected output: the load stays word-sized (vldrw.u32) but the store is
; emitted as a byte store (vstrb.8 to [r0, #3]), followed by a separate adds.
; NOTE(review): the ret is outside this excerpt; the r0 update suggests the
; offset pointer %z is returned -- confirm.
1390 define i8* @strf32_align1(i8* %y, i8* %x) {
1391 ; CHECK-LABEL: strf32_align1:
1392 ; CHECK: @ %bb.0: @ %entry
1393 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1394 ; CHECK-NEXT: vstrb.8 q0, [r0, #3]
1395 ; CHECK-NEXT: adds r0, #3
1398 %z = getelementptr inbounds i8, i8* %y, i32 3
1399 %0 = bitcast i8* %x to <4 x float>*
1400 %1 = load <4 x float>, <4 x float>* %0, align 4
1401 %2 = bitcast i8* %z to <4 x float>*
1402 store <4 x float> %1, <4 x float>* %2, align 1
; strf16_align1: under-aligned half-precision store -- loads an <8 x half>
; vector from %x with align 2, then stores it to %y + 3 bytes with only
; align 1.
; Expected output: the load stays halfword-sized (vldrh.u16) but the store
; is emitted as a byte store (vstrb.8 to [r0, #3]), followed by a separate
; adds.
; NOTE(review): the ret is outside this excerpt; the r0 update suggests the
; offset pointer %z is returned -- confirm.
1406 define i8* @strf16_align1(i8* %y, i8* %x) {
1407 ; CHECK-LABEL: strf16_align1:
1408 ; CHECK: @ %bb.0: @ %entry
1409 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1410 ; CHECK-NEXT: vstrb.8 q0, [r0, #3]
1411 ; CHECK-NEXT: adds r0, #3
1414 %z = getelementptr inbounds i8, i8* %y, i32 3
1415 %0 = bitcast i8* %x to <8 x half>*
1416 %1 = load <8 x half>, <8 x half>* %0, align 2
1417 %2 = bitcast i8* %z to <8 x half>*
1418 store <8 x half> %1, <8 x half>* %2, align 1