; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=arm64-apple-macosx -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64_be -o - %s | FileCheck --check-prefix BE %s
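; Tests for loads of <3 x i8> and <3 x i32> vectors, their zero- and
; sign-extensions, and truncating stores of <3 x i8> / <3 x i16>, on
; little-endian (CHECK) and big-endian (BE) AArch64.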
define <16 x i8> @load_v3i8(ptr %src) {
; CHECK-LABEL: load_v3i8:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldrb w8, [x0, #2]
; CHECK-NEXT:    ldrh w9, [x0]
; CHECK-NEXT:    orr w8, w9, w8, lsl #16
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
;
; BE-LABEL: load_v3i8:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldrh w8, [x0]
; BE-NEXT:    strh w8, [sp, #12]
; BE-NEXT:    ldr s0, [sp, #12]
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    ushll v0.8h, v0.8b, #0
; BE-NEXT:    umov w8, v0.h[0]
; BE-NEXT:    umov w9, v0.h[1]
; BE-NEXT:    fmov s0, w8
; BE-NEXT:    add x8, x0, #2
; BE-NEXT:    mov v0.b[1], w9
; BE-NEXT:    ld1 { v0.b }[2], [x8]
; BE-NEXT:    rev64 v0.16b, v0.16b
; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %l = load <3 x i8>, ptr %src, align 1
  %s = shufflevector <3 x i8> poison, <3 x i8> %l, <16 x i32> <i32 3, i32 4, i32 5, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

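; Load <3 x i8> and zero-extend it to <4 x i32>, at various alignments.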
define <4 x i32> @load_v3i8_to_4xi32(ptr %src) {
; CHECK-LABEL: load_v3i8_to_4xi32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldrb w8, [x0, #2]
; CHECK-NEXT:    ldrh w9, [x0]
; CHECK-NEXT:    movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT:    orr w8, w9, w8, lsl #16
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    zip1.8b v0, v0, v0
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v0, v1
; CHECK-NEXT:    ret
;
; BE-LABEL: load_v3i8_to_4xi32:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldrh w8, [x0]
; BE-NEXT:    movi v1.2d, #0x0000ff000000ff
; BE-NEXT:    strh w8, [sp, #12]
; BE-NEXT:    ldr s0, [sp, #12]
; BE-NEXT:    ldrsb w8, [x0, #2]
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    ushll v0.8h, v0.8b, #0
; BE-NEXT:    mov v0.h[1], v0.h[1]
; BE-NEXT:    mov v0.h[2], w8
; BE-NEXT:    ushll v0.4s, v0.4h, #0
; BE-NEXT:    and v0.16b, v0.16b, v1.16b
; BE-NEXT:    rev64 v0.4s, v0.4s
; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %l = load <3 x i8>, ptr %src, align 1
  %s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
  %e = zext <4 x i8> %s to <4 x i32>
  ret <4 x i32> %e
}

define <4 x i32> @load_v3i8_to_4xi32_align_2(ptr %src) {
; CHECK-LABEL: load_v3i8_to_4xi32_align_2:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldrb w8, [x0, #2]
; CHECK-NEXT:    ldrh w9, [x0]
; CHECK-NEXT:    movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT:    orr w8, w9, w8, lsl #16
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    zip1.8b v0, v0, v0
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v0, v1
; CHECK-NEXT:    ret
;
; BE-LABEL: load_v3i8_to_4xi32_align_2:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldrh w8, [x0]
; BE-NEXT:    movi v1.2d, #0x0000ff000000ff
; BE-NEXT:    strh w8, [sp, #12]
; BE-NEXT:    ldr s0, [sp, #12]
; BE-NEXT:    ldrsb w8, [x0, #2]
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    ushll v0.8h, v0.8b, #0
; BE-NEXT:    mov v0.h[1], v0.h[1]
; BE-NEXT:    mov v0.h[2], w8
; BE-NEXT:    ushll v0.4s, v0.4h, #0
; BE-NEXT:    and v0.16b, v0.16b, v1.16b
; BE-NEXT:    rev64 v0.4s, v0.4s
; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %l = load <3 x i8>, ptr %src, align 2
  %s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
  %e = zext <4 x i8> %s to <4 x i32>
  ret <4 x i32> %e
}

define <4 x i32> @load_v3i8_to_4xi32_align_4(ptr %src) {
; CHECK-LABEL: load_v3i8_to_4xi32_align_4:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT:    zip1.8b v0, v0, v0
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v0, v1
; CHECK-NEXT:    ret
;
; BE-LABEL: load_v3i8_to_4xi32_align_4:
; BE:       // %bb.0:
; BE-NEXT:    ldr s0, [x0]
; BE-NEXT:    movi v1.2d, #0x0000ff000000ff
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    zip1 v0.8b, v0.8b, v0.8b
; BE-NEXT:    ushll v0.4s, v0.4h, #0
; BE-NEXT:    and v0.16b, v0.16b, v1.16b
; BE-NEXT:    rev64 v0.4s, v0.4s
; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT:    ret
  %l = load <3 x i8>, ptr %src, align 4
  %s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
  %e = zext <4 x i8> %s to <4 x i32>
  ret <4 x i32> %e
}

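; Same extension pattern as above, but loading from a constant byte offset into %src.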
define <4 x i32> @load_v3i8_to_4xi32_const_offset_1(ptr %src) {
; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_1:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldrb w8, [x0, #3]
; CHECK-NEXT:    ldurh w9, [x0, #1]
; CHECK-NEXT:    movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT:    orr w8, w9, w8, lsl #16
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    zip1.8b v0, v0, v0
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v0, v1
; CHECK-NEXT:    ret
;
; BE-LABEL: load_v3i8_to_4xi32_const_offset_1:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldurh w8, [x0, #1]
; BE-NEXT:    movi v1.2d, #0x0000ff000000ff
; BE-NEXT:    strh w8, [sp, #12]
; BE-NEXT:    ldr s0, [sp, #12]
; BE-NEXT:    ldrsb w8, [x0, #3]
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    ushll v0.8h, v0.8b, #0
; BE-NEXT:    mov v0.h[1], v0.h[1]
; BE-NEXT:    mov v0.h[2], w8
; BE-NEXT:    ushll v0.4s, v0.4h, #0
; BE-NEXT:    and v0.16b, v0.16b, v1.16b
; BE-NEXT:    rev64 v0.4s, v0.4s
; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %src.1 = getelementptr inbounds i8, ptr %src, i64 1
  %l = load <3 x i8>, ptr %src.1, align 1
  %s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
  %e = zext <4 x i8> %s to <4 x i32>
  ret <4 x i32> %e
}

define <4 x i32> @load_v3i8_to_4xi32_const_offset_3(ptr %src) {
; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_3:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldrb w8, [x0, #5]
; CHECK-NEXT:    ldurh w9, [x0, #3]
; CHECK-NEXT:    movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT:    orr w8, w9, w8, lsl #16
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    zip1.8b v0, v0, v0
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v0, v1
; CHECK-NEXT:    ret
;
; BE-LABEL: load_v3i8_to_4xi32_const_offset_3:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldurh w8, [x0, #3]
; BE-NEXT:    movi v1.2d, #0x0000ff000000ff
; BE-NEXT:    strh w8, [sp, #12]
; BE-NEXT:    ldr s0, [sp, #12]
; BE-NEXT:    ldrsb w8, [x0, #5]
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    ushll v0.8h, v0.8b, #0
; BE-NEXT:    mov v0.h[1], v0.h[1]
; BE-NEXT:    mov v0.h[2], w8
; BE-NEXT:    ushll v0.4s, v0.4h, #0
; BE-NEXT:    and v0.16b, v0.16b, v1.16b
; BE-NEXT:    rev64 v0.4s, v0.4s
; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %src.3 = getelementptr inbounds i8, ptr %src, i64 3
  %l = load <3 x i8>, ptr %src.3, align 1
  %s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
  %e = zext <4 x i8> %s to <4 x i32>
  ret <4 x i32> %e
}

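; Same extension pattern, but the <3 x i8> load is volatile.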
define <4 x i32> @volatile_load_v3i8_to_4xi32(ptr %src) {
; CHECK-LABEL: volatile_load_v3i8_to_4xi32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldrh w8, [x0]
; CHECK-NEXT:    movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT:    strh w8, [sp, #12]
; CHECK-NEXT:    ldr s0, [sp, #12]
; CHECK-NEXT:    ldrsb w8, [x0, #2]
; CHECK-NEXT:    ushll.8h v0, v0, #0
; CHECK-NEXT:    mov.h v0[1], v0[1]
; CHECK-NEXT:    mov.h v0[2], w8
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v0, v1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; BE-LABEL: volatile_load_v3i8_to_4xi32:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldrh w8, [x0]
; BE-NEXT:    movi v1.2d, #0x0000ff000000ff
; BE-NEXT:    strh w8, [sp, #12]
; BE-NEXT:    ldr s0, [sp, #12]
; BE-NEXT:    ldrsb w8, [x0, #2]
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    ushll v0.8h, v0.8b, #0
; BE-NEXT:    mov v0.h[1], v0.h[1]
; BE-NEXT:    mov v0.h[2], w8
; BE-NEXT:    ushll v0.4s, v0.4h, #0
; BE-NEXT:    and v0.16b, v0.16b, v1.16b
; BE-NEXT:    rev64 v0.4s, v0.4s
; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %l = load volatile <3 x i8>, ptr %src, align 1
  %s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
  %e = zext <4 x i8> %s to <4 x i32>
  ret <4 x i32> %e
}

define <3 x i32> @load_v3i32(ptr %src) {
; CHECK-LABEL: load_v3i32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    add x8, x0, #8
; CHECK-NEXT:    ld1.s { v0 }[2], [x8]
; CHECK-NEXT:    ret
;
; BE-LABEL: load_v3i32:
; BE:       // %bb.0:
; BE-NEXT:    ldr d0, [x0]
; BE-NEXT:    add x8, x0, #8
; BE-NEXT:    rev64 v0.4s, v0.4s
; BE-NEXT:    ld1 { v0.s }[2], [x8]
; BE-NEXT:    rev64 v0.4s, v0.4s
; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT:    ret
  %l = load <3 x i32>, ptr %src, align 1
  ret <3 x i32> %l
}

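; Zero- and sign-extend a <3 x i8> load directly to <3 x i32>.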
define <3 x i32> @load_v3i8_zext_to_3xi32(ptr %src) {
; CHECK-LABEL: load_v3i8_zext_to_3xi32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldrb w8, [x0, #2]
; CHECK-NEXT:    ldrh w9, [x0]
; CHECK-NEXT:    movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT:    orr w8, w9, w8, lsl #16
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    zip1.8b v0, v0, v0
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v0, v1
; CHECK-NEXT:    ret
;
; BE-LABEL: load_v3i8_zext_to_3xi32:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldrh w8, [x0]
; BE-NEXT:    movi v1.2d, #0x0000ff000000ff
; BE-NEXT:    strh w8, [sp, #12]
; BE-NEXT:    add x8, x0, #2
; BE-NEXT:    ldr s0, [sp, #12]
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    ushll v0.8h, v0.8b, #0
; BE-NEXT:    ld1 { v0.b }[4], [x8]
; BE-NEXT:    ushll v0.4s, v0.4h, #0
; BE-NEXT:    and v0.16b, v0.16b, v1.16b
; BE-NEXT:    rev64 v0.4s, v0.4s
; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %l = load <3 x i8>, ptr %src, align 1
  %e = zext <3 x i8> %l to <3 x i32>
  ret <3 x i32> %e
}

define <3 x i32> @load_v3i8_sext_to_3xi32(ptr %src) {
; CHECK-LABEL: load_v3i8_sext_to_3xi32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldrb w8, [x0, #2]
; CHECK-NEXT:    ldrh w9, [x0]
; CHECK-NEXT:    orr w8, w9, w8, lsl #16
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    zip1.8b v0, v0, v0
; CHECK-NEXT:    ushll.4s v0, v0, #0
; CHECK-NEXT:    shl.4s v0, v0, #24
; CHECK-NEXT:    sshr.4s v0, v0, #24
; CHECK-NEXT:    ret
;
; BE-LABEL: load_v3i8_sext_to_3xi32:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldrh w8, [x0]
; BE-NEXT:    strh w8, [sp, #12]
; BE-NEXT:    add x8, x0, #2
; BE-NEXT:    ldr s0, [sp, #12]
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    ushll v0.8h, v0.8b, #0
; BE-NEXT:    ld1 { v0.b }[4], [x8]
; BE-NEXT:    ushll v0.4s, v0.4h, #0
; BE-NEXT:    shl v0.4s, v0.4s, #24
; BE-NEXT:    sshr v0.4s, v0.4s, #24
; BE-NEXT:    rev64 v0.4s, v0.4s
; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %l = load <3 x i8>, ptr %src, align 1
  %e = sext <3 x i8> %l to <3 x i32>
  ret <3 x i32> %e
}

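; Load <3 x i16>, truncate to <3 x i8> (optionally after an add), and store.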
define void @store_trunc_from_64bits(ptr %src, ptr %dst) {
; CHECK-LABEL: store_trunc_from_64bits:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    add x9, x0, #4
; CHECK-NEXT:    ld1r.4h { v0 }, [x9]
; CHECK-NEXT:    fmov s1, w8
; CHECK-NEXT:    strb w8, [x1]
; CHECK-NEXT:    add x8, x1, #1
; CHECK-NEXT:    st1.b { v1 }[2], [x8]
; CHECK-NEXT:    add x8, x1, #2
; CHECK-NEXT:    st1.b { v0 }[4], [x8]
; CHECK-NEXT:    ret
;
; BE-LABEL: store_trunc_from_64bits:
; BE:       // %bb.0: // %entry
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldr s0, [x0]
; BE-NEXT:    ldrh w8, [x0, #4]
; BE-NEXT:    rev32 v0.4h, v0.4h
; BE-NEXT:    mov v0.h[2], w8
; BE-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
; BE-NEXT:    rev32 v0.16b, v0.16b
; BE-NEXT:    str s0, [sp, #12]
; BE-NEXT:    ldrh w9, [sp, #12]
; BE-NEXT:    strb w8, [x1, #2]
; BE-NEXT:    strh w9, [x1]
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
entry:
  %l = load <3 x i16>, ptr %src, align 1
  %t = trunc <3 x i16> %l to <3 x i8>
  store <3 x i8> %t, ptr %dst, align 1
  ret void
}

define void @store_trunc_add_from_64bits(ptr %src, ptr %dst) {
; CHECK-LABEL: store_trunc_add_from_64bits:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    add x9, x0, #4
; CHECK-NEXT:  Lloh0:
; CHECK-NEXT:    adrp x8, lCPI11_0@PAGE
; CHECK-NEXT:  Lloh1:
; CHECK-NEXT:    ldr d1, [x8, lCPI11_0@PAGEOFF]
; CHECK-NEXT:    add x8, x1, #1
; CHECK-NEXT:    ld1.h { v0 }[2], [x9]
; CHECK-NEXT:    add x9, x1, #2
; CHECK-NEXT:    add.4h v0, v0, v1
; CHECK-NEXT:    st1.b { v0 }[2], [x8]
; CHECK-NEXT:    st1.b { v0 }[4], [x9]
; CHECK-NEXT:    st1.b { v0 }[0], [x1]
; CHECK-NEXT:    ret
; CHECK-NEXT:    .loh AdrpLdr Lloh0, Lloh1
;
; BE-LABEL: store_trunc_add_from_64bits:
; BE:       // %bb.0: // %entry
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldr s0, [x0]
; BE-NEXT:    add x8, x0, #4
; BE-NEXT:    rev32 v0.4h, v0.4h
; BE-NEXT:    ld1 { v0.h }[2], [x8]
; BE-NEXT:    adrp x8, .LCPI11_0
; BE-NEXT:    add x8, x8, :lo12:.LCPI11_0
; BE-NEXT:    ld1 { v1.4h }, [x8]
; BE-NEXT:    add v0.4h, v0.4h, v1.4h
; BE-NEXT:    uzp1 v1.8b, v0.8b, v0.8b
; BE-NEXT:    umov w8, v0.h[2]
; BE-NEXT:    rev32 v1.16b, v1.16b
; BE-NEXT:    str s1, [sp, #12]
; BE-NEXT:    ldrh w9, [sp, #12]
; BE-NEXT:    strb w8, [x1, #2]
; BE-NEXT:    strh w9, [x1]
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
entry:
  %l = load <3 x i16>, ptr %src, align 1
  %a = add <3 x i16> %l, <i16 3, i16 4, i16 5>
  %t = trunc <3 x i16> %a to <3 x i8>
  store <3 x i8> %t, ptr %dst, align 1
  ret void
}

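; Load <3 x i8>, zero-extend to <3 x i16> (optionally adding a constant), and
; store, at various source alignments.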
define void @load_ext_to_64bits(ptr %src, ptr %dst) {
; CHECK-LABEL: load_ext_to_64bits:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    ldrb w8, [x0, #2]
; CHECK-NEXT:    ldrh w9, [x0]
; CHECK-NEXT:    orr w8, w9, w8, lsl #16
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    add x8, x1, #4
; CHECK-NEXT:    zip1.8b v0, v0, v0
; CHECK-NEXT:    bic.4h v0, #255, lsl #8
; CHECK-NEXT:    st1.h { v0 }[2], [x8]
; CHECK-NEXT:    str s0, [x1]
; CHECK-NEXT:    ret
;
; BE-LABEL: load_ext_to_64bits:
; BE:       // %bb.0: // %entry
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldrh w8, [x0]
; BE-NEXT:    strh w8, [sp, #12]
; BE-NEXT:    add x8, x0, #2
; BE-NEXT:    ldr s0, [sp, #12]
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    ushll v0.8h, v0.8b, #0
; BE-NEXT:    ld1 { v0.b }[4], [x8]
; BE-NEXT:    add x8, x1, #4
; BE-NEXT:    bic v0.4h, #255, lsl #8
; BE-NEXT:    rev32 v1.8h, v0.8h
; BE-NEXT:    st1 { v0.h }[2], [x8]
; BE-NEXT:    str s1, [x1]
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
entry:
  %l = load <3 x i8>, ptr %src, align 1
  %e = zext <3 x i8> %l to <3 x i16>
  store <3 x i16> %e, ptr %dst, align 1
  ret void
}

define void @load_ext_to_64bits_default_align(ptr %src, ptr %dst) {
; CHECK-LABEL: load_ext_to_64bits_default_align:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    add x8, x1, #4
; CHECK-NEXT:    zip1.8b v0, v0, v0
; CHECK-NEXT:    bic.4h v0, #255, lsl #8
; CHECK-NEXT:    st1.h { v0 }[2], [x8]
; CHECK-NEXT:    str s0, [x1]
; CHECK-NEXT:    ret
;
; BE-LABEL: load_ext_to_64bits_default_align:
; BE:       // %bb.0: // %entry
; BE-NEXT:    ldr s0, [x0]
; BE-NEXT:    add x8, x1, #4
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    zip1 v0.8b, v0.8b, v0.8b
; BE-NEXT:    bic v0.4h, #255, lsl #8
; BE-NEXT:    rev32 v1.8h, v0.8h
; BE-NEXT:    st1 { v0.h }[2], [x8]
; BE-NEXT:    str s1, [x1]
; BE-NEXT:    ret
entry:
  %l = load <3 x i8>, ptr %src
  %e = zext <3 x i8> %l to <3 x i16>
  store <3 x i16> %e, ptr %dst, align 1
  ret void
}

define void @load_ext_to_64bits_align_4(ptr %src, ptr %dst) {
; CHECK-LABEL: load_ext_to_64bits_align_4:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    add x8, x1, #4
; CHECK-NEXT:    zip1.8b v0, v0, v0
; CHECK-NEXT:    bic.4h v0, #255, lsl #8
; CHECK-NEXT:    st1.h { v0 }[2], [x8]
; CHECK-NEXT:    str s0, [x1]
; CHECK-NEXT:    ret
;
; BE-LABEL: load_ext_to_64bits_align_4:
; BE:       // %bb.0: // %entry
; BE-NEXT:    ldr s0, [x0]
; BE-NEXT:    add x8, x1, #4
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    zip1 v0.8b, v0.8b, v0.8b
; BE-NEXT:    bic v0.4h, #255, lsl #8
; BE-NEXT:    rev32 v1.8h, v0.8h
; BE-NEXT:    st1 { v0.h }[2], [x8]
; BE-NEXT:    str s1, [x1]
; BE-NEXT:    ret
entry:
  %l = load <3 x i8>, ptr %src, align 4
  %e = zext <3 x i8> %l to <3 x i16>
  store <3 x i16> %e, ptr %dst, align 1
  ret void
}

define void @load_ext_add_to_64bits(ptr %src, ptr %dst) {
; CHECK-LABEL: load_ext_add_to_64bits:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    ldrb w9, [x0, #2]
; CHECK-NEXT:    ldrh w10, [x0]
; CHECK-NEXT:  Lloh2:
; CHECK-NEXT:    adrp x8, lCPI15_0@PAGE
; CHECK-NEXT:  Lloh3:
; CHECK-NEXT:    ldr d1, [x8, lCPI15_0@PAGEOFF]
; CHECK-NEXT:    add x8, x1, #4
; CHECK-NEXT:    orr w9, w10, w9, lsl #16
; CHECK-NEXT:    fmov s0, w9
; CHECK-NEXT:    zip1.8b v0, v0, v0
; CHECK-NEXT:    bic.4h v0, #255, lsl #8
; CHECK-NEXT:    add.4h v0, v0, v1
; CHECK-NEXT:    st1.h { v0 }[2], [x8]
; CHECK-NEXT:    str s0, [x1]
; CHECK-NEXT:    ret
; CHECK-NEXT:    .loh AdrpLdr Lloh2, Lloh3
;
; BE-LABEL: load_ext_add_to_64bits:
; BE:       // %bb.0: // %entry
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldrh w8, [x0]
; BE-NEXT:    strh w8, [sp, #12]
; BE-NEXT:    add x8, x0, #2
; BE-NEXT:    ldr s0, [sp, #12]
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    ushll v0.8h, v0.8b, #0
; BE-NEXT:    ld1 { v0.b }[4], [x8]
; BE-NEXT:    adrp x8, .LCPI15_0
; BE-NEXT:    add x8, x8, :lo12:.LCPI15_0
; BE-NEXT:    ld1 { v1.4h }, [x8]
; BE-NEXT:    add x8, x1, #4
; BE-NEXT:    bic v0.4h, #255, lsl #8
; BE-NEXT:    add v0.4h, v0.4h, v1.4h
; BE-NEXT:    rev32 v1.8h, v0.8h
; BE-NEXT:    st1 { v0.h }[2], [x8]
; BE-NEXT:    str s1, [x1]
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
entry:
  %l = load <3 x i8>, ptr %src, align 1
  %e = zext <3 x i8> %l to <3 x i16>
  %a = add <3 x i16> %e, <i16 3, i16 4, i16 5>
  store <3 x i16> %a, ptr %dst, align 1
  ret void
}

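; Load <3 x i32>, shift right by 16, truncate to <3 x i8>, and store, at
; various store alignments and destination offsets.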
define void @shift_trunc_store(ptr %src, ptr %dst) {
; CHECK-LABEL: shift_trunc_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    add x8, x1, #1
; CHECK-NEXT:    add x9, x1, #2
; CHECK-NEXT:    ushr.4s v0, v0, #16
; CHECK-NEXT:    st1.b { v0 }[4], [x8]
; CHECK-NEXT:    st1.b { v0 }[8], [x9]
; CHECK-NEXT:    st1.b { v0 }[0], [x1]
; CHECK-NEXT:    ret
;
; BE-LABEL: shift_trunc_store:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ld1 { v0.4s }, [x0]
; BE-NEXT:    shrn v0.4h, v0.4s, #16
; BE-NEXT:    uzp1 v1.8b, v0.8b, v0.8b
; BE-NEXT:    umov w8, v0.h[2]
; BE-NEXT:    rev32 v1.16b, v1.16b
; BE-NEXT:    str s1, [sp, #12]
; BE-NEXT:    ldrh w9, [sp, #12]
; BE-NEXT:    strb w8, [x1, #2]
; BE-NEXT:    strh w9, [x1]
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %l = load <3 x i32>, ptr %src
  %s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
  %t = trunc <3 x i32> %s to <3 x i8>
  store <3 x i8> %t, ptr %dst, align 1
  ret void
}

define void @shift_trunc_store_default_align(ptr %src, ptr %dst) {
; CHECK-LABEL: shift_trunc_store_default_align:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    add x8, x1, #1
; CHECK-NEXT:    add x9, x1, #2
; CHECK-NEXT:    ushr.4s v0, v0, #16
; CHECK-NEXT:    st1.b { v0 }[4], [x8]
; CHECK-NEXT:    st1.b { v0 }[8], [x9]
; CHECK-NEXT:    st1.b { v0 }[0], [x1]
; CHECK-NEXT:    ret
;
; BE-LABEL: shift_trunc_store_default_align:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ld1 { v0.4s }, [x0]
; BE-NEXT:    shrn v0.4h, v0.4s, #16
; BE-NEXT:    uzp1 v1.8b, v0.8b, v0.8b
; BE-NEXT:    umov w8, v0.h[2]
; BE-NEXT:    rev32 v1.16b, v1.16b
; BE-NEXT:    str s1, [sp, #12]
; BE-NEXT:    ldrh w9, [sp, #12]
; BE-NEXT:    strb w8, [x1, #2]
; BE-NEXT:    strh w9, [x1]
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %l = load <3 x i32>, ptr %src
  %s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
  %t = trunc <3 x i32> %s to <3 x i8>
  store <3 x i8> %t, ptr %dst
  ret void
}

define void @shift_trunc_store_align_4(ptr %src, ptr %dst) {
; CHECK-LABEL: shift_trunc_store_align_4:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    add x8, x1, #1
; CHECK-NEXT:    add x9, x1, #2
; CHECK-NEXT:    ushr.4s v0, v0, #16
; CHECK-NEXT:    st1.b { v0 }[4], [x8]
; CHECK-NEXT:    st1.b { v0 }[8], [x9]
; CHECK-NEXT:    st1.b { v0 }[0], [x1]
; CHECK-NEXT:    ret
;
; BE-LABEL: shift_trunc_store_align_4:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ld1 { v0.4s }, [x0]
; BE-NEXT:    shrn v0.4h, v0.4s, #16
; BE-NEXT:    uzp1 v1.8b, v0.8b, v0.8b
; BE-NEXT:    umov w8, v0.h[2]
; BE-NEXT:    rev32 v1.16b, v1.16b
; BE-NEXT:    str s1, [sp, #12]
; BE-NEXT:    ldrh w9, [sp, #12]
; BE-NEXT:    strb w8, [x1, #2]
; BE-NEXT:    strh w9, [x1]
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %l = load <3 x i32>, ptr %src
  %s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
  %t = trunc <3 x i32> %s to <3 x i8>
  store <3 x i8> %t, ptr %dst, align 4
  ret void
}

define void @shift_trunc_store_const_offset_1(ptr %src, ptr %dst) {
; CHECK-LABEL: shift_trunc_store_const_offset_1:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    add x8, x1, #2
; CHECK-NEXT:    add x9, x1, #3
; CHECK-NEXT:    ushr.4s v0, v0, #16
; CHECK-NEXT:    st1.b { v0 }[4], [x8]
; CHECK-NEXT:    add x8, x1, #1
; CHECK-NEXT:    st1.b { v0 }[8], [x9]
; CHECK-NEXT:    st1.b { v0 }[0], [x8]
; CHECK-NEXT:    ret
;
; BE-LABEL: shift_trunc_store_const_offset_1:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ld1 { v0.4s }, [x0]
; BE-NEXT:    shrn v0.4h, v0.4s, #16
; BE-NEXT:    uzp1 v1.8b, v0.8b, v0.8b
; BE-NEXT:    umov w8, v0.h[2]
; BE-NEXT:    rev32 v1.16b, v1.16b
; BE-NEXT:    str s1, [sp, #12]
; BE-NEXT:    ldrh w9, [sp, #12]
; BE-NEXT:    strb w8, [x1, #3]
; BE-NEXT:    sturh w9, [x1, #1]
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %l = load <3 x i32>, ptr %src
  %s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
  %t = trunc <3 x i32> %s to <3 x i8>
  %dst.1 = getelementptr inbounds i8, ptr %dst, i64 1
  store <3 x i8> %t, ptr %dst.1, align 1
  ret void
}

define void @shift_trunc_store_const_offset_3(ptr %src, ptr %dst) {
; CHECK-LABEL: shift_trunc_store_const_offset_3:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    add x8, x1, #4
; CHECK-NEXT:    add x9, x1, #5
; CHECK-NEXT:    ushr.4s v0, v0, #16
; CHECK-NEXT:    st1.b { v0 }[4], [x8]
; CHECK-NEXT:    add x8, x1, #3
; CHECK-NEXT:    st1.b { v0 }[8], [x9]
; CHECK-NEXT:    st1.b { v0 }[0], [x8]
; CHECK-NEXT:    ret
;
; BE-LABEL: shift_trunc_store_const_offset_3:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ld1 { v0.4s }, [x0]
; BE-NEXT:    shrn v0.4h, v0.4s, #16
; BE-NEXT:    uzp1 v1.8b, v0.8b, v0.8b
; BE-NEXT:    umov w8, v0.h[2]
; BE-NEXT:    rev32 v1.16b, v1.16b
; BE-NEXT:    str s1, [sp, #12]
; BE-NEXT:    ldrh w9, [sp, #12]
; BE-NEXT:    strb w8, [x1, #5]
; BE-NEXT:    sturh w9, [x1, #3]
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %l = load <3 x i32>, ptr %src
  %s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
  %t = trunc <3 x i32> %s to <3 x i8>
  %dst.3 = getelementptr inbounds i8, ptr %dst, i64 3
  store <3 x i8> %t, ptr %dst.3, align 1
  ret void
}

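; Same as shift_trunc_store, but the <3 x i8> store is volatile.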
define void @shift_trunc_volatile_store(ptr %src, ptr %dst) {
; CHECK-LABEL: shift_trunc_volatile_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    shrn.4h v0, v0, #16
; CHECK-NEXT:    uzp1.8b v1, v0, v0
; CHECK-NEXT:    umov.h w8, v0[2]
; CHECK-NEXT:    str s1, [sp, #12]
; CHECK-NEXT:    ldrh w9, [sp, #12]
; CHECK-NEXT:    strb w8, [x1, #2]
; CHECK-NEXT:    strh w9, [x1]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; BE-LABEL: shift_trunc_volatile_store:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ld1 { v0.4s }, [x0]
; BE-NEXT:    shrn v0.4h, v0.4s, #16
; BE-NEXT:    uzp1 v1.8b, v0.8b, v0.8b
; BE-NEXT:    umov w8, v0.h[2]
; BE-NEXT:    rev32 v1.16b, v1.16b
; BE-NEXT:    str s1, [sp, #12]
; BE-NEXT:    ldrh w9, [sp, #12]
; BE-NEXT:    strb w8, [x1, #2]
; BE-NEXT:    strh w9, [x1]
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %l = load <3 x i32>, ptr %src
  %s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
  %t = trunc <3 x i32> %s to <3 x i8>
  store volatile <3 x i8> %t, ptr %dst, align 1
  ret void
}

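; Round trips: load <3 x i8>, extend to <3 x i32>, add a constant, truncate
; back to <3 x i8>, and store over the source.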
define void @load_v3i8_zext_to_3xi32_add_trunc_store(ptr %src) {
; CHECK-LABEL: load_v3i8_zext_to_3xi32_add_trunc_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldrb w9, [x0, #2]
; CHECK-NEXT:    ldrh w10, [x0]
; CHECK-NEXT:  Lloh4:
; CHECK-NEXT:    adrp x8, lCPI22_0@PAGE
; CHECK-NEXT:  Lloh5:
; CHECK-NEXT:    ldr q1, [x8, lCPI22_0@PAGEOFF]
; CHECK-NEXT:    add x8, x0, #2
; CHECK-NEXT:    orr w9, w10, w9, lsl #16
; CHECK-NEXT:    fmov s0, w9
; CHECK-NEXT:    zip1.8b v0, v0, v0
; CHECK-NEXT:    uaddw.4s v0, v1, v0
; CHECK-NEXT:    st1.b { v0 }[8], [x8]
; CHECK-NEXT:    add x8, x0, #1
; CHECK-NEXT:    st1.b { v0 }[0], [x0]
; CHECK-NEXT:    st1.b { v0 }[4], [x8]
; CHECK-NEXT:    ret
; CHECK-NEXT:    .loh AdrpLdr Lloh4, Lloh5
;
; BE-LABEL: load_v3i8_zext_to_3xi32_add_trunc_store:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldrh w9, [x0]
; BE-NEXT:    adrp x8, .LCPI22_0
; BE-NEXT:    add x8, x8, :lo12:.LCPI22_0
; BE-NEXT:    ld1 { v1.4h }, [x8]
; BE-NEXT:    strh w9, [sp, #12]
; BE-NEXT:    add x9, x0, #2
; BE-NEXT:    ldr s0, [sp, #12]
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    ushll v0.8h, v0.8b, #0
; BE-NEXT:    ld1 { v0.b }[4], [x9]
; BE-NEXT:    add v0.4h, v0.4h, v1.4h
; BE-NEXT:    uzp1 v1.8b, v0.8b, v0.8b
; BE-NEXT:    umov w8, v0.h[2]
; BE-NEXT:    rev32 v1.16b, v1.16b
; BE-NEXT:    str s1, [sp, #8]
; BE-NEXT:    ldrh w9, [sp, #8]
; BE-NEXT:    strb w8, [x0, #2]
; BE-NEXT:    strh w9, [x0]
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %l = load <3 x i8>, ptr %src, align 1
  %e = zext <3 x i8> %l to <3 x i32>
  %add = add <3 x i32> %e, <i32 1, i32 2, i32 3>
  %t = trunc <3 x i32> %add to <3 x i8>
  store <3 x i8> %t, ptr %src
  ret void
}

define void @load_v3i8_sext_to_3xi32_add_trunc_store(ptr %src) {
; CHECK-LABEL: load_v3i8_sext_to_3xi32_add_trunc_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ldrb w9, [x0, #2]
; CHECK-NEXT:    ldrh w10, [x0]
; CHECK-NEXT:  Lloh6:
; CHECK-NEXT:    adrp x8, lCPI23_0@PAGE
; CHECK-NEXT:  Lloh7:
; CHECK-NEXT:    ldr q1, [x8, lCPI23_0@PAGEOFF]
; CHECK-NEXT:    add x8, x0, #2
; CHECK-NEXT:    orr w9, w10, w9, lsl #16
; CHECK-NEXT:    fmov s0, w9
; CHECK-NEXT:    zip1.8b v0, v0, v0
; CHECK-NEXT:    uaddw.4s v0, v1, v0
; CHECK-NEXT:    st1.b { v0 }[8], [x8]
; CHECK-NEXT:    add x8, x0, #1
; CHECK-NEXT:    st1.b { v0 }[0], [x0]
; CHECK-NEXT:    st1.b { v0 }[4], [x8]
; CHECK-NEXT:    ret
; CHECK-NEXT:    .loh AdrpLdr Lloh6, Lloh7
;
; BE-LABEL: load_v3i8_sext_to_3xi32_add_trunc_store:
; BE:       // %bb.0:
; BE-NEXT:    sub sp, sp, #16
; BE-NEXT:    .cfi_def_cfa_offset 16
; BE-NEXT:    ldrh w9, [x0]
; BE-NEXT:    adrp x8, .LCPI23_0
; BE-NEXT:    add x8, x8, :lo12:.LCPI23_0
; BE-NEXT:    ld1 { v1.4h }, [x8]
; BE-NEXT:    strh w9, [sp, #12]
; BE-NEXT:    add x9, x0, #2
; BE-NEXT:    ldr s0, [sp, #12]
; BE-NEXT:    rev32 v0.8b, v0.8b
; BE-NEXT:    ushll v0.8h, v0.8b, #0
; BE-NEXT:    ld1 { v0.b }[4], [x9]
; BE-NEXT:    add v0.4h, v0.4h, v1.4h
; BE-NEXT:    uzp1 v1.8b, v0.8b, v0.8b
; BE-NEXT:    umov w8, v0.h[2]
; BE-NEXT:    rev32 v1.16b, v1.16b
; BE-NEXT:    str s1, [sp, #8]
; BE-NEXT:    ldrh w9, [sp, #8]
; BE-NEXT:    strb w8, [x0, #2]
; BE-NEXT:    strh w9, [x0]
; BE-NEXT:    add sp, sp, #16
; BE-NEXT:    ret
  %l = load <3 x i8>, ptr %src, align 1
  %e = sext <3 x i8> %l to <3 x i32>
  %add = add <3 x i32> %e, <i32 1, i32 2, i32 3>
  %t = trunc <3 x i32> %add to <3 x i8>
  store <3 x i8> %t, ptr %src
  ret void
}