; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -o - %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
; test1: splat one i8 loaded from %a and one from %b into <8 x i8> vectors,
; then concatenate them (mask 0..15: %a splat in the low half, %b splat in the
; high half). The checks expect one ld1r per pointer plus a single 64-bit lane
; move to join the halves.
define <16 x i8> @test1(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r { v1.8b }, [x1]
; CHECK-NEXT:    ld1r { v0.8b }, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %0 = load i8, ptr %a, align 1
  %1 = insertelement <8 x i8> poison, i8 %0, i64 0
  %lane = shufflevector <8 x i8> %1, <8 x i8> poison, <8 x i32> zeroinitializer
  %2 = load i8, ptr %b, align 1
  %3 = insertelement <8 x i8> poison, i8 %2, i64 0
  %lane2 = shufflevector <8 x i8> %3, <8 x i8> poison, <8 x i32> zeroinitializer
  %shuffle.i = shufflevector <8 x i8> %lane, <8 x i8> %lane2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %shuffle.i
}
; test2: like a plain concatenation of the two splats, except that the final
; mask element is 0, so the last result lane comes from the %a splat rather
; than the %b splat. The checks expect the %a byte to be loaded into a GPR
; (ldrb + dup) so it can also be inserted into byte 7 of the %b splat before
; the halves are joined.
define <16 x i8> @test2(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r { v1.8b }, [x1]
; CHECK-NEXT:    ldrb w8, [x0]
; CHECK-NEXT:    dup v0.8b, w8
; CHECK-NEXT:    mov v1.b[7], w8
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %0 = load i8, ptr %a, align 1
  %1 = insertelement <8 x i8> poison, i8 %0, i64 0
  %lane = shufflevector <8 x i8> %1, <8 x i8> poison, <8 x i32> zeroinitializer
  %2 = load i8, ptr %b, align 1
  %3 = insertelement <8 x i8> poison, i8 %2, i64 0
  %lane2 = shufflevector <8 x i8> %3, <8 x i8> poison, <8 x i32> zeroinitializer
  %shuffle.i = shufflevector <8 x i8> %lane, <8 x i8> %lane2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
  ret <16 x i8> %shuffle.i
}
; test3: interleave the two <8 x i8> splats element by element
; (mask 0, 8, 1, 9, ...). The checks expect two ld1r loads combined by zip1.
define <16 x i8> @test3(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test3:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r { v0.8b }, [x0]
; CHECK-NEXT:    ld1r { v1.8b }, [x1]
; CHECK-NEXT:    zip1 v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %0 = load i8, ptr %a, align 1
  %1 = insertelement <8 x i8> poison, i8 %0, i64 0
  %lane = shufflevector <8 x i8> %1, <8 x i8> poison, <8 x i32> zeroinitializer
  %2 = load i8, ptr %b, align 1
  %3 = insertelement <8 x i8> poison, i8 %2, i64 0
  %lane2 = shufflevector <8 x i8> %3, <8 x i8> poison, <8 x i32> zeroinitializer
  %shuffle.i = shufflevector <8 x i8> %lane, <8 x i8> %lane2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <16 x i8> %shuffle.i
}
; test4: concatenate the two <8 x i8> splats in swapped order (mask 8..15
; followed by 0..7), so the %b splat forms the low half and the %a splat the
; high half. The checks expect the same ld1r + lane-move sequence as a plain
; concatenation, with the source pointers exchanged.
define <16 x i8> @test4(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r { v1.8b }, [x0]
; CHECK-NEXT:    ld1r { v0.8b }, [x1]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %0 = load i8, ptr %a, align 1
  %1 = insertelement <8 x i8> poison, i8 %0, i64 0
  %lane = shufflevector <8 x i8> %1, <8 x i8> poison, <8 x i32> zeroinitializer
  %2 = load i8, ptr %b, align 1
  %3 = insertelement <8 x i8> poison, i8 %2, i64 0
  %lane2 = shufflevector <8 x i8> %3, <8 x i8> poison, <8 x i32> zeroinitializer
  %shuffle.i = shufflevector <8 x i8> %lane, <8 x i8> %lane2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %shuffle.i
}
; test5: result lanes 0..14 all select element 0 (the %a splat) and only lane
; 15 selects element 15 (the last element of the %b splat). The checks expect
; this irregular mask to be lowered through a constant-pool index vector and
; tbl rather than a lane move.
define <16 x i8> @test5(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test5:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr b0, [x0]
; CHECK-NEXT:    adrp x8, .LCPI4_0
; CHECK-NEXT:    ld1r { v1.16b }, [x1]
; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-NEXT:    ret
entry:
  %0 = load i8, ptr %a, align 1
  %1 = insertelement <8 x i8> poison, i8 %0, i64 0
  %lane = shufflevector <8 x i8> %1, <8 x i8> poison, <8 x i32> zeroinitializer
  %2 = load i8, ptr %b, align 1
  %3 = insertelement <8 x i8> poison, i8 %2, i64 0
  %lane2 = shufflevector <8 x i8> %3, <8 x i8> poison, <8 x i32> zeroinitializer
  %shuffle.i = shufflevector <8 x i8> %lane, <8 x i8> %lane2, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15>
  ret <16 x i8> %shuffle.i
}
; test6: concatenate two <4 x i8> splats into an <8 x i8> result (mask 0..7).
; The checks expect each byte to be splatted with an 8b ld1r and the upper
; 32-bit lane of the result to be taken from the %b splat.
define <8 x i8> @test6(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test6:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r { v1.8b }, [x1]
; CHECK-NEXT:    ld1r { v0.8b }, [x0]
; CHECK-NEXT:    mov v0.s[1], v1.s[1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
entry:
  %0 = load i8, ptr %a, align 1
  %1 = insertelement <4 x i8> poison, i8 %0, i64 0
  %lane = shufflevector <4 x i8> %1, <4 x i8> poison, <4 x i32> zeroinitializer
  %2 = load i8, ptr %b, align 1
  %3 = insertelement <4 x i8> poison, i8 %2, i64 0
  %lane2 = shufflevector <4 x i8> %3, <4 x i8> poison, <4 x i32> zeroinitializer
  %shuffle.i = shufflevector <4 x i8> %lane, <4 x i8> %lane2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i8> %shuffle.i
}
; test7: concatenate two <4 x i8> splats in swapped order (mask 4..7 followed
; by 0..3), so the %b splat forms the low half and the %a splat the high half
; of the <8 x i8> result. The checks expect the same ld1r + 32-bit lane-move
; sequence as the unswapped case, with the source pointers exchanged.
define <8 x i8> @test7(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test7:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r { v1.8b }, [x0]
; CHECK-NEXT:    ld1r { v0.8b }, [x1]
; CHECK-NEXT:    mov v0.s[1], v1.s[1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
entry:
  %0 = load i8, ptr %a, align 1
  %1 = insertelement <4 x i8> poison, i8 %0, i64 0
  %lane = shufflevector <4 x i8> %1, <4 x i8> poison, <4 x i32> zeroinitializer
  %2 = load i8, ptr %b, align 1
  %3 = insertelement <4 x i8> poison, i8 %2, i64 0
  %lane2 = shufflevector <4 x i8> %3, <4 x i8> poison, <4 x i32> zeroinitializer
  %shuffle.i = shufflevector <4 x i8> %lane, <4 x i8> %lane2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i8> %shuffle.i
}
; test8: i16 variant of the plain concatenation: two <4 x i16> splats joined
; into an <8 x i16> result (mask 0..7). The checks expect 4h ld1r loads and a
; single 64-bit lane move.
define <8 x i16> @test8(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r { v1.4h }, [x1]
; CHECK-NEXT:    ld1r { v0.4h }, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %0 = load i16, ptr %a, align 1
  %1 = insertelement <4 x i16> poison, i16 %0, i64 0
  %lane = shufflevector <4 x i16> %1, <4 x i16> poison, <4 x i32> zeroinitializer
  %2 = load i16, ptr %b, align 1
  %3 = insertelement <4 x i16> poison, i16 %2, i64 0
  %lane2 = shufflevector <4 x i16> %3, <4 x i16> poison, <4 x i32> zeroinitializer
  %shuffle.i = shufflevector <4 x i16> %lane, <4 x i16> %lane2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle.i
}
; test9: i32 variant of the plain concatenation: two <2 x i32> splats joined
; into a <4 x i32> result (mask 0..3). The checks expect 2s ld1r loads and a
; single 64-bit lane move.
define <4 x i32> @test9(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test9:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r { v1.2s }, [x1]
; CHECK-NEXT:    ld1r { v0.2s }, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %0 = load i32, ptr %a, align 1
  %1 = insertelement <2 x i32> poison, i32 %0, i64 0
  %lane = shufflevector <2 x i32> %1, <2 x i32> poison, <2 x i32> zeroinitializer
  %2 = load i32, ptr %b, align 1
  %3 = insertelement <2 x i32> poison, i32 %2, i64 0
  %lane2 = shufflevector <2 x i32> %3, <2 x i32> poison, <2 x i32> zeroinitializer
  %shuffle.i = shufflevector <2 x i32> %lane, <2 x i32> %lane2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %shuffle.i
}
; test10: i64 variant: each loaded i64 is bitcast to <1 x i64> (no splat is
; needed for a single element) and the two are concatenated into <2 x i64>
; (mask 0, 1). The checks expect plain d-register loads and a 64-bit lane move.
define <2 x i64> @test10(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test10:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %0 = load i64, ptr %a, align 1
  %lane = bitcast i64 %0 to <1 x i64>
  %1 = load i64, ptr %b, align 1
  %lane2 = bitcast i64 %1 to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %lane, <1 x i64> %lane2, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %shuffle.i
}
; test11: irregular mask over two <4 x i8> splats (0, 1, 2, 3, 4, 5, 0, 1):
; result lanes 0..3 and 6..7 come from the %a splat, lanes 4..5 from the %b
; splat. This is not a plain concatenation; the checks expect two ld1r loads
; followed by individual halfword lane moves.
define <8 x i8> @test11(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test11:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r { v1.8b }, [x0]
; CHECK-NEXT:    ld1r { v2.8b }, [x1]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.h[2], v2.h[0]
; CHECK-NEXT:    mov v0.h[3], v1.h[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
entry:
  %0 = load i8, ptr %a, align 1
  %1 = insertelement <4 x i8> poison, i8 %0, i64 0
  %lane = shufflevector <4 x i8> %1, <4 x i8> poison, <4 x i32> zeroinitializer
  %2 = load i8, ptr %b, align 1
  %3 = insertelement <4 x i8> poison, i8 %2, i64 0
  %lane2 = shufflevector <4 x i8> %3, <4 x i8> poison, <4 x i32> zeroinitializer
  %shuffle.i = shufflevector <4 x i8> %lane, <4 x i8> %lane2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 0, i32 1>
  ret <8 x i8> %shuffle.i
}
; test12: irregular mask over two <2 x i32> splats (0, 3, 2, 1): the result is
; the %a value, the %b value, the %b value, then the %a value. The checks
; expect an ld1r for %a, a scalar load of %b, and lane inserts to build both
; halves before they are joined.
define <4 x i32> @test12(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test12:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r { v0.2s }, [x0]
; CHECK-NEXT:    ldr w8, [x1]
; CHECK-NEXT:    mov v1.16b, v0.16b
; CHECK-NEXT:    mov v0.s[1], w8
; CHECK-NEXT:    mov v1.s[0], w8
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %0 = load i32, ptr %a, align 1
  %1 = insertelement <2 x i32> poison, i32 %0, i64 0
  %lane = shufflevector <2 x i32> %1, <2 x i32> poison, <2 x i32> zeroinitializer
  %2 = load i32, ptr %b, align 1
  %3 = insertelement <2 x i32> poison, i32 %2, i64 0
  %lane2 = shufflevector <2 x i32> %3, <2 x i32> poison, <2 x i32> zeroinitializer
  %shuffle.i = shufflevector <2 x i32> %lane, <2 x i32> %lane2, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
  ret <4 x i32> %shuffle.i
}
; test13: i64 variant with swapped order: the two <1 x i64> values are
; concatenated with mask 1, 0, so the %b value is element 0 and the %a value
; is element 1. The checks expect d-register loads with the destination
; registers exchanged, followed by a 64-bit lane move.
define <2 x i64> @test13(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test13:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    ldr d0, [x1]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %0 = load i64, ptr %a, align 1
  %lane = bitcast i64 %0 to <1 x i64>
  %1 = load i64, ptr %b, align 1
  %lane2 = bitcast i64 %1 to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %lane, <1 x i64> %lane2, <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %shuffle.i
}
; test14: the result type is <3 x i32>, so the shuffle is not a full
; concatenation of the two <2 x i32> splats: mask 0, 1, 2 yields the %a value
; twice followed by the %b value once. The checks expect a scalar load of %a
; inserted into lanes 0 and 1, and a single-lane ld1 from %b into lane 2.
define <3 x i32> @test14(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: test14:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    mov v0.s[1], w8
; CHECK-NEXT:    ld1 { v0.s }[2], [x1]
; CHECK-NEXT:    ret
entry:
  %0 = load i32, ptr %a, align 1
  %1 = insertelement <2 x i32> poison, i32 %0, i64 0
  %lane = shufflevector <2 x i32> %1, <2 x i32> poison, <2 x i32> zeroinitializer
  %2 = load i32, ptr %b, align 1
  %3 = insertelement <2 x i32> poison, i32 %2, i64 0
  %lane2 = shufflevector <2 x i32> %3, <2 x i32> poison, <2 x i32> zeroinitializer
  %shuffle.i = shufflevector <2 x i32> %lane, <2 x i32> %lane2, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x i32> %shuffle.i
}