1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
; Loads <8 x i8> from %p+1, moves every element up one lane (lane 0 left
; undefined by the shuffle mask) and then fills lane 0 with the scalar %l2.
; The assertions expect one 8-byte load straight from %p (ldr d0, [x0]).
; NOTE(review): the definition of %l2 is not visible in this body; presumably
; an i8 load from %p - confirm.
4 define <8 x i8> @inserti8_first(ptr %p) {
5 ; CHECK-LABEL: inserti8_first:
7 ; CHECK-NEXT: ldr d0, [x0]
9 %q = getelementptr inbounds i8, ptr %p, i32 1
10 %l1 = load <8 x i8>, ptr %q
12 %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
13 %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
; Loads <8 x i8> from %p, moves every element down one lane (lane 7 left
; undefined by the shuffle mask) and then fills lane 7 with the scalar %l2.
; The assertions expect one unaligned 8-byte load from %p+1
; (ldur d0, [x0, #1]).
; NOTE(review): the definition of %l2 is not visible in this body; presumably
; an i8 load from %q (%p+8) - confirm.
17 define <8 x i8> @inserti8_last(ptr %p) {
18 ; CHECK-LABEL: inserti8_last:
20 ; CHECK-NEXT: ldur d0, [x0, #1]
22 %q = getelementptr inbounds i8, ptr %p, i32 8
23 %l1 = load <8 x i8>, ptr %p
25 %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
26 %ins = insertelement <8 x i8> %s, i8 %l2, i32 7
; Same shape as an insert into lane 0 after a one-lane shift up, but both the
; vector (<8 x i8> -> <8 x i16>) and the scalar (i8 -> i16) are sign-extended
; before the shuffle and insert. The assertions expect a single 8-byte load
; from %p followed by one sshll widening the whole vector.
; NOTE(review): the definition of %l2 is not visible in this body; presumably
; an i8 load from %p - confirm.
30 define <8 x i16> @inserti8_first_sext(ptr %p) {
31 ; CHECK-LABEL: inserti8_first_sext:
33 ; CHECK-NEXT: ldr d0, [x0]
34 ; CHECK-NEXT: sshll v0.8h, v0.8b, #0
36 %q = getelementptr inbounds i8, ptr %p, i32 1
37 %l1 = load <8 x i8>, ptr %q
38 %s1 = sext <8 x i8> %l1 to <8 x i16>
40 %s2 = sext i8 %l2 to i16
41 %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
42 %ins = insertelement <8 x i16> %s, i16 %s2, i32 0
; Sign-extended variant of the insert into the last lane: the vector loaded
; from %p is widened to <8 x i16>, shifted down one lane, and the
; sign-extended scalar is inserted into lane 7. The assertions expect a single
; load from %p+1 followed by one sshll.
; NOTE(review): the definition of %l2 is not visible in this body; presumably
; an i8 load from %q (%p+8) - confirm.
46 define <8 x i16> @inserti8_last_sext(ptr %p) {
47 ; CHECK-LABEL: inserti8_last_sext:
49 ; CHECK-NEXT: ldur d0, [x0, #1]
50 ; CHECK-NEXT: sshll v0.8h, v0.8b, #0
52 %q = getelementptr inbounds i8, ptr %p, i32 8
53 %l1 = load <8 x i8>, ptr %p
54 %s1 = sext <8 x i8> %l1 to <8 x i16>
56 %s2 = sext i8 %l2 to i16
57 %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
58 %ins = insertelement <8 x i16> %s, i16 %s2, i32 7
; Zero-extended variant of the insert into lane 0: both the <8 x i8> vector
; loaded from %p+1 and the i8 scalar are zero-extended to i16 elements before
; the one-lane shift up and the insert. The assertions expect a single 8-byte
; load from %p followed by one ushll.
; NOTE(review): the definition of %l2 is not visible in this body; presumably
; an i8 load from %p - confirm.
62 define <8 x i16> @inserti8_first_zext(ptr %p) {
63 ; CHECK-LABEL: inserti8_first_zext:
65 ; CHECK-NEXT: ldr d0, [x0]
66 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
68 %q = getelementptr inbounds i8, ptr %p, i32 1
69 %l1 = load <8 x i8>, ptr %q
70 %s1 = zext <8 x i8> %l1 to <8 x i16>
72 %s2 = zext i8 %l2 to i16
73 %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
74 %ins = insertelement <8 x i16> %s, i16 %s2, i32 0
; Zero-extended variant of the insert into the last lane: the vector loaded
; from %p is widened to <8 x i16>, shifted down one lane, and the
; zero-extended scalar is inserted into lane 7. The assertions expect a single
; load from %p+1 followed by one ushll.
; NOTE(review): the definition of %l2 is not visible in this body; presumably
; an i8 load from %q (%p+8) - confirm.
78 define <8 x i16> @inserti8_last_zext(ptr %p) {
79 ; CHECK-LABEL: inserti8_last_zext:
81 ; CHECK-NEXT: ldur d0, [x0, #1]
82 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
84 %q = getelementptr inbounds i8, ptr %p, i32 8
85 %l1 = load <8 x i8>, ptr %p
86 %s1 = zext <8 x i8> %l1 to <8 x i16>
88 %s2 = zext i8 %l2 to i16
89 %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
90 %ins = insertelement <8 x i16> %s, i16 %s2, i32 7
; 256-bit case with i32 elements: the <8 x i32> vector is loaded from %p+4
; bytes, shifted up one lane, and the i32 loaded from %p is inserted into
; lane 0. The assertions expect the result to be produced by one paired load
; of two q registers starting at %p.
94 define <8 x i32> @inserti32_first(ptr %p) {
95 ; CHECK-LABEL: inserti32_first:
97 ; CHECK-NEXT: ldp q0, q1, [x0]
99 %q = getelementptr inbounds i8, ptr %p, i32 4
100 %l1 = load <8 x i32>, ptr %q
101 %l2 = load i32, ptr %p
102 %s = shufflevector <8 x i32> %l1, <8 x i32> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
103 %ins = insertelement <8 x i32> %s, i32 %l2, i32 0
; 256-bit case with i32 elements, inserting into the last lane: the vector is
; loaded from %p, shifted down one lane, and the i32 loaded from %p+32 bytes
; is inserted into lane 7. The assertions expect two unaligned q-register
; loads at byte offsets 4 and 20 from %p.
107 define <8 x i32> @inserti32_last(ptr %p) {
108 ; CHECK-LABEL: inserti32_last:
110 ; CHECK-NEXT: ldur q0, [x0, #4]
111 ; CHECK-NEXT: ldur q1, [x0, #20]
113 %q = getelementptr inbounds i8, ptr %p, i32 32
114 %l1 = load <8 x i32>, ptr %p
115 %l2 = load i32, ptr %q
116 %s = shufflevector <8 x i32> %l1, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
117 %ins = insertelement <8 x i32> %s, i32 %l2, i32 7
; Same pattern as the i32 insert into lane 0, but the original vector %l1 has
; a second use: it is added to the shifted-and-inserted vector %ins. The
; assertions therefore expect loads at both %p (ldp) and %p+4 / %p+20 (ldur),
; followed by two 4 x i32 adds.
121 define <8 x i32> @inserti32_first_multiuse(ptr %p) {
122 ; CHECK-LABEL: inserti32_first_multiuse:
124 ; CHECK-NEXT: ldp q3, q2, [x0]
125 ; CHECK-NEXT: ldur q1, [x0, #20]
126 ; CHECK-NEXT: ldur q0, [x0, #4]
127 ; CHECK-NEXT: add v0.4s, v0.4s, v3.4s
128 ; CHECK-NEXT: add v1.4s, v1.4s, v2.4s
130 %q = getelementptr inbounds i8, ptr %p, i32 4
131 %l1 = load <8 x i32>, ptr %q
132 %l2 = load i32, ptr %p
133 %s = shufflevector <8 x i32> %l1, <8 x i32> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
134 %ins = insertelement <8 x i32> %s, i32 %l2, i32 0
135 %a = add <8 x i32> %l1, %ins
; Same pattern as the i32 insert into lane 7, but the original vector %l1 has
; a second use: it is added to the shifted-and-inserted vector %ins. The
; assertions therefore expect loads at both %p (ldp) and %p+4 / %p+20 (ldur),
; followed by two 4 x i32 adds.
139 define <8 x i32> @inserti32_last_multiuse(ptr %p) {
140 ; CHECK-LABEL: inserti32_last_multiuse:
142 ; CHECK-NEXT: ldp q0, q1, [x0]
143 ; CHECK-NEXT: ldur q2, [x0, #20]
144 ; CHECK-NEXT: ldur q3, [x0, #4]
145 ; CHECK-NEXT: add v0.4s, v0.4s, v3.4s
146 ; CHECK-NEXT: add v1.4s, v1.4s, v2.4s
148 %q = getelementptr inbounds i8, ptr %p, i32 32
149 %l1 = load <8 x i32>, ptr %p
150 %l2 = load i32, ptr %q
151 %s = shufflevector <8 x i32> %l1, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
152 %ins = insertelement <8 x i32> %s, i32 %l2, i32 7
153 %a = add <8 x i32> %l1, %ins
; Floating-point case: <4 x float> is loaded from %p+4 bytes, shifted up one
; lane, and the float loaded from %p is inserted into lane 0. The assertions
; expect a single 16-byte load from %p.
157 define <4 x float> @insertf32_first(ptr %p) {
158 ; CHECK-LABEL: insertf32_first:
160 ; CHECK-NEXT: ldr q0, [x0]
162 %q = getelementptr inbounds i8, ptr %p, i32 4
163 %l1 = load <4 x float>, ptr %q
164 %l2 = load float, ptr %p
165 %s = shufflevector <4 x float> %l1, <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
166 %ins = insertelement <4 x float> %s, float %l2, i32 0
; Floating-point case, last lane: <4 x float> is loaded from %p, shifted down
; one lane, and the float loaded from %p+16 bytes is inserted into lane 3.
; The assertions expect a single unaligned 16-byte load from %p+4.
170 define <4 x float> @insertf32_last(ptr %p) {
171 ; CHECK-LABEL: insertf32_last:
173 ; CHECK-NEXT: ldur q0, [x0, #4]
175 %q = getelementptr inbounds i8, ptr %p, i32 16
176 %l1 = load <4 x float>, ptr %p
177 %l2 = load float, ptr %q
178 %s = shufflevector <4 x float> %l1, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
179 %ins = insertelement <4 x float> %s, float %l2, i32 3
; Two-element i64 case: <2 x i64> is loaded from %p+8 bytes, element 0 is
; moved to lane 1, and the i64 loaded from %p is inserted into lane 0. The
; assertions expect a single 16-byte load from %p.
183 define <2 x i64> @inserti64_first(ptr %p) {
184 ; CHECK-LABEL: inserti64_first:
186 ; CHECK-NEXT: ldr q0, [x0]
188 %q = getelementptr inbounds i8, ptr %p, i32 8
189 %l1 = load <2 x i64>, ptr %q
190 %l2 = load i64, ptr %p
191 %s = shufflevector <2 x i64> %l1, <2 x i64> undef, <2 x i32> <i32 undef, i32 0>
192 %ins = insertelement <2 x i64> %s, i64 %l2, i32 0
; Two-element i64 case, last lane: <2 x i64> is loaded from %p, element 1 is
; moved to lane 0, and the i64 loaded from %p+16 bytes is inserted into
; lane 1. The assertions expect a single unaligned 16-byte load from %p+8.
196 define <2 x i64> @inserti64_last(ptr %p) {
197 ; CHECK-LABEL: inserti64_last:
199 ; CHECK-NEXT: ldur q0, [x0, #8]
201 %q = getelementptr inbounds i8, ptr %p, i32 16
202 %l1 = load <2 x i64>, ptr %p
203 %l2 = load i64, ptr %q
204 %s = shufflevector <2 x i64> %l1, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
205 %ins = insertelement <2 x i64> %s, i64 %l2, i32 1
; Insert into lane 0 where the shuffle mask contains an additional undef
; entry (lane 3) besides the one at lane 0; every defined entry still selects
; the element one lane below. The assertions expect the same single 8-byte
; load from %p as the fully specified mask.
209 define <8 x i8> @inserti8_first_undef(ptr %p) {
210 ; CHECK-LABEL: inserti8_first_undef:
212 ; CHECK-NEXT: ldr d0, [x0]
214 %q = getelementptr inbounds i8, ptr %p, i32 1
215 %l1 = load <8 x i8>, ptr %q
216 %l2 = load i8, ptr %p
217 %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 undef, i32 3, i32 4, i32 5, i32 6>
218 %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
; Insert into lane 7 where the shuffle mask is almost entirely undef: only
; lane 0 is defined (selecting element 1). The assertions expect the same
; single load from %p+1 as the fully specified mask.
222 define <8 x i8> @inserti8_last_undef(ptr %p) {
223 ; CHECK-LABEL: inserti8_last_undef:
225 ; CHECK-NEXT: ldur d0, [x0, #1]
227 %q = getelementptr inbounds i8, ptr %p, i32 8
228 %l1 = load <8 x i8>, ptr %p
229 %l2 = load i8, ptr %q
230 %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
231 %ins = insertelement <8 x i8> %s, i8 %l2, i32 7
; Mismatched extensions: the vector is zero-extended (zext) while the scalar
; is sign-extended (sext). The assertions expect the loads to stay separate:
; a vector load from %p+1, a sign-extending byte load from %p, ushll, an ext
; to rotate the lanes, and a lane move for element 0.
237 define <8 x i16> @wrong_zextandsext(ptr %p) {
238 ; CHECK-LABEL: wrong_zextandsext:
240 ; CHECK-NEXT: ldur d0, [x0, #1]
241 ; CHECK-NEXT: ldrsb w8, [x0]
242 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
243 ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #14
244 ; CHECK-NEXT: mov v0.h[0], w8
246 %q = getelementptr inbounds i8, ptr %p, i32 1
247 %l1 = load <8 x i8>, ptr %q
248 %s1 = zext <8 x i8> %l1 to <8 x i16>
249 %l2 = load i8, ptr %p
250 %s2 = sext i8 %l2 to i16
251 %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
252 %ins = insertelement <8 x i16> %s, i16 %s2, i32 0
; Wrong insert index: the shuffle shifts elements up one lane (leaving lane 0
; undefined) but the scalar from %p is inserted into lane 7 instead of
; lane 0. The assertions expect the separate vector load from %p+1, an ext,
; and a single-lane ld1 into lane 7.
256 define <8 x i8> @wrongidx_first(ptr %p) {
257 ; CHECK-LABEL: wrongidx_first:
259 ; CHECK-NEXT: ldur d0, [x0, #1]
260 ; CHECK-NEXT: ext v0.8b, v0.8b, v0.8b, #7
261 ; CHECK-NEXT: ld1 { v0.b }[7], [x0]
262 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
264 %q = getelementptr inbounds i8, ptr %p, i32 1
265 %l1 = load <8 x i8>, ptr %q
266 %l2 = load i8, ptr %p
267 %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
268 %ins = insertelement <8 x i8> %s, i8 %l2, i32 7
; Wrong insert index: the shuffle shifts elements down one lane (leaving
; lane 7 undefined) but the scalar from %p+8 is inserted into lane 0 instead
; of lane 7. The assertions expect the separate vector load from %p, an
; address add, an ext, and a single-lane ld1 into lane 0.
272 define <8 x i8> @wrong_last(ptr %p) {
273 ; CHECK-LABEL: wrong_last:
275 ; CHECK-NEXT: ldr d0, [x0]
276 ; CHECK-NEXT: add x8, x0, #8
277 ; CHECK-NEXT: ext v0.8b, v0.8b, v0.8b, #1
278 ; CHECK-NEXT: ld1 { v0.b }[0], [x8]
279 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
281 %q = getelementptr inbounds i8, ptr %p, i32 8
282 %l1 = load <8 x i8>, ptr %p
283 %l2 = load i8, ptr %q
284 %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
285 %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
; Wrong shuffle mask: lane 3 selects element 3 (duplicating it) instead of
; element 2, so the mask is not a plain one-lane shift. The assertions expect
; the vector load from %p+1, a table lookup (tbl) driven by a constant-pool
; mask, and a single-lane ld1 into lane 0.
289 define <8 x i8> @wrong_shuffle(ptr %p) {
290 ; CHECK-LABEL: wrong_shuffle:
292 ; CHECK-NEXT: ldur d0, [x0, #1]
293 ; CHECK-NEXT: adrp x8, .LCPI19_0
294 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI19_0]
295 ; CHECK-NEXT: mov v0.d[1], v0.d[0]
296 ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
297 ; CHECK-NEXT: ld1 { v0.b }[0], [x0]
298 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
300 %q = getelementptr inbounds i8, ptr %p, i32 1
301 %l1 = load <8 x i8>, ptr %q
302 %l2 = load i8, ptr %p
303 %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6>
304 %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
; Mismatched scalar type: the vector elements are i8 sign-extended to i16,
; but the scalar is loaded directly as an i16 with no extension. The
; assertions expect the vector load from %p+1, sshll, an ext, and a
; halfword single-lane ld1 into lane 0.
308 define <8 x i16> @wrong_exttype(ptr %p) {
309 ; CHECK-LABEL: wrong_exttype:
311 ; CHECK-NEXT: ldur d0, [x0, #1]
312 ; CHECK-NEXT: sshll v0.8h, v0.8b, #0
313 ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #14
314 ; CHECK-NEXT: ld1 { v0.h }[0], [x0]
316 %q = getelementptr inbounds i8, ptr %p, i32 1
317 %l1 = load <8 x i8>, ptr %q
318 %s1 = sext <8 x i8> %l1 to <8 x i16>
319 %l2 = load i16, ptr %p
320 %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
321 %ins = insertelement <8 x i16> %s, i16 %l2, i32 0
; Mismatched source widths: the vector is <4 x i8> sign-extended to
; <4 x i32>, while the scalar is an i16 sign-extended to i32. The assertions
; expect a 4-byte vector load from %p+1, a sign-extending halfword load from
; %p, two sshll widening steps, an ext, and a lane move for element 0.
325 define <4 x i32> @wrong_exttype2(ptr %p) {
326 ; CHECK-LABEL: wrong_exttype2:
328 ; CHECK-NEXT: ldur s0, [x0, #1]
329 ; CHECK-NEXT: ldrsh w8, [x0]
330 ; CHECK-NEXT: sshll v0.8h, v0.8b, #0
331 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0
332 ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #12
333 ; CHECK-NEXT: mov v0.s[0], w8
335 %q = getelementptr inbounds i8, ptr %p, i32 1
336 %l1 = load <4 x i8>, ptr %q
337 %s1 = sext <4 x i8> %l1 to <4 x i32>
338 %l2 = load i16, ptr %p
339 %s2 = sext i16 %l2 to i32
340 %s = shufflevector <4 x i32> %s1, <4 x i32> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
341 %ins = insertelement <4 x i32> %s, i32 %s2, i32 0
; Wrong vector offset: the <8 x i8> vector is loaded from %p-1 rather than
; %p+1, so the scalar at %p does not sit immediately before the vector data.
; The assertions expect the vector load from %p-1, an ext, and a single-lane
; ld1 into lane 0.
345 define <8 x i8> @wrong_offsetfirst(ptr %p) {
346 ; CHECK-LABEL: wrong_offsetfirst:
348 ; CHECK-NEXT: ldur d0, [x0, #-1]
349 ; CHECK-NEXT: ext v0.8b, v0.8b, v0.8b, #7
350 ; CHECK-NEXT: ld1 { v0.b }[0], [x0]
351 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
353 %q = getelementptr inbounds i8, ptr %p, i32 -1
354 %l1 = load <8 x i8>, ptr %q
355 %l2 = load i8, ptr %p
356 %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
357 %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
; Wrong scalar offset: the scalar is loaded from %p+7, which lies inside the
; 8 bytes covered by the vector load from %p rather than directly after
; them. The assertions expect the vector load from %p, an address add of 7,
; an ext, and a single-lane ld1 into lane 7.
361 define <8 x i8> @wrong_offsetlast(ptr %p) {
362 ; CHECK-LABEL: wrong_offsetlast:
364 ; CHECK-NEXT: ldr d0, [x0]
365 ; CHECK-NEXT: add x8, x0, #7
366 ; CHECK-NEXT: ext v0.8b, v0.8b, v0.8b, #1
367 ; CHECK-NEXT: ld1 { v0.b }[7], [x8]
368 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
370 %q = getelementptr inbounds i8, ptr %p, i32 7
371 %l1 = load <8 x i8>, ptr %p
372 %l2 = load i8, ptr %q
373 %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
374 %ins = insertelement <8 x i8> %s, i8 %l2, i32 7
; Insert-into-lane-0 pattern with a byte store of zero to the second pointer
; argument. The assertions expect the store (strb wzr, [x1]) to sit between
; the vector load from %p+1 and the single-lane ld1 from %p, with the two
; loads kept separate.
; NOTE(review): the store instruction itself is not visible in this body;
; presumably it sits between the %l1 and %l2 loads and targets %r - confirm.
379 define <8 x i8> @storebetween(ptr %p, ptr %r) {
380 ; CHECK-LABEL: storebetween:
382 ; CHECK-NEXT: ldur d0, [x0, #1]
383 ; CHECK-NEXT: strb wzr, [x1]
384 ; CHECK-NEXT: ext v0.8b, v0.8b, v0.8b, #7
385 ; CHECK-NEXT: ld1 { v0.b }[0], [x0]
386 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
388 %q = getelementptr inbounds i8, ptr %p, i32 1
389 %l1 = load <8 x i8>, ptr %q
391 %l2 = load i8, ptr %p
392 %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
393 %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
; Insert-into-lane-0 pattern with a byte store of zero to the second pointer
; argument ahead of both loads. The assertions expect the store
; (strb wzr, [x1]) followed by a single 8-byte load from %p.
; NOTE(review): the store instruction itself is not visible in this body;
; presumably it precedes the %l1 load and targets %r - confirm.
397 define <8 x i8> @storebefore(ptr %p, ptr %r) {
398 ; CHECK-LABEL: storebefore:
400 ; CHECK-NEXT: strb wzr, [x1]
401 ; CHECK-NEXT: ldr d0, [x0]
403 %q = getelementptr inbounds i8, ptr %p, i32 1
405 %l1 = load <8 x i8>, ptr %q
406 %l2 = load i8, ptr %p
407 %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
408 %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
412 define <8 x i8> @storeafter(ptr %p, ptr %r) {
413 ; CHECK-LABEL: storeafter:
415 ; CHECK-NEXT: ldr d0, [x0]
416 ; CHECK-NEXT: strb wzr, [x1]
418 %q = getelementptr inbounds i8, ptr %p, i32 1
419 %l1 = load <8 x i8>, ptr %q
420 %l2 = load i8, ptr %p
422 %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
423 %ins = insertelement <8 x i8> %s, i8 %l2, i32 0