1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s
; Result is %b with bytes 0-1 replaced by bytes 0-1 of %a (mask indices 0,1
; select from %a; 18-31 select %b in place). The expected assembly is a
; register copy followed by a single halfword lane move into h[0].
7 define <16 x i8> @insert_v16i8_2_1(float %tmp, <16 x i8> %b, <16 x i8> %a) {
8 ; CHECK-LABEL: insert_v16i8_2_1:
10 ; CHECK-NEXT: mov v0.16b, v1.16b
11 ; CHECK-NEXT: mov v0.h[0], v2.h[0]
13 %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; Result is %b with bytes 2-3 replaced by bytes 0-1 of %a. The expected
; assembly is a register copy followed by a halfword lane move into h[1].
17 define <16 x i8> @insert_v16i8_2_2(float %tmp, <16 x i8> %b, <16 x i8> %a) {
18 ; CHECK-LABEL: insert_v16i8_2_2:
20 ; CHECK-NEXT: mov v0.16b, v1.16b
21 ; CHECK-NEXT: mov v0.h[1], v2.h[0]
23 %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 0, i32 1, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; Result is %b with bytes 12-13 replaced by bytes 0-1 of %a. The expected
; assembly is a register copy followed by a halfword lane move into h[6].
27 define <16 x i8> @insert_v16i8_2_6(float %tmp, <16 x i8> %b, <16 x i8> %a) {
28 ; CHECK-LABEL: insert_v16i8_2_6:
30 ; CHECK-NEXT: mov v0.16b, v1.16b
31 ; CHECK-NEXT: mov v0.h[6], v2.h[0]
33 %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 0, i32 1, i32 30, i32 31>
; Result is %b with bytes 0-3 replaced by bytes 0-3 of %a. The expected
; assembly is a register copy followed by a 32-bit lane move into s[0].
37 define <16 x i8> @insert_v16i8_4_1(float %tmp, <16 x i8> %b, <16 x i8> %a) {
38 ; CHECK-LABEL: insert_v16i8_4_1:
40 ; CHECK-NEXT: mov v0.16b, v1.16b
41 ; CHECK-NEXT: mov v0.s[0], v2.s[0]
43 %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; Result is %b with bytes 2-5 replaced by bytes 0-3 of %a. The four-byte group
; starts at byte 2, so it does not line up with a 32-bit lane; the expected
; assembly uses a two-register tbl with an index vector loaded from the
; constant pool rather than a lane move.
47 define <16 x i8> @insert_v16i8_4_15(float %tmp, <16 x i8> %b, <16 x i8> %a) {
48 ; CHECK-LABEL: insert_v16i8_4_15:
50 ; CHECK-NEXT: // kill: def $q2 killed $q2 def $q2_q3
51 ; CHECK-NEXT: adrp x8, .LCPI4_0
52 ; CHECK-NEXT: mov v3.16b, v1.16b
53 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI4_0]
54 ; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
56 %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 0, i32 1, i32 2, i32 3, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; Result is %b with bytes 4-7 replaced by bytes 0-3 of %a. The expected
; assembly is a register copy followed by a 32-bit lane move into s[1].
60 define <16 x i8> @insert_v16i8_4_2(float %tmp, <16 x i8> %b, <16 x i8> %a) {
61 ; CHECK-LABEL: insert_v16i8_4_2:
63 ; CHECK-NEXT: mov v0.16b, v1.16b
64 ; CHECK-NEXT: mov v0.s[1], v2.s[0]
66 %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; Result is %b with bytes 8-11 replaced by bytes 0-3 of %a. The expected
; assembly is a register copy followed by a 32-bit lane move into s[2].
70 define <16 x i8> @insert_v16i8_4_3(float %tmp, <16 x i8> %b, <16 x i8> %a) {
71 ; CHECK-LABEL: insert_v16i8_4_3:
73 ; CHECK-NEXT: mov v0.16b, v1.16b
74 ; CHECK-NEXT: mov v0.s[2], v2.s[0]
76 %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31>
; Result is %b with bytes 12-15 replaced by bytes 0-3 of %a. The expected
; assembly is a register copy followed by a 32-bit lane move into s[3].
80 define <16 x i8> @insert_v16i8_4_4(float %tmp, <16 x i8> %b, <16 x i8> %a) {
81 ; CHECK-LABEL: insert_v16i8_4_4:
83 ; CHECK-NEXT: mov v0.16b, v1.16b
84 ; CHECK-NEXT: mov v0.s[3], v2.s[0]
86 %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 0, i32 1, i32 2, i32 3>
; 64-bit vector variant: bytes 0-3 come from %a[0..3] and bytes 4-7 from
; %b[4..7]. The expected assembly copies one d-register and moves the upper
; 32-bit lane of the other into s[1].
90 define <8 x i8> @insert_v8i8_4_1(float %tmp, <8 x i8> %b, <8 x i8> %a) {
91 ; CHECK-LABEL: insert_v8i8_4_1:
93 ; CHECK-NEXT: fmov d0, d2
94 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
95 ; CHECK-NEXT: mov v0.s[1], v1.s[1]
96 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
98 %s2 = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; 64-bit vector variant: bytes 0-3 come from %b[0..3] and bytes 4-7 from
; %a[0..3]. The expected assembly copies one d-register and moves the low
; 32-bit lane of the other into s[1].
102 define <8 x i8> @insert_v8i8_4_2(float %tmp, <8 x i8> %b, <8 x i8> %a) {
103 ; CHECK-LABEL: insert_v8i8_4_2:
105 ; CHECK-NEXT: fmov d0, d1
106 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
107 ; CHECK-NEXT: mov v0.s[1], v2.s[0]
108 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
110 %s2 = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
; Bytes 0-7 come from %a[0..7] and bytes 8-15 from %b[8..15]. The expected
; assembly is a register copy followed by a 64-bit lane move d[1] <- d[1].
114 define <16 x i8> @insert_v16i8_8_1(float %tmp, <16 x i8> %b, <16 x i8> %a) {
115 ; CHECK-LABEL: insert_v16i8_8_1:
117 ; CHECK-NEXT: mov v0.16b, v2.16b
118 ; CHECK-NEXT: mov v0.d[1], v1.d[1]
120 %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; Bytes 0-7 come from %b[0..7] and bytes 8-15 from %a[0..7]. The expected
; assembly is a register copy followed by a 64-bit lane move d[1] <- d[0].
124 define <16 x i8> @insert_v16i8_8_2(float %tmp, <16 x i8> %b, <16 x i8> %a) {
125 ; CHECK-LABEL: insert_v16i8_8_2:
127 ; CHECK-NEXT: mov v0.16b, v1.16b
128 ; CHECK-NEXT: mov v0.d[1], v2.d[0]
130 %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; i16-element variant: result is %b with elements 0-1 replaced by elements 0-1
; of %a. The expected assembly is a register copy followed by a 32-bit lane
; move into s[0].
136 define <8 x i16> @insert_v8i16_2_1(float %tmp, <8 x i16> %b, <8 x i16> %a) {
137 ; CHECK-LABEL: insert_v8i16_2_1:
139 ; CHECK-NEXT: mov v0.16b, v1.16b
140 ; CHECK-NEXT: mov v0.s[0], v2.s[0]
142 %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Result is %b with elements 1-2 replaced by elements 0-1 of %a. The pair
; starts at element 1, so it does not line up with a 32-bit lane; the expected
; assembly uses a two-register tbl with an index vector loaded from the
; constant pool rather than a lane move.
146 define <8 x i16> @insert_v8i16_2_15(float %tmp, <8 x i16> %b, <8 x i16> %a) {
147 ; CHECK-LABEL: insert_v8i16_2_15:
149 ; CHECK-NEXT: // kill: def $q2 killed $q2 def $q2_q3
150 ; CHECK-NEXT: adrp x8, .LCPI13_0
151 ; CHECK-NEXT: mov v3.16b, v1.16b
152 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI13_0]
153 ; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
155 %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 1, i32 11, i32 12, i32 13, i32 14, i32 15>
; Result is %b with elements 2-3 replaced by elements 0-1 of %a. The expected
; assembly is a register copy followed by a 32-bit lane move into s[1].
159 define <8 x i16> @insert_v8i16_2_2(float %tmp, <8 x i16> %b, <8 x i16> %a) {
160 ; CHECK-LABEL: insert_v8i16_2_2:
162 ; CHECK-NEXT: mov v0.16b, v1.16b
163 ; CHECK-NEXT: mov v0.s[1], v2.s[0]
165 %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 0, i32 1, i32 12, i32 13, i32 14, i32 15>
; Result is %b with elements 4-5 replaced by elements 0-1 of %a. The expected
; assembly is a register copy followed by a 32-bit lane move into s[2].
169 define <8 x i16> @insert_v8i16_2_3(float %tmp, <8 x i16> %b, <8 x i16> %a) {
170 ; CHECK-LABEL: insert_v8i16_2_3:
172 ; CHECK-NEXT: mov v0.16b, v1.16b
173 ; CHECK-NEXT: mov v0.s[2], v2.s[0]
175 %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 14, i32 15>
; Result is %b with elements 6-7 replaced by elements 0-1 of %a. The expected
; assembly is a register copy followed by a 32-bit lane move into s[3].
179 define <8 x i16> @insert_v8i16_2_4(float %tmp, <8 x i16> %b, <8 x i16> %a) {
180 ; CHECK-LABEL: insert_v8i16_2_4:
182 ; CHECK-NEXT: mov v0.16b, v1.16b
183 ; CHECK-NEXT: mov v0.s[3], v2.s[0]
185 %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 0, i32 1>
; 64-bit vector variant: elements 0-1 come from %a[0..1] and elements 2-3 from
; %b[2..3]. The expected assembly copies one d-register and moves the upper
; 32-bit lane of the other into s[1].
189 define <4 x i16> @insert_v4i16_2_1(float %tmp, <4 x i16> %b, <4 x i16> %a) {
190 ; CHECK-LABEL: insert_v4i16_2_1:
192 ; CHECK-NEXT: fmov d0, d2
193 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
194 ; CHECK-NEXT: mov v0.s[1], v1.s[1]
195 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
197 %s2 = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; 64-bit vector variant: elements 0-1 come from %b[0..1] and elements 2-3 from
; %a[0..1]. The expected assembly copies one d-register and moves the low
; 32-bit lane of the other into s[1].
201 define <4 x i16> @insert_v4i16_2_2(float %tmp, <4 x i16> %b, <4 x i16> %a) {
202 ; CHECK-LABEL: insert_v4i16_2_2:
204 ; CHECK-NEXT: fmov d0, d1
205 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
206 ; CHECK-NEXT: mov v0.s[1], v2.s[0]
207 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
209 %s2 = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
; Elements 0-3 come from %a[0..3] and elements 4-7 from %b[4..7]. The expected
; assembly is a register copy followed by a 64-bit lane move d[1] <- d[1].
213 define <8 x i16> @insert_v8i16_4_1(float %tmp, <8 x i16> %b, <8 x i16> %a) {
214 ; CHECK-LABEL: insert_v8i16_4_1:
216 ; CHECK-NEXT: mov v0.16b, v2.16b
217 ; CHECK-NEXT: mov v0.d[1], v1.d[1]
219 %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; Elements 0-3 come from %b[0..3] and elements 4-7 from %a[0..3]. The expected
; assembly is a register copy followed by a 64-bit lane move d[1] <- d[0].
223 define <8 x i16> @insert_v8i16_4_2(float %tmp, <8 x i16> %b, <8 x i16> %a) {
224 ; CHECK-LABEL: insert_v8i16_4_2:
226 ; CHECK-NEXT: mov v0.16b, v1.16b
227 ; CHECK-NEXT: mov v0.d[1], v2.d[0]
229 %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
; i32-element variant: elements 0-1 come from %a[0..1] and elements 2-3 from
; %b[2..3]. The expected assembly is a register copy followed by a 64-bit lane
; move d[1] <- d[1].
235 define <4 x i32> @insert_v4i32_2_1(float %tmp, <4 x i32> %b, <4 x i32> %a) {
236 ; CHECK-LABEL: insert_v4i32_2_1:
238 ; CHECK-NEXT: mov v0.16b, v2.16b
239 ; CHECK-NEXT: mov v0.d[1], v1.d[1]
241 %s2 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; i32-element variant: elements 0-1 come from %b[0..1] and elements 2-3 from
; %a[0..1]. The expected assembly is a register copy followed by a 64-bit lane
; move d[1] <- d[0].
245 define <4 x i32> @insert_v4i32_2_2(float %tmp, <4 x i32> %b, <4 x i32> %a) {
246 ; CHECK-LABEL: insert_v4i32_2_2:
248 ; CHECK-NEXT: mov v0.16b, v1.16b
249 ; CHECK-NEXT: mov v0.d[1], v2.d[0]
251 %s2 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
; Loads a <4 x i8> from %a, widens it to 16 lanes (upper lanes undefined), then
; places the four loaded bytes in bytes 0-3 of %b. The expected assembly folds
; the load into a single-lane ld1 targeting s[0].
260 define <16 x i8> @load_v16i8_4_1(float %tmp, <16 x i8> %b, ptr %a) {
261 ; CHECK-LABEL: load_v16i8_4_1:
263 ; CHECK-NEXT: mov v0.16b, v1.16b
264 ; CHECK-NEXT: ld1 { v0.s }[0], [x0]
266 %l = load <4 x i8>, ptr %a
267 %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
268 %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; Loads a <4 x i8> from %a, widens it to 16 lanes, then places the four loaded
; bytes in bytes 2-5 of %b. The group does not line up with a 32-bit lane, so
; the expected assembly is a scalar load plus a two-register tbl with an index
; vector from the constant pool.
272 define <16 x i8> @load_v16i8_4_15(float %tmp, <16 x i8> %b, ptr %a) {
273 ; CHECK-LABEL: load_v16i8_4_15:
275 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $q0_q1
276 ; CHECK-NEXT: adrp x8, .LCPI24_0
277 ; CHECK-NEXT: ldr s0, [x0]
278 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_0]
279 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
281 %l = load <4 x i8>, ptr %a
282 %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
283 %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 0, i32 1, i32 2, i32 3, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; Loads a <4 x i8> from %a, widens it to 16 lanes, then places the four loaded
; bytes in bytes 4-7 of %b. The expected assembly folds the load into a
; single-lane ld1 targeting s[1].
287 define <16 x i8> @load_v16i8_4_2(float %tmp, <16 x i8> %b, ptr %a) {
288 ; CHECK-LABEL: load_v16i8_4_2:
290 ; CHECK-NEXT: mov v0.16b, v1.16b
291 ; CHECK-NEXT: ld1 { v0.s }[1], [x0]
293 %l = load <4 x i8>, ptr %a
294 %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
295 %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; Loads a <4 x i8> from %a, widens it to 16 lanes, then places the four loaded
; bytes in bytes 8-11 of %b. The expected assembly folds the load into a
; single-lane ld1 targeting s[2].
299 define <16 x i8> @load_v16i8_4_3(float %tmp, <16 x i8> %b, ptr %a) {
300 ; CHECK-LABEL: load_v16i8_4_3:
302 ; CHECK-NEXT: mov v0.16b, v1.16b
303 ; CHECK-NEXT: ld1 { v0.s }[2], [x0]
305 %l = load <4 x i8>, ptr %a
306 %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
307 %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31>
; Loads a <4 x i8> from %a, widens it to 16 lanes, then places the four loaded
; bytes in bytes 12-15 of %b. The expected assembly folds the load into a
; single-lane ld1 targeting s[3].
311 define <16 x i8> @load_v16i8_4_4(float %tmp, <16 x i8> %b, ptr %a) {
312 ; CHECK-LABEL: load_v16i8_4_4:
314 ; CHECK-NEXT: mov v0.16b, v1.16b
315 ; CHECK-NEXT: ld1 { v0.s }[3], [x0]
317 %l = load <4 x i8>, ptr %a
318 %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
319 %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 0, i32 1, i32 2, i32 3>
; 64-bit vector variant: loads a <4 x i8> from %a into bytes 0-3 and takes
; bytes 4-7 from %b[4..7]. The expected assembly is a scalar 32-bit load
; followed by a lane move s[1] <- s[1].
323 define <8 x i8> @load_v8i8_4_1(float %tmp, <8 x i8> %b, ptr %a) {
324 ; CHECK-LABEL: load_v8i8_4_1:
326 ; CHECK-NEXT: ldr s0, [x0]
327 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
328 ; CHECK-NEXT: mov v0.s[1], v1.s[1]
329 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
331 %l = load <4 x i8>, ptr %a
332 %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
333 %s2 = shufflevector <8 x i8> %s1, <8 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; 64-bit vector variant: bytes 0-3 come from %b[0..3] and bytes 4-7 from a
; <4 x i8> loaded from %a. The expected assembly currently uses a separate
; scalar load and a lane move into s[1] rather than a single-lane ld1.
337 define <8 x i8> @load_v8i8_4_2(float %tmp, <8 x i8> %b, ptr %a) {
338 ; CHECK-LABEL: load_v8i8_4_2:
340 ; CHECK-NEXT: fmov d0, d1
341 ; CHECK-NEXT: ldr s2, [x0]
342 ; CHECK-NEXT: mov v0.s[1], v2.s[0]
343 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
345 %l = load <4 x i8>, ptr %a
346 %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
347 %s2 = shufflevector <8 x i8> %s1, <8 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
; Loads an <8 x i8> from %a into bytes 0-7 and takes bytes 8-15 from
; %b[8..15]. The expected assembly currently rotates the other operand with
; ext #8, loads 64 bits into d0, and then moves d[1] <- d[0].
351 define <16 x i8> @load_v16i8_8_1(float %tmp, <16 x i8> %b, ptr %a) {
352 ; CHECK-LABEL: load_v16i8_8_1:
354 ; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
355 ; CHECK-NEXT: ldr d0, [x0]
356 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
358 %l = load <8 x i8>, ptr %a
359 %s1 = shufflevector <8 x i8> %l, <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
360 %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; Bytes 0-7 come from %b[0..7] and bytes 8-15 from an <8 x i8> loaded from %a.
; The expected assembly is a register copy, a 64-bit scalar load, and a lane
; move d[1] <- d[0].
364 define <16 x i8> @load_v16i8_8_2(float %tmp, <16 x i8> %b, ptr %a) {
365 ; CHECK-LABEL: load_v16i8_8_2:
367 ; CHECK-NEXT: mov v0.16b, v1.16b
368 ; CHECK-NEXT: ldr d2, [x0]
369 ; CHECK-NEXT: mov v0.d[1], v2.d[0]
371 %l = load <8 x i8>, ptr %a
372 %s1 = shufflevector <8 x i8> %l, <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
373 %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Loads a <2 x i16> from %a, widens it to 8 lanes, then places the two loaded
; elements in elements 0-1 of %b. The expected assembly currently loads the
; two halfwords separately (ldrh + single-lane ld1), narrows with xtn, and
; then does the 32-bit lane move into s[0].
379 define <8 x i16> @load_v8i16_2_1(float %tmp, <8 x i16> %b, ptr %a) {
380 ; CHECK-LABEL: load_v8i16_2_1:
382 ; CHECK-NEXT: ldrh w8, [x0]
383 ; CHECK-NEXT: add x9, x0, #2
384 ; CHECK-NEXT: fmov s0, w8
385 ; CHECK-NEXT: ld1 { v0.h }[2], [x9]
386 ; CHECK-NEXT: xtn v2.4h, v0.4s
387 ; CHECK-NEXT: mov v0.16b, v1.16b
388 ; CHECK-NEXT: mov v0.s[0], v2.s[0]
390 %l = load <2 x i16>, ptr %a
391 %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
392 %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Loads a <2 x i16> from %a, widens it to 8 lanes, then places the two loaded
; elements in elements 1-2 of %b. The pair does not line up with a 32-bit
; lane, so the expected assembly ends in a two-register tbl with an index
; vector from the constant pool.
396 define <8 x i16> @load_v8i16_2_15(float %tmp, <8 x i16> %b, ptr %a) {
397 ; CHECK-LABEL: load_v8i16_2_15:
399 ; CHECK-NEXT: ldrh w8, [x0]
400 ; CHECK-NEXT: add x9, x0, #2
401 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $q0_q1
402 ; CHECK-NEXT: fmov s2, w8
403 ; CHECK-NEXT: adrp x8, .LCPI33_0
404 ; CHECK-NEXT: ld1 { v2.h }[2], [x9]
405 ; CHECK-NEXT: xtn v0.4h, v2.4s
406 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI33_0]
407 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
409 %l = load <2 x i16>, ptr %a
410 %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
411 %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 1, i32 11, i32 12, i32 13, i32 14, i32 15>
; Loads a <2 x i16> from %a, widens it to 8 lanes, then places the two loaded
; elements in elements 2-3 of %b. The expected assembly currently loads the
; two halfwords separately, narrows with xtn, and then does the 32-bit lane
; move into s[1].
415 define <8 x i16> @load_v8i16_2_2(float %tmp, <8 x i16> %b, ptr %a) {
416 ; CHECK-LABEL: load_v8i16_2_2:
418 ; CHECK-NEXT: ldrh w8, [x0]
419 ; CHECK-NEXT: add x9, x0, #2
420 ; CHECK-NEXT: fmov s0, w8
421 ; CHECK-NEXT: ld1 { v0.h }[2], [x9]
422 ; CHECK-NEXT: xtn v2.4h, v0.4s
423 ; CHECK-NEXT: mov v0.16b, v1.16b
424 ; CHECK-NEXT: mov v0.s[1], v2.s[0]
426 %l = load <2 x i16>, ptr %a
427 %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
428 %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 0, i32 1, i32 12, i32 13, i32 14, i32 15>
; Loads a <2 x i16> from %a, widens it to 8 lanes, then places the two loaded
; elements in elements 4-5 of %b. The expected assembly currently loads the
; two halfwords separately, narrows with xtn, and then does the 32-bit lane
; move into s[2].
432 define <8 x i16> @load_v8i16_2_3(float %tmp, <8 x i16> %b, ptr %a) {
433 ; CHECK-LABEL: load_v8i16_2_3:
435 ; CHECK-NEXT: ldrh w8, [x0]
436 ; CHECK-NEXT: add x9, x0, #2
437 ; CHECK-NEXT: fmov s0, w8
438 ; CHECK-NEXT: ld1 { v0.h }[2], [x9]
439 ; CHECK-NEXT: xtn v2.4h, v0.4s
440 ; CHECK-NEXT: mov v0.16b, v1.16b
441 ; CHECK-NEXT: mov v0.s[2], v2.s[0]
443 %l = load <2 x i16>, ptr %a
444 %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
445 %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 14, i32 15>
; Loads a <2 x i16> from %a, widens it to 8 lanes, then places the two loaded
; elements in elements 6-7 of %b. The expected assembly currently loads the
; two halfwords separately, narrows with xtn, and then does the 32-bit lane
; move into s[3].
449 define <8 x i16> @load_v8i16_2_4(float %tmp, <8 x i16> %b, ptr %a) {
450 ; CHECK-LABEL: load_v8i16_2_4:
452 ; CHECK-NEXT: ldrh w8, [x0]
453 ; CHECK-NEXT: add x9, x0, #2
454 ; CHECK-NEXT: fmov s0, w8
455 ; CHECK-NEXT: ld1 { v0.h }[2], [x9]
456 ; CHECK-NEXT: xtn v2.4h, v0.4s
457 ; CHECK-NEXT: mov v0.16b, v1.16b
458 ; CHECK-NEXT: mov v0.s[3], v2.s[0]
460 %l = load <2 x i16>, ptr %a
461 %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
462 %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 0, i32 1>
; 64-bit vector variant: loads a <2 x i16> from %a into elements 0-1 and takes
; elements 2-3 from %b[2..3]. The expected assembly currently uses two
; single-lane halfword loads and a uzp1 before the lane move s[1] <- s[1].
466 define <4 x i16> @load_v4i16_2_1(float %tmp, <4 x i16> %b, ptr %a) {
467 ; CHECK-LABEL: load_v4i16_2_1:
469 ; CHECK-NEXT: ld1 { v0.h }[0], [x0]
470 ; CHECK-NEXT: add x8, x0, #2
471 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
472 ; CHECK-NEXT: ld1 { v0.h }[2], [x8]
473 ; CHECK-NEXT: uzp1 v0.4h, v0.4h, v0.4h
474 ; CHECK-NEXT: mov v0.s[1], v1.s[1]
475 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
477 %l = load <2 x i16>, ptr %a
478 %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
479 %s2 = shufflevector <4 x i16> %s1, <4 x i16> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; 64-bit vector variant: elements 0-1 come from %b[0..1] and elements 2-3 from
; a <2 x i16> loaded from %a. The expected assembly currently uses two
; single-lane halfword loads and a uzp1 before the lane move s[1] <- s[0].
483 define <4 x i16> @load_v4i16_2_2(float %tmp, <4 x i16> %b, ptr %a) {
484 ; CHECK-LABEL: load_v4i16_2_2:
486 ; CHECK-NEXT: ld1 { v0.h }[0], [x0]
487 ; CHECK-NEXT: add x8, x0, #2
488 ; CHECK-NEXT: ld1 { v0.h }[2], [x8]
489 ; CHECK-NEXT: uzp1 v2.4h, v0.4h, v0.4h
490 ; CHECK-NEXT: fmov d0, d1
491 ; CHECK-NEXT: mov v0.s[1], v2.s[0]
492 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
494 %l = load <2 x i16>, ptr %a
495 %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
496 %s2 = shufflevector <4 x i16> %s1, <4 x i16> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
; Loads a <4 x i16> from %a into elements 0-3 and takes elements 4-7 from
; %b[4..7]. The expected assembly currently rotates the other operand with
; ext #8, loads 64 bits into d0, and then moves d[1] <- d[0].
500 define <8 x i16> @load_v8i16_4_1(float %tmp, <8 x i16> %b, ptr %a) {
501 ; CHECK-LABEL: load_v8i16_4_1:
503 ; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
504 ; CHECK-NEXT: ldr d0, [x0]
505 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
507 %l = load <4 x i16>, ptr %a
508 %s1 = shufflevector <4 x i16> %l, <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
509 %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; Elements 0-3 come from %b[0..3] and elements 4-7 from a <4 x i16> loaded
; from %a. The expected assembly is a register copy, a 64-bit scalar load, and
; a lane move d[1] <- d[0].
513 define <8 x i16> @load_v8i16_4_2(float %tmp, <8 x i16> %b, ptr %a) {
514 ; CHECK-LABEL: load_v8i16_4_2:
516 ; CHECK-NEXT: mov v0.16b, v1.16b
517 ; CHECK-NEXT: ldr d2, [x0]
518 ; CHECK-NEXT: mov v0.d[1], v2.d[0]
520 %l = load <4 x i16>, ptr %a
521 %s1 = shufflevector <4 x i16> %l, <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
522 %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
; Loads a <2 x i32> from %a into elements 0-1 and takes elements 2-3 from
; %b[2..3]. The expected assembly currently rotates the other operand with
; ext #8, loads 64 bits into d0, and then moves d[1] <- d[0].
528 define <4 x i32> @load_v4i32_2_1(float %tmp, <4 x i32> %b, ptr %a) {
529 ; CHECK-LABEL: load_v4i32_2_1:
531 ; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
532 ; CHECK-NEXT: ldr d0, [x0]
533 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
535 %l = load <2 x i32>, ptr %a
536 %s1 = shufflevector <2 x i32> %l, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
537 %s2 = shufflevector <4 x i32> %s1, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; Elements 0-1 come from %b[0..1] and elements 2-3 from a <2 x i32> loaded
; from %a. The expected assembly is a register copy, a 64-bit scalar load, and
; a lane move d[1] <- d[0].
541 define <4 x i32> @load_v4i32_2_2(float %tmp, <4 x i32> %b, ptr %a) {
542 ; CHECK-LABEL: load_v4i32_2_2:
544 ; CHECK-NEXT: mov v0.16b, v1.16b
545 ; CHECK-NEXT: ldr d2, [x0]
546 ; CHECK-NEXT: mov v0.d[1], v2.d[0]
548 %l = load <2 x i32>, ptr %a
549 %s1 = shufflevector <2 x i32> %l, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
550 %s2 = shufflevector <4 x i32> %s1, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
554 ; More than a single vector
; Concatenates two <4 x i8> loads (from %a then %b) into one <8 x i8>. The
; expected assembly is a 32-bit scalar load for the first half and a
; single-lane ld1 into s[1] for the second.
556 define <8 x i8> @load2_v4i8(float %tmp, ptr %a, ptr %b) {
557 ; CHECK-LABEL: load2_v4i8:
559 ; CHECK-NEXT: ldr s0, [x0]
560 ; CHECK-NEXT: ld1 { v0.s }[1], [x1]
561 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
563 %la = load <4 x i8>, ptr %a
564 %lb = load <4 x i8>, ptr %b
565 %s1 = shufflevector <4 x i8> %la, <4 x i8> %lb, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Builds a <16 x i8> from three <4 x i8> loads: %a, %b, and the four bytes
; following %a (%c); the last four result bytes select from an undef operand.
; %d is computed but not used by any instruction visible here. The expected
; assembly pairs the two loads from %a into one ldp.
569 define <16 x i8> @load3_v4i8(float %tmp, ptr %a, ptr %b) {
570 ; CHECK-LABEL: load3_v4i8:
572 ; CHECK-NEXT: ldp s0, s1, [x0]
573 ; CHECK-NEXT: ld1 { v0.s }[1], [x1]
574 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
576 %la = load <4 x i8>, ptr %a
577 %lb = load <4 x i8>, ptr %b
578 %c = getelementptr <4 x i8>, ptr %a, i64 1
579 %d = getelementptr <4 x i8>, ptr %b, i64 1
580 %lc = load <4 x i8>, ptr %c
581 %s1 = shufflevector <4 x i8> %la, <4 x i8> %lb, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
582 %s2 = shufflevector <4 x i8> %lc, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
583 %s3 = shufflevector <8 x i8> %s1, <8 x i8> %s2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Builds a <16 x i8> from four <4 x i8> loads in the order %a, %b, %a+4, %b+4.
; The expected assembly pairs the two loads from %a into one ldp and uses a
; post-incremented single-lane ld1 for the two loads from %b.
587 define <16 x i8> @load4_v4i8(float %tmp, ptr %a, ptr %b) {
588 ; CHECK-LABEL: load4_v4i8:
590 ; CHECK-NEXT: ldp s0, s1, [x0]
591 ; CHECK-NEXT: ld1 { v0.s }[1], [x1], #4
592 ; CHECK-NEXT: ld1 { v1.s }[1], [x1]
593 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
595 %la = load <4 x i8>, ptr %a
596 %lb = load <4 x i8>, ptr %b
597 %c = getelementptr <4 x i8>, ptr %a, i64 1
598 %d = getelementptr <4 x i8>, ptr %b, i64 1
599 %lc = load <4 x i8>, ptr %c
600 %ld = load <4 x i8>, ptr %d
601 %s1 = shufflevector <4 x i8> %la, <4 x i8> %lb, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
602 %s2 = shufflevector <4 x i8> %lc, <4 x i8> %ld, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
603 %s3 = shufflevector <8 x i8> %s1, <8 x i8> %s2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Concatenates two <4 x i8> loads into %s1, then uses %s1 for both halves of
; the <16 x i8> result (the concatenated value has two uses). The expected
; assembly builds the low 64 bits once and duplicates them with d[1] <- d[0].
607 define <16 x i8> @load2multi1_v4i8(float %tmp, ptr %a, ptr %b) {
608 ; CHECK-LABEL: load2multi1_v4i8:
610 ; CHECK-NEXT: ldr s0, [x0]
611 ; CHECK-NEXT: ld1 { v0.s }[1], [x1]
612 ; CHECK-NEXT: mov v0.d[1], v0.d[0]
614 %la = load <4 x i8>, ptr %a
615 %lb = load <4 x i8>, ptr %b
616 %s1 = shufflevector <4 x i8> %la, <4 x i8> %lb, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
617 %s3 = shufflevector <8 x i8> %s1, <8 x i8> %s1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Each <4 x i8> load is concatenated with itself (each loaded value is used
; twice), and the two 8-byte results are then concatenated. The expected
; assembly does not use single-lane loads here; it currently widens with
; ushll, duplicates the halves, and narrows with uzp1.
621 define <16 x i8> @load2multi2_v4i8(float %tmp, ptr %a, ptr %b) {
622 ; CHECK-LABEL: load2multi2_v4i8:
624 ; CHECK-NEXT: ldr s0, [x0]
625 ; CHECK-NEXT: ldr s1, [x1]
626 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
627 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0
628 ; CHECK-NEXT: mov v1.d[1], v1.d[0]
629 ; CHECK-NEXT: mov v0.d[1], v0.d[0]
630 ; CHECK-NEXT: uzp1 v0.16b, v0.16b, v1.16b
632 %la = load <4 x i8>, ptr %a
633 %lb = load <4 x i8>, ptr %b
634 %s1 = shufflevector <4 x i8> %la, <4 x i8> %la, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
635 %s2 = shufflevector <4 x i8> %lb, <4 x i8> %lb, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
636 %s3 = shufflevector <8 x i8> %s1, <8 x i8> %s2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Interleaves scalar i8 loads/stores with two <4 x i8> loads (offsets 20 and
; 28 from %i44) whose concatenation is stored as <8 x i8> at offset 22. The
; stores at offsets 20 and 21 sit between the two vector loads and overlap the
; region the first one read. In the expected assembly every load is issued
; before the first store.
; NOTE(review): presumably this guards against the combined vector load being
; moved past an aliasing store; confirm against the originating bug report.
640 define void @loads_before_stores(ptr %i44) {
641 ; CHECK-LABEL: loads_before_stores:
642 ; CHECK: // %bb.0: // %bb
643 ; CHECK-NEXT: ldr s0, [x0, #28]
644 ; CHECK-NEXT: add x8, x0, #20
645 ; CHECK-NEXT: ldrh w9, [x0, #26]
646 ; CHECK-NEXT: ldrh w10, [x0, #24]
647 ; CHECK-NEXT: ld1 { v0.s }[1], [x8]
648 ; CHECK-NEXT: strh w9, [x0, #20]
649 ; CHECK-NEXT: strh w10, [x0, #30]
650 ; CHECK-NEXT: stur d0, [x0, #22]
653 %i45 = getelementptr inbounds i8, ptr %i44, i64 20
654 %i46 = getelementptr inbounds i8, ptr %i44, i64 26
655 %i48 = load i8, ptr %i46, align 1
656 %i49 = getelementptr inbounds i8, ptr %i44, i64 21
657 %i50 = getelementptr inbounds i8, ptr %i44, i64 27
658 %i52 = load i8, ptr %i50, align 1
659 %i53 = getelementptr inbounds i8, ptr %i44, i64 22
660 %i54 = getelementptr inbounds i8, ptr %i44, i64 28
661 %i61 = getelementptr inbounds i8, ptr %i44, i64 24
662 %i62 = getelementptr inbounds i8, ptr %i44, i64 30
663 %i63 = load i8, ptr %i61, align 1
664 %i65 = getelementptr inbounds i8, ptr %i44, i64 25
665 %i66 = getelementptr inbounds i8, ptr %i44, i64 31
666 %i67 = load i8, ptr %i65, align 1
667 %0 = load <4 x i8>, ptr %i45, align 1
668 store i8 %i48, ptr %i45, align 1
669 store i8 %i52, ptr %i49, align 1
670 %1 = load <4 x i8>, ptr %i54, align 1
671 store i8 %i63, ptr %i62, align 1
672 %2 = shufflevector <4 x i8> %1, <4 x i8> %0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
673 store <8 x i8> %2, ptr %i53, align 1
674 store i8 %i67, ptr %i66, align 1