1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+neon < %s | FileCheck %s
4 ; Inserting an element from the bottom 128 bits of an SVE type into a NEON vector should use INS (element) to
5 ; avoid pointless FMOV round trips.
7 ; --------- extraction from nxv16i8
; Lane 0 of the scalable <vscale x 16 x i8> %b is inserted into lane 0 of the fixed
; <8 x i8> %a. The assertions expect one element-to-element `mov v0.b[0], v1.b[0]`,
; bracketed by the two `// kill` annotation lines.
9 define <8 x i8> @test_lane0_nxv16i8(<8 x i8> %a, <vscale x 16 x i8> %b) {
10 ; CHECK-LABEL: test_lane0_nxv16i8:
12 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
13 ; CHECK-NEXT: mov v0.b[0], v1.b[0]
14 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
16 %c = extractelement <vscale x 16 x i8> %b, i32 0
17 %d = insertelement <8 x i8> %a, i8 %c, i32 0
; Lane 15 of the scalable <vscale x 16 x i8> %b is inserted into lane 7 of the fixed
; <8 x i8> %a. The assertions expect one element-to-element `mov v0.b[7], v1.b[15]`,
; bracketed by the two `// kill` annotation lines.
21 define <8 x i8> @test_lane15_nxv16i8(<8 x i8> %a, <vscale x 16 x i8> %b) {
22 ; CHECK-LABEL: test_lane15_nxv16i8:
24 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
25 ; CHECK-NEXT: mov v0.b[7], v1.b[15]
26 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
28 %c = extractelement <vscale x 16 x i8> %b, i32 15
29 %d = insertelement <8 x i8> %a, i8 %c, i32 7
; Lane 0 of the scalable <vscale x 16 x i8> %b is inserted into lane 0 of the fixed
; <16 x i8> %a. The assertions expect one element-to-element `mov v0.b[0], v1.b[0]`.
33 define <16 x i8> @test_q_lane0_nxv16i8(<16 x i8> %a, <vscale x 16 x i8> %b) {
34 ; CHECK-LABEL: test_q_lane0_nxv16i8:
36 ; CHECK-NEXT: mov v0.b[0], v1.b[0]
38 %c = extractelement <vscale x 16 x i8> %b, i32 0
39 %d = insertelement <16 x i8> %a, i8 %c, i32 0
; Lane 15 of the scalable <vscale x 16 x i8> %b is inserted into lane 15 of the fixed
; <16 x i8> %a. The assertions expect one element-to-element `mov v0.b[15], v1.b[15]`.
43 define <16 x i8> @test_q_lane15_nxv16i8(<16 x i8> %a, <vscale x 16 x i8> %b) {
44 ; CHECK-LABEL: test_q_lane15_nxv16i8:
46 ; CHECK-NEXT: mov v0.b[15], v1.b[15]
48 %c = extractelement <vscale x 16 x i8> %b, i32 15
49 %d = insertelement <16 x i8> %a, i8 %c, i32 15
53 ; (negative test) Extracted element is not within Vn
; Lane 16 is the first i8 lane past the low 128 bits, so no single element-to-element
; mov is expected: the assertions match an SVE indexed mov (`mov z1.b, z1.b[16]`),
; an `fmov` into w8, and then an insert of w8 into lane 15 of %a.
54 define <16 x i8> @test_q_lane16_nxv16i8(<16 x i8> %a, <vscale x 16 x i8> %b) {
55 ; CHECK-LABEL: test_q_lane16_nxv16i8:
57 ; CHECK-NEXT: mov z1.b, z1.b[16]
58 ; CHECK-NEXT: fmov w8, s1
59 ; CHECK-NEXT: mov v0.b[15], w8
61 %c = extractelement <vscale x 16 x i8> %b, i32 16
62 %d = insertelement <16 x i8> %a, i8 %c, i32 15
66 ; --------- extraction from nxv8f16
; Lane 0 of the scalable <vscale x 8 x half> %b is inserted into lane 0 of the fixed
; <4 x half> %a. The assertions expect one element-to-element `mov v0.h[0], v1.h[0]`,
; bracketed by the two `// kill` annotation lines.
68 define <4 x half> @test_lane0_nxv8f16(<4 x half> %a, <vscale x 8 x half> %b) {
69 ; CHECK-LABEL: test_lane0_nxv8f16:
71 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
72 ; CHECK-NEXT: mov v0.h[0], v1.h[0]
73 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
75 %c = extractelement <vscale x 8 x half> %b, i32 0
76 %d = insertelement <4 x half> %a, half %c, i32 0
; Lane 7 of the scalable <vscale x 8 x half> %b is inserted into lane 3 of the fixed
; <4 x half> %a. The assertions expect one element-to-element `mov v0.h[3], v1.h[7]`,
; bracketed by the two `// kill` annotation lines.
80 define <4 x half> @test_lane7_nxv8f16(<4 x half> %a, <vscale x 8 x half> %b) {
81 ; CHECK-LABEL: test_lane7_nxv8f16:
83 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
84 ; CHECK-NEXT: mov v0.h[3], v1.h[7]
85 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
87 %c = extractelement <vscale x 8 x half> %b, i32 7
88 %d = insertelement <4 x half> %a, half %c, i32 3
; Lane 0 of the scalable <vscale x 8 x half> %b is inserted into lane 0 of the fixed
; <8 x half> %a. The assertions expect one element-to-element `mov v0.h[0], v1.h[0]`.
92 define <8 x half> @test_q_lane0_nxv8f16(<8 x half> %a, <vscale x 8 x half> %b) {
93 ; CHECK-LABEL: test_q_lane0_nxv8f16:
95 ; CHECK-NEXT: mov v0.h[0], v1.h[0]
97 %c = extractelement <vscale x 8 x half> %b, i32 0
98 %d = insertelement <8 x half> %a, half %c, i32 0
; Lane 7 of the scalable <vscale x 8 x half> %b is inserted into lane 7 of the fixed
; <8 x half> %a. The assertions expect one element-to-element `mov v0.h[7], v1.h[7]`.
102 define <8 x half> @test_q_lane7_nxv8f16(<8 x half> %a, <vscale x 8 x half> %b) {
103 ; CHECK-LABEL: test_q_lane7_nxv8f16:
105 ; CHECK-NEXT: mov v0.h[7], v1.h[7]
107 %c = extractelement <vscale x 8 x half> %b, i32 7
108 %d = insertelement <8 x half> %a, half %c, i32 7
112 ; (negative test) Extracted element is not within Vn
; Lane 8 is the first half lane past the low 128 bits, so the assertions match an SVE
; indexed mov (`mov z1.h, z1.h[8]`) followed by an element mov from lane 0 of v1
; into lane 7 of %a.
113 define <8 x half> @test_q_lane8_nxv8f16(<8 x half> %a, <vscale x 8 x half> %b) {
114 ; CHECK-LABEL: test_q_lane8_nxv8f16:
116 ; CHECK-NEXT: mov z1.h, z1.h[8]
117 ; CHECK-NEXT: mov v0.h[7], v1.h[0]
119 %c = extractelement <vscale x 8 x half> %b, i32 8
120 %d = insertelement <8 x half> %a, half %c, i32 7
124 ; --------- extraction from nxv8bf16
; Lane 0 of the scalable <vscale x 8 x bfloat> %b is inserted into lane 0 of the fixed
; <4 x bfloat> %a. The assertions expect one element-to-element `mov v0.h[0], v1.h[0]`,
; bracketed by the two `// kill` annotation lines.
126 define <4 x bfloat> @test_lane0_nxv8bf16(<4 x bfloat> %a, <vscale x 8 x bfloat> %b) {
127 ; CHECK-LABEL: test_lane0_nxv8bf16:
129 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
130 ; CHECK-NEXT: mov v0.h[0], v1.h[0]
131 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
133 %c = extractelement <vscale x 8 x bfloat> %b, i32 0
134 %d = insertelement <4 x bfloat> %a, bfloat %c, i32 0
; Lane 7 of the scalable <vscale x 8 x bfloat> %b is inserted into lane 3 of the fixed
; <4 x bfloat> %a. The assertions expect one element-to-element `mov v0.h[3], v1.h[7]`,
; bracketed by the two `// kill` annotation lines.
138 define <4 x bfloat> @test_lane7_nxv8bf16(<4 x bfloat> %a, <vscale x 8 x bfloat> %b) {
139 ; CHECK-LABEL: test_lane7_nxv8bf16:
141 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
142 ; CHECK-NEXT: mov v0.h[3], v1.h[7]
143 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
145 %c = extractelement <vscale x 8 x bfloat> %b, i32 7
146 %d = insertelement <4 x bfloat> %a, bfloat %c, i32 3
; Lane 0 of the scalable <vscale x 8 x bfloat> %b is inserted into lane 0 of the fixed
; <8 x bfloat> %a. The assertions expect one element-to-element `mov v0.h[0], v1.h[0]`.
150 define <8 x bfloat> @test_q_lane0_nxv8bf16(<8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
151 ; CHECK-LABEL: test_q_lane0_nxv8bf16:
153 ; CHECK-NEXT: mov v0.h[0], v1.h[0]
155 %c = extractelement <vscale x 8 x bfloat> %b, i32 0
156 %d = insertelement <8 x bfloat> %a, bfloat %c, i32 0
; Lane 7 of the scalable <vscale x 8 x bfloat> %b is inserted into lane 7 of the fixed
; <8 x bfloat> %a. The assertions expect one element-to-element `mov v0.h[7], v1.h[7]`.
160 define <8 x bfloat> @test_q_lane7_nxv8bf16(<8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
161 ; CHECK-LABEL: test_q_lane7_nxv8bf16:
163 ; CHECK-NEXT: mov v0.h[7], v1.h[7]
165 %c = extractelement <vscale x 8 x bfloat> %b, i32 7
166 %d = insertelement <8 x bfloat> %a, bfloat %c, i32 7
170 ; (negative test) Extracted element is not within Vn
; Lane 8 is the first bfloat lane past the low 128 bits, so the assertions match an SVE
; indexed mov (`mov z1.h, z1.h[8]`) followed by an element mov from lane 0 of v1
; into lane 7 of %a.
171 define <8 x bfloat> @test_q_lane8_nxv8bf16(<8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
172 ; CHECK-LABEL: test_q_lane8_nxv8bf16:
174 ; CHECK-NEXT: mov z1.h, z1.h[8]
175 ; CHECK-NEXT: mov v0.h[7], v1.h[0]
177 %c = extractelement <vscale x 8 x bfloat> %b, i32 8
178 %d = insertelement <8 x bfloat> %a, bfloat %c, i32 7
182 ; --------- extraction from nxv8i16
; Lane 0 of the scalable <vscale x 8 x i16> %b is inserted into lane 0 of the fixed
; <4 x i16> %a. The assertions expect one element-to-element `mov v0.h[0], v1.h[0]`,
; bracketed by the two `// kill` annotation lines.
184 define <4 x i16> @test_lane0_nxv8i16(<4 x i16> %a, <vscale x 8 x i16> %b) {
185 ; CHECK-LABEL: test_lane0_nxv8i16:
187 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
188 ; CHECK-NEXT: mov v0.h[0], v1.h[0]
189 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
191 %c = extractelement <vscale x 8 x i16> %b, i32 0
192 %d = insertelement <4 x i16> %a, i16 %c, i32 0
; Lane 7 of the scalable <vscale x 8 x i16> %b is inserted into lane 3 of the fixed
; <4 x i16> %a. The assertions expect one element-to-element `mov v0.h[3], v1.h[7]`,
; bracketed by the two `// kill` annotation lines.
196 define <4 x i16> @test_lane7_nxv8i16(<4 x i16> %a, <vscale x 8 x i16> %b) {
197 ; CHECK-LABEL: test_lane7_nxv8i16:
199 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
200 ; CHECK-NEXT: mov v0.h[3], v1.h[7]
201 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
203 %c = extractelement <vscale x 8 x i16> %b, i32 7
204 %d = insertelement <4 x i16> %a, i16 %c, i32 3
; Lane 0 of the scalable <vscale x 8 x i16> %b is inserted into lane 0 of the fixed
; <8 x i16> %a. The assertions expect one element-to-element `mov v0.h[0], v1.h[0]`.
208 define <8 x i16> @test_q_lane0_nxv8i16(<8 x i16> %a, <vscale x 8 x i16> %b) {
209 ; CHECK-LABEL: test_q_lane0_nxv8i16:
211 ; CHECK-NEXT: mov v0.h[0], v1.h[0]
213 %c = extractelement <vscale x 8 x i16> %b, i32 0
214 %d = insertelement <8 x i16> %a, i16 %c, i32 0
; Lane 7 of the scalable <vscale x 8 x i16> %b is inserted into lane 7 of the fixed
; <8 x i16> %a. The assertions expect one element-to-element `mov v0.h[7], v1.h[7]`.
218 define <8 x i16> @test_q_lane7_nxv8i16(<8 x i16> %a, <vscale x 8 x i16> %b) {
219 ; CHECK-LABEL: test_q_lane7_nxv8i16:
221 ; CHECK-NEXT: mov v0.h[7], v1.h[7]
223 %c = extractelement <vscale x 8 x i16> %b, i32 7
224 %d = insertelement <8 x i16> %a, i16 %c, i32 7
228 ; (negative test) Extracted element is not within Vn
; Lane 8 is the first i16 lane past the low 128 bits, so no single element-to-element
; mov is expected: the assertions match an SVE indexed mov (`mov z1.h, z1.h[8]`),
; an `fmov` into w8, and then an insert of w8 into lane 7 of %a.
229 define <8 x i16> @test_q_lane8_nxv8i16(<8 x i16> %a, <vscale x 8 x i16> %b) {
230 ; CHECK-LABEL: test_q_lane8_nxv8i16:
232 ; CHECK-NEXT: mov z1.h, z1.h[8]
233 ; CHECK-NEXT: fmov w8, s1
234 ; CHECK-NEXT: mov v0.h[7], w8
236 %c = extractelement <vscale x 8 x i16> %b, i32 8
237 %d = insertelement <8 x i16> %a, i16 %c, i32 7
241 ; --------- extraction from nxv4f32
; Lane 0 of the scalable <vscale x 4 x float> %b is inserted into lane 0 of the fixed
; <2 x float> %a. The assertions expect one element-to-element `mov v0.s[0], v1.s[0]`,
; bracketed by the two `// kill` annotation lines.
243 define <2 x float> @test_lane0_nxv4f32(<2 x float> %a, <vscale x 4 x float> %b) {
244 ; CHECK-LABEL: test_lane0_nxv4f32:
246 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
247 ; CHECK-NEXT: mov v0.s[0], v1.s[0]
248 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
250 %c = extractelement <vscale x 4 x float> %b, i32 0
251 %d = insertelement <2 x float> %a, float %c, i32 0
; Lane 3 of the scalable <vscale x 4 x float> %b is inserted into lane 1 of the fixed
; <2 x float> %a. The assertions expect one element-to-element `mov v0.s[1], v1.s[3]`,
; bracketed by the two `// kill` annotation lines.
255 define <2 x float> @test_lane3_nxv4f32(<2 x float> %a, <vscale x 4 x float> %b) {
256 ; CHECK-LABEL: test_lane3_nxv4f32:
258 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
259 ; CHECK-NEXT: mov v0.s[1], v1.s[3]
260 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
262 %c = extractelement <vscale x 4 x float> %b, i32 3
263 %d = insertelement <2 x float> %a, float %c, i32 1
; Lane 0 of the scalable <vscale x 4 x float> %b is inserted into lane 0 of the fixed
; <4 x float> %a. The assertions expect one element-to-element `mov v0.s[0], v1.s[0]`.
267 define <4 x float> @test_q_lane0_nxv4f32(<4 x float> %a, <vscale x 4 x float> %b) {
268 ; CHECK-LABEL: test_q_lane0_nxv4f32:
270 ; CHECK-NEXT: mov v0.s[0], v1.s[0]
272 %c = extractelement <vscale x 4 x float> %b, i32 0
273 %d = insertelement <4 x float> %a, float %c, i32 0
; Lane 3 of the scalable <vscale x 4 x float> %b is inserted into lane 3 of the fixed
; <4 x float> %a. The assertions expect one element-to-element `mov v0.s[3], v1.s[3]`.
277 define <4 x float> @test_q_lane3_nxv4f32(<4 x float> %a, <vscale x 4 x float> %b) {
278 ; CHECK-LABEL: test_q_lane3_nxv4f32:
280 ; CHECK-NEXT: mov v0.s[3], v1.s[3]
282 %c = extractelement <vscale x 4 x float> %b, i32 3
283 %d = insertelement <4 x float> %a, float %c, i32 3
287 ; (negative test) Extracted element is not within Vn
; Lane 4 is the first float lane past the low 128 bits, so the assertions match an SVE
; indexed mov (`mov z1.s, z1.s[4]`) followed by an element mov from lane 0 of v1
; into lane 3 of %a.
288 define <4 x float> @test_q_lane4_nxv4f32(<4 x float> %a, <vscale x 4 x float> %b) {
289 ; CHECK-LABEL: test_q_lane4_nxv4f32:
291 ; CHECK-NEXT: mov z1.s, z1.s[4]
292 ; CHECK-NEXT: mov v0.s[3], v1.s[0]
294 %c = extractelement <vscale x 4 x float> %b, i32 4
295 %d = insertelement <4 x float> %a, float %c, i32 3
299 ; --------- extraction from nxv4i32
; Lane 0 of the scalable <vscale x 4 x i32> %b is inserted into lane 0 of the fixed
; <2 x i32> %a. The assertions expect one element-to-element `mov v0.s[0], v1.s[0]`,
; bracketed by the two `// kill` annotation lines.
301 define <2 x i32> @test_lane0_nxv4i32(<2 x i32> %a, <vscale x 4 x i32> %b) {
302 ; CHECK-LABEL: test_lane0_nxv4i32:
304 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
305 ; CHECK-NEXT: mov v0.s[0], v1.s[0]
306 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
308 %c = extractelement <vscale x 4 x i32> %b, i32 0
309 %d = insertelement <2 x i32> %a, i32 %c, i32 0
; Lane 3 of the scalable <vscale x 4 x i32> %b is inserted into lane 1 of the fixed
; <2 x i32> %a. The assertions expect one element-to-element `mov v0.s[1], v1.s[3]`,
; bracketed by the two `// kill` annotation lines.
313 define <2 x i32> @test_lane3_nxv4i32(<2 x i32> %a, <vscale x 4 x i32> %b) {
314 ; CHECK-LABEL: test_lane3_nxv4i32:
316 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
317 ; CHECK-NEXT: mov v0.s[1], v1.s[3]
318 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
320 %c = extractelement <vscale x 4 x i32> %b, i32 3
321 %d = insertelement <2 x i32> %a, i32 %c, i32 1
; Lane 0 of the scalable <vscale x 4 x i32> %b is inserted into lane 0 of the fixed
; <4 x i32> %a. The assertions expect one element-to-element `mov v0.s[0], v1.s[0]`.
325 define <4 x i32> @test_q_lane0_nxv4i32(<4 x i32> %a, <vscale x 4 x i32> %b) {
326 ; CHECK-LABEL: test_q_lane0_nxv4i32:
328 ; CHECK-NEXT: mov v0.s[0], v1.s[0]
330 %c = extractelement <vscale x 4 x i32> %b, i32 0
331 %d = insertelement <4 x i32> %a, i32 %c, i32 0
; Lane 3 of the scalable <vscale x 4 x i32> %b is inserted into lane 3 of the fixed
; <4 x i32> %a. The assertions expect one element-to-element `mov v0.s[3], v1.s[3]`.
335 define <4 x i32> @test_q_lane3_nxv4i32(<4 x i32> %a, <vscale x 4 x i32> %b) {
336 ; CHECK-LABEL: test_q_lane3_nxv4i32:
338 ; CHECK-NEXT: mov v0.s[3], v1.s[3]
340 %c = extractelement <vscale x 4 x i32> %b, i32 3
341 %d = insertelement <4 x i32> %a, i32 %c, i32 3
345 ; (negative test) Extracted element is not within Vn
; Lane 4 is the first i32 lane past the low 128 bits, so no single element-to-element
; mov is expected: the assertions match an SVE indexed mov (`mov z1.s, z1.s[4]`),
; an `fmov` into w8, and then an insert of w8 into lane 3 of %a.
346 define <4 x i32> @test_q_lane4_nxv4i32(<4 x i32> %a, <vscale x 4 x i32> %b) {
347 ; CHECK-LABEL: test_q_lane4_nxv4i32:
349 ; CHECK-NEXT: mov z1.s, z1.s[4]
350 ; CHECK-NEXT: fmov w8, s1
351 ; CHECK-NEXT: mov v0.s[3], w8
353 %c = extractelement <vscale x 4 x i32> %b, i32 4
354 %d = insertelement <4 x i32> %a, i32 %c, i32 3
358 ; --------- extraction from nxv2f64
; Lane 0 of the scalable <vscale x 2 x double> %b is inserted into the only lane of the
; fixed <1 x double> %a. The assertions expect one element-to-element
; `mov v0.d[0], v1.d[0]` followed by a single `// kill` annotation line.
360 define <1 x double> @test_lane0_nxv2f64(<1 x double> %a, <vscale x 2 x double> %b) {
361 ; CHECK-LABEL: test_lane0_nxv2f64:
363 ; CHECK-NEXT: mov v0.d[0], v1.d[0]
364 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
366 %c = extractelement <vscale x 2 x double> %b, i32 0
367 %d = insertelement <1 x double> %a, double %c, i32 0
; Lane 1 of the scalable <vscale x 2 x double> %b is inserted into the only lane of the
; fixed <1 x double> %a. The assertions expect one element-to-element
; `mov v0.d[0], v1.d[1]` followed by a single `// kill` annotation line.
371 define <1 x double> @test_lane1_nxv2f64(<1 x double> %a, <vscale x 2 x double> %b) {
372 ; CHECK-LABEL: test_lane1_nxv2f64:
374 ; CHECK-NEXT: mov v0.d[0], v1.d[1]
375 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
377 %c = extractelement <vscale x 2 x double> %b, i32 1
378 %d = insertelement <1 x double> %a, double %c, i32 0
; Lane 0 of the scalable <vscale x 2 x double> %b is inserted into lane 0 of the fixed
; <2 x double> %a. The assertions expect one element-to-element `mov v0.d[0], v1.d[0]`.
382 define <2 x double> @test_q_lane0_nxv2f64(<2 x double> %a, <vscale x 2 x double> %b) {
383 ; CHECK-LABEL: test_q_lane0_nxv2f64:
385 ; CHECK-NEXT: mov v0.d[0], v1.d[0]
387 %c = extractelement <vscale x 2 x double> %b, i32 0
388 %d = insertelement <2 x double> %a, double %c, i32 0
; Lane 1 of the scalable <vscale x 2 x double> %b is inserted into lane 1 of the fixed
; <2 x double> %a. The assertions expect one element-to-element `mov v0.d[1], v1.d[1]`.
392 define <2 x double> @test_q_lane1_nxv2f64(<2 x double> %a, <vscale x 2 x double> %b) {
393 ; CHECK-LABEL: test_q_lane1_nxv2f64:
395 ; CHECK-NEXT: mov v0.d[1], v1.d[1]
397 %c = extractelement <vscale x 2 x double> %b, i32 1
398 %d = insertelement <2 x double> %a, double %c, i32 1
402 ; (negative test) Extracted element is not within Vn
; Lane 2 is the first double lane past the low 128 bits, so the assertions match an SVE
; indexed mov (`mov z1.d, z1.d[2]`) followed by an element mov from lane 0 of v1
; into lane 1 of %a.
403 define <2 x double> @test_q_lane2_nxv2f64(<2 x double> %a, <vscale x 2 x double> %b) {
404 ; CHECK-LABEL: test_q_lane2_nxv2f64:
406 ; CHECK-NEXT: mov z1.d, z1.d[2]
407 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
409 %c = extractelement <vscale x 2 x double> %b, i32 2
410 %d = insertelement <2 x double> %a, double %c, i32 1
414 ; --------- extraction from nxv2i64
; Lane 0 of the scalable <vscale x 2 x i64> %b is inserted into the only lane of the
; fixed <1 x i64> %a. The assertions expect one element-to-element
; `mov v0.d[0], v1.d[0]` followed by a single `// kill` annotation line.
416 define <1 x i64> @test_lane0_nxv2i64(<1 x i64> %a, <vscale x 2 x i64> %b) {
417 ; CHECK-LABEL: test_lane0_nxv2i64:
419 ; CHECK-NEXT: mov v0.d[0], v1.d[0]
420 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
422 %c = extractelement <vscale x 2 x i64> %b, i32 0
423 %d = insertelement <1 x i64> %a, i64 %c, i32 0
; Lane 1 of the scalable <vscale x 2 x i64> %b is inserted into the only lane of the
; fixed <1 x i64> %a. The assertions expect one element-to-element
; `mov v0.d[0], v1.d[1]` followed by a single `// kill` annotation line.
427 define <1 x i64> @test_lane1_nxv2i64(<1 x i64> %a, <vscale x 2 x i64> %b) {
428 ; CHECK-LABEL: test_lane1_nxv2i64:
430 ; CHECK-NEXT: mov v0.d[0], v1.d[1]
431 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
433 %c = extractelement <vscale x 2 x i64> %b, i32 1
434 %d = insertelement <1 x i64> %a, i64 %c, i32 0
; Lane 0 of the scalable <vscale x 2 x i64> %b is inserted into lane 0 of the fixed
; <2 x i64> %a. The assertions expect one element-to-element `mov v0.d[0], v1.d[0]`.
438 define <2 x i64> @test_q_lane0_nxv2i64(<2 x i64> %a, <vscale x 2 x i64> %b) {
439 ; CHECK-LABEL: test_q_lane0_nxv2i64:
441 ; CHECK-NEXT: mov v0.d[0], v1.d[0]
443 %c = extractelement <vscale x 2 x i64> %b, i32 0
444 %d = insertelement <2 x i64> %a, i64 %c, i32 0
; Lane 1 of the scalable <vscale x 2 x i64> %b is inserted into lane 1 of the fixed
; <2 x i64> %a. The assertions expect one element-to-element `mov v0.d[1], v1.d[1]`.
448 define <2 x i64> @test_q_lane1_nxv2i64(<2 x i64> %a, <vscale x 2 x i64> %b) {
449 ; CHECK-LABEL: test_q_lane1_nxv2i64:
451 ; CHECK-NEXT: mov v0.d[1], v1.d[1]
453 %c = extractelement <vscale x 2 x i64> %b, i32 1
454 %d = insertelement <2 x i64> %a, i64 %c, i32 1
458 ; (negative test) Extracted element is not within Vn
; Lane 2 is the first i64 lane past the low 128 bits, so no single element-to-element
; mov is expected: the assertions match an SVE indexed mov (`mov z1.d, z1.d[2]`),
; an `fmov` into x8, and then an insert of x8 into lane 1 of %a.
459 define <2 x i64> @test_q_lane2_nxv2i64(<2 x i64> %a, <vscale x 2 x i64> %b) {
460 ; CHECK-LABEL: test_q_lane2_nxv2i64:
462 ; CHECK-NEXT: mov z1.d, z1.d[2]
463 ; CHECK-NEXT: fmov x8, d1
464 ; CHECK-NEXT: mov v0.d[1], x8
466 %c = extractelement <vscale x 2 x i64> %b, i32 2
467 %d = insertelement <2 x i64> %a, i64 %c, i32 1