1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft -mattr=+neon -verify-machineinstrs | FileCheck %s
4 define <8 x i8> @v_dup8(i8 %A) nounwind {
7 ; CHECK-NEXT: vdup.8 d16, r0
8 ; CHECK-NEXT: vmov r0, r1, d16
9 ; CHECK-NEXT: mov pc, lr
10 %tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
11 %tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1
12 %tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2
13 %tmp4 = insertelement <8 x i8> %tmp3, i8 %A, i32 3
14 %tmp5 = insertelement <8 x i8> %tmp4, i8 %A, i32 4
15 %tmp6 = insertelement <8 x i8> %tmp5, i8 %A, i32 5
16 %tmp7 = insertelement <8 x i8> %tmp6, i8 %A, i32 6
17 %tmp8 = insertelement <8 x i8> %tmp7, i8 %A, i32 7
21 define <4 x i16> @v_dup16(i16 %A) nounwind {
22 ; CHECK-LABEL: v_dup16:
24 ; CHECK-NEXT: vdup.16 d16, r0
25 ; CHECK-NEXT: vmov r0, r1, d16
26 ; CHECK-NEXT: mov pc, lr
27 %tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
28 %tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1
29 %tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2
30 %tmp4 = insertelement <4 x i16> %tmp3, i16 %A, i32 3
34 define <2 x i32> @v_dup32(i32 %A) nounwind {
35 ; CHECK-LABEL: v_dup32:
37 ; CHECK-NEXT: vdup.32 d16, r0
38 ; CHECK-NEXT: vmov r0, r1, d16
39 ; CHECK-NEXT: mov pc, lr
40 %tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
41 %tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1
45 define <2 x float> @v_dupfloat(float %A) nounwind {
46 ; CHECK-LABEL: v_dupfloat:
48 ; CHECK-NEXT: vdup.32 d16, r0
49 ; CHECK-NEXT: vmov r0, r1, d16
50 ; CHECK-NEXT: mov pc, lr
51 %tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0
52 %tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1
56 define <16 x i8> @v_dupQ8(i8 %A) nounwind {
57 ; CHECK-LABEL: v_dupQ8:
59 ; CHECK-NEXT: vdup.8 q8, r0
60 ; CHECK-NEXT: vmov r0, r1, d16
61 ; CHECK-NEXT: vmov r2, r3, d17
62 ; CHECK-NEXT: mov pc, lr
63 %tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
64 %tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1
65 %tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2
66 %tmp4 = insertelement <16 x i8> %tmp3, i8 %A, i32 3
67 %tmp5 = insertelement <16 x i8> %tmp4, i8 %A, i32 4
68 %tmp6 = insertelement <16 x i8> %tmp5, i8 %A, i32 5
69 %tmp7 = insertelement <16 x i8> %tmp6, i8 %A, i32 6
70 %tmp8 = insertelement <16 x i8> %tmp7, i8 %A, i32 7
71 %tmp9 = insertelement <16 x i8> %tmp8, i8 %A, i32 8
72 %tmp10 = insertelement <16 x i8> %tmp9, i8 %A, i32 9
73 %tmp11 = insertelement <16 x i8> %tmp10, i8 %A, i32 10
74 %tmp12 = insertelement <16 x i8> %tmp11, i8 %A, i32 11
75 %tmp13 = insertelement <16 x i8> %tmp12, i8 %A, i32 12
76 %tmp14 = insertelement <16 x i8> %tmp13, i8 %A, i32 13
77 %tmp15 = insertelement <16 x i8> %tmp14, i8 %A, i32 14
78 %tmp16 = insertelement <16 x i8> %tmp15, i8 %A, i32 15
82 define <8 x i16> @v_dupQ16(i16 %A) nounwind {
83 ; CHECK-LABEL: v_dupQ16:
85 ; CHECK-NEXT: vdup.16 q8, r0
86 ; CHECK-NEXT: vmov r0, r1, d16
87 ; CHECK-NEXT: vmov r2, r3, d17
88 ; CHECK-NEXT: mov pc, lr
89 %tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
90 %tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1
91 %tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2
92 %tmp4 = insertelement <8 x i16> %tmp3, i16 %A, i32 3
93 %tmp5 = insertelement <8 x i16> %tmp4, i16 %A, i32 4
94 %tmp6 = insertelement <8 x i16> %tmp5, i16 %A, i32 5
95 %tmp7 = insertelement <8 x i16> %tmp6, i16 %A, i32 6
96 %tmp8 = insertelement <8 x i16> %tmp7, i16 %A, i32 7
100 define <4 x i32> @v_dupQ32(i32 %A) nounwind {
101 ; CHECK-LABEL: v_dupQ32:
103 ; CHECK-NEXT: mov r1, r0
104 ; CHECK-NEXT: mov r2, r0
105 ; CHECK-NEXT: mov r3, r0
106 ; CHECK-NEXT: mov pc, lr
107 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
108 %tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
109 %tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2
110 %tmp4 = insertelement <4 x i32> %tmp3, i32 %A, i32 3
114 define <4 x float> @v_dupQfloat(float %A) nounwind {
115 ; CHECK-LABEL: v_dupQfloat:
117 ; CHECK-NEXT: vdup.32 q8, r0
118 ; CHECK-NEXT: vmov r0, r1, d16
119 ; CHECK-NEXT: vmov r2, r3, d17
120 ; CHECK-NEXT: mov pc, lr
121 %tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0
122 %tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1
123 %tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2
124 %tmp4 = insertelement <4 x float> %tmp3, float %A, i32 3
125 ret <4 x float> %tmp4
128 ; Check that vdup is also selected when the splat is built with insertelement + shufflevector.
130 define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
131 ; CHECK-LABEL: v_shuffledup8:
133 ; CHECK-NEXT: vdup.8 d16, r0
134 ; CHECK-NEXT: vmov r0, r1, d16
135 ; CHECK-NEXT: mov pc, lr
136 %tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0
137 %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
141 define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
142 ; CHECK-LABEL: v_shuffledup16:
144 ; CHECK-NEXT: vdup.16 d16, r0
145 ; CHECK-NEXT: vmov r0, r1, d16
146 ; CHECK-NEXT: mov pc, lr
147 %tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0
148 %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
152 define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
153 ; CHECK-LABEL: v_shuffledup32:
155 ; CHECK-NEXT: vdup.32 d16, r0
156 ; CHECK-NEXT: vmov r0, r1, d16
157 ; CHECK-NEXT: mov pc, lr
158 %tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0
159 %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
163 define <2 x float> @v_shuffledupfloat(float %A) nounwind {
164 ; CHECK-LABEL: v_shuffledupfloat:
166 ; CHECK-NEXT: vdup.32 d16, r0
167 ; CHECK-NEXT: vmov r0, r1, d16
168 ; CHECK-NEXT: mov pc, lr
169 %tmp1 = insertelement <2 x float> undef, float %A, i32 0
170 %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
171 ret <2 x float> %tmp2
174 define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
175 ; CHECK-LABEL: v_shuffledupQ8:
177 ; CHECK-NEXT: vdup.8 q8, r0
178 ; CHECK-NEXT: vmov r0, r1, d16
179 ; CHECK-NEXT: vmov r2, r3, d17
180 ; CHECK-NEXT: mov pc, lr
181 %tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0
182 %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer
186 define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
187 ; CHECK-LABEL: v_shuffledupQ16:
189 ; CHECK-NEXT: vdup.16 q8, r0
190 ; CHECK-NEXT: vmov r0, r1, d16
191 ; CHECK-NEXT: vmov r2, r3, d17
192 ; CHECK-NEXT: mov pc, lr
193 %tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0
194 %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer
198 define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
199 ; CHECK-LABEL: v_shuffledupQ32:
201 ; CHECK-NEXT: vdup.32 q8, r0
202 ; CHECK-NEXT: vmov r0, r1, d16
203 ; CHECK-NEXT: vmov r2, r3, d17
204 ; CHECK-NEXT: mov pc, lr
205 %tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0
206 %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
210 define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
211 ; CHECK-LABEL: v_shuffledupQfloat:
213 ; CHECK-NEXT: vdup.32 q8, r0
214 ; CHECK-NEXT: vmov r0, r1, d16
215 ; CHECK-NEXT: vmov r2, r3, d17
216 ; CHECK-NEXT: mov pc, lr
217 %tmp1 = insertelement <4 x float> undef, float %A, i32 0
218 %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
219 ret <4 x float> %tmp2
222 define arm_aapcs_vfpcc <8 x i8> @vduplane8(<8 x i8> %A) nounwind {
223 ; CHECK-LABEL: vduplane8:
225 ; CHECK-NEXT: vdup.8 d0, d0[1]
226 ; CHECK-NEXT: mov pc, lr
227 %tmp2 = shufflevector <8 x i8> %A, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
231 define arm_aapcs_vfpcc <4 x i16> @vduplane16(<4 x i16> %A) nounwind {
232 ; CHECK-LABEL: vduplane16:
234 ; CHECK-NEXT: vdup.16 d0, d0[1]
235 ; CHECK-NEXT: mov pc, lr
236 %tmp2 = shufflevector <4 x i16> %A, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
240 define arm_aapcs_vfpcc <2 x i32> @vduplane32(<2 x i32> %A) nounwind {
241 ; CHECK-LABEL: vduplane32:
243 ; CHECK-NEXT: vdup.32 d0, d0[1]
244 ; CHECK-NEXT: mov pc, lr
245 %tmp2 = shufflevector <2 x i32> %A, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
249 define arm_aapcs_vfpcc <2 x float> @vduplanefloat(<2 x float> %A) nounwind {
250 ; CHECK-LABEL: vduplanefloat:
252 ; CHECK-NEXT: vdup.32 d0, d0[1]
253 ; CHECK-NEXT: mov pc, lr
254 %tmp2 = shufflevector <2 x float> %A, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
255 ret <2 x float> %tmp2
258 define arm_aapcs_vfpcc <16 x i8> @vduplaneQ8(<8 x i8> %A) nounwind {
259 ; CHECK-LABEL: vduplaneQ8:
261 ; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
262 ; CHECK-NEXT: vdup.8 q0, d0[1]
263 ; CHECK-NEXT: mov pc, lr
264 %tmp2 = shufflevector <8 x i8> %A, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
268 define arm_aapcs_vfpcc <8 x i16> @vduplaneQ16(<4 x i16> %A) nounwind {
269 ; CHECK-LABEL: vduplaneQ16:
271 ; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
272 ; CHECK-NEXT: vdup.16 q0, d0[1]
273 ; CHECK-NEXT: mov pc, lr
274 %tmp2 = shufflevector <4 x i16> %A, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
278 define arm_aapcs_vfpcc <4 x i32> @vduplaneQ32(<2 x i32> %A) nounwind {
279 ; CHECK-LABEL: vduplaneQ32:
281 ; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
282 ; CHECK-NEXT: vdup.32 q0, d0[1]
283 ; CHECK-NEXT: mov pc, lr
284 %tmp2 = shufflevector <2 x i32> %A, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
288 define arm_aapcs_vfpcc <4 x float> @vduplaneQfloat(<2 x float> %A) nounwind {
289 ; CHECK-LABEL: vduplaneQfloat:
291 ; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0
292 ; CHECK-NEXT: vdup.32 q0, d0[1]
293 ; CHECK-NEXT: mov pc, lr
294 %tmp2 = shufflevector <2 x float> %A, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
295 ret <4 x float> %tmp2
298 define <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
300 ; CHECK: @ %bb.0: @ %entry
301 ; CHECK-NEXT: mov r0, r2
302 ; CHECK-NEXT: mov r1, r3
303 ; CHECK-NEXT: mov pc, lr
305 %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
309 define <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone {
311 ; CHECK: @ %bb.0: @ %entry
312 ; CHECK-NEXT: mov r2, r0
313 ; CHECK-NEXT: mov r3, r1
314 ; CHECK-NEXT: mov pc, lr
316 %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
320 define <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone {
322 ; CHECK: @ %bb.0: @ %entry
323 ; CHECK-NEXT: mov r0, r2
324 ; CHECK-NEXT: mov r1, r3
325 ; CHECK-NEXT: mov pc, lr
327 %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 1, i32 1>
331 define <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone {
333 ; CHECK: @ %bb.0: @ %entry
334 ; CHECK-NEXT: mov r2, r0
335 ; CHECK-NEXT: mov r3, r1
336 ; CHECK-NEXT: mov pc, lr
338 %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 0, i32 0>
343 define void @redundantVdup(ptr %ptr) nounwind {
344 ; CHECK-LABEL: redundantVdup:
346 ; CHECK-NEXT: vmov.i8 d16, #0x80
347 ; CHECK-NEXT: vstr d16, [r0]
348 ; CHECK-NEXT: mov pc, lr
349 %1 = insertelement <8 x i8> undef, i8 -128, i32 0
350 %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
351 store <8 x i8> %2, ptr %ptr, align 8
355 define <4 x i32> @tdupi(i32 %x, i32 %y) {
356 ; CHECK-LABEL: tdupi:
358 ; CHECK-NEXT: mov r3, r1
359 ; CHECK-NEXT: mov r1, r0
360 ; CHECK-NEXT: mov r2, r0
361 ; CHECK-NEXT: mov pc, lr
362 %1 = insertelement <4 x i32> undef, i32 %x, i32 0
363 %2 = insertelement <4 x i32> %1, i32 %x, i32 1
364 %3 = insertelement <4 x i32> %2, i32 %x, i32 2
365 %4 = insertelement <4 x i32> %3, i32 %y, i32 3
369 define <4 x float> @tdupf(float %x, float %y) {
370 ; CHECK-LABEL: tdupf:
372 ; CHECK-NEXT: vdup.32 q0, r0
373 ; CHECK-NEXT: vmov s3, r1
374 ; CHECK-NEXT: vmov r0, r1, d0
375 ; CHECK-NEXT: vmov r2, r3, d1
376 ; CHECK-NEXT: mov pc, lr
377 %1 = insertelement <4 x float> undef, float %x, i32 0
378 %2 = insertelement <4 x float> %1, float %x, i32 1
379 %3 = insertelement <4 x float> %2, float %x, i32 2
380 %4 = insertelement <4 x float> %3, float %y, i32 3
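384 ; This test was written to check that, when splatting an element from one vector into another,
385 ; the value isn't moved out to GPRs first. NOTE(review): with -float-abi=soft the checks below do expect a vmov.32 of the lane into r0 - confirm the test still covers the original intent.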
386 define <4 x i32> @tduplane(<4 x i32> %invec) {
387 ; CHECK-LABEL: tduplane:
389 ; CHECK-NEXT: vmov d16, r0, r1
390 ; CHECK-NEXT: mov r3, #255
391 ; CHECK-NEXT: vmov.32 r0, d16[1]
392 ; CHECK-NEXT: mov r1, r0
393 ; CHECK-NEXT: mov r2, r0
394 ; CHECK-NEXT: mov pc, lr
395 %in = extractelement <4 x i32> %invec, i32 1
396 %1 = insertelement <4 x i32> undef, i32 %in, i32 0
397 %2 = insertelement <4 x i32> %1, i32 %in, i32 1
398 %3 = insertelement <4 x i32> %2, i32 %in, i32 2
399 %4 = insertelement <4 x i32> %3, i32 255, i32 3
403 define <2 x float> @check_f32(<4 x float> %v) nounwind {
404 ; CHECK-LABEL: check_f32:
406 ; CHECK-NEXT: vmov d16, r2, r3
407 ; CHECK-NEXT: vdup.32 d16, d16[1]
408 ; CHECK-NEXT: vmov r0, r1, d16
409 ; CHECK-NEXT: mov pc, lr
410 %x = extractelement <4 x float> %v, i32 3
411 %1 = insertelement <2 x float> undef, float %x, i32 0
412 %2 = insertelement <2 x float> %1, float %x, i32 1
416 define <2 x i32> @check_i32(<4 x i32> %v) nounwind {
417 ; CHECK-LABEL: check_i32:
419 ; CHECK-NEXT: vmov d16, r2, r3
420 ; CHECK-NEXT: vdup.32 d16, d16[1]
421 ; CHECK-NEXT: vmov r0, r1, d16
422 ; CHECK-NEXT: mov pc, lr
423 %x = extractelement <4 x i32> %v, i32 3
424 %1 = insertelement <2 x i32> undef, i32 %x, i32 0
425 %2 = insertelement <2 x i32> %1, i32 %x, i32 1
429 define <4 x i16> @check_i16(<8 x i16> %v) nounwind {
430 ; CHECK-LABEL: check_i16:
432 ; CHECK-NEXT: vmov d16, r0, r1
433 ; CHECK-NEXT: vdup.16 d16, d16[3]
434 ; CHECK-NEXT: vmov r0, r1, d16
435 ; CHECK-NEXT: mov pc, lr
436 %x = extractelement <8 x i16> %v, i32 3
437 %1 = insertelement <4 x i16> undef, i16 %x, i32 0
438 %2 = insertelement <4 x i16> %1, i16 %x, i32 1
442 define <8 x i8> @check_i8(<16 x i8> %v) nounwind {
443 ; CHECK-LABEL: check_i8:
445 ; CHECK-NEXT: vmov d16, r0, r1
446 ; CHECK-NEXT: vdup.8 d16, d16[3]
447 ; CHECK-NEXT: vmov r0, r1, d16
448 ; CHECK-NEXT: mov pc, lr
449 %x = extractelement <16 x i8> %v, i32 3
450 %1 = insertelement <8 x i8> undef, i8 %x, i32 0
451 %2 = insertelement <8 x i8> %1, i8 %x, i32 1
455 ; Check that an SPR splat produces a vdup.
457 define <2 x float> @check_spr_splat2(<2 x float> %p, i16 %q) {
458 ; CHECK-LABEL: check_spr_splat2:
460 ; CHECK-NEXT: lsl r2, r2, #16
461 ; CHECK-NEXT: vmov d16, r0, r1
462 ; CHECK-NEXT: asr r2, r2, #16
463 ; CHECK-NEXT: vmov s0, r2
464 ; CHECK-NEXT: vcvt.f32.s32 s0, s0
465 ; CHECK-NEXT: vdup.32 d17, d0[0]
466 ; CHECK-NEXT: vsub.f32 d16, d17, d16
467 ; CHECK-NEXT: vmov r0, r1, d16
468 ; CHECK-NEXT: mov pc, lr
469 %conv = sitofp i16 %q to float
470 %splat.splatinsert = insertelement <2 x float> undef, float %conv, i32 0
471 %splat.splat = shufflevector <2 x float> %splat.splatinsert, <2 x float> undef, <2 x i32> zeroinitializer
472 %sub = fsub <2 x float> %splat.splat, %p
476 define <4 x float> @check_spr_splat4(<4 x float> %p, i16 %q) {
477 ; CHECK-LABEL: check_spr_splat4:
479 ; CHECK-NEXT: ldrsh r12, [sp]
480 ; CHECK-NEXT: vmov d17, r2, r3
481 ; CHECK-NEXT: vmov d16, r0, r1
482 ; CHECK-NEXT: vmov s0, r12
483 ; CHECK-NEXT: vcvt.f32.s32 s0, s0
484 ; CHECK-NEXT: vdup.32 q9, d0[0]
485 ; CHECK-NEXT: vsub.f32 q8, q9, q8
486 ; CHECK-NEXT: vmov r0, r1, d16
487 ; CHECK-NEXT: vmov r2, r3, d17
488 ; CHECK-NEXT: mov pc, lr
489 %conv = sitofp i16 %q to float
490 %splat.splatinsert = insertelement <4 x float> undef, float %conv, i32 0
491 %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
492 %sub = fsub <4 x float> %splat.splat, %p
495 ; Same codegen as the test above: the converted scalar is splatted with vdup.32 from d0[0], so the shuffle index is irrelevant.
496 define <4 x float> @check_spr_splat4_lane1(<4 x float> %p, i16 %q) {
497 ; CHECK-LABEL: check_spr_splat4_lane1:
499 ; CHECK-NEXT: ldrsh r12, [sp]
500 ; CHECK-NEXT: vmov d17, r2, r3
501 ; CHECK-NEXT: vmov d16, r0, r1
502 ; CHECK-NEXT: vmov s0, r12
503 ; CHECK-NEXT: vcvt.f32.s32 s0, s0
504 ; CHECK-NEXT: vdup.32 q9, d0[0]
505 ; CHECK-NEXT: vsub.f32 q8, q9, q8
506 ; CHECK-NEXT: vmov r0, r1, d16
507 ; CHECK-NEXT: vmov r2, r3, d17
508 ; CHECK-NEXT: mov pc, lr
509 %conv = sitofp i16 %q to float
510 %splat.splatinsert = insertelement <4 x float> undef, float %conv, i32 1
511 %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
512 %sub = fsub <4 x float> %splat.splat, %p
516 ; Also make sure we don't crash on variable-index extractelements, where we
517 ; could almost have generated a vdup.
519 define <8 x i8> @check_i8_varidx(<16 x i8> %v, i32 %idx) {
520 ; CHECK-LABEL: check_i8_varidx:
522 ; CHECK-NEXT: .save {r11}
523 ; CHECK-NEXT: push {r11}
524 ; CHECK-NEXT: .setfp r11, sp
525 ; CHECK-NEXT: mov r11, sp
526 ; CHECK-NEXT: .pad #28
527 ; CHECK-NEXT: sub sp, sp, #28
528 ; CHECK-NEXT: bic sp, sp, #15
529 ; CHECK-NEXT: ldr r12, [r11, #4]
530 ; CHECK-NEXT: vmov d17, r2, r3
531 ; CHECK-NEXT: vmov d16, r0, r1
532 ; CHECK-NEXT: mov r1, sp
533 ; CHECK-NEXT: and r0, r12, #15
534 ; CHECK-NEXT: vst1.64 {d16, d17}, [r1:128], r0
535 ; CHECK-NEXT: vld1.8 {d16[]}, [r1]
536 ; CHECK-NEXT: vmov r0, r1, d16
537 ; CHECK-NEXT: mov sp, r11
538 ; CHECK-NEXT: pop {r11}
539 ; CHECK-NEXT: mov pc, lr
540 %x = extractelement <16 x i8> %v, i32 %idx
541 %1 = insertelement <8 x i8> undef, i8 %x, i32 0
542 %2 = insertelement <8 x i8> %1, i8 %x, i32 1