1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
; Lanes 0 and 2 of %src1 are sign-extended to i64 and multiplied by %src2.
; The scalar is splatted while still i32 (insertelement into lane 0 plus a
; zero-mask shuffle) and only then sign-extended.
; Expected output: the scalar is moved into lanes 0 and 2 of a q register and
; a single vmullb.s32 (vector operand first, splat second) forms the product.
4 define arm_aapcs_vfpcc <2 x i64> @sext32_0246_0ext(<4 x i32> %src1, i32 %src2) {
5 ; CHECK-LABEL: sext32_0246_0ext:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
8 ; CHECK-NEXT: vmullb.s32 q1, q0, q2
9 ; CHECK-NEXT: vmov q0, q1
12 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
13 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
14 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
15 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
16 %out2 = sext <2 x i32> %shuf2 to <2 x i64>
17 %out = mul <2 x i64> %out1, %out2
; Commuted multiply: the sign-extended i32 splat of %src2 is the first mul
; operand and the sign-extended lanes 0 and 2 of %src1 are the second.
; Expected output: still a single vmullb.s32, with the splat register as the
; first source operand.
21 define arm_aapcs_vfpcc <2 x i64> @sext32_0ext_0246(<4 x i32> %src1, i32 %src2) {
22 ; CHECK-LABEL: sext32_0ext_0246:
23 ; CHECK: @ %bb.0: @ %entry
24 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
25 ; CHECK-NEXT: vmullb.s32 q1, q2, q0
26 ; CHECK-NEXT: vmov q0, q1
29 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
30 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
31 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
32 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
33 %out2 = sext <2 x i32> %shuf2 to <2 x i64>
34 %out = mul <2 x i64> %out2, %out1
; Lanes 0 and 2 of %src1 are sign-extended to i64 and multiplied by %src2,
; where the scalar is sign-extended to i64 first and then splatted as
; <2 x i64> (extend-then-splat rather than splat-then-extend).
; Expected output: no vmullb; each 64-bit product is built in core registers
; with umull plus asrs/mla for the high half, and r4/r5 are saved on the stack.
38 define arm_aapcs_vfpcc <2 x i64> @sext32_0246_ext0(<4 x i32> %src1, i32 %src2) {
39 ; CHECK-LABEL: sext32_0246_ext0:
40 ; CHECK: @ %bb.0: @ %entry
41 ; CHECK-NEXT: .save {r4, r5, r7, lr}
42 ; CHECK-NEXT: push {r4, r5, r7, lr}
43 ; CHECK-NEXT: vmov r1, s2
44 ; CHECK-NEXT: vmov r3, s0
45 ; CHECK-NEXT: umull lr, r12, r1, r0
46 ; CHECK-NEXT: umull r2, r5, r3, r0
47 ; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
48 ; CHECK-NEXT: asrs r2, r0, #31
49 ; CHECK-NEXT: mla r4, r1, r2, r12
50 ; CHECK-NEXT: asrs r1, r1, #31
51 ; CHECK-NEXT: mla r2, r3, r2, r5
52 ; CHECK-NEXT: asrs r3, r3, #31
53 ; CHECK-NEXT: mla r1, r1, r0, r4
54 ; CHECK-NEXT: mla r0, r3, r0, r2
55 ; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
56 ; CHECK-NEXT: pop {r4, r5, r7, pc}
58 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
59 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
60 %ext = sext i32 %src2 to i64
61 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
62 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
63 %out = mul <2 x i64> %out1, %shuf2
; Commuted multiply: the <2 x i64> splat of the sign-extended scalar is the
; first mul operand and the sign-extended lanes 0 and 2 of %src1 the second.
; Expected output: no vmullb; the products are computed in core registers
; (umull, asrs, mla) with r4/r5 saved, and the umull operands appear with r0
; first.
67 define arm_aapcs_vfpcc <2 x i64> @sext32_ext0_0246(<4 x i32> %src1, i32 %src2) {
68 ; CHECK-LABEL: sext32_ext0_0246:
69 ; CHECK: @ %bb.0: @ %entry
70 ; CHECK-NEXT: .save {r4, r5, r7, lr}
71 ; CHECK-NEXT: push {r4, r5, r7, lr}
72 ; CHECK-NEXT: vmov r1, s2
73 ; CHECK-NEXT: asrs r4, r0, #31
74 ; CHECK-NEXT: vmov r3, s0
75 ; CHECK-NEXT: umull lr, r12, r0, r1
76 ; CHECK-NEXT: umull r2, r5, r0, r3
77 ; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
78 ; CHECK-NEXT: asrs r2, r1, #31
79 ; CHECK-NEXT: mla r2, r0, r2, r12
80 ; CHECK-NEXT: mla r1, r4, r1, r2
81 ; CHECK-NEXT: asrs r2, r3, #31
82 ; CHECK-NEXT: mla r0, r0, r2, r5
83 ; CHECK-NEXT: mla r0, r4, r3, r0
84 ; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
85 ; CHECK-NEXT: pop {r4, r5, r7, pc}
87 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
88 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
89 %ext = sext i32 %src2 to i64
90 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
91 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
92 %out = mul <2 x i64> %shuf2, %out1
; Lanes 1 and 3 (the odd lanes) of %src1 are sign-extended to i64 and
; multiplied by %src2, which is splatted as i32 and then sign-extended.
; Expected output: vrev64.32 swaps the odd lanes into the even positions,
; after which a single vmullb.s32 forms the product.
96 define arm_aapcs_vfpcc <2 x i64> @sext32_1357_0ext(<4 x i32> %src1, i32 %src2) {
97 ; CHECK-LABEL: sext32_1357_0ext:
98 ; CHECK: @ %bb.0: @ %entry
99 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r0
100 ; CHECK-NEXT: vrev64.32 q2, q0
101 ; CHECK-NEXT: vmullb.s32 q0, q2, q1
104 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
105 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
106 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
107 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
108 %out2 = sext <2 x i32> %shuf2 to <2 x i64>
109 %out = mul <2 x i64> %out1, %out2
; Commuted multiply: the sign-extended i32 splat of %src2 is the first mul
; operand and the sign-extended lanes 1 and 3 of %src1 the second.
; Expected output: vrev64.32 on the vector input, then a single vmullb.s32
; with the splat register as the first source operand.
113 define arm_aapcs_vfpcc <2 x i64> @sext32_0ext_1357(<4 x i32> %src1, i32 %src2) {
114 ; CHECK-LABEL: sext32_0ext_1357:
115 ; CHECK: @ %bb.0: @ %entry
116 ; CHECK-NEXT: vrev64.32 q1, q0
117 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
118 ; CHECK-NEXT: vmullb.s32 q0, q2, q1
121 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
122 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
123 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
124 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
125 %out2 = sext <2 x i32> %shuf2 to <2 x i64>
126 %out = mul <2 x i64> %out2, %out1
; Lanes 1 and 3 of %src1 are sign-extended to i64 and multiplied by %src2,
; where the scalar is sign-extended to i64 before being splatted as <2 x i64>.
; Expected output: vrev64.32 followed by a core-register sequence (umull,
; asrs, mla) per lane with r4/r5 saved; no vmullb appears.
130 define arm_aapcs_vfpcc <2 x i64> @sext32_1357_ext0(<4 x i32> %src1, i32 %src2) {
131 ; CHECK-LABEL: sext32_1357_ext0:
132 ; CHECK: @ %bb.0: @ %entry
133 ; CHECK-NEXT: .save {r4, r5, r7, lr}
134 ; CHECK-NEXT: push {r4, r5, r7, lr}
135 ; CHECK-NEXT: vrev64.32 q1, q0
136 ; CHECK-NEXT: vmov r1, s6
137 ; CHECK-NEXT: vmov r3, s4
138 ; CHECK-NEXT: umull lr, r12, r1, r0
139 ; CHECK-NEXT: umull r2, r5, r3, r0
140 ; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
141 ; CHECK-NEXT: asrs r2, r0, #31
142 ; CHECK-NEXT: mla r4, r1, r2, r12
143 ; CHECK-NEXT: asrs r1, r1, #31
144 ; CHECK-NEXT: mla r2, r3, r2, r5
145 ; CHECK-NEXT: asrs r3, r3, #31
146 ; CHECK-NEXT: mla r1, r1, r0, r4
147 ; CHECK-NEXT: mla r0, r3, r0, r2
148 ; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
149 ; CHECK-NEXT: pop {r4, r5, r7, pc}
151 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
152 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
153 %ext = sext i32 %src2 to i64
154 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
155 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
156 %out = mul <2 x i64> %out1, %shuf2
; Commuted multiply: the <2 x i64> splat of the sign-extended scalar is the
; first mul operand and the sign-extended lanes 1 and 3 of %src1 the second.
; Expected output: vrev64.32 followed by core-register umull/asrs/mla
; sequences with r4/r5 saved; no vmullb appears.
160 define arm_aapcs_vfpcc <2 x i64> @sext32_ext0_1357(<4 x i32> %src1, i32 %src2) {
161 ; CHECK-LABEL: sext32_ext0_1357:
162 ; CHECK: @ %bb.0: @ %entry
163 ; CHECK-NEXT: .save {r4, r5, r7, lr}
164 ; CHECK-NEXT: push {r4, r5, r7, lr}
165 ; CHECK-NEXT: vrev64.32 q1, q0
166 ; CHECK-NEXT: asrs r4, r0, #31
167 ; CHECK-NEXT: vmov r1, s6
168 ; CHECK-NEXT: vmov r3, s4
169 ; CHECK-NEXT: umull lr, r12, r0, r1
170 ; CHECK-NEXT: umull r2, r5, r0, r3
171 ; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
172 ; CHECK-NEXT: asrs r2, r1, #31
173 ; CHECK-NEXT: mla r2, r0, r2, r12
174 ; CHECK-NEXT: mla r1, r4, r1, r2
175 ; CHECK-NEXT: asrs r2, r3, #31
176 ; CHECK-NEXT: mla r0, r0, r2, r5
177 ; CHECK-NEXT: mla r0, r4, r3, r0
178 ; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
179 ; CHECK-NEXT: pop {r4, r5, r7, pc}
181 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
182 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
183 %ext = sext i32 %src2 to i64
184 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
185 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
186 %out = mul <2 x i64> %shuf2, %out1
; Wider variant: lanes 0, 2, 1, 3 of an <8 x i32> input are sign-extended to
; <4 x i64> and multiplied by %src2, which is splatted as i32 and then
; sign-extended.
; Expected output: two vmullb.s32 instructions sharing one splat register;
; between them vmov.f32 copies s1 and s3 into s0 and s2 so the second multiply
; reads the odd lanes.
190 define arm_aapcs_vfpcc <4 x i64> @sext32_0213_0ext(<8 x i32> %src1, i32 %src2) {
191 ; CHECK-LABEL: sext32_0213_0ext:
192 ; CHECK: @ %bb.0: @ %entry
193 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
194 ; CHECK-NEXT: vmullb.s32 q2, q0, q3
195 ; CHECK-NEXT: vmov.f32 s0, s1
196 ; CHECK-NEXT: vmov.f32 s2, s3
197 ; CHECK-NEXT: vmullb.s32 q1, q0, q3
198 ; CHECK-NEXT: vmov q0, q2
201 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
202 %out1 = sext <4 x i32> %shuf1 to <4 x i64>
203 %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
204 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
205 %out2 = sext <4 x i32> %shuf2 to <4 x i64>
206 %out = mul <4 x i64> %out1, %out2
; Commuted multiply on the wider variant: the sign-extended i32 splat of
; %src2 is the first mul operand; lanes 0, 2, 1, 3 of the <8 x i32> input,
; sign-extended to <4 x i64>, are the second.
; Expected output: two vmullb.s32 instructions with the splat register as the
; first source operand, separated by the vmov.f32 lane copies.
210 define arm_aapcs_vfpcc <4 x i64> @sext32_0ext_0213(<8 x i32> %src1, i32 %src2) {
211 ; CHECK-LABEL: sext32_0ext_0213:
212 ; CHECK: @ %bb.0: @ %entry
213 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
214 ; CHECK-NEXT: vmullb.s32 q2, q3, q0
215 ; CHECK-NEXT: vmov.f32 s0, s1
216 ; CHECK-NEXT: vmov.f32 s2, s3
217 ; CHECK-NEXT: vmullb.s32 q1, q3, q0
218 ; CHECK-NEXT: vmov q0, q2
221 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
222 %out1 = sext <4 x i32> %shuf1 to <4 x i64>
223 %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
224 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
225 %out2 = sext <4 x i32> %shuf2 to <4 x i64>
226 %out = mul <4 x i64> %out2, %out1
; Wider variant with extend-then-splat: lanes 0, 2, 1, 3 of an <8 x i32>
; input are sign-extended to <4 x i64> and multiplied by a <4 x i64> splat of
; the already sign-extended scalar.
; Expected output: no vmullb; four umull instructions (one per result lane)
; with asrs/mla fix-ups for the high halves, and r4/r5 saved on the stack.
230 define arm_aapcs_vfpcc <4 x i64> @sext32_0213_ext0(<8 x i32> %src1, i32 %src2) {
231 ; CHECK-LABEL: sext32_0213_ext0:
232 ; CHECK: @ %bb.0: @ %entry
233 ; CHECK-NEXT: .save {r4, r5, r7, lr}
234 ; CHECK-NEXT: push {r4, r5, r7, lr}
235 ; CHECK-NEXT: vmov.f32 s4, s1
236 ; CHECK-NEXT: vmov.f32 s6, s3
237 ; CHECK-NEXT: vmov r3, s4
238 ; CHECK-NEXT: vmov r1, s6
239 ; CHECK-NEXT: umull r2, r5, r3, r0
240 ; CHECK-NEXT: umull lr, r12, r1, r0
241 ; CHECK-NEXT: vmov q1[2], q1[0], r2, lr
242 ; CHECK-NEXT: asrs r2, r0, #31
243 ; CHECK-NEXT: mla r4, r1, r2, r12
244 ; CHECK-NEXT: asrs r1, r1, #31
245 ; CHECK-NEXT: mla r5, r3, r2, r5
246 ; CHECK-NEXT: asrs r3, r3, #31
247 ; CHECK-NEXT: mla r1, r1, r0, r4
248 ; CHECK-NEXT: mla r3, r3, r0, r5
249 ; CHECK-NEXT: vmov q1[3], q1[1], r3, r1
250 ; CHECK-NEXT: vmov r1, s2
251 ; CHECK-NEXT: umull r3, r5, r1, r0
252 ; CHECK-NEXT: mla r5, r1, r2, r5
253 ; CHECK-NEXT: asrs r1, r1, #31
254 ; CHECK-NEXT: mla r12, r1, r0, r5
255 ; CHECK-NEXT: vmov r5, s0
256 ; CHECK-NEXT: umull r4, r1, r5, r0
257 ; CHECK-NEXT: mla r1, r5, r2, r1
258 ; CHECK-NEXT: asrs r2, r5, #31
259 ; CHECK-NEXT: vmov q0[2], q0[0], r4, r3
260 ; CHECK-NEXT: mla r0, r2, r0, r1
261 ; CHECK-NEXT: vmov q0[3], q0[1], r0, r12
262 ; CHECK-NEXT: pop {r4, r5, r7, pc}
264 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
265 %out1 = sext <4 x i32> %shuf1 to <4 x i64>
266 %ext = sext i32 %src2 to i64
267 %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
268 %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
269 %out = mul <4 x i64> %out1, %shuf2
; Commuted multiply on the wider extend-then-splat variant: the <4 x i64>
; splat of the sign-extended scalar is the first mul operand; lanes 0, 2, 1, 3
; of the <8 x i32> input, sign-extended to <4 x i64>, are the second.
; Expected output: no vmullb; four umull instructions with asrs/mla fix-ups
; and r4/r5 saved on the stack.
273 define arm_aapcs_vfpcc <4 x i64> @sext32_ext0_0213(<8 x i32> %src1, i32 %src2) {
274 ; CHECK-LABEL: sext32_ext0_0213:
275 ; CHECK: @ %bb.0: @ %entry
276 ; CHECK-NEXT: .save {r4, r5, r7, lr}
277 ; CHECK-NEXT: push {r4, r5, r7, lr}
278 ; CHECK-NEXT: vmov.f32 s4, s1
279 ; CHECK-NEXT: asrs r4, r0, #31
280 ; CHECK-NEXT: vmov.f32 s6, s3
281 ; CHECK-NEXT: vmov r3, s4
282 ; CHECK-NEXT: vmov r1, s6
283 ; CHECK-NEXT: umull r2, r5, r0, r3
284 ; CHECK-NEXT: umull lr, r12, r0, r1
285 ; CHECK-NEXT: vmov q1[2], q1[0], r2, lr
286 ; CHECK-NEXT: asrs r2, r1, #31
287 ; CHECK-NEXT: mla r2, r0, r2, r12
288 ; CHECK-NEXT: mla r1, r4, r1, r2
289 ; CHECK-NEXT: asrs r2, r3, #31
290 ; CHECK-NEXT: mla r2, r0, r2, r5
291 ; CHECK-NEXT: mla r2, r4, r3, r2
292 ; CHECK-NEXT: vmov q1[3], q1[1], r2, r1
293 ; CHECK-NEXT: vmov r1, s2
294 ; CHECK-NEXT: umull r2, r3, r0, r1
295 ; CHECK-NEXT: asrs r5, r1, #31
296 ; CHECK-NEXT: mla r3, r0, r5, r3
297 ; CHECK-NEXT: mla r12, r4, r1, r3
298 ; CHECK-NEXT: vmov r3, s0
299 ; CHECK-NEXT: umull r5, r1, r0, r3
300 ; CHECK-NEXT: vmov q0[2], q0[0], r5, r2
301 ; CHECK-NEXT: asrs r2, r3, #31
302 ; CHECK-NEXT: mla r0, r0, r2, r1
303 ; CHECK-NEXT: mla r0, r4, r3, r0
304 ; CHECK-NEXT: vmov q0[3], q0[1], r0, r12
305 ; CHECK-NEXT: pop {r4, r5, r7, pc}
307 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
308 %out1 = sext <4 x i32> %shuf1 to <4 x i64>
309 %ext = sext i32 %src2 to i64
310 %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
311 %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
312 %out = mul <4 x i64> %shuf2, %out1
; Unsigned counterpart: lanes 0 and 2 of %src1 are zero-extended to i64 and
; multiplied by %src2, which is splatted as i32 and then zero-extended.
; Expected output: the scalar is moved into lanes 0 and 2 of a q register and
; a single vmullb.u32 forms the product.
316 define arm_aapcs_vfpcc <2 x i64> @zext32_0246_0ext(<4 x i32> %src1, i32 %src2) {
317 ; CHECK-LABEL: zext32_0246_0ext:
318 ; CHECK: @ %bb.0: @ %entry
319 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
320 ; CHECK-NEXT: vmullb.u32 q1, q0, q2
321 ; CHECK-NEXT: vmov q0, q1
324 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
325 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
326 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
327 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
328 %out2 = zext <2 x i32> %shuf2 to <2 x i64>
329 %out = mul <2 x i64> %out1, %out2
; Commuted multiply: the zero-extended i32 splat of %src2 is the first mul
; operand and the zero-extended lanes 0 and 2 of %src1 the second.
; Expected output: a single vmullb.u32 with the splat register as the first
; source operand.
333 define arm_aapcs_vfpcc <2 x i64> @zext32_0ext_0246(<4 x i32> %src1, i32 %src2) {
334 ; CHECK-LABEL: zext32_0ext_0246:
335 ; CHECK: @ %bb.0: @ %entry
336 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
337 ; CHECK-NEXT: vmullb.u32 q1, q2, q0
338 ; CHECK-NEXT: vmov q0, q1
341 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
342 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
343 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
344 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
345 %out2 = zext <2 x i32> %shuf2 to <2 x i64>
346 %out = mul <2 x i64> %out2, %out1
; Lanes 0 and 2 of %src1 are zero-extended to i64 and multiplied by %src2,
; where the scalar is zero-extended to i64 first and then splatted as
; <2 x i64>.
; Expected output: no vmullb; each lane is moved to a core register and
; multiplied with umull, and the low/high results are moved back into q0.
350 define arm_aapcs_vfpcc <2 x i64> @zext32_0246_ext0(<4 x i32> %src1, i32 %src2) {
351 ; CHECK-LABEL: zext32_0246_ext0:
352 ; CHECK: @ %bb.0: @ %entry
353 ; CHECK-NEXT: vmov r1, s2
354 ; CHECK-NEXT: vmov r3, s0
355 ; CHECK-NEXT: umull r1, r2, r1, r0
356 ; CHECK-NEXT: umull r0, r3, r3, r0
357 ; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
358 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
361 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
362 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
363 %ext = zext i32 %src2 to i64
364 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
365 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
366 %out = mul <2 x i64> %out1, %shuf2
; Commuted multiply: the <2 x i64> splat of the zero-extended scalar is the
; first mul operand and the zero-extended lanes 0 and 2 of %src1 the second.
; Expected output: no vmullb; two umull instructions with r0 as the first
; multiplicand, followed by moves of the results back into q0.
370 define arm_aapcs_vfpcc <2 x i64> @zext32_ext0_0246(<4 x i32> %src1, i32 %src2) {
371 ; CHECK-LABEL: zext32_ext0_0246:
372 ; CHECK: @ %bb.0: @ %entry
373 ; CHECK-NEXT: vmov r1, s2
374 ; CHECK-NEXT: vmov r3, s0
375 ; CHECK-NEXT: umull r1, r2, r0, r1
376 ; CHECK-NEXT: umull r0, r3, r0, r3
377 ; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
378 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
381 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
382 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
383 %ext = zext i32 %src2 to i64
384 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
385 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
386 %out = mul <2 x i64> %shuf2, %out1
; Lanes 1 and 3 of %src1 are zero-extended to i64 and multiplied by %src2,
; which is splatted as i32 and then zero-extended.
; Expected output: vrev64.32 swaps the odd lanes into the even positions,
; after which a single vmullb.u32 forms the product.
390 define arm_aapcs_vfpcc <2 x i64> @zext32_1357_0ext(<4 x i32> %src1, i32 %src2) {
391 ; CHECK-LABEL: zext32_1357_0ext:
392 ; CHECK: @ %bb.0: @ %entry
393 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r0
394 ; CHECK-NEXT: vrev64.32 q2, q0
395 ; CHECK-NEXT: vmullb.u32 q0, q2, q1
398 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
399 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
400 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
401 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
402 %out2 = zext <2 x i32> %shuf2 to <2 x i64>
403 %out = mul <2 x i64> %out1, %out2
; Commuted multiply: the zero-extended i32 splat of %src2 is the first mul
; operand and the zero-extended lanes 1 and 3 of %src1 the second.
; Expected output: vrev64.32 on the vector input, then a single vmullb.u32
; with the splat register as the first source operand.
407 define arm_aapcs_vfpcc <2 x i64> @zext32_0ext_1357(<4 x i32> %src1, i32 %src2) {
408 ; CHECK-LABEL: zext32_0ext_1357:
409 ; CHECK: @ %bb.0: @ %entry
410 ; CHECK-NEXT: vrev64.32 q1, q0
411 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
412 ; CHECK-NEXT: vmullb.u32 q0, q2, q1
415 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
416 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
417 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
418 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
419 %out2 = zext <2 x i32> %shuf2 to <2 x i64>
420 %out = mul <2 x i64> %out2, %out1
; Lanes 1 and 3 of %src1 are zero-extended to i64 and multiplied by %src2,
; where the scalar is zero-extended to i64 before being splatted as <2 x i64>.
; Expected output: vrev64.32, then two core-register umull instructions whose
; results are moved back into q0; no vmullb appears.
424 define arm_aapcs_vfpcc <2 x i64> @zext32_1357_ext0(<4 x i32> %src1, i32 %src2) {
425 ; CHECK-LABEL: zext32_1357_ext0:
426 ; CHECK: @ %bb.0: @ %entry
427 ; CHECK-NEXT: vrev64.32 q1, q0
428 ; CHECK-NEXT: vmov r1, s6
429 ; CHECK-NEXT: vmov r3, s4
430 ; CHECK-NEXT: umull r1, r2, r1, r0
431 ; CHECK-NEXT: umull r0, r3, r3, r0
432 ; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
433 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
436 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
437 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
438 %ext = zext i32 %src2 to i64
439 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
440 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
441 %out = mul <2 x i64> %out1, %shuf2
; Commuted multiply: the <2 x i64> splat of the zero-extended scalar is the
; first mul operand and the zero-extended lanes 1 and 3 of %src1 the second.
; Expected output: vrev64.32, then two umull instructions with r0 as the first
; multiplicand; no vmullb appears.
445 define arm_aapcs_vfpcc <2 x i64> @zext32_ext0_1357(<4 x i32> %src1, i32 %src2) {
446 ; CHECK-LABEL: zext32_ext0_1357:
447 ; CHECK: @ %bb.0: @ %entry
448 ; CHECK-NEXT: vrev64.32 q1, q0
449 ; CHECK-NEXT: vmov r1, s6
450 ; CHECK-NEXT: vmov r3, s4
451 ; CHECK-NEXT: umull r1, r2, r0, r1
452 ; CHECK-NEXT: umull r0, r3, r0, r3
453 ; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
454 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
457 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
458 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
459 %ext = zext i32 %src2 to i64
460 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
461 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
462 %out = mul <2 x i64> %shuf2, %out1
; Wider unsigned variant: lanes 0, 2, 1, 3 of an <8 x i32> input are
; zero-extended to <4 x i64> and multiplied by %src2, which is splatted as i32
; and then zero-extended.
; Expected output: two vmullb.u32 instructions sharing one splat register,
; with vmov.f32 copying s1 and s3 into s0 and s2 before the second multiply.
466 define arm_aapcs_vfpcc <4 x i64> @zext32_0213_0ext(<8 x i32> %src1, i32 %src2) {
467 ; CHECK-LABEL: zext32_0213_0ext:
468 ; CHECK: @ %bb.0: @ %entry
469 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
470 ; CHECK-NEXT: vmullb.u32 q2, q0, q3
471 ; CHECK-NEXT: vmov.f32 s0, s1
472 ; CHECK-NEXT: vmov.f32 s2, s3
473 ; CHECK-NEXT: vmullb.u32 q1, q0, q3
474 ; CHECK-NEXT: vmov q0, q2
477 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
478 %out1 = zext <4 x i32> %shuf1 to <4 x i64>
479 %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
480 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
481 %out2 = zext <4 x i32> %shuf2 to <4 x i64>
482 %out = mul <4 x i64> %out1, %out2
; Commuted multiply on the wider unsigned variant: the zero-extended i32 splat
; of %src2 is the first mul operand; lanes 0, 2, 1, 3 of the <8 x i32> input,
; zero-extended to <4 x i64>, are the second.
; Expected output: two vmullb.u32 instructions with the splat register as the
; first source operand, separated by the vmov.f32 lane copies.
486 define arm_aapcs_vfpcc <4 x i64> @zext32_0ext_0213(<8 x i32> %src1, i32 %src2) {
487 ; CHECK-LABEL: zext32_0ext_0213:
488 ; CHECK: @ %bb.0: @ %entry
489 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
490 ; CHECK-NEXT: vmullb.u32 q2, q3, q0
491 ; CHECK-NEXT: vmov.f32 s0, s1
492 ; CHECK-NEXT: vmov.f32 s2, s3
493 ; CHECK-NEXT: vmullb.u32 q1, q3, q0
494 ; CHECK-NEXT: vmov q0, q2
497 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
498 %out1 = zext <4 x i32> %shuf1 to <4 x i64>
499 %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
500 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
501 %out2 = zext <4 x i32> %shuf2 to <4 x i64>
502 %out = mul <4 x i64> %out2, %out1
; Wider unsigned extend-then-splat variant: lanes 0, 2, 1, 3 of an <8 x i32>
; input are zero-extended to <4 x i64> and multiplied by a <4 x i64> splat of
; the already zero-extended scalar.
; Expected output: no vmullb; four core-register umull instructions, two for
; lanes s0/s2 and two more after vmov.f32 copies s1/s3 into s0/s2.
506 define arm_aapcs_vfpcc <4 x i64> @zext32_0213_ext0(<8 x i32> %src1, i32 %src2) {
507 ; CHECK-LABEL: zext32_0213_ext0:
508 ; CHECK: @ %bb.0: @ %entry
509 ; CHECK-NEXT: vmov r1, s2
510 ; CHECK-NEXT: vmov r3, s0
511 ; CHECK-NEXT: vmov.f32 s0, s1
512 ; CHECK-NEXT: vmov.f32 s2, s3
513 ; CHECK-NEXT: umull r1, r12, r1, r0
514 ; CHECK-NEXT: umull r3, r2, r3, r0
515 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r1
516 ; CHECK-NEXT: vmov r1, s2
517 ; CHECK-NEXT: vmov r3, s0
518 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r12
519 ; CHECK-NEXT: vmov q0, q2
520 ; CHECK-NEXT: umull r1, r2, r1, r0
521 ; CHECK-NEXT: umull r0, r3, r3, r0
522 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r1
523 ; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
526 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
527 %out1 = zext <4 x i32> %shuf1 to <4 x i64>
528 %ext = zext i32 %src2 to i64
529 %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
530 %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
531 %out = mul <4 x i64> %out1, %shuf2
; Commuted multiply on the wider unsigned extend-then-splat variant: the
; <4 x i64> splat of the zero-extended scalar is the first mul operand; lanes
; 0, 2, 1, 3 of the <8 x i32> input, zero-extended, are the second.
; Expected output: no vmullb; four umull instructions with r0 as the first
; multiplicand.
535 define arm_aapcs_vfpcc <4 x i64> @zext32_ext0_0213(<8 x i32> %src1, i32 %src2) {
536 ; CHECK-LABEL: zext32_ext0_0213:
537 ; CHECK: @ %bb.0: @ %entry
538 ; CHECK-NEXT: vmov r1, s2
539 ; CHECK-NEXT: vmov r3, s0
540 ; CHECK-NEXT: vmov.f32 s0, s1
541 ; CHECK-NEXT: vmov.f32 s2, s3
542 ; CHECK-NEXT: umull r1, r12, r0, r1
543 ; CHECK-NEXT: umull r3, r2, r0, r3
544 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r1
545 ; CHECK-NEXT: vmov r1, s2
546 ; CHECK-NEXT: vmov r3, s0
547 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r12
548 ; CHECK-NEXT: vmov q0, q2
549 ; CHECK-NEXT: umull r1, r2, r0, r1
550 ; CHECK-NEXT: umull r0, r3, r0, r3
551 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r1
552 ; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
555 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
556 %out1 = zext <4 x i32> %shuf1 to <4 x i64>
557 %ext = zext i32 %src2 to i64
558 %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
559 %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
560 %out = mul <4 x i64> %shuf2, %out1
; 16-bit variant: lanes 0, 2, 4, 6 of an <8 x i16> input are sign-extended to
; i32 and multiplied by %src2, which is splatted as i16 (insertelement plus
; zero-mask shuffle) and then sign-extended.
; Expected output: vdup.32 of r0 followed by a single vmullb.s16.
564 define arm_aapcs_vfpcc <4 x i32> @sext16_02468101214_0ext(<8 x i16> %src1, i16 %src2) {
565 ; CHECK-LABEL: sext16_02468101214_0ext:
566 ; CHECK: @ %bb.0: @ %entry
567 ; CHECK-NEXT: vdup.32 q1, r0
568 ; CHECK-NEXT: vmullb.s16 q0, q0, q1
571 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
572 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
573 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
574 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
575 %out2 = sext <4 x i16> %shuf2 to <4 x i32>
576 %out = mul <4 x i32> %out1, %out2
; Commuted multiply: the sign-extended i16 splat of %src2 is the first mul
; operand and the sign-extended lanes 0, 2, 4, 6 of %src1 the second.
; Expected output: vdup.32 of r0 followed by a single vmullb.s16 with the
; splat register as the first source operand.
580 define arm_aapcs_vfpcc <4 x i32> @sext16_0ext_02468101214(<8 x i16> %src1, i16 %src2) {
581 ; CHECK-LABEL: sext16_0ext_02468101214:
582 ; CHECK: @ %bb.0: @ %entry
583 ; CHECK-NEXT: vdup.32 q1, r0
584 ; CHECK-NEXT: vmullb.s16 q0, q1, q0
587 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
588 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
589 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
590 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
591 %out2 = sext <4 x i16> %shuf2 to <4 x i32>
592 %out = mul <4 x i32> %out2, %out1
; Lanes 0, 2, 4, 6 of an <8 x i16> input are sign-extended to i32 and
; multiplied by %src2, where the scalar is sign-extended to i32 first and then
; splatted as <4 x i32>.
; Expected output: vmovlb.s16 widens the vector, sxth extends the scalar, and
; vmul.i32 takes r0 directly as its scalar operand; no vmullb appears.
596 define arm_aapcs_vfpcc <4 x i32> @sext16_02468101214_ext0(<8 x i16> %src1, i16 %src2) {
597 ; CHECK-LABEL: sext16_02468101214_ext0:
598 ; CHECK: @ %bb.0: @ %entry
599 ; CHECK-NEXT: vmovlb.s16 q0, q0
600 ; CHECK-NEXT: sxth r0, r0
601 ; CHECK-NEXT: vmul.i32 q0, q0, r0
604 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
605 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
606 %ext = sext i16 %src2 to i32
607 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
608 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
609 %out = mul <4 x i32> %out1, %shuf2
; Commuted multiply: the <4 x i32> splat of the sign-extended scalar is the
; first mul operand and the sign-extended lanes 0, 2, 4, 6 of %src1 the second.
; Expected output: vmovlb.s16, sxth, then vmul.i32 with r0 as the scalar
; operand; no vmullb appears.
613 define arm_aapcs_vfpcc <4 x i32> @sext16_ext0_02468101214(<8 x i16> %src1, i16 %src2) {
614 ; CHECK-LABEL: sext16_ext0_02468101214:
615 ; CHECK: @ %bb.0: @ %entry
616 ; CHECK-NEXT: vmovlb.s16 q0, q0
617 ; CHECK-NEXT: sxth r0, r0
618 ; CHECK-NEXT: vmul.i32 q0, q0, r0
621 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
622 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
623 %ext = sext i16 %src2 to i32
624 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
625 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
626 %out = mul <4 x i32> %shuf2, %out1
; Lanes 1, 3, 5, 7 (the odd lanes) of an <8 x i16> input are sign-extended to
; i32 and multiplied by %src2, which is splatted as i16 and then sign-extended.
; Expected output: vdup.32 of r0, vrev32.16 to swap the odd lanes into the
; even positions, then a single vmullb.s16.
630 define arm_aapcs_vfpcc <4 x i32> @sext16_13579111315_0ext(<8 x i16> %src1, i16 %src2) {
631 ; CHECK-LABEL: sext16_13579111315_0ext:
632 ; CHECK: @ %bb.0: @ %entry
633 ; CHECK-NEXT: vdup.32 q1, r0
634 ; CHECK-NEXT: vrev32.16 q0, q0
635 ; CHECK-NEXT: vmullb.s16 q0, q0, q1
638 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
639 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
640 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
641 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
642 %out2 = sext <4 x i16> %shuf2 to <4 x i32>
643 %out = mul <4 x i32> %out1, %out2
; Commuted multiply: the sign-extended i16 splat of %src2 is the first mul
; operand and the sign-extended lanes 1, 3, 5, 7 of %src1 the second.
; Expected output: vrev32.16 on the vector input, vdup.32 of r0, then a single
; vmullb.s16 with the splat register as the first source operand.
647 define arm_aapcs_vfpcc <4 x i32> @sext16_0ext_13579111315(<8 x i16> %src1, i16 %src2) {
648 ; CHECK-LABEL: sext16_0ext_13579111315:
649 ; CHECK: @ %bb.0: @ %entry
650 ; CHECK-NEXT: vrev32.16 q0, q0
651 ; CHECK-NEXT: vdup.32 q1, r0
652 ; CHECK-NEXT: vmullb.s16 q0, q1, q0
655 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
656 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
657 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
658 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
659 %out2 = sext <4 x i16> %shuf2 to <4 x i32>
660 %out = mul <4 x i32> %out2, %out1
; Lanes 1, 3, 5, 7 of an <8 x i16> input are sign-extended to i32 and
; multiplied by %src2, where the scalar is sign-extended to i32 before being
; splatted as <4 x i32>.
; Expected output: vmovlt.s16 widens the odd (top) lanes, sxth extends the
; scalar, and vmul.i32 takes r0 as its scalar operand; no vmullb appears.
664 define arm_aapcs_vfpcc <4 x i32> @sext16_13579111315_ext0(<8 x i16> %src1, i16 %src2) {
665 ; CHECK-LABEL: sext16_13579111315_ext0:
666 ; CHECK: @ %bb.0: @ %entry
667 ; CHECK-NEXT: vmovlt.s16 q0, q0
668 ; CHECK-NEXT: sxth r0, r0
669 ; CHECK-NEXT: vmul.i32 q0, q0, r0
672 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
673 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
674 %ext = sext i16 %src2 to i32
675 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
676 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
677 %out = mul <4 x i32> %out1, %shuf2
; Commuted multiply: the <4 x i32> splat of the sign-extended scalar is the
; first mul operand and the sign-extended lanes 1, 3, 5, 7 of %src1 the second.
; Expected output: vmovlt.s16, sxth, then vmul.i32 with r0 as the scalar
; operand; no vmullb appears.
681 define arm_aapcs_vfpcc <4 x i32> @sext16_ext0_13579111315(<8 x i16> %src1, i16 %src2) {
682 ; CHECK-LABEL: sext16_ext0_13579111315:
683 ; CHECK: @ %bb.0: @ %entry
684 ; CHECK-NEXT: vmovlt.s16 q0, q0
685 ; CHECK-NEXT: sxth r0, r0
686 ; CHECK-NEXT: vmul.i32 q0, q0, r0
689 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
690 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
691 %ext = sext i16 %src2 to i32
692 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
693 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
694 %out = mul <4 x i32> %shuf2, %out1
; Wider 16-bit variant: lanes 0, 2, 4, 6, 1, 3, 5, 7 of a <16 x i16> input are
; sign-extended to <8 x i32> and multiplied by %src2, which is splatted as i16
; and then sign-extended.
; Expected output: vdup.16 of r0, a vrev32.16 copy of the input for the odd
; lanes, and two vmullb.s16 instructions sharing the splat register.
698 define arm_aapcs_vfpcc <8 x i32> @sext16_02461357_0ext(<16 x i16> %src1, i16 %src2) {
699 ; CHECK-LABEL: sext16_02461357_0ext:
700 ; CHECK: @ %bb.0: @ %entry
701 ; CHECK-NEXT: vdup.16 q2, r0
702 ; CHECK-NEXT: vrev32.16 q1, q0
703 ; CHECK-NEXT: vmullb.s16 q1, q1, q2
704 ; CHECK-NEXT: vmullb.s16 q0, q0, q2
707 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
708 %out1 = sext <8 x i16> %shuf1 to <8 x i32>
709 %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
710 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
711 %out2 = sext <8 x i16> %shuf2 to <8 x i32>
712 %out = mul <8 x i32> %out1, %out2
; Commuted multiply on the wider 16-bit variant: the sign-extended i16 splat
; of %src2 is the first mul operand; lanes 0, 2, 4, 6, 1, 3, 5, 7 of the
; <16 x i16> input, sign-extended to <8 x i32>, are the second.
; Expected output: vrev32.16, vdup.16 of r0, and two vmullb.s16 instructions
; with the splat register as the first source operand.
716 define arm_aapcs_vfpcc <8 x i32> @sext16_0ext_02461357(<16 x i16> %src1, i16 %src2) {
717 ; CHECK-LABEL: sext16_0ext_02461357:
718 ; CHECK: @ %bb.0: @ %entry
719 ; CHECK-NEXT: vrev32.16 q1, q0
720 ; CHECK-NEXT: vdup.16 q2, r0
721 ; CHECK-NEXT: vmullb.s16 q1, q2, q1
722 ; CHECK-NEXT: vmullb.s16 q0, q2, q0
725 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
726 %out1 = sext <8 x i16> %shuf1 to <8 x i32>
727 %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
728 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
729 %out2 = sext <8 x i16> %shuf2 to <8 x i32>
730 %out = mul <8 x i32> %out2, %out1
; Wider 16-bit extend-then-splat variant: lanes 0, 2, 4, 6, 1, 3, 5, 7 of a
; <16 x i16> input are sign-extended to <8 x i32> and multiplied by an
; <8 x i32> splat of the already sign-extended scalar.
; Expected output: vmovlb.s16 and vmovlt.s16 widen the even and odd lanes,
; sxth extends the scalar once, and two vmul.i32 instructions take r0 as the
; scalar operand; no vmullb appears.
734 define arm_aapcs_vfpcc <8 x i32> @sext16_02461357_ext0(<16 x i16> %src1, i16 %src2) {
735 ; CHECK-LABEL: sext16_02461357_ext0:
736 ; CHECK: @ %bb.0: @ %entry
737 ; CHECK-NEXT: vmovlb.s16 q1, q0
738 ; CHECK-NEXT: sxth r0, r0
739 ; CHECK-NEXT: vmul.i32 q2, q1, r0
740 ; CHECK-NEXT: vmovlt.s16 q0, q0
741 ; CHECK-NEXT: vmul.i32 q1, q0, r0
742 ; CHECK-NEXT: vmov q0, q2
745 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
746 %out1 = sext <8 x i16> %shuf1 to <8 x i32>
747 %ext = sext i16 %src2 to i32
748 %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
749 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
750 %out = mul <8 x i32> %out1, %shuf2
; Commuted multiply on the wider 16-bit extend-then-splat variant: the
; <8 x i32> splat of the sign-extended scalar is the first mul operand; lanes
; 0, 2, 4, 6, 1, 3, 5, 7 of the <16 x i16> input, sign-extended, are the second.
; Expected output: vmovlb.s16 / vmovlt.s16, one sxth, and two vmul.i32
; instructions with r0 as the scalar operand; no vmullb appears.
754 define arm_aapcs_vfpcc <8 x i32> @sext16_ext0_02461357(<16 x i16> %src1, i16 %src2) {
755 ; CHECK-LABEL: sext16_ext0_02461357:
756 ; CHECK: @ %bb.0: @ %entry
757 ; CHECK-NEXT: vmovlb.s16 q1, q0
758 ; CHECK-NEXT: sxth r0, r0
759 ; CHECK-NEXT: vmul.i32 q2, q1, r0
760 ; CHECK-NEXT: vmovlt.s16 q0, q0
761 ; CHECK-NEXT: vmul.i32 q1, q0, r0
762 ; CHECK-NEXT: vmov q0, q2
765 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
766 %out1 = sext <8 x i16> %shuf1 to <8 x i32>
767 %ext = sext i16 %src2 to i32
768 %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
769 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
770 %out = mul <8 x i32> %shuf2, %out1
; Unsigned 16-bit variant: lanes 0, 2, 4, 6 of an <8 x i16> input are
; zero-extended to i32 and multiplied by %src2, which is splatted as i16 and
; then zero-extended.
; Expected output: vdup.32 of r0 followed by a single vmullb.u16.
774 define arm_aapcs_vfpcc <4 x i32> @zext16_02468101214_0ext(<8 x i16> %src1, i16 %src2) {
775 ; CHECK-LABEL: zext16_02468101214_0ext:
776 ; CHECK: @ %bb.0: @ %entry
777 ; CHECK-NEXT: vdup.32 q1, r0
778 ; CHECK-NEXT: vmullb.u16 q0, q0, q1
781 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
782 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
783 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
784 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
785 %out2 = zext <4 x i16> %shuf2 to <4 x i32>
786 %out = mul <4 x i32> %out1, %out2
; Commuted multiply: the zero-extended i16 splat of %src2 is the first mul
; operand and the zero-extended lanes 0, 2, 4, 6 of %src1 the second.
; Expected output: vdup.32 of r0 followed by a single vmullb.u16 with the
; splat register as the first source operand.
790 define arm_aapcs_vfpcc <4 x i32> @zext16_0ext_02468101214(<8 x i16> %src1, i16 %src2) {
791 ; CHECK-LABEL: zext16_0ext_02468101214:
792 ; CHECK: @ %bb.0: @ %entry
793 ; CHECK-NEXT: vdup.32 q1, r0
794 ; CHECK-NEXT: vmullb.u16 q0, q1, q0
797 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
798 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
799 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
800 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
801 %out2 = zext <4 x i16> %shuf2 to <4 x i32>
802 %out = mul <4 x i32> %out2, %out1
; Lanes 0, 2, 4, 6 of an <8 x i16> input are zero-extended to i32 and
; multiplied by %src2, where the scalar is zero-extended to i32 first and then
; splatted as <4 x i32>.
; Expected output: vmovlb.u16 widens the vector, uxth extends the scalar, and
; vmul.i32 takes r0 as its scalar operand; no vmullb appears.
806 define arm_aapcs_vfpcc <4 x i32> @zext16_02468101214_ext0(<8 x i16> %src1, i16 %src2) {
807 ; CHECK-LABEL: zext16_02468101214_ext0:
808 ; CHECK: @ %bb.0: @ %entry
809 ; CHECK-NEXT: vmovlb.u16 q0, q0
810 ; CHECK-NEXT: uxth r0, r0
811 ; CHECK-NEXT: vmul.i32 q0, q0, r0
814 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
815 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
816 %ext = zext i16 %src2 to i32
817 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
818 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
819 %out = mul <4 x i32> %out1, %shuf2
; Commuted multiply: the <4 x i32> splat of the zero-extended scalar is the
; first mul operand and the zero-extended lanes 0, 2, 4, 6 of %src1 the second.
; Expected output: vmovlb.u16, uxth, then vmul.i32 with r0 as the scalar
; operand; no vmullb appears.
823 define arm_aapcs_vfpcc <4 x i32> @zext16_ext0_02468101214(<8 x i16> %src1, i16 %src2) {
824 ; CHECK-LABEL: zext16_ext0_02468101214:
825 ; CHECK: @ %bb.0: @ %entry
826 ; CHECK-NEXT: vmovlb.u16 q0, q0
827 ; CHECK-NEXT: uxth r0, r0
828 ; CHECK-NEXT: vmul.i32 q0, q0, r0
831 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
832 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
833 %ext = zext i16 %src2 to i32
834 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
835 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
836 %out = mul <4 x i32> %shuf2, %out1
; Lanes 1, 3, 5, 7 of an <8 x i16> input are zero-extended to i32 and
; multiplied by %src2, which is splatted as i16 and then zero-extended.
; Expected output: vdup.32 of r0, vrev32.16 to swap the odd lanes into the
; even positions, then a single vmullb.u16.
840 define arm_aapcs_vfpcc <4 x i32> @zext16_13579111315_0ext(<8 x i16> %src1, i16 %src2) {
841 ; CHECK-LABEL: zext16_13579111315_0ext:
842 ; CHECK: @ %bb.0: @ %entry
843 ; CHECK-NEXT: vdup.32 q1, r0
844 ; CHECK-NEXT: vrev32.16 q0, q0
845 ; CHECK-NEXT: vmullb.u16 q0, q0, q1
848 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
849 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
850 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
851 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
852 %out2 = zext <4 x i16> %shuf2 to <4 x i32>
853 %out = mul <4 x i32> %out1, %out2
; Odd i16 lanes of %src1, zero-extended to i32, multiplied by a zero-extended
; i16 splat of %src2, with the splat as the left-hand mul operand. Expected
; lowering: vrev32.16 on the input, vdup.32 of the scalar, then vmullb.u16
; with the splat register as the first source.
; NOTE(review): the name spells lanes 1..15 but the mask selects 1,3,5,7 -
; confirm the name is intentional.
857 define arm_aapcs_vfpcc <4 x i32> @zext16_0ext_13579111315(<8 x i16> %src1, i16 %src2) {
858 ; CHECK-LABEL: zext16_0ext_13579111315:
859 ; CHECK: @ %bb.0: @ %entry
860 ; CHECK-NEXT: vrev32.16 q0, q0
861 ; CHECK-NEXT: vdup.32 q1, r0
862 ; CHECK-NEXT: vmullb.u16 q0, q1, q0
; Pick the odd lanes and widen them.
865 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
866 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
; Broadcast lane 0 at i16 width, then widen the splat.
867 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
868 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
869 %out2 = zext <4 x i16> %shuf2 to <4 x i32>
; Splat on the left, vector on the right.
870 %out = mul <4 x i32> %out2, %out1
; Odd i16 lanes of %src1, zero-extended to i32, multiplied by a broadcast of
; %src2 that is zero-extended to i32 before being splatted. The vector is the
; left-hand mul operand. Expected lowering: vmovlt.u16 to widen the odd lanes,
; uxth on the scalar, then a vector-by-scalar vmul.i32.
; NOTE(review): the name spells lanes 1..15 but the mask selects 1,3,5,7 -
; confirm the name is intentional.
874 define arm_aapcs_vfpcc <4 x i32> @zext16_13579111315_ext0(<8 x i16> %src1, i16 %src2) {
875 ; CHECK-LABEL: zext16_13579111315_ext0:
876 ; CHECK: @ %bb.0: @ %entry
877 ; CHECK-NEXT: vmovlt.u16 q0, q0
878 ; CHECK-NEXT: uxth r0, r0
879 ; CHECK-NEXT: vmul.i32 q0, q0, r0
; Pick the odd lanes and widen them.
882 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
883 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
; Widen the scalar, insert it in lane 0 and broadcast that lane.
884 %ext = zext i16 %src2 to i32
885 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
886 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
887 %out = mul <4 x i32> %out1, %shuf2
; Odd i16 lanes of %src1, zero-extended to i32, multiplied by a broadcast of
; the pre-extended %src2, with the splat as the left-hand mul operand. The
; expected lowering matches the vector-first form: vmovlt.u16, uxth, then a
; vector-by-scalar vmul.i32.
; NOTE(review): the name spells lanes 1..15 but the mask selects 1,3,5,7 -
; confirm the name is intentional.
891 define arm_aapcs_vfpcc <4 x i32> @zext16_ext0_13579111315(<8 x i16> %src1, i16 %src2) {
892 ; CHECK-LABEL: zext16_ext0_13579111315:
893 ; CHECK: @ %bb.0: @ %entry
894 ; CHECK-NEXT: vmovlt.u16 q0, q0
895 ; CHECK-NEXT: uxth r0, r0
896 ; CHECK-NEXT: vmul.i32 q0, q0, r0
; Pick the odd lanes and widen them.
899 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
900 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
; Widen the scalar, insert it in lane 0 and broadcast that lane.
901 %ext = zext i16 %src2 to i32
902 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
903 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
; Splat on the left, vector on the right.
904 %out = mul <4 x i32> %shuf2, %out1
; Two-register result: the even lanes (0,2,4,6) followed by the odd lanes
; (1,3,5,7) of %src1 are zero-extended to i32 and multiplied by a zero-extended
; i16 splat of %src2. Only lanes 0-7 of the <16 x i16> input are selected by
; the mask. The vector is the left-hand mul operand. Expected lowering: one
; vdup.16 of the scalar, a vrev32.16 copy of the input for the odd lanes, and
; two vmullb.u16 (q0 = even-lane products, q1 = odd-lane products).
908 define arm_aapcs_vfpcc <8 x i32> @zext16_02461357_0ext(<16 x i16> %src1, i16 %src2) {
909 ; CHECK-LABEL: zext16_02461357_0ext:
910 ; CHECK: @ %bb.0: @ %entry
911 ; CHECK-NEXT: vdup.16 q2, r0
912 ; CHECK-NEXT: vrev32.16 q1, q0
913 ; CHECK-NEXT: vmullb.u16 q1, q1, q2
914 ; CHECK-NEXT: vmullb.u16 q0, q0, q2
; Even lanes first, then odd lanes, all widened to i32.
917 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
918 %out1 = zext <8 x i16> %shuf1 to <8 x i32>
; Broadcast lane 0 at i16 width, then widen the splat.
919 %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
920 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
921 %out2 = zext <8 x i16> %shuf2 to <8 x i32>
922 %out = mul <8 x i32> %out1, %out2
; Two-register result: even lanes (0,2,4,6) then odd lanes (1,3,5,7) of %src1,
; zero-extended to i32 and multiplied by a zero-extended i16 splat of %src2,
; with the splat as the left-hand mul operand. Only lanes 0-7 of the
; <16 x i16> input are selected by the mask. Expected lowering: vrev32.16 copy
; of the input, vdup.16 of the scalar, and two vmullb.u16 with the splat
; register as the first source.
926 define arm_aapcs_vfpcc <8 x i32> @zext16_0ext_02461357(<16 x i16> %src1, i16 %src2) {
927 ; CHECK-LABEL: zext16_0ext_02461357:
928 ; CHECK: @ %bb.0: @ %entry
929 ; CHECK-NEXT: vrev32.16 q1, q0
930 ; CHECK-NEXT: vdup.16 q2, r0
931 ; CHECK-NEXT: vmullb.u16 q1, q2, q1
932 ; CHECK-NEXT: vmullb.u16 q0, q2, q0
; Even lanes first, then odd lanes, all widened to i32.
935 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
936 %out1 = zext <8 x i16> %shuf1 to <8 x i32>
; Broadcast lane 0 at i16 width, then widen the splat.
937 %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
938 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
939 %out2 = zext <8 x i16> %shuf2 to <8 x i32>
; Splat on the left, vector on the right.
940 %out = mul <8 x i32> %out2, %out1
; Two-register result: even lanes (0,2,4,6) then odd lanes (1,3,5,7) of %src1,
; zero-extended to i32 and multiplied by a broadcast of %src2 that is
; zero-extended to i32 before being splatted. Only lanes 0-7 of the
; <16 x i16> input are selected by the mask. The vector is the left-hand mul
; operand. Expected lowering: vmovlb.u16 / vmovlt.u16 to widen the even / odd
; lanes, uxth on the scalar, a vector-by-scalar vmul.i32 for each half, and a
; final vmov that places the even-lane product in q0.
944 define arm_aapcs_vfpcc <8 x i32> @zext16_02461357_ext0(<16 x i16> %src1, i16 %src2) {
945 ; CHECK-LABEL: zext16_02461357_ext0:
946 ; CHECK: @ %bb.0: @ %entry
947 ; CHECK-NEXT: vmovlb.u16 q1, q0
948 ; CHECK-NEXT: uxth r0, r0
949 ; CHECK-NEXT: vmul.i32 q2, q1, r0
950 ; CHECK-NEXT: vmovlt.u16 q0, q0
951 ; CHECK-NEXT: vmul.i32 q1, q0, r0
952 ; CHECK-NEXT: vmov q0, q2
; Even lanes first, then odd lanes, all widened to i32.
955 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
956 %out1 = zext <8 x i16> %shuf1 to <8 x i32>
; Widen the scalar, insert it in lane 0 and broadcast that lane.
957 %ext = zext i16 %src2 to i32
958 %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
959 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
960 %out = mul <8 x i32> %out1, %shuf2
; Two-register result: even lanes (0,2,4,6) then odd lanes (1,3,5,7) of %src1,
; zero-extended to i32 and multiplied by a broadcast of the pre-extended
; %src2, with the splat as the left-hand mul operand. Only lanes 0-7 of the
; <16 x i16> input are selected by the mask. The expected lowering matches the
; vector-first form: vmovlb.u16 / vmovlt.u16, uxth, two vector-by-scalar
; vmul.i32, and a final vmov into q0.
964 define arm_aapcs_vfpcc <8 x i32> @zext16_ext0_02461357(<16 x i16> %src1, i16 %src2) {
965 ; CHECK-LABEL: zext16_ext0_02461357:
966 ; CHECK: @ %bb.0: @ %entry
967 ; CHECK-NEXT: vmovlb.u16 q1, q0
968 ; CHECK-NEXT: uxth r0, r0
969 ; CHECK-NEXT: vmul.i32 q2, q1, r0
970 ; CHECK-NEXT: vmovlt.u16 q0, q0
971 ; CHECK-NEXT: vmul.i32 q1, q0, r0
972 ; CHECK-NEXT: vmov q0, q2
; Even lanes first, then odd lanes, all widened to i32.
975 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
976 %out1 = zext <8 x i16> %shuf1 to <8 x i32>
; Widen the scalar, insert it in lane 0 and broadcast that lane.
977 %ext = zext i16 %src2 to i32
978 %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
979 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
; Splat on the left, vector on the right.
980 %out = mul <8 x i32> %shuf2, %out1
; Even i8 lanes of %src1, sign-extended to i16, multiplied by %src2, where the
; scalar is splatted as i8 and the splat is sign-extended afterwards. The
; vector is the left-hand mul operand. Expected lowering: vdup.16 of the
; scalar followed by vmullb.s8.
; NOTE(review): the name spells lanes 0..30 but the mask selects 0..14 -
; confirm the name is intentional.
984 define arm_aapcs_vfpcc <8 x i16> @sext8_024681012141618202224262830_0ext(<16 x i8> %src1, i8 %src2) {
985 ; CHECK-LABEL: sext8_024681012141618202224262830_0ext:
986 ; CHECK: @ %bb.0: @ %entry
987 ; CHECK-NEXT: vdup.16 q1, r0
988 ; CHECK-NEXT: vmullb.s8 q0, q0, q1
; Pick the even lanes and widen them.
991 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
992 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
; Broadcast lane 0 at i8 width, then widen the splat.
993 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
994 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
995 %out2 = sext <8 x i8> %shuf2 to <8 x i16>
996 %out = mul <8 x i16> %out1, %out2
; Even i8 lanes of %src1, sign-extended to i16, multiplied by a sign-extended
; i8 splat of %src2, with the splat as the left-hand mul operand. Expected
; lowering: vdup.16 of the scalar followed by vmullb.s8 with the splat
; register as the first source.
; NOTE(review): the name spells lanes 0..30 but the mask selects 0..14 -
; confirm the name is intentional.
1000 define arm_aapcs_vfpcc <8 x i16> @sext8_0ext_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
1001 ; CHECK-LABEL: sext8_0ext_024681012141618202224262830:
1002 ; CHECK: @ %bb.0: @ %entry
1003 ; CHECK-NEXT: vdup.16 q1, r0
1004 ; CHECK-NEXT: vmullb.s8 q0, q1, q0
; Pick the even lanes and widen them.
1007 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1008 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
; Broadcast lane 0 at i8 width, then widen the splat.
1009 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1010 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1011 %out2 = sext <8 x i8> %shuf2 to <8 x i16>
; Splat on the left, vector on the right.
1012 %out = mul <8 x i16> %out2, %out1
; Even i8 lanes of %src1, sign-extended to i16, multiplied by a broadcast of
; %src2 that is sign-extended to i16 before being splatted. The vector is the
; left-hand mul operand. Expected lowering: vmovlb.s8 to widen, sxtb on the
; scalar, then a vector-by-scalar vmul.i16.
; NOTE(review): the name spells lanes 0..30 but the mask selects 0..14 -
; confirm the name is intentional.
1016 define arm_aapcs_vfpcc <8 x i16> @sext8_024681012141618202224262830_ext0(<16 x i8> %src1, i8 %src2) {
1017 ; CHECK-LABEL: sext8_024681012141618202224262830_ext0:
1018 ; CHECK: @ %bb.0: @ %entry
1019 ; CHECK-NEXT: vmovlb.s8 q0, q0
1020 ; CHECK-NEXT: sxtb r0, r0
1021 ; CHECK-NEXT: vmul.i16 q0, q0, r0
; Pick the even lanes and widen them.
1024 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1025 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
; Widen the scalar, insert it in lane 0 and broadcast that lane.
1026 %ext = sext i8 %src2 to i16
1027 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1028 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1029 %out = mul <8 x i16> %out1, %shuf2
; Even i8 lanes of %src1, sign-extended to i16, multiplied by a broadcast of
; the pre-extended %src2, with the splat as the left-hand mul operand. The
; expected lowering matches the vector-first form: vmovlb.s8, sxtb, then a
; vector-by-scalar vmul.i16.
; NOTE(review): the name spells lanes 0..30 but the mask selects 0..14 -
; confirm the name is intentional.
1033 define arm_aapcs_vfpcc <8 x i16> @sext8_ext0_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
1034 ; CHECK-LABEL: sext8_ext0_024681012141618202224262830:
1035 ; CHECK: @ %bb.0: @ %entry
1036 ; CHECK-NEXT: vmovlb.s8 q0, q0
1037 ; CHECK-NEXT: sxtb r0, r0
1038 ; CHECK-NEXT: vmul.i16 q0, q0, r0
; Pick the even lanes and widen them.
1041 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1042 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
; Widen the scalar, insert it in lane 0 and broadcast that lane.
1043 %ext = sext i8 %src2 to i16
1044 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1045 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
; Splat on the left, vector on the right.
1046 %out = mul <8 x i16> %shuf2, %out1
; Odd i8 lanes of %src1, sign-extended to i16, multiplied by %src2, where the
; scalar is splatted as i8 and the splat is sign-extended afterwards. The
; vector is the left-hand mul operand. Expected lowering: vdup.16 of the
; scalar, vrev16.8 on the input (swapping the bytes of each halfword), then
; vmullb.s8.
; NOTE(review): the name spells lanes 1..31 but the mask selects 1..15 -
; confirm the name is intentional.
1050 define arm_aapcs_vfpcc <8 x i16> @sext8_135791113151719212325272931_0ext(<16 x i8> %src1, i8 %src2) {
1051 ; CHECK-LABEL: sext8_135791113151719212325272931_0ext:
1052 ; CHECK: @ %bb.0: @ %entry
1053 ; CHECK-NEXT: vdup.16 q1, r0
1054 ; CHECK-NEXT: vrev16.8 q0, q0
1055 ; CHECK-NEXT: vmullb.s8 q0, q0, q1
; Pick the odd lanes and widen them.
1058 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1059 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
; Broadcast lane 0 at i8 width, then widen the splat.
1060 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1061 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1062 %out2 = sext <8 x i8> %shuf2 to <8 x i16>
1063 %out = mul <8 x i16> %out1, %out2
; Odd i8 lanes of %src1, sign-extended to i16, multiplied by a sign-extended
; i8 splat of %src2, with the splat as the left-hand mul operand. Expected
; lowering: vrev16.8 on the input, vdup.16 of the scalar, then vmullb.s8 with
; the splat register as the first source.
; NOTE(review): the name spells lanes 1..31 but the mask selects 1..15 -
; confirm the name is intentional.
1067 define arm_aapcs_vfpcc <8 x i16> @sext8_0ext_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
1068 ; CHECK-LABEL: sext8_0ext_135791113151719212325272931:
1069 ; CHECK: @ %bb.0: @ %entry
1070 ; CHECK-NEXT: vrev16.8 q0, q0
1071 ; CHECK-NEXT: vdup.16 q1, r0
1072 ; CHECK-NEXT: vmullb.s8 q0, q1, q0
; Pick the odd lanes and widen them.
1075 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1076 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
; Broadcast lane 0 at i8 width, then widen the splat.
1077 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1078 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1079 %out2 = sext <8 x i8> %shuf2 to <8 x i16>
; Splat on the left, vector on the right.
1080 %out = mul <8 x i16> %out2, %out1
; Odd i8 lanes of %src1, sign-extended to i16, multiplied by a broadcast of
; %src2 that is sign-extended to i16 before being splatted. The vector is the
; left-hand mul operand. Expected lowering: vmovlt.s8 to widen the odd lanes,
; sxtb on the scalar, then a vector-by-scalar vmul.i16.
; NOTE(review): the name spells lanes 1..31 but the mask selects 1..15 -
; confirm the name is intentional.
1084 define arm_aapcs_vfpcc <8 x i16> @sext8_135791113151719212325272931_ext0(<16 x i8> %src1, i8 %src2) {
1085 ; CHECK-LABEL: sext8_135791113151719212325272931_ext0:
1086 ; CHECK: @ %bb.0: @ %entry
1087 ; CHECK-NEXT: vmovlt.s8 q0, q0
1088 ; CHECK-NEXT: sxtb r0, r0
1089 ; CHECK-NEXT: vmul.i16 q0, q0, r0
; Pick the odd lanes and widen them.
1092 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1093 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
; Widen the scalar, insert it in lane 0 and broadcast that lane.
1094 %ext = sext i8 %src2 to i16
1095 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1096 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1097 %out = mul <8 x i16> %out1, %shuf2
; Odd i8 lanes of %src1, sign-extended to i16, multiplied by a broadcast of
; the pre-extended %src2, with the splat as the left-hand mul operand. The
; expected lowering matches the vector-first form: vmovlt.s8, sxtb, then a
; vector-by-scalar vmul.i16.
; NOTE(review): the name spells lanes 1..31 but the mask selects 1..15 -
; confirm the name is intentional.
1101 define arm_aapcs_vfpcc <8 x i16> @sext8_ext0_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
1102 ; CHECK-LABEL: sext8_ext0_135791113151719212325272931:
1103 ; CHECK: @ %bb.0: @ %entry
1104 ; CHECK-NEXT: vmovlt.s8 q0, q0
1105 ; CHECK-NEXT: sxtb r0, r0
1106 ; CHECK-NEXT: vmul.i16 q0, q0, r0
; Pick the odd lanes and widen them.
1109 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1110 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
; Widen the scalar, insert it in lane 0 and broadcast that lane.
1111 %ext = sext i8 %src2 to i16
1112 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1113 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
; Splat on the left, vector on the right.
1114 %out = mul <8 x i16> %shuf2, %out1
; Two-register result: the even lanes (0..14) followed by the odd lanes
; (1..15) of %src1 are sign-extended to i16 and multiplied by a sign-extended
; i8 splat of %src2. Only lanes 0-15 of the <32 x i8> input are selected by
; the mask. The vector is the left-hand mul operand. Expected lowering: one
; vdup.8 of the scalar, a vrev16.8 copy of the input for the odd lanes, and
; two vmullb.s8 (q0 = even-lane products, q1 = odd-lane products).
1118 define arm_aapcs_vfpcc <16 x i16> @sext8_0246810121413579111315_0ext(<32 x i8> %src1, i8 %src2) {
1119 ; CHECK-LABEL: sext8_0246810121413579111315_0ext:
1120 ; CHECK: @ %bb.0: @ %entry
1121 ; CHECK-NEXT: vdup.8 q2, r0
1122 ; CHECK-NEXT: vrev16.8 q1, q0
1123 ; CHECK-NEXT: vmullb.s8 q1, q1, q2
1124 ; CHECK-NEXT: vmullb.s8 q0, q0, q2
; Even lanes first, then odd lanes, all widened to i16.
1127 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1128 %out1 = sext <16 x i8> %shuf1 to <16 x i16>
; Broadcast lane 0 at i8 width, then widen the splat.
1129 %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
1130 %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
1131 %out2 = sext <16 x i8> %shuf2 to <16 x i16>
1132 %out = mul <16 x i16> %out1, %out2
; Two-register result: even lanes (0..14) then odd lanes (1..15) of %src1,
; sign-extended to i16 and multiplied by a sign-extended i8 splat of %src2,
; with the splat as the left-hand mul operand. Only lanes 0-15 of the
; <32 x i8> input are selected by the mask. Expected lowering: vrev16.8 copy
; of the input, vdup.8 of the scalar, and two vmullb.s8 with the splat
; register as the first source.
1136 define arm_aapcs_vfpcc <16 x i16> @sext8_0ext_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
1137 ; CHECK-LABEL: sext8_0ext_0246810121413579111315:
1138 ; CHECK: @ %bb.0: @ %entry
1139 ; CHECK-NEXT: vrev16.8 q1, q0
1140 ; CHECK-NEXT: vdup.8 q2, r0
1141 ; CHECK-NEXT: vmullb.s8 q1, q2, q1
1142 ; CHECK-NEXT: vmullb.s8 q0, q2, q0
; Even lanes first, then odd lanes, all widened to i16.
1145 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1146 %out1 = sext <16 x i8> %shuf1 to <16 x i16>
; Broadcast lane 0 at i8 width, then widen the splat.
1147 %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
1148 %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
1149 %out2 = sext <16 x i8> %shuf2 to <16 x i16>
; Splat on the left, vector on the right.
1150 %out = mul <16 x i16> %out2, %out1
; Two-register result: even lanes (0..14) then odd lanes (1..15) of %src1,
; sign-extended to i16 and multiplied by a broadcast of %src2 that is
; sign-extended to i16 before being splatted. Only lanes 0-15 of the
; <32 x i8> input are selected by the mask. The vector is the left-hand mul
; operand. Expected lowering: vmovlb.s8 / vmovlt.s8 to widen the even / odd
; lanes, sxtb on the scalar, a vector-by-scalar vmul.i16 for each half, and a
; final vmov that places the even-lane product in q0.
1154 define arm_aapcs_vfpcc <16 x i16> @sext8_0246810121413579111315_ext0(<32 x i8> %src1, i8 %src2) {
1155 ; CHECK-LABEL: sext8_0246810121413579111315_ext0:
1156 ; CHECK: @ %bb.0: @ %entry
1157 ; CHECK-NEXT: vmovlb.s8 q1, q0
1158 ; CHECK-NEXT: sxtb r0, r0
1159 ; CHECK-NEXT: vmul.i16 q2, q1, r0
1160 ; CHECK-NEXT: vmovlt.s8 q0, q0
1161 ; CHECK-NEXT: vmul.i16 q1, q0, r0
1162 ; CHECK-NEXT: vmov q0, q2
; Even lanes first, then odd lanes, all widened to i16.
1165 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1166 %out1 = sext <16 x i8> %shuf1 to <16 x i16>
; Widen the scalar, insert it in lane 0 and broadcast that lane.
1167 %ext = sext i8 %src2 to i16
1168 %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
1169 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
1170 %out = mul <16 x i16> %out1, %shuf2
; Two-register result: even lanes (0..14) then odd lanes (1..15) of %src1,
; sign-extended to i16 and multiplied by a broadcast of the pre-extended
; %src2, with the splat as the left-hand mul operand. Only lanes 0-15 of the
; <32 x i8> input are selected by the mask. The expected lowering matches the
; vector-first form: vmovlb.s8 / vmovlt.s8, sxtb, two vector-by-scalar
; vmul.i16, and a final vmov into q0.
1174 define arm_aapcs_vfpcc <16 x i16> @sext8_ext0_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
1175 ; CHECK-LABEL: sext8_ext0_0246810121413579111315:
1176 ; CHECK: @ %bb.0: @ %entry
1177 ; CHECK-NEXT: vmovlb.s8 q1, q0
1178 ; CHECK-NEXT: sxtb r0, r0
1179 ; CHECK-NEXT: vmul.i16 q2, q1, r0
1180 ; CHECK-NEXT: vmovlt.s8 q0, q0
1181 ; CHECK-NEXT: vmul.i16 q1, q0, r0
1182 ; CHECK-NEXT: vmov q0, q2
; Even lanes first, then odd lanes, all widened to i16.
1185 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1186 %out1 = sext <16 x i8> %shuf1 to <16 x i16>
; Widen the scalar, insert it in lane 0 and broadcast that lane.
1187 %ext = sext i8 %src2 to i16
1188 %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
1189 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
; Splat on the left, vector on the right.
1190 %out = mul <16 x i16> %shuf2, %out1
; Even i8 lanes of %src1, zero-extended to i16, multiplied by %src2, where the
; scalar is splatted as i8 and the splat is zero-extended afterwards. The
; vector is the left-hand mul operand. Expected lowering: vdup.16 of the
; scalar followed by vmullb.u8.
; NOTE(review): the name spells lanes 0..30 but the mask selects 0..14 -
; confirm the name is intentional.
1194 define arm_aapcs_vfpcc <8 x i16> @zext8_024681012141618202224262830_0ext(<16 x i8> %src1, i8 %src2) {
1195 ; CHECK-LABEL: zext8_024681012141618202224262830_0ext:
1196 ; CHECK: @ %bb.0: @ %entry
1197 ; CHECK-NEXT: vdup.16 q1, r0
1198 ; CHECK-NEXT: vmullb.u8 q0, q0, q1
; Pick the even lanes and widen them.
1201 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1202 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
; Broadcast lane 0 at i8 width, then widen the splat.
1203 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1204 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1205 %out2 = zext <8 x i8> %shuf2 to <8 x i16>
1206 %out = mul <8 x i16> %out1, %out2
; Even i8 lanes of %src1, zero-extended to i16, multiplied by a zero-extended
; i8 splat of %src2, with the splat as the left-hand mul operand. Expected
; lowering: vdup.16 of the scalar followed by vmullb.u8 with the splat
; register as the first source.
; NOTE(review): the name spells lanes 0..30 but the mask selects 0..14 -
; confirm the name is intentional.
1210 define arm_aapcs_vfpcc <8 x i16> @zext8_0ext_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
1211 ; CHECK-LABEL: zext8_0ext_024681012141618202224262830:
1212 ; CHECK: @ %bb.0: @ %entry
1213 ; CHECK-NEXT: vdup.16 q1, r0
1214 ; CHECK-NEXT: vmullb.u8 q0, q1, q0
; Pick the even lanes and widen them.
1217 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1218 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
; Broadcast lane 0 at i8 width, then widen the splat.
1219 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1220 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1221 %out2 = zext <8 x i8> %shuf2 to <8 x i16>
; Splat on the left, vector on the right.
1222 %out = mul <8 x i16> %out2, %out1
; Even i8 lanes of %src1, zero-extended to i16, multiplied by a broadcast of
; %src2 that is zero-extended to i16 before being splatted. The vector is the
; left-hand mul operand. Expected lowering: vmovlb.u8 to widen, uxtb on the
; scalar, then a vector-by-scalar vmul.i16.
; NOTE(review): the name spells lanes 0..30 but the mask selects 0..14 -
; confirm the name is intentional.
1226 define arm_aapcs_vfpcc <8 x i16> @zext8_024681012141618202224262830_ext0(<16 x i8> %src1, i8 %src2) {
1227 ; CHECK-LABEL: zext8_024681012141618202224262830_ext0:
1228 ; CHECK: @ %bb.0: @ %entry
1229 ; CHECK-NEXT: vmovlb.u8 q0, q0
1230 ; CHECK-NEXT: uxtb r0, r0
1231 ; CHECK-NEXT: vmul.i16 q0, q0, r0
; Pick the even lanes and widen them.
1234 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1235 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
; Widen the scalar, insert it in lane 0 and broadcast that lane.
1236 %ext = zext i8 %src2 to i16
1237 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1238 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1239 %out = mul <8 x i16> %out1, %shuf2
; Even i8 lanes of %src1, zero-extended to i16, multiplied by a broadcast of
; the pre-extended %src2, with the splat as the left-hand mul operand. The
; expected lowering matches the vector-first form: vmovlb.u8, uxtb, then a
; vector-by-scalar vmul.i16.
; NOTE(review): the name spells lanes 0..30 but the mask selects 0..14 -
; confirm the name is intentional.
1243 define arm_aapcs_vfpcc <8 x i16> @zext8_ext0_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
1244 ; CHECK-LABEL: zext8_ext0_024681012141618202224262830:
1245 ; CHECK: @ %bb.0: @ %entry
1246 ; CHECK-NEXT: vmovlb.u8 q0, q0
1247 ; CHECK-NEXT: uxtb r0, r0
1248 ; CHECK-NEXT: vmul.i16 q0, q0, r0
; Pick the even lanes and widen them.
1251 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1252 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
; Widen the scalar, insert it in lane 0 and broadcast that lane.
1253 %ext = zext i8 %src2 to i16
1254 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1255 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
; Splat on the left, vector on the right.
1256 %out = mul <8 x i16> %shuf2, %out1
; Odd i8 lanes of %src1, zero-extended to i16, multiplied by %src2, where the
; scalar is splatted as i8 and the splat is zero-extended afterwards. The
; vector is the left-hand mul operand. Expected lowering: vdup.16 of the
; scalar, vrev16.8 on the input (swapping the bytes of each halfword), then
; vmullb.u8.
; NOTE(review): the name spells lanes 1..31 but the mask selects 1..15 -
; confirm the name is intentional.
1260 define arm_aapcs_vfpcc <8 x i16> @zext8_135791113151719212325272931_0ext(<16 x i8> %src1, i8 %src2) {
1261 ; CHECK-LABEL: zext8_135791113151719212325272931_0ext:
1262 ; CHECK: @ %bb.0: @ %entry
1263 ; CHECK-NEXT: vdup.16 q1, r0
1264 ; CHECK-NEXT: vrev16.8 q0, q0
1265 ; CHECK-NEXT: vmullb.u8 q0, q0, q1
; Pick the odd lanes and widen them.
1268 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1269 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
; Broadcast lane 0 at i8 width, then widen the splat.
1270 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1271 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1272 %out2 = zext <8 x i8> %shuf2 to <8 x i16>
1273 %out = mul <8 x i16> %out1, %out2
; Odd i8 lanes of %src1, zero-extended to i16, multiplied by a zero-extended
; i8 splat of %src2, with the splat as the left-hand mul operand. Expected
; lowering: vrev16.8 on the input, vdup.16 of the scalar, then vmullb.u8 with
; the splat register as the first source.
; NOTE(review): the name spells lanes 1..31 but the mask selects 1..15 -
; confirm the name is intentional.
1277 define arm_aapcs_vfpcc <8 x i16> @zext8_0ext_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
1278 ; CHECK-LABEL: zext8_0ext_135791113151719212325272931:
1279 ; CHECK: @ %bb.0: @ %entry
1280 ; CHECK-NEXT: vrev16.8 q0, q0
1281 ; CHECK-NEXT: vdup.16 q1, r0
1282 ; CHECK-NEXT: vmullb.u8 q0, q1, q0
; Pick the odd lanes and widen them.
1285 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1286 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
; Broadcast lane 0 at i8 width, then widen the splat.
1287 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1288 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1289 %out2 = zext <8 x i8> %shuf2 to <8 x i16>
; Splat on the left, vector on the right.
1290 %out = mul <8 x i16> %out2, %out1
; Odd i8 lanes of %src1, zero-extended to i16, multiplied by a broadcast of
; %src2 that is zero-extended to i16 before being splatted. The vector is the
; left-hand mul operand. Expected lowering: vmovlt.u8 to widen the odd lanes,
; uxtb on the scalar, then a vector-by-scalar vmul.i16.
; NOTE(review): the name spells lanes 1..31 but the mask selects 1..15 -
; confirm the name is intentional.
1294 define arm_aapcs_vfpcc <8 x i16> @zext8_135791113151719212325272931_ext0(<16 x i8> %src1, i8 %src2) {
1295 ; CHECK-LABEL: zext8_135791113151719212325272931_ext0:
1296 ; CHECK: @ %bb.0: @ %entry
1297 ; CHECK-NEXT: vmovlt.u8 q0, q0
1298 ; CHECK-NEXT: uxtb r0, r0
1299 ; CHECK-NEXT: vmul.i16 q0, q0, r0
; Pick the odd lanes and widen them.
1302 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1303 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
; Widen the scalar, insert it in lane 0 and broadcast that lane.
1304 %ext = zext i8 %src2 to i16
1305 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1306 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1307 %out = mul <8 x i16> %out1, %shuf2
; Odd i8 lanes of %src1, zero-extended to i16, multiplied by a broadcast of
; the pre-extended %src2, with the splat as the left-hand mul operand. The
; expected lowering matches the vector-first form: vmovlt.u8, uxtb, then a
; vector-by-scalar vmul.i16.
; NOTE(review): the name spells lanes 1..31 but the mask selects 1..15 -
; confirm the name is intentional.
1311 define arm_aapcs_vfpcc <8 x i16> @zext8_ext0_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
1312 ; CHECK-LABEL: zext8_ext0_135791113151719212325272931:
1313 ; CHECK: @ %bb.0: @ %entry
1314 ; CHECK-NEXT: vmovlt.u8 q0, q0
1315 ; CHECK-NEXT: uxtb r0, r0
1316 ; CHECK-NEXT: vmul.i16 q0, q0, r0
; Pick the odd lanes and widen them.
1319 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1320 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
; Widen the scalar, insert it in lane 0 and broadcast that lane.
1321 %ext = zext i8 %src2 to i16
1322 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1323 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
; Splat on the left, vector on the right.
1324 %out = mul <8 x i16> %shuf2, %out1
; Two-register result: the even lanes (0..14) followed by the odd lanes
; (1..15) of %src1 are zero-extended to i16 and multiplied by a zero-extended
; i8 splat of %src2. Only lanes 0-15 of the <32 x i8> input are selected by
; the mask. The vector is the left-hand mul operand. Expected lowering: one
; vdup.8 of the scalar, a vrev16.8 copy of the input for the odd lanes, and
; two vmullb.u8 (q0 = even-lane products, q1 = odd-lane products).
1328 define arm_aapcs_vfpcc <16 x i16> @zext8_0246810121413579111315_0ext(<32 x i8> %src1, i8 %src2) {
1329 ; CHECK-LABEL: zext8_0246810121413579111315_0ext:
1330 ; CHECK: @ %bb.0: @ %entry
1331 ; CHECK-NEXT: vdup.8 q2, r0
1332 ; CHECK-NEXT: vrev16.8 q1, q0
1333 ; CHECK-NEXT: vmullb.u8 q1, q1, q2
1334 ; CHECK-NEXT: vmullb.u8 q0, q0, q2
; Even lanes first, then odd lanes, all widened to i16.
1337 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1338 %out1 = zext <16 x i8> %shuf1 to <16 x i16>
; Broadcast lane 0 at i8 width, then widen the splat.
1339 %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
1340 %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
1341 %out2 = zext <16 x i8> %shuf2 to <16 x i16>
1342 %out = mul <16 x i16> %out1, %out2
; Two-register result: even lanes (0..14) then odd lanes (1..15) of %src1,
; zero-extended to i16 and multiplied by a zero-extended i8 splat of %src2,
; with the splat as the left-hand mul operand. Only lanes 0-15 of the
; <32 x i8> input are selected by the mask. Expected lowering: vrev16.8 copy
; of the input, vdup.8 of the scalar, and two vmullb.u8 with the splat
; register as the first source.
1346 define arm_aapcs_vfpcc <16 x i16> @zext8_0ext_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
1347 ; CHECK-LABEL: zext8_0ext_0246810121413579111315:
1348 ; CHECK: @ %bb.0: @ %entry
1349 ; CHECK-NEXT: vrev16.8 q1, q0
1350 ; CHECK-NEXT: vdup.8 q2, r0
1351 ; CHECK-NEXT: vmullb.u8 q1, q2, q1
1352 ; CHECK-NEXT: vmullb.u8 q0, q2, q0
; Even lanes first, then odd lanes, all widened to i16.
1355 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1356 %out1 = zext <16 x i8> %shuf1 to <16 x i16>
; Broadcast lane 0 at i8 width, then widen the splat.
1357 %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
1358 %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
1359 %out2 = zext <16 x i8> %shuf2 to <16 x i16>
; Splat on the left, vector on the right.
1360 %out = mul <16 x i16> %out2, %out1
; Two-register result: even lanes (0..14) then odd lanes (1..15) of %src1,
; zero-extended to i16 and multiplied by a broadcast of %src2 that is
; zero-extended to i16 before being splatted. Only lanes 0-15 of the
; <32 x i8> input are selected by the mask. The vector is the left-hand mul
; operand. Expected lowering: vmovlb.u8 / vmovlt.u8 to widen the even / odd
; lanes, uxtb on the scalar, a vector-by-scalar vmul.i16 for each half, and a
; final vmov that places the even-lane product in q0.
1364 define arm_aapcs_vfpcc <16 x i16> @zext8_0246810121413579111315_ext0(<32 x i8> %src1, i8 %src2) {
1365 ; CHECK-LABEL: zext8_0246810121413579111315_ext0:
1366 ; CHECK: @ %bb.0: @ %entry
1367 ; CHECK-NEXT: vmovlb.u8 q1, q0
1368 ; CHECK-NEXT: uxtb r0, r0
1369 ; CHECK-NEXT: vmul.i16 q2, q1, r0
1370 ; CHECK-NEXT: vmovlt.u8 q0, q0
1371 ; CHECK-NEXT: vmul.i16 q1, q0, r0
1372 ; CHECK-NEXT: vmov q0, q2
; Even lanes first, then odd lanes, all widened to i16.
1375 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1376 %out1 = zext <16 x i8> %shuf1 to <16 x i16>
; Widen the scalar, insert it in lane 0 and broadcast that lane.
1377 %ext = zext i8 %src2 to i16
1378 %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
1379 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
1380 %out = mul <16 x i16> %out1, %shuf2
; Two-register result: even lanes (0..14) then odd lanes (1..15) of %src1,
; zero-extended to i16 and multiplied by a broadcast of the pre-extended
; %src2, with the splat as the left-hand mul operand. Only lanes 0-15 of the
; <32 x i8> input are selected by the mask. The expected lowering matches the
; vector-first form: vmovlb.u8 / vmovlt.u8, uxtb, two vector-by-scalar
; vmul.i16, and a final vmov into q0.
1384 define arm_aapcs_vfpcc <16 x i16> @zext8_ext0_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
1385 ; CHECK-LABEL: zext8_ext0_0246810121413579111315:
1386 ; CHECK: @ %bb.0: @ %entry
1387 ; CHECK-NEXT: vmovlb.u8 q1, q0
1388 ; CHECK-NEXT: uxtb r0, r0
1389 ; CHECK-NEXT: vmul.i16 q2, q1, r0
1390 ; CHECK-NEXT: vmovlt.u8 q0, q0
1391 ; CHECK-NEXT: vmul.i16 q1, q0, r0
1392 ; CHECK-NEXT: vmov q0, q2
; Even lanes first, then odd lanes, all widened to i16.
1395 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1396 %out1 = zext <16 x i8> %shuf1 to <16 x i16>
; Widen the scalar, insert it in lane 0 and broadcast that lane.
1397 %ext = zext i8 %src2 to i16
1398 %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
1399 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
; Splat on the left, vector on the right.
1400 %out = mul <16 x i16> %shuf2, %out1