1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
; Multiplies sext(lanes 0 and 2 of %src1) by sext(splat of %src2), both widened
; from <2 x i32> to <2 x i64>. The splat is built at i32 width and is the second
; mul operand. The autogenerated assertions expect a single vmullb.s32.
4 define arm_aapcs_vfpcc <2 x i64> @sext32_0246_0ext(<4 x i32> %src1, i32 %src2) {
5 ; CHECK-LABEL: sext32_0246_0ext:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
8 ; CHECK-NEXT: vmullb.s32 q1, q0, q2
9 ; CHECK-NEXT: vmov q0, q1
12 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
13 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
14 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
15 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
16 %out2 = sext <2 x i32> %shuf2 to <2 x i64>
17 %out = mul <2 x i64> %out1, %out2
; Commuted form of the lanes-0/2 sext test: the sign-extended i32 splat of
; %src2 is the first mul operand. The autogenerated assertions expect a single
; vmullb.s32 with the splat register as its first source.
21 define arm_aapcs_vfpcc <2 x i64> @sext32_0ext_0246(<4 x i32> %src1, i32 %src2) {
22 ; CHECK-LABEL: sext32_0ext_0246:
23 ; CHECK: @ %bb.0: @ %entry
24 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
25 ; CHECK-NEXT: vmullb.s32 q1, q2, q0
26 ; CHECK-NEXT: vmov q0, q1
29 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
30 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
31 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
32 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
33 %out2 = sext <2 x i32> %shuf2 to <2 x i64>
34 %out = mul <2 x i64> %out2, %out1
; Multiplies sext(lanes 0 and 2 of %src1) by a <2 x i64> splat of %src2, where
; %src2 is sign-extended to i64 as a scalar *before* being splatted. The
; autogenerated assertions show scalar umull/mla arithmetic, not a vmullb.
38 define arm_aapcs_vfpcc <2 x i64> @sext32_0246_ext0(<4 x i32> %src1, i32 %src2) {
39 ; CHECK-LABEL: sext32_0246_ext0:
40 ; CHECK: @ %bb.0: @ %entry
41 ; CHECK-NEXT: .save {r4, r5, r7, lr}
42 ; CHECK-NEXT: push {r4, r5, r7, lr}
43 ; CHECK-NEXT: vmov r1, s2
44 ; CHECK-NEXT: vmov r3, s0
45 ; CHECK-NEXT: umull lr, r12, r1, r0
46 ; CHECK-NEXT: umull r2, r5, r3, r0
47 ; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
48 ; CHECK-NEXT: asrs r2, r0, #31
49 ; CHECK-NEXT: mla r4, r1, r2, r12
50 ; CHECK-NEXT: asrs r1, r1, #31
51 ; CHECK-NEXT: mla r2, r3, r2, r5
52 ; CHECK-NEXT: asrs r3, r3, #31
53 ; CHECK-NEXT: mla r1, r1, r0, r4
54 ; CHECK-NEXT: mla r0, r3, r0, r2
55 ; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
56 ; CHECK-NEXT: pop {r4, r5, r7, pc}
58 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
59 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
60 %ext = sext i32 %src2 to i64
61 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
62 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
63 %out = mul <2 x i64> %out1, %shuf2
; Commuted form of the extend-then-splat sext test on lanes 0 and 2: the
; <2 x i64> splat of sext(%src2) is the first mul operand. The autogenerated
; assertions show scalar umull/mla arithmetic, not a vmullb.
67 define arm_aapcs_vfpcc <2 x i64> @sext32_ext0_0246(<4 x i32> %src1, i32 %src2) {
68 ; CHECK-LABEL: sext32_ext0_0246:
69 ; CHECK: @ %bb.0: @ %entry
70 ; CHECK-NEXT: .save {r4, r5, r7, lr}
71 ; CHECK-NEXT: push {r4, r5, r7, lr}
72 ; CHECK-NEXT: vmov r1, s2
73 ; CHECK-NEXT: asrs r4, r0, #31
74 ; CHECK-NEXT: vmov r3, s0
75 ; CHECK-NEXT: umull lr, r12, r0, r1
76 ; CHECK-NEXT: umull r2, r5, r0, r3
77 ; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
78 ; CHECK-NEXT: asrs r2, r1, #31
79 ; CHECK-NEXT: mla r2, r0, r2, r12
80 ; CHECK-NEXT: mla r1, r4, r1, r2
81 ; CHECK-NEXT: asrs r2, r3, #31
82 ; CHECK-NEXT: mla r0, r0, r2, r5
83 ; CHECK-NEXT: mla r0, r4, r3, r0
84 ; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
85 ; CHECK-NEXT: pop {r4, r5, r7, pc}
87 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
88 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
89 %ext = sext i32 %src2 to i64
90 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
91 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
92 %out = mul <2 x i64> %shuf2, %out1
; Multiplies sext(lanes 1 and 3 of %src1) by sext(splat of %src2), both widened
; from <2 x i32> to <2 x i64>; the i32 splat is the second mul operand. The
; autogenerated assertions expect a vrev64.32 of the source feeding a vmullb.s32.
96 define arm_aapcs_vfpcc <2 x i64> @sext32_1357_0ext(<4 x i32> %src1, i32 %src2) {
97 ; CHECK-LABEL: sext32_1357_0ext:
98 ; CHECK: @ %bb.0: @ %entry
99 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r0
100 ; CHECK-NEXT: vrev64.32 q2, q0
101 ; CHECK-NEXT: vmullb.s32 q0, q2, q1
104 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
105 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
106 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
107 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
108 %out2 = sext <2 x i32> %shuf2 to <2 x i64>
109 %out = mul <2 x i64> %out1, %out2
; Commuted form of the lanes-1/3 sext test: the sign-extended i32 splat of
; %src2 is the first mul operand. The autogenerated assertions expect a
; vrev64.32 of the source feeding a vmullb.s32 whose first source is the splat.
113 define arm_aapcs_vfpcc <2 x i64> @sext32_0ext_1357(<4 x i32> %src1, i32 %src2) {
114 ; CHECK-LABEL: sext32_0ext_1357:
115 ; CHECK: @ %bb.0: @ %entry
116 ; CHECK-NEXT: vrev64.32 q1, q0
117 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
118 ; CHECK-NEXT: vmullb.s32 q0, q2, q1
121 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
122 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
123 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
124 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
125 %out2 = sext <2 x i32> %shuf2 to <2 x i64>
126 %out = mul <2 x i64> %out2, %out1
; Multiplies sext(lanes 1 and 3 of %src1) by a <2 x i64> splat of %src2, where
; %src2 is sign-extended to i64 as a scalar before being splatted. The
; autogenerated assertions show a vrev64.32 followed by scalar umull/mla
; arithmetic, not a vmullb.
130 define arm_aapcs_vfpcc <2 x i64> @sext32_1357_ext0(<4 x i32> %src1, i32 %src2) {
131 ; CHECK-LABEL: sext32_1357_ext0:
132 ; CHECK: @ %bb.0: @ %entry
133 ; CHECK-NEXT: .save {r4, r5, r7, lr}
134 ; CHECK-NEXT: push {r4, r5, r7, lr}
135 ; CHECK-NEXT: vrev64.32 q1, q0
136 ; CHECK-NEXT: vmov r1, s6
137 ; CHECK-NEXT: vmov r3, s4
138 ; CHECK-NEXT: umull lr, r12, r1, r0
139 ; CHECK-NEXT: umull r2, r5, r3, r0
140 ; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
141 ; CHECK-NEXT: asrs r2, r0, #31
142 ; CHECK-NEXT: mla r4, r1, r2, r12
143 ; CHECK-NEXT: asrs r1, r1, #31
144 ; CHECK-NEXT: mla r2, r3, r2, r5
145 ; CHECK-NEXT: asrs r3, r3, #31
146 ; CHECK-NEXT: mla r1, r1, r0, r4
147 ; CHECK-NEXT: mla r0, r3, r0, r2
148 ; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
149 ; CHECK-NEXT: pop {r4, r5, r7, pc}
151 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
152 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
153 %ext = sext i32 %src2 to i64
154 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
155 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
156 %out = mul <2 x i64> %out1, %shuf2
; Commuted form of the extend-then-splat sext test on lanes 1 and 3: the
; <2 x i64> splat of sext(%src2) is the first mul operand. The autogenerated
; assertions show a vrev64.32 followed by scalar umull/mla arithmetic.
160 define arm_aapcs_vfpcc <2 x i64> @sext32_ext0_1357(<4 x i32> %src1, i32 %src2) {
161 ; CHECK-LABEL: sext32_ext0_1357:
162 ; CHECK: @ %bb.0: @ %entry
163 ; CHECK-NEXT: .save {r4, r5, r7, lr}
164 ; CHECK-NEXT: push {r4, r5, r7, lr}
165 ; CHECK-NEXT: vrev64.32 q1, q0
166 ; CHECK-NEXT: asrs r4, r0, #31
167 ; CHECK-NEXT: vmov r1, s6
168 ; CHECK-NEXT: vmov r3, s4
169 ; CHECK-NEXT: umull lr, r12, r0, r1
170 ; CHECK-NEXT: umull r2, r5, r0, r3
171 ; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
172 ; CHECK-NEXT: asrs r2, r1, #31
173 ; CHECK-NEXT: mla r2, r0, r2, r12
174 ; CHECK-NEXT: mla r1, r4, r1, r2
175 ; CHECK-NEXT: asrs r2, r3, #31
176 ; CHECK-NEXT: mla r0, r0, r2, r5
177 ; CHECK-NEXT: mla r0, r4, r3, r0
178 ; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
179 ; CHECK-NEXT: pop {r4, r5, r7, pc}
181 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
182 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
183 %ext = sext i32 %src2 to i64
184 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
185 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
186 %out = mul <2 x i64> %shuf2, %out1
; Wider variant: takes lanes 0, 2, 1, 3 of an <8 x i32> source, sign-extends
; them to <4 x i64> and multiplies by sext(splat of %src2); the i32 splat is the
; second mul operand. The autogenerated assertions expect two vmullb.s32, with
; vmov.f32 lane moves preparing their inputs.
190 define arm_aapcs_vfpcc <4 x i64> @sext32_0213_0ext(<8 x i32> %src1, i32 %src2) {
191 ; CHECK-LABEL: sext32_0213_0ext:
192 ; CHECK: @ %bb.0: @ %entry
193 ; CHECK-NEXT: .vsave {d8, d9}
194 ; CHECK-NEXT: vpush {d8, d9}
195 ; CHECK-NEXT: vmov q4, q0
196 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
197 ; CHECK-NEXT: vmov.f32 s17, s4
198 ; CHECK-NEXT: vmov.f32 s0, s1
199 ; CHECK-NEXT: vmullb.s32 q2, q4, q3
200 ; CHECK-NEXT: vmov.f32 s2, s3
201 ; CHECK-NEXT: vmullb.s32 q1, q0, q3
202 ; CHECK-NEXT: vmov q0, q2
203 ; CHECK-NEXT: vpop {d8, d9}
206 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
207 %out1 = sext <4 x i32> %shuf1 to <4 x i64>
208 %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
209 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
210 %out2 = sext <4 x i32> %shuf2 to <4 x i64>
211 %out = mul <4 x i64> %out1, %out2
; Commuted form of the <8 x i32> lanes-0/2/1/3 sext test: the sign-extended i32
; splat of %src2 is the first mul operand. The autogenerated assertions expect
; two vmullb.s32 whose first source is the splat register.
215 define arm_aapcs_vfpcc <4 x i64> @sext32_0ext_0213(<8 x i32> %src1, i32 %src2) {
216 ; CHECK-LABEL: sext32_0ext_0213:
217 ; CHECK: @ %bb.0: @ %entry
218 ; CHECK-NEXT: .vsave {d8, d9}
219 ; CHECK-NEXT: vpush {d8, d9}
220 ; CHECK-NEXT: vmov q4, q0
221 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
222 ; CHECK-NEXT: vmov.f32 s17, s4
223 ; CHECK-NEXT: vmov.f32 s0, s1
224 ; CHECK-NEXT: vmullb.s32 q2, q3, q4
225 ; CHECK-NEXT: vmov.f32 s2, s3
226 ; CHECK-NEXT: vmullb.s32 q1, q3, q0
227 ; CHECK-NEXT: vmov q0, q2
228 ; CHECK-NEXT: vpop {d8, d9}
231 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
232 %out1 = sext <4 x i32> %shuf1 to <4 x i64>
233 %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
234 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
235 %out2 = sext <4 x i32> %shuf2 to <4 x i64>
236 %out = mul <4 x i64> %out2, %out1
; <8 x i32> lanes 0, 2, 1, 3 sign-extended to <4 x i64> and multiplied by a
; <4 x i64> splat of %src2, where %src2 is sign-extended to i64 as a scalar
; before being splatted. The autogenerated assertions show fully scalar
; umull/mla arithmetic for all four lanes, not a vmullb.
240 define arm_aapcs_vfpcc <4 x i64> @sext32_0213_ext0(<8 x i32> %src1, i32 %src2) {
241 ; CHECK-LABEL: sext32_0213_ext0:
242 ; CHECK: @ %bb.0: @ %entry
243 ; CHECK-NEXT: .save {r4, r5, r7, lr}
244 ; CHECK-NEXT: push {r4, r5, r7, lr}
245 ; CHECK-NEXT: vmov.f32 s4, s1
246 ; CHECK-NEXT: vmov.f32 s6, s3
247 ; CHECK-NEXT: vmov r3, s4
248 ; CHECK-NEXT: vmov r1, s6
249 ; CHECK-NEXT: umull r2, r5, r3, r0
250 ; CHECK-NEXT: umull lr, r12, r1, r0
251 ; CHECK-NEXT: vmov q1[2], q1[0], r2, lr
252 ; CHECK-NEXT: asrs r2, r0, #31
253 ; CHECK-NEXT: mla r4, r1, r2, r12
254 ; CHECK-NEXT: asrs r1, r1, #31
255 ; CHECK-NEXT: mla r5, r3, r2, r5
256 ; CHECK-NEXT: asrs r3, r3, #31
257 ; CHECK-NEXT: mla r1, r1, r0, r4
258 ; CHECK-NEXT: mla r3, r3, r0, r5
259 ; CHECK-NEXT: vmov q1[3], q1[1], r3, r1
260 ; CHECK-NEXT: vmov r1, s2
261 ; CHECK-NEXT: umull r3, r5, r1, r0
262 ; CHECK-NEXT: mla r5, r1, r2, r5
263 ; CHECK-NEXT: asrs r1, r1, #31
264 ; CHECK-NEXT: mla r12, r1, r0, r5
265 ; CHECK-NEXT: vmov r5, s0
266 ; CHECK-NEXT: umull r4, r1, r5, r0
267 ; CHECK-NEXT: mla r1, r5, r2, r1
268 ; CHECK-NEXT: asrs r2, r5, #31
269 ; CHECK-NEXT: vmov q0[2], q0[0], r4, r3
270 ; CHECK-NEXT: mla r0, r2, r0, r1
271 ; CHECK-NEXT: vmov q0[3], q0[1], r0, r12
272 ; CHECK-NEXT: pop {r4, r5, r7, pc}
274 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
275 %out1 = sext <4 x i32> %shuf1 to <4 x i64>
276 %ext = sext i32 %src2 to i64
277 %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
278 %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
279 %out = mul <4 x i64> %out1, %shuf2
; Commuted form of the extend-then-splat sext test on <8 x i32> lanes
; 0, 2, 1, 3: the <4 x i64> splat of sext(%src2) is the first mul operand. The
; autogenerated assertions show fully scalar umull/mla arithmetic.
283 define arm_aapcs_vfpcc <4 x i64> @sext32_ext0_0213(<8 x i32> %src1, i32 %src2) {
284 ; CHECK-LABEL: sext32_ext0_0213:
285 ; CHECK: @ %bb.0: @ %entry
286 ; CHECK-NEXT: .save {r4, r5, r7, lr}
287 ; CHECK-NEXT: push {r4, r5, r7, lr}
288 ; CHECK-NEXT: vmov.f32 s4, s1
289 ; CHECK-NEXT: asrs r4, r0, #31
290 ; CHECK-NEXT: vmov.f32 s6, s3
291 ; CHECK-NEXT: vmov r3, s4
292 ; CHECK-NEXT: vmov r1, s6
293 ; CHECK-NEXT: umull r2, r5, r0, r3
294 ; CHECK-NEXT: umull lr, r12, r0, r1
295 ; CHECK-NEXT: vmov q1[2], q1[0], r2, lr
296 ; CHECK-NEXT: asrs r2, r1, #31
297 ; CHECK-NEXT: mla r2, r0, r2, r12
298 ; CHECK-NEXT: mla r1, r4, r1, r2
299 ; CHECK-NEXT: asrs r2, r3, #31
300 ; CHECK-NEXT: mla r2, r0, r2, r5
301 ; CHECK-NEXT: mla r2, r4, r3, r2
302 ; CHECK-NEXT: vmov q1[3], q1[1], r2, r1
303 ; CHECK-NEXT: vmov r1, s2
304 ; CHECK-NEXT: umull r2, r3, r0, r1
305 ; CHECK-NEXT: asrs r5, r1, #31
306 ; CHECK-NEXT: mla r3, r0, r5, r3
307 ; CHECK-NEXT: mla r12, r4, r1, r3
308 ; CHECK-NEXT: vmov r3, s0
309 ; CHECK-NEXT: umull r5, r1, r0, r3
310 ; CHECK-NEXT: vmov q0[2], q0[0], r5, r2
311 ; CHECK-NEXT: asrs r2, r3, #31
312 ; CHECK-NEXT: mla r0, r0, r2, r1
313 ; CHECK-NEXT: mla r0, r4, r3, r0
314 ; CHECK-NEXT: vmov q0[3], q0[1], r0, r12
315 ; CHECK-NEXT: pop {r4, r5, r7, pc}
317 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
318 %out1 = sext <4 x i32> %shuf1 to <4 x i64>
319 %ext = sext i32 %src2 to i64
320 %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
321 %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
322 %out = mul <4 x i64> %shuf2, %out1
; Unsigned counterpart of the lanes-0/2 test: multiplies zext(lanes 0 and 2 of
; %src1) by zext(splat of %src2), both widened from <2 x i32> to <2 x i64>; the
; i32 splat is the second mul operand. The autogenerated assertions expect a
; single vmullb.u32.
326 define arm_aapcs_vfpcc <2 x i64> @zext32_0246_0ext(<4 x i32> %src1, i32 %src2) {
327 ; CHECK-LABEL: zext32_0246_0ext:
328 ; CHECK: @ %bb.0: @ %entry
329 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
330 ; CHECK-NEXT: vmullb.u32 q1, q0, q2
331 ; CHECK-NEXT: vmov q0, q1
334 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
335 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
336 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
337 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
338 %out2 = zext <2 x i32> %shuf2 to <2 x i64>
339 %out = mul <2 x i64> %out1, %out2
; Commuted form of the lanes-0/2 zext test: the zero-extended i32 splat of
; %src2 is the first mul operand. The autogenerated assertions expect a single
; vmullb.u32 with the splat register as its first source.
343 define arm_aapcs_vfpcc <2 x i64> @zext32_0ext_0246(<4 x i32> %src1, i32 %src2) {
344 ; CHECK-LABEL: zext32_0ext_0246:
345 ; CHECK: @ %bb.0: @ %entry
346 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
347 ; CHECK-NEXT: vmullb.u32 q1, q2, q0
348 ; CHECK-NEXT: vmov q0, q1
351 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
352 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
353 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
354 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
355 %out2 = zext <2 x i32> %shuf2 to <2 x i64>
356 %out = mul <2 x i64> %out2, %out1
; Multiplies zext(lanes 0 and 2 of %src1) by a <2 x i64> splat of %src2, where
; %src2 is zero-extended to i64 as a scalar before being splatted. The
; autogenerated assertions show two scalar umull instructions, not a vmullb.
360 define arm_aapcs_vfpcc <2 x i64> @zext32_0246_ext0(<4 x i32> %src1, i32 %src2) {
361 ; CHECK-LABEL: zext32_0246_ext0:
362 ; CHECK: @ %bb.0: @ %entry
363 ; CHECK-NEXT: vmov r1, s2
364 ; CHECK-NEXT: vmov r3, s0
365 ; CHECK-NEXT: umull r1, r2, r1, r0
366 ; CHECK-NEXT: umull r0, r3, r3, r0
367 ; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
368 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
371 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
372 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
373 %ext = zext i32 %src2 to i64
374 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
375 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
376 %out = mul <2 x i64> %out1, %shuf2
; Commuted form of the extend-then-splat zext test on lanes 0 and 2: the
; <2 x i64> splat of zext(%src2) is the first mul operand. The autogenerated
; assertions show two scalar umull instructions, not a vmullb.
380 define arm_aapcs_vfpcc <2 x i64> @zext32_ext0_0246(<4 x i32> %src1, i32 %src2) {
381 ; CHECK-LABEL: zext32_ext0_0246:
382 ; CHECK: @ %bb.0: @ %entry
383 ; CHECK-NEXT: vmov r1, s2
384 ; CHECK-NEXT: vmov r3, s0
385 ; CHECK-NEXT: umull r1, r2, r0, r1
386 ; CHECK-NEXT: umull r0, r3, r0, r3
387 ; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
388 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
391 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
392 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
393 %ext = zext i32 %src2 to i64
394 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
395 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
396 %out = mul <2 x i64> %shuf2, %out1
; Multiplies zext(lanes 1 and 3 of %src1) by zext(splat of %src2), both widened
; from <2 x i32> to <2 x i64>; the i32 splat is the second mul operand. The
; autogenerated assertions expect a vrev64.32 of the source feeding a vmullb.u32.
400 define arm_aapcs_vfpcc <2 x i64> @zext32_1357_0ext(<4 x i32> %src1, i32 %src2) {
401 ; CHECK-LABEL: zext32_1357_0ext:
402 ; CHECK: @ %bb.0: @ %entry
403 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r0
404 ; CHECK-NEXT: vrev64.32 q2, q0
405 ; CHECK-NEXT: vmullb.u32 q0, q2, q1
408 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
409 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
410 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
411 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
412 %out2 = zext <2 x i32> %shuf2 to <2 x i64>
413 %out = mul <2 x i64> %out1, %out2
; Commuted form of the lanes-1/3 zext test: the zero-extended i32 splat of
; %src2 is the first mul operand. The autogenerated assertions expect a
; vrev64.32 of the source feeding a vmullb.u32 whose first source is the splat.
417 define arm_aapcs_vfpcc <2 x i64> @zext32_0ext_1357(<4 x i32> %src1, i32 %src2) {
418 ; CHECK-LABEL: zext32_0ext_1357:
419 ; CHECK: @ %bb.0: @ %entry
420 ; CHECK-NEXT: vrev64.32 q1, q0
421 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
422 ; CHECK-NEXT: vmullb.u32 q0, q2, q1
425 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
426 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
427 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
428 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
429 %out2 = zext <2 x i32> %shuf2 to <2 x i64>
430 %out = mul <2 x i64> %out2, %out1
; Multiplies zext(lanes 1 and 3 of %src1) by a <2 x i64> splat of %src2, where
; %src2 is zero-extended to i64 as a scalar before being splatted. The
; autogenerated assertions show a vrev64.32 followed by two scalar umull
; instructions, not a vmullb.
434 define arm_aapcs_vfpcc <2 x i64> @zext32_1357_ext0(<4 x i32> %src1, i32 %src2) {
435 ; CHECK-LABEL: zext32_1357_ext0:
436 ; CHECK: @ %bb.0: @ %entry
437 ; CHECK-NEXT: vrev64.32 q1, q0
438 ; CHECK-NEXT: vmov r1, s6
439 ; CHECK-NEXT: vmov r3, s4
440 ; CHECK-NEXT: umull r1, r2, r1, r0
441 ; CHECK-NEXT: umull r0, r3, r3, r0
442 ; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
443 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
446 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
447 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
448 %ext = zext i32 %src2 to i64
449 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
450 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
451 %out = mul <2 x i64> %out1, %shuf2
; Commuted form of the extend-then-splat zext test on lanes 1 and 3: the
; <2 x i64> splat of zext(%src2) is the first mul operand. The autogenerated
; assertions show a vrev64.32 followed by two scalar umull instructions.
455 define arm_aapcs_vfpcc <2 x i64> @zext32_ext0_1357(<4 x i32> %src1, i32 %src2) {
456 ; CHECK-LABEL: zext32_ext0_1357:
457 ; CHECK: @ %bb.0: @ %entry
458 ; CHECK-NEXT: vrev64.32 q1, q0
459 ; CHECK-NEXT: vmov r1, s6
460 ; CHECK-NEXT: vmov r3, s4
461 ; CHECK-NEXT: umull r1, r2, r0, r1
462 ; CHECK-NEXT: umull r0, r3, r0, r3
463 ; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
464 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
467 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
468 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
469 %ext = zext i32 %src2 to i64
470 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
471 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
472 %out = mul <2 x i64> %shuf2, %out1
; Wider unsigned variant: takes lanes 0, 2, 1, 3 of an <8 x i32> source,
; zero-extends them to <4 x i64> and multiplies by zext(splat of %src2); the
; i32 splat is the second mul operand. The autogenerated assertions expect two
; vmullb.u32, with vmov.f32 lane moves preparing their inputs.
476 define arm_aapcs_vfpcc <4 x i64> @zext32_0213_0ext(<8 x i32> %src1, i32 %src2) {
477 ; CHECK-LABEL: zext32_0213_0ext:
478 ; CHECK: @ %bb.0: @ %entry
479 ; CHECK-NEXT: .vsave {d8, d9}
480 ; CHECK-NEXT: vpush {d8, d9}
481 ; CHECK-NEXT: vmov q4, q0
482 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
483 ; CHECK-NEXT: vmov.f32 s17, s4
484 ; CHECK-NEXT: vmov.f32 s0, s1
485 ; CHECK-NEXT: vmullb.u32 q2, q4, q3
486 ; CHECK-NEXT: vmov.f32 s2, s3
487 ; CHECK-NEXT: vmullb.u32 q1, q0, q3
488 ; CHECK-NEXT: vmov q0, q2
489 ; CHECK-NEXT: vpop {d8, d9}
492 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
493 %out1 = zext <4 x i32> %shuf1 to <4 x i64>
494 %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
495 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
496 %out2 = zext <4 x i32> %shuf2 to <4 x i64>
497 %out = mul <4 x i64> %out1, %out2
; Commuted form of the <8 x i32> lanes-0/2/1/3 zext test: the zero-extended i32
; splat of %src2 is the first mul operand. The autogenerated assertions expect
; two vmullb.u32 whose first source is the splat register.
501 define arm_aapcs_vfpcc <4 x i64> @zext32_0ext_0213(<8 x i32> %src1, i32 %src2) {
502 ; CHECK-LABEL: zext32_0ext_0213:
503 ; CHECK: @ %bb.0: @ %entry
504 ; CHECK-NEXT: .vsave {d8, d9}
505 ; CHECK-NEXT: vpush {d8, d9}
506 ; CHECK-NEXT: vmov q4, q0
507 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
508 ; CHECK-NEXT: vmov.f32 s17, s4
509 ; CHECK-NEXT: vmov.f32 s0, s1
510 ; CHECK-NEXT: vmullb.u32 q2, q3, q4
511 ; CHECK-NEXT: vmov.f32 s2, s3
512 ; CHECK-NEXT: vmullb.u32 q1, q3, q0
513 ; CHECK-NEXT: vmov q0, q2
514 ; CHECK-NEXT: vpop {d8, d9}
517 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
518 %out1 = zext <4 x i32> %shuf1 to <4 x i64>
519 %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
520 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
521 %out2 = zext <4 x i32> %shuf2 to <4 x i64>
522 %out = mul <4 x i64> %out2, %out1
; <8 x i32> lanes 0, 2, 1, 3 zero-extended to <4 x i64> and multiplied by a
; <4 x i64> splat of %src2, where %src2 is zero-extended to i64 as a scalar
; before being splatted. The autogenerated assertions show four scalar umull
; instructions, not a vmullb.
526 define arm_aapcs_vfpcc <4 x i64> @zext32_0213_ext0(<8 x i32> %src1, i32 %src2) {
527 ; CHECK-LABEL: zext32_0213_ext0:
528 ; CHECK: @ %bb.0: @ %entry
529 ; CHECK-NEXT: vmov r1, s2
530 ; CHECK-NEXT: vmov r3, s0
531 ; CHECK-NEXT: vmov.f32 s0, s1
532 ; CHECK-NEXT: vmov.f32 s2, s3
533 ; CHECK-NEXT: umull r1, r12, r1, r0
534 ; CHECK-NEXT: umull r3, r2, r3, r0
535 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r1
536 ; CHECK-NEXT: vmov r1, s2
537 ; CHECK-NEXT: vmov r3, s0
538 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r12
539 ; CHECK-NEXT: vmov q0, q2
540 ; CHECK-NEXT: umull r1, r2, r1, r0
541 ; CHECK-NEXT: umull r0, r3, r3, r0
542 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r1
543 ; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
546 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
547 %out1 = zext <4 x i32> %shuf1 to <4 x i64>
548 %ext = zext i32 %src2 to i64
549 %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
550 %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
551 %out = mul <4 x i64> %out1, %shuf2
; Commuted form of the extend-then-splat zext test on <8 x i32> lanes
; 0, 2, 1, 3: the <4 x i64> splat of zext(%src2) is the first mul operand. The
; autogenerated assertions show four scalar umull instructions.
555 define arm_aapcs_vfpcc <4 x i64> @zext32_ext0_0213(<8 x i32> %src1, i32 %src2) {
556 ; CHECK-LABEL: zext32_ext0_0213:
557 ; CHECK: @ %bb.0: @ %entry
558 ; CHECK-NEXT: vmov r1, s2
559 ; CHECK-NEXT: vmov r3, s0
560 ; CHECK-NEXT: vmov.f32 s0, s1
561 ; CHECK-NEXT: vmov.f32 s2, s3
562 ; CHECK-NEXT: umull r1, r12, r0, r1
563 ; CHECK-NEXT: umull r3, r2, r0, r3
564 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r1
565 ; CHECK-NEXT: vmov r1, s2
566 ; CHECK-NEXT: vmov r3, s0
567 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r12
568 ; CHECK-NEXT: vmov q0, q2
569 ; CHECK-NEXT: umull r1, r2, r0, r1
570 ; CHECK-NEXT: umull r0, r3, r0, r3
571 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r1
572 ; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
575 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
576 %out1 = zext <4 x i32> %shuf1 to <4 x i64>
577 %ext = zext i32 %src2 to i64
578 %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
579 %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
580 %out = mul <4 x i64> %shuf2, %out1
; 16-bit variant: multiplies sext(lanes 0, 2, 4, 6 of %src1) by sext(splat of
; %src2), both widened from <4 x i16> to <4 x i32>; the splat is built at i16
; width and is the second mul operand. The autogenerated assertions expect a
; vdup.32 followed by a single vmullb.s16.
584 define arm_aapcs_vfpcc <4 x i32> @sext16_02468101214_0ext(<8 x i16> %src1, i16 %src2) {
585 ; CHECK-LABEL: sext16_02468101214_0ext:
586 ; CHECK: @ %bb.0: @ %entry
587 ; CHECK-NEXT: vdup.32 q1, r0
588 ; CHECK-NEXT: vmullb.s16 q0, q0, q1
591 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
592 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
593 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
594 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
595 %out2 = sext <4 x i16> %shuf2 to <4 x i32>
596 %out = mul <4 x i32> %out1, %out2
; Commuted form of the i16 lanes-0/2/4/6 sext test: the sign-extended i16 splat
; of %src2 is the first mul operand. The autogenerated assertions expect a
; vdup.32 followed by a vmullb.s16 whose first source is the splat register.
600 define arm_aapcs_vfpcc <4 x i32> @sext16_0ext_02468101214(<8 x i16> %src1, i16 %src2) {
601 ; CHECK-LABEL: sext16_0ext_02468101214:
602 ; CHECK: @ %bb.0: @ %entry
603 ; CHECK-NEXT: vdup.32 q1, r0
604 ; CHECK-NEXT: vmullb.s16 q0, q1, q0
607 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
608 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
609 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
610 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
611 %out2 = sext <4 x i16> %shuf2 to <4 x i32>
612 %out = mul <4 x i32> %out2, %out1
; Multiplies sext(lanes 0, 2, 4, 6 of %src1) by a <4 x i32> splat of %src2,
; where %src2 is sign-extended to i32 as a scalar before being splatted. The
; autogenerated assertions expect vmovlb.s16 + sxth + a vmul.i32 by the scalar
; register rather than a vmullb.
616 define arm_aapcs_vfpcc <4 x i32> @sext16_02468101214_ext0(<8 x i16> %src1, i16 %src2) {
617 ; CHECK-LABEL: sext16_02468101214_ext0:
618 ; CHECK: @ %bb.0: @ %entry
619 ; CHECK-NEXT: vmovlb.s16 q0, q0
620 ; CHECK-NEXT: sxth r0, r0
621 ; CHECK-NEXT: vmul.i32 q0, q0, r0
624 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
625 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
626 %ext = sext i16 %src2 to i32
627 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
628 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
629 %out = mul <4 x i32> %out1, %shuf2
; Commuted form of the extend-then-splat i16 sext test on lanes 0, 2, 4, 6: the
; <4 x i32> splat of sext(%src2) is the first mul operand. The autogenerated
; assertions are the same vmovlb.s16 + sxth + vmul.i32 sequence as the
; non-commuted form.
633 define arm_aapcs_vfpcc <4 x i32> @sext16_ext0_02468101214(<8 x i16> %src1, i16 %src2) {
634 ; CHECK-LABEL: sext16_ext0_02468101214:
635 ; CHECK: @ %bb.0: @ %entry
636 ; CHECK-NEXT: vmovlb.s16 q0, q0
637 ; CHECK-NEXT: sxth r0, r0
638 ; CHECK-NEXT: vmul.i32 q0, q0, r0
641 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
642 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
643 %ext = sext i16 %src2 to i32
644 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
645 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
646 %out = mul <4 x i32> %shuf2, %out1
; Multiplies sext(lanes 1, 3, 5, 7 of %src1) by sext(splat of %src2), both
; widened from <4 x i16> to <4 x i32>; the i16 splat is the second mul operand.
; The autogenerated assertions expect a vrev32.16 of the source feeding a
; vmullb.s16.
650 define arm_aapcs_vfpcc <4 x i32> @sext16_13579111315_0ext(<8 x i16> %src1, i16 %src2) {
651 ; CHECK-LABEL: sext16_13579111315_0ext:
652 ; CHECK: @ %bb.0: @ %entry
653 ; CHECK-NEXT: vdup.32 q1, r0
654 ; CHECK-NEXT: vrev32.16 q0, q0
655 ; CHECK-NEXT: vmullb.s16 q0, q0, q1
658 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
659 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
660 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
661 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
662 %out2 = sext <4 x i16> %shuf2 to <4 x i32>
663 %out = mul <4 x i32> %out1, %out2
; Commuted form of the i16 lanes-1/3/5/7 sext test: the sign-extended i16 splat
; of %src2 is the first mul operand. The autogenerated assertions expect a
; vrev32.16 of the source feeding a vmullb.s16 whose first source is the splat.
667 define arm_aapcs_vfpcc <4 x i32> @sext16_0ext_13579111315(<8 x i16> %src1, i16 %src2) {
668 ; CHECK-LABEL: sext16_0ext_13579111315:
669 ; CHECK: @ %bb.0: @ %entry
670 ; CHECK-NEXT: vrev32.16 q0, q0
671 ; CHECK-NEXT: vdup.32 q1, r0
672 ; CHECK-NEXT: vmullb.s16 q0, q1, q0
675 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
676 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
677 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
678 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
679 %out2 = sext <4 x i16> %shuf2 to <4 x i32>
680 %out = mul <4 x i32> %out2, %out1
; Multiplies sext(lanes 1, 3, 5, 7 of %src1) by a <4 x i32> splat of %src2,
; where %src2 is sign-extended to i32 as a scalar before being splatted. The
; autogenerated assertions expect vmovlt.s16 + sxth + a vmul.i32 by the scalar
; register rather than a vmullb.
684 define arm_aapcs_vfpcc <4 x i32> @sext16_13579111315_ext0(<8 x i16> %src1, i16 %src2) {
685 ; CHECK-LABEL: sext16_13579111315_ext0:
686 ; CHECK: @ %bb.0: @ %entry
687 ; CHECK-NEXT: vmovlt.s16 q0, q0
688 ; CHECK-NEXT: sxth r0, r0
689 ; CHECK-NEXT: vmul.i32 q0, q0, r0
692 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
693 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
694 %ext = sext i16 %src2 to i32
695 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
696 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
697 %out = mul <4 x i32> %out1, %shuf2
; Commuted form of the extend-then-splat i16 sext test on lanes 1, 3, 5, 7: the
; <4 x i32> splat of sext(%src2) is the first mul operand. The autogenerated
; assertions are the same vmovlt.s16 + sxth + vmul.i32 sequence as the
; non-commuted form.
701 define arm_aapcs_vfpcc <4 x i32> @sext16_ext0_13579111315(<8 x i16> %src1, i16 %src2) {
702 ; CHECK-LABEL: sext16_ext0_13579111315:
703 ; CHECK: @ %bb.0: @ %entry
704 ; CHECK-NEXT: vmovlt.s16 q0, q0
705 ; CHECK-NEXT: sxth r0, r0
706 ; CHECK-NEXT: vmul.i32 q0, q0, r0
709 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
710 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
711 %ext = sext i16 %src2 to i32
712 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
713 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
714 %out = mul <4 x i32> %shuf2, %out1
; Wider i16 variant: takes lanes 0, 2, 4, 6, 1, 3, 5, 7 of a <16 x i16> source,
; sign-extends them to <8 x i32> and multiplies by sext(splat of %src2); the i16
; splat is the second mul operand. The autogenerated assertions expect a
; vdup.16, a vrev32.16 and two vmullb.s16.
718 define arm_aapcs_vfpcc <8 x i32> @sext16_02461357_0ext(<16 x i16> %src1, i16 %src2) {
719 ; CHECK-LABEL: sext16_02461357_0ext:
720 ; CHECK: @ %bb.0: @ %entry
721 ; CHECK-NEXT: vdup.16 q2, r0
722 ; CHECK-NEXT: vrev32.16 q1, q0
723 ; CHECK-NEXT: vmullb.s16 q1, q1, q2
724 ; CHECK-NEXT: vmullb.s16 q0, q0, q2
727 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
728 %out1 = sext <8 x i16> %shuf1 to <8 x i32>
729 %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
730 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
731 %out2 = sext <8 x i16> %shuf2 to <8 x i32>
732 %out = mul <8 x i32> %out1, %out2
; Commuted form of the <16 x i16> lanes-0/2/4/6/1/3/5/7 sext test: the
; sign-extended i16 splat of %src2 is the first mul operand. The autogenerated
; assertions expect a vrev32.16, a vdup.16 and two vmullb.s16 whose first source
; is the splat register.
736 define arm_aapcs_vfpcc <8 x i32> @sext16_0ext_02461357(<16 x i16> %src1, i16 %src2) {
737 ; CHECK-LABEL: sext16_0ext_02461357:
738 ; CHECK: @ %bb.0: @ %entry
739 ; CHECK-NEXT: vrev32.16 q1, q0
740 ; CHECK-NEXT: vdup.16 q2, r0
741 ; CHECK-NEXT: vmullb.s16 q1, q2, q1
742 ; CHECK-NEXT: vmullb.s16 q0, q2, q0
745 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
746 %out1 = sext <8 x i16> %shuf1 to <8 x i32>
747 %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
748 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
749 %out2 = sext <8 x i16> %shuf2 to <8 x i32>
750 %out = mul <8 x i32> %out2, %out1
; <16 x i16> lanes 0, 2, 4, 6, 1, 3, 5, 7 sign-extended to <8 x i32> and
; multiplied by an <8 x i32> splat of %src2, where %src2 is sign-extended to i32
; as a scalar before being splatted. The autogenerated assertions expect
; vmovlb.s16 / vmovlt.s16 + sxth + two vmul.i32 by the scalar register.
754 define arm_aapcs_vfpcc <8 x i32> @sext16_02461357_ext0(<16 x i16> %src1, i16 %src2) {
755 ; CHECK-LABEL: sext16_02461357_ext0:
756 ; CHECK: @ %bb.0: @ %entry
757 ; CHECK-NEXT: vmovlb.s16 q1, q0
758 ; CHECK-NEXT: sxth r0, r0
759 ; CHECK-NEXT: vmul.i32 q2, q1, r0
760 ; CHECK-NEXT: vmovlt.s16 q0, q0
761 ; CHECK-NEXT: vmul.i32 q1, q0, r0
762 ; CHECK-NEXT: vmov q0, q2
765 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
766 %out1 = sext <8 x i16> %shuf1 to <8 x i32>
767 %ext = sext i16 %src2 to i32
768 %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
769 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
770 %out = mul <8 x i32> %out1, %shuf2
; Commuted form of the extend-then-splat sext test on <16 x i16> lanes
; 0, 2, 4, 6, 1, 3, 5, 7: the <8 x i32> splat of sext(%src2) is the first mul
; operand. The autogenerated assertions are the same vmovlb/vmovlt + sxth +
; vmul.i32 sequence as the non-commuted form.
774 define arm_aapcs_vfpcc <8 x i32> @sext16_ext0_02461357(<16 x i16> %src1, i16 %src2) {
775 ; CHECK-LABEL: sext16_ext0_02461357:
776 ; CHECK: @ %bb.0: @ %entry
777 ; CHECK-NEXT: vmovlb.s16 q1, q0
778 ; CHECK-NEXT: sxth r0, r0
779 ; CHECK-NEXT: vmul.i32 q2, q1, r0
780 ; CHECK-NEXT: vmovlt.s16 q0, q0
781 ; CHECK-NEXT: vmul.i32 q1, q0, r0
782 ; CHECK-NEXT: vmov q0, q2
785 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
786 %out1 = sext <8 x i16> %shuf1 to <8 x i32>
787 %ext = sext i16 %src2 to i32
788 %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
789 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
790 %out = mul <8 x i32> %shuf2, %out1
; Unsigned i16 variant: multiplies zext(lanes 0, 2, 4, 6 of %src1) by
; zext(splat of %src2), both widened from <4 x i16> to <4 x i32>; the i16 splat
; is the second mul operand. The autogenerated assertions expect a vdup.32
; followed by a single vmullb.u16.
794 define arm_aapcs_vfpcc <4 x i32> @zext16_02468101214_0ext(<8 x i16> %src1, i16 %src2) {
795 ; CHECK-LABEL: zext16_02468101214_0ext:
796 ; CHECK: @ %bb.0: @ %entry
797 ; CHECK-NEXT: vdup.32 q1, r0
798 ; CHECK-NEXT: vmullb.u16 q0, q0, q1
801 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
802 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
803 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
804 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
805 %out2 = zext <4 x i16> %shuf2 to <4 x i32>
806 %out = mul <4 x i32> %out1, %out2
; Commuted form of the i16 lanes-0/2/4/6 zext test: the zero-extended i16 splat
; of %src2 is the first mul operand. The autogenerated assertions expect a
; vdup.32 followed by a vmullb.u16 whose first source is the splat register.
810 define arm_aapcs_vfpcc <4 x i32> @zext16_0ext_02468101214(<8 x i16> %src1, i16 %src2) {
811 ; CHECK-LABEL: zext16_0ext_02468101214:
812 ; CHECK: @ %bb.0: @ %entry
813 ; CHECK-NEXT: vdup.32 q1, r0
814 ; CHECK-NEXT: vmullb.u16 q0, q1, q0
817 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
818 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
819 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
820 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
821 %out2 = zext <4 x i16> %shuf2 to <4 x i32>
822 %out = mul <4 x i32> %out2, %out1
; Multiplies zext(lanes 0, 2, 4, 6 of %src1) by a <4 x i32> splat of %src2,
; where %src2 is zero-extended to i32 as a scalar before being splatted. The
; autogenerated assertions expect vmovlb.u16 + uxth + a vmul.i32 by the scalar
; register rather than a vmullb.
826 define arm_aapcs_vfpcc <4 x i32> @zext16_02468101214_ext0(<8 x i16> %src1, i16 %src2) {
827 ; CHECK-LABEL: zext16_02468101214_ext0:
828 ; CHECK: @ %bb.0: @ %entry
829 ; CHECK-NEXT: vmovlb.u16 q0, q0
830 ; CHECK-NEXT: uxth r0, r0
831 ; CHECK-NEXT: vmul.i32 q0, q0, r0
834 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
835 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
836 %ext = zext i16 %src2 to i32
837 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
838 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
839 %out = mul <4 x i32> %out1, %shuf2
; Commuted form of the extend-then-splat i16 zext test on lanes 0, 2, 4, 6: the
; <4 x i32> splat of zext(%src2) is the first mul operand. The autogenerated
; assertions are the same vmovlb.u16 + uxth + vmul.i32 sequence as the
; non-commuted form.
843 define arm_aapcs_vfpcc <4 x i32> @zext16_ext0_02468101214(<8 x i16> %src1, i16 %src2) {
844 ; CHECK-LABEL: zext16_ext0_02468101214:
845 ; CHECK: @ %bb.0: @ %entry
846 ; CHECK-NEXT: vmovlb.u16 q0, q0
847 ; CHECK-NEXT: uxth r0, r0
848 ; CHECK-NEXT: vmul.i32 q0, q0, r0
851 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
852 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
853 %ext = zext i16 %src2 to i32
854 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
855 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
856 %out = mul <4 x i32> %shuf2, %out1
; Lanes 1,3,5,7 of %src1 and an i16 splat of %src2 (insertelement into lane 0,
; then a zero-mask shuffle) are each zero-extended to <4 x i32> and multiplied,
; with the %src1 lanes as the first mul operand.
; The assertions expect vdup.32, a vrev32.16 of the vector input, and then a
; vmullb.u16.
; Note: the name lists indices up to 15, but the mask only selects 1,3,5,7.
860 define arm_aapcs_vfpcc <4 x i32> @zext16_13579111315_0ext(<8 x i16> %src1, i16 %src2) {
861 ; CHECK-LABEL: zext16_13579111315_0ext:
862 ; CHECK: @ %bb.0: @ %entry
863 ; CHECK-NEXT: vdup.32 q1, r0
864 ; CHECK-NEXT: vrev32.16 q0, q0
865 ; CHECK-NEXT: vmullb.u16 q0, q0, q1
868 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
869 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
870 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
871 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
872 %out2 = zext <4 x i16> %shuf2 to <4 x i32>
873 %out = mul <4 x i32> %out1, %out2
; Lanes 1,3,5,7 of %src1 and an i16 splat of %src2 (insertelement into lane 0,
; then a zero-mask shuffle) are each zero-extended to <4 x i32> and multiplied,
; with the splat as the first mul operand.
; The assertions expect a vrev32.16 of the vector input, vdup.32 of r0, and
; then a vmullb.u16 taking the splat register first.
; Note: the name lists indices up to 15, but the mask only selects 1,3,5,7.
877 define arm_aapcs_vfpcc <4 x i32> @zext16_0ext_13579111315(<8 x i16> %src1, i16 %src2) {
878 ; CHECK-LABEL: zext16_0ext_13579111315:
879 ; CHECK: @ %bb.0: @ %entry
880 ; CHECK-NEXT: vrev32.16 q0, q0
881 ; CHECK-NEXT: vdup.32 q1, r0
882 ; CHECK-NEXT: vmullb.u16 q0, q1, q0
885 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
886 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
887 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
888 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
889 %out2 = zext <4 x i16> %shuf2 to <4 x i32>
890 %out = mul <4 x i32> %out2, %out1
; Lanes 1,3,5,7 of %src1 are zero-extended to <4 x i32> and multiplied by a
; splat of %src2, where %src2 is zero-extended to i32 as a scalar before being
; inserted and splatted (vector operand first in the mul).
; The assertions expect vmovlt.u16, uxth and a vector-by-scalar vmul.i32.
; Note: the name lists indices up to 15, but the mask only selects 1,3,5,7.
894 define arm_aapcs_vfpcc <4 x i32> @zext16_13579111315_ext0(<8 x i16> %src1, i16 %src2) {
895 ; CHECK-LABEL: zext16_13579111315_ext0:
896 ; CHECK: @ %bb.0: @ %entry
897 ; CHECK-NEXT: vmovlt.u16 q0, q0
898 ; CHECK-NEXT: uxth r0, r0
899 ; CHECK-NEXT: vmul.i32 q0, q0, r0
902 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
903 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
904 %ext = zext i16 %src2 to i32
905 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
906 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
907 %out = mul <4 x i32> %out1, %shuf2
; %src2 is zero-extended to i32 as a scalar, splatted to <4 x i32>, and used as
; the first mul operand against the zero-extended lanes 1,3,5,7 of %src1.
; The assertions expect vmovlt.u16, uxth and a vector-by-scalar vmul.i32.
; Note: the name lists indices up to 15, but the mask only selects 1,3,5,7.
911 define arm_aapcs_vfpcc <4 x i32> @zext16_ext0_13579111315(<8 x i16> %src1, i16 %src2) {
912 ; CHECK-LABEL: zext16_ext0_13579111315:
913 ; CHECK: @ %bb.0: @ %entry
914 ; CHECK-NEXT: vmovlt.u16 q0, q0
915 ; CHECK-NEXT: uxth r0, r0
916 ; CHECK-NEXT: vmul.i32 q0, q0, r0
919 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
920 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
921 %ext = zext i16 %src2 to i32
922 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
923 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
924 %out = mul <4 x i32> %shuf2, %out1
; Eight-lane result: lanes 0,2,4,6 followed by lanes 1,3,5,7 of the
; <16 x i16> %src1 are zero-extended to <8 x i32> and multiplied by an i16 splat
; of %src2 that is extended after splatting (vector operand first in the mul).
; The assertions expect vdup.16, one vrev32.16 and two vmullb.u16 instructions
; writing q1 and q0.
928 define arm_aapcs_vfpcc <8 x i32> @zext16_02461357_0ext(<16 x i16> %src1, i16 %src2) {
929 ; CHECK-LABEL: zext16_02461357_0ext:
930 ; CHECK: @ %bb.0: @ %entry
931 ; CHECK-NEXT: vdup.16 q2, r0
932 ; CHECK-NEXT: vrev32.16 q1, q0
933 ; CHECK-NEXT: vmullb.u16 q1, q1, q2
934 ; CHECK-NEXT: vmullb.u16 q0, q0, q2
937 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
938 %out1 = zext <8 x i16> %shuf1 to <8 x i32>
939 %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
940 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
941 %out2 = zext <8 x i16> %shuf2 to <8 x i32>
942 %out = mul <8 x i32> %out1, %out2
; Eight-lane result with the splat as the first mul operand: an i16 splat of
; %src2 (extended after splatting) times the zero-extended lanes
; 0,2,4,6,1,3,5,7 of the <16 x i16> %src1.
; The assertions expect vrev32.16, vdup.16 and two vmullb.u16 instructions that
; take the splat register as their first source.
946 define arm_aapcs_vfpcc <8 x i32> @zext16_0ext_02461357(<16 x i16> %src1, i16 %src2) {
947 ; CHECK-LABEL: zext16_0ext_02461357:
948 ; CHECK: @ %bb.0: @ %entry
949 ; CHECK-NEXT: vrev32.16 q1, q0
950 ; CHECK-NEXT: vdup.16 q2, r0
951 ; CHECK-NEXT: vmullb.u16 q1, q2, q1
952 ; CHECK-NEXT: vmullb.u16 q0, q2, q0
955 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
956 %out1 = zext <8 x i16> %shuf1 to <8 x i32>
957 %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
958 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
959 %out2 = zext <8 x i16> %shuf2 to <8 x i32>
960 %out = mul <8 x i32> %out2, %out1
; Eight-lane result: the zero-extended lanes 0,2,4,6,1,3,5,7 of the
; <16 x i16> %src1 are multiplied by a splat of %src2, where %src2 is
; zero-extended to i32 as a scalar before being splatted to <8 x i32>
; (vector operand first in the mul).
; The assertions expect vmovlb.u16 / vmovlt.u16 for the two halves, one uxth,
; two vector-by-scalar vmul.i32 instructions and a final vmov into q0.
964 define arm_aapcs_vfpcc <8 x i32> @zext16_02461357_ext0(<16 x i16> %src1, i16 %src2) {
965 ; CHECK-LABEL: zext16_02461357_ext0:
966 ; CHECK: @ %bb.0: @ %entry
967 ; CHECK-NEXT: vmovlb.u16 q1, q0
968 ; CHECK-NEXT: uxth r0, r0
969 ; CHECK-NEXT: vmul.i32 q2, q1, r0
970 ; CHECK-NEXT: vmovlt.u16 q0, q0
971 ; CHECK-NEXT: vmul.i32 q1, q0, r0
972 ; CHECK-NEXT: vmov q0, q2
975 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
976 %out1 = zext <8 x i16> %shuf1 to <8 x i32>
977 %ext = zext i16 %src2 to i32
978 %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
979 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
980 %out = mul <8 x i32> %out1, %shuf2
; Eight-lane result with the splat as the first mul operand: %src2 is
; zero-extended to i32 as a scalar, splatted to <8 x i32>, and multiplied by the
; zero-extended lanes 0,2,4,6,1,3,5,7 of the <16 x i16> %src1.
; The assertions expect vmovlb.u16 / vmovlt.u16 for the two halves, one uxth,
; two vector-by-scalar vmul.i32 instructions and a final vmov into q0.
984 define arm_aapcs_vfpcc <8 x i32> @zext16_ext0_02461357(<16 x i16> %src1, i16 %src2) {
985 ; CHECK-LABEL: zext16_ext0_02461357:
986 ; CHECK: @ %bb.0: @ %entry
987 ; CHECK-NEXT: vmovlb.u16 q1, q0
988 ; CHECK-NEXT: uxth r0, r0
989 ; CHECK-NEXT: vmul.i32 q2, q1, r0
990 ; CHECK-NEXT: vmovlt.u16 q0, q0
991 ; CHECK-NEXT: vmul.i32 q1, q0, r0
992 ; CHECK-NEXT: vmov q0, q2
995 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
996 %out1 = zext <8 x i16> %shuf1 to <8 x i32>
997 %ext = zext i16 %src2 to i32
998 %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
999 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
1000 %out = mul <8 x i32> %shuf2, %out1
; Even lanes 0..14 of %src1 and an i8 splat of %src2 (insertelement into lane 0,
; then a zero-mask shuffle) are each sign-extended to <8 x i16> and multiplied,
; with the %src1 lanes as the first mul operand.
; The assertions expect a vdup.16 of r0 followed by a single vmullb.s8.
; Note: the name lists indices up to 30, but the mask only selects 0..14.
1004 define arm_aapcs_vfpcc <8 x i16> @sext8_024681012141618202224262830_0ext(<16 x i8> %src1, i8 %src2) {
1005 ; CHECK-LABEL: sext8_024681012141618202224262830_0ext:
1006 ; CHECK: @ %bb.0: @ %entry
1007 ; CHECK-NEXT: vdup.16 q1, r0
1008 ; CHECK-NEXT: vmullb.s8 q0, q0, q1
1011 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1012 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1013 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1014 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1015 %out2 = sext <8 x i8> %shuf2 to <8 x i16>
1016 %out = mul <8 x i16> %out1, %out2
; Even lanes 0..14 of %src1 and an i8 splat of %src2 (insertelement into lane 0,
; then a zero-mask shuffle) are each sign-extended to <8 x i16> and multiplied,
; with the splat as the first mul operand.
; The assertions expect a vdup.16 of r0 followed by a single vmullb.s8 taking
; the splat register first.
; Note: the name lists indices up to 30, but the mask only selects 0..14.
1020 define arm_aapcs_vfpcc <8 x i16> @sext8_0ext_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
1021 ; CHECK-LABEL: sext8_0ext_024681012141618202224262830:
1022 ; CHECK: @ %bb.0: @ %entry
1023 ; CHECK-NEXT: vdup.16 q1, r0
1024 ; CHECK-NEXT: vmullb.s8 q0, q1, q0
1027 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1028 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1029 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1030 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1031 %out2 = sext <8 x i8> %shuf2 to <8 x i16>
1032 %out = mul <8 x i16> %out2, %out1
; Even lanes 0..14 of %src1 are sign-extended to <8 x i16> and multiplied by a
; splat of %src2, where %src2 is sign-extended to i16 as a scalar before being
; inserted and splatted (vector operand first in the mul).
; The assertions expect vmovlb.s8, sxtb and a vector-by-scalar vmul.i16.
; Note: the name lists indices up to 30, but the mask only selects 0..14.
1036 define arm_aapcs_vfpcc <8 x i16> @sext8_024681012141618202224262830_ext0(<16 x i8> %src1, i8 %src2) {
1037 ; CHECK-LABEL: sext8_024681012141618202224262830_ext0:
1038 ; CHECK: @ %bb.0: @ %entry
1039 ; CHECK-NEXT: vmovlb.s8 q0, q0
1040 ; CHECK-NEXT: sxtb r0, r0
1041 ; CHECK-NEXT: vmul.i16 q0, q0, r0
1044 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1045 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1046 %ext = sext i8 %src2 to i16
1047 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1048 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1049 %out = mul <8 x i16> %out1, %shuf2
; %src2 is sign-extended to i16 as a scalar, splatted to <8 x i16>, and used as
; the first mul operand against the sign-extended even lanes 0..14 of %src1.
; The assertions expect vmovlb.s8, sxtb and a vector-by-scalar vmul.i16.
; Note: the name lists indices up to 30, but the mask only selects 0..14.
1053 define arm_aapcs_vfpcc <8 x i16> @sext8_ext0_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
1054 ; CHECK-LABEL: sext8_ext0_024681012141618202224262830:
1055 ; CHECK: @ %bb.0: @ %entry
1056 ; CHECK-NEXT: vmovlb.s8 q0, q0
1057 ; CHECK-NEXT: sxtb r0, r0
1058 ; CHECK-NEXT: vmul.i16 q0, q0, r0
1061 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1062 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1063 %ext = sext i8 %src2 to i16
1064 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1065 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1066 %out = mul <8 x i16> %shuf2, %out1
; Odd lanes 1..15 of %src1 and an i8 splat of %src2 (insertelement into lane 0,
; then a zero-mask shuffle) are each sign-extended to <8 x i16> and multiplied,
; with the %src1 lanes as the first mul operand.
; The assertions expect vdup.16, a vrev16.8 of the vector input, and then a
; vmullb.s8.
; Note: the name lists indices up to 31, but the mask only selects 1..15.
1070 define arm_aapcs_vfpcc <8 x i16> @sext8_135791113151719212325272931_0ext(<16 x i8> %src1, i8 %src2) {
1071 ; CHECK-LABEL: sext8_135791113151719212325272931_0ext:
1072 ; CHECK: @ %bb.0: @ %entry
1073 ; CHECK-NEXT: vdup.16 q1, r0
1074 ; CHECK-NEXT: vrev16.8 q0, q0
1075 ; CHECK-NEXT: vmullb.s8 q0, q0, q1
1078 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1079 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1080 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1081 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1082 %out2 = sext <8 x i8> %shuf2 to <8 x i16>
1083 %out = mul <8 x i16> %out1, %out2
; Odd lanes 1..15 of %src1 and an i8 splat of %src2 (insertelement into lane 0,
; then a zero-mask shuffle) are each sign-extended to <8 x i16> and multiplied,
; with the splat as the first mul operand.
; The assertions expect a vrev16.8 of the vector input, vdup.16 of r0, and then
; a vmullb.s8 taking the splat register first.
; Note: the name lists indices up to 31, but the mask only selects 1..15.
1087 define arm_aapcs_vfpcc <8 x i16> @sext8_0ext_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
1088 ; CHECK-LABEL: sext8_0ext_135791113151719212325272931:
1089 ; CHECK: @ %bb.0: @ %entry
1090 ; CHECK-NEXT: vrev16.8 q0, q0
1091 ; CHECK-NEXT: vdup.16 q1, r0
1092 ; CHECK-NEXT: vmullb.s8 q0, q1, q0
1095 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1096 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1097 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1098 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1099 %out2 = sext <8 x i8> %shuf2 to <8 x i16>
1100 %out = mul <8 x i16> %out2, %out1
; Odd lanes 1..15 of %src1 are sign-extended to <8 x i16> and multiplied by a
; splat of %src2, where %src2 is sign-extended to i16 as a scalar before being
; inserted and splatted (vector operand first in the mul).
; The assertions expect vmovlt.s8, sxtb and a vector-by-scalar vmul.i16.
; Note: the name lists indices up to 31, but the mask only selects 1..15.
1104 define arm_aapcs_vfpcc <8 x i16> @sext8_135791113151719212325272931_ext0(<16 x i8> %src1, i8 %src2) {
1105 ; CHECK-LABEL: sext8_135791113151719212325272931_ext0:
1106 ; CHECK: @ %bb.0: @ %entry
1107 ; CHECK-NEXT: vmovlt.s8 q0, q0
1108 ; CHECK-NEXT: sxtb r0, r0
1109 ; CHECK-NEXT: vmul.i16 q0, q0, r0
1112 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1113 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1114 %ext = sext i8 %src2 to i16
1115 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1116 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1117 %out = mul <8 x i16> %out1, %shuf2
; %src2 is sign-extended to i16 as a scalar, splatted to <8 x i16>, and used as
; the first mul operand against the sign-extended odd lanes 1..15 of %src1.
; The assertions expect vmovlt.s8, sxtb and a vector-by-scalar vmul.i16.
; Note: the name lists indices up to 31, but the mask only selects 1..15.
1121 define arm_aapcs_vfpcc <8 x i16> @sext8_ext0_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
1122 ; CHECK-LABEL: sext8_ext0_135791113151719212325272931:
1123 ; CHECK: @ %bb.0: @ %entry
1124 ; CHECK-NEXT: vmovlt.s8 q0, q0
1125 ; CHECK-NEXT: sxtb r0, r0
1126 ; CHECK-NEXT: vmul.i16 q0, q0, r0
1129 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1130 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1131 %ext = sext i8 %src2 to i16
1132 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1133 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1134 %out = mul <8 x i16> %shuf2, %out1
; Sixteen-lane result: even lanes 0..14 followed by odd lanes 1..15 of the
; <32 x i8> %src1 are sign-extended to <16 x i16> and multiplied by an i8 splat
; of %src2 that is extended after splatting (vector operand first in the mul).
; The assertions expect vdup.8, one vrev16.8 and two vmullb.s8 instructions
; writing q1 and q0.
1138 define arm_aapcs_vfpcc <16 x i16> @sext8_0246810121413579111315_0ext(<32 x i8> %src1, i8 %src2) {
1139 ; CHECK-LABEL: sext8_0246810121413579111315_0ext:
1140 ; CHECK: @ %bb.0: @ %entry
1141 ; CHECK-NEXT: vdup.8 q2, r0
1142 ; CHECK-NEXT: vrev16.8 q1, q0
1143 ; CHECK-NEXT: vmullb.s8 q1, q1, q2
1144 ; CHECK-NEXT: vmullb.s8 q0, q0, q2
1147 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1148 %out1 = sext <16 x i8> %shuf1 to <16 x i16>
1149 %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
1150 %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
1151 %out2 = sext <16 x i8> %shuf2 to <16 x i16>
1152 %out = mul <16 x i16> %out1, %out2
; Sixteen-lane result with the splat as the first mul operand: an i8 splat of
; %src2 (extended after splatting) times the sign-extended even-then-odd lanes
; of the <32 x i8> %src1.
; The assertions expect vrev16.8, vdup.8 and two vmullb.s8 instructions that
; take the splat register as their first source.
1156 define arm_aapcs_vfpcc <16 x i16> @sext8_0ext_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
1157 ; CHECK-LABEL: sext8_0ext_0246810121413579111315:
1158 ; CHECK: @ %bb.0: @ %entry
1159 ; CHECK-NEXT: vrev16.8 q1, q0
1160 ; CHECK-NEXT: vdup.8 q2, r0
1161 ; CHECK-NEXT: vmullb.s8 q1, q2, q1
1162 ; CHECK-NEXT: vmullb.s8 q0, q2, q0
1165 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1166 %out1 = sext <16 x i8> %shuf1 to <16 x i16>
1167 %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
1168 %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
1169 %out2 = sext <16 x i8> %shuf2 to <16 x i16>
1170 %out = mul <16 x i16> %out2, %out1
; Sixteen-lane result: the sign-extended even-then-odd lanes of the <32 x i8>
; %src1 are multiplied by a splat of %src2, where %src2 is sign-extended to i16
; as a scalar before being splatted to <16 x i16> (vector operand first).
; The assertions expect vmovlb.s8 / vmovlt.s8 for the two halves, one sxtb,
; two vector-by-scalar vmul.i16 instructions and a final vmov into q0.
1174 define arm_aapcs_vfpcc <16 x i16> @sext8_0246810121413579111315_ext0(<32 x i8> %src1, i8 %src2) {
1175 ; CHECK-LABEL: sext8_0246810121413579111315_ext0:
1176 ; CHECK: @ %bb.0: @ %entry
1177 ; CHECK-NEXT: vmovlb.s8 q1, q0
1178 ; CHECK-NEXT: sxtb r0, r0
1179 ; CHECK-NEXT: vmul.i16 q2, q1, r0
1180 ; CHECK-NEXT: vmovlt.s8 q0, q0
1181 ; CHECK-NEXT: vmul.i16 q1, q0, r0
1182 ; CHECK-NEXT: vmov q0, q2
1185 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1186 %out1 = sext <16 x i8> %shuf1 to <16 x i16>
1187 %ext = sext i8 %src2 to i16
1188 %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
1189 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
1190 %out = mul <16 x i16> %out1, %shuf2
; Sixteen-lane result with the splat as the first mul operand: %src2 is
; sign-extended to i16 as a scalar, splatted to <16 x i16>, and multiplied by
; the sign-extended even-then-odd lanes of the <32 x i8> %src1.
; The assertions expect vmovlb.s8 / vmovlt.s8 for the two halves, one sxtb,
; two vector-by-scalar vmul.i16 instructions and a final vmov into q0.
1194 define arm_aapcs_vfpcc <16 x i16> @sext8_ext0_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
1195 ; CHECK-LABEL: sext8_ext0_0246810121413579111315:
1196 ; CHECK: @ %bb.0: @ %entry
1197 ; CHECK-NEXT: vmovlb.s8 q1, q0
1198 ; CHECK-NEXT: sxtb r0, r0
1199 ; CHECK-NEXT: vmul.i16 q2, q1, r0
1200 ; CHECK-NEXT: vmovlt.s8 q0, q0
1201 ; CHECK-NEXT: vmul.i16 q1, q0, r0
1202 ; CHECK-NEXT: vmov q0, q2
1205 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1206 %out1 = sext <16 x i8> %shuf1 to <16 x i16>
1207 %ext = sext i8 %src2 to i16
1208 %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
1209 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
1210 %out = mul <16 x i16> %shuf2, %out1
; Even lanes 0..14 of %src1 and an i8 splat of %src2 (insertelement into lane 0,
; then a zero-mask shuffle) are each zero-extended to <8 x i16> and multiplied,
; with the %src1 lanes as the first mul operand.
; The assertions expect a vdup.16 of r0 followed by a single vmullb.u8.
; Note: the name lists indices up to 30, but the mask only selects 0..14.
1214 define arm_aapcs_vfpcc <8 x i16> @zext8_024681012141618202224262830_0ext(<16 x i8> %src1, i8 %src2) {
1215 ; CHECK-LABEL: zext8_024681012141618202224262830_0ext:
1216 ; CHECK: @ %bb.0: @ %entry
1217 ; CHECK-NEXT: vdup.16 q1, r0
1218 ; CHECK-NEXT: vmullb.u8 q0, q0, q1
1221 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1222 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1223 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1224 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1225 %out2 = zext <8 x i8> %shuf2 to <8 x i16>
1226 %out = mul <8 x i16> %out1, %out2
; Even lanes 0..14 of %src1 and an i8 splat of %src2 (insertelement into lane 0,
; then a zero-mask shuffle) are each zero-extended to <8 x i16> and multiplied,
; with the splat as the first mul operand.
; The assertions expect a vdup.16 of r0 followed by a single vmullb.u8 taking
; the splat register first.
; Note: the name lists indices up to 30, but the mask only selects 0..14.
1230 define arm_aapcs_vfpcc <8 x i16> @zext8_0ext_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
1231 ; CHECK-LABEL: zext8_0ext_024681012141618202224262830:
1232 ; CHECK: @ %bb.0: @ %entry
1233 ; CHECK-NEXT: vdup.16 q1, r0
1234 ; CHECK-NEXT: vmullb.u8 q0, q1, q0
1237 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1238 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1239 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1240 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1241 %out2 = zext <8 x i8> %shuf2 to <8 x i16>
1242 %out = mul <8 x i16> %out2, %out1
; Even lanes 0..14 of %src1 are zero-extended to <8 x i16> and multiplied by a
; splat of %src2, where %src2 is zero-extended to i16 as a scalar before being
; inserted and splatted (vector operand first in the mul).
; The assertions expect vmovlb.u8, uxtb and a vector-by-scalar vmul.i16.
; Note: the name lists indices up to 30, but the mask only selects 0..14.
1246 define arm_aapcs_vfpcc <8 x i16> @zext8_024681012141618202224262830_ext0(<16 x i8> %src1, i8 %src2) {
1247 ; CHECK-LABEL: zext8_024681012141618202224262830_ext0:
1248 ; CHECK: @ %bb.0: @ %entry
1249 ; CHECK-NEXT: vmovlb.u8 q0, q0
1250 ; CHECK-NEXT: uxtb r0, r0
1251 ; CHECK-NEXT: vmul.i16 q0, q0, r0
1254 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1255 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1256 %ext = zext i8 %src2 to i16
1257 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1258 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1259 %out = mul <8 x i16> %out1, %shuf2
; %src2 is zero-extended to i16 as a scalar, splatted to <8 x i16>, and used as
; the first mul operand against the zero-extended even lanes 0..14 of %src1.
; The assertions expect vmovlb.u8, uxtb and a vector-by-scalar vmul.i16.
; Note: the name lists indices up to 30, but the mask only selects 0..14.
1263 define arm_aapcs_vfpcc <8 x i16> @zext8_ext0_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
1264 ; CHECK-LABEL: zext8_ext0_024681012141618202224262830:
1265 ; CHECK: @ %bb.0: @ %entry
1266 ; CHECK-NEXT: vmovlb.u8 q0, q0
1267 ; CHECK-NEXT: uxtb r0, r0
1268 ; CHECK-NEXT: vmul.i16 q0, q0, r0
1271 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1272 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1273 %ext = zext i8 %src2 to i16
1274 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1275 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1276 %out = mul <8 x i16> %shuf2, %out1
; Odd lanes 1..15 of %src1 and an i8 splat of %src2 (insertelement into lane 0,
; then a zero-mask shuffle) are each zero-extended to <8 x i16> and multiplied,
; with the %src1 lanes as the first mul operand.
; The assertions expect vdup.16, a vrev16.8 of the vector input, and then a
; vmullb.u8.
; Note: the name lists indices up to 31, but the mask only selects 1..15.
1280 define arm_aapcs_vfpcc <8 x i16> @zext8_135791113151719212325272931_0ext(<16 x i8> %src1, i8 %src2) {
1281 ; CHECK-LABEL: zext8_135791113151719212325272931_0ext:
1282 ; CHECK: @ %bb.0: @ %entry
1283 ; CHECK-NEXT: vdup.16 q1, r0
1284 ; CHECK-NEXT: vrev16.8 q0, q0
1285 ; CHECK-NEXT: vmullb.u8 q0, q0, q1
1288 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1289 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1290 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1291 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1292 %out2 = zext <8 x i8> %shuf2 to <8 x i16>
1293 %out = mul <8 x i16> %out1, %out2
; Odd lanes 1..15 of %src1 and an i8 splat of %src2 (insertelement into lane 0,
; then a zero-mask shuffle) are each zero-extended to <8 x i16> and multiplied,
; with the splat as the first mul operand.
; The assertions expect a vrev16.8 of the vector input, vdup.16 of r0, and then
; a vmullb.u8 taking the splat register first.
; Note: the name lists indices up to 31, but the mask only selects 1..15.
1297 define arm_aapcs_vfpcc <8 x i16> @zext8_0ext_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
1298 ; CHECK-LABEL: zext8_0ext_135791113151719212325272931:
1299 ; CHECK: @ %bb.0: @ %entry
1300 ; CHECK-NEXT: vrev16.8 q0, q0
1301 ; CHECK-NEXT: vdup.16 q1, r0
1302 ; CHECK-NEXT: vmullb.u8 q0, q1, q0
1305 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1306 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1307 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1308 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1309 %out2 = zext <8 x i8> %shuf2 to <8 x i16>
1310 %out = mul <8 x i16> %out2, %out1
; Odd lanes 1..15 of %src1 are zero-extended to <8 x i16> and multiplied by a
; splat of %src2, where %src2 is zero-extended to i16 as a scalar before being
; inserted and splatted (vector operand first in the mul).
; The assertions expect vmovlt.u8, uxtb and a vector-by-scalar vmul.i16.
; Note: the name lists indices up to 31, but the mask only selects 1..15.
1314 define arm_aapcs_vfpcc <8 x i16> @zext8_135791113151719212325272931_ext0(<16 x i8> %src1, i8 %src2) {
1315 ; CHECK-LABEL: zext8_135791113151719212325272931_ext0:
1316 ; CHECK: @ %bb.0: @ %entry
1317 ; CHECK-NEXT: vmovlt.u8 q0, q0
1318 ; CHECK-NEXT: uxtb r0, r0
1319 ; CHECK-NEXT: vmul.i16 q0, q0, r0
1322 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1323 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1324 %ext = zext i8 %src2 to i16
1325 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1326 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1327 %out = mul <8 x i16> %out1, %shuf2
; %src2 is zero-extended to i16 as a scalar, splatted to <8 x i16>, and used as
; the first mul operand against the zero-extended odd lanes 1..15 of %src1.
; The assertions expect vmovlt.u8, uxtb and a vector-by-scalar vmul.i16.
; Note: the name lists indices up to 31, but the mask only selects 1..15.
1331 define arm_aapcs_vfpcc <8 x i16> @zext8_ext0_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
1332 ; CHECK-LABEL: zext8_ext0_135791113151719212325272931:
1333 ; CHECK: @ %bb.0: @ %entry
1334 ; CHECK-NEXT: vmovlt.u8 q0, q0
1335 ; CHECK-NEXT: uxtb r0, r0
1336 ; CHECK-NEXT: vmul.i16 q0, q0, r0
1339 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1340 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1341 %ext = zext i8 %src2 to i16
1342 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1343 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1344 %out = mul <8 x i16> %shuf2, %out1
; Sixteen-lane result: even lanes 0..14 followed by odd lanes 1..15 of the
; <32 x i8> %src1 are zero-extended to <16 x i16> and multiplied by an i8 splat
; of %src2 that is extended after splatting (vector operand first in the mul).
; The assertions expect vdup.8, one vrev16.8 and two vmullb.u8 instructions
; writing q1 and q0.
1348 define arm_aapcs_vfpcc <16 x i16> @zext8_0246810121413579111315_0ext(<32 x i8> %src1, i8 %src2) {
1349 ; CHECK-LABEL: zext8_0246810121413579111315_0ext:
1350 ; CHECK: @ %bb.0: @ %entry
1351 ; CHECK-NEXT: vdup.8 q2, r0
1352 ; CHECK-NEXT: vrev16.8 q1, q0
1353 ; CHECK-NEXT: vmullb.u8 q1, q1, q2
1354 ; CHECK-NEXT: vmullb.u8 q0, q0, q2
1357 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1358 %out1 = zext <16 x i8> %shuf1 to <16 x i16>
1359 %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
1360 %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
1361 %out2 = zext <16 x i8> %shuf2 to <16 x i16>
1362 %out = mul <16 x i16> %out1, %out2
; Sixteen-lane result with the splat as the first mul operand: an i8 splat of
; %src2 (extended after splatting) times the zero-extended even-then-odd lanes
; of the <32 x i8> %src1.
; The assertions expect vrev16.8, vdup.8 and two vmullb.u8 instructions that
; take the splat register as their first source.
1366 define arm_aapcs_vfpcc <16 x i16> @zext8_0ext_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
1367 ; CHECK-LABEL: zext8_0ext_0246810121413579111315:
1368 ; CHECK: @ %bb.0: @ %entry
1369 ; CHECK-NEXT: vrev16.8 q1, q0
1370 ; CHECK-NEXT: vdup.8 q2, r0
1371 ; CHECK-NEXT: vmullb.u8 q1, q2, q1
1372 ; CHECK-NEXT: vmullb.u8 q0, q2, q0
1375 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1376 %out1 = zext <16 x i8> %shuf1 to <16 x i16>
1377 %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
1378 %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
1379 %out2 = zext <16 x i8> %shuf2 to <16 x i16>
1380 %out = mul <16 x i16> %out2, %out1
; Sixteen-lane result: the zero-extended even-then-odd lanes of the <32 x i8>
; %src1 are multiplied by a splat of %src2, where %src2 is zero-extended to i16
; as a scalar before being splatted to <16 x i16> (vector operand first).
; The assertions expect vmovlb.u8 / vmovlt.u8 for the two halves, one uxtb,
; two vector-by-scalar vmul.i16 instructions and a final vmov into q0.
1384 define arm_aapcs_vfpcc <16 x i16> @zext8_0246810121413579111315_ext0(<32 x i8> %src1, i8 %src2) {
1385 ; CHECK-LABEL: zext8_0246810121413579111315_ext0:
1386 ; CHECK: @ %bb.0: @ %entry
1387 ; CHECK-NEXT: vmovlb.u8 q1, q0
1388 ; CHECK-NEXT: uxtb r0, r0
1389 ; CHECK-NEXT: vmul.i16 q2, q1, r0
1390 ; CHECK-NEXT: vmovlt.u8 q0, q0
1391 ; CHECK-NEXT: vmul.i16 q1, q0, r0
1392 ; CHECK-NEXT: vmov q0, q2
1395 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1396 %out1 = zext <16 x i8> %shuf1 to <16 x i16>
1397 %ext = zext i8 %src2 to i16
1398 %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
1399 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
1400 %out = mul <16 x i16> %out1, %shuf2
; Sixteen-lane result with the splat as the first mul operand: %src2 is
; zero-extended to i16 as a scalar, splatted to <16 x i16>, and multiplied by
; the zero-extended even-then-odd lanes of the <32 x i8> %src1.
; The assertions expect vmovlb.u8 / vmovlt.u8 for the two halves, one uxtb,
; two vector-by-scalar vmul.i16 instructions and a final vmov into q0.
1404 define arm_aapcs_vfpcc <16 x i16> @zext8_ext0_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
1405 ; CHECK-LABEL: zext8_ext0_0246810121413579111315:
1406 ; CHECK: @ %bb.0: @ %entry
1407 ; CHECK-NEXT: vmovlb.u8 q1, q0
1408 ; CHECK-NEXT: uxtb r0, r0
1409 ; CHECK-NEXT: vmul.i16 q2, q1, r0
1410 ; CHECK-NEXT: vmovlt.u8 q0, q0
1411 ; CHECK-NEXT: vmul.i16 q1, q0, r0
1412 ; CHECK-NEXT: vmov q0, q2
1415 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1416 %out1 = zext <16 x i8> %shuf1 to <16 x i16>
1417 %ext = zext i8 %src2 to i16
1418 %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
1419 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
1420 %out = mul <16 x i16> %shuf2, %out1