1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
; Sign-extending i32 -> i64 multiply of lanes 0 and 2 of %src1 by a splat of
; %src2. The splat is built at i32 width and extended afterwards; the vector
; value is the first mul operand. Expected codegen: a single vmullb.s32.
4 define arm_aapcs_vfpcc <2 x i64> @sext32_0246_0ext(<4 x i32> %src1, i32 %src2) {
5 ; CHECK-LABEL: sext32_0246_0ext:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
8 ; CHECK-NEXT: vmullb.s32 q1, q0, q2
9 ; CHECK-NEXT: vmov q0, q1
; Pick lanes 0 and 2 of %src1 and widen them to i64.
12 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
13 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
; Broadcast %src2 (the all-zero mask reads lane 0 for every result lane), then widen.
14 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
15 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
16 %out2 = sext <2 x i32> %shuf2 to <2 x i64>
17 %out = mul <2 x i64> %out1, %out2
; Sign-extending i32 -> i64 multiply of a splat of %src2 by lanes 0 and 2 of
; %src1. Same inputs as the vector-first variant but with the mul operands
; swapped (splat first). Expected codegen: a single vmullb.s32.
21 define arm_aapcs_vfpcc <2 x i64> @sext32_0ext_0246(<4 x i32> %src1, i32 %src2) {
22 ; CHECK-LABEL: sext32_0ext_0246:
23 ; CHECK: @ %bb.0: @ %entry
24 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
25 ; CHECK-NEXT: vmullb.s32 q1, q2, q0
26 ; CHECK-NEXT: vmov q0, q1
; Pick lanes 0 and 2 of %src1 and widen them to i64.
29 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
30 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
; Broadcast %src2 at i32 width, then widen the splat.
31 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
32 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
33 %out2 = sext <2 x i32> %shuf2 to <2 x i64>
34 %out = mul <2 x i64> %out2, %out1
; Sign-extending multiply of lanes 0 and 2 of %src1 by %src2, where %src2 is
; sign-extended as a scalar first and only then splatted at i64 width.
; Expected codegen: no vmullb; each 64-bit product is formed in core registers
; with umull plus mla terms that use the asrs #31 sign words.
38 define arm_aapcs_vfpcc <2 x i64> @sext32_0246_ext0(<4 x i32> %src1, i32 %src2) {
39 ; CHECK-LABEL: sext32_0246_ext0:
40 ; CHECK: @ %bb.0: @ %entry
41 ; CHECK-NEXT: .save {r4, r5, r7, lr}
42 ; CHECK-NEXT: push {r4, r5, r7, lr}
43 ; CHECK-NEXT: vmov r1, s2
44 ; CHECK-NEXT: vmov r3, s0
45 ; CHECK-NEXT: umull lr, r12, r1, r0
46 ; CHECK-NEXT: umull r2, r5, r3, r0
47 ; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
48 ; CHECK-NEXT: asrs r2, r0, #31
49 ; CHECK-NEXT: mla r4, r1, r2, r12
50 ; CHECK-NEXT: asrs r1, r1, #31
51 ; CHECK-NEXT: mla r2, r3, r2, r5
52 ; CHECK-NEXT: asrs r3, r3, #31
53 ; CHECK-NEXT: mla r1, r1, r0, r4
54 ; CHECK-NEXT: mla r0, r3, r0, r2
55 ; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
56 ; CHECK-NEXT: pop {r4, r5, r7, pc}
; Pick lanes 0 and 2 of %src1 and widen them to i64.
58 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
59 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
; Widen the scalar first, then broadcast the i64 value.
60 %ext = sext i32 %src2 to i64
61 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
62 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
63 %out = mul <2 x i64> %out1, %shuf2
; Sign-extending multiply of %src2 by lanes 0 and 2 of %src1, with %src2
; sign-extended as a scalar before being splatted at i64 width; the splat is
; the first mul operand. Expected codegen: scalar umull/mla arithmetic per
; lane rather than a vmullb.
67 define arm_aapcs_vfpcc <2 x i64> @sext32_ext0_0246(<4 x i32> %src1, i32 %src2) {
68 ; CHECK-LABEL: sext32_ext0_0246:
69 ; CHECK: @ %bb.0: @ %entry
70 ; CHECK-NEXT: .save {r4, r5, r7, lr}
71 ; CHECK-NEXT: push {r4, r5, r7, lr}
72 ; CHECK-NEXT: vmov r1, s2
73 ; CHECK-NEXT: asrs r4, r0, #31
74 ; CHECK-NEXT: vmov r3, s0
75 ; CHECK-NEXT: umull lr, r12, r0, r1
76 ; CHECK-NEXT: umull r2, r5, r0, r3
77 ; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
78 ; CHECK-NEXT: asrs r2, r1, #31
79 ; CHECK-NEXT: mla r2, r0, r2, r12
80 ; CHECK-NEXT: mla r1, r4, r1, r2
81 ; CHECK-NEXT: asrs r2, r3, #31
82 ; CHECK-NEXT: mla r0, r0, r2, r5
83 ; CHECK-NEXT: mla r0, r4, r3, r0
84 ; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
85 ; CHECK-NEXT: pop {r4, r5, r7, pc}
; Pick lanes 0 and 2 of %src1 and widen them to i64.
87 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
88 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
; Widen the scalar first, then broadcast the i64 value.
89 %ext = sext i32 %src2 to i64
90 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
91 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
92 %out = mul <2 x i64> %shuf2, %out1
; Sign-extending i32 -> i64 multiply of lanes 1 and 3 of %src1 by a splat of
; %src2 (splat built at i32 width, extended afterwards; vector operand first).
; Expected codegen: vrev64.32 on the source followed by vmullb.s32.
96 define arm_aapcs_vfpcc <2 x i64> @sext32_1357_0ext(<4 x i32> %src1, i32 %src2) {
97 ; CHECK-LABEL: sext32_1357_0ext:
98 ; CHECK: @ %bb.0: @ %entry
99 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r0
100 ; CHECK-NEXT: vrev64.32 q2, q0
101 ; CHECK-NEXT: vmullb.s32 q0, q2, q1
; Pick lanes 1 and 3 of %src1 and widen them to i64.
104 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
105 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
; Broadcast %src2 at i32 width, then widen the splat.
106 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
107 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
108 %out2 = sext <2 x i32> %shuf2 to <2 x i64>
109 %out = mul <2 x i64> %out1, %out2
; Sign-extending i32 -> i64 multiply of a splat of %src2 by lanes 1 and 3 of
; %src1 (splat built at i32 width, extended afterwards; splat operand first).
; Expected codegen: vrev64.32 on the source followed by vmullb.s32.
113 define arm_aapcs_vfpcc <2 x i64> @sext32_0ext_1357(<4 x i32> %src1, i32 %src2) {
114 ; CHECK-LABEL: sext32_0ext_1357:
115 ; CHECK: @ %bb.0: @ %entry
116 ; CHECK-NEXT: vrev64.32 q1, q0
117 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
118 ; CHECK-NEXT: vmullb.s32 q0, q2, q1
; Pick lanes 1 and 3 of %src1 and widen them to i64.
121 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
122 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
; Broadcast %src2 at i32 width, then widen the splat.
123 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
124 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
125 %out2 = sext <2 x i32> %shuf2 to <2 x i64>
126 %out = mul <2 x i64> %out2, %out1
; Sign-extending multiply of lanes 1 and 3 of %src1 by %src2, where %src2 is
; sign-extended as a scalar and then splatted at i64 width (vector operand
; first). Expected codegen: vrev64.32, then scalar umull/mla arithmetic per
; lane rather than a vmullb.
130 define arm_aapcs_vfpcc <2 x i64> @sext32_1357_ext0(<4 x i32> %src1, i32 %src2) {
131 ; CHECK-LABEL: sext32_1357_ext0:
132 ; CHECK: @ %bb.0: @ %entry
133 ; CHECK-NEXT: .save {r4, r5, r7, lr}
134 ; CHECK-NEXT: push {r4, r5, r7, lr}
135 ; CHECK-NEXT: vrev64.32 q1, q0
136 ; CHECK-NEXT: vmov r1, s6
137 ; CHECK-NEXT: vmov r3, s4
138 ; CHECK-NEXT: umull lr, r12, r1, r0
139 ; CHECK-NEXT: umull r2, r5, r3, r0
140 ; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
141 ; CHECK-NEXT: asrs r2, r0, #31
142 ; CHECK-NEXT: mla r4, r1, r2, r12
143 ; CHECK-NEXT: asrs r1, r1, #31
144 ; CHECK-NEXT: mla r2, r3, r2, r5
145 ; CHECK-NEXT: asrs r3, r3, #31
146 ; CHECK-NEXT: mla r1, r1, r0, r4
147 ; CHECK-NEXT: mla r0, r3, r0, r2
148 ; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
149 ; CHECK-NEXT: pop {r4, r5, r7, pc}
; Pick lanes 1 and 3 of %src1 and widen them to i64.
151 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
152 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
; Widen the scalar first, then broadcast the i64 value.
153 %ext = sext i32 %src2 to i64
154 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
155 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
156 %out = mul <2 x i64> %out1, %shuf2
; Sign-extending multiply of %src2 by lanes 1 and 3 of %src1, where %src2 is
; sign-extended as a scalar and then splatted at i64 width (splat operand
; first). Expected codegen: vrev64.32, then scalar umull/mla arithmetic per
; lane rather than a vmullb.
160 define arm_aapcs_vfpcc <2 x i64> @sext32_ext0_1357(<4 x i32> %src1, i32 %src2) {
161 ; CHECK-LABEL: sext32_ext0_1357:
162 ; CHECK: @ %bb.0: @ %entry
163 ; CHECK-NEXT: .save {r4, r5, r7, lr}
164 ; CHECK-NEXT: push {r4, r5, r7, lr}
165 ; CHECK-NEXT: vrev64.32 q1, q0
166 ; CHECK-NEXT: asrs r4, r0, #31
167 ; CHECK-NEXT: vmov r1, s6
168 ; CHECK-NEXT: vmov r3, s4
169 ; CHECK-NEXT: umull lr, r12, r0, r1
170 ; CHECK-NEXT: umull r2, r5, r0, r3
171 ; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
172 ; CHECK-NEXT: asrs r2, r1, #31
173 ; CHECK-NEXT: mla r2, r0, r2, r12
174 ; CHECK-NEXT: mla r1, r4, r1, r2
175 ; CHECK-NEXT: asrs r2, r3, #31
176 ; CHECK-NEXT: mla r0, r0, r2, r5
177 ; CHECK-NEXT: mla r0, r4, r3, r0
178 ; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
179 ; CHECK-NEXT: pop {r4, r5, r7, pc}
; Pick lanes 1 and 3 of %src1 and widen them to i64.
181 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
182 %out1 = sext <2 x i32> %shuf1 to <2 x i64>
; Widen the scalar first, then broadcast the i64 value.
183 %ext = sext i32 %src2 to i64
184 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
185 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
186 %out = mul <2 x i64> %shuf2, %out1
; Four-lane variant: lanes 0, 2, 1, 3 of an <8 x i32> %src1 are sign-extended
; to i64 and multiplied by a splat of %src2 (splat built at i32 width and
; extended afterwards; vector operand first). Expected codegen: two
; vmullb.s32, the second after s1/s3 are moved into s0/s2.
190 define arm_aapcs_vfpcc <4 x i64> @sext32_0213_0ext(<8 x i32> %src1, i32 %src2) {
191 ; CHECK-LABEL: sext32_0213_0ext:
192 ; CHECK: @ %bb.0: @ %entry
193 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
194 ; CHECK-NEXT: vmullb.s32 q2, q0, q3
195 ; CHECK-NEXT: vmov.f32 s0, s1
196 ; CHECK-NEXT: vmov.f32 s2, s3
197 ; CHECK-NEXT: vmullb.s32 q1, q0, q3
198 ; CHECK-NEXT: vmov q0, q2
; Pick lanes 0, 2, 1, 3 of %src1 (in that order) and widen them to i64.
201 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
202 %out1 = sext <4 x i32> %shuf1 to <4 x i64>
; Broadcast %src2 at i32 width, then widen the splat.
203 %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
204 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
205 %out2 = sext <4 x i32> %shuf2 to <4 x i64>
206 %out = mul <4 x i64> %out1, %out2
; Four-lane variant with the mul operands swapped: a splat of %src2 (built at
; i32 width, then sign-extended) times lanes 0, 2, 1, 3 of an <8 x i32> %src1.
; Expected codegen: two vmullb.s32, the second after s1/s3 are moved into s0/s2.
210 define arm_aapcs_vfpcc <4 x i64> @sext32_0ext_0213(<8 x i32> %src1, i32 %src2) {
211 ; CHECK-LABEL: sext32_0ext_0213:
212 ; CHECK: @ %bb.0: @ %entry
213 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
214 ; CHECK-NEXT: vmullb.s32 q2, q3, q0
215 ; CHECK-NEXT: vmov.f32 s0, s1
216 ; CHECK-NEXT: vmov.f32 s2, s3
217 ; CHECK-NEXT: vmullb.s32 q1, q3, q0
218 ; CHECK-NEXT: vmov q0, q2
; Pick lanes 0, 2, 1, 3 of %src1 (in that order) and widen them to i64.
221 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
222 %out1 = sext <4 x i32> %shuf1 to <4 x i64>
; Broadcast %src2 at i32 width, then widen the splat.
223 %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
224 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
225 %out2 = sext <4 x i32> %shuf2 to <4 x i64>
226 %out = mul <4 x i64> %out2, %out1
; Four-lane variant where %src2 is sign-extended as a scalar and then splatted
; at i64 width: lanes 0, 2, 1, 3 of an <8 x i32> %src1 (sign-extended) times
; that splat. Expected codegen: no vmullb; all four 64-bit products are built
; in core registers from umull, asrs #31 and mla.
230 define arm_aapcs_vfpcc <4 x i64> @sext32_0213_ext0(<8 x i32> %src1, i32 %src2) {
231 ; CHECK-LABEL: sext32_0213_ext0:
232 ; CHECK: @ %bb.0: @ %entry
233 ; CHECK-NEXT: .save {r4, r5, r7, lr}
234 ; CHECK-NEXT: push {r4, r5, r7, lr}
235 ; CHECK-NEXT: vmov q1, q0
236 ; CHECK-NEXT: vmov r1, s6
237 ; CHECK-NEXT: vmov r3, s4
238 ; CHECK-NEXT: vmov.f32 s4, s5
239 ; CHECK-NEXT: vmov.f32 s6, s7
240 ; CHECK-NEXT: umull lr, r12, r1, r0
241 ; CHECK-NEXT: umull r2, r5, r3, r0
242 ; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
243 ; CHECK-NEXT: asrs r2, r0, #31
244 ; CHECK-NEXT: mla r4, r1, r2, r12
245 ; CHECK-NEXT: asrs r1, r1, #31
246 ; CHECK-NEXT: mla r5, r3, r2, r5
247 ; CHECK-NEXT: asrs r3, r3, #31
248 ; CHECK-NEXT: mla r1, r1, r0, r4
249 ; CHECK-NEXT: vmov r4, s4
250 ; CHECK-NEXT: mla r3, r3, r0, r5
251 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
252 ; CHECK-NEXT: vmov r1, s6
253 ; CHECK-NEXT: umull r5, lr, r4, r0
254 ; CHECK-NEXT: umull r3, r12, r1, r0
255 ; CHECK-NEXT: vmov q1[2], q1[0], r5, r3
256 ; CHECK-NEXT: mla r3, r1, r2, r12
257 ; CHECK-NEXT: asrs r1, r1, #31
258 ; CHECK-NEXT: mla r2, r4, r2, lr
259 ; CHECK-NEXT: mla r1, r1, r0, r3
260 ; CHECK-NEXT: asrs r3, r4, #31
261 ; CHECK-NEXT: mla r0, r3, r0, r2
262 ; CHECK-NEXT: vmov q1[3], q1[1], r0, r1
263 ; CHECK-NEXT: pop {r4, r5, r7, pc}
; Pick lanes 0, 2, 1, 3 of %src1 (in that order) and widen them to i64.
265 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
266 %out1 = sext <4 x i32> %shuf1 to <4 x i64>
; Widen the scalar first, then broadcast the i64 value.
267 %ext = sext i32 %src2 to i64
268 %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
269 %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
270 %out = mul <4 x i64> %out1, %shuf2
; Four-lane variant with the splat as the first mul operand: %src2 is
; sign-extended as a scalar, splatted at i64 width, and multiplied by the
; sign-extended lanes 0, 2, 1, 3 of an <8 x i32> %src1. Expected codegen:
; no vmullb; products are built in core registers from umull, asrs #31 and mla.
274 define arm_aapcs_vfpcc <4 x i64> @sext32_ext0_0213(<8 x i32> %src1, i32 %src2) {
275 ; CHECK-LABEL: sext32_ext0_0213:
276 ; CHECK: @ %bb.0: @ %entry
277 ; CHECK-NEXT: .save {r4, r5, r7, lr}
278 ; CHECK-NEXT: push {r4, r5, r7, lr}
279 ; CHECK-NEXT: vmov q1, q0
280 ; CHECK-NEXT: asrs r4, r0, #31
281 ; CHECK-NEXT: vmov r1, s6
282 ; CHECK-NEXT: vmov r3, s4
283 ; CHECK-NEXT: vmov.f32 s4, s5
284 ; CHECK-NEXT: vmov.f32 s6, s7
285 ; CHECK-NEXT: umull lr, r12, r0, r1
286 ; CHECK-NEXT: umull r2, r5, r0, r3
287 ; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
288 ; CHECK-NEXT: asrs r2, r1, #31
289 ; CHECK-NEXT: mla r2, r0, r2, r12
290 ; CHECK-NEXT: mla r1, r4, r1, r2
291 ; CHECK-NEXT: asrs r2, r3, #31
292 ; CHECK-NEXT: mla r2, r0, r2, r5
293 ; CHECK-NEXT: vmov r5, s4
294 ; CHECK-NEXT: mla r2, r4, r3, r2
295 ; CHECK-NEXT: vmov q0[3], q0[1], r2, r1
296 ; CHECK-NEXT: vmov r1, s6
297 ; CHECK-NEXT: umull r3, lr, r0, r5
298 ; CHECK-NEXT: umull r2, r12, r0, r1
299 ; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
300 ; CHECK-NEXT: asrs r2, r1, #31
301 ; CHECK-NEXT: mla r2, r0, r2, r12
302 ; CHECK-NEXT: mla r1, r4, r1, r2
303 ; CHECK-NEXT: asrs r2, r5, #31
304 ; CHECK-NEXT: mla r0, r0, r2, lr
305 ; CHECK-NEXT: mla r0, r4, r5, r0
306 ; CHECK-NEXT: vmov q1[3], q1[1], r0, r1
307 ; CHECK-NEXT: pop {r4, r5, r7, pc}
; Pick lanes 0, 2, 1, 3 of %src1 (in that order) and widen them to i64.
309 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
310 %out1 = sext <4 x i32> %shuf1 to <4 x i64>
; Widen the scalar first, then broadcast the i64 value.
311 %ext = sext i32 %src2 to i64
312 %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
313 %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
314 %out = mul <4 x i64> %shuf2, %out1
; Zero-extending i32 -> i64 multiply of lanes 0 and 2 of %src1 by a splat of
; %src2. The splat is built at i32 width and extended afterwards; the vector
; value is the first mul operand. Expected codegen: a single vmullb.u32.
318 define arm_aapcs_vfpcc <2 x i64> @zext32_0246_0ext(<4 x i32> %src1, i32 %src2) {
319 ; CHECK-LABEL: zext32_0246_0ext:
320 ; CHECK: @ %bb.0: @ %entry
321 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
322 ; CHECK-NEXT: vmullb.u32 q1, q0, q2
323 ; CHECK-NEXT: vmov q0, q1
; Pick lanes 0 and 2 of %src1 and zero-extend them to i64.
326 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
327 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
; Broadcast %src2 at i32 width, then zero-extend the splat.
328 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
329 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
330 %out2 = zext <2 x i32> %shuf2 to <2 x i64>
331 %out = mul <2 x i64> %out1, %out2
; Zero-extending i32 -> i64 multiply of a splat of %src2 by lanes 0 and 2 of
; %src1 (splat built at i32 width, extended afterwards; splat operand first).
; Expected codegen: a single vmullb.u32.
335 define arm_aapcs_vfpcc <2 x i64> @zext32_0ext_0246(<4 x i32> %src1, i32 %src2) {
336 ; CHECK-LABEL: zext32_0ext_0246:
337 ; CHECK: @ %bb.0: @ %entry
338 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
339 ; CHECK-NEXT: vmullb.u32 q1, q2, q0
340 ; CHECK-NEXT: vmov q0, q1
; Pick lanes 0 and 2 of %src1 and zero-extend them to i64.
343 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
344 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
; Broadcast %src2 at i32 width, then zero-extend the splat.
345 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
346 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
347 %out2 = zext <2 x i32> %shuf2 to <2 x i64>
348 %out = mul <2 x i64> %out2, %out1
; Zero-extending multiply of lanes 0 and 2 of %src1 by %src2, where %src2 is
; zero-extended as a scalar and then splatted at i64 width (vector operand
; first). Expected codegen: one scalar umull per lane, with the halves
; reassembled into q0 by vmov; no vmullb.
352 define arm_aapcs_vfpcc <2 x i64> @zext32_0246_ext0(<4 x i32> %src1, i32 %src2) {
353 ; CHECK-LABEL: zext32_0246_ext0:
354 ; CHECK: @ %bb.0: @ %entry
355 ; CHECK-NEXT: vmov r1, s2
356 ; CHECK-NEXT: vmov r3, s0
357 ; CHECK-NEXT: umull r1, r2, r1, r0
358 ; CHECK-NEXT: umull r0, r3, r3, r0
359 ; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
360 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
; Pick lanes 0 and 2 of %src1 and zero-extend them to i64.
363 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
364 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
; Zero-extend the scalar first, then broadcast the i64 value.
365 %ext = zext i32 %src2 to i64
366 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
367 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
368 %out = mul <2 x i64> %out1, %shuf2
; Zero-extending multiply of %src2 by lanes 0 and 2 of %src1, where %src2 is
; zero-extended as a scalar and then splatted at i64 width (splat operand
; first). Expected codegen: one scalar umull per lane, with the halves
; reassembled into q0 by vmov; no vmullb.
372 define arm_aapcs_vfpcc <2 x i64> @zext32_ext0_0246(<4 x i32> %src1, i32 %src2) {
373 ; CHECK-LABEL: zext32_ext0_0246:
374 ; CHECK: @ %bb.0: @ %entry
375 ; CHECK-NEXT: vmov r1, s2
376 ; CHECK-NEXT: vmov r3, s0
377 ; CHECK-NEXT: umull r1, r2, r0, r1
378 ; CHECK-NEXT: umull r0, r3, r0, r3
379 ; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
380 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
; Pick lanes 0 and 2 of %src1 and zero-extend them to i64.
383 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
384 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
; Zero-extend the scalar first, then broadcast the i64 value.
385 %ext = zext i32 %src2 to i64
386 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
387 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
388 %out = mul <2 x i64> %shuf2, %out1
; Zero-extending i32 -> i64 multiply of lanes 1 and 3 of %src1 by a splat of
; %src2 (splat built at i32 width, extended afterwards; vector operand first).
; Expected codegen: vrev64.32 on the source followed by vmullb.u32.
392 define arm_aapcs_vfpcc <2 x i64> @zext32_1357_0ext(<4 x i32> %src1, i32 %src2) {
393 ; CHECK-LABEL: zext32_1357_0ext:
394 ; CHECK: @ %bb.0: @ %entry
395 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r0
396 ; CHECK-NEXT: vrev64.32 q2, q0
397 ; CHECK-NEXT: vmullb.u32 q0, q2, q1
; Pick lanes 1 and 3 of %src1 and zero-extend them to i64.
400 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
401 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
; Broadcast %src2 at i32 width, then zero-extend the splat.
402 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
403 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
404 %out2 = zext <2 x i32> %shuf2 to <2 x i64>
405 %out = mul <2 x i64> %out1, %out2
; Zero-extending i32 -> i64 multiply of a splat of %src2 by lanes 1 and 3 of
; %src1 (splat built at i32 width, extended afterwards; splat operand first).
; Expected codegen: vrev64.32 on the source followed by vmullb.u32.
409 define arm_aapcs_vfpcc <2 x i64> @zext32_0ext_1357(<4 x i32> %src1, i32 %src2) {
410 ; CHECK-LABEL: zext32_0ext_1357:
411 ; CHECK: @ %bb.0: @ %entry
412 ; CHECK-NEXT: vrev64.32 q1, q0
413 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
414 ; CHECK-NEXT: vmullb.u32 q0, q2, q1
; Pick lanes 1 and 3 of %src1 and zero-extend them to i64.
417 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
418 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
; Broadcast %src2 at i32 width, then zero-extend the splat.
419 %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
420 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
421 %out2 = zext <2 x i32> %shuf2 to <2 x i64>
422 %out = mul <2 x i64> %out2, %out1
; Zero-extending multiply of lanes 1 and 3 of %src1 by %src2, where %src2 is
; zero-extended as a scalar and then splatted at i64 width (vector operand
; first). Expected codegen: vrev64.32, then one scalar umull per lane; no
; vmullb.
426 define arm_aapcs_vfpcc <2 x i64> @zext32_1357_ext0(<4 x i32> %src1, i32 %src2) {
427 ; CHECK-LABEL: zext32_1357_ext0:
428 ; CHECK: @ %bb.0: @ %entry
429 ; CHECK-NEXT: vrev64.32 q1, q0
430 ; CHECK-NEXT: vmov r1, s6
431 ; CHECK-NEXT: vmov r3, s4
432 ; CHECK-NEXT: umull r1, r2, r1, r0
433 ; CHECK-NEXT: umull r0, r3, r3, r0
434 ; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
435 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
; Pick lanes 1 and 3 of %src1 and zero-extend them to i64.
438 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
439 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
; Zero-extend the scalar first, then broadcast the i64 value.
440 %ext = zext i32 %src2 to i64
441 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
442 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
443 %out = mul <2 x i64> %out1, %shuf2
; Zero-extending multiply of %src2 by lanes 1 and 3 of %src1, where %src2 is
; zero-extended as a scalar and then splatted at i64 width (splat operand
; first). Expected codegen: vrev64.32, then one scalar umull per lane; no
; vmullb.
447 define arm_aapcs_vfpcc <2 x i64> @zext32_ext0_1357(<4 x i32> %src1, i32 %src2) {
448 ; CHECK-LABEL: zext32_ext0_1357:
449 ; CHECK: @ %bb.0: @ %entry
450 ; CHECK-NEXT: vrev64.32 q1, q0
451 ; CHECK-NEXT: vmov r1, s6
452 ; CHECK-NEXT: vmov r3, s4
453 ; CHECK-NEXT: umull r1, r2, r0, r1
454 ; CHECK-NEXT: umull r0, r3, r0, r3
455 ; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
456 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
; Pick lanes 1 and 3 of %src1 and zero-extend them to i64.
459 %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
460 %out1 = zext <2 x i32> %shuf1 to <2 x i64>
; Zero-extend the scalar first, then broadcast the i64 value.
461 %ext = zext i32 %src2 to i64
462 %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
463 %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
464 %out = mul <2 x i64> %shuf2, %out1
; Four-lane variant: lanes 0, 2, 1, 3 of an <8 x i32> %src1 are zero-extended
; to i64 and multiplied by a splat of %src2 (splat built at i32 width and
; extended afterwards; vector operand first). Expected codegen: two
; vmullb.u32, the second after s1/s3 are moved into s0/s2.
468 define arm_aapcs_vfpcc <4 x i64> @zext32_0213_0ext(<8 x i32> %src1, i32 %src2) {
469 ; CHECK-LABEL: zext32_0213_0ext:
470 ; CHECK: @ %bb.0: @ %entry
471 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
472 ; CHECK-NEXT: vmullb.u32 q2, q0, q3
473 ; CHECK-NEXT: vmov.f32 s0, s1
474 ; CHECK-NEXT: vmov.f32 s2, s3
475 ; CHECK-NEXT: vmullb.u32 q1, q0, q3
476 ; CHECK-NEXT: vmov q0, q2
; Pick lanes 0, 2, 1, 3 of %src1 (in that order) and zero-extend them to i64.
479 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
480 %out1 = zext <4 x i32> %shuf1 to <4 x i64>
; Broadcast %src2 at i32 width, then zero-extend the splat.
481 %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
482 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
483 %out2 = zext <4 x i32> %shuf2 to <4 x i64>
484 %out = mul <4 x i64> %out1, %out2
; Four-lane variant with the mul operands swapped: a splat of %src2 (built at
; i32 width, then zero-extended) times lanes 0, 2, 1, 3 of an <8 x i32> %src1.
; Expected codegen: two vmullb.u32, the second after s1/s3 are moved into s0/s2.
488 define arm_aapcs_vfpcc <4 x i64> @zext32_0ext_0213(<8 x i32> %src1, i32 %src2) {
489 ; CHECK-LABEL: zext32_0ext_0213:
490 ; CHECK: @ %bb.0: @ %entry
491 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
492 ; CHECK-NEXT: vmullb.u32 q2, q3, q0
493 ; CHECK-NEXT: vmov.f32 s0, s1
494 ; CHECK-NEXT: vmov.f32 s2, s3
495 ; CHECK-NEXT: vmullb.u32 q1, q3, q0
496 ; CHECK-NEXT: vmov q0, q2
; Pick lanes 0, 2, 1, 3 of %src1 (in that order) and zero-extend them to i64.
499 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
500 %out1 = zext <4 x i32> %shuf1 to <4 x i64>
; Broadcast %src2 at i32 width, then zero-extend the splat.
501 %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
502 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
503 %out2 = zext <4 x i32> %shuf2 to <4 x i64>
504 %out = mul <4 x i64> %out2, %out1
; Four-lane variant where %src2 is zero-extended as a scalar and then splatted
; at i64 width: zero-extended lanes 0, 2, 1, 3 of an <8 x i32> %src1 times
; that splat. Expected codegen: four scalar umull instructions with the
; results reassembled by vmov; no vmullb.
508 define arm_aapcs_vfpcc <4 x i64> @zext32_0213_ext0(<8 x i32> %src1, i32 %src2) {
509 ; CHECK-LABEL: zext32_0213_ext0:
510 ; CHECK: @ %bb.0: @ %entry
511 ; CHECK-NEXT: vmov r1, s2
512 ; CHECK-NEXT: vmov r3, s0
513 ; CHECK-NEXT: vmov.f32 s0, s1
514 ; CHECK-NEXT: vmov.f32 s2, s3
515 ; CHECK-NEXT: umull r1, r12, r1, r0
516 ; CHECK-NEXT: umull r3, r2, r3, r0
517 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r1
518 ; CHECK-NEXT: vmov r1, s2
519 ; CHECK-NEXT: vmov r3, s0
520 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r12
521 ; CHECK-NEXT: vmov q0, q2
522 ; CHECK-NEXT: umull r1, r2, r1, r0
523 ; CHECK-NEXT: umull r0, r3, r3, r0
524 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r1
525 ; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
; Pick lanes 0, 2, 1, 3 of %src1 (in that order) and zero-extend them to i64.
528 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
529 %out1 = zext <4 x i32> %shuf1 to <4 x i64>
; Zero-extend the scalar first, then broadcast the i64 value.
530 %ext = zext i32 %src2 to i64
531 %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
532 %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
533 %out = mul <4 x i64> %out1, %shuf2
; Four-lane variant with the splat as the first mul operand: %src2 is
; zero-extended as a scalar, splatted at i64 width, and multiplied by the
; zero-extended lanes 0, 2, 1, 3 of an <8 x i32> %src1. Expected codegen:
; four scalar umull instructions with the results reassembled by vmov.
537 define arm_aapcs_vfpcc <4 x i64> @zext32_ext0_0213(<8 x i32> %src1, i32 %src2) {
538 ; CHECK-LABEL: zext32_ext0_0213:
539 ; CHECK: @ %bb.0: @ %entry
540 ; CHECK-NEXT: vmov r1, s2
541 ; CHECK-NEXT: vmov r3, s0
542 ; CHECK-NEXT: vmov.f32 s0, s1
543 ; CHECK-NEXT: vmov.f32 s2, s3
544 ; CHECK-NEXT: umull r1, r12, r0, r1
545 ; CHECK-NEXT: umull r3, r2, r0, r3
546 ; CHECK-NEXT: vmov q2[2], q2[0], r3, r1
547 ; CHECK-NEXT: vmov r1, s2
548 ; CHECK-NEXT: vmov r3, s0
549 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r12
550 ; CHECK-NEXT: vmov q0, q2
551 ; CHECK-NEXT: umull r1, r2, r0, r1
552 ; CHECK-NEXT: umull r0, r3, r0, r3
553 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r1
554 ; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
; Pick lanes 0, 2, 1, 3 of %src1 (in that order) and zero-extend them to i64.
557 %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
558 %out1 = zext <4 x i32> %shuf1 to <4 x i64>
; Zero-extend the scalar first, then broadcast the i64 value.
559 %ext = zext i32 %src2 to i64
560 %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
561 %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
562 %out = mul <4 x i64> %shuf2, %out1
; Sign-extending i16 -> i32 multiply of lanes 0, 2, 4, 6 of %src1 by a splat
; of %src2. The splat is built at i16 width and extended afterwards; the
; vector value is the first mul operand. Expected codegen: vdup.32 of the
; scalar followed by a single vmullb.s16.
566 define arm_aapcs_vfpcc <4 x i32> @sext16_02468101214_0ext(<8 x i16> %src1, i16 %src2) {
567 ; CHECK-LABEL: sext16_02468101214_0ext:
568 ; CHECK: @ %bb.0: @ %entry
569 ; CHECK-NEXT: vdup.32 q1, r0
570 ; CHECK-NEXT: vmullb.s16 q0, q0, q1
; Pick lanes 0, 2, 4, 6 of %src1 and widen them to i32.
573 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
574 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
; Broadcast %src2 at i16 width, then widen the splat.
575 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
576 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
577 %out2 = sext <4 x i16> %shuf2 to <4 x i32>
578 %out = mul <4 x i32> %out1, %out2
; Sign-extending i16 -> i32 multiply of a splat of %src2 by lanes 0, 2, 4, 6
; of %src1 (splat built at i16 width, extended afterwards; splat operand
; first). Expected codegen: vdup.32 followed by a single vmullb.s16.
582 define arm_aapcs_vfpcc <4 x i32> @sext16_0ext_02468101214(<8 x i16> %src1, i16 %src2) {
583 ; CHECK-LABEL: sext16_0ext_02468101214:
584 ; CHECK: @ %bb.0: @ %entry
585 ; CHECK-NEXT: vdup.32 q1, r0
586 ; CHECK-NEXT: vmullb.s16 q0, q1, q0
; Pick lanes 0, 2, 4, 6 of %src1 and widen them to i32.
589 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
590 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
; Broadcast %src2 at i16 width, then widen the splat.
591 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
592 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
593 %out2 = sext <4 x i16> %shuf2 to <4 x i32>
594 %out = mul <4 x i32> %out2, %out1
; Sign-extending multiply of lanes 0, 2, 4, 6 of %src1 by %src2, where %src2
; is sign-extended as a scalar and then splatted at i32 width (vector operand
; first). Expected codegen: vmovlb.s16 to widen the vector, sxth on the
; scalar, then vmul.i32 with the scalar register operand; no vmullb.
598 define arm_aapcs_vfpcc <4 x i32> @sext16_02468101214_ext0(<8 x i16> %src1, i16 %src2) {
599 ; CHECK-LABEL: sext16_02468101214_ext0:
600 ; CHECK: @ %bb.0: @ %entry
601 ; CHECK-NEXT: vmovlb.s16 q0, q0
602 ; CHECK-NEXT: sxth r0, r0
603 ; CHECK-NEXT: vmul.i32 q0, q0, r0
; Pick lanes 0, 2, 4, 6 of %src1 and widen them to i32.
606 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
607 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
; Widen the scalar first, then broadcast the i32 value.
608 %ext = sext i16 %src2 to i32
609 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
610 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
611 %out = mul <4 x i32> %out1, %shuf2
; Sign-extending multiply of %src2 by lanes 0, 2, 4, 6 of %src1, where %src2
; is sign-extended as a scalar and then splatted at i32 width (splat operand
; first). Expected codegen: vmovlb.s16, sxth, then vmul.i32 with the scalar
; register operand; no vmullb.
615 define arm_aapcs_vfpcc <4 x i32> @sext16_ext0_02468101214(<8 x i16> %src1, i16 %src2) {
616 ; CHECK-LABEL: sext16_ext0_02468101214:
617 ; CHECK: @ %bb.0: @ %entry
618 ; CHECK-NEXT: vmovlb.s16 q0, q0
619 ; CHECK-NEXT: sxth r0, r0
620 ; CHECK-NEXT: vmul.i32 q0, q0, r0
; Pick lanes 0, 2, 4, 6 of %src1 and widen them to i32.
623 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
624 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
; Widen the scalar first, then broadcast the i32 value.
625 %ext = sext i16 %src2 to i32
626 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
627 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
628 %out = mul <4 x i32> %shuf2, %out1
; Sign-extending i16 -> i32 multiply of lanes 1, 3, 5, 7 of %src1 by a splat
; of %src2 (splat built at i16 width, extended afterwards; vector operand
; first). Expected codegen: vdup.32, vrev32.16 on the source, then vmullb.s16.
632 define arm_aapcs_vfpcc <4 x i32> @sext16_13579111315_0ext(<8 x i16> %src1, i16 %src2) {
633 ; CHECK-LABEL: sext16_13579111315_0ext:
634 ; CHECK: @ %bb.0: @ %entry
635 ; CHECK-NEXT: vdup.32 q1, r0
636 ; CHECK-NEXT: vrev32.16 q0, q0
637 ; CHECK-NEXT: vmullb.s16 q0, q0, q1
; Pick lanes 1, 3, 5, 7 of %src1 and widen them to i32.
640 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
641 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
; Broadcast %src2 at i16 width, then widen the splat.
642 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
643 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
644 %out2 = sext <4 x i16> %shuf2 to <4 x i32>
645 %out = mul <4 x i32> %out1, %out2
; Sign-extending i16 -> i32 multiply of a splat of %src2 by lanes 1, 3, 5, 7
; of %src1 (splat built at i16 width, extended afterwards; splat operand
; first). Expected codegen: vrev32.16 on the source, vdup.32, then vmullb.s16.
649 define arm_aapcs_vfpcc <4 x i32> @sext16_0ext_13579111315(<8 x i16> %src1, i16 %src2) {
650 ; CHECK-LABEL: sext16_0ext_13579111315:
651 ; CHECK: @ %bb.0: @ %entry
652 ; CHECK-NEXT: vrev32.16 q0, q0
653 ; CHECK-NEXT: vdup.32 q1, r0
654 ; CHECK-NEXT: vmullb.s16 q0, q1, q0
; Pick lanes 1, 3, 5, 7 of %src1 and widen them to i32.
657 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
658 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
; Broadcast %src2 at i16 width, then widen the splat.
659 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
660 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
661 %out2 = sext <4 x i16> %shuf2 to <4 x i32>
662 %out = mul <4 x i32> %out2, %out1
; Sign-extending multiply of lanes 1, 3, 5, 7 of %src1 by %src2, where %src2
; is sign-extended as a scalar and then splatted at i32 width (vector operand
; first). Expected codegen: vmovlt.s16 to widen the odd lanes, sxth on the
; scalar, then vmul.i32 with the scalar register operand; no vmullb/vmullt.
666 define arm_aapcs_vfpcc <4 x i32> @sext16_13579111315_ext0(<8 x i16> %src1, i16 %src2) {
667 ; CHECK-LABEL: sext16_13579111315_ext0:
668 ; CHECK: @ %bb.0: @ %entry
669 ; CHECK-NEXT: vmovlt.s16 q0, q0
670 ; CHECK-NEXT: sxth r0, r0
671 ; CHECK-NEXT: vmul.i32 q0, q0, r0
; Pick lanes 1, 3, 5, 7 of %src1 and widen them to i32.
674 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
675 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
; Widen the scalar first, then broadcast the i32 value.
676 %ext = sext i16 %src2 to i32
677 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
678 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
679 %out = mul <4 x i32> %out1, %shuf2
; Sign-extending multiply of %src2 by lanes 1, 3, 5, 7 of %src1, where %src2
; is sign-extended as a scalar and then splatted at i32 width (splat operand
; first). Expected codegen: vmovlt.s16, sxth, then vmul.i32 with the scalar
; register operand; no vmullb/vmullt.
683 define arm_aapcs_vfpcc <4 x i32> @sext16_ext0_13579111315(<8 x i16> %src1, i16 %src2) {
684 ; CHECK-LABEL: sext16_ext0_13579111315:
685 ; CHECK: @ %bb.0: @ %entry
686 ; CHECK-NEXT: vmovlt.s16 q0, q0
687 ; CHECK-NEXT: sxth r0, r0
688 ; CHECK-NEXT: vmul.i32 q0, q0, r0
; Pick lanes 1, 3, 5, 7 of %src1 and widen them to i32.
691 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
692 %out1 = sext <4 x i16> %shuf1 to <4 x i32>
; Widen the scalar first, then broadcast the i32 value.
693 %ext = sext i16 %src2 to i32
694 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
695 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
696 %out = mul <4 x i32> %shuf2, %out1
; Eight-lane variant: lanes 0, 2, 4, 6, 1, 3, 5, 7 of a <16 x i16> %src1 are
; sign-extended to i32 and multiplied by a splat of %src2 (splat built at i16
; width and extended afterwards; vector operand first). Expected codegen:
; vdup.16, a vrev32.16 copy of the source, and two vmullb.s16.
700 define arm_aapcs_vfpcc <8 x i32> @sext16_02461357_0ext(<16 x i16> %src1, i16 %src2) {
701 ; CHECK-LABEL: sext16_02461357_0ext:
702 ; CHECK: @ %bb.0: @ %entry
703 ; CHECK-NEXT: vdup.16 q2, r0
704 ; CHECK-NEXT: vrev32.16 q1, q0
705 ; CHECK-NEXT: vmullb.s16 q1, q1, q2
706 ; CHECK-NEXT: vmullb.s16 q0, q0, q2
; Pick lanes 0, 2, 4, 6 then 1, 3, 5, 7 of %src1 and widen them to i32.
709 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
710 %out1 = sext <8 x i16> %shuf1 to <8 x i32>
; Broadcast %src2 at i16 width, then widen the splat.
711 %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
712 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
713 %out2 = sext <8 x i16> %shuf2 to <8 x i32>
714 %out = mul <8 x i32> %out1, %out2
; Eight-lane variant with the mul operands swapped: a splat of %src2 (built at
; i16 width, then sign-extended) times lanes 0, 2, 4, 6, 1, 3, 5, 7 of a
; <16 x i16> %src1. Expected codegen: a vrev32.16 copy of the source, vdup.16,
; and two vmullb.s16.
718 define arm_aapcs_vfpcc <8 x i32> @sext16_0ext_02461357(<16 x i16> %src1, i16 %src2) {
719 ; CHECK-LABEL: sext16_0ext_02461357:
720 ; CHECK: @ %bb.0: @ %entry
721 ; CHECK-NEXT: vrev32.16 q1, q0
722 ; CHECK-NEXT: vdup.16 q2, r0
723 ; CHECK-NEXT: vmullb.s16 q1, q2, q1
724 ; CHECK-NEXT: vmullb.s16 q0, q2, q0
; Pick lanes 0, 2, 4, 6 then 1, 3, 5, 7 of %src1 and widen them to i32.
727 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
728 %out1 = sext <8 x i16> %shuf1 to <8 x i32>
; Broadcast %src2 at i16 width, then widen the splat.
729 %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
730 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
731 %out2 = sext <8 x i16> %shuf2 to <8 x i32>
732 %out = mul <8 x i32> %out2, %out1
; Eight-lane variant where %src2 is sign-extended as a scalar and then
; splatted at i32 width: sign-extended lanes 0, 2, 4, 6, 1, 3, 5, 7 of a
; <16 x i16> %src1 times that splat. Expected codegen: vmovlb.s16 and
; vmovlt.s16 to widen, sxth on the scalar, and two vmul.i32 with the scalar
; register operand; no vmullb.
736 define arm_aapcs_vfpcc <8 x i32> @sext16_02461357_ext0(<16 x i16> %src1, i16 %src2) {
737 ; CHECK-LABEL: sext16_02461357_ext0:
738 ; CHECK: @ %bb.0: @ %entry
739 ; CHECK-NEXT: vmovlb.s16 q1, q0
740 ; CHECK-NEXT: sxth r0, r0
741 ; CHECK-NEXT: vmul.i32 q2, q1, r0
742 ; CHECK-NEXT: vmovlt.s16 q0, q0
743 ; CHECK-NEXT: vmul.i32 q1, q0, r0
744 ; CHECK-NEXT: vmov q0, q2
; Pick lanes 0, 2, 4, 6 then 1, 3, 5, 7 of %src1 and widen them to i32.
747 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
748 %out1 = sext <8 x i16> %shuf1 to <8 x i32>
; Widen the scalar first, then broadcast the i32 value.
749 %ext = sext i16 %src2 to i32
750 %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
751 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
752 %out = mul <8 x i32> %out1, %shuf2
; Eight-lane variant with the splat as the first mul operand: %src2 is
; sign-extended as a scalar, splatted at i32 width, and multiplied by the
; sign-extended lanes 0, 2, 4, 6, 1, 3, 5, 7 of a <16 x i16> %src1. Expected
; codegen: vmovlb.s16 / vmovlt.s16, sxth, and two vmul.i32 with the scalar
; register operand; no vmullb.
756 define arm_aapcs_vfpcc <8 x i32> @sext16_ext0_02461357(<16 x i16> %src1, i16 %src2) {
757 ; CHECK-LABEL: sext16_ext0_02461357:
758 ; CHECK: @ %bb.0: @ %entry
759 ; CHECK-NEXT: vmovlb.s16 q1, q0
760 ; CHECK-NEXT: sxth r0, r0
761 ; CHECK-NEXT: vmul.i32 q2, q1, r0
762 ; CHECK-NEXT: vmovlt.s16 q0, q0
763 ; CHECK-NEXT: vmul.i32 q1, q0, r0
764 ; CHECK-NEXT: vmov q0, q2
; Pick lanes 0, 2, 4, 6 then 1, 3, 5, 7 of %src1 and widen them to i32.
767 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
768 %out1 = sext <8 x i16> %shuf1 to <8 x i32>
; Widen the scalar first, then broadcast the i32 value.
769 %ext = sext i16 %src2 to i32
770 %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
771 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
772 %out = mul <8 x i32> %shuf2, %out1
; Zero-extending i16 -> i32 multiply of lanes 0, 2, 4, 6 of %src1 by a splat
; of %src2. The splat is built at i16 width and extended afterwards; the
; vector value is the first mul operand. Expected codegen: vdup.32 of the
; scalar followed by a single vmullb.u16.
776 define arm_aapcs_vfpcc <4 x i32> @zext16_02468101214_0ext(<8 x i16> %src1, i16 %src2) {
777 ; CHECK-LABEL: zext16_02468101214_0ext:
778 ; CHECK: @ %bb.0: @ %entry
779 ; CHECK-NEXT: vdup.32 q1, r0
780 ; CHECK-NEXT: vmullb.u16 q0, q0, q1
; Pick lanes 0, 2, 4, 6 of %src1 and zero-extend them to i32.
783 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
784 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
; Broadcast %src2 at i16 width, then zero-extend the splat.
785 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
786 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
787 %out2 = zext <4 x i16> %shuf2 to <4 x i32>
788 %out = mul <4 x i32> %out1, %out2
; Zero-extending i16 -> i32 multiply of a splat of %src2 by lanes 0, 2, 4, 6
; of %src1 (splat built at i16 width, extended afterwards; splat operand
; first). Expected codegen: vdup.32 followed by a single vmullb.u16.
792 define arm_aapcs_vfpcc <4 x i32> @zext16_0ext_02468101214(<8 x i16> %src1, i16 %src2) {
793 ; CHECK-LABEL: zext16_0ext_02468101214:
794 ; CHECK: @ %bb.0: @ %entry
795 ; CHECK-NEXT: vdup.32 q1, r0
796 ; CHECK-NEXT: vmullb.u16 q0, q1, q0
; Pick lanes 0, 2, 4, 6 of %src1 and zero-extend them to i32.
799 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
800 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
; Broadcast %src2 at i16 width, then zero-extend the splat.
801 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
802 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
803 %out2 = zext <4 x i16> %shuf2 to <4 x i32>
804 %out = mul <4 x i32> %out2, %out1
; Zero-extending multiply of lanes 0, 2, 4, 6 of %src1 by %src2, where %src2
; is zero-extended as a scalar and then splatted at i32 width (vector operand
; first). Expected codegen: vmovlb.u16 to widen the vector, uxth on the
; scalar, then vmul.i32 with the scalar register operand; no vmullb.
808 define arm_aapcs_vfpcc <4 x i32> @zext16_02468101214_ext0(<8 x i16> %src1, i16 %src2) {
809 ; CHECK-LABEL: zext16_02468101214_ext0:
810 ; CHECK: @ %bb.0: @ %entry
811 ; CHECK-NEXT: vmovlb.u16 q0, q0
812 ; CHECK-NEXT: uxth r0, r0
813 ; CHECK-NEXT: vmul.i32 q0, q0, r0
; Pick lanes 0, 2, 4, 6 of %src1 and zero-extend them to i32.
816 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
817 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
; Zero-extend the scalar first, then broadcast the i32 value.
818 %ext = zext i16 %src2 to i32
819 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
820 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
821 %out = mul <4 x i32> %out1, %shuf2
; Zero-extending multiply of %src2 by lanes 0, 2, 4, 6 of %src1, where %src2
; is zero-extended as a scalar and then splatted at i32 width (splat operand
; first). Expected codegen: vmovlb.u16, uxth, then vmul.i32 with the scalar
; register operand; no vmullb.
825 define arm_aapcs_vfpcc <4 x i32> @zext16_ext0_02468101214(<8 x i16> %src1, i16 %src2) {
826 ; CHECK-LABEL: zext16_ext0_02468101214:
827 ; CHECK: @ %bb.0: @ %entry
828 ; CHECK-NEXT: vmovlb.u16 q0, q0
829 ; CHECK-NEXT: uxth r0, r0
830 ; CHECK-NEXT: vmul.i32 q0, q0, r0
; Pick lanes 0, 2, 4, 6 of %src1 and zero-extend them to i32.
833 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
834 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
; Zero-extend the scalar first, then broadcast the i32 value.
835 %ext = zext i16 %src2 to i32
836 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
837 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
838 %out = mul <4 x i32> %shuf2, %out1
; Zero-extending i16 -> i32 multiply of lanes 1, 3, 5, 7 of %src1 by a splat
; of %src2 (splat built at i16 width, extended afterwards; vector operand
; first). Expected codegen: vdup.32, vrev32.16 on the source, then vmullb.u16.
842 define arm_aapcs_vfpcc <4 x i32> @zext16_13579111315_0ext(<8 x i16> %src1, i16 %src2) {
843 ; CHECK-LABEL: zext16_13579111315_0ext:
844 ; CHECK: @ %bb.0: @ %entry
845 ; CHECK-NEXT: vdup.32 q1, r0
846 ; CHECK-NEXT: vrev32.16 q0, q0
847 ; CHECK-NEXT: vmullb.u16 q0, q0, q1
; Pick lanes 1, 3, 5, 7 of %src1 and zero-extend them to i32.
850 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
851 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
; Broadcast %src2 at i16 width, then zero-extend the splat.
852 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
853 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
854 %out2 = zext <4 x i16> %shuf2 to <4 x i32>
855 %out = mul <4 x i32> %out1, %out2
; Odd i16 lanes of %src1 (mask 1,3,5,7) zero-extended to i32 and multiplied by
; a narrow splat of %src2 that is zero-extended as a vector; here the splat is
; the first mul operand.
; Expected codegen: vrev32.16, vdup.32, then vmullb.u16 with the splat register
; as the first source.
859 define arm_aapcs_vfpcc <4 x i32> @zext16_0ext_13579111315(<8 x i16> %src1, i16 %src2) {
860 ; CHECK-LABEL: zext16_0ext_13579111315:
861 ; CHECK: @ %bb.0: @ %entry
862 ; CHECK-NEXT: vrev32.16 q0, q0
863 ; CHECK-NEXT: vdup.32 q1, r0
864 ; CHECK-NEXT: vmullb.u16 q0, q1, q0
867 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
868 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
869 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
870 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
871 %out2 = zext <4 x i16> %shuf2 to <4 x i32>
872 %out = mul <4 x i32> %out2, %out1
; zext of the odd i16 lanes of %src1 (mask 1,3,5,7) multiplied by a splat of
; %src2, where %src2 is zero-extended as a scalar to i32 before being inserted
; and splatted. The splat is the second mul operand.
; Expected codegen: vmovlt.u16, uxth, then the vector-by-scalar vmul.i32.
876 define arm_aapcs_vfpcc <4 x i32> @zext16_13579111315_ext0(<8 x i16> %src1, i16 %src2) {
877 ; CHECK-LABEL: zext16_13579111315_ext0:
878 ; CHECK: @ %bb.0: @ %entry
879 ; CHECK-NEXT: vmovlt.u16 q0, q0
880 ; CHECK-NEXT: uxth r0, r0
881 ; CHECK-NEXT: vmul.i32 q0, q0, r0
884 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
885 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
886 %ext = zext i16 %src2 to i32
887 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
888 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
889 %out = mul <4 x i32> %out1, %shuf2
; Odd i16 lanes of %src1 (mask 1,3,5,7) zero-extended to i32 and multiplied by
; a splat of the scalar-zero-extended %src2; here the splat is the first mul
; operand.
; Expected codegen: vmovlt.u16, uxth, then the vector-by-scalar vmul.i32.
893 define arm_aapcs_vfpcc <4 x i32> @zext16_ext0_13579111315(<8 x i16> %src1, i16 %src2) {
894 ; CHECK-LABEL: zext16_ext0_13579111315:
895 ; CHECK: @ %bb.0: @ %entry
896 ; CHECK-NEXT: vmovlt.u16 q0, q0
897 ; CHECK-NEXT: uxth r0, r0
898 ; CHECK-NEXT: vmul.i32 q0, q0, r0
901 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
902 %out1 = zext <4 x i16> %shuf1 to <4 x i32>
903 %ext = zext i16 %src2 to i32
904 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
905 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
906 %out = mul <4 x i32> %shuf2, %out1
; Two-register result: i16 lanes 0,2,4,6 followed by lanes 1,3,5,7 of %src1 are
; zero-extended to <8 x i32> and multiplied by a splat of %src2 that is built
; at i16 and then zero-extended as a vector. The splat is the second mul operand.
; Expected codegen: one vdup.16, one vrev32.16 and two vmullb.u16.
910 define arm_aapcs_vfpcc <8 x i32> @zext16_02461357_0ext(<16 x i16> %src1, i16 %src2) {
911 ; CHECK-LABEL: zext16_02461357_0ext:
912 ; CHECK: @ %bb.0: @ %entry
913 ; CHECK-NEXT: vdup.16 q2, r0
914 ; CHECK-NEXT: vrev32.16 q1, q0
915 ; CHECK-NEXT: vmullb.u16 q1, q1, q2
916 ; CHECK-NEXT: vmullb.u16 q0, q0, q2
919 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
920 %out1 = zext <8 x i16> %shuf1 to <8 x i32>
921 %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
922 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
923 %out2 = zext <8 x i16> %shuf2 to <8 x i32>
924 %out = mul <8 x i32> %out1, %out2
; Two-register result: i16 lanes 0,2,4,6 followed by lanes 1,3,5,7 of %src1 are
; zero-extended to <8 x i32> and multiplied by a narrow splat of %src2 that is
; zero-extended as a vector; here the splat is the first mul operand.
; Expected codegen: vrev32.16, vdup.16 and two vmullb.u16 with the splat
; register as the first source.
928 define arm_aapcs_vfpcc <8 x i32> @zext16_0ext_02461357(<16 x i16> %src1, i16 %src2) {
929 ; CHECK-LABEL: zext16_0ext_02461357:
930 ; CHECK: @ %bb.0: @ %entry
931 ; CHECK-NEXT: vrev32.16 q1, q0
932 ; CHECK-NEXT: vdup.16 q2, r0
933 ; CHECK-NEXT: vmullb.u16 q1, q2, q1
934 ; CHECK-NEXT: vmullb.u16 q0, q2, q0
937 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
938 %out1 = zext <8 x i16> %shuf1 to <8 x i32>
939 %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
940 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
941 %out2 = zext <8 x i16> %shuf2 to <8 x i32>
942 %out = mul <8 x i32> %out2, %out1
; Two-register result: i16 lanes 0,2,4,6 followed by lanes 1,3,5,7 of %src1 are
; zero-extended to <8 x i32> and multiplied by a splat of %src2, where %src2 is
; zero-extended as a scalar to i32 before being inserted and splatted. The
; splat is the second mul operand.
; Expected codegen: vmovlb.u16 / vmovlt.u16 to widen each half, uxth on the
; scalar, two vector-by-scalar vmul.i32, and a final vmov into q0.
946 define arm_aapcs_vfpcc <8 x i32> @zext16_02461357_ext0(<16 x i16> %src1, i16 %src2) {
947 ; CHECK-LABEL: zext16_02461357_ext0:
948 ; CHECK: @ %bb.0: @ %entry
949 ; CHECK-NEXT: vmovlb.u16 q1, q0
950 ; CHECK-NEXT: uxth r0, r0
951 ; CHECK-NEXT: vmul.i32 q2, q1, r0
952 ; CHECK-NEXT: vmovlt.u16 q0, q0
953 ; CHECK-NEXT: vmul.i32 q1, q0, r0
954 ; CHECK-NEXT: vmov q0, q2
957 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
958 %out1 = zext <8 x i16> %shuf1 to <8 x i32>
959 %ext = zext i16 %src2 to i32
960 %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
961 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
962 %out = mul <8 x i32> %out1, %shuf2
; Two-register result: i16 lanes 0,2,4,6 followed by lanes 1,3,5,7 of %src1 are
; zero-extended to <8 x i32> and multiplied by a splat of the
; scalar-zero-extended %src2; here the splat is the first mul operand.
; Expected codegen: vmovlb.u16 / vmovlt.u16 to widen each half, uxth on the
; scalar, two vector-by-scalar vmul.i32, and a final vmov into q0.
966 define arm_aapcs_vfpcc <8 x i32> @zext16_ext0_02461357(<16 x i16> %src1, i16 %src2) {
967 ; CHECK-LABEL: zext16_ext0_02461357:
968 ; CHECK: @ %bb.0: @ %entry
969 ; CHECK-NEXT: vmovlb.u16 q1, q0
970 ; CHECK-NEXT: uxth r0, r0
971 ; CHECK-NEXT: vmul.i32 q2, q1, r0
972 ; CHECK-NEXT: vmovlt.u16 q0, q0
973 ; CHECK-NEXT: vmul.i32 q1, q0, r0
974 ; CHECK-NEXT: vmov q0, q2
977 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
978 %out1 = zext <8 x i16> %shuf1 to <8 x i32>
979 %ext = zext i16 %src2 to i32
980 %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
981 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
982 %out = mul <8 x i32> %shuf2, %out1
; sext of the even i8 lanes of %src1 (mask 0,2,...,14) multiplied by a splat of
; %src2 that is built at i8 (insert into <16 x i8>, splat to 8 lanes) and then
; sign-extended as a vector. The splat is the second mul operand.
; Expected codegen: vdup.16 followed by a single vmullb.s8.
986 define arm_aapcs_vfpcc <8 x i16> @sext8_024681012141618202224262830_0ext(<16 x i8> %src1, i8 %src2) {
987 ; CHECK-LABEL: sext8_024681012141618202224262830_0ext:
988 ; CHECK: @ %bb.0: @ %entry
989 ; CHECK-NEXT: vdup.16 q1, r0
990 ; CHECK-NEXT: vmullb.s8 q0, q0, q1
993 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
994 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
995 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
996 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
997 %out2 = sext <8 x i8> %shuf2 to <8 x i16>
998 %out = mul <8 x i16> %out1, %out2
; Even i8 lanes of %src1 (mask 0,2,...,14) sign-extended to i16 and multiplied
; by a narrow splat of %src2 that is sign-extended as a vector; here the splat
; is the first mul operand.
; Expected codegen: vdup.16 followed by vmullb.s8 with the splat register as
; the first source.
1002 define arm_aapcs_vfpcc <8 x i16> @sext8_0ext_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
1003 ; CHECK-LABEL: sext8_0ext_024681012141618202224262830:
1004 ; CHECK: @ %bb.0: @ %entry
1005 ; CHECK-NEXT: vdup.16 q1, r0
1006 ; CHECK-NEXT: vmullb.s8 q0, q1, q0
1009 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1010 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1011 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1012 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1013 %out2 = sext <8 x i8> %shuf2 to <8 x i16>
1014 %out = mul <8 x i16> %out2, %out1
; sext of the even i8 lanes of %src1 (mask 0,2,...,14) multiplied by a splat of
; %src2, where %src2 is sign-extended as a scalar to i16 before being inserted
; and splatted. The splat is the second mul operand.
; Expected codegen: vmovlb.s8, sxtb, then the vector-by-scalar vmul.i16.
1018 define arm_aapcs_vfpcc <8 x i16> @sext8_024681012141618202224262830_ext0(<16 x i8> %src1, i8 %src2) {
1019 ; CHECK-LABEL: sext8_024681012141618202224262830_ext0:
1020 ; CHECK: @ %bb.0: @ %entry
1021 ; CHECK-NEXT: vmovlb.s8 q0, q0
1022 ; CHECK-NEXT: sxtb r0, r0
1023 ; CHECK-NEXT: vmul.i16 q0, q0, r0
1026 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1027 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1028 %ext = sext i8 %src2 to i16
1029 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1030 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1031 %out = mul <8 x i16> %out1, %shuf2
; Even i8 lanes of %src1 (mask 0,2,...,14) sign-extended to i16 and multiplied
; by a splat of the scalar-sign-extended %src2; here the splat is the first
; mul operand.
; Expected codegen: vmovlb.s8, sxtb, then the vector-by-scalar vmul.i16.
1035 define arm_aapcs_vfpcc <8 x i16> @sext8_ext0_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
1036 ; CHECK-LABEL: sext8_ext0_024681012141618202224262830:
1037 ; CHECK: @ %bb.0: @ %entry
1038 ; CHECK-NEXT: vmovlb.s8 q0, q0
1039 ; CHECK-NEXT: sxtb r0, r0
1040 ; CHECK-NEXT: vmul.i16 q0, q0, r0
1043 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1044 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1045 %ext = sext i8 %src2 to i16
1046 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1047 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1048 %out = mul <8 x i16> %shuf2, %out1
; sext of the odd i8 lanes of %src1 (mask 1,3,...,15) multiplied by a splat of
; %src2 that is built at i8 and then sign-extended as a vector. The splat is
; the second mul operand.
; Expected codegen: vdup.16, vrev16.8 on the vector input, then vmullb.s8.
1052 define arm_aapcs_vfpcc <8 x i16> @sext8_135791113151719212325272931_0ext(<16 x i8> %src1, i8 %src2) {
1053 ; CHECK-LABEL: sext8_135791113151719212325272931_0ext:
1054 ; CHECK: @ %bb.0: @ %entry
1055 ; CHECK-NEXT: vdup.16 q1, r0
1056 ; CHECK-NEXT: vrev16.8 q0, q0
1057 ; CHECK-NEXT: vmullb.s8 q0, q0, q1
1060 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1061 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1062 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1063 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1064 %out2 = sext <8 x i8> %shuf2 to <8 x i16>
1065 %out = mul <8 x i16> %out1, %out2
; Odd i8 lanes of %src1 (mask 1,3,...,15) sign-extended to i16 and multiplied
; by a narrow splat of %src2 that is sign-extended as a vector; here the splat
; is the first mul operand.
; Expected codegen: vrev16.8, vdup.16, then vmullb.s8 with the splat register
; as the first source.
1069 define arm_aapcs_vfpcc <8 x i16> @sext8_0ext_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
1070 ; CHECK-LABEL: sext8_0ext_135791113151719212325272931:
1071 ; CHECK: @ %bb.0: @ %entry
1072 ; CHECK-NEXT: vrev16.8 q0, q0
1073 ; CHECK-NEXT: vdup.16 q1, r0
1074 ; CHECK-NEXT: vmullb.s8 q0, q1, q0
1077 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1078 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1079 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1080 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1081 %out2 = sext <8 x i8> %shuf2 to <8 x i16>
1082 %out = mul <8 x i16> %out2, %out1
; sext of the odd i8 lanes of %src1 (mask 1,3,...,15) multiplied by a splat of
; %src2, where %src2 is sign-extended as a scalar to i16 before being inserted
; and splatted. The splat is the second mul operand.
; Expected codegen: vmovlt.s8, sxtb, then the vector-by-scalar vmul.i16.
1086 define arm_aapcs_vfpcc <8 x i16> @sext8_135791113151719212325272931_ext0(<16 x i8> %src1, i8 %src2) {
1087 ; CHECK-LABEL: sext8_135791113151719212325272931_ext0:
1088 ; CHECK: @ %bb.0: @ %entry
1089 ; CHECK-NEXT: vmovlt.s8 q0, q0
1090 ; CHECK-NEXT: sxtb r0, r0
1091 ; CHECK-NEXT: vmul.i16 q0, q0, r0
1094 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1095 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1096 %ext = sext i8 %src2 to i16
1097 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1098 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1099 %out = mul <8 x i16> %out1, %shuf2
; Odd i8 lanes of %src1 (mask 1,3,...,15) sign-extended to i16 and multiplied
; by a splat of the scalar-sign-extended %src2; here the splat is the first
; mul operand.
; Expected codegen: vmovlt.s8, sxtb, then the vector-by-scalar vmul.i16.
1103 define arm_aapcs_vfpcc <8 x i16> @sext8_ext0_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
1104 ; CHECK-LABEL: sext8_ext0_135791113151719212325272931:
1105 ; CHECK: @ %bb.0: @ %entry
1106 ; CHECK-NEXT: vmovlt.s8 q0, q0
1107 ; CHECK-NEXT: sxtb r0, r0
1108 ; CHECK-NEXT: vmul.i16 q0, q0, r0
1111 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1112 %out1 = sext <8 x i8> %shuf1 to <8 x i16>
1113 %ext = sext i8 %src2 to i16
1114 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1115 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1116 %out = mul <8 x i16> %shuf2, %out1
; Two-register result: i8 lanes 0,2,...,14 followed by lanes 1,3,...,15 of
; %src1 are sign-extended to <16 x i16> and multiplied by a splat of %src2 that
; is built at i8 and then sign-extended as a vector. The splat is the second
; mul operand.
; Expected codegen: one vdup.8, one vrev16.8 and two vmullb.s8.
1120 define arm_aapcs_vfpcc <16 x i16> @sext8_0246810121413579111315_0ext(<32 x i8> %src1, i8 %src2) {
1121 ; CHECK-LABEL: sext8_0246810121413579111315_0ext:
1122 ; CHECK: @ %bb.0: @ %entry
1123 ; CHECK-NEXT: vdup.8 q2, r0
1124 ; CHECK-NEXT: vrev16.8 q1, q0
1125 ; CHECK-NEXT: vmullb.s8 q1, q1, q2
1126 ; CHECK-NEXT: vmullb.s8 q0, q0, q2
1129 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1130 %out1 = sext <16 x i8> %shuf1 to <16 x i16>
1131 %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
1132 %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
1133 %out2 = sext <16 x i8> %shuf2 to <16 x i16>
1134 %out = mul <16 x i16> %out1, %out2
; Two-register result: i8 lanes 0,2,...,14 followed by lanes 1,3,...,15 of
; %src1 are sign-extended to <16 x i16> and multiplied by a narrow splat of
; %src2 that is sign-extended as a vector; here the splat is the first mul
; operand.
; Expected codegen: vrev16.8, vdup.8 and two vmullb.s8 with the splat register
; as the first source.
1138 define arm_aapcs_vfpcc <16 x i16> @sext8_0ext_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
1139 ; CHECK-LABEL: sext8_0ext_0246810121413579111315:
1140 ; CHECK: @ %bb.0: @ %entry
1141 ; CHECK-NEXT: vrev16.8 q1, q0
1142 ; CHECK-NEXT: vdup.8 q2, r0
1143 ; CHECK-NEXT: vmullb.s8 q1, q2, q1
1144 ; CHECK-NEXT: vmullb.s8 q0, q2, q0
1147 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1148 %out1 = sext <16 x i8> %shuf1 to <16 x i16>
1149 %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
1150 %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
1151 %out2 = sext <16 x i8> %shuf2 to <16 x i16>
1152 %out = mul <16 x i16> %out2, %out1
; Two-register result: i8 lanes 0,2,...,14 followed by lanes 1,3,...,15 of
; %src1 are sign-extended to <16 x i16> and multiplied by a splat of %src2,
; where %src2 is sign-extended as a scalar to i16 before being inserted and
; splatted. The splat is the second mul operand.
; Expected codegen: vmovlb.s8 / vmovlt.s8 to widen each half, sxtb on the
; scalar, two vector-by-scalar vmul.i16, and a final vmov into q0.
1156 define arm_aapcs_vfpcc <16 x i16> @sext8_0246810121413579111315_ext0(<32 x i8> %src1, i8 %src2) {
1157 ; CHECK-LABEL: sext8_0246810121413579111315_ext0:
1158 ; CHECK: @ %bb.0: @ %entry
1159 ; CHECK-NEXT: vmovlb.s8 q1, q0
1160 ; CHECK-NEXT: sxtb r0, r0
1161 ; CHECK-NEXT: vmul.i16 q2, q1, r0
1162 ; CHECK-NEXT: vmovlt.s8 q0, q0
1163 ; CHECK-NEXT: vmul.i16 q1, q0, r0
1164 ; CHECK-NEXT: vmov q0, q2
1167 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1168 %out1 = sext <16 x i8> %shuf1 to <16 x i16>
1169 %ext = sext i8 %src2 to i16
1170 %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
1171 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
1172 %out = mul <16 x i16> %out1, %shuf2
; Two-register result: i8 lanes 0,2,...,14 followed by lanes 1,3,...,15 of
; %src1 are sign-extended to <16 x i16> and multiplied by a splat of the
; scalar-sign-extended %src2; here the splat is the first mul operand.
; Expected codegen: vmovlb.s8 / vmovlt.s8 to widen each half, sxtb on the
; scalar, two vector-by-scalar vmul.i16, and a final vmov into q0.
1176 define arm_aapcs_vfpcc <16 x i16> @sext8_ext0_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
1177 ; CHECK-LABEL: sext8_ext0_0246810121413579111315:
1178 ; CHECK: @ %bb.0: @ %entry
1179 ; CHECK-NEXT: vmovlb.s8 q1, q0
1180 ; CHECK-NEXT: sxtb r0, r0
1181 ; CHECK-NEXT: vmul.i16 q2, q1, r0
1182 ; CHECK-NEXT: vmovlt.s8 q0, q0
1183 ; CHECK-NEXT: vmul.i16 q1, q0, r0
1184 ; CHECK-NEXT: vmov q0, q2
1187 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1188 %out1 = sext <16 x i8> %shuf1 to <16 x i16>
1189 %ext = sext i8 %src2 to i16
1190 %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
1191 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
1192 %out = mul <16 x i16> %shuf2, %out1
; zext of the even i8 lanes of %src1 (mask 0,2,...,14) multiplied by a splat of
; %src2 that is built at i8 (insert into <16 x i8>, splat to 8 lanes) and then
; zero-extended as a vector. The splat is the second mul operand.
; Expected codegen: vdup.16 followed by a single vmullb.u8.
1196 define arm_aapcs_vfpcc <8 x i16> @zext8_024681012141618202224262830_0ext(<16 x i8> %src1, i8 %src2) {
1197 ; CHECK-LABEL: zext8_024681012141618202224262830_0ext:
1198 ; CHECK: @ %bb.0: @ %entry
1199 ; CHECK-NEXT: vdup.16 q1, r0
1200 ; CHECK-NEXT: vmullb.u8 q0, q0, q1
1203 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1204 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1205 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1206 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1207 %out2 = zext <8 x i8> %shuf2 to <8 x i16>
1208 %out = mul <8 x i16> %out1, %out2
; Even i8 lanes of %src1 (mask 0,2,...,14) zero-extended to i16 and multiplied
; by a narrow splat of %src2 that is zero-extended as a vector; here the splat
; is the first mul operand.
; Expected codegen: vdup.16 followed by vmullb.u8 with the splat register as
; the first source.
1212 define arm_aapcs_vfpcc <8 x i16> @zext8_0ext_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
1213 ; CHECK-LABEL: zext8_0ext_024681012141618202224262830:
1214 ; CHECK: @ %bb.0: @ %entry
1215 ; CHECK-NEXT: vdup.16 q1, r0
1216 ; CHECK-NEXT: vmullb.u8 q0, q1, q0
1219 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1220 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1221 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1222 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1223 %out2 = zext <8 x i8> %shuf2 to <8 x i16>
1224 %out = mul <8 x i16> %out2, %out1
; zext of the even i8 lanes of %src1 (mask 0,2,...,14) multiplied by a splat of
; %src2, where %src2 is zero-extended as a scalar to i16 before being inserted
; and splatted. The splat is the second mul operand.
; Expected codegen: vmovlb.u8, uxtb, then the vector-by-scalar vmul.i16.
1228 define arm_aapcs_vfpcc <8 x i16> @zext8_024681012141618202224262830_ext0(<16 x i8> %src1, i8 %src2) {
1229 ; CHECK-LABEL: zext8_024681012141618202224262830_ext0:
1230 ; CHECK: @ %bb.0: @ %entry
1231 ; CHECK-NEXT: vmovlb.u8 q0, q0
1232 ; CHECK-NEXT: uxtb r0, r0
1233 ; CHECK-NEXT: vmul.i16 q0, q0, r0
1236 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1237 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1238 %ext = zext i8 %src2 to i16
1239 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1240 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1241 %out = mul <8 x i16> %out1, %shuf2
; Even i8 lanes of %src1 (mask 0,2,...,14) zero-extended to i16 and multiplied
; by a splat of the scalar-zero-extended %src2; here the splat is the first
; mul operand.
; Expected codegen: vmovlb.u8, uxtb, then the vector-by-scalar vmul.i16.
1245 define arm_aapcs_vfpcc <8 x i16> @zext8_ext0_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
1246 ; CHECK-LABEL: zext8_ext0_024681012141618202224262830:
1247 ; CHECK: @ %bb.0: @ %entry
1248 ; CHECK-NEXT: vmovlb.u8 q0, q0
1249 ; CHECK-NEXT: uxtb r0, r0
1250 ; CHECK-NEXT: vmul.i16 q0, q0, r0
1253 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1254 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1255 %ext = zext i8 %src2 to i16
1256 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1257 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1258 %out = mul <8 x i16> %shuf2, %out1
; zext of the odd i8 lanes of %src1 (mask 1,3,...,15) multiplied by a splat of
; %src2 that is built at i8 and then zero-extended as a vector. The splat is
; the second mul operand.
; Expected codegen: vdup.16, vrev16.8 on the vector input, then vmullb.u8.
1262 define arm_aapcs_vfpcc <8 x i16> @zext8_135791113151719212325272931_0ext(<16 x i8> %src1, i8 %src2) {
1263 ; CHECK-LABEL: zext8_135791113151719212325272931_0ext:
1264 ; CHECK: @ %bb.0: @ %entry
1265 ; CHECK-NEXT: vdup.16 q1, r0
1266 ; CHECK-NEXT: vrev16.8 q0, q0
1267 ; CHECK-NEXT: vmullb.u8 q0, q0, q1
1270 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1271 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1272 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1273 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1274 %out2 = zext <8 x i8> %shuf2 to <8 x i16>
1275 %out = mul <8 x i16> %out1, %out2
; Odd i8 lanes of %src1 (mask 1,3,...,15) zero-extended to i16 and multiplied
; by a narrow splat of %src2 that is zero-extended as a vector; here the splat
; is the first mul operand.
; Expected codegen: vrev16.8, vdup.16, then vmullb.u8 with the splat register
; as the first source.
1279 define arm_aapcs_vfpcc <8 x i16> @zext8_0ext_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
1280 ; CHECK-LABEL: zext8_0ext_135791113151719212325272931:
1281 ; CHECK: @ %bb.0: @ %entry
1282 ; CHECK-NEXT: vrev16.8 q0, q0
1283 ; CHECK-NEXT: vdup.16 q1, r0
1284 ; CHECK-NEXT: vmullb.u8 q0, q1, q0
1287 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1288 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1289 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
1290 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
1291 %out2 = zext <8 x i8> %shuf2 to <8 x i16>
1292 %out = mul <8 x i16> %out2, %out1
; zext of the odd i8 lanes of %src1 (mask 1,3,...,15) multiplied by a splat of
; %src2, where %src2 is zero-extended as a scalar to i16 before being inserted
; and splatted. The splat is the second mul operand.
; Expected codegen: vmovlt.u8, uxtb, then the vector-by-scalar vmul.i16.
1296 define arm_aapcs_vfpcc <8 x i16> @zext8_135791113151719212325272931_ext0(<16 x i8> %src1, i8 %src2) {
1297 ; CHECK-LABEL: zext8_135791113151719212325272931_ext0:
1298 ; CHECK: @ %bb.0: @ %entry
1299 ; CHECK-NEXT: vmovlt.u8 q0, q0
1300 ; CHECK-NEXT: uxtb r0, r0
1301 ; CHECK-NEXT: vmul.i16 q0, q0, r0
1304 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1305 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1306 %ext = zext i8 %src2 to i16
1307 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1308 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1309 %out = mul <8 x i16> %out1, %shuf2
; Odd i8 lanes of %src1 (mask 1,3,...,15) zero-extended to i16 and multiplied
; by a splat of the scalar-zero-extended %src2; here the splat is the first
; mul operand.
; Expected codegen: vmovlt.u8, uxtb, then the vector-by-scalar vmul.i16.
1313 define arm_aapcs_vfpcc <8 x i16> @zext8_ext0_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
1314 ; CHECK-LABEL: zext8_ext0_135791113151719212325272931:
1315 ; CHECK: @ %bb.0: @ %entry
1316 ; CHECK-NEXT: vmovlt.u8 q0, q0
1317 ; CHECK-NEXT: uxtb r0, r0
1318 ; CHECK-NEXT: vmul.i16 q0, q0, r0
1321 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1322 %out1 = zext <8 x i8> %shuf1 to <8 x i16>
1323 %ext = zext i8 %src2 to i16
1324 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
1325 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
1326 %out = mul <8 x i16> %shuf2, %out1
; Two-register result: i8 lanes 0,2,...,14 followed by lanes 1,3,...,15 of
; %src1 are zero-extended to <16 x i16> and multiplied by a splat of %src2 that
; is built at i8 and then zero-extended as a vector. The splat is the second
; mul operand.
; Expected codegen: one vdup.8, one vrev16.8 and two vmullb.u8.
1330 define arm_aapcs_vfpcc <16 x i16> @zext8_0246810121413579111315_0ext(<32 x i8> %src1, i8 %src2) {
1331 ; CHECK-LABEL: zext8_0246810121413579111315_0ext:
1332 ; CHECK: @ %bb.0: @ %entry
1333 ; CHECK-NEXT: vdup.8 q2, r0
1334 ; CHECK-NEXT: vrev16.8 q1, q0
1335 ; CHECK-NEXT: vmullb.u8 q1, q1, q2
1336 ; CHECK-NEXT: vmullb.u8 q0, q0, q2
1339 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1340 %out1 = zext <16 x i8> %shuf1 to <16 x i16>
1341 %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
1342 %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
1343 %out2 = zext <16 x i8> %shuf2 to <16 x i16>
1344 %out = mul <16 x i16> %out1, %out2
; Two-register result: i8 lanes 0,2,...,14 followed by lanes 1,3,...,15 of
; %src1 are zero-extended to <16 x i16> and multiplied by a narrow splat of
; %src2 that is zero-extended as a vector; here the splat is the first mul
; operand.
; Expected codegen: vrev16.8, vdup.8 and two vmullb.u8 with the splat register
; as the first source.
1348 define arm_aapcs_vfpcc <16 x i16> @zext8_0ext_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
1349 ; CHECK-LABEL: zext8_0ext_0246810121413579111315:
1350 ; CHECK: @ %bb.0: @ %entry
1351 ; CHECK-NEXT: vrev16.8 q1, q0
1352 ; CHECK-NEXT: vdup.8 q2, r0
1353 ; CHECK-NEXT: vmullb.u8 q1, q2, q1
1354 ; CHECK-NEXT: vmullb.u8 q0, q2, q0
1357 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1358 %out1 = zext <16 x i8> %shuf1 to <16 x i16>
1359 %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
1360 %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
1361 %out2 = zext <16 x i8> %shuf2 to <16 x i16>
1362 %out = mul <16 x i16> %out2, %out1
; Two-register result: i8 lanes 0,2,...,14 followed by lanes 1,3,...,15 of
; %src1 are zero-extended to <16 x i16> and multiplied by a splat of %src2,
; where %src2 is zero-extended as a scalar to i16 before being inserted and
; splatted. The splat is the second mul operand.
; Expected codegen: vmovlb.u8 / vmovlt.u8 to widen each half, uxtb on the
; scalar, two vector-by-scalar vmul.i16, and a final vmov into q0.
1366 define arm_aapcs_vfpcc <16 x i16> @zext8_0246810121413579111315_ext0(<32 x i8> %src1, i8 %src2) {
1367 ; CHECK-LABEL: zext8_0246810121413579111315_ext0:
1368 ; CHECK: @ %bb.0: @ %entry
1369 ; CHECK-NEXT: vmovlb.u8 q1, q0
1370 ; CHECK-NEXT: uxtb r0, r0
1371 ; CHECK-NEXT: vmul.i16 q2, q1, r0
1372 ; CHECK-NEXT: vmovlt.u8 q0, q0
1373 ; CHECK-NEXT: vmul.i16 q1, q0, r0
1374 ; CHECK-NEXT: vmov q0, q2
1377 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1378 %out1 = zext <16 x i8> %shuf1 to <16 x i16>
1379 %ext = zext i8 %src2 to i16
1380 %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
1381 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
1382 %out = mul <16 x i16> %out1, %shuf2
; Two-register result: i8 lanes 0,2,...,14 followed by lanes 1,3,...,15 of
; %src1 are zero-extended to <16 x i16> and multiplied by a splat of the
; scalar-zero-extended %src2; here the splat is the first mul operand.
; Expected codegen: vmovlb.u8 / vmovlt.u8 to widen each half, uxtb on the
; scalar, two vector-by-scalar vmul.i16, and a final vmov into q0.
1386 define arm_aapcs_vfpcc <16 x i16> @zext8_ext0_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
1387 ; CHECK-LABEL: zext8_ext0_0246810121413579111315:
1388 ; CHECK: @ %bb.0: @ %entry
1389 ; CHECK-NEXT: vmovlb.u8 q1, q0
1390 ; CHECK-NEXT: uxtb r0, r0
1391 ; CHECK-NEXT: vmul.i16 q2, q1, r0
1392 ; CHECK-NEXT: vmovlt.u8 q0, q0
1393 ; CHECK-NEXT: vmul.i16 q1, q0, r0
1394 ; CHECK-NEXT: vmov q0, q2
1397 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1398 %out1 = zext <16 x i8> %shuf1 to <16 x i16>
1399 %ext = zext i8 %src2 to i16
1400 %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
1401 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
1402 %out = mul <16 x i16> %shuf2, %out1