1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3 ; RUN: llc < %s -mtriple aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; Multiplies a splat of the sign-extended i8 scalar %src by the sign-extended
; <8 x i8> vector %b, yielding <8 x i16>.
; The SelectionDAG checks expect the extends to be folded away: the narrow
; scalar is splatted with dup .8b and fed to the widening multiply smull.
; The GlobalISel checks show no such fold: both operands are widened first
; and a plain .8h mul is used.
7 define <8 x i16> @dupsext_v8i8_v8i16(i8 %src, <8 x i8> %b) {
8 ; CHECK-SD-LABEL: dupsext_v8i8_v8i16:
9 ; CHECK-SD: // %bb.0: // %entry
10 ; CHECK-SD-NEXT: dup v1.8b, w0
11 ; CHECK-SD-NEXT: smull v0.8h, v1.8b, v0.8b
14 ; CHECK-GI-LABEL: dupsext_v8i8_v8i16:
15 ; CHECK-GI: // %bb.0: // %entry
16 ; CHECK-GI-NEXT: lsl w8, w0, #8
17 ; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
18 ; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
19 ; CHECK-GI-NEXT: dup v1.8h, w8
20 ; CHECK-GI-NEXT: mul v0.8h, v1.8h, v0.8h
23 %in = sext i8 %src to i16
24 %ext.b = sext <8 x i8> %b to <8 x i16>
25 %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
26 %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
27 %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
; Zero-extend counterpart of the i8 -> i16 splat multiply: a splat of the
; zero-extended i8 scalar %src times the zero-extended <8 x i8> vector %b.
; The SelectionDAG checks expect dup .8b followed by the widening multiply
; umull. The GlobalISel checks mask the scalar to 8 bits, widen the vector
; with ushll and use a plain .8h mul.
31 define <8 x i16> @dupzext_v8i8_v8i16(i8 %src, <8 x i8> %b) {
32 ; CHECK-SD-LABEL: dupzext_v8i8_v8i16:
33 ; CHECK-SD: // %bb.0: // %entry
34 ; CHECK-SD-NEXT: dup v1.8b, w0
35 ; CHECK-SD-NEXT: umull v0.8h, v1.8b, v0.8b
38 ; CHECK-GI-LABEL: dupzext_v8i8_v8i16:
39 ; CHECK-GI: // %bb.0: // %entry
40 ; CHECK-GI-NEXT: and w8, w0, #0xff
41 ; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
42 ; CHECK-GI-NEXT: dup v1.8h, w8
43 ; CHECK-GI-NEXT: mul v0.8h, v1.8h, v0.8h
46 %in = zext i8 %src to i16
47 %ext.b = zext <8 x i8> %b to <8 x i16>
48 %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
49 %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
50 %out = mul nuw <8 x i16> %broadcast.splat, %ext.b
; Splat of the sign-extended i16 scalar %src multiplied by the sign-extended
; <4 x i16> vector %b, yielding <4 x i32>.
; The SelectionDAG checks expect dup .4h followed by smull. The GlobalISel
; checks sign-extend the scalar (sxth), widen the vector (sshll) and use a
; plain .4s mul.
54 define <4 x i32> @dupsext_v4i16_v4i32(i16 %src, <4 x i16> %b) {
55 ; CHECK-SD-LABEL: dupsext_v4i16_v4i32:
56 ; CHECK-SD: // %bb.0: // %entry
57 ; CHECK-SD-NEXT: dup v1.4h, w0
58 ; CHECK-SD-NEXT: smull v0.4s, v1.4h, v0.4h
61 ; CHECK-GI-LABEL: dupsext_v4i16_v4i32:
62 ; CHECK-GI: // %bb.0: // %entry
63 ; CHECK-GI-NEXT: sxth w8, w0
64 ; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
65 ; CHECK-GI-NEXT: dup v1.4s, w8
66 ; CHECK-GI-NEXT: mul v0.4s, v1.4s, v0.4s
69 %in = sext i16 %src to i32
70 %ext.b = sext <4 x i16> %b to <4 x i32>
71 %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %in, i32 0
72 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
73 %out = mul nsw <4 x i32> %broadcast.splat, %ext.b
; Splat of the zero-extended i16 scalar %src multiplied by the zero-extended
; <4 x i16> vector %b, yielding <4 x i32>.
; The SelectionDAG checks expect dup .4h followed by umull. The GlobalISel
; checks mask the scalar to 16 bits, widen the vector (ushll) and use a
; plain .4s mul.
77 define <4 x i32> @dupzext_v4i16_v4i32(i16 %src, <4 x i16> %b) {
78 ; CHECK-SD-LABEL: dupzext_v4i16_v4i32:
79 ; CHECK-SD: // %bb.0: // %entry
80 ; CHECK-SD-NEXT: dup v1.4h, w0
81 ; CHECK-SD-NEXT: umull v0.4s, v1.4h, v0.4h
84 ; CHECK-GI-LABEL: dupzext_v4i16_v4i32:
85 ; CHECK-GI: // %bb.0: // %entry
86 ; CHECK-GI-NEXT: and w8, w0, #0xffff
87 ; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
88 ; CHECK-GI-NEXT: dup v1.4s, w8
89 ; CHECK-GI-NEXT: mul v0.4s, v1.4s, v0.4s
92 %in = zext i16 %src to i32
93 %ext.b = zext <4 x i16> %b to <4 x i32>
94 %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %in, i32 0
95 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
96 %out = mul nuw <4 x i32> %broadcast.splat, %ext.b
; Splat of the sign-extended i32 scalar %src multiplied by the sign-extended
; <2 x i32> vector %b, yielding <2 x i64>.
; The SelectionDAG checks expect dup .2s followed by smull. The GlobalISel
; checks widen both operands to .2d and then multiply lane by lane in
; general-purpose registers (fmov/mov out, scalar mul, mov back into lanes).
100 define <2 x i64> @dupsext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
101 ; CHECK-SD-LABEL: dupsext_v2i32_v2i64:
102 ; CHECK-SD: // %bb.0: // %entry
103 ; CHECK-SD-NEXT: dup v1.2s, w0
104 ; CHECK-SD-NEXT: smull v0.2d, v1.2s, v0.2s
107 ; CHECK-GI-LABEL: dupsext_v2i32_v2i64:
108 ; CHECK-GI: // %bb.0: // %entry
109 ; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
110 ; CHECK-GI-NEXT: sxtw x8, w0
111 ; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
112 ; CHECK-GI-NEXT: dup v1.2d, x8
113 ; CHECK-GI-NEXT: fmov x9, d0
114 ; CHECK-GI-NEXT: mov x11, v0.d[1]
115 ; CHECK-GI-NEXT: fmov x8, d1
116 ; CHECK-GI-NEXT: mov x10, v1.d[1]
117 ; CHECK-GI-NEXT: mul x8, x8, x9
118 ; CHECK-GI-NEXT: mul x9, x10, x11
119 ; CHECK-GI-NEXT: mov v0.d[0], x8
120 ; CHECK-GI-NEXT: mov v0.d[1], x9
123 %in = sext i32 %src to i64
124 %ext.b = sext <2 x i32> %b to <2 x i64>
125 %broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
126 %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
127 %out = mul nsw <2 x i64> %broadcast.splat, %ext.b
; Splat of the zero-extended i32 scalar %src multiplied by the zero-extended
; <2 x i32> vector %b, yielding <2 x i64>.
; The SelectionDAG checks expect dup .2s followed by umull. The GlobalISel
; checks widen both operands to .2d and then multiply lane by lane in
; general-purpose registers (fmov/mov out, scalar mul, mov back into lanes).
131 define <2 x i64> @dupzext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
132 ; CHECK-SD-LABEL: dupzext_v2i32_v2i64:
133 ; CHECK-SD: // %bb.0: // %entry
134 ; CHECK-SD-NEXT: dup v1.2s, w0
135 ; CHECK-SD-NEXT: umull v0.2d, v1.2s, v0.2s
138 ; CHECK-GI-LABEL: dupzext_v2i32_v2i64:
139 ; CHECK-GI: // %bb.0: // %entry
140 ; CHECK-GI-NEXT: mov w8, w0
141 ; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
142 ; CHECK-GI-NEXT: dup v1.2d, x8
143 ; CHECK-GI-NEXT: fmov x9, d0
144 ; CHECK-GI-NEXT: mov x11, v0.d[1]
145 ; CHECK-GI-NEXT: fmov x8, d1
146 ; CHECK-GI-NEXT: mov x10, v1.d[1]
147 ; CHECK-GI-NEXT: mul x8, x8, x9
148 ; CHECK-GI-NEXT: mul x9, x10, x11
149 ; CHECK-GI-NEXT: mov v0.d[0], x8
150 ; CHECK-GI-NEXT: mov v0.d[1], x9
153 %in = zext i32 %src to i64
154 %ext.b = zext <2 x i32> %b to <2 x i64>
155 %broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
156 %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
157 %out = mul nuw <2 x i64> %broadcast.splat, %ext.b
161 ; Unsupported combines
; Same splat-times-vector shape, but on the two-element types <2 x i8> ->
; <2 x i16> with sign extension.
; Neither set of checks contains a widening multiply: both selectors
; sign-extend the vector operand in place with a shl/sshr #24 pair on .2s
; lanes and finish with a plain .2s mul.
163 define <2 x i16> @dupsext_v2i8_v2i16(i8 %src, <2 x i8> %b) {
164 ; CHECK-SD-LABEL: dupsext_v2i8_v2i16:
165 ; CHECK-SD: // %bb.0: // %entry
166 ; CHECK-SD-NEXT: sxtb w8, w0
167 ; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24
168 ; CHECK-SD-NEXT: dup v1.2s, w8
169 ; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24
170 ; CHECK-SD-NEXT: mul v0.2s, v1.2s, v0.2s
173 ; CHECK-GI-LABEL: dupsext_v2i8_v2i16:
174 ; CHECK-GI: // %bb.0: // %entry
175 ; CHECK-GI-NEXT: lsl w8, w0, #8
176 ; CHECK-GI-NEXT: shl v0.2s, v0.2s, #24
177 ; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
178 ; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #24
179 ; CHECK-GI-NEXT: dup v1.4h, w8
180 ; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
181 ; CHECK-GI-NEXT: mul v0.2s, v1.2s, v0.2s
184 %in = sext i8 %src to i16
185 %ext.b = sext <2 x i8> %b to <2 x i16>
186 %broadcast.splatinsert = insertelement <2 x i16> undef, i16 %in, i16 0
187 %broadcast.splat = shufflevector <2 x i16> %broadcast.splatinsert, <2 x i16> undef, <2 x i32> zeroinitializer
188 %out = mul nsw <2 x i16> %broadcast.splat, %ext.b
; Splat-times-vector multiply where the zero extension skips a width step:
; i16 -> i64 for the scalar and <2 x i16> -> <2 x i64> for the vector.
; The SelectionDAG checks mask both operands down to their low 16 bits while
; they are still 32 bits wide and then use umull on .2s lanes. The GlobalISel
; checks widen to .2d, mask, and multiply lane by lane in general-purpose
; registers.
192 define <2 x i64> @dupzext_v2i16_v2i64(i16 %src, <2 x i16> %b) {
193 ; CHECK-SD-LABEL: dupzext_v2i16_v2i64:
194 ; CHECK-SD: // %bb.0: // %entry
195 ; CHECK-SD-NEXT: movi d1, #0x00ffff0000ffff
196 ; CHECK-SD-NEXT: and w8, w0, #0xffff
197 ; CHECK-SD-NEXT: dup v2.2s, w8
198 ; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
199 ; CHECK-SD-NEXT: umull v0.2d, v2.2s, v0.2s
202 ; CHECK-GI-LABEL: dupzext_v2i16_v2i64:
203 ; CHECK-GI: // %bb.0: // %entry
204 ; CHECK-GI-NEXT: movi v1.2d, #0x0000000000ffff
205 ; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
206 ; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
207 ; CHECK-GI-NEXT: and x8, x0, #0xffff
208 ; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
209 ; CHECK-GI-NEXT: dup v1.2d, x8
210 ; CHECK-GI-NEXT: fmov x8, d1
211 ; CHECK-GI-NEXT: fmov x9, d0
212 ; CHECK-GI-NEXT: mov x10, v1.d[1]
213 ; CHECK-GI-NEXT: mov x11, v0.d[1]
214 ; CHECK-GI-NEXT: mul x8, x8, x9
215 ; CHECK-GI-NEXT: mul x9, x10, x11
216 ; CHECK-GI-NEXT: mov v0.d[0], x8
217 ; CHECK-GI-NEXT: mov v0.d[1], x9
220 %in = zext i16 %src to i64
221 %ext.b = zext <2 x i16> %b to <2 x i64>
222 %broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
223 %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
224 %out = mul nuw <2 x i64> %broadcast.splat, %ext.b
232 ; dupsext_v2i16_v2i32
233 ; dupsext_v2i16_v2i64
239 ; dupzext_v2i16_v2i32
240 ; dupzext_v2i16_v2i64
; Variant where the shuffle is not a zero-mask splat: the sign-extended
; scalar is inserted at lane 1 and the mask <2,3,4,5,6,7,0,1> rotates the
; vector, so only the last result lane carries %in and every other lane is
; taken from an undef lane of the insertelement result.
; The SelectionDAG checks still expect dup .8b + smull. The GlobalISel
; checks insert into lane 1, rotate with ext #4 and use a plain .8h mul.
244 define <8 x i16> @nonsplat_shuffleinsert(i8 %src, <8 x i8> %b) {
245 ; CHECK-SD-LABEL: nonsplat_shuffleinsert:
246 ; CHECK-SD: // %bb.0: // %entry
247 ; CHECK-SD-NEXT: dup v1.8b, w0
248 ; CHECK-SD-NEXT: smull v0.8h, v1.8b, v0.8b
251 ; CHECK-GI-LABEL: nonsplat_shuffleinsert:
252 ; CHECK-GI: // %bb.0: // %entry
253 ; CHECK-GI-NEXT: lsl w8, w0, #8
254 ; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
255 ; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
256 ; CHECK-GI-NEXT: mov v1.h[1], w8
257 ; CHECK-GI-NEXT: ext v1.16b, v1.16b, v1.16b, #4
258 ; CHECK-GI-NEXT: mul v0.8h, v1.8h, v0.8h
261 %in = sext i8 %src to i16
262 %ext.b = sext <8 x i8> %b to <8 x i16>
263 %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 1
264 %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
265 %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
; No splat at all: four different i16 scalars are each sign-extended to i32
; and inserted into lanes 0..3 of a <4 x i32>, which is then multiplied by
; the sign-extended <4 x i16> vector %b.
; The SelectionDAG checks build the vector from the narrow scalars in .h
; lanes and use smull. The GlobalISel checks sign-extend every scalar
; (sxth), insert into .s lanes and use a plain .4s mul.
269 define <4 x i32> @nonsplat_shuffleinsert2(<4 x i16> %b, i16 %b0, i16 %b1, i16 %b2, i16 %b3) {
270 ; CHECK-SD-LABEL: nonsplat_shuffleinsert2:
271 ; CHECK-SD: // %bb.0: // %entry
272 ; CHECK-SD-NEXT: fmov s1, w0
273 ; CHECK-SD-NEXT: mov v1.h[1], w1
274 ; CHECK-SD-NEXT: mov v1.h[2], w2
275 ; CHECK-SD-NEXT: mov v1.h[3], w3
276 ; CHECK-SD-NEXT: smull v0.4s, v1.4h, v0.4h
279 ; CHECK-GI-LABEL: nonsplat_shuffleinsert2:
280 ; CHECK-GI: // %bb.0: // %entry
281 ; CHECK-GI-NEXT: sxth w8, w0
282 ; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
283 ; CHECK-GI-NEXT: mov v1.s[0], w8
284 ; CHECK-GI-NEXT: sxth w8, w1
285 ; CHECK-GI-NEXT: mov v1.s[1], w8
286 ; CHECK-GI-NEXT: sxth w8, w2
287 ; CHECK-GI-NEXT: mov v1.s[2], w8
288 ; CHECK-GI-NEXT: sxth w8, w3
289 ; CHECK-GI-NEXT: mov v1.s[3], w8
290 ; CHECK-GI-NEXT: mul v0.4s, v1.4s, v0.4s
293 %s0 = sext i16 %b0 to i32
294 %s1 = sext i16 %b1 to i32
295 %s2 = sext i16 %b2 to i32
296 %s3 = sext i16 %b3 to i32
297 %ext.b = sext <4 x i16> %b to <4 x i32>
298 %v0 = insertelement <4 x i32> undef, i32 %s0, i32 0
299 %v1 = insertelement <4 x i32> %v0, i32 %s1, i32 1
300 %v2 = insertelement <4 x i32> %v1, i32 %s2, i32 2
301 %v3 = insertelement <4 x i32> %v2, i32 %s3, i32 3
302 %out = mul nsw <4 x i32> %v3, %ext.b
; Larger case built around a scalar i1 (%tmp9 = 0 < %a) that is zero-extended
; to i16 and splatted across <16 x i16>.
; The IR negates the <8 x i16> loaded from %q, widens it to 16 lanes with a
; shuffle whose upper eight indices select from the undef operand, multiplies
; it by the splat, compares the product against zero, ANDs the result with
; %tmp (xor of zeroinitializer with all-true), sign-extends the <16 x i1> to
; <16 x i8> and stores it to %p.
; Neither set of checks contains a widening multiply.
306 define void @typei1_orig(i64 %a, ptr %p, ptr %q) {
307 ; CHECK-SD-LABEL: typei1_orig:
308 ; CHECK-SD: // %bb.0:
309 ; CHECK-SD-NEXT: cmp x0, #0
310 ; CHECK-SD-NEXT: ldr q0, [x2]
311 ; CHECK-SD-NEXT: cset w8, gt
312 ; CHECK-SD-NEXT: dup v1.8h, w8
313 ; CHECK-SD-NEXT: cmtst v0.8h, v0.8h, v0.8h
314 ; CHECK-SD-NEXT: cmeq v1.8h, v1.8h, #0
315 ; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b
316 ; CHECK-SD-NEXT: xtn v0.8b, v0.8h
317 ; CHECK-SD-NEXT: str q0, [x1]
320 ; CHECK-GI-LABEL: typei1_orig:
321 ; CHECK-GI: // %bb.0:
322 ; CHECK-GI-NEXT: ldr q1, [x2]
323 ; CHECK-GI-NEXT: cmp x0, #0
324 ; CHECK-GI-NEXT: movi v0.2d, #0xffffffffffffffff
325 ; CHECK-GI-NEXT: cset w8, gt
326 ; CHECK-GI-NEXT: neg v1.8h, v1.8h
327 ; CHECK-GI-NEXT: dup v2.8h, w8
328 ; CHECK-GI-NEXT: mvn v0.16b, v0.16b
329 ; CHECK-GI-NEXT: mul v1.8h, v1.8h, v2.8h
330 ; CHECK-GI-NEXT: cmeq v1.8h, v1.8h, #0
331 ; CHECK-GI-NEXT: mvn v1.16b, v1.16b
332 ; CHECK-GI-NEXT: uzp1 v0.16b, v1.16b, v0.16b
333 ; CHECK-GI-NEXT: shl v0.16b, v0.16b, #7
334 ; CHECK-GI-NEXT: sshr v0.16b, v0.16b, #7
335 ; CHECK-GI-NEXT: str q0, [x1]
337 %tmp = xor <16 x i1> zeroinitializer, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
338 %tmp6 = load <8 x i16>, ptr %q, align 2
339 %tmp7 = sub <8 x i16> zeroinitializer, %tmp6
340 %tmp8 = shufflevector <8 x i16> %tmp7, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
341 %tmp9 = icmp slt i64 0, %a
342 %tmp10 = zext i1 %tmp9 to i16
343 %tmp11 = insertelement <16 x i16> undef, i16 %tmp10, i64 0
344 %tmp12 = shufflevector <16 x i16> %tmp11, <16 x i16> undef, <16 x i32> zeroinitializer
345 %tmp13 = mul nuw <16 x i16> %tmp8, %tmp12
346 %tmp14 = icmp ne <16 x i16> %tmp13, zeroinitializer
347 %tmp15 = and <16 x i1> %tmp14, %tmp
348 %tmp16 = sext <16 x i1> %tmp15 to <16 x i8>
349 store <16 x i8> %tmp16, ptr %p, align 1
; Splat-times-vector multiply where both sources are i1: the scalar i1 %src
; and the <8 x i1> vector %b are zero-extended to i16 lanes.
; The SelectionDAG checks mask both operands to bit 0 while they are 8 bits
; wide and then use umull. The GlobalISel checks widen the vector with ushll,
; mask in .8h lanes and use a plain mul.
353 define <8 x i16> @typei1_v8i1_v8i16(i1 %src, <8 x i1> %b) {
354 ; CHECK-SD-LABEL: typei1_v8i1_v8i16:
355 ; CHECK-SD: // %bb.0: // %entry
356 ; CHECK-SD-NEXT: movi v1.8b, #1
357 ; CHECK-SD-NEXT: and w8, w0, #0x1
358 ; CHECK-SD-NEXT: dup v2.8b, w8
359 ; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
360 ; CHECK-SD-NEXT: umull v0.8h, v2.8b, v0.8b
363 ; CHECK-GI-LABEL: typei1_v8i1_v8i16:
364 ; CHECK-GI: // %bb.0: // %entry
365 ; CHECK-GI-NEXT: movi v1.8h, #1
366 ; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
367 ; CHECK-GI-NEXT: and w8, w0, #0x1
368 ; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
369 ; CHECK-GI-NEXT: dup v1.8h, w8
370 ; CHECK-GI-NEXT: mul v0.8h, v1.8h, v0.8h
373 %in = zext i1 %src to i16
374 %ext.b = zext <8 x i1> %b to <8 x i16>
375 %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
376 %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
377 %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
; Shuffle with no insertelement feeding it: the sign-extended vector %ext.b
; is rotated by two lanes (mask <2,3,4,5,6,7,0,1>) and multiplied by the
; unrotated %ext.b.
; The SelectionDAG checks rotate the narrow .8b input with ext #2 and use
; smull. The GlobalISel checks widen first, rotate the .8h value with
; ext #4 and use a plain mul.
381 define <8 x i16> @missing_insert(<8 x i8> %b) {
382 ; CHECK-SD-LABEL: missing_insert:
383 ; CHECK-SD: // %bb.0: // %entry
384 ; CHECK-SD-NEXT: ext v1.8b, v0.8b, v0.8b, #2
385 ; CHECK-SD-NEXT: smull v0.8h, v1.8b, v0.8b
388 ; CHECK-GI-LABEL: missing_insert:
389 ; CHECK-GI: // %bb.0: // %entry
390 ; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
391 ; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #4
392 ; CHECK-GI-NEXT: mul v0.8h, v1.8h, v0.8h
395 %ext.b = sext <8 x i8> %b to <8 x i16>
396 %broadcast.splat = shufflevector <8 x i16> %ext.b, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
397 %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
; Multiply of a lane-reversed, sign-extended <8 x i8> vector %src by the
; sign-extended <8 x i8> vector %b.
; The SelectionDAG checks perform the reversal on the narrow input (rev64
; .8b) and use smull. The GlobalISel checks widen both inputs, reverse with
; a tbl driven by a constant-pool mask and use a plain .8h mul.
401 define <8 x i16> @shufsext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
402 ; CHECK-SD-LABEL: shufsext_v8i8_v8i16:
403 ; CHECK-SD: // %bb.0: // %entry
404 ; CHECK-SD-NEXT: rev64 v0.8b, v0.8b
405 ; CHECK-SD-NEXT: smull v0.8h, v0.8b, v1.8b
408 ; CHECK-GI-LABEL: shufsext_v8i8_v8i16:
409 ; CHECK-GI: // %bb.0: // %entry
410 ; CHECK-GI-NEXT: adrp x8, .LCPI13_0
411 ; CHECK-GI-NEXT: sshll v2.8h, v0.8b, #0
412 ; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
413 ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0]
414 ; CHECK-GI-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
415 ; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
418 %in = sext <8 x i8> %src to <8 x i16>
419 %ext.b = sext <8 x i8> %b to <8 x i16>
420 %shuf = shufflevector <8 x i16> %in, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
421 %out = mul nsw <8 x i16> %shuf, %ext.b
; Multiply of a lane-swapped, sign-extended <2 x i32> vector %src by the
; sign-extended <2 x i32> vector %b, yielding <2 x i64>.
; The SelectionDAG checks swap the narrow lanes with rev64 .2s and use
; smull. The GlobalISel checks widen both inputs, swap with ext #8 and
; multiply lane by lane in general-purpose registers.
425 define <2 x i64> @shufsext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
426 ; CHECK-SD-LABEL: shufsext_v2i32_v2i64:
427 ; CHECK-SD: // %bb.0: // %entry
428 ; CHECK-SD-NEXT: rev64 v0.2s, v0.2s
429 ; CHECK-SD-NEXT: smull v0.2d, v0.2s, v1.2s
432 ; CHECK-GI-LABEL: shufsext_v2i32_v2i64:
433 ; CHECK-GI: // %bb.0: // %entry
434 ; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
435 ; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0
436 ; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
437 ; CHECK-GI-NEXT: fmov x9, d1
438 ; CHECK-GI-NEXT: mov x11, v1.d[1]
439 ; CHECK-GI-NEXT: fmov x8, d0
440 ; CHECK-GI-NEXT: mov x10, v0.d[1]
441 ; CHECK-GI-NEXT: mul x8, x8, x9
442 ; CHECK-GI-NEXT: mul x9, x10, x11
443 ; CHECK-GI-NEXT: mov v0.d[0], x8
444 ; CHECK-GI-NEXT: mov v0.d[1], x9
447 %in = sext <2 x i32> %src to <2 x i64>
448 %ext.b = sext <2 x i32> %b to <2 x i64>
449 %shuf = shufflevector <2 x i64> %in, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
450 %out = mul nsw <2 x i64> %shuf, %ext.b
; Multiply of a lane-reversed, zero-extended <8 x i8> vector %src by the
; zero-extended <8 x i8> vector %b.
; The SelectionDAG checks perform the reversal on the narrow input (rev64
; .8b) and use umull. The GlobalISel checks widen both inputs with ushll,
; reverse with a tbl driven by a constant-pool mask and use a plain .8h mul.
454 define <8 x i16> @shufzext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
455 ; CHECK-SD-LABEL: shufzext_v8i8_v8i16:
456 ; CHECK-SD: // %bb.0: // %entry
457 ; CHECK-SD-NEXT: rev64 v0.8b, v0.8b
458 ; CHECK-SD-NEXT: umull v0.8h, v0.8b, v1.8b
461 ; CHECK-GI-LABEL: shufzext_v8i8_v8i16:
462 ; CHECK-GI: // %bb.0: // %entry
463 ; CHECK-GI-NEXT: adrp x8, .LCPI15_0
464 ; CHECK-GI-NEXT: ushll v2.8h, v0.8b, #0
465 ; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
466 ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI15_0]
467 ; CHECK-GI-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
468 ; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
471 %in = zext <8 x i8> %src to <8 x i16>
472 %ext.b = zext <8 x i8> %b to <8 x i16>
473 %shuf = shufflevector <8 x i16> %in, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
474 %out = mul nsw <8 x i16> %shuf, %ext.b
; Multiply of a lane-swapped, zero-extended <2 x i32> vector %src by the
; zero-extended <2 x i32> vector %b, yielding <2 x i64>.
; This is the unsigned counterpart of the sign-extending v2i32 shuffle test,
; so both extends are zext and the expected widening multiply is umull.
; NOTE(review): the expectations below were adjusted by hand from the signed
; variant (smull -> umull, sshll -> ushll); re-run
; utils/update_llc_test_checks.py to confirm them against the real output.
478 define <2 x i64> @shufzext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
479 ; CHECK-SD-LABEL: shufzext_v2i32_v2i64:
480 ; CHECK-SD: // %bb.0: // %entry
481 ; CHECK-SD-NEXT: rev64 v0.2s, v0.2s
482 ; CHECK-SD-NEXT: umull v0.2d, v0.2s, v1.2s
485 ; CHECK-GI-LABEL: shufzext_v2i32_v2i64:
486 ; CHECK-GI: // %bb.0: // %entry
487 ; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
488 ; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
489 ; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
490 ; CHECK-GI-NEXT: fmov x9, d1
491 ; CHECK-GI-NEXT: mov x11, v1.d[1]
492 ; CHECK-GI-NEXT: fmov x8, d0
493 ; CHECK-GI-NEXT: mov x10, v0.d[1]
494 ; CHECK-GI-NEXT: mul x8, x8, x9
495 ; CHECK-GI-NEXT: mul x9, x10, x11
496 ; CHECK-GI-NEXT: mov v0.d[0], x8
497 ; CHECK-GI-NEXT: mov v0.d[1], x9
500 %in = zext <2 x i32> %src to <2 x i64>
501 %ext.b = zext <2 x i32> %b to <2 x i64>
502 %shuf = shufflevector <2 x i64> %in, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
503 %out = mul nsw <2 x i64> %shuf, %ext.b
; Two-input shuffle: %src1 and %src2 are both zero-extended and interleaved
; with mask <0,8,2,10,4,12,6,14> (even lanes of each input), then multiplied
; by the zero-extended vector %b.
; The SelectionDAG checks do the interleave on the narrow inputs (trn1 .8b)
; and use umull. The GlobalISel checks widen all three inputs, interleave
; with trn1 .8h and use a plain mul.
507 define <8 x i16> @shufzext_v8i8_v8i16_twoin(<8 x i8> %src1, <8 x i8> %src2, <8 x i8> %b) {
508 ; CHECK-SD-LABEL: shufzext_v8i8_v8i16_twoin:
509 ; CHECK-SD: // %bb.0: // %entry
510 ; CHECK-SD-NEXT: trn1 v0.8b, v0.8b, v1.8b
511 ; CHECK-SD-NEXT: umull v0.8h, v0.8b, v2.8b
514 ; CHECK-GI-LABEL: shufzext_v8i8_v8i16_twoin:
515 ; CHECK-GI: // %bb.0: // %entry
516 ; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
517 ; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
518 ; CHECK-GI-NEXT: trn1 v0.8h, v0.8h, v1.8h
519 ; CHECK-GI-NEXT: ushll v1.8h, v2.8b, #0
520 ; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
523 %in1 = zext <8 x i8> %src1 to <8 x i16>
524 %in2 = zext <8 x i8> %src2 to <8 x i16>
525 %ext.b = zext <8 x i8> %b to <8 x i16>
526 %shuf = shufflevector <8 x i16> %in1, <8 x i16> %in2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
527 %out = mul nsw <8 x i16> %shuf, %ext.b
; Two-input shuffle with mixed extension kinds: %src1 is zero-extended while
; %src2 is sign-extended before the <0,8,2,10,4,12,6,14> interleave, and the
; result is multiplied by the zero-extended vector %b.
; Both selectors share one set of checks here: each input is widened
; separately (ushll / sshll), interleaved with trn1 .8h and multiplied with
; a plain mul; no widening multiply is expected.
531 define <8 x i16> @shufszext_v8i8_v8i16_twoin(<8 x i8> %src1, <8 x i8> %src2, <8 x i8> %b) {
532 ; CHECK-LABEL: shufszext_v8i8_v8i16_twoin:
533 ; CHECK: // %bb.0: // %entry
534 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
535 ; CHECK-NEXT: sshll v1.8h, v1.8b, #0
536 ; CHECK-NEXT: trn1 v0.8h, v0.8h, v1.8h
537 ; CHECK-NEXT: ushll v1.8h, v2.8b, #0
538 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
541 %in1 = zext <8 x i8> %src1 to <8 x i16>
542 %in2 = sext <8 x i8> %src2 to <8 x i16>
543 %ext.b = zext <8 x i8> %b to <8 x i16>
544 %shuf = shufflevector <8 x i16> %in1, <8 x i16> %in2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
545 %out = mul nsw <8 x i16> %shuf, %ext.b