1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple aarch64-none-linux-gnu | FileCheck %s
; Signed splat-multiply, 8 x i8 -> 8 x i16.
; The scalar %src is sign-extended, broadcast to every lane and multiplied by
; the sign-extended vector %b. The expected assembly splats the narrow value
; (dup ... 8b) and performs one widening smull, with no separate extend
; instructions.
6 define <8 x i16> @dupsext_v8i8_v8i16(i8 %src, <8 x i8> %b) {
7 ; CHECK-LABEL: dupsext_v8i8_v8i16:
8 ; CHECK: // %bb.0: // %entry
9 ; CHECK-NEXT: dup v1.8b, w0
10 ; CHECK-NEXT: smull v0.8h, v1.8b, v0.8b
13 %in = sext i8 %src to i16
14 %ext.b = sext <8 x i8> %b to <8 x i16>
; insertelement into lane 0 followed by an all-zero shuffle mask is the
; broadcast (splat) of %in.
15 %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
16 %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
17 %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
; Unsigned splat-multiply, 8 x i8 -> 8 x i16.
; Same shape as dupsext_v8i8_v8i16 but with zext on both operands; the
; expected assembly is a narrow dup followed by one widening umull.
21 define <8 x i16> @dupzext_v8i8_v8i16(i8 %src, <8 x i8> %b) {
22 ; CHECK-LABEL: dupzext_v8i8_v8i16:
23 ; CHECK: // %bb.0: // %entry
24 ; CHECK-NEXT: dup v1.8b, w0
25 ; CHECK-NEXT: umull v0.8h, v1.8b, v0.8b
28 %in = zext i8 %src to i16
29 %ext.b = zext <8 x i8> %b to <8 x i16>
; Lane-0 insert plus all-zero shuffle mask: broadcast of %in.
30 %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
31 %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
32 %out = mul nuw <8 x i16> %broadcast.splat, %ext.b
; Signed splat-multiply, 4 x i16 -> 4 x i32.
; Sign-extended scalar %src is broadcast and multiplied by sign-extended %b;
; the expected assembly is dup on .4h followed by smull producing .4s.
36 define <4 x i32> @dupsext_v4i16_v4i32(i16 %src, <4 x i16> %b) {
37 ; CHECK-LABEL: dupsext_v4i16_v4i32:
38 ; CHECK: // %bb.0: // %entry
39 ; CHECK-NEXT: dup v1.4h, w0
40 ; CHECK-NEXT: smull v0.4s, v1.4h, v0.4h
43 %in = sext i16 %src to i32
44 %ext.b = sext <4 x i16> %b to <4 x i32>
; Lane-0 insert plus all-zero shuffle mask: broadcast of %in.
45 %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %in, i32 0
46 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
47 %out = mul nsw <4 x i32> %broadcast.splat, %ext.b
; Unsigned splat-multiply, 4 x i16 -> 4 x i32.
; Zero-extended scalar %src is broadcast and multiplied by zero-extended %b;
; the expected assembly is dup on .4h followed by umull producing .4s.
51 define <4 x i32> @dupzext_v4i16_v4i32(i16 %src, <4 x i16> %b) {
52 ; CHECK-LABEL: dupzext_v4i16_v4i32:
53 ; CHECK: // %bb.0: // %entry
54 ; CHECK-NEXT: dup v1.4h, w0
55 ; CHECK-NEXT: umull v0.4s, v1.4h, v0.4h
58 %in = zext i16 %src to i32
59 %ext.b = zext <4 x i16> %b to <4 x i32>
; Lane-0 insert plus all-zero shuffle mask: broadcast of %in.
60 %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %in, i32 0
61 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
62 %out = mul nuw <4 x i32> %broadcast.splat, %ext.b
; Signed splat-multiply, 2 x i32 -> 2 x i64.
; Sign-extended scalar %src is broadcast and multiplied by sign-extended %b;
; the expected assembly is dup on .2s followed by smull producing .2d.
66 define <2 x i64> @dupsext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
67 ; CHECK-LABEL: dupsext_v2i32_v2i64:
68 ; CHECK: // %bb.0: // %entry
69 ; CHECK-NEXT: dup v1.2s, w0
70 ; CHECK-NEXT: smull v0.2d, v1.2s, v0.2s
73 %in = sext i32 %src to i64
74 %ext.b = sext <2 x i32> %b to <2 x i64>
; Lane-0 insert plus all-zero shuffle mask: broadcast of %in.
75 %broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
76 %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
77 %out = mul nsw <2 x i64> %broadcast.splat, %ext.b
; Unsigned splat-multiply, 2 x i32 -> 2 x i64.
; Zero-extended scalar %src is broadcast and multiplied by zero-extended %b;
; the expected assembly is dup on .2s followed by umull producing .2d.
81 define <2 x i64> @dupzext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
82 ; CHECK-LABEL: dupzext_v2i32_v2i64:
83 ; CHECK: // %bb.0: // %entry
84 ; CHECK-NEXT: dup v1.2s, w0
85 ; CHECK-NEXT: umull v0.2d, v1.2s, v0.2s
88 %in = zext i32 %src to i64
89 %ext.b = zext <2 x i32> %b to <2 x i64>
; Lane-0 insert plus all-zero shuffle mask: broadcast of %in.
90 %broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
91 %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
92 %out = mul nuw <2 x i64> %broadcast.splat, %ext.b
96 ; Unsupported combines
; Signed splat-multiply on a 2 x i8 -> 2 x i16 shape.
; The expected assembly does not use a widening multiply here: the scalar is
; sign-extended with sxtb, the vector operand is sign-extended in place with a
; shl/sshr pair by 24 on .2s lanes, and the product is a plain mul on .2s.
98 define <2 x i16> @dupsext_v2i8_v2i16(i8 %src, <2 x i8> %b) {
99 ; CHECK-LABEL: dupsext_v2i8_v2i16:
100 ; CHECK: // %bb.0: // %entry
101 ; CHECK-NEXT: sxtb w8, w0
102 ; CHECK-NEXT: shl v0.2s, v0.2s, #24
103 ; CHECK-NEXT: dup v1.2s, w8
104 ; CHECK-NEXT: sshr v0.2s, v0.2s, #24
105 ; CHECK-NEXT: mul v0.2s, v1.2s, v0.2s
108 %in = sext i8 %src to i16
109 %ext.b = sext <2 x i8> %b to <2 x i16>
; Lane-0 insert plus all-zero shuffle mask: broadcast of %in.
110 %broadcast.splatinsert = insertelement <2 x i16> undef, i16 %in, i16 0
111 %broadcast.splat = shufflevector <2 x i16> %broadcast.splatinsert, <2 x i16> undef, <2 x i32> zeroinitializer
112 %out = mul nsw <2 x i16> %broadcast.splat, %ext.b
; Unsigned splat-multiply where the extend skips a width: 2 x i16 -> 2 x i64.
; The expected assembly masks both operands down to 16 bits (and with 0xffff
; on the scalar, and with a 0x0000ffff-per-lane constant on the vector), splats
; the scalar on .2s lanes and then uses umull from .2s to .2d.
116 define <2 x i64> @dupzext_v2i16_v2i64(i16 %src, <2 x i16> %b) {
117 ; CHECK-LABEL: dupzext_v2i16_v2i64:
118 ; CHECK: // %bb.0: // %entry
119 ; CHECK-NEXT: movi d1, #0x00ffff0000ffff
120 ; CHECK-NEXT: and w8, w0, #0xffff
121 ; CHECK-NEXT: dup v2.2s, w8
122 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
123 ; CHECK-NEXT: umull v0.2d, v2.2s, v0.2s
126 %in = zext i16 %src to i64
127 %ext.b = zext <2 x i16> %b to <2 x i64>
; Lane-0 insert plus all-zero shuffle mask: broadcast of %in.
128 %broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
129 %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
130 %out = mul nuw <2 x i64> %broadcast.splat, %ext.b
138 ; dupsext_v2i16_v2i32
139 ; dupsext_v2i16_v2i64
145 ; dupzext_v2i16_v2i32
146 ; dupzext_v2i16_v2i64
; Insert/shuffle pair that is not written as the canonical lane-0 broadcast.
; %in is inserted into lane 1 and the shuffle mask is a rotation
; <2,3,4,5,6,7,0,1>, so only result lane 7 (taken from source lane 1) holds a
; defined value; every other result lane comes from an undef source lane.
; The expected assembly is still dup + smull.
150 define <8 x i16> @nonsplat_shuffleinsert(i8 %src, <8 x i8> %b) {
151 ; CHECK-LABEL: nonsplat_shuffleinsert:
152 ; CHECK: // %bb.0: // %entry
153 ; CHECK-NEXT: dup v1.8b, w0
154 ; CHECK-NEXT: smull v0.8h, v1.8b, v0.8b
157 %in = sext i8 %src to i16
158 %ext.b = sext <8 x i8> %b to <8 x i16>
159 %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 1
160 %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
161 %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
; Vector built from four different sign-extended scalars (no splat at all).
; Each of %b0..%b3 is sign-extended to i32 and inserted into its own lane; the
; result is multiplied by sign-extended %b. The expected assembly assembles
; the narrow values directly into .h lanes (fmov + three lane moves) and then
; uses one smull from .4h to .4s.
165 define <4 x i32> @nonsplat_shuffleinsert2(<4 x i16> %b, i16 %b0, i16 %b1, i16 %b2, i16 %b3) {
166 ; CHECK-LABEL: nonsplat_shuffleinsert2:
167 ; CHECK: // %bb.0: // %entry
168 ; CHECK-NEXT: fmov s1, w0
169 ; CHECK-NEXT: mov v1.h[1], w1
170 ; CHECK-NEXT: mov v1.h[2], w2
171 ; CHECK-NEXT: mov v1.h[3], w3
172 ; CHECK-NEXT: smull v0.4s, v1.4h, v0.4h
175 %s0 = sext i16 %b0 to i32
176 %s1 = sext i16 %b1 to i32
177 %s2 = sext i16 %b2 to i32
178 %s3 = sext i16 %b3 to i32
179 %ext.b = sext <4 x i16> %b to <4 x i32>
; Chain of inserts filling lanes 0..3 with %s0..%s3 respectively.
180 %v0 = insertelement <4 x i32> undef, i32 %s0, i32 0
181 %v1 = insertelement <4 x i32> %v0, i32 %s1, i32 1
182 %v2 = insertelement <4 x i32> %v1, i32 %s2, i32 2
183 %v3 = insertelement <4 x i32> %v2, i32 %s3, i32 3
184 %out = mul nsw <4 x i32> %v3, %ext.b
; Case where the splatted multiplicand is a zext from i1.
; A <8 x i16> loaded from %q is negated, widened to 16 lanes by a shuffle and
; multiplied by a 16-lane splat of zext(0 < %a); the product is compared
; against zero, turned into a <16 x i8> mask and stored to %p.
; The visible expected assembly contains no smull/umull: it is built from
; compare, select-style bit operations (cmtst/cmeq/bic) and a narrowing xtn.
188 define void @typei1_orig(i64 %a, ptr %p, ptr %q) {
189 ; CHECK-LABEL: typei1_orig:
191 ; CHECK-NEXT: cmp x0, #0
192 ; CHECK-NEXT: ldr q0, [x2]
193 ; CHECK-NEXT: cset w8, gt
194 ; CHECK-NEXT: dup v1.8h, w8
195 ; CHECK-NEXT: cmtst v0.8h, v0.8h, v0.8h
196 ; CHECK-NEXT: cmeq v1.8h, v1.8h, #0
197 ; CHECK-NEXT: bic v0.16b, v0.16b, v1.16b
198 ; CHECK-NEXT: xtn v0.8b, v0.8h
199 ; CHECK-NEXT: str q0, [x1]
; xor of all-false with all-true: an all-true <16 x i1> constant.
201 %tmp = xor <16 x i1> zeroinitializer, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
202 %tmp6 = load <8 x i16>, ptr %q, align 2
203 %tmp7 = sub <8 x i16> zeroinitializer, %tmp6
; Widen 8 -> 16 lanes; mask indices 8..15 select from the undef second
; operand, so the upper eight result lanes are undef.
204 %tmp8 = shufflevector <8 x i16> %tmp7, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
205 %tmp9 = icmp slt i64 0, %a
206 %tmp10 = zext i1 %tmp9 to i16
; Lane-0 insert plus all-zero shuffle mask: 16-lane broadcast of %tmp10.
207 %tmp11 = insertelement <16 x i16> undef, i16 %tmp10, i64 0
208 %tmp12 = shufflevector <16 x i16> %tmp11, <16 x i16> undef, <16 x i32> zeroinitializer
209 %tmp13 = mul nuw <16 x i16> %tmp8, %tmp12
210 %tmp14 = icmp ne <16 x i16> %tmp13, zeroinitializer
211 %tmp15 = and <16 x i1> %tmp14, %tmp
212 %tmp16 = sext <16 x i1> %tmp15 to <16 x i8>
213 store <16 x i8> %tmp16, ptr %p, align 1
; Splat-multiply where both operands are zero-extended from i1.
; The expected assembly masks both operands to their low bit (and with #0x1 on
; the scalar, and with a vector of 1s on %b), splats the scalar on .8b and
; uses umull from .8b to .8h.
217 define <8 x i16> @typei1_v8i1_v8i16(i1 %src, <8 x i1> %b) {
218 ; CHECK-LABEL: typei1_v8i1_v8i16:
219 ; CHECK: // %bb.0: // %entry
220 ; CHECK-NEXT: movi v1.8b, #1
221 ; CHECK-NEXT: and w8, w0, #0x1
222 ; CHECK-NEXT: dup v2.8b, w8
223 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
224 ; CHECK-NEXT: umull v0.8h, v2.8b, v0.8b
227 %in = zext i1 %src to i16
228 %ext.b = zext <8 x i1> %b to <8 x i16>
; Lane-0 insert plus all-zero shuffle mask: broadcast of %in.
229 %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
230 %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
231 %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
; Shuffle with no insertelement feeding it.
; The sign-extended %b is rotated by the mask <2,3,4,5,6,7,0,1> and multiplied
; by the unrotated sign-extended %b. The expected assembly performs the
; rotation on the narrow .8b value (ext ... #2) and then one smull.
235 define <8 x i16> @missing_insert(<8 x i8> %b) {
236 ; CHECK-LABEL: missing_insert:
237 ; CHECK: // %bb.0: // %entry
238 ; CHECK-NEXT: ext v1.8b, v0.8b, v0.8b, #2
239 ; CHECK-NEXT: smull v0.8h, v1.8b, v0.8b
242 %ext.b = sext <8 x i8> %b to <8 x i16>
243 %broadcast.splat = shufflevector <8 x i16> %ext.b, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
244 %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
; Signed multiply where one operand is a lane-reversed sign-extended vector.
; The expected assembly reverses the narrow .8b input (rev64) and then uses a
; single smull, i.e. the shuffle is applied before the widening.
248 define <8 x i16> @shufsext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
249 ; CHECK-LABEL: shufsext_v8i8_v8i16:
250 ; CHECK: // %bb.0: // %entry
251 ; CHECK-NEXT: rev64 v0.8b, v0.8b
252 ; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b
255 %in = sext <8 x i8> %src to <8 x i16>
256 %ext.b = sext <8 x i8> %b to <8 x i16>
; Mask <7..0>: full lane reversal of %in.
257 %shuf = shufflevector <8 x i16> %in, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
258 %out = mul nsw <8 x i16> %shuf, %ext.b
; Two-lane signed variant of the reversed-shuffle multiply (2 x i32 -> 2 x i64).
; The expected assembly swaps the two narrow .2s lanes with rev64 and then uses
; one smull producing .2d.
262 define <2 x i64> @shufsext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
263 ; CHECK-LABEL: shufsext_v2i32_v2i64:
264 ; CHECK: // %bb.0: // %entry
265 ; CHECK-NEXT: rev64 v0.2s, v0.2s
266 ; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
269 %in = sext <2 x i32> %src to <2 x i64>
270 %ext.b = sext <2 x i32> %b to <2 x i64>
; Mask <1,0>: swap the two lanes of %in.
271 %shuf = shufflevector <2 x i64> %in, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
272 %out = mul nsw <2 x i64> %shuf, %ext.b
; Unsigned multiply where one operand is a lane-reversed zero-extended vector.
; The expected assembly reverses the narrow .8b input (rev64) and then uses a
; single umull.
276 define <8 x i16> @shufzext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
277 ; CHECK-LABEL: shufzext_v8i8_v8i16:
278 ; CHECK: // %bb.0: // %entry
279 ; CHECK-NEXT: rev64 v0.8b, v0.8b
280 ; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b
283 %in = zext <8 x i8> %src to <8 x i16>
284 %ext.b = zext <8 x i8> %b to <8 x i16>
; Mask <7..0>: full lane reversal of %in.
285 %shuf = shufflevector <8 x i16> %in, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
286 %out = mul nsw <8 x i16> %shuf, %ext.b
; Two-lane unsigned variant of the reversed-shuffle multiply (2 x i32 -> 2 x i64).
; Both operands are zero-extended, matching the "zext" in the function name and
; mirroring shufzext_v8i8_v8i16; the body previously used sext and therefore
; duplicated shufsext_v2i32_v2i64 instead of covering the unsigned case.
; NOTE(review): the umull line below was written by analogy with the other
; zext tests; regenerate with utils/update_llc_test_checks.py to confirm.
290 define <2 x i64> @shufzext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
291 ; CHECK-LABEL: shufzext_v2i32_v2i64:
292 ; CHECK: // %bb.0: // %entry
293 ; CHECK-NEXT: rev64 v0.2s, v0.2s
294 ; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
297 %in = zext <2 x i32> %src to <2 x i64>
298 %ext.b = zext <2 x i32> %b to <2 x i64>
; Mask <1,0>: swap the two lanes of %in.
299 %shuf = shufflevector <2 x i64> %in, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
300 %out = mul nsw <2 x i64> %shuf, %ext.b
; Two-input shuffle of zero-extended vectors feeding an unsigned multiply.
; The mask <0,8,2,10,4,12,6,14> interleaves the even lanes of %in1 and %in2.
; Both shuffle inputs and %b are zext, and the expected assembly does the
; interleave on the narrow .8b values (trn1) followed by one umull.
304 define <8 x i16> @shufzext_v8i8_v8i16_twoin(<8 x i8> %src1, <8 x i8> %src2, <8 x i8> %b) {
305 ; CHECK-LABEL: shufzext_v8i8_v8i16_twoin:
306 ; CHECK: // %bb.0: // %entry
307 ; CHECK-NEXT: trn1 v0.8b, v0.8b, v1.8b
308 ; CHECK-NEXT: umull v0.8h, v0.8b, v2.8b
311 %in1 = zext <8 x i8> %src1 to <8 x i16>
312 %in2 = zext <8 x i8> %src2 to <8 x i16>
313 %ext.b = zext <8 x i8> %b to <8 x i16>
314 %shuf = shufflevector <8 x i16> %in1, <8 x i16> %in2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
315 %out = mul nsw <8 x i16> %shuf, %ext.b
; Two-input shuffle whose inputs use different extensions (%in1 zext, %in2 sext).
; Same interleaving mask as shufzext_v8i8_v8i16_twoin, but because the shuffle
; inputs are extended differently the expected assembly widens each operand
; first (ushll / sshll), interleaves on .8h (trn1) and finishes with a plain
; mul rather than a widening multiply.
319 define <8 x i16> @shufszext_v8i8_v8i16_twoin(<8 x i8> %src1, <8 x i8> %src2, <8 x i8> %b) {
320 ; CHECK-LABEL: shufszext_v8i8_v8i16_twoin:
321 ; CHECK: // %bb.0: // %entry
322 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
323 ; CHECK-NEXT: sshll v1.8h, v1.8b, #0
324 ; CHECK-NEXT: trn1 v0.8h, v0.8h, v1.8h
325 ; CHECK-NEXT: ushll v1.8h, v2.8b, #0
326 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
329 %in1 = zext <8 x i8> %src1 to <8 x i16>
330 %in2 = sext <8 x i8> %src2 to <8 x i16>
331 %ext.b = zext <8 x i8> %b to <8 x i16>
332 %shuf = shufflevector <8 x i16> %in1, <8 x i16> %in2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
333 %out = mul nsw <8 x i16> %shuf, %ext.b