1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
; Test: sign-extend an <8 x i8> load from %x to <8 x i16>, splat lane 3 of the
; widened vector, and multiply-accumulate it against sign-extended <8 x i8>
; loads from %y inside a loop.
; The autogenerated assertions expect the splat to be materialised once before
; the loop on the narrow value (dup v1.8b, v1.b[3]) and the sext/mul/add in the
; loop to become a single smlal on 8b operands.
; NOTE(review): the block labels, the definition of %pa and the return are not
; among the lines visible here -- confirm against the full file.
4 define <8 x i16> @mul_splat_sext_v8i16(ptr %x, ptr %y) {
5 ; CHECK-LABEL: mul_splat_sext_v8i16:
6 ; CHECK: // %bb.0: // %entry
7 ; CHECK-NEXT: ldr d1, [x0]
8 ; CHECK-NEXT: movi v0.2d, #0000000000000000
9 ; CHECK-NEXT: mov x8, xzr
10 ; CHECK-NEXT: dup v1.8b, v1.b[3]
11 ; CHECK-NEXT: .LBB0_1: // %l1
12 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
13 ; CHECK-NEXT: ldr d2, [x1, x8]
14 ; CHECK-NEXT: add x8, x8, #4
15 ; CHECK-NEXT: cmp w8, #4
16 ; CHECK-NEXT: smlal v0.8h, v2.8b, v1.8b
17 ; CHECK-NEXT: b.eq .LBB0_1
18 ; CHECK-NEXT: // %bb.2: // %l2
; Splat operand: extend first, then broadcast element 3 of the wide vector.
21 %x.val = load <8 x i8>, ptr %x
22 %x.ext = sext <8 x i8> %x.val to <8 x i16>
23 %a = shufflevector <8 x i16> %x.ext, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Loop state: %p is the iteration index, %q the running accumulator (starts at zero).
27 %p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
28 %q = phi <8 x i16> [ zeroinitializer, %entry ], [ %c, %l1 ]
; Offset into %y is 4 * %p in units of i8, matching the #4 step in the assertions.
29 %y.idx = mul nuw nsw i32 %p, 4
30 %y.ptr = getelementptr i8, ptr %y, i32 %y.idx
31 %y.val = load <8 x i8>, ptr %y.ptr
32 %y.ext = sext <8 x i8> %y.val to <8 x i16>
; Widening multiply-accumulate: %c = %q + sext(%y.val) * %a.
33 %b = mul <8 x i16> %y.ext, %a
34 %c = add <8 x i16> %q, %b
; Branch back to %l1 only while %p == 0; otherwise fall through to %l2.
36 %c1 = icmp eq i32 %p, 0
37 br i1 %c1, label %l1, label %l2
; Test: sign-extend a <4 x i16> load from %x to <4 x i32>, splat lane 3 of the
; widened vector, and multiply-accumulate it against sign-extended <4 x i16>
; loads from %y inside a loop.
; The autogenerated assertions expect no separate dup: the loop uses the
; by-element form smlal v0.4s, v2.4h, v1.h[3] directly on the loaded value.
; NOTE(review): the block labels, the definition of %pa and the return are not
; among the lines visible here -- confirm against the full file.
43 define <4 x i32> @mul_splat_sext_v4i32(ptr %x, ptr %y) {
44 ; CHECK-LABEL: mul_splat_sext_v4i32:
45 ; CHECK: // %bb.0: // %entry
46 ; CHECK-NEXT: movi v0.2d, #0000000000000000
47 ; CHECK-NEXT: ldr d1, [x0]
48 ; CHECK-NEXT: mov x8, xzr
49 ; CHECK-NEXT: .LBB1_1: // %l1
50 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
51 ; CHECK-NEXT: ldr d2, [x1, x8]
52 ; CHECK-NEXT: add x8, x8, #8
53 ; CHECK-NEXT: cmp w8, #8
54 ; CHECK-NEXT: smlal v0.4s, v2.4h, v1.h[3]
55 ; CHECK-NEXT: b.eq .LBB1_1
56 ; CHECK-NEXT: // %bb.2: // %l2
; Splat operand: extend first, then broadcast element 3 of the wide vector.
59 %x.val = load <4 x i16>, ptr %x
60 %x.ext = sext <4 x i16> %x.val to <4 x i32>
61 %a = shufflevector <4 x i32> %x.ext, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
; Loop state: %p is the iteration index, %q the running accumulator (starts at zero).
65 %p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
66 %q = phi <4 x i32> [ zeroinitializer, %entry ], [ %c, %l1 ]
; Offset into %y is 4 * %p in units of i16, matching the #8 step in the assertions.
67 %y.idx = mul nuw nsw i32 %p, 4
68 %y.ptr = getelementptr i16, ptr %y, i32 %y.idx
69 %y.val = load <4 x i16>, ptr %y.ptr
70 %y.ext = sext <4 x i16> %y.val to <4 x i32>
; Widening multiply-accumulate: %c = %q + sext(%y.val) * %a.
71 %b = mul <4 x i32> %y.ext, %a
72 %c = add <4 x i32> %q, %b
; Branch back to %l1 only while %p == 0; otherwise fall through to %l2.
74 %c1 = icmp eq i32 %p, 0
75 br i1 %c1, label %l1, label %l2
; Test: sign-extend a <2 x i32> load from %x to <2 x i64>, splat lane 1 of the
; widened vector, and multiply-accumulate it against sign-extended <2 x i32>
; loads from %y inside a loop.
; The autogenerated assertions expect no separate dup: the loop uses the
; by-element form smlal v0.2d, v2.2s, v1.s[1] directly on the loaded value.
; NOTE(review): the block labels, the definition of %pa and the return are not
; among the lines visible here -- confirm against the full file.
81 define <2 x i64> @mul_splat_sext_v2i64(ptr %x, ptr %y) {
82 ; CHECK-LABEL: mul_splat_sext_v2i64:
83 ; CHECK: // %bb.0: // %entry
84 ; CHECK-NEXT: movi v0.2d, #0000000000000000
85 ; CHECK-NEXT: ldr d1, [x0]
86 ; CHECK-NEXT: mov x8, xzr
87 ; CHECK-NEXT: .LBB2_1: // %l1
88 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
89 ; CHECK-NEXT: ldr d2, [x1, x8]
90 ; CHECK-NEXT: add x8, x8, #16
91 ; CHECK-NEXT: cmp w8, #16
92 ; CHECK-NEXT: smlal v0.2d, v2.2s, v1.s[1]
93 ; CHECK-NEXT: b.eq .LBB2_1
94 ; CHECK-NEXT: // %bb.2: // %l2
; Splat operand: extend first, then broadcast element 1 of the wide vector.
97 %x.val = load <2 x i32>, ptr %x
98 %x.ext = sext <2 x i32> %x.val to <2 x i64>
99 %a = shufflevector <2 x i64> %x.ext, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
; Loop state: %p is the iteration index, %q the running accumulator (starts at zero).
103 %p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
104 %q = phi <2 x i64> [ zeroinitializer, %entry ], [ %c, %l1 ]
; Offset into %y is 4 * %p in units of i32, matching the #16 step in the assertions.
105 %y.idx = mul nuw nsw i32 %p, 4
106 %y.ptr = getelementptr i32, ptr %y, i32 %y.idx
107 %y.val = load <2 x i32>, ptr %y.ptr
108 %y.ext = sext <2 x i32> %y.val to <2 x i64>
; Widening multiply-accumulate: %c = %q + sext(%y.val) * %a.
109 %b = mul <2 x i64> %y.ext, %a
110 %c = add <2 x i64> %q, %b
; Branch back to %l1 only while %p == 0; otherwise fall through to %l2.
112 %c1 = icmp eq i32 %p, 0
113 br i1 %c1, label %l1, label %l2
; Test: splat lane 3 of an <8 x i8> load from %x while it is still narrow, then
; sign-extend the splat to <8 x i16> and multiply-accumulate it against
; sign-extended <8 x i8> loads from %y inside a loop.
; This is the splat-then-extend ordering; the autogenerated assertions expect
; a dup v1.8b, v1.b[3] before the loop and an smlal on 8b operands inside it.
; NOTE(review): the block labels, the definition of %pa and the return are not
; among the lines visible here -- confirm against the full file.
119 define <8 x i16> @mul_sext_splat_v8i16(ptr %x, ptr %y) {
120 ; CHECK-LABEL: mul_sext_splat_v8i16:
121 ; CHECK: // %bb.0: // %entry
122 ; CHECK-NEXT: ldr d1, [x0]
123 ; CHECK-NEXT: movi v0.2d, #0000000000000000
124 ; CHECK-NEXT: mov x8, xzr
125 ; CHECK-NEXT: dup v1.8b, v1.b[3]
126 ; CHECK-NEXT: .LBB3_1: // %l1
127 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
128 ; CHECK-NEXT: ldr d2, [x1, x8]
129 ; CHECK-NEXT: add x8, x8, #4
130 ; CHECK-NEXT: cmp w8, #4
131 ; CHECK-NEXT: smlal v0.8h, v2.8b, v1.8b
132 ; CHECK-NEXT: b.eq .LBB3_1
133 ; CHECK-NEXT: // %bb.2: // %l2
; Splat operand: broadcast element 3 of the narrow vector, then extend.
136 %x.val = load <8 x i8>, ptr %x
137 %x.spt = shufflevector <8 x i8> %x.val, <8 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
138 %a = sext <8 x i8> %x.spt to <8 x i16>
; Loop state: %p is the iteration index, %q the running accumulator (starts at zero).
142 %p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
143 %q = phi <8 x i16> [ zeroinitializer, %entry ], [ %c, %l1 ]
; Offset into %y is 4 * %p in units of i8, matching the #4 step in the assertions.
144 %y.idx = mul nuw nsw i32 %p, 4
145 %y.ptr = getelementptr i8, ptr %y, i32 %y.idx
146 %y.val = load <8 x i8>, ptr %y.ptr
147 %y.ext = sext <8 x i8> %y.val to <8 x i16>
; Widening multiply-accumulate: %c = %q + sext(%y.val) * %a.
148 %b = mul <8 x i16> %y.ext, %a
149 %c = add <8 x i16> %q, %b
; Branch back to %l1 only while %p == 0; otherwise fall through to %l2.
151 %c1 = icmp eq i32 %p, 0
152 br i1 %c1, label %l1, label %l2
; Test: splat lane 3 of a <4 x i16> load from %x while it is still narrow, then
; sign-extend the splat to <4 x i32> and multiply-accumulate it against
; sign-extended <4 x i16> loads from %y inside a loop.
; This is the splat-then-extend ordering; the autogenerated assertions expect
; the by-element form smlal v0.4s, v2.4h, v1.h[3] with no separate dup.
; NOTE(review): the block labels, the definition of %pa and the return are not
; among the lines visible here -- confirm against the full file.
158 define <4 x i32> @mul_sext_splat_v4i32(ptr %x, ptr %y) {
159 ; CHECK-LABEL: mul_sext_splat_v4i32:
160 ; CHECK: // %bb.0: // %entry
161 ; CHECK-NEXT: movi v0.2d, #0000000000000000
162 ; CHECK-NEXT: ldr d1, [x0]
163 ; CHECK-NEXT: mov x8, xzr
164 ; CHECK-NEXT: .LBB4_1: // %l1
165 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
166 ; CHECK-NEXT: ldr d2, [x1, x8]
167 ; CHECK-NEXT: add x8, x8, #8
168 ; CHECK-NEXT: cmp w8, #8
169 ; CHECK-NEXT: smlal v0.4s, v2.4h, v1.h[3]
170 ; CHECK-NEXT: b.eq .LBB4_1
171 ; CHECK-NEXT: // %bb.2: // %l2
; Splat operand: broadcast element 3 of the narrow vector, then extend.
174 %x.val = load <4 x i16>, ptr %x
175 %x.spt = shufflevector <4 x i16> %x.val, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
176 %a = sext <4 x i16> %x.spt to <4 x i32>
; Loop state: %p is the iteration index, %q the running accumulator (starts at zero).
180 %p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
181 %q = phi <4 x i32> [ zeroinitializer, %entry ], [ %c, %l1 ]
; Offset into %y is 4 * %p in units of i16, matching the #8 step in the assertions.
182 %y.idx = mul nuw nsw i32 %p, 4
183 %y.ptr = getelementptr i16, ptr %y, i32 %y.idx
184 %y.val = load <4 x i16>, ptr %y.ptr
185 %y.ext = sext <4 x i16> %y.val to <4 x i32>
; Widening multiply-accumulate: %c = %q + sext(%y.val) * %a.
186 %b = mul <4 x i32> %y.ext, %a
187 %c = add <4 x i32> %q, %b
; Branch back to %l1 only while %p == 0; otherwise fall through to %l2.
189 %c1 = icmp eq i32 %p, 0
190 br i1 %c1, label %l1, label %l2
; Test: splat lane 1 of a <2 x i32> load from %x while it is still narrow, then
; sign-extend the splat to <2 x i64> and multiply-accumulate it against
; sign-extended <2 x i32> loads from %y inside a loop.
; This is the splat-then-extend ordering; the autogenerated assertions expect
; the by-element form smlal v0.2d, v2.2s, v1.s[1] with no separate dup.
; NOTE(review): the block labels, the definition of %pa and the return are not
; among the lines visible here -- confirm against the full file.
196 define <2 x i64> @mul_sext_splat_v2i64(ptr %x, ptr %y) {
197 ; CHECK-LABEL: mul_sext_splat_v2i64:
198 ; CHECK: // %bb.0: // %entry
199 ; CHECK-NEXT: movi v0.2d, #0000000000000000
200 ; CHECK-NEXT: ldr d1, [x0]
201 ; CHECK-NEXT: mov x8, xzr
202 ; CHECK-NEXT: .LBB5_1: // %l1
203 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
204 ; CHECK-NEXT: ldr d2, [x1, x8]
205 ; CHECK-NEXT: add x8, x8, #16
206 ; CHECK-NEXT: cmp w8, #16
207 ; CHECK-NEXT: smlal v0.2d, v2.2s, v1.s[1]
208 ; CHECK-NEXT: b.eq .LBB5_1
209 ; CHECK-NEXT: // %bb.2: // %l2
; Splat operand: broadcast element 1 of the narrow vector, then extend.
212 %x.val = load <2 x i32>, ptr %x
213 %x.spt = shufflevector <2 x i32> %x.val, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
214 %a = sext <2 x i32> %x.spt to <2 x i64>
; Loop state: %p is the iteration index, %q the running accumulator (starts at zero).
218 %p = phi i32 [ 0, %entry ], [ %pa, %l1 ]
219 %q = phi <2 x i64> [ zeroinitializer, %entry ], [ %c, %l1 ]
; Offset into %y is 4 * %p in units of i32, matching the #16 step in the assertions.
220 %y.idx = mul nuw nsw i32 %p, 4
221 %y.ptr = getelementptr i32, ptr %y, i32 %y.idx
222 %y.val = load <2 x i32>, ptr %y.ptr
223 %y.ext = sext <2 x i32> %y.val to <2 x i64>
; Widening multiply-accumulate: %c = %q + sext(%y.val) * %a.
224 %b = mul <2 x i64> %y.ext, %a
225 %c = add <2 x i64> %q, %b
; Branch back to %l1 only while %p == 0; otherwise fall through to %l2.
227 %c1 = icmp eq i32 %p, 0
228 br i1 %c1, label %l1, label %l2