1 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
define <vscale x 8 x i16> @mul_i16_imm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: mul_i16_imm
; CHECK: mov w[[W:[0-9]+]], #255
; CHECK-NEXT: mov z1.h, w[[W]]
; CHECK-NEXT: mul z0.h, z0.h, z1.h
  ; Splat the immediate 255 across all i16 lanes, then multiply into %a.
  %ins = insertelement <vscale x 8 x i16> undef, i16 255, i32 0
  %dup = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %prod = mul <vscale x 8 x i16> %a, %dup
  ret <vscale x 8 x i16> %prod
define <vscale x 8 x i16> @mul_i16_imm_neg(<vscale x 8 x i16> %a) {
; CHECK-LABEL: mul_i16_imm_neg
; CHECK: mov w[[W:[0-9]+]], #-200
; CHECK-NEXT: mov z1.h, w[[W]]
; CHECK-NEXT: mul z0.h, z0.h, z1.h
  ; Splat the negative immediate -200 across all i16 lanes, then multiply.
  %ins = insertelement <vscale x 8 x i16> undef, i16 -200, i32 0
  %dup = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %prod = mul <vscale x 8 x i16> %a, %dup
  ret <vscale x 8 x i16> %prod
define <vscale x 4 x i32> @mul_i32_imm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: mul_i32_imm
; CHECK: mov w[[W:[0-9]+]], #255
; CHECK-NEXT: mov z1.s, w[[W]]
; CHECK-NEXT: mul z0.s, z0.s, z1.s
  ; Splat the immediate 255 across all i32 lanes, then multiply into %a.
  %ins = insertelement <vscale x 4 x i32> undef, i32 255, i32 0
  %dup = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %prod = mul <vscale x 4 x i32> %a, %dup
  ret <vscale x 4 x i32> %prod
define <vscale x 4 x i32> @mul_i32_imm_neg(<vscale x 4 x i32> %a) {
; CHECK-LABEL: mul_i32_imm_neg
; CHECK: mov w[[W:[0-9]+]], #-200
; CHECK-NEXT: mov z1.s, w[[W]]
; CHECK-NEXT: mul z0.s, z0.s, z1.s
  ; Splat the negative immediate -200 across all i32 lanes, then multiply.
  %ins = insertelement <vscale x 4 x i32> undef, i32 -200, i32 0
  %dup = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %prod = mul <vscale x 4 x i32> %a, %dup
  ret <vscale x 4 x i32> %prod
define <vscale x 2 x i64> @mul_i64_imm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: mul_i64_imm
; CHECK: mov w[[X:[0-9]+]], #255
; CHECK-NEXT: mov z1.d, x[[X]]
; CHECK-NEXT: mul z0.d, z0.d, z1.d
  ; Splat the immediate 255 across all i64 lanes, then multiply into %a.
  ; (The expected splat-move line above was missing its "mov" mnemonic; it
  ; only matched by substring. Restored to match the i16/i32 siblings.)
  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %res = mul <vscale x 2 x i64> %a, %splat
  ret <vscale x 2 x i64> %res
define <vscale x 2 x i64> @mul_i64_imm_neg(<vscale x 2 x i64> %a) {
; CHECK-LABEL: mul_i64_imm_neg
; CHECK: mov x[[X:[0-9]+]], #-200
; CHECK-NEXT: mov z1.d, x[[X]]
; CHECK-NEXT: mul z0.d, z0.d, z1.d
  ; Splat the negative immediate -200 across all i64 lanes, then multiply.
  ; (The expected splat-move line above was missing its "mov" mnemonic; it
  ; only matched by substring. Restored to match the i16/i32 siblings.)
  %elt = insertelement <vscale x 2 x i64> undef, i64 -200, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %res = mul <vscale x 2 x i64> %a, %splat
  ret <vscale x 2 x i64> %res
73 ; MUL (vector, unpredicated)
define <vscale x 16 x i8> @mul_i8(<vscale x 16 x i8> %a,
                                  <vscale x 16 x i8> %b) {
; CHECK-LABEL: mul_i8
; CHECK: mul z0.b, z0.b, z1.b
  ; Unpredicated vector-by-vector multiply; restored the missing label
  ; directive so the pattern above is anchored to this function, matching
  ; every sibling test in this file.
  %res = mul <vscale x 16 x i8> %a, %b
  ret <vscale x 16 x i8> %res
define <vscale x 8 x i16> @mul_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: mul_i16
; CHECK: mul z0.h, z0.h, z1.h
  ; Plain vector-by-vector multiply on i16 lanes.
  %prod = mul <vscale x 8 x i16> %a, %b
  ret <vscale x 8 x i16> %prod
define <vscale x 4 x i32> @mul_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: mul_i32
; CHECK: mul z0.s, z0.s, z1.s
  ; Plain vector-by-vector multiply on i32 lanes.
  %prod = mul <vscale x 4 x i32> %a, %b
  ret <vscale x 4 x i32> %prod
define <vscale x 2 x i64> @mul_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: mul_i64
; CHECK: mul z0.d, z0.d, z1.d
  ; Plain vector-by-vector multiply on i64 lanes.
  %prod = mul <vscale x 2 x i64> %a, %b
  ret <vscale x 2 x i64> %prod
112 ; SMULH (vector, unpredicated)
define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: smulh_i8
; CHECK: smulh z0.b, z0.b, z1.b
  ; All-active predicate (pattern 31 = ALL) feeding the predicated intrinsic.
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: smulh_i16
; CHECK: smulh z0.h, z0.h, z1.h
  ; All-active predicate (pattern 31 = ALL) feeding the predicated intrinsic.
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: smulh_i32
; CHECK: smulh z0.s, z0.s, z1.s
  ; All-active predicate (pattern 31 = ALL) feeding the predicated intrinsic.
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: smulh_i64
; CHECK: smulh z0.d, z0.d, z1.d
  ; All-active predicate (pattern 31 = ALL) feeding the predicated intrinsic.
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
159 ; UMULH (vector, unpredicated)
define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: umulh_i8
; CHECK: umulh z0.b, z0.b, z1.b
  ; All-active predicate (pattern 31 = ALL) feeding the predicated intrinsic.
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: umulh_i16
; CHECK: umulh z0.h, z0.h, z1.h
  ; All-active predicate (pattern 31 = ALL) feeding the predicated intrinsic.
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umulh_i32
; CHECK: umulh z0.s, z0.s, z1.s
  ; All-active predicate (pattern 31 = ALL) feeding the predicated intrinsic.
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: umulh_i64
; CHECK: umulh z0.d, z0.d, z1.d
  ; All-active predicate (pattern 31 = ALL) feeding the predicated intrinsic.
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
206 ; PMUL (vector, unpredicated)
define <vscale x 16 x i8> @pmul_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: pmul_i8
; CHECK: pmul z0.b, z0.b, z1.b
  ; Polynomial multiply is unpredicated and only defined for byte elements.
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.pmul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
219 ; SQDMULH (vector, unpredicated)
define <vscale x 16 x i8> @sqdmulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqdmulh_i8
; CHECK: sqdmulh z0.b, z0.b, z1.b
  ; Saturating doubling multiply-high, unpredicated form.
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqdmulh.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
define <vscale x 8 x i16> @sqdmulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqdmulh_i16
; CHECK: sqdmulh z0.h, z0.h, z1.h
  ; Saturating doubling multiply-high, unpredicated form.
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
define <vscale x 4 x i32> @sqdmulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqdmulh_i32
; CHECK: sqdmulh z0.s, z0.s, z1.s
  ; Saturating doubling multiply-high, unpredicated form.
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
define <vscale x 2 x i64> @sqdmulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqdmulh_i64
; CHECK: sqdmulh z0.d, z0.d, z1.d
  ; Saturating doubling multiply-high, unpredicated form.
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
262 ; SQRDMULH (vector, unpredicated)
define <vscale x 16 x i8> @sqrdmulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqrdmulh_i8
; CHECK: sqrdmulh z0.b, z0.b, z1.b
  ; Saturating rounding doubling multiply-high, unpredicated form.
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmulh.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
define <vscale x 8 x i16> @sqrdmulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqrdmulh_i16
; CHECK: sqrdmulh z0.h, z0.h, z1.h
  ; Saturating rounding doubling multiply-high, unpredicated form.
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
define <vscale x 4 x i32> @sqrdmulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqrdmulh_i32
; CHECK: sqrdmulh z0.s, z0.s, z1.s
  ; Saturating rounding doubling multiply-high, unpredicated form.
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
define <vscale x 2 x i64> @sqrdmulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqrdmulh_i64
; CHECK: sqrdmulh z0.d, z0.d, z1.d
  ; Saturating rounding doubling multiply-high, unpredicated form.
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
; Predicate generators used by the predicated tests above.
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
; Predicated signed multiply-high (first operand is the governing predicate).
declare <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
; Predicated unsigned multiply-high.
declare <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
; Unpredicated polynomial multiply (byte elements only).
declare <vscale x 16 x i8> @llvm.aarch64.sve.pmul.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
; Unpredicated saturating doubling multiply-high.
declare <vscale x 16 x i8> @llvm.aarch64.sve.sqdmulh.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
; Unpredicated saturating rounding doubling multiply-high.
declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmulh.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)