1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
; Multiplies every i16 lane of %a by the constant 255, built as a splat
; (insertelement into lane 0 followed by a zero-mask shufflevector).
; The assertions shown expect the constant to be materialised into z1.h with a
; mov and then combined with a register-register mul on .h lanes.
; NOTE(review): presumably 255 does not fit the immediate form of MUL, which
; is why a separate mov is expected - confirm against the SVE2 ISA.
7 define <vscale x 8 x i16> @mul_i16_imm(<vscale x 8 x i16> %a) {
8 ; CHECK-LABEL: mul_i16_imm:
10 ; CHECK-NEXT: mov z1.h, #255 // =0xff
11 ; CHECK-NEXT: mul z0.h, z0.h, z1.h
13 %elt = insertelement <vscale x 8 x i16> undef, i16 255, i32 0
14 %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
15 %res = mul <vscale x 8 x i16> %a, %splat
16 ret <vscale x 8 x i16> %res
; Multiplies every i16 lane of %a by the constant -200, built as a splat
; (insertelement into lane 0 followed by a zero-mask shufflevector).
; The assertions shown expect -200 to be loaded into scalar register w8,
; broadcast into z1.h, and then multiplied with a register-register mul.
; NOTE(review): presumably -200 is outside the range of the immediate form
; of MUL - confirm against the SVE2 ISA.
19 define <vscale x 8 x i16> @mul_i16_imm_neg(<vscale x 8 x i16> %a) {
20 ; CHECK-LABEL: mul_i16_imm_neg:
22 ; CHECK-NEXT: mov w8, #-200
23 ; CHECK-NEXT: mov z1.h, w8
24 ; CHECK-NEXT: mul z0.h, z0.h, z1.h
26 %elt = insertelement <vscale x 8 x i16> undef, i16 -200, i32 0
27 %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
28 %res = mul <vscale x 8 x i16> %a, %splat
29 ret <vscale x 8 x i16> %res
; Multiplies every i32 lane of %a by the constant 255, built as a splat
; (insertelement into lane 0 followed by a zero-mask shufflevector).
; The assertions shown expect the constant to be materialised into z1.s with a
; mov and then combined with a register-register mul on .s lanes.
; NOTE(review): presumably 255 does not fit the immediate form of MUL -
; confirm against the SVE2 ISA.
32 define <vscale x 4 x i32> @mul_i32_imm(<vscale x 4 x i32> %a) {
33 ; CHECK-LABEL: mul_i32_imm:
35 ; CHECK-NEXT: mov z1.s, #255 // =0xff
36 ; CHECK-NEXT: mul z0.s, z0.s, z1.s
38 %elt = insertelement <vscale x 4 x i32> undef, i32 255, i32 0
39 %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
40 %res = mul <vscale x 4 x i32> %a, %splat
41 ret <vscale x 4 x i32> %res
; Multiplies every i32 lane of %a by the constant -200, built as a splat
; (insertelement into lane 0 followed by a zero-mask shufflevector).
; The assertions shown expect -200 to be loaded into scalar register w8,
; broadcast into z1.s, and then multiplied with a register-register mul.
; NOTE(review): presumably -200 is outside the range of the immediate form
; of MUL - confirm against the SVE2 ISA.
44 define <vscale x 4 x i32> @mul_i32_imm_neg(<vscale x 4 x i32> %a) {
45 ; CHECK-LABEL: mul_i32_imm_neg:
47 ; CHECK-NEXT: mov w8, #-200
48 ; CHECK-NEXT: mov z1.s, w8
49 ; CHECK-NEXT: mul z0.s, z0.s, z1.s
51 %elt = insertelement <vscale x 4 x i32> undef, i32 -200, i32 0
52 %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
53 %res = mul <vscale x 4 x i32> %a, %splat
54 ret <vscale x 4 x i32> %res
; Multiplies every i64 lane of %a by the constant 255, built as a splat
; (insertelement into lane 0 followed by a zero-mask shufflevector).
; The assertions shown expect the constant to be materialised into z1.d with a
; mov and then combined with a register-register mul on .d lanes.
; NOTE(review): presumably 255 does not fit the immediate form of MUL -
; confirm against the SVE2 ISA.
57 define <vscale x 2 x i64> @mul_i64_imm(<vscale x 2 x i64> %a) {
58 ; CHECK-LABEL: mul_i64_imm:
60 ; CHECK-NEXT: mov z1.d, #255 // =0xff
61 ; CHECK-NEXT: mul z0.d, z0.d, z1.d
63 %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
64 %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
65 %res = mul <vscale x 2 x i64> %a, %splat
66 ret <vscale x 2 x i64> %res
; Multiplies every i64 lane of %a by the constant -200, built as a splat
; (insertelement into lane 0 followed by a zero-mask shufflevector).
; The assertions shown expect -200 to be loaded into the 64-bit scalar
; register x8, broadcast into z1.d, and then multiplied with a
; register-register mul.
; NOTE(review): presumably -200 is outside the range of the immediate form
; of MUL - confirm against the SVE2 ISA.
69 define <vscale x 2 x i64> @mul_i64_imm_neg(<vscale x 2 x i64> %a) {
70 ; CHECK-LABEL: mul_i64_imm_neg:
72 ; CHECK-NEXT: mov x8, #-200
73 ; CHECK-NEXT: mov z1.d, x8
74 ; CHECK-NEXT: mul z0.d, z0.d, z1.d
76 %elt = insertelement <vscale x 2 x i64> undef, i64 -200, i32 0
77 %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
78 %res = mul <vscale x 2 x i64> %a, %splat
79 ret <vscale x 2 x i64> %res
83 ; MUL (vector, unpredicated)
; Plain IR mul of two <vscale x 16 x i8> arguments.
; The assertion shown expects a single mul on .b lanes that takes z0 and z1
; directly, with no predicate operand.
85 define <vscale x 16 x i8> @mul_i8(<vscale x 16 x i8> %a,
86 ; CHECK-LABEL: mul_i8:
88 ; CHECK-NEXT: mul z0.b, z0.b, z1.b
90 <vscale x 16 x i8> %b) {
91 %res = mul <vscale x 16 x i8> %a, %b
92 ret <vscale x 16 x i8> %res
; Plain IR mul of two <vscale x 8 x i16> arguments.
; The assertion shown expects a single mul on .h lanes that takes z0 and z1
; directly, with no predicate operand.
95 define <vscale x 8 x i16> @mul_i16(<vscale x 8 x i16> %a,
96 ; CHECK-LABEL: mul_i16:
98 ; CHECK-NEXT: mul z0.h, z0.h, z1.h
100 <vscale x 8 x i16> %b) {
101 %res = mul <vscale x 8 x i16> %a, %b
102 ret <vscale x 8 x i16> %res
; Plain IR mul of two <vscale x 4 x i32> arguments.
; The assertion shown expects a single mul on .s lanes that takes z0 and z1
; directly, with no predicate operand.
105 define <vscale x 4 x i32> @mul_i32(<vscale x 4 x i32> %a,
106 ; CHECK-LABEL: mul_i32:
108 ; CHECK-NEXT: mul z0.s, z0.s, z1.s
110 <vscale x 4 x i32> %b) {
111 %res = mul <vscale x 4 x i32> %a, %b
112 ret <vscale x 4 x i32> %res
; Plain IR mul of two <vscale x 2 x i64> arguments.
; The assertion shown expects a single mul on .d lanes that takes z0 and z1
; directly, with no predicate operand.
115 define <vscale x 2 x i64> @mul_i64(<vscale x 2 x i64> %a,
116 ; CHECK-LABEL: mul_i64:
118 ; CHECK-NEXT: mul z0.d, z0.d, z1.d
120 <vscale x 2 x i64> %b) {
121 %res = mul <vscale x 2 x i64> %a, %b
122 ret <vscale x 2 x i64> %res
126 ; SMULH (vector, unpredicated)
; Calls llvm.aarch64.sve.smulh.u on i8 lanes, governed by a predicate that
; comes from llvm.aarch64.sve.ptrue with pattern operand 31.
; NOTE(review): pattern 31 is presumably the "all lanes" pattern - confirm.
; The assertion shown expects a single smulh on .b lanes that carries no
; predicate operand.
128 define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i8> %a,
129 ; CHECK-LABEL: smulh_i8:
131 ; CHECK-NEXT: smulh z0.b, z0.b, z1.b
133 <vscale x 16 x i8> %b) {
134 %sel = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
135 %res = call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1> %sel, <vscale x 16 x i8> %a,
136 <vscale x 16 x i8> %b)
137 ret <vscale x 16 x i8> %res
; Calls llvm.aarch64.sve.smulh.u on i16 lanes, governed by a predicate that
; comes from llvm.aarch64.sve.ptrue with pattern operand 31.
; NOTE(review): pattern 31 is presumably the "all lanes" pattern - confirm.
; The assertion shown expects a single smulh on .h lanes that carries no
; predicate operand.
140 define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i16> %a,
141 ; CHECK-LABEL: smulh_i16:
143 ; CHECK-NEXT: smulh z0.h, z0.h, z1.h
145 <vscale x 8 x i16> %b) {
146 %sel = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
147 %res = call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x 8 x i1> %sel, <vscale x 8 x i16> %a,
148 <vscale x 8 x i16> %b)
149 ret <vscale x 8 x i16> %res
; Calls llvm.aarch64.sve.smulh.u on i32 lanes, governed by a predicate that
; comes from llvm.aarch64.sve.ptrue with pattern operand 31.
; NOTE(review): pattern 31 is presumably the "all lanes" pattern - confirm.
; The assertion shown expects a single smulh on .s lanes that carries no
; predicate operand.
152 define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i32> %a,
153 ; CHECK-LABEL: smulh_i32:
155 ; CHECK-NEXT: smulh z0.s, z0.s, z1.s
157 <vscale x 4 x i32> %b) {
158 %sel = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
159 %res = call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1> %sel, <vscale x 4 x i32> %a,
160 <vscale x 4 x i32> %b)
161 ret <vscale x 4 x i32> %res
; Calls llvm.aarch64.sve.smulh.u on i64 lanes, governed by a predicate that
; comes from llvm.aarch64.sve.ptrue with pattern operand 31.
; NOTE(review): pattern 31 is presumably the "all lanes" pattern - confirm.
; The assertion shown expects a single smulh on .d lanes that carries no
; predicate operand.
164 define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i64> %a,
165 ; CHECK-LABEL: smulh_i64:
167 ; CHECK-NEXT: smulh z0.d, z0.d, z1.d
169 <vscale x 2 x i64> %b) {
170 %sel = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
171 %res = call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x 2 x i1> %sel, <vscale x 2 x i64> %a,
172 <vscale x 2 x i64> %b)
173 ret <vscale x 2 x i64> %res
177 ; UMULH (vector, unpredicated)
; Calls llvm.aarch64.sve.umulh.u on i8 lanes, governed by a predicate that
; comes from llvm.aarch64.sve.ptrue with pattern operand 31.
; NOTE(review): pattern 31 is presumably the "all lanes" pattern - confirm.
; The assertion shown expects a single umulh on .b lanes that carries no
; predicate operand.
179 define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i8> %a,
180 ; CHECK-LABEL: umulh_i8:
182 ; CHECK-NEXT: umulh z0.b, z0.b, z1.b
184 <vscale x 16 x i8> %b) {
185 %sel = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
186 %res = call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1> %sel, <vscale x 16 x i8> %a,
187 <vscale x 16 x i8> %b)
188 ret <vscale x 16 x i8> %res
; Calls llvm.aarch64.sve.umulh.u on i16 lanes, governed by a predicate that
; comes from llvm.aarch64.sve.ptrue with pattern operand 31.
; NOTE(review): pattern 31 is presumably the "all lanes" pattern - confirm.
; The assertion shown expects a single umulh on .h lanes that carries no
; predicate operand.
191 define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i16> %a,
192 ; CHECK-LABEL: umulh_i16:
194 ; CHECK-NEXT: umulh z0.h, z0.h, z1.h
196 <vscale x 8 x i16> %b) {
197 %sel = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
198 %res = call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x 8 x i1> %sel, <vscale x 8 x i16> %a,
199 <vscale x 8 x i16> %b)
200 ret <vscale x 8 x i16> %res
; Calls llvm.aarch64.sve.umulh.u on i32 lanes, governed by a predicate that
; comes from llvm.aarch64.sve.ptrue with pattern operand 31.
; NOTE(review): pattern 31 is presumably the "all lanes" pattern - confirm.
; The assertion shown expects a single umulh on .s lanes that carries no
; predicate operand.
203 define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i32> %a,
204 ; CHECK-LABEL: umulh_i32:
206 ; CHECK-NEXT: umulh z0.s, z0.s, z1.s
208 <vscale x 4 x i32> %b) {
209 %sel = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
210 %res = call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> %sel, <vscale x 4 x i32> %a,
211 <vscale x 4 x i32> %b)
212 ret <vscale x 4 x i32> %res
; Calls llvm.aarch64.sve.umulh.u on i64 lanes, governed by a predicate that
; comes from llvm.aarch64.sve.ptrue with pattern operand 31.
; NOTE(review): pattern 31 is presumably the "all lanes" pattern - confirm.
; The assertion shown expects a single umulh on .d lanes that carries no
; predicate operand.
215 define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a,
216 ; CHECK-LABEL: umulh_i64:
218 ; CHECK-NEXT: umulh z0.d, z0.d, z1.d
220 <vscale x 2 x i64> %b) {
221 %sel = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
222 %res = call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x 2 x i1> %sel, <vscale x 2 x i64> %a,
223 <vscale x 2 x i64> %b)
224 ret <vscale x 2 x i64> %res
228 ; PMUL (vector, unpredicated)
; Calls llvm.aarch64.sve.pmul on two <vscale x 16 x i8> arguments; the
; intrinsic takes no predicate operand.
; The assertion shown expects a single pmul on .b lanes using z0 and z1.
230 define <vscale x 16 x i8> @pmul_i8(<vscale x 16 x i8> %a,
231 ; CHECK-LABEL: pmul_i8:
233 ; CHECK-NEXT: pmul z0.b, z0.b, z1.b
235 <vscale x 16 x i8> %b) {
236 %res = call <vscale x 16 x i8> @llvm.aarch64.sve.pmul.nxv16i8(<vscale x 16 x i8> %a,
237 <vscale x 16 x i8> %b)
238 ret <vscale x 16 x i8> %res
242 ; SQDMULH (vector, unpredicated)
; Calls llvm.aarch64.sve.sqdmulh on two <vscale x 16 x i8> arguments; the
; intrinsic takes no predicate operand.
; The assertion shown expects a single sqdmulh on .b lanes using z0 and z1.
244 define <vscale x 16 x i8> @sqdmulh_i8(<vscale x 16 x i8> %a,
245 ; CHECK-LABEL: sqdmulh_i8:
247 ; CHECK-NEXT: sqdmulh z0.b, z0.b, z1.b
249 <vscale x 16 x i8> %b) {
250 %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqdmulh.nxv16i8(<vscale x 16 x i8> %a,
251 <vscale x 16 x i8> %b)
252 ret <vscale x 16 x i8> %res
; Calls llvm.aarch64.sve.sqdmulh on two <vscale x 8 x i16> arguments; the
; intrinsic takes no predicate operand.
; The assertion shown expects a single sqdmulh on .h lanes using z0 and z1.
255 define <vscale x 8 x i16> @sqdmulh_i16(<vscale x 8 x i16> %a,
256 ; CHECK-LABEL: sqdmulh_i16:
258 ; CHECK-NEXT: sqdmulh z0.h, z0.h, z1.h
260 <vscale x 8 x i16> %b) {
261 %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.nxv8i16(<vscale x 8 x i16> %a,
262 <vscale x 8 x i16> %b)
263 ret <vscale x 8 x i16> %res
; Calls llvm.aarch64.sve.sqdmulh on two <vscale x 4 x i32> arguments; the
; intrinsic takes no predicate operand.
; The assertion shown expects a single sqdmulh on .s lanes using z0 and z1.
266 define <vscale x 4 x i32> @sqdmulh_i32(<vscale x 4 x i32> %a,
267 ; CHECK-LABEL: sqdmulh_i32:
269 ; CHECK-NEXT: sqdmulh z0.s, z0.s, z1.s
271 <vscale x 4 x i32> %b) {
272 %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.nxv4i32(<vscale x 4 x i32> %a,
273 <vscale x 4 x i32> %b)
274 ret <vscale x 4 x i32> %res
; Calls llvm.aarch64.sve.sqdmulh on two <vscale x 2 x i64> arguments; the
; intrinsic takes no predicate operand.
; The assertion shown expects a single sqdmulh on .d lanes using z0 and z1.
277 define <vscale x 2 x i64> @sqdmulh_i64(<vscale x 2 x i64> %a,
278 ; CHECK-LABEL: sqdmulh_i64:
280 ; CHECK-NEXT: sqdmulh z0.d, z0.d, z1.d
282 <vscale x 2 x i64> %b) {
283 %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.nxv2i64(<vscale x 2 x i64> %a,
284 <vscale x 2 x i64> %b)
285 ret <vscale x 2 x i64> %res
289 ; SQRDMULH (vector, unpredicated)
; Calls llvm.aarch64.sve.sqrdmulh on two <vscale x 16 x i8> arguments; the
; intrinsic takes no predicate operand.
; The assertion shown expects a single sqrdmulh on .b lanes using z0 and z1.
291 define <vscale x 16 x i8> @sqrdmulh_i8(<vscale x 16 x i8> %a,
292 ; CHECK-LABEL: sqrdmulh_i8:
294 ; CHECK-NEXT: sqrdmulh z0.b, z0.b, z1.b
296 <vscale x 16 x i8> %b) {
297 %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmulh.nxv16i8(<vscale x 16 x i8> %a,
298 <vscale x 16 x i8> %b)
299 ret <vscale x 16 x i8> %res
; Calls llvm.aarch64.sve.sqrdmulh on two <vscale x 8 x i16> arguments; the
; intrinsic takes no predicate operand.
; The assertion shown expects a single sqrdmulh on .h lanes using z0 and z1.
302 define <vscale x 8 x i16> @sqrdmulh_i16(<vscale x 8 x i16> %a,
303 ; CHECK-LABEL: sqrdmulh_i16:
305 ; CHECK-NEXT: sqrdmulh z0.h, z0.h, z1.h
307 <vscale x 8 x i16> %b) {
308 %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.nxv8i16(<vscale x 8 x i16> %a,
309 <vscale x 8 x i16> %b)
310 ret <vscale x 8 x i16> %res
; Calls llvm.aarch64.sve.sqrdmulh on two <vscale x 4 x i32> arguments; the
; intrinsic takes no predicate operand.
; The assertion shown expects a single sqrdmulh on .s lanes using z0 and z1.
313 define <vscale x 4 x i32> @sqrdmulh_i32(<vscale x 4 x i32> %a,
314 ; CHECK-LABEL: sqrdmulh_i32:
316 ; CHECK-NEXT: sqrdmulh z0.s, z0.s, z1.s
318 <vscale x 4 x i32> %b) {
319 %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.nxv4i32(<vscale x 4 x i32> %a,
320 <vscale x 4 x i32> %b)
321 ret <vscale x 4 x i32> %res
; Calls llvm.aarch64.sve.sqrdmulh on two <vscale x 2 x i64> arguments; the
; intrinsic takes no predicate operand.
; The assertion shown expects a single sqrdmulh on .d lanes using z0 and z1.
324 define <vscale x 2 x i64> @sqrdmulh_i64(<vscale x 2 x i64> %a,
325 ; CHECK-LABEL: sqrdmulh_i64:
327 ; CHECK-NEXT: sqrdmulh z0.d, z0.d, z1.d
329 <vscale x 2 x i64> %b) {
330 %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.nxv2i64(<vscale x 2 x i64> %a,
331 <vscale x 2 x i64> %b)
332 ret <vscale x 2 x i64> %res
; Declarations of the AArch64 SVE intrinsics called by the tests in this file.

; Predicate construction: one overload per predicate width, each taking a
; single i32 pattern operand.
335 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
336 declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
337 declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
338 declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
; smulh.u: predicate plus two data vectors, for i8/i16/i32/i64 lanes.
339 declare <vscale x 16 x i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
340 declare <vscale x 8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
341 declare <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
342 declare <vscale x 2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
; umulh.u: predicate plus two data vectors, for i8/i16/i32/i64 lanes.
343 declare <vscale x 16 x i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
344 declare <vscale x 8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
345 declare <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
346 declare <vscale x 2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
; pmul: two data vectors, declared for i8 lanes only.
347 declare <vscale x 16 x i8> @llvm.aarch64.sve.pmul.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
; sqdmulh: two data vectors, for i8/i16/i32/i64 lanes.
348 declare <vscale x 16 x i8> @llvm.aarch64.sve.sqdmulh.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
349 declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
350 declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
351 declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
; sqrdmulh: two data vectors, for i8/i16/i32/i64 lanes.
352 declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmulh.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
353 declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
354 declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
355 declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)