1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s --check-prefixes=CHECK,SVE
3 ; RUN: llc -mattr=+sve2 -force-streaming-compatible-sve < %s | FileCheck %s --check-prefixes=CHECK,SVE2
5 ; This test mainly exercises the legal types for a given vector width, as mulh
6 ; nodes are not generated for non-legal types (see the v4i8 and v2i16 cases).
8 target triple = "aarch64-unknown-linux-gnu"
; smulh_v4i8: sext both <4 x i8> operands to i16, multiply, lshr by 4 and
; truncate back to i8.
; Expected for both run lines: the widened sequence is kept (sxtb, mul,
; lsr #4 on .h elements) and no smulh is emitted; only the mul differs
; (predicated under +sve, unpredicated under +sve2).
; NOTE(review): the shift amount is 4, not the i8 element width of 8 - confirm
; this is intentional before changing it, since the assertions encode lsr #4.
14 define <4 x i8> @smulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
15 ; SVE-LABEL: smulh_v4i8:
17 ; SVE-NEXT: ptrue p0.h, vl4
18 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
19 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
20 ; SVE-NEXT: sxtb z0.h, p0/m, z0.h
21 ; SVE-NEXT: sxtb z1.h, p0/m, z1.h
22 ; SVE-NEXT: mul z0.h, p0/m, z0.h, z1.h
23 ; SVE-NEXT: lsr z0.h, z0.h, #4
24 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
27 ; SVE2-LABEL: smulh_v4i8:
29 ; SVE2-NEXT: ptrue p0.h, vl4
30 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
31 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
32 ; SVE2-NEXT: sxtb z0.h, p0/m, z0.h
33 ; SVE2-NEXT: sxtb z1.h, p0/m, z1.h
34 ; SVE2-NEXT: mul z0.h, z0.h, z1.h
35 ; SVE2-NEXT: lsr z0.h, z0.h, #4
36 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
40 %1 = sext <4 x i8> %op1 to <4 x i16>
41 %2 = sext <4 x i8> %op2 to <4 x i16>
42 %mul = mul <4 x i16> %1, %2
43 %shr = lshr <4 x i16> %mul, <i16 4, i16 4, i16 4, i16 4>
44 %res = trunc <4 x i16> %shr to <4 x i8>
; smulh_v8i8: sext both <8 x i8> operands to i16, multiply, lshr by 8 and
; truncate back to i8, i.e. keep the high byte of each signed product.
; Expected: a single smulh on .b elements - predicated by ptrue p0.b, vl8
; under +sve, unpredicated (no ptrue) under +sve2.
48 define <8 x i8> @smulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
49 ; SVE-LABEL: smulh_v8i8:
51 ; SVE-NEXT: ptrue p0.b, vl8
52 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
53 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
54 ; SVE-NEXT: smulh z0.b, p0/m, z0.b, z1.b
55 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
58 ; SVE2-LABEL: smulh_v8i8:
60 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
61 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
62 ; SVE2-NEXT: smulh z0.b, z0.b, z1.b
63 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
67 %1 = sext <8 x i8> %op1 to <8 x i16>
68 %2 = sext <8 x i8> %op2 to <8 x i16>
69 %mul = mul <8 x i16> %1, %2
70 %shr = lshr <8 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
71 %res = trunc <8 x i16> %shr to <8 x i8>
; smulh_v16i8: sext both <16 x i8> operands to i16, multiply, lshr by 8 and
; truncate back to i8, i.e. keep the high byte of each signed product.
; Expected: a single smulh on .b elements - predicated by ptrue p0.b, vl16
; under +sve, unpredicated (no ptrue) under +sve2.
75 define <16 x i8> @smulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
76 ; SVE-LABEL: smulh_v16i8:
78 ; SVE-NEXT: ptrue p0.b, vl16
79 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
80 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
81 ; SVE-NEXT: smulh z0.b, p0/m, z0.b, z1.b
82 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
85 ; SVE2-LABEL: smulh_v16i8:
87 ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
88 ; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
89 ; SVE2-NEXT: smulh z0.b, z0.b, z1.b
90 ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
92 %1 = sext <16 x i8> %op1 to <16 x i16>
93 %2 = sext <16 x i8> %op2 to <16 x i16>
94 %mul = mul <16 x i16> %1, %2
95 %shr = lshr <16 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
96 %res = trunc <16 x i16> %shr to <16 x i8>
; smulh_v32i8: loads <32 x i8> from %a and %b, computes the high byte of each
; signed product (sext to i16, mul, lshr 8, trunc) and stores the result to %a.
; Expected: the operands are loaded as two q-register pairs (ldp), multiplied
; with two smulh on .b elements and stored back with stp. Under +sve both
; smulh are predicated by ptrue p0.b, vl16 and the second is preceded by
; movprfx z1, z2; under +sve2 both are unpredicated.
100 define void @smulh_v32i8(ptr %a, ptr %b) {
101 ; SVE-LABEL: smulh_v32i8:
103 ; SVE-NEXT: ptrue p0.b, vl16
104 ; SVE-NEXT: ldp q0, q3, [x1]
105 ; SVE-NEXT: ldp q1, q2, [x0]
106 ; SVE-NEXT: smulh z0.b, p0/m, z0.b, z1.b
107 ; SVE-NEXT: movprfx z1, z2
108 ; SVE-NEXT: smulh z1.b, p0/m, z1.b, z3.b
109 ; SVE-NEXT: stp q0, q1, [x0]
112 ; SVE2-LABEL: smulh_v32i8:
114 ; SVE2-NEXT: ldp q0, q3, [x1]
115 ; SVE2-NEXT: ldp q1, q2, [x0]
116 ; SVE2-NEXT: smulh z0.b, z1.b, z0.b
117 ; SVE2-NEXT: smulh z1.b, z2.b, z3.b
118 ; SVE2-NEXT: stp q0, q1, [x0]
120 %op1 = load <32 x i8>, ptr %a
121 %op2 = load <32 x i8>, ptr %b
122 %1 = sext <32 x i8> %op1 to <32 x i16>
123 %2 = sext <32 x i8> %op2 to <32 x i16>
124 %mul = mul <32 x i16> %1, %2
125 %shr = lshr <32 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
126 %res = trunc <32 x i16> %shr to <32 x i8>
127 store <32 x i8> %res, ptr %a
; smulh_v2i16: sext both <2 x i16> operands to i32, multiply, lshr by 16 and
; truncate back to i16.
; Expected for both run lines: the widened sequence is kept (sxth, mul,
; lsr #16 on .s elements) and no smulh is emitted; only the mul differs
; (predicated under +sve, unpredicated under +sve2).
131 define <2 x i16> @smulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
132 ; SVE-LABEL: smulh_v2i16:
134 ; SVE-NEXT: ptrue p0.s, vl2
135 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
136 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
137 ; SVE-NEXT: sxth z0.s, p0/m, z0.s
138 ; SVE-NEXT: sxth z1.s, p0/m, z1.s
139 ; SVE-NEXT: mul z0.s, p0/m, z0.s, z1.s
140 ; SVE-NEXT: lsr z0.s, z0.s, #16
141 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
144 ; SVE2-LABEL: smulh_v2i16:
146 ; SVE2-NEXT: ptrue p0.s, vl2
147 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
148 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
149 ; SVE2-NEXT: sxth z0.s, p0/m, z0.s
150 ; SVE2-NEXT: sxth z1.s, p0/m, z1.s
151 ; SVE2-NEXT: mul z0.s, z0.s, z1.s
152 ; SVE2-NEXT: lsr z0.s, z0.s, #16
153 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
155 %1 = sext <2 x i16> %op1 to <2 x i32>
156 %2 = sext <2 x i16> %op2 to <2 x i32>
157 %mul = mul <2 x i32> %1, %2
158 %shr = lshr <2 x i32> %mul, <i32 16, i32 16>
159 %res = trunc <2 x i32> %shr to <2 x i16>
; smulh_v4i16: sext both <4 x i16> operands to i32, multiply, lshr by 16 and
; truncate back to i16, i.e. keep the high half of each signed product.
; Expected: a single smulh on .h elements - predicated by ptrue p0.h, vl4
; under +sve, unpredicated (no ptrue) under +sve2.
163 define <4 x i16> @smulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
164 ; SVE-LABEL: smulh_v4i16:
166 ; SVE-NEXT: ptrue p0.h, vl4
167 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
168 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
169 ; SVE-NEXT: smulh z0.h, p0/m, z0.h, z1.h
170 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
173 ; SVE2-LABEL: smulh_v4i16:
175 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
176 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
177 ; SVE2-NEXT: smulh z0.h, z0.h, z1.h
178 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
180 %1 = sext <4 x i16> %op1 to <4 x i32>
181 %2 = sext <4 x i16> %op2 to <4 x i32>
182 %mul = mul <4 x i32> %1, %2
183 %shr = lshr <4 x i32> %mul, <i32 16, i32 16, i32 16, i32 16>
184 %res = trunc <4 x i32> %shr to <4 x i16>
; smulh_v8i16: sext both <8 x i16> operands to i32, multiply, lshr by 16 and
; truncate back to i16, i.e. keep the high half of each signed product.
; Expected: a single smulh on .h elements - predicated by ptrue p0.h, vl8
; under +sve, unpredicated (no ptrue) under +sve2.
188 define <8 x i16> @smulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
189 ; SVE-LABEL: smulh_v8i16:
191 ; SVE-NEXT: ptrue p0.h, vl8
192 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
193 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
194 ; SVE-NEXT: smulh z0.h, p0/m, z0.h, z1.h
195 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
198 ; SVE2-LABEL: smulh_v8i16:
200 ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
201 ; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
202 ; SVE2-NEXT: smulh z0.h, z0.h, z1.h
203 ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
205 %1 = sext <8 x i16> %op1 to <8 x i32>
206 %2 = sext <8 x i16> %op2 to <8 x i32>
207 %mul = mul <8 x i32> %1, %2
208 %shr = lshr <8 x i32> %mul, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
209 %res = trunc <8 x i32> %shr to <8 x i16>
; smulh_v16i16: loads <16 x i16> from %a and %b, computes the high half of
; each signed product (sext to i32, mul, lshr 16, trunc) and stores to %a.
; Expected: the operands are loaded as two q-register pairs (ldp), multiplied
; with two smulh on .h elements and stored back with stp. Under +sve both
; smulh are predicated by ptrue p0.h, vl8 and the second is preceded by
; movprfx z1, z2; under +sve2 both are unpredicated.
213 define void @smulh_v16i16(ptr %a, ptr %b) {
214 ; SVE-LABEL: smulh_v16i16:
216 ; SVE-NEXT: ptrue p0.h, vl8
217 ; SVE-NEXT: ldp q0, q3, [x1]
218 ; SVE-NEXT: ldp q1, q2, [x0]
219 ; SVE-NEXT: smulh z0.h, p0/m, z0.h, z1.h
220 ; SVE-NEXT: movprfx z1, z2
221 ; SVE-NEXT: smulh z1.h, p0/m, z1.h, z3.h
222 ; SVE-NEXT: stp q0, q1, [x0]
225 ; SVE2-LABEL: smulh_v16i16:
227 ; SVE2-NEXT: ldp q0, q3, [x1]
228 ; SVE2-NEXT: ldp q1, q2, [x0]
229 ; SVE2-NEXT: smulh z0.h, z1.h, z0.h
230 ; SVE2-NEXT: smulh z1.h, z2.h, z3.h
231 ; SVE2-NEXT: stp q0, q1, [x0]
233 %op1 = load <16 x i16>, ptr %a
234 %op2 = load <16 x i16>, ptr %b
235 %1 = sext <16 x i16> %op1 to <16 x i32>
236 %2 = sext <16 x i16> %op2 to <16 x i32>
237 %mul = mul <16 x i32> %1, %2
238 %shr = lshr <16 x i32> %mul, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
239 %res = trunc <16 x i32> %shr to <16 x i16>
240 store <16 x i16> %res, ptr %a
; smulh_v2i32: sext both <2 x i32> operands to i64, multiply, lshr by 32 and
; truncate back to i32, i.e. keep the high half of each signed product.
; Expected: a single smulh on .s elements - predicated by ptrue p0.s, vl2
; under +sve, unpredicated (no ptrue) under +sve2.
244 define <2 x i32> @smulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
245 ; SVE-LABEL: smulh_v2i32:
247 ; SVE-NEXT: ptrue p0.s, vl2
248 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
249 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
250 ; SVE-NEXT: smulh z0.s, p0/m, z0.s, z1.s
251 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
254 ; SVE2-LABEL: smulh_v2i32:
256 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
257 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
258 ; SVE2-NEXT: smulh z0.s, z0.s, z1.s
259 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
261 %1 = sext <2 x i32> %op1 to <2 x i64>
262 %2 = sext <2 x i32> %op2 to <2 x i64>
263 %mul = mul <2 x i64> %1, %2
264 %shr = lshr <2 x i64> %mul, <i64 32, i64 32>
265 %res = trunc <2 x i64> %shr to <2 x i32>
; smulh_v4i32: sext both <4 x i32> operands to i64, multiply, lshr by 32 and
; truncate back to i32, i.e. keep the high half of each signed product.
; Expected: a single smulh on .s elements - predicated by ptrue p0.s, vl4
; under +sve, unpredicated (no ptrue) under +sve2.
269 define <4 x i32> @smulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
270 ; SVE-LABEL: smulh_v4i32:
272 ; SVE-NEXT: ptrue p0.s, vl4
273 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
274 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
275 ; SVE-NEXT: smulh z0.s, p0/m, z0.s, z1.s
276 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
279 ; SVE2-LABEL: smulh_v4i32:
281 ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
282 ; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
283 ; SVE2-NEXT: smulh z0.s, z0.s, z1.s
284 ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
286 %1 = sext <4 x i32> %op1 to <4 x i64>
287 %2 = sext <4 x i32> %op2 to <4 x i64>
288 %mul = mul <4 x i64> %1, %2
289 %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
290 %res = trunc <4 x i64> %shr to <4 x i32>
; smulh_v8i32: loads <8 x i32> from %a and %b, computes the high half of each
; signed product (sext to i64, mul, lshr 32, trunc) and stores to %a.
; Expected: the operands are loaded as two q-register pairs (ldp), multiplied
; with two smulh on .s elements and stored back with stp. Under +sve both
; smulh are predicated by ptrue p0.s, vl4 and the second is preceded by
; movprfx z1, z2; under +sve2 both are unpredicated.
294 define void @smulh_v8i32(ptr %a, ptr %b) {
295 ; SVE-LABEL: smulh_v8i32:
297 ; SVE-NEXT: ptrue p0.s, vl4
298 ; SVE-NEXT: ldp q0, q3, [x1]
299 ; SVE-NEXT: ldp q1, q2, [x0]
300 ; SVE-NEXT: smulh z0.s, p0/m, z0.s, z1.s
301 ; SVE-NEXT: movprfx z1, z2
302 ; SVE-NEXT: smulh z1.s, p0/m, z1.s, z3.s
303 ; SVE-NEXT: stp q0, q1, [x0]
306 ; SVE2-LABEL: smulh_v8i32:
308 ; SVE2-NEXT: ldp q0, q3, [x1]
309 ; SVE2-NEXT: ldp q1, q2, [x0]
310 ; SVE2-NEXT: smulh z0.s, z1.s, z0.s
311 ; SVE2-NEXT: smulh z1.s, z2.s, z3.s
312 ; SVE2-NEXT: stp q0, q1, [x0]
314 %op1 = load <8 x i32>, ptr %a
315 %op2 = load <8 x i32>, ptr %b
316 %1 = sext <8 x i32> %op1 to <8 x i64>
317 %2 = sext <8 x i32> %op2 to <8 x i64>
318 %mul = mul <8 x i64> %1, %2
319 %shr = lshr <8 x i64> %mul, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
320 %res = trunc <8 x i64> %shr to <8 x i32>
321 store <8 x i32> %res, ptr %a
; smulh_v1i64: sext both <1 x i64> operands to i128, multiply, lshr by 64 and
; truncate back to i64, i.e. keep the high half of the signed product.
; Unlike the other functions here, the shift amount is not a literal constant
; vector: it is built with insertelement + shufflevector (%splat) and that
; value is what the lshr consumes.
; Expected: a single smulh on .d elements - predicated by ptrue p0.d, vl1
; under +sve, unpredicated (no ptrue) under +sve2.
325 define <1 x i64> @smulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
326 ; SVE-LABEL: smulh_v1i64:
328 ; SVE-NEXT: ptrue p0.d, vl1
329 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
330 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
331 ; SVE-NEXT: smulh z0.d, p0/m, z0.d, z1.d
332 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
335 ; SVE2-LABEL: smulh_v1i64:
337 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
338 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
339 ; SVE2-NEXT: smulh z0.d, z0.d, z1.d
340 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
342 %insert = insertelement <1 x i128> undef, i128 64, i128 0
343 %splat = shufflevector <1 x i128> %insert, <1 x i128> undef, <1 x i32> zeroinitializer
344 %1 = sext <1 x i64> %op1 to <1 x i128>
345 %2 = sext <1 x i64> %op2 to <1 x i128>
346 %mul = mul <1 x i128> %1, %2
347 %shr = lshr <1 x i128> %mul, %splat
348 %res = trunc <1 x i128> %shr to <1 x i64>
; smulh_v2i64: sext both <2 x i64> operands to i128, multiply, lshr by 64 and
; truncate back to i64, i.e. keep the high half of each signed product.
; Expected: a single smulh on .d elements - predicated by ptrue p0.d, vl2
; under +sve, unpredicated (no ptrue) under +sve2.
352 define <2 x i64> @smulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
353 ; SVE-LABEL: smulh_v2i64:
355 ; SVE-NEXT: ptrue p0.d, vl2
356 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
357 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
358 ; SVE-NEXT: smulh z0.d, p0/m, z0.d, z1.d
359 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
362 ; SVE2-LABEL: smulh_v2i64:
364 ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
365 ; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
366 ; SVE2-NEXT: smulh z0.d, z0.d, z1.d
367 ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
369 %1 = sext <2 x i64> %op1 to <2 x i128>
370 %2 = sext <2 x i64> %op2 to <2 x i128>
371 %mul = mul <2 x i128> %1, %2
372 %shr = lshr <2 x i128> %mul, <i128 64, i128 64>
373 %res = trunc <2 x i128> %shr to <2 x i64>
; smulh_v4i64: loads <4 x i64> from %a and %b, computes the high half of each
; signed product (sext to i128, mul, lshr 64, trunc) and stores to %a.
; Expected: the operands are loaded as two q-register pairs (ldp), multiplied
; with two smulh on .d elements and stored back with stp. Under +sve both
; smulh are predicated by ptrue p0.d, vl2 and the second is preceded by
; movprfx z1, z2; under +sve2 both are unpredicated.
377 define void @smulh_v4i64(ptr %a, ptr %b) {
378 ; SVE-LABEL: smulh_v4i64:
380 ; SVE-NEXT: ptrue p0.d, vl2
381 ; SVE-NEXT: ldp q0, q3, [x1]
382 ; SVE-NEXT: ldp q1, q2, [x0]
383 ; SVE-NEXT: smulh z0.d, p0/m, z0.d, z1.d
384 ; SVE-NEXT: movprfx z1, z2
385 ; SVE-NEXT: smulh z1.d, p0/m, z1.d, z3.d
386 ; SVE-NEXT: stp q0, q1, [x0]
389 ; SVE2-LABEL: smulh_v4i64:
391 ; SVE2-NEXT: ldp q0, q3, [x1]
392 ; SVE2-NEXT: ldp q1, q2, [x0]
393 ; SVE2-NEXT: smulh z0.d, z1.d, z0.d
394 ; SVE2-NEXT: smulh z1.d, z2.d, z3.d
395 ; SVE2-NEXT: stp q0, q1, [x0]
397 %op1 = load <4 x i64>, ptr %a
398 %op2 = load <4 x i64>, ptr %b
399 %1 = sext <4 x i64> %op1 to <4 x i128>
400 %2 = sext <4 x i64> %op2 to <4 x i128>
401 %mul = mul <4 x i128> %1, %2
402 %shr = lshr <4 x i128> %mul, <i128 64, i128 64, i128 64, i128 64>
403 %res = trunc <4 x i128> %shr to <4 x i64>
404 store <4 x i64> %res, ptr %a
; umulh_v4i8: zext both <4 x i8> operands to i16, multiply, lshr by 4 and
; truncate back to i8.
; Expected for both run lines: the widened sequence is kept (and #0xff masks,
; mul, lsr #4 on .h elements) and no umulh is emitted. Under +sve the mul is
; predicated by ptrue p0.h, vl4; under +sve2 it is unpredicated and no ptrue
; is expected.
; NOTE(review): the shift amount is 4, not the i8 element width of 8 - confirm
; this is intentional before changing it, since the assertions encode lsr #4.
412 define <4 x i8> @umulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
413 ; SVE-LABEL: umulh_v4i8:
415 ; SVE-NEXT: ptrue p0.h, vl4
416 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
417 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
418 ; SVE-NEXT: and z0.h, z0.h, #0xff
419 ; SVE-NEXT: and z1.h, z1.h, #0xff
420 ; SVE-NEXT: mul z0.h, p0/m, z0.h, z1.h
421 ; SVE-NEXT: lsr z0.h, z0.h, #4
422 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
425 ; SVE2-LABEL: umulh_v4i8:
427 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
428 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
429 ; SVE2-NEXT: and z0.h, z0.h, #0xff
430 ; SVE2-NEXT: and z1.h, z1.h, #0xff
431 ; SVE2-NEXT: mul z0.h, z0.h, z1.h
432 ; SVE2-NEXT: lsr z0.h, z0.h, #4
433 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
435 %1 = zext <4 x i8> %op1 to <4 x i16>
436 %2 = zext <4 x i8> %op2 to <4 x i16>
437 %mul = mul <4 x i16> %1, %2
438 %shr = lshr <4 x i16> %mul, <i16 4, i16 4, i16 4, i16 4>
439 %res = trunc <4 x i16> %shr to <4 x i8>
; umulh_v8i8: zext both <8 x i8> operands to i16, multiply, lshr by 8 and
; truncate back to i8, i.e. keep the high byte of each unsigned product.
; Expected: a single umulh on .b elements - predicated by ptrue p0.b, vl8
; under +sve, unpredicated (no ptrue) under +sve2.
443 define <8 x i8> @umulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
444 ; SVE-LABEL: umulh_v8i8:
446 ; SVE-NEXT: ptrue p0.b, vl8
447 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
448 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
449 ; SVE-NEXT: umulh z0.b, p0/m, z0.b, z1.b
450 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
453 ; SVE2-LABEL: umulh_v8i8:
455 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
456 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
457 ; SVE2-NEXT: umulh z0.b, z0.b, z1.b
458 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
460 %1 = zext <8 x i8> %op1 to <8 x i16>
461 %2 = zext <8 x i8> %op2 to <8 x i16>
462 %mul = mul <8 x i16> %1, %2
463 %shr = lshr <8 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
464 %res = trunc <8 x i16> %shr to <8 x i8>
; umulh_v16i8: zext both <16 x i8> operands to i16, multiply, lshr by 8 and
; truncate back to i8, i.e. keep the high byte of each unsigned product.
; Expected: a single umulh on .b elements - predicated by ptrue p0.b, vl16
; under +sve, unpredicated (no ptrue) under +sve2.
468 define <16 x i8> @umulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
469 ; SVE-LABEL: umulh_v16i8:
471 ; SVE-NEXT: ptrue p0.b, vl16
472 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
473 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
474 ; SVE-NEXT: umulh z0.b, p0/m, z0.b, z1.b
475 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
478 ; SVE2-LABEL: umulh_v16i8:
480 ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
481 ; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
482 ; SVE2-NEXT: umulh z0.b, z0.b, z1.b
483 ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
485 %1 = zext <16 x i8> %op1 to <16 x i16>
486 %2 = zext <16 x i8> %op2 to <16 x i16>
487 %mul = mul <16 x i16> %1, %2
488 %shr = lshr <16 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
489 %res = trunc <16 x i16> %shr to <16 x i8>
; umulh_v32i8: loads <32 x i8> from %a and %b, computes the high byte of each
; unsigned product (zext to i16, mul, lshr 8, trunc) and stores to %a.
; Expected: the operands are loaded as two q-register pairs (ldp), multiplied
; with two umulh on .b elements and stored back with stp. Under +sve both
; umulh are predicated by ptrue p0.b, vl16 and the second is preceded by
; movprfx z1, z2; under +sve2 both are unpredicated.
493 define void @umulh_v32i8(ptr %a, ptr %b) {
494 ; SVE-LABEL: umulh_v32i8:
496 ; SVE-NEXT: ptrue p0.b, vl16
497 ; SVE-NEXT: ldp q0, q3, [x1]
498 ; SVE-NEXT: ldp q1, q2, [x0]
499 ; SVE-NEXT: umulh z0.b, p0/m, z0.b, z1.b
500 ; SVE-NEXT: movprfx z1, z2
501 ; SVE-NEXT: umulh z1.b, p0/m, z1.b, z3.b
502 ; SVE-NEXT: stp q0, q1, [x0]
505 ; SVE2-LABEL: umulh_v32i8:
507 ; SVE2-NEXT: ldp q0, q3, [x1]
508 ; SVE2-NEXT: ldp q1, q2, [x0]
509 ; SVE2-NEXT: umulh z0.b, z1.b, z0.b
510 ; SVE2-NEXT: umulh z1.b, z2.b, z3.b
511 ; SVE2-NEXT: stp q0, q1, [x0]
513 %op1 = load <32 x i8>, ptr %a
514 %op2 = load <32 x i8>, ptr %b
515 %1 = zext <32 x i8> %op1 to <32 x i16>
516 %2 = zext <32 x i8> %op2 to <32 x i16>
517 %mul = mul <32 x i16> %1, %2
518 %shr = lshr <32 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
519 %res = trunc <32 x i16> %shr to <32 x i8>
520 store <32 x i8> %res, ptr %a
; umulh_v2i16: zext both <2 x i16> operands to i32, multiply, lshr by 16 and
; truncate back to i16.
; Expected for both run lines: the widened sequence is kept (and #0xffff
; masks, mul, lsr #16 on .s elements) and no umulh is emitted. Under +sve the
; mul is predicated by ptrue p0.s, vl2; under +sve2 it is unpredicated and no
; ptrue is expected.
524 define <2 x i16> @umulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
525 ; SVE-LABEL: umulh_v2i16:
527 ; SVE-NEXT: ptrue p0.s, vl2
528 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
529 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
530 ; SVE-NEXT: and z0.s, z0.s, #0xffff
531 ; SVE-NEXT: and z1.s, z1.s, #0xffff
532 ; SVE-NEXT: mul z0.s, p0/m, z0.s, z1.s
533 ; SVE-NEXT: lsr z0.s, z0.s, #16
534 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
537 ; SVE2-LABEL: umulh_v2i16:
539 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
540 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
541 ; SVE2-NEXT: and z0.s, z0.s, #0xffff
542 ; SVE2-NEXT: and z1.s, z1.s, #0xffff
543 ; SVE2-NEXT: mul z0.s, z0.s, z1.s
544 ; SVE2-NEXT: lsr z0.s, z0.s, #16
545 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
547 %1 = zext <2 x i16> %op1 to <2 x i32>
548 %2 = zext <2 x i16> %op2 to <2 x i32>
549 %mul = mul <2 x i32> %1, %2
550 %shr = lshr <2 x i32> %mul, <i32 16, i32 16>
551 %res = trunc <2 x i32> %shr to <2 x i16>
; umulh_v4i16: zext both <4 x i16> operands to i32, multiply, lshr by 16 and
; truncate back to i16, i.e. keep the high half of each unsigned product.
; Expected: a single umulh on .h elements - predicated by ptrue p0.h, vl4
; under +sve, unpredicated (no ptrue) under +sve2.
555 define <4 x i16> @umulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
556 ; SVE-LABEL: umulh_v4i16:
558 ; SVE-NEXT: ptrue p0.h, vl4
559 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
560 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
561 ; SVE-NEXT: umulh z0.h, p0/m, z0.h, z1.h
562 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
565 ; SVE2-LABEL: umulh_v4i16:
567 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
568 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
569 ; SVE2-NEXT: umulh z0.h, z0.h, z1.h
570 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
572 %1 = zext <4 x i16> %op1 to <4 x i32>
573 %2 = zext <4 x i16> %op2 to <4 x i32>
574 %mul = mul <4 x i32> %1, %2
575 %shr = lshr <4 x i32> %mul, <i32 16, i32 16, i32 16, i32 16>
576 %res = trunc <4 x i32> %shr to <4 x i16>
; umulh_v8i16: zext both <8 x i16> operands to i32, multiply, lshr by 16 and
; truncate back to i16, i.e. keep the high half of each unsigned product.
; Expected: a single umulh on .h elements - predicated by ptrue p0.h, vl8
; under +sve, unpredicated (no ptrue) under +sve2.
580 define <8 x i16> @umulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
581 ; SVE-LABEL: umulh_v8i16:
583 ; SVE-NEXT: ptrue p0.h, vl8
584 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
585 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
586 ; SVE-NEXT: umulh z0.h, p0/m, z0.h, z1.h
587 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
590 ; SVE2-LABEL: umulh_v8i16:
592 ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
593 ; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
594 ; SVE2-NEXT: umulh z0.h, z0.h, z1.h
595 ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
597 %1 = zext <8 x i16> %op1 to <8 x i32>
598 %2 = zext <8 x i16> %op2 to <8 x i32>
599 %mul = mul <8 x i32> %1, %2
600 %shr = lshr <8 x i32> %mul, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
601 %res = trunc <8 x i32> %shr to <8 x i16>
; umulh_v16i16: loads <16 x i16> from %a and %b, computes the high half of
; each unsigned product (zext to i32, mul, lshr 16, trunc) and stores to %a.
; Expected: the operands are loaded as two q-register pairs (ldp), multiplied
; with two umulh on .h elements and stored back with stp. Under +sve both
; umulh are predicated by ptrue p0.h, vl8 and the second is preceded by
; movprfx z1, z2; under +sve2 both are unpredicated.
605 define void @umulh_v16i16(ptr %a, ptr %b) {
606 ; SVE-LABEL: umulh_v16i16:
608 ; SVE-NEXT: ptrue p0.h, vl8
609 ; SVE-NEXT: ldp q0, q3, [x1]
610 ; SVE-NEXT: ldp q1, q2, [x0]
611 ; SVE-NEXT: umulh z0.h, p0/m, z0.h, z1.h
612 ; SVE-NEXT: movprfx z1, z2
613 ; SVE-NEXT: umulh z1.h, p0/m, z1.h, z3.h
614 ; SVE-NEXT: stp q0, q1, [x0]
617 ; SVE2-LABEL: umulh_v16i16:
619 ; SVE2-NEXT: ldp q0, q3, [x1]
620 ; SVE2-NEXT: ldp q1, q2, [x0]
621 ; SVE2-NEXT: umulh z0.h, z1.h, z0.h
622 ; SVE2-NEXT: umulh z1.h, z2.h, z3.h
623 ; SVE2-NEXT: stp q0, q1, [x0]
625 %op1 = load <16 x i16>, ptr %a
626 %op2 = load <16 x i16>, ptr %b
627 %1 = zext <16 x i16> %op1 to <16 x i32>
628 %2 = zext <16 x i16> %op2 to <16 x i32>
629 %mul = mul <16 x i32> %1, %2
630 %shr = lshr <16 x i32> %mul, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
631 %res = trunc <16 x i32> %shr to <16 x i16>
632 store <16 x i16> %res, ptr %a
; umulh_v2i32: zext both <2 x i32> operands to i64, multiply, lshr by 32 and
; truncate back to i32, i.e. keep the high half of each unsigned product.
; Expected: a single umulh on .s elements - predicated by ptrue p0.s, vl2
; under +sve, unpredicated (no ptrue) under +sve2.
636 define <2 x i32> @umulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
637 ; SVE-LABEL: umulh_v2i32:
639 ; SVE-NEXT: ptrue p0.s, vl2
640 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
641 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
642 ; SVE-NEXT: umulh z0.s, p0/m, z0.s, z1.s
643 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
646 ; SVE2-LABEL: umulh_v2i32:
648 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
649 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
650 ; SVE2-NEXT: umulh z0.s, z0.s, z1.s
651 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
653 %1 = zext <2 x i32> %op1 to <2 x i64>
654 %2 = zext <2 x i32> %op2 to <2 x i64>
655 %mul = mul <2 x i64> %1, %2
656 %shr = lshr <2 x i64> %mul, <i64 32, i64 32>
657 %res = trunc <2 x i64> %shr to <2 x i32>
; umulh_v4i32: zext both <4 x i32> operands to i64, multiply, lshr by 32 and
; truncate back to i32, i.e. keep the high half of each unsigned product.
; Expected: a single umulh on .s elements - predicated by ptrue p0.s, vl4
; under +sve, unpredicated (no ptrue) under +sve2.
661 define <4 x i32> @umulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
662 ; SVE-LABEL: umulh_v4i32:
664 ; SVE-NEXT: ptrue p0.s, vl4
665 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
666 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
667 ; SVE-NEXT: umulh z0.s, p0/m, z0.s, z1.s
668 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
671 ; SVE2-LABEL: umulh_v4i32:
673 ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
674 ; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
675 ; SVE2-NEXT: umulh z0.s, z0.s, z1.s
676 ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
678 %1 = zext <4 x i32> %op1 to <4 x i64>
679 %2 = zext <4 x i32> %op2 to <4 x i64>
680 %mul = mul <4 x i64> %1, %2
681 %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
682 %res = trunc <4 x i64> %shr to <4 x i32>
; umulh_v8i32: loads <8 x i32> from %a and %b, computes the high half of each
; unsigned product (zext to i64, mul, lshr 32, trunc) and stores to %a.
; Expected: the operands are loaded as two q-register pairs (ldp), multiplied
; with two umulh on .s elements and stored back with stp. Under +sve both
; umulh are predicated by ptrue p0.s, vl4 and the second is preceded by
; movprfx z1, z2; under +sve2 both are unpredicated.
686 define void @umulh_v8i32(ptr %a, ptr %b) {
687 ; SVE-LABEL: umulh_v8i32:
689 ; SVE-NEXT: ptrue p0.s, vl4
690 ; SVE-NEXT: ldp q0, q3, [x1]
691 ; SVE-NEXT: ldp q1, q2, [x0]
692 ; SVE-NEXT: umulh z0.s, p0/m, z0.s, z1.s
693 ; SVE-NEXT: movprfx z1, z2
694 ; SVE-NEXT: umulh z1.s, p0/m, z1.s, z3.s
695 ; SVE-NEXT: stp q0, q1, [x0]
698 ; SVE2-LABEL: umulh_v8i32:
700 ; SVE2-NEXT: ldp q0, q3, [x1]
701 ; SVE2-NEXT: ldp q1, q2, [x0]
702 ; SVE2-NEXT: umulh z0.s, z1.s, z0.s
703 ; SVE2-NEXT: umulh z1.s, z2.s, z3.s
704 ; SVE2-NEXT: stp q0, q1, [x0]
706 %op1 = load <8 x i32>, ptr %a
707 %op2 = load <8 x i32>, ptr %b
710 %1 = zext <8 x i32> %op1 to <8 x i64>
711 %2 = zext <8 x i32> %op2 to <8 x i64>
712 %mul = mul <8 x i64> %1, %2
713 %shr = lshr <8 x i64> %mul, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
714 %res = trunc <8 x i64> %shr to <8 x i32>
715 store <8 x i32> %res, ptr %a
; umulh_v1i64: zext both <1 x i64> operands to i128, multiply, lshr by 64 and
; truncate back to i64, i.e. keep the high half of the unsigned product.
; Expected: a single umulh on .d elements - predicated by ptrue p0.d, vl1
; under +sve, unpredicated (no ptrue) under +sve2.
719 define <1 x i64> @umulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
720 ; SVE-LABEL: umulh_v1i64:
722 ; SVE-NEXT: ptrue p0.d, vl1
723 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
724 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
725 ; SVE-NEXT: umulh z0.d, p0/m, z0.d, z1.d
726 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
729 ; SVE2-LABEL: umulh_v1i64:
731 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
732 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
733 ; SVE2-NEXT: umulh z0.d, z0.d, z1.d
734 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
736 %1 = zext <1 x i64> %op1 to <1 x i128>
737 %2 = zext <1 x i64> %op2 to <1 x i128>
738 %mul = mul <1 x i128> %1, %2
739 %shr = lshr <1 x i128> %mul, <i128 64>
740 %res = trunc <1 x i128> %shr to <1 x i64>
; umulh_v2i64: zext both <2 x i64> operands to i128, multiply, lshr by 64 and
; truncate back to i64, i.e. keep the high half of each unsigned product.
; Expected: a single umulh on .d elements - predicated by ptrue p0.d, vl2
; under +sve, unpredicated (no ptrue) under +sve2.
744 define <2 x i64> @umulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
745 ; SVE-LABEL: umulh_v2i64:
747 ; SVE-NEXT: ptrue p0.d, vl2
748 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
749 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
750 ; SVE-NEXT: umulh z0.d, p0/m, z0.d, z1.d
751 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
754 ; SVE2-LABEL: umulh_v2i64:
756 ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
757 ; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
758 ; SVE2-NEXT: umulh z0.d, z0.d, z1.d
759 ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
761 %1 = zext <2 x i64> %op1 to <2 x i128>
762 %2 = zext <2 x i64> %op2 to <2 x i128>
763 %mul = mul <2 x i128> %1, %2
764 %shr = lshr <2 x i128> %mul, <i128 64, i128 64>
765 %res = trunc <2 x i128> %shr to <2 x i64>
; umulh_v4i64: loads <4 x i64> from %a and %b, computes the high half of each
; unsigned product (zext to i128, mul, lshr 64, trunc) and stores to %a.
; Expected: the operands are loaded as two q-register pairs (ldp), multiplied
; with two umulh on .d elements and stored back with stp. Under +sve both
; umulh are predicated by ptrue p0.d, vl2 and the second is preceded by
; movprfx z1, z2; under +sve2 both are unpredicated.
769 define void @umulh_v4i64(ptr %a, ptr %b) {
770 ; SVE-LABEL: umulh_v4i64:
772 ; SVE-NEXT: ptrue p0.d, vl2
773 ; SVE-NEXT: ldp q0, q3, [x1]
774 ; SVE-NEXT: ldp q1, q2, [x0]
775 ; SVE-NEXT: umulh z0.d, p0/m, z0.d, z1.d
776 ; SVE-NEXT: movprfx z1, z2
777 ; SVE-NEXT: umulh z1.d, p0/m, z1.d, z3.d
778 ; SVE-NEXT: stp q0, q1, [x0]
781 ; SVE2-LABEL: umulh_v4i64:
783 ; SVE2-NEXT: ldp q0, q3, [x1]
784 ; SVE2-NEXT: ldp q1, q2, [x0]
785 ; SVE2-NEXT: umulh z0.d, z1.d, z0.d
786 ; SVE2-NEXT: umulh z1.d, z2.d, z3.d
787 ; SVE2-NEXT: stp q0, q1, [x0]
789 %op1 = load <4 x i64>, ptr %a
790 %op2 = load <4 x i64>, ptr %b
791 %1 = zext <4 x i64> %op1 to <4 x i128>
792 %2 = zext <4 x i64> %op2 to <4 x i128>
793 %mul = mul <4 x i128> %1, %2
794 %shr = lshr <4 x i128> %mul, <i128 64, i128 64, i128 64, i128 64>
795 %res = trunc <4 x i128> %shr to <4 x i64>
796 store <4 x i64> %res, ptr %a
799 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: