1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s --check-prefixes=CHECK,SVE
3 ; RUN: llc -mattr=+sve2 -force-streaming-compatible-sve < %s | FileCheck %s --check-prefixes=CHECK,SVE2
4 ; RUN: llc -mattr=+sme -force-streaming-compatible-sve < %s | FileCheck %s --check-prefixes=CHECK,SVE2
; The three llc configurations above share assertion prefixes: the +sve output
; is verified against the SVE prefix, while the +sve2 and +sme outputs are both
; verified against the SVE2 prefix.
6 ; This test only tests the legal types for a given vector width, as mulh nodes
7 ; do not get generated for non-legal types.
9 target triple = "aarch64-unknown-linux-gnu"
; Signed widening multiply on <4 x i8>: both operands are sign-extended to
; <4 x i16>, multiplied, logically shifted right by 4 and truncated to <4 x i8>.
; Both prefixes expect sxtb + mul + lsr #4 and no smulh instruction.
; NOTE(review): the shift amount is 4, whereas smulh_v8i8 shifts the i16
; product by 8; confirm this is intentional before changing it, because the
; autogenerated assertions encode lsr #4.
; The unused %insert/%splat values that used to precede the sext were dead and
; have been dropped; the shift amount is the inline constant vector.
15 define <4 x i8> @smulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
16 ; SVE-LABEL: smulh_v4i8:
18 ; SVE-NEXT: ptrue p0.h, vl4
19 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
20 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
21 ; SVE-NEXT: sxtb z0.h, p0/m, z0.h
22 ; SVE-NEXT: sxtb z1.h, p0/m, z1.h
23 ; SVE-NEXT: mul z0.h, p0/m, z0.h, z1.h
24 ; SVE-NEXT: lsr z0.h, z0.h, #4
25 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
28 ; SVE2-LABEL: smulh_v4i8:
30 ; SVE2-NEXT: ptrue p0.h, vl4
31 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
32 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
33 ; SVE2-NEXT: sxtb z0.h, p0/m, z0.h
34 ; SVE2-NEXT: sxtb z1.h, p0/m, z1.h
35 ; SVE2-NEXT: mul z0.h, z0.h, z1.h
36 ; SVE2-NEXT: lsr z0.h, z0.h, #4
37 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
41 %1 = sext <4 x i8> %op1 to <4 x i16>
42 %2 = sext <4 x i8> %op2 to <4 x i16>
43 %mul = mul <4 x i16> %1, %2
44 %shr = lshr <4 x i16> %mul, <i16 4, i16 4, i16 4, i16 4>
45 %res = trunc <4 x i16> %shr to <4 x i8>
; Signed multiply-high on <8 x i8>: both operands are sign-extended to i16,
; multiplied, shifted right by 8 and truncated back to i8.
; With +sve the expected output is a predicated smulh governed by
; ptrue p0.b, vl8; the SVE2 prefix expects the unpredicated smulh with no ptrue.
; The unused %insert/%splat values that used to precede the sext were dead and
; have been dropped; the shift amount is the inline constant vector.
49 define <8 x i8> @smulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
50 ; SVE-LABEL: smulh_v8i8:
52 ; SVE-NEXT: ptrue p0.b, vl8
53 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
54 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
55 ; SVE-NEXT: smulh z0.b, p0/m, z0.b, z1.b
56 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
59 ; SVE2-LABEL: smulh_v8i8:
61 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
62 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
63 ; SVE2-NEXT: smulh z0.b, z0.b, z1.b
64 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
68 %1 = sext <8 x i8> %op1 to <8 x i16>
69 %2 = sext <8 x i8> %op2 to <8 x i16>
70 %mul = mul <8 x i16> %1, %2
71 %shr = lshr <8 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
72 %res = trunc <8 x i16> %shr to <8 x i8>
; Signed multiply-high on <16 x i8>: sext to i16, mul, lshr by 8, trunc to i8.
; With +sve the expected output is a predicated smulh governed by
; ptrue p0.b, vl16; the SVE2 prefix expects the unpredicated smulh with no ptrue.
76 define <16 x i8> @smulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
77 ; SVE-LABEL: smulh_v16i8:
79 ; SVE-NEXT: ptrue p0.b, vl16
80 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
81 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
82 ; SVE-NEXT: smulh z0.b, p0/m, z0.b, z1.b
83 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
86 ; SVE2-LABEL: smulh_v16i8:
88 ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
89 ; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
90 ; SVE2-NEXT: smulh z0.b, z0.b, z1.b
91 ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
93 %1 = sext <16 x i8> %op1 to <16 x i16>
94 %2 = sext <16 x i8> %op2 to <16 x i16>
95 %mul = mul <16 x i16> %1, %2
96 %shr = lshr <16 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
97 %res = trunc <16 x i16> %shr to <16 x i8>
; Signed multiply-high on <32 x i8> held in memory: loads both operands from %a
; and %b, computes sext/mul/lshr-by-8/trunc, and stores the result back to %a.
; The expected output handles the data as two q-register halves (ldp/stp) with
; one smulh per half; the +sve form is predicated by ptrue p0.b, vl16 and uses
; a movprfx before the second smulh, the SVE2 prefix expects neither.
101 define void @smulh_v32i8(ptr %a, ptr %b) {
102 ; SVE-LABEL: smulh_v32i8:
104 ; SVE-NEXT: ptrue p0.b, vl16
105 ; SVE-NEXT: ldp q0, q3, [x1]
106 ; SVE-NEXT: ldp q1, q2, [x0]
107 ; SVE-NEXT: smulh z0.b, p0/m, z0.b, z1.b
108 ; SVE-NEXT: movprfx z1, z2
109 ; SVE-NEXT: smulh z1.b, p0/m, z1.b, z3.b
110 ; SVE-NEXT: stp q0, q1, [x0]
113 ; SVE2-LABEL: smulh_v32i8:
115 ; SVE2-NEXT: ldp q0, q3, [x1]
116 ; SVE2-NEXT: ldp q1, q2, [x0]
117 ; SVE2-NEXT: smulh z0.b, z1.b, z0.b
118 ; SVE2-NEXT: smulh z1.b, z2.b, z3.b
119 ; SVE2-NEXT: stp q0, q1, [x0]
121 %op1 = load <32 x i8>, ptr %a
122 %op2 = load <32 x i8>, ptr %b
123 %1 = sext <32 x i8> %op1 to <32 x i16>
124 %2 = sext <32 x i8> %op2 to <32 x i16>
125 %mul = mul <32 x i16> %1, %2
126 %shr = lshr <32 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
127 %res = trunc <32 x i16> %shr to <32 x i8>
128 store <32 x i8> %res, ptr %a
; Signed multiply-high pattern on <2 x i16>: sext to i32, mul, lshr by 16,
; trunc to i16. No smulh instruction is expected for this type: both prefixes
; expect sxth on each operand followed by mul and lsr #16 on .s elements
; (the +sve mul is predicated, the SVE2-prefix mul is unpredicated).
132 define <2 x i16> @smulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
133 ; SVE-LABEL: smulh_v2i16:
135 ; SVE-NEXT: ptrue p0.s, vl2
136 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
137 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
138 ; SVE-NEXT: sxth z0.s, p0/m, z0.s
139 ; SVE-NEXT: sxth z1.s, p0/m, z1.s
140 ; SVE-NEXT: mul z0.s, p0/m, z0.s, z1.s
141 ; SVE-NEXT: lsr z0.s, z0.s, #16
142 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
145 ; SVE2-LABEL: smulh_v2i16:
147 ; SVE2-NEXT: ptrue p0.s, vl2
148 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
149 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
150 ; SVE2-NEXT: sxth z0.s, p0/m, z0.s
151 ; SVE2-NEXT: sxth z1.s, p0/m, z1.s
152 ; SVE2-NEXT: mul z0.s, z0.s, z1.s
153 ; SVE2-NEXT: lsr z0.s, z0.s, #16
154 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
156 %1 = sext <2 x i16> %op1 to <2 x i32>
157 %2 = sext <2 x i16> %op2 to <2 x i32>
158 %mul = mul <2 x i32> %1, %2
159 %shr = lshr <2 x i32> %mul, <i32 16, i32 16>
160 %res = trunc <2 x i32> %shr to <2 x i16>
; Signed multiply-high on <4 x i16>: sext to i32, mul, lshr by 16, trunc to i16.
; With +sve the expected output is a predicated smulh governed by
; ptrue p0.h, vl4; the SVE2 prefix expects the unpredicated smulh with no ptrue.
164 define <4 x i16> @smulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
165 ; SVE-LABEL: smulh_v4i16:
167 ; SVE-NEXT: ptrue p0.h, vl4
168 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
169 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
170 ; SVE-NEXT: smulh z0.h, p0/m, z0.h, z1.h
171 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
174 ; SVE2-LABEL: smulh_v4i16:
176 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
177 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
178 ; SVE2-NEXT: smulh z0.h, z0.h, z1.h
179 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
181 %1 = sext <4 x i16> %op1 to <4 x i32>
182 %2 = sext <4 x i16> %op2 to <4 x i32>
183 %mul = mul <4 x i32> %1, %2
184 %shr = lshr <4 x i32> %mul, <i32 16, i32 16, i32 16, i32 16>
185 %res = trunc <4 x i32> %shr to <4 x i16>
; Signed multiply-high on <8 x i16>: sext to i32, mul, lshr by 16, trunc to i16.
; With +sve the expected output is a predicated smulh governed by
; ptrue p0.h, vl8; the SVE2 prefix expects the unpredicated smulh with no ptrue.
189 define <8 x i16> @smulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
190 ; SVE-LABEL: smulh_v8i16:
192 ; SVE-NEXT: ptrue p0.h, vl8
193 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
194 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
195 ; SVE-NEXT: smulh z0.h, p0/m, z0.h, z1.h
196 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
199 ; SVE2-LABEL: smulh_v8i16:
201 ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
202 ; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
203 ; SVE2-NEXT: smulh z0.h, z0.h, z1.h
204 ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
206 %1 = sext <8 x i16> %op1 to <8 x i32>
207 %2 = sext <8 x i16> %op2 to <8 x i32>
208 %mul = mul <8 x i32> %1, %2
209 %shr = lshr <8 x i32> %mul, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
210 %res = trunc <8 x i32> %shr to <8 x i16>
; Signed multiply-high on <16 x i16> held in memory: loads both operands from
; %a and %b, computes sext/mul/lshr-by-16/trunc, and stores the result to %a.
; The expected output handles the data as two q-register halves (ldp/stp) with
; one smulh per half; the +sve form is predicated by ptrue p0.h, vl8 and uses
; a movprfx before the second smulh, the SVE2 prefix expects neither.
214 define void @smulh_v16i16(ptr %a, ptr %b) {
215 ; SVE-LABEL: smulh_v16i16:
217 ; SVE-NEXT: ptrue p0.h, vl8
218 ; SVE-NEXT: ldp q0, q3, [x1]
219 ; SVE-NEXT: ldp q1, q2, [x0]
220 ; SVE-NEXT: smulh z0.h, p0/m, z0.h, z1.h
221 ; SVE-NEXT: movprfx z1, z2
222 ; SVE-NEXT: smulh z1.h, p0/m, z1.h, z3.h
223 ; SVE-NEXT: stp q0, q1, [x0]
226 ; SVE2-LABEL: smulh_v16i16:
228 ; SVE2-NEXT: ldp q0, q3, [x1]
229 ; SVE2-NEXT: ldp q1, q2, [x0]
230 ; SVE2-NEXT: smulh z0.h, z1.h, z0.h
231 ; SVE2-NEXT: smulh z1.h, z2.h, z3.h
232 ; SVE2-NEXT: stp q0, q1, [x0]
234 %op1 = load <16 x i16>, ptr %a
235 %op2 = load <16 x i16>, ptr %b
236 %1 = sext <16 x i16> %op1 to <16 x i32>
237 %2 = sext <16 x i16> %op2 to <16 x i32>
238 %mul = mul <16 x i32> %1, %2
239 %shr = lshr <16 x i32> %mul, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
240 %res = trunc <16 x i32> %shr to <16 x i16>
241 store <16 x i16> %res, ptr %a
; Signed multiply-high on <2 x i32>: sext to i64, mul, lshr by 32, trunc to i32.
; With +sve the expected output is a predicated smulh governed by
; ptrue p0.s, vl2; the SVE2 prefix expects the unpredicated smulh with no ptrue.
245 define <2 x i32> @smulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
246 ; SVE-LABEL: smulh_v2i32:
248 ; SVE-NEXT: ptrue p0.s, vl2
249 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
250 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
251 ; SVE-NEXT: smulh z0.s, p0/m, z0.s, z1.s
252 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
255 ; SVE2-LABEL: smulh_v2i32:
257 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
258 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
259 ; SVE2-NEXT: smulh z0.s, z0.s, z1.s
260 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
262 %1 = sext <2 x i32> %op1 to <2 x i64>
263 %2 = sext <2 x i32> %op2 to <2 x i64>
264 %mul = mul <2 x i64> %1, %2
265 %shr = lshr <2 x i64> %mul, <i64 32, i64 32>
266 %res = trunc <2 x i64> %shr to <2 x i32>
; Signed multiply-high on <4 x i32>: sext to i64, mul, lshr by 32, trunc to i32.
; With +sve the expected output is a predicated smulh governed by
; ptrue p0.s, vl4; the SVE2 prefix expects the unpredicated smulh with no ptrue.
270 define <4 x i32> @smulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
271 ; SVE-LABEL: smulh_v4i32:
273 ; SVE-NEXT: ptrue p0.s, vl4
274 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
275 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
276 ; SVE-NEXT: smulh z0.s, p0/m, z0.s, z1.s
277 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
280 ; SVE2-LABEL: smulh_v4i32:
282 ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
283 ; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
284 ; SVE2-NEXT: smulh z0.s, z0.s, z1.s
285 ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
287 %1 = sext <4 x i32> %op1 to <4 x i64>
288 %2 = sext <4 x i32> %op2 to <4 x i64>
289 %mul = mul <4 x i64> %1, %2
290 %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
291 %res = trunc <4 x i64> %shr to <4 x i32>
; Signed multiply-high on <8 x i32> held in memory: loads both operands from %a
; and %b, computes sext/mul/lshr-by-32/trunc, and stores the result back to %a.
; The expected output handles the data as two q-register halves (ldp/stp) with
; one smulh per half; the +sve form is predicated by ptrue p0.s, vl4 and uses
; a movprfx before the second smulh, the SVE2 prefix expects neither.
295 define void @smulh_v8i32(ptr %a, ptr %b) {
296 ; SVE-LABEL: smulh_v8i32:
298 ; SVE-NEXT: ptrue p0.s, vl4
299 ; SVE-NEXT: ldp q0, q3, [x1]
300 ; SVE-NEXT: ldp q1, q2, [x0]
301 ; SVE-NEXT: smulh z0.s, p0/m, z0.s, z1.s
302 ; SVE-NEXT: movprfx z1, z2
303 ; SVE-NEXT: smulh z1.s, p0/m, z1.s, z3.s
304 ; SVE-NEXT: stp q0, q1, [x0]
307 ; SVE2-LABEL: smulh_v8i32:
309 ; SVE2-NEXT: ldp q0, q3, [x1]
310 ; SVE2-NEXT: ldp q1, q2, [x0]
311 ; SVE2-NEXT: smulh z0.s, z1.s, z0.s
312 ; SVE2-NEXT: smulh z1.s, z2.s, z3.s
313 ; SVE2-NEXT: stp q0, q1, [x0]
315 %op1 = load <8 x i32>, ptr %a
316 %op2 = load <8 x i32>, ptr %b
317 %1 = sext <8 x i32> %op1 to <8 x i64>
318 %2 = sext <8 x i32> %op2 to <8 x i64>
319 %mul = mul <8 x i64> %1, %2
320 %shr = lshr <8 x i64> %mul, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
321 %res = trunc <8 x i64> %shr to <8 x i32>
322 store <8 x i32> %res, ptr %a
; Signed multiply-high on <1 x i64>: sext to i128, mul, lshr by 64, trunc to
; i64. Unlike the other functions in this file, the shift amount is supplied
; through %splat (an insertelement/shufflevector splat of i128 64) rather than
; an inline constant vector.
; With +sve the expected output is a predicated smulh governed by
; ptrue p0.d, vl1; the SVE2 prefix expects the unpredicated smulh with no ptrue.
326 define <1 x i64> @smulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
327 ; SVE-LABEL: smulh_v1i64:
329 ; SVE-NEXT: ptrue p0.d, vl1
330 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
331 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
332 ; SVE-NEXT: smulh z0.d, p0/m, z0.d, z1.d
333 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
336 ; SVE2-LABEL: smulh_v1i64:
338 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
339 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
340 ; SVE2-NEXT: smulh z0.d, z0.d, z1.d
341 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
343 %insert = insertelement <1 x i128> undef, i128 64, i128 0
344 %splat = shufflevector <1 x i128> %insert, <1 x i128> undef, <1 x i32> zeroinitializer
345 %1 = sext <1 x i64> %op1 to <1 x i128>
346 %2 = sext <1 x i64> %op2 to <1 x i128>
347 %mul = mul <1 x i128> %1, %2
348 %shr = lshr <1 x i128> %mul, %splat
349 %res = trunc <1 x i128> %shr to <1 x i64>
; Signed multiply-high on <2 x i64>: sext to i128, mul, lshr by 64, trunc to i64.
; With +sve the expected output is a predicated smulh governed by
; ptrue p0.d, vl2; the SVE2 prefix expects the unpredicated smulh with no ptrue.
353 define <2 x i64> @smulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
354 ; SVE-LABEL: smulh_v2i64:
356 ; SVE-NEXT: ptrue p0.d, vl2
357 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
358 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
359 ; SVE-NEXT: smulh z0.d, p0/m, z0.d, z1.d
360 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
363 ; SVE2-LABEL: smulh_v2i64:
365 ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
366 ; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
367 ; SVE2-NEXT: smulh z0.d, z0.d, z1.d
368 ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
370 %1 = sext <2 x i64> %op1 to <2 x i128>
371 %2 = sext <2 x i64> %op2 to <2 x i128>
372 %mul = mul <2 x i128> %1, %2
373 %shr = lshr <2 x i128> %mul, <i128 64, i128 64>
374 %res = trunc <2 x i128> %shr to <2 x i64>
; Signed multiply-high on <4 x i64> held in memory: loads both operands from %a
; and %b, computes sext/mul/lshr-by-64/trunc, and stores the result back to %a.
; The expected output handles the data as two q-register halves (ldp/stp) with
; one smulh per half; the +sve form is predicated by ptrue p0.d, vl2 and uses
; a movprfx before the second smulh, the SVE2 prefix expects neither.
378 define void @smulh_v4i64(ptr %a, ptr %b) {
379 ; SVE-LABEL: smulh_v4i64:
381 ; SVE-NEXT: ptrue p0.d, vl2
382 ; SVE-NEXT: ldp q0, q3, [x1]
383 ; SVE-NEXT: ldp q1, q2, [x0]
384 ; SVE-NEXT: smulh z0.d, p0/m, z0.d, z1.d
385 ; SVE-NEXT: movprfx z1, z2
386 ; SVE-NEXT: smulh z1.d, p0/m, z1.d, z3.d
387 ; SVE-NEXT: stp q0, q1, [x0]
390 ; SVE2-LABEL: smulh_v4i64:
392 ; SVE2-NEXT: ldp q0, q3, [x1]
393 ; SVE2-NEXT: ldp q1, q2, [x0]
394 ; SVE2-NEXT: smulh z0.d, z1.d, z0.d
395 ; SVE2-NEXT: smulh z1.d, z2.d, z3.d
396 ; SVE2-NEXT: stp q0, q1, [x0]
398 %op1 = load <4 x i64>, ptr %a
399 %op2 = load <4 x i64>, ptr %b
400 %1 = sext <4 x i64> %op1 to <4 x i128>
401 %2 = sext <4 x i64> %op2 to <4 x i128>
402 %mul = mul <4 x i128> %1, %2
403 %shr = lshr <4 x i128> %mul, <i128 64, i128 64, i128 64, i128 64>
404 %res = trunc <4 x i128> %shr to <4 x i64>
405 store <4 x i64> %res, ptr %a
; Unsigned widening multiply on <4 x i8>: both operands are zero-extended to
; <4 x i16>, multiplied, logically shifted right by 4 and truncated to <4 x i8>.
; No umulh instruction is expected: both prefixes expect the operands to be
; masked with and #0xff, then mul and lsr #4 on .h elements. The +sve mul is
; predicated by ptrue p0.h, vl4; the SVE2-prefix mul is unpredicated.
; NOTE(review): the shift amount is 4, whereas umulh_v8i8 shifts the i16
; product by 8; confirm this is intentional before changing it, because the
; autogenerated assertions encode lsr #4.
413 define <4 x i8> @umulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
414 ; SVE-LABEL: umulh_v4i8:
416 ; SVE-NEXT: ptrue p0.h, vl4
417 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
418 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
419 ; SVE-NEXT: and z0.h, z0.h, #0xff
420 ; SVE-NEXT: and z1.h, z1.h, #0xff
421 ; SVE-NEXT: mul z0.h, p0/m, z0.h, z1.h
422 ; SVE-NEXT: lsr z0.h, z0.h, #4
423 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
426 ; SVE2-LABEL: umulh_v4i8:
428 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
429 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
430 ; SVE2-NEXT: and z0.h, z0.h, #0xff
431 ; SVE2-NEXT: and z1.h, z1.h, #0xff
432 ; SVE2-NEXT: mul z0.h, z0.h, z1.h
433 ; SVE2-NEXT: lsr z0.h, z0.h, #4
434 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
436 %1 = zext <4 x i8> %op1 to <4 x i16>
437 %2 = zext <4 x i8> %op2 to <4 x i16>
438 %mul = mul <4 x i16> %1, %2
439 %shr = lshr <4 x i16> %mul, <i16 4, i16 4, i16 4, i16 4>
440 %res = trunc <4 x i16> %shr to <4 x i8>
; Unsigned multiply-high on <8 x i8>: zext to i16, mul, lshr by 8, trunc to i8.
; With +sve the expected output is a predicated umulh governed by
; ptrue p0.b, vl8; the SVE2 prefix expects the unpredicated umulh with no ptrue.
444 define <8 x i8> @umulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
445 ; SVE-LABEL: umulh_v8i8:
447 ; SVE-NEXT: ptrue p0.b, vl8
448 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
449 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
450 ; SVE-NEXT: umulh z0.b, p0/m, z0.b, z1.b
451 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
454 ; SVE2-LABEL: umulh_v8i8:
456 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
457 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
458 ; SVE2-NEXT: umulh z0.b, z0.b, z1.b
459 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
461 %1 = zext <8 x i8> %op1 to <8 x i16>
462 %2 = zext <8 x i8> %op2 to <8 x i16>
463 %mul = mul <8 x i16> %1, %2
464 %shr = lshr <8 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
465 %res = trunc <8 x i16> %shr to <8 x i8>
; Unsigned multiply-high on <16 x i8>: zext to i16, mul, lshr by 8, trunc to i8.
; With +sve the expected output is a predicated umulh governed by
; ptrue p0.b, vl16; the SVE2 prefix expects the unpredicated umulh with no ptrue.
469 define <16 x i8> @umulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
470 ; SVE-LABEL: umulh_v16i8:
472 ; SVE-NEXT: ptrue p0.b, vl16
473 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
474 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
475 ; SVE-NEXT: umulh z0.b, p0/m, z0.b, z1.b
476 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
479 ; SVE2-LABEL: umulh_v16i8:
481 ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
482 ; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
483 ; SVE2-NEXT: umulh z0.b, z0.b, z1.b
484 ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
486 %1 = zext <16 x i8> %op1 to <16 x i16>
487 %2 = zext <16 x i8> %op2 to <16 x i16>
488 %mul = mul <16 x i16> %1, %2
489 %shr = lshr <16 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
490 %res = trunc <16 x i16> %shr to <16 x i8>
; Unsigned multiply-high on <32 x i8> held in memory: loads both operands from
; %a and %b, computes zext/mul/lshr-by-8/trunc, and stores the result to %a.
; The expected output handles the data as two q-register halves (ldp/stp) with
; one umulh per half; the +sve form is predicated by ptrue p0.b, vl16 and uses
; a movprfx before the second umulh, the SVE2 prefix expects neither.
494 define void @umulh_v32i8(ptr %a, ptr %b) {
495 ; SVE-LABEL: umulh_v32i8:
497 ; SVE-NEXT: ptrue p0.b, vl16
498 ; SVE-NEXT: ldp q0, q3, [x1]
499 ; SVE-NEXT: ldp q1, q2, [x0]
500 ; SVE-NEXT: umulh z0.b, p0/m, z0.b, z1.b
501 ; SVE-NEXT: movprfx z1, z2
502 ; SVE-NEXT: umulh z1.b, p0/m, z1.b, z3.b
503 ; SVE-NEXT: stp q0, q1, [x0]
506 ; SVE2-LABEL: umulh_v32i8:
508 ; SVE2-NEXT: ldp q0, q3, [x1]
509 ; SVE2-NEXT: ldp q1, q2, [x0]
510 ; SVE2-NEXT: umulh z0.b, z1.b, z0.b
511 ; SVE2-NEXT: umulh z1.b, z2.b, z3.b
512 ; SVE2-NEXT: stp q0, q1, [x0]
514 %op1 = load <32 x i8>, ptr %a
515 %op2 = load <32 x i8>, ptr %b
516 %1 = zext <32 x i8> %op1 to <32 x i16>
517 %2 = zext <32 x i8> %op2 to <32 x i16>
518 %mul = mul <32 x i16> %1, %2
519 %shr = lshr <32 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
520 %res = trunc <32 x i16> %shr to <32 x i8>
521 store <32 x i8> %res, ptr %a
; Unsigned multiply-high pattern on <2 x i16>: zext to i32, mul, lshr by 16,
; trunc to i16. No umulh instruction is expected for this type: both prefixes
; expect the operands to be masked with and #0xffff, then mul and lsr #16 on
; .s elements. The +sve mul is predicated by ptrue p0.s, vl2; the SVE2-prefix
; mul is unpredicated.
525 define <2 x i16> @umulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
526 ; SVE-LABEL: umulh_v2i16:
528 ; SVE-NEXT: ptrue p0.s, vl2
529 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
530 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
531 ; SVE-NEXT: and z0.s, z0.s, #0xffff
532 ; SVE-NEXT: and z1.s, z1.s, #0xffff
533 ; SVE-NEXT: mul z0.s, p0/m, z0.s, z1.s
534 ; SVE-NEXT: lsr z0.s, z0.s, #16
535 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
538 ; SVE2-LABEL: umulh_v2i16:
540 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
541 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
542 ; SVE2-NEXT: and z0.s, z0.s, #0xffff
543 ; SVE2-NEXT: and z1.s, z1.s, #0xffff
544 ; SVE2-NEXT: mul z0.s, z0.s, z1.s
545 ; SVE2-NEXT: lsr z0.s, z0.s, #16
546 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
548 %1 = zext <2 x i16> %op1 to <2 x i32>
549 %2 = zext <2 x i16> %op2 to <2 x i32>
550 %mul = mul <2 x i32> %1, %2
551 %shr = lshr <2 x i32> %mul, <i32 16, i32 16>
552 %res = trunc <2 x i32> %shr to <2 x i16>
; Unsigned multiply-high on <4 x i16>: zext to i32, mul, lshr by 16, trunc to
; i16. With +sve the expected output is a predicated umulh governed by
; ptrue p0.h, vl4; the SVE2 prefix expects the unpredicated umulh with no ptrue.
556 define <4 x i16> @umulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
557 ; SVE-LABEL: umulh_v4i16:
559 ; SVE-NEXT: ptrue p0.h, vl4
560 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
561 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
562 ; SVE-NEXT: umulh z0.h, p0/m, z0.h, z1.h
563 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
566 ; SVE2-LABEL: umulh_v4i16:
568 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
569 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
570 ; SVE2-NEXT: umulh z0.h, z0.h, z1.h
571 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
573 %1 = zext <4 x i16> %op1 to <4 x i32>
574 %2 = zext <4 x i16> %op2 to <4 x i32>
575 %mul = mul <4 x i32> %1, %2
576 %shr = lshr <4 x i32> %mul, <i32 16, i32 16, i32 16, i32 16>
577 %res = trunc <4 x i32> %shr to <4 x i16>
; Unsigned multiply-high on <8 x i16>: zext to i32, mul, lshr by 16, trunc to
; i16. With +sve the expected output is a predicated umulh governed by
; ptrue p0.h, vl8; the SVE2 prefix expects the unpredicated umulh with no ptrue.
581 define <8 x i16> @umulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
582 ; SVE-LABEL: umulh_v8i16:
584 ; SVE-NEXT: ptrue p0.h, vl8
585 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
586 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
587 ; SVE-NEXT: umulh z0.h, p0/m, z0.h, z1.h
588 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
591 ; SVE2-LABEL: umulh_v8i16:
593 ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
594 ; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
595 ; SVE2-NEXT: umulh z0.h, z0.h, z1.h
596 ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
598 %1 = zext <8 x i16> %op1 to <8 x i32>
599 %2 = zext <8 x i16> %op2 to <8 x i32>
600 %mul = mul <8 x i32> %1, %2
601 %shr = lshr <8 x i32> %mul, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
602 %res = trunc <8 x i32> %shr to <8 x i16>
; Unsigned multiply-high on <16 x i16> held in memory: loads both operands from
; %a and %b, computes zext/mul/lshr-by-16/trunc, and stores the result to %a.
; The expected output handles the data as two q-register halves (ldp/stp) with
; one umulh per half; the +sve form is predicated by ptrue p0.h, vl8 and uses
; a movprfx before the second umulh, the SVE2 prefix expects neither.
606 define void @umulh_v16i16(ptr %a, ptr %b) {
607 ; SVE-LABEL: umulh_v16i16:
609 ; SVE-NEXT: ptrue p0.h, vl8
610 ; SVE-NEXT: ldp q0, q3, [x1]
611 ; SVE-NEXT: ldp q1, q2, [x0]
612 ; SVE-NEXT: umulh z0.h, p0/m, z0.h, z1.h
613 ; SVE-NEXT: movprfx z1, z2
614 ; SVE-NEXT: umulh z1.h, p0/m, z1.h, z3.h
615 ; SVE-NEXT: stp q0, q1, [x0]
618 ; SVE2-LABEL: umulh_v16i16:
620 ; SVE2-NEXT: ldp q0, q3, [x1]
621 ; SVE2-NEXT: ldp q1, q2, [x0]
622 ; SVE2-NEXT: umulh z0.h, z1.h, z0.h
623 ; SVE2-NEXT: umulh z1.h, z2.h, z3.h
624 ; SVE2-NEXT: stp q0, q1, [x0]
626 %op1 = load <16 x i16>, ptr %a
627 %op2 = load <16 x i16>, ptr %b
628 %1 = zext <16 x i16> %op1 to <16 x i32>
629 %2 = zext <16 x i16> %op2 to <16 x i32>
630 %mul = mul <16 x i32> %1, %2
631 %shr = lshr <16 x i32> %mul, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
632 %res = trunc <16 x i32> %shr to <16 x i16>
633 store <16 x i16> %res, ptr %a
; Unsigned multiply-high on <2 x i32>: zext to i64, mul, lshr by 32, trunc to
; i32. With +sve the expected output is a predicated umulh governed by
; ptrue p0.s, vl2; the SVE2 prefix expects the unpredicated umulh with no ptrue.
637 define <2 x i32> @umulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
638 ; SVE-LABEL: umulh_v2i32:
640 ; SVE-NEXT: ptrue p0.s, vl2
641 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
642 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
643 ; SVE-NEXT: umulh z0.s, p0/m, z0.s, z1.s
644 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
647 ; SVE2-LABEL: umulh_v2i32:
649 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
650 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
651 ; SVE2-NEXT: umulh z0.s, z0.s, z1.s
652 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
654 %1 = zext <2 x i32> %op1 to <2 x i64>
655 %2 = zext <2 x i32> %op2 to <2 x i64>
656 %mul = mul <2 x i64> %1, %2
657 %shr = lshr <2 x i64> %mul, <i64 32, i64 32>
658 %res = trunc <2 x i64> %shr to <2 x i32>
; Unsigned multiply-high on <4 x i32>: zext to i64, mul, lshr by 32, trunc to
; i32. With +sve the expected output is a predicated umulh governed by
; ptrue p0.s, vl4; the SVE2 prefix expects the unpredicated umulh with no ptrue.
662 define <4 x i32> @umulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
663 ; SVE-LABEL: umulh_v4i32:
665 ; SVE-NEXT: ptrue p0.s, vl4
666 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
667 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
668 ; SVE-NEXT: umulh z0.s, p0/m, z0.s, z1.s
669 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
672 ; SVE2-LABEL: umulh_v4i32:
674 ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
675 ; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
676 ; SVE2-NEXT: umulh z0.s, z0.s, z1.s
677 ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
679 %1 = zext <4 x i32> %op1 to <4 x i64>
680 %2 = zext <4 x i32> %op2 to <4 x i64>
681 %mul = mul <4 x i64> %1, %2
682 %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
683 %res = trunc <4 x i64> %shr to <4 x i32>
; Unsigned multiply-high on <8 x i32> held in memory: loads both operands from
; %a and %b, computes zext/mul/lshr-by-32/trunc, and stores the result to %a.
; The expected output handles the data as two q-register halves (ldp/stp) with
; one umulh per half; the +sve form is predicated by ptrue p0.s, vl4 and uses
; a movprfx before the second umulh, the SVE2 prefix expects neither.
; The unused %insert/%splat values that used to follow the loads were dead and
; have been dropped; the shift amount is the inline constant vector.
687 define void @umulh_v8i32(ptr %a, ptr %b) {
688 ; SVE-LABEL: umulh_v8i32:
690 ; SVE-NEXT: ptrue p0.s, vl4
691 ; SVE-NEXT: ldp q0, q3, [x1]
692 ; SVE-NEXT: ldp q1, q2, [x0]
693 ; SVE-NEXT: umulh z0.s, p0/m, z0.s, z1.s
694 ; SVE-NEXT: movprfx z1, z2
695 ; SVE-NEXT: umulh z1.s, p0/m, z1.s, z3.s
696 ; SVE-NEXT: stp q0, q1, [x0]
699 ; SVE2-LABEL: umulh_v8i32:
701 ; SVE2-NEXT: ldp q0, q3, [x1]
702 ; SVE2-NEXT: ldp q1, q2, [x0]
703 ; SVE2-NEXT: umulh z0.s, z1.s, z0.s
704 ; SVE2-NEXT: umulh z1.s, z2.s, z3.s
705 ; SVE2-NEXT: stp q0, q1, [x0]
707 %op1 = load <8 x i32>, ptr %a
708 %op2 = load <8 x i32>, ptr %b
711 %1 = zext <8 x i32> %op1 to <8 x i64>
712 %2 = zext <8 x i32> %op2 to <8 x i64>
713 %mul = mul <8 x i64> %1, %2
714 %shr = lshr <8 x i64> %mul, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
715 %res = trunc <8 x i64> %shr to <8 x i32>
716 store <8 x i32> %res, ptr %a
; Unsigned multiply-high on <1 x i64>: zext to i128, mul, lshr by 64, trunc to
; i64. With +sve the expected output is a predicated umulh governed by
; ptrue p0.d, vl1; the SVE2 prefix expects the unpredicated umulh with no ptrue.
720 define <1 x i64> @umulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
721 ; SVE-LABEL: umulh_v1i64:
723 ; SVE-NEXT: ptrue p0.d, vl1
724 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
725 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
726 ; SVE-NEXT: umulh z0.d, p0/m, z0.d, z1.d
727 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
730 ; SVE2-LABEL: umulh_v1i64:
732 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
733 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
734 ; SVE2-NEXT: umulh z0.d, z0.d, z1.d
735 ; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
737 %1 = zext <1 x i64> %op1 to <1 x i128>
738 %2 = zext <1 x i64> %op2 to <1 x i128>
739 %mul = mul <1 x i128> %1, %2
740 %shr = lshr <1 x i128> %mul, <i128 64>
741 %res = trunc <1 x i128> %shr to <1 x i64>
; Unsigned multiply-high on <2 x i64>: zext to i128, mul, lshr by 64, trunc to
; i64. With +sve the expected output is a predicated umulh governed by
; ptrue p0.d, vl2; the SVE2 prefix expects the unpredicated umulh with no ptrue.
745 define <2 x i64> @umulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
746 ; SVE-LABEL: umulh_v2i64:
748 ; SVE-NEXT: ptrue p0.d, vl2
749 ; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
750 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
751 ; SVE-NEXT: umulh z0.d, p0/m, z0.d, z1.d
752 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
755 ; SVE2-LABEL: umulh_v2i64:
757 ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
758 ; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
759 ; SVE2-NEXT: umulh z0.d, z0.d, z1.d
760 ; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
762 %1 = zext <2 x i64> %op1 to <2 x i128>
763 %2 = zext <2 x i64> %op2 to <2 x i128>
764 %mul = mul <2 x i128> %1, %2
765 %shr = lshr <2 x i128> %mul, <i128 64, i128 64>
766 %res = trunc <2 x i128> %shr to <2 x i64>
; Unsigned multiply-high on <4 x i64> held in memory: loads both operands from
; %a and %b, computes zext/mul/lshr-by-64/trunc, and stores the result to %a.
; The expected output handles the data as two q-register halves (ldp/stp) with
; one umulh per half; the +sve form is predicated by ptrue p0.d, vl2 and uses
; a movprfx before the second umulh, the SVE2 prefix expects neither.
770 define void @umulh_v4i64(ptr %a, ptr %b) {
771 ; SVE-LABEL: umulh_v4i64:
773 ; SVE-NEXT: ptrue p0.d, vl2
774 ; SVE-NEXT: ldp q0, q3, [x1]
775 ; SVE-NEXT: ldp q1, q2, [x0]
776 ; SVE-NEXT: umulh z0.d, p0/m, z0.d, z1.d
777 ; SVE-NEXT: movprfx z1, z2
778 ; SVE-NEXT: umulh z1.d, p0/m, z1.d, z3.d
779 ; SVE-NEXT: stp q0, q1, [x0]
782 ; SVE2-LABEL: umulh_v4i64:
784 ; SVE2-NEXT: ldp q0, q3, [x1]
785 ; SVE2-NEXT: ldp q1, q2, [x0]
786 ; SVE2-NEXT: umulh z0.d, z1.d, z0.d
787 ; SVE2-NEXT: umulh z1.d, z2.d, z3.d
788 ; SVE2-NEXT: stp q0, q1, [x0]
790 %op1 = load <4 x i64>, ptr %a
791 %op2 = load <4 x i64>, ptr %b
792 %1 = zext <4 x i64> %op1 to <4 x i128>
793 %2 = zext <4 x i64> %op2 to <4 x i128>
794 %mul = mul <4 x i128> %1, %2
795 %shr = lshr <4 x i128> %mul, <i128 64, i128 64, i128 64, i128 64>
796 %res = trunc <4 x i128> %shr to <4 x i64>
797 store <4 x i64> %res, ptr %a
800 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: