1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3 ; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; Scalar i8 case. Both RUN configurations share the plain CHECK prefix here
; and expect a single W-register multiply, `mul w0, w0, w1`.
; NOTE(review): the IR body is not visible in this excerpt; presumably a `mul`.
5 define i8 @i8(i8 %a, i8 %b) {
7 ; CHECK: // %bb.0: // %entry
8 ; CHECK-NEXT: mul w0, w0, w1
; Scalar i16 case. Both RUN configurations share the plain CHECK prefix and
; expect the same W-register multiply as the other sub-64-bit scalars.
; NOTE(review): the IR body is not visible in this excerpt; presumably a `mul`.
15 define i16 @i16(i16 %a, i16 %b) {
17 ; CHECK: // %bb.0: // %entry
18 ; CHECK-NEXT: mul w0, w0, w1
; Scalar i32 case. Both RUN configurations share the plain CHECK prefix and
; expect one `mul w0, w0, w1`.
; NOTE(review): the IR body is not visible in this excerpt; presumably a `mul`.
25 define i32 @i32(i32 %a, i32 %b) {
27 ; CHECK: // %bb.0: // %entry
28 ; CHECK-NEXT: mul w0, w0, w1
; Scalar i64 case. Both RUN configurations share the plain CHECK prefix and
; expect one X-register multiply, `mul x0, x0, x1`.
; NOTE(review): the IR body is not visible in this excerpt; presumably a `mul`.
35 define i64 @i64(i64 %a, i64 %b) {
37 ; CHECK: // %bb.0: // %entry
38 ; CHECK-NEXT: mul x0, x0, x1
; Scalar i128 case, expanded into 64-bit pieces with operands in x0..x3 and
; the result in x0/x1. The two selectors produce different sequences, so the
; checks are split by prefix:
;   CHECK-SD (run without -global-isel): umulh + madd + mul + madd.
;   CHECK-GI (run with -global-isel): mul, mul, umulh, madd, then a mov into
;   x0 and a final add into x1.
; In both, x0 receives the low product x0*x2 and x1 accumulates the
; cross terms plus the umulh of the low halves.
45 define i128 @i128(i128 %a, i128 %b) {
46 ; CHECK-SD-LABEL: i128:
47 ; CHECK-SD: // %bb.0: // %entry
48 ; CHECK-SD-NEXT: umulh x8, x0, x2
49 ; CHECK-SD-NEXT: madd x8, x0, x3, x8
50 ; CHECK-SD-NEXT: mul x0, x0, x2
51 ; CHECK-SD-NEXT: madd x1, x1, x2, x8
54 ; CHECK-GI-LABEL: i128:
55 ; CHECK-GI: // %bb.0: // %entry
56 ; CHECK-GI-NEXT: mul x9, x0, x3
57 ; CHECK-GI-NEXT: mul x8, x0, x2
58 ; CHECK-GI-NEXT: umulh x10, x0, x2
59 ; CHECK-GI-NEXT: madd x9, x1, x2, x9
60 ; CHECK-GI-NEXT: mov x0, x8
61 ; CHECK-GI-NEXT: add x1, x9, x10
; <2 x i8> case, passed through memory: loads both operands from %p1 and %p2,
; multiplies them, and stores the product back to %p1.
; Both selectors are expected to perform the multiply as `mul v0.2s`, i.e.
; with each byte element held in a 32-bit lane:
;   CHECK-SD (run without -global-isel): byte lane loads into lanes 0 and 4,
;   then two `strb` stores of the extracted 32-bit lanes.
;   CHECK-GI (run with -global-isel): `ld1` + `ldr b` loads combined with
;   lane moves, then two `str b` stores.
68 define void @v2i8(ptr %p1, ptr %p2) {
69 ; CHECK-SD-LABEL: v2i8:
70 ; CHECK-SD: // %bb.0: // %entry
71 ; CHECK-SD-NEXT: ld1 { v0.b }[0], [x0]
72 ; CHECK-SD-NEXT: ld1 { v1.b }[0], [x1]
73 ; CHECK-SD-NEXT: add x8, x0, #1
74 ; CHECK-SD-NEXT: add x9, x1, #1
75 ; CHECK-SD-NEXT: ld1 { v0.b }[4], [x8]
76 ; CHECK-SD-NEXT: ld1 { v1.b }[4], [x9]
77 ; CHECK-SD-NEXT: mul v0.2s, v0.2s, v1.2s
78 ; CHECK-SD-NEXT: mov w8, v0.s[1]
79 ; CHECK-SD-NEXT: fmov w9, s0
80 ; CHECK-SD-NEXT: strb w9, [x0]
81 ; CHECK-SD-NEXT: strb w8, [x0, #1]
84 ; CHECK-GI-LABEL: v2i8:
85 ; CHECK-GI: // %bb.0: // %entry
86 ; CHECK-GI-NEXT: ld1 { v0.b }[0], [x0]
87 ; CHECK-GI-NEXT: ld1 { v1.b }[0], [x1]
88 ; CHECK-GI-NEXT: ldr b2, [x0, #1]
89 ; CHECK-GI-NEXT: ldr b3, [x1, #1]
90 ; CHECK-GI-NEXT: mov v0.s[1], v2.s[0]
91 ; CHECK-GI-NEXT: mov v1.s[1], v3.s[0]
92 ; CHECK-GI-NEXT: mul v0.2s, v0.2s, v1.2s
93 ; CHECK-GI-NEXT: mov s1, v0.s[1]
94 ; CHECK-GI-NEXT: str b0, [x0]
95 ; CHECK-GI-NEXT: str b1, [x0, #1]
98 %d = load <2 x i8>, ptr %p1
99 %e = load <2 x i8>, ptr %p2
100 %s = mul <2 x i8> %d, %e
101 store <2 x i8> %s, ptr %p1
; <3 x i8> case, passed through memory: loads both operands from %p1 and %p2,
; multiplies them, and stores the product back to %p1.
; Both selectors are expected to perform the multiply as `mul v0.4h`:
;   CHECK-SD (run without -global-isel): reserves 16 bytes of stack, loads
;   each operand with one 32-bit `ldr s`, widens with zip1, narrows with uzp1,
;   and splits the store into a halfword (reloaded from the stack slot) plus
;   a byte at offset 2.
;   CHECK-GI (run with -global-isel): three `ldrb` loads per operand inserted
;   into h lanes, then three separate `str b` stores.
105 define void @v3i8(ptr %p1, ptr %p2) {
106 ; CHECK-SD-LABEL: v3i8:
107 ; CHECK-SD: // %bb.0: // %entry
108 ; CHECK-SD-NEXT: sub sp, sp, #16
109 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
110 ; CHECK-SD-NEXT: ldr s0, [x0]
111 ; CHECK-SD-NEXT: ldr s1, [x1]
112 ; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
113 ; CHECK-SD-NEXT: zip1 v1.8b, v1.8b, v0.8b
114 ; CHECK-SD-NEXT: mul v0.4h, v0.4h, v1.4h
115 ; CHECK-SD-NEXT: uzp1 v1.8b, v0.8b, v0.8b
116 ; CHECK-SD-NEXT: umov w8, v0.h[2]
117 ; CHECK-SD-NEXT: str s1, [sp, #12]
118 ; CHECK-SD-NEXT: ldrh w9, [sp, #12]
119 ; CHECK-SD-NEXT: strb w8, [x0, #2]
120 ; CHECK-SD-NEXT: strh w9, [x0]
121 ; CHECK-SD-NEXT: add sp, sp, #16
124 ; CHECK-GI-LABEL: v3i8:
125 ; CHECK-GI: // %bb.0: // %entry
126 ; CHECK-GI-NEXT: ldrb w8, [x0]
127 ; CHECK-GI-NEXT: ldrb w9, [x1]
128 ; CHECK-GI-NEXT: ldrb w10, [x0, #1]
129 ; CHECK-GI-NEXT: ldrb w11, [x1, #1]
130 ; CHECK-GI-NEXT: fmov s0, w8
131 ; CHECK-GI-NEXT: fmov s1, w9
132 ; CHECK-GI-NEXT: ldrb w8, [x0, #2]
133 ; CHECK-GI-NEXT: ldrb w9, [x1, #2]
134 ; CHECK-GI-NEXT: mov v0.h[1], w10
135 ; CHECK-GI-NEXT: mov v1.h[1], w11
136 ; CHECK-GI-NEXT: mov v0.h[2], w8
137 ; CHECK-GI-NEXT: mov v1.h[2], w9
138 ; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h
139 ; CHECK-GI-NEXT: mov h1, v0.h[1]
140 ; CHECK-GI-NEXT: mov h2, v0.h[2]
141 ; CHECK-GI-NEXT: str b0, [x0]
142 ; CHECK-GI-NEXT: str b1, [x0, #1]
143 ; CHECK-GI-NEXT: str b2, [x0, #2]
146 %d = load <3 x i8>, ptr %p1
147 %e = load <3 x i8>, ptr %p2
148 %s = mul <3 x i8> %d, %e
149 store <3 x i8> %s, ptr %p1
; <4 x i8> case, passed through memory: loads both operands from %p1 and %p2,
; multiplies them, and stores the product back to %p1.
;   CHECK-SD (run without -global-isel): two 32-bit `ldr s` loads, a widening
;   `umull` to 8h, `xtn` back to bytes, and one `str s`.
;   CHECK-GI (run with -global-isel): 32-bit GPR loads, byte-by-byte lane
;   extraction and re-insertion into h lanes, `mul v0.4h`, `uzp1`, and one
;   32-bit `str w`.
153 define void @v4i8(ptr %p1, ptr %p2) {
154 ; CHECK-SD-LABEL: v4i8:
155 ; CHECK-SD: // %bb.0: // %entry
156 ; CHECK-SD-NEXT: ldr s0, [x0]
157 ; CHECK-SD-NEXT: ldr s1, [x1]
158 ; CHECK-SD-NEXT: umull v0.8h, v0.8b, v1.8b
159 ; CHECK-SD-NEXT: xtn v0.8b, v0.8h
160 ; CHECK-SD-NEXT: str s0, [x0]
163 ; CHECK-GI-LABEL: v4i8:
164 ; CHECK-GI: // %bb.0: // %entry
165 ; CHECK-GI-NEXT: ldr w8, [x0]
166 ; CHECK-GI-NEXT: ldr w9, [x1]
167 ; CHECK-GI-NEXT: fmov s0, w8
168 ; CHECK-GI-NEXT: fmov s1, w9
169 ; CHECK-GI-NEXT: mov b2, v0.b[1]
170 ; CHECK-GI-NEXT: mov b3, v1.b[1]
171 ; CHECK-GI-NEXT: mov b4, v0.b[2]
172 ; CHECK-GI-NEXT: mov b5, v0.b[3]
173 ; CHECK-GI-NEXT: fmov w8, s2
174 ; CHECK-GI-NEXT: mov b2, v1.b[2]
175 ; CHECK-GI-NEXT: fmov w9, s3
176 ; CHECK-GI-NEXT: mov b3, v1.b[3]
177 ; CHECK-GI-NEXT: mov v0.h[1], w8
178 ; CHECK-GI-NEXT: mov v1.h[1], w9
179 ; CHECK-GI-NEXT: fmov w8, s4
180 ; CHECK-GI-NEXT: fmov w9, s2
181 ; CHECK-GI-NEXT: mov v0.h[2], w8
182 ; CHECK-GI-NEXT: mov v1.h[2], w9
183 ; CHECK-GI-NEXT: fmov w8, s5
184 ; CHECK-GI-NEXT: fmov w9, s3
185 ; CHECK-GI-NEXT: mov v0.h[3], w8
186 ; CHECK-GI-NEXT: mov v1.h[3], w9
187 ; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h
188 ; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
189 ; CHECK-GI-NEXT: fmov w8, s0
190 ; CHECK-GI-NEXT: str w8, [x0]
193 %d = load <4 x i8>, ptr %p1
194 %e = load <4 x i8>, ptr %p2
195 %s = mul <4 x i8> %d, %e
196 store <4 x i8> %s, ptr %p1
; <8 x i8> case, passed and returned in registers. Both RUN configurations
; share the plain CHECK prefix and expect a single `mul v0.8b, v0.8b, v1.8b`.
200 define <8 x i8> @v8i8(<8 x i8> %d, <8 x i8> %e) {
202 ; CHECK: // %bb.0: // %entry
203 ; CHECK-NEXT: mul v0.8b, v0.8b, v1.8b
206 %s = mul <8 x i8> %d, %e
; <16 x i8> case, passed and returned in registers. Both RUN configurations
; share the plain CHECK prefix and expect a single
; `mul v0.16b, v0.16b, v1.16b`.
210 define <16 x i8> @v16i8(<16 x i8> %d, <16 x i8> %e) {
211 ; CHECK-LABEL: v16i8:
212 ; CHECK: // %bb.0: // %entry
213 ; CHECK-NEXT: mul v0.16b, v0.16b, v1.16b
216 %s = mul <16 x i8> %d, %e
; <32 x i8> case: expected as two 16b multiplies (v0*v2 and v1*v3).
; The prefixes are split only because the two instructions are emitted in
; opposite order: CHECK-SD (run without -global-isel) checks the v1 multiply
; first, CHECK-GI (run with -global-isel) checks the v0 multiply first.
220 define <32 x i8> @v32i8(<32 x i8> %d, <32 x i8> %e) {
221 ; CHECK-SD-LABEL: v32i8:
222 ; CHECK-SD: // %bb.0: // %entry
223 ; CHECK-SD-NEXT: mul v1.16b, v1.16b, v3.16b
224 ; CHECK-SD-NEXT: mul v0.16b, v0.16b, v2.16b
227 ; CHECK-GI-LABEL: v32i8:
228 ; CHECK-GI: // %bb.0: // %entry
229 ; CHECK-GI-NEXT: mul v0.16b, v0.16b, v2.16b
230 ; CHECK-GI-NEXT: mul v1.16b, v1.16b, v3.16b
233 %s = mul <32 x i8> %d, %e
; <2 x i16> case, passed through memory: loads both operands from %p1 and
; %p2, multiplies them, and stores the product back to %p1.
; Both selectors are expected to perform the multiply as `mul v0.2s`, i.e.
; with each halfword element held in a 32-bit lane:
;   CHECK-SD (run without -global-isel): halfword lane loads into lanes 0 and
;   2, then two `strh` stores of the extracted 32-bit lanes.
;   CHECK-GI (run with -global-isel): `ld1` + `ldr h` loads combined with
;   lane moves, then two `str h` stores.
237 define void @v2i16(ptr %p1, ptr %p2) {
238 ; CHECK-SD-LABEL: v2i16:
239 ; CHECK-SD: // %bb.0: // %entry
240 ; CHECK-SD-NEXT: ld1 { v0.h }[0], [x0]
241 ; CHECK-SD-NEXT: ld1 { v1.h }[0], [x1]
242 ; CHECK-SD-NEXT: add x8, x0, #2
243 ; CHECK-SD-NEXT: add x9, x1, #2
244 ; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8]
245 ; CHECK-SD-NEXT: ld1 { v1.h }[2], [x9]
246 ; CHECK-SD-NEXT: mul v0.2s, v0.2s, v1.2s
247 ; CHECK-SD-NEXT: mov w8, v0.s[1]
248 ; CHECK-SD-NEXT: fmov w9, s0
249 ; CHECK-SD-NEXT: strh w9, [x0]
250 ; CHECK-SD-NEXT: strh w8, [x0, #2]
253 ; CHECK-GI-LABEL: v2i16:
254 ; CHECK-GI: // %bb.0: // %entry
255 ; CHECK-GI-NEXT: ld1 { v0.h }[0], [x0]
256 ; CHECK-GI-NEXT: ld1 { v1.h }[0], [x1]
257 ; CHECK-GI-NEXT: ldr h2, [x0, #2]
258 ; CHECK-GI-NEXT: ldr h3, [x1, #2]
259 ; CHECK-GI-NEXT: mov v0.s[1], v2.s[0]
260 ; CHECK-GI-NEXT: mov v1.s[1], v3.s[0]
261 ; CHECK-GI-NEXT: mul v0.2s, v0.2s, v1.2s
262 ; CHECK-GI-NEXT: mov s1, v0.s[1]
263 ; CHECK-GI-NEXT: str h0, [x0]
264 ; CHECK-GI-NEXT: str h1, [x0, #2]
267 %d = load <2 x i16>, ptr %p1
268 %e = load <2 x i16>, ptr %p2
269 %s = mul <2 x i16> %d, %e
270 store <2 x i16> %s, ptr %p1
; <3 x i16> case, passed through memory: loads both operands from %p1 and
; %p2, multiplies them, and stores the product back to %p1.
; Both selectors are expected to perform the multiply as `mul v0.4h`:
;   CHECK-SD (run without -global-isel): one 64-bit `ldr d` per operand, then
;   the result is written as a lane-2 `st1` at %p1+4 plus a 32-bit `str s`.
;   CHECK-GI (run with -global-isel): element-wise loads (`ldr h` followed by
;   two lane `ld1`s per operand) and three element-wise stores.
274 define void @v3i16(ptr %p1, ptr %p2) {
275 ; CHECK-SD-LABEL: v3i16:
276 ; CHECK-SD: // %bb.0: // %entry
277 ; CHECK-SD-NEXT: ldr d0, [x0]
278 ; CHECK-SD-NEXT: ldr d1, [x1]
279 ; CHECK-SD-NEXT: add x8, x0, #4
280 ; CHECK-SD-NEXT: mul v0.4h, v0.4h, v1.4h
281 ; CHECK-SD-NEXT: st1 { v0.h }[2], [x8]
282 ; CHECK-SD-NEXT: str s0, [x0]
285 ; CHECK-GI-LABEL: v3i16:
286 ; CHECK-GI: // %bb.0: // %entry
287 ; CHECK-GI-NEXT: ldr h0, [x0]
288 ; CHECK-GI-NEXT: ldr h1, [x1]
289 ; CHECK-GI-NEXT: add x8, x0, #2
290 ; CHECK-GI-NEXT: add x9, x1, #2
291 ; CHECK-GI-NEXT: add x10, x1, #4
292 ; CHECK-GI-NEXT: ld1 { v0.h }[1], [x8]
293 ; CHECK-GI-NEXT: ld1 { v1.h }[1], [x9]
294 ; CHECK-GI-NEXT: add x9, x0, #4
295 ; CHECK-GI-NEXT: ld1 { v0.h }[2], [x9]
296 ; CHECK-GI-NEXT: ld1 { v1.h }[2], [x10]
297 ; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h
298 ; CHECK-GI-NEXT: str h0, [x0]
299 ; CHECK-GI-NEXT: st1 { v0.h }[1], [x8]
300 ; CHECK-GI-NEXT: st1 { v0.h }[2], [x9]
303 %d = load <3 x i16>, ptr %p1
304 %e = load <3 x i16>, ptr %p2
305 %s = mul <3 x i16> %d, %e
306 store <3 x i16> %s, ptr %p1
; <4 x i16> case, passed and returned in registers. Both RUN configurations
; share the plain CHECK prefix and expect a single `mul v0.4h, v0.4h, v1.4h`.
310 define <4 x i16> @v4i16(<4 x i16> %d, <4 x i16> %e) {
311 ; CHECK-LABEL: v4i16:
312 ; CHECK: // %bb.0: // %entry
313 ; CHECK-NEXT: mul v0.4h, v0.4h, v1.4h
316 %s = mul <4 x i16> %d, %e
; <8 x i16> case, passed and returned in registers. Both RUN configurations
; share the plain CHECK prefix and expect a single `mul v0.8h, v0.8h, v1.8h`.
320 define <8 x i16> @v8i16(<8 x i16> %d, <8 x i16> %e) {
321 ; CHECK-LABEL: v8i16:
322 ; CHECK: // %bb.0: // %entry
323 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
326 %s = mul <8 x i16> %d, %e
; <16 x i16> case: expected as two 8h multiplies (v0*v2 and v1*v3).
; The prefixes are split only because the two instructions are emitted in
; opposite order: CHECK-SD (run without -global-isel) checks the v1 multiply
; first, CHECK-GI (run with -global-isel) checks the v0 multiply first.
330 define <16 x i16> @v16i16(<16 x i16> %d, <16 x i16> %e) {
331 ; CHECK-SD-LABEL: v16i16:
332 ; CHECK-SD: // %bb.0: // %entry
333 ; CHECK-SD-NEXT: mul v1.8h, v1.8h, v3.8h
334 ; CHECK-SD-NEXT: mul v0.8h, v0.8h, v2.8h
337 ; CHECK-GI-LABEL: v16i16:
338 ; CHECK-GI: // %bb.0: // %entry
339 ; CHECK-GI-NEXT: mul v0.8h, v0.8h, v2.8h
340 ; CHECK-GI-NEXT: mul v1.8h, v1.8h, v3.8h
343 %s = mul <16 x i16> %d, %e
; <2 x i32> case, passed and returned in registers. Both RUN configurations
; share the plain CHECK prefix and expect a single `mul v0.2s, v0.2s, v1.2s`.
347 define <2 x i32> @v2i32(<2 x i32> %d, <2 x i32> %e) {
348 ; CHECK-LABEL: v2i32:
349 ; CHECK: // %bb.0: // %entry
350 ; CHECK-NEXT: mul v0.2s, v0.2s, v1.2s
353 %s = mul <2 x i32> %d, %e
; <3 x i32> case, passed and returned in registers. Both RUN configurations
; share the plain CHECK prefix and expect the three-element multiply to be
; carried out as a full four-lane `mul v0.4s, v0.4s, v1.4s`.
357 define <3 x i32> @v3i32(<3 x i32> %d, <3 x i32> %e) {
358 ; CHECK-LABEL: v3i32:
359 ; CHECK: // %bb.0: // %entry
360 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
363 %s = mul <3 x i32> %d, %e
; <4 x i32> case, passed and returned in registers. Both RUN configurations
; share the plain CHECK prefix and expect a single `mul v0.4s, v0.4s, v1.4s`.
367 define <4 x i32> @v4i32(<4 x i32> %d, <4 x i32> %e) {
368 ; CHECK-LABEL: v4i32:
369 ; CHECK: // %bb.0: // %entry
370 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
373 %s = mul <4 x i32> %d, %e
; <8 x i32> case: expected as two 4s multiplies (v0*v2 and v1*v3).
; The prefixes are split only because the two instructions are emitted in
; opposite order: CHECK-SD (run without -global-isel) checks the v1 multiply
; first, CHECK-GI (run with -global-isel) checks the v0 multiply first.
377 define <8 x i32> @v8i32(<8 x i32> %d, <8 x i32> %e) {
378 ; CHECK-SD-LABEL: v8i32:
379 ; CHECK-SD: // %bb.0: // %entry
380 ; CHECK-SD-NEXT: mul v1.4s, v1.4s, v3.4s
381 ; CHECK-SD-NEXT: mul v0.4s, v0.4s, v2.4s
384 ; CHECK-GI-LABEL: v8i32:
385 ; CHECK-GI: // %bb.0: // %entry
386 ; CHECK-GI-NEXT: mul v0.4s, v0.4s, v2.4s
387 ; CHECK-GI-NEXT: mul v1.4s, v1.4s, v3.4s
390 %s = mul <8 x i32> %d, %e
; <2 x i64> case. No vector multiply is checked: both selectors are expected
; to move each 64-bit lane into an X register, perform two scalar `mul`s, and
; rebuild the result vector in v0.
;   CHECK-SD (run without -global-isel): rebuilds with `fmov d0` plus a
;   `mov v0.d[1]` lane insert.
;   CHECK-GI (run with -global-isel): rebuilds with two lane inserts,
;   `mov v0.d[0]` and `mov v0.d[1]`.
394 define <2 x i64> @v2i64(<2 x i64> %d, <2 x i64> %e) {
395 ; CHECK-SD-LABEL: v2i64:
396 ; CHECK-SD: // %bb.0: // %entry
397 ; CHECK-SD-NEXT: fmov x10, d1
398 ; CHECK-SD-NEXT: fmov x11, d0
399 ; CHECK-SD-NEXT: mov x8, v1.d[1]
400 ; CHECK-SD-NEXT: mov x9, v0.d[1]
401 ; CHECK-SD-NEXT: mul x10, x11, x10
402 ; CHECK-SD-NEXT: mul x8, x9, x8
403 ; CHECK-SD-NEXT: fmov d0, x10
404 ; CHECK-SD-NEXT: mov v0.d[1], x8
407 ; CHECK-GI-LABEL: v2i64:
408 ; CHECK-GI: // %bb.0: // %entry
409 ; CHECK-GI-NEXT: fmov x8, d0
410 ; CHECK-GI-NEXT: fmov x9, d1
411 ; CHECK-GI-NEXT: mov x10, v0.d[1]
412 ; CHECK-GI-NEXT: mov x11, v1.d[1]
413 ; CHECK-GI-NEXT: mul x8, x8, x9
414 ; CHECK-GI-NEXT: mul x9, x10, x11
415 ; CHECK-GI-NEXT: mov v0.d[0], x8
416 ; CHECK-GI-NEXT: mov v0.d[1], x9
419 %s = mul <2 x i64> %d, %e
; <3 x i64> case. The six incoming elements are read from d0..d5 and the
; three results are produced in d0..d2; every product is a scalar `mul` on
; X registers.
;   CHECK-SD (run without -global-isel): three independent fmov/mul/fmov
;   chains, one per element.
;   CHECK-GI (run with -global-isel): first packs d0/d1 and d3/d4 into
;   128-bit registers, multiplies those two lanes via X registers, rebuilds
;   v0 and re-extracts d1 from it; the third element (d2*d5) is handled as a
;   separate scalar multiply.
; The `// kill:` lines are register-liveness annotations printed in the
; assembly output and are matched literally like any other line.
423 define <3 x i64> @v3i64(<3 x i64> %d, <3 x i64> %e) {
424 ; CHECK-SD-LABEL: v3i64:
425 ; CHECK-SD: // %bb.0: // %entry
426 ; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3
427 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
428 ; CHECK-SD-NEXT: fmov x8, d3
429 ; CHECK-SD-NEXT: fmov x9, d0
430 ; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4
431 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
432 ; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5
433 ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
434 ; CHECK-SD-NEXT: fmov x10, d1
435 ; CHECK-SD-NEXT: fmov x11, d2
436 ; CHECK-SD-NEXT: mul x8, x9, x8
437 ; CHECK-SD-NEXT: fmov x9, d4
438 ; CHECK-SD-NEXT: mul x9, x10, x9
439 ; CHECK-SD-NEXT: fmov x10, d5
440 ; CHECK-SD-NEXT: fmov d0, x8
441 ; CHECK-SD-NEXT: mul x10, x11, x10
442 ; CHECK-SD-NEXT: fmov d1, x9
443 ; CHECK-SD-NEXT: fmov d2, x10
446 ; CHECK-GI-LABEL: v3i64:
447 ; CHECK-GI: // %bb.0: // %entry
448 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
449 ; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
450 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
451 ; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4
452 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
453 ; CHECK-GI-NEXT: mov v3.d[1], v4.d[0]
454 ; CHECK-GI-NEXT: fmov x8, d0
455 ; CHECK-GI-NEXT: fmov x9, d3
456 ; CHECK-GI-NEXT: mov x10, v0.d[1]
457 ; CHECK-GI-NEXT: mov x11, v3.d[1]
458 ; CHECK-GI-NEXT: mul x8, x8, x9
459 ; CHECK-GI-NEXT: mul x9, x10, x11
460 ; CHECK-GI-NEXT: mov v0.d[0], x8
461 ; CHECK-GI-NEXT: fmov x8, d2
462 ; CHECK-GI-NEXT: mov v0.d[1], x9
463 ; CHECK-GI-NEXT: fmov x9, d5
464 ; CHECK-GI-NEXT: mul x8, x8, x9
465 ; CHECK-GI-NEXT: mov d1, v0.d[1]
466 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
467 ; CHECK-GI-NEXT: fmov d2, x8
470 %s = mul <3 x i64> %d, %e
; <4 x i64> case. Operands arrive in v0..v3 and the result is returned in
; v0/v1. No vector multiply is checked: both selectors are expected to move
; all lanes to X registers, perform four scalar `mul`s, and rebuild the two
; result vectors.
;   CHECK-SD (run without -global-isel): rebuilds each result with `fmov d`
;   plus a `mov v.d[1]` lane insert.
;   CHECK-GI (run with -global-isel): rebuilds each result with `mov v.d[0]`
;   and `mov v.d[1]` lane inserts.
474 define <4 x i64> @v4i64(<4 x i64> %d, <4 x i64> %e) {
475 ; CHECK-SD-LABEL: v4i64:
476 ; CHECK-SD: // %bb.0: // %entry
477 ; CHECK-SD-NEXT: fmov x8, d2
478 ; CHECK-SD-NEXT: fmov x9, d0
479 ; CHECK-SD-NEXT: fmov x12, d1
480 ; CHECK-SD-NEXT: mov x10, v2.d[1]
481 ; CHECK-SD-NEXT: mov x11, v0.d[1]
482 ; CHECK-SD-NEXT: mov x13, v3.d[1]
483 ; CHECK-SD-NEXT: mov x14, v1.d[1]
484 ; CHECK-SD-NEXT: mul x8, x9, x8
485 ; CHECK-SD-NEXT: fmov x9, d3
486 ; CHECK-SD-NEXT: mul x10, x11, x10
487 ; CHECK-SD-NEXT: mul x9, x12, x9
488 ; CHECK-SD-NEXT: fmov d0, x8
489 ; CHECK-SD-NEXT: mul x11, x14, x13
490 ; CHECK-SD-NEXT: mov v0.d[1], x10
491 ; CHECK-SD-NEXT: fmov d1, x9
492 ; CHECK-SD-NEXT: mov v1.d[1], x11
495 ; CHECK-GI-LABEL: v4i64:
496 ; CHECK-GI: // %bb.0: // %entry
497 ; CHECK-GI-NEXT: fmov x8, d0
498 ; CHECK-GI-NEXT: fmov x9, d2
499 ; CHECK-GI-NEXT: fmov x12, d3
500 ; CHECK-GI-NEXT: mov x10, v0.d[1]
501 ; CHECK-GI-NEXT: mov x11, v2.d[1]
502 ; CHECK-GI-NEXT: mov x13, v1.d[1]
503 ; CHECK-GI-NEXT: mov x14, v3.d[1]
504 ; CHECK-GI-NEXT: mul x8, x8, x9
505 ; CHECK-GI-NEXT: fmov x9, d1
506 ; CHECK-GI-NEXT: mul x10, x10, x11
507 ; CHECK-GI-NEXT: mul x9, x9, x12
508 ; CHECK-GI-NEXT: mov v0.d[0], x8
509 ; CHECK-GI-NEXT: mul x11, x13, x14
510 ; CHECK-GI-NEXT: mov v1.d[0], x9
511 ; CHECK-GI-NEXT: mov v0.d[1], x10
512 ; CHECK-GI-NEXT: mov v1.d[1], x11
515 %s = mul <4 x i64> %d, %e
; <2 x i128> case. Operands occupy x0..x7 and the result is produced in
; x0..x3; each 128-bit element is expanded into 64-bit multiplies.
;   CHECK-SD (run without -global-isel): per element, umulh + two madd for
;   the high half and one mul for the low half (8 instructions total).
;   CHECK-GI (run with -global-isel): per element, two mul + umulh + madd,
;   then a mov for the low half and an add for the high half
;   (12 instructions total).
519 define <2 x i128> @v2i128(<2 x i128> %d, <2 x i128> %e) {
520 ; CHECK-SD-LABEL: v2i128:
521 ; CHECK-SD: // %bb.0: // %entry
522 ; CHECK-SD-NEXT: umulh x8, x2, x6
523 ; CHECK-SD-NEXT: umulh x9, x0, x4
524 ; CHECK-SD-NEXT: madd x8, x2, x7, x8
525 ; CHECK-SD-NEXT: madd x9, x0, x5, x9
526 ; CHECK-SD-NEXT: madd x3, x3, x6, x8
527 ; CHECK-SD-NEXT: madd x1, x1, x4, x9
528 ; CHECK-SD-NEXT: mul x0, x0, x4
529 ; CHECK-SD-NEXT: mul x2, x2, x6
532 ; CHECK-GI-LABEL: v2i128:
533 ; CHECK-GI: // %bb.0: // %entry
534 ; CHECK-GI-NEXT: mul x9, x0, x5
535 ; CHECK-GI-NEXT: mul x12, x2, x7
536 ; CHECK-GI-NEXT: mul x8, x0, x4
537 ; CHECK-GI-NEXT: umulh x10, x0, x4
538 ; CHECK-GI-NEXT: madd x11, x1, x4, x9
539 ; CHECK-GI-NEXT: mov x0, x8
540 ; CHECK-GI-NEXT: mul x9, x2, x6
541 ; CHECK-GI-NEXT: umulh x13, x2, x6
542 ; CHECK-GI-NEXT: add x1, x11, x10
543 ; CHECK-GI-NEXT: madd x12, x3, x6, x12
544 ; CHECK-GI-NEXT: mov x2, x9
545 ; CHECK-GI-NEXT: add x3, x12, x13
548 %s = mul <2 x i128> %d, %e
; <3 x i128> case. The operands do not all fit in x0..x7: the expected code
; reads part of the second operand from the stack with `ldp` at [sp] and
; [sp, #16]. The result is produced in x0..x5, and each 128-bit element is
; expanded into 64-bit mul/umulh/madd pieces.
;   CHECK-SD (run without -global-isel): umulh + two madd + mul per element.
;   CHECK-GI (run with -global-isel): two mul + umulh + madd + add per
;   element, with extra mov instructions for the low halves of the first two
;   elements.
552 define <3 x i128> @v3i128(<3 x i128> %d, <3 x i128> %e) {
553 ; CHECK-SD-LABEL: v3i128:
554 ; CHECK-SD: // %bb.0: // %entry
555 ; CHECK-SD-NEXT: umulh x9, x0, x6
556 ; CHECK-SD-NEXT: ldp x8, x10, [sp]
557 ; CHECK-SD-NEXT: madd x9, x0, x7, x9
558 ; CHECK-SD-NEXT: umulh x11, x2, x8
559 ; CHECK-SD-NEXT: madd x1, x1, x6, x9
560 ; CHECK-SD-NEXT: ldp x9, x12, [sp, #16]
561 ; CHECK-SD-NEXT: madd x10, x2, x10, x11
562 ; CHECK-SD-NEXT: umulh x13, x4, x9
563 ; CHECK-SD-NEXT: madd x3, x3, x8, x10
564 ; CHECK-SD-NEXT: madd x11, x4, x12, x13
565 ; CHECK-SD-NEXT: mul x0, x0, x6
566 ; CHECK-SD-NEXT: madd x5, x5, x9, x11
567 ; CHECK-SD-NEXT: mul x2, x2, x8
568 ; CHECK-SD-NEXT: mul x4, x4, x9
571 ; CHECK-GI-LABEL: v3i128:
572 ; CHECK-GI: // %bb.0: // %entry
573 ; CHECK-GI-NEXT: ldp x10, x13, [sp]
574 ; CHECK-GI-NEXT: mul x9, x0, x7
575 ; CHECK-GI-NEXT: mul x8, x0, x6
576 ; CHECK-GI-NEXT: mul x13, x2, x13
577 ; CHECK-GI-NEXT: madd x12, x1, x6, x9
578 ; CHECK-GI-NEXT: mul x9, x2, x10
579 ; CHECK-GI-NEXT: umulh x14, x2, x10
580 ; CHECK-GI-NEXT: madd x10, x3, x10, x13
581 ; CHECK-GI-NEXT: ldp x13, x15, [sp, #16]
582 ; CHECK-GI-NEXT: mov x2, x9
583 ; CHECK-GI-NEXT: umulh x11, x0, x6
584 ; CHECK-GI-NEXT: mov x0, x8
585 ; CHECK-GI-NEXT: mul x15, x4, x15
586 ; CHECK-GI-NEXT: add x3, x10, x14
587 ; CHECK-GI-NEXT: umulh x16, x4, x13
588 ; CHECK-GI-NEXT: add x1, x12, x11
589 ; CHECK-GI-NEXT: madd x15, x5, x13, x15
590 ; CHECK-GI-NEXT: mul x4, x4, x13
591 ; CHECK-GI-NEXT: add x5, x15, x16
594 %s = mul <3 x i128> %d, %e
; <4 x i128> case. The first operand's eight 64-bit pieces are read from
; x0..x7, and the second operand is read from the stack with four `ldp`s at
; [sp], [sp, #16], [sp, #32] and [sp, #48]. The result is produced in
; x0..x7, and each 128-bit element is expanded into 64-bit mul/umulh/madd
; pieces.
;   CHECK-SD (run without -global-isel): umulh + two madd + mul per element.
;   CHECK-GI (run with -global-isel): two mul + umulh + madd + add per
;   element, plus mov instructions for the low halves of the first three
;   elements.
; NOTE(review): the CHECK-GI sequence uses x18 as a scratch register and
; temporarily reuses x0 (restored from x8 by the final mov); both are part of
; the recorded output for this aarch64-none-eabi triple.
598 define <4 x i128> @v4i128(<4 x i128> %d, <4 x i128> %e) {
599 ; CHECK-SD-LABEL: v4i128:
600 ; CHECK-SD: // %bb.0: // %entry
601 ; CHECK-SD-NEXT: ldp x8, x9, [sp]
602 ; CHECK-SD-NEXT: ldp x11, x12, [sp, #16]
603 ; CHECK-SD-NEXT: umulh x10, x0, x8
604 ; CHECK-SD-NEXT: umulh x13, x2, x11
605 ; CHECK-SD-NEXT: madd x9, x0, x9, x10
606 ; CHECK-SD-NEXT: madd x10, x2, x12, x13
607 ; CHECK-SD-NEXT: ldp x13, x14, [sp, #48]
608 ; CHECK-SD-NEXT: madd x1, x1, x8, x9
609 ; CHECK-SD-NEXT: madd x3, x3, x11, x10
610 ; CHECK-SD-NEXT: ldp x9, x10, [sp, #32]
611 ; CHECK-SD-NEXT: umulh x15, x6, x13
612 ; CHECK-SD-NEXT: umulh x12, x4, x9
613 ; CHECK-SD-NEXT: mul x0, x0, x8
614 ; CHECK-SD-NEXT: madd x10, x4, x10, x12
615 ; CHECK-SD-NEXT: madd x12, x6, x14, x15
616 ; CHECK-SD-NEXT: madd x5, x5, x9, x10
617 ; CHECK-SD-NEXT: madd x7, x7, x13, x12
618 ; CHECK-SD-NEXT: mul x2, x2, x11
619 ; CHECK-SD-NEXT: mul x4, x4, x9
620 ; CHECK-SD-NEXT: mul x6, x6, x13
623 ; CHECK-GI-LABEL: v4i128:
624 ; CHECK-GI: // %bb.0: // %entry
625 ; CHECK-GI-NEXT: ldp x9, x10, [sp]
626 ; CHECK-GI-NEXT: ldp x15, x16, [sp, #32]
627 ; CHECK-GI-NEXT: mul x10, x0, x10
628 ; CHECK-GI-NEXT: mul x16, x4, x16
629 ; CHECK-GI-NEXT: madd x12, x1, x9, x10
630 ; CHECK-GI-NEXT: ldp x10, x13, [sp, #16]
631 ; CHECK-GI-NEXT: mul x8, x0, x9
632 ; CHECK-GI-NEXT: mul x13, x2, x13
633 ; CHECK-GI-NEXT: umulh x11, x0, x9
634 ; CHECK-GI-NEXT: mul x9, x2, x10
635 ; CHECK-GI-NEXT: umulh x14, x2, x10
636 ; CHECK-GI-NEXT: add x1, x12, x11
637 ; CHECK-GI-NEXT: madd x13, x3, x10, x13
638 ; CHECK-GI-NEXT: mov x2, x9
639 ; CHECK-GI-NEXT: mul x10, x4, x15
640 ; CHECK-GI-NEXT: umulh x17, x4, x15
641 ; CHECK-GI-NEXT: add x3, x13, x14
642 ; CHECK-GI-NEXT: madd x15, x5, x15, x16
643 ; CHECK-GI-NEXT: ldp x16, x18, [sp, #48]
644 ; CHECK-GI-NEXT: mov x4, x10
645 ; CHECK-GI-NEXT: mul x18, x6, x18
646 ; CHECK-GI-NEXT: umulh x0, x6, x16
647 ; CHECK-GI-NEXT: add x5, x15, x17
648 ; CHECK-GI-NEXT: madd x18, x7, x16, x18
649 ; CHECK-GI-NEXT: mul x6, x6, x16
650 ; CHECK-GI-NEXT: add x7, x18, x0
651 ; CHECK-GI-NEXT: mov x0, x8
654 %s = mul <4 x i128> %d, %e