1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2 ; RUN: llc -mtriple=aarch64 -aarch64-enable-sink-fold=true -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3 ; RUN: llc -mtriple=aarch64 -aarch64-enable-sink-fold=true -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
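; NOTE(review): the prefix on the following line is spelled with an underscore, so
; FileCheck does not treat it as a directive and the fallback warning is never
; checked. mulv_v3i64 has its own GlobalISel expectations further down, which
; suggests GlobalISel no longer falls back for it; confirm against current llc
; output before either switching to the hyphenated GI prefix or deleting the line.
5 ; CHECK_GI: warning: Instruction selection used fallback path for mulv_v3i64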
; Declarations of the llvm.vector.reduce.mul overloads exercised by the tests in
; this file: i8, i16, i32 and i64 element types at several vector widths
; (including non-power-of-two 3-element vectors), plus <2 x i128>.
7 declare i8 @llvm.vector.reduce.mul.v2i8(<2 x i8>)
8 declare i8 @llvm.vector.reduce.mul.v3i8(<3 x i8>)
9 declare i8 @llvm.vector.reduce.mul.v4i8(<4 x i8>)
10 declare i8 @llvm.vector.reduce.mul.v8i8(<8 x i8>)
11 declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>)
12 declare i8 @llvm.vector.reduce.mul.v32i8(<32 x i8>)
13 declare i16 @llvm.vector.reduce.mul.v2i16(<2 x i16>)
14 declare i16 @llvm.vector.reduce.mul.v3i16(<3 x i16>)
15 declare i16 @llvm.vector.reduce.mul.v4i16(<4 x i16>)
16 declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>)
17 declare i16 @llvm.vector.reduce.mul.v16i16(<16 x i16>)
18 declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>)
19 declare i32 @llvm.vector.reduce.mul.v3i32(<3 x i32>)
20 declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)
21 declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>)
22 declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>)
23 declare i64 @llvm.vector.reduce.mul.v3i64(<3 x i64>)
24 declare i64 @llvm.vector.reduce.mul.v4i64(<4 x i64>)
25 declare i128 @llvm.vector.reduce.mul.v2i128(<2 x i128>)
; mulv_v2i8: multiply-reduce a <2 x i8> vector to one i8 with
; llvm.vector.reduce.mul. The default-selector run (SD prefix) and the
; -global-isel run (GI prefix) have separate expectations; both read the two
; elements from the 32-bit lanes of v0 and finish with a single scalar mul.
27 define i8 @mulv_v2i8(<2 x i8> %a) {
28 ; CHECK-SD-LABEL: mulv_v2i8:
29 ; CHECK-SD: // %bb.0: // %entry
30 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
31 ; CHECK-SD-NEXT: mov w8, v0.s[1]
32 ; CHECK-SD-NEXT: fmov w9, s0
33 ; CHECK-SD-NEXT: mul w0, w9, w8
36 ; CHECK-GI-LABEL: mulv_v2i8:
37 ; CHECK-GI: // %bb.0: // %entry
38 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
39 ; CHECK-GI-NEXT: mov s1, v0.s[1]
40 ; CHECK-GI-NEXT: fmov w8, s0
41 ; CHECK-GI-NEXT: fmov w9, s1
42 ; CHECK-GI-NEXT: mul w0, w8, w9
45 %arg1 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> %a)
; mulv_v3i8: multiply-reduce a <3 x i8> vector to one i8. Both RUN lines share
; the same expectations here: the three elements are multiplied directly from
; w0, w1 and w2 with two scalar muls, with no vector instructions.
50 define i8 @mulv_v3i8(<3 x i8> %a) {
51 ; CHECK-LABEL: mulv_v3i8:
52 ; CHECK: // %bb.0: // %entry
53 ; CHECK-NEXT: mul w8, w0, w1
54 ; CHECK-NEXT: mul w0, w8, w2
57 %arg1 = call i8 @llvm.vector.reduce.mul.v3i8(<3 x i8> %a)
; mulv_v4i8: multiply-reduce a <4 x i8> vector to one i8. The elements are read
; from the 16-bit lanes of v0. The default-selector expectations (SD prefix)
; chain three scalar muls serially; the -global-isel expectations (GI prefix)
; multiply the lanes in pairs and then combine the two partial products.
61 define i8 @mulv_v4i8(<4 x i8> %a) {
62 ; CHECK-SD-LABEL: mulv_v4i8:
63 ; CHECK-SD: // %bb.0: // %entry
64 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
65 ; CHECK-SD-NEXT: umov w8, v0.h[1]
66 ; CHECK-SD-NEXT: umov w9, v0.h[0]
67 ; CHECK-SD-NEXT: umov w10, v0.h[2]
68 ; CHECK-SD-NEXT: mul w8, w9, w8
69 ; CHECK-SD-NEXT: umov w9, v0.h[3]
70 ; CHECK-SD-NEXT: mul w8, w8, w10
71 ; CHECK-SD-NEXT: mul w0, w8, w9
74 ; CHECK-GI-LABEL: mulv_v4i8:
75 ; CHECK-GI: // %bb.0: // %entry
76 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
77 ; CHECK-GI-NEXT: mov h1, v0.h[1]
78 ; CHECK-GI-NEXT: mov h2, v0.h[2]
79 ; CHECK-GI-NEXT: mov h3, v0.h[3]
80 ; CHECK-GI-NEXT: fmov w8, s0
81 ; CHECK-GI-NEXT: fmov w9, s1
82 ; CHECK-GI-NEXT: fmov w10, s2
83 ; CHECK-GI-NEXT: fmov w11, s3
84 ; CHECK-GI-NEXT: mul w8, w8, w9
85 ; CHECK-GI-NEXT: mul w9, w10, w11
86 ; CHECK-GI-NEXT: mul w0, w8, w9
89 %arg1 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> %a)
; mulv_v8i8: multiply-reduce a <8 x i8> vector to one i8. No vector multiply is
; expected: the default-selector run (SD prefix) extracts the eight byte lanes
; with umov and chains seven scalar muls, while the -global-isel run (GI prefix)
; moves each lane to its own FP register and multiplies them pairwise.
93 define i8 @mulv_v8i8(<8 x i8> %a) {
94 ; CHECK-SD-LABEL: mulv_v8i8:
95 ; CHECK-SD: // %bb.0: // %entry
96 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
97 ; CHECK-SD-NEXT: umov w8, v0.b[1]
98 ; CHECK-SD-NEXT: umov w9, v0.b[0]
99 ; CHECK-SD-NEXT: umov w10, v0.b[2]
100 ; CHECK-SD-NEXT: mul w8, w9, w8
101 ; CHECK-SD-NEXT: umov w9, v0.b[3]
102 ; CHECK-SD-NEXT: mul w8, w8, w10
103 ; CHECK-SD-NEXT: umov w10, v0.b[4]
104 ; CHECK-SD-NEXT: mul w8, w8, w9
105 ; CHECK-SD-NEXT: umov w9, v0.b[5]
106 ; CHECK-SD-NEXT: mul w8, w8, w10
107 ; CHECK-SD-NEXT: umov w10, v0.b[6]
108 ; CHECK-SD-NEXT: mul w8, w8, w9
109 ; CHECK-SD-NEXT: umov w9, v0.b[7]
110 ; CHECK-SD-NEXT: mul w8, w8, w10
111 ; CHECK-SD-NEXT: mul w0, w8, w9
114 ; CHECK-GI-LABEL: mulv_v8i8:
115 ; CHECK-GI: // %bb.0: // %entry
116 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
117 ; CHECK-GI-NEXT: mov b1, v0.b[1]
118 ; CHECK-GI-NEXT: mov b2, v0.b[2]
119 ; CHECK-GI-NEXT: mov b3, v0.b[3]
120 ; CHECK-GI-NEXT: mov b4, v0.b[4]
121 ; CHECK-GI-NEXT: mov b5, v0.b[5]
122 ; CHECK-GI-NEXT: fmov w8, s0
123 ; CHECK-GI-NEXT: mov b6, v0.b[6]
124 ; CHECK-GI-NEXT: mov b7, v0.b[7]
125 ; CHECK-GI-NEXT: fmov w9, s1
126 ; CHECK-GI-NEXT: fmov w10, s2
127 ; CHECK-GI-NEXT: fmov w11, s3
128 ; CHECK-GI-NEXT: fmov w12, s5
129 ; CHECK-GI-NEXT: mul w8, w8, w9
130 ; CHECK-GI-NEXT: fmov w9, s4
131 ; CHECK-GI-NEXT: mul w10, w10, w11
132 ; CHECK-GI-NEXT: fmov w11, s6
133 ; CHECK-GI-NEXT: mul w9, w9, w12
134 ; CHECK-GI-NEXT: fmov w12, s7
135 ; CHECK-GI-NEXT: mul w8, w8, w10
136 ; CHECK-GI-NEXT: mul w11, w11, w12
137 ; CHECK-GI-NEXT: mul w9, w9, w11
138 ; CHECK-GI-NEXT: mul w0, w8, w9
141 %arg1 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> %a)
; mulv_v16i8: multiply-reduce a <16 x i8> vector to one i8. Both selectors are
; expected to first multiply the high 64-bit half into the low half with one
; 8b vector mul (the SD expectations use ext to fetch the high half, the GI
; expectations use a d-lane mov), then reduce the remaining eight bytes with
; scalar muls: a serial chain for SD, pairwise for GI.
145 define i8 @mulv_v16i8(<16 x i8> %a) {
146 ; CHECK-SD-LABEL: mulv_v16i8:
147 ; CHECK-SD: // %bb.0: // %entry
148 ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
149 ; CHECK-SD-NEXT: mul v0.8b, v0.8b, v1.8b
150 ; CHECK-SD-NEXT: umov w8, v0.b[1]
151 ; CHECK-SD-NEXT: umov w9, v0.b[0]
152 ; CHECK-SD-NEXT: umov w10, v0.b[2]
153 ; CHECK-SD-NEXT: mul w8, w9, w8
154 ; CHECK-SD-NEXT: umov w9, v0.b[3]
155 ; CHECK-SD-NEXT: mul w8, w8, w10
156 ; CHECK-SD-NEXT: umov w10, v0.b[4]
157 ; CHECK-SD-NEXT: mul w8, w8, w9
158 ; CHECK-SD-NEXT: umov w9, v0.b[5]
159 ; CHECK-SD-NEXT: mul w8, w8, w10
160 ; CHECK-SD-NEXT: umov w10, v0.b[6]
161 ; CHECK-SD-NEXT: mul w8, w8, w9
162 ; CHECK-SD-NEXT: umov w9, v0.b[7]
163 ; CHECK-SD-NEXT: mul w8, w8, w10
164 ; CHECK-SD-NEXT: mul w0, w8, w9
167 ; CHECK-GI-LABEL: mulv_v16i8:
168 ; CHECK-GI: // %bb.0: // %entry
169 ; CHECK-GI-NEXT: mov d1, v0.d[1]
170 ; CHECK-GI-NEXT: mul v0.8b, v0.8b, v1.8b
171 ; CHECK-GI-NEXT: mov b1, v0.b[1]
172 ; CHECK-GI-NEXT: mov b2, v0.b[2]
173 ; CHECK-GI-NEXT: mov b3, v0.b[3]
174 ; CHECK-GI-NEXT: mov b4, v0.b[4]
175 ; CHECK-GI-NEXT: mov b5, v0.b[5]
176 ; CHECK-GI-NEXT: fmov w8, s0
177 ; CHECK-GI-NEXT: mov b6, v0.b[6]
178 ; CHECK-GI-NEXT: mov b7, v0.b[7]
179 ; CHECK-GI-NEXT: fmov w9, s1
180 ; CHECK-GI-NEXT: fmov w10, s2
181 ; CHECK-GI-NEXT: fmov w11, s3
182 ; CHECK-GI-NEXT: fmov w12, s5
183 ; CHECK-GI-NEXT: mul w8, w8, w9
184 ; CHECK-GI-NEXT: fmov w9, s4
185 ; CHECK-GI-NEXT: mul w10, w10, w11
186 ; CHECK-GI-NEXT: fmov w11, s6
187 ; CHECK-GI-NEXT: mul w9, w9, w12
188 ; CHECK-GI-NEXT: fmov w12, s7
189 ; CHECK-GI-NEXT: mul w8, w8, w10
190 ; CHECK-GI-NEXT: mul w11, w11, w12
191 ; CHECK-GI-NEXT: mul w9, w9, w11
192 ; CHECK-GI-NEXT: mul w0, w8, w9
195 %arg1 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %a)
; mulv_v32i8: multiply-reduce a <32 x i8> vector (held in v0 and v1) to one i8.
; The SD expectations combine the two registers with a 16b vector mul, fold the
; high half with ext plus an 8b mul, then chain scalar muls. The GI expectations
; split each register into halves first, use three 8b vector muls, and reduce
; the final eight bytes pairwise.
199 define i8 @mulv_v32i8(<32 x i8> %a) {
200 ; CHECK-SD-LABEL: mulv_v32i8:
201 ; CHECK-SD: // %bb.0: // %entry
202 ; CHECK-SD-NEXT: mul v0.16b, v0.16b, v1.16b
203 ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
204 ; CHECK-SD-NEXT: mul v0.8b, v0.8b, v1.8b
205 ; CHECK-SD-NEXT: umov w8, v0.b[1]
206 ; CHECK-SD-NEXT: umov w9, v0.b[0]
207 ; CHECK-SD-NEXT: umov w10, v0.b[2]
208 ; CHECK-SD-NEXT: mul w8, w9, w8
209 ; CHECK-SD-NEXT: umov w9, v0.b[3]
210 ; CHECK-SD-NEXT: mul w8, w8, w10
211 ; CHECK-SD-NEXT: umov w10, v0.b[4]
212 ; CHECK-SD-NEXT: mul w8, w8, w9
213 ; CHECK-SD-NEXT: umov w9, v0.b[5]
214 ; CHECK-SD-NEXT: mul w8, w8, w10
215 ; CHECK-SD-NEXT: umov w10, v0.b[6]
216 ; CHECK-SD-NEXT: mul w8, w8, w9
217 ; CHECK-SD-NEXT: umov w9, v0.b[7]
218 ; CHECK-SD-NEXT: mul w8, w8, w10
219 ; CHECK-SD-NEXT: mul w0, w8, w9
222 ; CHECK-GI-LABEL: mulv_v32i8:
223 ; CHECK-GI: // %bb.0: // %entry
224 ; CHECK-GI-NEXT: mov d2, v0.d[1]
225 ; CHECK-GI-NEXT: mov d3, v1.d[1]
226 ; CHECK-GI-NEXT: mul v0.8b, v0.8b, v2.8b
227 ; CHECK-GI-NEXT: mul v1.8b, v1.8b, v3.8b
228 ; CHECK-GI-NEXT: mul v0.8b, v0.8b, v1.8b
229 ; CHECK-GI-NEXT: mov b1, v0.b[1]
230 ; CHECK-GI-NEXT: mov b2, v0.b[2]
231 ; CHECK-GI-NEXT: mov b3, v0.b[3]
232 ; CHECK-GI-NEXT: mov b4, v0.b[4]
233 ; CHECK-GI-NEXT: mov b5, v0.b[5]
234 ; CHECK-GI-NEXT: fmov w8, s0
235 ; CHECK-GI-NEXT: mov b6, v0.b[6]
236 ; CHECK-GI-NEXT: mov b7, v0.b[7]
237 ; CHECK-GI-NEXT: fmov w9, s1
238 ; CHECK-GI-NEXT: fmov w10, s2
239 ; CHECK-GI-NEXT: fmov w11, s3
240 ; CHECK-GI-NEXT: fmov w12, s5
241 ; CHECK-GI-NEXT: mul w8, w8, w9
242 ; CHECK-GI-NEXT: fmov w9, s4
243 ; CHECK-GI-NEXT: mul w10, w10, w11
244 ; CHECK-GI-NEXT: fmov w11, s6
245 ; CHECK-GI-NEXT: mul w9, w9, w12
246 ; CHECK-GI-NEXT: fmov w12, s7
247 ; CHECK-GI-NEXT: mul w8, w8, w10
248 ; CHECK-GI-NEXT: mul w11, w11, w12
249 ; CHECK-GI-NEXT: mul w9, w9, w11
250 ; CHECK-GI-NEXT: mul w0, w8, w9
253 %arg1 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> %a)
; mulv_v2i16: multiply-reduce a <2 x i16> vector to one i16. As in the
; <2 x i8> case, both selectors are expected to read the two elements from the
; 32-bit lanes of v0 and multiply them with a single scalar mul; only the lane
; extraction instructions differ between the SD and GI expectations.
257 define i16 @mulv_v2i16(<2 x i16> %a) {
258 ; CHECK-SD-LABEL: mulv_v2i16:
259 ; CHECK-SD: // %bb.0: // %entry
260 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
261 ; CHECK-SD-NEXT: mov w8, v0.s[1]
262 ; CHECK-SD-NEXT: fmov w9, s0
263 ; CHECK-SD-NEXT: mul w0, w9, w8
266 ; CHECK-GI-LABEL: mulv_v2i16:
267 ; CHECK-GI: // %bb.0: // %entry
268 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
269 ; CHECK-GI-NEXT: mov s1, v0.s[1]
270 ; CHECK-GI-NEXT: fmov w8, s0
271 ; CHECK-GI-NEXT: fmov w9, s1
272 ; CHECK-GI-NEXT: mul w0, w8, w9
275 %arg1 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> %a)
; mulv_v3i16: multiply-reduce a <3 x i16> vector to one i16. Both selectors are
; expected to extract halfword lanes 0..2 of v0 and combine them with two
; scalar muls; lane 3 is never read in either expected sequence.
279 define i16 @mulv_v3i16(<3 x i16> %a) {
280 ; CHECK-SD-LABEL: mulv_v3i16:
281 ; CHECK-SD: // %bb.0: // %entry
282 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
283 ; CHECK-SD-NEXT: umov w8, v0.h[1]
284 ; CHECK-SD-NEXT: umov w9, v0.h[0]
285 ; CHECK-SD-NEXT: umov w10, v0.h[2]
286 ; CHECK-SD-NEXT: mul w8, w9, w8
287 ; CHECK-SD-NEXT: mul w0, w8, w10
290 ; CHECK-GI-LABEL: mulv_v3i16:
291 ; CHECK-GI: // %bb.0: // %entry
292 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
293 ; CHECK-GI-NEXT: mov h1, v0.h[1]
294 ; CHECK-GI-NEXT: mov h2, v0.h[2]
295 ; CHECK-GI-NEXT: fmov w8, s0
296 ; CHECK-GI-NEXT: fmov w9, s1
297 ; CHECK-GI-NEXT: mul w8, w8, w9
298 ; CHECK-GI-NEXT: fmov w9, s2
299 ; CHECK-GI-NEXT: mul w0, w8, w9
302 %arg1 = call i16 @llvm.vector.reduce.mul.v3i16(<3 x i16> %a)
; mulv_v4i16: multiply-reduce a <4 x i16> vector to one i16. The expected code
; has the same shape as the <4 x i8> test: four halfword lanes extracted from
; v0, then three scalar muls (a serial chain for the SD expectations, two pair
; products followed by a final mul for the GI expectations).
306 define i16 @mulv_v4i16(<4 x i16> %a) {
307 ; CHECK-SD-LABEL: mulv_v4i16:
308 ; CHECK-SD: // %bb.0: // %entry
309 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
310 ; CHECK-SD-NEXT: umov w8, v0.h[1]
311 ; CHECK-SD-NEXT: umov w9, v0.h[0]
312 ; CHECK-SD-NEXT: umov w10, v0.h[2]
313 ; CHECK-SD-NEXT: mul w8, w9, w8
314 ; CHECK-SD-NEXT: umov w9, v0.h[3]
315 ; CHECK-SD-NEXT: mul w8, w8, w10
316 ; CHECK-SD-NEXT: mul w0, w8, w9
319 ; CHECK-GI-LABEL: mulv_v4i16:
320 ; CHECK-GI: // %bb.0: // %entry
321 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
322 ; CHECK-GI-NEXT: mov h1, v0.h[1]
323 ; CHECK-GI-NEXT: mov h2, v0.h[2]
324 ; CHECK-GI-NEXT: mov h3, v0.h[3]
325 ; CHECK-GI-NEXT: fmov w8, s0
326 ; CHECK-GI-NEXT: fmov w9, s1
327 ; CHECK-GI-NEXT: fmov w10, s2
328 ; CHECK-GI-NEXT: fmov w11, s3
329 ; CHECK-GI-NEXT: mul w8, w8, w9
330 ; CHECK-GI-NEXT: mul w9, w10, w11
331 ; CHECK-GI-NEXT: mul w0, w8, w9
334 %arg1 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> %a)
; mulv_v8i16: multiply-reduce a <8 x i16> vector to one i16. Both selectors are
; expected to multiply the high 64-bit half into the low half with one 4h
; vector mul (ext for SD, d-lane mov for GI) and then reduce the remaining four
; halfwords with three scalar muls.
338 define i16 @mulv_v8i16(<8 x i16> %a) {
339 ; CHECK-SD-LABEL: mulv_v8i16:
340 ; CHECK-SD: // %bb.0: // %entry
341 ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
342 ; CHECK-SD-NEXT: mul v0.4h, v0.4h, v1.4h
343 ; CHECK-SD-NEXT: umov w8, v0.h[1]
344 ; CHECK-SD-NEXT: umov w9, v0.h[0]
345 ; CHECK-SD-NEXT: umov w10, v0.h[2]
346 ; CHECK-SD-NEXT: mul w8, w9, w8
347 ; CHECK-SD-NEXT: umov w9, v0.h[3]
348 ; CHECK-SD-NEXT: mul w8, w8, w10
349 ; CHECK-SD-NEXT: mul w0, w8, w9
352 ; CHECK-GI-LABEL: mulv_v8i16:
353 ; CHECK-GI: // %bb.0: // %entry
354 ; CHECK-GI-NEXT: mov d1, v0.d[1]
355 ; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h
356 ; CHECK-GI-NEXT: mov h1, v0.h[1]
357 ; CHECK-GI-NEXT: mov h2, v0.h[2]
358 ; CHECK-GI-NEXT: mov h3, v0.h[3]
359 ; CHECK-GI-NEXT: fmov w8, s0
360 ; CHECK-GI-NEXT: fmov w9, s1
361 ; CHECK-GI-NEXT: fmov w10, s2
362 ; CHECK-GI-NEXT: fmov w11, s3
363 ; CHECK-GI-NEXT: mul w8, w8, w9
364 ; CHECK-GI-NEXT: mul w9, w10, w11
365 ; CHECK-GI-NEXT: mul w0, w8, w9
368 %arg1 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %a)
; mulv_v16i16: multiply-reduce a <16 x i16> vector (held in v0 and v1) to one
; i16. The SD expectations use one 8h vector mul across the two registers, then
; ext plus a 4h mul, then three chained scalar muls. The GI expectations split
; both registers into halves, use three 4h vector muls, and finish with two
; pair products and a final scalar mul.
372 define i16 @mulv_v16i16(<16 x i16> %a) {
373 ; CHECK-SD-LABEL: mulv_v16i16:
374 ; CHECK-SD: // %bb.0: // %entry
375 ; CHECK-SD-NEXT: mul v0.8h, v0.8h, v1.8h
376 ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
377 ; CHECK-SD-NEXT: mul v0.4h, v0.4h, v1.4h
378 ; CHECK-SD-NEXT: umov w8, v0.h[1]
379 ; CHECK-SD-NEXT: umov w9, v0.h[0]
380 ; CHECK-SD-NEXT: umov w10, v0.h[2]
381 ; CHECK-SD-NEXT: mul w8, w9, w8
382 ; CHECK-SD-NEXT: umov w9, v0.h[3]
383 ; CHECK-SD-NEXT: mul w8, w8, w10
384 ; CHECK-SD-NEXT: mul w0, w8, w9
387 ; CHECK-GI-LABEL: mulv_v16i16:
388 ; CHECK-GI: // %bb.0: // %entry
389 ; CHECK-GI-NEXT: mov d2, v0.d[1]
390 ; CHECK-GI-NEXT: mov d3, v1.d[1]
391 ; CHECK-GI-NEXT: mul v0.4h, v0.4h, v2.4h
392 ; CHECK-GI-NEXT: mul v1.4h, v1.4h, v3.4h
393 ; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h
394 ; CHECK-GI-NEXT: mov h1, v0.h[1]
395 ; CHECK-GI-NEXT: mov h2, v0.h[2]
396 ; CHECK-GI-NEXT: mov h3, v0.h[3]
397 ; CHECK-GI-NEXT: fmov w8, s0
398 ; CHECK-GI-NEXT: fmov w9, s1
399 ; CHECK-GI-NEXT: fmov w10, s2
400 ; CHECK-GI-NEXT: mul w8, w8, w9
401 ; CHECK-GI-NEXT: fmov w9, s3
402 ; CHECK-GI-NEXT: mul w9, w10, w9
403 ; CHECK-GI-NEXT: mul w0, w8, w9
406 %arg1 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> %a)
; mulv_v2i32: multiply-reduce a <2 x i32> vector to one i32. Both selectors are
; expected to move the two 32-bit lanes of v0 to general-purpose registers and
; multiply them with one scalar mul; the SD and GI expectations differ only in
; how lane 1 is extracted.
410 define i32 @mulv_v2i32(<2 x i32> %a) {
411 ; CHECK-SD-LABEL: mulv_v2i32:
412 ; CHECK-SD: // %bb.0: // %entry
413 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
414 ; CHECK-SD-NEXT: mov w8, v0.s[1]
415 ; CHECK-SD-NEXT: fmov w9, s0
416 ; CHECK-SD-NEXT: mul w0, w9, w8
419 ; CHECK-GI-LABEL: mulv_v2i32:
420 ; CHECK-GI: // %bb.0: // %entry
421 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
422 ; CHECK-GI-NEXT: mov s1, v0.s[1]
423 ; CHECK-GI-NEXT: fmov w8, s0
424 ; CHECK-GI-NEXT: fmov w9, s1
425 ; CHECK-GI-NEXT: mul w0, w8, w9
428 %arg1 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> %a)
; mulv_v3i32: multiply-reduce a <3 x i32> vector to one i32. Both RUN lines
; share the same expectations: a copy of the input gets the constant 1 (the
; multiplicative identity) written into lane 3, its high half is brought down
; with ext and multiplied into the low half with a 2s vector mul, and the two
; remaining lanes are multiplied with a scalar mul.
432 define i32 @mulv_v3i32(<3 x i32> %a) {
433 ; CHECK-LABEL: mulv_v3i32:
434 ; CHECK: // %bb.0: // %entry
435 ; CHECK-NEXT: mov v1.16b, v0.16b
436 ; CHECK-NEXT: mov w8, #1 // =0x1
437 ; CHECK-NEXT: mov v1.s[3], w8
438 ; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
439 ; CHECK-NEXT: mul v0.2s, v0.2s, v1.2s
440 ; CHECK-NEXT: mov w8, v0.s[1]
441 ; CHECK-NEXT: fmov w9, s0
442 ; CHECK-NEXT: mul w0, w9, w8
445 %arg1 = call i32 @llvm.vector.reduce.mul.v3i32(<3 x i32> %a)
; mulv_v4i32: multiply-reduce a <4 x i32> vector to one i32. Both selectors are
; expected to multiply the high 64-bit half into the low half with one 2s
; vector mul (ext for SD, d-lane mov for GI) and then multiply the two
; remaining lanes with a scalar mul.
449 define i32 @mulv_v4i32(<4 x i32> %a) {
450 ; CHECK-SD-LABEL: mulv_v4i32:
451 ; CHECK-SD: // %bb.0: // %entry
452 ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
453 ; CHECK-SD-NEXT: mul v0.2s, v0.2s, v1.2s
454 ; CHECK-SD-NEXT: mov w8, v0.s[1]
455 ; CHECK-SD-NEXT: fmov w9, s0
456 ; CHECK-SD-NEXT: mul w0, w9, w8
459 ; CHECK-GI-LABEL: mulv_v4i32:
460 ; CHECK-GI: // %bb.0: // %entry
461 ; CHECK-GI-NEXT: mov d1, v0.d[1]
462 ; CHECK-GI-NEXT: mul v0.2s, v0.2s, v1.2s
463 ; CHECK-GI-NEXT: mov s1, v0.s[1]
464 ; CHECK-GI-NEXT: fmov w8, s0
465 ; CHECK-GI-NEXT: fmov w9, s1
466 ; CHECK-GI-NEXT: mul w0, w8, w9
469 %arg1 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %a)
; mulv_v8i32: multiply-reduce a <8 x i32> vector (held in v0 and v1) to one i32.
; The SD expectations use one 4s vector mul across the two registers, then ext
; plus a 2s mul, then a scalar mul. The GI expectations split both registers
; into halves and use three 2s vector muls before the final scalar mul.
473 define i32 @mulv_v8i32(<8 x i32> %a) {
474 ; CHECK-SD-LABEL: mulv_v8i32:
475 ; CHECK-SD: // %bb.0: // %entry
476 ; CHECK-SD-NEXT: mul v0.4s, v0.4s, v1.4s
477 ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
478 ; CHECK-SD-NEXT: mul v0.2s, v0.2s, v1.2s
479 ; CHECK-SD-NEXT: mov w8, v0.s[1]
480 ; CHECK-SD-NEXT: fmov w9, s0
481 ; CHECK-SD-NEXT: mul w0, w9, w8
484 ; CHECK-GI-LABEL: mulv_v8i32:
485 ; CHECK-GI: // %bb.0: // %entry
486 ; CHECK-GI-NEXT: mov d2, v0.d[1]
487 ; CHECK-GI-NEXT: mov d3, v1.d[1]
488 ; CHECK-GI-NEXT: mul v0.2s, v0.2s, v2.2s
489 ; CHECK-GI-NEXT: mul v1.2s, v1.2s, v3.2s
490 ; CHECK-GI-NEXT: mul v0.2s, v0.2s, v1.2s
491 ; CHECK-GI-NEXT: mov s1, v0.s[1]
492 ; CHECK-GI-NEXT: fmov w8, s0
493 ; CHECK-GI-NEXT: fmov w9, s1
494 ; CHECK-GI-NEXT: mul w0, w8, w9
497 %arg1 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %a)
; mulv_v2i64: multiply-reduce a <2 x i64> vector to one i64. No vector multiply
; is expected from either selector: the two 64-bit lanes of v0 are moved to
; x registers and multiplied with one scalar mul.
501 define i64 @mulv_v2i64(<2 x i64> %a) {
502 ; CHECK-SD-LABEL: mulv_v2i64:
503 ; CHECK-SD: // %bb.0: // %entry
504 ; CHECK-SD-NEXT: mov x8, v0.d[1]
505 ; CHECK-SD-NEXT: fmov x9, d0
506 ; CHECK-SD-NEXT: mul x0, x9, x8
509 ; CHECK-GI-LABEL: mulv_v2i64:
510 ; CHECK-GI: // %bb.0: // %entry
511 ; CHECK-GI-NEXT: mov d1, v0.d[1]
512 ; CHECK-GI-NEXT: fmov x8, d0
513 ; CHECK-GI-NEXT: fmov x9, d1
514 ; CHECK-GI-NEXT: mul x0, x8, x9
517 %arg1 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %a)
; mulv_v3i64: multiply-reduce a <3 x i64> vector to one i64. The expected code
; reads the three elements from d0, d1 and d2 and combines them with two scalar
; muls; the SD expectations multiply d0 by d2 first, the GI expectations
; multiply d0 by d1 first.
; NOTE(review): a comment near the top of the file mentions a GlobalISel
; fallback warning for this function, yet the GI sequence below differs from
; the default-selector one - confirm whether GlobalISel still falls back here.
521 define i64 @mulv_v3i64(<3 x i64> %a) {
522 ; CHECK-SD-LABEL: mulv_v3i64:
523 ; CHECK-SD: // %bb.0: // %entry
524 ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
525 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
526 ; CHECK-SD-NEXT: fmov x8, d2
527 ; CHECK-SD-NEXT: fmov x9, d0
528 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
529 ; CHECK-SD-NEXT: mul x8, x9, x8
530 ; CHECK-SD-NEXT: fmov x9, d1
531 ; CHECK-SD-NEXT: mul x0, x9, x8
534 ; CHECK-GI-LABEL: mulv_v3i64:
535 ; CHECK-GI: // %bb.0: // %entry
536 ; CHECK-GI-NEXT: fmov x8, d0
537 ; CHECK-GI-NEXT: fmov x9, d1
538 ; CHECK-GI-NEXT: mul x8, x8, x9
539 ; CHECK-GI-NEXT: fmov x9, d2
540 ; CHECK-GI-NEXT: mul x0, x8, x9
543 %arg1 = call i64 @llvm.vector.reduce.mul.v3i64(<3 x i64> %a)
; mulv_v4i64: multiply-reduce a <4 x i64> vector (held in v0 and v1) to one i64.
; Neither selector is expected to emit a vector multiply: all four 64-bit lanes
; are moved to x registers and combined with three scalar muls. The SD
; expectations pair matching lanes of v0 and v1; the GI expectations pair the
; two lanes within each register.
547 define i64 @mulv_v4i64(<4 x i64> %a) {
548 ; CHECK-SD-LABEL: mulv_v4i64:
549 ; CHECK-SD: // %bb.0: // %entry
550 ; CHECK-SD-NEXT: mov x8, v1.d[1]
551 ; CHECK-SD-NEXT: mov x9, v0.d[1]
552 ; CHECK-SD-NEXT: fmov x10, d0
553 ; CHECK-SD-NEXT: mul x8, x9, x8
554 ; CHECK-SD-NEXT: fmov x9, d1
555 ; CHECK-SD-NEXT: mul x9, x10, x9
556 ; CHECK-SD-NEXT: mul x0, x9, x8
559 ; CHECK-GI-LABEL: mulv_v4i64:
560 ; CHECK-GI: // %bb.0: // %entry
561 ; CHECK-GI-NEXT: mov d2, v0.d[1]
562 ; CHECK-GI-NEXT: mov d3, v1.d[1]
563 ; CHECK-GI-NEXT: fmov x8, d0
564 ; CHECK-GI-NEXT: fmov x9, d2
565 ; CHECK-GI-NEXT: fmov x10, d3
566 ; CHECK-GI-NEXT: mul x8, x8, x9
567 ; CHECK-GI-NEXT: fmov x9, d1
568 ; CHECK-GI-NEXT: mul x9, x9, x10
569 ; CHECK-GI-NEXT: mul x0, x8, x9
572 %arg1 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> %a)
; mulv_v2i128: multiply-reduce a <2 x i128> vector to one i128. The expected
; code works entirely in general-purpose registers, reading operands from
; x0..x3: x0 receives mul x0, x2, and x1 receives the umulh of the same pair
; plus the two cross products (x0*x3 and x1*x2). The SD expectations fold the
; additions into madd; the GI expectations use a separate mul and a final add.
576 define i128 @mulv_v2i128(<2 x i128> %a) {
577 ; CHECK-SD-LABEL: mulv_v2i128:
578 ; CHECK-SD: // %bb.0: // %entry
579 ; CHECK-SD-NEXT: umulh x8, x0, x2
580 ; CHECK-SD-NEXT: madd x8, x0, x3, x8
581 ; CHECK-SD-NEXT: mul x0, x0, x2
582 ; CHECK-SD-NEXT: madd x1, x1, x2, x8
585 ; CHECK-GI-LABEL: mulv_v2i128:
586 ; CHECK-GI: // %bb.0: // %entry
587 ; CHECK-GI-NEXT: mul x9, x0, x3
588 ; CHECK-GI-NEXT: mul x8, x0, x2
589 ; CHECK-GI-NEXT: umulh x10, x0, x2
590 ; CHECK-GI-NEXT: madd x9, x1, x2, x9
591 ; CHECK-GI-NEXT: mov x0, x8
592 ; CHECK-GI-NEXT: add x1, x9, x10
595 %arg1 = call i128 @llvm.vector.reduce.mul.v2i128(<2 x i128> %a)