1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
3 ; RUN: -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE
4 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
5 ; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE
6 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
7 ; RUN: -mcpu=pwr10 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR10LE
8 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
9 ; RUN: -mcpu=pwr10 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR10BE
; Multiply reduction of a <2 x i32> vector via @llvm.vector.reduce.mul.v2i32.
; The expected code splats one word of v2 into v3 (xxspltw), multiplies the
; two vectors (vmuluwm) and extracts a word into r3 using a zero index in r3.
; Little-endian expectations use splat index 2 and vextuwrx; big-endian
; expectations use splat index 1 and vextuwlx. The pwr9 and pwr10
; expectations are the same for a given endianness.
11 define dso_local i32 @v2i32(<2 x i32> %a) local_unnamed_addr #0 {
12 ; PWR9LE-LABEL: v2i32:
13 ; PWR9LE: # %bb.0: # %entry
14 ; PWR9LE-NEXT: xxspltw v3, v2, 2
15 ; PWR9LE-NEXT: li r3, 0
16 ; PWR9LE-NEXT: vmuluwm v2, v2, v3
17 ; PWR9LE-NEXT: vextuwrx r3, r3, v2
20 ; PWR9BE-LABEL: v2i32:
21 ; PWR9BE: # %bb.0: # %entry
22 ; PWR9BE-NEXT: xxspltw v3, v2, 1
23 ; PWR9BE-NEXT: li r3, 0
24 ; PWR9BE-NEXT: vmuluwm v2, v2, v3
25 ; PWR9BE-NEXT: vextuwlx r3, r3, v2
28 ; PWR10LE-LABEL: v2i32:
29 ; PWR10LE: # %bb.0: # %entry
30 ; PWR10LE-NEXT: xxspltw v3, v2, 2
31 ; PWR10LE-NEXT: li r3, 0
32 ; PWR10LE-NEXT: vmuluwm v2, v2, v3
33 ; PWR10LE-NEXT: vextuwrx r3, r3, v2
36 ; PWR10BE-LABEL: v2i32:
37 ; PWR10BE: # %bb.0: # %entry
38 ; PWR10BE-NEXT: xxspltw v3, v2, 1
39 ; PWR10BE-NEXT: li r3, 0
40 ; PWR10BE-NEXT: vmuluwm v2, v2, v3
41 ; PWR10BE-NEXT: vextuwlx r3, r3, v2
44 %0 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> %a)
; Multiply reduction of a <4 x i32> vector via @llvm.vector.reduce.mul.v4i32.
; The expected code combines v2 with its doubleword-swapped copy (xxswapd +
; vmuluwm), then with a word splat of the partial result (xxspltw + vmuluwm),
; and finally extracts a word into r3 using a zero index in r3.
; Little-endian expectations use splat index 2 and vextuwrx; big-endian
; expectations use splat index 1 and vextuwlx. The pwr9 and pwr10
; expectations are the same for a given endianness.
48 define dso_local i32 @v4i32(<4 x i32> %a) local_unnamed_addr #0 {
49 ; PWR9LE-LABEL: v4i32:
50 ; PWR9LE: # %bb.0: # %entry
51 ; PWR9LE-NEXT: xxswapd v3, v2
52 ; PWR9LE-NEXT: li r3, 0
53 ; PWR9LE-NEXT: vmuluwm v2, v2, v3
54 ; PWR9LE-NEXT: xxspltw v3, v2, 2
55 ; PWR9LE-NEXT: vmuluwm v2, v2, v3
56 ; PWR9LE-NEXT: vextuwrx r3, r3, v2
59 ; PWR9BE-LABEL: v4i32:
60 ; PWR9BE: # %bb.0: # %entry
61 ; PWR9BE-NEXT: xxswapd v3, v2
62 ; PWR9BE-NEXT: li r3, 0
63 ; PWR9BE-NEXT: vmuluwm v2, v2, v3
64 ; PWR9BE-NEXT: xxspltw v3, v2, 1
65 ; PWR9BE-NEXT: vmuluwm v2, v2, v3
66 ; PWR9BE-NEXT: vextuwlx r3, r3, v2
69 ; PWR10LE-LABEL: v4i32:
70 ; PWR10LE: # %bb.0: # %entry
71 ; PWR10LE-NEXT: xxswapd v3, v2
72 ; PWR10LE-NEXT: li r3, 0
73 ; PWR10LE-NEXT: vmuluwm v2, v2, v3
74 ; PWR10LE-NEXT: xxspltw v3, v2, 2
75 ; PWR10LE-NEXT: vmuluwm v2, v2, v3
76 ; PWR10LE-NEXT: vextuwrx r3, r3, v2
79 ; PWR10BE-LABEL: v4i32:
80 ; PWR10BE: # %bb.0: # %entry
81 ; PWR10BE-NEXT: xxswapd v3, v2
82 ; PWR10BE-NEXT: li r3, 0
83 ; PWR10BE-NEXT: vmuluwm v2, v2, v3
84 ; PWR10BE-NEXT: xxspltw v3, v2, 1
85 ; PWR10BE-NEXT: vmuluwm v2, v2, v3
86 ; PWR10BE-NEXT: vextuwlx r3, r3, v2
89 %0 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %a)
; Multiply reduction of an <8 x i32> vector via @llvm.vector.reduce.mul.v8i32.
; The expected code starts by multiplying v2 with v3 (presumably the two
; halves of the wide argument -- confirm against the ABI), then follows the
; same xxswapd / xxspltw / vmuluwm steps and word extract into r3 as the
; <4 x i32> case.
; Little-endian expectations use splat index 2 and vextuwrx; big-endian
; expectations use splat index 1 and vextuwlx. The pwr9 and pwr10
; expectations are the same for a given endianness.
93 define dso_local i32 @v8i32(<8 x i32> %a) local_unnamed_addr #0 {
94 ; PWR9LE-LABEL: v8i32:
95 ; PWR9LE: # %bb.0: # %entry
96 ; PWR9LE-NEXT: vmuluwm v2, v2, v3
97 ; PWR9LE-NEXT: li r3, 0
98 ; PWR9LE-NEXT: xxswapd v3, v2
99 ; PWR9LE-NEXT: vmuluwm v2, v2, v3
100 ; PWR9LE-NEXT: xxspltw v3, v2, 2
101 ; PWR9LE-NEXT: vmuluwm v2, v2, v3
102 ; PWR9LE-NEXT: vextuwrx r3, r3, v2
105 ; PWR9BE-LABEL: v8i32:
106 ; PWR9BE: # %bb.0: # %entry
107 ; PWR9BE-NEXT: vmuluwm v2, v2, v3
108 ; PWR9BE-NEXT: li r3, 0
109 ; PWR9BE-NEXT: xxswapd v3, v2
110 ; PWR9BE-NEXT: vmuluwm v2, v2, v3
111 ; PWR9BE-NEXT: xxspltw v3, v2, 1
112 ; PWR9BE-NEXT: vmuluwm v2, v2, v3
113 ; PWR9BE-NEXT: vextuwlx r3, r3, v2
116 ; PWR10LE-LABEL: v8i32:
117 ; PWR10LE: # %bb.0: # %entry
118 ; PWR10LE-NEXT: vmuluwm v2, v2, v3
119 ; PWR10LE-NEXT: li r3, 0
120 ; PWR10LE-NEXT: xxswapd v3, v2
121 ; PWR10LE-NEXT: vmuluwm v2, v2, v3
122 ; PWR10LE-NEXT: xxspltw v3, v2, 2
123 ; PWR10LE-NEXT: vmuluwm v2, v2, v3
124 ; PWR10LE-NEXT: vextuwrx r3, r3, v2
127 ; PWR10BE-LABEL: v8i32:
128 ; PWR10BE: # %bb.0: # %entry
129 ; PWR10BE-NEXT: vmuluwm v2, v2, v3
130 ; PWR10BE-NEXT: li r3, 0
131 ; PWR10BE-NEXT: xxswapd v3, v2
132 ; PWR10BE-NEXT: vmuluwm v2, v2, v3
133 ; PWR10BE-NEXT: xxspltw v3, v2, 1
134 ; PWR10BE-NEXT: vmuluwm v2, v2, v3
135 ; PWR10BE-NEXT: vextuwlx r3, r3, v2
138 %0 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %a)
; Multiply reduction of a <16 x i32> vector via @llvm.vector.reduce.mul.v16i32.
; The expected code first combines four vector registers pairwise
; (v3*v5, v2*v4, then v2*v3; presumably v2..v5 hold the four quarters of the
; wide argument -- confirm against the ABI), then follows the same
; xxswapd / xxspltw / vmuluwm steps and word extract into r3 as the
; <4 x i32> case.
; Little-endian expectations use splat index 2 and vextuwrx; big-endian
; expectations use splat index 1 and vextuwlx. The pwr9 and pwr10
; expectations are the same for a given endianness.
142 define dso_local i32 @v16i32(<16 x i32> %a) local_unnamed_addr #0 {
143 ; PWR9LE-LABEL: v16i32:
144 ; PWR9LE: # %bb.0: # %entry
145 ; PWR9LE-NEXT: vmuluwm v3, v3, v5
146 ; PWR9LE-NEXT: vmuluwm v2, v2, v4
147 ; PWR9LE-NEXT: li r3, 0
148 ; PWR9LE-NEXT: vmuluwm v2, v2, v3
149 ; PWR9LE-NEXT: xxswapd v3, v2
150 ; PWR9LE-NEXT: vmuluwm v2, v2, v3
151 ; PWR9LE-NEXT: xxspltw v3, v2, 2
152 ; PWR9LE-NEXT: vmuluwm v2, v2, v3
153 ; PWR9LE-NEXT: vextuwrx r3, r3, v2
156 ; PWR9BE-LABEL: v16i32:
157 ; PWR9BE: # %bb.0: # %entry
158 ; PWR9BE-NEXT: vmuluwm v3, v3, v5
159 ; PWR9BE-NEXT: vmuluwm v2, v2, v4
160 ; PWR9BE-NEXT: li r3, 0
161 ; PWR9BE-NEXT: vmuluwm v2, v2, v3
162 ; PWR9BE-NEXT: xxswapd v3, v2
163 ; PWR9BE-NEXT: vmuluwm v2, v2, v3
164 ; PWR9BE-NEXT: xxspltw v3, v2, 1
165 ; PWR9BE-NEXT: vmuluwm v2, v2, v3
166 ; PWR9BE-NEXT: vextuwlx r3, r3, v2
169 ; PWR10LE-LABEL: v16i32:
170 ; PWR10LE: # %bb.0: # %entry
171 ; PWR10LE-NEXT: vmuluwm v3, v3, v5
172 ; PWR10LE-NEXT: vmuluwm v2, v2, v4
173 ; PWR10LE-NEXT: li r3, 0
174 ; PWR10LE-NEXT: vmuluwm v2, v2, v3
175 ; PWR10LE-NEXT: xxswapd v3, v2
176 ; PWR10LE-NEXT: vmuluwm v2, v2, v3
177 ; PWR10LE-NEXT: xxspltw v3, v2, 2
178 ; PWR10LE-NEXT: vmuluwm v2, v2, v3
179 ; PWR10LE-NEXT: vextuwrx r3, r3, v2
182 ; PWR10BE-LABEL: v16i32:
183 ; PWR10BE: # %bb.0: # %entry
184 ; PWR10BE-NEXT: vmuluwm v3, v3, v5
185 ; PWR10BE-NEXT: vmuluwm v2, v2, v4
186 ; PWR10BE-NEXT: li r3, 0
187 ; PWR10BE-NEXT: vmuluwm v2, v2, v3
188 ; PWR10BE-NEXT: xxswapd v3, v2
189 ; PWR10BE-NEXT: vmuluwm v2, v2, v3
190 ; PWR10BE-NEXT: xxspltw v3, v2, 1
191 ; PWR10BE-NEXT: vmuluwm v2, v2, v3
192 ; PWR10BE-NEXT: vextuwlx r3, r3, v2
195 %0 = call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %a)
; Declarations of the i32 multiply-reduction intrinsics called by the test
; functions in this file, one per vector width (2, 4, 8 and 16 elements).
; All of them use attribute group #0, which is defined below as nounwind.
199 declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>) #0
200 declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>) #0
201 declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>) #0
202 declare i32 @llvm.vector.reduce.mul.v16i32(<16 x i32>) #0
204 attributes #0 = { nounwind }