1 ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -verify-machineinstrs | FileCheck %s
2 ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr9 -verify-machineinstrs | FileCheck %s
3 ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-PWR8 -implicit-check-not vabsdu
; simple_absv_32: absolute value of a <4 x i32>, written as the signed maximum
; (vmaxsw intrinsic) of %a and its negation (0 - %a).
; The default-prefix checks below (used by the two pwr9 run lines) expect a
; vxor that produces a zero register, immediately followed by a vabsduw of
; register 2 against that zero register. The pwr8 run uses its own prefix and
; additionally forbids any "vabsdu" text in the output.
5 ; Function Attrs: nounwind readnone
6 define <4 x i32> @simple_absv_32(<4 x i32> %a) local_unnamed_addr {
8 %sub.i = sub <4 x i32> zeroinitializer, %a
9 %0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %a, <4 x i32> %sub.i)
11 ; CHECK-LABEL: simple_absv_32
12 ; CHECK: vxor [[ZERO:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
13 ; CHECK-NEXT: vabsduw 2, 2, [[ZERO]]
15 ; CHECK-PWR8-LABEL: simple_absv_32
; simple_absv_32_swap: same computation as simple_absv_32, but the vmaxsw
; operands are swapped (negation first, %a second).
; The default-prefix checks expect the same vxor + vabsduw sequence, so the
; operand order of the max must not matter for the match.
22 ; Function Attrs: nounwind readnone
23 define <4 x i32> @simple_absv_32_swap(<4 x i32> %a) local_unnamed_addr {
25 %sub.i = sub <4 x i32> zeroinitializer, %a
26 %0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %sub.i, <4 x i32> %a)
28 ; CHECK-LABEL: simple_absv_32_swap
29 ; CHECK: vxor [[ZERO:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
30 ; CHECK-NEXT: vabsduw 2, 2, [[ZERO]]
32 ; CHECK-PWR8-LABEL: simple_absv_32_swap
; simple_absv_16: halfword variant of simple_absv_32. Absolute value of a
; <8 x i16>, written as the signed maximum (vmaxsh intrinsic) of %a and 0 - %a.
; The default-prefix checks expect a zeroing vxor immediately followed by a
; vabsduh of register 2 against that zero register.
39 define <8 x i16> @simple_absv_16(<8 x i16> %a) local_unnamed_addr {
41 %sub.i = sub <8 x i16> zeroinitializer, %a
42 %0 = tail call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %a, <8 x i16> %sub.i)
44 ; CHECK-LABEL: simple_absv_16
45 ; CHECK: vxor [[ZERO:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
46 ; CHECK-NEXT: vabsduh 2, 2, [[ZERO]]
48 ; CHECK-PWR8-LABEL: simple_absv_16
; simple_absv_8: byte variant of simple_absv_32. Absolute value of a
; <16 x i8>, written as the signed maximum (vmaxsb intrinsic) of %a and 0 - %a.
; The default-prefix checks expect a zeroing vxor immediately followed by a
; vabsdub of register 2 against that zero register.
55 ; Function Attrs: nounwind readnone
56 define <16 x i8> @simple_absv_8(<16 x i8> %a) local_unnamed_addr {
58 %sub.i = sub <16 x i8> zeroinitializer, %a
59 %0 = tail call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %a, <16 x i8> %sub.i)
61 ; CHECK-LABEL: simple_absv_8
62 ; CHECK: vxor [[ZERO:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
63 ; CHECK-NEXT: vabsdub 2, 2, [[ZERO]]
65 ; CHECK-PWR8-LABEL: simple_absv_8
; sub_absv_32: absolute difference of two <4 x i32> values in compare/select
; form: d = %a - %b (nsw); the result is d where d > -1 and 0 - d otherwise.
; The default-prefix check expects this to become a single "vabsduw 2, 2, 3"
; (presumably registers 2 and 3 hold %a and %b; confirm against the ABI).
72 ; The select pattern can only be detected for v4i32.
73 ; Function Attrs: norecurse nounwind readnone
74 define <4 x i32> @sub_absv_32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr {
76 %0 = sub nsw <4 x i32> %a, %b
77 %1 = icmp sgt <4 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1>
78 %2 = sub <4 x i32> zeroinitializer, %0
79 %3 = select <4 x i1> %1, <4 x i32> %0, <4 x i32> %2
81 ; CHECK-LABEL: sub_absv_32
82 ; CHECK: vabsduw 2, 2, 3
84 ; CHECK-PWR8-LABEL: sub_absv_32
90 ; FIXME: This does not produce the ISD::ABS that we are looking for.
91 ; We should fix the missing canonicalization.
92 ; We do manage to find the word version of ABS but not the halfword.
93 ; Therefore, we end up doing more work than is required with a pair of abs for word
94 ; instead of just one for the halfword.
; sub_absv_16: absolute difference of two <8 x i16> values computed in a
; widened type. Both inputs are sign-extended to <8 x i32>, subtracted (nsw),
; the non-negative one of d and 0 - d is selected per lane, and the result is
; truncated back to <8 x i16>.
; Of the checks visible here, the pwr8 run expects a vsubuwm (word subtract).
95 ; Function Attrs: norecurse nounwind readnone
96 define <8 x i16> @sub_absv_16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr {
98 %0 = sext <8 x i16> %a to <8 x i32>
99 %1 = sext <8 x i16> %b to <8 x i32>
100 %2 = sub nsw <8 x i32> %0, %1
101 %3 = icmp sgt <8 x i32> %2, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
102 %4 = sub nsw <8 x i32> zeroinitializer, %2
103 %5 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> %4
104 %6 = trunc <8 x i32> %5 to <8 x i16>
106 ; CHECK-LABEL: sub_absv_16
113 ; CHECK-PWR8-LABEL: sub_absv_16
114 ; CHECK-PWR8: vsubuwm
119 ; FIXME: This does not produce ISD::ABS. This does not even vectorize correctly!
120 ; This function should look like sub_absv_32 and sub_absv_16 except that the type is v16i8.
; sub_absv_8: absolute difference of two <16 x i8> values, written out one
; lane at a time. For each lane i in 0..15 the same ten-instruction group is
; repeated:
;   - extract element i of %a and of %b and zero-extend each to i32,
;   - d = a_i - b_i (nsw),
;   - pick d when d > -1, otherwise 0 - d,
;   - truncate to i8 and insert into lane i of the running result vector.
; The final insertelement (%vecins123) is the returned value.
121 ; Function Attrs: norecurse nounwind readnone
122 define <16 x i8> @sub_absv_8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr {
; Lane 0.
124 %vecext = extractelement <16 x i8> %a, i32 0
125 %conv = zext i8 %vecext to i32
126 %vecext1 = extractelement <16 x i8> %b, i32 0
127 %conv2 = zext i8 %vecext1 to i32
128 %sub = sub nsw i32 %conv, %conv2
129 %ispos = icmp sgt i32 %sub, -1
130 %neg = sub nsw i32 0, %sub
131 %0 = select i1 %ispos, i32 %sub, i32 %neg
132 %conv3 = trunc i32 %0 to i8
; The seed vector is undef in lanes 0-9 and zero in lanes 10-15; every lane is
; overwritten by one of the sixteen insertelement instructions in this function.
133 %vecins = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, i8 %conv3, i32 0
; Lane 1.
134 %vecext4 = extractelement <16 x i8> %a, i32 1
135 %conv5 = zext i8 %vecext4 to i32
136 %vecext6 = extractelement <16 x i8> %b, i32 1
137 %conv7 = zext i8 %vecext6 to i32
138 %sub8 = sub nsw i32 %conv5, %conv7
139 %ispos171 = icmp sgt i32 %sub8, -1
140 %neg172 = sub nsw i32 0, %sub8
141 %1 = select i1 %ispos171, i32 %sub8, i32 %neg172
142 %conv10 = trunc i32 %1 to i8
143 %vecins11 = insertelement <16 x i8> %vecins, i8 %conv10, i32 1
; Lane 2.
144 %vecext12 = extractelement <16 x i8> %a, i32 2
145 %conv13 = zext i8 %vecext12 to i32
146 %vecext14 = extractelement <16 x i8> %b, i32 2
147 %conv15 = zext i8 %vecext14 to i32
148 %sub16 = sub nsw i32 %conv13, %conv15
149 %ispos173 = icmp sgt i32 %sub16, -1
150 %neg174 = sub nsw i32 0, %sub16
151 %2 = select i1 %ispos173, i32 %sub16, i32 %neg174
152 %conv18 = trunc i32 %2 to i8
153 %vecins19 = insertelement <16 x i8> %vecins11, i8 %conv18, i32 2
; Lane 3.
154 %vecext20 = extractelement <16 x i8> %a, i32 3
155 %conv21 = zext i8 %vecext20 to i32
156 %vecext22 = extractelement <16 x i8> %b, i32 3
157 %conv23 = zext i8 %vecext22 to i32
158 %sub24 = sub nsw i32 %conv21, %conv23
159 %ispos175 = icmp sgt i32 %sub24, -1
160 %neg176 = sub nsw i32 0, %sub24
161 %3 = select i1 %ispos175, i32 %sub24, i32 %neg176
162 %conv26 = trunc i32 %3 to i8
163 %vecins27 = insertelement <16 x i8> %vecins19, i8 %conv26, i32 3
; Lane 4.
164 %vecext28 = extractelement <16 x i8> %a, i32 4
165 %conv29 = zext i8 %vecext28 to i32
166 %vecext30 = extractelement <16 x i8> %b, i32 4
167 %conv31 = zext i8 %vecext30 to i32
168 %sub32 = sub nsw i32 %conv29, %conv31
169 %ispos177 = icmp sgt i32 %sub32, -1
170 %neg178 = sub nsw i32 0, %sub32
171 %4 = select i1 %ispos177, i32 %sub32, i32 %neg178
172 %conv34 = trunc i32 %4 to i8
173 %vecins35 = insertelement <16 x i8> %vecins27, i8 %conv34, i32 4
; Lane 5.
174 %vecext36 = extractelement <16 x i8> %a, i32 5
175 %conv37 = zext i8 %vecext36 to i32
176 %vecext38 = extractelement <16 x i8> %b, i32 5
177 %conv39 = zext i8 %vecext38 to i32
178 %sub40 = sub nsw i32 %conv37, %conv39
179 %ispos179 = icmp sgt i32 %sub40, -1
180 %neg180 = sub nsw i32 0, %sub40
181 %5 = select i1 %ispos179, i32 %sub40, i32 %neg180
182 %conv42 = trunc i32 %5 to i8
183 %vecins43 = insertelement <16 x i8> %vecins35, i8 %conv42, i32 5
; Lane 6.
184 %vecext44 = extractelement <16 x i8> %a, i32 6
185 %conv45 = zext i8 %vecext44 to i32
186 %vecext46 = extractelement <16 x i8> %b, i32 6
187 %conv47 = zext i8 %vecext46 to i32
188 %sub48 = sub nsw i32 %conv45, %conv47
189 %ispos181 = icmp sgt i32 %sub48, -1
190 %neg182 = sub nsw i32 0, %sub48
191 %6 = select i1 %ispos181, i32 %sub48, i32 %neg182
192 %conv50 = trunc i32 %6 to i8
193 %vecins51 = insertelement <16 x i8> %vecins43, i8 %conv50, i32 6
; Lane 7.
194 %vecext52 = extractelement <16 x i8> %a, i32 7
195 %conv53 = zext i8 %vecext52 to i32
196 %vecext54 = extractelement <16 x i8> %b, i32 7
197 %conv55 = zext i8 %vecext54 to i32
198 %sub56 = sub nsw i32 %conv53, %conv55
199 %ispos183 = icmp sgt i32 %sub56, -1
200 %neg184 = sub nsw i32 0, %sub56
201 %7 = select i1 %ispos183, i32 %sub56, i32 %neg184
202 %conv58 = trunc i32 %7 to i8
203 %vecins59 = insertelement <16 x i8> %vecins51, i8 %conv58, i32 7
; Lane 8.
204 %vecext60 = extractelement <16 x i8> %a, i32 8
205 %conv61 = zext i8 %vecext60 to i32
206 %vecext62 = extractelement <16 x i8> %b, i32 8
207 %conv63 = zext i8 %vecext62 to i32
208 %sub64 = sub nsw i32 %conv61, %conv63
209 %ispos185 = icmp sgt i32 %sub64, -1
210 %neg186 = sub nsw i32 0, %sub64
211 %8 = select i1 %ispos185, i32 %sub64, i32 %neg186
212 %conv66 = trunc i32 %8 to i8
213 %vecins67 = insertelement <16 x i8> %vecins59, i8 %conv66, i32 8
; Lane 9.
214 %vecext68 = extractelement <16 x i8> %a, i32 9
215 %conv69 = zext i8 %vecext68 to i32
216 %vecext70 = extractelement <16 x i8> %b, i32 9
217 %conv71 = zext i8 %vecext70 to i32
218 %sub72 = sub nsw i32 %conv69, %conv71
219 %ispos187 = icmp sgt i32 %sub72, -1
220 %neg188 = sub nsw i32 0, %sub72
221 %9 = select i1 %ispos187, i32 %sub72, i32 %neg188
222 %conv74 = trunc i32 %9 to i8
223 %vecins75 = insertelement <16 x i8> %vecins67, i8 %conv74, i32 9
; Lane 10.
224 %vecext76 = extractelement <16 x i8> %a, i32 10
225 %conv77 = zext i8 %vecext76 to i32
226 %vecext78 = extractelement <16 x i8> %b, i32 10
227 %conv79 = zext i8 %vecext78 to i32
228 %sub80 = sub nsw i32 %conv77, %conv79
229 %ispos189 = icmp sgt i32 %sub80, -1
230 %neg190 = sub nsw i32 0, %sub80
231 %10 = select i1 %ispos189, i32 %sub80, i32 %neg190
232 %conv82 = trunc i32 %10 to i8
233 %vecins83 = insertelement <16 x i8> %vecins75, i8 %conv82, i32 10
; Lane 11.
234 %vecext84 = extractelement <16 x i8> %a, i32 11
235 %conv85 = zext i8 %vecext84 to i32
236 %vecext86 = extractelement <16 x i8> %b, i32 11
237 %conv87 = zext i8 %vecext86 to i32
238 %sub88 = sub nsw i32 %conv85, %conv87
239 %ispos191 = icmp sgt i32 %sub88, -1
240 %neg192 = sub nsw i32 0, %sub88
241 %11 = select i1 %ispos191, i32 %sub88, i32 %neg192
242 %conv90 = trunc i32 %11 to i8
243 %vecins91 = insertelement <16 x i8> %vecins83, i8 %conv90, i32 11
; Lane 12.
244 %vecext92 = extractelement <16 x i8> %a, i32 12
245 %conv93 = zext i8 %vecext92 to i32
246 %vecext94 = extractelement <16 x i8> %b, i32 12
247 %conv95 = zext i8 %vecext94 to i32
248 %sub96 = sub nsw i32 %conv93, %conv95
249 %ispos193 = icmp sgt i32 %sub96, -1
250 %neg194 = sub nsw i32 0, %sub96
251 %12 = select i1 %ispos193, i32 %sub96, i32 %neg194
252 %conv98 = trunc i32 %12 to i8
253 %vecins99 = insertelement <16 x i8> %vecins91, i8 %conv98, i32 12
; Lane 13.
254 %vecext100 = extractelement <16 x i8> %a, i32 13
255 %conv101 = zext i8 %vecext100 to i32
256 %vecext102 = extractelement <16 x i8> %b, i32 13
257 %conv103 = zext i8 %vecext102 to i32
258 %sub104 = sub nsw i32 %conv101, %conv103
259 %ispos195 = icmp sgt i32 %sub104, -1
260 %neg196 = sub nsw i32 0, %sub104
261 %13 = select i1 %ispos195, i32 %sub104, i32 %neg196
262 %conv106 = trunc i32 %13 to i8
263 %vecins107 = insertelement <16 x i8> %vecins99, i8 %conv106, i32 13
; Lane 14.
264 %vecext108 = extractelement <16 x i8> %a, i32 14
265 %conv109 = zext i8 %vecext108 to i32
266 %vecext110 = extractelement <16 x i8> %b, i32 14
267 %conv111 = zext i8 %vecext110 to i32
268 %sub112 = sub nsw i32 %conv109, %conv111
269 %ispos197 = icmp sgt i32 %sub112, -1
270 %neg198 = sub nsw i32 0, %sub112
271 %14 = select i1 %ispos197, i32 %sub112, i32 %neg198
272 %conv114 = trunc i32 %14 to i8
273 %vecins115 = insertelement <16 x i8> %vecins107, i8 %conv114, i32 14
; Lane 15.
274 %vecext116 = extractelement <16 x i8> %a, i32 15
275 %conv117 = zext i8 %vecext116 to i32
276 %vecext118 = extractelement <16 x i8> %b, i32 15
277 %conv119 = zext i8 %vecext118 to i32
278 %sub120 = sub nsw i32 %conv117, %conv119
279 %ispos199 = icmp sgt i32 %sub120, -1
280 %neg200 = sub nsw i32 0, %sub120
281 %15 = select i1 %ispos199, i32 %sub120, i32 %neg200
282 %conv122 = trunc i32 %15 to i8
283 %vecins123 = insertelement <16 x i8> %vecins115, i8 %conv122, i32 15
284 ret <16 x i8> %vecins123
285 ; CHECK-LABEL: sub_absv_8
292 ; CHECK-PWR8-LABEL: sub_absv_8
; sub_absv_vec_32: absolute difference of two <4 x i32> values in intrinsic
; form: d = %a - %b, then the signed maximum (vmaxsw) of d and 0 - d.
; The default-prefix check expects a single "vabsduw 2, 2, 3"; of the checks
; visible here, the pwr8 run expects a vsubuwm instead.
298 ; Function Attrs: nounwind readnone
299 define <4 x i32> @sub_absv_vec_32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr {
301 %sub = sub <4 x i32> %a, %b
302 %sub.i = sub <4 x i32> zeroinitializer, %sub
303 %0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %sub, <4 x i32> %sub.i)
305 ; CHECK-LABEL: sub_absv_vec_32
306 ; CHECK: vabsduw 2, 2, 3
308 ; CHECK-PWR8-LABEL: sub_absv_vec_32
310 ; CHECK-PWR8: vsubuwm
; sub_absv_vec_16: halfword variant of sub_absv_vec_32. d = %a - %b on
; <8 x i16>, then the signed maximum (vmaxsh) of d and 0 - d.
; The default-prefix check expects a single "vabsduh 2, 2, 3"; of the checks
; visible here, the pwr8 run expects a vsubuhm instead.
315 ; Function Attrs: nounwind readnone
316 define <8 x i16> @sub_absv_vec_16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr {
318 %sub = sub <8 x i16> %a, %b
319 %sub.i = sub <8 x i16> zeroinitializer, %sub
320 %0 = tail call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %sub, <8 x i16> %sub.i)
322 ; CHECK-LABEL: sub_absv_vec_16
323 ; CHECK: vabsduh 2, 2, 3
325 ; CHECK-PWR8-LABEL: sub_absv_vec_16
327 ; CHECK-PWR8: vsubuhm
; sub_absv_vec_8: byte variant of sub_absv_vec_32. d = %a - %b on <16 x i8>,
; then the signed maximum (vmaxsb) of d and 0 - d.
; The default-prefix check expects a single "vabsdub 2, 2, 3"; of the checks
; visible here, the pwr8 run expects a vsububm instead.
332 ; Function Attrs: nounwind readnone
333 define <16 x i8> @sub_absv_vec_8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr {
335 %sub = sub <16 x i8> %a, %b
336 %sub.i = sub <16 x i8> zeroinitializer, %sub
337 %0 = tail call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %sub, <16 x i8> %sub.i)
339 ; CHECK-LABEL: sub_absv_vec_8
340 ; CHECK: vabsdub 2, 2, 3
342 ; CHECK-PWR8-LABEL: sub_absv_vec_8
344 ; CHECK-PWR8: vsububm
; Declarations of the three AltiVec max intrinsics called by the tests in this
; file: vmaxsw on <4 x i32>, vmaxsh on <8 x i16> and vmaxsb on <16 x i8>.
350 ; Function Attrs: nounwind readnone
351 declare <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32>, <4 x i32>)
353 ; Function Attrs: nounwind readnone
354 declare <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16>, <8 x i16>)
356 ; Function Attrs: nounwind readnone
357 declare <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8>, <16 x i8>)