1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s
4 ; Test simplifications of vector compares that should simplify to true, false or equality.
; Signed x < INT_MIN (-2147483648) holds for no i32 value, so every lane of the
; sign-extended result is zero. The expected code simply clears %xmm0.
6 define <4 x i32> @slt_min(<4 x i32> %x) {
7 ; CHECK-LABEL: slt_min:
9 ; CHECK-NEXT: xorps %xmm0, %xmm0
11 %cmp = icmp slt <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
12 %r = sext <4 x i1> %cmp to <4 x i32>
; Signed x >= INT_MIN holds for every i32 value, so every lane of the
; sign-extended result is all-ones. The expected code materializes all-ones by
; comparing %xmm0 for equality with itself.
16 define <4 x i32> @sge_min(<4 x i32> %x) {
17 ; CHECK-LABEL: sge_min:
19 ; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
21 %cmp = icmp sge <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
22 %r = sext <4 x i1> %cmp to <4 x i32>
; Signed x > INT_MIN is a real per-lane comparison. The expected code is a
; single signed greater-than compare of %xmm0 against a constant-pool operand.
26 define <4 x i32> @sgt_min(<4 x i32> %x) {
27 ; CHECK-LABEL: sgt_min:
29 ; CHECK-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
31 %cmp = icmp sgt <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
32 %r = sext <4 x i1> %cmp to <4 x i32>
; Signed x <= INT_MIN. The expected code computes the inverse of a signed
; greater-than compare against a constant-pool operand: the compare result is
; xor'ed with an all-ones register built by pcmpeqd of %xmm1 with itself.
; NOTE(review): this predicate is logically the same as x == INT_MIN; the
; assertions below record the compare-then-invert form instead.
36 define <4 x i32> @sle_min(<4 x i32> %x) {
37 ; CHECK-LABEL: sle_min:
39 ; CHECK-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
40 ; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
41 ; CHECK-NEXT: pxor %xmm1, %xmm0
43 %cmp = icmp sle <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
44 %r = sext <4 x i1> %cmp to <4 x i32>
; Signed x > INT_MAX (2147483647) holds for no i32 value, so every lane of the
; sign-extended result is zero. The expected code simply clears %xmm0.
48 define <4 x i32> @sgt_max(<4 x i32> %x) {
49 ; CHECK-LABEL: sgt_max:
51 ; CHECK-NEXT: xorps %xmm0, %xmm0
53 %cmp = icmp sgt <4 x i32> %x, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
54 %r = sext <4 x i1> %cmp to <4 x i32>
; Signed x <= INT_MAX holds for every i32 value, so every lane of the
; sign-extended result is all-ones. The expected code materializes all-ones by
; comparing %xmm0 for equality with itself.
58 define <4 x i32> @sle_max(<4 x i32> %x) {
59 ; CHECK-LABEL: sle_max:
61 ; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
63 %cmp = icmp sle <4 x i32> %x, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
64 %r = sext <4 x i1> %cmp to <4 x i32>
; Signed x < INT_MAX. The expected code loads a splat of 2147483647 into %xmm1,
; performs the signed compare "constant > x" into %xmm1, and then copies the
; mask to %xmm0.
68 define <4 x i32> @slt_max(<4 x i32> %x) {
69 ; CHECK-LABEL: slt_max:
71 ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483647,2147483647,2147483647,2147483647]
72 ; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
73 ; CHECK-NEXT: movdqa %xmm1, %xmm0
75 %cmp = icmp slt <4 x i32> %x, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
76 %r = sext <4 x i1> %cmp to <4 x i32>
; Signed x >= INT_MAX. The expected code computes the signed compare
; "2147483647 > x" into %xmm1 and inverts it by xor'ing with an all-ones value
; built in %xmm0.
; NOTE(review): this predicate is logically the same as x == INT_MAX; the
; assertions below record the compare-then-invert form instead.
80 define <4 x i32> @sge_max(<4 x i32> %x) {
81 ; CHECK-LABEL: sge_max:
83 ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483647,2147483647,2147483647,2147483647]
84 ; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
85 ; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
86 ; CHECK-NEXT: pxor %xmm1, %xmm0
88 %cmp = icmp sge <4 x i32> %x, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
89 %r = sext <4 x i1> %cmp to <4 x i32>
; Unsigned x < 0 holds for no value, so every lane of the sign-extended result
; is zero. The expected code simply clears %xmm0.
93 define <4 x i32> @ult_min(<4 x i32> %x) {
94 ; CHECK-LABEL: ult_min:
96 ; CHECK-NEXT: xorps %xmm0, %xmm0
98 %cmp = icmp ult <4 x i32> %x, zeroinitializer
99 %r = sext <4 x i1> %cmp to <4 x i32>
; Unsigned x >= 0 holds for every value, so every lane of the sign-extended
; result is all-ones. The expected code materializes all-ones by comparing
; %xmm0 for equality with itself.
103 define <4 x i32> @uge_min(<4 x i32> %x) {
104 ; CHECK-LABEL: uge_min:
106 ; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
108 %cmp = icmp uge <4 x i32> %x, zeroinitializer
109 %r = sext <4 x i1> %cmp to <4 x i32>
; Unsigned x > 0 is the same as x != 0. The expected code compares %xmm0 for
; equality with a zeroed %xmm1 and then inverts the mask by xor'ing it with
; all-ones.
113 define <4 x i32> @ugt_min(<4 x i32> %x) {
114 ; CHECK-LABEL: ugt_min:
116 ; CHECK-NEXT: pxor %xmm1, %xmm1
117 ; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
118 ; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
119 ; CHECK-NEXT: pxor %xmm1, %xmm0
121 %cmp = icmp ugt <4 x i32> %x, zeroinitializer
122 %r = sext <4 x i1> %cmp to <4 x i32>
; Unsigned x <= 0. The expected code xors x with a splat of 2147483648 (the i32
; sign bit) so that a signed pcmpgtd against that same constant can stand in
; for the unsigned compare, and then inverts the mask by xor'ing with all-ones.
; NOTE(review): this predicate is logically the same as x == 0; the assertions
; below record the longer bias/compare/invert form instead.
126 define <4 x i32> @ule_min(<4 x i32> %x) {
127 ; CHECK-LABEL: ule_min:
129 ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
130 ; CHECK-NEXT: pxor %xmm1, %xmm0
131 ; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
132 ; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
133 ; CHECK-NEXT: pxor %xmm1, %xmm0
135 %cmp = icmp ule <4 x i32> %x, zeroinitializer
136 %r = sext <4 x i1> %cmp to <4 x i32>
; Unsigned x > UINT_MAX (the i32 -1 bit pattern) holds for no value, so every
; lane of the sign-extended result is zero. The expected code simply clears
; %xmm0.
140 define <4 x i32> @ugt_max(<4 x i32> %x) {
141 ; CHECK-LABEL: ugt_max:
143 ; CHECK-NEXT: xorps %xmm0, %xmm0
145 %cmp = icmp ugt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
146 %r = sext <4 x i1> %cmp to <4 x i32>
; Unsigned x <= UINT_MAX (the i32 -1 bit pattern) holds for every value, so
; every lane of the sign-extended result is all-ones. The expected code
; materializes all-ones by comparing %xmm0 for equality with itself.
150 define <4 x i32> @ule_max(<4 x i32> %x) {
151 ; CHECK-LABEL: ule_max:
153 ; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
155 %cmp = icmp ule <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
156 %r = sext <4 x i1> %cmp to <4 x i32>
; Unsigned x < UINT_MAX is the same as x != -1. The expected code builds
; all-ones in %xmm1, compares %xmm0 for equality with it, and then inverts the
; mask by xor'ing with the same all-ones register.
160 define <4 x i32> @ult_max(<4 x i32> %x) {
161 ; CHECK-LABEL: ult_max:
163 ; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
164 ; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
165 ; CHECK-NEXT: pxor %xmm1, %xmm0
167 %cmp = icmp ult <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
168 %r = sext <4 x i1> %cmp to <4 x i32>
; Unsigned x >= UINT_MAX (the i32 -1 bit pattern). The expected code xors x
; with a constant-pool operand, performs the signed compare "2147483647 > that
; value" into %xmm1, inverts the mask with the all-ones value held in %xmm2,
; and copies the result to %xmm0.
; NOTE(review): the constant-pool operand's contents are not visible here; it is
; presumably the sign-bit splat used to bias the unsigned compare - confirm.
; NOTE(review): this predicate is logically the same as x == -1; the assertions
; below record the longer bias/compare/invert form instead.
172 define <4 x i32> @uge_max(<4 x i32> %x) {
173 ; CHECK-LABEL: uge_max:
175 ; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
176 ; CHECK-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
177 ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483647,2147483647,2147483647,2147483647]
178 ; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
179 ; CHECK-NEXT: pxor %xmm2, %xmm1
180 ; CHECK-NEXT: movdqa %xmm1, %xmm0
182 %cmp = icmp uge <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
183 %r = sext <4 x i1> %cmp to <4 x i32>
; Signed x < INT_MIN+1 (-2147483647) is satisfied only by x == INT_MIN. The
; expected code is a single equality compare of %xmm0 against a constant-pool
; operand.
187 define <4 x i32> @slt_min_plus1(<4 x i32> %x) {
188 ; CHECK-LABEL: slt_min_plus1:
190 ; CHECK-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
192 %cmp = icmp slt <4 x i32> %x, <i32 -2147483647, i32 -2147483647, i32 -2147483647, i32 -2147483647>
193 %r = sext <4 x i1> %cmp to <4 x i32>
; Signed x >= INT_MIN+1 (-2147483647). The expected code loads the constant
; (shown in the assembly comment as unsigned 2147483649, the same bit pattern
; as -2147483647), computes the signed compare "constant > x" into %xmm1, and
; inverts it by xor'ing with an all-ones value built in %xmm0.
; NOTE(review): this predicate is logically the same as x != INT_MIN.
197 define <4 x i32> @sge_min_plus1(<4 x i32> %x) {
198 ; CHECK-LABEL: sge_min_plus1:
200 ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483649,2147483649,2147483649,2147483649]
201 ; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
202 ; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
203 ; CHECK-NEXT: pxor %xmm1, %xmm0
205 %cmp = icmp sge <4 x i32> %x, <i32 -2147483647, i32 -2147483647, i32 -2147483647, i32 -2147483647>
206 %r = sext <4 x i1> %cmp to <4 x i32>
; Signed x > INT_MAX-1 (2147483646) is satisfied only by x == INT_MAX. The
; expected code is a single equality compare of %xmm0 against a constant-pool
; operand.
210 define <4 x i32> @sgt_max_minus1(<4 x i32> %x) {
211 ; CHECK-LABEL: sgt_max_minus1:
213 ; CHECK-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
215 %cmp = icmp sgt <4 x i32> %x, <i32 2147483646, i32 2147483646, i32 2147483646, i32 2147483646>
216 %r = sext <4 x i1> %cmp to <4 x i32>
; Signed x <= INT_MAX-1 (2147483646). The expected code performs a signed
; greater-than compare of %xmm0 against a constant-pool operand and then
; inverts the mask by xor'ing with an all-ones value built in %xmm1.
; NOTE(review): this predicate is logically the same as x != INT_MAX.
220 define <4 x i32> @sle_max_minus1(<4 x i32> %x) {
221 ; CHECK-LABEL: sle_max_minus1:
223 ; CHECK-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
224 ; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
225 ; CHECK-NEXT: pxor %xmm1, %xmm0
227 %cmp = icmp sle <4 x i32> %x, <i32 2147483646, i32 2147483646, i32 2147483646, i32 2147483646>
228 %r = sext <4 x i1> %cmp to <4 x i32>
; Unsigned x < 1 is satisfied only by x == 0. The expected code zeroes %xmm1
; and compares %xmm0 for equality with it.
232 define <4 x i32> @ult_one(<4 x i32> %x) {
233 ; CHECK-LABEL: ult_one:
235 ; CHECK-NEXT: pxor %xmm1, %xmm1
236 ; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
238 %cmp = icmp ult <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
239 %r = sext <4 x i1> %cmp to <4 x i32>
; Unsigned x >= 1. The expected code xors x with a constant-pool operand,
; computes the signed compare "2147483649 > that value" into %xmm1, and inverts
; the mask by xor'ing with an all-ones value built in %xmm0.
; NOTE(review): the constant-pool operand's contents are not visible here; it is
; presumably the sign-bit splat (2147483649 is 1 with the sign bit set) - confirm.
; NOTE(review): this predicate is logically the same as x != 0.
243 define <4 x i32> @uge_one(<4 x i32> %x) {
244 ; CHECK-LABEL: uge_one:
246 ; CHECK-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
247 ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483649,2147483649,2147483649,2147483649]
248 ; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
249 ; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
250 ; CHECK-NEXT: pxor %xmm1, %xmm0
252 %cmp = icmp uge <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
253 %r = sext <4 x i1> %cmp to <4 x i32>
; Unsigned x > UINT_MAX-1 (the i32 -2 bit pattern) is satisfied only by
; x == UINT_MAX. The expected code builds all-ones in %xmm1 and compares %xmm0
; for equality with it.
257 define <4 x i32> @ugt_max_minus1(<4 x i32> %x) {
258 ; CHECK-LABEL: ugt_max_minus1:
260 ; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
261 ; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
263 %cmp = icmp ugt <4 x i32> %x, <i32 -2, i32 -2, i32 -2, i32 -2>
264 %r = sext <4 x i1> %cmp to <4 x i32>
; Unsigned x <= UINT_MAX-1 (the i32 -2 bit pattern). The expected code xors x
; with one constant-pool operand, performs a signed greater-than compare against
; another constant-pool operand, and inverts the mask by xor'ing with an
; all-ones value built in %xmm1.
; NOTE(review): the constant-pool contents are not visible here; presumably a
; sign-bit bias followed by the biased bound - confirm.
; NOTE(review): this predicate is logically the same as x != -1.
268 define <4 x i32> @ule_max_minus1(<4 x i32> %x) {
269 ; CHECK-LABEL: ule_max_minus1:
271 ; CHECK-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
272 ; CHECK-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
273 ; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
274 ; CHECK-NEXT: pxor %xmm1, %xmm0
276 %cmp = icmp ule <4 x i32> %x, <i32 -2, i32 -2, i32 -2, i32 -2>
277 %r = sext <4 x i1> %cmp to <4 x i32>
; Unsigned x > 2147483647 is true exactly when the sign bit of x is set, i.e.
; when x is negative as a signed value. The expected code zeroes %xmm1,
; computes the signed compare "0 > x" into it, and copies the mask to %xmm0.
281 define <4 x i32> @ugt_smax(<4 x i32> %x) {
282 ; CHECK-LABEL: ugt_smax:
284 ; CHECK-NEXT: pxor %xmm1, %xmm1
285 ; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
286 ; CHECK-NEXT: movdqa %xmm1, %xmm0
288 %cmp = icmp ugt <4 x i32> %x, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
289 %r = sext <4 x i1> %cmp to <4 x i32>
; Unsigned x <= 2147483647 is true exactly when the sign bit of x is clear,
; i.e. when x > -1 as a signed value. The expected code builds all-ones (-1) in
; %xmm1 and performs the signed compare "x > -1" in %xmm0.
293 define <4 x i32> @ule_smax(<4 x i32> %x) {
294 ; CHECK-LABEL: ule_smax:
296 ; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
297 ; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
299 %cmp = icmp ule <4 x i32> %x, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
300 %r = sext <4 x i1> %cmp to <4 x i32>
; Unsigned x < 0x80000000 (the i32 -2147483648 bit pattern) is true exactly
; when the sign bit of x is clear, i.e. when x > -1 as a signed value. The
; expected code builds all-ones (-1) in %xmm1 and performs the signed compare
; "x > -1" in %xmm0.
304 define <4 x i32> @ult_smin(<4 x i32> %x) {
305 ; CHECK-LABEL: ult_smin:
307 ; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
308 ; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
310 %cmp = icmp ult <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
311 %r = sext <4 x i1> %cmp to <4 x i32>
; Unsigned x >= 0x80000000 (the i32 -2147483648 bit pattern) is true exactly
; when the sign bit of x is set, i.e. when x is negative as a signed value. The
; expected code zeroes %xmm1, computes the signed compare "0 > x" into it, and
; copies the mask to %xmm0.
315 define <4 x i32> @uge_smin(<4 x i32> %x) {
316 ; CHECK-LABEL: uge_smin:
318 ; CHECK-NEXT: pxor %xmm1, %xmm1
319 ; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
320 ; CHECK-NEXT: movdqa %xmm1, %xmm0
322 %cmp = icmp uge <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
323 %r = sext <4 x i1> %cmp to <4 x i32>
327 ; Make sure we can efficiently handle ne smin by turning into sgt.
; x != INT_MIN is equivalent to signed x > INT_MIN because no i32 value is
; below INT_MIN. The expected code is a single signed greater-than compare of
; %xmm0 against a constant-pool operand, with no separate inversion step.
328 define <4 x i32> @ne_smin(<4 x i32> %x) {
329 ; CHECK-LABEL: ne_smin:
331 ; CHECK-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
333 %cmp = icmp ne <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
334 %r = sext <4 x i1> %cmp to <4 x i32>
338 ; Make sure we can efficiently handle ne smax by turning it into sgt with the operands
339 ; swapped (smax > x). We can't fold the constant pool load, but the alternative is a
340 ; cmpeq+invert which is 3 instructions. The PCMPGT version is two instructions given
341 ; sufficient register allocation freedom to avoid the last mov to %xmm0 seen here.
; x != INT_MAX is equivalent to signed INT_MAX > x because no i32 value is
; above INT_MAX. The expected code loads a splat of 2147483647 into %xmm1,
; computes the signed compare "constant > x" into %xmm1, and copies the mask to
; %xmm0.
342 define <4 x i32> @ne_smax(<4 x i32> %x) {
343 ; CHECK-LABEL: ne_smax:
345 ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483647,2147483647,2147483647,2147483647]
346 ; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
347 ; CHECK-NEXT: movdqa %xmm1, %xmm0
349 %cmp = icmp ne <4 x i32> %x, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
350 %r = sext <4 x i1> %cmp to <4 x i32>