1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
6 ; testz(~X,Y) -> testc(X,Y)
; The xor-with-all-ones must fold into the predicate: testz(~X,Y) == testc(X,Y),
; so codegen emits a single vptest on the original operands and the select reads
; CF (cmovae = move when CF is clear) instead of ZF.
9 define i32 @ptestz_256_invert0(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
10 ; CHECK-LABEL: ptestz_256_invert0:
12 ; CHECK-NEXT: movl %edi, %eax
13 ; CHECK-NEXT: vptest %ymm1, %ymm0
14 ; CHECK-NEXT: cmovael %esi, %eax
15 ; CHECK-NEXT: vzeroupper
17 %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
18 %t2 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %t1, <4 x i64> %d)
19 %t3 = icmp ne i32 %t2, 0
20 %t4 = select i1 %t3, i32 %a, i32 %b
25 ; testz(X,~Y) -> testc(Y,X)
; testz(X,~Y) == testc(Y,X): same fold as invert0 but with the NOT on the second
; operand, so the vptest operands are swapped (%ymm0, %ymm1) and the select
; still reads CF (cmovae).
28 define i32 @ptestz_256_invert1(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
29 ; CHECK-LABEL: ptestz_256_invert1:
31 ; CHECK-NEXT: movl %edi, %eax
32 ; CHECK-NEXT: vptest %ymm0, %ymm1
33 ; CHECK-NEXT: cmovael %esi, %eax
34 ; CHECK-NEXT: vzeroupper
36 %t1 = xor <4 x i64> %d, <i64 -1, i64 -1, i64 -1, i64 -1>
37 %t2 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %c, <4 x i64> %t1)
38 %t3 = icmp ne i32 %t2, 0
39 %t4 = select i1 %t3, i32 %a, i32 %b
44 ; testc(~X,Y) -> testz(X,Y)
; testc(~X,Y) == testz(X,Y): inverting the first operand swaps the roles of
; CF and ZF, so the select now reads ZF (cmovne) after a plain vptest.
47 define i32 @ptestc_256_invert0(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
48 ; CHECK-LABEL: ptestc_256_invert0:
50 ; CHECK-NEXT: movl %edi, %eax
51 ; CHECK-NEXT: vptest %ymm1, %ymm0
52 ; CHECK-NEXT: cmovnel %esi, %eax
53 ; CHECK-NEXT: vzeroupper
55 %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
56 %t2 = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %t1, <4 x i64> %d)
57 %t3 = icmp ne i32 %t2, 0
58 %t4 = select i1 %t3, i32 %a, i32 %b
63 ; testnzc(~X,Y) -> testnzc(X,Y)
; testnzc (!ZF && !CF) is symmetric under inverting one operand — ZF and CF
; just swap roles — so the xor folds away entirely. cmovbe fires when either
; CF or ZF is set, i.e. the !(CF|ZF)==0 case.
66 define i32 @ptestnzc_256_invert0(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
67 ; CHECK-LABEL: ptestnzc_256_invert0:
69 ; CHECK-NEXT: movl %edi, %eax
70 ; CHECK-NEXT: vptest %ymm1, %ymm0
71 ; CHECK-NEXT: cmovbel %esi, %eax
72 ; CHECK-NEXT: vzeroupper
74 %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
75 %t2 = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %t1, <4 x i64> %d)
76 %t3 = icmp ne i32 %t2, 0
77 %t4 = select i1 %t3, i32 %a, i32 %b
; Same as ptestnzc_256_invert0 but with the comparison inverted (icmp eq
; instead of ne), so the cmov condition flips from cmovbe to cmova.
81 define i32 @ptestnzc_256_invert0_commute(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
82 ; CHECK-LABEL: ptestnzc_256_invert0_commute:
84 ; CHECK-NEXT: movl %edi, %eax
85 ; CHECK-NEXT: vptest %ymm1, %ymm0
86 ; CHECK-NEXT: cmoval %esi, %eax
87 ; CHECK-NEXT: vzeroupper
89 %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
90 %t2 = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %t1, <4 x i64> %d)
91 %t3 = icmp eq i32 %t2, 0
92 %t4 = select i1 %t3, i32 %a, i32 %b
97 ; testc(X,~X) -> testc(X,-1)
; testc(X,~X) == testc(X,-1): the xor of %c with itself folds to a test against
; an all-ones mask, which each subtarget materializes differently —
; AVX1 via vcmptrueps on a zeroed register, AVX2 via vpcmpeqd.
100 define i32 @ptestc_256_not(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
101 ; AVX1-LABEL: ptestc_256_not:
103 ; AVX1-NEXT: movl %edi, %eax
104 ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
105 ; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
106 ; AVX1-NEXT: vptest %ymm1, %ymm0
107 ; AVX1-NEXT: cmovael %esi, %eax
108 ; AVX1-NEXT: vzeroupper
111 ; AVX2-LABEL: ptestc_256_not:
113 ; AVX2-NEXT: movl %edi, %eax
114 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
115 ; AVX2-NEXT: vptest %ymm1, %ymm0
116 ; AVX2-NEXT: cmovael %esi, %eax
117 ; AVX2-NEXT: vzeroupper
119 %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
120 %t2 = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %c, <4 x i64> %t1)
121 %t3 = icmp ne i32 %t2, 0
122 %t4 = select i1 %t3, i32 %a, i32 %b
127 ; testz(AND(X,Y),AND(X,Y)) -> testz(X,Y)
; testz(AND(X,Y),AND(X,Y)) == testz(X,Y): the explicit 'and' is redundant with
; vptest's implicit AND of its operands and must fold away.
130 define i32 @ptestz_256_and(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
131 ; CHECK-LABEL: ptestz_256_and:
133 ; CHECK-NEXT: movl %edi, %eax
134 ; CHECK-NEXT: vptest %ymm1, %ymm0
135 ; CHECK-NEXT: cmovel %esi, %eax
136 ; CHECK-NEXT: vzeroupper
138 %t1 = and <4 x i64> %c, %d
139 %t2 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %t1, <4 x i64> %t1)
140 %t3 = icmp eq i32 %t2, 0
141 %t4 = select i1 %t3, i32 %a, i32 %b
146 ; testz(AND(~X,Y),AND(~X,Y)) -> testc(X,Y)
; testz(AND(~X,Y),AND(~X,Y)) == testc(X,Y): both the xor and the and fold into
; a single vptest on the original operands; with the icmp inverted (eq) the
; select reads CF via cmovb (move when CF is set).
149 define i32 @ptestz_256_andc(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
150 ; CHECK-LABEL: ptestz_256_andc:
152 ; CHECK-NEXT: movl %edi, %eax
153 ; CHECK-NEXT: vptest %ymm1, %ymm0
154 ; CHECK-NEXT: cmovbl %esi, %eax
155 ; CHECK-NEXT: vzeroupper
157 %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
158 %t2 = and <4 x i64> %t1, %d
159 %t3 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %t2, <4 x i64> %t2)
160 %t4 = icmp eq i32 %t3, 0
161 %t5 = select i1 %t4, i32 %a, i32 %b
166 ; testz(-1,X) -> testz(X,X)
; testz(-1,X) == testz(X,X): the constant all-ones first operand is dropped
; and X is tested against itself (vptest %ymm0, %ymm0).
169 define i32 @ptestz_256_allones0(<4 x i64> %c, i32 %a, i32 %b) {
170 ; CHECK-LABEL: ptestz_256_allones0:
172 ; CHECK-NEXT: movl %edi, %eax
173 ; CHECK-NEXT: vptest %ymm0, %ymm0
174 ; CHECK-NEXT: cmovnel %esi, %eax
175 ; CHECK-NEXT: vzeroupper
177 %t1 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, <4 x i64> %c)
178 %t2 = icmp ne i32 %t1, 0
179 %t3 = select i1 %t2, i32 %a, i32 %b
184 ; testz(X,-1) -> testz(X,X)
; testz(X,-1) == testz(X,X): same fold as allones0 with the all-ones constant
; in the second operand position.
187 define i32 @ptestz_256_allones1(<4 x i64> %c, i32 %a, i32 %b) {
188 ; CHECK-LABEL: ptestz_256_allones1:
190 ; CHECK-NEXT: movl %edi, %eax
191 ; CHECK-NEXT: vptest %ymm0, %ymm0
192 ; CHECK-NEXT: cmovnel %esi, %eax
193 ; CHECK-NEXT: vzeroupper
195 %t1 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %c, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>)
196 %t2 = icmp ne i32 %t1, 0
197 %t3 = select i1 %t2, i32 %a, i32 %b
202 ; testz(ashr(X,bw-1),-1) -> testpd/testps/pmovmskb(X)
; When the tested value is an all-sign-bits vector (ashr by bw-1), the ptest
; against -1 becomes a sign-bit test: AVX2 uses vtestps directly.
; NOTE(review): the AVX1 path still materializes the ashr before vptest —
; presumably a missed fold rather than intentional; confirm against the
; combine that produces vtestps.
205 define i32 @ptestz_v8i32_signbits(<8 x i32> %c, i32 %a, i32 %b) {
206 ; AVX1-LABEL: ptestz_v8i32_signbits:
208 ; AVX1-NEXT: movl %edi, %eax
209 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
210 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
211 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
212 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
213 ; AVX1-NEXT: vptest %ymm0, %ymm0
214 ; AVX1-NEXT: cmovnel %esi, %eax
215 ; AVX1-NEXT: vzeroupper
218 ; AVX2-LABEL: ptestz_v8i32_signbits:
220 ; AVX2-NEXT: movl %edi, %eax
221 ; AVX2-NEXT: vtestps %ymm0, %ymm0
222 ; AVX2-NEXT: cmovnel %esi, %eax
223 ; AVX2-NEXT: vzeroupper
225 %t1 = ashr <8 x i32> %c, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
226 %t2 = bitcast <8 x i32> %t1 to <4 x i64>
227 %t3 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %t2, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>)
228 %t4 = icmp ne i32 %t3, 0
229 %t5 = select i1 %t4, i32 %a, i32 %b
; i8 variant of the sign-bit fold: there is no vtestpb, so AVX2 lowers to
; vpmovmskb + testl on the mask; AVX1 expands the byte ashr (no vpsrab exists)
; via vpcmpgtb against zero per 128-bit half before the vptest.
233 define i32 @ptestz_v32i8_signbits(<32 x i8> %c, i32 %a, i32 %b) {
234 ; AVX1-LABEL: ptestz_v32i8_signbits:
236 ; AVX1-NEXT: movl %edi, %eax
237 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
238 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
239 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
240 ; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
241 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
242 ; AVX1-NEXT: vptest %ymm0, %ymm0
243 ; AVX1-NEXT: cmovnel %esi, %eax
244 ; AVX1-NEXT: vzeroupper
247 ; AVX2-LABEL: ptestz_v32i8_signbits:
249 ; AVX2-NEXT: movl %edi, %eax
250 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
251 ; AVX2-NEXT: testl %ecx, %ecx
252 ; AVX2-NEXT: cmovnel %esi, %eax
253 ; AVX2-NEXT: vzeroupper
255 %t1 = ashr <32 x i8> %c, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
256 %t2 = bitcast <32 x i8> %t1 to <4 x i64>
257 %t3 = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %t2, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>)
258 %t4 = icmp ne i32 %t3, 0
259 %t5 = select i1 %t4, i32 %a, i32 %b
264 ; testz(or(extract_lo(X),extract_hi(X)),or(extract_lo(Y),extract_hi(Y))) -> testz(X,Y)
267 ; FIXME: Foldable to ptest(xor(%0,%1),xor(%0,%1))
; ptestc(X,-1) asks "is X all-ones?"; compared eq 0 this yields "not all
; lanes equal". The sext'd vector compare feeds vptest against an all-ones
; mask and the result is read from CF via setae (CF clear).
268 define i1 @PR38788(<16 x i16> %0, <16 x i16> %1) {
269 ; AVX1-LABEL: PR38788:
271 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
272 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
273 ; AVX1-NEXT: vpcmpeqw %xmm2, %xmm3, %xmm2
274 ; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
275 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
276 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
277 ; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
278 ; AVX1-NEXT: vptest %ymm1, %ymm0
279 ; AVX1-NEXT: setae %al
280 ; AVX1-NEXT: vzeroupper
283 ; AVX2-LABEL: PR38788:
285 ; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
286 ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
287 ; AVX2-NEXT: vptest %ymm1, %ymm0
288 ; AVX2-NEXT: setae %al
289 ; AVX2-NEXT: vzeroupper
291 %3 = icmp eq <16 x i16> %0, %1
292 %4 = sext <16 x i1> %3 to <16 x i16>
293 %5 = bitcast <16 x i16> %4 to <4 x i64>
294 %6 = tail call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %5, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>)
295 %7 = icmp eq i32 %6, 0
299 declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>)
300 declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>)
301 declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>)