1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,CHECK-NOBMI
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+bmi2,+bmi | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-AVX2
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512vpopcntdq,+bmi2,+bmi | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-AVX512
; Population-count intrinsics called by the tests in this file: one scalar i32
; form and one <4 x i64> vector form.
6 declare i32 @llvm.ctpop.i32(i32)
7 declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
; Scalar i32 test of "popcount(%x) == 1" (exactly one bit set).
; In every configuration the expected output ORs 0x100 into the input and then
; performs a single x & (x - 1) == 0 test (lea -1 + test without BMI, blsr with
; BMI), with no separate compare of the input against zero.
8 define i1 @is_pow2_non_zero(i32 %xin) {
9 ; CHECK-NOBMI-LABEL: is_pow2_non_zero:
10 ; CHECK-NOBMI: # %bb.0:
11 ; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
12 ; CHECK-NOBMI-NEXT: orl $256, %edi # imm = 0x100
13 ; CHECK-NOBMI-NEXT: leal -1(%rdi), %eax
14 ; CHECK-NOBMI-NEXT: testl %eax, %edi
15 ; CHECK-NOBMI-NEXT: sete %al
16 ; CHECK-NOBMI-NEXT: retq
18 ; CHECK-BMI2-LABEL: is_pow2_non_zero:
19 ; CHECK-BMI2: # %bb.0:
20 ; CHECK-BMI2-NEXT: orl $256, %edi # imm = 0x100
21 ; CHECK-BMI2-NEXT: blsrl %edi, %eax
22 ; CHECK-BMI2-NEXT: sete %al
23 ; CHECK-BMI2-NEXT: retq
; NOTE(review): %x is used below but no definition of it, no `ret`, and no
; closing brace appear in this copy of the test. The `orl $256` in the expected
; output and the `or ... 256` in the vector tests suggest the missing line is
; `%x = or i32 %xin, 256` - confirm against the upstream file.
25 %cnt = call i32 @llvm.ctpop.i32(i32 %x)
26 %r = icmp eq i32 %cnt, 1
; Scalar i32 test of "popcount(%x) == 1" where %x is the raw argument, so no bit
; is forced on beforehand.
; The expected output keeps the x & (x - 1) == 0 test but additionally checks
; x != 0 (testl + setne) and ANDs the two byte results together.
30 define i1 @is_pow2_non_zero_x_maybe_z(i32 %x) {
31 ; CHECK-NOBMI-LABEL: is_pow2_non_zero_x_maybe_z:
32 ; CHECK-NOBMI: # %bb.0:
33 ; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
34 ; CHECK-NOBMI-NEXT: leal -1(%rdi), %eax
35 ; CHECK-NOBMI-NEXT: testl %eax, %edi
36 ; CHECK-NOBMI-NEXT: sete %cl
37 ; CHECK-NOBMI-NEXT: testl %edi, %edi
38 ; CHECK-NOBMI-NEXT: setne %al
39 ; CHECK-NOBMI-NEXT: andb %cl, %al
40 ; CHECK-NOBMI-NEXT: retq
42 ; CHECK-BMI2-LABEL: is_pow2_non_zero_x_maybe_z:
43 ; CHECK-BMI2: # %bb.0:
44 ; CHECK-BMI2-NEXT: testl %edi, %edi
45 ; CHECK-BMI2-NEXT: setne %cl
46 ; CHECK-BMI2-NEXT: blsrl %edi, %eax
47 ; CHECK-BMI2-NEXT: sete %al
48 ; CHECK-BMI2-NEXT: andb %cl, %al
49 ; CHECK-BMI2-NEXT: retq
; NOTE(review): the `ret` and closing brace of this function are not present in
; this copy of the test - confirm against the upstream file.
50 %cnt = call i32 @llvm.ctpop.i32(i32 %x)
51 %r = icmp eq i32 %cnt, 1
; Scalar i32 test of the negated predicate "popcount(%x) != 1".
; The expected output has the same shape as the `eq` test on an input with
; 0x100 ORed in (or + lea/test, or or + blsr) but ends in setne instead of sete.
55 define i1 @neither_pow2_non_zero(i32 %xin) {
56 ; CHECK-NOBMI-LABEL: neither_pow2_non_zero:
57 ; CHECK-NOBMI: # %bb.0:
58 ; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
59 ; CHECK-NOBMI-NEXT: orl $256, %edi # imm = 0x100
60 ; CHECK-NOBMI-NEXT: leal -1(%rdi), %eax
61 ; CHECK-NOBMI-NEXT: testl %eax, %edi
62 ; CHECK-NOBMI-NEXT: setne %al
63 ; CHECK-NOBMI-NEXT: retq
65 ; CHECK-BMI2-LABEL: neither_pow2_non_zero:
66 ; CHECK-BMI2: # %bb.0:
67 ; CHECK-BMI2-NEXT: orl $256, %edi # imm = 0x100
68 ; CHECK-BMI2-NEXT: blsrl %edi, %eax
69 ; CHECK-BMI2-NEXT: setne %al
70 ; CHECK-BMI2-NEXT: retq
; NOTE(review): %x is used below but no definition of it, no `ret`, and no
; closing brace appear in this copy of the test. The `orl $256` in the expected
; output suggests the missing line is `%x = or i32 %xin, 256` - confirm against
; the upstream file.
72 %cnt = call i32 @llvm.ctpop.i32(i32 %x)
73 %r = icmp ne i32 %cnt, 1
; Vector <4 x i64> test of "popcount(%x) == 1" where every lane has 256 ORed in
; before the popcount.
; Expected output per configuration:
;  - default (no extra features): works on two 128-bit halves; adds an all-ones
;    vector (x - 1), ANDs with x, compares against zero with 32-bit compares and
;    combines the two 32-bit results of each lane via shufps + andps.
;  - AVX2: same x & (x - 1) == 0 idea on one 256-bit register with a 64-bit
;    compare, then narrows the result with vextracti128 + vpackssdw.
;  - AVX512 with VPOPCNTDQ: uses vpopcntq and compares against a broadcast
;    constant-pool operand into a mask register.
; NOTE(review): the `ret` and closing brace of this function are not present in
; this copy of the test - confirm against the upstream file.
77 define <4 x i1> @is_pow2_non_zero_4xv64(<4 x i64> %xin) {
78 ; CHECK-NOBMI-LABEL: is_pow2_non_zero_4xv64:
79 ; CHECK-NOBMI: # %bb.0:
80 ; CHECK-NOBMI-NEXT: movdqa {{.*#+}} xmm2 = [256,256]
81 ; CHECK-NOBMI-NEXT: por %xmm2, %xmm0
82 ; CHECK-NOBMI-NEXT: por %xmm2, %xmm1
83 ; CHECK-NOBMI-NEXT: pcmpeqd %xmm2, %xmm2
84 ; CHECK-NOBMI-NEXT: movdqa %xmm1, %xmm3
85 ; CHECK-NOBMI-NEXT: paddq %xmm2, %xmm3
86 ; CHECK-NOBMI-NEXT: pand %xmm1, %xmm3
87 ; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm1
88 ; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm3
89 ; CHECK-NOBMI-NEXT: paddq %xmm0, %xmm2
90 ; CHECK-NOBMI-NEXT: pand %xmm2, %xmm0
91 ; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm0
92 ; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm1
93 ; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm3[1,3]
94 ; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
95 ; CHECK-NOBMI-NEXT: andps %xmm1, %xmm0
96 ; CHECK-NOBMI-NEXT: retq
98 ; CHECK-AVX2-LABEL: is_pow2_non_zero_4xv64:
99 ; CHECK-AVX2: # %bb.0:
100 ; CHECK-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [256,256,256,256]
101 ; CHECK-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
102 ; CHECK-AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
103 ; CHECK-AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm1
104 ; CHECK-AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
105 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
106 ; CHECK-AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
107 ; CHECK-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
108 ; CHECK-AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
109 ; CHECK-AVX2-NEXT: vzeroupper
110 ; CHECK-AVX2-NEXT: retq
112 ; CHECK-AVX512-LABEL: is_pow2_non_zero_4xv64:
113 ; CHECK-AVX512: # %bb.0:
114 ; CHECK-AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
115 ; CHECK-AVX512-NEXT: vpopcntq %ymm0, %ymm0
116 ; CHECK-AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
117 ; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
118 ; CHECK-AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
119 ; CHECK-AVX512-NEXT: vzeroupper
120 ; CHECK-AVX512-NEXT: retq
121 %x = or <4 x i64> %xin, <i64 256, i64 256, i64 256, i64 256>
122 %cnt = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %x)
123 %r = icmp eq <4 x i64> %cnt, <i64 1, i64 1, i64 1, i64 1>
; Vector <4 x i64> test of the negated predicate "popcount(%x) != 1" where every
; lane has 256 ORed in before the popcount.
; Expected output per configuration:
;  - default (no extra features) and AVX2: the same x & (x - 1) == 0 sequence as
;    the `eq` vector test, followed by an XOR with the all-ones register
;    (produced by pcmpeqd/vpcmpeqd of a register with itself) to invert the mask.
;  - AVX512 with VPOPCNTDQ: vpopcntq followed by vpcmpneqq into a mask register.
; NOTE(review): the `ret` and closing brace of this function are not present in
; this copy of the test - confirm against the upstream file.
127 define <4 x i1> @neither_pow2_non_zero_4xv64(<4 x i64> %xin) {
128 ; CHECK-NOBMI-LABEL: neither_pow2_non_zero_4xv64:
129 ; CHECK-NOBMI: # %bb.0:
130 ; CHECK-NOBMI-NEXT: movdqa {{.*#+}} xmm2 = [256,256]
131 ; CHECK-NOBMI-NEXT: por %xmm2, %xmm0
132 ; CHECK-NOBMI-NEXT: por %xmm2, %xmm1
133 ; CHECK-NOBMI-NEXT: pcmpeqd %xmm2, %xmm2
134 ; CHECK-NOBMI-NEXT: movdqa %xmm1, %xmm3
135 ; CHECK-NOBMI-NEXT: paddq %xmm2, %xmm3
136 ; CHECK-NOBMI-NEXT: pand %xmm1, %xmm3
137 ; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm1
138 ; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm3
139 ; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm4
140 ; CHECK-NOBMI-NEXT: paddq %xmm2, %xmm4
141 ; CHECK-NOBMI-NEXT: pand %xmm4, %xmm0
142 ; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm0
143 ; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm1
144 ; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm3[1,3]
145 ; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
146 ; CHECK-NOBMI-NEXT: andps %xmm1, %xmm0
147 ; CHECK-NOBMI-NEXT: xorps %xmm2, %xmm0
148 ; CHECK-NOBMI-NEXT: retq
150 ; CHECK-AVX2-LABEL: neither_pow2_non_zero_4xv64:
151 ; CHECK-AVX2: # %bb.0:
152 ; CHECK-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [256,256,256,256]
153 ; CHECK-AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
154 ; CHECK-AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
155 ; CHECK-AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm2
156 ; CHECK-AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
157 ; CHECK-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
158 ; CHECK-AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
159 ; CHECK-AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
160 ; CHECK-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
161 ; CHECK-AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
162 ; CHECK-AVX2-NEXT: vzeroupper
163 ; CHECK-AVX2-NEXT: retq
165 ; CHECK-AVX512-LABEL: neither_pow2_non_zero_4xv64:
166 ; CHECK-AVX512: # %bb.0:
167 ; CHECK-AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
168 ; CHECK-AVX512-NEXT: vpopcntq %ymm0, %ymm0
169 ; CHECK-AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
170 ; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
171 ; CHECK-AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
172 ; CHECK-AVX512-NEXT: vzeroupper
173 ; CHECK-AVX512-NEXT: retq
174 %x = or <4 x i64> %xin, <i64 256, i64 256, i64 256, i64 256>
175 %cnt = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %x)
176 %r = icmp ne <4 x i64> %cnt, <i64 1, i64 1, i64 1, i64 1>
; Vector <4 x i64> test of "popcount(%x) != 1" where %x is the raw argument, so
; no bit is forced on beforehand.
; Expected output per configuration:
;  - default (no extra features) and AVX2: besides the inverted
;    x & (x - 1) == 0 mask, the input itself is compared against zero and that
;    result is ORed into the final mask.
;  - AVX512 with VPOPCNTDQ: vpopcntq followed by vpcmpneqq into a mask register,
;    with no extra zero handling.
; NOTE(review): the `ret` and closing brace of this function are not present in
; this copy of the test - confirm against the upstream file.
180 define <4 x i1> @neither_pow2_non_zero_4xv64_x_maybe_z(<4 x i64> %x) {
181 ; CHECK-NOBMI-LABEL: neither_pow2_non_zero_4xv64_x_maybe_z:
182 ; CHECK-NOBMI: # %bb.0:
183 ; CHECK-NOBMI-NEXT: pxor %xmm2, %xmm2
184 ; CHECK-NOBMI-NEXT: pcmpeqd %xmm3, %xmm3
185 ; CHECK-NOBMI-NEXT: movdqa %xmm1, %xmm4
186 ; CHECK-NOBMI-NEXT: paddq %xmm3, %xmm4
187 ; CHECK-NOBMI-NEXT: pand %xmm1, %xmm4
188 ; CHECK-NOBMI-NEXT: pcmpeqd %xmm2, %xmm1
189 ; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,0,3,2]
190 ; CHECK-NOBMI-NEXT: pand %xmm1, %xmm5
191 ; CHECK-NOBMI-NEXT: pcmpeqd %xmm2, %xmm4
192 ; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,0,3,2]
193 ; CHECK-NOBMI-NEXT: pand %xmm4, %xmm1
194 ; CHECK-NOBMI-NEXT: pxor %xmm3, %xmm1
195 ; CHECK-NOBMI-NEXT: por %xmm5, %xmm1
196 ; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm4
197 ; CHECK-NOBMI-NEXT: pcmpeqd %xmm2, %xmm4
198 ; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,0,3,2]
199 ; CHECK-NOBMI-NEXT: pand %xmm4, %xmm5
200 ; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm4
201 ; CHECK-NOBMI-NEXT: paddq %xmm3, %xmm4
202 ; CHECK-NOBMI-NEXT: pand %xmm4, %xmm0
203 ; CHECK-NOBMI-NEXT: pcmpeqd %xmm2, %xmm0
204 ; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
205 ; CHECK-NOBMI-NEXT: pand %xmm2, %xmm0
206 ; CHECK-NOBMI-NEXT: pxor %xmm3, %xmm0
207 ; CHECK-NOBMI-NEXT: por %xmm5, %xmm0
208 ; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
209 ; CHECK-NOBMI-NEXT: retq
211 ; CHECK-AVX2-LABEL: neither_pow2_non_zero_4xv64_x_maybe_z:
212 ; CHECK-AVX2: # %bb.0:
213 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
214 ; CHECK-AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm2
215 ; CHECK-AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
216 ; CHECK-AVX2-NEXT: vpaddq %ymm3, %ymm0, %ymm4
217 ; CHECK-AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0
218 ; CHECK-AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
219 ; CHECK-AVX2-NEXT: vpxor %ymm3, %ymm0, %ymm0
220 ; CHECK-AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
221 ; CHECK-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
222 ; CHECK-AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
223 ; CHECK-AVX2-NEXT: vzeroupper
224 ; CHECK-AVX2-NEXT: retq
226 ; CHECK-AVX512-LABEL: neither_pow2_non_zero_4xv64_x_maybe_z:
227 ; CHECK-AVX512: # %bb.0:
228 ; CHECK-AVX512-NEXT: vpopcntq %ymm0, %ymm0
229 ; CHECK-AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
230 ; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
231 ; CHECK-AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
232 ; CHECK-AVX512-NEXT: vzeroupper
233 ; CHECK-AVX512-NEXT: retq
234 %cnt = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %x)
235 %r = icmp ne <4 x i64> %cnt, <i64 1, i64 1, i64 1, i64 1>
238 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: