1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,CHECK-NOBMI
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+bmi2,+bmi | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-AVX2
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512vpopcntdq,+bmi2,+bmi | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-AVX512
6 declare i32 @llvm.ctpop.i32(i32)
7 declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
; ctpop(x) == 1 with x = %xin | 256, so the popcount operand always has bit 8
; set. The checks expect the single-bit test (x & (x - 1)) == 0 (BLSR when BMI
; is available) with no separate compare of x against zero.
define i1 @is_pow2_non_zero(i32 %xin) {
; CHECK-NOBMI-LABEL: is_pow2_non_zero:
; CHECK-NOBMI:       # %bb.0:
; CHECK-NOBMI-NEXT:    # kill: def $edi killed $edi def $rdi
; CHECK-NOBMI-NEXT:    orl $256, %edi # imm = 0x100
; CHECK-NOBMI-NEXT:    leal -1(%rdi), %eax
; CHECK-NOBMI-NEXT:    testl %eax, %edi
; CHECK-NOBMI-NEXT:    sete %al
; CHECK-NOBMI-NEXT:    retq
;
; CHECK-BMI2-LABEL: is_pow2_non_zero:
; CHECK-BMI2:       # %bb.0:
; CHECK-BMI2-NEXT:    orl $256, %edi # imm = 0x100
; CHECK-BMI2-NEXT:    blsrl %edi, %eax
; CHECK-BMI2-NEXT:    sete %al
; CHECK-BMI2-NEXT:    retq
  ; Force a set bit so the value fed to ctpop cannot be zero.
  %x = or i32 %xin, 256
  %cnt = call i32 @llvm.ctpop.i32(i32 %x)
  %r = icmp eq i32 %cnt, 1
  ret i1 %r
}
; ctpop(x) == 1 where %x is passed straight through and may be zero. All RUN
; configurations share the CHECK prefix here and expect the unsigned compare
; (x ^ (x - 1)) u> (x - 1).
define i1 @is_pow2_non_zero_x_maybe_z(i32 %x) {
; CHECK-LABEL: is_pow2_non_zero_x_maybe_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    leal -1(%rdi), %eax
; CHECK-NEXT:    xorl %eax, %edi
; CHECK-NEXT:    cmpl %eax, %edi
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    retq
  %cnt = call i32 @llvm.ctpop.i32(i32 %x)
  %r = icmp eq i32 %cnt, 1
  ret i1 %r
}
; ctpop(x) != 1 with x = %xin | 256, so the popcount operand always has bit 8
; set. Mirror of the `eq` case: the checks expect (x & (x - 1)) != 0 (BLSR when
; BMI is available) with no separate compare of x against zero.
define i1 @neither_pow2_non_zero(i32 %xin) {
; CHECK-NOBMI-LABEL: neither_pow2_non_zero:
; CHECK-NOBMI:       # %bb.0:
; CHECK-NOBMI-NEXT:    # kill: def $edi killed $edi def $rdi
; CHECK-NOBMI-NEXT:    orl $256, %edi # imm = 0x100
; CHECK-NOBMI-NEXT:    leal -1(%rdi), %eax
; CHECK-NOBMI-NEXT:    testl %eax, %edi
; CHECK-NOBMI-NEXT:    setne %al
; CHECK-NOBMI-NEXT:    retq
;
; CHECK-BMI2-LABEL: neither_pow2_non_zero:
; CHECK-BMI2:       # %bb.0:
; CHECK-BMI2-NEXT:    orl $256, %edi # imm = 0x100
; CHECK-BMI2-NEXT:    blsrl %edi, %eax
; CHECK-BMI2-NEXT:    setne %al
; CHECK-BMI2-NEXT:    retq
  ; Force a set bit so the value fed to ctpop cannot be zero.
  %x = or i32 %xin, 256
  %cnt = call i32 @llvm.ctpop.i32(i32 %x)
  %r = icmp ne i32 %cnt, 1
  ret i1 %r
}
; Vector (<4 x i64>) form of ctpop(x) == 1 with every lane OR'd with 256.
; SSE2 and AVX2 are expected to use x & (x + -1) compared against zero; the
; AVX512 run (which enables vpopcntdq) keeps the popcount and compares it with
; a broadcast constant.
define <4 x i1> @is_pow2_non_zero_4xv64(<4 x i64> %xin) {
; CHECK-NOBMI-LABEL: is_pow2_non_zero_4xv64:
; CHECK-NOBMI:       # %bb.0:
; CHECK-NOBMI-NEXT:    movdqa {{.*#+}} xmm2 = [256,256]
; CHECK-NOBMI-NEXT:    por %xmm2, %xmm0
; CHECK-NOBMI-NEXT:    por %xmm2, %xmm1
; CHECK-NOBMI-NEXT:    pcmpeqd %xmm2, %xmm2
; CHECK-NOBMI-NEXT:    movdqa %xmm1, %xmm3
; CHECK-NOBMI-NEXT:    paddq %xmm2, %xmm3
; CHECK-NOBMI-NEXT:    pand %xmm1, %xmm3
; CHECK-NOBMI-NEXT:    pxor %xmm1, %xmm1
; CHECK-NOBMI-NEXT:    pcmpeqd %xmm1, %xmm3
; CHECK-NOBMI-NEXT:    paddq %xmm0, %xmm2
; CHECK-NOBMI-NEXT:    pand %xmm2, %xmm0
; CHECK-NOBMI-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-NOBMI-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NOBMI-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,3],xmm3[1,3]
; CHECK-NOBMI-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
; CHECK-NOBMI-NEXT:    andps %xmm1, %xmm0
; CHECK-NOBMI-NEXT:    retq
;
; CHECK-AVX2-LABEL: is_pow2_non_zero_4xv64:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [256,256,256,256]
; CHECK-AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; CHECK-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; CHECK-AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm1
; CHECK-AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; CHECK-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; CHECK-AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vzeroupper
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512-LABEL: is_pow2_non_zero_4xv64:
; CHECK-AVX512:       # %bb.0:
; CHECK-AVX512-NEXT:    vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; CHECK-AVX512-NEXT:    vpopcntq %ymm0, %ymm0
; CHECK-AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
; CHECK-AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; CHECK-AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; CHECK-AVX512-NEXT:    vzeroupper
; CHECK-AVX512-NEXT:    retq
  ; Force a set bit in every lane so no ctpop operand lane can be zero.
  %x = or <4 x i64> %xin, <i64 256, i64 256, i64 256, i64 256>
  %cnt = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %x)
  %r = icmp eq <4 x i64> %cnt, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i1> %r
}
; Vector (<4 x i64>) form of ctpop(x) != 1 with every lane OR'd with 256.
; SSE2 and AVX2 are expected to compute the `eq` result via x & (x + -1) == 0
; and then invert it with an all-ones XOR; the AVX512 run (vpopcntdq enabled)
; keeps the popcount and uses a not-equal compare against a broadcast constant.
define <4 x i1> @neither_pow2_non_zero_4xv64(<4 x i64> %xin) {
; CHECK-NOBMI-LABEL: neither_pow2_non_zero_4xv64:
; CHECK-NOBMI:       # %bb.0:
; CHECK-NOBMI-NEXT:    movdqa {{.*#+}} xmm2 = [256,256]
; CHECK-NOBMI-NEXT:    por %xmm2, %xmm0
; CHECK-NOBMI-NEXT:    por %xmm2, %xmm1
; CHECK-NOBMI-NEXT:    pcmpeqd %xmm2, %xmm2
; CHECK-NOBMI-NEXT:    movdqa %xmm1, %xmm3
; CHECK-NOBMI-NEXT:    paddq %xmm2, %xmm3
; CHECK-NOBMI-NEXT:    pand %xmm1, %xmm3
; CHECK-NOBMI-NEXT:    pxor %xmm1, %xmm1
; CHECK-NOBMI-NEXT:    pcmpeqd %xmm1, %xmm3
; CHECK-NOBMI-NEXT:    movdqa %xmm0, %xmm4
; CHECK-NOBMI-NEXT:    paddq %xmm2, %xmm4
; CHECK-NOBMI-NEXT:    pand %xmm4, %xmm0
; CHECK-NOBMI-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-NOBMI-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NOBMI-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,3],xmm3[1,3]
; CHECK-NOBMI-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
; CHECK-NOBMI-NEXT:    andps %xmm1, %xmm0
; CHECK-NOBMI-NEXT:    xorps %xmm2, %xmm0
; CHECK-NOBMI-NEXT:    retq
;
; CHECK-AVX2-LABEL: neither_pow2_non_zero_4xv64:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [256,256,256,256]
; CHECK-AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; CHECK-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; CHECK-AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm2
; CHECK-AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; CHECK-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-AVX2-NEXT:    vpcmpeqq %ymm2, %ymm0, %ymm0
; CHECK-AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; CHECK-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; CHECK-AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vzeroupper
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512-LABEL: neither_pow2_non_zero_4xv64:
; CHECK-AVX512:       # %bb.0:
; CHECK-AVX512-NEXT:    vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; CHECK-AVX512-NEXT:    vpopcntq %ymm0, %ymm0
; CHECK-AVX512-NEXT:    vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
; CHECK-AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; CHECK-AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; CHECK-AVX512-NEXT:    vzeroupper
; CHECK-AVX512-NEXT:    retq
  ; Force a set bit in every lane so no ctpop operand lane can be zero.
  %x = or <4 x i64> %xin, <i64 256, i64 256, i64 256, i64 256>
  %cnt = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %x)
  %r = icmp ne <4 x i64> %cnt, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i1> %r
}
; Vector (<4 x i64>) form of ctpop(x) != 1 where %x is passed straight through
; and lanes may be zero. SSE2 and AVX2 are expected to add -1, XOR in a
; sign-bit bias constant, do a signed greater-than compare of x ^ (x + -1)
; against x + -1, and invert the result; the AVX512 run (vpopcntdq enabled)
; keeps the popcount and uses a not-equal compare against a broadcast constant.
define <4 x i1> @neither_pow2_non_zero_4xv64_x_maybe_z(<4 x i64> %x) {
; CHECK-NOBMI-LABEL: neither_pow2_non_zero_4xv64_x_maybe_z:
; CHECK-NOBMI:       # %bb.0:
; CHECK-NOBMI-NEXT:    pcmpeqd %xmm2, %xmm2
; CHECK-NOBMI-NEXT:    movdqa %xmm1, %xmm3
; CHECK-NOBMI-NEXT:    paddq %xmm2, %xmm3
; CHECK-NOBMI-NEXT:    movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
; CHECK-NOBMI-NEXT:    pxor %xmm4, %xmm3
; CHECK-NOBMI-NEXT:    pxor %xmm3, %xmm1
; CHECK-NOBMI-NEXT:    movdqa %xmm1, %xmm5
; CHECK-NOBMI-NEXT:    pcmpgtd %xmm3, %xmm5
; CHECK-NOBMI-NEXT:    movdqa %xmm0, %xmm6
; CHECK-NOBMI-NEXT:    paddq %xmm2, %xmm6
; CHECK-NOBMI-NEXT:    pxor %xmm4, %xmm6
; CHECK-NOBMI-NEXT:    pxor %xmm6, %xmm0
; CHECK-NOBMI-NEXT:    movdqa %xmm0, %xmm4
; CHECK-NOBMI-NEXT:    pcmpgtd %xmm6, %xmm4
; CHECK-NOBMI-NEXT:    movdqa %xmm4, %xmm7
; CHECK-NOBMI-NEXT:    shufps {{.*#+}} xmm7 = xmm7[0,2],xmm5[0,2]
; CHECK-NOBMI-NEXT:    pcmpeqd %xmm3, %xmm1
; CHECK-NOBMI-NEXT:    pcmpeqd %xmm6, %xmm0
; CHECK-NOBMI-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
; CHECK-NOBMI-NEXT:    andps %xmm7, %xmm0
; CHECK-NOBMI-NEXT:    shufps {{.*#+}} xmm4 = xmm4[1,3],xmm5[1,3]
; CHECK-NOBMI-NEXT:    orps %xmm4, %xmm0
; CHECK-NOBMI-NEXT:    xorps %xmm2, %xmm0
; CHECK-NOBMI-NEXT:    retq
;
; CHECK-AVX2-LABEL: neither_pow2_non_zero_4xv64_x_maybe_z:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; CHECK-AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm2
; CHECK-AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; CHECK-AVX2-NEXT:    vpxor %ymm3, %ymm2, %ymm2
; CHECK-AVX2-NEXT:    vpxor %ymm0, %ymm2, %ymm0
; CHECK-AVX2-NEXT:    vpcmpgtq %ymm2, %ymm0, %ymm0
; CHECK-AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; CHECK-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; CHECK-AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vzeroupper
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512-LABEL: neither_pow2_non_zero_4xv64_x_maybe_z:
; CHECK-AVX512:       # %bb.0:
; CHECK-AVX512-NEXT:    vpopcntq %ymm0, %ymm0
; CHECK-AVX512-NEXT:    vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1
; CHECK-AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; CHECK-AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; CHECK-AVX512-NEXT:    vzeroupper
; CHECK-AVX512-NEXT:    retq
  %cnt = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %x)
  %r = icmp ne <4 x i64> %cnt, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i1> %r
}