1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse,+sse2,+avx,+avx2 | FileCheck %s
5 ; icmp eq/ne (urem %x, C), 0
6 ; Iff C is not a power of two (powers of two should not reach this point),
7 ; and %x has at most one bit set (is a power of two or zero), omit the 'urem':
10 ;------------------------------------------------------------------------------;
12 ;------------------------------------------------------------------------------;
; Positive test: %x is masked down to the single bit 128 and the result is
; taken modulo the constant 6, then compared against zero. The expected
; assembly contains no division at all, only a test of the masked bit
; followed by sete.
14 define i1 @p0_scalar_urem_by_const(i32 %x, i32 %y) {
15 ; CHECK-LABEL: p0_scalar_urem_by_const:
17 ; CHECK-NEXT: testb $-128, %dil
18 ; CHECK-NEXT: sete %al
20 %t0 = and i32 %x, 128 ; clearly a power-of-two or zero
21 %t1 = urem i32 %t0, 6 ; '6' is clearly not a power of two
22 %t2 = icmp eq i32 %t1, 0
; Positive test: same dividend as the constant-divisor case (%x masked to the
; single bit 128), but the divisor is the non-constant value '%y | 6', which
; always has at least two bits set. The expected assembly is again just a
; test of the masked bit followed by sete; %y is not used by it.
26 define i1 @p1_scalar_urem_by_nonconst(i32 %x, i32 %y) {
27 ; CHECK-LABEL: p1_scalar_urem_by_nonconst:
29 ; CHECK-NEXT: testb $-128, %dil
30 ; CHECK-NEXT: sete %al
32 %t0 = and i32 %x, 128 ; clearly a power-of-two or zero
33 %t1 = or i32 %y, 6 ; two bits set, clearly not a power of two
34 %t2 = urem i32 %t0, %t1
35 %t3 = icmp eq i32 %t2, 0
; The dividend is the low bit of %x shifted left by the variable amount %y,
; taken modulo the constant 3 and compared against zero.
; Note: the expected assembly below still contains the multiply-by-0xAAAAAAAB /
; compare-with-0x55555556 sequence, i.e. the remainder check has not been
; reduced to a plain compare of the shifted value against zero here.
39 define i1 @p2_scalar_shifted_urem_by_const(i32 %x, i32 %y) {
40 ; CHECK-LABEL: p2_scalar_shifted_urem_by_const:
42 ; CHECK-NEXT: movl %esi, %ecx
43 ; CHECK-NEXT: andl $1, %edi
44 ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
45 ; CHECK-NEXT: shll %cl, %edi
46 ; CHECK-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
47 ; CHECK-NEXT: cmpl $1431655766, %eax # imm = 0x55555556
48 ; CHECK-NEXT: setb %al
50 %t0 = and i32 %x, 1 ; clearly a power-of-two or zero
51 %t1 = shl i32 %t0, %y ; will still be a power-of-two or zero with any %y
52 %t2 = urem i32 %t1, 3 ; '3' is clearly not a power of two
53 %t3 = icmp eq i32 %t2, 0
; Variant of the shifted scalar test where the mask selects bit 1 (value 2)
; instead of bit 0 before the variable left shift by %y; the divisor is still
; the constant 3.
; Note: as in the bit-0 variant, the expected assembly below keeps the
; multiply-by-0xAAAAAAAB / compare-with-0x55555556 sequence.
57 define i1 @p3_scalar_shifted2_urem_by_const(i32 %x, i32 %y) {
58 ; CHECK-LABEL: p3_scalar_shifted2_urem_by_const:
60 ; CHECK-NEXT: movl %esi, %ecx
61 ; CHECK-NEXT: andl $2, %edi
62 ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
63 ; CHECK-NEXT: shll %cl, %edi
64 ; CHECK-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
65 ; CHECK-NEXT: cmpl $1431655766, %eax # imm = 0x55555556
66 ; CHECK-NEXT: setb %al
68 %t0 = and i32 %x, 2 ; clearly a power-of-two or zero
69 %t1 = shl i32 %t0, %y ; will still be a power-of-two or zero with any %y
70 %t2 = urem i32 %t1, 3 ; '3' is clearly not a power of two
71 %t3 = icmp eq i32 %t2, 0
75 ;------------------------------------------------------------------------------;
77 ;------------------------------------------------------------------------------;
; Vector counterpart of the scalar constant-divisor test: every lane of %x is
; masked to the single bit 128 and taken modulo a splat of 6, then compared
; lane-wise against zero.
; Note: the expected assembly below still computes the remainder explicitly
; (multiply by 2863311531, shift right by 2, multiply by 6, subtract) before
; the final compare with zero.
79 define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) {
80 ; CHECK-LABEL: p4_vector_urem_by_const__splat:
82 ; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm1 = [128,128,128,128]
83 ; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0
84 ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
85 ; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
86 ; CHECK-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
87 ; CHECK-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
88 ; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
89 ; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
90 ; CHECK-NEXT: vpsrld $2, %xmm1, %xmm1
91 ; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm2 = [6,6,6,6]
92 ; CHECK-NEXT: vpmulld %xmm2, %xmm1, %xmm1
93 ; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0
94 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
95 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
97 %t0 = and <4 x i32> %x, <i32 128, i32 128, i32 128, i32 128> ; clearly a power-of-two or zero
98 %t1 = urem <4 x i32> %t0, <i32 6, i32 6, i32 6, i32 6> ; '6' is clearly not a power of two
99 %t2 = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
; Non-splat vector variant: each lane uses a different single-bit mask and a
; different divisor that is not a power of two.
; Note: the expected assembly below still computes the remainder explicitly,
; using per-lane multipliers, per-lane shifts (vpsrlvd) and per-lane divisors
; loaded from memory, before the final compare with zero.
103 define <4 x i1> @p5_vector_urem_by_const__nonsplat(<4 x i32> %x, <4 x i32> %y) {
104 ; CHECK-LABEL: p5_vector_urem_by_const__nonsplat:
106 ; CHECK-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
107 ; CHECK-NEXT: vmovdqa {{.*#+}} xmm1 = [2863311531,3435973837,2863311531,954437177]
108 ; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
109 ; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
110 ; CHECK-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
111 ; CHECK-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
112 ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
113 ; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
114 ; CHECK-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
115 ; CHECK-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
116 ; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0
117 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
118 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
120 %t0 = and <4 x i32> %x, <i32 128, i32 2, i32 4, i32 8> ; every lane is masked to a single bit
121 %t1 = urem <4 x i32> %t0, <i32 3, i32 5, i32 6, i32 9> ; none of the divisors is a power of two
122 %t2 = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
; Splat-of-6 divisor with an undef element in lane 2 of the AND mask; the
; comparison constant is all zeros.
; Note: the expected assembly below broadcasts 128 to all four mask lanes and
; otherwise matches the fully-defined splat case, including the explicit
; remainder computation.
126 define <4 x i1> @p6_vector_urem_by_const__nonsplat_undef0(<4 x i32> %x, <4 x i32> %y) {
127 ; CHECK-LABEL: p6_vector_urem_by_const__nonsplat_undef0:
129 ; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm1 = [128,128,128,128]
130 ; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0
131 ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
132 ; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
133 ; CHECK-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
134 ; CHECK-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
135 ; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
136 ; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
137 ; CHECK-NEXT: vpsrld $2, %xmm1, %xmm1
138 ; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm2 = [6,6,6,6]
139 ; CHECK-NEXT: vpmulld %xmm2, %xmm1, %xmm1
140 ; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0
141 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
142 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
144 %t0 = and <4 x i32> %x, <i32 128, i32 128, i32 undef, i32 128> ; lane 2 of the mask is undef
145 %t1 = urem <4 x i32> %t0, <i32 6, i32 6, i32 6, i32 6> ; '6' is clearly not a power of two
146 %t2 = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
; Splat mask of 128 and splat divisor of 6, with an undef element in lane 2 of
; the zero vector used by the final comparison.
; Note: the expected assembly below matches the fully-defined splat case,
; including the explicit remainder computation and a compare against an
; all-zero register.
150 define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32> %y) {
151 ; CHECK-LABEL: p7_vector_urem_by_const__nonsplat_undef2:
153 ; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm1 = [128,128,128,128]
154 ; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0
155 ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
156 ; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
157 ; CHECK-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
158 ; CHECK-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
159 ; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
160 ; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
161 ; CHECK-NEXT: vpsrld $2, %xmm1, %xmm1
162 ; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm2 = [6,6,6,6]
163 ; CHECK-NEXT: vpmulld %xmm2, %xmm1, %xmm1
164 ; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0
165 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
166 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
168 %t0 = and <4 x i32> %x, <i32 128, i32 128, i32 128, i32 128> ; clearly a power-of-two or zero
169 %t1 = urem <4 x i32> %t0, <i32 6, i32 6, i32 6, i32 6> ; '6' is clearly not a power of two
170 %t2 = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0> ; lane 2 of the comparison constant is undef
; Combines both undef variants: lane 2 is undef in the AND mask and in the
; zero vector used by the final comparison; the divisor is a splat of 6.
; Note: the expected assembly below matches the fully-defined splat case,
; including the explicit remainder computation.
174 define <4 x i1> @p8_vector_urem_by_const__nonsplat_undef3(<4 x i32> %x, <4 x i32> %y) {
175 ; CHECK-LABEL: p8_vector_urem_by_const__nonsplat_undef3:
177 ; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm1 = [128,128,128,128]
178 ; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0
179 ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
180 ; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
181 ; CHECK-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
182 ; CHECK-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
183 ; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
184 ; CHECK-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
185 ; CHECK-NEXT: vpsrld $2, %xmm1, %xmm1
186 ; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm2 = [6,6,6,6]
187 ; CHECK-NEXT: vpmulld %xmm2, %xmm1, %xmm1
188 ; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0
189 ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
190 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
192 %t0 = and <4 x i32> %x, <i32 128, i32 128, i32 undef, i32 128> ; lane 2 of the mask is undef
193 %t1 = urem <4 x i32> %t0, <i32 6, i32 6, i32 6, i32 6> ; '6' is clearly not a power of two
194 %t2 = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0> ; lane 2 of the comparison constant is undef
198 ;------------------------------------------------------------------------------;
199 ; Basic negative tests
200 ;------------------------------------------------------------------------------;
; Negative test: the mask 3 keeps two bits of %x, so the dividend can be 3,
; which is neither zero nor a power of two. The remainder check must stay;
; the expected assembly keeps the multiply-by-0xAAAAAAAB /
; compare-with-0x55555556 sequence.
202 define i1 @n0_urem_of_maybe_not_power_of_two(i32 %x, i32 %y) {
203 ; CHECK-LABEL: n0_urem_of_maybe_not_power_of_two:
205 ; CHECK-NEXT: andl $3, %edi
206 ; CHECK-NEXT: imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
207 ; CHECK-NEXT: cmpl $1431655766, %eax # imm = 0x55555556
208 ; CHECK-NEXT: setb %al
210 %t0 = and i32 %x, 3 ; up to two bits set, not power-of-two
211 %t1 = urem i32 %t0, 3
212 %t2 = icmp eq i32 %t1, 0
; Negative test: the dividend is a single masked bit (128), but the divisor
; '%y | 1' only has its low bit guaranteed set and may itself be a power of
; two (for example 1), so the remainder cannot be dropped. The expected
; assembly keeps the real division (divl) and tests the remainder in %edx.
216 define i1 @n1_urem_by_maybe_power_of_two(i32 %x, i32 %y) {
217 ; CHECK-LABEL: n1_urem_by_maybe_power_of_two:
219 ; CHECK-NEXT: movl %edi, %eax
220 ; CHECK-NEXT: andl $128, %eax
221 ; CHECK-NEXT: orl $1, %esi
222 ; CHECK-NEXT: xorl %edx, %edx
223 ; CHECK-NEXT: divl %esi
224 ; CHECK-NEXT: testl %edx, %edx
225 ; CHECK-NEXT: sete %al
227 %t0 = and i32 %x, 128 ; clearly a power-of-two or zero
228 %t1 = or i32 %y, 1 ; one low bit set, may be a power of two
229 %t2 = urem i32 %t0, %t1
230 %t3 = icmp eq i32 %t2, 0