1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X86
3 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X64
4 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefixes=X64,SSE2
5 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=X64,SSE41
6 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=X64,AVX1
7 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefixes=X64,AVX2
8 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefixes=X64,AVX512VL
; Lowering of `(X urem 5) == 0` for an i13 operand (odd divisor).
; The expected code multiplies by 3277, which is the multiplicative inverse of
; 5 modulo 2^13 (5 * 3277 = 16385 = 2 * 8192 + 1), masks the product to 13 bits
; (0x1FFF) and compares it against 1639 (= floor(8191 / 5) + 1).
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script instead of editing them by hand.
10 define i1 @test_urem_odd(i13 %X) nounwind {
11 ; X86-LABEL: test_urem_odd:
13 ; X86-NEXT: imull $3277, {{[0-9]+}}(%esp), %eax # imm = 0xCCD
14 ; X86-NEXT: andl $8191, %eax # imm = 0x1FFF
15 ; X86-NEXT: cmpl $1639, %eax # imm = 0x667
19 ; X64-LABEL: test_urem_odd:
21 ; X64-NEXT: imull $3277, %edi, %eax # imm = 0xCCD
22 ; X64-NEXT: andl $8191, %eax # imm = 0x1FFF
23 ; X64-NEXT: cmpl $1639, %eax # imm = 0x667
26 %urem = urem i13 %X, 5
27 %cmp = icmp eq i13 %urem, 0
; Lowering of `(X urem 14) == 0` for an i27 operand (even divisor, 14 = 2 * 7).
; The expected code multiplies by 115043767, which is the multiplicative
; inverse of 7 modulo 2^27 (7 * 115043767 = 805306369 = 6 * 2^27 + 1).
; The shll $26 / andl / orl sequence that follows presumably forms a 27-bit
; rotate right by one to account for the factor of 2 -- the matching right
; shift is not visible in this excerpt, so confirm against the full file.
; The result is masked to 27 bits (0x7FFFFFF) and compared against 9586981
; (= floor((2^27 - 1) / 14) + 1).
31 define i1 @test_urem_even(i27 %X) nounwind {
32 ; X86-LABEL: test_urem_even:
34 ; X86-NEXT: imull $115043767, {{[0-9]+}}(%esp), %eax # imm = 0x6DB6DB7
35 ; X86-NEXT: movl %eax, %ecx
36 ; X86-NEXT: shll $26, %ecx
37 ; X86-NEXT: andl $134217726, %eax # imm = 0x7FFFFFE
39 ; X86-NEXT: orl %ecx, %eax
40 ; X86-NEXT: andl $134217727, %eax # imm = 0x7FFFFFF
41 ; X86-NEXT: cmpl $9586981, %eax # imm = 0x924925
45 ; X64-LABEL: test_urem_even:
47 ; X64-NEXT: imull $115043767, %edi, %eax # imm = 0x6DB6DB7
48 ; X64-NEXT: movl %eax, %ecx
49 ; X64-NEXT: shll $26, %ecx
50 ; X64-NEXT: andl $134217726, %eax # imm = 0x7FFFFFE
52 ; X64-NEXT: orl %ecx, %eax
53 ; X64-NEXT: andl $134217727, %eax # imm = 0x7FFFFFF
54 ; X64-NEXT: cmpl $9586981, %eax # imm = 0x924925
57 %urem = urem i27 %X, 14
58 %cmp = icmp eq i27 %urem, 0
; Lowering of a `urem ... != 0` test for an i4 operand.
; The two leal instructions compute X + 4 * (3 * X) = 13 * X; the product is
; masked to 4 bits (andb $15) and compared against 4.
; NOTE(review): the instruction defining %urem is not visible in this excerpt.
; 13 is the multiplicative inverse of 5 modulo 16 (5 * 13 = 65 = 4 * 16 + 1)
; and 4 = floor(15 / 5) + 1, so the divisor is presumably 5 -- confirm against
; the full file.
62 define i1 @test_urem_odd_setne(i4 %X) nounwind {
63 ; X86-LABEL: test_urem_odd_setne:
65 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
66 ; X86-NEXT: leal (%eax,%eax,2), %ecx
67 ; X86-NEXT: leal (%eax,%ecx,4), %eax
68 ; X86-NEXT: andb $15, %al
69 ; X86-NEXT: cmpb $4, %al
73 ; X64-LABEL: test_urem_odd_setne:
75 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
76 ; X64-NEXT: leal (%rdi,%rdi,2), %eax
77 ; X64-NEXT: leal (%rdi,%rax,4), %eax
78 ; X64-NEXT: andb $15, %al
79 ; X64-NEXT: cmpb $4, %al
83 %cmp = icmp ne i4 %urem, 0
; Lowering of `(X urem -5) != 0` for an i9 operand. As an unsigned 9-bit
; value the divisor -5 is 507.
; The expected code multiplies by 307, which is the multiplicative inverse of
; 507 modulo 2^9 (507 * 307 = 155649 = 304 * 512 + 1), masks the product to
; 9 bits (0x1FF) and compares it against 2 (= floor(511 / 507) + 1). The setae
; in the 64-bit checks yields true when the masked product is >= 2, matching
; the `ne` predicate.
87 define i1 @test_urem_negative_odd(i9 %X) nounwind {
88 ; X86-LABEL: test_urem_negative_odd:
90 ; X86-NEXT: imull $307, {{[0-9]+}}(%esp), %eax # imm = 0x133
91 ; X86-NEXT: andl $511, %eax # imm = 0x1FF
92 ; X86-NEXT: cmpw $2, %ax
96 ; X64-LABEL: test_urem_negative_odd:
98 ; X64-NEXT: imull $307, %edi, %eax # imm = 0x133
99 ; X64-NEXT: andl $511, %eax # imm = 0x1FF
100 ; X64-NEXT: cmpw $2, %ax
101 ; X64-NEXT: setae %al
103 %urem = urem i9 %X, -5
104 %cmp = icmp ne i9 %urem, 0
108 define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
109 ; X86-LABEL: test_urem_vec:
111 ; X86-NEXT: imull $683, {{[0-9]+}}(%esp), %eax # imm = 0x2AB
112 ; X86-NEXT: movl %eax, %ecx
113 ; X86-NEXT: shll $10, %ecx
114 ; X86-NEXT: andl $2046, %eax # imm = 0x7FE
115 ; X86-NEXT: shrl %eax
116 ; X86-NEXT: orl %ecx, %eax
117 ; X86-NEXT: andl $2047, %eax # imm = 0x7FF
118 ; X86-NEXT: cmpl $342, %eax # imm = 0x156
119 ; X86-NEXT: setae %al
120 ; X86-NEXT: imull $1463, {{[0-9]+}}(%esp), %ecx # imm = 0x5B7
121 ; X86-NEXT: addl $-1463, %ecx # imm = 0xFA49
122 ; X86-NEXT: andl $2047, %ecx # imm = 0x7FF
123 ; X86-NEXT: cmpl $293, %ecx # imm = 0x125
124 ; X86-NEXT: setae %dl
125 ; X86-NEXT: imull $819, {{[0-9]+}}(%esp), %ecx # imm = 0x333
126 ; X86-NEXT: addl $-1638, %ecx # imm = 0xF99A
127 ; X86-NEXT: andl $2047, %ecx # imm = 0x7FF
128 ; X86-NEXT: cmpw $2, %cx
129 ; X86-NEXT: setae %cl
132 ; SSE2-LABEL: test_urem_vec:
134 ; SSE2-NEXT: movd %esi, %xmm0
135 ; SSE2-NEXT: movd %edi, %xmm1
136 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
137 ; SSE2-NEXT: movd %edx, %xmm0
138 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
139 ; SSE2-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
140 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = <683,1463,819,u>
141 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
142 ; SSE2-NEXT: pmuludq %xmm0, %xmm1
143 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,2,2,3]
144 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
145 ; SSE2-NEXT: pmuludq %xmm2, %xmm0
146 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
147 ; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
148 ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2047,2047,2047,2047]
149 ; SSE2-NEXT: movdqa %xmm1, %xmm2
150 ; SSE2-NEXT: pand %xmm0, %xmm2
151 ; SSE2-NEXT: psrld $1, %xmm2
152 ; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm2[0],xmm3[1,2,3]
153 ; SSE2-NEXT: pslld $10, %xmm1
154 ; SSE2-NEXT: xorps %xmm2, %xmm2
155 ; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
156 ; SSE2-NEXT: orps %xmm3, %xmm2
157 ; SSE2-NEXT: andps %xmm0, %xmm2
158 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
159 ; SSE2-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp)
160 ; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al
161 ; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl
162 ; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl
165 ; SSE41-LABEL: test_urem_vec:
167 ; SSE41-NEXT: movd %edi, %xmm0
168 ; SSE41-NEXT: pinsrd $1, %esi, %xmm0
169 ; SSE41-NEXT: pinsrd $2, %edx, %xmm0
170 ; SSE41-NEXT: psubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
171 ; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
172 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2047,2047,2047,2047]
173 ; SSE41-NEXT: movdqa %xmm0, %xmm2
174 ; SSE41-NEXT: pand %xmm1, %xmm2
175 ; SSE41-NEXT: psrld $1, %xmm2
176 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,3,4,5,6,7]
177 ; SSE41-NEXT: pslld $10, %xmm0
178 ; SSE41-NEXT: pxor %xmm3, %xmm3
179 ; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm0[0,1],xmm3[2,3,4,5,6,7]
180 ; SSE41-NEXT: por %xmm2, %xmm3
181 ; SSE41-NEXT: pand %xmm1, %xmm3
182 ; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
183 ; SSE41-NEXT: movd %xmm3, %eax
184 ; SSE41-NEXT: pextrb $4, %xmm3, %edx
185 ; SSE41-NEXT: pextrb $8, %xmm3, %ecx
186 ; SSE41-NEXT: # kill: def $al killed $al killed $eax
187 ; SSE41-NEXT: # kill: def $dl killed $dl killed $edx
188 ; SSE41-NEXT: # kill: def $cl killed $cl killed $ecx
191 ; AVX1-LABEL: test_urem_vec:
193 ; AVX1-NEXT: vmovd %edi, %xmm0
194 ; AVX1-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
195 ; AVX1-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
196 ; AVX1-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
197 ; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
198 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2047,2047,2047,2047]
199 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm2
200 ; AVX1-NEXT: vpsrld $1, %xmm2, %xmm2
201 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,3,4,5,6,7]
202 ; AVX1-NEXT: vpslld $10, %xmm0, %xmm0
203 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
204 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3,4,5,6,7]
205 ; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0
206 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
207 ; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
208 ; AVX1-NEXT: vmovd %xmm0, %eax
209 ; AVX1-NEXT: vpextrb $4, %xmm0, %edx
210 ; AVX1-NEXT: vpextrb $8, %xmm0, %ecx
211 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
212 ; AVX1-NEXT: # kill: def $dl killed $dl killed $edx
213 ; AVX1-NEXT: # kill: def $cl killed $cl killed $ecx
216 ; AVX2-LABEL: test_urem_vec:
218 ; AVX2-NEXT: vmovd %edi, %xmm0
219 ; AVX2-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
220 ; AVX2-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
221 ; AVX2-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
222 ; AVX2-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
223 ; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
224 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2047,2047,2047,2047]
225 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
226 ; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
227 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
228 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
229 ; AVX2-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
230 ; AVX2-NEXT: vmovd %xmm0, %eax
231 ; AVX2-NEXT: vpextrb $4, %xmm0, %edx
232 ; AVX2-NEXT: vpextrb $8, %xmm0, %ecx
233 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
234 ; AVX2-NEXT: # kill: def $dl killed $dl killed $edx
235 ; AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
238 ; AVX512VL-LABEL: test_urem_vec:
240 ; AVX512VL-NEXT: vmovd %edi, %xmm0
241 ; AVX512VL-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
242 ; AVX512VL-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
243 ; AVX512VL-NEXT: vpsubd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
244 ; AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
245 ; AVX512VL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
246 ; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2047,2047,2047,2047]
247 ; AVX512VL-NEXT: vpand %xmm2, %xmm0, %xmm0
248 ; AVX512VL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
249 ; AVX512VL-NEXT: vpternlogd $200, %xmm1, %xmm2, %xmm0
250 ; AVX512VL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k0
251 ; AVX512VL-NEXT: kshiftrw $1, %k0, %k1
252 ; AVX512VL-NEXT: kmovw %k1, %edx
253 ; AVX512VL-NEXT: kshiftrw $2, %k0, %k1
254 ; AVX512VL-NEXT: kmovw %k1, %ecx
255 ; AVX512VL-NEXT: kmovw %k0, %eax
256 ; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
257 ; AVX512VL-NEXT: # kill: def $dl killed $dl killed $edx
258 ; AVX512VL-NEXT: # kill: def $cl killed $cl killed $ecx
259 ; AVX512VL-NEXT: retq
260 %urem = urem <3 x i11> %X, <i11 6, i11 7, i11 -5>
261 %cmp = icmp ne <3 x i11> %urem, <i11 0, i11 1, i11 2>