1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
3 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
; udiv <2 x i32> by the constant 7. SSE2 has no vector integer divide, so the
; compiler expands this to an unsigned magic-number multiply (613566757 =
; 0x24924925) using pmuludq on the even/odd lanes, followed by the standard
; fixup: t = x - hi; q = (t >> 1 + hi) >> 2.
5 define void @test_udiv7_v2i32(ptr %x, ptr %y) nounwind {
6 ; X64-LABEL: test_udiv7_v2i32:
8 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
9 ; X64-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
10 ; X64-NEXT: movdqa %xmm0, %xmm2
11 ; X64-NEXT: pmuludq %xmm1, %xmm2
12 ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
13 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
14 ; X64-NEXT: pmuludq %xmm1, %xmm3
15 ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
16 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
17 ; X64-NEXT: psubd %xmm2, %xmm0
18 ; X64-NEXT: psrld $1, %xmm0
19 ; X64-NEXT: paddd %xmm2, %xmm0
20 ; X64-NEXT: psrld $2, %xmm0
21 ; X64-NEXT: movq %xmm0, (%rsi)
24 ; X86-LABEL: test_udiv7_v2i32:
26 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
27 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
28 ; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
29 ; X86-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
30 ; X86-NEXT: movdqa %xmm0, %xmm2
31 ; X86-NEXT: pmuludq %xmm1, %xmm2
32 ; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
33 ; X86-NEXT: movdqa %xmm0, %xmm3
34 ; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,1,1]
35 ; X86-NEXT: pmuludq %xmm1, %xmm3
36 ; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
37 ; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
38 ; X86-NEXT: psubd %xmm2, %xmm0
39 ; X86-NEXT: psrld $1, %xmm0
40 ; X86-NEXT: paddd %xmm2, %xmm0
41 ; X86-NEXT: psrld $2, %xmm0
42 ; X86-NEXT: movq %xmm0, (%eax)
44 %a = load <2 x i32>, ptr %x
45 %b = udiv <2 x i32> %a, <i32 7, i32 7>
46 store <2 x i32> %b, ptr %y
; urem <2 x i32> by 7. Same magic-multiply quotient computation as
; test_udiv7_v2i32, then the remainder is reconstructed as x - q*7, where
; q*7 is formed without pmulld as (q << 3) - q (pslld $3 / psubd / paddd).
51 define void @test_urem7_v2i32(ptr %x, ptr %y) nounwind {
52 ; X64-LABEL: test_urem7_v2i32:
53 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
54 ; X64-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
55 ; X64-NEXT: movdqa %xmm0, %xmm2
56 ; X64-NEXT: pmuludq %xmm1, %xmm2
57 ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
58 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
59 ; X64-NEXT: pmuludq %xmm1, %xmm3
60 ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
61 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
62 ; X64-NEXT: movdqa %xmm0, %xmm1
63 ; X64-NEXT: psubd %xmm2, %xmm1
64 ; X64-NEXT: psrld $1, %xmm1
65 ; X64-NEXT: paddd %xmm2, %xmm1
66 ; X64-NEXT: psrld $2, %xmm1
67 ; X64-NEXT: movdqa %xmm1, %xmm2
68 ; X64-NEXT: pslld $3, %xmm2
69 ; X64-NEXT: psubd %xmm2, %xmm1
70 ; X64-NEXT: paddd %xmm0, %xmm1
71 ; X64-NEXT: movq %xmm1, (%rsi)
74 ; X86-LABEL: test_urem7_v2i32:
76 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
77 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
78 ; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
79 ; X86-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
80 ; X86-NEXT: movdqa %xmm0, %xmm2
81 ; X86-NEXT: pmuludq %xmm1, %xmm2
82 ; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
83 ; X86-NEXT: movdqa %xmm0, %xmm3
84 ; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,1,1]
85 ; X86-NEXT: pmuludq %xmm1, %xmm3
86 ; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
87 ; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
88 ; X86-NEXT: movdqa %xmm0, %xmm1
89 ; X86-NEXT: psubd %xmm2, %xmm1
90 ; X86-NEXT: psrld $1, %xmm1
91 ; X86-NEXT: paddd %xmm2, %xmm1
92 ; X86-NEXT: psrld $2, %xmm1
93 ; X86-NEXT: movdqa %xmm1, %xmm2
94 ; X86-NEXT: pslld $3, %xmm2
95 ; X86-NEXT: psubd %xmm2, %xmm1
96 ; X86-NEXT: paddd %xmm0, %xmm1
97 ; X86-NEXT: movq %xmm1, (%eax)
99 %a = load <2 x i32>, ptr %x
100 %b = urem <2 x i32> %a, <i32 7, i32 7>
101 store <2 x i32> %b, ptr %y
; sdiv <2 x i32> by 7. Signed magic multiply with constant 2454267027
; (0x92492493). SSE2 only has an unsigned 32x32->64 multiply (pmuludq), so the
; pcmpgtd/pand/paddd/psubd sequence corrects the unsigned high half into a
; signed high half, then psrld $31 adds the sign bit and psrad $2 completes
; the rounded quotient.
105 define void @test_sdiv7_v2i32(ptr %x, ptr %y) nounwind {
106 ; X64-LABEL: test_sdiv7_v2i32:
108 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
109 ; X64-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
110 ; X64-NEXT: movdqa %xmm0, %xmm2
111 ; X64-NEXT: pmuludq %xmm1, %xmm2
112 ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
113 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
114 ; X64-NEXT: pmuludq %xmm1, %xmm3
115 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
116 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
117 ; X64-NEXT: pxor %xmm3, %xmm3
118 ; X64-NEXT: pcmpgtd %xmm0, %xmm3
119 ; X64-NEXT: pand %xmm1, %xmm3
120 ; X64-NEXT: paddd %xmm0, %xmm3
121 ; X64-NEXT: psubd %xmm3, %xmm2
122 ; X64-NEXT: paddd %xmm0, %xmm2
123 ; X64-NEXT: movdqa %xmm2, %xmm0
124 ; X64-NEXT: psrld $31, %xmm0
125 ; X64-NEXT: psrad $2, %xmm2
126 ; X64-NEXT: paddd %xmm0, %xmm2
127 ; X64-NEXT: movq %xmm2, (%rsi)
130 ; X86-LABEL: test_sdiv7_v2i32:
132 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
133 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
134 ; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
135 ; X86-NEXT: movdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
136 ; X86-NEXT: movdqa %xmm1, %xmm0
137 ; X86-NEXT: pmuludq %xmm2, %xmm0
138 ; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
139 ; X86-NEXT: movdqa %xmm1, %xmm3
140 ; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,1,1]
141 ; X86-NEXT: pmuludq %xmm2, %xmm3
142 ; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
143 ; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
144 ; X86-NEXT: pxor %xmm3, %xmm3
145 ; X86-NEXT: pcmpgtd %xmm1, %xmm3
146 ; X86-NEXT: pand %xmm2, %xmm3
147 ; X86-NEXT: paddd %xmm1, %xmm3
148 ; X86-NEXT: psubd %xmm3, %xmm0
149 ; X86-NEXT: paddd %xmm1, %xmm0
150 ; X86-NEXT: movdqa %xmm0, %xmm1
151 ; X86-NEXT: psrld $31, %xmm1
152 ; X86-NEXT: psrad $2, %xmm0
153 ; X86-NEXT: paddd %xmm1, %xmm0
154 ; X86-NEXT: movq %xmm0, (%eax)
156 %a = load <2 x i32>, ptr %x
157 %b = sdiv <2 x i32> %a, <i32 7, i32 7>
158 store <2 x i32> %b, ptr %y
; srem <2 x i32> by 7. Combines the signed magic-multiply quotient from
; test_sdiv7_v2i32 with the x - q*7 reconstruction (q*7 as (q << 3) - q)
; used in test_urem7_v2i32.
163 define void @test_srem7_v2i32(ptr %x, ptr %y) nounwind {
164 ; X64-LABEL: test_srem7_v2i32:
165 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
166 ; X64-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
167 ; X64-NEXT: movdqa %xmm0, %xmm2
168 ; X64-NEXT: pmuludq %xmm1, %xmm2
169 ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
170 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
171 ; X64-NEXT: pmuludq %xmm1, %xmm3
172 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
173 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
174 ; X64-NEXT: pxor %xmm3, %xmm3
175 ; X64-NEXT: pcmpgtd %xmm0, %xmm3
176 ; X64-NEXT: pand %xmm1, %xmm3
177 ; X64-NEXT: paddd %xmm0, %xmm3
178 ; X64-NEXT: psubd %xmm3, %xmm2
179 ; X64-NEXT: paddd %xmm0, %xmm2
180 ; X64-NEXT: movdqa %xmm2, %xmm1
181 ; X64-NEXT: psrld $31, %xmm1
182 ; X64-NEXT: psrad $2, %xmm2
183 ; X64-NEXT: paddd %xmm1, %xmm2
184 ; X64-NEXT: movdqa %xmm2, %xmm1
185 ; X64-NEXT: pslld $3, %xmm1
186 ; X64-NEXT: psubd %xmm1, %xmm2
187 ; X64-NEXT: paddd %xmm0, %xmm2
188 ; X64-NEXT: movq %xmm2, (%rsi)
191 ; X86-LABEL: test_srem7_v2i32:
193 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
194 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
195 ; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
196 ; X86-NEXT: movdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
197 ; X86-NEXT: movdqa %xmm0, %xmm1
198 ; X86-NEXT: pmuludq %xmm2, %xmm1
199 ; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
200 ; X86-NEXT: movdqa %xmm0, %xmm3
201 ; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,1,1]
202 ; X86-NEXT: pmuludq %xmm2, %xmm3
203 ; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
204 ; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
205 ; X86-NEXT: pxor %xmm3, %xmm3
206 ; X86-NEXT: pcmpgtd %xmm0, %xmm3
207 ; X86-NEXT: pand %xmm2, %xmm3
208 ; X86-NEXT: paddd %xmm0, %xmm3
209 ; X86-NEXT: psubd %xmm3, %xmm1
210 ; X86-NEXT: paddd %xmm0, %xmm1
211 ; X86-NEXT: movdqa %xmm1, %xmm2
212 ; X86-NEXT: psrld $31, %xmm2
213 ; X86-NEXT: psrad $2, %xmm1
214 ; X86-NEXT: paddd %xmm2, %xmm1
215 ; X86-NEXT: movdqa %xmm1, %xmm2
216 ; X86-NEXT: pslld $3, %xmm2
217 ; X86-NEXT: psubd %xmm2, %xmm1
218 ; X86-NEXT: paddd %xmm0, %xmm1
219 ; X86-NEXT: movq %xmm1, (%eax)
221 %a = load <2 x i32>, ptr %x
222 %b = srem <2 x i32> %a, <i32 7, i32 7>
223 store <2 x i32> %b, ptr %y
; udiv <2 x i32> by the power-of-two 8: folds to a single logical right
; shift (psrld $3) on both targets.
227 define void @test_udiv_pow2_v2i32(ptr %x, ptr %y) nounwind {
228 ; X64-LABEL: test_udiv_pow2_v2i32:
230 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
231 ; X64-NEXT: psrld $3, %xmm0
232 ; X64-NEXT: movq %xmm0, (%rsi)
235 ; X86-LABEL: test_udiv_pow2_v2i32:
237 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
238 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
239 ; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
240 ; X86-NEXT: psrld $3, %xmm0
241 ; X86-NEXT: movq %xmm0, (%eax)
243 %a = load <2 x i32>, ptr %x
244 %b = udiv <2 x i32> %a, <i32 8, i32 8>
245 store <2 x i32> %b, ptr %y
; urem <2 x i32> by 8: folds to a mask with 7 per lane. On x86-64 both lanes
; are masked at once with a 64-bit scalar and (0x700000007); on i386 the mask
; comes from a constant-pool andps.
249 define void @test_urem_pow2_v2i32(ptr %x, ptr %y) nounwind {
250 ; X64-LABEL: test_urem_pow2_v2i32:
252 ; X64-NEXT: movabsq $30064771079, %rax # imm = 0x700000007
253 ; X64-NEXT: andq (%rdi), %rax
254 ; X64-NEXT: movq %rax, (%rsi)
257 ; X86-LABEL: test_urem_pow2_v2i32:
259 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
260 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
261 ; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
262 ; X86-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
263 ; X86-NEXT: movlps %xmm0, (%eax)
265 %a = load <2 x i32>, ptr %x
266 %b = urem <2 x i32> %a, <i32 8, i32 8>
267 store <2 x i32> %b, ptr %y
; sdiv <2 x i32> by 8: arithmetic shift with round-toward-zero bias — add
; (x >> 31) >>u 29 (i.e. 7 for negative lanes, 0 otherwise) before psrad $3.
271 define void @test_sdiv_pow2_v2i32(ptr %x, ptr %y) nounwind {
272 ; X64-LABEL: test_sdiv_pow2_v2i32:
274 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
275 ; X64-NEXT: movdqa %xmm0, %xmm1
276 ; X64-NEXT: psrad $31, %xmm1
277 ; X64-NEXT: psrld $29, %xmm1
278 ; X64-NEXT: paddd %xmm0, %xmm1
279 ; X64-NEXT: psrad $3, %xmm1
280 ; X64-NEXT: movq %xmm1, (%rsi)
283 ; X86-LABEL: test_sdiv_pow2_v2i32:
285 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
286 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
287 ; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
288 ; X86-NEXT: movdqa %xmm0, %xmm1
289 ; X86-NEXT: psrad $31, %xmm1
290 ; X86-NEXT: psrld $29, %xmm1
291 ; X86-NEXT: paddd %xmm0, %xmm1
292 ; X86-NEXT: psrad $3, %xmm1
293 ; X86-NEXT: movq %xmm1, (%eax)
295 %a = load <2 x i32>, ptr %x
296 %b = sdiv <2 x i32> %a, <i32 8, i32 8>
297 store <2 x i32> %b, ptr %y
; NOTE(review): despite its name, the IR body performs `udiv` by 8, not
; `srem` — likely a copy-paste error, and the CHECK lines (psrld $3) match
; the udiv lowering. Changing the IR would require regenerating all
; assertions with update_llc_test_checks.py; confirm intent upstream before
; renaming or rewriting.
301 define void @test_srem_pow2_v2i32(ptr %x, ptr %y) nounwind {
302 ; X64-LABEL: test_srem_pow2_v2i32:
304 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
305 ; X64-NEXT: psrld $3, %xmm0
306 ; X64-NEXT: movq %xmm0, (%rsi)
309 ; X86-LABEL: test_srem_pow2_v2i32:
311 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
312 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
313 ; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
314 ; X86-NEXT: psrld $3, %xmm0
315 ; X86-NEXT: movq %xmm0, (%eax)
317 %a = load <2 x i32>, ptr %x
318 %b = udiv <2 x i32> %a, <i32 8, i32 8>
319 store <2 x i32> %b, ptr %y
; udiv <2 x i32> by a non-constant vector: no vector divide exists, so each
; lane is extracted to a GPR and divided with scalar divl (rdx zeroed first),
; then the two quotients are repacked with punpckldq.
323 define void @test_udiv_v2i32(ptr %x, ptr %y, ptr %z) nounwind {
324 ; X64-LABEL: test_udiv_v2i32:
326 ; X64-NEXT: movq %rdx, %rcx
327 ; X64-NEXT: movq (%rdi), %rax
328 ; X64-NEXT: movq %rax, %xmm0
329 ; X64-NEXT: movq (%rsi), %rsi
330 ; X64-NEXT: movq %rsi, %xmm1
331 ; X64-NEXT: # kill: def $eax killed $eax killed $rax
332 ; X64-NEXT: xorl %edx, %edx
333 ; X64-NEXT: divl %esi
334 ; X64-NEXT: movd %eax, %xmm2
335 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
336 ; X64-NEXT: movd %xmm0, %eax
337 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
338 ; X64-NEXT: movd %xmm0, %esi
339 ; X64-NEXT: xorl %edx, %edx
340 ; X64-NEXT: divl %esi
341 ; X64-NEXT: movd %eax, %xmm0
342 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
343 ; X64-NEXT: movq %xmm2, (%rcx)
346 ; X86-LABEL: test_udiv_v2i32:
348 ; X86-NEXT: pushl %esi
349 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
350 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
351 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
352 ; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
353 ; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
354 ; X86-NEXT: movd %xmm0, %eax
355 ; X86-NEXT: movd %xmm1, %esi
356 ; X86-NEXT: xorl %edx, %edx
357 ; X86-NEXT: divl %esi
358 ; X86-NEXT: movd %eax, %xmm2
359 ; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
360 ; X86-NEXT: movd %xmm0, %eax
361 ; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
362 ; X86-NEXT: movd %xmm1, %esi
363 ; X86-NEXT: xorl %edx, %edx
364 ; X86-NEXT: divl %esi
365 ; X86-NEXT: movd %eax, %xmm0
366 ; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
367 ; X86-NEXT: movq %xmm2, (%ecx)
368 ; X86-NEXT: popl %esi
370 %a = load <2 x i32>, ptr %x
371 %b = load <2 x i32>, ptr %y
372 %c = udiv <2 x i32> %a, %b
373 store <2 x i32> %c, ptr %z
; urem <2 x i32> by a non-constant vector: same scalarized divl sequence as
; test_udiv_v2i32, but the remainder is taken from %edx instead of the
; quotient in %eax.
377 define void @test_urem_v2i32(ptr %x, ptr %y, ptr %z) nounwind {
378 ; X64-LABEL: test_urem_v2i32:
380 ; X64-NEXT: movq %rdx, %rcx
381 ; X64-NEXT: movq (%rdi), %rax
382 ; X64-NEXT: movq %rax, %xmm0
383 ; X64-NEXT: movq (%rsi), %rsi
384 ; X64-NEXT: movq %rsi, %xmm1
385 ; X64-NEXT: # kill: def $eax killed $eax killed $rax
386 ; X64-NEXT: xorl %edx, %edx
387 ; X64-NEXT: divl %esi
388 ; X64-NEXT: movd %edx, %xmm2
389 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
390 ; X64-NEXT: movd %xmm0, %eax
391 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
392 ; X64-NEXT: movd %xmm0, %esi
393 ; X64-NEXT: xorl %edx, %edx
394 ; X64-NEXT: divl %esi
395 ; X64-NEXT: movd %edx, %xmm0
396 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
397 ; X64-NEXT: movq %xmm2, (%rcx)
400 ; X86-LABEL: test_urem_v2i32:
402 ; X86-NEXT: pushl %esi
403 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
404 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
405 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
406 ; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
407 ; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
408 ; X86-NEXT: movd %xmm0, %eax
409 ; X86-NEXT: movd %xmm1, %esi
410 ; X86-NEXT: xorl %edx, %edx
411 ; X86-NEXT: divl %esi
412 ; X86-NEXT: movd %edx, %xmm2
413 ; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
414 ; X86-NEXT: movd %xmm0, %eax
415 ; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
416 ; X86-NEXT: movd %xmm1, %esi
417 ; X86-NEXT: xorl %edx, %edx
418 ; X86-NEXT: divl %esi
419 ; X86-NEXT: movd %edx, %xmm0
420 ; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
421 ; X86-NEXT: movq %xmm2, (%ecx)
422 ; X86-NEXT: popl %esi
424 %a = load <2 x i32>, ptr %x
425 %b = load <2 x i32>, ptr %y
426 %c = urem <2 x i32> %a, %b
427 store <2 x i32> %c, ptr %z
; sdiv <2 x i32> by a non-constant vector: scalarized per-lane idivl (the
; cltd sign-extension lines before each idivl are elided in this excerpt's
; check lines), quotients repacked with punpckldq.
431 define void @test_sdiv_v2i32(ptr %x, ptr %y, ptr %z) nounwind {
432 ; X64-LABEL: test_sdiv_v2i32:
434 ; X64-NEXT: movq %rdx, %rcx
435 ; X64-NEXT: movq (%rdi), %rax
436 ; X64-NEXT: movq %rax, %xmm0
437 ; X64-NEXT: movq (%rsi), %rsi
438 ; X64-NEXT: movq %rsi, %xmm1
439 ; X64-NEXT: # kill: def $eax killed $eax killed $rax
441 ; X64-NEXT: idivl %esi
442 ; X64-NEXT: movd %eax, %xmm2
443 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
444 ; X64-NEXT: movd %xmm0, %eax
445 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
446 ; X64-NEXT: movd %xmm0, %esi
448 ; X64-NEXT: idivl %esi
449 ; X64-NEXT: movd %eax, %xmm0
450 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
451 ; X64-NEXT: movq %xmm2, (%rcx)
454 ; X86-LABEL: test_sdiv_v2i32:
456 ; X86-NEXT: pushl %esi
457 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
458 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
459 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
460 ; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
461 ; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
462 ; X86-NEXT: movd %xmm0, %eax
463 ; X86-NEXT: movd %xmm1, %esi
465 ; X86-NEXT: idivl %esi
466 ; X86-NEXT: movd %eax, %xmm2
467 ; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
468 ; X86-NEXT: movd %xmm0, %eax
469 ; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
470 ; X86-NEXT: movd %xmm1, %esi
472 ; X86-NEXT: idivl %esi
473 ; X86-NEXT: movd %eax, %xmm0
474 ; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
475 ; X86-NEXT: movq %xmm2, (%ecx)
476 ; X86-NEXT: popl %esi
478 %a = load <2 x i32>, ptr %x
479 %b = load <2 x i32>, ptr %y
480 %c = sdiv <2 x i32> %a, %b
481 store <2 x i32> %c, ptr %z
; NOTE(review): despite its name, the IR body performs `sdiv`, not `srem`
; (the checks take the quotient from %eax, not the remainder from %edx) —
; likely a copy-paste error. Changing the IR would require regenerating the
; autogenerated assertions; confirm intent upstream before rewriting.
486 define void @test_srem_v2i32(ptr %x, ptr %y, ptr %z) nounwind {
487 ; X64-LABEL: test_srem_v2i32:
488 ; X64-NEXT: movq %rdx, %rcx
489 ; X64-NEXT: movq (%rdi), %rax
490 ; X64-NEXT: movq %rax, %xmm0
491 ; X64-NEXT: movq (%rsi), %rsi
492 ; X64-NEXT: movq %rsi, %xmm1
493 ; X64-NEXT: # kill: def $eax killed $eax killed $rax
495 ; X64-NEXT: idivl %esi
496 ; X64-NEXT: movd %eax, %xmm2
497 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
498 ; X64-NEXT: movd %xmm0, %eax
499 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
500 ; X64-NEXT: movd %xmm0, %esi
502 ; X64-NEXT: idivl %esi
503 ; X64-NEXT: movd %eax, %xmm0
504 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
505 ; X64-NEXT: movq %xmm2, (%rcx)
508 ; X86-LABEL: test_srem_v2i32:
510 ; X86-NEXT: pushl %esi
511 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
512 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
513 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
514 ; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
515 ; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
516 ; X86-NEXT: movd %xmm0, %eax
517 ; X86-NEXT: movd %xmm1, %esi
519 ; X86-NEXT: idivl %esi
520 ; X86-NEXT: movd %eax, %xmm2
521 ; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
522 ; X86-NEXT: movd %xmm0, %eax
523 ; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
524 ; X86-NEXT: movd %xmm1, %esi
526 ; X86-NEXT: idivl %esi
527 ; X86-NEXT: movd %eax, %xmm0
528 ; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
529 ; X86-NEXT: movq %xmm2, (%ecx)
530 ; X86-NEXT: popl %esi
532 %a = load <2 x i32>, ptr %x
533 %b = load <2 x i32>, ptr %y
534 %c = sdiv <2 x i32> %a, %b
535 store <2 x i32> %c, ptr %z