1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
3 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
5 define void @test_udiv7_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind {
6 ; X64-LABEL: test_udiv7_v2i32:
8 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
9 ; X64-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
10 ; X64-NEXT: movdqa %xmm0, %xmm2
11 ; X64-NEXT: pmuludq %xmm1, %xmm2
12 ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
13 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
14 ; X64-NEXT: pmuludq %xmm1, %xmm3
15 ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
16 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
17 ; X64-NEXT: psubd %xmm2, %xmm0
18 ; X64-NEXT: psrld $1, %xmm0
19 ; X64-NEXT: paddd %xmm2, %xmm0
20 ; X64-NEXT: psrld $2, %xmm0
21 ; X64-NEXT: movq %xmm0, (%rsi)
24 ; X86-LABEL: test_udiv7_v2i32:
26 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
27 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
28 ; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
29 ; X86-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
30 ; X86-NEXT: movdqa %xmm0, %xmm2
31 ; X86-NEXT: pmuludq %xmm1, %xmm2
32 ; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
33 ; X86-NEXT: movdqa %xmm0, %xmm3
34 ; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
35 ; X86-NEXT: pmuludq %xmm1, %xmm3
36 ; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
37 ; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
38 ; X86-NEXT: psubd %xmm2, %xmm0
39 ; X86-NEXT: psrld $1, %xmm0
40 ; X86-NEXT: paddd %xmm2, %xmm0
41 ; X86-NEXT: psrld $2, %xmm0
42 ; X86-NEXT: movq %xmm0, (%eax)
44 %a = load <2 x i32>, <2 x i32>* %x
45 %b = udiv <2 x i32> %a, <i32 7, i32 7>
46 store <2 x i32> %b, <2 x i32>* %y
; Checks that urem <2 x i32> by 7 is lowered as the udiv-by-7 magic-multiply
; sequence followed by remainder reconstruction: r = a - q*7, with q*7
; computed as (q << 3) - q (pslld $3 then psubd, then paddd of the source).
; CHECK lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate rather than hand-edit.
51 define void @test_urem7_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind {
53 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
54 ; X64-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
55 ; X64-NEXT: movdqa %xmm0, %xmm2
56 ; X64-NEXT: pmuludq %xmm1, %xmm2
57 ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
58 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
59 ; X64-NEXT: pmuludq %xmm1, %xmm3
60 ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
61 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
62 ; X64-NEXT: movdqa %xmm0, %xmm1
63 ; X64-NEXT: psubd %xmm2, %xmm1
64 ; X64-NEXT: psrld $1, %xmm1
65 ; X64-NEXT: paddd %xmm2, %xmm1
66 ; X64-NEXT: psrld $2, %xmm1
67 ; X64-NEXT: movdqa %xmm1, %xmm2
68 ; X64-NEXT: pslld $3, %xmm2
69 ; X64-NEXT: psubd %xmm2, %xmm1
70 ; X64-NEXT: paddd %xmm0, %xmm1
71 ; X64-NEXT: movq %xmm1, (%rsi)
74 ; X86-LABEL: test_urem7_v2i32:
76 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
77 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
78 ; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
79 ; X86-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
80 ; X86-NEXT: movdqa %xmm0, %xmm2
81 ; X86-NEXT: pmuludq %xmm1, %xmm2
82 ; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
83 ; X86-NEXT: movdqa %xmm0, %xmm3
84 ; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
85 ; X86-NEXT: pmuludq %xmm1, %xmm3
86 ; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
87 ; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
88 ; X86-NEXT: movdqa %xmm0, %xmm1
89 ; X86-NEXT: psubd %xmm2, %xmm1
90 ; X86-NEXT: psrld $1, %xmm1
91 ; X86-NEXT: paddd %xmm2, %xmm1
92 ; X86-NEXT: psrld $2, %xmm1
93 ; X86-NEXT: movdqa %xmm1, %xmm2
94 ; X86-NEXT: pslld $3, %xmm2
95 ; X86-NEXT: psubd %xmm2, %xmm1
96 ; X86-NEXT: paddd %xmm0, %xmm1
97 ; X86-NEXT: movq %xmm1, (%eax)
99 %a = load <2 x i32>, <2 x i32>* %x
100 %b = urem <2 x i32> %a, <i32 7, i32 7>
101 store <2 x i32> %b, <2 x i32>* %y
; Checks that sdiv <2 x i32> by 7 uses the signed magic-multiply lowering
; (multiplier 2454267027 per lane) with the sign-correction steps visible
; below: pcmpgtd/pand/paddd to fix up the unsigned pmuludq high half, then
; add of the dividend, arithmetic shift psrad $2, and psrld $31 to add the
; sign bit (rounding toward zero for negative inputs).
; CHECK lines are autogenerated by utils/update_llc_test_checks.py.
106 define void @test_sdiv7_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind {
108 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
109 ; X64-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
110 ; X64-NEXT: movdqa %xmm0, %xmm2
111 ; X64-NEXT: pmuludq %xmm1, %xmm2
112 ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
113 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
114 ; X64-NEXT: pmuludq %xmm1, %xmm3
115 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
116 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
117 ; X64-NEXT: pxor %xmm3, %xmm3
118 ; X64-NEXT: pcmpgtd %xmm0, %xmm3
119 ; X64-NEXT: pand %xmm1, %xmm3
120 ; X64-NEXT: paddd %xmm0, %xmm3
121 ; X64-NEXT: psubd %xmm3, %xmm2
122 ; X64-NEXT: paddd %xmm0, %xmm2
123 ; X64-NEXT: movdqa %xmm2, %xmm0
124 ; X64-NEXT: psrld $31, %xmm0
125 ; X64-NEXT: psrad $2, %xmm2
126 ; X64-NEXT: paddd %xmm0, %xmm2
127 ; X64-NEXT: movq %xmm2, (%rsi)
130 ; X86-LABEL: test_sdiv7_v2i32:
132 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
133 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
134 ; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
135 ; X86-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
136 ; X86-NEXT: movdqa %xmm0, %xmm2
137 ; X86-NEXT: pmuludq %xmm1, %xmm2
138 ; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
139 ; X86-NEXT: movdqa %xmm0, %xmm3
140 ; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
141 ; X86-NEXT: pmuludq %xmm1, %xmm3
142 ; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
143 ; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
144 ; X86-NEXT: pxor %xmm3, %xmm3
145 ; X86-NEXT: pcmpgtd %xmm0, %xmm3
146 ; X86-NEXT: pand %xmm1, %xmm3
147 ; X86-NEXT: paddd %xmm0, %xmm3
148 ; X86-NEXT: psubd %xmm3, %xmm2
149 ; X86-NEXT: paddd %xmm0, %xmm2
150 ; X86-NEXT: movdqa %xmm2, %xmm0
151 ; X86-NEXT: psrld $31, %xmm0
152 ; X86-NEXT: psrad $2, %xmm2
153 ; X86-NEXT: paddd %xmm0, %xmm2
154 ; X86-NEXT: movq %xmm2, (%eax)
156 %a = load <2 x i32>, <2 x i32>* %x
157 %b = sdiv <2 x i32> %a, <i32 7, i32 7>
158 store <2 x i32> %b, <2 x i32>* %y
; Checks that srem <2 x i32> by 7 is lowered as the sdiv-by-7 magic-multiply
; sequence (see test_sdiv7_v2i32) followed by remainder reconstruction:
; r = a - q*7, with q*7 computed as (q << 3) - q (pslld $3, psubd, paddd).
; CHECK lines are autogenerated by utils/update_llc_test_checks.py.
163 define void @test_srem7_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind {
165 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
166 ; X64-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
167 ; X64-NEXT: movdqa %xmm0, %xmm2
168 ; X64-NEXT: pmuludq %xmm1, %xmm2
169 ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
170 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
171 ; X64-NEXT: pmuludq %xmm1, %xmm3
172 ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
173 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
174 ; X64-NEXT: pxor %xmm3, %xmm3
175 ; X64-NEXT: pcmpgtd %xmm0, %xmm3
176 ; X64-NEXT: pand %xmm1, %xmm3
177 ; X64-NEXT: paddd %xmm0, %xmm3
178 ; X64-NEXT: psubd %xmm3, %xmm2
179 ; X64-NEXT: paddd %xmm0, %xmm2
180 ; X64-NEXT: movdqa %xmm2, %xmm1
181 ; X64-NEXT: psrld $31, %xmm1
182 ; X64-NEXT: psrad $2, %xmm2
183 ; X64-NEXT: paddd %xmm1, %xmm2
184 ; X64-NEXT: movdqa %xmm2, %xmm1
185 ; X64-NEXT: pslld $3, %xmm1
186 ; X64-NEXT: psubd %xmm1, %xmm2
187 ; X64-NEXT: paddd %xmm0, %xmm2
188 ; X64-NEXT: movq %xmm2, (%rsi)
191 ; X86-LABEL: test_srem7_v2i32:
193 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
194 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
195 ; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
196 ; X86-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
197 ; X86-NEXT: movdqa %xmm0, %xmm2
198 ; X86-NEXT: pmuludq %xmm1, %xmm2
199 ; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
200 ; X86-NEXT: movdqa %xmm0, %xmm3
201 ; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
202 ; X86-NEXT: pmuludq %xmm1, %xmm3
203 ; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
204 ; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
205 ; X86-NEXT: pxor %xmm3, %xmm3
206 ; X86-NEXT: pcmpgtd %xmm0, %xmm3
207 ; X86-NEXT: pand %xmm1, %xmm3
208 ; X86-NEXT: paddd %xmm0, %xmm3
209 ; X86-NEXT: psubd %xmm3, %xmm2
210 ; X86-NEXT: paddd %xmm0, %xmm2
211 ; X86-NEXT: movdqa %xmm2, %xmm1
212 ; X86-NEXT: psrld $31, %xmm1
213 ; X86-NEXT: psrad $2, %xmm2
214 ; X86-NEXT: paddd %xmm1, %xmm2
215 ; X86-NEXT: movdqa %xmm2, %xmm1
216 ; X86-NEXT: pslld $3, %xmm1
217 ; X86-NEXT: psubd %xmm1, %xmm2
218 ; X86-NEXT: paddd %xmm0, %xmm2
219 ; X86-NEXT: movq %xmm2, (%eax)
221 %a = load <2 x i32>, <2 x i32>* %x
222 %b = srem <2 x i32> %a, <i32 7, i32 7>
223 store <2 x i32> %b, <2 x i32>* %y
; Checks that udiv <2 x i32> by the power-of-two constant 8 folds to a
; single logical right shift (psrld $3) on both targets.
; CHECK lines are autogenerated by utils/update_llc_test_checks.py.
228 define void @test_udiv_pow2_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind {
230 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
231 ; X64-NEXT: psrld $3, %xmm0
232 ; X64-NEXT: movq %xmm0, (%rsi)
235 ; X86-LABEL: test_udiv_pow2_v2i32:
237 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
238 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
239 ; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
240 ; X86-NEXT: psrld $3, %xmm0
241 ; X86-NEXT: movq %xmm0, (%eax)
243 %a = load <2 x i32>, <2 x i32>* %x
244 %b = udiv <2 x i32> %a, <i32 8, i32 8>
245 store <2 x i32> %b, <2 x i32>* %y
; Checks that urem <2 x i32> by the power-of-two constant 8 folds to a
; bitwise AND with a constant-pool mask (andps), i.e. a & 7 per lane.
; CHECK lines are autogenerated by utils/update_llc_test_checks.py.
250 define void @test_urem_pow2_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind {
252 ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
253 ; X64-NEXT: andps {{.*}}(%rip), %xmm0
254 ; X64-NEXT: movlps %xmm0, (%rsi)
257 ; X86-LABEL: test_urem_pow2_v2i32:
259 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
260 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
261 ; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
262 ; X86-NEXT: andps {{\.LCPI.*}}, %xmm0
263 ; X86-NEXT: movlps %xmm0, (%eax)
265 %a = load <2 x i32>, <2 x i32>* %x
266 %b = urem <2 x i32> %a, <i32 8, i32 8>
267 store <2 x i32> %b, <2 x i32>* %y
; Checks that sdiv <2 x i32> by the power-of-two constant 8 uses the
; round-toward-zero shift sequence: add (sign ? 7 : 0) computed via
; psrad $31 / psrld $29, then arithmetic shift psrad $3.
; CHECK lines are autogenerated by utils/update_llc_test_checks.py.
272 define void @test_sdiv_pow2_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind {
274 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
275 ; X64-NEXT: movdqa %xmm0, %xmm1
276 ; X64-NEXT: psrad $31, %xmm1
277 ; X64-NEXT: psrld $29, %xmm1
278 ; X64-NEXT: paddd %xmm0, %xmm1
279 ; X64-NEXT: psrad $3, %xmm1
280 ; X64-NEXT: movq %xmm1, (%rsi)
283 ; X86-LABEL: test_sdiv_pow2_v2i32:
285 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
286 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
287 ; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
288 ; X86-NEXT: movdqa %xmm0, %xmm1
289 ; X86-NEXT: psrad $31, %xmm1
290 ; X86-NEXT: psrld $29, %xmm1
291 ; X86-NEXT: paddd %xmm0, %xmm1
292 ; X86-NEXT: psrad $3, %xmm1
293 ; X86-NEXT: movq %xmm1, (%eax)
295 %a = load <2 x i32>, <2 x i32>* %x
296 %b = sdiv <2 x i32> %a, <i32 8, i32 8>
297 store <2 x i32> %b, <2 x i32>* %y
; NOTE(review): despite the name, the IR body below performs udiv by 8
; (same as test_udiv_pow2_v2i32), not srem -- likely a copy/paste slip when
; the test was written. The autogenerated checks (psrld $3) match the udiv,
; so the test still passes; changing the opcode to srem would require
; regenerating the CHECK lines with utils/update_llc_test_checks.py.
302 define void @test_srem_pow2_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind {
304 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
305 ; X64-NEXT: psrld $3, %xmm0
306 ; X64-NEXT: movq %xmm0, (%rsi)
309 ; X86-LABEL: test_srem_pow2_v2i32:
311 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
312 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
313 ; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
314 ; X86-NEXT: psrld $3, %xmm0
315 ; X86-NEXT: movq %xmm0, (%eax)
317 %a = load <2 x i32>, <2 x i32>* %x
318 %b = udiv <2 x i32> %a, <i32 8, i32 8>
319 store <2 x i32> %b, <2 x i32>* %y
; Checks lowering of a non-constant udiv <2 x i32>: the vector is
; scalarized into two 32-bit unsigned divisions (xorl %edx,%edx then divl),
; and the quotients are rebuilt into a vector with movd + punpckldq.
; CHECK lines are autogenerated by utils/update_llc_test_checks.py.
324 define void @test_udiv_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwind {
326 ; X64-NEXT: movq %rdx, %rcx
327 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
328 ; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
329 ; X64-NEXT: movd %xmm0, %eax
330 ; X64-NEXT: movd %xmm1, %esi
331 ; X64-NEXT: xorl %edx, %edx
332 ; X64-NEXT: divl %esi
333 ; X64-NEXT: movd %eax, %xmm2
334 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
335 ; X64-NEXT: movd %xmm0, %eax
336 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
337 ; X64-NEXT: movd %xmm0, %esi
338 ; X64-NEXT: xorl %edx, %edx
339 ; X64-NEXT: divl %esi
340 ; X64-NEXT: movd %eax, %xmm0
341 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
342 ; X64-NEXT: movq %xmm2, (%rcx)
345 ; X86-LABEL: test_udiv_v2i32:
347 ; X86-NEXT: pushl %esi
348 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
349 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
350 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
351 ; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
352 ; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
353 ; X86-NEXT: movd %xmm0, %eax
354 ; X86-NEXT: movd %xmm1, %esi
355 ; X86-NEXT: xorl %edx, %edx
356 ; X86-NEXT: divl %esi
357 ; X86-NEXT: movd %eax, %xmm2
358 ; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
359 ; X86-NEXT: movd %xmm0, %eax
360 ; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
361 ; X86-NEXT: movd %xmm1, %esi
362 ; X86-NEXT: xorl %edx, %edx
363 ; X86-NEXT: divl %esi
364 ; X86-NEXT: movd %eax, %xmm0
365 ; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
366 ; X86-NEXT: movq %xmm2, (%ecx)
367 ; X86-NEXT: popl %esi
369 %a = load <2 x i32>, <2 x i32>* %x
370 %b = load <2 x i32>, <2 x i32>* %y
371 %c = udiv <2 x i32> %a, %b
372 store <2 x i32> %c, <2 x i32>* %z
; Checks lowering of a non-constant urem <2 x i32>: scalarized into two
; divl operations like test_udiv_v2i32, but the remainders are taken from
; %edx (divl leaves quotient in %eax, remainder in %edx) before the vector
; is rebuilt with movd + punpckldq.
; CHECK lines are autogenerated by utils/update_llc_test_checks.py.
377 define void @test_urem_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwind {
379 ; X64-NEXT: movq %rdx, %rcx
380 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
381 ; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
382 ; X64-NEXT: movd %xmm0, %eax
383 ; X64-NEXT: movd %xmm1, %esi
384 ; X64-NEXT: xorl %edx, %edx
385 ; X64-NEXT: divl %esi
386 ; X64-NEXT: movd %edx, %xmm2
387 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
388 ; X64-NEXT: movd %xmm0, %eax
389 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
390 ; X64-NEXT: movd %xmm0, %esi
391 ; X64-NEXT: xorl %edx, %edx
392 ; X64-NEXT: divl %esi
393 ; X64-NEXT: movd %edx, %xmm0
394 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
395 ; X64-NEXT: movq %xmm2, (%rcx)
398 ; X86-LABEL: test_urem_v2i32:
400 ; X86-NEXT: pushl %esi
401 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
402 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
403 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
404 ; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
405 ; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
406 ; X86-NEXT: movd %xmm0, %eax
407 ; X86-NEXT: movd %xmm1, %esi
408 ; X86-NEXT: xorl %edx, %edx
409 ; X86-NEXT: divl %esi
410 ; X86-NEXT: movd %edx, %xmm2
411 ; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
412 ; X86-NEXT: movd %xmm0, %eax
413 ; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
414 ; X86-NEXT: movd %xmm1, %esi
415 ; X86-NEXT: xorl %edx, %edx
416 ; X86-NEXT: divl %esi
417 ; X86-NEXT: movd %edx, %xmm0
418 ; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
419 ; X86-NEXT: movq %xmm2, (%ecx)
420 ; X86-NEXT: popl %esi
422 %a = load <2 x i32>, <2 x i32>* %x
423 %b = load <2 x i32>, <2 x i32>* %y
424 %c = urem <2 x i32> %a, %b
425 store <2 x i32> %c, <2 x i32>* %z
; Checks lowering of a non-constant sdiv <2 x i32>: scalarized into two
; signed divisions (idivl), quotients taken from %eax and rebuilt into a
; vector with movd + punpckldq. The i386 path needs extra callee-saved
; registers (ebx/edi/esi) to hold the scalarized operands.
; CHECK lines are autogenerated by utils/update_llc_test_checks.py.
430 define void @test_sdiv_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwind {
432 ; X64-NEXT: movq %rdx, %rcx
433 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
434 ; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
435 ; X64-NEXT: movd %xmm0, %eax
436 ; X64-NEXT: movd %xmm1, %esi
438 ; X64-NEXT: idivl %esi
439 ; X64-NEXT: movd %eax, %xmm2
440 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
441 ; X64-NEXT: movd %xmm0, %eax
442 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
443 ; X64-NEXT: movd %xmm0, %esi
445 ; X64-NEXT: idivl %esi
446 ; X64-NEXT: movd %eax, %xmm0
447 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
448 ; X64-NEXT: movq %xmm2, (%rcx)
451 ; X86-LABEL: test_sdiv_v2i32:
453 ; X86-NEXT: pushl %ebx
454 ; X86-NEXT: pushl %edi
455 ; X86-NEXT: pushl %esi
456 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
457 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
458 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
459 ; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
460 ; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
461 ; X86-NEXT: movd %xmm0, %ecx
462 ; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
463 ; X86-NEXT: movd %xmm0, %eax
464 ; X86-NEXT: movd %xmm1, %edi
465 ; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
466 ; X86-NEXT: movd %xmm1, %ebx
468 ; X86-NEXT: idivl %ebx
469 ; X86-NEXT: movd %eax, %xmm0
470 ; X86-NEXT: movl %ecx, %eax
472 ; X86-NEXT: idivl %edi
473 ; X86-NEXT: movd %eax, %xmm1
474 ; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
475 ; X86-NEXT: movq %xmm1, (%esi)
476 ; X86-NEXT: popl %esi
477 ; X86-NEXT: popl %edi
478 ; X86-NEXT: popl %ebx
480 %a = load <2 x i32>, <2 x i32>* %x
481 %b = load <2 x i32>, <2 x i32>* %y
482 %c = sdiv <2 x i32> %a, %b
483 store <2 x i32> %c, <2 x i32>* %z
; NOTE(review): despite the name, the IR body below performs sdiv, not
; srem (same IR as test_sdiv_v2i32) -- likely a copy/paste slip. The
; autogenerated checks agree with sdiv: they take the quotient from %eax
; after idivl, whereas a remainder would come from %edx. Changing the
; opcode to srem would require regenerating the CHECK lines with
; utils/update_llc_test_checks.py.
488 define void @test_srem_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwind {
490 ; X64-NEXT: movq %rdx, %rcx
491 ; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
492 ; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
493 ; X64-NEXT: movd %xmm0, %eax
494 ; X64-NEXT: movd %xmm1, %esi
496 ; X64-NEXT: idivl %esi
497 ; X64-NEXT: movd %eax, %xmm2
498 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
499 ; X64-NEXT: movd %xmm0, %eax
500 ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
501 ; X64-NEXT: movd %xmm0, %esi
503 ; X64-NEXT: idivl %esi
504 ; X64-NEXT: movd %eax, %xmm0
505 ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
506 ; X64-NEXT: movq %xmm2, (%rcx)
509 ; X86-LABEL: test_srem_v2i32:
511 ; X86-NEXT: pushl %ebx
512 ; X86-NEXT: pushl %edi
513 ; X86-NEXT: pushl %esi
514 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
515 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
516 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
517 ; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
518 ; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
519 ; X86-NEXT: movd %xmm0, %ecx
520 ; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
521 ; X86-NEXT: movd %xmm0, %eax
522 ; X86-NEXT: movd %xmm1, %edi
523 ; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
524 ; X86-NEXT: movd %xmm1, %ebx
526 ; X86-NEXT: idivl %ebx
527 ; X86-NEXT: movd %eax, %xmm0
528 ; X86-NEXT: movl %ecx, %eax
530 ; X86-NEXT: idivl %edi
531 ; X86-NEXT: movd %eax, %xmm1
532 ; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
533 ; X86-NEXT: movq %xmm1, (%esi)
534 ; X86-NEXT: popl %esi
535 ; X86-NEXT: popl %edi
536 ; X86-NEXT: popl %ebx
538 %a = load <2 x i32>, <2 x i32>* %x
539 %b = load <2 x i32>, <2 x i32>* %y
540 %c = sdiv <2 x i32> %a, %b
541 store <2 x i32> %c, <2 x i32>* %z