; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=X64_WIDEN
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=X86_WIDEN
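
; The tests below exercise the SSE2 lowering of <2 x i32> division and
; remainder. Unsigned division by 7 avoids a hardware divide: it multiplies
; by the magic constant 613566757 (0x24924925, i.e. ceil(2^32/7)), keeps the
; high 32 bits of each product (the pmuludq/pshufd/punpckldq dance), then
; applies the fixup q0 = umulhi(x, 0x24924925); q = (((x - q0) >> 1) + q0) >> 2,
; as the checks below show.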
define void @test_udiv7_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind {
; X64-LABEL: test_udiv7_v2i32:
; X64: # %bb.0:
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
; X64-NEXT: movdqa %xmm0, %xmm2
; X64-NEXT: pmuludq %xmm1, %xmm2
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X64-NEXT: pmuludq %xmm1, %xmm3
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-NEXT: psubd %xmm2, %xmm0
; X64-NEXT: psrld $1, %xmm0
; X64-NEXT: paddd %xmm2, %xmm0
; X64-NEXT: psrld $2, %xmm0
; X64-NEXT: movq %xmm0, (%rsi)
; X64-NEXT: retq
;
; X86-LABEL: test_udiv7_v2i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; X86-NEXT: movdqa %xmm0, %xmm3
; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
; X86-NEXT: pmuludq %xmm1, %xmm3
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-NEXT: psubd %xmm2, %xmm0
; X86-NEXT: psrld $1, %xmm0
; X86-NEXT: paddd %xmm2, %xmm0
; X86-NEXT: psrld $2, %xmm0
; X86-NEXT: movq %xmm0, (%eax)
; X86-NEXT: retl
  %a = load <2 x i32>, <2 x i32>* %x
  %b = udiv <2 x i32> %a, <i32 7, i32 7>
  store <2 x i32> %b, <2 x i32>* %y
  ret void
}
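
; urem by 7 reuses the udiv-by-7 expansion above and then forms x - q*7,
; with q*7 built shift-and-subtract style as q*8 - q (pslld $3 / psubd)
; before the final paddd of the dividend.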
define void @test_urem7_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind {
; X64-LABEL: test_urem7_v2i32:
; X64: # %bb.0:
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
; X64-NEXT: movdqa %xmm0, %xmm2
; X64-NEXT: pmuludq %xmm1, %xmm2
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X64-NEXT: pmuludq %xmm1, %xmm3
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psubd %xmm2, %xmm1
; X64-NEXT: psrld $1, %xmm1
; X64-NEXT: paddd %xmm2, %xmm1
; X64-NEXT: psrld $2, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm2
; X64-NEXT: pslld $3, %xmm2
; X64-NEXT: psubd %xmm2, %xmm1
; X64-NEXT: paddd %xmm0, %xmm1
; X64-NEXT: movq %xmm1, (%rsi)
; X64-NEXT: retq
;
; X86-LABEL: test_urem7_v2i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; X86-NEXT: movdqa %xmm0, %xmm3
; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
; X86-NEXT: pmuludq %xmm1, %xmm3
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psubd %xmm2, %xmm1
; X86-NEXT: psrld $1, %xmm1
; X86-NEXT: paddd %xmm2, %xmm1
; X86-NEXT: psrld $2, %xmm1
; X86-NEXT: movdqa %xmm1, %xmm2
; X86-NEXT: pslld $3, %xmm2
; X86-NEXT: psubd %xmm2, %xmm1
; X86-NEXT: paddd %xmm0, %xmm1
; X86-NEXT: movq %xmm1, (%eax)
; X86-NEXT: retl
  %a = load <2 x i32>, <2 x i32>* %x
  %b = urem <2 x i32> %a, <i32 7, i32 7>
  store <2 x i32> %b, <2 x i32>* %y
  ret void
}
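
; sdiv by 7 multiplies by the signed magic constant 2454267027 (0x92492493).
; pmuludq is an unsigned multiply, so the checks below include the
; pcmpgtd/pand/paddd/psubd fixup that turns the unsigned high half into a
; signed multiply-high plus the dividend; the quotient is then rounded
; toward zero as (q0 >>s 2) + (q0 >>u 31).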
define void @test_sdiv7_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind {
; X64-LABEL: test_sdiv7_v2i32:
; X64: # %bb.0:
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
; X64-NEXT: movdqa %xmm0, %xmm2
; X64-NEXT: pmuludq %xmm1, %xmm2
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X64-NEXT: pmuludq %xmm1, %xmm3
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-NEXT: pxor %xmm3, %xmm3
; X64-NEXT: pcmpgtd %xmm0, %xmm3
; X64-NEXT: pand %xmm1, %xmm3
; X64-NEXT: paddd %xmm0, %xmm3
; X64-NEXT: psubd %xmm3, %xmm2
; X64-NEXT: paddd %xmm0, %xmm2
; X64-NEXT: movdqa %xmm2, %xmm0
; X64-NEXT: psrld $31, %xmm0
; X64-NEXT: psrad $2, %xmm2
; X64-NEXT: paddd %xmm0, %xmm2
; X64-NEXT: movq %xmm2, (%rsi)
; X64-NEXT: retq
;
; X86-LABEL: test_sdiv7_v2i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; X86-NEXT: movdqa %xmm0, %xmm3
; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
; X86-NEXT: pmuludq %xmm1, %xmm3
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X86-NEXT: pxor %xmm3, %xmm3
; X86-NEXT: pcmpgtd %xmm0, %xmm3
; X86-NEXT: pand %xmm1, %xmm3
; X86-NEXT: paddd %xmm0, %xmm3
; X86-NEXT: psubd %xmm3, %xmm2
; X86-NEXT: paddd %xmm0, %xmm2
; X86-NEXT: movdqa %xmm2, %xmm0
; X86-NEXT: psrld $31, %xmm0
; X86-NEXT: psrad $2, %xmm2
; X86-NEXT: paddd %xmm0, %xmm2
; X86-NEXT: movq %xmm2, (%eax)
; X86-NEXT: retl
  %a = load <2 x i32>, <2 x i32>* %x
  %b = sdiv <2 x i32> %a, <i32 7, i32 7>
  store <2 x i32> %b, <2 x i32>* %y
  ret void
}
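
; srem by 7: the sdiv-by-7 expansion above followed by the same q*8 - q
; trick to reconstruct q*7, which is subtracted from the dividend.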
define void @test_srem7_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind {
; X64-LABEL: test_srem7_v2i32:
; X64: # %bb.0:
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
; X64-NEXT: movdqa %xmm0, %xmm2
; X64-NEXT: pmuludq %xmm1, %xmm2
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X64-NEXT: pmuludq %xmm1, %xmm3
; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-NEXT: pxor %xmm3, %xmm3
; X64-NEXT: pcmpgtd %xmm0, %xmm3
; X64-NEXT: pand %xmm1, %xmm3
; X64-NEXT: paddd %xmm0, %xmm3
; X64-NEXT: psubd %xmm3, %xmm2
; X64-NEXT: paddd %xmm0, %xmm2
; X64-NEXT: movdqa %xmm2, %xmm1
; X64-NEXT: psrld $31, %xmm1
; X64-NEXT: psrad $2, %xmm2
; X64-NEXT: paddd %xmm1, %xmm2
; X64-NEXT: movdqa %xmm2, %xmm1
; X64-NEXT: pslld $3, %xmm1
; X64-NEXT: psubd %xmm1, %xmm2
; X64-NEXT: paddd %xmm0, %xmm2
; X64-NEXT: movq %xmm2, (%rsi)
; X64-NEXT: retq
;
; X86-LABEL: test_srem7_v2i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: pmuludq %xmm1, %xmm2
; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; X86-NEXT: movdqa %xmm0, %xmm3
; X86-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
; X86-NEXT: pmuludq %xmm1, %xmm3
; X86-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X86-NEXT: pxor %xmm3, %xmm3
; X86-NEXT: pcmpgtd %xmm0, %xmm3
; X86-NEXT: pand %xmm1, %xmm3
; X86-NEXT: paddd %xmm0, %xmm3
; X86-NEXT: psubd %xmm3, %xmm2
; X86-NEXT: paddd %xmm0, %xmm2
; X86-NEXT: movdqa %xmm2, %xmm1
; X86-NEXT: psrld $31, %xmm1
; X86-NEXT: psrad $2, %xmm2
; X86-NEXT: paddd %xmm1, %xmm2
; X86-NEXT: movdqa %xmm2, %xmm1
; X86-NEXT: pslld $3, %xmm1
; X86-NEXT: psubd %xmm1, %xmm2
; X86-NEXT: paddd %xmm0, %xmm2
; X86-NEXT: movq %xmm2, (%eax)
; X86-NEXT: retl
  %a = load <2 x i32>, <2 x i32>* %x
  %b = srem <2 x i32> %a, <i32 7, i32 7>
  store <2 x i32> %b, <2 x i32>* %y
  ret void
}
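
; udiv by the power of two 8 folds to a single logical right shift by 3.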
define void @test_udiv_pow2_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind {
; X64-LABEL: test_udiv_pow2_v2i32:
; X64: # %bb.0:
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: psrld $3, %xmm0
; X64-NEXT: movq %xmm0, (%rsi)
; X64-NEXT: retq
;
; X86-LABEL: test_udiv_pow2_v2i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: psrld $3, %xmm0
; X86-NEXT: movq %xmm0, (%eax)
; X86-NEXT: retl
  %a = load <2 x i32>, <2 x i32>* %x
  %b = udiv <2 x i32> %a, <i32 8, i32 8>
  store <2 x i32> %b, <2 x i32>* %y
  ret void
}
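
; urem by 8 folds to a mask: x & 7, applied as an andps with a
; constant-pool operand.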
define void @test_urem_pow2_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind {
; X64-LABEL: test_urem_pow2_v2i32:
; X64: # %bb.0:
; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: andps {{.*}}(%rip), %xmm0
; X64-NEXT: movlps %xmm0, (%rsi)
; X64-NEXT: retq
;
; X86-LABEL: test_urem_pow2_v2i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: andps {{\.LCPI.*}}, %xmm0
; X86-NEXT: movlps %xmm0, (%eax)
; X86-NEXT: retl
;
; X64_WIDEN-LABEL: test_urem_pow2_v2i32:
; X64_WIDEN: # %bb.0:
; X64_WIDEN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64_WIDEN-NEXT: andps {{.*}}(%rip), %xmm0
; X64_WIDEN-NEXT: movlps %xmm0, (%rsi)
; X64_WIDEN-NEXT: retq
;
; X86_WIDEN-LABEL: test_urem_pow2_v2i32:
; X86_WIDEN: # %bb.0:
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86_WIDEN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86_WIDEN-NEXT: andps {{\.LCPI.*}}, %xmm0
; X86_WIDEN-NEXT: movlps %xmm0, (%eax)
; X86_WIDEN-NEXT: retl
  %a = load <2 x i32>, <2 x i32>* %x
  %b = urem <2 x i32> %a, <i32 8, i32 8>
  store <2 x i32> %b, <2 x i32>* %y
  ret void
}
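
; sdiv by 8 must round toward zero, so the dividend is biased by
; (x < 0 ? 7 : 0) (psrad $31 then psrld $29) before the arithmetic
; shift right by 3.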
define void @test_sdiv_pow2_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind {
; X64-LABEL: test_sdiv_pow2_v2i32:
; X64: # %bb.0:
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrad $31, %xmm1
; X64-NEXT: psrld $29, %xmm1
; X64-NEXT: paddd %xmm0, %xmm1
; X64-NEXT: psrad $3, %xmm1
; X64-NEXT: movq %xmm1, (%rsi)
; X64-NEXT: retq
;
; X86-LABEL: test_sdiv_pow2_v2i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: psrad $31, %xmm1
; X86-NEXT: psrld $29, %xmm1
; X86-NEXT: paddd %xmm0, %xmm1
; X86-NEXT: psrad $3, %xmm1
; X86-NEXT: movq %xmm1, (%eax)
; X86-NEXT: retl
;
; X64_WIDEN-LABEL: test_sdiv_pow2_v2i32:
; X64_WIDEN: # %bb.0:
; X64_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64_WIDEN-NEXT: movdqa %xmm0, %xmm1
; X64_WIDEN-NEXT: psrad $31, %xmm1
; X64_WIDEN-NEXT: psrld $29, %xmm1
; X64_WIDEN-NEXT: paddd %xmm0, %xmm1
; X64_WIDEN-NEXT: psrad $3, %xmm1
; X64_WIDEN-NEXT: movq %xmm1, (%rsi)
; X64_WIDEN-NEXT: retq
;
; X86_WIDEN-LABEL: test_sdiv_pow2_v2i32:
; X86_WIDEN: # %bb.0:
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X86_WIDEN-NEXT: movdqa %xmm0, %xmm1
; X86_WIDEN-NEXT: psrad $31, %xmm1
; X86_WIDEN-NEXT: psrld $29, %xmm1
; X86_WIDEN-NEXT: paddd %xmm0, %xmm1
; X86_WIDEN-NEXT: psrad $3, %xmm1
; X86_WIDEN-NEXT: movq %xmm1, (%eax)
; X86_WIDEN-NEXT: retl
  %a = load <2 x i32>, <2 x i32>* %x
  %b = sdiv <2 x i32> %a, <i32 8, i32 8>
  store <2 x i32> %b, <2 x i32>* %y
  ret void
}
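
; Note that the IR below still performs a udiv, so this checks the same
; psrld-by-3 lowering as test_udiv_pow2_v2i32 rather than a true srem.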
define void @test_srem_pow2_v2i32(<2 x i32>* %x, <2 x i32>* %y) nounwind {
; X64-LABEL: test_srem_pow2_v2i32:
; X64: # %bb.0:
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: psrld $3, %xmm0
; X64-NEXT: movq %xmm0, (%rsi)
; X64-NEXT: retq
;
; X86-LABEL: test_srem_pow2_v2i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: psrld $3, %xmm0
; X86-NEXT: movq %xmm0, (%eax)
; X86-NEXT: retl
;
; X64_WIDEN-LABEL: test_srem_pow2_v2i32:
; X64_WIDEN: # %bb.0:
; X64_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64_WIDEN-NEXT: psrld $3, %xmm0
; X64_WIDEN-NEXT: movq %xmm0, (%rsi)
; X64_WIDEN-NEXT: retq
;
; X86_WIDEN-LABEL: test_srem_pow2_v2i32:
; X86_WIDEN: # %bb.0:
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X86_WIDEN-NEXT: psrld $3, %xmm0
; X86_WIDEN-NEXT: movq %xmm0, (%eax)
; X86_WIDEN-NEXT: retl
  %a = load <2 x i32>, <2 x i32>* %x
  %b = udiv <2 x i32> %a, <i32 8, i32 8>
  store <2 x i32> %b, <2 x i32>* %y
  ret void
}
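
; Division by a non-constant vector is scalarized: each lane is moved to a
; GPR and divided with divl (xorl %edx, %edx zeroes the upper half of the
; edx:eax dividend), then the quotients are repacked with punpckldq.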
define void @test_udiv_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwind {
; X64-LABEL: test_udiv_v2i32:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: movd %xmm1, %esi
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %esi
; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-NEXT: movd %xmm0, %esi
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %esi
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X64-NEXT: movq %xmm2, (%rcx)
; X64-NEXT: retq
;
; X86-LABEL: test_udiv_v2i32:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: movd %xmm1, %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divl %esi
; X86-NEXT: movd %eax, %xmm2
; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; X86-NEXT: movd %xmm1, %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divl %esi
; X86-NEXT: movd %eax, %xmm0
; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X86-NEXT: movq %xmm2, (%ecx)
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64_WIDEN-LABEL: test_udiv_v2i32:
; X64_WIDEN: # %bb.0:
; X64_WIDEN-NEXT: movq %rdx, %rcx
; X64_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64_WIDEN-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64_WIDEN-NEXT: movd %xmm0, %eax
; X64_WIDEN-NEXT: movd %xmm1, %esi
; X64_WIDEN-NEXT: xorl %edx, %edx
; X64_WIDEN-NEXT: divl %esi
; X64_WIDEN-NEXT: movd %eax, %xmm2
; X64_WIDEN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X64_WIDEN-NEXT: movd %xmm0, %eax
; X64_WIDEN-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64_WIDEN-NEXT: movd %xmm0, %esi
; X64_WIDEN-NEXT: xorl %edx, %edx
; X64_WIDEN-NEXT: divl %esi
; X64_WIDEN-NEXT: movd %eax, %xmm0
; X64_WIDEN-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X64_WIDEN-NEXT: movq %xmm2, (%rcx)
; X64_WIDEN-NEXT: retq
;
; X86_WIDEN-LABEL: test_udiv_v2i32:
; X86_WIDEN: # %bb.0:
; X86_WIDEN-NEXT: pushl %esi
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X86_WIDEN-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X86_WIDEN-NEXT: movd %xmm0, %eax
; X86_WIDEN-NEXT: movd %xmm1, %esi
; X86_WIDEN-NEXT: xorl %edx, %edx
; X86_WIDEN-NEXT: divl %esi
; X86_WIDEN-NEXT: movd %eax, %xmm2
; X86_WIDEN-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X86_WIDEN-NEXT: movd %xmm0, %eax
; X86_WIDEN-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; X86_WIDEN-NEXT: movd %xmm1, %esi
; X86_WIDEN-NEXT: xorl %edx, %edx
; X86_WIDEN-NEXT: divl %esi
; X86_WIDEN-NEXT: movd %eax, %xmm0
; X86_WIDEN-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X86_WIDEN-NEXT: movq %xmm2, (%ecx)
; X86_WIDEN-NEXT: popl %esi
; X86_WIDEN-NEXT: retl
  %a = load <2 x i32>, <2 x i32>* %x
  %b = load <2 x i32>, <2 x i32>* %y
  %c = udiv <2 x i32> %a, %b
  store <2 x i32> %c, <2 x i32>* %z
  ret void
}
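
; Same scalarized divl sequence as above, except the remainder in %edx is
; kept instead of the quotient in %eax.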
define void @test_urem_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwind {
; X64-LABEL: test_urem_v2i32:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: movd %xmm1, %esi
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %esi
; X64-NEXT: movd %edx, %xmm2
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-NEXT: movd %xmm0, %esi
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %esi
; X64-NEXT: movd %edx, %xmm0
; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X64-NEXT: movq %xmm2, (%rcx)
; X64-NEXT: retq
;
; X86-LABEL: test_urem_v2i32:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: movd %xmm1, %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divl %esi
; X86-NEXT: movd %edx, %xmm2
; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; X86-NEXT: movd %xmm1, %esi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divl %esi
; X86-NEXT: movd %edx, %xmm0
; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X86-NEXT: movq %xmm2, (%ecx)
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64_WIDEN-LABEL: test_urem_v2i32:
; X64_WIDEN: # %bb.0:
; X64_WIDEN-NEXT: movq %rdx, %rcx
; X64_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64_WIDEN-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64_WIDEN-NEXT: movd %xmm0, %eax
; X64_WIDEN-NEXT: movd %xmm1, %esi
; X64_WIDEN-NEXT: xorl %edx, %edx
; X64_WIDEN-NEXT: divl %esi
; X64_WIDEN-NEXT: movd %edx, %xmm2
; X64_WIDEN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X64_WIDEN-NEXT: movd %xmm0, %eax
; X64_WIDEN-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64_WIDEN-NEXT: movd %xmm0, %esi
; X64_WIDEN-NEXT: xorl %edx, %edx
; X64_WIDEN-NEXT: divl %esi
; X64_WIDEN-NEXT: movd %edx, %xmm0
; X64_WIDEN-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X64_WIDEN-NEXT: movq %xmm2, (%rcx)
; X64_WIDEN-NEXT: retq
;
; X86_WIDEN-LABEL: test_urem_v2i32:
; X86_WIDEN: # %bb.0:
; X86_WIDEN-NEXT: pushl %esi
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X86_WIDEN-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X86_WIDEN-NEXT: movd %xmm0, %eax
; X86_WIDEN-NEXT: movd %xmm1, %esi
; X86_WIDEN-NEXT: xorl %edx, %edx
; X86_WIDEN-NEXT: divl %esi
; X86_WIDEN-NEXT: movd %edx, %xmm2
; X86_WIDEN-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X86_WIDEN-NEXT: movd %xmm0, %eax
; X86_WIDEN-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; X86_WIDEN-NEXT: movd %xmm1, %esi
; X86_WIDEN-NEXT: xorl %edx, %edx
; X86_WIDEN-NEXT: divl %esi
; X86_WIDEN-NEXT: movd %edx, %xmm0
; X86_WIDEN-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X86_WIDEN-NEXT: movq %xmm2, (%ecx)
; X86_WIDEN-NEXT: popl %esi
; X86_WIDEN-NEXT: retl
  %a = load <2 x i32>, <2 x i32>* %x
  %b = load <2 x i32>, <2 x i32>* %y
  %c = urem <2 x i32> %a, %b
  store <2 x i32> %c, <2 x i32>* %z
  ret void
}
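
; Signed scalarized division: cltd sign-extends %eax into %edx:%eax before
; each idivl.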
define void @test_sdiv_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwind {
; X64-LABEL: test_sdiv_v2i32:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: movd %xmm1, %esi
; X64-NEXT: cltd
; X64-NEXT: idivl %esi
; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-NEXT: movd %xmm0, %esi
; X64-NEXT: cltd
; X64-NEXT: idivl %esi
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X64-NEXT: movq %xmm2, (%rcx)
; X64-NEXT: retq
;
; X86-LABEL: test_sdiv_v2i32:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT: movd %xmm0, %ecx
; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: movd %xmm1, %edi
; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; X86-NEXT: movd %xmm1, %ebx
; X86-NEXT: cltd
; X86-NEXT: idivl %ebx
; X86-NEXT: movd %eax, %xmm0
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: cltd
; X86-NEXT: idivl %edi
; X86-NEXT: movd %eax, %xmm1
; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-NEXT: movq %xmm1, (%esi)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64_WIDEN-LABEL: test_sdiv_v2i32:
; X64_WIDEN: # %bb.0:
; X64_WIDEN-NEXT: movq %rdx, %rcx
; X64_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64_WIDEN-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64_WIDEN-NEXT: movd %xmm0, %eax
; X64_WIDEN-NEXT: movd %xmm1, %esi
; X64_WIDEN-NEXT: cltd
; X64_WIDEN-NEXT: idivl %esi
; X64_WIDEN-NEXT: movd %eax, %xmm2
; X64_WIDEN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X64_WIDEN-NEXT: movd %xmm0, %eax
; X64_WIDEN-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64_WIDEN-NEXT: movd %xmm0, %esi
; X64_WIDEN-NEXT: cltd
; X64_WIDEN-NEXT: idivl %esi
; X64_WIDEN-NEXT: movd %eax, %xmm0
; X64_WIDEN-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X64_WIDEN-NEXT: movq %xmm2, (%rcx)
; X64_WIDEN-NEXT: retq
;
; X86_WIDEN-LABEL: test_sdiv_v2i32:
; X86_WIDEN: # %bb.0:
; X86_WIDEN-NEXT: pushl %ebx
; X86_WIDEN-NEXT: pushl %edi
; X86_WIDEN-NEXT: pushl %esi
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X86_WIDEN-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X86_WIDEN-NEXT: movd %xmm0, %ecx
; X86_WIDEN-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X86_WIDEN-NEXT: movd %xmm0, %eax
; X86_WIDEN-NEXT: movd %xmm1, %edi
; X86_WIDEN-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; X86_WIDEN-NEXT: movd %xmm1, %ebx
; X86_WIDEN-NEXT: cltd
; X86_WIDEN-NEXT: idivl %ebx
; X86_WIDEN-NEXT: movd %eax, %xmm0
; X86_WIDEN-NEXT: movl %ecx, %eax
; X86_WIDEN-NEXT: cltd
; X86_WIDEN-NEXT: idivl %edi
; X86_WIDEN-NEXT: movd %eax, %xmm1
; X86_WIDEN-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86_WIDEN-NEXT: movq %xmm1, (%esi)
; X86_WIDEN-NEXT: popl %esi
; X86_WIDEN-NEXT: popl %edi
; X86_WIDEN-NEXT: popl %ebx
; X86_WIDEN-NEXT: retl
  %a = load <2 x i32>, <2 x i32>* %x
  %b = load <2 x i32>, <2 x i32>* %y
  %c = sdiv <2 x i32> %a, %b
  store <2 x i32> %c, <2 x i32>* %z
  ret void
}
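
; Note that the IR below performs sdiv rather than srem, so the checks take
; the quotient from %eax, not the remainder from %edx.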
define void @test_srem_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwind {
; X64-LABEL: test_srem_v2i32:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: movd %xmm1, %esi
; X64-NEXT: cltd
; X64-NEXT: idivl %esi
; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64-NEXT: movd %xmm0, %esi
; X64-NEXT: cltd
; X64-NEXT: idivl %esi
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X64-NEXT: movq %xmm2, (%rcx)
; X64-NEXT: retq
;
; X86-LABEL: test_srem_v2i32:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT: movd %xmm0, %ecx
; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: movd %xmm1, %edi
; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; X86-NEXT: movd %xmm1, %ebx
; X86-NEXT: cltd
; X86-NEXT: idivl %ebx
; X86-NEXT: movd %eax, %xmm0
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: cltd
; X86-NEXT: idivl %edi
; X86-NEXT: movd %eax, %xmm1
; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-NEXT: movq %xmm1, (%esi)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64_WIDEN-LABEL: test_srem_v2i32:
; X64_WIDEN: # %bb.0:
; X64_WIDEN-NEXT: movq %rdx, %rcx
; X64_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64_WIDEN-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X64_WIDEN-NEXT: movd %xmm0, %eax
; X64_WIDEN-NEXT: movd %xmm1, %esi
; X64_WIDEN-NEXT: cltd
; X64_WIDEN-NEXT: idivl %esi
; X64_WIDEN-NEXT: movd %eax, %xmm2
; X64_WIDEN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X64_WIDEN-NEXT: movd %xmm0, %eax
; X64_WIDEN-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; X64_WIDEN-NEXT: movd %xmm0, %esi
; X64_WIDEN-NEXT: cltd
; X64_WIDEN-NEXT: idivl %esi
; X64_WIDEN-NEXT: movd %eax, %xmm0
; X64_WIDEN-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X64_WIDEN-NEXT: movq %xmm2, (%rcx)
; X64_WIDEN-NEXT: retq
;
; X86_WIDEN-LABEL: test_srem_v2i32:
; X86_WIDEN: # %bb.0:
; X86_WIDEN-NEXT: pushl %ebx
; X86_WIDEN-NEXT: pushl %edi
; X86_WIDEN-NEXT: pushl %esi
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X86_WIDEN-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X86_WIDEN-NEXT: movd %xmm0, %ecx
; X86_WIDEN-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X86_WIDEN-NEXT: movd %xmm0, %eax
; X86_WIDEN-NEXT: movd %xmm1, %edi
; X86_WIDEN-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; X86_WIDEN-NEXT: movd %xmm1, %ebx
; X86_WIDEN-NEXT: cltd
; X86_WIDEN-NEXT: idivl %ebx
; X86_WIDEN-NEXT: movd %eax, %xmm0
; X86_WIDEN-NEXT: movl %ecx, %eax
; X86_WIDEN-NEXT: cltd
; X86_WIDEN-NEXT: idivl %edi
; X86_WIDEN-NEXT: movd %eax, %xmm1
; X86_WIDEN-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86_WIDEN-NEXT: movq %xmm1, (%esi)
; X86_WIDEN-NEXT: popl %esi
; X86_WIDEN-NEXT: popl %edi
; X86_WIDEN-NEXT: popl %ebx
; X86_WIDEN-NEXT: retl
  %a = load <2 x i32>, <2 x i32>* %x
  %b = load <2 x i32>, <2 x i32>* %y
  %c = sdiv <2 x i32> %a, %b
  store <2 x i32> %c, <2 x i32>* %z
  ret void
}