1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s
4 ; Verify that when widening a divide/remainder operation, we generate only one
5 ; divide/rem per original element, since divide/remainder can trap.
; vectorDiv: signed division of one <2 x i32> vector read through %nsource by
; one read through %dsource, with the quotient stored through %qdest (all three
; are addrspace(1) pointers). The expected assembly contains exactly two scalar
; idivl instructions, one per lane of the two-element vector.
; NOTE(review): in the lines visible here %index is loaded but never stored
; to, so the element index looks uninitialized -- confirm this is intentional.
8 define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)* %qdest) nounwind {
9 ; CHECK-LABEL: vectorDiv:
10 ; CHECK: # %bb.0: # %entry
11 ; CHECK-NEXT: movq %rdx, %r8
12 ; CHECK-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
13 ; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
14 ; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
15 ; CHECK-NEXT: movslq -{{[0-9]+}}(%rsp), %rcx
16 ; CHECK-NEXT: movq (%rdi,%rcx,8), %rdi
17 ; CHECK-NEXT: movq (%rsi,%rcx,8), %r10
18 ; CHECK-NEXT: movq %rdi, %rax
19 ; CHECK-NEXT: shrq $32, %rax
20 ; CHECK-NEXT: movq %r10, %rsi
21 ; CHECK-NEXT: shrq $32, %rsi
22 ; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
24 ; CHECK-NEXT: idivl %esi
25 ; CHECK-NEXT: movl %eax, %r9d
26 ; CHECK-NEXT: movl %edi, %eax
28 ; CHECK-NEXT: idivl %r10d
29 ; CHECK-NEXT: movd %eax, %xmm0
30 ; CHECK-NEXT: pinsrd $1, %r9d, %xmm0
31 ; CHECK-NEXT: movq %xmm0, (%r8,%rcx,8)
; Stack slots for the three incoming pointers and for the element index.
34 %nsource.addr = alloca <2 x i32> addrspace(1)*, align 4
35 %dsource.addr = alloca <2 x i32> addrspace(1)*, align 4
36 %qdest.addr = alloca <2 x i32> addrspace(1)*, align 4
37 %index = alloca i32, align 4
; Spill the incoming pointer arguments to their stack slots.
38 store <2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)** %nsource.addr
39 store <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)** %dsource.addr
40 store <2 x i32> addrspace(1)* %qdest, <2 x i32> addrspace(1)** %qdest.addr
; %arrayidx = address of the destination element, qdest[index].
41 %tmp = load <2 x i32> addrspace(1)*, <2 x i32> addrspace(1)** %qdest.addr
42 %tmp1 = load i32, i32* %index
43 %arrayidx = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %tmp, i32 %tmp1
; %tmp5 = numerator vector, nsource[index].
44 %tmp2 = load <2 x i32> addrspace(1)*, <2 x i32> addrspace(1)** %nsource.addr
45 %tmp3 = load i32, i32* %index
46 %arrayidx4 = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %tmp2, i32 %tmp3
47 %tmp5 = load <2 x i32>, <2 x i32> addrspace(1)* %arrayidx4
; %tmp9 = denominator vector, dsource[index].
48 %tmp6 = load <2 x i32> addrspace(1)*, <2 x i32> addrspace(1)** %dsource.addr
49 %tmp7 = load i32, i32* %index
50 %arrayidx8 = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %tmp6, i32 %tmp7
51 %tmp9 = load <2 x i32>, <2 x i32> addrspace(1)* %arrayidx8
; The operation under test: a two-lane signed vector divide, then the store.
52 %tmp10 = sdiv <2 x i32> %tmp5, %tmp9
53 store <2 x i32> %tmp10, <2 x i32> addrspace(1)* %arrayidx
57 ; CHECK: test_char_div
; test_char_div: signed divide of <3 x i8> by <3 x i8>. The expected assembly
; sign-extends each numerator byte with movsbl and issues exactly three idivb
; instructions, one per element of the three-element vector.
58 define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) {
59 ; CHECK-LABEL: test_char_div:
61 ; CHECK-NEXT: movsbl %dil, %eax
62 ; CHECK-NEXT: idivb %cl
63 ; CHECK-NEXT: movl %eax, %edi
64 ; CHECK-NEXT: movsbl %sil, %eax
65 ; CHECK-NEXT: idivb %r8b
66 ; CHECK-NEXT: movl %eax, %esi
67 ; CHECK-NEXT: movsbl %dl, %eax
68 ; CHECK-NEXT: idivb %r9b
69 ; CHECK-NEXT: movl %eax, %ecx
70 ; CHECK-NEXT: movl %edi, %eax
71 ; CHECK-NEXT: movl %esi, %edx
73 %div.r = sdiv <3 x i8> %num, %div
77 ; CHECK: test_uchar_div
; test_uchar_div: unsigned divide of <3 x i8> by <3 x i8>. The expected
; assembly zero-extends each numerator byte with movzbl and issues exactly
; three divb instructions, one per element of the three-element vector.
78 define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) {
79 ; CHECK-LABEL: test_uchar_div:
81 ; CHECK-NEXT: movzbl %dil, %eax
82 ; CHECK-NEXT: divb %cl
83 ; CHECK-NEXT: movl %eax, %edi
84 ; CHECK-NEXT: movzbl %sil, %eax
85 ; CHECK-NEXT: divb %r8b
86 ; CHECK-NEXT: movl %eax, %esi
87 ; CHECK-NEXT: movzbl %dl, %eax
88 ; CHECK-NEXT: divb %r9b
89 ; CHECK-NEXT: movl %eax, %ecx
90 ; CHECK-NEXT: movl %edi, %eax
91 ; CHECK-NEXT: movl %esi, %edx
93 %div.r = udiv <3 x i8> %num, %div
97 ; CHECK: test_short_div
; test_short_div: signed divide of <5 x i16> by <5 x i16>. The expected
; assembly extracts lanes 0 through 4 of both operands, issues exactly five
; idivw instructions (one per element), and reassembles the quotients (taken
; from %eax/%ax) into %xmm0 with movd and pinsrw.
98 define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) {
99 ; CHECK-LABEL: test_short_div:
101 ; CHECK-NEXT: pextrw $4, %xmm0, %eax
102 ; CHECK-NEXT: pextrw $4, %xmm1, %ecx
103 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
105 ; CHECK-NEXT: idivw %cx
106 ; CHECK-NEXT: movl %eax, %r8d
107 ; CHECK-NEXT: pextrw $3, %xmm0, %eax
108 ; CHECK-NEXT: pextrw $3, %xmm1, %ecx
109 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
111 ; CHECK-NEXT: idivw %cx
112 ; CHECK-NEXT: movl %eax, %r9d
113 ; CHECK-NEXT: pextrw $2, %xmm0, %eax
114 ; CHECK-NEXT: pextrw $2, %xmm1, %ecx
115 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
117 ; CHECK-NEXT: idivw %cx
118 ; CHECK-NEXT: movl %eax, %edi
119 ; CHECK-NEXT: movd %xmm0, %eax
120 ; CHECK-NEXT: movd %xmm1, %ecx
121 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
123 ; CHECK-NEXT: idivw %cx
124 ; CHECK-NEXT: movl %eax, %ecx
125 ; CHECK-NEXT: pextrw $1, %xmm0, %eax
126 ; CHECK-NEXT: pextrw $1, %xmm1, %esi
127 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
129 ; CHECK-NEXT: idivw %si
130 ; CHECK-NEXT: # kill: def $ax killed $ax def $eax
131 ; CHECK-NEXT: movd %ecx, %xmm0
132 ; CHECK-NEXT: pinsrw $1, %eax, %xmm0
133 ; CHECK-NEXT: pinsrw $2, %edi, %xmm0
134 ; CHECK-NEXT: pinsrw $3, %r9d, %xmm0
135 ; CHECK-NEXT: pinsrw $4, %r8d, %xmm0
137 %div.r = sdiv <5 x i16> %num, %div
141 ; CHECK: test_ushort_div
; test_ushort_div: unsigned divide of <4 x i16> by <4 x i16>. The expected
; assembly issues exactly four divw instructions (one per element), each
; preceded by xorl %edx, %edx to clear the high half of the dividend, and
; builds the result in %xmm2 before copying it to %xmm0.
142 define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) {
143 ; CHECK-LABEL: test_ushort_div:
145 ; CHECK-NEXT: pextrw $1, %xmm0, %eax
146 ; CHECK-NEXT: pextrw $1, %xmm1, %ecx
147 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
148 ; CHECK-NEXT: xorl %edx, %edx
149 ; CHECK-NEXT: divw %cx
150 ; CHECK-NEXT: movl %eax, %ecx
151 ; CHECK-NEXT: movd %xmm0, %eax
152 ; CHECK-NEXT: movd %xmm1, %esi
153 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
154 ; CHECK-NEXT: xorl %edx, %edx
155 ; CHECK-NEXT: divw %si
156 ; CHECK-NEXT: # kill: def $ax killed $ax def $eax
157 ; CHECK-NEXT: movd %eax, %xmm2
158 ; CHECK-NEXT: pinsrw $1, %ecx, %xmm2
159 ; CHECK-NEXT: pextrw $2, %xmm0, %eax
160 ; CHECK-NEXT: pextrw $2, %xmm1, %ecx
161 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
162 ; CHECK-NEXT: xorl %edx, %edx
163 ; CHECK-NEXT: divw %cx
164 ; CHECK-NEXT: # kill: def $ax killed $ax def $eax
165 ; CHECK-NEXT: pinsrw $2, %eax, %xmm2
166 ; CHECK-NEXT: pextrw $3, %xmm0, %eax
167 ; CHECK-NEXT: pextrw $3, %xmm1, %ecx
168 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
169 ; CHECK-NEXT: xorl %edx, %edx
170 ; CHECK-NEXT: divw %cx
171 ; CHECK-NEXT: # kill: def $ax killed $ax def $eax
172 ; CHECK-NEXT: pinsrw $3, %eax, %xmm2
173 ; CHECK-NEXT: movdqa %xmm2, %xmm0
175 %div.r = udiv <4 x i16> %num, %div
179 ; CHECK: test_uint_div
; test_uint_div: unsigned divide of <3 x i32> by <3 x i32>. The expected
; assembly issues exactly three divl instructions (lanes 2, 1 and 0), each
; preceded by xorl %edx, %edx, and rebuilds the quotient vector in %xmm0 with
; movd and pinsrd.
180 define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) {
181 ; CHECK-LABEL: test_uint_div:
183 ; CHECK-NEXT: pextrd $2, %xmm0, %eax
184 ; CHECK-NEXT: pextrd $2, %xmm1, %ecx
185 ; CHECK-NEXT: xorl %edx, %edx
186 ; CHECK-NEXT: divl %ecx
187 ; CHECK-NEXT: movl %eax, %ecx
188 ; CHECK-NEXT: pextrd $1, %xmm0, %eax
189 ; CHECK-NEXT: pextrd $1, %xmm1, %esi
190 ; CHECK-NEXT: xorl %edx, %edx
191 ; CHECK-NEXT: divl %esi
192 ; CHECK-NEXT: movl %eax, %esi
193 ; CHECK-NEXT: movd %xmm0, %eax
194 ; CHECK-NEXT: movd %xmm1, %edi
195 ; CHECK-NEXT: xorl %edx, %edx
196 ; CHECK-NEXT: divl %edi
197 ; CHECK-NEXT: movd %eax, %xmm0
198 ; CHECK-NEXT: pinsrd $1, %esi, %xmm0
199 ; CHECK-NEXT: pinsrd $2, %ecx, %xmm0
201 %div.r = udiv <3 x i32> %num, %div
205 ; CHECK: test_long_div
; test_long_div: signed divide of <3 x i64> by <3 x i64>. In the expected
; assembly the operands arrive in general-purpose registers (%rdi, %rsi, %rdx
; divided by %rcx, %r8, %r9) and exactly three idivq instructions are issued,
; one per element. %rdx is saved to %r10 first because the division uses it.
206 define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) {
207 ; CHECK-LABEL: test_long_div:
209 ; CHECK-NEXT: movq %rdx, %r10
210 ; CHECK-NEXT: movq %rdi, %rax
212 ; CHECK-NEXT: idivq %rcx
213 ; CHECK-NEXT: movq %rax, %rcx
214 ; CHECK-NEXT: movq %rsi, %rax
216 ; CHECK-NEXT: idivq %r8
217 ; CHECK-NEXT: movq %rax, %rsi
218 ; CHECK-NEXT: movq %r10, %rax
220 ; CHECK-NEXT: idivq %r9
221 ; CHECK-NEXT: movq %rax, %rdi
222 ; CHECK-NEXT: movq %rcx, %rax
223 ; CHECK-NEXT: movq %rsi, %rdx
224 ; CHECK-NEXT: movq %rdi, %rcx
226 %div.r = sdiv <3 x i64> %num, %div
230 ; CHECK: test_ulong_div
; test_ulong_div: unsigned divide of <3 x i64> by <3 x i64>. The expected
; assembly mirrors the signed 64-bit case but uses exactly three divq
; instructions, each preceded by xorl %edx, %edx to zero the high half of the
; dividend. %rdx is saved to %r10 first because the division uses it.
231 define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) {
232 ; CHECK-LABEL: test_ulong_div:
234 ; CHECK-NEXT: movq %rdx, %r10
235 ; CHECK-NEXT: movq %rdi, %rax
236 ; CHECK-NEXT: xorl %edx, %edx
237 ; CHECK-NEXT: divq %rcx
238 ; CHECK-NEXT: movq %rax, %rcx
239 ; CHECK-NEXT: movq %rsi, %rax
240 ; CHECK-NEXT: xorl %edx, %edx
241 ; CHECK-NEXT: divq %r8
242 ; CHECK-NEXT: movq %rax, %rsi
243 ; CHECK-NEXT: movq %r10, %rax
244 ; CHECK-NEXT: xorl %edx, %edx
245 ; CHECK-NEXT: divq %r9
246 ; CHECK-NEXT: movq %rax, %rdi
247 ; CHECK-NEXT: movq %rcx, %rax
248 ; CHECK-NEXT: movq %rsi, %rdx
249 ; CHECK-NEXT: movq %rdi, %rcx
251 %div.r = udiv <3 x i64> %num, %div
255 ; CHECK: test_char_rem
; test_char_rem: signed remainder of <4 x i8> by <4 x i8>. The expected
; assembly issues exactly four idivb instructions (one per element) and reads
; each remainder out of %ah with movsbl before inserting it into %xmm2 with
; movd/pinsrb; the result is then copied to %xmm0.
256 define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) {
257 ; CHECK-LABEL: test_char_rem:
259 ; CHECK-NEXT: pextrb $1, %xmm1, %ecx
260 ; CHECK-NEXT: pextrb $1, %xmm0, %eax
262 ; CHECK-NEXT: idivb %cl
263 ; CHECK-NEXT: movsbl %ah, %ecx
264 ; CHECK-NEXT: movd %xmm1, %edx
265 ; CHECK-NEXT: movd %xmm0, %eax
267 ; CHECK-NEXT: idivb %dl
268 ; CHECK-NEXT: movsbl %ah, %eax
269 ; CHECK-NEXT: movd %eax, %xmm2
270 ; CHECK-NEXT: pinsrb $1, %ecx, %xmm2
271 ; CHECK-NEXT: pextrb $2, %xmm1, %ecx
272 ; CHECK-NEXT: pextrb $2, %xmm0, %eax
274 ; CHECK-NEXT: idivb %cl
275 ; CHECK-NEXT: movsbl %ah, %eax
276 ; CHECK-NEXT: pinsrb $2, %eax, %xmm2
277 ; CHECK-NEXT: pextrb $3, %xmm1, %ecx
278 ; CHECK-NEXT: pextrb $3, %xmm0, %eax
280 ; CHECK-NEXT: idivb %cl
281 ; CHECK-NEXT: movsbl %ah, %eax
282 ; CHECK-NEXT: pinsrb $3, %eax, %xmm2
283 ; CHECK-NEXT: movdqa %xmm2, %xmm0
285 %rem.r = srem <4 x i8> %num, %rem
289 ; CHECK: test_short_rem
; test_short_rem: signed remainder of <5 x i16> by <5 x i16>. The expected
; assembly issues exactly five idivw instructions (one per element) and takes
; each remainder from %edx/%dx, then reassembles the result in %xmm0 with movd
; and pinsrw.
290 define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) {
291 ; CHECK-LABEL: test_short_rem:
293 ; CHECK-NEXT: pextrw $4, %xmm0, %eax
294 ; CHECK-NEXT: pextrw $4, %xmm1, %ecx
295 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
297 ; CHECK-NEXT: idivw %cx
298 ; CHECK-NEXT: movl %edx, %r8d
299 ; CHECK-NEXT: pextrw $3, %xmm0, %eax
300 ; CHECK-NEXT: pextrw $3, %xmm1, %ecx
301 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
303 ; CHECK-NEXT: idivw %cx
304 ; CHECK-NEXT: movl %edx, %r9d
305 ; CHECK-NEXT: pextrw $2, %xmm0, %eax
306 ; CHECK-NEXT: pextrw $2, %xmm1, %ecx
307 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
309 ; CHECK-NEXT: idivw %cx
310 ; CHECK-NEXT: movl %edx, %edi
311 ; CHECK-NEXT: movd %xmm0, %eax
312 ; CHECK-NEXT: movd %xmm1, %ecx
313 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
315 ; CHECK-NEXT: idivw %cx
316 ; CHECK-NEXT: movl %edx, %ecx
317 ; CHECK-NEXT: pextrw $1, %xmm0, %eax
318 ; CHECK-NEXT: pextrw $1, %xmm1, %esi
319 ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
321 ; CHECK-NEXT: idivw %si
322 ; CHECK-NEXT: # kill: def $dx killed $dx def $edx
323 ; CHECK-NEXT: movd %ecx, %xmm0
324 ; CHECK-NEXT: pinsrw $1, %edx, %xmm0
325 ; CHECK-NEXT: pinsrw $2, %edi, %xmm0
326 ; CHECK-NEXT: pinsrw $3, %r9d, %xmm0
327 ; CHECK-NEXT: pinsrw $4, %r8d, %xmm0
329 %rem.r = srem <5 x i16> %num, %rem
333 ; CHECK: test_uint_rem
; test_uint_rem: remainder of <4 x i32> by <4 x i32>. The expected assembly
; issues exactly four idivl instructions (one per element) and collects each
; remainder from %edx into %xmm2, which is then copied to %xmm0.
; NOTE(review): despite the "uint" in the name, the IR below uses srem (signed)
; and the expected assembly uses the signed idivl -- confirm whether urem was
; intended; changing it would require regenerating the expected assembly.
334 define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) {
335 ; CHECK-LABEL: test_uint_rem:
337 ; CHECK-NEXT: pextrd $1, %xmm0, %eax
338 ; CHECK-NEXT: pextrd $1, %xmm1, %ecx
340 ; CHECK-NEXT: idivl %ecx
341 ; CHECK-NEXT: movl %edx, %ecx
342 ; CHECK-NEXT: movd %xmm0, %eax
343 ; CHECK-NEXT: movd %xmm1, %esi
345 ; CHECK-NEXT: idivl %esi
346 ; CHECK-NEXT: movd %edx, %xmm2
347 ; CHECK-NEXT: pinsrd $1, %ecx, %xmm2
348 ; CHECK-NEXT: pextrd $2, %xmm0, %eax
349 ; CHECK-NEXT: pextrd $2, %xmm1, %ecx
351 ; CHECK-NEXT: idivl %ecx
352 ; CHECK-NEXT: pinsrd $2, %edx, %xmm2
353 ; CHECK-NEXT: pextrd $3, %xmm0, %eax
354 ; CHECK-NEXT: pextrd $3, %xmm1, %ecx
356 ; CHECK-NEXT: idivl %ecx
357 ; CHECK-NEXT: pinsrd $3, %edx, %xmm2
358 ; CHECK-NEXT: movdqa %xmm2, %xmm0
360 %rem.r = srem <4 x i32> %num, %rem
365 ; CHECK: test_ulong_rem
; test_ulong_rem: unsigned remainder of <5 x i64> by <5 x i64>. The expected
; assembly issues exactly five divq instructions (one per element), each
; preceded by xorl %edx, %edx and each taking its divisor from a
; stack-relative memory operand. The remainders (from %rdx) are written to
; memory through %rdi at offsets 0, 16 and 32, and %rdi is returned in %rax.
366 define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) {
367 ; CHECK-LABEL: test_ulong_rem:
369 ; CHECK-NEXT: movq %rdx, %rax
370 ; CHECK-NEXT: xorl %edx, %edx
371 ; CHECK-NEXT: divq {{[0-9]+}}(%rsp)
372 ; CHECK-NEXT: movq %rdx, %xmm0
373 ; CHECK-NEXT: movq %rsi, %rax
374 ; CHECK-NEXT: xorl %edx, %edx
375 ; CHECK-NEXT: divq {{[0-9]+}}(%rsp)
376 ; CHECK-NEXT: movq %rdx, %xmm1
377 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
378 ; CHECK-NEXT: movq %r8, %rax
379 ; CHECK-NEXT: xorl %edx, %edx
380 ; CHECK-NEXT: divq {{[0-9]+}}(%rsp)
381 ; CHECK-NEXT: movq %rdx, %xmm0
382 ; CHECK-NEXT: movq %rcx, %rax
383 ; CHECK-NEXT: xorl %edx, %edx
384 ; CHECK-NEXT: divq {{[0-9]+}}(%rsp)
385 ; CHECK-NEXT: movq %rdx, %xmm2
386 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
387 ; CHECK-NEXT: movq %r9, %rax
388 ; CHECK-NEXT: xorl %edx, %edx
389 ; CHECK-NEXT: divq {{[0-9]+}}(%rsp)
390 ; CHECK-NEXT: movq %rdx, 32(%rdi)
391 ; CHECK-NEXT: movdqa %xmm2, 16(%rdi)
392 ; CHECK-NEXT: movdqa %xmm1, (%rdi)
393 ; CHECK-NEXT: movq %rdi, %rax
395 %rem.r = urem <5 x i64> %num, %rem
399 ; CHECK: test_int_div
; test_int_div: loop that divides dest[i] by old[i] as <3 x i32> (signed) and
; stores the quotient back into dest[i]. The loop is entered only when %n > 0
; and exits when the incremented counter equals %n. The expected assembly
; issues exactly three idivl instructions per iteration (one per element),
; advances the byte offset by 16 per element, and stores the three-element
; result as an 8-byte movq plus a 4-byte movl at offset 8.
400 define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) {
401 ; CHECK-LABEL: test_int_div:
402 ; CHECK: # %bb.0: # %entry
403 ; CHECK-NEXT: testl %edx, %edx
404 ; CHECK-NEXT: jle .LBB12_3
405 ; CHECK-NEXT: # %bb.1: # %bb.nph
406 ; CHECK-NEXT: movl %edx, %r9d
407 ; CHECK-NEXT: xorl %ecx, %ecx
408 ; CHECK-NEXT: .p2align 4, 0x90
409 ; CHECK-NEXT: .LBB12_2: # %for.body
410 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
411 ; CHECK-NEXT: movdqa (%rdi,%rcx), %xmm0
412 ; CHECK-NEXT: movdqa (%rsi,%rcx), %xmm1
413 ; CHECK-NEXT: pextrd $1, %xmm0, %eax
414 ; CHECK-NEXT: pextrd $1, %xmm1, %r8d
416 ; CHECK-NEXT: idivl %r8d
417 ; CHECK-NEXT: movl %eax, %r8d
418 ; CHECK-NEXT: movd %xmm0, %eax
419 ; CHECK-NEXT: movd %xmm1, %r10d
421 ; CHECK-NEXT: idivl %r10d
422 ; CHECK-NEXT: movd %eax, %xmm2
423 ; CHECK-NEXT: pinsrd $1, %r8d, %xmm2
424 ; CHECK-NEXT: pextrd $2, %xmm0, %eax
425 ; CHECK-NEXT: pextrd $2, %xmm1, %r8d
427 ; CHECK-NEXT: idivl %r8d
428 ; CHECK-NEXT: movl %eax, 8(%rdi,%rcx)
429 ; CHECK-NEXT: movq %xmm2, (%rdi,%rcx)
430 ; CHECK-NEXT: addq $16, %rcx
431 ; CHECK-NEXT: decl %r9d
432 ; CHECK-NEXT: jne .LBB12_2
433 ; CHECK-NEXT: .LBB12_3: # %for.end
; Skip the loop entirely unless %n is strictly positive.
436 %cmp13 = icmp sgt i32 %n, 0
437 br i1 %cmp13, label %bb.nph, label %for.end
; Loop body: %i.014 is the induction variable, starting at 0.
443 %i.014 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body ]
444 %arrayidx11 = getelementptr <3 x i32>, <3 x i32>* %dest, i32 %i.014
445 %tmp4 = load <3 x i32>, <3 x i32>* %arrayidx11 ; <<3 x i32>> [#uses=1]
446 %arrayidx7 = getelementptr inbounds <3 x i32>, <3 x i32>* %old, i32 %i.014
447 %tmp8 = load <3 x i32>, <3 x i32>* %arrayidx7 ; <<3 x i32>> [#uses=1]
; The operation under test: dest[i] = dest[i] / old[i], element-wise signed.
448 %div = sdiv <3 x i32> %tmp4, %tmp8
449 store <3 x i32> %div, <3 x i32>* %arrayidx11
; Advance the counter and leave the loop once it reaches %n.
450 %inc = add nsw i32 %i.014, 1
451 %exitcond = icmp eq i32 %inc, %n
452 br i1 %exitcond, label %for.end, label %for.body
454 for.end: ; preds = %for.body, %entry