1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64
; Test fixture: loads one i8 through the pointer argument, divides it
; (unsigned) by two constants, inserts each quotient into lane 1 of an
; otherwise zero <4 x i8> vector and takes the vector srem of the two vectors.
; In the expected output for both the X32 and X64 configurations each
; udiv-by-constant shows up as an imull / shrl $14 pair, and the remainder is
; computed with scalar divl instructions, each preceded by xorl %edx, %edx.
; The expected assembly ends with an unconditional jmp back to the outer loop
; header (.LBB0_1).
; NOTE(review): the function name suggests the point of the test is that the
; known-zero upper bits of the zero-extended lanes allow unsigned division to
; be used for the signed remainder - confirm against the originating commit.
5 define void @knownbits_zext_in_reg(i8*) nounwind {
6 ; X32-LABEL: knownbits_zext_in_reg:
10 ; X32-NEXT: pushl %edi
11 ; X32-NEXT: pushl %esi
12 ; X32-NEXT: subl $16, %esp
13 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
14 ; X32-NEXT: movzbl (%eax), %ecx
15 ; X32-NEXT: imull $101, %ecx, %eax
16 ; X32-NEXT: shrl $14, %eax
17 ; X32-NEXT: imull $177, %ecx, %ecx
18 ; X32-NEXT: shrl $14, %ecx
19 ; X32-NEXT: movzbl %al, %eax
20 ; X32-NEXT: vpxor %xmm0, %xmm0, %xmm0
21 ; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm1
22 ; X32-NEXT: vbroadcastss {{.*#+}} xmm2 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43]
23 ; X32-NEXT: vpand %xmm2, %xmm1, %xmm1
24 ; X32-NEXT: movzbl %cl, %eax
25 ; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
26 ; X32-NEXT: vpand %xmm2, %xmm0, %xmm0
27 ; X32-NEXT: vpextrd $1, %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
28 ; X32-NEXT: vpextrd $1, %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
29 ; X32-NEXT: xorl %ecx, %ecx
30 ; X32-NEXT: vmovd %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
31 ; X32-NEXT: vmovd %xmm0, (%esp) # 4-byte Folded Spill
32 ; X32-NEXT: vpextrd $2, %xmm1, %edi
33 ; X32-NEXT: vpextrd $2, %xmm0, %esi
34 ; X32-NEXT: vpextrd $3, %xmm1, %ebx
35 ; X32-NEXT: vpextrd $3, %xmm0, %ebp
36 ; X32-NEXT: .p2align 4, 0x90
37 ; X32-NEXT: .LBB0_1: # %CF
38 ; X32-NEXT: # =>This Loop Header: Depth=1
39 ; X32-NEXT: # Child Loop BB0_2 Depth 2
40 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
41 ; X32-NEXT: xorl %edx, %edx
42 ; X32-NEXT: divl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
43 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
44 ; X32-NEXT: xorl %edx, %edx
45 ; X32-NEXT: divl (%esp) # 4-byte Folded Reload
46 ; X32-NEXT: movl %edi, %eax
47 ; X32-NEXT: xorl %edx, %edx
49 ; X32-NEXT: movl %ebx, %eax
50 ; X32-NEXT: xorl %edx, %edx
52 ; X32-NEXT: .p2align 4, 0x90
53 ; X32-NEXT: .LBB0_2: # %CF237
54 ; X32-NEXT: # Parent Loop BB0_1 Depth=1
55 ; X32-NEXT: # => This Inner Loop Header: Depth=2
56 ; X32-NEXT: testb %cl, %cl
57 ; X32-NEXT: jne .LBB0_2
58 ; X32-NEXT: jmp .LBB0_1
60 ; X64-LABEL: knownbits_zext_in_reg:
62 ; X64-NEXT: pushq %rbp
63 ; X64-NEXT: pushq %rbx
64 ; X64-NEXT: movzbl (%rdi), %eax
65 ; X64-NEXT: imull $101, %eax, %ecx
66 ; X64-NEXT: shrl $14, %ecx
67 ; X64-NEXT: imull $177, %eax, %eax
68 ; X64-NEXT: shrl $14, %eax
69 ; X64-NEXT: movzbl %cl, %ecx
70 ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0
71 ; X64-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm1
72 ; X64-NEXT: vbroadcastss {{.*#+}} xmm2 = [3.57331108E-43,3.57331108E-43,3.57331108E-43,3.57331108E-43]
73 ; X64-NEXT: vpand %xmm2, %xmm1, %xmm1
74 ; X64-NEXT: movzbl %al, %eax
75 ; X64-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
76 ; X64-NEXT: vpand %xmm2, %xmm0, %xmm0
77 ; X64-NEXT: vpextrd $1, %xmm1, %r8d
78 ; X64-NEXT: vpextrd $1, %xmm0, %r9d
79 ; X64-NEXT: xorl %esi, %esi
80 ; X64-NEXT: vmovd %xmm1, %r10d
81 ; X64-NEXT: vmovd %xmm0, %r11d
82 ; X64-NEXT: vpextrd $2, %xmm1, %edi
83 ; X64-NEXT: vpextrd $2, %xmm0, %ebx
84 ; X64-NEXT: vpextrd $3, %xmm1, %ecx
85 ; X64-NEXT: vpextrd $3, %xmm0, %ebp
86 ; X64-NEXT: .p2align 4, 0x90
87 ; X64-NEXT: .LBB0_1: # %CF
88 ; X64-NEXT: # =>This Loop Header: Depth=1
89 ; X64-NEXT: # Child Loop BB0_2 Depth 2
90 ; X64-NEXT: movl %r8d, %eax
91 ; X64-NEXT: xorl %edx, %edx
93 ; X64-NEXT: movl %r10d, %eax
94 ; X64-NEXT: xorl %edx, %edx
95 ; X64-NEXT: divl %r11d
96 ; X64-NEXT: movl %edi, %eax
97 ; X64-NEXT: xorl %edx, %edx
99 ; X64-NEXT: movl %ecx, %eax
100 ; X64-NEXT: xorl %edx, %edx
101 ; X64-NEXT: divl %ebp
102 ; X64-NEXT: .p2align 4, 0x90
103 ; X64-NEXT: .LBB0_2: # %CF237
104 ; X64-NEXT: # Parent Loop BB0_1 Depth=1
105 ; X64-NEXT: # => This Inner Loop Header: Depth=2
106 ; X64-NEXT: testb %sil, %sil
107 ; X64-NEXT: jne .LBB0_2
108 ; X64-NEXT: jmp .LBB0_1
110 %L5 = load i8, i8* %0
; The select condition is the constant true, so %Sl9 is always %L5.
111 %Sl9 = select i1 true, i8 %L5, i8 undef
; udiv treats its operands as unsigned: the i8 constant -93 is 163 here.
112 %B21 = udiv i8 %Sl9, -93
113 %B22 = udiv i8 %Sl9, 93
116 CF: ; preds = %CF246, %BB
; Only lane 1 of each vector is non-zero; it holds one of the two quotients.
117 %I40 = insertelement <4 x i8> zeroinitializer, i8 %B21, i32 1
118 %I41 = insertelement <4 x i8> zeroinitializer, i8 %B22, i32 1
119 %B41 = srem <4 x i8> %I40, %I41
122 CF237: ; preds = %CF237, %CF
; The inner-loop branch condition compares undef with undef.
123 %Cmp73 = icmp ne i1 undef, undef
124 br i1 %Cmp73, label %CF237, label %CF246
126 CF246: ; preds = %CF237
; Consumes lane 2 of a comparison on the srem result, keeping %B41 in use.
127 %Cmp117 = icmp ult <4 x i8> %B41, undef
128 %E156 = extractelement <4 x i1> %Cmp117, i32 2
; Masks %a0 with 32767 (0x7FFF, the low 15 bits) and %a1 with 32766 (0x7FFE).
; Both the X32 and X64 checks expect the whole computation to reduce to
; xorl %eax, %eax, i.e. %eax is left holding 0.
; NOTE(review): the instructions that combine %1 and %2 (an add followed by a
; lshr, going by the function name) and the ret are not visible in this
; excerpt - confirm against the full test file.
132 define i32 @knownbits_mask_add_lshr(i32 %a0, i32 %a1) nounwind {
133 ; X32-LABEL: knownbits_mask_add_lshr:
135 ; X32-NEXT: xorl %eax, %eax
138 ; X64-LABEL: knownbits_mask_add_lshr:
140 ; X64-NEXT: xorl %eax, %eax
142 %1 = and i32 %a0, 32767
143 %2 = and i32 %a1, 32766
; Clears the low 10 bits of %a0 and %a1 (and with -1024), then zero-extends
; the two masked values and %a2 to i128.
; Expected X64 code: mask both inputs, addq them, fold the carry into %edx with
; adcl $0, shift with shldq $54 and zero %eax with xorl.
; Expected X32 code: the same add-with-carry chain on 32-bit halves, two
; shldl $22 shifts, and stores of the constant 0 to offsets 0 and 4 of the
; buffer addressed by %eax.
; NOTE(review): the i128 add/shl instructions that consume %2, %4 and %6 and
; the ret are not visible in this excerpt - confirm against the full file.
149 define i128 @knownbits_mask_addc_shl(i64 %a0, i64 %a1, i64 %a2) nounwind {
150 ; X32-LABEL: knownbits_mask_addc_shl:
152 ; X32-NEXT: pushl %edi
153 ; X32-NEXT: pushl %esi
154 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
155 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
156 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
157 ; X32-NEXT: movl $-1024, %esi # imm = 0xFC00
158 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
159 ; X32-NEXT: andl %esi, %edi
160 ; X32-NEXT: andl {{[0-9]+}}(%esp), %esi
161 ; X32-NEXT: addl %edi, %esi
162 ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx
163 ; X32-NEXT: adcl $0, %ecx
164 ; X32-NEXT: shldl $22, %edx, %ecx
165 ; X32-NEXT: shldl $22, %esi, %edx
166 ; X32-NEXT: movl %edx, 8(%eax)
167 ; X32-NEXT: movl %ecx, 12(%eax)
168 ; X32-NEXT: movl $0, 4(%eax)
169 ; X32-NEXT: movl $0, (%eax)
170 ; X32-NEXT: popl %esi
171 ; X32-NEXT: popl %edi
174 ; X64-LABEL: knownbits_mask_addc_shl:
176 ; X64-NEXT: andq $-1024, %rdi # imm = 0xFC00
177 ; X64-NEXT: andq $-1024, %rsi # imm = 0xFC00
178 ; X64-NEXT: addq %rdi, %rsi
179 ; X64-NEXT: adcl $0, %edx
180 ; X64-NEXT: shldq $54, %rsi, %rdx
181 ; X64-NEXT: xorl %eax, %eax
; -1024 has its low 10 bits clear, so %1 and %3 are multiples of 1024.
183 %1 = and i64 %a0, -1024
184 %2 = zext i64 %1 to i128
185 %3 = and i64 %a1, -1024
186 %4 = zext i64 %3 to i128
188 %6 = zext i64 %a2 to i128
; Calls llvm.uadd.with.overflow.i64 and llvm.sadd.with.overflow.i64 on the same
; operands %1 and %2, adds the two value results, truncates that sum to i32,
; ORs the two overflow flags, and packs both into the {i32, i1} aggregate
; %ret1.
; Both the X32 and X64 checks expect %eax to be zeroed with xorl %eax, %eax
; while the combined overflow flag is still computed into %dl (orb ..., %dl).
; NOTE(review): the definitions of %1 and %2 are not visible in this excerpt;
; the X64 checks (shlq $32 on both argument registers) suggest each is the
; corresponding argument shifted left by 32 - confirm against the full file.
195 define {i32, i1} @knownbits_uaddo_saddo(i64 %a0, i64 %a1) nounwind {
196 ; X32-LABEL: knownbits_uaddo_saddo:
198 ; X32-NEXT: pushl %ebx
199 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
200 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
201 ; X32-NEXT: movl %ecx, %edx
202 ; X32-NEXT: addl %eax, %edx
204 ; X32-NEXT: testl %eax, %eax
205 ; X32-NEXT: setns %al
206 ; X32-NEXT: testl %ecx, %ecx
207 ; X32-NEXT: setns %cl
208 ; X32-NEXT: cmpb %al, %cl
210 ; X32-NEXT: testl %edx, %edx
211 ; X32-NEXT: setns %dl
212 ; X32-NEXT: cmpb %dl, %cl
213 ; X32-NEXT: setne %dl
214 ; X32-NEXT: andb %al, %dl
215 ; X32-NEXT: orb %bl, %dl
216 ; X32-NEXT: xorl %eax, %eax
217 ; X32-NEXT: popl %ebx
220 ; X64-LABEL: knownbits_uaddo_saddo:
222 ; X64-NEXT: shlq $32, %rdi
223 ; X64-NEXT: shlq $32, %rsi
224 ; X64-NEXT: addq %rdi, %rsi
227 ; X64-NEXT: orb %al, %dl
228 ; X64-NEXT: xorl %eax, %eax
; Unsigned add: field 0 is the sum, field 1 the overflow flag.
232 %u = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %1, i64 %2)
233 %uval = extractvalue {i64, i1} %u, 0
234 %uovf = extractvalue {i64, i1} %u, 1
; Signed add on the same operands.
235 %s = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %1, i64 %2)
236 %sval = extractvalue {i64, i1} %s, 0
237 %sovf = extractvalue {i64, i1} %s, 1
; Only the low 32 bits of the combined value are kept.
238 %sum = add i64 %uval, %sval
239 %3 = trunc i64 %sum to i32
240 %4 = or i1 %uovf, %sovf
241 %ret0 = insertvalue {i32, i1} undef, i32 %3, 0
242 %ret1 = insertvalue {i32, i1} %ret0, i1 %4, 1
; Subtraction counterpart of knownbits_uaddo_saddo: calls
; llvm.usub.with.overflow.i64 and llvm.ssub.with.overflow.i64 on the same
; operands %1 and %2, adds the two value results, truncates that sum to i32,
; ORs the two overflow flags, and packs both into the {i32, i1} aggregate
; %ret1.
; Both the X32 and X64 checks expect %eax to be zeroed with xorl %eax, %eax;
; the X64 sequence uses cmpq rather than a subtraction that keeps its result.
; NOTE(review): the definitions of %1 and %2 are not visible in this excerpt;
; the X64 checks (shlq $32 on both argument registers) suggest each is the
; corresponding argument shifted left by 32 - confirm against the full file.
246 define {i32, i1} @knownbits_usubo_ssubo(i64 %a0, i64 %a1) nounwind {
247 ; X32-LABEL: knownbits_usubo_ssubo:
249 ; X32-NEXT: pushl %ebx
250 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
251 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
252 ; X32-NEXT: movl %ecx, %edx
253 ; X32-NEXT: subl %eax, %edx
255 ; X32-NEXT: testl %eax, %eax
256 ; X32-NEXT: setns %al
257 ; X32-NEXT: testl %ecx, %ecx
258 ; X32-NEXT: setns %cl
259 ; X32-NEXT: cmpb %al, %cl
260 ; X32-NEXT: setne %al
261 ; X32-NEXT: testl %edx, %edx
262 ; X32-NEXT: setns %dl
263 ; X32-NEXT: cmpb %dl, %cl
264 ; X32-NEXT: setne %dl
265 ; X32-NEXT: andb %al, %dl
266 ; X32-NEXT: orb %bl, %dl
267 ; X32-NEXT: xorl %eax, %eax
268 ; X32-NEXT: popl %ebx
271 ; X64-LABEL: knownbits_usubo_ssubo:
273 ; X64-NEXT: shlq $32, %rdi
274 ; X64-NEXT: shlq $32, %rsi
275 ; X64-NEXT: cmpq %rsi, %rdi
278 ; X64-NEXT: orb %al, %dl
279 ; X64-NEXT: xorl %eax, %eax
; Unsigned subtract: field 0 is the difference, field 1 the overflow flag.
283 %u = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %1, i64 %2)
284 %uval = extractvalue {i64, i1} %u, 0
285 %uovf = extractvalue {i64, i1} %u, 1
; Signed subtract on the same operands.
286 %s = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %1, i64 %2)
287 %sval = extractvalue {i64, i1} %s, 0
288 %sovf = extractvalue {i64, i1} %s, 1
; Only the low 32 bits of the combined value are kept.
289 %sum = add i64 %uval, %sval
290 %3 = trunc i64 %sum to i32
291 %4 = or i1 %uovf, %sovf
292 %ret0 = insertvalue {i32, i1} undef, i32 %3, 0
293 %ret1 = insertvalue {i32, i1} %ret0, i1 %4, 1
; Declarations of the i64 overflow-reporting add/sub intrinsics called by
; knownbits_uaddo_saddo and knownbits_usubo_ssubo; each returns {result, flag}.
297 declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
298 declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
299 declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
300 declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
; Funnel-shift-left test: %1 = fshl(%a0, -1, 5). With an all-ones second
; operand and a shift amount of 5, the low 5 bits of %1 are ones whatever %a0
; is (they are shifted in from the top of the second operand).
; Both the X32 and X64 checks expect the function to reduce to the constant
; load movl $3, %eax.
; NOTE(review): the instruction that consumes %1 and the ret are not visible
; in this excerpt - presumably a mask with 3; confirm against the full file.
302 define i32 @knownbits_fshl(i32 %a0) nounwind {
303 ; X32-LABEL: knownbits_fshl:
305 ; X32-NEXT: movl $3, %eax
308 ; X64-LABEL: knownbits_fshl:
310 ; X64-NEXT: movl $3, %eax
312 %1 = tail call i32 @llvm.fshl.i32(i32 %a0, i32 -1, i32 5)
; Funnel-shift-right test: %1 = fshr(%a0, -1, 5). With an all-ones second
; operand and a shift amount of 5, the low 27 bits of %1 are ones whatever %a0
; is (only the top 5 bits come from %a0).
; Both the X32 and X64 checks expect the function to reduce to the constant
; load movl $3, %eax.
; NOTE(review): the instruction that consumes %1 and the ret are not visible
; in this excerpt - presumably a mask with 3; confirm against the full file.
317 define i32 @knownbits_fshr(i32 %a0) nounwind {
318 ; X32-LABEL: knownbits_fshr:
320 ; X32-NEXT: movl $3, %eax
323 ; X64-LABEL: knownbits_fshr:
325 ; X64-NEXT: movl $3, %eax
327 %1 = tail call i32 @llvm.fshr.i32(i32 %a0, i32 -1, i32 5)
; Declarations of the i32 funnel-shift intrinsics called by knownbits_fshl and
; knownbits_fshr.
332 declare i32 @llvm.fshl.i32(i32, i32, i32) nounwind readnone
333 declare i32 @llvm.fshr.i32(i32, i32, i32) nounwind readnone