1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+bmi2,+cmov | FileCheck %s --check-prefix=X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefix=X64
; bzhi32: register-operand form of the llvm.x86.bmi.bzhi.32 intrinsic.
; Both targets are expected to emit an `addl` that doubles one operand,
; followed by a register-register `bzhil`.
; NOTE(review): %x1 has no definition among the lines shown here; the `addl`
; checks suggest it is %x + %x -- confirm against the full test.
5 define i32 @bzhi32(i32 %x, i32 %y) {
8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
9 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
10 ; X86-NEXT: addl %ecx, %ecx
11 ; X86-NEXT: bzhil %eax, %ecx, %eax
16 ; X64-NEXT: addl %edi, %edi
17 ; X64-NEXT: bzhil %esi, %edi, %eax
20 %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x1, i32 %y)
; bzhi32_load: the first bzhi operand is loaded from the pointer %x.
; The checks expect the load to be folded into the memory operand of `bzhil`
; (`(%ecx)` on i686, `(%rdi)` on x86-64) instead of a separate load of the
; pointee.
24 define i32 @bzhi32_load(i32* %x, i32 %y) {
25 ; X86-LABEL: bzhi32_load:
27 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
28 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
29 ; X86-NEXT: bzhil %eax, (%ecx), %eax
32 ; X64-LABEL: bzhi32_load:
34 ; X64-NEXT: bzhil %esi, (%rdi), %eax
36 %x1 = load i32, i32* %x
37 %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x1, i32 %y)
41 ; PR48768 - 'bzhi' clears the overflow flag, so we don't need a separate 'test'.
; The bzhi result is compared with `icmp slt ..., 1` (i.e. signed <= 0).
; The checks visible here contain the `bzhil` and no `test` instruction.
; NOTE(review): the instruction that consumes the flags and the return are not
; among the lines shown here -- confirm against the full test.
42 define i1 @bzhi32_overflow(i32 %x, i32 %y) {
43 ; X86-LABEL: bzhi32_overflow:
45 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
46 ; X86-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
50 ; X64-LABEL: bzhi32_overflow:
52 ; X64-NEXT: bzhil %esi, %edi, %eax
55 %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x, i32 %y)
56 %cmp = icmp slt i32 %tmp, 1
60 declare i32 @llvm.x86.bmi.bzhi.32(i32, i32)
; pdep32: register-operand form of the llvm.x86.bmi.pdep.32 intrinsic.
; Both targets are expected to emit an `addl` that doubles the mask operand,
; followed by a register-register `pdepl`.
; NOTE(review): %y1 has no definition among the lines shown here; the `addl`
; checks suggest it is %y + %y -- confirm against the full test.
62 define i32 @pdep32(i32 %x, i32 %y) {
65 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
66 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
67 ; X86-NEXT: addl %ecx, %ecx
68 ; X86-NEXT: pdepl %ecx, %eax, %eax
73 ; X64-NEXT: addl %esi, %esi
74 ; X64-NEXT: pdepl %esi, %edi, %eax
77 %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y1)
; pdep32_load: the pdep mask operand is loaded from the pointer %y.
; The checks expect the load to be folded into the memory operand of `pdepl`
; (`(%eax)` on i686, `(%rsi)` on x86-64).
81 define i32 @pdep32_load(i32 %x, i32* %y) {
82 ; X86-LABEL: pdep32_load:
84 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
85 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
86 ; X86-NEXT: pdepl (%eax), %ecx, %eax
89 ; X64-LABEL: pdep32_load:
91 ; X64-NEXT: pdepl (%rsi), %edi, %eax
93 %y1 = load i32, i32* %y
94 %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y1)
; pdep32_anyext: the pdep source is an i16 sign-extended to i32 and the mask is
; the constant 0xAAAAAAAA.
; That mask has 16 set bits, so pdep consumes only the low 16 bits of the
; source and the bits produced by the extension are never used. Accordingly
; the x86-64 checks feed %edi to `pdepl` with no extension instruction; on
; i686 the argument is loaded from the stack with `movswl`.
98 define i32 @pdep32_anyext(i16 %x) {
99 ; X86-LABEL: pdep32_anyext:
101 ; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
102 ; X86-NEXT: movl $-1431655766, %ecx # imm = 0xAAAAAAAA
103 ; X86-NEXT: pdepl %ecx, %eax, %eax
106 ; X64-LABEL: pdep32_anyext:
108 ; X64-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
109 ; X64-NEXT: pdepl %eax, %edi, %eax
111 %x1 = sext i16 %x to i32
112 %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x1, i32 -1431655766)
; pdep32_demandedbits: pdep with the constant mask 0x55555555, then an `and`
; of the result with the same constant.
; A pdep result can only have bits set where the mask has bits set, so the
; `and` is redundant; the checks shown here contain no `andl` after `pdepl`.
116 define i32 @pdep32_demandedbits(i32 %x) {
117 ; X86-LABEL: pdep32_demandedbits:
119 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
120 ; X86-NEXT: movl $1431655765, %ecx # imm = 0x55555555
121 ; X86-NEXT: pdepl %ecx, %eax, %eax
124 ; X64-LABEL: pdep32_demandedbits:
126 ; X64-NEXT: pdepl %eax, %edi, %eax
129 %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 1431655765)
130 %tmp2 = and i32 %tmp, 1431655765
; pdep32_demandedbits2: the pdep source is %x masked with 255 and only bit 7
; (`and ..., 128`) of the pdep result is used.
; Bit 7 of a pdep result can depend only on source bits 0..7, so the `and 255`
; on the source is not needed: the x86-64 checks pass %edi straight to `pdepl`,
; and the i686 checks load the source with a byte-sized `movzbl`. The final
; `andl $128` remains on both targets.
134 define i32 @pdep32_demandedbits2(i32 %x, i32 %y) {
135 ; X86-LABEL: pdep32_demandedbits2:
137 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
138 ; X86-NEXT: pdepl {{[0-9]+}}(%esp), %eax, %eax
139 ; X86-NEXT: andl $128, %eax
142 ; X64-LABEL: pdep32_demandedbits2:
144 ; X64-NEXT: pdepl %esi, %edi, %eax
145 ; X64-NEXT: andl $128, %eax
147 %tmp = and i32 %x, 255
148 %tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %tmp, i32 %y)
149 %tmp3 = and i32 %tmp2, 128
; pdep32_demandedbits_mask: the pdep mask is an i16 sign-extended to i32 and
; only bit 15 (`and ..., 32768`) of the pdep result is used.
; Bit 15 of a pdep result depends only on mask bits 0..15, which the sign
; extension does not change, so the x86-64 checks pass %esi to `pdepl` with no
; extension instruction. On i686 the mask is loaded from the stack with
; `movswl`. The final `andl $32768` remains on both targets.
153 define i32 @pdep32_demandedbits_mask(i32 %x, i16 %y) {
154 ; X86-LABEL: pdep32_demandedbits_mask:
156 ; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
157 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
158 ; X86-NEXT: pdepl %eax, %ecx, %eax
159 ; X86-NEXT: andl $32768, %eax # imm = 0x8000
162 ; X64-LABEL: pdep32_demandedbits_mask:
164 ; X64-NEXT: pdepl %esi, %edi, %eax
165 ; X64-NEXT: andl $32768, %eax # imm = 0x8000
167 %tmp = sext i16 %y to i32
168 %tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %tmp)
169 %tmp3 = and i32 %tmp2, 32768
; pdep32_demandedbits_mask2: same shape as pdep32_demandedbits_mask, but the
; low 16 bits (`and ..., 65535`) of the pdep result are used.
; Those result bits depend only on mask bits 0..15, so the x86-64 checks again
; pass %esi to `pdepl` without extending it. The final `and` is expected to be
; emitted as `movzwl %ax, %eax` on both targets.
173 define i32 @pdep32_demandedbits_mask2(i32 %x, i16 %y) {
174 ; X86-LABEL: pdep32_demandedbits_mask2:
176 ; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
177 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
178 ; X86-NEXT: pdepl %eax, %ecx, %eax
179 ; X86-NEXT: movzwl %ax, %eax
182 ; X64-LABEL: pdep32_demandedbits_mask2:
184 ; X64-NEXT: pdepl %esi, %edi, %eax
185 ; X64-NEXT: movzwl %ax, %eax
187 %tmp = sext i16 %y to i32
188 %tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %tmp)
189 %tmp3 = and i32 %tmp2, 65535
; pdep32_knownbits: pdep with the constant mask 0x55555555; the result is
; and-ed with the same constant and then multiplied by the un-masked result.
; Every bit outside the mask is known zero in a pdep result, so the `and` is a
; no-op and the multiply becomes the result times itself: the checks expect
; `imull %eax, %eax` directly after `pdepl`, with no `andl`.
193 define i32 @pdep32_knownbits(i32 %x) {
194 ; X86-LABEL: pdep32_knownbits:
196 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
197 ; X86-NEXT: movl $1431655765, %ecx # imm = 0x55555555
198 ; X86-NEXT: pdepl %ecx, %eax, %eax
199 ; X86-NEXT: imull %eax, %eax
202 ; X64-LABEL: pdep32_knownbits:
204 ; X64-NEXT: movl $1431655765, %eax # imm = 0x55555555
205 ; X64-NEXT: pdepl %eax, %edi, %eax
206 ; X64-NEXT: imull %eax, %eax
208 %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 1431655765)
209 %tmp2 = and i32 %tmp, 1431655765
210 %tmp3 = mul i32 %tmp, %tmp2
; pdep32_knownbits2: the pdep source has its low 8 bits cleared
; (`and ..., -256`), the mask is the variable %y, and the result is and-ed
; with -256 before being multiplied by the un-masked result.
; With the low 8 source bits zero, the low 8 bits of the pdep result are zero
; for any mask, so the second `and` is a no-op: the checks keep the `andl $-256`
; on the source and expect `imull %eax, %eax` directly after `pdepl`.
214 define i32 @pdep32_knownbits2(i32 %x, i32 %y) {
215 ; X86-LABEL: pdep32_knownbits2:
217 ; X86-NEXT: movl $-256, %eax
218 ; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
219 ; X86-NEXT: pdepl {{[0-9]+}}(%esp), %eax, %eax
220 ; X86-NEXT: imull %eax, %eax
223 ; X64-LABEL: pdep32_knownbits2:
225 ; X64-NEXT: andl $-256, %edi
226 ; X64-NEXT: pdepl %esi, %edi, %eax
227 ; X64-NEXT: imull %eax, %eax
229 %tmp = and i32 %x, -256
230 %tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %tmp, i32 %y)
231 %tmp3 = and i32 %tmp2, -256
232 %tmp4 = mul i32 %tmp2, %tmp3
236 declare i32 @llvm.x86.bmi.pdep.32(i32, i32)
; pext32: register-operand form of the llvm.x86.bmi.pext.32 intrinsic.
; Both targets are expected to emit an `addl` that doubles the mask operand,
; followed by a register-register `pextl`.
; NOTE(review): %y1 has no definition among the lines shown here; the `addl`
; checks suggest it is %y + %y -- confirm against the full test.
238 define i32 @pext32(i32 %x, i32 %y) {
241 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
242 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
243 ; X86-NEXT: addl %ecx, %ecx
244 ; X86-NEXT: pextl %ecx, %eax, %eax
249 ; X64-NEXT: addl %esi, %esi
250 ; X64-NEXT: pextl %esi, %edi, %eax
253 %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y1)
; pext32_load: the pext mask operand is loaded from the pointer %y.
; The checks expect the load to be folded into the memory operand of `pextl`
; (`(%eax)` on i686, `(%rsi)` on x86-64).
257 define i32 @pext32_load(i32 %x, i32* %y) {
258 ; X86-LABEL: pext32_load:
260 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
261 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
262 ; X86-NEXT: pextl (%eax), %ecx, %eax
265 ; X64-LABEL: pext32_load:
267 ; X64-NEXT: pextl (%rsi), %edi, %eax
269 %y1 = load i32, i32* %y
270 %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y1)
; pext32_knownbits: pext with the constant mask 0x55555555, then an `and` of
; the result with 65535.
; The mask has 16 set bits, so a pext result fits in the low 16 bits and the
; `and` is redundant; the checks shown here contain nothing after `pextl`
; that masks or zero-extends the result.
274 define i32 @pext32_knownbits(i32 %x) {
275 ; X86-LABEL: pext32_knownbits:
277 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
278 ; X86-NEXT: movl $1431655765, %ecx # imm = 0x55555555
279 ; X86-NEXT: pextl %ecx, %eax, %eax
282 ; X64-LABEL: pext32_knownbits:
284 ; X64-NEXT: movl $1431655765, %eax # imm = 0x55555555
285 ; X64-NEXT: pextl %eax, %edi, %eax
287 %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 1431655765)
288 %tmp2 = and i32 %tmp, 65535
292 declare i32 @llvm.x86.bmi.pext.32(i32, i32)
; mulx32: 32x32 -> 64-bit unsigned multiply written as zext + `mul i64`.
; The high 32 bits (%h) are stored through %p; %l is the truncated low half
; (its use is not among the lines shown here).
; Expected lowering: i686 uses `mulxl` and stores %edx to memory; x86-64 uses a
; 64-bit `imulq` and extracts the high half with `shrq $32`.
; NOTE(review): %x1 and %y1 have no definition among the lines shown here; the
; two `addl` checks suggest each argument is doubled -- confirm against the
; full test.
294 define i32 @mulx32(i32 %x, i32 %y, i32* %p) {
297 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
298 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
299 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
300 ; X86-NEXT: addl %edx, %edx
301 ; X86-NEXT: addl %eax, %eax
302 ; X86-NEXT: mulxl %eax, %eax, %edx
303 ; X86-NEXT: movl %edx, (%ecx)
308 ; X64-NEXT: movl %esi, %eax
309 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
310 ; X64-NEXT: addl %edi, %edi
311 ; X64-NEXT: addl %eax, %eax
312 ; X64-NEXT: imulq %rdi, %rax
313 ; X64-NEXT: movq %rax, %rcx
314 ; X64-NEXT: shrq $32, %rcx
315 ; X64-NEXT: movl %ecx, (%rdx)
316 ; X64-NEXT: # kill: def $eax killed $eax killed $rax
320 %x2 = zext i32 %x1 to i64
321 %y2 = zext i32 %y1 to i64
322 %r1 = mul i64 %x2, %y2
323 %h1 = lshr i64 %r1, 32
324 %h = trunc i64 %h1 to i32
325 %l = trunc i64 %r1 to i32
326 store i32 %h, i32* %p
; mulx32_load: same widening multiply as mulx32, but the second factor (%y1) is
; loaded from the pointer %y.
; Expected lowering: i686 folds the load into the memory operand of `mulxl`;
; x86-64 loads the value with a separate `movl (%rsi), %ecx` and then uses
; `imulq` plus `shrq $32` for the high half, which is stored through %p.
; NOTE(review): %x1 has no definition among the lines shown here; the `addl`
; checks suggest it is %x + %x -- confirm against the full test.
330 define i32 @mulx32_load(i32 %x, i32* %y, i32* %p) {
331 ; X86-LABEL: mulx32_load:
333 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
334 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
335 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
336 ; X86-NEXT: addl %edx, %edx
337 ; X86-NEXT: mulxl (%eax), %eax, %edx
338 ; X86-NEXT: movl %edx, (%ecx)
341 ; X64-LABEL: mulx32_load:
343 ; X64-NEXT: movl %edi, %eax
344 ; X64-NEXT: addl %eax, %eax
345 ; X64-NEXT: movl (%rsi), %ecx
346 ; X64-NEXT: imulq %rcx, %rax
347 ; X64-NEXT: movq %rax, %rcx
348 ; X64-NEXT: shrq $32, %rcx
349 ; X64-NEXT: movl %ecx, (%rdx)
350 ; X64-NEXT: # kill: def $eax killed $eax killed $rax
353 %y1 = load i32, i32* %y
354 %x2 = zext i32 %x1 to i64
355 %y2 = zext i32 %y1 to i64
356 %r1 = mul i64 %x2, %y2
357 %h1 = lshr i64 %r1, 32
358 %h = trunc i64 %h1 to i32
359 %l = trunc i64 %r1 to i32
360 store i32 %h, i32* %p