1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+bmi2,+cmov | FileCheck %s --check-prefix=X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefix=X64
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR
; Register-operand case for the 32-bit BZHI intrinsic.
; NOTE(review): the definition of %x1 is not visible in this excerpt; the
; expected `addl` of the first argument ahead of `bzhil` suggests it is
; %x + %x -- confirm against the full file.
6 define i32 @bzhi32(i32 %x, i32 %y) {
9 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
10 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
11 ; X86-NEXT: addl %ecx, %ecx
12 ; X86-NEXT: bzhil %eax, %ecx, %eax
17 ; X64-NEXT: addl %edi, %edi
18 ; X64-NEXT: bzhil %esi, %edi, %eax
23 ; EGPR-NEXT: addl %edi, %edi # encoding: [0x01,0xff]
24 ; EGPR-NEXT: bzhil %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf5,0xc7]
25 ; EGPR-NEXT: retq # encoding: [0xc3]
27 %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x1, i32 %y)
; Memory-operand case for BZHI: the first intrinsic operand is loaded from
; %x, and the expectations show that load folded into `bzhil` as a memory
; source operand rather than emitted as a separate `movl`.
31 define i32 @bzhi32_load(ptr %x, i32 %y) {
32 ; X86-LABEL: bzhi32_load:
34 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
35 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
36 ; X86-NEXT: bzhil %eax, (%ecx), %eax
39 ; X64-LABEL: bzhi32_load:
41 ; X64-NEXT: bzhil %esi, (%rdi), %eax
44 ; EGPR-LABEL: bzhi32_load:
46 ; EGPR-NEXT: bzhil %esi, (%rdi), %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf5,0x07]
47 ; EGPR-NEXT: retq # encoding: [0xc3]
48 %x1 = load i32, ptr %x
49 %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x1, i32 %y)
53 ; PR48768 - 'bzhi' clears the overflow flag, so we don't need a separate 'test'.
; Compares the BZHI result with `icmp slt ..., 1` (the `ret` is not visible
; in this excerpt).
; NOTE(review): the expectations for the +egpr run still contain a `testl`
; between `bzhil` and `setle`, which the PR48768 remark says should not be
; needed -- confirm whether that difference is intended.
54 define i1 @bzhi32_overflow(i32 %x, i32 %y) {
55 ; X86-LABEL: bzhi32_overflow:
57 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
58 ; X86-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
62 ; X64-LABEL: bzhi32_overflow:
64 ; X64-NEXT: bzhil %esi, %edi, %eax
68 ; EGPR-LABEL: bzhi32_overflow:
70 ; EGPR-NEXT: bzhil %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x48,0xf5,0xc7]
71 ; EGPR-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
72 ; EGPR-NEXT: setle %al # encoding: [0x0f,0x9e,0xc0]
73 ; EGPR-NEXT: retq # encoding: [0xc3]
74 %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x, i32 %y)
75 %cmp = icmp slt i32 %tmp, 1
; Declaration of the 32-bit BZHI intrinsic: two i32 operands, i32 result.
79 declare i32 @llvm.x86.bmi.bzhi.32(i32, i32)
; Register-operand case for the 32-bit PDEP intrinsic.
; NOTE(review): the definition of %y1 is not visible in this excerpt; the
; expected `addl` of the second argument ahead of `pdepl` suggests it is
; %y + %y -- confirm against the full file.
81 define i32 @pdep32(i32 %x, i32 %y) {
84 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
85 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
86 ; X86-NEXT: addl %ecx, %ecx
87 ; X86-NEXT: pdepl %ecx, %eax, %eax
92 ; X64-NEXT: addl %esi, %esi
93 ; X64-NEXT: pdepl %esi, %edi, %eax
98 ; EGPR-NEXT: addl %esi, %esi # encoding: [0x01,0xf6]
99 ; EGPR-NEXT: pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
100 ; EGPR-NEXT: retq # encoding: [0xc3]
102 %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y1)
; Memory-operand case for PDEP: the mask (second intrinsic operand) is loaded
; from %y, and the expectations show that load folded into `pdepl` as a
; memory source operand.
106 define i32 @pdep32_load(i32 %x, ptr %y) {
107 ; X86-LABEL: pdep32_load:
109 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
110 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
111 ; X86-NEXT: pdepl (%eax), %ecx, %eax
114 ; X64-LABEL: pdep32_load:
116 ; X64-NEXT: pdepl (%rsi), %edi, %eax
119 ; EGPR-LABEL: pdep32_load:
121 ; EGPR-NEXT: pdepl (%rsi), %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0x06]
122 ; EGPR-NEXT: retq # encoding: [0xc3]
123 %y1 = load i32, ptr %y
124 %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y1)
; PDEP of a sign-extended i16 with the constant mask 0xAAAAAAAA. The mask has
; 16 set bits, so PDEP only consumes the low 16 bits of its source; in the
; visible 64-bit expectations %edi is passed to `pdepl` with no separate
; extension instruction (the 32-bit run loads the argument with `movswl`).
128 define i32 @pdep32_anyext(i16 %x) {
129 ; X86-LABEL: pdep32_anyext:
131 ; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
132 ; X86-NEXT: movl $-1431655766, %ecx # imm = 0xAAAAAAAA
133 ; X86-NEXT: pdepl %ecx, %eax, %eax
136 ; X64-LABEL: pdep32_anyext:
138 ; X64-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
139 ; X64-NEXT: pdepl %eax, %edi, %eax
142 ; EGPR-LABEL: pdep32_anyext:
144 ; EGPR-NEXT: movl $-1431655766, %eax # encoding: [0xb8,0xaa,0xaa,0xaa,0xaa]
145 ; EGPR-NEXT: # imm = 0xAAAAAAAA
146 ; EGPR-NEXT: pdepl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc0]
147 ; EGPR-NEXT: retq # encoding: [0xc3]
148 %x1 = sext i16 %x to i32
149 %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x1, i32 -1431655766)
; PDEP with the constant mask 0x55555555 followed by an `and` with the same
; constant. PDEP can only set result bits where the mask is set, so the `and`
; is redundant; the visible expectations contain no `andl` after `pdepl`.
153 define i32 @pdep32_demandedbits(i32 %x) {
154 ; X86-LABEL: pdep32_demandedbits:
156 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
157 ; X86-NEXT: movl $1431655765, %ecx # imm = 0x55555555
158 ; X86-NEXT: pdepl %ecx, %eax, %eax
161 ; X64-LABEL: pdep32_demandedbits:
163 ; X64-NEXT: movl $1431655765, %eax # imm = 0x55555555
164 ; X64-NEXT: pdepl %eax, %edi, %eax
167 ; EGPR-LABEL: pdep32_demandedbits:
169 ; EGPR-NEXT: movl $1431655765, %eax # encoding: [0xb8,0x55,0x55,0x55,0x55]
170 ; EGPR-NEXT: # imm = 0x55555555
171 ; EGPR-NEXT: pdepl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc0]
172 ; EGPR-NEXT: retq # encoding: [0xc3]
173 %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 1431655765)
174 %tmp2 = and i32 %tmp, 1431655765
; The PDEP source is masked to its low 8 bits and only bit 7 of the result is
; kept. Result bit 7 can only come from one of source bits 0..7, so the
; `and ..., 255` is redundant; the visible 64-bit expectations have no
; masking of %edi before `pdepl` (the 32-bit run uses a `movzbl` load).
178 define i32 @pdep32_demandedbits2(i32 %x, i32 %y) {
179 ; X86-LABEL: pdep32_demandedbits2:
181 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
182 ; X86-NEXT: pdepl {{[0-9]+}}(%esp), %eax, %eax
183 ; X86-NEXT: andl $128, %eax
186 ; X64-LABEL: pdep32_demandedbits2:
188 ; X64-NEXT: pdepl %esi, %edi, %eax
189 ; X64-NEXT: andl $128, %eax
192 ; EGPR-LABEL: pdep32_demandedbits2:
194 ; EGPR-NEXT: pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
195 ; EGPR-NEXT: andl $128, %eax # encoding: [0x25,0x80,0x00,0x00,0x00]
196 ; EGPR-NEXT: retq # encoding: [0xc3]
197 %tmp = and i32 %x, 255
198 %tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %tmp, i32 %y)
199 %tmp3 = and i32 %tmp2, 128
; The PDEP mask is a sign-extended i16 and only bit 15 (0x8000) of the result
; is kept. Mask bits above bit 15 cannot influence result bit 15, so the
; upper bits of the extension are not needed; the visible 64-bit expectations
; pass %esi to `pdepl` with no separate extension instruction (the 32-bit run
; loads the argument with `movswl`).
203 define i32 @pdep32_demandedbits_mask(i32 %x, i16 %y) {
204 ; X86-LABEL: pdep32_demandedbits_mask:
206 ; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
207 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
208 ; X86-NEXT: pdepl %eax, %ecx, %eax
209 ; X86-NEXT: andl $32768, %eax # imm = 0x8000
212 ; X64-LABEL: pdep32_demandedbits_mask:
214 ; X64-NEXT: pdepl %esi, %edi, %eax
215 ; X64-NEXT: andl $32768, %eax # imm = 0x8000
218 ; EGPR-LABEL: pdep32_demandedbits_mask:
220 ; EGPR-NEXT: pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
221 ; EGPR-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00]
222 ; EGPR-NEXT: # imm = 0x8000
223 ; EGPR-NEXT: retq # encoding: [0xc3]
224 %tmp = sext i16 %y to i32
225 %tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %tmp)
226 %tmp3 = and i32 %tmp2, 32768
; Same shape as a sign-extended-i16 PDEP mask, but the low 16 bits (65535) of
; the result are kept. Only mask bits 0..15 can influence those result bits;
; the visible 64-bit expectations pass %esi to `pdepl` with no separate
; extension instruction, and the final `and` is expected as `movzwl`.
230 define i32 @pdep32_demandedbits_mask2(i32 %x, i16 %y) {
231 ; X86-LABEL: pdep32_demandedbits_mask2:
233 ; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
234 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
235 ; X86-NEXT: pdepl %eax, %ecx, %eax
236 ; X86-NEXT: movzwl %ax, %eax
239 ; X64-LABEL: pdep32_demandedbits_mask2:
241 ; X64-NEXT: pdepl %esi, %edi, %eax
242 ; X64-NEXT: movzwl %ax, %eax
245 ; EGPR-LABEL: pdep32_demandedbits_mask2:
247 ; EGPR-NEXT: pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
248 ; EGPR-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0]
249 ; EGPR-NEXT: retq # encoding: [0xc3]
250 %tmp = sext i16 %y to i32
251 %tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %tmp)
252 %tmp3 = and i32 %tmp2, 65535
; PDEP with the constant mask 0x55555555, then the result is multiplied by
; itself `and`-ed with the same constant. Result bits outside the mask are
; known to be zero, so the `and` is a no-op; the visible expectations show a
; plain `imull %eax, %eax` with no `andl`.
256 define i32 @pdep32_knownbits(i32 %x) {
257 ; X86-LABEL: pdep32_knownbits:
259 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
260 ; X86-NEXT: movl $1431655765, %ecx # imm = 0x55555555
261 ; X86-NEXT: pdepl %ecx, %eax, %eax
262 ; X86-NEXT: imull %eax, %eax
265 ; X64-LABEL: pdep32_knownbits:
267 ; X64-NEXT: movl $1431655765, %eax # imm = 0x55555555
268 ; X64-NEXT: pdepl %eax, %edi, %eax
269 ; X64-NEXT: imull %eax, %eax
272 ; EGPR-LABEL: pdep32_knownbits:
274 ; EGPR-NEXT: movl $1431655765, %eax # encoding: [0xb8,0x55,0x55,0x55,0x55]
275 ; EGPR-NEXT: # imm = 0x55555555
276 ; EGPR-NEXT: pdepl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc0]
277 ; EGPR-NEXT: imull %eax, %eax # encoding: [0x0f,0xaf,0xc0]
278 ; EGPR-NEXT: retq # encoding: [0xc3]
279 %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 1431655765)
280 %tmp2 = and i32 %tmp, 1431655765
281 %tmp3 = mul i32 %tmp, %tmp2
; The PDEP source has its low 8 bits cleared (`and ..., -256`) and the mask is
; variable. Result bits 0..7 can only receive source bits 0..7, which are
; zero, so the second `and ..., -256` on the result is a no-op; the visible
; expectations keep the `andl $-256` on the source but show a plain
; `imull %eax, %eax` after `pdepl`.
285 define i32 @pdep32_knownbits2(i32 %x, i32 %y) {
286 ; X86-LABEL: pdep32_knownbits2:
288 ; X86-NEXT: movl $-256, %eax
289 ; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
290 ; X86-NEXT: pdepl {{[0-9]+}}(%esp), %eax, %eax
291 ; X86-NEXT: imull %eax, %eax
294 ; X64-LABEL: pdep32_knownbits2:
296 ; X64-NEXT: andl $-256, %edi
297 ; X64-NEXT: pdepl %esi, %edi, %eax
298 ; X64-NEXT: imull %eax, %eax
301 ; EGPR-LABEL: pdep32_knownbits2:
303 ; EGPR-NEXT: andl $-256, %edi # encoding: [0x81,0xe7,0x00,0xff,0xff,0xff]
304 ; EGPR-NEXT: pdepl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x43,0xf5,0xc6]
305 ; EGPR-NEXT: imull %eax, %eax # encoding: [0x0f,0xaf,0xc0]
306 ; EGPR-NEXT: retq # encoding: [0xc3]
307 %tmp = and i32 %x, -256
308 %tmp2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %tmp, i32 %y)
309 %tmp3 = and i32 %tmp2, -256
310 %tmp4 = mul i32 %tmp2, %tmp3
; Declaration of the 32-bit PDEP intrinsic: two i32 operands, i32 result.
314 declare i32 @llvm.x86.bmi.pdep.32(i32, i32)
; Register-operand case for the 32-bit PEXT intrinsic.
; NOTE(review): the definition of %y1 is not visible in this excerpt; the
; expected `addl` of the second argument ahead of `pextl` suggests it is
; %y + %y -- confirm against the full file.
316 define i32 @pext32(i32 %x, i32 %y) {
319 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
320 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
321 ; X86-NEXT: addl %ecx, %ecx
322 ; X86-NEXT: pextl %ecx, %eax, %eax
327 ; X64-NEXT: addl %esi, %esi
328 ; X64-NEXT: pextl %esi, %edi, %eax
331 ; EGPR-LABEL: pext32:
333 ; EGPR-NEXT: addl %esi, %esi # encoding: [0x01,0xf6]
334 ; EGPR-NEXT: pextl %esi, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x42,0xf5,0xc6]
335 ; EGPR-NEXT: retq # encoding: [0xc3]
337 %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y1)
; Memory-operand case for PEXT: the mask (second intrinsic operand) is loaded
; from %y, and the expectations show that load folded into `pextl` as a
; memory source operand.
341 define i32 @pext32_load(i32 %x, ptr %y) {
342 ; X86-LABEL: pext32_load:
344 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
345 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
346 ; X86-NEXT: pextl (%eax), %ecx, %eax
349 ; X64-LABEL: pext32_load:
351 ; X64-NEXT: pextl (%rsi), %edi, %eax
354 ; EGPR-LABEL: pext32_load:
356 ; EGPR-NEXT: pextl (%rsi), %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x42,0xf5,0x06]
357 ; EGPR-NEXT: retq # encoding: [0xc3]
358 %y1 = load i32, ptr %y
359 %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y1)
; PEXT with the constant mask 0x55555555 followed by `and ..., 65535`. The
; mask has 16 set bits, so the extracted value already fits in the low 16
; bits and the `and` is a no-op; the visible expectations contain no masking
; instruction after `pextl`.
363 define i32 @pext32_knownbits(i32 %x) {
364 ; X86-LABEL: pext32_knownbits:
366 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
367 ; X86-NEXT: movl $1431655765, %ecx # imm = 0x55555555
368 ; X86-NEXT: pextl %ecx, %eax, %eax
371 ; X64-LABEL: pext32_knownbits:
373 ; X64-NEXT: movl $1431655765, %eax # imm = 0x55555555
374 ; X64-NEXT: pextl %eax, %edi, %eax
377 ; EGPR-LABEL: pext32_knownbits:
379 ; EGPR-NEXT: movl $1431655765, %eax # encoding: [0xb8,0x55,0x55,0x55,0x55]
380 ; EGPR-NEXT: # imm = 0x55555555
381 ; EGPR-NEXT: pextl %eax, %edi, %eax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x42,0xf5,0xc0]
382 ; EGPR-NEXT: retq # encoding: [0xc3]
383 %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 1431655765)
384 %tmp2 = and i32 %tmp, 65535
; Declaration of the 32-bit PEXT intrinsic: two i32 operands, i32 result.
388 declare i32 @llvm.x86.bmi.pext.32(i32, i32)
; Widening 32x32->64 multiply written in plain IR: both operands are
; zero-extended to i64 and multiplied, then the high half (%h) is taken via
; `lshr 32` + `trunc` and the low half (%l) via `trunc`. The 32-bit
; expectations use `mulxl`; the 64-bit ones use `imulq` followed by `shrq $32`.
; NOTE(review): the definitions of %x1/%y1, the store through %p and the `ret`
; are not visible in this excerpt. The expected `addl`/`leal` doubling of both
; arguments suggests %x1 = %x + %x and %y1 = %y + %y, and the expected
; `movl ..., (%rdx)` / `movl %edx, (%ecx)` suggests %h is stored to %p --
; confirm against the full file.
390 define i32 @mulx32(i32 %x, i32 %y, ptr %p) {
393 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
394 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
395 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
396 ; X86-NEXT: addl %edx, %edx
397 ; X86-NEXT: addl %eax, %eax
398 ; X86-NEXT: mulxl %eax, %eax, %edx
399 ; X86-NEXT: movl %edx, (%ecx)
404 ; X64-NEXT: # kill: def $esi killed $esi def $rsi
405 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
406 ; X64-NEXT: addl %edi, %edi
407 ; X64-NEXT: leal (%rsi,%rsi), %eax
408 ; X64-NEXT: imulq %rdi, %rax
409 ; X64-NEXT: movq %rax, %rcx
410 ; X64-NEXT: shrq $32, %rcx
411 ; X64-NEXT: movl %ecx, (%rdx)
412 ; X64-NEXT: # kill: def $eax killed $eax killed $rax
415 ; EGPR-LABEL: mulx32:
417 ; EGPR-NEXT: # kill: def $esi killed $esi def $rsi
418 ; EGPR-NEXT: # kill: def $edi killed $edi def $rdi
419 ; EGPR-NEXT: addl %edi, %edi # encoding: [0x01,0xff]
420 ; EGPR-NEXT: leal (%rsi,%rsi), %eax # encoding: [0x8d,0x04,0x36]
421 ; EGPR-NEXT: imulq %rdi, %rax # encoding: [0x48,0x0f,0xaf,0xc7]
422 ; EGPR-NEXT: movq %rax, %rcx # encoding: [0x48,0x89,0xc1]
423 ; EGPR-NEXT: shrq $32, %rcx # encoding: [0x48,0xc1,0xe9,0x20]
424 ; EGPR-NEXT: movl %ecx, (%rdx) # encoding: [0x89,0x0a]
425 ; EGPR-NEXT: # kill: def $eax killed $eax killed $rax
426 ; EGPR-NEXT: retq # encoding: [0xc3]
429 %x2 = zext i32 %x1 to i64
430 %y2 = zext i32 %y1 to i64
431 %r1 = mul i64 %x2, %y2
432 %h1 = lshr i64 %r1, 32
433 %h = trunc i64 %h1 to i32
434 %l = trunc i64 %r1 to i32
; Widening 32x32->64 multiply where the second operand (%y1) is loaded from
; %y. Both operands are zero-extended to i64 and multiplied; %h is the high
; half (`lshr 32` + `trunc`) and %l the low half (`trunc`). The 32-bit
; expectations fold the load into `mulxl (%eax), ...`; the 64-bit ones load
; with `movl (%rsi), %ecx` and use `imulq` followed by `shrq $32`.
; NOTE(review): the definition of %x1, the store through %p and the `ret` are
; not visible in this excerpt. The expected `addl`/`leal` doubling of the
; first argument suggests %x1 = %x + %x, and the expected store to
; (%rdx)/(%ecx) suggests %h is stored to %p -- confirm against the full file.
439 define i32 @mulx32_load(i32 %x, ptr %y, ptr %p) {
440 ; X86-LABEL: mulx32_load:
442 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
443 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
444 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
445 ; X86-NEXT: addl %edx, %edx
446 ; X86-NEXT: mulxl (%eax), %eax, %edx
447 ; X86-NEXT: movl %edx, (%ecx)
450 ; X64-LABEL: mulx32_load:
452 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
453 ; X64-NEXT: leal (%rdi,%rdi), %eax
454 ; X64-NEXT: movl (%rsi), %ecx
455 ; X64-NEXT: imulq %rcx, %rax
456 ; X64-NEXT: movq %rax, %rcx
457 ; X64-NEXT: shrq $32, %rcx
458 ; X64-NEXT: movl %ecx, (%rdx)
459 ; X64-NEXT: # kill: def $eax killed $eax killed $rax
462 ; EGPR-LABEL: mulx32_load:
464 ; EGPR-NEXT: # kill: def $edi killed $edi def $rdi
465 ; EGPR-NEXT: leal (%rdi,%rdi), %eax # encoding: [0x8d,0x04,0x3f]
466 ; EGPR-NEXT: movl (%rsi), %ecx # encoding: [0x8b,0x0e]
467 ; EGPR-NEXT: imulq %rcx, %rax # encoding: [0x48,0x0f,0xaf,0xc1]
468 ; EGPR-NEXT: movq %rax, %rcx # encoding: [0x48,0x89,0xc1]
469 ; EGPR-NEXT: shrq $32, %rcx # encoding: [0x48,0xc1,0xe9,0x20]
470 ; EGPR-NEXT: movl %ecx, (%rdx) # encoding: [0x89,0x0a]
471 ; EGPR-NEXT: # kill: def $eax killed $eax killed $rax
472 ; EGPR-NEXT: retq # encoding: [0xc3]
474 %y1 = load i32, ptr %y
475 %x2 = zext i32 %x1 to i64
476 %y2 = zext i32 %y1 to i64
477 %r1 = mul i64 %x2, %y2
478 %h1 = lshr i64 %r1, 32
479 %h = trunc i64 %h1 to i32
480 %l = trunc i64 %r1 to i32