1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=i386-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI2 %s
3 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI264 %s
; Variable-count 32-bit left shift of a register argument. Both targets are
; expected to select a single shlxl. On i386 the count is first loaded from
; the stack with movzbl and the shifted value is used as a memory operand.
5 define i32 @shl32(i32 %x, i32 %shamt) nounwind uwtable readnone {
8 ; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
9 ; BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
12 ; BMI264-LABEL: shl32:
14 ; BMI264-NEXT: shlxl %esi, %edi, %eax
16 %shl = shl i32 %x, %shamt
; 32-bit left shift of a register argument where the expected code uses the
; immediate form. Both targets check for a plain `shll $5` and no shlx
; instruction appears in the expected output.
20 define i32 @shl32i(i32 %x) nounwind uwtable readnone {
23 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
24 ; BMI2-NEXT: shll $5, %eax
27 ; BMI264-LABEL: shl32i:
29 ; BMI264-NEXT: movl %edi, %eax
30 ; BMI264-NEXT: shll $5, %eax
; Variable-count 32-bit left shift with a pointer argument. The expected code
; uses the pointer directly as the memory source operand of shlxl
; ((%eax) on i386, (%rdi) on x86_64), i.e. no separate load instruction.
; Presumably %x is loaded from %p before the shift; verify against the IR.
36 define i32 @shl32p(ptr %p, i32 %shamt) nounwind uwtable readnone {
39 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
40 ; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
41 ; BMI2-NEXT: shlxl %ecx, (%eax), %eax
44 ; BMI264-LABEL: shl32p:
46 ; BMI264-NEXT: shlxl %esi, (%rdi), %eax
49 %shl = shl i32 %x, %shamt
; 32-bit left shift by an immediate of a value read through %p. The checks
; expect a separate movl load from the pointer followed by `shll $5` on both
; targets; no shlx instruction appears in the expected output.
53 define i32 @shl32pi(ptr %p) nounwind uwtable readnone {
54 ; BMI2-LABEL: shl32pi:
56 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
57 ; BMI2-NEXT: movl (%eax), %eax
58 ; BMI2-NEXT: shll $5, %eax
61 ; BMI264-LABEL: shl32pi:
63 ; BMI264-NEXT: movl (%rdi), %eax
64 ; BMI264-NEXT: shll $5, %eax
; Variable-count 64-bit left shift of a register argument.
; x86_64 is expected to select a single shlxq.
; i386 is expected to expand the shift over two 32-bit halves: shldl produces
; the high half, shlxl the shifted low half, %eax is zeroed, and the
; `testb $32` / cmov pair picks the final halves depending on bit 5 of the
; count. %esi is saved and restored around the sequence, with matching
; .cfi directives because the function is marked uwtable.
71 define i64 @shl64(i64 %x, i64 %shamt) nounwind uwtable readnone {
74 ; BMI2-NEXT: pushl %esi
75 ; BMI2-NEXT: .cfi_def_cfa_offset 8
76 ; BMI2-NEXT: .cfi_offset %esi, -8
77 ; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
78 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
79 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
80 ; BMI2-NEXT: shldl %cl, %eax, %edx
81 ; BMI2-NEXT: shlxl %ecx, %eax, %esi
82 ; BMI2-NEXT: xorl %eax, %eax
83 ; BMI2-NEXT: testb $32, %cl
84 ; BMI2-NEXT: cmovnel %esi, %edx
85 ; BMI2-NEXT: cmovel %esi, %eax
86 ; BMI2-NEXT: popl %esi
87 ; BMI2-NEXT: .cfi_def_cfa_offset 4
90 ; BMI264-LABEL: shl64:
92 ; BMI264-NEXT: shlxq %rsi, %rdi, %rax
94 %shl = shl i64 %x, %shamt
; 64-bit left shift of a register argument where the expected code uses the
; immediate form with a count of 7.
; i386 is expected to use `shldl $7` for the high half and `shll $7` for the
; low half; x86_64 is expected to copy the argument and use `shlq $7`.
98 define i64 @shl64i(i64 %x) nounwind uwtable readnone {
101 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
102 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
103 ; BMI2-NEXT: shldl $7, %eax, %edx
104 ; BMI2-NEXT: shll $7, %eax
107 ; BMI264-LABEL: shl64i:
109 ; BMI264-NEXT: movq %rdi, %rax
110 ; BMI264-NEXT: shlq $7, %rax
; Variable-count 64-bit left shift of a value loaded from %p.
; x86_64 is expected to fold the load into shlxq's memory source operand.
; i386 is expected to load the two 32-bit halves from (%eax) and 4(%eax) and
; then use the shldl / shlxl / `testb $32` / cmov expansion, saving and
; restoring %esi around it.
; NOTE(review): the function is marked readnone yet loads through %p;
; confirm the attribute is intended.
116 define i64 @shl64p(ptr %p, i64 %shamt) nounwind uwtable readnone {
117 ; BMI2-LABEL: shl64p:
119 ; BMI2-NEXT: pushl %esi
120 ; BMI2-NEXT: .cfi_def_cfa_offset 8
121 ; BMI2-NEXT: .cfi_offset %esi, -8
122 ; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
123 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
124 ; BMI2-NEXT: movl (%eax), %esi
125 ; BMI2-NEXT: movl 4(%eax), %edx
126 ; BMI2-NEXT: shldl %cl, %esi, %edx
127 ; BMI2-NEXT: shlxl %ecx, %esi, %esi
128 ; BMI2-NEXT: xorl %eax, %eax
129 ; BMI2-NEXT: testb $32, %cl
130 ; BMI2-NEXT: cmovnel %esi, %edx
131 ; BMI2-NEXT: cmovel %esi, %eax
132 ; BMI2-NEXT: popl %esi
133 ; BMI2-NEXT: .cfi_def_cfa_offset 4
136 ; BMI264-LABEL: shl64p:
138 ; BMI264-NEXT: shlxq %rsi, (%rdi), %rax
140 %x = load i64, ptr %p
141 %shl = shl i64 %x, %shamt
; 64-bit left shift by an immediate of a value loaded from %p.
; i386 is expected to load both 32-bit halves and use `shldl $7` / `shll $7`;
; x86_64 is expected to load with movq and then use `shlq $7`. No shlx
; instruction appears in the expected output.
; NOTE(review): the function is marked readnone yet loads through %p;
; confirm the attribute is intended.
145 define i64 @shl64pi(ptr %p) nounwind uwtable readnone {
146 ; BMI2-LABEL: shl64pi:
148 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
149 ; BMI2-NEXT: movl (%ecx), %eax
150 ; BMI2-NEXT: movl 4(%ecx), %edx
151 ; BMI2-NEXT: shldl $7, %eax, %edx
152 ; BMI2-NEXT: shll $7, %eax
155 ; BMI264-LABEL: shl64pi:
157 ; BMI264-NEXT: movq (%rdi), %rax
158 ; BMI264-NEXT: shlq $7, %rax
160 %x = load i64, ptr %p
; Variable-count 32-bit logical right shift of a register argument. Both
; targets are expected to select a single shrxl; on i386 the count is loaded
; from the stack with movzbl and the value is used as a memory operand.
; The IR result is named %shl even though the operation is lshr.
165 define i32 @lshr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
166 ; BMI2-LABEL: lshr32:
168 ; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
169 ; BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax
172 ; BMI264-LABEL: lshr32:
174 ; BMI264-NEXT: shrxl %esi, %edi, %eax
176 %shl = lshr i32 %x, %shamt
; Variable-count 32-bit logical right shift of a value loaded from %p. The
; checks expect the load to be folded into shrxl's memory source operand
; ((%eax) on i386, (%rdi) on x86_64).
; NOTE(review): the function is marked readnone yet loads through %p;
; confirm the attribute is intended.
180 define i32 @lshr32p(ptr %p, i32 %shamt) nounwind uwtable readnone {
181 ; BMI2-LABEL: lshr32p:
183 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
184 ; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
185 ; BMI2-NEXT: shrxl %ecx, (%eax), %eax
188 ; BMI264-LABEL: lshr32p:
190 ; BMI264-NEXT: shrxl %esi, (%rdi), %eax
192 %x = load i32, ptr %p
193 %shl = lshr i32 %x, %shamt
; Variable-count 64-bit logical right shift of a register argument.
; x86_64 is expected to select a single shrxq.
; i386 is expected to expand the shift over two 32-bit halves: shrdl produces
; the low half, shrxl the shifted high half, %edx is zeroed, and the
; `testb $32` / cmov pair picks the final halves depending on bit 5 of the
; count. %esi is saved and restored around the sequence.
197 define i64 @lshr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
198 ; BMI2-LABEL: lshr64:
200 ; BMI2-NEXT: pushl %esi
201 ; BMI2-NEXT: .cfi_def_cfa_offset 8
202 ; BMI2-NEXT: .cfi_offset %esi, -8
203 ; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
204 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
205 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
206 ; BMI2-NEXT: shrdl %cl, %edx, %eax
207 ; BMI2-NEXT: shrxl %ecx, %edx, %esi
208 ; BMI2-NEXT: xorl %edx, %edx
209 ; BMI2-NEXT: testb $32, %cl
210 ; BMI2-NEXT: cmovnel %esi, %eax
211 ; BMI2-NEXT: cmovel %esi, %edx
212 ; BMI2-NEXT: popl %esi
213 ; BMI2-NEXT: .cfi_def_cfa_offset 4
216 ; BMI264-LABEL: lshr64:
218 ; BMI264-NEXT: shrxq %rsi, %rdi, %rax
220 %shl = lshr i64 %x, %shamt
; Variable-count 64-bit logical right shift of a value loaded from %p.
; x86_64 is expected to fold the load into shrxq's memory source operand.
; i386 is expected to load the two 32-bit halves from (%edx) and 4(%edx) and
; then use the shrdl / shrxl / `testb $32` / cmov expansion, saving and
; restoring %esi around it.
; NOTE(review): the function is marked readnone yet loads through %p;
; confirm the attribute is intended.
224 define i64 @lshr64p(ptr %p, i64 %shamt) nounwind uwtable readnone {
225 ; BMI2-LABEL: lshr64p:
227 ; BMI2-NEXT: pushl %esi
228 ; BMI2-NEXT: .cfi_def_cfa_offset 8
229 ; BMI2-NEXT: .cfi_offset %esi, -8
230 ; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
231 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
232 ; BMI2-NEXT: movl (%edx), %eax
233 ; BMI2-NEXT: movl 4(%edx), %edx
234 ; BMI2-NEXT: shrdl %cl, %edx, %eax
235 ; BMI2-NEXT: shrxl %ecx, %edx, %esi
236 ; BMI2-NEXT: xorl %edx, %edx
237 ; BMI2-NEXT: testb $32, %cl
238 ; BMI2-NEXT: cmovnel %esi, %eax
239 ; BMI2-NEXT: cmovel %esi, %edx
240 ; BMI2-NEXT: popl %esi
241 ; BMI2-NEXT: .cfi_def_cfa_offset 4
244 ; BMI264-LABEL: lshr64p:
246 ; BMI264-NEXT: shrxq %rsi, (%rdi), %rax
248 %x = load i64, ptr %p
249 %shl = lshr i64 %x, %shamt
; Variable-count 32-bit arithmetic right shift of a register argument. Both
; targets are expected to select a single sarxl; on i386 the count is loaded
; from the stack with movzbl and the value is used as a memory operand.
; The IR result is named %shl even though the operation is ashr.
253 define i32 @ashr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
254 ; BMI2-LABEL: ashr32:
256 ; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
257 ; BMI2-NEXT: sarxl %eax, {{[0-9]+}}(%esp), %eax
260 ; BMI264-LABEL: ashr32:
262 ; BMI264-NEXT: sarxl %esi, %edi, %eax
264 %shl = ashr i32 %x, %shamt
; Variable-count 32-bit arithmetic right shift of a value loaded from %p. The
; checks expect the load to be folded into sarxl's memory source operand
; ((%eax) on i386, (%rdi) on x86_64).
; NOTE(review): the function is marked readnone yet loads through %p;
; confirm the attribute is intended.
268 define i32 @ashr32p(ptr %p, i32 %shamt) nounwind uwtable readnone {
269 ; BMI2-LABEL: ashr32p:
271 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
272 ; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
273 ; BMI2-NEXT: sarxl %ecx, (%eax), %eax
276 ; BMI264-LABEL: ashr32p:
278 ; BMI264-NEXT: sarxl %esi, (%rdi), %eax
280 %x = load i32, ptr %p
281 %shl = ashr i32 %x, %shamt
; Variable-count 64-bit arithmetic right shift of a register argument.
; x86_64 is expected to select a single sarxq.
; i386 is expected to expand the shift over two 32-bit halves: shrdl produces
; the low half, sarxl the shifted high half, `sarl $31` turns %edx into the
; sign fill, and the `testb $32` / cmov pair picks the final halves depending
; on bit 5 of the count. %esi is saved and restored around the sequence.
285 define i64 @ashr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
286 ; BMI2-LABEL: ashr64:
288 ; BMI2-NEXT: pushl %esi
289 ; BMI2-NEXT: .cfi_def_cfa_offset 8
290 ; BMI2-NEXT: .cfi_offset %esi, -8
291 ; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
292 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
293 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
294 ; BMI2-NEXT: shrdl %cl, %edx, %eax
295 ; BMI2-NEXT: sarxl %ecx, %edx, %esi
296 ; BMI2-NEXT: sarl $31, %edx
297 ; BMI2-NEXT: testb $32, %cl
298 ; BMI2-NEXT: cmovnel %esi, %eax
299 ; BMI2-NEXT: cmovel %esi, %edx
300 ; BMI2-NEXT: popl %esi
301 ; BMI2-NEXT: .cfi_def_cfa_offset 4
304 ; BMI264-LABEL: ashr64:
306 ; BMI264-NEXT: sarxq %rsi, %rdi, %rax
308 %shl = ashr i64 %x, %shamt
; Variable-count 64-bit arithmetic right shift of a value loaded from %p.
; x86_64 is expected to fold the load into sarxq's memory source operand.
; i386 is expected to load the two 32-bit halves from (%edx) and 4(%edx) and
; then use the shrdl / sarxl / `sarl $31` / `testb $32` / cmov expansion,
; saving and restoring %esi around it.
; NOTE(review): the function is marked readnone yet loads through %p;
; confirm the attribute is intended.
312 define i64 @ashr64p(ptr %p, i64 %shamt) nounwind uwtable readnone {
313 ; BMI2-LABEL: ashr64p:
315 ; BMI2-NEXT: pushl %esi
316 ; BMI2-NEXT: .cfi_def_cfa_offset 8
317 ; BMI2-NEXT: .cfi_offset %esi, -8
318 ; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
319 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
320 ; BMI2-NEXT: movl (%edx), %eax
321 ; BMI2-NEXT: movl 4(%edx), %edx
322 ; BMI2-NEXT: shrdl %cl, %edx, %eax
323 ; BMI2-NEXT: sarxl %ecx, %edx, %esi
324 ; BMI2-NEXT: sarl $31, %edx
325 ; BMI2-NEXT: testb $32, %cl
326 ; BMI2-NEXT: cmovnel %esi, %eax
327 ; BMI2-NEXT: cmovel %esi, %edx
328 ; BMI2-NEXT: popl %esi
329 ; BMI2-NEXT: .cfi_def_cfa_offset 4
332 ; BMI264-LABEL: ashr64p:
334 ; BMI264-NEXT: sarxq %rsi, (%rdi), %rax
336 %x = load i64, ptr %p
337 %shl = ashr i64 %x, %shamt
; 32-bit left shift whose count (%t, the first argument) is explicitly masked
; with 31 in the IR. The checks expect only shlxl on both targets: no
; separate and instruction appears in the expected output. On x86_64 the
; count is taken from %edi and the shifted value from %esi.
341 define i32 @shl32and(i32 %t, i32 %val) nounwind {
342 ; BMI2-LABEL: shl32and:
344 ; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
345 ; BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
348 ; BMI264-LABEL: shl32and:
350 ; BMI264-NEXT: shlxl %edi, %esi, %eax
352 %shamt = and i32 %t, 31
353 %res = shl i32 %val, %shamt
; 64-bit left shift whose count (%t, the first argument) is explicitly masked
; with 63 in the IR. No separate and instruction appears in the expected
; output on either target.
; x86_64 is expected to select a single shlxq (count in %rdi, value in %rsi).
; i386 is expected to use the shldl / shlxl / `testb $32` / cmov expansion,
; saving and restoring %esi around it.
357 define i64 @shl64and(i64 %t, i64 %val) nounwind {
358 ; BMI2-LABEL: shl64and:
360 ; BMI2-NEXT: pushl %esi
361 ; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
362 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
363 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
364 ; BMI2-NEXT: shldl %cl, %eax, %edx
365 ; BMI2-NEXT: shlxl %ecx, %eax, %esi
366 ; BMI2-NEXT: xorl %eax, %eax
367 ; BMI2-NEXT: testb $32, %cl
368 ; BMI2-NEXT: cmovnel %esi, %edx
369 ; BMI2-NEXT: cmovel %esi, %eax
370 ; BMI2-NEXT: popl %esi
373 ; BMI264-LABEL: shl64and:
375 ; BMI264-NEXT: shlxq %rdi, %rsi, %rax
377 %shamt = and i64 %t, 63
378 %res = shl i64 %val, %shamt
; 32-bit logical right shift whose count (%t, the first argument) is
; explicitly masked with 31 in the IR. The checks expect only shrxl on both
; targets: no separate and instruction appears in the expected output. On
; x86_64 the count is taken from %edi and the shifted value from %esi.
382 define i32 @lshr32and(i32 %t, i32 %val) nounwind {
383 ; BMI2-LABEL: lshr32and:
385 ; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
386 ; BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax
389 ; BMI264-LABEL: lshr32and:
391 ; BMI264-NEXT: shrxl %edi, %esi, %eax
393 %shamt = and i32 %t, 31
394 %res = lshr i32 %val, %shamt
; 64-bit logical right shift whose count (%t, the first argument) is
; explicitly masked with 63 in the IR. No separate and instruction appears in
; the expected output on either target.
; x86_64 is expected to select a single shrxq (count in %rdi, value in %rsi).
; i386 is expected to use the shrdl / shrxl / `testb $32` / cmov expansion,
; saving and restoring %esi around it.
398 define i64 @lshr64and(i64 %t, i64 %val) nounwind {
399 ; BMI2-LABEL: lshr64and:
401 ; BMI2-NEXT: pushl %esi
402 ; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
403 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
404 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
405 ; BMI2-NEXT: shrdl %cl, %edx, %eax
406 ; BMI2-NEXT: shrxl %ecx, %edx, %esi
407 ; BMI2-NEXT: xorl %edx, %edx
408 ; BMI2-NEXT: testb $32, %cl
409 ; BMI2-NEXT: cmovnel %esi, %eax
410 ; BMI2-NEXT: cmovel %esi, %edx
411 ; BMI2-NEXT: popl %esi
414 ; BMI264-LABEL: lshr64and:
416 ; BMI264-NEXT: shrxq %rdi, %rsi, %rax
418 %shamt = and i64 %t, 63
419 %res = lshr i64 %val, %shamt
; 32-bit arithmetic right shift whose count (%t, the first argument) is
; explicitly masked with 31 in the IR. The checks expect only sarxl on both
; targets: no separate and instruction appears in the expected output. On
; x86_64 the count is taken from %edi and the shifted value from %esi.
423 define i32 @ashr32and(i32 %t, i32 %val) nounwind {
424 ; BMI2-LABEL: ashr32and:
426 ; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
427 ; BMI2-NEXT: sarxl %eax, {{[0-9]+}}(%esp), %eax
430 ; BMI264-LABEL: ashr32and:
432 ; BMI264-NEXT: sarxl %edi, %esi, %eax
434 %shamt = and i32 %t, 31
435 %res = ashr i32 %val, %shamt
439 define i64 @ashr64and(i64 %t, i64 %val) nounwind {
440 ; BMI2-LABEL: ashr64and:
442 ; BMI2-NEXT: pushl %esi
443 ; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
444 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
445 ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
446 ; BMI2-NEXT: shrdl %cl, %edx, %eax
447 ; BMI2-NEXT: sarxl %ecx, %edx, %esi
448 ; BMI2-NEXT: sarl $31, %edx
449 ; BMI2-NEXT: testb $32, %cl
450 ; BMI2-NEXT: cmovnel %esi, %eax
451 ; BMI2-NEXT: cmovel %esi, %edx
452 ; BMI2-NEXT: popl %esi
455 ; BMI264-LABEL: ashr64and:
457 ; BMI264-NEXT: sarxq %rdi, %rsi, %rax
459 %shamt = and i64 %t, 63
460 %res = ashr i64 %val, %shamt