1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+bmi2,+sse,+sse2,+avx,+avx2 | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,+sse,+sse2,+avx,+avx2 | FileCheck %s --check-prefixes=CHECK,X64
5 ; If we have a shift by sign-extended value, we can replace sign-extension
; Scalar shl of an i32 whose shift amount is an i8 argument sign-extended to
; i32. In the visible expected lines the amount is loaded with a plain byte
; move on the i686 run and taken straight from the argument register on the
; x86_64 run, then handed to shlx; no sign-extending move is expected.
8 define i32 @t0_shl(i32 %x, i8 %shamt) nounwind {
11 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
12 ; X86-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
17 ; X64-NEXT: shlxl %esi, %edi, %eax
19 %shamt_wide = sext i8 %shamt to i32
20 %r = shl i32 %x, %shamt_wide
; Scalar lshr of an i32 whose shift amount is an i8 argument sign-extended to
; i32. The visible expected lines use a plain byte load (i686 run) or the
; argument register directly (x86_64 run) as the shrx amount; no
; sign-extending move is expected.
23 define i32 @t1_lshr(i32 %x, i8 %shamt) nounwind {
26 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
27 ; X86-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax
32 ; X64-NEXT: shrxl %esi, %edi, %eax
34 %shamt_wide = sext i8 %shamt to i32
35 %r = lshr i32 %x, %shamt_wide
; Scalar ashr of an i32 whose shift amount is an i8 argument sign-extended to
; i32. The visible expected lines use a plain byte load (i686 run) or the
; argument register directly (x86_64 run) as the sarx amount; no
; sign-extending move is expected.
38 define i32 @t2_ashr(i32 %x, i8 %shamt) nounwind {
41 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
42 ; X86-NEXT: sarxl %eax, {{[0-9]+}}(%esp), %eax
47 ; X64-NEXT: sarxl %esi, %edi, %eax
49 %shamt_wide = sext i8 %shamt to i32
50 %r = ashr i32 %x, %shamt_wide
; Vector variant: shl of <4 x i32> by a <4 x i8> amount sign-extended to
; <4 x i32>. Both runs share the same expected lines, which still contain the
; sign-extending vpmovsxbd before the variable shift vpsllvd.
54 define <4 x i32> @t3_vec_shl(<4 x i32> %x, <4 x i8> %shamt) nounwind {
55 ; CHECK-LABEL: t3_vec_shl:
57 ; CHECK-NEXT: vpmovsxbd %xmm1, %xmm1
58 ; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
59 ; CHECK-NEXT: ret{{[l|q]}}
60 %shamt_wide = sext <4 x i8> %shamt to <4 x i32>
61 %r = shl <4 x i32> %x, %shamt_wide
; Vector variant: lshr of <4 x i32> by a <4 x i8> amount sign-extended to
; <4 x i32>. Both runs share the same expected lines, which still contain the
; sign-extending vpmovsxbd before the variable shift vpsrlvd.
64 define <4 x i32> @t4_vec_lshr(<4 x i32> %x, <4 x i8> %shamt) nounwind {
65 ; CHECK-LABEL: t4_vec_lshr:
67 ; CHECK-NEXT: vpmovsxbd %xmm1, %xmm1
68 ; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
69 ; CHECK-NEXT: ret{{[l|q]}}
70 %shamt_wide = sext <4 x i8> %shamt to <4 x i32>
71 %r = lshr <4 x i32> %x, %shamt_wide
; Vector variant: ashr of <4 x i32> by a <4 x i8> amount sign-extended to
; <4 x i32>. Both runs share the same expected lines, which still contain the
; sign-extending vpmovsxbd before the variable shift vpsravd.
74 define <4 x i32> @t5_vec_ashr(<4 x i32> %x, <4 x i8> %shamt) nounwind {
75 ; CHECK-LABEL: t5_vec_ashr:
77 ; CHECK-NEXT: vpmovsxbd %xmm1, %xmm1
78 ; CHECK-NEXT: vpsravd %xmm1, %xmm0, %xmm0
79 ; CHECK-NEXT: ret{{[l|q]}}
80 %shamt_wide = sext <4 x i8> %shamt to <4 x i32>
81 %r = ashr <4 x i32> %x, %shamt_wide
85 ; This is not valid for funnel shifts
; Declarations of the i32 funnel-shift intrinsics called by @n6_fshl and
; @n7_fshr; each takes three i32 operands and returns an i32.
86 declare i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
87 declare i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
; Funnel shift left of %x and %y by an i8 amount sign-extended to i32 and
; passed as the third operand of llvm.fshl.i32. In the visible expected lines
; the amount reaches shld through %cl, via a byte load on the i686 run and a
; register copy on the x86_64 run.
88 define i32 @n6_fshl(i32 %x, i32 %y, i8 %shamt) nounwind {
91 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
92 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
93 ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
94 ; X86-NEXT: shldl %cl, %edx, %eax
99 ; X64-NEXT: movl %edx, %ecx
100 ; X64-NEXT: movl %edi, %eax
101 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
102 ; X64-NEXT: shldl %cl, %esi, %eax
104 %shamt_wide = sext i8 %shamt to i32
105 %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %shamt_wide)
; Funnel shift right of %x and %y by an i8 amount sign-extended to i32 and
; passed as the third operand of llvm.fshr.i32. In the visible expected lines
; the amount reaches shrd through %cl, via a byte load on the i686 run and a
; register copy on the x86_64 run.
108 define i32 @n7_fshr(i32 %x, i32 %y, i8 %shamt) nounwind {
109 ; X86-LABEL: n7_fshr:
111 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
112 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
113 ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
114 ; X86-NEXT: shrdl %cl, %edx, %eax
117 ; X64-LABEL: n7_fshr:
119 ; X64-NEXT: movl %edx, %ecx
120 ; X64-NEXT: movl %esi, %eax
121 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
122 ; X64-NEXT: shrdl %cl, %edi, %eax
124 %shamt_wide = sext i8 %shamt to i32
125 %r = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %shamt_wide)
; Scalar shl where the sign-extended amount has a second use: it is also
; stored through %shamt_wide_store. The expected lines for both runs keep the
; sign-extending movsbl and reuse its result for the store and as the shlx
; amount.
129 define i32 @n8_extrause(i32 %x, i8 %shamt, i32* %shamt_wide_store) nounwind {
130 ; X86-LABEL: n8_extrause:
132 ; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
133 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
134 ; X86-NEXT: movl %eax, (%ecx)
135 ; X86-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax
138 ; X64-LABEL: n8_extrause:
140 ; X64-NEXT: movsbl %sil, %eax
141 ; X64-NEXT: movl %eax, (%rdx)
142 ; X64-NEXT: shlxl %eax, %edi, %eax
144 %shamt_wide = sext i8 %shamt to i32
145 store i32 %shamt_wide, i32* %shamt_wide_store, align 4
146 %r = shl i32 %x, %shamt_wide