1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,-bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-SSE2,X64-NO-BMI2,X64-NO-SHLD-NO-BMI2,FALLBACK0
3 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,-bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-SSE2,X64-NO-BMI2,X64-HAVE-SHLD-NO-BMI2,FALLBACK1
4 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,+bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-SSE2,X64-HAVE-BMI2,X64-NO-SHLD-HAVE-BMI2,FALLBACK2
5 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2,+bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-SSE2,X64-HAVE-BMI2,X64-HAVE-SHLD-HAVE-BMI2,FALLBACK3
6 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2,-bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-SSE42,X64-NO-BMI2,X64-NO-SHLD-NO-BMI2,FALLBACK4
7 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2,-bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-SSE42,X64-NO-BMI2,X64-HAVE-SHLD-NO-BMI2,FALLBACK5
8 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2,+bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-SSE42,X64-HAVE-BMI2,X64-NO-SHLD-HAVE-BMI2,FALLBACK6
9 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2,+bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-SSE42,X64-HAVE-BMI2,X64-HAVE-SHLD-HAVE-BMI2,FALLBACK7
10 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx,-bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-AVX,X64-AVX1,X64-NO-BMI2,X64-NO-SHLD-NO-BMI2,FALLBACK8
11 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx,-bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-AVX,X64-AVX1,X64-NO-BMI2,X64-HAVE-SHLD-NO-BMI2,FALLBACK9
12 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx,+bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-AVX,X64-AVX1,X64-HAVE-BMI2,X64-NO-SHLD-HAVE-BMI2,FALLBACK10
13 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx,+bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-AVX,X64-AVX1,X64-HAVE-BMI2,X64-HAVE-SHLD-HAVE-BMI2,FALLBACK11
14 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl,-bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-AVX,X64-AVX512,X64-NO-BMI2,X64-NO-SHLD-NO-BMI2,FALLBACK12
15 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl,-bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-AVX,X64-AVX512,X64-NO-BMI2,X64-HAVE-SHLD-NO-BMI2,FALLBACK13
16 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl,+bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-AVX,X64-AVX512,X64-HAVE-BMI2,X64-NO-SHLD-HAVE-BMI2,FALLBACK14
17 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl,+bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X64,X64-AVX,X64-AVX512,X64-HAVE-BMI2,X64-HAVE-SHLD-HAVE-BMI2,FALLBACK15
18 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,-bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-SSE2,X86-NO-BMI2,X86-NO-SHLD-NO-BMI2,FALLBACK16
19 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,-bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-SSE2,X86-NO-BMI2,X86-HAVE-SHLD-NO-BMI2,FALLBACK17
20 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,+bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-SSE2,X86-HAVE-BMI2,X86-NO-SHLD-HAVE-BMI2,FALLBACK18
21 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse2,+bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-SSE2,X86-HAVE-BMI2,X86-HAVE-SHLD-HAVE-BMI2,FALLBACK19
22 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse4.2,-bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-SSE42,X86-NO-BMI2,X86-NO-SHLD-NO-BMI2,FALLBACK20
23 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse4.2,-bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-SSE42,X86-NO-BMI2,X86-HAVE-SHLD-NO-BMI2,FALLBACK21
24 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse4.2,+bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-SSE42,X86-HAVE-BMI2,X86-NO-SHLD-HAVE-BMI2,FALLBACK22
25 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+sse4.2,+bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-SSE42,X86-HAVE-BMI2,X86-HAVE-SHLD-HAVE-BMI2,FALLBACK23
26 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx,-bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-AVX,X86-AVX1,X86-NO-BMI2,X86-NO-SHLD-NO-BMI2,FALLBACK24
27 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx,-bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-AVX,X86-AVX1,X86-NO-BMI2,X86-HAVE-SHLD-NO-BMI2,FALLBACK25
28 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx,+bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-AVX,X86-AVX1,X86-HAVE-BMI2,X86-NO-SHLD-HAVE-BMI2,FALLBACK26
29 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx,+bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-AVX,X86-AVX1,X86-HAVE-BMI2,X86-HAVE-SHLD-HAVE-BMI2,FALLBACK27
30 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx512vl,-bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-AVX,X86-AVX512,X86-NO-BMI2,X86-NO-SHLD-NO-BMI2,FALLBACK28
31 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx512vl,-bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-AVX,X86-AVX512,X86-NO-BMI2,X86-HAVE-SHLD-NO-BMI2,FALLBACK29
32 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx512vl,+bmi2,+slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-AVX,X86-AVX512,X86-HAVE-BMI2,X86-NO-SHLD-HAVE-BMI2,FALLBACK30
33 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx512vl,+bmi2,-slow-shld | FileCheck %s --check-prefixes=ALL,X86,X86-AVX,X86-AVX512,X86-HAVE-BMI2,X86-HAVE-SHLD-HAVE-BMI2,FALLBACK31
; lshr_4bytes - logical right shift of a 4-byte (i32) value by a byte offset.
; The IR loads an i32 from %src.ptr and an i32 byte offset from %byteOff.ptr
; (both align 1), turns the byte offset into a bit offset with a left shift
; by 3 (i.e. times 8), applies lshr, and stores the i32 result to %dst
; (align 1).
; The expected assembly below is split only by BMI2 availability: without
; BMI2 a CL-based shrl is expected, with BMI2 a shrxl that takes its source
; operand directly from memory. In every variant only the low byte of the
; offset is loaded (movzbl) before the shlb by 3.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
35 define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
36 ; X64-NO-BMI2-LABEL: lshr_4bytes:
37 ; X64-NO-BMI2: # %bb.0:
38 ; X64-NO-BMI2-NEXT: movl (%rdi), %eax
39 ; X64-NO-BMI2-NEXT: movzbl (%rsi), %ecx
40 ; X64-NO-BMI2-NEXT: shlb $3, %cl
41 ; X64-NO-BMI2-NEXT: shrl %cl, %eax
42 ; X64-NO-BMI2-NEXT: movl %eax, (%rdx)
43 ; X64-NO-BMI2-NEXT: retq
45 ; X64-HAVE-BMI2-LABEL: lshr_4bytes:
46 ; X64-HAVE-BMI2: # %bb.0:
47 ; X64-HAVE-BMI2-NEXT: movzbl (%rsi), %eax
48 ; X64-HAVE-BMI2-NEXT: shlb $3, %al
49 ; X64-HAVE-BMI2-NEXT: shrxl %eax, (%rdi), %eax
50 ; X64-HAVE-BMI2-NEXT: movl %eax, (%rdx)
51 ; X64-HAVE-BMI2-NEXT: retq
53 ; X86-NO-BMI2-LABEL: lshr_4bytes:
54 ; X86-NO-BMI2: # %bb.0:
55 ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
56 ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
57 ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
58 ; X86-NO-BMI2-NEXT: movl (%edx), %edx
59 ; X86-NO-BMI2-NEXT: movzbl (%ecx), %ecx
60 ; X86-NO-BMI2-NEXT: shlb $3, %cl
61 ; X86-NO-BMI2-NEXT: shrl %cl, %edx
62 ; X86-NO-BMI2-NEXT: movl %edx, (%eax)
63 ; X86-NO-BMI2-NEXT: retl
65 ; X86-HAVE-BMI2-LABEL: lshr_4bytes:
66 ; X86-HAVE-BMI2: # %bb.0:
67 ; X86-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
68 ; X86-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
69 ; X86-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
70 ; X86-HAVE-BMI2-NEXT: movzbl (%edx), %edx
71 ; X86-HAVE-BMI2-NEXT: shlb $3, %dl
72 ; X86-HAVE-BMI2-NEXT: shrxl %edx, (%ecx), %ecx
73 ; X86-HAVE-BMI2-NEXT: movl %ecx, (%eax)
74 ; X86-HAVE-BMI2-NEXT: retl
75 %src = load i32, ptr %src.ptr, align 1
76 %byteOff = load i32, ptr %byteOff.ptr, align 1
77 %bitOff = shl i32 %byteOff, 3
78 %res = lshr i32 %src, %bitOff
79 store i32 %res, ptr %dst, align 1
; shl_4bytes - left shift of a 4-byte (i32) value by a byte offset.
; The IR loads an i32 from %src.ptr and an i32 byte offset from %byteOff.ptr
; (both align 1), turns the byte offset into a bit offset with a left shift
; by 3 (i.e. times 8), applies shl, and stores the i32 result to %dst
; (align 1).
; The expected assembly below is split only by BMI2 availability: without
; BMI2 a CL-based shll is expected, with BMI2 a shlxl that takes its source
; operand directly from memory. In every variant only the low byte of the
; offset is loaded (movzbl) before the shlb by 3.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
82 define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
83 ; X64-NO-BMI2-LABEL: shl_4bytes:
84 ; X64-NO-BMI2: # %bb.0:
85 ; X64-NO-BMI2-NEXT: movl (%rdi), %eax
86 ; X64-NO-BMI2-NEXT: movzbl (%rsi), %ecx
87 ; X64-NO-BMI2-NEXT: shlb $3, %cl
88 ; X64-NO-BMI2-NEXT: shll %cl, %eax
89 ; X64-NO-BMI2-NEXT: movl %eax, (%rdx)
90 ; X64-NO-BMI2-NEXT: retq
92 ; X64-HAVE-BMI2-LABEL: shl_4bytes:
93 ; X64-HAVE-BMI2: # %bb.0:
94 ; X64-HAVE-BMI2-NEXT: movzbl (%rsi), %eax
95 ; X64-HAVE-BMI2-NEXT: shlb $3, %al
96 ; X64-HAVE-BMI2-NEXT: shlxl %eax, (%rdi), %eax
97 ; X64-HAVE-BMI2-NEXT: movl %eax, (%rdx)
98 ; X64-HAVE-BMI2-NEXT: retq
100 ; X86-NO-BMI2-LABEL: shl_4bytes:
101 ; X86-NO-BMI2: # %bb.0:
102 ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
103 ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
104 ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
105 ; X86-NO-BMI2-NEXT: movl (%edx), %edx
106 ; X86-NO-BMI2-NEXT: movzbl (%ecx), %ecx
107 ; X86-NO-BMI2-NEXT: shlb $3, %cl
108 ; X86-NO-BMI2-NEXT: shll %cl, %edx
109 ; X86-NO-BMI2-NEXT: movl %edx, (%eax)
110 ; X86-NO-BMI2-NEXT: retl
112 ; X86-HAVE-BMI2-LABEL: shl_4bytes:
113 ; X86-HAVE-BMI2: # %bb.0:
114 ; X86-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
115 ; X86-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
116 ; X86-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
117 ; X86-HAVE-BMI2-NEXT: movzbl (%edx), %edx
118 ; X86-HAVE-BMI2-NEXT: shlb $3, %dl
119 ; X86-HAVE-BMI2-NEXT: shlxl %edx, (%ecx), %ecx
120 ; X86-HAVE-BMI2-NEXT: movl %ecx, (%eax)
121 ; X86-HAVE-BMI2-NEXT: retl
122 %src = load i32, ptr %src.ptr, align 1
123 %byteOff = load i32, ptr %byteOff.ptr, align 1
124 %bitOff = shl i32 %byteOff, 3
125 %res = shl i32 %src, %bitOff
126 store i32 %res, ptr %dst, align 1
; ashr_4bytes - arithmetic right shift of a 4-byte (i32) value by a byte
; offset.
; The IR loads an i32 from %src.ptr and an i32 byte offset from %byteOff.ptr
; (both align 1), turns the byte offset into a bit offset with a left shift
; by 3 (i.e. times 8), applies ashr, and stores the i32 result to %dst
; (align 1).
; The expected assembly below is split only by BMI2 availability: without
; BMI2 a CL-based sarl is expected, with BMI2 a sarxl that takes its source
; operand directly from memory. In every variant only the low byte of the
; offset is loaded (movzbl) before the shlb by 3.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
129 define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
130 ; X64-NO-BMI2-LABEL: ashr_4bytes:
131 ; X64-NO-BMI2: # %bb.0:
132 ; X64-NO-BMI2-NEXT: movl (%rdi), %eax
133 ; X64-NO-BMI2-NEXT: movzbl (%rsi), %ecx
134 ; X64-NO-BMI2-NEXT: shlb $3, %cl
135 ; X64-NO-BMI2-NEXT: sarl %cl, %eax
136 ; X64-NO-BMI2-NEXT: movl %eax, (%rdx)
137 ; X64-NO-BMI2-NEXT: retq
139 ; X64-HAVE-BMI2-LABEL: ashr_4bytes:
140 ; X64-HAVE-BMI2: # %bb.0:
141 ; X64-HAVE-BMI2-NEXT: movzbl (%rsi), %eax
142 ; X64-HAVE-BMI2-NEXT: shlb $3, %al
143 ; X64-HAVE-BMI2-NEXT: sarxl %eax, (%rdi), %eax
144 ; X64-HAVE-BMI2-NEXT: movl %eax, (%rdx)
145 ; X64-HAVE-BMI2-NEXT: retq
147 ; X86-NO-BMI2-LABEL: ashr_4bytes:
148 ; X86-NO-BMI2: # %bb.0:
149 ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
150 ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
151 ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
152 ; X86-NO-BMI2-NEXT: movl (%edx), %edx
153 ; X86-NO-BMI2-NEXT: movzbl (%ecx), %ecx
154 ; X86-NO-BMI2-NEXT: shlb $3, %cl
155 ; X86-NO-BMI2-NEXT: sarl %cl, %edx
156 ; X86-NO-BMI2-NEXT: movl %edx, (%eax)
157 ; X86-NO-BMI2-NEXT: retl
159 ; X86-HAVE-BMI2-LABEL: ashr_4bytes:
160 ; X86-HAVE-BMI2: # %bb.0:
161 ; X86-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
162 ; X86-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
163 ; X86-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
164 ; X86-HAVE-BMI2-NEXT: movzbl (%edx), %edx
165 ; X86-HAVE-BMI2-NEXT: shlb $3, %dl
166 ; X86-HAVE-BMI2-NEXT: sarxl %edx, (%ecx), %ecx
167 ; X86-HAVE-BMI2-NEXT: movl %ecx, (%eax)
168 ; X86-HAVE-BMI2-NEXT: retl
169 %src = load i32, ptr %src.ptr, align 1
170 %byteOff = load i32, ptr %byteOff.ptr, align 1
171 %bitOff = shl i32 %byteOff, 3
172 %res = ashr i32 %src, %bitOff
173 store i32 %res, ptr %dst, align 1
; lshr_8bytes - logical right shift of an 8-byte (i64) value by a byte offset.
; The IR loads an i64 from %src.ptr and an i64 byte offset from %byteOff.ptr
; (both align 1), turns the byte offset into a bit offset with a left shift
; by 3 (i.e. times 8), applies lshr, and stores the i64 result to %dst
; (align 1).
; On x86-64 the expected assembly is a single 64-bit shift (shrq through CL,
; or shrxq from memory with BMI2). On i686 the value is handled as two 32-bit
; halves, so the expectations are split four ways by SHLD and BMI2
; availability:
;   * the HAVE-SHLD variants combine the halves with shrdl;
;   * the NO-SHLD variants build the low half from a right shift of the low
;     word OR-ed with the doubled high word shifted left by the inverted
;     count (notb);
;   * all variants then test bit 5 of the bit offset (testb with 32) and use
;     cmovne/cmove to pick between the combined result and the shifted high
;     word for the low half, and between the shifted high word and zero
;     (xorl) for the high half.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
177 define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
178 ; X64-NO-BMI2-LABEL: lshr_8bytes:
179 ; X64-NO-BMI2: # %bb.0:
180 ; X64-NO-BMI2-NEXT: movq (%rdi), %rax
181 ; X64-NO-BMI2-NEXT: movzbl (%rsi), %ecx
182 ; X64-NO-BMI2-NEXT: shlb $3, %cl
183 ; X64-NO-BMI2-NEXT: shrq %cl, %rax
184 ; X64-NO-BMI2-NEXT: movq %rax, (%rdx)
185 ; X64-NO-BMI2-NEXT: retq
187 ; X64-HAVE-BMI2-LABEL: lshr_8bytes:
188 ; X64-HAVE-BMI2: # %bb.0:
189 ; X64-HAVE-BMI2-NEXT: movzbl (%rsi), %eax
190 ; X64-HAVE-BMI2-NEXT: shlb $3, %al
191 ; X64-HAVE-BMI2-NEXT: shrxq %rax, (%rdi), %rax
192 ; X64-HAVE-BMI2-NEXT: movq %rax, (%rdx)
193 ; X64-HAVE-BMI2-NEXT: retq
195 ; X86-NO-SHLD-NO-BMI2-LABEL: lshr_8bytes:
196 ; X86-NO-SHLD-NO-BMI2: # %bb.0:
197 ; X86-NO-SHLD-NO-BMI2-NEXT: pushl %ebx
198 ; X86-NO-SHLD-NO-BMI2-NEXT: pushl %edi
199 ; X86-NO-SHLD-NO-BMI2-NEXT: pushl %esi
200 ; X86-NO-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
201 ; X86-NO-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
202 ; X86-NO-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
203 ; X86-NO-SHLD-NO-BMI2-NEXT: movl (%ecx), %ebx
204 ; X86-NO-SHLD-NO-BMI2-NEXT: movl 4(%ecx), %esi
205 ; X86-NO-SHLD-NO-BMI2-NEXT: movzbl (%eax), %eax
206 ; X86-NO-SHLD-NO-BMI2-NEXT: shlb $3, %al
207 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %eax, %ecx
208 ; X86-NO-SHLD-NO-BMI2-NEXT: shrl %cl, %ebx
209 ; X86-NO-SHLD-NO-BMI2-NEXT: leal (%esi,%esi), %edi
210 ; X86-NO-SHLD-NO-BMI2-NEXT: notb %cl
211 ; X86-NO-SHLD-NO-BMI2-NEXT: shll %cl, %edi
212 ; X86-NO-SHLD-NO-BMI2-NEXT: orl %ebx, %edi
213 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %eax, %ecx
214 ; X86-NO-SHLD-NO-BMI2-NEXT: shrl %cl, %esi
215 ; X86-NO-SHLD-NO-BMI2-NEXT: xorl %ecx, %ecx
216 ; X86-NO-SHLD-NO-BMI2-NEXT: testb $32, %al
217 ; X86-NO-SHLD-NO-BMI2-NEXT: cmovnel %esi, %edi
218 ; X86-NO-SHLD-NO-BMI2-NEXT: cmovel %esi, %ecx
219 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %ecx, 4(%edx)
220 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %edi, (%edx)
221 ; X86-NO-SHLD-NO-BMI2-NEXT: popl %esi
222 ; X86-NO-SHLD-NO-BMI2-NEXT: popl %edi
223 ; X86-NO-SHLD-NO-BMI2-NEXT: popl %ebx
224 ; X86-NO-SHLD-NO-BMI2-NEXT: retl
226 ; X86-HAVE-SHLD-NO-BMI2-LABEL: lshr_8bytes:
227 ; X86-HAVE-SHLD-NO-BMI2: # %bb.0:
228 ; X86-HAVE-SHLD-NO-BMI2-NEXT: pushl %edi
229 ; X86-HAVE-SHLD-NO-BMI2-NEXT: pushl %esi
230 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
231 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
232 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
233 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl (%esi), %edx
234 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl 4(%esi), %esi
235 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movzbl (%ecx), %ecx
236 ; X86-HAVE-SHLD-NO-BMI2-NEXT: shlb $3, %cl
237 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl %esi, %edi
238 ; X86-HAVE-SHLD-NO-BMI2-NEXT: shrl %cl, %edi
239 ; X86-HAVE-SHLD-NO-BMI2-NEXT: shrdl %cl, %esi, %edx
240 ; X86-HAVE-SHLD-NO-BMI2-NEXT: xorl %esi, %esi
241 ; X86-HAVE-SHLD-NO-BMI2-NEXT: testb $32, %cl
242 ; X86-HAVE-SHLD-NO-BMI2-NEXT: cmovnel %edi, %edx
243 ; X86-HAVE-SHLD-NO-BMI2-NEXT: cmovel %edi, %esi
244 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl %esi, 4(%eax)
245 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl %edx, (%eax)
246 ; X86-HAVE-SHLD-NO-BMI2-NEXT: popl %esi
247 ; X86-HAVE-SHLD-NO-BMI2-NEXT: popl %edi
248 ; X86-HAVE-SHLD-NO-BMI2-NEXT: retl
250 ; X86-NO-SHLD-HAVE-BMI2-LABEL: lshr_8bytes:
251 ; X86-NO-SHLD-HAVE-BMI2: # %bb.0:
252 ; X86-NO-SHLD-HAVE-BMI2-NEXT: pushl %ebx
253 ; X86-NO-SHLD-HAVE-BMI2-NEXT: pushl %edi
254 ; X86-NO-SHLD-HAVE-BMI2-NEXT: pushl %esi
255 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
256 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
257 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
258 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl 4(%edx), %esi
259 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movzbl (%ecx), %ecx
260 ; X86-NO-SHLD-HAVE-BMI2-NEXT: shlb $3, %cl
261 ; X86-NO-SHLD-HAVE-BMI2-NEXT: shrxl %ecx, (%edx), %edx
262 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %ecx, %ebx
263 ; X86-NO-SHLD-HAVE-BMI2-NEXT: notb %bl
264 ; X86-NO-SHLD-HAVE-BMI2-NEXT: leal (%esi,%esi), %edi
265 ; X86-NO-SHLD-HAVE-BMI2-NEXT: shlxl %ebx, %edi, %edi
266 ; X86-NO-SHLD-HAVE-BMI2-NEXT: orl %edx, %edi
267 ; X86-NO-SHLD-HAVE-BMI2-NEXT: shrxl %ecx, %esi, %edx
268 ; X86-NO-SHLD-HAVE-BMI2-NEXT: xorl %esi, %esi
269 ; X86-NO-SHLD-HAVE-BMI2-NEXT: testb $32, %cl
270 ; X86-NO-SHLD-HAVE-BMI2-NEXT: cmovnel %edx, %edi
271 ; X86-NO-SHLD-HAVE-BMI2-NEXT: cmovel %edx, %esi
272 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %esi, 4(%eax)
273 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %edi, (%eax)
274 ; X86-NO-SHLD-HAVE-BMI2-NEXT: popl %esi
275 ; X86-NO-SHLD-HAVE-BMI2-NEXT: popl %edi
276 ; X86-NO-SHLD-HAVE-BMI2-NEXT: popl %ebx
277 ; X86-NO-SHLD-HAVE-BMI2-NEXT: retl
279 ; X86-HAVE-SHLD-HAVE-BMI2-LABEL: lshr_8bytes:
280 ; X86-HAVE-SHLD-HAVE-BMI2: # %bb.0:
281 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: pushl %edi
282 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: pushl %esi
283 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
284 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
285 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
286 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl (%esi), %edx
287 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl 4(%esi), %esi
288 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movzbl (%ecx), %ecx
289 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: shlb $3, %cl
290 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: shrdl %cl, %esi, %edx
291 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: shrxl %ecx, %esi, %esi
292 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: xorl %edi, %edi
293 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: testb $32, %cl
294 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: cmovnel %esi, %edx
295 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: cmovel %esi, %edi
296 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl %edi, 4(%eax)
297 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl %edx, (%eax)
298 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: popl %esi
299 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: popl %edi
300 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: retl
301 %src = load i64, ptr %src.ptr, align 1
302 %byteOff = load i64, ptr %byteOff.ptr, align 1
303 %bitOff = shl i64 %byteOff, 3
304 %res = lshr i64 %src, %bitOff
305 store i64 %res, ptr %dst, align 1
; shl_8bytes - left shift of an 8-byte (i64) value by a byte offset.
; The IR loads an i64 from %src.ptr and an i64 byte offset from %byteOff.ptr
; (both align 1), turns the byte offset into a bit offset with a left shift
; by 3 (i.e. times 8), applies shl, and stores the i64 result to %dst
; (align 1).
; On x86-64 the expected assembly is a single 64-bit shift (shlq through CL,
; or shlxq from memory with BMI2). On i686 the value is handled as two 32-bit
; halves, so the expectations are split four ways by SHLD and BMI2
; availability:
;   * the HAVE-SHLD variants combine the halves with shldl;
;   * the NO-SHLD variants build the high half from a left shift of the high
;     word OR-ed with the low word shifted right by one and then by the
;     inverted count (notb);
;   * all variants then test bit 5 of the bit offset (testb with 32) and use
;     cmovne/cmove to pick between the combined result and the shifted low
;     word for the high half (stored at offset 4), and between the shifted
;     low word and zero (xorl) for the low half (stored at offset 0).
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
308 define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
309 ; X64-NO-BMI2-LABEL: shl_8bytes:
310 ; X64-NO-BMI2: # %bb.0:
311 ; X64-NO-BMI2-NEXT: movq (%rdi), %rax
312 ; X64-NO-BMI2-NEXT: movzbl (%rsi), %ecx
313 ; X64-NO-BMI2-NEXT: shlb $3, %cl
314 ; X64-NO-BMI2-NEXT: shlq %cl, %rax
315 ; X64-NO-BMI2-NEXT: movq %rax, (%rdx)
316 ; X64-NO-BMI2-NEXT: retq
318 ; X64-HAVE-BMI2-LABEL: shl_8bytes:
319 ; X64-HAVE-BMI2: # %bb.0:
320 ; X64-HAVE-BMI2-NEXT: movzbl (%rsi), %eax
321 ; X64-HAVE-BMI2-NEXT: shlb $3, %al
322 ; X64-HAVE-BMI2-NEXT: shlxq %rax, (%rdi), %rax
323 ; X64-HAVE-BMI2-NEXT: movq %rax, (%rdx)
324 ; X64-HAVE-BMI2-NEXT: retq
326 ; X86-NO-SHLD-NO-BMI2-LABEL: shl_8bytes:
327 ; X86-NO-SHLD-NO-BMI2: # %bb.0:
328 ; X86-NO-SHLD-NO-BMI2-NEXT: pushl %ebx
329 ; X86-NO-SHLD-NO-BMI2-NEXT: pushl %edi
330 ; X86-NO-SHLD-NO-BMI2-NEXT: pushl %esi
331 ; X86-NO-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
332 ; X86-NO-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
333 ; X86-NO-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
334 ; X86-NO-SHLD-NO-BMI2-NEXT: movl (%ecx), %esi
335 ; X86-NO-SHLD-NO-BMI2-NEXT: movl 4(%ecx), %ebx
336 ; X86-NO-SHLD-NO-BMI2-NEXT: movzbl (%eax), %eax
337 ; X86-NO-SHLD-NO-BMI2-NEXT: shlb $3, %al
338 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %eax, %ecx
339 ; X86-NO-SHLD-NO-BMI2-NEXT: shll %cl, %ebx
340 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %esi, %edi
341 ; X86-NO-SHLD-NO-BMI2-NEXT: shrl %edi
342 ; X86-NO-SHLD-NO-BMI2-NEXT: notb %cl
343 ; X86-NO-SHLD-NO-BMI2-NEXT: shrl %cl, %edi
344 ; X86-NO-SHLD-NO-BMI2-NEXT: orl %ebx, %edi
345 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %eax, %ecx
346 ; X86-NO-SHLD-NO-BMI2-NEXT: shll %cl, %esi
347 ; X86-NO-SHLD-NO-BMI2-NEXT: xorl %ecx, %ecx
348 ; X86-NO-SHLD-NO-BMI2-NEXT: testb $32, %al
349 ; X86-NO-SHLD-NO-BMI2-NEXT: cmovnel %esi, %edi
350 ; X86-NO-SHLD-NO-BMI2-NEXT: cmovel %esi, %ecx
351 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %ecx, (%edx)
352 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %edi, 4(%edx)
353 ; X86-NO-SHLD-NO-BMI2-NEXT: popl %esi
354 ; X86-NO-SHLD-NO-BMI2-NEXT: popl %edi
355 ; X86-NO-SHLD-NO-BMI2-NEXT: popl %ebx
356 ; X86-NO-SHLD-NO-BMI2-NEXT: retl
358 ; X86-HAVE-SHLD-NO-BMI2-LABEL: shl_8bytes:
359 ; X86-HAVE-SHLD-NO-BMI2: # %bb.0:
360 ; X86-HAVE-SHLD-NO-BMI2-NEXT: pushl %edi
361 ; X86-HAVE-SHLD-NO-BMI2-NEXT: pushl %esi
362 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
363 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
364 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
365 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl (%edx), %esi
366 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl 4(%edx), %edx
367 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movzbl (%ecx), %ecx
368 ; X86-HAVE-SHLD-NO-BMI2-NEXT: shlb $3, %cl
369 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl %esi, %edi
370 ; X86-HAVE-SHLD-NO-BMI2-NEXT: shll %cl, %edi
371 ; X86-HAVE-SHLD-NO-BMI2-NEXT: shldl %cl, %esi, %edx
372 ; X86-HAVE-SHLD-NO-BMI2-NEXT: xorl %esi, %esi
373 ; X86-HAVE-SHLD-NO-BMI2-NEXT: testb $32, %cl
374 ; X86-HAVE-SHLD-NO-BMI2-NEXT: cmovnel %edi, %edx
375 ; X86-HAVE-SHLD-NO-BMI2-NEXT: cmovel %edi, %esi
376 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl %edx, 4(%eax)
377 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl %esi, (%eax)
378 ; X86-HAVE-SHLD-NO-BMI2-NEXT: popl %esi
379 ; X86-HAVE-SHLD-NO-BMI2-NEXT: popl %edi
380 ; X86-HAVE-SHLD-NO-BMI2-NEXT: retl
382 ; X86-NO-SHLD-HAVE-BMI2-LABEL: shl_8bytes:
383 ; X86-NO-SHLD-HAVE-BMI2: # %bb.0:
384 ; X86-NO-SHLD-HAVE-BMI2-NEXT: pushl %ebx
385 ; X86-NO-SHLD-HAVE-BMI2-NEXT: pushl %edi
386 ; X86-NO-SHLD-HAVE-BMI2-NEXT: pushl %esi
387 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
388 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
389 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
390 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl (%edx), %esi
391 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movzbl (%ecx), %ecx
392 ; X86-NO-SHLD-HAVE-BMI2-NEXT: shlb $3, %cl
393 ; X86-NO-SHLD-HAVE-BMI2-NEXT: shlxl %ecx, 4(%edx), %edx
394 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %ecx, %ebx
395 ; X86-NO-SHLD-HAVE-BMI2-NEXT: notb %bl
396 ; X86-NO-SHLD-HAVE-BMI2-NEXT: shlxl %ecx, %esi, %edi
397 ; X86-NO-SHLD-HAVE-BMI2-NEXT: shrl %esi
398 ; X86-NO-SHLD-HAVE-BMI2-NEXT: shrxl %ebx, %esi, %esi
399 ; X86-NO-SHLD-HAVE-BMI2-NEXT: orl %edx, %esi
400 ; X86-NO-SHLD-HAVE-BMI2-NEXT: xorl %edx, %edx
401 ; X86-NO-SHLD-HAVE-BMI2-NEXT: testb $32, %cl
402 ; X86-NO-SHLD-HAVE-BMI2-NEXT: cmovnel %edi, %esi
403 ; X86-NO-SHLD-HAVE-BMI2-NEXT: cmovel %edi, %edx
404 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %edx, (%eax)
405 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %esi, 4(%eax)
406 ; X86-NO-SHLD-HAVE-BMI2-NEXT: popl %esi
407 ; X86-NO-SHLD-HAVE-BMI2-NEXT: popl %edi
408 ; X86-NO-SHLD-HAVE-BMI2-NEXT: popl %ebx
409 ; X86-NO-SHLD-HAVE-BMI2-NEXT: retl
411 ; X86-HAVE-SHLD-HAVE-BMI2-LABEL: shl_8bytes:
412 ; X86-HAVE-SHLD-HAVE-BMI2: # %bb.0:
413 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: pushl %edi
414 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: pushl %esi
415 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
416 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
417 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
418 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl (%edx), %esi
419 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl 4(%edx), %edx
420 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movzbl (%ecx), %ecx
421 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: shlb $3, %cl
422 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: shldl %cl, %esi, %edx
423 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: shlxl %ecx, %esi, %esi
424 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: xorl %edi, %edi
425 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: testb $32, %cl
426 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: cmovnel %esi, %edx
427 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: cmovel %esi, %edi
428 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl %edx, 4(%eax)
429 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl %edi, (%eax)
430 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: popl %esi
431 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: popl %edi
432 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: retl
433 %src = load i64, ptr %src.ptr, align 1
434 %byteOff = load i64, ptr %byteOff.ptr, align 1
435 %bitOff = shl i64 %byteOff, 3
436 %res = shl i64 %src, %bitOff
437 store i64 %res, ptr %dst, align 1
; ashr_8bytes - arithmetic right shift of an 8-byte (i64) value by a byte
; offset.
; The IR loads an i64 from %src.ptr and an i64 byte offset from %byteOff.ptr
; (both align 1), turns the byte offset into a bit offset with a left shift
; by 3 (i.e. times 8), applies ashr, and stores the i64 result to %dst
; (align 1).
; On x86-64 the expected assembly is a single 64-bit shift (sarq through CL,
; or sarxq from memory with BMI2). On i686 the value is handled as two 32-bit
; halves, so the expectations are split four ways by SHLD and BMI2
; availability:
;   * the HAVE-SHLD variants combine the halves with shrdl;
;   * the NO-SHLD variants build the low half from a logical right shift of
;     the low word OR-ed with the doubled high word shifted left by the
;     inverted count (notb);
;   * all variants then test bit 5 of the bit offset (testb with 32) and use
;     cmovne/cmove to pick between the combined result and the arithmetically
;     shifted high word for the low half, and between the arithmetically
;     shifted high word and the sign fill (sarl by 31) for the high half.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
440 define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
441 ; X64-NO-BMI2-LABEL: ashr_8bytes:
442 ; X64-NO-BMI2: # %bb.0:
443 ; X64-NO-BMI2-NEXT: movq (%rdi), %rax
444 ; X64-NO-BMI2-NEXT: movzbl (%rsi), %ecx
445 ; X64-NO-BMI2-NEXT: shlb $3, %cl
446 ; X64-NO-BMI2-NEXT: sarq %cl, %rax
447 ; X64-NO-BMI2-NEXT: movq %rax, (%rdx)
448 ; X64-NO-BMI2-NEXT: retq
450 ; X64-HAVE-BMI2-LABEL: ashr_8bytes:
451 ; X64-HAVE-BMI2: # %bb.0:
452 ; X64-HAVE-BMI2-NEXT: movzbl (%rsi), %eax
453 ; X64-HAVE-BMI2-NEXT: shlb $3, %al
454 ; X64-HAVE-BMI2-NEXT: sarxq %rax, (%rdi), %rax
455 ; X64-HAVE-BMI2-NEXT: movq %rax, (%rdx)
456 ; X64-HAVE-BMI2-NEXT: retq
458 ; X86-NO-SHLD-NO-BMI2-LABEL: ashr_8bytes:
459 ; X86-NO-SHLD-NO-BMI2: # %bb.0:
460 ; X86-NO-SHLD-NO-BMI2-NEXT: pushl %ebx
461 ; X86-NO-SHLD-NO-BMI2-NEXT: pushl %edi
462 ; X86-NO-SHLD-NO-BMI2-NEXT: pushl %esi
463 ; X86-NO-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
464 ; X86-NO-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
465 ; X86-NO-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
466 ; X86-NO-SHLD-NO-BMI2-NEXT: movl (%ecx), %ebx
467 ; X86-NO-SHLD-NO-BMI2-NEXT: movl 4(%ecx), %esi
468 ; X86-NO-SHLD-NO-BMI2-NEXT: movzbl (%eax), %eax
469 ; X86-NO-SHLD-NO-BMI2-NEXT: shlb $3, %al
470 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %eax, %ecx
471 ; X86-NO-SHLD-NO-BMI2-NEXT: shrl %cl, %ebx
472 ; X86-NO-SHLD-NO-BMI2-NEXT: leal (%esi,%esi), %edi
473 ; X86-NO-SHLD-NO-BMI2-NEXT: notb %cl
474 ; X86-NO-SHLD-NO-BMI2-NEXT: shll %cl, %edi
475 ; X86-NO-SHLD-NO-BMI2-NEXT: orl %ebx, %edi
476 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %esi, %ebx
477 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %eax, %ecx
478 ; X86-NO-SHLD-NO-BMI2-NEXT: sarl %cl, %ebx
479 ; X86-NO-SHLD-NO-BMI2-NEXT: sarl $31, %esi
480 ; X86-NO-SHLD-NO-BMI2-NEXT: testb $32, %al
481 ; X86-NO-SHLD-NO-BMI2-NEXT: cmovnel %ebx, %edi
482 ; X86-NO-SHLD-NO-BMI2-NEXT: cmovel %ebx, %esi
483 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %esi, 4(%edx)
484 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %edi, (%edx)
485 ; X86-NO-SHLD-NO-BMI2-NEXT: popl %esi
486 ; X86-NO-SHLD-NO-BMI2-NEXT: popl %edi
487 ; X86-NO-SHLD-NO-BMI2-NEXT: popl %ebx
488 ; X86-NO-SHLD-NO-BMI2-NEXT: retl
490 ; X86-HAVE-SHLD-NO-BMI2-LABEL: ashr_8bytes:
491 ; X86-HAVE-SHLD-NO-BMI2: # %bb.0:
492 ; X86-HAVE-SHLD-NO-BMI2-NEXT: pushl %edi
493 ; X86-HAVE-SHLD-NO-BMI2-NEXT: pushl %esi
494 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
495 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
496 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
497 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl (%esi), %edx
498 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl 4(%esi), %esi
499 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movzbl (%ecx), %ecx
500 ; X86-HAVE-SHLD-NO-BMI2-NEXT: shlb $3, %cl
501 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl %esi, %edi
502 ; X86-HAVE-SHLD-NO-BMI2-NEXT: sarl %cl, %edi
503 ; X86-HAVE-SHLD-NO-BMI2-NEXT: shrdl %cl, %esi, %edx
504 ; X86-HAVE-SHLD-NO-BMI2-NEXT: sarl $31, %esi
505 ; X86-HAVE-SHLD-NO-BMI2-NEXT: testb $32, %cl
506 ; X86-HAVE-SHLD-NO-BMI2-NEXT: cmovnel %edi, %edx
507 ; X86-HAVE-SHLD-NO-BMI2-NEXT: cmovel %edi, %esi
508 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl %esi, 4(%eax)
509 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl %edx, (%eax)
510 ; X86-HAVE-SHLD-NO-BMI2-NEXT: popl %esi
511 ; X86-HAVE-SHLD-NO-BMI2-NEXT: popl %edi
512 ; X86-HAVE-SHLD-NO-BMI2-NEXT: retl
514 ; X86-NO-SHLD-HAVE-BMI2-LABEL: ashr_8bytes:
515 ; X86-NO-SHLD-HAVE-BMI2: # %bb.0:
516 ; X86-NO-SHLD-HAVE-BMI2-NEXT: pushl %ebx
517 ; X86-NO-SHLD-HAVE-BMI2-NEXT: pushl %edi
518 ; X86-NO-SHLD-HAVE-BMI2-NEXT: pushl %esi
519 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
520 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
521 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
522 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl 4(%esi), %ecx
523 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movzbl (%edx), %edx
524 ; X86-NO-SHLD-HAVE-BMI2-NEXT: shlb $3, %dl
525 ; X86-NO-SHLD-HAVE-BMI2-NEXT: shrxl %edx, (%esi), %esi
526 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %edx, %ebx
527 ; X86-NO-SHLD-HAVE-BMI2-NEXT: notb %bl
528 ; X86-NO-SHLD-HAVE-BMI2-NEXT: leal (%ecx,%ecx), %edi
529 ; X86-NO-SHLD-HAVE-BMI2-NEXT: shlxl %ebx, %edi, %edi
530 ; X86-NO-SHLD-HAVE-BMI2-NEXT: orl %esi, %edi
531 ; X86-NO-SHLD-HAVE-BMI2-NEXT: sarxl %edx, %ecx, %esi
532 ; X86-NO-SHLD-HAVE-BMI2-NEXT: sarl $31, %ecx
533 ; X86-NO-SHLD-HAVE-BMI2-NEXT: testb $32, %dl
534 ; X86-NO-SHLD-HAVE-BMI2-NEXT: cmovnel %esi, %edi
535 ; X86-NO-SHLD-HAVE-BMI2-NEXT: cmovel %esi, %ecx
536 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %ecx, 4(%eax)
537 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %edi, (%eax)
538 ; X86-NO-SHLD-HAVE-BMI2-NEXT: popl %esi
539 ; X86-NO-SHLD-HAVE-BMI2-NEXT: popl %edi
540 ; X86-NO-SHLD-HAVE-BMI2-NEXT: popl %ebx
541 ; X86-NO-SHLD-HAVE-BMI2-NEXT: retl
543 ; X86-HAVE-SHLD-HAVE-BMI2-LABEL: ashr_8bytes:
544 ; X86-HAVE-SHLD-HAVE-BMI2: # %bb.0:
545 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: pushl %edi
546 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: pushl %esi
547 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
548 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
549 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
550 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl (%esi), %edx
551 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl 4(%esi), %esi
552 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movzbl (%ecx), %ecx
553 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: shlb $3, %cl
554 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: shrdl %cl, %esi, %edx
555 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: sarxl %ecx, %esi, %edi
556 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: sarl $31, %esi
557 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: testb $32, %cl
558 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: cmovnel %edi, %edx
559 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: cmovel %edi, %esi
560 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl %esi, 4(%eax)
561 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl %edx, (%eax)
562 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: popl %esi
563 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: popl %edi
564 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: retl
565 %src = load i64, ptr %src.ptr, align 1
566 %byteOff = load i64, ptr %byteOff.ptr, align 1
567 %bitOff = shl i64 %byteOff, 3
568 %res = ashr i64 %src, %bitOff
569 store i64 %res, ptr %dst, align 1
573 define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
574 ; X64-NO-SHLD-NO-BMI2-LABEL: lshr_16bytes:
575 ; X64-NO-SHLD-NO-BMI2: # %bb.0:
576 ; X64-NO-SHLD-NO-BMI2-NEXT: movq (%rdi), %r8
577 ; X64-NO-SHLD-NO-BMI2-NEXT: movq 8(%rdi), %rdi
578 ; X64-NO-SHLD-NO-BMI2-NEXT: movzbl (%rsi), %eax
579 ; X64-NO-SHLD-NO-BMI2-NEXT: shlb $3, %al
580 ; X64-NO-SHLD-NO-BMI2-NEXT: movl %eax, %ecx
581 ; X64-NO-SHLD-NO-BMI2-NEXT: shrq %cl, %r8
582 ; X64-NO-SHLD-NO-BMI2-NEXT: leaq (%rdi,%rdi), %rsi
583 ; X64-NO-SHLD-NO-BMI2-NEXT: notb %cl
584 ; X64-NO-SHLD-NO-BMI2-NEXT: shlq %cl, %rsi
585 ; X64-NO-SHLD-NO-BMI2-NEXT: orq %r8, %rsi
586 ; X64-NO-SHLD-NO-BMI2-NEXT: movl %eax, %ecx
587 ; X64-NO-SHLD-NO-BMI2-NEXT: shrq %cl, %rdi
588 ; X64-NO-SHLD-NO-BMI2-NEXT: xorl %ecx, %ecx
589 ; X64-NO-SHLD-NO-BMI2-NEXT: testb $64, %al
590 ; X64-NO-SHLD-NO-BMI2-NEXT: cmovneq %rdi, %rsi
591 ; X64-NO-SHLD-NO-BMI2-NEXT: cmoveq %rdi, %rcx
592 ; X64-NO-SHLD-NO-BMI2-NEXT: movq %rcx, 8(%rdx)
593 ; X64-NO-SHLD-NO-BMI2-NEXT: movq %rsi, (%rdx)
594 ; X64-NO-SHLD-NO-BMI2-NEXT: retq
596 ; X64-HAVE-SHLD-NO-BMI2-LABEL: lshr_16bytes:
597 ; X64-HAVE-SHLD-NO-BMI2: # %bb.0:
598 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq (%rdi), %rax
599 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq 8(%rdi), %rdi
600 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movzbl (%rsi), %ecx
601 ; X64-HAVE-SHLD-NO-BMI2-NEXT: shlb $3, %cl
602 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq %rdi, %rsi
603 ; X64-HAVE-SHLD-NO-BMI2-NEXT: shrq %cl, %rsi
604 ; X64-HAVE-SHLD-NO-BMI2-NEXT: shrdq %cl, %rdi, %rax
605 ; X64-HAVE-SHLD-NO-BMI2-NEXT: xorl %edi, %edi
606 ; X64-HAVE-SHLD-NO-BMI2-NEXT: testb $64, %cl
607 ; X64-HAVE-SHLD-NO-BMI2-NEXT: cmovneq %rsi, %rax
608 ; X64-HAVE-SHLD-NO-BMI2-NEXT: cmoveq %rsi, %rdi
609 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq %rdi, 8(%rdx)
610 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq %rax, (%rdx)
611 ; X64-HAVE-SHLD-NO-BMI2-NEXT: retq
613 ; X64-NO-SHLD-HAVE-BMI2-LABEL: lshr_16bytes:
614 ; X64-NO-SHLD-HAVE-BMI2: # %bb.0:
615 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movq 8(%rdi), %rax
616 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movzbl (%rsi), %ecx
617 ; X64-NO-SHLD-HAVE-BMI2-NEXT: shlb $3, %cl
618 ; X64-NO-SHLD-HAVE-BMI2-NEXT: shrxq %rcx, (%rdi), %rsi
619 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movl %ecx, %edi
620 ; X64-NO-SHLD-HAVE-BMI2-NEXT: notb %dil
621 ; X64-NO-SHLD-HAVE-BMI2-NEXT: leaq (%rax,%rax), %r8
622 ; X64-NO-SHLD-HAVE-BMI2-NEXT: shlxq %rdi, %r8, %rdi
623 ; X64-NO-SHLD-HAVE-BMI2-NEXT: orq %rsi, %rdi
624 ; X64-NO-SHLD-HAVE-BMI2-NEXT: shrxq %rcx, %rax, %rax
625 ; X64-NO-SHLD-HAVE-BMI2-NEXT: xorl %esi, %esi
626 ; X64-NO-SHLD-HAVE-BMI2-NEXT: testb $64, %cl
627 ; X64-NO-SHLD-HAVE-BMI2-NEXT: cmovneq %rax, %rdi
628 ; X64-NO-SHLD-HAVE-BMI2-NEXT: cmoveq %rax, %rsi
629 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movq %rsi, 8(%rdx)
630 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movq %rdi, (%rdx)
631 ; X64-NO-SHLD-HAVE-BMI2-NEXT: retq
633 ; X64-HAVE-SHLD-HAVE-BMI2-LABEL: lshr_16bytes:
634 ; X64-HAVE-SHLD-HAVE-BMI2: # %bb.0:
635 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq (%rdi), %rax
636 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq 8(%rdi), %rdi
637 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movzbl (%rsi), %ecx
638 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: shlb $3, %cl
639 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: shrdq %cl, %rdi, %rax
640 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: shrxq %rcx, %rdi, %rsi
641 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: xorl %edi, %edi
642 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: testb $64, %cl
643 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: cmovneq %rsi, %rax
644 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: cmoveq %rsi, %rdi
645 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq %rdi, 8(%rdx)
646 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq %rax, (%rdx)
647 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: retq
649 ; FALLBACK16-LABEL: lshr_16bytes:
650 ; FALLBACK16: # %bb.0:
651 ; FALLBACK16-NEXT: pushl %ebp
652 ; FALLBACK16-NEXT: pushl %ebx
653 ; FALLBACK16-NEXT: pushl %edi
654 ; FALLBACK16-NEXT: pushl %esi
655 ; FALLBACK16-NEXT: subl $60, %esp
656 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %eax
657 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %ecx
658 ; FALLBACK16-NEXT: movl (%ecx), %edx
659 ; FALLBACK16-NEXT: movl 4(%ecx), %esi
660 ; FALLBACK16-NEXT: movl 8(%ecx), %edi
661 ; FALLBACK16-NEXT: movl 12(%ecx), %ecx
662 ; FALLBACK16-NEXT: movb (%eax), %ah
663 ; FALLBACK16-NEXT: movb %ah, %al
664 ; FALLBACK16-NEXT: shlb $3, %al
665 ; FALLBACK16-NEXT: xorps %xmm0, %xmm0
666 ; FALLBACK16-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
667 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
668 ; FALLBACK16-NEXT: movl %edi, {{[0-9]+}}(%esp)
669 ; FALLBACK16-NEXT: movl %esi, {{[0-9]+}}(%esp)
670 ; FALLBACK16-NEXT: movl %edx, {{[0-9]+}}(%esp)
671 ; FALLBACK16-NEXT: andb $12, %ah
672 ; FALLBACK16-NEXT: movzbl %ah, %ebp
673 ; FALLBACK16-NEXT: movl 20(%esp,%ebp), %esi
674 ; FALLBACK16-NEXT: movl %esi, %ebx
675 ; FALLBACK16-NEXT: movl %eax, %ecx
676 ; FALLBACK16-NEXT: shrl %cl, %ebx
677 ; FALLBACK16-NEXT: movl %eax, %edx
678 ; FALLBACK16-NEXT: notb %dl
679 ; FALLBACK16-NEXT: movl 24(%esp,%ebp), %ecx
680 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
681 ; FALLBACK16-NEXT: leal (%ecx,%ecx), %edi
682 ; FALLBACK16-NEXT: movl %edx, %ecx
683 ; FALLBACK16-NEXT: shll %cl, %edi
684 ; FALLBACK16-NEXT: orl %ebx, %edi
685 ; FALLBACK16-NEXT: movl 16(%esp,%ebp), %ebx
686 ; FALLBACK16-NEXT: movl %eax, %ecx
687 ; FALLBACK16-NEXT: shrl %cl, %ebx
688 ; FALLBACK16-NEXT: addl %esi, %esi
689 ; FALLBACK16-NEXT: movl %edx, %ecx
690 ; FALLBACK16-NEXT: shll %cl, %esi
691 ; FALLBACK16-NEXT: orl %ebx, %esi
692 ; FALLBACK16-NEXT: movl %eax, %ecx
693 ; FALLBACK16-NEXT: shrl %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
694 ; FALLBACK16-NEXT: movl 28(%esp,%ebp), %ebx
695 ; FALLBACK16-NEXT: leal (%ebx,%ebx), %ebp
696 ; FALLBACK16-NEXT: movl %edx, %ecx
697 ; FALLBACK16-NEXT: shll %cl, %ebp
698 ; FALLBACK16-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
699 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %edx
700 ; FALLBACK16-NEXT: movl %eax, %ecx
701 ; FALLBACK16-NEXT: shrl %cl, %ebx
702 ; FALLBACK16-NEXT: movl %ebx, 12(%edx)
703 ; FALLBACK16-NEXT: movl %ebp, 8(%edx)
704 ; FALLBACK16-NEXT: movl %esi, (%edx)
705 ; FALLBACK16-NEXT: movl %edi, 4(%edx)
706 ; FALLBACK16-NEXT: addl $60, %esp
707 ; FALLBACK16-NEXT: popl %esi
708 ; FALLBACK16-NEXT: popl %edi
709 ; FALLBACK16-NEXT: popl %ebx
710 ; FALLBACK16-NEXT: popl %ebp
711 ; FALLBACK16-NEXT: retl
713 ; FALLBACK17-LABEL: lshr_16bytes:
714 ; FALLBACK17: # %bb.0:
715 ; FALLBACK17-NEXT: pushl %ebp
716 ; FALLBACK17-NEXT: pushl %ebx
717 ; FALLBACK17-NEXT: pushl %edi
718 ; FALLBACK17-NEXT: pushl %esi
719 ; FALLBACK17-NEXT: subl $44, %esp
720 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %eax
721 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %ecx
722 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %edx
723 ; FALLBACK17-NEXT: movl (%edx), %esi
724 ; FALLBACK17-NEXT: movl 4(%edx), %edi
725 ; FALLBACK17-NEXT: movl 8(%edx), %ebx
726 ; FALLBACK17-NEXT: movl 12(%edx), %edx
727 ; FALLBACK17-NEXT: movb (%ecx), %ch
728 ; FALLBACK17-NEXT: movb %ch, %cl
729 ; FALLBACK17-NEXT: shlb $3, %cl
730 ; FALLBACK17-NEXT: xorps %xmm0, %xmm0
731 ; FALLBACK17-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
732 ; FALLBACK17-NEXT: movl %edx, {{[0-9]+}}(%esp)
733 ; FALLBACK17-NEXT: movl %ebx, {{[0-9]+}}(%esp)
734 ; FALLBACK17-NEXT: movl %edi, {{[0-9]+}}(%esp)
735 ; FALLBACK17-NEXT: movl %esi, (%esp)
736 ; FALLBACK17-NEXT: andb $12, %ch
737 ; FALLBACK17-NEXT: movzbl %ch, %ebx
738 ; FALLBACK17-NEXT: movl 8(%esp,%ebx), %esi
739 ; FALLBACK17-NEXT: movl (%esp,%ebx), %edx
740 ; FALLBACK17-NEXT: movl 4(%esp,%ebx), %ebp
741 ; FALLBACK17-NEXT: movl %ebp, %edi
742 ; FALLBACK17-NEXT: shrdl %cl, %esi, %edi
743 ; FALLBACK17-NEXT: movl 12(%esp,%ebx), %ebx
744 ; FALLBACK17-NEXT: shrdl %cl, %ebx, %esi
745 ; FALLBACK17-NEXT: shrdl %cl, %ebp, %edx
746 ; FALLBACK17-NEXT: shrl %cl, %ebx
747 ; FALLBACK17-NEXT: movl %esi, 8(%eax)
748 ; FALLBACK17-NEXT: movl %ebx, 12(%eax)
749 ; FALLBACK17-NEXT: movl %edx, (%eax)
750 ; FALLBACK17-NEXT: movl %edi, 4(%eax)
751 ; FALLBACK17-NEXT: addl $44, %esp
752 ; FALLBACK17-NEXT: popl %esi
753 ; FALLBACK17-NEXT: popl %edi
754 ; FALLBACK17-NEXT: popl %ebx
755 ; FALLBACK17-NEXT: popl %ebp
756 ; FALLBACK17-NEXT: retl
758 ; FALLBACK18-LABEL: lshr_16bytes:
759 ; FALLBACK18: # %bb.0:
760 ; FALLBACK18-NEXT: pushl %ebp
761 ; FALLBACK18-NEXT: pushl %ebx
762 ; FALLBACK18-NEXT: pushl %edi
763 ; FALLBACK18-NEXT: pushl %esi
764 ; FALLBACK18-NEXT: subl $44, %esp
765 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %eax
766 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %ecx
767 ; FALLBACK18-NEXT: movl (%ecx), %edx
768 ; FALLBACK18-NEXT: movl 4(%ecx), %esi
769 ; FALLBACK18-NEXT: movl 8(%ecx), %edi
770 ; FALLBACK18-NEXT: movl 12(%ecx), %ecx
771 ; FALLBACK18-NEXT: movzbl (%eax), %ebx
772 ; FALLBACK18-NEXT: movl %ebx, %eax
773 ; FALLBACK18-NEXT: shlb $3, %al
774 ; FALLBACK18-NEXT: xorps %xmm0, %xmm0
775 ; FALLBACK18-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
776 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
777 ; FALLBACK18-NEXT: movl %edi, {{[0-9]+}}(%esp)
778 ; FALLBACK18-NEXT: movl %esi, {{[0-9]+}}(%esp)
779 ; FALLBACK18-NEXT: movl %edx, (%esp)
780 ; FALLBACK18-NEXT: andb $12, %bl
781 ; FALLBACK18-NEXT: movzbl %bl, %esi
782 ; FALLBACK18-NEXT: movl 4(%esp,%esi), %edi
783 ; FALLBACK18-NEXT: movl 8(%esp,%esi), %ebx
784 ; FALLBACK18-NEXT: shrxl %eax, %edi, %ebp
785 ; FALLBACK18-NEXT: movl %eax, %edx
786 ; FALLBACK18-NEXT: notb %dl
787 ; FALLBACK18-NEXT: leal (%ebx,%ebx), %ecx
788 ; FALLBACK18-NEXT: shlxl %edx, %ecx, %ecx
789 ; FALLBACK18-NEXT: orl %ebp, %ecx
790 ; FALLBACK18-NEXT: shrxl %eax, (%esp,%esi), %ebp
791 ; FALLBACK18-NEXT: addl %edi, %edi
792 ; FALLBACK18-NEXT: shlxl %edx, %edi, %edi
793 ; FALLBACK18-NEXT: orl %ebp, %edi
794 ; FALLBACK18-NEXT: shrxl %eax, %ebx, %ebx
795 ; FALLBACK18-NEXT: movl 12(%esp,%esi), %esi
796 ; FALLBACK18-NEXT: shrxl %eax, %esi, %eax
797 ; FALLBACK18-NEXT: addl %esi, %esi
798 ; FALLBACK18-NEXT: shlxl %edx, %esi, %edx
799 ; FALLBACK18-NEXT: orl %ebx, %edx
800 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %esi
801 ; FALLBACK18-NEXT: movl %eax, 12(%esi)
802 ; FALLBACK18-NEXT: movl %edx, 8(%esi)
803 ; FALLBACK18-NEXT: movl %edi, (%esi)
804 ; FALLBACK18-NEXT: movl %ecx, 4(%esi)
805 ; FALLBACK18-NEXT: addl $44, %esp
806 ; FALLBACK18-NEXT: popl %esi
807 ; FALLBACK18-NEXT: popl %edi
808 ; FALLBACK18-NEXT: popl %ebx
809 ; FALLBACK18-NEXT: popl %ebp
810 ; FALLBACK18-NEXT: retl
812 ; FALLBACK19-LABEL: lshr_16bytes:
813 ; FALLBACK19: # %bb.0:
814 ; FALLBACK19-NEXT: pushl %ebp
815 ; FALLBACK19-NEXT: pushl %ebx
816 ; FALLBACK19-NEXT: pushl %edi
817 ; FALLBACK19-NEXT: pushl %esi
818 ; FALLBACK19-NEXT: subl $44, %esp
819 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %ebp
820 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %ecx
821 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %edx
822 ; FALLBACK19-NEXT: movl (%edx), %esi
823 ; FALLBACK19-NEXT: movl 4(%edx), %edi
824 ; FALLBACK19-NEXT: movl 8(%edx), %ebx
825 ; FALLBACK19-NEXT: movl 12(%edx), %edx
826 ; FALLBACK19-NEXT: movzbl (%ecx), %eax
827 ; FALLBACK19-NEXT: movl %eax, %ecx
828 ; FALLBACK19-NEXT: shlb $3, %cl
829 ; FALLBACK19-NEXT: xorps %xmm0, %xmm0
830 ; FALLBACK19-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
831 ; FALLBACK19-NEXT: movl %edx, {{[0-9]+}}(%esp)
832 ; FALLBACK19-NEXT: movl %ebx, {{[0-9]+}}(%esp)
833 ; FALLBACK19-NEXT: movl %edi, {{[0-9]+}}(%esp)
834 ; FALLBACK19-NEXT: movl %esi, (%esp)
835 ; FALLBACK19-NEXT: andb $12, %al
836 ; FALLBACK19-NEXT: movzbl %al, %eax
837 ; FALLBACK19-NEXT: movl 8(%esp,%eax), %ebx
838 ; FALLBACK19-NEXT: movl (%esp,%eax), %edx
839 ; FALLBACK19-NEXT: movl 4(%esp,%eax), %esi
840 ; FALLBACK19-NEXT: movl %esi, %edi
841 ; FALLBACK19-NEXT: shrdl %cl, %ebx, %edi
842 ; FALLBACK19-NEXT: movl 12(%esp,%eax), %eax
843 ; FALLBACK19-NEXT: shrdl %cl, %eax, %ebx
844 ; FALLBACK19-NEXT: movl %ebx, 8(%ebp)
845 ; FALLBACK19-NEXT: shrxl %ecx, %eax, %eax
846 ; FALLBACK19-NEXT: movl %eax, 12(%ebp)
847 ; FALLBACK19-NEXT: # kill: def $cl killed $cl killed $ecx
848 ; FALLBACK19-NEXT: shrdl %cl, %esi, %edx
849 ; FALLBACK19-NEXT: movl %edx, (%ebp)
850 ; FALLBACK19-NEXT: movl %edi, 4(%ebp)
851 ; FALLBACK19-NEXT: addl $44, %esp
852 ; FALLBACK19-NEXT: popl %esi
853 ; FALLBACK19-NEXT: popl %edi
854 ; FALLBACK19-NEXT: popl %ebx
855 ; FALLBACK19-NEXT: popl %ebp
856 ; FALLBACK19-NEXT: retl
858 ; FALLBACK20-LABEL: lshr_16bytes:
859 ; FALLBACK20: # %bb.0:
860 ; FALLBACK20-NEXT: pushl %ebp
861 ; FALLBACK20-NEXT: pushl %ebx
862 ; FALLBACK20-NEXT: pushl %edi
863 ; FALLBACK20-NEXT: pushl %esi
864 ; FALLBACK20-NEXT: subl $60, %esp
865 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %eax
866 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %ecx
867 ; FALLBACK20-NEXT: movups (%ecx), %xmm0
868 ; FALLBACK20-NEXT: movzbl (%eax), %ecx
869 ; FALLBACK20-NEXT: movl %ecx, %eax
870 ; FALLBACK20-NEXT: shlb $3, %al
871 ; FALLBACK20-NEXT: xorps %xmm1, %xmm1
872 ; FALLBACK20-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
873 ; FALLBACK20-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
874 ; FALLBACK20-NEXT: andb $12, %cl
875 ; FALLBACK20-NEXT: movzbl %cl, %edi
876 ; FALLBACK20-NEXT: movl 16(%esp,%edi), %ebx
877 ; FALLBACK20-NEXT: movl 20(%esp,%edi), %esi
878 ; FALLBACK20-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
879 ; FALLBACK20-NEXT: movl %eax, %ecx
880 ; FALLBACK20-NEXT: shrl %cl, %ebx
881 ; FALLBACK20-NEXT: movl %eax, %edx
882 ; FALLBACK20-NEXT: notb %dl
883 ; FALLBACK20-NEXT: addl %esi, %esi
884 ; FALLBACK20-NEXT: movl %edx, %ecx
885 ; FALLBACK20-NEXT: shll %cl, %esi
886 ; FALLBACK20-NEXT: orl %ebx, %esi
887 ; FALLBACK20-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
888 ; FALLBACK20-NEXT: movl 24(%esp,%edi), %ebx
889 ; FALLBACK20-NEXT: movl %ebx, %esi
890 ; FALLBACK20-NEXT: movl %eax, %ecx
891 ; FALLBACK20-NEXT: shrl %cl, %esi
892 ; FALLBACK20-NEXT: movl 28(%esp,%edi), %edi
893 ; FALLBACK20-NEXT: leal (%edi,%edi), %ebp
894 ; FALLBACK20-NEXT: movl %edx, %ecx
895 ; FALLBACK20-NEXT: shll %cl, %ebp
896 ; FALLBACK20-NEXT: orl %esi, %ebp
897 ; FALLBACK20-NEXT: movl %eax, %ecx
898 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
899 ; FALLBACK20-NEXT: shrl %cl, %esi
900 ; FALLBACK20-NEXT: addl %ebx, %ebx
901 ; FALLBACK20-NEXT: movl %edx, %ecx
902 ; FALLBACK20-NEXT: shll %cl, %ebx
903 ; FALLBACK20-NEXT: orl %esi, %ebx
904 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %edx
905 ; FALLBACK20-NEXT: movl %eax, %ecx
906 ; FALLBACK20-NEXT: shrl %cl, %edi
907 ; FALLBACK20-NEXT: movl %edi, 12(%edx)
908 ; FALLBACK20-NEXT: movl %ebx, 4(%edx)
909 ; FALLBACK20-NEXT: movl %ebp, 8(%edx)
910 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
911 ; FALLBACK20-NEXT: movl %eax, (%edx)
912 ; FALLBACK20-NEXT: addl $60, %esp
913 ; FALLBACK20-NEXT: popl %esi
914 ; FALLBACK20-NEXT: popl %edi
915 ; FALLBACK20-NEXT: popl %ebx
916 ; FALLBACK20-NEXT: popl %ebp
917 ; FALLBACK20-NEXT: retl
919 ; FALLBACK21-LABEL: lshr_16bytes:
920 ; FALLBACK21: # %bb.0:
921 ; FALLBACK21-NEXT: pushl %ebp
922 ; FALLBACK21-NEXT: pushl %ebx
923 ; FALLBACK21-NEXT: pushl %edi
924 ; FALLBACK21-NEXT: pushl %esi
925 ; FALLBACK21-NEXT: subl $44, %esp
926 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %ecx
927 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %edx
928 ; FALLBACK21-NEXT: movups (%edx), %xmm0
929 ; FALLBACK21-NEXT: movzbl (%ecx), %edx
930 ; FALLBACK21-NEXT: movl %edx, %ecx
931 ; FALLBACK21-NEXT: shlb $3, %cl
932 ; FALLBACK21-NEXT: xorps %xmm1, %xmm1
933 ; FALLBACK21-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
934 ; FALLBACK21-NEXT: movaps %xmm0, (%esp)
935 ; FALLBACK21-NEXT: andb $12, %dl
936 ; FALLBACK21-NEXT: movzbl %dl, %ebx
937 ; FALLBACK21-NEXT: movl 12(%esp,%ebx), %edx
938 ; FALLBACK21-NEXT: movl 8(%esp,%ebx), %ebp
939 ; FALLBACK21-NEXT: movl %ebp, %edi
940 ; FALLBACK21-NEXT: shrdl %cl, %edx, %edi
941 ; FALLBACK21-NEXT: movl (%esp,%ebx), %esi
942 ; FALLBACK21-NEXT: movl 4(%esp,%ebx), %eax
943 ; FALLBACK21-NEXT: movl %eax, %ebx
944 ; FALLBACK21-NEXT: shrdl %cl, %ebp, %ebx
945 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %ebp
946 ; FALLBACK21-NEXT: movl %ebx, 4(%ebp)
947 ; FALLBACK21-NEXT: movl %edi, 8(%ebp)
948 ; FALLBACK21-NEXT: shrdl %cl, %eax, %esi
949 ; FALLBACK21-NEXT: shrl %cl, %edx
950 ; FALLBACK21-NEXT: movl %edx, 12(%ebp)
951 ; FALLBACK21-NEXT: movl %esi, (%ebp)
952 ; FALLBACK21-NEXT: addl $44, %esp
953 ; FALLBACK21-NEXT: popl %esi
954 ; FALLBACK21-NEXT: popl %edi
955 ; FALLBACK21-NEXT: popl %ebx
956 ; FALLBACK21-NEXT: popl %ebp
957 ; FALLBACK21-NEXT: retl
959 ; FALLBACK22-LABEL: lshr_16bytes:
960 ; FALLBACK22: # %bb.0:
961 ; FALLBACK22-NEXT: pushl %ebp
962 ; FALLBACK22-NEXT: pushl %ebx
963 ; FALLBACK22-NEXT: pushl %edi
964 ; FALLBACK22-NEXT: pushl %esi
965 ; FALLBACK22-NEXT: subl $44, %esp
966 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %eax
967 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %ecx
968 ; FALLBACK22-NEXT: movups (%ecx), %xmm0
969 ; FALLBACK22-NEXT: movzbl (%eax), %ecx
970 ; FALLBACK22-NEXT: movl %ecx, %eax
971 ; FALLBACK22-NEXT: shlb $3, %al
972 ; FALLBACK22-NEXT: xorps %xmm1, %xmm1
973 ; FALLBACK22-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
974 ; FALLBACK22-NEXT: movaps %xmm0, (%esp)
975 ; FALLBACK22-NEXT: andb $12, %cl
976 ; FALLBACK22-NEXT: movzbl %cl, %edi
977 ; FALLBACK22-NEXT: shrxl %eax, (%esp,%edi), %ebx
978 ; FALLBACK22-NEXT: movl %eax, %ecx
979 ; FALLBACK22-NEXT: notb %cl
980 ; FALLBACK22-NEXT: movl 4(%esp,%edi), %ebp
981 ; FALLBACK22-NEXT: movl 8(%esp,%edi), %esi
982 ; FALLBACK22-NEXT: leal (%ebp,%ebp), %edx
983 ; FALLBACK22-NEXT: shlxl %ecx, %edx, %edx
984 ; FALLBACK22-NEXT: orl %ebx, %edx
985 ; FALLBACK22-NEXT: shrxl %eax, %esi, %ebx
986 ; FALLBACK22-NEXT: shrxl %eax, %ebp, %ebp
987 ; FALLBACK22-NEXT: movl 12(%esp,%edi), %edi
988 ; FALLBACK22-NEXT: shrxl %eax, %edi, %eax
989 ; FALLBACK22-NEXT: addl %edi, %edi
990 ; FALLBACK22-NEXT: shlxl %ecx, %edi, %edi
991 ; FALLBACK22-NEXT: orl %ebx, %edi
992 ; FALLBACK22-NEXT: addl %esi, %esi
993 ; FALLBACK22-NEXT: shlxl %ecx, %esi, %ecx
994 ; FALLBACK22-NEXT: orl %ebp, %ecx
995 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %esi
996 ; FALLBACK22-NEXT: movl %eax, 12(%esi)
997 ; FALLBACK22-NEXT: movl %ecx, 4(%esi)
998 ; FALLBACK22-NEXT: movl %edi, 8(%esi)
999 ; FALLBACK22-NEXT: movl %edx, (%esi)
1000 ; FALLBACK22-NEXT: addl $44, %esp
1001 ; FALLBACK22-NEXT: popl %esi
1002 ; FALLBACK22-NEXT: popl %edi
1003 ; FALLBACK22-NEXT: popl %ebx
1004 ; FALLBACK22-NEXT: popl %ebp
1005 ; FALLBACK22-NEXT: retl
1007 ; FALLBACK23-LABEL: lshr_16bytes:
1008 ; FALLBACK23: # %bb.0:
1009 ; FALLBACK23-NEXT: pushl %ebp
1010 ; FALLBACK23-NEXT: pushl %ebx
1011 ; FALLBACK23-NEXT: pushl %edi
1012 ; FALLBACK23-NEXT: pushl %esi
1013 ; FALLBACK23-NEXT: subl $44, %esp
1014 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %ecx
1015 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %edx
1016 ; FALLBACK23-NEXT: movups (%edx), %xmm0
1017 ; FALLBACK23-NEXT: movzbl (%ecx), %edx
1018 ; FALLBACK23-NEXT: movl %edx, %ecx
1019 ; FALLBACK23-NEXT: shlb $3, %cl
1020 ; FALLBACK23-NEXT: xorps %xmm1, %xmm1
1021 ; FALLBACK23-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
1022 ; FALLBACK23-NEXT: movaps %xmm0, (%esp)
1023 ; FALLBACK23-NEXT: andb $12, %dl
1024 ; FALLBACK23-NEXT: movzbl %dl, %ebx
1025 ; FALLBACK23-NEXT: movl 12(%esp,%ebx), %edx
1026 ; FALLBACK23-NEXT: movl 8(%esp,%ebx), %ebp
1027 ; FALLBACK23-NEXT: movl %ebp, %edi
1028 ; FALLBACK23-NEXT: shrdl %cl, %edx, %edi
1029 ; FALLBACK23-NEXT: movl (%esp,%ebx), %esi
1030 ; FALLBACK23-NEXT: movl 4(%esp,%ebx), %eax
1031 ; FALLBACK23-NEXT: movl %eax, %ebx
1032 ; FALLBACK23-NEXT: shrdl %cl, %ebp, %ebx
1033 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %ebp
1034 ; FALLBACK23-NEXT: movl %ebx, 4(%ebp)
1035 ; FALLBACK23-NEXT: movl %edi, 8(%ebp)
1036 ; FALLBACK23-NEXT: shrxl %ecx, %edx, %edx
1037 ; FALLBACK23-NEXT: movl %edx, 12(%ebp)
1038 ; FALLBACK23-NEXT: # kill: def $cl killed $cl killed $ecx
1039 ; FALLBACK23-NEXT: shrdl %cl, %eax, %esi
1040 ; FALLBACK23-NEXT: movl %esi, (%ebp)
1041 ; FALLBACK23-NEXT: addl $44, %esp
1042 ; FALLBACK23-NEXT: popl %esi
1043 ; FALLBACK23-NEXT: popl %edi
1044 ; FALLBACK23-NEXT: popl %ebx
1045 ; FALLBACK23-NEXT: popl %ebp
1046 ; FALLBACK23-NEXT: retl
1048 ; FALLBACK24-LABEL: lshr_16bytes:
1049 ; FALLBACK24: # %bb.0:
1050 ; FALLBACK24-NEXT: pushl %ebp
1051 ; FALLBACK24-NEXT: pushl %ebx
1052 ; FALLBACK24-NEXT: pushl %edi
1053 ; FALLBACK24-NEXT: pushl %esi
1054 ; FALLBACK24-NEXT: subl $60, %esp
1055 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %eax
1056 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %ecx
1057 ; FALLBACK24-NEXT: vmovups (%ecx), %xmm0
1058 ; FALLBACK24-NEXT: movzbl (%eax), %ecx
1059 ; FALLBACK24-NEXT: movl %ecx, %eax
1060 ; FALLBACK24-NEXT: shlb $3, %al
1061 ; FALLBACK24-NEXT: vxorps %xmm1, %xmm1, %xmm1
1062 ; FALLBACK24-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp)
1063 ; FALLBACK24-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
1064 ; FALLBACK24-NEXT: andb $12, %cl
1065 ; FALLBACK24-NEXT: movzbl %cl, %edi
1066 ; FALLBACK24-NEXT: movl 16(%esp,%edi), %ebx
1067 ; FALLBACK24-NEXT: movl 20(%esp,%edi), %esi
1068 ; FALLBACK24-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1069 ; FALLBACK24-NEXT: movl %eax, %ecx
1070 ; FALLBACK24-NEXT: shrl %cl, %ebx
1071 ; FALLBACK24-NEXT: movl %eax, %edx
1072 ; FALLBACK24-NEXT: notb %dl
1073 ; FALLBACK24-NEXT: addl %esi, %esi
1074 ; FALLBACK24-NEXT: movl %edx, %ecx
1075 ; FALLBACK24-NEXT: shll %cl, %esi
1076 ; FALLBACK24-NEXT: orl %ebx, %esi
1077 ; FALLBACK24-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1078 ; FALLBACK24-NEXT: movl 24(%esp,%edi), %ebx
1079 ; FALLBACK24-NEXT: movl %ebx, %esi
1080 ; FALLBACK24-NEXT: movl %eax, %ecx
1081 ; FALLBACK24-NEXT: shrl %cl, %esi
1082 ; FALLBACK24-NEXT: movl 28(%esp,%edi), %edi
1083 ; FALLBACK24-NEXT: leal (%edi,%edi), %ebp
1084 ; FALLBACK24-NEXT: movl %edx, %ecx
1085 ; FALLBACK24-NEXT: shll %cl, %ebp
1086 ; FALLBACK24-NEXT: orl %esi, %ebp
1087 ; FALLBACK24-NEXT: movl %eax, %ecx
1088 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
1089 ; FALLBACK24-NEXT: shrl %cl, %esi
1090 ; FALLBACK24-NEXT: addl %ebx, %ebx
1091 ; FALLBACK24-NEXT: movl %edx, %ecx
1092 ; FALLBACK24-NEXT: shll %cl, %ebx
1093 ; FALLBACK24-NEXT: orl %esi, %ebx
1094 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %edx
1095 ; FALLBACK24-NEXT: movl %eax, %ecx
1096 ; FALLBACK24-NEXT: shrl %cl, %edi
1097 ; FALLBACK24-NEXT: movl %edi, 12(%edx)
1098 ; FALLBACK24-NEXT: movl %ebx, 4(%edx)
1099 ; FALLBACK24-NEXT: movl %ebp, 8(%edx)
1100 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
1101 ; FALLBACK24-NEXT: movl %eax, (%edx)
1102 ; FALLBACK24-NEXT: addl $60, %esp
1103 ; FALLBACK24-NEXT: popl %esi
1104 ; FALLBACK24-NEXT: popl %edi
1105 ; FALLBACK24-NEXT: popl %ebx
1106 ; FALLBACK24-NEXT: popl %ebp
1107 ; FALLBACK24-NEXT: retl
1109 ; FALLBACK25-LABEL: lshr_16bytes:
1110 ; FALLBACK25: # %bb.0:
1111 ; FALLBACK25-NEXT: pushl %ebp
1112 ; FALLBACK25-NEXT: pushl %ebx
1113 ; FALLBACK25-NEXT: pushl %edi
1114 ; FALLBACK25-NEXT: pushl %esi
1115 ; FALLBACK25-NEXT: subl $44, %esp
1116 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %ecx
1117 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %edx
1118 ; FALLBACK25-NEXT: vmovups (%edx), %xmm0
1119 ; FALLBACK25-NEXT: movzbl (%ecx), %edx
1120 ; FALLBACK25-NEXT: movl %edx, %ecx
1121 ; FALLBACK25-NEXT: shlb $3, %cl
1122 ; FALLBACK25-NEXT: vxorps %xmm1, %xmm1, %xmm1
1123 ; FALLBACK25-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp)
1124 ; FALLBACK25-NEXT: vmovaps %xmm0, (%esp)
1125 ; FALLBACK25-NEXT: andb $12, %dl
1126 ; FALLBACK25-NEXT: movzbl %dl, %ebx
1127 ; FALLBACK25-NEXT: movl 12(%esp,%ebx), %edx
1128 ; FALLBACK25-NEXT: movl 8(%esp,%ebx), %ebp
1129 ; FALLBACK25-NEXT: movl %ebp, %edi
1130 ; FALLBACK25-NEXT: shrdl %cl, %edx, %edi
1131 ; FALLBACK25-NEXT: movl (%esp,%ebx), %esi
1132 ; FALLBACK25-NEXT: movl 4(%esp,%ebx), %eax
1133 ; FALLBACK25-NEXT: movl %eax, %ebx
1134 ; FALLBACK25-NEXT: shrdl %cl, %ebp, %ebx
1135 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %ebp
1136 ; FALLBACK25-NEXT: movl %ebx, 4(%ebp)
1137 ; FALLBACK25-NEXT: movl %edi, 8(%ebp)
1138 ; FALLBACK25-NEXT: shrdl %cl, %eax, %esi
1139 ; FALLBACK25-NEXT: shrl %cl, %edx
1140 ; FALLBACK25-NEXT: movl %edx, 12(%ebp)
1141 ; FALLBACK25-NEXT: movl %esi, (%ebp)
1142 ; FALLBACK25-NEXT: addl $44, %esp
1143 ; FALLBACK25-NEXT: popl %esi
1144 ; FALLBACK25-NEXT: popl %edi
1145 ; FALLBACK25-NEXT: popl %ebx
1146 ; FALLBACK25-NEXT: popl %ebp
1147 ; FALLBACK25-NEXT: retl
1149 ; FALLBACK26-LABEL: lshr_16bytes:
1150 ; FALLBACK26: # %bb.0:
1151 ; FALLBACK26-NEXT: pushl %ebp
1152 ; FALLBACK26-NEXT: pushl %ebx
1153 ; FALLBACK26-NEXT: pushl %edi
1154 ; FALLBACK26-NEXT: pushl %esi
1155 ; FALLBACK26-NEXT: subl $44, %esp
1156 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %eax
1157 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %ecx
1158 ; FALLBACK26-NEXT: vmovups (%ecx), %xmm0
1159 ; FALLBACK26-NEXT: movzbl (%eax), %ecx
1160 ; FALLBACK26-NEXT: movl %ecx, %eax
1161 ; FALLBACK26-NEXT: shlb $3, %al
1162 ; FALLBACK26-NEXT: vxorps %xmm1, %xmm1, %xmm1
1163 ; FALLBACK26-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp)
1164 ; FALLBACK26-NEXT: vmovaps %xmm0, (%esp)
1165 ; FALLBACK26-NEXT: andb $12, %cl
1166 ; FALLBACK26-NEXT: movzbl %cl, %edi
1167 ; FALLBACK26-NEXT: shrxl %eax, (%esp,%edi), %ebx
1168 ; FALLBACK26-NEXT: movl %eax, %ecx
1169 ; FALLBACK26-NEXT: notb %cl
1170 ; FALLBACK26-NEXT: movl 4(%esp,%edi), %ebp
1171 ; FALLBACK26-NEXT: movl 8(%esp,%edi), %esi
1172 ; FALLBACK26-NEXT: leal (%ebp,%ebp), %edx
1173 ; FALLBACK26-NEXT: shlxl %ecx, %edx, %edx
1174 ; FALLBACK26-NEXT: orl %ebx, %edx
1175 ; FALLBACK26-NEXT: shrxl %eax, %esi, %ebx
1176 ; FALLBACK26-NEXT: shrxl %eax, %ebp, %ebp
1177 ; FALLBACK26-NEXT: movl 12(%esp,%edi), %edi
1178 ; FALLBACK26-NEXT: shrxl %eax, %edi, %eax
1179 ; FALLBACK26-NEXT: addl %edi, %edi
1180 ; FALLBACK26-NEXT: shlxl %ecx, %edi, %edi
1181 ; FALLBACK26-NEXT: orl %ebx, %edi
1182 ; FALLBACK26-NEXT: addl %esi, %esi
1183 ; FALLBACK26-NEXT: shlxl %ecx, %esi, %ecx
1184 ; FALLBACK26-NEXT: orl %ebp, %ecx
1185 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %esi
1186 ; FALLBACK26-NEXT: movl %eax, 12(%esi)
1187 ; FALLBACK26-NEXT: movl %ecx, 4(%esi)
1188 ; FALLBACK26-NEXT: movl %edi, 8(%esi)
1189 ; FALLBACK26-NEXT: movl %edx, (%esi)
1190 ; FALLBACK26-NEXT: addl $44, %esp
1191 ; FALLBACK26-NEXT: popl %esi
1192 ; FALLBACK26-NEXT: popl %edi
1193 ; FALLBACK26-NEXT: popl %ebx
1194 ; FALLBACK26-NEXT: popl %ebp
1195 ; FALLBACK26-NEXT: retl
1197 ; FALLBACK27-LABEL: lshr_16bytes:
1198 ; FALLBACK27: # %bb.0:
1199 ; FALLBACK27-NEXT: pushl %ebp
1200 ; FALLBACK27-NEXT: pushl %ebx
1201 ; FALLBACK27-NEXT: pushl %edi
1202 ; FALLBACK27-NEXT: pushl %esi
1203 ; FALLBACK27-NEXT: subl $44, %esp
1204 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %ecx
1205 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %edx
1206 ; FALLBACK27-NEXT: vmovups (%edx), %xmm0
1207 ; FALLBACK27-NEXT: movzbl (%ecx), %edx
1208 ; FALLBACK27-NEXT: movl %edx, %ecx
1209 ; FALLBACK27-NEXT: shlb $3, %cl
1210 ; FALLBACK27-NEXT: vxorps %xmm1, %xmm1, %xmm1
1211 ; FALLBACK27-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp)
1212 ; FALLBACK27-NEXT: vmovaps %xmm0, (%esp)
1213 ; FALLBACK27-NEXT: andb $12, %dl
1214 ; FALLBACK27-NEXT: movzbl %dl, %ebx
1215 ; FALLBACK27-NEXT: movl 12(%esp,%ebx), %edx
1216 ; FALLBACK27-NEXT: movl 8(%esp,%ebx), %ebp
1217 ; FALLBACK27-NEXT: movl %ebp, %edi
1218 ; FALLBACK27-NEXT: shrdl %cl, %edx, %edi
1219 ; FALLBACK27-NEXT: movl (%esp,%ebx), %esi
1220 ; FALLBACK27-NEXT: movl 4(%esp,%ebx), %eax
1221 ; FALLBACK27-NEXT: movl %eax, %ebx
1222 ; FALLBACK27-NEXT: shrdl %cl, %ebp, %ebx
1223 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %ebp
1224 ; FALLBACK27-NEXT: movl %ebx, 4(%ebp)
1225 ; FALLBACK27-NEXT: movl %edi, 8(%ebp)
1226 ; FALLBACK27-NEXT: shrxl %ecx, %edx, %edx
1227 ; FALLBACK27-NEXT: movl %edx, 12(%ebp)
1228 ; FALLBACK27-NEXT: # kill: def $cl killed $cl killed $ecx
1229 ; FALLBACK27-NEXT: shrdl %cl, %eax, %esi
1230 ; FALLBACK27-NEXT: movl %esi, (%ebp)
1231 ; FALLBACK27-NEXT: addl $44, %esp
1232 ; FALLBACK27-NEXT: popl %esi
1233 ; FALLBACK27-NEXT: popl %edi
1234 ; FALLBACK27-NEXT: popl %ebx
1235 ; FALLBACK27-NEXT: popl %ebp
1236 ; FALLBACK27-NEXT: retl
1238 ; FALLBACK28-LABEL: lshr_16bytes:
1239 ; FALLBACK28: # %bb.0:
1240 ; FALLBACK28-NEXT: pushl %ebp
1241 ; FALLBACK28-NEXT: pushl %ebx
1242 ; FALLBACK28-NEXT: pushl %edi
1243 ; FALLBACK28-NEXT: pushl %esi
1244 ; FALLBACK28-NEXT: subl $60, %esp
1245 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %eax
1246 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %ecx
1247 ; FALLBACK28-NEXT: vmovups (%ecx), %xmm0
1248 ; FALLBACK28-NEXT: movzbl (%eax), %ecx
1249 ; FALLBACK28-NEXT: movl %ecx, %eax
1250 ; FALLBACK28-NEXT: shlb $3, %al
1251 ; FALLBACK28-NEXT: vxorps %xmm1, %xmm1, %xmm1
1252 ; FALLBACK28-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp)
1253 ; FALLBACK28-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
1254 ; FALLBACK28-NEXT: andb $12, %cl
1255 ; FALLBACK28-NEXT: movzbl %cl, %edi
1256 ; FALLBACK28-NEXT: movl 16(%esp,%edi), %ebx
1257 ; FALLBACK28-NEXT: movl 20(%esp,%edi), %esi
1258 ; FALLBACK28-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1259 ; FALLBACK28-NEXT: movl %eax, %ecx
1260 ; FALLBACK28-NEXT: shrl %cl, %ebx
1261 ; FALLBACK28-NEXT: movl %eax, %edx
1262 ; FALLBACK28-NEXT: notb %dl
1263 ; FALLBACK28-NEXT: addl %esi, %esi
1264 ; FALLBACK28-NEXT: movl %edx, %ecx
1265 ; FALLBACK28-NEXT: shll %cl, %esi
1266 ; FALLBACK28-NEXT: orl %ebx, %esi
1267 ; FALLBACK28-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1268 ; FALLBACK28-NEXT: movl 24(%esp,%edi), %ebx
1269 ; FALLBACK28-NEXT: movl %ebx, %esi
1270 ; FALLBACK28-NEXT: movl %eax, %ecx
1271 ; FALLBACK28-NEXT: shrl %cl, %esi
1272 ; FALLBACK28-NEXT: movl 28(%esp,%edi), %edi
1273 ; FALLBACK28-NEXT: leal (%edi,%edi), %ebp
1274 ; FALLBACK28-NEXT: movl %edx, %ecx
1275 ; FALLBACK28-NEXT: shll %cl, %ebp
1276 ; FALLBACK28-NEXT: orl %esi, %ebp
1277 ; FALLBACK28-NEXT: movl %eax, %ecx
1278 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
1279 ; FALLBACK28-NEXT: shrl %cl, %esi
1280 ; FALLBACK28-NEXT: addl %ebx, %ebx
1281 ; FALLBACK28-NEXT: movl %edx, %ecx
1282 ; FALLBACK28-NEXT: shll %cl, %ebx
1283 ; FALLBACK28-NEXT: orl %esi, %ebx
1284 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %edx
1285 ; FALLBACK28-NEXT: movl %eax, %ecx
1286 ; FALLBACK28-NEXT: shrl %cl, %edi
1287 ; FALLBACK28-NEXT: movl %edi, 12(%edx)
1288 ; FALLBACK28-NEXT: movl %ebx, 4(%edx)
1289 ; FALLBACK28-NEXT: movl %ebp, 8(%edx)
1290 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
1291 ; FALLBACK28-NEXT: movl %eax, (%edx)
1292 ; FALLBACK28-NEXT: addl $60, %esp
1293 ; FALLBACK28-NEXT: popl %esi
1294 ; FALLBACK28-NEXT: popl %edi
1295 ; FALLBACK28-NEXT: popl %ebx
1296 ; FALLBACK28-NEXT: popl %ebp
1297 ; FALLBACK28-NEXT: retl
1299 ; FALLBACK29-LABEL: lshr_16bytes:
1300 ; FALLBACK29: # %bb.0:
1301 ; FALLBACK29-NEXT: pushl %ebp
1302 ; FALLBACK29-NEXT: pushl %ebx
1303 ; FALLBACK29-NEXT: pushl %edi
1304 ; FALLBACK29-NEXT: pushl %esi
1305 ; FALLBACK29-NEXT: subl $44, %esp
1306 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %ecx
1307 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %edx
1308 ; FALLBACK29-NEXT: vmovups (%edx), %xmm0
1309 ; FALLBACK29-NEXT: movzbl (%ecx), %edx
1310 ; FALLBACK29-NEXT: movl %edx, %ecx
1311 ; FALLBACK29-NEXT: shlb $3, %cl
1312 ; FALLBACK29-NEXT: vxorps %xmm1, %xmm1, %xmm1
1313 ; FALLBACK29-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp)
1314 ; FALLBACK29-NEXT: vmovaps %xmm0, (%esp)
1315 ; FALLBACK29-NEXT: andb $12, %dl
1316 ; FALLBACK29-NEXT: movzbl %dl, %ebx
1317 ; FALLBACK29-NEXT: movl 12(%esp,%ebx), %edx
1318 ; FALLBACK29-NEXT: movl 8(%esp,%ebx), %ebp
1319 ; FALLBACK29-NEXT: movl %ebp, %edi
1320 ; FALLBACK29-NEXT: shrdl %cl, %edx, %edi
1321 ; FALLBACK29-NEXT: movl (%esp,%ebx), %esi
1322 ; FALLBACK29-NEXT: movl 4(%esp,%ebx), %eax
1323 ; FALLBACK29-NEXT: movl %eax, %ebx
1324 ; FALLBACK29-NEXT: shrdl %cl, %ebp, %ebx
1325 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %ebp
1326 ; FALLBACK29-NEXT: movl %ebx, 4(%ebp)
1327 ; FALLBACK29-NEXT: movl %edi, 8(%ebp)
1328 ; FALLBACK29-NEXT: shrdl %cl, %eax, %esi
1329 ; FALLBACK29-NEXT: shrl %cl, %edx
1330 ; FALLBACK29-NEXT: movl %edx, 12(%ebp)
1331 ; FALLBACK29-NEXT: movl %esi, (%ebp)
1332 ; FALLBACK29-NEXT: addl $44, %esp
1333 ; FALLBACK29-NEXT: popl %esi
1334 ; FALLBACK29-NEXT: popl %edi
1335 ; FALLBACK29-NEXT: popl %ebx
1336 ; FALLBACK29-NEXT: popl %ebp
1337 ; FALLBACK29-NEXT: retl
1339 ; FALLBACK30-LABEL: lshr_16bytes:
1340 ; FALLBACK30: # %bb.0:
1341 ; FALLBACK30-NEXT: pushl %ebp
1342 ; FALLBACK30-NEXT: pushl %ebx
1343 ; FALLBACK30-NEXT: pushl %edi
1344 ; FALLBACK30-NEXT: pushl %esi
1345 ; FALLBACK30-NEXT: subl $44, %esp
1346 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %eax
1347 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %ecx
1348 ; FALLBACK30-NEXT: vmovups (%ecx), %xmm0
1349 ; FALLBACK30-NEXT: movzbl (%eax), %ecx
1350 ; FALLBACK30-NEXT: movl %ecx, %eax
1351 ; FALLBACK30-NEXT: shlb $3, %al
1352 ; FALLBACK30-NEXT: vxorps %xmm1, %xmm1, %xmm1
1353 ; FALLBACK30-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp)
1354 ; FALLBACK30-NEXT: vmovaps %xmm0, (%esp)
1355 ; FALLBACK30-NEXT: andb $12, %cl
1356 ; FALLBACK30-NEXT: movzbl %cl, %edi
1357 ; FALLBACK30-NEXT: shrxl %eax, (%esp,%edi), %ebx
1358 ; FALLBACK30-NEXT: movl %eax, %ecx
1359 ; FALLBACK30-NEXT: notb %cl
1360 ; FALLBACK30-NEXT: movl 4(%esp,%edi), %ebp
1361 ; FALLBACK30-NEXT: movl 8(%esp,%edi), %esi
1362 ; FALLBACK30-NEXT: leal (%ebp,%ebp), %edx
1363 ; FALLBACK30-NEXT: shlxl %ecx, %edx, %edx
1364 ; FALLBACK30-NEXT: orl %ebx, %edx
1365 ; FALLBACK30-NEXT: shrxl %eax, %esi, %ebx
1366 ; FALLBACK30-NEXT: shrxl %eax, %ebp, %ebp
1367 ; FALLBACK30-NEXT: movl 12(%esp,%edi), %edi
1368 ; FALLBACK30-NEXT: shrxl %eax, %edi, %eax
1369 ; FALLBACK30-NEXT: addl %edi, %edi
1370 ; FALLBACK30-NEXT: shlxl %ecx, %edi, %edi
1371 ; FALLBACK30-NEXT: orl %ebx, %edi
1372 ; FALLBACK30-NEXT: addl %esi, %esi
1373 ; FALLBACK30-NEXT: shlxl %ecx, %esi, %ecx
1374 ; FALLBACK30-NEXT: orl %ebp, %ecx
1375 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %esi
1376 ; FALLBACK30-NEXT: movl %eax, 12(%esi)
1377 ; FALLBACK30-NEXT: movl %ecx, 4(%esi)
1378 ; FALLBACK30-NEXT: movl %edi, 8(%esi)
1379 ; FALLBACK30-NEXT: movl %edx, (%esi)
1380 ; FALLBACK30-NEXT: addl $44, %esp
1381 ; FALLBACK30-NEXT: popl %esi
1382 ; FALLBACK30-NEXT: popl %edi
1383 ; FALLBACK30-NEXT: popl %ebx
1384 ; FALLBACK30-NEXT: popl %ebp
1385 ; FALLBACK30-NEXT: retl
1387 ; FALLBACK31-LABEL: lshr_16bytes:
1388 ; FALLBACK31: # %bb.0:
1389 ; FALLBACK31-NEXT: pushl %ebp
1390 ; FALLBACK31-NEXT: pushl %ebx
1391 ; FALLBACK31-NEXT: pushl %edi
1392 ; FALLBACK31-NEXT: pushl %esi
1393 ; FALLBACK31-NEXT: subl $44, %esp
1394 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %ecx
1395 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %edx
1396 ; FALLBACK31-NEXT: vmovups (%edx), %xmm0
1397 ; FALLBACK31-NEXT: movzbl (%ecx), %edx
1398 ; FALLBACK31-NEXT: movl %edx, %ecx
1399 ; FALLBACK31-NEXT: shlb $3, %cl
1400 ; FALLBACK31-NEXT: vxorps %xmm1, %xmm1, %xmm1
1401 ; FALLBACK31-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp)
1402 ; FALLBACK31-NEXT: vmovaps %xmm0, (%esp)
1403 ; FALLBACK31-NEXT: andb $12, %dl
1404 ; FALLBACK31-NEXT: movzbl %dl, %ebx
1405 ; FALLBACK31-NEXT: movl 12(%esp,%ebx), %edx
1406 ; FALLBACK31-NEXT: movl 8(%esp,%ebx), %ebp
1407 ; FALLBACK31-NEXT: movl %ebp, %edi
1408 ; FALLBACK31-NEXT: shrdl %cl, %edx, %edi
1409 ; FALLBACK31-NEXT: movl (%esp,%ebx), %esi
1410 ; FALLBACK31-NEXT: movl 4(%esp,%ebx), %eax
1411 ; FALLBACK31-NEXT: movl %eax, %ebx
1412 ; FALLBACK31-NEXT: shrdl %cl, %ebp, %ebx
1413 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %ebp
1414 ; FALLBACK31-NEXT: movl %ebx, 4(%ebp)
1415 ; FALLBACK31-NEXT: movl %edi, 8(%ebp)
1416 ; FALLBACK31-NEXT: shrxl %ecx, %edx, %edx
1417 ; FALLBACK31-NEXT: movl %edx, 12(%ebp)
1418 ; FALLBACK31-NEXT: # kill: def $cl killed $cl killed $ecx
1419 ; FALLBACK31-NEXT: shrdl %cl, %eax, %esi
1420 ; FALLBACK31-NEXT: movl %esi, (%ebp)
1421 ; FALLBACK31-NEXT: addl $44, %esp
1422 ; FALLBACK31-NEXT: popl %esi
1423 ; FALLBACK31-NEXT: popl %edi
1424 ; FALLBACK31-NEXT: popl %ebx
1425 ; FALLBACK31-NEXT: popl %ebp
1426 ; FALLBACK31-NEXT: retl
1427 %src = load i128, ptr %src.ptr, align 1
1428 %byteOff = load i128, ptr %byteOff.ptr, align 1
1429 %bitOff = shl i128 %byteOff, 3
1430 %res = lshr i128 %src, %bitOff
1431 store i128 %res, ptr %dst, align 1
1435 define void @lshr_16bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nounwind { ; i128 logical right shift whose amount is given in dwords (scaled to bits below); assertions are autogenerated per run-line prefix
1436 ; X64-NO-SHLD-NO-BMI2-LABEL: lshr_16bytes_dwordOff:
1437 ; X64-NO-SHLD-NO-BMI2: # %bb.0:
1438 ; X64-NO-SHLD-NO-BMI2-NEXT: movq (%rdi), %r8
1439 ; X64-NO-SHLD-NO-BMI2-NEXT: movq 8(%rdi), %rdi
1440 ; X64-NO-SHLD-NO-BMI2-NEXT: movzbl (%rsi), %eax
1441 ; X64-NO-SHLD-NO-BMI2-NEXT: shlb $5, %al
1442 ; X64-NO-SHLD-NO-BMI2-NEXT: movl %eax, %ecx
1443 ; X64-NO-SHLD-NO-BMI2-NEXT: shrq %cl, %r8
1444 ; X64-NO-SHLD-NO-BMI2-NEXT: leaq (%rdi,%rdi), %rsi
1445 ; X64-NO-SHLD-NO-BMI2-NEXT: notb %cl
1446 ; X64-NO-SHLD-NO-BMI2-NEXT: shlq %cl, %rsi
1447 ; X64-NO-SHLD-NO-BMI2-NEXT: orq %r8, %rsi
1448 ; X64-NO-SHLD-NO-BMI2-NEXT: movl %eax, %ecx
1449 ; X64-NO-SHLD-NO-BMI2-NEXT: shrq %cl, %rdi
1450 ; X64-NO-SHLD-NO-BMI2-NEXT: xorl %ecx, %ecx
1451 ; X64-NO-SHLD-NO-BMI2-NEXT: testb $64, %al
1452 ; X64-NO-SHLD-NO-BMI2-NEXT: cmovneq %rdi, %rsi
1453 ; X64-NO-SHLD-NO-BMI2-NEXT: cmoveq %rdi, %rcx
1454 ; X64-NO-SHLD-NO-BMI2-NEXT: movq %rcx, 8(%rdx)
1455 ; X64-NO-SHLD-NO-BMI2-NEXT: movq %rsi, (%rdx)
1456 ; X64-NO-SHLD-NO-BMI2-NEXT: retq
1458 ; X64-HAVE-SHLD-NO-BMI2-LABEL: lshr_16bytes_dwordOff:
1459 ; X64-HAVE-SHLD-NO-BMI2: # %bb.0:
1460 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq (%rdi), %rax
1461 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq 8(%rdi), %rdi
1462 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movzbl (%rsi), %ecx
1463 ; X64-HAVE-SHLD-NO-BMI2-NEXT: shlb $5, %cl
1464 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq %rdi, %rsi
1465 ; X64-HAVE-SHLD-NO-BMI2-NEXT: shrq %cl, %rsi
1466 ; X64-HAVE-SHLD-NO-BMI2-NEXT: shrdq %cl, %rdi, %rax
1467 ; X64-HAVE-SHLD-NO-BMI2-NEXT: xorl %edi, %edi
1468 ; X64-HAVE-SHLD-NO-BMI2-NEXT: testb $64, %cl
1469 ; X64-HAVE-SHLD-NO-BMI2-NEXT: cmovneq %rsi, %rax
1470 ; X64-HAVE-SHLD-NO-BMI2-NEXT: cmoveq %rsi, %rdi
1471 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq %rdi, 8(%rdx)
1472 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq %rax, (%rdx)
1473 ; X64-HAVE-SHLD-NO-BMI2-NEXT: retq
1475 ; X64-NO-SHLD-HAVE-BMI2-LABEL: lshr_16bytes_dwordOff:
1476 ; X64-NO-SHLD-HAVE-BMI2: # %bb.0:
1477 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movq 8(%rdi), %rax
1478 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movzbl (%rsi), %ecx
1479 ; X64-NO-SHLD-HAVE-BMI2-NEXT: shlb $5, %cl
1480 ; X64-NO-SHLD-HAVE-BMI2-NEXT: shrxq %rcx, (%rdi), %rsi
1481 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movl %ecx, %edi
1482 ; X64-NO-SHLD-HAVE-BMI2-NEXT: notb %dil
1483 ; X64-NO-SHLD-HAVE-BMI2-NEXT: leaq (%rax,%rax), %r8
1484 ; X64-NO-SHLD-HAVE-BMI2-NEXT: shlxq %rdi, %r8, %rdi
1485 ; X64-NO-SHLD-HAVE-BMI2-NEXT: orq %rsi, %rdi
1486 ; X64-NO-SHLD-HAVE-BMI2-NEXT: shrxq %rcx, %rax, %rax
1487 ; X64-NO-SHLD-HAVE-BMI2-NEXT: xorl %esi, %esi
1488 ; X64-NO-SHLD-HAVE-BMI2-NEXT: testb $64, %cl
1489 ; X64-NO-SHLD-HAVE-BMI2-NEXT: cmovneq %rax, %rdi
1490 ; X64-NO-SHLD-HAVE-BMI2-NEXT: cmoveq %rax, %rsi
1491 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movq %rsi, 8(%rdx)
1492 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movq %rdi, (%rdx)
1493 ; X64-NO-SHLD-HAVE-BMI2-NEXT: retq
1495 ; X64-HAVE-SHLD-HAVE-BMI2-LABEL: lshr_16bytes_dwordOff:
1496 ; X64-HAVE-SHLD-HAVE-BMI2: # %bb.0:
1497 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq (%rdi), %rax
1498 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq 8(%rdi), %rdi
1499 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movzbl (%rsi), %ecx
1500 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: shlb $5, %cl
1501 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: shrdq %cl, %rdi, %rax
1502 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: shrxq %rcx, %rdi, %rsi
1503 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: xorl %edi, %edi
1504 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: testb $64, %cl
1505 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: cmovneq %rsi, %rax
1506 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: cmoveq %rsi, %rdi
1507 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq %rdi, 8(%rdx)
1508 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq %rax, (%rdx)
1509 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: retq
1511 ; X86-SSE2-LABEL: lshr_16bytes_dwordOff:
1512 ; X86-SSE2: # %bb.0:
1513 ; X86-SSE2-NEXT: pushl %ebx
1514 ; X86-SSE2-NEXT: pushl %edi
1515 ; X86-SSE2-NEXT: pushl %esi
1516 ; X86-SSE2-NEXT: subl $32, %esp
1517 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
1518 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
1519 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
1520 ; X86-SSE2-NEXT: movl (%edx), %esi
1521 ; X86-SSE2-NEXT: movl 4(%edx), %edi
1522 ; X86-SSE2-NEXT: movl 8(%edx), %ebx
1523 ; X86-SSE2-NEXT: movl 12(%edx), %edx
1524 ; X86-SSE2-NEXT: movzbl (%ecx), %ecx
1525 ; X86-SSE2-NEXT: xorps %xmm0, %xmm0
1526 ; X86-SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
1527 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
1528 ; X86-SSE2-NEXT: movl %ebx, {{[0-9]+}}(%esp)
1529 ; X86-SSE2-NEXT: movl %edi, {{[0-9]+}}(%esp)
1530 ; X86-SSE2-NEXT: movl %esi, (%esp)
1531 ; X86-SSE2-NEXT: andl $3, %ecx
1532 ; X86-SSE2-NEXT: movl (%esp,%ecx,4), %edx
1533 ; X86-SSE2-NEXT: movl 4(%esp,%ecx,4), %esi
1534 ; X86-SSE2-NEXT: movl 12(%esp,%ecx,4), %edi
1535 ; X86-SSE2-NEXT: movl 8(%esp,%ecx,4), %ecx
1536 ; X86-SSE2-NEXT: movl %ecx, 8(%eax)
1537 ; X86-SSE2-NEXT: movl %edi, 12(%eax)
1538 ; X86-SSE2-NEXT: movl %edx, (%eax)
1539 ; X86-SSE2-NEXT: movl %esi, 4(%eax)
1540 ; X86-SSE2-NEXT: addl $32, %esp
1541 ; X86-SSE2-NEXT: popl %esi
1542 ; X86-SSE2-NEXT: popl %edi
1543 ; X86-SSE2-NEXT: popl %ebx
1544 ; X86-SSE2-NEXT: retl
1546 ; X86-SSE42-LABEL: lshr_16bytes_dwordOff:
1547 ; X86-SSE42: # %bb.0:
1548 ; X86-SSE42-NEXT: subl $44, %esp
1549 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %eax
1550 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %ecx
1551 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %edx
1552 ; X86-SSE42-NEXT: movups (%edx), %xmm0
1553 ; X86-SSE42-NEXT: movzbl (%ecx), %ecx
1554 ; X86-SSE42-NEXT: xorps %xmm1, %xmm1
1555 ; X86-SSE42-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
1556 ; X86-SSE42-NEXT: movaps %xmm0, (%esp)
1557 ; X86-SSE42-NEXT: andl $3, %ecx
1558 ; X86-SSE42-NEXT: movups (%esp,%ecx,4), %xmm0
1559 ; X86-SSE42-NEXT: movups %xmm0, (%eax)
1560 ; X86-SSE42-NEXT: addl $44, %esp
1561 ; X86-SSE42-NEXT: retl
1563 ; X86-AVX-LABEL: lshr_16bytes_dwordOff:
1564 ; X86-AVX: # %bb.0:
1565 ; X86-AVX-NEXT: subl $44, %esp
1566 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
1567 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
1568 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
1569 ; X86-AVX-NEXT: vmovups (%edx), %xmm0
1570 ; X86-AVX-NEXT: movzbl (%ecx), %ecx
1571 ; X86-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
1572 ; X86-AVX-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp)
1573 ; X86-AVX-NEXT: vmovaps %xmm0, (%esp)
1574 ; X86-AVX-NEXT: andl $3, %ecx
1575 ; X86-AVX-NEXT: vmovups (%esp,%ecx,4), %xmm0
1576 ; X86-AVX-NEXT: vmovups %xmm0, (%eax)
1577 ; X86-AVX-NEXT: addl $44, %esp
1578 ; X86-AVX-NEXT: retl
1579 %src = load i128, ptr %src.ptr, align 1 ; 16-byte value to shift; align 1, so no alignment is assumed
1580 %dwordOff = load i128, ptr %dwordOff.ptr, align 1 ; shift amount, loaded as a full i128
1581 %bitOff = shl i128 %dwordOff, 5 ; scale the offset by 32 to get a bit count
1582 %res = lshr i128 %src, %bitOff ; logical (zero-filling) right shift
1583 store i128 %res, ptr %dst, align 1 ; write the shifted value out; align 1 again
1584 ret void
1585 }
1587 define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
1588 ; X64-NO-SHLD-NO-BMI2-LABEL: shl_16bytes:
1589 ; X64-NO-SHLD-NO-BMI2: # %bb.0:
1590 ; X64-NO-SHLD-NO-BMI2-NEXT: movq (%rdi), %r8
1591 ; X64-NO-SHLD-NO-BMI2-NEXT: movq 8(%rdi), %rdi
1592 ; X64-NO-SHLD-NO-BMI2-NEXT: movzbl (%rsi), %eax
1593 ; X64-NO-SHLD-NO-BMI2-NEXT: shlb $3, %al
1594 ; X64-NO-SHLD-NO-BMI2-NEXT: movl %eax, %ecx
1595 ; X64-NO-SHLD-NO-BMI2-NEXT: shlq %cl, %rdi
1596 ; X64-NO-SHLD-NO-BMI2-NEXT: movq %r8, %rsi
1597 ; X64-NO-SHLD-NO-BMI2-NEXT: shrq %rsi
1598 ; X64-NO-SHLD-NO-BMI2-NEXT: notb %cl
1599 ; X64-NO-SHLD-NO-BMI2-NEXT: shrq %cl, %rsi
1600 ; X64-NO-SHLD-NO-BMI2-NEXT: orq %rdi, %rsi
1601 ; X64-NO-SHLD-NO-BMI2-NEXT: movl %eax, %ecx
1602 ; X64-NO-SHLD-NO-BMI2-NEXT: shlq %cl, %r8
1603 ; X64-NO-SHLD-NO-BMI2-NEXT: xorl %ecx, %ecx
1604 ; X64-NO-SHLD-NO-BMI2-NEXT: testb $64, %al
1605 ; X64-NO-SHLD-NO-BMI2-NEXT: cmovneq %r8, %rsi
1606 ; X64-NO-SHLD-NO-BMI2-NEXT: cmoveq %r8, %rcx
1607 ; X64-NO-SHLD-NO-BMI2-NEXT: movq %rcx, (%rdx)
1608 ; X64-NO-SHLD-NO-BMI2-NEXT: movq %rsi, 8(%rdx)
1609 ; X64-NO-SHLD-NO-BMI2-NEXT: retq
1611 ; X64-HAVE-SHLD-NO-BMI2-LABEL: shl_16bytes:
1612 ; X64-HAVE-SHLD-NO-BMI2: # %bb.0:
1613 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq (%rdi), %rax
1614 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq 8(%rdi), %rdi
1615 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movzbl (%rsi), %ecx
1616 ; X64-HAVE-SHLD-NO-BMI2-NEXT: shlb $3, %cl
1617 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq %rax, %rsi
1618 ; X64-HAVE-SHLD-NO-BMI2-NEXT: shlq %cl, %rsi
1619 ; X64-HAVE-SHLD-NO-BMI2-NEXT: shldq %cl, %rax, %rdi
1620 ; X64-HAVE-SHLD-NO-BMI2-NEXT: xorl %eax, %eax
1621 ; X64-HAVE-SHLD-NO-BMI2-NEXT: testb $64, %cl
1622 ; X64-HAVE-SHLD-NO-BMI2-NEXT: cmovneq %rsi, %rdi
1623 ; X64-HAVE-SHLD-NO-BMI2-NEXT: cmoveq %rsi, %rax
1624 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq %rdi, 8(%rdx)
1625 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq %rax, (%rdx)
1626 ; X64-HAVE-SHLD-NO-BMI2-NEXT: retq
1628 ; X64-NO-SHLD-HAVE-BMI2-LABEL: shl_16bytes:
1629 ; X64-NO-SHLD-HAVE-BMI2: # %bb.0:
1630 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movq (%rdi), %rax
1631 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movzbl (%rsi), %ecx
1632 ; X64-NO-SHLD-HAVE-BMI2-NEXT: shlb $3, %cl
1633 ; X64-NO-SHLD-HAVE-BMI2-NEXT: shlxq %rcx, 8(%rdi), %rsi
1634 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movl %ecx, %edi
1635 ; X64-NO-SHLD-HAVE-BMI2-NEXT: notb %dil
1636 ; X64-NO-SHLD-HAVE-BMI2-NEXT: shlxq %rcx, %rax, %r8
1637 ; X64-NO-SHLD-HAVE-BMI2-NEXT: shrq %rax
1638 ; X64-NO-SHLD-HAVE-BMI2-NEXT: shrxq %rdi, %rax, %rax
1639 ; X64-NO-SHLD-HAVE-BMI2-NEXT: orq %rsi, %rax
1640 ; X64-NO-SHLD-HAVE-BMI2-NEXT: xorl %esi, %esi
1641 ; X64-NO-SHLD-HAVE-BMI2-NEXT: testb $64, %cl
1642 ; X64-NO-SHLD-HAVE-BMI2-NEXT: cmovneq %r8, %rax
1643 ; X64-NO-SHLD-HAVE-BMI2-NEXT: cmoveq %r8, %rsi
1644 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movq %rsi, (%rdx)
1645 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movq %rax, 8(%rdx)
1646 ; X64-NO-SHLD-HAVE-BMI2-NEXT: retq
1648 ; X64-HAVE-SHLD-HAVE-BMI2-LABEL: shl_16bytes:
1649 ; X64-HAVE-SHLD-HAVE-BMI2: # %bb.0:
1650 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq (%rdi), %rax
1651 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq 8(%rdi), %rdi
1652 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movzbl (%rsi), %ecx
1653 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: shlb $3, %cl
1654 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: shldq %cl, %rax, %rdi
1655 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: shlxq %rcx, %rax, %rax
1656 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: xorl %esi, %esi
1657 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: testb $64, %cl
1658 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: cmovneq %rax, %rdi
1659 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: cmoveq %rax, %rsi
1660 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq %rdi, 8(%rdx)
1661 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq %rsi, (%rdx)
1662 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: retq
1664 ; FALLBACK16-LABEL: shl_16bytes:
1665 ; FALLBACK16: # %bb.0:
1666 ; FALLBACK16-NEXT: pushl %ebp
1667 ; FALLBACK16-NEXT: pushl %ebx
1668 ; FALLBACK16-NEXT: pushl %edi
1669 ; FALLBACK16-NEXT: pushl %esi
1670 ; FALLBACK16-NEXT: subl $60, %esp
1671 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %eax
1672 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %ecx
1673 ; FALLBACK16-NEXT: movl (%ecx), %ebx
1674 ; FALLBACK16-NEXT: movl 4(%ecx), %esi
1675 ; FALLBACK16-NEXT: movl 8(%ecx), %edi
1676 ; FALLBACK16-NEXT: movl 12(%ecx), %ecx
1677 ; FALLBACK16-NEXT: movb (%eax), %ah
1678 ; FALLBACK16-NEXT: movb %ah, %dh
1679 ; FALLBACK16-NEXT: shlb $3, %dh
1680 ; FALLBACK16-NEXT: xorps %xmm0, %xmm0
1681 ; FALLBACK16-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
1682 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1683 ; FALLBACK16-NEXT: movl %edi, {{[0-9]+}}(%esp)
1684 ; FALLBACK16-NEXT: movl %esi, {{[0-9]+}}(%esp)
1685 ; FALLBACK16-NEXT: movl %ebx, {{[0-9]+}}(%esp)
1686 ; FALLBACK16-NEXT: andb $12, %ah
1687 ; FALLBACK16-NEXT: negb %ah
1688 ; FALLBACK16-NEXT: movsbl %ah, %ebp
1689 ; FALLBACK16-NEXT: movl 32(%esp,%ebp), %ebx
1690 ; FALLBACK16-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1691 ; FALLBACK16-NEXT: movl 36(%esp,%ebp), %esi
1692 ; FALLBACK16-NEXT: movl %esi, %edi
1693 ; FALLBACK16-NEXT: movb %dh, %cl
1694 ; FALLBACK16-NEXT: shll %cl, %edi
1695 ; FALLBACK16-NEXT: movb %dh, %dl
1696 ; FALLBACK16-NEXT: notb %dl
1697 ; FALLBACK16-NEXT: shrl %ebx
1698 ; FALLBACK16-NEXT: movl %edx, %ecx
1699 ; FALLBACK16-NEXT: shrl %cl, %ebx
1700 ; FALLBACK16-NEXT: orl %edi, %ebx
1701 ; FALLBACK16-NEXT: movl 44(%esp,%ebp), %eax
1702 ; FALLBACK16-NEXT: movb %dh, %cl
1703 ; FALLBACK16-NEXT: shll %cl, %eax
1704 ; FALLBACK16-NEXT: movl 40(%esp,%ebp), %edi
1705 ; FALLBACK16-NEXT: movl %edi, %ebp
1706 ; FALLBACK16-NEXT: shrl %ebp
1707 ; FALLBACK16-NEXT: movl %edx, %ecx
1708 ; FALLBACK16-NEXT: shrl %cl, %ebp
1709 ; FALLBACK16-NEXT: orl %eax, %ebp
1710 ; FALLBACK16-NEXT: movb %dh, %cl
1711 ; FALLBACK16-NEXT: shll %cl, %edi
1712 ; FALLBACK16-NEXT: shrl %esi
1713 ; FALLBACK16-NEXT: movl %edx, %ecx
1714 ; FALLBACK16-NEXT: shrl %cl, %esi
1715 ; FALLBACK16-NEXT: orl %edi, %esi
1716 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %eax
1717 ; FALLBACK16-NEXT: movb %dh, %cl
1718 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
1719 ; FALLBACK16-NEXT: shll %cl, %edx
1720 ; FALLBACK16-NEXT: movl %edx, (%eax)
1721 ; FALLBACK16-NEXT: movl %esi, 8(%eax)
1722 ; FALLBACK16-NEXT: movl %ebp, 12(%eax)
1723 ; FALLBACK16-NEXT: movl %ebx, 4(%eax)
1724 ; FALLBACK16-NEXT: addl $60, %esp
1725 ; FALLBACK16-NEXT: popl %esi
1726 ; FALLBACK16-NEXT: popl %edi
1727 ; FALLBACK16-NEXT: popl %ebx
1728 ; FALLBACK16-NEXT: popl %ebp
1729 ; FALLBACK16-NEXT: retl
1731 ; FALLBACK17-LABEL: shl_16bytes:
1732 ; FALLBACK17: # %bb.0:
1733 ; FALLBACK17-NEXT: pushl %ebx
1734 ; FALLBACK17-NEXT: pushl %edi
1735 ; FALLBACK17-NEXT: pushl %esi
1736 ; FALLBACK17-NEXT: subl $32, %esp
1737 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %eax
1738 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %ecx
1739 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %edx
1740 ; FALLBACK17-NEXT: movl (%edx), %esi
1741 ; FALLBACK17-NEXT: movl 4(%edx), %edi
1742 ; FALLBACK17-NEXT: movl 8(%edx), %ebx
1743 ; FALLBACK17-NEXT: movl 12(%edx), %edx
1744 ; FALLBACK17-NEXT: movb (%ecx), %ch
1745 ; FALLBACK17-NEXT: movb %ch, %cl
1746 ; FALLBACK17-NEXT: shlb $3, %cl
1747 ; FALLBACK17-NEXT: xorps %xmm0, %xmm0
1748 ; FALLBACK17-NEXT: movaps %xmm0, (%esp)
1749 ; FALLBACK17-NEXT: movl %edx, {{[0-9]+}}(%esp)
1750 ; FALLBACK17-NEXT: movl %ebx, {{[0-9]+}}(%esp)
1751 ; FALLBACK17-NEXT: movl %edi, {{[0-9]+}}(%esp)
1752 ; FALLBACK17-NEXT: movl %esi, {{[0-9]+}}(%esp)
1753 ; FALLBACK17-NEXT: andb $12, %ch
1754 ; FALLBACK17-NEXT: negb %ch
1755 ; FALLBACK17-NEXT: movsbl %ch, %edi
1756 ; FALLBACK17-NEXT: movl 24(%esp,%edi), %esi
1757 ; FALLBACK17-NEXT: movl 28(%esp,%edi), %edx
1758 ; FALLBACK17-NEXT: shldl %cl, %esi, %edx
1759 ; FALLBACK17-NEXT: movl 16(%esp,%edi), %ebx
1760 ; FALLBACK17-NEXT: movl 20(%esp,%edi), %edi
1761 ; FALLBACK17-NEXT: shldl %cl, %edi, %esi
1762 ; FALLBACK17-NEXT: shldl %cl, %ebx, %edi
1763 ; FALLBACK17-NEXT: shll %cl, %ebx
1764 ; FALLBACK17-NEXT: movl %esi, 8(%eax)
1765 ; FALLBACK17-NEXT: movl %edx, 12(%eax)
1766 ; FALLBACK17-NEXT: movl %ebx, (%eax)
1767 ; FALLBACK17-NEXT: movl %edi, 4(%eax)
1768 ; FALLBACK17-NEXT: addl $32, %esp
1769 ; FALLBACK17-NEXT: popl %esi
1770 ; FALLBACK17-NEXT: popl %edi
1771 ; FALLBACK17-NEXT: popl %ebx
1772 ; FALLBACK17-NEXT: retl
1774 ; FALLBACK18-LABEL: shl_16bytes:
1775 ; FALLBACK18: # %bb.0:
1776 ; FALLBACK18-NEXT: pushl %ebp
1777 ; FALLBACK18-NEXT: pushl %ebx
1778 ; FALLBACK18-NEXT: pushl %edi
1779 ; FALLBACK18-NEXT: pushl %esi
1780 ; FALLBACK18-NEXT: subl $44, %esp
1781 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %eax
1782 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %ecx
1783 ; FALLBACK18-NEXT: movl (%ecx), %edx
1784 ; FALLBACK18-NEXT: movl 4(%ecx), %esi
1785 ; FALLBACK18-NEXT: movl 8(%ecx), %edi
1786 ; FALLBACK18-NEXT: movl 12(%ecx), %ecx
1787 ; FALLBACK18-NEXT: movzbl (%eax), %eax
1788 ; FALLBACK18-NEXT: movl %eax, %ebx
1789 ; FALLBACK18-NEXT: shlb $3, %bl
1790 ; FALLBACK18-NEXT: xorps %xmm0, %xmm0
1791 ; FALLBACK18-NEXT: movaps %xmm0, (%esp)
1792 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
1793 ; FALLBACK18-NEXT: movl %edi, {{[0-9]+}}(%esp)
1794 ; FALLBACK18-NEXT: movl %esi, {{[0-9]+}}(%esp)
1795 ; FALLBACK18-NEXT: movl %edx, {{[0-9]+}}(%esp)
1796 ; FALLBACK18-NEXT: andb $12, %al
1797 ; FALLBACK18-NEXT: negb %al
1798 ; FALLBACK18-NEXT: movsbl %al, %edx
1799 ; FALLBACK18-NEXT: movl 16(%esp,%edx), %edi
1800 ; FALLBACK18-NEXT: movl 20(%esp,%edx), %ecx
1801 ; FALLBACK18-NEXT: shlxl %ebx, %ecx, %esi
1802 ; FALLBACK18-NEXT: shlxl %ebx, %edi, %ebp
1803 ; FALLBACK18-NEXT: movl %ebx, %eax
1804 ; FALLBACK18-NEXT: notb %al
1805 ; FALLBACK18-NEXT: shrl %edi
1806 ; FALLBACK18-NEXT: shrxl %eax, %edi, %edi
1807 ; FALLBACK18-NEXT: orl %esi, %edi
1808 ; FALLBACK18-NEXT: shlxl %ebx, 28(%esp,%edx), %esi
1809 ; FALLBACK18-NEXT: movl 24(%esp,%edx), %edx
1810 ; FALLBACK18-NEXT: shlxl %ebx, %edx, %ebx
1811 ; FALLBACK18-NEXT: shrl %edx
1812 ; FALLBACK18-NEXT: shrxl %eax, %edx, %edx
1813 ; FALLBACK18-NEXT: orl %esi, %edx
1814 ; FALLBACK18-NEXT: shrl %ecx
1815 ; FALLBACK18-NEXT: shrxl %eax, %ecx, %eax
1816 ; FALLBACK18-NEXT: orl %ebx, %eax
1817 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %ecx
1818 ; FALLBACK18-NEXT: movl %ebp, (%ecx)
1819 ; FALLBACK18-NEXT: movl %eax, 8(%ecx)
1820 ; FALLBACK18-NEXT: movl %edx, 12(%ecx)
1821 ; FALLBACK18-NEXT: movl %edi, 4(%ecx)
1822 ; FALLBACK18-NEXT: addl $44, %esp
1823 ; FALLBACK18-NEXT: popl %esi
1824 ; FALLBACK18-NEXT: popl %edi
1825 ; FALLBACK18-NEXT: popl %ebx
1826 ; FALLBACK18-NEXT: popl %ebp
1827 ; FALLBACK18-NEXT: retl
1829 ; FALLBACK19-LABEL: shl_16bytes:
1830 ; FALLBACK19: # %bb.0:
1831 ; FALLBACK19-NEXT: pushl %ebp
1832 ; FALLBACK19-NEXT: pushl %ebx
1833 ; FALLBACK19-NEXT: pushl %edi
1834 ; FALLBACK19-NEXT: pushl %esi
1835 ; FALLBACK19-NEXT: subl $44, %esp
1836 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %ebp
1837 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %ecx
1838 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %edx
1839 ; FALLBACK19-NEXT: movl (%edx), %esi
1840 ; FALLBACK19-NEXT: movl 4(%edx), %edi
1841 ; FALLBACK19-NEXT: movl 8(%edx), %ebx
1842 ; FALLBACK19-NEXT: movl 12(%edx), %edx
1843 ; FALLBACK19-NEXT: movzbl (%ecx), %eax
1844 ; FALLBACK19-NEXT: movl %eax, %ecx
1845 ; FALLBACK19-NEXT: shlb $3, %cl
1846 ; FALLBACK19-NEXT: xorps %xmm0, %xmm0
1847 ; FALLBACK19-NEXT: movaps %xmm0, (%esp)
1848 ; FALLBACK19-NEXT: movl %edx, {{[0-9]+}}(%esp)
1849 ; FALLBACK19-NEXT: movl %ebx, {{[0-9]+}}(%esp)
1850 ; FALLBACK19-NEXT: movl %edi, {{[0-9]+}}(%esp)
1851 ; FALLBACK19-NEXT: movl %esi, {{[0-9]+}}(%esp)
1852 ; FALLBACK19-NEXT: andb $12, %al
1853 ; FALLBACK19-NEXT: negb %al
1854 ; FALLBACK19-NEXT: movsbl %al, %eax
1855 ; FALLBACK19-NEXT: movl 24(%esp,%eax), %esi
1856 ; FALLBACK19-NEXT: movl 28(%esp,%eax), %edx
1857 ; FALLBACK19-NEXT: shldl %cl, %esi, %edx
1858 ; FALLBACK19-NEXT: movl 16(%esp,%eax), %edi
1859 ; FALLBACK19-NEXT: movl 20(%esp,%eax), %eax
1860 ; FALLBACK19-NEXT: shldl %cl, %eax, %esi
1861 ; FALLBACK19-NEXT: shldl %cl, %edi, %eax
1862 ; FALLBACK19-NEXT: shlxl %ecx, %edi, %ecx
1863 ; FALLBACK19-NEXT: movl %esi, 8(%ebp)
1864 ; FALLBACK19-NEXT: movl %edx, 12(%ebp)
1865 ; FALLBACK19-NEXT: movl %ecx, (%ebp)
1866 ; FALLBACK19-NEXT: movl %eax, 4(%ebp)
1867 ; FALLBACK19-NEXT: addl $44, %esp
1868 ; FALLBACK19-NEXT: popl %esi
1869 ; FALLBACK19-NEXT: popl %edi
1870 ; FALLBACK19-NEXT: popl %ebx
1871 ; FALLBACK19-NEXT: popl %ebp
1872 ; FALLBACK19-NEXT: retl
1874 ; FALLBACK20-LABEL: shl_16bytes:
1875 ; FALLBACK20: # %bb.0:
1876 ; FALLBACK20-NEXT: pushl %ebp
1877 ; FALLBACK20-NEXT: pushl %ebx
1878 ; FALLBACK20-NEXT: pushl %edi
1879 ; FALLBACK20-NEXT: pushl %esi
1880 ; FALLBACK20-NEXT: subl $60, %esp
1881 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %eax
1882 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %ecx
1883 ; FALLBACK20-NEXT: movups (%ecx), %xmm0
1884 ; FALLBACK20-NEXT: movzbl (%eax), %ecx
1885 ; FALLBACK20-NEXT: movl %ecx, %eax
1886 ; FALLBACK20-NEXT: shlb $3, %al
1887 ; FALLBACK20-NEXT: xorps %xmm1, %xmm1
1888 ; FALLBACK20-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
1889 ; FALLBACK20-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
1890 ; FALLBACK20-NEXT: andb $12, %cl
1891 ; FALLBACK20-NEXT: negb %cl
1892 ; FALLBACK20-NEXT: movsbl %cl, %edi
1893 ; FALLBACK20-NEXT: movl 44(%esp,%edi), %ebx
1894 ; FALLBACK20-NEXT: movl %eax, %ecx
1895 ; FALLBACK20-NEXT: shll %cl, %ebx
1896 ; FALLBACK20-NEXT: movl %eax, %edx
1897 ; FALLBACK20-NEXT: notb %dl
1898 ; FALLBACK20-NEXT: movl 40(%esp,%edi), %ebp
1899 ; FALLBACK20-NEXT: movl %ebp, %esi
1900 ; FALLBACK20-NEXT: shrl %esi
1901 ; FALLBACK20-NEXT: movl %edx, %ecx
1902 ; FALLBACK20-NEXT: shrl %cl, %esi
1903 ; FALLBACK20-NEXT: orl %ebx, %esi
1904 ; FALLBACK20-NEXT: movl %eax, %ecx
1905 ; FALLBACK20-NEXT: shll %cl, %ebp
1906 ; FALLBACK20-NEXT: movl 32(%esp,%edi), %ecx
1907 ; FALLBACK20-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1908 ; FALLBACK20-NEXT: movl 36(%esp,%edi), %ebx
1909 ; FALLBACK20-NEXT: movl %ebx, %edi
1910 ; FALLBACK20-NEXT: shrl %edi
1911 ; FALLBACK20-NEXT: movl %edx, %ecx
1912 ; FALLBACK20-NEXT: shrl %cl, %edi
1913 ; FALLBACK20-NEXT: orl %ebp, %edi
1914 ; FALLBACK20-NEXT: movl %eax, %ecx
1915 ; FALLBACK20-NEXT: shll %cl, %ebx
1916 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
1917 ; FALLBACK20-NEXT: shrl %ebp
1918 ; FALLBACK20-NEXT: movl %edx, %ecx
1919 ; FALLBACK20-NEXT: shrl %cl, %ebp
1920 ; FALLBACK20-NEXT: orl %ebx, %ebp
1921 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %edx
1922 ; FALLBACK20-NEXT: movl %eax, %ecx
1923 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
1924 ; FALLBACK20-NEXT: shll %cl, %eax
1925 ; FALLBACK20-NEXT: movl %eax, (%edx)
1926 ; FALLBACK20-NEXT: movl %ebp, 4(%edx)
1927 ; FALLBACK20-NEXT: movl %edi, 8(%edx)
1928 ; FALLBACK20-NEXT: movl %esi, 12(%edx)
1929 ; FALLBACK20-NEXT: addl $60, %esp
1930 ; FALLBACK20-NEXT: popl %esi
1931 ; FALLBACK20-NEXT: popl %edi
1932 ; FALLBACK20-NEXT: popl %ebx
1933 ; FALLBACK20-NEXT: popl %ebp
1934 ; FALLBACK20-NEXT: retl
1936 ; FALLBACK21-LABEL: shl_16bytes:
1937 ; FALLBACK21: # %bb.0:
1938 ; FALLBACK21-NEXT: pushl %ebp
1939 ; FALLBACK21-NEXT: pushl %ebx
1940 ; FALLBACK21-NEXT: pushl %edi
1941 ; FALLBACK21-NEXT: pushl %esi
1942 ; FALLBACK21-NEXT: subl $44, %esp
1943 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %eax
1944 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %ecx
1945 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %edx
1946 ; FALLBACK21-NEXT: movups (%edx), %xmm0
1947 ; FALLBACK21-NEXT: movzbl (%ecx), %edx
1948 ; FALLBACK21-NEXT: movl %edx, %ecx
1949 ; FALLBACK21-NEXT: shlb $3, %cl
1950 ; FALLBACK21-NEXT: xorps %xmm1, %xmm1
1951 ; FALLBACK21-NEXT: movaps %xmm1, (%esp)
1952 ; FALLBACK21-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
1953 ; FALLBACK21-NEXT: andb $12, %dl
1954 ; FALLBACK21-NEXT: negb %dl
1955 ; FALLBACK21-NEXT: movsbl %dl, %edi
1956 ; FALLBACK21-NEXT: movl 24(%esp,%edi), %esi
1957 ; FALLBACK21-NEXT: movl 28(%esp,%edi), %edx
1958 ; FALLBACK21-NEXT: shldl %cl, %esi, %edx
1959 ; FALLBACK21-NEXT: movl 16(%esp,%edi), %ebx
1960 ; FALLBACK21-NEXT: movl 20(%esp,%edi), %edi
1961 ; FALLBACK21-NEXT: shldl %cl, %edi, %esi
1962 ; FALLBACK21-NEXT: movl %ebx, %ebp
1963 ; FALLBACK21-NEXT: shll %cl, %ebp
1964 ; FALLBACK21-NEXT: shldl %cl, %ebx, %edi
1965 ; FALLBACK21-NEXT: movl %edi, 4(%eax)
1966 ; FALLBACK21-NEXT: movl %esi, 8(%eax)
1967 ; FALLBACK21-NEXT: movl %edx, 12(%eax)
1968 ; FALLBACK21-NEXT: movl %ebp, (%eax)
1969 ; FALLBACK21-NEXT: addl $44, %esp
1970 ; FALLBACK21-NEXT: popl %esi
1971 ; FALLBACK21-NEXT: popl %edi
1972 ; FALLBACK21-NEXT: popl %ebx
1973 ; FALLBACK21-NEXT: popl %ebp
1974 ; FALLBACK21-NEXT: retl
1976 ; FALLBACK22-LABEL: shl_16bytes:
1977 ; FALLBACK22: # %bb.0:
1978 ; FALLBACK22-NEXT: pushl %ebp
1979 ; FALLBACK22-NEXT: pushl %ebx
1980 ; FALLBACK22-NEXT: pushl %edi
1981 ; FALLBACK22-NEXT: pushl %esi
1982 ; FALLBACK22-NEXT: subl $44, %esp
1983 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %eax
1984 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %ecx
1985 ; FALLBACK22-NEXT: movups (%ecx), %xmm0
1986 ; FALLBACK22-NEXT: movzbl (%eax), %ecx
1987 ; FALLBACK22-NEXT: movl %ecx, %eax
1988 ; FALLBACK22-NEXT: shlb $3, %al
1989 ; FALLBACK22-NEXT: xorps %xmm1, %xmm1
1990 ; FALLBACK22-NEXT: movaps %xmm1, (%esp)
1991 ; FALLBACK22-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
1992 ; FALLBACK22-NEXT: andb $12, %cl
1993 ; FALLBACK22-NEXT: negb %cl
1994 ; FALLBACK22-NEXT: movsbl %cl, %ecx
1995 ; FALLBACK22-NEXT: shlxl %eax, 28(%esp,%ecx), %esi
1996 ; FALLBACK22-NEXT: movl 24(%esp,%ecx), %edx
1997 ; FALLBACK22-NEXT: shlxl %eax, %edx, %edi
1998 ; FALLBACK22-NEXT: movl %eax, %ebx
1999 ; FALLBACK22-NEXT: notb %bl
2000 ; FALLBACK22-NEXT: shrl %edx
2001 ; FALLBACK22-NEXT: shrxl %ebx, %edx, %edx
2002 ; FALLBACK22-NEXT: orl %esi, %edx
2003 ; FALLBACK22-NEXT: movl 20(%esp,%ecx), %esi
2004 ; FALLBACK22-NEXT: movl %esi, %ebp
2005 ; FALLBACK22-NEXT: shrl %ebp
2006 ; FALLBACK22-NEXT: shrxl %ebx, %ebp, %ebp
2007 ; FALLBACK22-NEXT: orl %edi, %ebp
2008 ; FALLBACK22-NEXT: shlxl %eax, %esi, %esi
2009 ; FALLBACK22-NEXT: movl 16(%esp,%ecx), %ecx
2010 ; FALLBACK22-NEXT: shlxl %eax, %ecx, %eax
2011 ; FALLBACK22-NEXT: shrl %ecx
2012 ; FALLBACK22-NEXT: shrxl %ebx, %ecx, %ecx
2013 ; FALLBACK22-NEXT: orl %esi, %ecx
2014 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %esi
2015 ; FALLBACK22-NEXT: movl %eax, (%esi)
2016 ; FALLBACK22-NEXT: movl %ecx, 4(%esi)
2017 ; FALLBACK22-NEXT: movl %ebp, 8(%esi)
2018 ; FALLBACK22-NEXT: movl %edx, 12(%esi)
2019 ; FALLBACK22-NEXT: addl $44, %esp
2020 ; FALLBACK22-NEXT: popl %esi
2021 ; FALLBACK22-NEXT: popl %edi
2022 ; FALLBACK22-NEXT: popl %ebx
2023 ; FALLBACK22-NEXT: popl %ebp
2024 ; FALLBACK22-NEXT: retl
2026 ; FALLBACK23-LABEL: shl_16bytes:
2027 ; FALLBACK23: # %bb.0:
2028 ; FALLBACK23-NEXT: pushl %ebp
2029 ; FALLBACK23-NEXT: pushl %ebx
2030 ; FALLBACK23-NEXT: pushl %edi
2031 ; FALLBACK23-NEXT: pushl %esi
2032 ; FALLBACK23-NEXT: subl $44, %esp
2033 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %eax
2034 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %ecx
2035 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %edx
2036 ; FALLBACK23-NEXT: movups (%edx), %xmm0
2037 ; FALLBACK23-NEXT: movzbl (%ecx), %edx
2038 ; FALLBACK23-NEXT: movl %edx, %ecx
2039 ; FALLBACK23-NEXT: shlb $3, %cl
2040 ; FALLBACK23-NEXT: xorps %xmm1, %xmm1
2041 ; FALLBACK23-NEXT: movaps %xmm1, (%esp)
2042 ; FALLBACK23-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
2043 ; FALLBACK23-NEXT: andb $12, %dl
2044 ; FALLBACK23-NEXT: negb %dl
2045 ; FALLBACK23-NEXT: movsbl %dl, %edi
2046 ; FALLBACK23-NEXT: movl 24(%esp,%edi), %esi
2047 ; FALLBACK23-NEXT: movl 28(%esp,%edi), %edx
2048 ; FALLBACK23-NEXT: shldl %cl, %esi, %edx
2049 ; FALLBACK23-NEXT: movl 16(%esp,%edi), %ebx
2050 ; FALLBACK23-NEXT: movl 20(%esp,%edi), %edi
2051 ; FALLBACK23-NEXT: shldl %cl, %edi, %esi
2052 ; FALLBACK23-NEXT: shlxl %ecx, %ebx, %ebp
2053 ; FALLBACK23-NEXT: # kill: def $cl killed $cl killed $ecx
2054 ; FALLBACK23-NEXT: shldl %cl, %ebx, %edi
2055 ; FALLBACK23-NEXT: movl %edi, 4(%eax)
2056 ; FALLBACK23-NEXT: movl %esi, 8(%eax)
2057 ; FALLBACK23-NEXT: movl %edx, 12(%eax)
2058 ; FALLBACK23-NEXT: movl %ebp, (%eax)
2059 ; FALLBACK23-NEXT: addl $44, %esp
2060 ; FALLBACK23-NEXT: popl %esi
2061 ; FALLBACK23-NEXT: popl %edi
2062 ; FALLBACK23-NEXT: popl %ebx
2063 ; FALLBACK23-NEXT: popl %ebp
2064 ; FALLBACK23-NEXT: retl
2066 ; FALLBACK24-LABEL: shl_16bytes:
2067 ; FALLBACK24: # %bb.0:
2068 ; FALLBACK24-NEXT: pushl %ebp
2069 ; FALLBACK24-NEXT: pushl %ebx
2070 ; FALLBACK24-NEXT: pushl %edi
2071 ; FALLBACK24-NEXT: pushl %esi
2072 ; FALLBACK24-NEXT: subl $60, %esp
2073 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %eax
2074 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %ecx
2075 ; FALLBACK24-NEXT: vmovups (%ecx), %xmm0
2076 ; FALLBACK24-NEXT: movzbl (%eax), %ecx
2077 ; FALLBACK24-NEXT: movl %ecx, %eax
2078 ; FALLBACK24-NEXT: shlb $3, %al
2079 ; FALLBACK24-NEXT: vxorps %xmm1, %xmm1, %xmm1
2080 ; FALLBACK24-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp)
2081 ; FALLBACK24-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
2082 ; FALLBACK24-NEXT: andb $12, %cl
2083 ; FALLBACK24-NEXT: negb %cl
2084 ; FALLBACK24-NEXT: movsbl %cl, %edi
2085 ; FALLBACK24-NEXT: movl 44(%esp,%edi), %ebx
2086 ; FALLBACK24-NEXT: movl %eax, %ecx
2087 ; FALLBACK24-NEXT: shll %cl, %ebx
2088 ; FALLBACK24-NEXT: movl %eax, %edx
2089 ; FALLBACK24-NEXT: notb %dl
2090 ; FALLBACK24-NEXT: movl 40(%esp,%edi), %ebp
2091 ; FALLBACK24-NEXT: movl %ebp, %esi
2092 ; FALLBACK24-NEXT: shrl %esi
2093 ; FALLBACK24-NEXT: movl %edx, %ecx
2094 ; FALLBACK24-NEXT: shrl %cl, %esi
2095 ; FALLBACK24-NEXT: orl %ebx, %esi
2096 ; FALLBACK24-NEXT: movl %eax, %ecx
2097 ; FALLBACK24-NEXT: shll %cl, %ebp
2098 ; FALLBACK24-NEXT: movl 32(%esp,%edi), %ecx
2099 ; FALLBACK24-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2100 ; FALLBACK24-NEXT: movl 36(%esp,%edi), %ebx
2101 ; FALLBACK24-NEXT: movl %ebx, %edi
2102 ; FALLBACK24-NEXT: shrl %edi
2103 ; FALLBACK24-NEXT: movl %edx, %ecx
2104 ; FALLBACK24-NEXT: shrl %cl, %edi
2105 ; FALLBACK24-NEXT: orl %ebp, %edi
2106 ; FALLBACK24-NEXT: movl %eax, %ecx
2107 ; FALLBACK24-NEXT: shll %cl, %ebx
2108 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
2109 ; FALLBACK24-NEXT: shrl %ebp
2110 ; FALLBACK24-NEXT: movl %edx, %ecx
2111 ; FALLBACK24-NEXT: shrl %cl, %ebp
2112 ; FALLBACK24-NEXT: orl %ebx, %ebp
2113 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %edx
2114 ; FALLBACK24-NEXT: movl %eax, %ecx
2115 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
2116 ; FALLBACK24-NEXT: shll %cl, %eax
2117 ; FALLBACK24-NEXT: movl %eax, (%edx)
2118 ; FALLBACK24-NEXT: movl %ebp, 4(%edx)
2119 ; FALLBACK24-NEXT: movl %edi, 8(%edx)
2120 ; FALLBACK24-NEXT: movl %esi, 12(%edx)
2121 ; FALLBACK24-NEXT: addl $60, %esp
2122 ; FALLBACK24-NEXT: popl %esi
2123 ; FALLBACK24-NEXT: popl %edi
2124 ; FALLBACK24-NEXT: popl %ebx
2125 ; FALLBACK24-NEXT: popl %ebp
2126 ; FALLBACK24-NEXT: retl
2128 ; FALLBACK25-LABEL: shl_16bytes:
2129 ; FALLBACK25: # %bb.0:
2130 ; FALLBACK25-NEXT: pushl %ebp
2131 ; FALLBACK25-NEXT: pushl %ebx
2132 ; FALLBACK25-NEXT: pushl %edi
2133 ; FALLBACK25-NEXT: pushl %esi
2134 ; FALLBACK25-NEXT: subl $44, %esp
2135 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %eax
2136 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %ecx
2137 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %edx
2138 ; FALLBACK25-NEXT: vmovups (%edx), %xmm0
2139 ; FALLBACK25-NEXT: movzbl (%ecx), %edx
2140 ; FALLBACK25-NEXT: movl %edx, %ecx
2141 ; FALLBACK25-NEXT: shlb $3, %cl
2142 ; FALLBACK25-NEXT: vxorps %xmm1, %xmm1, %xmm1
2143 ; FALLBACK25-NEXT: vmovaps %xmm1, (%esp)
2144 ; FALLBACK25-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
2145 ; FALLBACK25-NEXT: andb $12, %dl
2146 ; FALLBACK25-NEXT: negb %dl
2147 ; FALLBACK25-NEXT: movsbl %dl, %edi
2148 ; FALLBACK25-NEXT: movl 24(%esp,%edi), %esi
2149 ; FALLBACK25-NEXT: movl 28(%esp,%edi), %edx
2150 ; FALLBACK25-NEXT: shldl %cl, %esi, %edx
2151 ; FALLBACK25-NEXT: movl 16(%esp,%edi), %ebx
2152 ; FALLBACK25-NEXT: movl 20(%esp,%edi), %edi
2153 ; FALLBACK25-NEXT: shldl %cl, %edi, %esi
2154 ; FALLBACK25-NEXT: movl %ebx, %ebp
2155 ; FALLBACK25-NEXT: shll %cl, %ebp
2156 ; FALLBACK25-NEXT: shldl %cl, %ebx, %edi
2157 ; FALLBACK25-NEXT: movl %edi, 4(%eax)
2158 ; FALLBACK25-NEXT: movl %esi, 8(%eax)
2159 ; FALLBACK25-NEXT: movl %edx, 12(%eax)
2160 ; FALLBACK25-NEXT: movl %ebp, (%eax)
2161 ; FALLBACK25-NEXT: addl $44, %esp
2162 ; FALLBACK25-NEXT: popl %esi
2163 ; FALLBACK25-NEXT: popl %edi
2164 ; FALLBACK25-NEXT: popl %ebx
2165 ; FALLBACK25-NEXT: popl %ebp
2166 ; FALLBACK25-NEXT: retl
2168 ; FALLBACK26-LABEL: shl_16bytes:
2169 ; FALLBACK26: # %bb.0:
2170 ; FALLBACK26-NEXT: pushl %ebp
2171 ; FALLBACK26-NEXT: pushl %ebx
2172 ; FALLBACK26-NEXT: pushl %edi
2173 ; FALLBACK26-NEXT: pushl %esi
2174 ; FALLBACK26-NEXT: subl $44, %esp
2175 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %eax
2176 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %ecx
2177 ; FALLBACK26-NEXT: vmovups (%ecx), %xmm0
2178 ; FALLBACK26-NEXT: movzbl (%eax), %ecx
2179 ; FALLBACK26-NEXT: movl %ecx, %eax
2180 ; FALLBACK26-NEXT: shlb $3, %al
2181 ; FALLBACK26-NEXT: vxorps %xmm1, %xmm1, %xmm1
2182 ; FALLBACK26-NEXT: vmovaps %xmm1, (%esp)
2183 ; FALLBACK26-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
2184 ; FALLBACK26-NEXT: andb $12, %cl
2185 ; FALLBACK26-NEXT: negb %cl
2186 ; FALLBACK26-NEXT: movsbl %cl, %ecx
2187 ; FALLBACK26-NEXT: shlxl %eax, 28(%esp,%ecx), %esi
2188 ; FALLBACK26-NEXT: movl 24(%esp,%ecx), %edx
2189 ; FALLBACK26-NEXT: shlxl %eax, %edx, %edi
2190 ; FALLBACK26-NEXT: movl %eax, %ebx
2191 ; FALLBACK26-NEXT: notb %bl
2192 ; FALLBACK26-NEXT: shrl %edx
2193 ; FALLBACK26-NEXT: shrxl %ebx, %edx, %edx
2194 ; FALLBACK26-NEXT: orl %esi, %edx
2195 ; FALLBACK26-NEXT: movl 20(%esp,%ecx), %esi
2196 ; FALLBACK26-NEXT: movl %esi, %ebp
2197 ; FALLBACK26-NEXT: shrl %ebp
2198 ; FALLBACK26-NEXT: shrxl %ebx, %ebp, %ebp
2199 ; FALLBACK26-NEXT: orl %edi, %ebp
2200 ; FALLBACK26-NEXT: shlxl %eax, %esi, %esi
2201 ; FALLBACK26-NEXT: movl 16(%esp,%ecx), %ecx
2202 ; FALLBACK26-NEXT: shlxl %eax, %ecx, %eax
2203 ; FALLBACK26-NEXT: shrl %ecx
2204 ; FALLBACK26-NEXT: shrxl %ebx, %ecx, %ecx
2205 ; FALLBACK26-NEXT: orl %esi, %ecx
2206 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %esi
2207 ; FALLBACK26-NEXT: movl %eax, (%esi)
2208 ; FALLBACK26-NEXT: movl %ecx, 4(%esi)
2209 ; FALLBACK26-NEXT: movl %ebp, 8(%esi)
2210 ; FALLBACK26-NEXT: movl %edx, 12(%esi)
2211 ; FALLBACK26-NEXT: addl $44, %esp
2212 ; FALLBACK26-NEXT: popl %esi
2213 ; FALLBACK26-NEXT: popl %edi
2214 ; FALLBACK26-NEXT: popl %ebx
2215 ; FALLBACK26-NEXT: popl %ebp
2216 ; FALLBACK26-NEXT: retl
2218 ; FALLBACK27-LABEL: shl_16bytes:
2219 ; FALLBACK27: # %bb.0:
2220 ; FALLBACK27-NEXT: pushl %ebp
2221 ; FALLBACK27-NEXT: pushl %ebx
2222 ; FALLBACK27-NEXT: pushl %edi
2223 ; FALLBACK27-NEXT: pushl %esi
2224 ; FALLBACK27-NEXT: subl $44, %esp
2225 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %eax
2226 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %ecx
2227 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %edx
2228 ; FALLBACK27-NEXT: vmovups (%edx), %xmm0
2229 ; FALLBACK27-NEXT: movzbl (%ecx), %edx
2230 ; FALLBACK27-NEXT: movl %edx, %ecx
2231 ; FALLBACK27-NEXT: shlb $3, %cl
2232 ; FALLBACK27-NEXT: vxorps %xmm1, %xmm1, %xmm1
2233 ; FALLBACK27-NEXT: vmovaps %xmm1, (%esp)
2234 ; FALLBACK27-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
2235 ; FALLBACK27-NEXT: andb $12, %dl
2236 ; FALLBACK27-NEXT: negb %dl
2237 ; FALLBACK27-NEXT: movsbl %dl, %edi
2238 ; FALLBACK27-NEXT: movl 24(%esp,%edi), %esi
2239 ; FALLBACK27-NEXT: movl 28(%esp,%edi), %edx
2240 ; FALLBACK27-NEXT: shldl %cl, %esi, %edx
2241 ; FALLBACK27-NEXT: movl 16(%esp,%edi), %ebx
2242 ; FALLBACK27-NEXT: movl 20(%esp,%edi), %edi
2243 ; FALLBACK27-NEXT: shldl %cl, %edi, %esi
2244 ; FALLBACK27-NEXT: shlxl %ecx, %ebx, %ebp
2245 ; FALLBACK27-NEXT: # kill: def $cl killed $cl killed $ecx
2246 ; FALLBACK27-NEXT: shldl %cl, %ebx, %edi
2247 ; FALLBACK27-NEXT: movl %edi, 4(%eax)
2248 ; FALLBACK27-NEXT: movl %esi, 8(%eax)
2249 ; FALLBACK27-NEXT: movl %edx, 12(%eax)
2250 ; FALLBACK27-NEXT: movl %ebp, (%eax)
2251 ; FALLBACK27-NEXT: addl $44, %esp
2252 ; FALLBACK27-NEXT: popl %esi
2253 ; FALLBACK27-NEXT: popl %edi
2254 ; FALLBACK27-NEXT: popl %ebx
2255 ; FALLBACK27-NEXT: popl %ebp
2256 ; FALLBACK27-NEXT: retl
2258 ; FALLBACK28-LABEL: shl_16bytes:
2259 ; FALLBACK28: # %bb.0:
2260 ; FALLBACK28-NEXT: pushl %ebp
2261 ; FALLBACK28-NEXT: pushl %ebx
2262 ; FALLBACK28-NEXT: pushl %edi
2263 ; FALLBACK28-NEXT: pushl %esi
2264 ; FALLBACK28-NEXT: subl $60, %esp
2265 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %eax
2266 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %ecx
2267 ; FALLBACK28-NEXT: vmovups (%ecx), %xmm0
2268 ; FALLBACK28-NEXT: movzbl (%eax), %ecx
2269 ; FALLBACK28-NEXT: movl %ecx, %eax
2270 ; FALLBACK28-NEXT: shlb $3, %al
2271 ; FALLBACK28-NEXT: vxorps %xmm1, %xmm1, %xmm1
2272 ; FALLBACK28-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp)
2273 ; FALLBACK28-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
2274 ; FALLBACK28-NEXT: andb $12, %cl
2275 ; FALLBACK28-NEXT: negb %cl
2276 ; FALLBACK28-NEXT: movsbl %cl, %edi
2277 ; FALLBACK28-NEXT: movl 44(%esp,%edi), %ebx
2278 ; FALLBACK28-NEXT: movl %eax, %ecx
2279 ; FALLBACK28-NEXT: shll %cl, %ebx
2280 ; FALLBACK28-NEXT: movl %eax, %edx
2281 ; FALLBACK28-NEXT: notb %dl
2282 ; FALLBACK28-NEXT: movl 40(%esp,%edi), %ebp
2283 ; FALLBACK28-NEXT: movl %ebp, %esi
2284 ; FALLBACK28-NEXT: shrl %esi
2285 ; FALLBACK28-NEXT: movl %edx, %ecx
2286 ; FALLBACK28-NEXT: shrl %cl, %esi
2287 ; FALLBACK28-NEXT: orl %ebx, %esi
2288 ; FALLBACK28-NEXT: movl %eax, %ecx
2289 ; FALLBACK28-NEXT: shll %cl, %ebp
2290 ; FALLBACK28-NEXT: movl 32(%esp,%edi), %ecx
2291 ; FALLBACK28-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2292 ; FALLBACK28-NEXT: movl 36(%esp,%edi), %ebx
2293 ; FALLBACK28-NEXT: movl %ebx, %edi
2294 ; FALLBACK28-NEXT: shrl %edi
2295 ; FALLBACK28-NEXT: movl %edx, %ecx
2296 ; FALLBACK28-NEXT: shrl %cl, %edi
2297 ; FALLBACK28-NEXT: orl %ebp, %edi
2298 ; FALLBACK28-NEXT: movl %eax, %ecx
2299 ; FALLBACK28-NEXT: shll %cl, %ebx
2300 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
2301 ; FALLBACK28-NEXT: shrl %ebp
2302 ; FALLBACK28-NEXT: movl %edx, %ecx
2303 ; FALLBACK28-NEXT: shrl %cl, %ebp
2304 ; FALLBACK28-NEXT: orl %ebx, %ebp
2305 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %edx
2306 ; FALLBACK28-NEXT: movl %eax, %ecx
2307 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
2308 ; FALLBACK28-NEXT: shll %cl, %eax
2309 ; FALLBACK28-NEXT: movl %eax, (%edx)
2310 ; FALLBACK28-NEXT: movl %ebp, 4(%edx)
2311 ; FALLBACK28-NEXT: movl %edi, 8(%edx)
2312 ; FALLBACK28-NEXT: movl %esi, 12(%edx)
2313 ; FALLBACK28-NEXT: addl $60, %esp
2314 ; FALLBACK28-NEXT: popl %esi
2315 ; FALLBACK28-NEXT: popl %edi
2316 ; FALLBACK28-NEXT: popl %ebx
2317 ; FALLBACK28-NEXT: popl %ebp
2318 ; FALLBACK28-NEXT: retl
2320 ; FALLBACK29-LABEL: shl_16bytes:
2321 ; FALLBACK29: # %bb.0:
2322 ; FALLBACK29-NEXT: pushl %ebp
2323 ; FALLBACK29-NEXT: pushl %ebx
2324 ; FALLBACK29-NEXT: pushl %edi
2325 ; FALLBACK29-NEXT: pushl %esi
2326 ; FALLBACK29-NEXT: subl $44, %esp
2327 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %eax
2328 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %ecx
2329 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %edx
2330 ; FALLBACK29-NEXT: vmovups (%edx), %xmm0
2331 ; FALLBACK29-NEXT: movzbl (%ecx), %edx
2332 ; FALLBACK29-NEXT: movl %edx, %ecx
2333 ; FALLBACK29-NEXT: shlb $3, %cl
2334 ; FALLBACK29-NEXT: vxorps %xmm1, %xmm1, %xmm1
2335 ; FALLBACK29-NEXT: vmovaps %xmm1, (%esp)
2336 ; FALLBACK29-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
2337 ; FALLBACK29-NEXT: andb $12, %dl
2338 ; FALLBACK29-NEXT: negb %dl
2339 ; FALLBACK29-NEXT: movsbl %dl, %edi
2340 ; FALLBACK29-NEXT: movl 24(%esp,%edi), %esi
2341 ; FALLBACK29-NEXT: movl 28(%esp,%edi), %edx
2342 ; FALLBACK29-NEXT: shldl %cl, %esi, %edx
2343 ; FALLBACK29-NEXT: movl 16(%esp,%edi), %ebx
2344 ; FALLBACK29-NEXT: movl 20(%esp,%edi), %edi
2345 ; FALLBACK29-NEXT: shldl %cl, %edi, %esi
2346 ; FALLBACK29-NEXT: movl %ebx, %ebp
2347 ; FALLBACK29-NEXT: shll %cl, %ebp
2348 ; FALLBACK29-NEXT: shldl %cl, %ebx, %edi
2349 ; FALLBACK29-NEXT: movl %edi, 4(%eax)
2350 ; FALLBACK29-NEXT: movl %esi, 8(%eax)
2351 ; FALLBACK29-NEXT: movl %edx, 12(%eax)
2352 ; FALLBACK29-NEXT: movl %ebp, (%eax)
2353 ; FALLBACK29-NEXT: addl $44, %esp
2354 ; FALLBACK29-NEXT: popl %esi
2355 ; FALLBACK29-NEXT: popl %edi
2356 ; FALLBACK29-NEXT: popl %ebx
2357 ; FALLBACK29-NEXT: popl %ebp
2358 ; FALLBACK29-NEXT: retl
2360 ; FALLBACK30-LABEL: shl_16bytes:
2361 ; FALLBACK30: # %bb.0:
2362 ; FALLBACK30-NEXT: pushl %ebp
2363 ; FALLBACK30-NEXT: pushl %ebx
2364 ; FALLBACK30-NEXT: pushl %edi
2365 ; FALLBACK30-NEXT: pushl %esi
2366 ; FALLBACK30-NEXT: subl $44, %esp
2367 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %eax
2368 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %ecx
2369 ; FALLBACK30-NEXT: vmovups (%ecx), %xmm0
2370 ; FALLBACK30-NEXT: movzbl (%eax), %ecx
2371 ; FALLBACK30-NEXT: movl %ecx, %eax
2372 ; FALLBACK30-NEXT: shlb $3, %al
2373 ; FALLBACK30-NEXT: vxorps %xmm1, %xmm1, %xmm1
2374 ; FALLBACK30-NEXT: vmovaps %xmm1, (%esp)
2375 ; FALLBACK30-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
2376 ; FALLBACK30-NEXT: andb $12, %cl
2377 ; FALLBACK30-NEXT: negb %cl
2378 ; FALLBACK30-NEXT: movsbl %cl, %ecx
2379 ; FALLBACK30-NEXT: shlxl %eax, 28(%esp,%ecx), %esi
2380 ; FALLBACK30-NEXT: movl 24(%esp,%ecx), %edx
2381 ; FALLBACK30-NEXT: shlxl %eax, %edx, %edi
2382 ; FALLBACK30-NEXT: movl %eax, %ebx
2383 ; FALLBACK30-NEXT: notb %bl
2384 ; FALLBACK30-NEXT: shrl %edx
2385 ; FALLBACK30-NEXT: shrxl %ebx, %edx, %edx
2386 ; FALLBACK30-NEXT: orl %esi, %edx
2387 ; FALLBACK30-NEXT: movl 20(%esp,%ecx), %esi
2388 ; FALLBACK30-NEXT: movl %esi, %ebp
2389 ; FALLBACK30-NEXT: shrl %ebp
2390 ; FALLBACK30-NEXT: shrxl %ebx, %ebp, %ebp
2391 ; FALLBACK30-NEXT: orl %edi, %ebp
2392 ; FALLBACK30-NEXT: shlxl %eax, %esi, %esi
2393 ; FALLBACK30-NEXT: movl 16(%esp,%ecx), %ecx
2394 ; FALLBACK30-NEXT: shlxl %eax, %ecx, %eax
2395 ; FALLBACK30-NEXT: shrl %ecx
2396 ; FALLBACK30-NEXT: shrxl %ebx, %ecx, %ecx
2397 ; FALLBACK30-NEXT: orl %esi, %ecx
2398 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %esi
2399 ; FALLBACK30-NEXT: movl %eax, (%esi)
2400 ; FALLBACK30-NEXT: movl %ecx, 4(%esi)
2401 ; FALLBACK30-NEXT: movl %ebp, 8(%esi)
2402 ; FALLBACK30-NEXT: movl %edx, 12(%esi)
2403 ; FALLBACK30-NEXT: addl $44, %esp
2404 ; FALLBACK30-NEXT: popl %esi
2405 ; FALLBACK30-NEXT: popl %edi
2406 ; FALLBACK30-NEXT: popl %ebx
2407 ; FALLBACK30-NEXT: popl %ebp
2408 ; FALLBACK30-NEXT: retl
2410 ; FALLBACK31-LABEL: shl_16bytes:
2411 ; FALLBACK31: # %bb.0:
2412 ; FALLBACK31-NEXT: pushl %ebp
2413 ; FALLBACK31-NEXT: pushl %ebx
2414 ; FALLBACK31-NEXT: pushl %edi
2415 ; FALLBACK31-NEXT: pushl %esi
2416 ; FALLBACK31-NEXT: subl $44, %esp
2417 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %eax
2418 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %ecx
2419 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %edx
2420 ; FALLBACK31-NEXT: vmovups (%edx), %xmm0
2421 ; FALLBACK31-NEXT: movzbl (%ecx), %edx
2422 ; FALLBACK31-NEXT: movl %edx, %ecx
2423 ; FALLBACK31-NEXT: shlb $3, %cl
2424 ; FALLBACK31-NEXT: vxorps %xmm1, %xmm1, %xmm1
2425 ; FALLBACK31-NEXT: vmovaps %xmm1, (%esp)
2426 ; FALLBACK31-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
2427 ; FALLBACK31-NEXT: andb $12, %dl
2428 ; FALLBACK31-NEXT: negb %dl
2429 ; FALLBACK31-NEXT: movsbl %dl, %edi
2430 ; FALLBACK31-NEXT: movl 24(%esp,%edi), %esi
2431 ; FALLBACK31-NEXT: movl 28(%esp,%edi), %edx
2432 ; FALLBACK31-NEXT: shldl %cl, %esi, %edx
2433 ; FALLBACK31-NEXT: movl 16(%esp,%edi), %ebx
2434 ; FALLBACK31-NEXT: movl 20(%esp,%edi), %edi
2435 ; FALLBACK31-NEXT: shldl %cl, %edi, %esi
2436 ; FALLBACK31-NEXT: shlxl %ecx, %ebx, %ebp
2437 ; FALLBACK31-NEXT: # kill: def $cl killed $cl killed $ecx
2438 ; FALLBACK31-NEXT: shldl %cl, %ebx, %edi
2439 ; FALLBACK31-NEXT: movl %edi, 4(%eax)
2440 ; FALLBACK31-NEXT: movl %esi, 8(%eax)
2441 ; FALLBACK31-NEXT: movl %edx, 12(%eax)
2442 ; FALLBACK31-NEXT: movl %ebp, (%eax)
2443 ; FALLBACK31-NEXT: addl $44, %esp
2444 ; FALLBACK31-NEXT: popl %esi
2445 ; FALLBACK31-NEXT: popl %edi
2446 ; FALLBACK31-NEXT: popl %ebx
2447 ; FALLBACK31-NEXT: popl %ebp
2448 ; FALLBACK31-NEXT: retl
2449 %src = load i128, ptr %src.ptr, align 1
2450 %byteOff = load i128, ptr %byteOff.ptr, align 1
2451 %bitOff = shl i128 %byteOff, 3
2452 %res = shl i128 %src, %bitOff
2453 store i128 %res, ptr %dst, align 1
2457 define void @shl_16bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nounwind {
2458 ; X64-NO-SHLD-NO-BMI2-LABEL: shl_16bytes_dwordOff:
2459 ; X64-NO-SHLD-NO-BMI2: # %bb.0:
2460 ; X64-NO-SHLD-NO-BMI2-NEXT: movq (%rdi), %r8
2461 ; X64-NO-SHLD-NO-BMI2-NEXT: movq 8(%rdi), %rdi
2462 ; X64-NO-SHLD-NO-BMI2-NEXT: movzbl (%rsi), %eax
2463 ; X64-NO-SHLD-NO-BMI2-NEXT: shlb $5, %al
2464 ; X64-NO-SHLD-NO-BMI2-NEXT: movl %eax, %ecx
2465 ; X64-NO-SHLD-NO-BMI2-NEXT: shlq %cl, %rdi
2466 ; X64-NO-SHLD-NO-BMI2-NEXT: movq %r8, %rsi
2467 ; X64-NO-SHLD-NO-BMI2-NEXT: shrq %rsi
2468 ; X64-NO-SHLD-NO-BMI2-NEXT: notb %cl
2469 ; X64-NO-SHLD-NO-BMI2-NEXT: shrq %cl, %rsi
2470 ; X64-NO-SHLD-NO-BMI2-NEXT: orq %rdi, %rsi
2471 ; X64-NO-SHLD-NO-BMI2-NEXT: movl %eax, %ecx
2472 ; X64-NO-SHLD-NO-BMI2-NEXT: shlq %cl, %r8
2473 ; X64-NO-SHLD-NO-BMI2-NEXT: xorl %ecx, %ecx
2474 ; X64-NO-SHLD-NO-BMI2-NEXT: testb $64, %al
2475 ; X64-NO-SHLD-NO-BMI2-NEXT: cmovneq %r8, %rsi
2476 ; X64-NO-SHLD-NO-BMI2-NEXT: cmoveq %r8, %rcx
2477 ; X64-NO-SHLD-NO-BMI2-NEXT: movq %rcx, (%rdx)
2478 ; X64-NO-SHLD-NO-BMI2-NEXT: movq %rsi, 8(%rdx)
2479 ; X64-NO-SHLD-NO-BMI2-NEXT: retq
2481 ; X64-HAVE-SHLD-NO-BMI2-LABEL: shl_16bytes_dwordOff:
2482 ; X64-HAVE-SHLD-NO-BMI2: # %bb.0:
2483 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq (%rdi), %rax
2484 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq 8(%rdi), %rdi
2485 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movzbl (%rsi), %ecx
2486 ; X64-HAVE-SHLD-NO-BMI2-NEXT: shlb $5, %cl
2487 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq %rax, %rsi
2488 ; X64-HAVE-SHLD-NO-BMI2-NEXT: shlq %cl, %rsi
2489 ; X64-HAVE-SHLD-NO-BMI2-NEXT: shldq %cl, %rax, %rdi
2490 ; X64-HAVE-SHLD-NO-BMI2-NEXT: xorl %eax, %eax
2491 ; X64-HAVE-SHLD-NO-BMI2-NEXT: testb $64, %cl
2492 ; X64-HAVE-SHLD-NO-BMI2-NEXT: cmovneq %rsi, %rdi
2493 ; X64-HAVE-SHLD-NO-BMI2-NEXT: cmoveq %rsi, %rax
2494 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq %rdi, 8(%rdx)
2495 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq %rax, (%rdx)
2496 ; X64-HAVE-SHLD-NO-BMI2-NEXT: retq
2498 ; X64-NO-SHLD-HAVE-BMI2-LABEL: shl_16bytes_dwordOff:
2499 ; X64-NO-SHLD-HAVE-BMI2: # %bb.0:
2500 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movq (%rdi), %rax
2501 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movzbl (%rsi), %ecx
2502 ; X64-NO-SHLD-HAVE-BMI2-NEXT: shlb $5, %cl
2503 ; X64-NO-SHLD-HAVE-BMI2-NEXT: shlxq %rcx, 8(%rdi), %rsi
2504 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movl %ecx, %edi
2505 ; X64-NO-SHLD-HAVE-BMI2-NEXT: notb %dil
2506 ; X64-NO-SHLD-HAVE-BMI2-NEXT: shlxq %rcx, %rax, %r8
2507 ; X64-NO-SHLD-HAVE-BMI2-NEXT: shrq %rax
2508 ; X64-NO-SHLD-HAVE-BMI2-NEXT: shrxq %rdi, %rax, %rax
2509 ; X64-NO-SHLD-HAVE-BMI2-NEXT: orq %rsi, %rax
2510 ; X64-NO-SHLD-HAVE-BMI2-NEXT: xorl %esi, %esi
2511 ; X64-NO-SHLD-HAVE-BMI2-NEXT: testb $64, %cl
2512 ; X64-NO-SHLD-HAVE-BMI2-NEXT: cmovneq %r8, %rax
2513 ; X64-NO-SHLD-HAVE-BMI2-NEXT: cmoveq %r8, %rsi
2514 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movq %rsi, (%rdx)
2515 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movq %rax, 8(%rdx)
2516 ; X64-NO-SHLD-HAVE-BMI2-NEXT: retq
2518 ; X64-HAVE-SHLD-HAVE-BMI2-LABEL: shl_16bytes_dwordOff:
2519 ; X64-HAVE-SHLD-HAVE-BMI2: # %bb.0:
2520 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq (%rdi), %rax
2521 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq 8(%rdi), %rdi
2522 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movzbl (%rsi), %ecx
2523 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: shlb $5, %cl
2524 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: shldq %cl, %rax, %rdi
2525 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: shlxq %rcx, %rax, %rax
2526 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: xorl %esi, %esi
2527 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: testb $64, %cl
2528 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: cmovneq %rax, %rdi
2529 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: cmoveq %rax, %rsi
2530 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq %rdi, 8(%rdx)
2531 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq %rsi, (%rdx)
2532 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: retq
2534 ; X86-SSE2-LABEL: shl_16bytes_dwordOff:
2535 ; X86-SSE2: # %bb.0:
2536 ; X86-SSE2-NEXT: pushl %ebx
2537 ; X86-SSE2-NEXT: pushl %edi
2538 ; X86-SSE2-NEXT: pushl %esi
2539 ; X86-SSE2-NEXT: subl $32, %esp
2540 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
2541 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
2542 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
2543 ; X86-SSE2-NEXT: movl (%edx), %esi
2544 ; X86-SSE2-NEXT: movl 4(%edx), %edi
2545 ; X86-SSE2-NEXT: movl 8(%edx), %ebx
2546 ; X86-SSE2-NEXT: movl 12(%edx), %edx
2547 ; X86-SSE2-NEXT: movzbl (%ecx), %ecx
2548 ; X86-SSE2-NEXT: xorps %xmm0, %xmm0
2549 ; X86-SSE2-NEXT: movaps %xmm0, (%esp)
2550 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
2551 ; X86-SSE2-NEXT: movl %ebx, {{[0-9]+}}(%esp)
2552 ; X86-SSE2-NEXT: movl %edi, {{[0-9]+}}(%esp)
2553 ; X86-SSE2-NEXT: movl %esi, {{[0-9]+}}(%esp)
2554 ; X86-SSE2-NEXT: shlb $2, %cl
2555 ; X86-SSE2-NEXT: andb $12, %cl
2556 ; X86-SSE2-NEXT: negb %cl
2557 ; X86-SSE2-NEXT: movsbl %cl, %ecx
2558 ; X86-SSE2-NEXT: movl 16(%esp,%ecx), %edx
2559 ; X86-SSE2-NEXT: movl 20(%esp,%ecx), %esi
2560 ; X86-SSE2-NEXT: movl 28(%esp,%ecx), %edi
2561 ; X86-SSE2-NEXT: movl 24(%esp,%ecx), %ecx
2562 ; X86-SSE2-NEXT: movl %ecx, 8(%eax)
2563 ; X86-SSE2-NEXT: movl %edi, 12(%eax)
2564 ; X86-SSE2-NEXT: movl %edx, (%eax)
2565 ; X86-SSE2-NEXT: movl %esi, 4(%eax)
2566 ; X86-SSE2-NEXT: addl $32, %esp
2567 ; X86-SSE2-NEXT: popl %esi
2568 ; X86-SSE2-NEXT: popl %edi
2569 ; X86-SSE2-NEXT: popl %ebx
2570 ; X86-SSE2-NEXT: retl
2572 ; X86-SSE42-LABEL: shl_16bytes_dwordOff:
2573 ; X86-SSE42: # %bb.0:
2574 ; X86-SSE42-NEXT: subl $44, %esp
2575 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %eax
2576 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %ecx
2577 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %edx
2578 ; X86-SSE42-NEXT: movups (%edx), %xmm0
2579 ; X86-SSE42-NEXT: movzbl (%ecx), %ecx
2580 ; X86-SSE42-NEXT: xorps %xmm1, %xmm1
2581 ; X86-SSE42-NEXT: movaps %xmm1, (%esp)
2582 ; X86-SSE42-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
2583 ; X86-SSE42-NEXT: shlb $2, %cl
2584 ; X86-SSE42-NEXT: andb $12, %cl
2585 ; X86-SSE42-NEXT: negb %cl
2586 ; X86-SSE42-NEXT: movsbl %cl, %ecx
2587 ; X86-SSE42-NEXT: movups 16(%esp,%ecx), %xmm0
2588 ; X86-SSE42-NEXT: movups %xmm0, (%eax)
2589 ; X86-SSE42-NEXT: addl $44, %esp
2590 ; X86-SSE42-NEXT: retl
2592 ; X86-AVX-LABEL: shl_16bytes_dwordOff:
2593 ; X86-AVX: # %bb.0:
2594 ; X86-AVX-NEXT: subl $44, %esp
2595 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
2596 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
2597 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
2598 ; X86-AVX-NEXT: vmovups (%edx), %xmm0
2599 ; X86-AVX-NEXT: movzbl (%ecx), %ecx
2600 ; X86-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
2601 ; X86-AVX-NEXT: vmovaps %xmm1, (%esp)
2602 ; X86-AVX-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
2603 ; X86-AVX-NEXT: shlb $2, %cl
2604 ; X86-AVX-NEXT: andb $12, %cl
2605 ; X86-AVX-NEXT: negb %cl
2606 ; X86-AVX-NEXT: movsbl %cl, %ecx
2607 ; X86-AVX-NEXT: vmovups 16(%esp,%ecx), %xmm0
2608 ; X86-AVX-NEXT: vmovups %xmm0, (%eax)
2609 ; X86-AVX-NEXT: addl $44, %esp
2610 ; X86-AVX-NEXT: retl
2611 %src = load i128, ptr %src.ptr, align 1
2612 %dwordOff = load i128, ptr %dwordOff.ptr, align 1
2613 %bitOff = shl i128 %dwordOff, 5
2614 %res = shl i128 %src, %bitOff
2615 store i128 %res, ptr %dst, align 1
2619 define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
2620 ; X64-NO-SHLD-NO-BMI2-LABEL: ashr_16bytes:
2621 ; X64-NO-SHLD-NO-BMI2: # %bb.0:
2622 ; X64-NO-SHLD-NO-BMI2-NEXT: movq (%rdi), %r8
2623 ; X64-NO-SHLD-NO-BMI2-NEXT: movq 8(%rdi), %rdi
2624 ; X64-NO-SHLD-NO-BMI2-NEXT: movzbl (%rsi), %eax
2625 ; X64-NO-SHLD-NO-BMI2-NEXT: shlb $3, %al
2626 ; X64-NO-SHLD-NO-BMI2-NEXT: movl %eax, %ecx
2627 ; X64-NO-SHLD-NO-BMI2-NEXT: shrq %cl, %r8
2628 ; X64-NO-SHLD-NO-BMI2-NEXT: leaq (%rdi,%rdi), %rsi
2629 ; X64-NO-SHLD-NO-BMI2-NEXT: notb %cl
2630 ; X64-NO-SHLD-NO-BMI2-NEXT: shlq %cl, %rsi
2631 ; X64-NO-SHLD-NO-BMI2-NEXT: orq %r8, %rsi
2632 ; X64-NO-SHLD-NO-BMI2-NEXT: movq %rdi, %r8
2633 ; X64-NO-SHLD-NO-BMI2-NEXT: movl %eax, %ecx
2634 ; X64-NO-SHLD-NO-BMI2-NEXT: sarq %cl, %r8
2635 ; X64-NO-SHLD-NO-BMI2-NEXT: sarq $63, %rdi
2636 ; X64-NO-SHLD-NO-BMI2-NEXT: testb $64, %al
2637 ; X64-NO-SHLD-NO-BMI2-NEXT: cmovneq %r8, %rsi
2638 ; X64-NO-SHLD-NO-BMI2-NEXT: cmoveq %r8, %rdi
2639 ; X64-NO-SHLD-NO-BMI2-NEXT: movq %rdi, 8(%rdx)
2640 ; X64-NO-SHLD-NO-BMI2-NEXT: movq %rsi, (%rdx)
2641 ; X64-NO-SHLD-NO-BMI2-NEXT: retq
2643 ; X64-HAVE-SHLD-NO-BMI2-LABEL: ashr_16bytes:
2644 ; X64-HAVE-SHLD-NO-BMI2: # %bb.0:
2645 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq (%rdi), %rax
2646 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq 8(%rdi), %rdi
2647 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movzbl (%rsi), %ecx
2648 ; X64-HAVE-SHLD-NO-BMI2-NEXT: shlb $3, %cl
2649 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq %rdi, %rsi
2650 ; X64-HAVE-SHLD-NO-BMI2-NEXT: sarq %cl, %rsi
2651 ; X64-HAVE-SHLD-NO-BMI2-NEXT: shrdq %cl, %rdi, %rax
2652 ; X64-HAVE-SHLD-NO-BMI2-NEXT: sarq $63, %rdi
2653 ; X64-HAVE-SHLD-NO-BMI2-NEXT: testb $64, %cl
2654 ; X64-HAVE-SHLD-NO-BMI2-NEXT: cmovneq %rsi, %rax
2655 ; X64-HAVE-SHLD-NO-BMI2-NEXT: cmoveq %rsi, %rdi
2656 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq %rdi, 8(%rdx)
2657 ; X64-HAVE-SHLD-NO-BMI2-NEXT: movq %rax, (%rdx)
2658 ; X64-HAVE-SHLD-NO-BMI2-NEXT: retq
2660 ; X64-NO-SHLD-HAVE-BMI2-LABEL: ashr_16bytes:
2661 ; X64-NO-SHLD-HAVE-BMI2: # %bb.0:
2662 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movq 8(%rdi), %rax
2663 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movzbl (%rsi), %ecx
2664 ; X64-NO-SHLD-HAVE-BMI2-NEXT: shlb $3, %cl
2665 ; X64-NO-SHLD-HAVE-BMI2-NEXT: shrxq %rcx, (%rdi), %rsi
2666 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movl %ecx, %edi
2667 ; X64-NO-SHLD-HAVE-BMI2-NEXT: notb %dil
2668 ; X64-NO-SHLD-HAVE-BMI2-NEXT: leaq (%rax,%rax), %r8
2669 ; X64-NO-SHLD-HAVE-BMI2-NEXT: shlxq %rdi, %r8, %rdi
2670 ; X64-NO-SHLD-HAVE-BMI2-NEXT: orq %rsi, %rdi
2671 ; X64-NO-SHLD-HAVE-BMI2-NEXT: sarxq %rcx, %rax, %rsi
2672 ; X64-NO-SHLD-HAVE-BMI2-NEXT: sarq $63, %rax
2673 ; X64-NO-SHLD-HAVE-BMI2-NEXT: testb $64, %cl
2674 ; X64-NO-SHLD-HAVE-BMI2-NEXT: cmovneq %rsi, %rdi
2675 ; X64-NO-SHLD-HAVE-BMI2-NEXT: cmoveq %rsi, %rax
2676 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movq %rax, 8(%rdx)
2677 ; X64-NO-SHLD-HAVE-BMI2-NEXT: movq %rdi, (%rdx)
2678 ; X64-NO-SHLD-HAVE-BMI2-NEXT: retq
2680 ; X64-HAVE-SHLD-HAVE-BMI2-LABEL: ashr_16bytes:
2681 ; X64-HAVE-SHLD-HAVE-BMI2: # %bb.0:
2682 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq (%rdi), %rax
2683 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq 8(%rdi), %rdi
2684 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movzbl (%rsi), %ecx
2685 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: shlb $3, %cl
2686 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: shrdq %cl, %rdi, %rax
2687 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: sarxq %rcx, %rdi, %rsi
2688 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: sarq $63, %rdi
2689 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: testb $64, %cl
2690 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: cmovneq %rsi, %rax
2691 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: cmoveq %rsi, %rdi
2692 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq %rdi, 8(%rdx)
2693 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq %rax, (%rdx)
2694 ; X64-HAVE-SHLD-HAVE-BMI2-NEXT: retq
2696 ; X86-NO-SHLD-NO-BMI2-LABEL: ashr_16bytes:
2697 ; X86-NO-SHLD-NO-BMI2: # %bb.0:
2698 ; X86-NO-SHLD-NO-BMI2-NEXT: pushl %ebp
2699 ; X86-NO-SHLD-NO-BMI2-NEXT: pushl %ebx
2700 ; X86-NO-SHLD-NO-BMI2-NEXT: pushl %edi
2701 ; X86-NO-SHLD-NO-BMI2-NEXT: pushl %esi
2702 ; X86-NO-SHLD-NO-BMI2-NEXT: subl $60, %esp
2703 ; X86-NO-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
2704 ; X86-NO-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
2705 ; X86-NO-SHLD-NO-BMI2-NEXT: movl (%ecx), %edx
2706 ; X86-NO-SHLD-NO-BMI2-NEXT: movl 4(%ecx), %esi
2707 ; X86-NO-SHLD-NO-BMI2-NEXT: movl 8(%ecx), %edi
2708 ; X86-NO-SHLD-NO-BMI2-NEXT: movl 12(%ecx), %ecx
2709 ; X86-NO-SHLD-NO-BMI2-NEXT: movb (%eax), %ah
2710 ; X86-NO-SHLD-NO-BMI2-NEXT: movb %ah, %al
2711 ; X86-NO-SHLD-NO-BMI2-NEXT: shlb $3, %al
2712 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2713 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %edi, {{[0-9]+}}(%esp)
2714 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %esi, {{[0-9]+}}(%esp)
2715 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %edx, {{[0-9]+}}(%esp)
2716 ; X86-NO-SHLD-NO-BMI2-NEXT: sarl $31, %ecx
2717 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2718 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2719 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2720 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2721 ; X86-NO-SHLD-NO-BMI2-NEXT: andb $12, %ah
2722 ; X86-NO-SHLD-NO-BMI2-NEXT: movzbl %ah, %ebp
2723 ; X86-NO-SHLD-NO-BMI2-NEXT: movl 20(%esp,%ebp), %esi
2724 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %esi, %ebx
2725 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %eax, %ecx
2726 ; X86-NO-SHLD-NO-BMI2-NEXT: shrl %cl, %ebx
2727 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %eax, %edx
2728 ; X86-NO-SHLD-NO-BMI2-NEXT: notb %dl
2729 ; X86-NO-SHLD-NO-BMI2-NEXT: movl 24(%esp,%ebp), %ecx
2730 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
2731 ; X86-NO-SHLD-NO-BMI2-NEXT: leal (%ecx,%ecx), %edi
2732 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %edx, %ecx
2733 ; X86-NO-SHLD-NO-BMI2-NEXT: shll %cl, %edi
2734 ; X86-NO-SHLD-NO-BMI2-NEXT: orl %ebx, %edi
2735 ; X86-NO-SHLD-NO-BMI2-NEXT: movl 16(%esp,%ebp), %ebx
2736 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %eax, %ecx
2737 ; X86-NO-SHLD-NO-BMI2-NEXT: shrl %cl, %ebx
2738 ; X86-NO-SHLD-NO-BMI2-NEXT: addl %esi, %esi
2739 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %edx, %ecx
2740 ; X86-NO-SHLD-NO-BMI2-NEXT: shll %cl, %esi
2741 ; X86-NO-SHLD-NO-BMI2-NEXT: orl %ebx, %esi
2742 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %eax, %ecx
2743 ; X86-NO-SHLD-NO-BMI2-NEXT: shrl %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
2744 ; X86-NO-SHLD-NO-BMI2-NEXT: movl 28(%esp,%ebp), %ebx
2745 ; X86-NO-SHLD-NO-BMI2-NEXT: leal (%ebx,%ebx), %ebp
2746 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %edx, %ecx
2747 ; X86-NO-SHLD-NO-BMI2-NEXT: shll %cl, %ebp
2748 ; X86-NO-SHLD-NO-BMI2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
2749 ; X86-NO-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
2750 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %eax, %ecx
2751 ; X86-NO-SHLD-NO-BMI2-NEXT: sarl %cl, %ebx
2752 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %ebx, 12(%edx)
2753 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %ebp, 8(%edx)
2754 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %esi, (%edx)
2755 ; X86-NO-SHLD-NO-BMI2-NEXT: movl %edi, 4(%edx)
2756 ; X86-NO-SHLD-NO-BMI2-NEXT: addl $60, %esp
2757 ; X86-NO-SHLD-NO-BMI2-NEXT: popl %esi
2758 ; X86-NO-SHLD-NO-BMI2-NEXT: popl %edi
2759 ; X86-NO-SHLD-NO-BMI2-NEXT: popl %ebx
2760 ; X86-NO-SHLD-NO-BMI2-NEXT: popl %ebp
2761 ; X86-NO-SHLD-NO-BMI2-NEXT: retl
2763 ; X86-HAVE-SHLD-NO-BMI2-LABEL: ashr_16bytes:
2764 ; X86-HAVE-SHLD-NO-BMI2: # %bb.0:
2765 ; X86-HAVE-SHLD-NO-BMI2-NEXT: pushl %ebp
2766 ; X86-HAVE-SHLD-NO-BMI2-NEXT: pushl %ebx
2767 ; X86-HAVE-SHLD-NO-BMI2-NEXT: pushl %edi
2768 ; X86-HAVE-SHLD-NO-BMI2-NEXT: pushl %esi
2769 ; X86-HAVE-SHLD-NO-BMI2-NEXT: subl $44, %esp
2770 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
2771 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
2772 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
2773 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl (%edx), %esi
2774 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl 4(%edx), %edi
2775 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl 8(%edx), %ebx
2776 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl 12(%edx), %edx
2777 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movb (%ecx), %ch
2778 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movb %ch, %cl
2779 ; X86-HAVE-SHLD-NO-BMI2-NEXT: shlb $3, %cl
2780 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl %edx, {{[0-9]+}}(%esp)
2781 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl %ebx, {{[0-9]+}}(%esp)
2782 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl %edi, {{[0-9]+}}(%esp)
2783 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl %esi, (%esp)
2784 ; X86-HAVE-SHLD-NO-BMI2-NEXT: sarl $31, %edx
2785 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl %edx, {{[0-9]+}}(%esp)
2786 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl %edx, {{[0-9]+}}(%esp)
2787 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl %edx, {{[0-9]+}}(%esp)
2788 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl %edx, {{[0-9]+}}(%esp)
2789 ; X86-HAVE-SHLD-NO-BMI2-NEXT: andb $12, %ch
2790 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movzbl %ch, %ebx
2791 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl 8(%esp,%ebx), %esi
2792 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl (%esp,%ebx), %edx
2793 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl 4(%esp,%ebx), %ebp
2794 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl %ebp, %edi
2795 ; X86-HAVE-SHLD-NO-BMI2-NEXT: shrdl %cl, %esi, %edi
2796 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl 12(%esp,%ebx), %ebx
2797 ; X86-HAVE-SHLD-NO-BMI2-NEXT: shrdl %cl, %ebx, %esi
2798 ; X86-HAVE-SHLD-NO-BMI2-NEXT: shrdl %cl, %ebp, %edx
2799 ; X86-HAVE-SHLD-NO-BMI2-NEXT: sarl %cl, %ebx
2800 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl %esi, 8(%eax)
2801 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl %ebx, 12(%eax)
2802 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl %edx, (%eax)
2803 ; X86-HAVE-SHLD-NO-BMI2-NEXT: movl %edi, 4(%eax)
2804 ; X86-HAVE-SHLD-NO-BMI2-NEXT: addl $44, %esp
2805 ; X86-HAVE-SHLD-NO-BMI2-NEXT: popl %esi
2806 ; X86-HAVE-SHLD-NO-BMI2-NEXT: popl %edi
2807 ; X86-HAVE-SHLD-NO-BMI2-NEXT: popl %ebx
2808 ; X86-HAVE-SHLD-NO-BMI2-NEXT: popl %ebp
2809 ; X86-HAVE-SHLD-NO-BMI2-NEXT: retl
2811 ; X86-NO-SHLD-HAVE-BMI2-LABEL: ashr_16bytes:
2812 ; X86-NO-SHLD-HAVE-BMI2: # %bb.0:
2813 ; X86-NO-SHLD-HAVE-BMI2-NEXT: pushl %ebp
2814 ; X86-NO-SHLD-HAVE-BMI2-NEXT: pushl %ebx
2815 ; X86-NO-SHLD-HAVE-BMI2-NEXT: pushl %edi
2816 ; X86-NO-SHLD-HAVE-BMI2-NEXT: pushl %esi
2817 ; X86-NO-SHLD-HAVE-BMI2-NEXT: subl $44, %esp
2818 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
2819 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
2820 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl (%ecx), %edx
2821 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl 4(%ecx), %esi
2822 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl 8(%ecx), %edi
2823 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl 12(%ecx), %ecx
2824 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movzbl (%eax), %ebx
2825 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %ebx, %eax
2826 ; X86-NO-SHLD-HAVE-BMI2-NEXT: shlb $3, %al
2827 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2828 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %edi, {{[0-9]+}}(%esp)
2829 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %esi, {{[0-9]+}}(%esp)
2830 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %edx, (%esp)
2831 ; X86-NO-SHLD-HAVE-BMI2-NEXT: sarl $31, %ecx
2832 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2833 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2834 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2835 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
2836 ; X86-NO-SHLD-HAVE-BMI2-NEXT: andb $12, %bl
2837 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movzbl %bl, %esi
2838 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl 4(%esp,%esi), %edi
2839 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl 8(%esp,%esi), %ebx
2840 ; X86-NO-SHLD-HAVE-BMI2-NEXT: shrxl %eax, %edi, %ebp
2841 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %eax, %edx
2842 ; X86-NO-SHLD-HAVE-BMI2-NEXT: notb %dl
2843 ; X86-NO-SHLD-HAVE-BMI2-NEXT: leal (%ebx,%ebx), %ecx
2844 ; X86-NO-SHLD-HAVE-BMI2-NEXT: shlxl %edx, %ecx, %ecx
2845 ; X86-NO-SHLD-HAVE-BMI2-NEXT: orl %ebp, %ecx
2846 ; X86-NO-SHLD-HAVE-BMI2-NEXT: shrxl %eax, (%esp,%esi), %ebp
2847 ; X86-NO-SHLD-HAVE-BMI2-NEXT: addl %edi, %edi
2848 ; X86-NO-SHLD-HAVE-BMI2-NEXT: shlxl %edx, %edi, %edi
2849 ; X86-NO-SHLD-HAVE-BMI2-NEXT: orl %ebp, %edi
2850 ; X86-NO-SHLD-HAVE-BMI2-NEXT: shrxl %eax, %ebx, %ebx
2851 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl 12(%esp,%esi), %esi
2852 ; X86-NO-SHLD-HAVE-BMI2-NEXT: sarxl %eax, %esi, %eax
2853 ; X86-NO-SHLD-HAVE-BMI2-NEXT: addl %esi, %esi
2854 ; X86-NO-SHLD-HAVE-BMI2-NEXT: shlxl %edx, %esi, %edx
2855 ; X86-NO-SHLD-HAVE-BMI2-NEXT: orl %ebx, %edx
2856 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
2857 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %eax, 12(%esi)
2858 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %edx, 8(%esi)
2859 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %edi, (%esi)
2860 ; X86-NO-SHLD-HAVE-BMI2-NEXT: movl %ecx, 4(%esi)
2861 ; X86-NO-SHLD-HAVE-BMI2-NEXT: addl $44, %esp
2862 ; X86-NO-SHLD-HAVE-BMI2-NEXT: popl %esi
2863 ; X86-NO-SHLD-HAVE-BMI2-NEXT: popl %edi
2864 ; X86-NO-SHLD-HAVE-BMI2-NEXT: popl %ebx
2865 ; X86-NO-SHLD-HAVE-BMI2-NEXT: popl %ebp
2866 ; X86-NO-SHLD-HAVE-BMI2-NEXT: retl
2868 ; X86-HAVE-SHLD-HAVE-BMI2-LABEL: ashr_16bytes:
2869 ; X86-HAVE-SHLD-HAVE-BMI2: # %bb.0:
2870 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: pushl %ebp
2871 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: pushl %ebx
2872 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: pushl %edi
2873 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: pushl %esi
2874 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: subl $44, %esp
2875 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ebp
2876 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
2877 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
2878 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl (%edx), %esi
2879 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl 4(%edx), %edi
2880 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl 8(%edx), %ebx
2881 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl 12(%edx), %edx
2882 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movzbl (%ecx), %eax
2883 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl %eax, %ecx
2884 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: shlb $3, %cl
2885 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl %edx, {{[0-9]+}}(%esp)
2886 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl %ebx, {{[0-9]+}}(%esp)
2887 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl %edi, {{[0-9]+}}(%esp)
2888 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl %esi, (%esp)
2889 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: sarl $31, %edx
2890 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl %edx, {{[0-9]+}}(%esp)
2891 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl %edx, {{[0-9]+}}(%esp)
2892 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl %edx, {{[0-9]+}}(%esp)
2893 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl %edx, {{[0-9]+}}(%esp)
2894 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: andb $12, %al
2895 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movzbl %al, %eax
2896 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl 8(%esp,%eax), %ebx
2897 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl (%esp,%eax), %edx
2898 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl 4(%esp,%eax), %esi
2899 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl %esi, %edi
2900 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: shrdl %cl, %ebx, %edi
2901 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl 12(%esp,%eax), %eax
2902 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: shrdl %cl, %eax, %ebx
2903 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl %ebx, 8(%ebp)
2904 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: sarxl %ecx, %eax, %eax
2905 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl %eax, 12(%ebp)
2906 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
2907 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: shrdl %cl, %esi, %edx
2908 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl %edx, (%ebp)
2909 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: movl %edi, 4(%ebp)
2910 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: addl $44, %esp
2911 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: popl %esi
2912 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: popl %edi
2913 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: popl %ebx
2914 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: popl %ebp
2915 ; X86-HAVE-SHLD-HAVE-BMI2-NEXT: retl
2916 %src = load i128, ptr %src.ptr, align 1
2917 %byteOff = load i128, ptr %byteOff.ptr, align 1
2918 %bitOff = shl i128 %byteOff, 3
2919 %res = ashr i128 %src, %bitOff
2920 store i128 %res, ptr %dst, align 1
; Arithmetic right shift of a 128-bit value by a run-time amount expressed in
; 32-bit dword units. The value and the offset are both loaded as i128 with
; align 1, the offset is scaled to a bit count (shl by 5, i.e. times 32), and
; the shifted result is stored to %dst with align 1. The check groups below
; are the autogenerated llc expectations, one group per target configuration.
define void @ashr_16bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nounwind {
; X64-NO-SHLD-NO-BMI2-LABEL: ashr_16bytes_dwordOff:
; X64-NO-SHLD-NO-BMI2: # %bb.0:
; X64-NO-SHLD-NO-BMI2-NEXT: movq (%rdi), %r8
; X64-NO-SHLD-NO-BMI2-NEXT: movq 8(%rdi), %rdi
; X64-NO-SHLD-NO-BMI2-NEXT: movzbl (%rsi), %eax
; X64-NO-SHLD-NO-BMI2-NEXT: shlb $5, %al
; X64-NO-SHLD-NO-BMI2-NEXT: movl %eax, %ecx
; X64-NO-SHLD-NO-BMI2-NEXT: shrq %cl, %r8
; X64-NO-SHLD-NO-BMI2-NEXT: leaq (%rdi,%rdi), %rsi
; X64-NO-SHLD-NO-BMI2-NEXT: notb %cl
; X64-NO-SHLD-NO-BMI2-NEXT: shlq %cl, %rsi
; X64-NO-SHLD-NO-BMI2-NEXT: orq %r8, %rsi
; X64-NO-SHLD-NO-BMI2-NEXT: movq %rdi, %r8
; X64-NO-SHLD-NO-BMI2-NEXT: movl %eax, %ecx
; X64-NO-SHLD-NO-BMI2-NEXT: sarq %cl, %r8
; X64-NO-SHLD-NO-BMI2-NEXT: sarq $63, %rdi
; X64-NO-SHLD-NO-BMI2-NEXT: testb $64, %al
; X64-NO-SHLD-NO-BMI2-NEXT: cmovneq %r8, %rsi
; X64-NO-SHLD-NO-BMI2-NEXT: cmoveq %r8, %rdi
; X64-NO-SHLD-NO-BMI2-NEXT: movq %rdi, 8(%rdx)
; X64-NO-SHLD-NO-BMI2-NEXT: movq %rsi, (%rdx)
; X64-NO-SHLD-NO-BMI2-NEXT: retq
;
; X64-HAVE-SHLD-NO-BMI2-LABEL: ashr_16bytes_dwordOff:
; X64-HAVE-SHLD-NO-BMI2: # %bb.0:
; X64-HAVE-SHLD-NO-BMI2-NEXT: movq (%rdi), %rax
; X64-HAVE-SHLD-NO-BMI2-NEXT: movq 8(%rdi), %rdi
; X64-HAVE-SHLD-NO-BMI2-NEXT: movzbl (%rsi), %ecx
; X64-HAVE-SHLD-NO-BMI2-NEXT: shlb $5, %cl
; X64-HAVE-SHLD-NO-BMI2-NEXT: movq %rdi, %rsi
; X64-HAVE-SHLD-NO-BMI2-NEXT: sarq %cl, %rsi
; X64-HAVE-SHLD-NO-BMI2-NEXT: shrdq %cl, %rdi, %rax
; X64-HAVE-SHLD-NO-BMI2-NEXT: sarq $63, %rdi
; X64-HAVE-SHLD-NO-BMI2-NEXT: testb $64, %cl
; X64-HAVE-SHLD-NO-BMI2-NEXT: cmovneq %rsi, %rax
; X64-HAVE-SHLD-NO-BMI2-NEXT: cmoveq %rsi, %rdi
; X64-HAVE-SHLD-NO-BMI2-NEXT: movq %rdi, 8(%rdx)
; X64-HAVE-SHLD-NO-BMI2-NEXT: movq %rax, (%rdx)
; X64-HAVE-SHLD-NO-BMI2-NEXT: retq
;
; X64-NO-SHLD-HAVE-BMI2-LABEL: ashr_16bytes_dwordOff:
; X64-NO-SHLD-HAVE-BMI2: # %bb.0:
; X64-NO-SHLD-HAVE-BMI2-NEXT: movq 8(%rdi), %rax
; X64-NO-SHLD-HAVE-BMI2-NEXT: movzbl (%rsi), %ecx
; X64-NO-SHLD-HAVE-BMI2-NEXT: shlb $5, %cl
; X64-NO-SHLD-HAVE-BMI2-NEXT: shrxq %rcx, (%rdi), %rsi
; X64-NO-SHLD-HAVE-BMI2-NEXT: movl %ecx, %edi
; X64-NO-SHLD-HAVE-BMI2-NEXT: notb %dil
; X64-NO-SHLD-HAVE-BMI2-NEXT: leaq (%rax,%rax), %r8
; X64-NO-SHLD-HAVE-BMI2-NEXT: shlxq %rdi, %r8, %rdi
; X64-NO-SHLD-HAVE-BMI2-NEXT: orq %rsi, %rdi
; X64-NO-SHLD-HAVE-BMI2-NEXT: sarxq %rcx, %rax, %rsi
; X64-NO-SHLD-HAVE-BMI2-NEXT: sarq $63, %rax
; X64-NO-SHLD-HAVE-BMI2-NEXT: testb $64, %cl
; X64-NO-SHLD-HAVE-BMI2-NEXT: cmovneq %rsi, %rdi
; X64-NO-SHLD-HAVE-BMI2-NEXT: cmoveq %rsi, %rax
; X64-NO-SHLD-HAVE-BMI2-NEXT: movq %rax, 8(%rdx)
; X64-NO-SHLD-HAVE-BMI2-NEXT: movq %rdi, (%rdx)
; X64-NO-SHLD-HAVE-BMI2-NEXT: retq
;
; X64-HAVE-SHLD-HAVE-BMI2-LABEL: ashr_16bytes_dwordOff:
; X64-HAVE-SHLD-HAVE-BMI2: # %bb.0:
; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq (%rdi), %rax
; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq 8(%rdi), %rdi
; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movzbl (%rsi), %ecx
; X64-HAVE-SHLD-HAVE-BMI2-NEXT: shlb $5, %cl
; X64-HAVE-SHLD-HAVE-BMI2-NEXT: shrdq %cl, %rdi, %rax
; X64-HAVE-SHLD-HAVE-BMI2-NEXT: sarxq %rcx, %rdi, %rsi
; X64-HAVE-SHLD-HAVE-BMI2-NEXT: sarq $63, %rdi
; X64-HAVE-SHLD-HAVE-BMI2-NEXT: testb $64, %cl
; X64-HAVE-SHLD-HAVE-BMI2-NEXT: cmovneq %rsi, %rax
; X64-HAVE-SHLD-HAVE-BMI2-NEXT: cmoveq %rsi, %rdi
; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq %rdi, 8(%rdx)
; X64-HAVE-SHLD-HAVE-BMI2-NEXT: movq %rax, (%rdx)
; X64-HAVE-SHLD-HAVE-BMI2-NEXT: retq
;
; X86-SSE2-LABEL: ashr_16bytes_dwordOff:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %ebx
; X86-SSE2-NEXT: pushl %edi
; X86-SSE2-NEXT: pushl %esi
; X86-SSE2-NEXT: subl $32, %esp
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT: movl (%edx), %esi
; X86-SSE2-NEXT: movl 4(%edx), %edi
; X86-SSE2-NEXT: movl 8(%edx), %ebx
; X86-SSE2-NEXT: movl 12(%edx), %edx
; X86-SSE2-NEXT: movzbl (%ecx), %ecx
; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: movl %ebx, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: movl %esi, (%esp)
; X86-SSE2-NEXT: sarl $31, %edx
; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT: andl $3, %ecx
; X86-SSE2-NEXT: movl (%esp,%ecx,4), %edx
; X86-SSE2-NEXT: movl 4(%esp,%ecx,4), %esi
; X86-SSE2-NEXT: movl 12(%esp,%ecx,4), %edi
; X86-SSE2-NEXT: movl 8(%esp,%ecx,4), %ecx
; X86-SSE2-NEXT: movl %ecx, 8(%eax)
; X86-SSE2-NEXT: movl %edi, 12(%eax)
; X86-SSE2-NEXT: movl %edx, (%eax)
; X86-SSE2-NEXT: movl %esi, 4(%eax)
; X86-SSE2-NEXT: addl $32, %esp
; X86-SSE2-NEXT: popl %esi
; X86-SSE2-NEXT: popl %edi
; X86-SSE2-NEXT: popl %ebx
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: ashr_16bytes_dwordOff:
; X86-SSE42: # %bb.0:
; X86-SSE42-NEXT: pushl %ebx
; X86-SSE42-NEXT: pushl %edi
; X86-SSE42-NEXT: pushl %esi
; X86-SSE42-NEXT: subl $32, %esp
; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SSE42-NEXT: movl (%edx), %esi
; X86-SSE42-NEXT: movl 4(%edx), %edi
; X86-SSE42-NEXT: movl 8(%edx), %ebx
; X86-SSE42-NEXT: movl 12(%edx), %edx
; X86-SSE42-NEXT: movzbl (%ecx), %ecx
; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-SSE42-NEXT: movl %ebx, {{[0-9]+}}(%esp)
; X86-SSE42-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-SSE42-NEXT: movl %esi, (%esp)
; X86-SSE42-NEXT: sarl $31, %edx
; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-SSE42-NEXT: andl $3, %ecx
; X86-SSE42-NEXT: movups (%esp,%ecx,4), %xmm0
; X86-SSE42-NEXT: movups %xmm0, (%eax)
; X86-SSE42-NEXT: addl $32, %esp
; X86-SSE42-NEXT: popl %esi
; X86-SSE42-NEXT: popl %edi
; X86-SSE42-NEXT: popl %ebx
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: ashr_16bytes_dwordOff:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %ebx
; X86-AVX-NEXT: pushl %edi
; X86-AVX-NEXT: pushl %esi
; X86-AVX-NEXT: subl $32, %esp
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT: movl (%edx), %esi
; X86-AVX-NEXT: movl 4(%edx), %edi
; X86-AVX-NEXT: movl 8(%edx), %ebx
; X86-AVX-NEXT: movl 12(%edx), %edx
; X86-AVX-NEXT: movzbl (%ecx), %ecx
; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-AVX-NEXT: movl %ebx, {{[0-9]+}}(%esp)
; X86-AVX-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-AVX-NEXT: movl %esi, (%esp)
; X86-AVX-NEXT: sarl $31, %edx
; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-AVX-NEXT: andl $3, %ecx
; X86-AVX-NEXT: vmovups (%esp,%ecx,4), %xmm0
; X86-AVX-NEXT: vmovups %xmm0, (%eax)
; X86-AVX-NEXT: addl $32, %esp
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: popl %edi
; X86-AVX-NEXT: popl %ebx
; X86-AVX-NEXT: retl
  %src = load i128, ptr %src.ptr, align 1
  %dwordOff = load i128, ptr %dwordOff.ptr, align 1
  ; Scale the dword count to a bit count: 32 bits per dword, i.e. shift by 5.
  %bitOff = shl i128 %dwordOff, 5
  ; ashr replicates the sign bit of %src into the vacated high bits.
  %res = ashr i128 %src, %bitOff
  store i128 %res, ptr %dst, align 1
  ret void
}
3110 define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
3111 ; FALLBACK0-LABEL: lshr_32bytes:
3112 ; FALLBACK0: # %bb.0:
3113 ; FALLBACK0-NEXT: pushq %rbx
3114 ; FALLBACK0-NEXT: movq (%rdi), %rcx
3115 ; FALLBACK0-NEXT: movq 8(%rdi), %r8
3116 ; FALLBACK0-NEXT: movq 16(%rdi), %r9
3117 ; FALLBACK0-NEXT: movq 24(%rdi), %rdi
3118 ; FALLBACK0-NEXT: movzbl (%rsi), %esi
3119 ; FALLBACK0-NEXT: leal (,%rsi,8), %eax
3120 ; FALLBACK0-NEXT: xorps %xmm0, %xmm0
3121 ; FALLBACK0-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
3122 ; FALLBACK0-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
3123 ; FALLBACK0-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
3124 ; FALLBACK0-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
3125 ; FALLBACK0-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
3126 ; FALLBACK0-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
3127 ; FALLBACK0-NEXT: andb $24, %sil
3128 ; FALLBACK0-NEXT: movzbl %sil, %r9d
3129 ; FALLBACK0-NEXT: movq -64(%rsp,%r9), %r10
3130 ; FALLBACK0-NEXT: movq -56(%rsp,%r9), %rdi
3131 ; FALLBACK0-NEXT: movq %rdi, %r11
3132 ; FALLBACK0-NEXT: movl %eax, %ecx
3133 ; FALLBACK0-NEXT: shrq %cl, %r11
3134 ; FALLBACK0-NEXT: movl %eax, %esi
3135 ; FALLBACK0-NEXT: notb %sil
3136 ; FALLBACK0-NEXT: movq -48(%rsp,%r9), %rbx
3137 ; FALLBACK0-NEXT: leaq (%rbx,%rbx), %r8
3138 ; FALLBACK0-NEXT: movl %esi, %ecx
3139 ; FALLBACK0-NEXT: shlq %cl, %r8
3140 ; FALLBACK0-NEXT: orq %r11, %r8
3141 ; FALLBACK0-NEXT: movl %eax, %ecx
3142 ; FALLBACK0-NEXT: shrq %cl, %r10
3143 ; FALLBACK0-NEXT: addq %rdi, %rdi
3144 ; FALLBACK0-NEXT: movl %esi, %ecx
3145 ; FALLBACK0-NEXT: shlq %cl, %rdi
3146 ; FALLBACK0-NEXT: orq %r10, %rdi
3147 ; FALLBACK0-NEXT: movl %eax, %ecx
3148 ; FALLBACK0-NEXT: shrq %cl, %rbx
3149 ; FALLBACK0-NEXT: movq -40(%rsp,%r9), %r9
3150 ; FALLBACK0-NEXT: leaq (%r9,%r9), %r10
3151 ; FALLBACK0-NEXT: movl %esi, %ecx
3152 ; FALLBACK0-NEXT: shlq %cl, %r10
3153 ; FALLBACK0-NEXT: orq %rbx, %r10
3154 ; FALLBACK0-NEXT: movl %eax, %ecx
3155 ; FALLBACK0-NEXT: shrq %cl, %r9
3156 ; FALLBACK0-NEXT: movq %r9, 24(%rdx)
3157 ; FALLBACK0-NEXT: movq %r10, 16(%rdx)
3158 ; FALLBACK0-NEXT: movq %rdi, (%rdx)
3159 ; FALLBACK0-NEXT: movq %r8, 8(%rdx)
3160 ; FALLBACK0-NEXT: popq %rbx
3161 ; FALLBACK0-NEXT: retq
3163 ; FALLBACK1-LABEL: lshr_32bytes:
3164 ; FALLBACK1: # %bb.0:
3165 ; FALLBACK1-NEXT: movq (%rdi), %rax
3166 ; FALLBACK1-NEXT: movq 8(%rdi), %r8
3167 ; FALLBACK1-NEXT: movq 16(%rdi), %r9
3168 ; FALLBACK1-NEXT: movq 24(%rdi), %rdi
3169 ; FALLBACK1-NEXT: movzbl (%rsi), %esi
3170 ; FALLBACK1-NEXT: leal (,%rsi,8), %ecx
3171 ; FALLBACK1-NEXT: xorps %xmm0, %xmm0
3172 ; FALLBACK1-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
3173 ; FALLBACK1-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
3174 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
3175 ; FALLBACK1-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
3176 ; FALLBACK1-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
3177 ; FALLBACK1-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
3178 ; FALLBACK1-NEXT: andb $24, %sil
3179 ; FALLBACK1-NEXT: movzbl %sil, %eax
3180 ; FALLBACK1-NEXT: movq -56(%rsp,%rax), %rsi
3181 ; FALLBACK1-NEXT: movq -72(%rsp,%rax), %rdi
3182 ; FALLBACK1-NEXT: movq -64(%rsp,%rax), %r8
3183 ; FALLBACK1-NEXT: movq %r8, %r9
3184 ; FALLBACK1-NEXT: shrdq %cl, %rsi, %r9
3185 ; FALLBACK1-NEXT: movq -48(%rsp,%rax), %rax
3186 ; FALLBACK1-NEXT: shrdq %cl, %rax, %rsi
3187 ; FALLBACK1-NEXT: shrdq %cl, %r8, %rdi
3188 ; FALLBACK1-NEXT: # kill: def $cl killed $cl killed $ecx
3189 ; FALLBACK1-NEXT: shrq %cl, %rax
3190 ; FALLBACK1-NEXT: movq %rsi, 16(%rdx)
3191 ; FALLBACK1-NEXT: movq %rax, 24(%rdx)
3192 ; FALLBACK1-NEXT: movq %rdi, (%rdx)
3193 ; FALLBACK1-NEXT: movq %r9, 8(%rdx)
3194 ; FALLBACK1-NEXT: retq
3196 ; FALLBACK2-LABEL: lshr_32bytes:
3197 ; FALLBACK2: # %bb.0:
3198 ; FALLBACK2-NEXT: movq (%rdi), %rcx
3199 ; FALLBACK2-NEXT: movq 8(%rdi), %r8
3200 ; FALLBACK2-NEXT: movq 16(%rdi), %r9
3201 ; FALLBACK2-NEXT: movq 24(%rdi), %rdi
3202 ; FALLBACK2-NEXT: movzbl (%rsi), %esi
3203 ; FALLBACK2-NEXT: leal (,%rsi,8), %eax
3204 ; FALLBACK2-NEXT: xorps %xmm0, %xmm0
3205 ; FALLBACK2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
3206 ; FALLBACK2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
3207 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
3208 ; FALLBACK2-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
3209 ; FALLBACK2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
3210 ; FALLBACK2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
3211 ; FALLBACK2-NEXT: andb $24, %sil
3212 ; FALLBACK2-NEXT: movzbl %sil, %ecx
3213 ; FALLBACK2-NEXT: movq -64(%rsp,%rcx), %rsi
3214 ; FALLBACK2-NEXT: movq -56(%rsp,%rcx), %rdi
3215 ; FALLBACK2-NEXT: shrxq %rax, %rsi, %r8
3216 ; FALLBACK2-NEXT: shrxq %rax, -72(%rsp,%rcx), %r9
3217 ; FALLBACK2-NEXT: shrxq %rax, %rdi, %r10
3218 ; FALLBACK2-NEXT: movq -48(%rsp,%rcx), %rcx
3219 ; FALLBACK2-NEXT: shrxq %rax, %rcx, %r11
3220 ; FALLBACK2-NEXT: # kill: def $al killed $al killed $rax def $rax
3221 ; FALLBACK2-NEXT: notb %al
3222 ; FALLBACK2-NEXT: addq %rdi, %rdi
3223 ; FALLBACK2-NEXT: shlxq %rax, %rdi, %rdi
3224 ; FALLBACK2-NEXT: orq %r8, %rdi
3225 ; FALLBACK2-NEXT: addq %rsi, %rsi
3226 ; FALLBACK2-NEXT: shlxq %rax, %rsi, %rsi
3227 ; FALLBACK2-NEXT: orq %r9, %rsi
3228 ; FALLBACK2-NEXT: addq %rcx, %rcx
3229 ; FALLBACK2-NEXT: shlxq %rax, %rcx, %rax
3230 ; FALLBACK2-NEXT: orq %r10, %rax
3231 ; FALLBACK2-NEXT: movq %r11, 24(%rdx)
3232 ; FALLBACK2-NEXT: movq %rax, 16(%rdx)
3233 ; FALLBACK2-NEXT: movq %rsi, (%rdx)
3234 ; FALLBACK2-NEXT: movq %rdi, 8(%rdx)
3235 ; FALLBACK2-NEXT: retq
3237 ; FALLBACK3-LABEL: lshr_32bytes:
3238 ; FALLBACK3: # %bb.0:
3239 ; FALLBACK3-NEXT: movq (%rdi), %rax
3240 ; FALLBACK3-NEXT: movq 8(%rdi), %r8
3241 ; FALLBACK3-NEXT: movq 16(%rdi), %r9
3242 ; FALLBACK3-NEXT: movq 24(%rdi), %rdi
3243 ; FALLBACK3-NEXT: movzbl (%rsi), %esi
3244 ; FALLBACK3-NEXT: leal (,%rsi,8), %ecx
3245 ; FALLBACK3-NEXT: xorps %xmm0, %xmm0
3246 ; FALLBACK3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
3247 ; FALLBACK3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
3248 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
3249 ; FALLBACK3-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
3250 ; FALLBACK3-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
3251 ; FALLBACK3-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
3252 ; FALLBACK3-NEXT: andb $24, %sil
3253 ; FALLBACK3-NEXT: movzbl %sil, %eax
3254 ; FALLBACK3-NEXT: movq -56(%rsp,%rax), %rsi
3255 ; FALLBACK3-NEXT: movq -72(%rsp,%rax), %rdi
3256 ; FALLBACK3-NEXT: movq -64(%rsp,%rax), %r8
3257 ; FALLBACK3-NEXT: movq %r8, %r9
3258 ; FALLBACK3-NEXT: shrdq %cl, %rsi, %r9
3259 ; FALLBACK3-NEXT: movq -48(%rsp,%rax), %rax
3260 ; FALLBACK3-NEXT: shrdq %cl, %rax, %rsi
3261 ; FALLBACK3-NEXT: shrdq %cl, %r8, %rdi
3262 ; FALLBACK3-NEXT: shrxq %rcx, %rax, %rax
3263 ; FALLBACK3-NEXT: movq %rsi, 16(%rdx)
3264 ; FALLBACK3-NEXT: movq %rax, 24(%rdx)
3265 ; FALLBACK3-NEXT: movq %rdi, (%rdx)
3266 ; FALLBACK3-NEXT: movq %r9, 8(%rdx)
3267 ; FALLBACK3-NEXT: retq
3269 ; FALLBACK4-LABEL: lshr_32bytes:
3270 ; FALLBACK4: # %bb.0:
3271 ; FALLBACK4-NEXT: pushq %rbx
3272 ; FALLBACK4-NEXT: movups (%rdi), %xmm0
3273 ; FALLBACK4-NEXT: movups 16(%rdi), %xmm1
3274 ; FALLBACK4-NEXT: movzbl (%rsi), %ecx
3275 ; FALLBACK4-NEXT: leal (,%rcx,8), %eax
3276 ; FALLBACK4-NEXT: xorps %xmm2, %xmm2
3277 ; FALLBACK4-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
3278 ; FALLBACK4-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
3279 ; FALLBACK4-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
3280 ; FALLBACK4-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
3281 ; FALLBACK4-NEXT: andb $24, %cl
3282 ; FALLBACK4-NEXT: movzbl %cl, %r9d
3283 ; FALLBACK4-NEXT: movq -64(%rsp,%r9), %r10
3284 ; FALLBACK4-NEXT: movq -56(%rsp,%r9), %r8
3285 ; FALLBACK4-NEXT: movl %eax, %ecx
3286 ; FALLBACK4-NEXT: shrq %cl, %r10
3287 ; FALLBACK4-NEXT: movl %eax, %esi
3288 ; FALLBACK4-NEXT: notb %sil
3289 ; FALLBACK4-NEXT: leaq (%r8,%r8), %rdi
3290 ; FALLBACK4-NEXT: movl %esi, %ecx
3291 ; FALLBACK4-NEXT: shlq %cl, %rdi
3292 ; FALLBACK4-NEXT: orq %r10, %rdi
3293 ; FALLBACK4-NEXT: movq -48(%rsp,%r9), %r10
3294 ; FALLBACK4-NEXT: movq %r10, %r11
3295 ; FALLBACK4-NEXT: movl %eax, %ecx
3296 ; FALLBACK4-NEXT: shrq %cl, %r11
3297 ; FALLBACK4-NEXT: movq -40(%rsp,%r9), %r9
3298 ; FALLBACK4-NEXT: leaq (%r9,%r9), %rbx
3299 ; FALLBACK4-NEXT: movl %esi, %ecx
3300 ; FALLBACK4-NEXT: shlq %cl, %rbx
3301 ; FALLBACK4-NEXT: orq %r11, %rbx
3302 ; FALLBACK4-NEXT: movl %eax, %ecx
3303 ; FALLBACK4-NEXT: shrq %cl, %r8
3304 ; FALLBACK4-NEXT: addq %r10, %r10
3305 ; FALLBACK4-NEXT: movl %esi, %ecx
3306 ; FALLBACK4-NEXT: shlq %cl, %r10
3307 ; FALLBACK4-NEXT: orq %r8, %r10
3308 ; FALLBACK4-NEXT: movl %eax, %ecx
3309 ; FALLBACK4-NEXT: shrq %cl, %r9
3310 ; FALLBACK4-NEXT: movq %r9, 24(%rdx)
3311 ; FALLBACK4-NEXT: movq %r10, 8(%rdx)
3312 ; FALLBACK4-NEXT: movq %rbx, 16(%rdx)
3313 ; FALLBACK4-NEXT: movq %rdi, (%rdx)
3314 ; FALLBACK4-NEXT: popq %rbx
3315 ; FALLBACK4-NEXT: retq
3317 ; FALLBACK5-LABEL: lshr_32bytes:
3318 ; FALLBACK5: # %bb.0:
3319 ; FALLBACK5-NEXT: movups (%rdi), %xmm0
3320 ; FALLBACK5-NEXT: movups 16(%rdi), %xmm1
3321 ; FALLBACK5-NEXT: movzbl (%rsi), %eax
3322 ; FALLBACK5-NEXT: leal (,%rax,8), %ecx
3323 ; FALLBACK5-NEXT: xorps %xmm2, %xmm2
3324 ; FALLBACK5-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
3325 ; FALLBACK5-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
3326 ; FALLBACK5-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
3327 ; FALLBACK5-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
3328 ; FALLBACK5-NEXT: andb $24, %al
3329 ; FALLBACK5-NEXT: movzbl %al, %eax
3330 ; FALLBACK5-NEXT: movq -48(%rsp,%rax), %rsi
3331 ; FALLBACK5-NEXT: movq -56(%rsp,%rax), %rdi
3332 ; FALLBACK5-NEXT: movq %rdi, %r8
3333 ; FALLBACK5-NEXT: shrdq %cl, %rsi, %r8
3334 ; FALLBACK5-NEXT: movq -72(%rsp,%rax), %r9
3335 ; FALLBACK5-NEXT: movq -64(%rsp,%rax), %rax
3336 ; FALLBACK5-NEXT: movq %rax, %r10
3337 ; FALLBACK5-NEXT: shrdq %cl, %rdi, %r10
3338 ; FALLBACK5-NEXT: shrdq %cl, %rax, %r9
3339 ; FALLBACK5-NEXT: # kill: def $cl killed $cl killed $ecx
3340 ; FALLBACK5-NEXT: shrq %cl, %rsi
3341 ; FALLBACK5-NEXT: movq %r10, 8(%rdx)
3342 ; FALLBACK5-NEXT: movq %r8, 16(%rdx)
3343 ; FALLBACK5-NEXT: movq %rsi, 24(%rdx)
3344 ; FALLBACK5-NEXT: movq %r9, (%rdx)
3345 ; FALLBACK5-NEXT: retq
3347 ; FALLBACK6-LABEL: lshr_32bytes:
3348 ; FALLBACK6: # %bb.0:
3349 ; FALLBACK6-NEXT: movups (%rdi), %xmm0
3350 ; FALLBACK6-NEXT: movups 16(%rdi), %xmm1
3351 ; FALLBACK6-NEXT: movzbl (%rsi), %ecx
3352 ; FALLBACK6-NEXT: leal (,%rcx,8), %eax
3353 ; FALLBACK6-NEXT: xorps %xmm2, %xmm2
3354 ; FALLBACK6-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
3355 ; FALLBACK6-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
3356 ; FALLBACK6-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
3357 ; FALLBACK6-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
3358 ; FALLBACK6-NEXT: andb $24, %cl
3359 ; FALLBACK6-NEXT: movzbl %cl, %ecx
3360 ; FALLBACK6-NEXT: shrxq %rax, -72(%rsp,%rcx), %rsi
3361 ; FALLBACK6-NEXT: movq -64(%rsp,%rcx), %rdi
3362 ; FALLBACK6-NEXT: movq -56(%rsp,%rcx), %r8
3363 ; FALLBACK6-NEXT: shrxq %rax, %r8, %r9
3364 ; FALLBACK6-NEXT: movq -48(%rsp,%rcx), %rcx
3365 ; FALLBACK6-NEXT: shrxq %rax, %rdi, %r10
3366 ; FALLBACK6-NEXT: shrxq %rax, %rcx, %r11
3367 ; FALLBACK6-NEXT: # kill: def $al killed $al killed $rax def $rax
3368 ; FALLBACK6-NEXT: notb %al
3369 ; FALLBACK6-NEXT: addq %rdi, %rdi
3370 ; FALLBACK6-NEXT: shlxq %rax, %rdi, %rdi
3371 ; FALLBACK6-NEXT: orq %rsi, %rdi
3372 ; FALLBACK6-NEXT: addq %rcx, %rcx
3373 ; FALLBACK6-NEXT: shlxq %rax, %rcx, %rcx
3374 ; FALLBACK6-NEXT: orq %r9, %rcx
3375 ; FALLBACK6-NEXT: addq %r8, %r8
3376 ; FALLBACK6-NEXT: shlxq %rax, %r8, %rax
3377 ; FALLBACK6-NEXT: orq %r10, %rax
3378 ; FALLBACK6-NEXT: movq %r11, 24(%rdx)
3379 ; FALLBACK6-NEXT: movq %rax, 8(%rdx)
3380 ; FALLBACK6-NEXT: movq %rcx, 16(%rdx)
3381 ; FALLBACK6-NEXT: movq %rdi, (%rdx)
3382 ; FALLBACK6-NEXT: retq
3384 ; FALLBACK7-LABEL: lshr_32bytes:
3385 ; FALLBACK7: # %bb.0:
3386 ; FALLBACK7-NEXT: movups (%rdi), %xmm0
3387 ; FALLBACK7-NEXT: movups 16(%rdi), %xmm1
3388 ; FALLBACK7-NEXT: movzbl (%rsi), %eax
3389 ; FALLBACK7-NEXT: leal (,%rax,8), %ecx
3390 ; FALLBACK7-NEXT: xorps %xmm2, %xmm2
3391 ; FALLBACK7-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
3392 ; FALLBACK7-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
3393 ; FALLBACK7-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
3394 ; FALLBACK7-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
3395 ; FALLBACK7-NEXT: andb $24, %al
3396 ; FALLBACK7-NEXT: movzbl %al, %eax
3397 ; FALLBACK7-NEXT: movq -48(%rsp,%rax), %rsi
3398 ; FALLBACK7-NEXT: movq -56(%rsp,%rax), %rdi
3399 ; FALLBACK7-NEXT: movq %rdi, %r8
3400 ; FALLBACK7-NEXT: shrdq %cl, %rsi, %r8
3401 ; FALLBACK7-NEXT: movq -72(%rsp,%rax), %r9
3402 ; FALLBACK7-NEXT: movq -64(%rsp,%rax), %rax
3403 ; FALLBACK7-NEXT: movq %rax, %r10
3404 ; FALLBACK7-NEXT: shrdq %cl, %rdi, %r10
3405 ; FALLBACK7-NEXT: shrdq %cl, %rax, %r9
3406 ; FALLBACK7-NEXT: shrxq %rcx, %rsi, %rax
3407 ; FALLBACK7-NEXT: movq %r10, 8(%rdx)
3408 ; FALLBACK7-NEXT: movq %r8, 16(%rdx)
3409 ; FALLBACK7-NEXT: movq %rax, 24(%rdx)
3410 ; FALLBACK7-NEXT: movq %r9, (%rdx)
3411 ; FALLBACK7-NEXT: retq
3413 ; FALLBACK8-LABEL: lshr_32bytes:
3414 ; FALLBACK8: # %bb.0:
3415 ; FALLBACK8-NEXT: pushq %rbx
3416 ; FALLBACK8-NEXT: vmovups (%rdi), %ymm0
3417 ; FALLBACK8-NEXT: movzbl (%rsi), %ecx
3418 ; FALLBACK8-NEXT: leal (,%rcx,8), %eax
3419 ; FALLBACK8-NEXT: vxorps %xmm1, %xmm1, %xmm1
3420 ; FALLBACK8-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
3421 ; FALLBACK8-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
3422 ; FALLBACK8-NEXT: andb $24, %cl
3423 ; FALLBACK8-NEXT: movzbl %cl, %r9d
3424 ; FALLBACK8-NEXT: movq -64(%rsp,%r9), %r10
3425 ; FALLBACK8-NEXT: movq -56(%rsp,%r9), %r8
3426 ; FALLBACK8-NEXT: movl %eax, %ecx
3427 ; FALLBACK8-NEXT: shrq %cl, %r10
3428 ; FALLBACK8-NEXT: movl %eax, %esi
3429 ; FALLBACK8-NEXT: notb %sil
3430 ; FALLBACK8-NEXT: leaq (%r8,%r8), %rdi
3431 ; FALLBACK8-NEXT: movl %esi, %ecx
3432 ; FALLBACK8-NEXT: shlq %cl, %rdi
3433 ; FALLBACK8-NEXT: orq %r10, %rdi
3434 ; FALLBACK8-NEXT: movq -48(%rsp,%r9), %r10
3435 ; FALLBACK8-NEXT: movq %r10, %r11
3436 ; FALLBACK8-NEXT: movl %eax, %ecx
3437 ; FALLBACK8-NEXT: shrq %cl, %r11
3438 ; FALLBACK8-NEXT: movq -40(%rsp,%r9), %r9
3439 ; FALLBACK8-NEXT: leaq (%r9,%r9), %rbx
3440 ; FALLBACK8-NEXT: movl %esi, %ecx
3441 ; FALLBACK8-NEXT: shlq %cl, %rbx
3442 ; FALLBACK8-NEXT: orq %r11, %rbx
3443 ; FALLBACK8-NEXT: movl %eax, %ecx
3444 ; FALLBACK8-NEXT: shrq %cl, %r8
3445 ; FALLBACK8-NEXT: addq %r10, %r10
3446 ; FALLBACK8-NEXT: movl %esi, %ecx
3447 ; FALLBACK8-NEXT: shlq %cl, %r10
3448 ; FALLBACK8-NEXT: orq %r8, %r10
3449 ; FALLBACK8-NEXT: movl %eax, %ecx
3450 ; FALLBACK8-NEXT: shrq %cl, %r9
3451 ; FALLBACK8-NEXT: movq %r9, 24(%rdx)
3452 ; FALLBACK8-NEXT: movq %r10, 8(%rdx)
3453 ; FALLBACK8-NEXT: movq %rbx, 16(%rdx)
3454 ; FALLBACK8-NEXT: movq %rdi, (%rdx)
3455 ; FALLBACK8-NEXT: popq %rbx
3456 ; FALLBACK8-NEXT: vzeroupper
3457 ; FALLBACK8-NEXT: retq
3459 ; FALLBACK9-LABEL: lshr_32bytes:
3460 ; FALLBACK9: # %bb.0:
3461 ; FALLBACK9-NEXT: vmovups (%rdi), %ymm0
3462 ; FALLBACK9-NEXT: movzbl (%rsi), %eax
3463 ; FALLBACK9-NEXT: leal (,%rax,8), %ecx
3464 ; FALLBACK9-NEXT: vxorps %xmm1, %xmm1, %xmm1
3465 ; FALLBACK9-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
3466 ; FALLBACK9-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
3467 ; FALLBACK9-NEXT: andb $24, %al
3468 ; FALLBACK9-NEXT: movzbl %al, %eax
3469 ; FALLBACK9-NEXT: movq -48(%rsp,%rax), %rsi
3470 ; FALLBACK9-NEXT: movq -56(%rsp,%rax), %rdi
3471 ; FALLBACK9-NEXT: movq %rdi, %r8
3472 ; FALLBACK9-NEXT: shrdq %cl, %rsi, %r8
3473 ; FALLBACK9-NEXT: movq -72(%rsp,%rax), %r9
3474 ; FALLBACK9-NEXT: movq -64(%rsp,%rax), %rax
3475 ; FALLBACK9-NEXT: movq %rax, %r10
3476 ; FALLBACK9-NEXT: shrdq %cl, %rdi, %r10
3477 ; FALLBACK9-NEXT: shrdq %cl, %rax, %r9
3478 ; FALLBACK9-NEXT: # kill: def $cl killed $cl killed $ecx
3479 ; FALLBACK9-NEXT: shrq %cl, %rsi
3480 ; FALLBACK9-NEXT: movq %r10, 8(%rdx)
3481 ; FALLBACK9-NEXT: movq %r8, 16(%rdx)
3482 ; FALLBACK9-NEXT: movq %rsi, 24(%rdx)
3483 ; FALLBACK9-NEXT: movq %r9, (%rdx)
3484 ; FALLBACK9-NEXT: vzeroupper
3485 ; FALLBACK9-NEXT: retq
3487 ; FALLBACK10-LABEL: lshr_32bytes:
3488 ; FALLBACK10: # %bb.0:
3489 ; FALLBACK10-NEXT: vmovups (%rdi), %ymm0
3490 ; FALLBACK10-NEXT: movzbl (%rsi), %ecx
3491 ; FALLBACK10-NEXT: leal (,%rcx,8), %eax
3492 ; FALLBACK10-NEXT: vxorps %xmm1, %xmm1, %xmm1
3493 ; FALLBACK10-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
3494 ; FALLBACK10-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
3495 ; FALLBACK10-NEXT: andb $24, %cl
3496 ; FALLBACK10-NEXT: movzbl %cl, %ecx
3497 ; FALLBACK10-NEXT: shrxq %rax, -72(%rsp,%rcx), %rsi
3498 ; FALLBACK10-NEXT: movq -64(%rsp,%rcx), %rdi
3499 ; FALLBACK10-NEXT: movq -56(%rsp,%rcx), %r8
3500 ; FALLBACK10-NEXT: shrxq %rax, %r8, %r9
3501 ; FALLBACK10-NEXT: movq -48(%rsp,%rcx), %rcx
3502 ; FALLBACK10-NEXT: shrxq %rax, %rdi, %r10
3503 ; FALLBACK10-NEXT: shrxq %rax, %rcx, %r11
3504 ; FALLBACK10-NEXT: # kill: def $al killed $al killed $rax def $rax
3505 ; FALLBACK10-NEXT: notb %al
3506 ; FALLBACK10-NEXT: addq %rdi, %rdi
3507 ; FALLBACK10-NEXT: shlxq %rax, %rdi, %rdi
3508 ; FALLBACK10-NEXT: orq %rsi, %rdi
3509 ; FALLBACK10-NEXT: addq %rcx, %rcx
3510 ; FALLBACK10-NEXT: shlxq %rax, %rcx, %rcx
3511 ; FALLBACK10-NEXT: orq %r9, %rcx
3512 ; FALLBACK10-NEXT: addq %r8, %r8
3513 ; FALLBACK10-NEXT: shlxq %rax, %r8, %rax
3514 ; FALLBACK10-NEXT: orq %r10, %rax
3515 ; FALLBACK10-NEXT: movq %r11, 24(%rdx)
3516 ; FALLBACK10-NEXT: movq %rax, 8(%rdx)
3517 ; FALLBACK10-NEXT: movq %rcx, 16(%rdx)
3518 ; FALLBACK10-NEXT: movq %rdi, (%rdx)
3519 ; FALLBACK10-NEXT: vzeroupper
3520 ; FALLBACK10-NEXT: retq
3522 ; FALLBACK11-LABEL: lshr_32bytes:
3523 ; FALLBACK11: # %bb.0:
3524 ; FALLBACK11-NEXT: vmovups (%rdi), %ymm0
3525 ; FALLBACK11-NEXT: movzbl (%rsi), %eax
3526 ; FALLBACK11-NEXT: leal (,%rax,8), %ecx
3527 ; FALLBACK11-NEXT: vxorps %xmm1, %xmm1, %xmm1
3528 ; FALLBACK11-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
3529 ; FALLBACK11-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
3530 ; FALLBACK11-NEXT: andb $24, %al
3531 ; FALLBACK11-NEXT: movzbl %al, %eax
3532 ; FALLBACK11-NEXT: movq -48(%rsp,%rax), %rsi
3533 ; FALLBACK11-NEXT: movq -56(%rsp,%rax), %rdi
3534 ; FALLBACK11-NEXT: movq %rdi, %r8
3535 ; FALLBACK11-NEXT: shrdq %cl, %rsi, %r8
3536 ; FALLBACK11-NEXT: movq -72(%rsp,%rax), %r9
3537 ; FALLBACK11-NEXT: movq -64(%rsp,%rax), %rax
3538 ; FALLBACK11-NEXT: movq %rax, %r10
3539 ; FALLBACK11-NEXT: shrdq %cl, %rdi, %r10
3540 ; FALLBACK11-NEXT: shrdq %cl, %rax, %r9
3541 ; FALLBACK11-NEXT: shrxq %rcx, %rsi, %rax
3542 ; FALLBACK11-NEXT: movq %r10, 8(%rdx)
3543 ; FALLBACK11-NEXT: movq %r8, 16(%rdx)
3544 ; FALLBACK11-NEXT: movq %rax, 24(%rdx)
3545 ; FALLBACK11-NEXT: movq %r9, (%rdx)
3546 ; FALLBACK11-NEXT: vzeroupper
3547 ; FALLBACK11-NEXT: retq
3549 ; FALLBACK12-LABEL: lshr_32bytes:
3550 ; FALLBACK12: # %bb.0:
3551 ; FALLBACK12-NEXT: pushq %rbx
3552 ; FALLBACK12-NEXT: vmovups (%rdi), %ymm0
3553 ; FALLBACK12-NEXT: movzbl (%rsi), %ecx
3554 ; FALLBACK12-NEXT: leal (,%rcx,8), %eax
3555 ; FALLBACK12-NEXT: vxorps %xmm1, %xmm1, %xmm1
3556 ; FALLBACK12-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
3557 ; FALLBACK12-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
3558 ; FALLBACK12-NEXT: andb $24, %cl
3559 ; FALLBACK12-NEXT: movzbl %cl, %r9d
3560 ; FALLBACK12-NEXT: movq -64(%rsp,%r9), %r10
3561 ; FALLBACK12-NEXT: movq -56(%rsp,%r9), %r8
3562 ; FALLBACK12-NEXT: movl %eax, %ecx
3563 ; FALLBACK12-NEXT: shrq %cl, %r10
3564 ; FALLBACK12-NEXT: movl %eax, %esi
3565 ; FALLBACK12-NEXT: notb %sil
3566 ; FALLBACK12-NEXT: leaq (%r8,%r8), %rdi
3567 ; FALLBACK12-NEXT: movl %esi, %ecx
3568 ; FALLBACK12-NEXT: shlq %cl, %rdi
3569 ; FALLBACK12-NEXT: orq %r10, %rdi
3570 ; FALLBACK12-NEXT: movq -48(%rsp,%r9), %r10
3571 ; FALLBACK12-NEXT: movq %r10, %r11
3572 ; FALLBACK12-NEXT: movl %eax, %ecx
3573 ; FALLBACK12-NEXT: shrq %cl, %r11
3574 ; FALLBACK12-NEXT: movq -40(%rsp,%r9), %r9
3575 ; FALLBACK12-NEXT: leaq (%r9,%r9), %rbx
3576 ; FALLBACK12-NEXT: movl %esi, %ecx
3577 ; FALLBACK12-NEXT: shlq %cl, %rbx
3578 ; FALLBACK12-NEXT: orq %r11, %rbx
3579 ; FALLBACK12-NEXT: movl %eax, %ecx
3580 ; FALLBACK12-NEXT: shrq %cl, %r8
3581 ; FALLBACK12-NEXT: addq %r10, %r10
3582 ; FALLBACK12-NEXT: movl %esi, %ecx
3583 ; FALLBACK12-NEXT: shlq %cl, %r10
3584 ; FALLBACK12-NEXT: orq %r8, %r10
3585 ; FALLBACK12-NEXT: movl %eax, %ecx
3586 ; FALLBACK12-NEXT: shrq %cl, %r9
3587 ; FALLBACK12-NEXT: movq %r9, 24(%rdx)
3588 ; FALLBACK12-NEXT: movq %r10, 8(%rdx)
3589 ; FALLBACK12-NEXT: movq %rbx, 16(%rdx)
3590 ; FALLBACK12-NEXT: movq %rdi, (%rdx)
3591 ; FALLBACK12-NEXT: popq %rbx
3592 ; FALLBACK12-NEXT: vzeroupper
3593 ; FALLBACK12-NEXT: retq
3595 ; FALLBACK13-LABEL: lshr_32bytes:
3596 ; FALLBACK13: # %bb.0:
3597 ; FALLBACK13-NEXT: vmovups (%rdi), %ymm0
3598 ; FALLBACK13-NEXT: movzbl (%rsi), %eax
3599 ; FALLBACK13-NEXT: leal (,%rax,8), %ecx
3600 ; FALLBACK13-NEXT: vxorps %xmm1, %xmm1, %xmm1
3601 ; FALLBACK13-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
3602 ; FALLBACK13-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
3603 ; FALLBACK13-NEXT: andb $24, %al
3604 ; FALLBACK13-NEXT: movzbl %al, %eax
3605 ; FALLBACK13-NEXT: movq -48(%rsp,%rax), %rsi
3606 ; FALLBACK13-NEXT: movq -56(%rsp,%rax), %rdi
3607 ; FALLBACK13-NEXT: movq %rdi, %r8
3608 ; FALLBACK13-NEXT: shrdq %cl, %rsi, %r8
3609 ; FALLBACK13-NEXT: movq -72(%rsp,%rax), %r9
3610 ; FALLBACK13-NEXT: movq -64(%rsp,%rax), %rax
3611 ; FALLBACK13-NEXT: movq %rax, %r10
3612 ; FALLBACK13-NEXT: shrdq %cl, %rdi, %r10
3613 ; FALLBACK13-NEXT: shrdq %cl, %rax, %r9
3614 ; FALLBACK13-NEXT: # kill: def $cl killed $cl killed $ecx
3615 ; FALLBACK13-NEXT: shrq %cl, %rsi
3616 ; FALLBACK13-NEXT: movq %r10, 8(%rdx)
3617 ; FALLBACK13-NEXT: movq %r8, 16(%rdx)
3618 ; FALLBACK13-NEXT: movq %rsi, 24(%rdx)
3619 ; FALLBACK13-NEXT: movq %r9, (%rdx)
3620 ; FALLBACK13-NEXT: vzeroupper
3621 ; FALLBACK13-NEXT: retq
3623 ; FALLBACK14-LABEL: lshr_32bytes:
3624 ; FALLBACK14: # %bb.0:
3625 ; FALLBACK14-NEXT: vmovups (%rdi), %ymm0
3626 ; FALLBACK14-NEXT: movzbl (%rsi), %ecx
3627 ; FALLBACK14-NEXT: leal (,%rcx,8), %eax
3628 ; FALLBACK14-NEXT: vxorps %xmm1, %xmm1, %xmm1
3629 ; FALLBACK14-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
3630 ; FALLBACK14-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
3631 ; FALLBACK14-NEXT: andb $24, %cl
3632 ; FALLBACK14-NEXT: movzbl %cl, %ecx
3633 ; FALLBACK14-NEXT: shrxq %rax, -72(%rsp,%rcx), %rsi
3634 ; FALLBACK14-NEXT: movq -64(%rsp,%rcx), %rdi
3635 ; FALLBACK14-NEXT: movq -56(%rsp,%rcx), %r8
3636 ; FALLBACK14-NEXT: shrxq %rax, %r8, %r9
3637 ; FALLBACK14-NEXT: movq -48(%rsp,%rcx), %rcx
3638 ; FALLBACK14-NEXT: shrxq %rax, %rdi, %r10
3639 ; FALLBACK14-NEXT: shrxq %rax, %rcx, %r11
3640 ; FALLBACK14-NEXT: # kill: def $al killed $al killed $rax def $rax
3641 ; FALLBACK14-NEXT: notb %al
3642 ; FALLBACK14-NEXT: addq %rdi, %rdi
3643 ; FALLBACK14-NEXT: shlxq %rax, %rdi, %rdi
3644 ; FALLBACK14-NEXT: orq %rsi, %rdi
3645 ; FALLBACK14-NEXT: addq %rcx, %rcx
3646 ; FALLBACK14-NEXT: shlxq %rax, %rcx, %rcx
3647 ; FALLBACK14-NEXT: orq %r9, %rcx
3648 ; FALLBACK14-NEXT: addq %r8, %r8
3649 ; FALLBACK14-NEXT: shlxq %rax, %r8, %rax
3650 ; FALLBACK14-NEXT: orq %r10, %rax
3651 ; FALLBACK14-NEXT: movq %r11, 24(%rdx)
3652 ; FALLBACK14-NEXT: movq %rax, 8(%rdx)
3653 ; FALLBACK14-NEXT: movq %rcx, 16(%rdx)
3654 ; FALLBACK14-NEXT: movq %rdi, (%rdx)
3655 ; FALLBACK14-NEXT: vzeroupper
3656 ; FALLBACK14-NEXT: retq
3658 ; FALLBACK15-LABEL: lshr_32bytes:
3659 ; FALLBACK15: # %bb.0:
3660 ; FALLBACK15-NEXT: vmovups (%rdi), %ymm0
3661 ; FALLBACK15-NEXT: movzbl (%rsi), %eax
3662 ; FALLBACK15-NEXT: leal (,%rax,8), %ecx
3663 ; FALLBACK15-NEXT: vxorps %xmm1, %xmm1, %xmm1
3664 ; FALLBACK15-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
3665 ; FALLBACK15-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
3666 ; FALLBACK15-NEXT: andb $24, %al
3667 ; FALLBACK15-NEXT: movzbl %al, %eax
3668 ; FALLBACK15-NEXT: movq -48(%rsp,%rax), %rsi
3669 ; FALLBACK15-NEXT: movq -56(%rsp,%rax), %rdi
3670 ; FALLBACK15-NEXT: movq %rdi, %r8
3671 ; FALLBACK15-NEXT: shrdq %cl, %rsi, %r8
3672 ; FALLBACK15-NEXT: movq -72(%rsp,%rax), %r9
3673 ; FALLBACK15-NEXT: movq -64(%rsp,%rax), %rax
3674 ; FALLBACK15-NEXT: movq %rax, %r10
3675 ; FALLBACK15-NEXT: shrdq %cl, %rdi, %r10
3676 ; FALLBACK15-NEXT: shrdq %cl, %rax, %r9
3677 ; FALLBACK15-NEXT: shrxq %rcx, %rsi, %rax
3678 ; FALLBACK15-NEXT: movq %r10, 8(%rdx)
3679 ; FALLBACK15-NEXT: movq %r8, 16(%rdx)
3680 ; FALLBACK15-NEXT: movq %rax, 24(%rdx)
3681 ; FALLBACK15-NEXT: movq %r9, (%rdx)
3682 ; FALLBACK15-NEXT: vzeroupper
3683 ; FALLBACK15-NEXT: retq
3685 ; FALLBACK16-LABEL: lshr_32bytes:
3686 ; FALLBACK16: # %bb.0:
3687 ; FALLBACK16-NEXT: pushl %ebp
3688 ; FALLBACK16-NEXT: pushl %ebx
3689 ; FALLBACK16-NEXT: pushl %edi
3690 ; FALLBACK16-NEXT: pushl %esi
3691 ; FALLBACK16-NEXT: subl $108, %esp
3692 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %eax
3693 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %ebp
3694 ; FALLBACK16-NEXT: movl (%ebp), %ecx
3695 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3696 ; FALLBACK16-NEXT: movl 4(%ebp), %ecx
3697 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3698 ; FALLBACK16-NEXT: movl 8(%ebp), %ecx
3699 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3700 ; FALLBACK16-NEXT: movl 12(%ebp), %edi
3701 ; FALLBACK16-NEXT: movl 16(%ebp), %ebx
3702 ; FALLBACK16-NEXT: movb (%eax), %ah
3703 ; FALLBACK16-NEXT: movl 20(%ebp), %esi
3704 ; FALLBACK16-NEXT: movl 24(%ebp), %ecx
3705 ; FALLBACK16-NEXT: movl 28(%ebp), %ebp
3706 ; FALLBACK16-NEXT: xorps %xmm0, %xmm0
3707 ; FALLBACK16-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
3708 ; FALLBACK16-NEXT: movl %ebp, {{[0-9]+}}(%esp)
3709 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3710 ; FALLBACK16-NEXT: movb %ah, %dh
3711 ; FALLBACK16-NEXT: shlb $3, %dh
3712 ; FALLBACK16-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
3713 ; FALLBACK16-NEXT: movl %esi, {{[0-9]+}}(%esp)
3714 ; FALLBACK16-NEXT: movl %ebx, {{[0-9]+}}(%esp)
3715 ; FALLBACK16-NEXT: movl %edi, {{[0-9]+}}(%esp)
3716 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
3717 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3718 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
3719 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3720 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
3721 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3722 ; FALLBACK16-NEXT: andb $28, %ah
3723 ; FALLBACK16-NEXT: movzbl %ah, %edi
3724 ; FALLBACK16-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3725 ; FALLBACK16-NEXT: movl 32(%esp,%edi), %esi
3726 ; FALLBACK16-NEXT: movl 36(%esp,%edi), %eax
3727 ; FALLBACK16-NEXT: movl %eax, %ebx
3728 ; FALLBACK16-NEXT: movb %dh, %cl
3729 ; FALLBACK16-NEXT: shrl %cl, %ebx
3730 ; FALLBACK16-NEXT: movb %dh, %dl
3731 ; FALLBACK16-NEXT: notb %dl
3732 ; FALLBACK16-NEXT: movl 40(%esp,%edi), %edi
3733 ; FALLBACK16-NEXT: leal (%edi,%edi), %ebp
3734 ; FALLBACK16-NEXT: movl %edx, %ecx
3735 ; FALLBACK16-NEXT: shll %cl, %ebp
3736 ; FALLBACK16-NEXT: orl %ebx, %ebp
3737 ; FALLBACK16-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3738 ; FALLBACK16-NEXT: movb %dh, %cl
3739 ; FALLBACK16-NEXT: shrl %cl, %esi
3740 ; FALLBACK16-NEXT: movl %eax, %ebx
3741 ; FALLBACK16-NEXT: addl %eax, %ebx
3742 ; FALLBACK16-NEXT: movl %edx, %ecx
3743 ; FALLBACK16-NEXT: shll %cl, %ebx
3744 ; FALLBACK16-NEXT: orl %esi, %ebx
3745 ; FALLBACK16-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3746 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
3747 ; FALLBACK16-NEXT: movl 44(%esp,%eax), %ebp
3748 ; FALLBACK16-NEXT: movl %ebp, %esi
3749 ; FALLBACK16-NEXT: movb %dh, %cl
3750 ; FALLBACK16-NEXT: shrl %cl, %esi
3751 ; FALLBACK16-NEXT: movl 48(%esp,%eax), %eax
3752 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3753 ; FALLBACK16-NEXT: leal (%eax,%eax), %ebx
3754 ; FALLBACK16-NEXT: movl %edx, %ecx
3755 ; FALLBACK16-NEXT: shll %cl, %ebx
3756 ; FALLBACK16-NEXT: orl %esi, %ebx
3757 ; FALLBACK16-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3758 ; FALLBACK16-NEXT: movb %dh, %cl
3759 ; FALLBACK16-NEXT: shrl %cl, %edi
3760 ; FALLBACK16-NEXT: addl %ebp, %ebp
3761 ; FALLBACK16-NEXT: movl %edx, %ecx
3762 ; FALLBACK16-NEXT: shll %cl, %ebp
3763 ; FALLBACK16-NEXT: orl %edi, %ebp
3764 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
3765 ; FALLBACK16-NEXT: movl 52(%esp,%eax), %edi
3766 ; FALLBACK16-NEXT: movl %edi, %ebx
3767 ; FALLBACK16-NEXT: movb %dh, %cl
3768 ; FALLBACK16-NEXT: shrl %cl, %ebx
3769 ; FALLBACK16-NEXT: movl 56(%esp,%eax), %esi
3770 ; FALLBACK16-NEXT: leal (%esi,%esi), %eax
3771 ; FALLBACK16-NEXT: movl %edx, %ecx
3772 ; FALLBACK16-NEXT: shll %cl, %eax
3773 ; FALLBACK16-NEXT: orl %ebx, %eax
3774 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3775 ; FALLBACK16-NEXT: movb %dh, %cl
3776 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
3777 ; FALLBACK16-NEXT: shrl %cl, %ebx
3778 ; FALLBACK16-NEXT: addl %edi, %edi
3779 ; FALLBACK16-NEXT: movl %edx, %ecx
3780 ; FALLBACK16-NEXT: shll %cl, %edi
3781 ; FALLBACK16-NEXT: orl %ebx, %edi
3782 ; FALLBACK16-NEXT: movb %dh, %cl
3783 ; FALLBACK16-NEXT: movl %esi, %eax
3784 ; FALLBACK16-NEXT: shrl %cl, %eax
3785 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
3786 ; FALLBACK16-NEXT: movl 60(%esp,%ecx), %ebx
3787 ; FALLBACK16-NEXT: leal (%ebx,%ebx), %esi
3788 ; FALLBACK16-NEXT: movl %edx, %ecx
3789 ; FALLBACK16-NEXT: shll %cl, %esi
3790 ; FALLBACK16-NEXT: orl %eax, %esi
3791 ; FALLBACK16-NEXT: movb %dh, %cl
3792 ; FALLBACK16-NEXT: shrl %cl, %ebx
3793 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %eax
3794 ; FALLBACK16-NEXT: movl %ebx, 28(%eax)
3795 ; FALLBACK16-NEXT: movl %esi, 24(%eax)
3796 ; FALLBACK16-NEXT: movl %edi, 16(%eax)
3797 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
3798 ; FALLBACK16-NEXT: movl %ecx, 20(%eax)
3799 ; FALLBACK16-NEXT: movl %ebp, 8(%eax)
3800 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
3801 ; FALLBACK16-NEXT: movl %ecx, 12(%eax)
3802 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
3803 ; FALLBACK16-NEXT: movl %ecx, (%eax)
3804 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
3805 ; FALLBACK16-NEXT: movl %ecx, 4(%eax)
3806 ; FALLBACK16-NEXT: addl $108, %esp
3807 ; FALLBACK16-NEXT: popl %esi
3808 ; FALLBACK16-NEXT: popl %edi
3809 ; FALLBACK16-NEXT: popl %ebx
3810 ; FALLBACK16-NEXT: popl %ebp
3811 ; FALLBACK16-NEXT: retl
3813 ; FALLBACK17-LABEL: lshr_32bytes:
3814 ; FALLBACK17: # %bb.0:
3815 ; FALLBACK17-NEXT: pushl %ebp
3816 ; FALLBACK17-NEXT: pushl %ebx
3817 ; FALLBACK17-NEXT: pushl %edi
3818 ; FALLBACK17-NEXT: pushl %esi
3819 ; FALLBACK17-NEXT: subl $92, %esp
3820 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %ecx
3821 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %ebp
3822 ; FALLBACK17-NEXT: movl (%ebp), %eax
3823 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3824 ; FALLBACK17-NEXT: movl 4(%ebp), %eax
3825 ; FALLBACK17-NEXT: movl %eax, (%esp) # 4-byte Spill
3826 ; FALLBACK17-NEXT: movl 8(%ebp), %esi
3827 ; FALLBACK17-NEXT: movl 12(%ebp), %edi
3828 ; FALLBACK17-NEXT: movl 16(%ebp), %ebx
3829 ; FALLBACK17-NEXT: movb (%ecx), %ch
3830 ; FALLBACK17-NEXT: movl 20(%ebp), %edx
3831 ; FALLBACK17-NEXT: movl 24(%ebp), %eax
3832 ; FALLBACK17-NEXT: movl 28(%ebp), %ebp
3833 ; FALLBACK17-NEXT: xorps %xmm0, %xmm0
3834 ; FALLBACK17-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
3835 ; FALLBACK17-NEXT: movl %ebp, {{[0-9]+}}(%esp)
3836 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
3837 ; FALLBACK17-NEXT: movb %ch, %cl
3838 ; FALLBACK17-NEXT: shlb $3, %cl
3839 ; FALLBACK17-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
3840 ; FALLBACK17-NEXT: movl %edx, {{[0-9]+}}(%esp)
3841 ; FALLBACK17-NEXT: movl %ebx, {{[0-9]+}}(%esp)
3842 ; FALLBACK17-NEXT: movl %edi, {{[0-9]+}}(%esp)
3843 ; FALLBACK17-NEXT: movl %esi, {{[0-9]+}}(%esp)
3844 ; FALLBACK17-NEXT: movl (%esp), %eax # 4-byte Reload
3845 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
3846 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
3847 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
3848 ; FALLBACK17-NEXT: andb $28, %ch
3849 ; FALLBACK17-NEXT: movzbl %ch, %ebp
3850 ; FALLBACK17-NEXT: movl 24(%esp,%ebp), %edx
3851 ; FALLBACK17-NEXT: movl 20(%esp,%ebp), %eax
3852 ; FALLBACK17-NEXT: movl %eax, (%esp) # 4-byte Spill
3853 ; FALLBACK17-NEXT: shrdl %cl, %edx, %eax
3854 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3855 ; FALLBACK17-NEXT: movl 32(%esp,%ebp), %ebx
3856 ; FALLBACK17-NEXT: movl 28(%esp,%ebp), %eax
3857 ; FALLBACK17-NEXT: movl %eax, %esi
3858 ; FALLBACK17-NEXT: shrdl %cl, %ebx, %esi
3859 ; FALLBACK17-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3860 ; FALLBACK17-NEXT: shrdl %cl, %eax, %edx
3861 ; FALLBACK17-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3862 ; FALLBACK17-NEXT: movl 40(%esp,%ebp), %edx
3863 ; FALLBACK17-NEXT: movl 36(%esp,%ebp), %eax
3864 ; FALLBACK17-NEXT: movl %eax, %edi
3865 ; FALLBACK17-NEXT: shrdl %cl, %edx, %edi
3866 ; FALLBACK17-NEXT: shrdl %cl, %eax, %ebx
3867 ; FALLBACK17-NEXT: movl 16(%esp,%ebp), %esi
3868 ; FALLBACK17-NEXT: movl 44(%esp,%ebp), %eax
3869 ; FALLBACK17-NEXT: shrdl %cl, %eax, %edx
3870 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %ebp
3871 ; FALLBACK17-NEXT: movl %edx, 24(%ebp)
3872 ; FALLBACK17-NEXT: movl (%esp), %edx # 4-byte Reload
3873 ; FALLBACK17-NEXT: shrdl %cl, %edx, %esi
3874 ; FALLBACK17-NEXT: shrl %cl, %eax
3875 ; FALLBACK17-NEXT: movl %eax, 28(%ebp)
3876 ; FALLBACK17-NEXT: movl %ebx, 16(%ebp)
3877 ; FALLBACK17-NEXT: movl %edi, 20(%ebp)
3878 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
3879 ; FALLBACK17-NEXT: movl %eax, 8(%ebp)
3880 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
3881 ; FALLBACK17-NEXT: movl %eax, 12(%ebp)
3882 ; FALLBACK17-NEXT: movl %esi, (%ebp)
3883 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
3884 ; FALLBACK17-NEXT: movl %eax, 4(%ebp)
3885 ; FALLBACK17-NEXT: addl $92, %esp
3886 ; FALLBACK17-NEXT: popl %esi
3887 ; FALLBACK17-NEXT: popl %edi
3888 ; FALLBACK17-NEXT: popl %ebx
3889 ; FALLBACK17-NEXT: popl %ebp
3890 ; FALLBACK17-NEXT: retl
3892 ; FALLBACK18-LABEL: lshr_32bytes:
3893 ; FALLBACK18: # %bb.0:
3894 ; FALLBACK18-NEXT: pushl %ebp
3895 ; FALLBACK18-NEXT: pushl %ebx
3896 ; FALLBACK18-NEXT: pushl %edi
3897 ; FALLBACK18-NEXT: pushl %esi
3898 ; FALLBACK18-NEXT: subl $108, %esp
3899 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %ebx
3900 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %eax
3901 ; FALLBACK18-NEXT: movl (%eax), %ecx
3902 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3903 ; FALLBACK18-NEXT: movl 4(%eax), %ecx
3904 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3905 ; FALLBACK18-NEXT: movl 8(%eax), %esi
3906 ; FALLBACK18-NEXT: movl 12(%eax), %edi
3907 ; FALLBACK18-NEXT: movl 16(%eax), %ebp
3908 ; FALLBACK18-NEXT: movzbl (%ebx), %ebx
3909 ; FALLBACK18-NEXT: movl 20(%eax), %edx
3910 ; FALLBACK18-NEXT: movl 24(%eax), %ecx
3911 ; FALLBACK18-NEXT: movl 28(%eax), %eax
3912 ; FALLBACK18-NEXT: xorps %xmm0, %xmm0
3913 ; FALLBACK18-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
3914 ; FALLBACK18-NEXT: movl %eax, {{[0-9]+}}(%esp)
3915 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3916 ; FALLBACK18-NEXT: movl %edx, {{[0-9]+}}(%esp)
3917 ; FALLBACK18-NEXT: movl %ebx, %eax
3918 ; FALLBACK18-NEXT: shlb $3, %al
3919 ; FALLBACK18-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
3920 ; FALLBACK18-NEXT: movl %ebp, {{[0-9]+}}(%esp)
3921 ; FALLBACK18-NEXT: movl %edi, {{[0-9]+}}(%esp)
3922 ; FALLBACK18-NEXT: movl %esi, {{[0-9]+}}(%esp)
3923 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
3924 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3925 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
3926 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
3927 ; FALLBACK18-NEXT: andb $28, %bl
3928 ; FALLBACK18-NEXT: movzbl %bl, %edi
3929 ; FALLBACK18-NEXT: movl 36(%esp,%edi), %esi
3930 ; FALLBACK18-NEXT: movl 40(%esp,%edi), %ecx
3931 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3932 ; FALLBACK18-NEXT: shrxl %eax, %esi, %edx
3933 ; FALLBACK18-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3934 ; FALLBACK18-NEXT: movl %eax, %edx
3935 ; FALLBACK18-NEXT: movl %eax, %ebx
3936 ; FALLBACK18-NEXT: notb %dl
3937 ; FALLBACK18-NEXT: leal (%ecx,%ecx), %ebp
3938 ; FALLBACK18-NEXT: shlxl %edx, %ebp, %eax
3939 ; FALLBACK18-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
3940 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3941 ; FALLBACK18-NEXT: movl %ebx, %ecx
3942 ; FALLBACK18-NEXT: shrxl %ebx, 32(%esp,%edi), %ebx
3943 ; FALLBACK18-NEXT: addl %esi, %esi
3944 ; FALLBACK18-NEXT: shlxl %edx, %esi, %eax
3945 ; FALLBACK18-NEXT: orl %ebx, %eax
3946 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3947 ; FALLBACK18-NEXT: movl 48(%esp,%edi), %eax
3948 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3949 ; FALLBACK18-NEXT: leal (%eax,%eax), %ebx
3950 ; FALLBACK18-NEXT: shlxl %edx, %ebx, %esi
3951 ; FALLBACK18-NEXT: movl 44(%esp,%edi), %ebp
3952 ; FALLBACK18-NEXT: movl %ecx, %eax
3953 ; FALLBACK18-NEXT: shrxl %ecx, %ebp, %ebx
3954 ; FALLBACK18-NEXT: orl %ebx, %esi
3955 ; FALLBACK18-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3956 ; FALLBACK18-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
3957 ; FALLBACK18-NEXT: movl %eax, %ebx
3958 ; FALLBACK18-NEXT: addl %ebp, %ebp
3959 ; FALLBACK18-NEXT: shlxl %edx, %ebp, %eax
3960 ; FALLBACK18-NEXT: orl %ecx, %eax
3961 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3962 ; FALLBACK18-NEXT: movl 56(%esp,%edi), %ebp
3963 ; FALLBACK18-NEXT: leal (%ebp,%ebp), %ecx
3964 ; FALLBACK18-NEXT: shlxl %edx, %ecx, %ecx
3965 ; FALLBACK18-NEXT: movl 52(%esp,%edi), %eax
3966 ; FALLBACK18-NEXT: shrxl %ebx, %eax, %esi
3967 ; FALLBACK18-NEXT: orl %esi, %ecx
3968 ; FALLBACK18-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
3969 ; FALLBACK18-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
3970 ; FALLBACK18-NEXT: addl %eax, %eax
3971 ; FALLBACK18-NEXT: shlxl %edx, %eax, %esi
3972 ; FALLBACK18-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
3973 ; FALLBACK18-NEXT: shrxl %ebx, %ebp, %eax
3974 ; FALLBACK18-NEXT: movl 60(%esp,%edi), %edi
3975 ; FALLBACK18-NEXT: shrxl %ebx, %edi, %ebx
3976 ; FALLBACK18-NEXT: addl %edi, %edi
3977 ; FALLBACK18-NEXT: shlxl %edx, %edi, %edi
3978 ; FALLBACK18-NEXT: orl %eax, %edi
3979 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %eax
3980 ; FALLBACK18-NEXT: movl %ebx, 28(%eax)
3981 ; FALLBACK18-NEXT: movl %edi, 24(%eax)
3982 ; FALLBACK18-NEXT: movl %esi, 16(%eax)
3983 ; FALLBACK18-NEXT: movl %ecx, 20(%eax)
3984 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
3985 ; FALLBACK18-NEXT: movl %ecx, 8(%eax)
3986 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
3987 ; FALLBACK18-NEXT: movl %ecx, 12(%eax)
3988 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
3989 ; FALLBACK18-NEXT: movl %ecx, (%eax)
3990 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
3991 ; FALLBACK18-NEXT: movl %ecx, 4(%eax)
3992 ; FALLBACK18-NEXT: addl $108, %esp
3993 ; FALLBACK18-NEXT: popl %esi
3994 ; FALLBACK18-NEXT: popl %edi
3995 ; FALLBACK18-NEXT: popl %ebx
3996 ; FALLBACK18-NEXT: popl %ebp
3997 ; FALLBACK18-NEXT: retl
3999 ; FALLBACK19-LABEL: lshr_32bytes:
4000 ; FALLBACK19: # %bb.0:
4001 ; FALLBACK19-NEXT: pushl %ebp
4002 ; FALLBACK19-NEXT: pushl %ebx
4003 ; FALLBACK19-NEXT: pushl %edi
4004 ; FALLBACK19-NEXT: pushl %esi
4005 ; FALLBACK19-NEXT: subl $92, %esp
4006 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %ebx
4007 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %ecx
4008 ; FALLBACK19-NEXT: movl (%ecx), %eax
4009 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4010 ; FALLBACK19-NEXT: movl 4(%ecx), %eax
4011 ; FALLBACK19-NEXT: movl %eax, (%esp) # 4-byte Spill
4012 ; FALLBACK19-NEXT: movl 8(%ecx), %esi
4013 ; FALLBACK19-NEXT: movl 12(%ecx), %edi
4014 ; FALLBACK19-NEXT: movl 16(%ecx), %ebp
4015 ; FALLBACK19-NEXT: movzbl (%ebx), %ebx
4016 ; FALLBACK19-NEXT: movl 20(%ecx), %edx
4017 ; FALLBACK19-NEXT: movl 24(%ecx), %eax
4018 ; FALLBACK19-NEXT: movl 28(%ecx), %ecx
4019 ; FALLBACK19-NEXT: xorps %xmm0, %xmm0
4020 ; FALLBACK19-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
4021 ; FALLBACK19-NEXT: movl %ecx, {{[0-9]+}}(%esp)
4022 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
4023 ; FALLBACK19-NEXT: movl %edx, {{[0-9]+}}(%esp)
4024 ; FALLBACK19-NEXT: movl %ebx, %ecx
4025 ; FALLBACK19-NEXT: shlb $3, %cl
4026 ; FALLBACK19-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
4027 ; FALLBACK19-NEXT: movl %ebp, {{[0-9]+}}(%esp)
4028 ; FALLBACK19-NEXT: movl %edi, {{[0-9]+}}(%esp)
4029 ; FALLBACK19-NEXT: movl %esi, {{[0-9]+}}(%esp)
4030 ; FALLBACK19-NEXT: movl (%esp), %eax # 4-byte Reload
4031 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
4032 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4033 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
4034 ; FALLBACK19-NEXT: andb $28, %bl
4035 ; FALLBACK19-NEXT: movzbl %bl, %ebp
4036 ; FALLBACK19-NEXT: movl 24(%esp,%ebp), %esi
4037 ; FALLBACK19-NEXT: movl 20(%esp,%ebp), %eax
4038 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4039 ; FALLBACK19-NEXT: shrdl %cl, %esi, %eax
4040 ; FALLBACK19-NEXT: movl %eax, (%esp) # 4-byte Spill
4041 ; FALLBACK19-NEXT: movl 32(%esp,%ebp), %ebx
4042 ; FALLBACK19-NEXT: movl 28(%esp,%ebp), %eax
4043 ; FALLBACK19-NEXT: movl %eax, %edx
4044 ; FALLBACK19-NEXT: shrdl %cl, %ebx, %edx
4045 ; FALLBACK19-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4046 ; FALLBACK19-NEXT: shrdl %cl, %eax, %esi
4047 ; FALLBACK19-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4048 ; FALLBACK19-NEXT: movl 40(%esp,%ebp), %eax
4049 ; FALLBACK19-NEXT: movl 36(%esp,%ebp), %edx
4050 ; FALLBACK19-NEXT: movl %edx, %esi
4051 ; FALLBACK19-NEXT: shrdl %cl, %eax, %esi
4052 ; FALLBACK19-NEXT: shrdl %cl, %edx, %ebx
4053 ; FALLBACK19-NEXT: movl 16(%esp,%ebp), %edx
4054 ; FALLBACK19-NEXT: movl 44(%esp,%ebp), %edi
4055 ; FALLBACK19-NEXT: shrdl %cl, %edi, %eax
4056 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %ebp
4057 ; FALLBACK19-NEXT: movl %eax, 24(%ebp)
4058 ; FALLBACK19-NEXT: shrxl %ecx, %edi, %eax
4059 ; FALLBACK19-NEXT: movl %eax, 28(%ebp)
4060 ; FALLBACK19-NEXT: movl %ebx, 16(%ebp)
4061 ; FALLBACK19-NEXT: movl %esi, 20(%ebp)
4062 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4063 ; FALLBACK19-NEXT: movl %eax, 8(%ebp)
4064 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4065 ; FALLBACK19-NEXT: movl %eax, 12(%ebp)
4066 ; FALLBACK19-NEXT: # kill: def $cl killed $cl killed $ecx
4067 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4068 ; FALLBACK19-NEXT: shrdl %cl, %eax, %edx
4069 ; FALLBACK19-NEXT: movl %edx, (%ebp)
4070 ; FALLBACK19-NEXT: movl (%esp), %eax # 4-byte Reload
4071 ; FALLBACK19-NEXT: movl %eax, 4(%ebp)
4072 ; FALLBACK19-NEXT: addl $92, %esp
4073 ; FALLBACK19-NEXT: popl %esi
4074 ; FALLBACK19-NEXT: popl %edi
4075 ; FALLBACK19-NEXT: popl %ebx
4076 ; FALLBACK19-NEXT: popl %ebp
4077 ; FALLBACK19-NEXT: retl
4079 ; FALLBACK20-LABEL: lshr_32bytes:
4080 ; FALLBACK20: # %bb.0:
4081 ; FALLBACK20-NEXT: pushl %ebp
4082 ; FALLBACK20-NEXT: pushl %ebx
4083 ; FALLBACK20-NEXT: pushl %edi
4084 ; FALLBACK20-NEXT: pushl %esi
4085 ; FALLBACK20-NEXT: subl $108, %esp
4086 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %eax
4087 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %ecx
4088 ; FALLBACK20-NEXT: movups (%ecx), %xmm0
4089 ; FALLBACK20-NEXT: movups 16(%ecx), %xmm1
4090 ; FALLBACK20-NEXT: movzbl (%eax), %ecx
4091 ; FALLBACK20-NEXT: movl %ecx, %eax
4092 ; FALLBACK20-NEXT: shlb $3, %al
4093 ; FALLBACK20-NEXT: xorps %xmm2, %xmm2
4094 ; FALLBACK20-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
4095 ; FALLBACK20-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
4096 ; FALLBACK20-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
4097 ; FALLBACK20-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
4098 ; FALLBACK20-NEXT: andb $28, %cl
4099 ; FALLBACK20-NEXT: movzbl %cl, %edi
4100 ; FALLBACK20-NEXT: movl 32(%esp,%edi), %esi
4101 ; FALLBACK20-NEXT: movl 36(%esp,%edi), %ebx
4102 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4103 ; FALLBACK20-NEXT: movl %eax, %ecx
4104 ; FALLBACK20-NEXT: shrl %cl, %esi
4105 ; FALLBACK20-NEXT: movl %eax, %edx
4106 ; FALLBACK20-NEXT: notb %dl
4107 ; FALLBACK20-NEXT: addl %ebx, %ebx
4108 ; FALLBACK20-NEXT: movl %edx, %ecx
4109 ; FALLBACK20-NEXT: shll %cl, %ebx
4110 ; FALLBACK20-NEXT: orl %esi, %ebx
4111 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4112 ; FALLBACK20-NEXT: movl 44(%esp,%edi), %ebp
4113 ; FALLBACK20-NEXT: movl %ebp, %esi
4114 ; FALLBACK20-NEXT: movl %eax, %ecx
4115 ; FALLBACK20-NEXT: shrl %cl, %esi
4116 ; FALLBACK20-NEXT: movl 48(%esp,%edi), %ecx
4117 ; FALLBACK20-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4118 ; FALLBACK20-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4119 ; FALLBACK20-NEXT: leal (%ecx,%ecx), %ebx
4120 ; FALLBACK20-NEXT: movl %edx, %ecx
4121 ; FALLBACK20-NEXT: shll %cl, %ebx
4122 ; FALLBACK20-NEXT: orl %esi, %ebx
4123 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4124 ; FALLBACK20-NEXT: movl 40(%esp,%edi), %esi
4125 ; FALLBACK20-NEXT: movl %esi, %ebx
4126 ; FALLBACK20-NEXT: movl %eax, %ecx
4127 ; FALLBACK20-NEXT: shrl %cl, %ebx
4128 ; FALLBACK20-NEXT: addl %ebp, %ebp
4129 ; FALLBACK20-NEXT: movl %edx, %ecx
4130 ; FALLBACK20-NEXT: shll %cl, %ebp
4131 ; FALLBACK20-NEXT: orl %ebx, %ebp
4132 ; FALLBACK20-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4133 ; FALLBACK20-NEXT: movl 52(%esp,%edi), %ebp
4134 ; FALLBACK20-NEXT: movl %ebp, %ebx
4135 ; FALLBACK20-NEXT: movl %eax, %ecx
4136 ; FALLBACK20-NEXT: shrl %cl, %ebx
4137 ; FALLBACK20-NEXT: movl 56(%esp,%edi), %ecx
4138 ; FALLBACK20-NEXT: movl %ecx, (%esp) # 4-byte Spill
4139 ; FALLBACK20-NEXT: leal (%ecx,%ecx), %edi
4140 ; FALLBACK20-NEXT: movl %edx, %ecx
4141 ; FALLBACK20-NEXT: shll %cl, %edi
4142 ; FALLBACK20-NEXT: orl %ebx, %edi
4143 ; FALLBACK20-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4144 ; FALLBACK20-NEXT: movl %eax, %ecx
4145 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
4146 ; FALLBACK20-NEXT: shrl %cl, %edi
4147 ; FALLBACK20-NEXT: addl %ebp, %ebp
4148 ; FALLBACK20-NEXT: movl %edx, %ecx
4149 ; FALLBACK20-NEXT: shll %cl, %ebp
4150 ; FALLBACK20-NEXT: orl %edi, %ebp
4151 ; FALLBACK20-NEXT: movl %eax, %ecx
4152 ; FALLBACK20-NEXT: shrl %cl, (%esp) # 4-byte Folded Spill
4153 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
4154 ; FALLBACK20-NEXT: movl 60(%esp,%ecx), %ebx
4155 ; FALLBACK20-NEXT: leal (%ebx,%ebx), %edi
4156 ; FALLBACK20-NEXT: movl %edx, %ecx
4157 ; FALLBACK20-NEXT: shll %cl, %edi
4158 ; FALLBACK20-NEXT: orl (%esp), %edi # 4-byte Folded Reload
4159 ; FALLBACK20-NEXT: movl %eax, %ecx
4160 ; FALLBACK20-NEXT: shrl %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
4161 ; FALLBACK20-NEXT: addl %esi, %esi
4162 ; FALLBACK20-NEXT: movl %edx, %ecx
4163 ; FALLBACK20-NEXT: shll %cl, %esi
4164 ; FALLBACK20-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
4165 ; FALLBACK20-NEXT: movl %eax, %ecx
4166 ; FALLBACK20-NEXT: shrl %cl, %ebx
4167 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %eax
4168 ; FALLBACK20-NEXT: movl %ebx, 28(%eax)
4169 ; FALLBACK20-NEXT: movl %esi, 4(%eax)
4170 ; FALLBACK20-NEXT: movl %edi, 24(%eax)
4171 ; FALLBACK20-NEXT: movl %ebp, 16(%eax)
4172 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
4173 ; FALLBACK20-NEXT: movl %ecx, 20(%eax)
4174 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
4175 ; FALLBACK20-NEXT: movl %ecx, 8(%eax)
4176 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
4177 ; FALLBACK20-NEXT: movl %ecx, 12(%eax)
4178 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
4179 ; FALLBACK20-NEXT: movl %ecx, (%eax)
4180 ; FALLBACK20-NEXT: addl $108, %esp
4181 ; FALLBACK20-NEXT: popl %esi
4182 ; FALLBACK20-NEXT: popl %edi
4183 ; FALLBACK20-NEXT: popl %ebx
4184 ; FALLBACK20-NEXT: popl %ebp
4185 ; FALLBACK20-NEXT: retl
4187 ; FALLBACK21-LABEL: lshr_32bytes:
4188 ; FALLBACK21: # %bb.0:
4189 ; FALLBACK21-NEXT: pushl %ebp
4190 ; FALLBACK21-NEXT: pushl %ebx
4191 ; FALLBACK21-NEXT: pushl %edi
4192 ; FALLBACK21-NEXT: pushl %esi
4193 ; FALLBACK21-NEXT: subl $108, %esp
4194 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %eax
4195 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %ecx
4196 ; FALLBACK21-NEXT: movups (%ecx), %xmm0
4197 ; FALLBACK21-NEXT: movups 16(%ecx), %xmm1
4198 ; FALLBACK21-NEXT: movzbl (%eax), %eax
4199 ; FALLBACK21-NEXT: movl %eax, %ecx
4200 ; FALLBACK21-NEXT: shlb $3, %cl
4201 ; FALLBACK21-NEXT: xorps %xmm2, %xmm2
4202 ; FALLBACK21-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
4203 ; FALLBACK21-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
4204 ; FALLBACK21-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
4205 ; FALLBACK21-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
4206 ; FALLBACK21-NEXT: andb $28, %al
4207 ; FALLBACK21-NEXT: movzbl %al, %ebp
4208 ; FALLBACK21-NEXT: movl 48(%esp,%ebp), %esi
4209 ; FALLBACK21-NEXT: movl 44(%esp,%ebp), %eax
4210 ; FALLBACK21-NEXT: movl %eax, %edx
4211 ; FALLBACK21-NEXT: shrdl %cl, %esi, %edx
4212 ; FALLBACK21-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4213 ; FALLBACK21-NEXT: movl 40(%esp,%ebp), %edx
4214 ; FALLBACK21-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4215 ; FALLBACK21-NEXT: shrdl %cl, %eax, %edx
4216 ; FALLBACK21-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4217 ; FALLBACK21-NEXT: movl 56(%esp,%ebp), %ebx
4218 ; FALLBACK21-NEXT: movl 52(%esp,%ebp), %eax
4219 ; FALLBACK21-NEXT: movl %eax, %edx
4220 ; FALLBACK21-NEXT: shrdl %cl, %ebx, %edx
4221 ; FALLBACK21-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4222 ; FALLBACK21-NEXT: shrdl %cl, %eax, %esi
4223 ; FALLBACK21-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4224 ; FALLBACK21-NEXT: movl 60(%esp,%ebp), %eax
4225 ; FALLBACK21-NEXT: shrdl %cl, %eax, %ebx
4226 ; FALLBACK21-NEXT: movl 32(%esp,%ebp), %edx
4227 ; FALLBACK21-NEXT: movl 36(%esp,%ebp), %edi
4228 ; FALLBACK21-NEXT: movl %edi, %esi
4229 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
4230 ; FALLBACK21-NEXT: shrdl %cl, %ebp, %esi
4231 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %ebp
4232 ; FALLBACK21-NEXT: movl %esi, 4(%ebp)
4233 ; FALLBACK21-NEXT: movl %ebx, 24(%ebp)
4234 ; FALLBACK21-NEXT: shrdl %cl, %edi, %edx
4235 ; FALLBACK21-NEXT: shrl %cl, %eax
4236 ; FALLBACK21-NEXT: movl %eax, 28(%ebp)
4237 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4238 ; FALLBACK21-NEXT: movl %eax, 16(%ebp)
4239 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4240 ; FALLBACK21-NEXT: movl %eax, 20(%ebp)
4241 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4242 ; FALLBACK21-NEXT: movl %eax, 8(%ebp)
4243 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4244 ; FALLBACK21-NEXT: movl %eax, 12(%ebp)
4245 ; FALLBACK21-NEXT: movl %edx, (%ebp)
4246 ; FALLBACK21-NEXT: addl $108, %esp
4247 ; FALLBACK21-NEXT: popl %esi
4248 ; FALLBACK21-NEXT: popl %edi
4249 ; FALLBACK21-NEXT: popl %ebx
4250 ; FALLBACK21-NEXT: popl %ebp
4251 ; FALLBACK21-NEXT: retl
4253 ; FALLBACK22-LABEL: lshr_32bytes:
4254 ; FALLBACK22: # %bb.0:
4255 ; FALLBACK22-NEXT: pushl %ebp
4256 ; FALLBACK22-NEXT: pushl %ebx
4257 ; FALLBACK22-NEXT: pushl %edi
4258 ; FALLBACK22-NEXT: pushl %esi
4259 ; FALLBACK22-NEXT: subl $108, %esp
4260 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %eax
4261 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %ecx
4262 ; FALLBACK22-NEXT: movups (%ecx), %xmm0
4263 ; FALLBACK22-NEXT: movups 16(%ecx), %xmm1
4264 ; FALLBACK22-NEXT: movzbl (%eax), %ecx
4265 ; FALLBACK22-NEXT: movl %ecx, %edx
4266 ; FALLBACK22-NEXT: shlb $3, %dl
4267 ; FALLBACK22-NEXT: xorps %xmm2, %xmm2
4268 ; FALLBACK22-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
4269 ; FALLBACK22-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
4270 ; FALLBACK22-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
4271 ; FALLBACK22-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
4272 ; FALLBACK22-NEXT: andb $28, %cl
4273 ; FALLBACK22-NEXT: movzbl %cl, %edi
4274 ; FALLBACK22-NEXT: shrxl %edx, 32(%esp,%edi), %ecx
4275 ; FALLBACK22-NEXT: movl %edx, %eax
4276 ; FALLBACK22-NEXT: notb %al
4277 ; FALLBACK22-NEXT: movl 36(%esp,%edi), %esi
4278 ; FALLBACK22-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4279 ; FALLBACK22-NEXT: addl %esi, %esi
4280 ; FALLBACK22-NEXT: shlxl %eax, %esi, %esi
4281 ; FALLBACK22-NEXT: orl %ecx, %esi
4282 ; FALLBACK22-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4283 ; FALLBACK22-NEXT: movl 48(%esp,%edi), %ecx
4284 ; FALLBACK22-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4285 ; FALLBACK22-NEXT: addl %ecx, %ecx
4286 ; FALLBACK22-NEXT: shlxl %eax, %ecx, %esi
4287 ; FALLBACK22-NEXT: movl %eax, %ebp
4288 ; FALLBACK22-NEXT: movl 44(%esp,%edi), %ecx
4289 ; FALLBACK22-NEXT: shrxl %edx, %ecx, %ebx
4290 ; FALLBACK22-NEXT: orl %ebx, %esi
4291 ; FALLBACK22-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4292 ; FALLBACK22-NEXT: addl %ecx, %ecx
4293 ; FALLBACK22-NEXT: shlxl %eax, %ecx, %esi
4294 ; FALLBACK22-NEXT: movl 40(%esp,%edi), %eax
4295 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4296 ; FALLBACK22-NEXT: shrxl %edx, %eax, %ebx
4297 ; FALLBACK22-NEXT: orl %ebx, %esi
4298 ; FALLBACK22-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4299 ; FALLBACK22-NEXT: movl 56(%esp,%edi), %esi
4300 ; FALLBACK22-NEXT: leal (%esi,%esi), %ebx
4301 ; FALLBACK22-NEXT: shlxl %ebp, %ebx, %eax
4302 ; FALLBACK22-NEXT: movl %ebp, %ecx
4303 ; FALLBACK22-NEXT: movl 52(%esp,%edi), %ebx
4304 ; FALLBACK22-NEXT: shrxl %edx, %ebx, %ebp
4305 ; FALLBACK22-NEXT: orl %ebp, %eax
4306 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4307 ; FALLBACK22-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
4308 ; FALLBACK22-NEXT: addl %ebx, %ebx
4309 ; FALLBACK22-NEXT: shlxl %ecx, %ebx, %ebx
4310 ; FALLBACK22-NEXT: orl %ebp, %ebx
4311 ; FALLBACK22-NEXT: shrxl %edx, %esi, %ebp
4312 ; FALLBACK22-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
4313 ; FALLBACK22-NEXT: movl 60(%esp,%edi), %edi
4314 ; FALLBACK22-NEXT: shrxl %edx, %edi, %eax
4315 ; FALLBACK22-NEXT: addl %edi, %edi
4316 ; FALLBACK22-NEXT: movl %ecx, %edx
4317 ; FALLBACK22-NEXT: shlxl %ecx, %edi, %edi
4318 ; FALLBACK22-NEXT: orl %ebp, %edi
4319 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
4320 ; FALLBACK22-NEXT: addl %ecx, %ecx
4321 ; FALLBACK22-NEXT: shlxl %edx, %ecx, %ecx
4322 ; FALLBACK22-NEXT: orl %esi, %ecx
4323 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %edx
4324 ; FALLBACK22-NEXT: movl %eax, 28(%edx)
4325 ; FALLBACK22-NEXT: movl %ecx, 4(%edx)
4326 ; FALLBACK22-NEXT: movl %edi, 24(%edx)
4327 ; FALLBACK22-NEXT: movl %ebx, 16(%edx)
4328 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4329 ; FALLBACK22-NEXT: movl %eax, 20(%edx)
4330 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4331 ; FALLBACK22-NEXT: movl %eax, 8(%edx)
4332 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4333 ; FALLBACK22-NEXT: movl %eax, 12(%edx)
4334 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4335 ; FALLBACK22-NEXT: movl %eax, (%edx)
4336 ; FALLBACK22-NEXT: addl $108, %esp
4337 ; FALLBACK22-NEXT: popl %esi
4338 ; FALLBACK22-NEXT: popl %edi
4339 ; FALLBACK22-NEXT: popl %ebx
4340 ; FALLBACK22-NEXT: popl %ebp
4341 ; FALLBACK22-NEXT: retl
4343 ; FALLBACK23-LABEL: lshr_32bytes:
4344 ; FALLBACK23: # %bb.0:
4345 ; FALLBACK23-NEXT: pushl %ebp
4346 ; FALLBACK23-NEXT: pushl %ebx
4347 ; FALLBACK23-NEXT: pushl %edi
4348 ; FALLBACK23-NEXT: pushl %esi
4349 ; FALLBACK23-NEXT: subl $108, %esp
4350 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %eax
4351 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %ecx
4352 ; FALLBACK23-NEXT: movups (%ecx), %xmm0
4353 ; FALLBACK23-NEXT: movups 16(%ecx), %xmm1
4354 ; FALLBACK23-NEXT: movzbl (%eax), %eax
4355 ; FALLBACK23-NEXT: movl %eax, %ecx
4356 ; FALLBACK23-NEXT: shlb $3, %cl
4357 ; FALLBACK23-NEXT: xorps %xmm2, %xmm2
4358 ; FALLBACK23-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
4359 ; FALLBACK23-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
4360 ; FALLBACK23-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
4361 ; FALLBACK23-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
4362 ; FALLBACK23-NEXT: andb $28, %al
4363 ; FALLBACK23-NEXT: movzbl %al, %ebx
4364 ; FALLBACK23-NEXT: movl 48(%esp,%ebx), %esi
4365 ; FALLBACK23-NEXT: movl 44(%esp,%ebx), %eax
4366 ; FALLBACK23-NEXT: movl %eax, %edx
4367 ; FALLBACK23-NEXT: shrdl %cl, %esi, %edx
4368 ; FALLBACK23-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4369 ; FALLBACK23-NEXT: movl 40(%esp,%ebx), %edx
4370 ; FALLBACK23-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4371 ; FALLBACK23-NEXT: shrdl %cl, %eax, %edx
4372 ; FALLBACK23-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4373 ; FALLBACK23-NEXT: movl 56(%esp,%ebx), %ebp
4374 ; FALLBACK23-NEXT: movl 52(%esp,%ebx), %eax
4375 ; FALLBACK23-NEXT: movl %eax, %edi
4376 ; FALLBACK23-NEXT: shrdl %cl, %ebp, %edi
4377 ; FALLBACK23-NEXT: shrdl %cl, %eax, %esi
4378 ; FALLBACK23-NEXT: movl 60(%esp,%ebx), %eax
4379 ; FALLBACK23-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4380 ; FALLBACK23-NEXT: shrdl %cl, %eax, %ebp
4381 ; FALLBACK23-NEXT: movl 32(%esp,%ebx), %edx
4382 ; FALLBACK23-NEXT: movl 36(%esp,%ebx), %ebx
4383 ; FALLBACK23-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4384 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4385 ; FALLBACK23-NEXT: shrdl %cl, %eax, %ebx
4386 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %eax
4387 ; FALLBACK23-NEXT: movl %ebx, 4(%eax)
4388 ; FALLBACK23-NEXT: movl %ebp, 24(%eax)
4389 ; FALLBACK23-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
4390 ; FALLBACK23-NEXT: movl %ebx, 28(%eax)
4391 ; FALLBACK23-NEXT: movl %esi, 16(%eax)
4392 ; FALLBACK23-NEXT: movl %edi, 20(%eax)
4393 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
4394 ; FALLBACK23-NEXT: movl %esi, 8(%eax)
4395 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
4396 ; FALLBACK23-NEXT: movl %esi, 12(%eax)
4397 ; FALLBACK23-NEXT: # kill: def $cl killed $cl killed $ecx
4398 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
4399 ; FALLBACK23-NEXT: shrdl %cl, %esi, %edx
4400 ; FALLBACK23-NEXT: movl %edx, (%eax)
4401 ; FALLBACK23-NEXT: addl $108, %esp
4402 ; FALLBACK23-NEXT: popl %esi
4403 ; FALLBACK23-NEXT: popl %edi
4404 ; FALLBACK23-NEXT: popl %ebx
4405 ; FALLBACK23-NEXT: popl %ebp
4406 ; FALLBACK23-NEXT: retl
4408 ; FALLBACK24-LABEL: lshr_32bytes:
4409 ; FALLBACK24: # %bb.0:
4410 ; FALLBACK24-NEXT: pushl %ebp
4411 ; FALLBACK24-NEXT: pushl %ebx
4412 ; FALLBACK24-NEXT: pushl %edi
4413 ; FALLBACK24-NEXT: pushl %esi
4414 ; FALLBACK24-NEXT: subl $108, %esp
4415 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %eax
4416 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %ecx
4417 ; FALLBACK24-NEXT: vmovups (%ecx), %ymm0
4418 ; FALLBACK24-NEXT: movzbl (%eax), %ecx
4419 ; FALLBACK24-NEXT: movl %ecx, %eax
4420 ; FALLBACK24-NEXT: shlb $3, %al
4421 ; FALLBACK24-NEXT: vxorps %xmm1, %xmm1, %xmm1
4422 ; FALLBACK24-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
4423 ; FALLBACK24-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
4424 ; FALLBACK24-NEXT: andb $28, %cl
4425 ; FALLBACK24-NEXT: movzbl %cl, %edi
4426 ; FALLBACK24-NEXT: movl 32(%esp,%edi), %esi
4427 ; FALLBACK24-NEXT: movl 36(%esp,%edi), %ebx
4428 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4429 ; FALLBACK24-NEXT: movl %eax, %ecx
4430 ; FALLBACK24-NEXT: shrl %cl, %esi
4431 ; FALLBACK24-NEXT: movl %eax, %edx
4432 ; FALLBACK24-NEXT: notb %dl
4433 ; FALLBACK24-NEXT: addl %ebx, %ebx
4434 ; FALLBACK24-NEXT: movl %edx, %ecx
4435 ; FALLBACK24-NEXT: shll %cl, %ebx
4436 ; FALLBACK24-NEXT: orl %esi, %ebx
4437 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4438 ; FALLBACK24-NEXT: movl 44(%esp,%edi), %ebp
4439 ; FALLBACK24-NEXT: movl %ebp, %esi
4440 ; FALLBACK24-NEXT: movl %eax, %ecx
4441 ; FALLBACK24-NEXT: shrl %cl, %esi
4442 ; FALLBACK24-NEXT: movl 48(%esp,%edi), %ecx
4443 ; FALLBACK24-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4444 ; FALLBACK24-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4445 ; FALLBACK24-NEXT: leal (%ecx,%ecx), %ebx
4446 ; FALLBACK24-NEXT: movl %edx, %ecx
4447 ; FALLBACK24-NEXT: shll %cl, %ebx
4448 ; FALLBACK24-NEXT: orl %esi, %ebx
4449 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4450 ; FALLBACK24-NEXT: movl 40(%esp,%edi), %esi
4451 ; FALLBACK24-NEXT: movl %esi, %ebx
4452 ; FALLBACK24-NEXT: movl %eax, %ecx
4453 ; FALLBACK24-NEXT: shrl %cl, %ebx
4454 ; FALLBACK24-NEXT: addl %ebp, %ebp
4455 ; FALLBACK24-NEXT: movl %edx, %ecx
4456 ; FALLBACK24-NEXT: shll %cl, %ebp
4457 ; FALLBACK24-NEXT: orl %ebx, %ebp
4458 ; FALLBACK24-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4459 ; FALLBACK24-NEXT: movl 52(%esp,%edi), %ebp
4460 ; FALLBACK24-NEXT: movl %ebp, %ebx
4461 ; FALLBACK24-NEXT: movl %eax, %ecx
4462 ; FALLBACK24-NEXT: shrl %cl, %ebx
4463 ; FALLBACK24-NEXT: movl 56(%esp,%edi), %ecx
4464 ; FALLBACK24-NEXT: movl %ecx, (%esp) # 4-byte Spill
4465 ; FALLBACK24-NEXT: leal (%ecx,%ecx), %edi
4466 ; FALLBACK24-NEXT: movl %edx, %ecx
4467 ; FALLBACK24-NEXT: shll %cl, %edi
4468 ; FALLBACK24-NEXT: orl %ebx, %edi
4469 ; FALLBACK24-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4470 ; FALLBACK24-NEXT: movl %eax, %ecx
4471 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
4472 ; FALLBACK24-NEXT: shrl %cl, %edi
4473 ; FALLBACK24-NEXT: addl %ebp, %ebp
4474 ; FALLBACK24-NEXT: movl %edx, %ecx
4475 ; FALLBACK24-NEXT: shll %cl, %ebp
4476 ; FALLBACK24-NEXT: orl %edi, %ebp
4477 ; FALLBACK24-NEXT: movl %eax, %ecx
4478 ; FALLBACK24-NEXT: shrl %cl, (%esp) # 4-byte Folded Spill
4479 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
4480 ; FALLBACK24-NEXT: movl 60(%esp,%ecx), %ebx
4481 ; FALLBACK24-NEXT: leal (%ebx,%ebx), %edi
4482 ; FALLBACK24-NEXT: movl %edx, %ecx
4483 ; FALLBACK24-NEXT: shll %cl, %edi
4484 ; FALLBACK24-NEXT: orl (%esp), %edi # 4-byte Folded Reload
4485 ; FALLBACK24-NEXT: movl %eax, %ecx
4486 ; FALLBACK24-NEXT: shrl %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
4487 ; FALLBACK24-NEXT: addl %esi, %esi
4488 ; FALLBACK24-NEXT: movl %edx, %ecx
4489 ; FALLBACK24-NEXT: shll %cl, %esi
4490 ; FALLBACK24-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
4491 ; FALLBACK24-NEXT: movl %eax, %ecx
4492 ; FALLBACK24-NEXT: shrl %cl, %ebx
4493 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %eax
4494 ; FALLBACK24-NEXT: movl %ebx, 28(%eax)
4495 ; FALLBACK24-NEXT: movl %esi, 4(%eax)
4496 ; FALLBACK24-NEXT: movl %edi, 24(%eax)
4497 ; FALLBACK24-NEXT: movl %ebp, 16(%eax)
4498 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
4499 ; FALLBACK24-NEXT: movl %ecx, 20(%eax)
4500 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
4501 ; FALLBACK24-NEXT: movl %ecx, 8(%eax)
4502 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
4503 ; FALLBACK24-NEXT: movl %ecx, 12(%eax)
4504 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
4505 ; FALLBACK24-NEXT: movl %ecx, (%eax)
4506 ; FALLBACK24-NEXT: addl $108, %esp
4507 ; FALLBACK24-NEXT: popl %esi
4508 ; FALLBACK24-NEXT: popl %edi
4509 ; FALLBACK24-NEXT: popl %ebx
4510 ; FALLBACK24-NEXT: popl %ebp
4511 ; FALLBACK24-NEXT: vzeroupper
4512 ; FALLBACK24-NEXT: retl
4514 ; FALLBACK25-LABEL: lshr_32bytes:
4515 ; FALLBACK25: # %bb.0:
4516 ; FALLBACK25-NEXT: pushl %ebp
4517 ; FALLBACK25-NEXT: pushl %ebx
4518 ; FALLBACK25-NEXT: pushl %edi
4519 ; FALLBACK25-NEXT: pushl %esi
4520 ; FALLBACK25-NEXT: subl $108, %esp
4521 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %eax
4522 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %ecx
4523 ; FALLBACK25-NEXT: vmovups (%ecx), %ymm0
4524 ; FALLBACK25-NEXT: movzbl (%eax), %eax
4525 ; FALLBACK25-NEXT: movl %eax, %ecx
4526 ; FALLBACK25-NEXT: shlb $3, %cl
4527 ; FALLBACK25-NEXT: vxorps %xmm1, %xmm1, %xmm1
4528 ; FALLBACK25-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
4529 ; FALLBACK25-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
4530 ; FALLBACK25-NEXT: andb $28, %al
4531 ; FALLBACK25-NEXT: movzbl %al, %ebp
4532 ; FALLBACK25-NEXT: movl 48(%esp,%ebp), %esi
4533 ; FALLBACK25-NEXT: movl 44(%esp,%ebp), %eax
4534 ; FALLBACK25-NEXT: movl %eax, %edx
4535 ; FALLBACK25-NEXT: shrdl %cl, %esi, %edx
4536 ; FALLBACK25-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4537 ; FALLBACK25-NEXT: movl 40(%esp,%ebp), %edx
4538 ; FALLBACK25-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4539 ; FALLBACK25-NEXT: shrdl %cl, %eax, %edx
4540 ; FALLBACK25-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4541 ; FALLBACK25-NEXT: movl 56(%esp,%ebp), %ebx
4542 ; FALLBACK25-NEXT: movl 52(%esp,%ebp), %eax
4543 ; FALLBACK25-NEXT: movl %eax, %edx
4544 ; FALLBACK25-NEXT: shrdl %cl, %ebx, %edx
4545 ; FALLBACK25-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4546 ; FALLBACK25-NEXT: shrdl %cl, %eax, %esi
4547 ; FALLBACK25-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4548 ; FALLBACK25-NEXT: movl 60(%esp,%ebp), %eax
4549 ; FALLBACK25-NEXT: shrdl %cl, %eax, %ebx
4550 ; FALLBACK25-NEXT: movl 32(%esp,%ebp), %edx
4551 ; FALLBACK25-NEXT: movl 36(%esp,%ebp), %edi
4552 ; FALLBACK25-NEXT: movl %edi, %esi
4553 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
4554 ; FALLBACK25-NEXT: shrdl %cl, %ebp, %esi
4555 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %ebp
4556 ; FALLBACK25-NEXT: movl %esi, 4(%ebp)
4557 ; FALLBACK25-NEXT: movl %ebx, 24(%ebp)
4558 ; FALLBACK25-NEXT: shrdl %cl, %edi, %edx
4559 ; FALLBACK25-NEXT: shrl %cl, %eax
4560 ; FALLBACK25-NEXT: movl %eax, 28(%ebp)
4561 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4562 ; FALLBACK25-NEXT: movl %eax, 16(%ebp)
4563 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4564 ; FALLBACK25-NEXT: movl %eax, 20(%ebp)
4565 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4566 ; FALLBACK25-NEXT: movl %eax, 8(%ebp)
4567 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4568 ; FALLBACK25-NEXT: movl %eax, 12(%ebp)
4569 ; FALLBACK25-NEXT: movl %edx, (%ebp)
4570 ; FALLBACK25-NEXT: addl $108, %esp
4571 ; FALLBACK25-NEXT: popl %esi
4572 ; FALLBACK25-NEXT: popl %edi
4573 ; FALLBACK25-NEXT: popl %ebx
4574 ; FALLBACK25-NEXT: popl %ebp
4575 ; FALLBACK25-NEXT: vzeroupper
4576 ; FALLBACK25-NEXT: retl
4578 ; FALLBACK26-LABEL: lshr_32bytes:
4579 ; FALLBACK26: # %bb.0:
4580 ; FALLBACK26-NEXT: pushl %ebp
4581 ; FALLBACK26-NEXT: pushl %ebx
4582 ; FALLBACK26-NEXT: pushl %edi
4583 ; FALLBACK26-NEXT: pushl %esi
4584 ; FALLBACK26-NEXT: subl $108, %esp
4585 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %eax
4586 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %ecx
4587 ; FALLBACK26-NEXT: vmovups (%ecx), %ymm0
4588 ; FALLBACK26-NEXT: movzbl (%eax), %ecx
4589 ; FALLBACK26-NEXT: movl %ecx, %edx
4590 ; FALLBACK26-NEXT: shlb $3, %dl
4591 ; FALLBACK26-NEXT: vxorps %xmm1, %xmm1, %xmm1
4592 ; FALLBACK26-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
4593 ; FALLBACK26-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
4594 ; FALLBACK26-NEXT: andb $28, %cl
4595 ; FALLBACK26-NEXT: movzbl %cl, %edi
4596 ; FALLBACK26-NEXT: shrxl %edx, 32(%esp,%edi), %ecx
4597 ; FALLBACK26-NEXT: movl %edx, %eax
4598 ; FALLBACK26-NEXT: notb %al
4599 ; FALLBACK26-NEXT: movl 36(%esp,%edi), %esi
4600 ; FALLBACK26-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4601 ; FALLBACK26-NEXT: addl %esi, %esi
4602 ; FALLBACK26-NEXT: shlxl %eax, %esi, %esi
4603 ; FALLBACK26-NEXT: orl %ecx, %esi
4604 ; FALLBACK26-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4605 ; FALLBACK26-NEXT: movl 48(%esp,%edi), %ecx
4606 ; FALLBACK26-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4607 ; FALLBACK26-NEXT: addl %ecx, %ecx
4608 ; FALLBACK26-NEXT: shlxl %eax, %ecx, %esi
4609 ; FALLBACK26-NEXT: movl %eax, %ebp
4610 ; FALLBACK26-NEXT: movl 44(%esp,%edi), %ecx
4611 ; FALLBACK26-NEXT: shrxl %edx, %ecx, %ebx
4612 ; FALLBACK26-NEXT: orl %ebx, %esi
4613 ; FALLBACK26-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4614 ; FALLBACK26-NEXT: addl %ecx, %ecx
4615 ; FALLBACK26-NEXT: shlxl %eax, %ecx, %esi
4616 ; FALLBACK26-NEXT: movl 40(%esp,%edi), %eax
4617 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4618 ; FALLBACK26-NEXT: shrxl %edx, %eax, %ebx
4619 ; FALLBACK26-NEXT: orl %ebx, %esi
4620 ; FALLBACK26-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4621 ; FALLBACK26-NEXT: movl 56(%esp,%edi), %esi
4622 ; FALLBACK26-NEXT: leal (%esi,%esi), %ebx
4623 ; FALLBACK26-NEXT: shlxl %ebp, %ebx, %eax
4624 ; FALLBACK26-NEXT: movl %ebp, %ecx
4625 ; FALLBACK26-NEXT: movl 52(%esp,%edi), %ebx
4626 ; FALLBACK26-NEXT: shrxl %edx, %ebx, %ebp
4627 ; FALLBACK26-NEXT: orl %ebp, %eax
4628 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4629 ; FALLBACK26-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
4630 ; FALLBACK26-NEXT: addl %ebx, %ebx
4631 ; FALLBACK26-NEXT: shlxl %ecx, %ebx, %ebx
4632 ; FALLBACK26-NEXT: orl %ebp, %ebx
4633 ; FALLBACK26-NEXT: shrxl %edx, %esi, %ebp
4634 ; FALLBACK26-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
4635 ; FALLBACK26-NEXT: movl 60(%esp,%edi), %edi
4636 ; FALLBACK26-NEXT: shrxl %edx, %edi, %eax
4637 ; FALLBACK26-NEXT: addl %edi, %edi
4638 ; FALLBACK26-NEXT: movl %ecx, %edx
4639 ; FALLBACK26-NEXT: shlxl %ecx, %edi, %edi
4640 ; FALLBACK26-NEXT: orl %ebp, %edi
4641 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
4642 ; FALLBACK26-NEXT: addl %ecx, %ecx
4643 ; FALLBACK26-NEXT: shlxl %edx, %ecx, %ecx
4644 ; FALLBACK26-NEXT: orl %esi, %ecx
4645 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %edx
4646 ; FALLBACK26-NEXT: movl %eax, 28(%edx)
4647 ; FALLBACK26-NEXT: movl %ecx, 4(%edx)
4648 ; FALLBACK26-NEXT: movl %edi, 24(%edx)
4649 ; FALLBACK26-NEXT: movl %ebx, 16(%edx)
4650 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4651 ; FALLBACK26-NEXT: movl %eax, 20(%edx)
4652 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4653 ; FALLBACK26-NEXT: movl %eax, 8(%edx)
4654 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4655 ; FALLBACK26-NEXT: movl %eax, 12(%edx)
4656 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4657 ; FALLBACK26-NEXT: movl %eax, (%edx)
4658 ; FALLBACK26-NEXT: addl $108, %esp
4659 ; FALLBACK26-NEXT: popl %esi
4660 ; FALLBACK26-NEXT: popl %edi
4661 ; FALLBACK26-NEXT: popl %ebx
4662 ; FALLBACK26-NEXT: popl %ebp
4663 ; FALLBACK26-NEXT: vzeroupper
4664 ; FALLBACK26-NEXT: retl
4666 ; FALLBACK27-LABEL: lshr_32bytes:
4667 ; FALLBACK27: # %bb.0:
4668 ; FALLBACK27-NEXT: pushl %ebp
4669 ; FALLBACK27-NEXT: pushl %ebx
4670 ; FALLBACK27-NEXT: pushl %edi
4671 ; FALLBACK27-NEXT: pushl %esi
4672 ; FALLBACK27-NEXT: subl $108, %esp
4673 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %eax
4674 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %ecx
4675 ; FALLBACK27-NEXT: vmovups (%ecx), %ymm0
4676 ; FALLBACK27-NEXT: movzbl (%eax), %eax
4677 ; FALLBACK27-NEXT: movl %eax, %ecx
4678 ; FALLBACK27-NEXT: shlb $3, %cl
4679 ; FALLBACK27-NEXT: vxorps %xmm1, %xmm1, %xmm1
4680 ; FALLBACK27-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
4681 ; FALLBACK27-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
4682 ; FALLBACK27-NEXT: andb $28, %al
4683 ; FALLBACK27-NEXT: movzbl %al, %ebx
4684 ; FALLBACK27-NEXT: movl 48(%esp,%ebx), %esi
4685 ; FALLBACK27-NEXT: movl 44(%esp,%ebx), %eax
4686 ; FALLBACK27-NEXT: movl %eax, %edx
4687 ; FALLBACK27-NEXT: shrdl %cl, %esi, %edx
4688 ; FALLBACK27-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4689 ; FALLBACK27-NEXT: movl 40(%esp,%ebx), %edx
4690 ; FALLBACK27-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4691 ; FALLBACK27-NEXT: shrdl %cl, %eax, %edx
4692 ; FALLBACK27-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4693 ; FALLBACK27-NEXT: movl 56(%esp,%ebx), %ebp
4694 ; FALLBACK27-NEXT: movl 52(%esp,%ebx), %eax
4695 ; FALLBACK27-NEXT: movl %eax, %edi
4696 ; FALLBACK27-NEXT: shrdl %cl, %ebp, %edi
4697 ; FALLBACK27-NEXT: shrdl %cl, %eax, %esi
4698 ; FALLBACK27-NEXT: movl 60(%esp,%ebx), %eax
4699 ; FALLBACK27-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4700 ; FALLBACK27-NEXT: shrdl %cl, %eax, %ebp
4701 ; FALLBACK27-NEXT: movl 32(%esp,%ebx), %edx
4702 ; FALLBACK27-NEXT: movl 36(%esp,%ebx), %ebx
4703 ; FALLBACK27-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4704 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4705 ; FALLBACK27-NEXT: shrdl %cl, %eax, %ebx
4706 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %eax
4707 ; FALLBACK27-NEXT: movl %ebx, 4(%eax)
4708 ; FALLBACK27-NEXT: movl %ebp, 24(%eax)
4709 ; FALLBACK27-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
4710 ; FALLBACK27-NEXT: movl %ebx, 28(%eax)
4711 ; FALLBACK27-NEXT: movl %esi, 16(%eax)
4712 ; FALLBACK27-NEXT: movl %edi, 20(%eax)
4713 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
4714 ; FALLBACK27-NEXT: movl %esi, 8(%eax)
4715 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
4716 ; FALLBACK27-NEXT: movl %esi, 12(%eax)
4717 ; FALLBACK27-NEXT: # kill: def $cl killed $cl killed $ecx
4718 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
4719 ; FALLBACK27-NEXT: shrdl %cl, %esi, %edx
4720 ; FALLBACK27-NEXT: movl %edx, (%eax)
4721 ; FALLBACK27-NEXT: addl $108, %esp
4722 ; FALLBACK27-NEXT: popl %esi
4723 ; FALLBACK27-NEXT: popl %edi
4724 ; FALLBACK27-NEXT: popl %ebx
4725 ; FALLBACK27-NEXT: popl %ebp
4726 ; FALLBACK27-NEXT: vzeroupper
4727 ; FALLBACK27-NEXT: retl
4729 ; FALLBACK28-LABEL: lshr_32bytes:
4730 ; FALLBACK28: # %bb.0:
4731 ; FALLBACK28-NEXT: pushl %ebp
4732 ; FALLBACK28-NEXT: pushl %ebx
4733 ; FALLBACK28-NEXT: pushl %edi
4734 ; FALLBACK28-NEXT: pushl %esi
4735 ; FALLBACK28-NEXT: subl $108, %esp
4736 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %eax
4737 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %ecx
4738 ; FALLBACK28-NEXT: vmovups (%ecx), %ymm0
4739 ; FALLBACK28-NEXT: movzbl (%eax), %ecx
4740 ; FALLBACK28-NEXT: movl %ecx, %eax
4741 ; FALLBACK28-NEXT: shlb $3, %al
4742 ; FALLBACK28-NEXT: vxorps %xmm1, %xmm1, %xmm1
4743 ; FALLBACK28-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
4744 ; FALLBACK28-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
4745 ; FALLBACK28-NEXT: andb $28, %cl
4746 ; FALLBACK28-NEXT: movzbl %cl, %edi
4747 ; FALLBACK28-NEXT: movl 32(%esp,%edi), %esi
4748 ; FALLBACK28-NEXT: movl 36(%esp,%edi), %ebx
4749 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4750 ; FALLBACK28-NEXT: movl %eax, %ecx
4751 ; FALLBACK28-NEXT: shrl %cl, %esi
4752 ; FALLBACK28-NEXT: movl %eax, %edx
4753 ; FALLBACK28-NEXT: notb %dl
4754 ; FALLBACK28-NEXT: addl %ebx, %ebx
4755 ; FALLBACK28-NEXT: movl %edx, %ecx
4756 ; FALLBACK28-NEXT: shll %cl, %ebx
4757 ; FALLBACK28-NEXT: orl %esi, %ebx
4758 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4759 ; FALLBACK28-NEXT: movl 44(%esp,%edi), %ebp
4760 ; FALLBACK28-NEXT: movl %ebp, %esi
4761 ; FALLBACK28-NEXT: movl %eax, %ecx
4762 ; FALLBACK28-NEXT: shrl %cl, %esi
4763 ; FALLBACK28-NEXT: movl 48(%esp,%edi), %ecx
4764 ; FALLBACK28-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4765 ; FALLBACK28-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4766 ; FALLBACK28-NEXT: leal (%ecx,%ecx), %ebx
4767 ; FALLBACK28-NEXT: movl %edx, %ecx
4768 ; FALLBACK28-NEXT: shll %cl, %ebx
4769 ; FALLBACK28-NEXT: orl %esi, %ebx
4770 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4771 ; FALLBACK28-NEXT: movl 40(%esp,%edi), %esi
4772 ; FALLBACK28-NEXT: movl %esi, %ebx
4773 ; FALLBACK28-NEXT: movl %eax, %ecx
4774 ; FALLBACK28-NEXT: shrl %cl, %ebx
4775 ; FALLBACK28-NEXT: addl %ebp, %ebp
4776 ; FALLBACK28-NEXT: movl %edx, %ecx
4777 ; FALLBACK28-NEXT: shll %cl, %ebp
4778 ; FALLBACK28-NEXT: orl %ebx, %ebp
4779 ; FALLBACK28-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4780 ; FALLBACK28-NEXT: movl 52(%esp,%edi), %ebp
4781 ; FALLBACK28-NEXT: movl %ebp, %ebx
4782 ; FALLBACK28-NEXT: movl %eax, %ecx
4783 ; FALLBACK28-NEXT: shrl %cl, %ebx
4784 ; FALLBACK28-NEXT: movl 56(%esp,%edi), %ecx
4785 ; FALLBACK28-NEXT: movl %ecx, (%esp) # 4-byte Spill
4786 ; FALLBACK28-NEXT: leal (%ecx,%ecx), %edi
4787 ; FALLBACK28-NEXT: movl %edx, %ecx
4788 ; FALLBACK28-NEXT: shll %cl, %edi
4789 ; FALLBACK28-NEXT: orl %ebx, %edi
4790 ; FALLBACK28-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4791 ; FALLBACK28-NEXT: movl %eax, %ecx
4792 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
4793 ; FALLBACK28-NEXT: shrl %cl, %edi
4794 ; FALLBACK28-NEXT: addl %ebp, %ebp
4795 ; FALLBACK28-NEXT: movl %edx, %ecx
4796 ; FALLBACK28-NEXT: shll %cl, %ebp
4797 ; FALLBACK28-NEXT: orl %edi, %ebp
4798 ; FALLBACK28-NEXT: movl %eax, %ecx
4799 ; FALLBACK28-NEXT: shrl %cl, (%esp) # 4-byte Folded Spill
4800 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
4801 ; FALLBACK28-NEXT: movl 60(%esp,%ecx), %ebx
4802 ; FALLBACK28-NEXT: leal (%ebx,%ebx), %edi
4803 ; FALLBACK28-NEXT: movl %edx, %ecx
4804 ; FALLBACK28-NEXT: shll %cl, %edi
4805 ; FALLBACK28-NEXT: orl (%esp), %edi # 4-byte Folded Reload
4806 ; FALLBACK28-NEXT: movl %eax, %ecx
4807 ; FALLBACK28-NEXT: shrl %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
4808 ; FALLBACK28-NEXT: addl %esi, %esi
4809 ; FALLBACK28-NEXT: movl %edx, %ecx
4810 ; FALLBACK28-NEXT: shll %cl, %esi
4811 ; FALLBACK28-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
4812 ; FALLBACK28-NEXT: movl %eax, %ecx
4813 ; FALLBACK28-NEXT: shrl %cl, %ebx
4814 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %eax
4815 ; FALLBACK28-NEXT: movl %ebx, 28(%eax)
4816 ; FALLBACK28-NEXT: movl %esi, 4(%eax)
4817 ; FALLBACK28-NEXT: movl %edi, 24(%eax)
4818 ; FALLBACK28-NEXT: movl %ebp, 16(%eax)
4819 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
4820 ; FALLBACK28-NEXT: movl %ecx, 20(%eax)
4821 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
4822 ; FALLBACK28-NEXT: movl %ecx, 8(%eax)
4823 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
4824 ; FALLBACK28-NEXT: movl %ecx, 12(%eax)
4825 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
4826 ; FALLBACK28-NEXT: movl %ecx, (%eax)
4827 ; FALLBACK28-NEXT: addl $108, %esp
4828 ; FALLBACK28-NEXT: popl %esi
4829 ; FALLBACK28-NEXT: popl %edi
4830 ; FALLBACK28-NEXT: popl %ebx
4831 ; FALLBACK28-NEXT: popl %ebp
4832 ; FALLBACK28-NEXT: vzeroupper
4833 ; FALLBACK28-NEXT: retl
4835 ; FALLBACK29-LABEL: lshr_32bytes:
4836 ; FALLBACK29: # %bb.0:
4837 ; FALLBACK29-NEXT: pushl %ebp
4838 ; FALLBACK29-NEXT: pushl %ebx
4839 ; FALLBACK29-NEXT: pushl %edi
4840 ; FALLBACK29-NEXT: pushl %esi
4841 ; FALLBACK29-NEXT: subl $108, %esp
4842 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %eax
4843 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %ecx
4844 ; FALLBACK29-NEXT: vmovups (%ecx), %ymm0
4845 ; FALLBACK29-NEXT: movzbl (%eax), %eax
4846 ; FALLBACK29-NEXT: movl %eax, %ecx
4847 ; FALLBACK29-NEXT: shlb $3, %cl
4848 ; FALLBACK29-NEXT: vxorps %xmm1, %xmm1, %xmm1
4849 ; FALLBACK29-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
4850 ; FALLBACK29-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
4851 ; FALLBACK29-NEXT: andb $28, %al
4852 ; FALLBACK29-NEXT: movzbl %al, %ebp
4853 ; FALLBACK29-NEXT: movl 48(%esp,%ebp), %esi
4854 ; FALLBACK29-NEXT: movl 44(%esp,%ebp), %eax
4855 ; FALLBACK29-NEXT: movl %eax, %edx
4856 ; FALLBACK29-NEXT: shrdl %cl, %esi, %edx
4857 ; FALLBACK29-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4858 ; FALLBACK29-NEXT: movl 40(%esp,%ebp), %edx
4859 ; FALLBACK29-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4860 ; FALLBACK29-NEXT: shrdl %cl, %eax, %edx
4861 ; FALLBACK29-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4862 ; FALLBACK29-NEXT: movl 56(%esp,%ebp), %ebx
4863 ; FALLBACK29-NEXT: movl 52(%esp,%ebp), %eax
4864 ; FALLBACK29-NEXT: movl %eax, %edx
4865 ; FALLBACK29-NEXT: shrdl %cl, %ebx, %edx
4866 ; FALLBACK29-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4867 ; FALLBACK29-NEXT: shrdl %cl, %eax, %esi
4868 ; FALLBACK29-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4869 ; FALLBACK29-NEXT: movl 60(%esp,%ebp), %eax
4870 ; FALLBACK29-NEXT: shrdl %cl, %eax, %ebx
4871 ; FALLBACK29-NEXT: movl 32(%esp,%ebp), %edx
4872 ; FALLBACK29-NEXT: movl 36(%esp,%ebp), %edi
4873 ; FALLBACK29-NEXT: movl %edi, %esi
4874 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
4875 ; FALLBACK29-NEXT: shrdl %cl, %ebp, %esi
4876 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %ebp
4877 ; FALLBACK29-NEXT: movl %esi, 4(%ebp)
4878 ; FALLBACK29-NEXT: movl %ebx, 24(%ebp)
4879 ; FALLBACK29-NEXT: shrdl %cl, %edi, %edx
4880 ; FALLBACK29-NEXT: shrl %cl, %eax
4881 ; FALLBACK29-NEXT: movl %eax, 28(%ebp)
4882 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4883 ; FALLBACK29-NEXT: movl %eax, 16(%ebp)
4884 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4885 ; FALLBACK29-NEXT: movl %eax, 20(%ebp)
4886 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4887 ; FALLBACK29-NEXT: movl %eax, 8(%ebp)
4888 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4889 ; FALLBACK29-NEXT: movl %eax, 12(%ebp)
4890 ; FALLBACK29-NEXT: movl %edx, (%ebp)
4891 ; FALLBACK29-NEXT: addl $108, %esp
4892 ; FALLBACK29-NEXT: popl %esi
4893 ; FALLBACK29-NEXT: popl %edi
4894 ; FALLBACK29-NEXT: popl %ebx
4895 ; FALLBACK29-NEXT: popl %ebp
4896 ; FALLBACK29-NEXT: vzeroupper
4897 ; FALLBACK29-NEXT: retl
4899 ; FALLBACK30-LABEL: lshr_32bytes:
4900 ; FALLBACK30: # %bb.0:
4901 ; FALLBACK30-NEXT: pushl %ebp
4902 ; FALLBACK30-NEXT: pushl %ebx
4903 ; FALLBACK30-NEXT: pushl %edi
4904 ; FALLBACK30-NEXT: pushl %esi
4905 ; FALLBACK30-NEXT: subl $108, %esp
4906 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %eax
4907 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %ecx
4908 ; FALLBACK30-NEXT: vmovups (%ecx), %ymm0
4909 ; FALLBACK30-NEXT: movzbl (%eax), %ecx
4910 ; FALLBACK30-NEXT: movl %ecx, %edx
4911 ; FALLBACK30-NEXT: shlb $3, %dl
4912 ; FALLBACK30-NEXT: vxorps %xmm1, %xmm1, %xmm1
4913 ; FALLBACK30-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
4914 ; FALLBACK30-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
4915 ; FALLBACK30-NEXT: andb $28, %cl
4916 ; FALLBACK30-NEXT: movzbl %cl, %edi
4917 ; FALLBACK30-NEXT: shrxl %edx, 32(%esp,%edi), %ecx
4918 ; FALLBACK30-NEXT: movl %edx, %eax
4919 ; FALLBACK30-NEXT: notb %al
4920 ; FALLBACK30-NEXT: movl 36(%esp,%edi), %esi
4921 ; FALLBACK30-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4922 ; FALLBACK30-NEXT: addl %esi, %esi
4923 ; FALLBACK30-NEXT: shlxl %eax, %esi, %esi
4924 ; FALLBACK30-NEXT: orl %ecx, %esi
4925 ; FALLBACK30-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4926 ; FALLBACK30-NEXT: movl 48(%esp,%edi), %ecx
4927 ; FALLBACK30-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4928 ; FALLBACK30-NEXT: addl %ecx, %ecx
4929 ; FALLBACK30-NEXT: shlxl %eax, %ecx, %esi
4930 ; FALLBACK30-NEXT: movl %eax, %ebp
4931 ; FALLBACK30-NEXT: movl 44(%esp,%edi), %ecx
4932 ; FALLBACK30-NEXT: shrxl %edx, %ecx, %ebx
4933 ; FALLBACK30-NEXT: orl %ebx, %esi
4934 ; FALLBACK30-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4935 ; FALLBACK30-NEXT: addl %ecx, %ecx
4936 ; FALLBACK30-NEXT: shlxl %eax, %ecx, %esi
4937 ; FALLBACK30-NEXT: movl 40(%esp,%edi), %eax
4938 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4939 ; FALLBACK30-NEXT: shrxl %edx, %eax, %ebx
4940 ; FALLBACK30-NEXT: orl %ebx, %esi
4941 ; FALLBACK30-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4942 ; FALLBACK30-NEXT: movl 56(%esp,%edi), %esi
4943 ; FALLBACK30-NEXT: leal (%esi,%esi), %ebx
4944 ; FALLBACK30-NEXT: shlxl %ebp, %ebx, %eax
4945 ; FALLBACK30-NEXT: movl %ebp, %ecx
4946 ; FALLBACK30-NEXT: movl 52(%esp,%edi), %ebx
4947 ; FALLBACK30-NEXT: shrxl %edx, %ebx, %ebp
4948 ; FALLBACK30-NEXT: orl %ebp, %eax
4949 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
4950 ; FALLBACK30-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
4951 ; FALLBACK30-NEXT: addl %ebx, %ebx
4952 ; FALLBACK30-NEXT: shlxl %ecx, %ebx, %ebx
4953 ; FALLBACK30-NEXT: orl %ebp, %ebx
4954 ; FALLBACK30-NEXT: shrxl %edx, %esi, %ebp
4955 ; FALLBACK30-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
4956 ; FALLBACK30-NEXT: movl 60(%esp,%edi), %edi
4957 ; FALLBACK30-NEXT: shrxl %edx, %edi, %eax
4958 ; FALLBACK30-NEXT: addl %edi, %edi
4959 ; FALLBACK30-NEXT: movl %ecx, %edx
4960 ; FALLBACK30-NEXT: shlxl %ecx, %edi, %edi
4961 ; FALLBACK30-NEXT: orl %ebp, %edi
4962 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
4963 ; FALLBACK30-NEXT: addl %ecx, %ecx
4964 ; FALLBACK30-NEXT: shlxl %edx, %ecx, %ecx
4965 ; FALLBACK30-NEXT: orl %esi, %ecx
4966 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %edx
4967 ; FALLBACK30-NEXT: movl %eax, 28(%edx)
4968 ; FALLBACK30-NEXT: movl %ecx, 4(%edx)
4969 ; FALLBACK30-NEXT: movl %edi, 24(%edx)
4970 ; FALLBACK30-NEXT: movl %ebx, 16(%edx)
4971 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4972 ; FALLBACK30-NEXT: movl %eax, 20(%edx)
4973 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4974 ; FALLBACK30-NEXT: movl %eax, 8(%edx)
4975 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4976 ; FALLBACK30-NEXT: movl %eax, 12(%edx)
4977 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
4978 ; FALLBACK30-NEXT: movl %eax, (%edx)
4979 ; FALLBACK30-NEXT: addl $108, %esp
4980 ; FALLBACK30-NEXT: popl %esi
4981 ; FALLBACK30-NEXT: popl %edi
4982 ; FALLBACK30-NEXT: popl %ebx
4983 ; FALLBACK30-NEXT: popl %ebp
4984 ; FALLBACK30-NEXT: vzeroupper
4985 ; FALLBACK30-NEXT: retl
4987 ; FALLBACK31-LABEL: lshr_32bytes:
4988 ; FALLBACK31: # %bb.0:
4989 ; FALLBACK31-NEXT: pushl %ebp
4990 ; FALLBACK31-NEXT: pushl %ebx
4991 ; FALLBACK31-NEXT: pushl %edi
4992 ; FALLBACK31-NEXT: pushl %esi
4993 ; FALLBACK31-NEXT: subl $108, %esp
4994 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %eax
4995 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %ecx
4996 ; FALLBACK31-NEXT: vmovups (%ecx), %ymm0
4997 ; FALLBACK31-NEXT: movzbl (%eax), %eax
4998 ; FALLBACK31-NEXT: movl %eax, %ecx
4999 ; FALLBACK31-NEXT: shlb $3, %cl
5000 ; FALLBACK31-NEXT: vxorps %xmm1, %xmm1, %xmm1
5001 ; FALLBACK31-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
5002 ; FALLBACK31-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
5003 ; FALLBACK31-NEXT: andb $28, %al
5004 ; FALLBACK31-NEXT: movzbl %al, %ebx
5005 ; FALLBACK31-NEXT: movl 48(%esp,%ebx), %esi
5006 ; FALLBACK31-NEXT: movl 44(%esp,%ebx), %eax
5007 ; FALLBACK31-NEXT: movl %eax, %edx
5008 ; FALLBACK31-NEXT: shrdl %cl, %esi, %edx
5009 ; FALLBACK31-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
5010 ; FALLBACK31-NEXT: movl 40(%esp,%ebx), %edx
5011 ; FALLBACK31-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
5012 ; FALLBACK31-NEXT: shrdl %cl, %eax, %edx
5013 ; FALLBACK31-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
5014 ; FALLBACK31-NEXT: movl 56(%esp,%ebx), %ebp
5015 ; FALLBACK31-NEXT: movl 52(%esp,%ebx), %eax
5016 ; FALLBACK31-NEXT: movl %eax, %edi
5017 ; FALLBACK31-NEXT: shrdl %cl, %ebp, %edi
5018 ; FALLBACK31-NEXT: shrdl %cl, %eax, %esi
5019 ; FALLBACK31-NEXT: movl 60(%esp,%ebx), %eax
5020 ; FALLBACK31-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
5021 ; FALLBACK31-NEXT: shrdl %cl, %eax, %ebp
5022 ; FALLBACK31-NEXT: movl 32(%esp,%ebx), %edx
5023 ; FALLBACK31-NEXT: movl 36(%esp,%ebx), %ebx
5024 ; FALLBACK31-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
5025 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
5026 ; FALLBACK31-NEXT: shrdl %cl, %eax, %ebx
5027 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %eax
5028 ; FALLBACK31-NEXT: movl %ebx, 4(%eax)
5029 ; FALLBACK31-NEXT: movl %ebp, 24(%eax)
5030 ; FALLBACK31-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
5031 ; FALLBACK31-NEXT: movl %ebx, 28(%eax)
5032 ; FALLBACK31-NEXT: movl %esi, 16(%eax)
5033 ; FALLBACK31-NEXT: movl %edi, 20(%eax)
5034 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
5035 ; FALLBACK31-NEXT: movl %esi, 8(%eax)
5036 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
5037 ; FALLBACK31-NEXT: movl %esi, 12(%eax)
5038 ; FALLBACK31-NEXT: # kill: def $cl killed $cl killed $ecx
5039 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
5040 ; FALLBACK31-NEXT: shrdl %cl, %esi, %edx
5041 ; FALLBACK31-NEXT: movl %edx, (%eax)
5042 ; FALLBACK31-NEXT: addl $108, %esp
5043 ; FALLBACK31-NEXT: popl %esi
5044 ; FALLBACK31-NEXT: popl %edi
5045 ; FALLBACK31-NEXT: popl %ebx
5046 ; FALLBACK31-NEXT: popl %ebp
5047 ; FALLBACK31-NEXT: vzeroupper
5048 ; FALLBACK31-NEXT: retl
5049 %src = load i256, ptr %src.ptr, align 1
5050 %byteOff = load i256, ptr %byteOff.ptr, align 1
5051 %bitOff = shl i256 %byteOff, 3
5052 %res = lshr i256 %src, %bitOff
5053 store i256 %res, ptr %dst, align 1
5057 define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nounwind {
5058 ; FALLBACK0-LABEL: lshr_32bytes_dwordOff:
5059 ; FALLBACK0: # %bb.0:
5060 ; FALLBACK0-NEXT: pushq %rbx
5061 ; FALLBACK0-NEXT: movq (%rdi), %rcx
5062 ; FALLBACK0-NEXT: movq 8(%rdi), %r8
5063 ; FALLBACK0-NEXT: movq 16(%rdi), %r9
5064 ; FALLBACK0-NEXT: movq 24(%rdi), %rdi
5065 ; FALLBACK0-NEXT: movzbl (%rsi), %esi
5066 ; FALLBACK0-NEXT: movl %esi, %eax
5067 ; FALLBACK0-NEXT: shlb $5, %al
5068 ; FALLBACK0-NEXT: xorps %xmm0, %xmm0
5069 ; FALLBACK0-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
5070 ; FALLBACK0-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
5071 ; FALLBACK0-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
5072 ; FALLBACK0-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
5073 ; FALLBACK0-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
5074 ; FALLBACK0-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
5075 ; FALLBACK0-NEXT: andb $6, %sil
5076 ; FALLBACK0-NEXT: movzbl %sil, %r9d
5077 ; FALLBACK0-NEXT: movq -64(%rsp,%r9,4), %r10
5078 ; FALLBACK0-NEXT: movq -56(%rsp,%r9,4), %rdi
5079 ; FALLBACK0-NEXT: movq %rdi, %r11
5080 ; FALLBACK0-NEXT: movl %eax, %ecx
5081 ; FALLBACK0-NEXT: shrq %cl, %r11
5082 ; FALLBACK0-NEXT: movl %eax, %esi
5083 ; FALLBACK0-NEXT: notb %sil
5084 ; FALLBACK0-NEXT: movq -48(%rsp,%r9,4), %rbx
5085 ; FALLBACK0-NEXT: leaq (%rbx,%rbx), %r8
5086 ; FALLBACK0-NEXT: movl %esi, %ecx
5087 ; FALLBACK0-NEXT: shlq %cl, %r8
5088 ; FALLBACK0-NEXT: orq %r11, %r8
5089 ; FALLBACK0-NEXT: movl %eax, %ecx
5090 ; FALLBACK0-NEXT: shrq %cl, %r10
5091 ; FALLBACK0-NEXT: addq %rdi, %rdi
5092 ; FALLBACK0-NEXT: movl %esi, %ecx
5093 ; FALLBACK0-NEXT: shlq %cl, %rdi
5094 ; FALLBACK0-NEXT: orq %r10, %rdi
5095 ; FALLBACK0-NEXT: movl %eax, %ecx
5096 ; FALLBACK0-NEXT: shrq %cl, %rbx
5097 ; FALLBACK0-NEXT: movq -40(%rsp,%r9,4), %r9
5098 ; FALLBACK0-NEXT: leaq (%r9,%r9), %r10
5099 ; FALLBACK0-NEXT: movl %esi, %ecx
5100 ; FALLBACK0-NEXT: shlq %cl, %r10
5101 ; FALLBACK0-NEXT: orq %rbx, %r10
5102 ; FALLBACK0-NEXT: movl %eax, %ecx
5103 ; FALLBACK0-NEXT: shrq %cl, %r9
5104 ; FALLBACK0-NEXT: movq %r9, 24(%rdx)
5105 ; FALLBACK0-NEXT: movq %r10, 16(%rdx)
5106 ; FALLBACK0-NEXT: movq %rdi, (%rdx)
5107 ; FALLBACK0-NEXT: movq %r8, 8(%rdx)
5108 ; FALLBACK0-NEXT: popq %rbx
5109 ; FALLBACK0-NEXT: retq
5111 ; FALLBACK1-LABEL: lshr_32bytes_dwordOff:
5112 ; FALLBACK1: # %bb.0:
5113 ; FALLBACK1-NEXT: movq (%rdi), %rax
5114 ; FALLBACK1-NEXT: movq 8(%rdi), %r8
5115 ; FALLBACK1-NEXT: movq 16(%rdi), %r9
5116 ; FALLBACK1-NEXT: movq 24(%rdi), %rdi
5117 ; FALLBACK1-NEXT: movzbl (%rsi), %esi
5118 ; FALLBACK1-NEXT: movl %esi, %ecx
5119 ; FALLBACK1-NEXT: shlb $5, %cl
5120 ; FALLBACK1-NEXT: xorps %xmm0, %xmm0
5121 ; FALLBACK1-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
5122 ; FALLBACK1-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
5123 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
5124 ; FALLBACK1-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
5125 ; FALLBACK1-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
5126 ; FALLBACK1-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
5127 ; FALLBACK1-NEXT: andb $6, %sil
5128 ; FALLBACK1-NEXT: movzbl %sil, %eax
5129 ; FALLBACK1-NEXT: movq -56(%rsp,%rax,4), %rsi
5130 ; FALLBACK1-NEXT: movq -72(%rsp,%rax,4), %rdi
5131 ; FALLBACK1-NEXT: movq -64(%rsp,%rax,4), %r8
5132 ; FALLBACK1-NEXT: movq %r8, %r9
5133 ; FALLBACK1-NEXT: shrdq %cl, %rsi, %r9
5134 ; FALLBACK1-NEXT: movq -48(%rsp,%rax,4), %rax
5135 ; FALLBACK1-NEXT: shrdq %cl, %rax, %rsi
5136 ; FALLBACK1-NEXT: shrdq %cl, %r8, %rdi
5137 ; FALLBACK1-NEXT: shrq %cl, %rax
5138 ; FALLBACK1-NEXT: movq %rsi, 16(%rdx)
5139 ; FALLBACK1-NEXT: movq %rax, 24(%rdx)
5140 ; FALLBACK1-NEXT: movq %rdi, (%rdx)
5141 ; FALLBACK1-NEXT: movq %r9, 8(%rdx)
5142 ; FALLBACK1-NEXT: retq
5144 ; FALLBACK2-LABEL: lshr_32bytes_dwordOff:
5145 ; FALLBACK2: # %bb.0:
5146 ; FALLBACK2-NEXT: movq (%rdi), %rcx
5147 ; FALLBACK2-NEXT: movq 8(%rdi), %r8
5148 ; FALLBACK2-NEXT: movq 16(%rdi), %r9
5149 ; FALLBACK2-NEXT: movq 24(%rdi), %rdi
5150 ; FALLBACK2-NEXT: movzbl (%rsi), %esi
5151 ; FALLBACK2-NEXT: movl %esi, %eax
5152 ; FALLBACK2-NEXT: shlb $5, %al
5153 ; FALLBACK2-NEXT: xorps %xmm0, %xmm0
5154 ; FALLBACK2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
5155 ; FALLBACK2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
5156 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
5157 ; FALLBACK2-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
5158 ; FALLBACK2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
5159 ; FALLBACK2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
5160 ; FALLBACK2-NEXT: andb $6, %sil
5161 ; FALLBACK2-NEXT: movzbl %sil, %ecx
5162 ; FALLBACK2-NEXT: movq -64(%rsp,%rcx,4), %rsi
5163 ; FALLBACK2-NEXT: movq -56(%rsp,%rcx,4), %rdi
5164 ; FALLBACK2-NEXT: shrxq %rax, %rsi, %r8
5165 ; FALLBACK2-NEXT: shrxq %rax, -72(%rsp,%rcx,4), %r9
5166 ; FALLBACK2-NEXT: shrxq %rax, %rdi, %r10
5167 ; FALLBACK2-NEXT: movq -48(%rsp,%rcx,4), %rcx
5168 ; FALLBACK2-NEXT: shrxq %rax, %rcx, %r11
5169 ; FALLBACK2-NEXT: # kill: def $al killed $al killed $rax def $rax
5170 ; FALLBACK2-NEXT: notb %al
5171 ; FALLBACK2-NEXT: addq %rdi, %rdi
5172 ; FALLBACK2-NEXT: shlxq %rax, %rdi, %rdi
5173 ; FALLBACK2-NEXT: orq %r8, %rdi
5174 ; FALLBACK2-NEXT: addq %rsi, %rsi
5175 ; FALLBACK2-NEXT: shlxq %rax, %rsi, %rsi
5176 ; FALLBACK2-NEXT: orq %r9, %rsi
5177 ; FALLBACK2-NEXT: addq %rcx, %rcx
5178 ; FALLBACK2-NEXT: shlxq %rax, %rcx, %rax
5179 ; FALLBACK2-NEXT: orq %r10, %rax
5180 ; FALLBACK2-NEXT: movq %r11, 24(%rdx)
5181 ; FALLBACK2-NEXT: movq %rax, 16(%rdx)
5182 ; FALLBACK2-NEXT: movq %rsi, (%rdx)
5183 ; FALLBACK2-NEXT: movq %rdi, 8(%rdx)
5184 ; FALLBACK2-NEXT: retq
5186 ; FALLBACK3-LABEL: lshr_32bytes_dwordOff:
5187 ; FALLBACK3: # %bb.0:
5188 ; FALLBACK3-NEXT: movq (%rdi), %rax
5189 ; FALLBACK3-NEXT: movq 8(%rdi), %r8
5190 ; FALLBACK3-NEXT: movq 16(%rdi), %r9
5191 ; FALLBACK3-NEXT: movq 24(%rdi), %rdi
5192 ; FALLBACK3-NEXT: movzbl (%rsi), %esi
5193 ; FALLBACK3-NEXT: movl %esi, %ecx
5194 ; FALLBACK3-NEXT: shlb $5, %cl
5195 ; FALLBACK3-NEXT: xorps %xmm0, %xmm0
5196 ; FALLBACK3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
5197 ; FALLBACK3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
5198 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
5199 ; FALLBACK3-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
5200 ; FALLBACK3-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
5201 ; FALLBACK3-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
5202 ; FALLBACK3-NEXT: andb $6, %sil
5203 ; FALLBACK3-NEXT: movzbl %sil, %eax
5204 ; FALLBACK3-NEXT: movq -56(%rsp,%rax,4), %rsi
5205 ; FALLBACK3-NEXT: movq -72(%rsp,%rax,4), %rdi
5206 ; FALLBACK3-NEXT: movq -64(%rsp,%rax,4), %r8
5207 ; FALLBACK3-NEXT: movq %r8, %r9
5208 ; FALLBACK3-NEXT: shrdq %cl, %rsi, %r9
5209 ; FALLBACK3-NEXT: movq -48(%rsp,%rax,4), %rax
5210 ; FALLBACK3-NEXT: shrdq %cl, %rax, %rsi
5211 ; FALLBACK3-NEXT: shrdq %cl, %r8, %rdi
5212 ; FALLBACK3-NEXT: shrxq %rcx, %rax, %rax
5213 ; FALLBACK3-NEXT: movq %rsi, 16(%rdx)
5214 ; FALLBACK3-NEXT: movq %rax, 24(%rdx)
5215 ; FALLBACK3-NEXT: movq %rdi, (%rdx)
5216 ; FALLBACK3-NEXT: movq %r9, 8(%rdx)
5217 ; FALLBACK3-NEXT: retq
5219 ; FALLBACK4-LABEL: lshr_32bytes_dwordOff:
5220 ; FALLBACK4: # %bb.0:
5221 ; FALLBACK4-NEXT: pushq %rbx
5222 ; FALLBACK4-NEXT: movups (%rdi), %xmm0
5223 ; FALLBACK4-NEXT: movups 16(%rdi), %xmm1
5224 ; FALLBACK4-NEXT: movzbl (%rsi), %ecx
5225 ; FALLBACK4-NEXT: movl %ecx, %eax
5226 ; FALLBACK4-NEXT: shlb $5, %al
5227 ; FALLBACK4-NEXT: xorps %xmm2, %xmm2
5228 ; FALLBACK4-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
5229 ; FALLBACK4-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
5230 ; FALLBACK4-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
5231 ; FALLBACK4-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
5232 ; FALLBACK4-NEXT: andb $6, %cl
5233 ; FALLBACK4-NEXT: movzbl %cl, %r9d
5234 ; FALLBACK4-NEXT: movq -64(%rsp,%r9,4), %r10
5235 ; FALLBACK4-NEXT: movq -56(%rsp,%r9,4), %r8
5236 ; FALLBACK4-NEXT: movl %eax, %ecx
5237 ; FALLBACK4-NEXT: shrq %cl, %r10
5238 ; FALLBACK4-NEXT: movl %eax, %esi
5239 ; FALLBACK4-NEXT: notb %sil
5240 ; FALLBACK4-NEXT: leaq (%r8,%r8), %rdi
5241 ; FALLBACK4-NEXT: movl %esi, %ecx
5242 ; FALLBACK4-NEXT: shlq %cl, %rdi
5243 ; FALLBACK4-NEXT: orq %r10, %rdi
5244 ; FALLBACK4-NEXT: movq -48(%rsp,%r9,4), %r10
5245 ; FALLBACK4-NEXT: movq %r10, %r11
5246 ; FALLBACK4-NEXT: movl %eax, %ecx
5247 ; FALLBACK4-NEXT: shrq %cl, %r11
5248 ; FALLBACK4-NEXT: movq -40(%rsp,%r9,4), %r9
5249 ; FALLBACK4-NEXT: leaq (%r9,%r9), %rbx
5250 ; FALLBACK4-NEXT: movl %esi, %ecx
5251 ; FALLBACK4-NEXT: shlq %cl, %rbx
5252 ; FALLBACK4-NEXT: orq %r11, %rbx
5253 ; FALLBACK4-NEXT: movl %eax, %ecx
5254 ; FALLBACK4-NEXT: shrq %cl, %r8
5255 ; FALLBACK4-NEXT: addq %r10, %r10
5256 ; FALLBACK4-NEXT: movl %esi, %ecx
5257 ; FALLBACK4-NEXT: shlq %cl, %r10
5258 ; FALLBACK4-NEXT: orq %r8, %r10
5259 ; FALLBACK4-NEXT: movl %eax, %ecx
5260 ; FALLBACK4-NEXT: shrq %cl, %r9
5261 ; FALLBACK4-NEXT: movq %r9, 24(%rdx)
5262 ; FALLBACK4-NEXT: movq %r10, 8(%rdx)
5263 ; FALLBACK4-NEXT: movq %rbx, 16(%rdx)
5264 ; FALLBACK4-NEXT: movq %rdi, (%rdx)
5265 ; FALLBACK4-NEXT: popq %rbx
5266 ; FALLBACK4-NEXT: retq
5268 ; FALLBACK5-LABEL: lshr_32bytes_dwordOff:
5269 ; FALLBACK5: # %bb.0:
5270 ; FALLBACK5-NEXT: movups (%rdi), %xmm0
5271 ; FALLBACK5-NEXT: movups 16(%rdi), %xmm1
5272 ; FALLBACK5-NEXT: movzbl (%rsi), %eax
5273 ; FALLBACK5-NEXT: movl %eax, %ecx
5274 ; FALLBACK5-NEXT: shlb $5, %cl
5275 ; FALLBACK5-NEXT: xorps %xmm2, %xmm2
5276 ; FALLBACK5-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
5277 ; FALLBACK5-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
5278 ; FALLBACK5-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
5279 ; FALLBACK5-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
5280 ; FALLBACK5-NEXT: andb $6, %al
5281 ; FALLBACK5-NEXT: movzbl %al, %eax
5282 ; FALLBACK5-NEXT: movq -48(%rsp,%rax,4), %rsi
5283 ; FALLBACK5-NEXT: movq -56(%rsp,%rax,4), %rdi
5284 ; FALLBACK5-NEXT: movq %rdi, %r8
5285 ; FALLBACK5-NEXT: shrdq %cl, %rsi, %r8
5286 ; FALLBACK5-NEXT: movq -72(%rsp,%rax,4), %r9
5287 ; FALLBACK5-NEXT: movq -64(%rsp,%rax,4), %rax
5288 ; FALLBACK5-NEXT: movq %rax, %r10
5289 ; FALLBACK5-NEXT: shrdq %cl, %rdi, %r10
5290 ; FALLBACK5-NEXT: shrdq %cl, %rax, %r9
5291 ; FALLBACK5-NEXT: shrq %cl, %rsi
5292 ; FALLBACK5-NEXT: movq %r10, 8(%rdx)
5293 ; FALLBACK5-NEXT: movq %r8, 16(%rdx)
5294 ; FALLBACK5-NEXT: movq %rsi, 24(%rdx)
5295 ; FALLBACK5-NEXT: movq %r9, (%rdx)
5296 ; FALLBACK5-NEXT: retq
5298 ; FALLBACK6-LABEL: lshr_32bytes_dwordOff:
5299 ; FALLBACK6: # %bb.0:
5300 ; FALLBACK6-NEXT: movups (%rdi), %xmm0
5301 ; FALLBACK6-NEXT: movups 16(%rdi), %xmm1
5302 ; FALLBACK6-NEXT: movzbl (%rsi), %ecx
5303 ; FALLBACK6-NEXT: movl %ecx, %eax
5304 ; FALLBACK6-NEXT: shlb $5, %al
5305 ; FALLBACK6-NEXT: xorps %xmm2, %xmm2
5306 ; FALLBACK6-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
5307 ; FALLBACK6-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
5308 ; FALLBACK6-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
5309 ; FALLBACK6-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
5310 ; FALLBACK6-NEXT: andb $6, %cl
5311 ; FALLBACK6-NEXT: movzbl %cl, %ecx
5312 ; FALLBACK6-NEXT: shrxq %rax, -72(%rsp,%rcx,4), %rsi
5313 ; FALLBACK6-NEXT: movq -64(%rsp,%rcx,4), %rdi
5314 ; FALLBACK6-NEXT: movq -56(%rsp,%rcx,4), %r8
5315 ; FALLBACK6-NEXT: shrxq %rax, %r8, %r9
5316 ; FALLBACK6-NEXT: movq -48(%rsp,%rcx,4), %rcx
5317 ; FALLBACK6-NEXT: shrxq %rax, %rdi, %r10
5318 ; FALLBACK6-NEXT: shrxq %rax, %rcx, %r11
5319 ; FALLBACK6-NEXT: # kill: def $al killed $al killed $rax def $rax
5320 ; FALLBACK6-NEXT: notb %al
5321 ; FALLBACK6-NEXT: addq %rdi, %rdi
5322 ; FALLBACK6-NEXT: shlxq %rax, %rdi, %rdi
5323 ; FALLBACK6-NEXT: orq %rsi, %rdi
5324 ; FALLBACK6-NEXT: addq %rcx, %rcx
5325 ; FALLBACK6-NEXT: shlxq %rax, %rcx, %rcx
5326 ; FALLBACK6-NEXT: orq %r9, %rcx
5327 ; FALLBACK6-NEXT: addq %r8, %r8
5328 ; FALLBACK6-NEXT: shlxq %rax, %r8, %rax
5329 ; FALLBACK6-NEXT: orq %r10, %rax
5330 ; FALLBACK6-NEXT: movq %r11, 24(%rdx)
5331 ; FALLBACK6-NEXT: movq %rax, 8(%rdx)
5332 ; FALLBACK6-NEXT: movq %rcx, 16(%rdx)
5333 ; FALLBACK6-NEXT: movq %rdi, (%rdx)
5334 ; FALLBACK6-NEXT: retq
5336 ; FALLBACK7-LABEL: lshr_32bytes_dwordOff:
5337 ; FALLBACK7: # %bb.0:
5338 ; FALLBACK7-NEXT: movups (%rdi), %xmm0
5339 ; FALLBACK7-NEXT: movups 16(%rdi), %xmm1
5340 ; FALLBACK7-NEXT: movzbl (%rsi), %eax
5341 ; FALLBACK7-NEXT: movl %eax, %ecx
5342 ; FALLBACK7-NEXT: shlb $5, %cl
5343 ; FALLBACK7-NEXT: xorps %xmm2, %xmm2
5344 ; FALLBACK7-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
5345 ; FALLBACK7-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
5346 ; FALLBACK7-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
5347 ; FALLBACK7-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
5348 ; FALLBACK7-NEXT: andb $6, %al
5349 ; FALLBACK7-NEXT: movzbl %al, %eax
5350 ; FALLBACK7-NEXT: movq -48(%rsp,%rax,4), %rsi
5351 ; FALLBACK7-NEXT: movq -56(%rsp,%rax,4), %rdi
5352 ; FALLBACK7-NEXT: movq %rdi, %r8
5353 ; FALLBACK7-NEXT: shrdq %cl, %rsi, %r8
5354 ; FALLBACK7-NEXT: movq -72(%rsp,%rax,4), %r9
5355 ; FALLBACK7-NEXT: movq -64(%rsp,%rax,4), %rax
5356 ; FALLBACK7-NEXT: movq %rax, %r10
5357 ; FALLBACK7-NEXT: shrdq %cl, %rdi, %r10
5358 ; FALLBACK7-NEXT: shrdq %cl, %rax, %r9
5359 ; FALLBACK7-NEXT: shrxq %rcx, %rsi, %rax
5360 ; FALLBACK7-NEXT: movq %r10, 8(%rdx)
5361 ; FALLBACK7-NEXT: movq %r8, 16(%rdx)
5362 ; FALLBACK7-NEXT: movq %rax, 24(%rdx)
5363 ; FALLBACK7-NEXT: movq %r9, (%rdx)
5364 ; FALLBACK7-NEXT: retq
5366 ; FALLBACK8-LABEL: lshr_32bytes_dwordOff:
5367 ; FALLBACK8: # %bb.0:
5368 ; FALLBACK8-NEXT: pushq %rbx
5369 ; FALLBACK8-NEXT: vmovups (%rdi), %ymm0
5370 ; FALLBACK8-NEXT: movzbl (%rsi), %ecx
5371 ; FALLBACK8-NEXT: movl %ecx, %eax
5372 ; FALLBACK8-NEXT: shlb $5, %al
5373 ; FALLBACK8-NEXT: vxorps %xmm1, %xmm1, %xmm1
5374 ; FALLBACK8-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
5375 ; FALLBACK8-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
5376 ; FALLBACK8-NEXT: andb $6, %cl
5377 ; FALLBACK8-NEXT: movzbl %cl, %r9d
5378 ; FALLBACK8-NEXT: movq -64(%rsp,%r9,4), %r10
5379 ; FALLBACK8-NEXT: movq -56(%rsp,%r9,4), %r8
5380 ; FALLBACK8-NEXT: movl %eax, %ecx
5381 ; FALLBACK8-NEXT: shrq %cl, %r10
5382 ; FALLBACK8-NEXT: movl %eax, %esi
5383 ; FALLBACK8-NEXT: notb %sil
5384 ; FALLBACK8-NEXT: leaq (%r8,%r8), %rdi
5385 ; FALLBACK8-NEXT: movl %esi, %ecx
5386 ; FALLBACK8-NEXT: shlq %cl, %rdi
5387 ; FALLBACK8-NEXT: orq %r10, %rdi
5388 ; FALLBACK8-NEXT: movq -48(%rsp,%r9,4), %r10
5389 ; FALLBACK8-NEXT: movq %r10, %r11
5390 ; FALLBACK8-NEXT: movl %eax, %ecx
5391 ; FALLBACK8-NEXT: shrq %cl, %r11
5392 ; FALLBACK8-NEXT: movq -40(%rsp,%r9,4), %r9
5393 ; FALLBACK8-NEXT: leaq (%r9,%r9), %rbx
5394 ; FALLBACK8-NEXT: movl %esi, %ecx
5395 ; FALLBACK8-NEXT: shlq %cl, %rbx
5396 ; FALLBACK8-NEXT: orq %r11, %rbx
5397 ; FALLBACK8-NEXT: movl %eax, %ecx
5398 ; FALLBACK8-NEXT: shrq %cl, %r8
5399 ; FALLBACK8-NEXT: addq %r10, %r10
5400 ; FALLBACK8-NEXT: movl %esi, %ecx
5401 ; FALLBACK8-NEXT: shlq %cl, %r10
5402 ; FALLBACK8-NEXT: orq %r8, %r10
5403 ; FALLBACK8-NEXT: movl %eax, %ecx
5404 ; FALLBACK8-NEXT: shrq %cl, %r9
5405 ; FALLBACK8-NEXT: movq %r9, 24(%rdx)
5406 ; FALLBACK8-NEXT: movq %r10, 8(%rdx)
5407 ; FALLBACK8-NEXT: movq %rbx, 16(%rdx)
5408 ; FALLBACK8-NEXT: movq %rdi, (%rdx)
5409 ; FALLBACK8-NEXT: popq %rbx
5410 ; FALLBACK8-NEXT: vzeroupper
5411 ; FALLBACK8-NEXT: retq
5413 ; FALLBACK9-LABEL: lshr_32bytes_dwordOff:
5414 ; FALLBACK9: # %bb.0:
5415 ; FALLBACK9-NEXT: vmovups (%rdi), %ymm0
5416 ; FALLBACK9-NEXT: movzbl (%rsi), %eax
5417 ; FALLBACK9-NEXT: movl %eax, %ecx
5418 ; FALLBACK9-NEXT: shlb $5, %cl
5419 ; FALLBACK9-NEXT: vxorps %xmm1, %xmm1, %xmm1
5420 ; FALLBACK9-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
5421 ; FALLBACK9-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
5422 ; FALLBACK9-NEXT: andb $6, %al
5423 ; FALLBACK9-NEXT: movzbl %al, %eax
5424 ; FALLBACK9-NEXT: movq -48(%rsp,%rax,4), %rsi
5425 ; FALLBACK9-NEXT: movq -56(%rsp,%rax,4), %rdi
5426 ; FALLBACK9-NEXT: movq %rdi, %r8
5427 ; FALLBACK9-NEXT: shrdq %cl, %rsi, %r8
5428 ; FALLBACK9-NEXT: movq -72(%rsp,%rax,4), %r9
5429 ; FALLBACK9-NEXT: movq -64(%rsp,%rax,4), %rax
5430 ; FALLBACK9-NEXT: movq %rax, %r10
5431 ; FALLBACK9-NEXT: shrdq %cl, %rdi, %r10
5432 ; FALLBACK9-NEXT: shrdq %cl, %rax, %r9
5433 ; FALLBACK9-NEXT: shrq %cl, %rsi
5434 ; FALLBACK9-NEXT: movq %r10, 8(%rdx)
5435 ; FALLBACK9-NEXT: movq %r8, 16(%rdx)
5436 ; FALLBACK9-NEXT: movq %rsi, 24(%rdx)
5437 ; FALLBACK9-NEXT: movq %r9, (%rdx)
5438 ; FALLBACK9-NEXT: vzeroupper
5439 ; FALLBACK9-NEXT: retq
5441 ; FALLBACK10-LABEL: lshr_32bytes_dwordOff:
5442 ; FALLBACK10: # %bb.0:
5443 ; FALLBACK10-NEXT: vmovups (%rdi), %ymm0
5444 ; FALLBACK10-NEXT: movzbl (%rsi), %ecx
5445 ; FALLBACK10-NEXT: movl %ecx, %eax
5446 ; FALLBACK10-NEXT: shlb $5, %al
5447 ; FALLBACK10-NEXT: vxorps %xmm1, %xmm1, %xmm1
5448 ; FALLBACK10-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
5449 ; FALLBACK10-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
5450 ; FALLBACK10-NEXT: andb $6, %cl
5451 ; FALLBACK10-NEXT: movzbl %cl, %ecx
5452 ; FALLBACK10-NEXT: shrxq %rax, -72(%rsp,%rcx,4), %rsi
5453 ; FALLBACK10-NEXT: movq -64(%rsp,%rcx,4), %rdi
5454 ; FALLBACK10-NEXT: movq -56(%rsp,%rcx,4), %r8
5455 ; FALLBACK10-NEXT: shrxq %rax, %r8, %r9
5456 ; FALLBACK10-NEXT: movq -48(%rsp,%rcx,4), %rcx
5457 ; FALLBACK10-NEXT: shrxq %rax, %rdi, %r10
5458 ; FALLBACK10-NEXT: shrxq %rax, %rcx, %r11
5459 ; FALLBACK10-NEXT: # kill: def $al killed $al killed $rax def $rax
5460 ; FALLBACK10-NEXT: notb %al
5461 ; FALLBACK10-NEXT: addq %rdi, %rdi
5462 ; FALLBACK10-NEXT: shlxq %rax, %rdi, %rdi
5463 ; FALLBACK10-NEXT: orq %rsi, %rdi
5464 ; FALLBACK10-NEXT: addq %rcx, %rcx
5465 ; FALLBACK10-NEXT: shlxq %rax, %rcx, %rcx
5466 ; FALLBACK10-NEXT: orq %r9, %rcx
5467 ; FALLBACK10-NEXT: addq %r8, %r8
5468 ; FALLBACK10-NEXT: shlxq %rax, %r8, %rax
5469 ; FALLBACK10-NEXT: orq %r10, %rax
5470 ; FALLBACK10-NEXT: movq %r11, 24(%rdx)
5471 ; FALLBACK10-NEXT: movq %rax, 8(%rdx)
5472 ; FALLBACK10-NEXT: movq %rcx, 16(%rdx)
5473 ; FALLBACK10-NEXT: movq %rdi, (%rdx)
5474 ; FALLBACK10-NEXT: vzeroupper
5475 ; FALLBACK10-NEXT: retq
5477 ; FALLBACK11-LABEL: lshr_32bytes_dwordOff:
5478 ; FALLBACK11: # %bb.0:
5479 ; FALLBACK11-NEXT: vmovups (%rdi), %ymm0
5480 ; FALLBACK11-NEXT: movzbl (%rsi), %eax
5481 ; FALLBACK11-NEXT: movl %eax, %ecx
5482 ; FALLBACK11-NEXT: shlb $5, %cl
5483 ; FALLBACK11-NEXT: vxorps %xmm1, %xmm1, %xmm1
5484 ; FALLBACK11-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
5485 ; FALLBACK11-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
5486 ; FALLBACK11-NEXT: andb $6, %al
5487 ; FALLBACK11-NEXT: movzbl %al, %eax
5488 ; FALLBACK11-NEXT: movq -48(%rsp,%rax,4), %rsi
5489 ; FALLBACK11-NEXT: movq -56(%rsp,%rax,4), %rdi
5490 ; FALLBACK11-NEXT: movq %rdi, %r8
5491 ; FALLBACK11-NEXT: shrdq %cl, %rsi, %r8
5492 ; FALLBACK11-NEXT: movq -72(%rsp,%rax,4), %r9
5493 ; FALLBACK11-NEXT: movq -64(%rsp,%rax,4), %rax
5494 ; FALLBACK11-NEXT: movq %rax, %r10
5495 ; FALLBACK11-NEXT: shrdq %cl, %rdi, %r10
5496 ; FALLBACK11-NEXT: shrdq %cl, %rax, %r9
5497 ; FALLBACK11-NEXT: shrxq %rcx, %rsi, %rax
5498 ; FALLBACK11-NEXT: movq %r10, 8(%rdx)
5499 ; FALLBACK11-NEXT: movq %r8, 16(%rdx)
5500 ; FALLBACK11-NEXT: movq %rax, 24(%rdx)
5501 ; FALLBACK11-NEXT: movq %r9, (%rdx)
5502 ; FALLBACK11-NEXT: vzeroupper
5503 ; FALLBACK11-NEXT: retq
5505 ; FALLBACK12-LABEL: lshr_32bytes_dwordOff:
5506 ; FALLBACK12: # %bb.0:
5507 ; FALLBACK12-NEXT: pushq %rbx
5508 ; FALLBACK12-NEXT: vmovups (%rdi), %ymm0
5509 ; FALLBACK12-NEXT: movzbl (%rsi), %ecx
5510 ; FALLBACK12-NEXT: movl %ecx, %eax
5511 ; FALLBACK12-NEXT: shlb $5, %al
5512 ; FALLBACK12-NEXT: vxorps %xmm1, %xmm1, %xmm1
5513 ; FALLBACK12-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
5514 ; FALLBACK12-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
5515 ; FALLBACK12-NEXT: andb $6, %cl
5516 ; FALLBACK12-NEXT: movzbl %cl, %r9d
5517 ; FALLBACK12-NEXT: movq -64(%rsp,%r9,4), %r10
5518 ; FALLBACK12-NEXT: movq -56(%rsp,%r9,4), %r8
5519 ; FALLBACK12-NEXT: movl %eax, %ecx
5520 ; FALLBACK12-NEXT: shrq %cl, %r10
5521 ; FALLBACK12-NEXT: movl %eax, %esi
5522 ; FALLBACK12-NEXT: notb %sil
5523 ; FALLBACK12-NEXT: leaq (%r8,%r8), %rdi
5524 ; FALLBACK12-NEXT: movl %esi, %ecx
5525 ; FALLBACK12-NEXT: shlq %cl, %rdi
5526 ; FALLBACK12-NEXT: orq %r10, %rdi
5527 ; FALLBACK12-NEXT: movq -48(%rsp,%r9,4), %r10
5528 ; FALLBACK12-NEXT: movq %r10, %r11
5529 ; FALLBACK12-NEXT: movl %eax, %ecx
5530 ; FALLBACK12-NEXT: shrq %cl, %r11
5531 ; FALLBACK12-NEXT: movq -40(%rsp,%r9,4), %r9
5532 ; FALLBACK12-NEXT: leaq (%r9,%r9), %rbx
5533 ; FALLBACK12-NEXT: movl %esi, %ecx
5534 ; FALLBACK12-NEXT: shlq %cl, %rbx
5535 ; FALLBACK12-NEXT: orq %r11, %rbx
5536 ; FALLBACK12-NEXT: movl %eax, %ecx
5537 ; FALLBACK12-NEXT: shrq %cl, %r8
5538 ; FALLBACK12-NEXT: addq %r10, %r10
5539 ; FALLBACK12-NEXT: movl %esi, %ecx
5540 ; FALLBACK12-NEXT: shlq %cl, %r10
5541 ; FALLBACK12-NEXT: orq %r8, %r10
5542 ; FALLBACK12-NEXT: movl %eax, %ecx
5543 ; FALLBACK12-NEXT: shrq %cl, %r9
5544 ; FALLBACK12-NEXT: movq %r9, 24(%rdx)
5545 ; FALLBACK12-NEXT: movq %r10, 8(%rdx)
5546 ; FALLBACK12-NEXT: movq %rbx, 16(%rdx)
5547 ; FALLBACK12-NEXT: movq %rdi, (%rdx)
5548 ; FALLBACK12-NEXT: popq %rbx
5549 ; FALLBACK12-NEXT: vzeroupper
5550 ; FALLBACK12-NEXT: retq
5552 ; FALLBACK13-LABEL: lshr_32bytes_dwordOff:
5553 ; FALLBACK13: # %bb.0:
5554 ; FALLBACK13-NEXT: vmovups (%rdi), %ymm0
5555 ; FALLBACK13-NEXT: movzbl (%rsi), %eax
5556 ; FALLBACK13-NEXT: movl %eax, %ecx
5557 ; FALLBACK13-NEXT: shlb $5, %cl
5558 ; FALLBACK13-NEXT: vxorps %xmm1, %xmm1, %xmm1
5559 ; FALLBACK13-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
5560 ; FALLBACK13-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
5561 ; FALLBACK13-NEXT: andb $6, %al
5562 ; FALLBACK13-NEXT: movzbl %al, %eax
5563 ; FALLBACK13-NEXT: movq -48(%rsp,%rax,4), %rsi
5564 ; FALLBACK13-NEXT: movq -56(%rsp,%rax,4), %rdi
5565 ; FALLBACK13-NEXT: movq %rdi, %r8
5566 ; FALLBACK13-NEXT: shrdq %cl, %rsi, %r8
5567 ; FALLBACK13-NEXT: movq -72(%rsp,%rax,4), %r9
5568 ; FALLBACK13-NEXT: movq -64(%rsp,%rax,4), %rax
5569 ; FALLBACK13-NEXT: movq %rax, %r10
5570 ; FALLBACK13-NEXT: shrdq %cl, %rdi, %r10
5571 ; FALLBACK13-NEXT: shrdq %cl, %rax, %r9
5572 ; FALLBACK13-NEXT: shrq %cl, %rsi
5573 ; FALLBACK13-NEXT: movq %r10, 8(%rdx)
5574 ; FALLBACK13-NEXT: movq %r8, 16(%rdx)
5575 ; FALLBACK13-NEXT: movq %rsi, 24(%rdx)
5576 ; FALLBACK13-NEXT: movq %r9, (%rdx)
5577 ; FALLBACK13-NEXT: vzeroupper
5578 ; FALLBACK13-NEXT: retq
5580 ; FALLBACK14-LABEL: lshr_32bytes_dwordOff:
5581 ; FALLBACK14: # %bb.0:
5582 ; FALLBACK14-NEXT: vmovups (%rdi), %ymm0
5583 ; FALLBACK14-NEXT: movzbl (%rsi), %ecx
5584 ; FALLBACK14-NEXT: movl %ecx, %eax
5585 ; FALLBACK14-NEXT: shlb $5, %al
5586 ; FALLBACK14-NEXT: vxorps %xmm1, %xmm1, %xmm1
5587 ; FALLBACK14-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
5588 ; FALLBACK14-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
5589 ; FALLBACK14-NEXT: andb $6, %cl
5590 ; FALLBACK14-NEXT: movzbl %cl, %ecx
5591 ; FALLBACK14-NEXT: shrxq %rax, -72(%rsp,%rcx,4), %rsi
5592 ; FALLBACK14-NEXT: movq -64(%rsp,%rcx,4), %rdi
5593 ; FALLBACK14-NEXT: movq -56(%rsp,%rcx,4), %r8
5594 ; FALLBACK14-NEXT: shrxq %rax, %r8, %r9
5595 ; FALLBACK14-NEXT: movq -48(%rsp,%rcx,4), %rcx
5596 ; FALLBACK14-NEXT: shrxq %rax, %rdi, %r10
5597 ; FALLBACK14-NEXT: shrxq %rax, %rcx, %r11
5598 ; FALLBACK14-NEXT: # kill: def $al killed $al killed $rax def $rax
5599 ; FALLBACK14-NEXT: notb %al
5600 ; FALLBACK14-NEXT: addq %rdi, %rdi
5601 ; FALLBACK14-NEXT: shlxq %rax, %rdi, %rdi
5602 ; FALLBACK14-NEXT: orq %rsi, %rdi
5603 ; FALLBACK14-NEXT: addq %rcx, %rcx
5604 ; FALLBACK14-NEXT: shlxq %rax, %rcx, %rcx
5605 ; FALLBACK14-NEXT: orq %r9, %rcx
5606 ; FALLBACK14-NEXT: addq %r8, %r8
5607 ; FALLBACK14-NEXT: shlxq %rax, %r8, %rax
5608 ; FALLBACK14-NEXT: orq %r10, %rax
5609 ; FALLBACK14-NEXT: movq %r11, 24(%rdx)
5610 ; FALLBACK14-NEXT: movq %rax, 8(%rdx)
5611 ; FALLBACK14-NEXT: movq %rcx, 16(%rdx)
5612 ; FALLBACK14-NEXT: movq %rdi, (%rdx)
5613 ; FALLBACK14-NEXT: vzeroupper
5614 ; FALLBACK14-NEXT: retq
5616 ; FALLBACK15-LABEL: lshr_32bytes_dwordOff:
5617 ; FALLBACK15: # %bb.0:
5618 ; FALLBACK15-NEXT: vmovups (%rdi), %ymm0
5619 ; FALLBACK15-NEXT: movzbl (%rsi), %eax
5620 ; FALLBACK15-NEXT: movl %eax, %ecx
5621 ; FALLBACK15-NEXT: shlb $5, %cl
5622 ; FALLBACK15-NEXT: vxorps %xmm1, %xmm1, %xmm1
5623 ; FALLBACK15-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
5624 ; FALLBACK15-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
5625 ; FALLBACK15-NEXT: andb $6, %al
5626 ; FALLBACK15-NEXT: movzbl %al, %eax
5627 ; FALLBACK15-NEXT: movq -48(%rsp,%rax,4), %rsi
5628 ; FALLBACK15-NEXT: movq -56(%rsp,%rax,4), %rdi
5629 ; FALLBACK15-NEXT: movq %rdi, %r8
5630 ; FALLBACK15-NEXT: shrdq %cl, %rsi, %r8
5631 ; FALLBACK15-NEXT: movq -72(%rsp,%rax,4), %r9
5632 ; FALLBACK15-NEXT: movq -64(%rsp,%rax,4), %rax
5633 ; FALLBACK15-NEXT: movq %rax, %r10
5634 ; FALLBACK15-NEXT: shrdq %cl, %rdi, %r10
5635 ; FALLBACK15-NEXT: shrdq %cl, %rax, %r9
5636 ; FALLBACK15-NEXT: shrxq %rcx, %rsi, %rax
5637 ; FALLBACK15-NEXT: movq %r10, 8(%rdx)
5638 ; FALLBACK15-NEXT: movq %r8, 16(%rdx)
5639 ; FALLBACK15-NEXT: movq %rax, 24(%rdx)
5640 ; FALLBACK15-NEXT: movq %r9, (%rdx)
5641 ; FALLBACK15-NEXT: vzeroupper
5642 ; FALLBACK15-NEXT: retq
5644 ; X86-SSE2-LABEL: lshr_32bytes_dwordOff:
5645 ; X86-SSE2: # %bb.0:
5646 ; X86-SSE2-NEXT: pushl %ebp
5647 ; X86-SSE2-NEXT: pushl %ebx
5648 ; X86-SSE2-NEXT: pushl %edi
5649 ; X86-SSE2-NEXT: pushl %esi
5650 ; X86-SSE2-NEXT: subl $92, %esp
5651 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
5652 ; X86-SSE2-NEXT: movl (%eax), %ecx
5653 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
5654 ; X86-SSE2-NEXT: movl 4(%eax), %ecx
5655 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
5656 ; X86-SSE2-NEXT: movl 8(%eax), %esi
5657 ; X86-SSE2-NEXT: movl 12(%eax), %edi
5658 ; X86-SSE2-NEXT: movl 16(%eax), %ebx
5659 ; X86-SSE2-NEXT: movl 20(%eax), %ebp
5660 ; X86-SSE2-NEXT: movl 24(%eax), %edx
5661 ; X86-SSE2-NEXT: movl 28(%eax), %ecx
5662 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
5663 ; X86-SSE2-NEXT: movzbl (%eax), %eax
5664 ; X86-SSE2-NEXT: xorps %xmm0, %xmm0
5665 ; X86-SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
5666 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
5667 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
5668 ; X86-SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
5669 ; X86-SSE2-NEXT: movl %ebp, {{[0-9]+}}(%esp)
5670 ; X86-SSE2-NEXT: movl %ebx, {{[0-9]+}}(%esp)
5671 ; X86-SSE2-NEXT: movl %edi, {{[0-9]+}}(%esp)
5672 ; X86-SSE2-NEXT: movl %esi, {{[0-9]+}}(%esp)
5673 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
5674 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
5675 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
5676 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
5677 ; X86-SSE2-NEXT: andl $7, %eax
5678 ; X86-SSE2-NEXT: movl 16(%esp,%eax,4), %ecx
5679 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
5680 ; X86-SSE2-NEXT: movl 20(%esp,%eax,4), %ecx
5681 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
5682 ; X86-SSE2-NEXT: movl 28(%esp,%eax,4), %esi
5683 ; X86-SSE2-NEXT: movl 24(%esp,%eax,4), %edi
5684 ; X86-SSE2-NEXT: movl 36(%esp,%eax,4), %ebx
5685 ; X86-SSE2-NEXT: movl 32(%esp,%eax,4), %ebp
5686 ; X86-SSE2-NEXT: movl 44(%esp,%eax,4), %edx
5687 ; X86-SSE2-NEXT: movl 40(%esp,%eax,4), %ecx
5688 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
5689 ; X86-SSE2-NEXT: movl %ecx, 24(%eax)
5690 ; X86-SSE2-NEXT: movl %edx, 28(%eax)
5691 ; X86-SSE2-NEXT: movl %ebp, 16(%eax)
5692 ; X86-SSE2-NEXT: movl %ebx, 20(%eax)
5693 ; X86-SSE2-NEXT: movl %edi, 8(%eax)
5694 ; X86-SSE2-NEXT: movl %esi, 12(%eax)
5695 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
5696 ; X86-SSE2-NEXT: movl %ecx, (%eax)
5697 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
5698 ; X86-SSE2-NEXT: movl %ecx, 4(%eax)
5699 ; X86-SSE2-NEXT: addl $92, %esp
5700 ; X86-SSE2-NEXT: popl %esi
5701 ; X86-SSE2-NEXT: popl %edi
5702 ; X86-SSE2-NEXT: popl %ebx
5703 ; X86-SSE2-NEXT: popl %ebp
5704 ; X86-SSE2-NEXT: retl
5706 ; X86-SSE42-LABEL: lshr_32bytes_dwordOff:
5707 ; X86-SSE42: # %bb.0:
5708 ; X86-SSE42-NEXT: subl $76, %esp
5709 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %eax
5710 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %ecx
5711 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %edx
5712 ; X86-SSE42-NEXT: movups (%edx), %xmm0
5713 ; X86-SSE42-NEXT: movups 16(%edx), %xmm1
5714 ; X86-SSE42-NEXT: movzbl (%ecx), %ecx
5715 ; X86-SSE42-NEXT: xorps %xmm2, %xmm2
5716 ; X86-SSE42-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
5717 ; X86-SSE42-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
5718 ; X86-SSE42-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
5719 ; X86-SSE42-NEXT: movaps %xmm0, (%esp)
5720 ; X86-SSE42-NEXT: andl $7, %ecx
5721 ; X86-SSE42-NEXT: movups (%esp,%ecx,4), %xmm0
5722 ; X86-SSE42-NEXT: movups 16(%esp,%ecx,4), %xmm1
5723 ; X86-SSE42-NEXT: movups %xmm1, 16(%eax)
5724 ; X86-SSE42-NEXT: movups %xmm0, (%eax)
5725 ; X86-SSE42-NEXT: addl $76, %esp
5726 ; X86-SSE42-NEXT: retl
5728 ; X86-AVX-LABEL: lshr_32bytes_dwordOff:
5729 ; X86-AVX: # %bb.0:
5730 ; X86-AVX-NEXT: subl $76, %esp
5731 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
5732 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
5733 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
5734 ; X86-AVX-NEXT: vmovups (%edx), %ymm0
5735 ; X86-AVX-NEXT: movzbl (%ecx), %ecx
5736 ; X86-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
5737 ; X86-AVX-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
5738 ; X86-AVX-NEXT: vmovups %ymm0, (%esp)
5739 ; X86-AVX-NEXT: andl $7, %ecx
5740 ; X86-AVX-NEXT: vmovups (%esp,%ecx,4), %xmm0
5741 ; X86-AVX-NEXT: vmovups 16(%esp,%ecx,4), %xmm1
5742 ; X86-AVX-NEXT: vmovups %xmm1, 16(%eax)
5743 ; X86-AVX-NEXT: vmovups %xmm0, (%eax)
5744 ; X86-AVX-NEXT: addl $76, %esp
5745 ; X86-AVX-NEXT: vzeroupper
5746 ; X86-AVX-NEXT: retl
5747 %src = load i256, ptr %src.ptr, align 1 ; 256-bit value to shift, loaded with align 1
5748 %dwordOff = load i256, ptr %dwordOff.ptr, align 1 ; shift amount, expressed in 32-bit dwords
5749 %bitOff = shl i256 %dwordOff, 5 ; dwords -> bits (multiply by 32)
5750 %res = lshr i256 %src, %bitOff ; logical (zero-filling) right shift by the bit offset
5751 store i256 %res, ptr %dst, align 1 ; write the 256-bit result with align 1
5752 ret void
5753 }
; Logical right shift of a 256-bit value by a whole number of qwords.
; The offset loaded from %qwordOff.ptr is converted to a bit count (shl by 6,
; i.e. multiplied by 64) before the lshr. The expected lowering stores the
; source plus 32 zero bytes to a stack buffer, masks the offset with 3 and
; reloads the result through an index scaled by 8, so no per-element shift
; instructions appear in the output.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define void @lshr_32bytes_qwordOff(ptr %src.ptr, ptr %qwordOff.ptr, ptr %dst) nounwind {
; X64-SSE2-LABEL: lshr_32bytes_qwordOff:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movq (%rdi), %rax
; X64-SSE2-NEXT:    movq 8(%rdi), %rcx
; X64-SSE2-NEXT:    movq 16(%rdi), %r8
; X64-SSE2-NEXT:    movq 24(%rdi), %rdi
; X64-SSE2-NEXT:    movzbl (%rsi), %esi
; X64-SSE2-NEXT:    xorps %xmm0, %xmm0
; X64-SSE2-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE2-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE2-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
; X64-SSE2-NEXT:    movq %r8, -{{[0-9]+}}(%rsp)
; X64-SSE2-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; X64-SSE2-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; X64-SSE2-NEXT:    andl $3, %esi
; X64-SSE2-NEXT:    movq -72(%rsp,%rsi,8), %rax
; X64-SSE2-NEXT:    movq -64(%rsp,%rsi,8), %rcx
; X64-SSE2-NEXT:    movq -48(%rsp,%rsi,8), %rdi
; X64-SSE2-NEXT:    movq -56(%rsp,%rsi,8), %rsi
; X64-SSE2-NEXT:    movq %rsi, 16(%rdx)
; X64-SSE2-NEXT:    movq %rdi, 24(%rdx)
; X64-SSE2-NEXT:    movq %rax, (%rdx)
; X64-SSE2-NEXT:    movq %rcx, 8(%rdx)
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: lshr_32bytes_qwordOff:
; X64-SSE42:       # %bb.0:
; X64-SSE42-NEXT:    movups (%rdi), %xmm0
; X64-SSE42-NEXT:    movups 16(%rdi), %xmm1
; X64-SSE42-NEXT:    movzbl (%rsi), %eax
; X64-SSE42-NEXT:    xorps %xmm2, %xmm2
; X64-SSE42-NEXT:    movaps %xmm2, -{{[0-9]+}}(%rsp)
; X64-SSE42-NEXT:    movaps %xmm2, -{{[0-9]+}}(%rsp)
; X64-SSE42-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; X64-SSE42-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE42-NEXT:    andl $3, %eax
; X64-SSE42-NEXT:    movups -72(%rsp,%rax,8), %xmm0
; X64-SSE42-NEXT:    movups -56(%rsp,%rax,8), %xmm1
; X64-SSE42-NEXT:    movups %xmm1, 16(%rdx)
; X64-SSE42-NEXT:    movups %xmm0, (%rdx)
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: lshr_32bytes_qwordOff:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovups (%rdi), %ymm0
; X64-AVX-NEXT:    movzbl (%rsi), %eax
; X64-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64-AVX-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    andl $3, %eax
; X64-AVX-NEXT:    vmovups -72(%rsp,%rax,8), %xmm0
; X64-AVX-NEXT:    vmovups -56(%rsp,%rax,8), %xmm1
; X64-AVX-NEXT:    vmovups %xmm1, 16(%rdx)
; X64-AVX-NEXT:    vmovups %xmm0, (%rdx)
; X64-AVX-NEXT:    vzeroupper
; X64-AVX-NEXT:    retq
;
; X86-SSE2-LABEL: lshr_32bytes_qwordOff:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    pushl %ebx
; X86-SSE2-NEXT:    pushl %edi
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    subl $92, %esp
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movl (%eax), %ecx
; X86-SSE2-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SSE2-NEXT:    movl 4(%eax), %ecx
; X86-SSE2-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SSE2-NEXT:    movl 8(%eax), %esi
; X86-SSE2-NEXT:    movl 12(%eax), %edi
; X86-SSE2-NEXT:    movl 16(%eax), %ebx
; X86-SSE2-NEXT:    movl 20(%eax), %ebp
; X86-SSE2-NEXT:    movl 24(%eax), %edx
; X86-SSE2-NEXT:    movl 28(%eax), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl (%eax), %eax
; X86-SSE2-NEXT:    xorps %xmm0, %xmm0
; X86-SSE2-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    movl %ebp, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-SSE2-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-SSE2-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    andl $3, %eax
; X86-SSE2-NEXT:    movl 16(%esp,%eax,8), %ecx
; X86-SSE2-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SSE2-NEXT:    movl 20(%esp,%eax,8), %ecx
; X86-SSE2-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SSE2-NEXT:    movl 28(%esp,%eax,8), %esi
; X86-SSE2-NEXT:    movl 24(%esp,%eax,8), %edi
; X86-SSE2-NEXT:    movl 36(%esp,%eax,8), %ebx
; X86-SSE2-NEXT:    movl 32(%esp,%eax,8), %ebp
; X86-SSE2-NEXT:    movl 44(%esp,%eax,8), %edx
; X86-SSE2-NEXT:    movl 40(%esp,%eax,8), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movl %ecx, 24(%eax)
; X86-SSE2-NEXT:    movl %edx, 28(%eax)
; X86-SSE2-NEXT:    movl %ebp, 16(%eax)
; X86-SSE2-NEXT:    movl %ebx, 20(%eax)
; X86-SSE2-NEXT:    movl %edi, 8(%eax)
; X86-SSE2-NEXT:    movl %esi, 12(%eax)
; X86-SSE2-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-SSE2-NEXT:    movl %ecx, (%eax)
; X86-SSE2-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-SSE2-NEXT:    movl %ecx, 4(%eax)
; X86-SSE2-NEXT:    addl $92, %esp
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    popl %ebx
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: lshr_32bytes_qwordOff:
; X86-SSE42:       # %bb.0:
; X86-SSE42-NEXT:    subl $76, %esp
; X86-SSE42-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE42-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE42-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE42-NEXT:    movups (%edx), %xmm0
; X86-SSE42-NEXT:    movups 16(%edx), %xmm1
; X86-SSE42-NEXT:    movzbl (%ecx), %ecx
; X86-SSE42-NEXT:    xorps %xmm2, %xmm2
; X86-SSE42-NEXT:    movaps %xmm2, {{[0-9]+}}(%esp)
; X86-SSE42-NEXT:    movaps %xmm2, {{[0-9]+}}(%esp)
; X86-SSE42-NEXT:    movaps %xmm1, {{[0-9]+}}(%esp)
; X86-SSE42-NEXT:    movaps %xmm0, (%esp)
; X86-SSE42-NEXT:    andl $3, %ecx
; X86-SSE42-NEXT:    movups (%esp,%ecx,8), %xmm0
; X86-SSE42-NEXT:    movups 16(%esp,%ecx,8), %xmm1
; X86-SSE42-NEXT:    movups %xmm1, 16(%eax)
; X86-SSE42-NEXT:    movups %xmm0, (%eax)
; X86-SSE42-NEXT:    addl $76, %esp
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: lshr_32bytes_qwordOff:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    subl $76, %esp
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-AVX-NEXT:    vmovups (%edx), %ymm0
; X86-AVX-NEXT:    movzbl (%ecx), %ecx
; X86-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT:    vmovups %ymm1, {{[0-9]+}}(%esp)
; X86-AVX-NEXT:    vmovups %ymm0, (%esp)
; X86-AVX-NEXT:    andl $3, %ecx
; X86-AVX-NEXT:    vmovups (%esp,%ecx,8), %xmm0
; X86-AVX-NEXT:    vmovups 16(%esp,%ecx,8), %xmm1
; X86-AVX-NEXT:    vmovups %xmm1, 16(%eax)
; X86-AVX-NEXT:    vmovups %xmm0, (%eax)
; X86-AVX-NEXT:    addl $76, %esp
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
  %src = load i256, ptr %src.ptr, align 1
  %qwordOff = load i256, ptr %qwordOff.ptr, align 1
  %bitOff = shl i256 %qwordOff, 6
  %res = lshr i256 %src, %bitOff
  store i256 %res, ptr %dst, align 1
  ret void
}

5924 define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
5925 ; FALLBACK0-LABEL: shl_32bytes:
5926 ; FALLBACK0: # %bb.0:
5927 ; FALLBACK0-NEXT: pushq %rbx
5928 ; FALLBACK0-NEXT: movq (%rdi), %rcx
5929 ; FALLBACK0-NEXT: movq 8(%rdi), %r8
5930 ; FALLBACK0-NEXT: movq 16(%rdi), %r9
5931 ; FALLBACK0-NEXT: movq 24(%rdi), %rdi
5932 ; FALLBACK0-NEXT: movzbl (%rsi), %esi
5933 ; FALLBACK0-NEXT: leal (,%rsi,8), %eax
5934 ; FALLBACK0-NEXT: xorps %xmm0, %xmm0
5935 ; FALLBACK0-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
5936 ; FALLBACK0-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
5937 ; FALLBACK0-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
5938 ; FALLBACK0-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
5939 ; FALLBACK0-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
5940 ; FALLBACK0-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
5941 ; FALLBACK0-NEXT: andb $24, %sil
5942 ; FALLBACK0-NEXT: negb %sil
5943 ; FALLBACK0-NEXT: movsbq %sil, %r10
5944 ; FALLBACK0-NEXT: movq -32(%rsp,%r10), %r8
5945 ; FALLBACK0-NEXT: movq -24(%rsp,%r10), %rdi
5946 ; FALLBACK0-NEXT: movq %rdi, %r11
5947 ; FALLBACK0-NEXT: movl %eax, %ecx
5948 ; FALLBACK0-NEXT: shlq %cl, %r11
5949 ; FALLBACK0-NEXT: movl %eax, %esi
5950 ; FALLBACK0-NEXT: notb %sil
5951 ; FALLBACK0-NEXT: movq %r8, %r9
5952 ; FALLBACK0-NEXT: shrq %r9
5953 ; FALLBACK0-NEXT: movl %esi, %ecx
5954 ; FALLBACK0-NEXT: shrq %cl, %r9
5955 ; FALLBACK0-NEXT: orq %r11, %r9
5956 ; FALLBACK0-NEXT: movq -8(%rsp,%r10), %r11
5957 ; FALLBACK0-NEXT: movl %eax, %ecx
5958 ; FALLBACK0-NEXT: shlq %cl, %r11
5959 ; FALLBACK0-NEXT: movq -16(%rsp,%r10), %r10
5960 ; FALLBACK0-NEXT: movq %r10, %rbx
5961 ; FALLBACK0-NEXT: shrq %rbx
5962 ; FALLBACK0-NEXT: movl %esi, %ecx
5963 ; FALLBACK0-NEXT: shrq %cl, %rbx
5964 ; FALLBACK0-NEXT: orq %r11, %rbx
5965 ; FALLBACK0-NEXT: movl %eax, %ecx
5966 ; FALLBACK0-NEXT: shlq %cl, %r10
5967 ; FALLBACK0-NEXT: shrq %rdi
5968 ; FALLBACK0-NEXT: movl %esi, %ecx
5969 ; FALLBACK0-NEXT: shrq %cl, %rdi
5970 ; FALLBACK0-NEXT: orq %r10, %rdi
5971 ; FALLBACK0-NEXT: movl %eax, %ecx
5972 ; FALLBACK0-NEXT: shlq %cl, %r8
5973 ; FALLBACK0-NEXT: movq %r8, (%rdx)
5974 ; FALLBACK0-NEXT: movq %rdi, 16(%rdx)
5975 ; FALLBACK0-NEXT: movq %rbx, 24(%rdx)
5976 ; FALLBACK0-NEXT: movq %r9, 8(%rdx)
5977 ; FALLBACK0-NEXT: popq %rbx
5978 ; FALLBACK0-NEXT: retq
5980 ; FALLBACK1-LABEL: shl_32bytes:
5981 ; FALLBACK1: # %bb.0:
5982 ; FALLBACK1-NEXT: movq (%rdi), %rax
5983 ; FALLBACK1-NEXT: movq 8(%rdi), %r8
5984 ; FALLBACK1-NEXT: movq 16(%rdi), %r9
5985 ; FALLBACK1-NEXT: movq 24(%rdi), %rdi
5986 ; FALLBACK1-NEXT: movzbl (%rsi), %esi
5987 ; FALLBACK1-NEXT: leal (,%rsi,8), %ecx
5988 ; FALLBACK1-NEXT: xorps %xmm0, %xmm0
5989 ; FALLBACK1-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
5990 ; FALLBACK1-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
5991 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
5992 ; FALLBACK1-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
5993 ; FALLBACK1-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
5994 ; FALLBACK1-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
5995 ; FALLBACK1-NEXT: andb $24, %sil
5996 ; FALLBACK1-NEXT: negb %sil
5997 ; FALLBACK1-NEXT: movsbq %sil, %rax
5998 ; FALLBACK1-NEXT: movq -24(%rsp,%rax), %rsi
5999 ; FALLBACK1-NEXT: movq -16(%rsp,%rax), %rdi
6000 ; FALLBACK1-NEXT: shldq %cl, %rsi, %rdi
6001 ; FALLBACK1-NEXT: movq -40(%rsp,%rax), %r8
6002 ; FALLBACK1-NEXT: movq -32(%rsp,%rax), %rax
6003 ; FALLBACK1-NEXT: shldq %cl, %rax, %rsi
6004 ; FALLBACK1-NEXT: shldq %cl, %r8, %rax
6005 ; FALLBACK1-NEXT: # kill: def $cl killed $cl killed $ecx
6006 ; FALLBACK1-NEXT: shlq %cl, %r8
6007 ; FALLBACK1-NEXT: movq %rsi, 16(%rdx)
6008 ; FALLBACK1-NEXT: movq %rdi, 24(%rdx)
6009 ; FALLBACK1-NEXT: movq %r8, (%rdx)
6010 ; FALLBACK1-NEXT: movq %rax, 8(%rdx)
6011 ; FALLBACK1-NEXT: retq
6013 ; FALLBACK2-LABEL: shl_32bytes:
6014 ; FALLBACK2: # %bb.0:
6015 ; FALLBACK2-NEXT: movq (%rdi), %rcx
6016 ; FALLBACK2-NEXT: movq 8(%rdi), %r8
6017 ; FALLBACK2-NEXT: movq 16(%rdi), %r9
6018 ; FALLBACK2-NEXT: movq 24(%rdi), %rdi
6019 ; FALLBACK2-NEXT: movzbl (%rsi), %esi
6020 ; FALLBACK2-NEXT: leal (,%rsi,8), %eax
6021 ; FALLBACK2-NEXT: xorps %xmm0, %xmm0
6022 ; FALLBACK2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
6023 ; FALLBACK2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
6024 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
6025 ; FALLBACK2-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
6026 ; FALLBACK2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
6027 ; FALLBACK2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
6028 ; FALLBACK2-NEXT: andb $24, %sil
6029 ; FALLBACK2-NEXT: negb %sil
6030 ; FALLBACK2-NEXT: movsbq %sil, %rsi
6031 ; FALLBACK2-NEXT: movq -40(%rsp,%rsi), %rdi
6032 ; FALLBACK2-NEXT: movq -32(%rsp,%rsi), %rcx
6033 ; FALLBACK2-NEXT: shlxq %rax, %rcx, %r8
6034 ; FALLBACK2-NEXT: shlxq %rax, -16(%rsp,%rsi), %r9
6035 ; FALLBACK2-NEXT: movq -24(%rsp,%rsi), %rsi
6036 ; FALLBACK2-NEXT: shlxq %rax, %rsi, %r10
6037 ; FALLBACK2-NEXT: shlxq %rax, %rdi, %r11
6038 ; FALLBACK2-NEXT: # kill: def $al killed $al killed $rax def $rax
6039 ; FALLBACK2-NEXT: notb %al
6040 ; FALLBACK2-NEXT: shrq %rdi
6041 ; FALLBACK2-NEXT: shrxq %rax, %rdi, %rdi
6042 ; FALLBACK2-NEXT: orq %r8, %rdi
6043 ; FALLBACK2-NEXT: shrq %rsi
6044 ; FALLBACK2-NEXT: shrxq %rax, %rsi, %rsi
6045 ; FALLBACK2-NEXT: orq %r9, %rsi
6046 ; FALLBACK2-NEXT: shrq %rcx
6047 ; FALLBACK2-NEXT: shrxq %rax, %rcx, %rax
6048 ; FALLBACK2-NEXT: orq %r10, %rax
6049 ; FALLBACK2-NEXT: movq %r11, (%rdx)
6050 ; FALLBACK2-NEXT: movq %rax, 16(%rdx)
6051 ; FALLBACK2-NEXT: movq %rsi, 24(%rdx)
6052 ; FALLBACK2-NEXT: movq %rdi, 8(%rdx)
6053 ; FALLBACK2-NEXT: retq
6055 ; FALLBACK3-LABEL: shl_32bytes:
6056 ; FALLBACK3: # %bb.0:
6057 ; FALLBACK3-NEXT: movq (%rdi), %rax
6058 ; FALLBACK3-NEXT: movq 8(%rdi), %r8
6059 ; FALLBACK3-NEXT: movq 16(%rdi), %r9
6060 ; FALLBACK3-NEXT: movq 24(%rdi), %rdi
6061 ; FALLBACK3-NEXT: movzbl (%rsi), %esi
6062 ; FALLBACK3-NEXT: leal (,%rsi,8), %ecx
6063 ; FALLBACK3-NEXT: xorps %xmm0, %xmm0
6064 ; FALLBACK3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
6065 ; FALLBACK3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
6066 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
6067 ; FALLBACK3-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
6068 ; FALLBACK3-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
6069 ; FALLBACK3-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
6070 ; FALLBACK3-NEXT: andb $24, %sil
6071 ; FALLBACK3-NEXT: negb %sil
6072 ; FALLBACK3-NEXT: movsbq %sil, %rax
6073 ; FALLBACK3-NEXT: movq -24(%rsp,%rax), %rsi
6074 ; FALLBACK3-NEXT: movq -16(%rsp,%rax), %rdi
6075 ; FALLBACK3-NEXT: shldq %cl, %rsi, %rdi
6076 ; FALLBACK3-NEXT: movq -40(%rsp,%rax), %r8
6077 ; FALLBACK3-NEXT: movq -32(%rsp,%rax), %rax
6078 ; FALLBACK3-NEXT: shldq %cl, %rax, %rsi
6079 ; FALLBACK3-NEXT: shldq %cl, %r8, %rax
6080 ; FALLBACK3-NEXT: shlxq %rcx, %r8, %rcx
6081 ; FALLBACK3-NEXT: movq %rsi, 16(%rdx)
6082 ; FALLBACK3-NEXT: movq %rdi, 24(%rdx)
6083 ; FALLBACK3-NEXT: movq %rcx, (%rdx)
6084 ; FALLBACK3-NEXT: movq %rax, 8(%rdx)
6085 ; FALLBACK3-NEXT: retq
6087 ; FALLBACK4-LABEL: shl_32bytes:
6088 ; FALLBACK4: # %bb.0:
6089 ; FALLBACK4-NEXT: movups (%rdi), %xmm0
6090 ; FALLBACK4-NEXT: movups 16(%rdi), %xmm1
6091 ; FALLBACK4-NEXT: movzbl (%rsi), %ecx
6092 ; FALLBACK4-NEXT: leal (,%rcx,8), %eax
6093 ; FALLBACK4-NEXT: xorps %xmm2, %xmm2
6094 ; FALLBACK4-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
6095 ; FALLBACK4-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
6096 ; FALLBACK4-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
6097 ; FALLBACK4-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
6098 ; FALLBACK4-NEXT: andb $24, %cl
6099 ; FALLBACK4-NEXT: negb %cl
6100 ; FALLBACK4-NEXT: movsbq %cl, %r8
6101 ; FALLBACK4-NEXT: movq -16(%rsp,%r8), %r9
6102 ; FALLBACK4-NEXT: movl %eax, %ecx
6103 ; FALLBACK4-NEXT: shlq %cl, %r9
6104 ; FALLBACK4-NEXT: movl %eax, %esi
6105 ; FALLBACK4-NEXT: notb %sil
6106 ; FALLBACK4-NEXT: movq -24(%rsp,%r8), %r10
6107 ; FALLBACK4-NEXT: movq %r10, %rdi
6108 ; FALLBACK4-NEXT: shrq %rdi
6109 ; FALLBACK4-NEXT: movl %esi, %ecx
6110 ; FALLBACK4-NEXT: shrq %cl, %rdi
6111 ; FALLBACK4-NEXT: orq %r9, %rdi
6112 ; FALLBACK4-NEXT: movl %eax, %ecx
6113 ; FALLBACK4-NEXT: shlq %cl, %r10
6114 ; FALLBACK4-NEXT: movq -40(%rsp,%r8), %r9
6115 ; FALLBACK4-NEXT: movq -32(%rsp,%r8), %r8
6116 ; FALLBACK4-NEXT: movq %r8, %r11
6117 ; FALLBACK4-NEXT: shrq %r11
6118 ; FALLBACK4-NEXT: movl %esi, %ecx
6119 ; FALLBACK4-NEXT: shrq %cl, %r11
6120 ; FALLBACK4-NEXT: orq %r10, %r11
6121 ; FALLBACK4-NEXT: movl %eax, %ecx
6122 ; FALLBACK4-NEXT: shlq %cl, %r8
6123 ; FALLBACK4-NEXT: movq %r9, %r10
6124 ; FALLBACK4-NEXT: shrq %r10
6125 ; FALLBACK4-NEXT: movl %esi, %ecx
6126 ; FALLBACK4-NEXT: shrq %cl, %r10
6127 ; FALLBACK4-NEXT: orq %r8, %r10
6128 ; FALLBACK4-NEXT: movl %eax, %ecx
6129 ; FALLBACK4-NEXT: shlq %cl, %r9
6130 ; FALLBACK4-NEXT: movq %r9, (%rdx)
6131 ; FALLBACK4-NEXT: movq %r10, 8(%rdx)
6132 ; FALLBACK4-NEXT: movq %r11, 16(%rdx)
6133 ; FALLBACK4-NEXT: movq %rdi, 24(%rdx)
6134 ; FALLBACK4-NEXT: retq
6136 ; FALLBACK5-LABEL: shl_32bytes:
6137 ; FALLBACK5: # %bb.0:
6138 ; FALLBACK5-NEXT: movups (%rdi), %xmm0
6139 ; FALLBACK5-NEXT: movups 16(%rdi), %xmm1
6140 ; FALLBACK5-NEXT: movzbl (%rsi), %eax
6141 ; FALLBACK5-NEXT: leal (,%rax,8), %ecx
6142 ; FALLBACK5-NEXT: xorps %xmm2, %xmm2
6143 ; FALLBACK5-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
6144 ; FALLBACK5-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
6145 ; FALLBACK5-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
6146 ; FALLBACK5-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
6147 ; FALLBACK5-NEXT: andb $24, %al
6148 ; FALLBACK5-NEXT: negb %al
6149 ; FALLBACK5-NEXT: movsbq %al, %rax
6150 ; FALLBACK5-NEXT: movq -24(%rsp,%rax), %rsi
6151 ; FALLBACK5-NEXT: movq -16(%rsp,%rax), %rdi
6152 ; FALLBACK5-NEXT: shldq %cl, %rsi, %rdi
6153 ; FALLBACK5-NEXT: movq -40(%rsp,%rax), %r8
6154 ; FALLBACK5-NEXT: movq -32(%rsp,%rax), %rax
6155 ; FALLBACK5-NEXT: shldq %cl, %rax, %rsi
6156 ; FALLBACK5-NEXT: movq %r8, %r9
6157 ; FALLBACK5-NEXT: shlq %cl, %r9
6158 ; FALLBACK5-NEXT: # kill: def $cl killed $cl killed $ecx
6159 ; FALLBACK5-NEXT: shldq %cl, %r8, %rax
6160 ; FALLBACK5-NEXT: movq %rax, 8(%rdx)
6161 ; FALLBACK5-NEXT: movq %rsi, 16(%rdx)
6162 ; FALLBACK5-NEXT: movq %rdi, 24(%rdx)
6163 ; FALLBACK5-NEXT: movq %r9, (%rdx)
6164 ; FALLBACK5-NEXT: retq
6166 ; FALLBACK6-LABEL: shl_32bytes:
6167 ; FALLBACK6: # %bb.0:
6168 ; FALLBACK6-NEXT: movups (%rdi), %xmm0
6169 ; FALLBACK6-NEXT: movups 16(%rdi), %xmm1
6170 ; FALLBACK6-NEXT: movzbl (%rsi), %ecx
6171 ; FALLBACK6-NEXT: leal (,%rcx,8), %eax
6172 ; FALLBACK6-NEXT: xorps %xmm2, %xmm2
6173 ; FALLBACK6-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
6174 ; FALLBACK6-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
6175 ; FALLBACK6-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
6176 ; FALLBACK6-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
6177 ; FALLBACK6-NEXT: andb $24, %cl
6178 ; FALLBACK6-NEXT: negb %cl
6179 ; FALLBACK6-NEXT: movsbq %cl, %rcx
6180 ; FALLBACK6-NEXT: shlxq %rax, -16(%rsp,%rcx), %rsi
6181 ; FALLBACK6-NEXT: movq -24(%rsp,%rcx), %rdi
6182 ; FALLBACK6-NEXT: shlxq %rax, %rdi, %r8
6183 ; FALLBACK6-NEXT: movq -40(%rsp,%rcx), %r9
6184 ; FALLBACK6-NEXT: movq -32(%rsp,%rcx), %rcx
6185 ; FALLBACK6-NEXT: shlxq %rax, %rcx, %r10
6186 ; FALLBACK6-NEXT: shlxq %rax, %r9, %r11
6187 ; FALLBACK6-NEXT: # kill: def $al killed $al killed $rax def $rax
6188 ; FALLBACK6-NEXT: notb %al
6189 ; FALLBACK6-NEXT: shrq %rdi
6190 ; FALLBACK6-NEXT: shrxq %rax, %rdi, %rdi
6191 ; FALLBACK6-NEXT: orq %rsi, %rdi
6192 ; FALLBACK6-NEXT: shrq %rcx
6193 ; FALLBACK6-NEXT: shrxq %rax, %rcx, %rcx
6194 ; FALLBACK6-NEXT: orq %r8, %rcx
6195 ; FALLBACK6-NEXT: shrq %r9
6196 ; FALLBACK6-NEXT: shrxq %rax, %r9, %rax
6197 ; FALLBACK6-NEXT: orq %r10, %rax
6198 ; FALLBACK6-NEXT: movq %r11, (%rdx)
6199 ; FALLBACK6-NEXT: movq %rax, 8(%rdx)
6200 ; FALLBACK6-NEXT: movq %rcx, 16(%rdx)
6201 ; FALLBACK6-NEXT: movq %rdi, 24(%rdx)
6202 ; FALLBACK6-NEXT: retq
6204 ; FALLBACK7-LABEL: shl_32bytes:
6205 ; FALLBACK7: # %bb.0:
6206 ; FALLBACK7-NEXT: movups (%rdi), %xmm0
6207 ; FALLBACK7-NEXT: movups 16(%rdi), %xmm1
6208 ; FALLBACK7-NEXT: movzbl (%rsi), %eax
6209 ; FALLBACK7-NEXT: leal (,%rax,8), %ecx
6210 ; FALLBACK7-NEXT: xorps %xmm2, %xmm2
6211 ; FALLBACK7-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
6212 ; FALLBACK7-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
6213 ; FALLBACK7-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
6214 ; FALLBACK7-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
6215 ; FALLBACK7-NEXT: andb $24, %al
6216 ; FALLBACK7-NEXT: negb %al
6217 ; FALLBACK7-NEXT: movsbq %al, %rax
6218 ; FALLBACK7-NEXT: movq -24(%rsp,%rax), %rsi
6219 ; FALLBACK7-NEXT: movq -16(%rsp,%rax), %rdi
6220 ; FALLBACK7-NEXT: shldq %cl, %rsi, %rdi
6221 ; FALLBACK7-NEXT: movq -40(%rsp,%rax), %r8
6222 ; FALLBACK7-NEXT: movq -32(%rsp,%rax), %rax
6223 ; FALLBACK7-NEXT: shldq %cl, %rax, %rsi
6224 ; FALLBACK7-NEXT: shlxq %rcx, %r8, %r9
6225 ; FALLBACK7-NEXT: # kill: def $cl killed $cl killed $rcx
6226 ; FALLBACK7-NEXT: shldq %cl, %r8, %rax
6227 ; FALLBACK7-NEXT: movq %rax, 8(%rdx)
6228 ; FALLBACK7-NEXT: movq %rsi, 16(%rdx)
6229 ; FALLBACK7-NEXT: movq %rdi, 24(%rdx)
6230 ; FALLBACK7-NEXT: movq %r9, (%rdx)
6231 ; FALLBACK7-NEXT: retq
6233 ; FALLBACK8-LABEL: shl_32bytes:
6234 ; FALLBACK8: # %bb.0:
6235 ; FALLBACK8-NEXT: vmovups (%rdi), %ymm0
6236 ; FALLBACK8-NEXT: movzbl (%rsi), %ecx
6237 ; FALLBACK8-NEXT: leal (,%rcx,8), %eax
6238 ; FALLBACK8-NEXT: vxorps %xmm1, %xmm1, %xmm1
6239 ; FALLBACK8-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
6240 ; FALLBACK8-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
6241 ; FALLBACK8-NEXT: andb $24, %cl
6242 ; FALLBACK8-NEXT: negb %cl
6243 ; FALLBACK8-NEXT: movsbq %cl, %r8
6244 ; FALLBACK8-NEXT: movq -16(%rsp,%r8), %r9
6245 ; FALLBACK8-NEXT: movl %eax, %ecx
6246 ; FALLBACK8-NEXT: shlq %cl, %r9
6247 ; FALLBACK8-NEXT: movl %eax, %esi
6248 ; FALLBACK8-NEXT: notb %sil
6249 ; FALLBACK8-NEXT: movq -24(%rsp,%r8), %r10
6250 ; FALLBACK8-NEXT: movq %r10, %rdi
6251 ; FALLBACK8-NEXT: shrq %rdi
6252 ; FALLBACK8-NEXT: movl %esi, %ecx
6253 ; FALLBACK8-NEXT: shrq %cl, %rdi
6254 ; FALLBACK8-NEXT: orq %r9, %rdi
6255 ; FALLBACK8-NEXT: movl %eax, %ecx
6256 ; FALLBACK8-NEXT: shlq %cl, %r10
6257 ; FALLBACK8-NEXT: movq -40(%rsp,%r8), %r9
6258 ; FALLBACK8-NEXT: movq -32(%rsp,%r8), %r8
6259 ; FALLBACK8-NEXT: movq %r8, %r11
6260 ; FALLBACK8-NEXT: shrq %r11
6261 ; FALLBACK8-NEXT: movl %esi, %ecx
6262 ; FALLBACK8-NEXT: shrq %cl, %r11
6263 ; FALLBACK8-NEXT: orq %r10, %r11
6264 ; FALLBACK8-NEXT: movl %eax, %ecx
6265 ; FALLBACK8-NEXT: shlq %cl, %r8
6266 ; FALLBACK8-NEXT: movq %r9, %r10
6267 ; FALLBACK8-NEXT: shrq %r10
6268 ; FALLBACK8-NEXT: movl %esi, %ecx
6269 ; FALLBACK8-NEXT: shrq %cl, %r10
6270 ; FALLBACK8-NEXT: orq %r8, %r10
6271 ; FALLBACK8-NEXT: movl %eax, %ecx
6272 ; FALLBACK8-NEXT: shlq %cl, %r9
6273 ; FALLBACK8-NEXT: movq %r9, (%rdx)
6274 ; FALLBACK8-NEXT: movq %r10, 8(%rdx)
6275 ; FALLBACK8-NEXT: movq %r11, 16(%rdx)
6276 ; FALLBACK8-NEXT: movq %rdi, 24(%rdx)
6277 ; FALLBACK8-NEXT: vzeroupper
6278 ; FALLBACK8-NEXT: retq
6280 ; FALLBACK9-LABEL: shl_32bytes:
6281 ; FALLBACK9: # %bb.0:
6282 ; FALLBACK9-NEXT: vmovups (%rdi), %ymm0
6283 ; FALLBACK9-NEXT: movzbl (%rsi), %eax
6284 ; FALLBACK9-NEXT: leal (,%rax,8), %ecx
6285 ; FALLBACK9-NEXT: vxorps %xmm1, %xmm1, %xmm1
6286 ; FALLBACK9-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
6287 ; FALLBACK9-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
6288 ; FALLBACK9-NEXT: andb $24, %al
6289 ; FALLBACK9-NEXT: negb %al
6290 ; FALLBACK9-NEXT: movsbq %al, %rax
6291 ; FALLBACK9-NEXT: movq -24(%rsp,%rax), %rsi
6292 ; FALLBACK9-NEXT: movq -16(%rsp,%rax), %rdi
6293 ; FALLBACK9-NEXT: shldq %cl, %rsi, %rdi
6294 ; FALLBACK9-NEXT: movq -40(%rsp,%rax), %r8
6295 ; FALLBACK9-NEXT: movq -32(%rsp,%rax), %rax
6296 ; FALLBACK9-NEXT: shldq %cl, %rax, %rsi
6297 ; FALLBACK9-NEXT: movq %r8, %r9
6298 ; FALLBACK9-NEXT: shlq %cl, %r9
6299 ; FALLBACK9-NEXT: # kill: def $cl killed $cl killed $ecx
6300 ; FALLBACK9-NEXT: shldq %cl, %r8, %rax
6301 ; FALLBACK9-NEXT: movq %rax, 8(%rdx)
6302 ; FALLBACK9-NEXT: movq %rsi, 16(%rdx)
6303 ; FALLBACK9-NEXT: movq %rdi, 24(%rdx)
6304 ; FALLBACK9-NEXT: movq %r9, (%rdx)
6305 ; FALLBACK9-NEXT: vzeroupper
6306 ; FALLBACK9-NEXT: retq
6308 ; FALLBACK10-LABEL: shl_32bytes:
6309 ; FALLBACK10: # %bb.0:
6310 ; FALLBACK10-NEXT: vmovups (%rdi), %ymm0
6311 ; FALLBACK10-NEXT: movzbl (%rsi), %ecx
6312 ; FALLBACK10-NEXT: leal (,%rcx,8), %eax
6313 ; FALLBACK10-NEXT: vxorps %xmm1, %xmm1, %xmm1
6314 ; FALLBACK10-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
6315 ; FALLBACK10-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
6316 ; FALLBACK10-NEXT: andb $24, %cl
6317 ; FALLBACK10-NEXT: negb %cl
6318 ; FALLBACK10-NEXT: movsbq %cl, %rcx
6319 ; FALLBACK10-NEXT: shlxq %rax, -16(%rsp,%rcx), %rsi
6320 ; FALLBACK10-NEXT: movq -24(%rsp,%rcx), %rdi
6321 ; FALLBACK10-NEXT: shlxq %rax, %rdi, %r8
6322 ; FALLBACK10-NEXT: movq -40(%rsp,%rcx), %r9
6323 ; FALLBACK10-NEXT: movq -32(%rsp,%rcx), %rcx
6324 ; FALLBACK10-NEXT: shlxq %rax, %rcx, %r10
6325 ; FALLBACK10-NEXT: shlxq %rax, %r9, %r11
6326 ; FALLBACK10-NEXT: # kill: def $al killed $al killed $rax def $rax
6327 ; FALLBACK10-NEXT: notb %al
6328 ; FALLBACK10-NEXT: shrq %rdi
6329 ; FALLBACK10-NEXT: shrxq %rax, %rdi, %rdi
6330 ; FALLBACK10-NEXT: orq %rsi, %rdi
6331 ; FALLBACK10-NEXT: shrq %rcx
6332 ; FALLBACK10-NEXT: shrxq %rax, %rcx, %rcx
6333 ; FALLBACK10-NEXT: orq %r8, %rcx
6334 ; FALLBACK10-NEXT: shrq %r9
6335 ; FALLBACK10-NEXT: shrxq %rax, %r9, %rax
6336 ; FALLBACK10-NEXT: orq %r10, %rax
6337 ; FALLBACK10-NEXT: movq %r11, (%rdx)
6338 ; FALLBACK10-NEXT: movq %rax, 8(%rdx)
6339 ; FALLBACK10-NEXT: movq %rcx, 16(%rdx)
6340 ; FALLBACK10-NEXT: movq %rdi, 24(%rdx)
6341 ; FALLBACK10-NEXT: vzeroupper
6342 ; FALLBACK10-NEXT: retq
6344 ; FALLBACK11-LABEL: shl_32bytes:
6345 ; FALLBACK11: # %bb.0:
6346 ; FALLBACK11-NEXT: vmovups (%rdi), %ymm0
6347 ; FALLBACK11-NEXT: movzbl (%rsi), %eax
6348 ; FALLBACK11-NEXT: leal (,%rax,8), %ecx
6349 ; FALLBACK11-NEXT: vxorps %xmm1, %xmm1, %xmm1
6350 ; FALLBACK11-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
6351 ; FALLBACK11-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
6352 ; FALLBACK11-NEXT: andb $24, %al
6353 ; FALLBACK11-NEXT: negb %al
6354 ; FALLBACK11-NEXT: movsbq %al, %rax
6355 ; FALLBACK11-NEXT: movq -24(%rsp,%rax), %rsi
6356 ; FALLBACK11-NEXT: movq -16(%rsp,%rax), %rdi
6357 ; FALLBACK11-NEXT: shldq %cl, %rsi, %rdi
6358 ; FALLBACK11-NEXT: movq -40(%rsp,%rax), %r8
6359 ; FALLBACK11-NEXT: movq -32(%rsp,%rax), %rax
6360 ; FALLBACK11-NEXT: shldq %cl, %rax, %rsi
6361 ; FALLBACK11-NEXT: shlxq %rcx, %r8, %r9
6362 ; FALLBACK11-NEXT: # kill: def $cl killed $cl killed $rcx
6363 ; FALLBACK11-NEXT: shldq %cl, %r8, %rax
6364 ; FALLBACK11-NEXT: movq %rax, 8(%rdx)
6365 ; FALLBACK11-NEXT: movq %rsi, 16(%rdx)
6366 ; FALLBACK11-NEXT: movq %rdi, 24(%rdx)
6367 ; FALLBACK11-NEXT: movq %r9, (%rdx)
6368 ; FALLBACK11-NEXT: vzeroupper
6369 ; FALLBACK11-NEXT: retq
6371 ; FALLBACK12-LABEL: shl_32bytes:
6372 ; FALLBACK12: # %bb.0:
6373 ; FALLBACK12-NEXT: vmovups (%rdi), %ymm0
6374 ; FALLBACK12-NEXT: movzbl (%rsi), %ecx
6375 ; FALLBACK12-NEXT: leal (,%rcx,8), %eax
6376 ; FALLBACK12-NEXT: vxorps %xmm1, %xmm1, %xmm1
6377 ; FALLBACK12-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
6378 ; FALLBACK12-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
6379 ; FALLBACK12-NEXT: andb $24, %cl
6380 ; FALLBACK12-NEXT: negb %cl
6381 ; FALLBACK12-NEXT: movsbq %cl, %r8
6382 ; FALLBACK12-NEXT: movq -16(%rsp,%r8), %r9
6383 ; FALLBACK12-NEXT: movl %eax, %ecx
6384 ; FALLBACK12-NEXT: shlq %cl, %r9
6385 ; FALLBACK12-NEXT: movl %eax, %esi
6386 ; FALLBACK12-NEXT: notb %sil
6387 ; FALLBACK12-NEXT: movq -24(%rsp,%r8), %r10
6388 ; FALLBACK12-NEXT: movq %r10, %rdi
6389 ; FALLBACK12-NEXT: shrq %rdi
6390 ; FALLBACK12-NEXT: movl %esi, %ecx
6391 ; FALLBACK12-NEXT: shrq %cl, %rdi
6392 ; FALLBACK12-NEXT: orq %r9, %rdi
6393 ; FALLBACK12-NEXT: movl %eax, %ecx
6394 ; FALLBACK12-NEXT: shlq %cl, %r10
6395 ; FALLBACK12-NEXT: movq -40(%rsp,%r8), %r9
6396 ; FALLBACK12-NEXT: movq -32(%rsp,%r8), %r8
6397 ; FALLBACK12-NEXT: movq %r8, %r11
6398 ; FALLBACK12-NEXT: shrq %r11
6399 ; FALLBACK12-NEXT: movl %esi, %ecx
6400 ; FALLBACK12-NEXT: shrq %cl, %r11
6401 ; FALLBACK12-NEXT: orq %r10, %r11
6402 ; FALLBACK12-NEXT: movl %eax, %ecx
6403 ; FALLBACK12-NEXT: shlq %cl, %r8
6404 ; FALLBACK12-NEXT: movq %r9, %r10
6405 ; FALLBACK12-NEXT: shrq %r10
6406 ; FALLBACK12-NEXT: movl %esi, %ecx
6407 ; FALLBACK12-NEXT: shrq %cl, %r10
6408 ; FALLBACK12-NEXT: orq %r8, %r10
6409 ; FALLBACK12-NEXT: movl %eax, %ecx
6410 ; FALLBACK12-NEXT: shlq %cl, %r9
6411 ; FALLBACK12-NEXT: movq %r9, (%rdx)
6412 ; FALLBACK12-NEXT: movq %r10, 8(%rdx)
6413 ; FALLBACK12-NEXT: movq %r11, 16(%rdx)
6414 ; FALLBACK12-NEXT: movq %rdi, 24(%rdx)
6415 ; FALLBACK12-NEXT: vzeroupper
6416 ; FALLBACK12-NEXT: retq
6418 ; FALLBACK13-LABEL: shl_32bytes:
6419 ; FALLBACK13: # %bb.0:
6420 ; FALLBACK13-NEXT: vmovups (%rdi), %ymm0
6421 ; FALLBACK13-NEXT: movzbl (%rsi), %eax
6422 ; FALLBACK13-NEXT: leal (,%rax,8), %ecx
6423 ; FALLBACK13-NEXT: vxorps %xmm1, %xmm1, %xmm1
6424 ; FALLBACK13-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
6425 ; FALLBACK13-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
6426 ; FALLBACK13-NEXT: andb $24, %al
6427 ; FALLBACK13-NEXT: negb %al
6428 ; FALLBACK13-NEXT: movsbq %al, %rax
6429 ; FALLBACK13-NEXT: movq -24(%rsp,%rax), %rsi
6430 ; FALLBACK13-NEXT: movq -16(%rsp,%rax), %rdi
6431 ; FALLBACK13-NEXT: shldq %cl, %rsi, %rdi
6432 ; FALLBACK13-NEXT: movq -40(%rsp,%rax), %r8
6433 ; FALLBACK13-NEXT: movq -32(%rsp,%rax), %rax
6434 ; FALLBACK13-NEXT: shldq %cl, %rax, %rsi
6435 ; FALLBACK13-NEXT: movq %r8, %r9
6436 ; FALLBACK13-NEXT: shlq %cl, %r9
6437 ; FALLBACK13-NEXT: # kill: def $cl killed $cl killed $ecx
6438 ; FALLBACK13-NEXT: shldq %cl, %r8, %rax
6439 ; FALLBACK13-NEXT: movq %rax, 8(%rdx)
6440 ; FALLBACK13-NEXT: movq %rsi, 16(%rdx)
6441 ; FALLBACK13-NEXT: movq %rdi, 24(%rdx)
6442 ; FALLBACK13-NEXT: movq %r9, (%rdx)
6443 ; FALLBACK13-NEXT: vzeroupper
6444 ; FALLBACK13-NEXT: retq
6446 ; FALLBACK14-LABEL: shl_32bytes:
6447 ; FALLBACK14: # %bb.0:
6448 ; FALLBACK14-NEXT: vmovups (%rdi), %ymm0
6449 ; FALLBACK14-NEXT: movzbl (%rsi), %ecx
6450 ; FALLBACK14-NEXT: leal (,%rcx,8), %eax
6451 ; FALLBACK14-NEXT: vxorps %xmm1, %xmm1, %xmm1
6452 ; FALLBACK14-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
6453 ; FALLBACK14-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
6454 ; FALLBACK14-NEXT: andb $24, %cl
6455 ; FALLBACK14-NEXT: negb %cl
6456 ; FALLBACK14-NEXT: movsbq %cl, %rcx
6457 ; FALLBACK14-NEXT: shlxq %rax, -16(%rsp,%rcx), %rsi
6458 ; FALLBACK14-NEXT: movq -24(%rsp,%rcx), %rdi
6459 ; FALLBACK14-NEXT: shlxq %rax, %rdi, %r8
6460 ; FALLBACK14-NEXT: movq -40(%rsp,%rcx), %r9
6461 ; FALLBACK14-NEXT: movq -32(%rsp,%rcx), %rcx
6462 ; FALLBACK14-NEXT: shlxq %rax, %rcx, %r10
6463 ; FALLBACK14-NEXT: shlxq %rax, %r9, %r11
6464 ; FALLBACK14-NEXT: # kill: def $al killed $al killed $rax def $rax
6465 ; FALLBACK14-NEXT: notb %al
6466 ; FALLBACK14-NEXT: shrq %rdi
6467 ; FALLBACK14-NEXT: shrxq %rax, %rdi, %rdi
6468 ; FALLBACK14-NEXT: orq %rsi, %rdi
6469 ; FALLBACK14-NEXT: shrq %rcx
6470 ; FALLBACK14-NEXT: shrxq %rax, %rcx, %rcx
6471 ; FALLBACK14-NEXT: orq %r8, %rcx
6472 ; FALLBACK14-NEXT: shrq %r9
6473 ; FALLBACK14-NEXT: shrxq %rax, %r9, %rax
6474 ; FALLBACK14-NEXT: orq %r10, %rax
6475 ; FALLBACK14-NEXT: movq %r11, (%rdx)
6476 ; FALLBACK14-NEXT: movq %rax, 8(%rdx)
6477 ; FALLBACK14-NEXT: movq %rcx, 16(%rdx)
6478 ; FALLBACK14-NEXT: movq %rdi, 24(%rdx)
6479 ; FALLBACK14-NEXT: vzeroupper
6480 ; FALLBACK14-NEXT: retq
6482 ; FALLBACK15-LABEL: shl_32bytes:
6483 ; FALLBACK15: # %bb.0:
6484 ; FALLBACK15-NEXT: vmovups (%rdi), %ymm0
6485 ; FALLBACK15-NEXT: movzbl (%rsi), %eax
6486 ; FALLBACK15-NEXT: leal (,%rax,8), %ecx
6487 ; FALLBACK15-NEXT: vxorps %xmm1, %xmm1, %xmm1
6488 ; FALLBACK15-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
6489 ; FALLBACK15-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
6490 ; FALLBACK15-NEXT: andb $24, %al
6491 ; FALLBACK15-NEXT: negb %al
6492 ; FALLBACK15-NEXT: movsbq %al, %rax
6493 ; FALLBACK15-NEXT: movq -24(%rsp,%rax), %rsi
6494 ; FALLBACK15-NEXT: movq -16(%rsp,%rax), %rdi
6495 ; FALLBACK15-NEXT: shldq %cl, %rsi, %rdi
6496 ; FALLBACK15-NEXT: movq -40(%rsp,%rax), %r8
6497 ; FALLBACK15-NEXT: movq -32(%rsp,%rax), %rax
6498 ; FALLBACK15-NEXT: shldq %cl, %rax, %rsi
6499 ; FALLBACK15-NEXT: shlxq %rcx, %r8, %r9
6500 ; FALLBACK15-NEXT: # kill: def $cl killed $cl killed $rcx
6501 ; FALLBACK15-NEXT: shldq %cl, %r8, %rax
6502 ; FALLBACK15-NEXT: movq %rax, 8(%rdx)
6503 ; FALLBACK15-NEXT: movq %rsi, 16(%rdx)
6504 ; FALLBACK15-NEXT: movq %rdi, 24(%rdx)
6505 ; FALLBACK15-NEXT: movq %r9, (%rdx)
6506 ; FALLBACK15-NEXT: vzeroupper
6507 ; FALLBACK15-NEXT: retq
6509 ; FALLBACK16-LABEL: shl_32bytes:
6510 ; FALLBACK16: # %bb.0:
6511 ; FALLBACK16-NEXT: pushl %ebp
6512 ; FALLBACK16-NEXT: pushl %ebx
6513 ; FALLBACK16-NEXT: pushl %edi
6514 ; FALLBACK16-NEXT: pushl %esi
6515 ; FALLBACK16-NEXT: subl $108, %esp
6516 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %eax
6517 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %ecx
6518 ; FALLBACK16-NEXT: movl (%ecx), %edx
6519 ; FALLBACK16-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6520 ; FALLBACK16-NEXT: movl 4(%ecx), %edx
6521 ; FALLBACK16-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6522 ; FALLBACK16-NEXT: movl 8(%ecx), %esi
6523 ; FALLBACK16-NEXT: movl 12(%ecx), %edi
6524 ; FALLBACK16-NEXT: movl 16(%ecx), %ebx
6525 ; FALLBACK16-NEXT: movb (%eax), %ah
6526 ; FALLBACK16-NEXT: movl 20(%ecx), %ebp
6527 ; FALLBACK16-NEXT: movl 24(%ecx), %edx
6528 ; FALLBACK16-NEXT: movl 28(%ecx), %ecx
6529 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
6530 ; FALLBACK16-NEXT: movl %edx, {{[0-9]+}}(%esp)
6531 ; FALLBACK16-NEXT: movb %ah, %ch
6532 ; FALLBACK16-NEXT: shlb $3, %ch
6533 ; FALLBACK16-NEXT: xorps %xmm0, %xmm0
6534 ; FALLBACK16-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
6535 ; FALLBACK16-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
6536 ; FALLBACK16-NEXT: movl %ebp, {{[0-9]+}}(%esp)
6537 ; FALLBACK16-NEXT: movl %ebx, {{[0-9]+}}(%esp)
6538 ; FALLBACK16-NEXT: movl %edi, {{[0-9]+}}(%esp)
6539 ; FALLBACK16-NEXT: movl %esi, {{[0-9]+}}(%esp)
6540 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
6541 ; FALLBACK16-NEXT: movl %edx, {{[0-9]+}}(%esp)
6542 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
6543 ; FALLBACK16-NEXT: movl %edx, {{[0-9]+}}(%esp)
6544 ; FALLBACK16-NEXT: andb $28, %ah
6545 ; FALLBACK16-NEXT: negb %ah
6546 ; FALLBACK16-NEXT: movsbl %ah, %ebx
6547 ; FALLBACK16-NEXT: movl 64(%esp,%ebx), %edi
6548 ; FALLBACK16-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6549 ; FALLBACK16-NEXT: movl 68(%esp,%ebx), %eax
6550 ; FALLBACK16-NEXT: movl %eax, %esi
6551 ; FALLBACK16-NEXT: movb %ch, %cl
6552 ; FALLBACK16-NEXT: shll %cl, %esi
6553 ; FALLBACK16-NEXT: movb %ch, %dl
6554 ; FALLBACK16-NEXT: notb %dl
6555 ; FALLBACK16-NEXT: shrl %edi
6556 ; FALLBACK16-NEXT: movb %dl, %cl
6557 ; FALLBACK16-NEXT: shrl %cl, %edi
6558 ; FALLBACK16-NEXT: orl %esi, %edi
6559 ; FALLBACK16-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6560 ; FALLBACK16-NEXT: movl 76(%esp,%ebx), %edi
6561 ; FALLBACK16-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6562 ; FALLBACK16-NEXT: movb %ch, %cl
6563 ; FALLBACK16-NEXT: shll %cl, %edi
6564 ; FALLBACK16-NEXT: movl 72(%esp,%ebx), %esi
6565 ; FALLBACK16-NEXT: movl %esi, %ebp
6566 ; FALLBACK16-NEXT: shrl %ebp
6567 ; FALLBACK16-NEXT: movb %dl, %cl
6568 ; FALLBACK16-NEXT: shrl %cl, %ebp
6569 ; FALLBACK16-NEXT: orl %edi, %ebp
6570 ; FALLBACK16-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6571 ; FALLBACK16-NEXT: movb %ch, %cl
6572 ; FALLBACK16-NEXT: shll %cl, %esi
6573 ; FALLBACK16-NEXT: shrl %eax
6574 ; FALLBACK16-NEXT: movb %dl, %cl
6575 ; FALLBACK16-NEXT: shrl %cl, %eax
6576 ; FALLBACK16-NEXT: orl %esi, %eax
6577 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6578 ; FALLBACK16-NEXT: movl 84(%esp,%ebx), %esi
6579 ; FALLBACK16-NEXT: movl %esi, %eax
6580 ; FALLBACK16-NEXT: movb %ch, %cl
6581 ; FALLBACK16-NEXT: shll %cl, %eax
6582 ; FALLBACK16-NEXT: movl 80(%esp,%ebx), %edi
6583 ; FALLBACK16-NEXT: movl %edi, %ebp
6584 ; FALLBACK16-NEXT: shrl %ebp
6585 ; FALLBACK16-NEXT: movb %dl, %cl
6586 ; FALLBACK16-NEXT: shrl %cl, %ebp
6587 ; FALLBACK16-NEXT: orl %eax, %ebp
6588 ; FALLBACK16-NEXT: movb %ch, %cl
6589 ; FALLBACK16-NEXT: shll %cl, %edi
6590 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
6591 ; FALLBACK16-NEXT: shrl %eax
6592 ; FALLBACK16-NEXT: movb %dl, %cl
6593 ; FALLBACK16-NEXT: shrl %cl, %eax
6594 ; FALLBACK16-NEXT: orl %edi, %eax
6595 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6596 ; FALLBACK16-NEXT: movl 92(%esp,%ebx), %eax
6597 ; FALLBACK16-NEXT: movb %ch, %cl
6598 ; FALLBACK16-NEXT: shll %cl, %eax
6599 ; FALLBACK16-NEXT: movl 88(%esp,%ebx), %edi
6600 ; FALLBACK16-NEXT: movl %edi, %ebx
6601 ; FALLBACK16-NEXT: shrl %ebx
6602 ; FALLBACK16-NEXT: movb %dl, %cl
6603 ; FALLBACK16-NEXT: shrl %cl, %ebx
6604 ; FALLBACK16-NEXT: orl %eax, %ebx
6605 ; FALLBACK16-NEXT: movb %ch, %cl
6606 ; FALLBACK16-NEXT: shll %cl, %edi
6607 ; FALLBACK16-NEXT: shrl %esi
6608 ; FALLBACK16-NEXT: movb %dl, %cl
6609 ; FALLBACK16-NEXT: shrl %cl, %esi
6610 ; FALLBACK16-NEXT: orl %edi, %esi
6611 ; FALLBACK16-NEXT: movb %ch, %cl
6612 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
6613 ; FALLBACK16-NEXT: shll %cl, %edx
6614 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %eax
6615 ; FALLBACK16-NEXT: movl %edx, (%eax)
6616 ; FALLBACK16-NEXT: movl %esi, 24(%eax)
6617 ; FALLBACK16-NEXT: movl %ebx, 28(%eax)
6618 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
6619 ; FALLBACK16-NEXT: movl %ecx, 16(%eax)
6620 ; FALLBACK16-NEXT: movl %ebp, 20(%eax)
6621 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
6622 ; FALLBACK16-NEXT: movl %ecx, 8(%eax)
6623 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
6624 ; FALLBACK16-NEXT: movl %ecx, 12(%eax)
6625 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
6626 ; FALLBACK16-NEXT: movl %ecx, 4(%eax)
6627 ; FALLBACK16-NEXT: addl $108, %esp
6628 ; FALLBACK16-NEXT: popl %esi
6629 ; FALLBACK16-NEXT: popl %edi
6630 ; FALLBACK16-NEXT: popl %ebx
6631 ; FALLBACK16-NEXT: popl %ebp
6632 ; FALLBACK16-NEXT: retl
6634 ; FALLBACK17-LABEL: shl_32bytes:
6635 ; FALLBACK17: # %bb.0:
6636 ; FALLBACK17-NEXT: pushl %ebp
6637 ; FALLBACK17-NEXT: pushl %ebx
6638 ; FALLBACK17-NEXT: pushl %edi
6639 ; FALLBACK17-NEXT: pushl %esi
6640 ; FALLBACK17-NEXT: subl $92, %esp
6641 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %ecx
6642 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %eax
6643 ; FALLBACK17-NEXT: movl (%eax), %edx
6644 ; FALLBACK17-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6645 ; FALLBACK17-NEXT: movl 4(%eax), %edx
6646 ; FALLBACK17-NEXT: movl %edx, (%esp) # 4-byte Spill
6647 ; FALLBACK17-NEXT: movl 8(%eax), %esi
6648 ; FALLBACK17-NEXT: movl 12(%eax), %edi
6649 ; FALLBACK17-NEXT: movl 16(%eax), %ebx
6650 ; FALLBACK17-NEXT: movb (%ecx), %ch
6651 ; FALLBACK17-NEXT: movl 20(%eax), %ebp
6652 ; FALLBACK17-NEXT: movl 24(%eax), %edx
6653 ; FALLBACK17-NEXT: movl 28(%eax), %eax
6654 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
6655 ; FALLBACK17-NEXT: movl %edx, {{[0-9]+}}(%esp)
6656 ; FALLBACK17-NEXT: movb %ch, %cl
6657 ; FALLBACK17-NEXT: shlb $3, %cl
6658 ; FALLBACK17-NEXT: xorps %xmm0, %xmm0
6659 ; FALLBACK17-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
6660 ; FALLBACK17-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
6661 ; FALLBACK17-NEXT: movl %ebp, {{[0-9]+}}(%esp)
6662 ; FALLBACK17-NEXT: movl %ebx, {{[0-9]+}}(%esp)
6663 ; FALLBACK17-NEXT: movl %edi, {{[0-9]+}}(%esp)
6664 ; FALLBACK17-NEXT: movl %esi, {{[0-9]+}}(%esp)
6665 ; FALLBACK17-NEXT: movl (%esp), %eax # 4-byte Reload
6666 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
6667 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
6668 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
6669 ; FALLBACK17-NEXT: andb $28, %ch
6670 ; FALLBACK17-NEXT: negb %ch
6671 ; FALLBACK17-NEXT: movsbl %ch, %eax
6672 ; FALLBACK17-NEXT: movl 56(%esp,%eax), %edx
6673 ; FALLBACK17-NEXT: movl 60(%esp,%eax), %ebx
6674 ; FALLBACK17-NEXT: movl %ebx, %esi
6675 ; FALLBACK17-NEXT: shldl %cl, %edx, %esi
6676 ; FALLBACK17-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6677 ; FALLBACK17-NEXT: movl 52(%esp,%eax), %esi
6678 ; FALLBACK17-NEXT: movl %esi, (%esp) # 4-byte Spill
6679 ; FALLBACK17-NEXT: shldl %cl, %esi, %edx
6680 ; FALLBACK17-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6681 ; FALLBACK17-NEXT: movl 64(%esp,%eax), %edi
6682 ; FALLBACK17-NEXT: movl 68(%esp,%eax), %ebp
6683 ; FALLBACK17-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6684 ; FALLBACK17-NEXT: shldl %cl, %edi, %ebp
6685 ; FALLBACK17-NEXT: shldl %cl, %ebx, %edi
6686 ; FALLBACK17-NEXT: movl 48(%esp,%eax), %ebx
6687 ; FALLBACK17-NEXT: movl 72(%esp,%eax), %edx
6688 ; FALLBACK17-NEXT: movl 76(%esp,%eax), %esi
6689 ; FALLBACK17-NEXT: shldl %cl, %edx, %esi
6690 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
6691 ; FALLBACK17-NEXT: shldl %cl, %eax, %edx
6692 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %eax
6693 ; FALLBACK17-NEXT: movl %edx, 24(%eax)
6694 ; FALLBACK17-NEXT: movl %esi, 28(%eax)
6695 ; FALLBACK17-NEXT: movl %edi, 16(%eax)
6696 ; FALLBACK17-NEXT: movl %ebp, 20(%eax)
6697 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
6698 ; FALLBACK17-NEXT: movl %edx, 8(%eax)
6699 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
6700 ; FALLBACK17-NEXT: movl %edx, 12(%eax)
6701 ; FALLBACK17-NEXT: movl (%esp), %edx # 4-byte Reload
6702 ; FALLBACK17-NEXT: shldl %cl, %ebx, %edx
6703 ; FALLBACK17-NEXT: shll %cl, %ebx
6704 ; FALLBACK17-NEXT: movl %ebx, (%eax)
6705 ; FALLBACK17-NEXT: movl %edx, 4(%eax)
6706 ; FALLBACK17-NEXT: addl $92, %esp
6707 ; FALLBACK17-NEXT: popl %esi
6708 ; FALLBACK17-NEXT: popl %edi
6709 ; FALLBACK17-NEXT: popl %ebx
6710 ; FALLBACK17-NEXT: popl %ebp
6711 ; FALLBACK17-NEXT: retl
6713 ; FALLBACK18-LABEL: shl_32bytes:
6714 ; FALLBACK18: # %bb.0:
6715 ; FALLBACK18-NEXT: pushl %ebp
6716 ; FALLBACK18-NEXT: pushl %ebx
6717 ; FALLBACK18-NEXT: pushl %edi
6718 ; FALLBACK18-NEXT: pushl %esi
6719 ; FALLBACK18-NEXT: subl $108, %esp
6720 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %ebx
6721 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %eax
6722 ; FALLBACK18-NEXT: movl (%eax), %ecx
6723 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6724 ; FALLBACK18-NEXT: movl 4(%eax), %ecx
6725 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6726 ; FALLBACK18-NEXT: movl 8(%eax), %esi
6727 ; FALLBACK18-NEXT: movl 12(%eax), %edi
6728 ; FALLBACK18-NEXT: movl 16(%eax), %ebp
6729 ; FALLBACK18-NEXT: movzbl (%ebx), %ebx
6730 ; FALLBACK18-NEXT: movl 20(%eax), %edx
6731 ; FALLBACK18-NEXT: movl 24(%eax), %ecx
6732 ; FALLBACK18-NEXT: movl 28(%eax), %eax
6733 ; FALLBACK18-NEXT: movl %eax, {{[0-9]+}}(%esp)
6734 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
6735 ; FALLBACK18-NEXT: movl %edx, {{[0-9]+}}(%esp)
6736 ; FALLBACK18-NEXT: movl %ebx, %edx
6737 ; FALLBACK18-NEXT: shlb $3, %dl
6738 ; FALLBACK18-NEXT: xorps %xmm0, %xmm0
6739 ; FALLBACK18-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
6740 ; FALLBACK18-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
6741 ; FALLBACK18-NEXT: movl %ebp, {{[0-9]+}}(%esp)
6742 ; FALLBACK18-NEXT: movl %edi, {{[0-9]+}}(%esp)
6743 ; FALLBACK18-NEXT: movl %esi, {{[0-9]+}}(%esp)
6744 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
6745 ; FALLBACK18-NEXT: movl %eax, {{[0-9]+}}(%esp)
6746 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
6747 ; FALLBACK18-NEXT: movl %eax, {{[0-9]+}}(%esp)
6748 ; FALLBACK18-NEXT: andb $28, %bl
6749 ; FALLBACK18-NEXT: negb %bl
6750 ; FALLBACK18-NEXT: movsbl %bl, %esi
6751 ; FALLBACK18-NEXT: movl 64(%esp,%esi), %ebx
6752 ; FALLBACK18-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6753 ; FALLBACK18-NEXT: movl 68(%esp,%esi), %eax
6754 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6755 ; FALLBACK18-NEXT: shlxl %edx, %eax, %edi
6756 ; FALLBACK18-NEXT: movl %edx, %ecx
6757 ; FALLBACK18-NEXT: notb %cl
6758 ; FALLBACK18-NEXT: shrl %ebx
6759 ; FALLBACK18-NEXT: shrxl %ecx, %ebx, %ebx
6760 ; FALLBACK18-NEXT: orl %edi, %ebx
6761 ; FALLBACK18-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6762 ; FALLBACK18-NEXT: movl 72(%esp,%esi), %ebx
6763 ; FALLBACK18-NEXT: movl %ebx, %edi
6764 ; FALLBACK18-NEXT: shrl %edi
6765 ; FALLBACK18-NEXT: shrxl %ecx, %edi, %eax
6766 ; FALLBACK18-NEXT: movl 76(%esp,%esi), %edi
6767 ; FALLBACK18-NEXT: shlxl %edx, %edi, %ebp
6768 ; FALLBACK18-NEXT: orl %ebp, %eax
6769 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6770 ; FALLBACK18-NEXT: shlxl %edx, %ebx, %ebx
6771 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
6772 ; FALLBACK18-NEXT: shrl %eax
6773 ; FALLBACK18-NEXT: shrxl %ecx, %eax, %eax
6774 ; FALLBACK18-NEXT: orl %ebx, %eax
6775 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6776 ; FALLBACK18-NEXT: movl 80(%esp,%esi), %ebx
6777 ; FALLBACK18-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6778 ; FALLBACK18-NEXT: shrl %ebx
6779 ; FALLBACK18-NEXT: shrxl %ecx, %ebx, %eax
6780 ; FALLBACK18-NEXT: movl 84(%esp,%esi), %ebx
6781 ; FALLBACK18-NEXT: shlxl %edx, %ebx, %ebp
6782 ; FALLBACK18-NEXT: orl %ebp, %eax
6783 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6784 ; FALLBACK18-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
6785 ; FALLBACK18-NEXT: shrl %edi
6786 ; FALLBACK18-NEXT: shrxl %ecx, %edi, %edi
6787 ; FALLBACK18-NEXT: orl %eax, %edi
6788 ; FALLBACK18-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
6789 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6790 ; FALLBACK18-NEXT: shlxl %edx, 92(%esp,%esi), %ebp
6791 ; FALLBACK18-NEXT: movl 88(%esp,%esi), %esi
6792 ; FALLBACK18-NEXT: shlxl %edx, %esi, %eax
6793 ; FALLBACK18-NEXT: shrl %esi
6794 ; FALLBACK18-NEXT: shrxl %ecx, %esi, %esi
6795 ; FALLBACK18-NEXT: orl %ebp, %esi
6796 ; FALLBACK18-NEXT: shrl %ebx
6797 ; FALLBACK18-NEXT: shrxl %ecx, %ebx, %edx
6798 ; FALLBACK18-NEXT: orl %eax, %edx
6799 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %eax
6800 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
6801 ; FALLBACK18-NEXT: movl %ecx, (%eax)
6802 ; FALLBACK18-NEXT: movl %edx, 24(%eax)
6803 ; FALLBACK18-NEXT: movl %esi, 28(%eax)
6804 ; FALLBACK18-NEXT: movl %edi, 16(%eax)
6805 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
6806 ; FALLBACK18-NEXT: movl %ecx, 20(%eax)
6807 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
6808 ; FALLBACK18-NEXT: movl %ecx, 8(%eax)
6809 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
6810 ; FALLBACK18-NEXT: movl %ecx, 12(%eax)
6811 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
6812 ; FALLBACK18-NEXT: movl %ecx, 4(%eax)
6813 ; FALLBACK18-NEXT: addl $108, %esp
6814 ; FALLBACK18-NEXT: popl %esi
6815 ; FALLBACK18-NEXT: popl %edi
6816 ; FALLBACK18-NEXT: popl %ebx
6817 ; FALLBACK18-NEXT: popl %ebp
6818 ; FALLBACK18-NEXT: retl
6820 ; FALLBACK19-LABEL: shl_32bytes:
6821 ; FALLBACK19: # %bb.0:
6822 ; FALLBACK19-NEXT: pushl %ebp
6823 ; FALLBACK19-NEXT: pushl %ebx
6824 ; FALLBACK19-NEXT: pushl %edi
6825 ; FALLBACK19-NEXT: pushl %esi
6826 ; FALLBACK19-NEXT: subl $92, %esp
6827 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %ebx
6828 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %ecx
6829 ; FALLBACK19-NEXT: movl (%ecx), %eax
6830 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6831 ; FALLBACK19-NEXT: movl 4(%ecx), %eax
6832 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6833 ; FALLBACK19-NEXT: movl 8(%ecx), %esi
6834 ; FALLBACK19-NEXT: movl 12(%ecx), %edi
6835 ; FALLBACK19-NEXT: movl 16(%ecx), %ebp
6836 ; FALLBACK19-NEXT: movzbl (%ebx), %ebx
6837 ; FALLBACK19-NEXT: movl 20(%ecx), %edx
6838 ; FALLBACK19-NEXT: movl 24(%ecx), %eax
6839 ; FALLBACK19-NEXT: movl 28(%ecx), %ecx
6840 ; FALLBACK19-NEXT: movl %ecx, {{[0-9]+}}(%esp)
6841 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
6842 ; FALLBACK19-NEXT: movl %edx, {{[0-9]+}}(%esp)
6843 ; FALLBACK19-NEXT: movl %ebx, %ecx
6844 ; FALLBACK19-NEXT: shlb $3, %cl
6845 ; FALLBACK19-NEXT: xorps %xmm0, %xmm0
6846 ; FALLBACK19-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
6847 ; FALLBACK19-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
6848 ; FALLBACK19-NEXT: movl %ebp, {{[0-9]+}}(%esp)
6849 ; FALLBACK19-NEXT: movl %edi, {{[0-9]+}}(%esp)
6850 ; FALLBACK19-NEXT: movl %esi, {{[0-9]+}}(%esp)
6851 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
6852 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
6853 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
6854 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
6855 ; FALLBACK19-NEXT: andb $28, %bl
6856 ; FALLBACK19-NEXT: negb %bl
6857 ; FALLBACK19-NEXT: movsbl %bl, %eax
6858 ; FALLBACK19-NEXT: movl 56(%esp,%eax), %edx
6859 ; FALLBACK19-NEXT: movl 60(%esp,%eax), %esi
6860 ; FALLBACK19-NEXT: movl %esi, (%esp) # 4-byte Spill
6861 ; FALLBACK19-NEXT: shldl %cl, %edx, %esi
6862 ; FALLBACK19-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6863 ; FALLBACK19-NEXT: movl 52(%esp,%eax), %ebx
6864 ; FALLBACK19-NEXT: shldl %cl, %ebx, %edx
6865 ; FALLBACK19-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6866 ; FALLBACK19-NEXT: movl 64(%esp,%eax), %edi
6867 ; FALLBACK19-NEXT: movl 68(%esp,%eax), %ebp
6868 ; FALLBACK19-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6869 ; FALLBACK19-NEXT: shldl %cl, %edi, %ebp
6870 ; FALLBACK19-NEXT: movl (%esp), %edx # 4-byte Reload
6871 ; FALLBACK19-NEXT: shldl %cl, %edx, %edi
6872 ; FALLBACK19-NEXT: movl 48(%esp,%eax), %edx
6873 ; FALLBACK19-NEXT: movl %edx, (%esp) # 4-byte Spill
6874 ; FALLBACK19-NEXT: movl 72(%esp,%eax), %edx
6875 ; FALLBACK19-NEXT: movl 76(%esp,%eax), %esi
6876 ; FALLBACK19-NEXT: shldl %cl, %edx, %esi
6877 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
6878 ; FALLBACK19-NEXT: shldl %cl, %eax, %edx
6879 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %eax
6880 ; FALLBACK19-NEXT: movl %edx, 24(%eax)
6881 ; FALLBACK19-NEXT: movl %esi, 28(%eax)
6882 ; FALLBACK19-NEXT: movl %edi, 16(%eax)
6883 ; FALLBACK19-NEXT: movl %ebp, 20(%eax)
6884 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
6885 ; FALLBACK19-NEXT: movl %edx, 8(%eax)
6886 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
6887 ; FALLBACK19-NEXT: movl %edx, 12(%eax)
6888 ; FALLBACK19-NEXT: movl (%esp), %esi # 4-byte Reload
6889 ; FALLBACK19-NEXT: shlxl %ecx, %esi, %edx
6890 ; FALLBACK19-NEXT: movl %edx, (%eax)
6891 ; FALLBACK19-NEXT: # kill: def $cl killed $cl killed $ecx
6892 ; FALLBACK19-NEXT: shldl %cl, %esi, %ebx
6893 ; FALLBACK19-NEXT: movl %ebx, 4(%eax)
6894 ; FALLBACK19-NEXT: addl $92, %esp
6895 ; FALLBACK19-NEXT: popl %esi
6896 ; FALLBACK19-NEXT: popl %edi
6897 ; FALLBACK19-NEXT: popl %ebx
6898 ; FALLBACK19-NEXT: popl %ebp
6899 ; FALLBACK19-NEXT: retl
6901 ; FALLBACK20-LABEL: shl_32bytes:
6902 ; FALLBACK20: # %bb.0:
6903 ; FALLBACK20-NEXT: pushl %ebp
6904 ; FALLBACK20-NEXT: pushl %ebx
6905 ; FALLBACK20-NEXT: pushl %edi
6906 ; FALLBACK20-NEXT: pushl %esi
6907 ; FALLBACK20-NEXT: subl $108, %esp
6908 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %eax
6909 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %ecx
6910 ; FALLBACK20-NEXT: movups (%ecx), %xmm0
6911 ; FALLBACK20-NEXT: movups 16(%ecx), %xmm1
6912 ; FALLBACK20-NEXT: movzbl (%eax), %ecx
6913 ; FALLBACK20-NEXT: movb %cl, %dh
6914 ; FALLBACK20-NEXT: shlb $3, %dh
6915 ; FALLBACK20-NEXT: xorps %xmm2, %xmm2
6916 ; FALLBACK20-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
6917 ; FALLBACK20-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
6918 ; FALLBACK20-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
6919 ; FALLBACK20-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
6920 ; FALLBACK20-NEXT: andb $28, %cl
6921 ; FALLBACK20-NEXT: negb %cl
6922 ; FALLBACK20-NEXT: movsbl %cl, %ebx
6923 ; FALLBACK20-NEXT: movl 84(%esp,%ebx), %edi
6924 ; FALLBACK20-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6925 ; FALLBACK20-NEXT: movb %dh, %cl
6926 ; FALLBACK20-NEXT: shll %cl, %edi
6927 ; FALLBACK20-NEXT: movb %dh, %dl
6928 ; FALLBACK20-NEXT: notb %dl
6929 ; FALLBACK20-NEXT: movl 80(%esp,%ebx), %esi
6930 ; FALLBACK20-NEXT: movl %esi, %eax
6931 ; FALLBACK20-NEXT: shrl %eax
6932 ; FALLBACK20-NEXT: movl %edx, %ecx
6933 ; FALLBACK20-NEXT: shrl %cl, %eax
6934 ; FALLBACK20-NEXT: orl %edi, %eax
6935 ; FALLBACK20-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6936 ; FALLBACK20-NEXT: movb %dh, %cl
6937 ; FALLBACK20-NEXT: shll %cl, %esi
6938 ; FALLBACK20-NEXT: movl %ebx, %edi
6939 ; FALLBACK20-NEXT: movl 76(%esp,%ebx), %ebp
6940 ; FALLBACK20-NEXT: movl %ebp, %eax
6941 ; FALLBACK20-NEXT: shrl %eax
6942 ; FALLBACK20-NEXT: movl %edx, %ecx
6943 ; FALLBACK20-NEXT: shrl %cl, %eax
6944 ; FALLBACK20-NEXT: orl %esi, %eax
6945 ; FALLBACK20-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6946 ; FALLBACK20-NEXT: movb %dh, %cl
6947 ; FALLBACK20-NEXT: shll %cl, %ebp
6948 ; FALLBACK20-NEXT: movl 72(%esp,%ebx), %ebx
6949 ; FALLBACK20-NEXT: movl %ebx, %eax
6950 ; FALLBACK20-NEXT: shrl %eax
6951 ; FALLBACK20-NEXT: movl %edx, %ecx
6952 ; FALLBACK20-NEXT: shrl %cl, %eax
6953 ; FALLBACK20-NEXT: orl %ebp, %eax
6954 ; FALLBACK20-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6955 ; FALLBACK20-NEXT: movb %dh, %cl
6956 ; FALLBACK20-NEXT: shll %cl, %ebx
6957 ; FALLBACK20-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6958 ; FALLBACK20-NEXT: movl 68(%esp,%edi), %ebp
6959 ; FALLBACK20-NEXT: movl %ebp, %esi
6960 ; FALLBACK20-NEXT: shrl %esi
6961 ; FALLBACK20-NEXT: movl %edx, %ecx
6962 ; FALLBACK20-NEXT: shrl %cl, %esi
6963 ; FALLBACK20-NEXT: orl %ebx, %esi
6964 ; FALLBACK20-NEXT: movb %dh, %cl
6965 ; FALLBACK20-NEXT: shll %cl, %ebp
6966 ; FALLBACK20-NEXT: movl 64(%esp,%edi), %ebx
6967 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6968 ; FALLBACK20-NEXT: shrl %ebx
6969 ; FALLBACK20-NEXT: movl %edx, %ecx
6970 ; FALLBACK20-NEXT: shrl %cl, %ebx
6971 ; FALLBACK20-NEXT: orl %ebp, %ebx
6972 ; FALLBACK20-NEXT: movl 88(%esp,%edi), %ebp
6973 ; FALLBACK20-NEXT: movl %ebp, %edi
6974 ; FALLBACK20-NEXT: movb %dh, %cl
6975 ; FALLBACK20-NEXT: shll %cl, %edi
6976 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
6977 ; FALLBACK20-NEXT: shrl %eax
6978 ; FALLBACK20-NEXT: movl %edx, %ecx
6979 ; FALLBACK20-NEXT: shrl %cl, %eax
6980 ; FALLBACK20-NEXT: orl %edi, %eax
6981 ; FALLBACK20-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
6982 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
6983 ; FALLBACK20-NEXT: movl 92(%esp,%eax), %edi
6984 ; FALLBACK20-NEXT: movb %dh, %cl
6985 ; FALLBACK20-NEXT: shll %cl, %edi
6986 ; FALLBACK20-NEXT: shrl %ebp
6987 ; FALLBACK20-NEXT: movl %edx, %ecx
6988 ; FALLBACK20-NEXT: shrl %cl, %ebp
6989 ; FALLBACK20-NEXT: orl %edi, %ebp
6990 ; FALLBACK20-NEXT: movb %dh, %cl
6991 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
6992 ; FALLBACK20-NEXT: shll %cl, %edx
6993 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %eax
6994 ; FALLBACK20-NEXT: movl %edx, (%eax)
6995 ; FALLBACK20-NEXT: movl %ebp, 28(%eax)
6996 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
6997 ; FALLBACK20-NEXT: movl %ecx, 24(%eax)
6998 ; FALLBACK20-NEXT: movl %ebx, 4(%eax)
6999 ; FALLBACK20-NEXT: movl %esi, 8(%eax)
7000 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7001 ; FALLBACK20-NEXT: movl %ecx, 12(%eax)
7002 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7003 ; FALLBACK20-NEXT: movl %ecx, 16(%eax)
7004 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7005 ; FALLBACK20-NEXT: movl %ecx, 20(%eax)
7006 ; FALLBACK20-NEXT: addl $108, %esp
7007 ; FALLBACK20-NEXT: popl %esi
7008 ; FALLBACK20-NEXT: popl %edi
7009 ; FALLBACK20-NEXT: popl %ebx
7010 ; FALLBACK20-NEXT: popl %ebp
7011 ; FALLBACK20-NEXT: retl
7013 ; FALLBACK21-LABEL: shl_32bytes:
7014 ; FALLBACK21: # %bb.0:
7015 ; FALLBACK21-NEXT: pushl %ebp
7016 ; FALLBACK21-NEXT: pushl %ebx
7017 ; FALLBACK21-NEXT: pushl %edi
7018 ; FALLBACK21-NEXT: pushl %esi
7019 ; FALLBACK21-NEXT: subl $92, %esp
7020 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %eax
7021 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %ecx
7022 ; FALLBACK21-NEXT: movups (%ecx), %xmm0
7023 ; FALLBACK21-NEXT: movups 16(%ecx), %xmm1
7024 ; FALLBACK21-NEXT: movzbl (%eax), %eax
7025 ; FALLBACK21-NEXT: movl %eax, %ecx
7026 ; FALLBACK21-NEXT: shlb $3, %cl
7027 ; FALLBACK21-NEXT: xorps %xmm2, %xmm2
7028 ; FALLBACK21-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
7029 ; FALLBACK21-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
7030 ; FALLBACK21-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
7031 ; FALLBACK21-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
7032 ; FALLBACK21-NEXT: andb $28, %al
7033 ; FALLBACK21-NEXT: negb %al
7034 ; FALLBACK21-NEXT: movsbl %al, %ebp
7035 ; FALLBACK21-NEXT: movl 64(%esp,%ebp), %eax
7036 ; FALLBACK21-NEXT: movl 68(%esp,%ebp), %edx
7037 ; FALLBACK21-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7038 ; FALLBACK21-NEXT: shldl %cl, %eax, %edx
7039 ; FALLBACK21-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7040 ; FALLBACK21-NEXT: movl 60(%esp,%ebp), %edx
7041 ; FALLBACK21-NEXT: shldl %cl, %edx, %eax
7042 ; FALLBACK21-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7043 ; FALLBACK21-NEXT: movl 56(%esp,%ebp), %edi
7044 ; FALLBACK21-NEXT: shldl %cl, %edi, %edx
7045 ; FALLBACK21-NEXT: movl %edx, (%esp) # 4-byte Spill
7046 ; FALLBACK21-NEXT: movl 52(%esp,%ebp), %ebx
7047 ; FALLBACK21-NEXT: shldl %cl, %ebx, %edi
7048 ; FALLBACK21-NEXT: movl 72(%esp,%ebp), %edx
7049 ; FALLBACK21-NEXT: movl %edx, %eax
7050 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
7051 ; FALLBACK21-NEXT: shldl %cl, %esi, %eax
7052 ; FALLBACK21-NEXT: movl 48(%esp,%ebp), %esi
7053 ; FALLBACK21-NEXT: movl 76(%esp,%ebp), %ebp
7054 ; FALLBACK21-NEXT: shldl %cl, %edx, %ebp
7055 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %edx
7056 ; FALLBACK21-NEXT: movl %ebp, 28(%edx)
7057 ; FALLBACK21-NEXT: movl %eax, 24(%edx)
7058 ; FALLBACK21-NEXT: movl %esi, %eax
7059 ; FALLBACK21-NEXT: shll %cl, %eax
7060 ; FALLBACK21-NEXT: shldl %cl, %esi, %ebx
7061 ; FALLBACK21-NEXT: movl %ebx, 4(%edx)
7062 ; FALLBACK21-NEXT: movl %edi, 8(%edx)
7063 ; FALLBACK21-NEXT: movl (%esp), %ecx # 4-byte Reload
7064 ; FALLBACK21-NEXT: movl %ecx, 12(%edx)
7065 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7066 ; FALLBACK21-NEXT: movl %ecx, 16(%edx)
7067 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7068 ; FALLBACK21-NEXT: movl %ecx, 20(%edx)
7069 ; FALLBACK21-NEXT: movl %eax, (%edx)
7070 ; FALLBACK21-NEXT: addl $92, %esp
7071 ; FALLBACK21-NEXT: popl %esi
7072 ; FALLBACK21-NEXT: popl %edi
7073 ; FALLBACK21-NEXT: popl %ebx
7074 ; FALLBACK21-NEXT: popl %ebp
7075 ; FALLBACK21-NEXT: retl
7077 ; FALLBACK22-LABEL: shl_32bytes:
7078 ; FALLBACK22: # %bb.0:
7079 ; FALLBACK22-NEXT: pushl %ebp
7080 ; FALLBACK22-NEXT: pushl %ebx
7081 ; FALLBACK22-NEXT: pushl %edi
7082 ; FALLBACK22-NEXT: pushl %esi
7083 ; FALLBACK22-NEXT: subl $108, %esp
7084 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %eax
7085 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %ecx
7086 ; FALLBACK22-NEXT: movups (%ecx), %xmm0
7087 ; FALLBACK22-NEXT: movups 16(%ecx), %xmm1
7088 ; FALLBACK22-NEXT: movzbl (%eax), %ecx
7089 ; FALLBACK22-NEXT: movl %ecx, %eax
7090 ; FALLBACK22-NEXT: shlb $3, %al
7091 ; FALLBACK22-NEXT: xorps %xmm2, %xmm2
7092 ; FALLBACK22-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
7093 ; FALLBACK22-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
7094 ; FALLBACK22-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
7095 ; FALLBACK22-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
7096 ; FALLBACK22-NEXT: andb $28, %cl
7097 ; FALLBACK22-NEXT: negb %cl
7098 ; FALLBACK22-NEXT: movsbl %cl, %edx
7099 ; FALLBACK22-NEXT: movl 84(%esp,%edx), %ecx
7100 ; FALLBACK22-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7101 ; FALLBACK22-NEXT: shlxl %eax, %ecx, %ecx
7102 ; FALLBACK22-NEXT: movl 80(%esp,%edx), %esi
7103 ; FALLBACK22-NEXT: shlxl %eax, %esi, %edi
7104 ; FALLBACK22-NEXT: movl %eax, %ebx
7105 ; FALLBACK22-NEXT: notb %bl
7106 ; FALLBACK22-NEXT: shrl %esi
7107 ; FALLBACK22-NEXT: shrxl %ebx, %esi, %esi
7108 ; FALLBACK22-NEXT: orl %ecx, %esi
7109 ; FALLBACK22-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7110 ; FALLBACK22-NEXT: movl 76(%esp,%edx), %ecx
7111 ; FALLBACK22-NEXT: movl %ecx, %esi
7112 ; FALLBACK22-NEXT: shrl %esi
7113 ; FALLBACK22-NEXT: shrxl %ebx, %esi, %esi
7114 ; FALLBACK22-NEXT: orl %edi, %esi
7115 ; FALLBACK22-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7116 ; FALLBACK22-NEXT: shlxl %eax, %ecx, %ecx
7117 ; FALLBACK22-NEXT: movl 72(%esp,%edx), %esi
7118 ; FALLBACK22-NEXT: movl %esi, %edi
7119 ; FALLBACK22-NEXT: shrl %edi
7120 ; FALLBACK22-NEXT: shrxl %ebx, %edi, %edi
7121 ; FALLBACK22-NEXT: orl %ecx, %edi
7122 ; FALLBACK22-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7123 ; FALLBACK22-NEXT: shlxl %eax, %esi, %ecx
7124 ; FALLBACK22-NEXT: movl 68(%esp,%edx), %esi
7125 ; FALLBACK22-NEXT: movl %esi, %edi
7126 ; FALLBACK22-NEXT: shrl %edi
7127 ; FALLBACK22-NEXT: shrxl %ebx, %edi, %ebp
7128 ; FALLBACK22-NEXT: orl %ecx, %ebp
7129 ; FALLBACK22-NEXT: shlxl %eax, %esi, %edi
7130 ; FALLBACK22-NEXT: movl 64(%esp,%edx), %esi
7131 ; FALLBACK22-NEXT: movl %esi, %ecx
7132 ; FALLBACK22-NEXT: shrl %ecx
7133 ; FALLBACK22-NEXT: shrxl %ebx, %ecx, %ecx
7134 ; FALLBACK22-NEXT: orl %edi, %ecx
7135 ; FALLBACK22-NEXT: shlxl %eax, %esi, %esi
7136 ; FALLBACK22-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7137 ; FALLBACK22-NEXT: shlxl %eax, 92(%esp,%edx), %edi
7138 ; FALLBACK22-NEXT: movl 88(%esp,%edx), %edx
7139 ; FALLBACK22-NEXT: shlxl %eax, %edx, %esi
7140 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
7141 ; FALLBACK22-NEXT: shrl %eax
7142 ; FALLBACK22-NEXT: shrxl %ebx, %eax, %eax
7143 ; FALLBACK22-NEXT: orl %esi, %eax
7144 ; FALLBACK22-NEXT: shrl %edx
7145 ; FALLBACK22-NEXT: shrxl %ebx, %edx, %edx
7146 ; FALLBACK22-NEXT: orl %edi, %edx
7147 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %esi
7148 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
7149 ; FALLBACK22-NEXT: movl %edi, (%esi)
7150 ; FALLBACK22-NEXT: movl %edx, 28(%esi)
7151 ; FALLBACK22-NEXT: movl %eax, 24(%esi)
7152 ; FALLBACK22-NEXT: movl %ecx, 4(%esi)
7153 ; FALLBACK22-NEXT: movl %ebp, 8(%esi)
7154 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
7155 ; FALLBACK22-NEXT: movl %eax, 12(%esi)
7156 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
7157 ; FALLBACK22-NEXT: movl %eax, 16(%esi)
7158 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
7159 ; FALLBACK22-NEXT: movl %eax, 20(%esi)
7160 ; FALLBACK22-NEXT: addl $108, %esp
7161 ; FALLBACK22-NEXT: popl %esi
7162 ; FALLBACK22-NEXT: popl %edi
7163 ; FALLBACK22-NEXT: popl %ebx
7164 ; FALLBACK22-NEXT: popl %ebp
7165 ; FALLBACK22-NEXT: retl
7167 ; FALLBACK23-LABEL: shl_32bytes:
7168 ; FALLBACK23: # %bb.0:
7169 ; FALLBACK23-NEXT: pushl %ebp
7170 ; FALLBACK23-NEXT: pushl %ebx
7171 ; FALLBACK23-NEXT: pushl %edi
7172 ; FALLBACK23-NEXT: pushl %esi
7173 ; FALLBACK23-NEXT: subl $92, %esp
7174 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %eax
7175 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %ecx
7176 ; FALLBACK23-NEXT: movups (%ecx), %xmm0
7177 ; FALLBACK23-NEXT: movups 16(%ecx), %xmm1
7178 ; FALLBACK23-NEXT: movzbl (%eax), %eax
7179 ; FALLBACK23-NEXT: movl %eax, %ecx
7180 ; FALLBACK23-NEXT: shlb $3, %cl
7181 ; FALLBACK23-NEXT: xorps %xmm2, %xmm2
7182 ; FALLBACK23-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
7183 ; FALLBACK23-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
7184 ; FALLBACK23-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
7185 ; FALLBACK23-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
7186 ; FALLBACK23-NEXT: andb $28, %al
7187 ; FALLBACK23-NEXT: negb %al
7188 ; FALLBACK23-NEXT: movsbl %al, %ebx
7189 ; FALLBACK23-NEXT: movl 64(%esp,%ebx), %eax
7190 ; FALLBACK23-NEXT: movl 68(%esp,%ebx), %edx
7191 ; FALLBACK23-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7192 ; FALLBACK23-NEXT: shldl %cl, %eax, %edx
7193 ; FALLBACK23-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7194 ; FALLBACK23-NEXT: movl 60(%esp,%ebx), %edx
7195 ; FALLBACK23-NEXT: shldl %cl, %edx, %eax
7196 ; FALLBACK23-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7197 ; FALLBACK23-NEXT: movl 56(%esp,%ebx), %edi
7198 ; FALLBACK23-NEXT: shldl %cl, %edi, %edx
7199 ; FALLBACK23-NEXT: movl %edx, (%esp) # 4-byte Spill
7200 ; FALLBACK23-NEXT: movl 52(%esp,%ebx), %ebp
7201 ; FALLBACK23-NEXT: shldl %cl, %ebp, %edi
7202 ; FALLBACK23-NEXT: movl 72(%esp,%ebx), %edx
7203 ; FALLBACK23-NEXT: movl %edx, %eax
7204 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
7205 ; FALLBACK23-NEXT: shldl %cl, %esi, %eax
7206 ; FALLBACK23-NEXT: movl 48(%esp,%ebx), %esi
7207 ; FALLBACK23-NEXT: movl 76(%esp,%ebx), %ebx
7208 ; FALLBACK23-NEXT: shldl %cl, %edx, %ebx
7209 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %edx
7210 ; FALLBACK23-NEXT: movl %ebx, 28(%edx)
7211 ; FALLBACK23-NEXT: movl %eax, 24(%edx)
7212 ; FALLBACK23-NEXT: shlxl %ecx, %esi, %eax
7213 ; FALLBACK23-NEXT: # kill: def $cl killed $cl killed $ecx
7214 ; FALLBACK23-NEXT: shldl %cl, %esi, %ebp
7215 ; FALLBACK23-NEXT: movl %ebp, 4(%edx)
7216 ; FALLBACK23-NEXT: movl %edi, 8(%edx)
7217 ; FALLBACK23-NEXT: movl (%esp), %ecx # 4-byte Reload
7218 ; FALLBACK23-NEXT: movl %ecx, 12(%edx)
7219 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7220 ; FALLBACK23-NEXT: movl %ecx, 16(%edx)
7221 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7222 ; FALLBACK23-NEXT: movl %ecx, 20(%edx)
7223 ; FALLBACK23-NEXT: movl %eax, (%edx)
7224 ; FALLBACK23-NEXT: addl $92, %esp
7225 ; FALLBACK23-NEXT: popl %esi
7226 ; FALLBACK23-NEXT: popl %edi
7227 ; FALLBACK23-NEXT: popl %ebx
7228 ; FALLBACK23-NEXT: popl %ebp
7229 ; FALLBACK23-NEXT: retl
7231 ; FALLBACK24-LABEL: shl_32bytes:
7232 ; FALLBACK24: # %bb.0:
7233 ; FALLBACK24-NEXT: pushl %ebp
7234 ; FALLBACK24-NEXT: pushl %ebx
7235 ; FALLBACK24-NEXT: pushl %edi
7236 ; FALLBACK24-NEXT: pushl %esi
7237 ; FALLBACK24-NEXT: subl $108, %esp
7238 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %eax
7239 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %ecx
7240 ; FALLBACK24-NEXT: vmovups (%ecx), %ymm0
7241 ; FALLBACK24-NEXT: movzbl (%eax), %ecx
7242 ; FALLBACK24-NEXT: movb %cl, %dh
7243 ; FALLBACK24-NEXT: shlb $3, %dh
7244 ; FALLBACK24-NEXT: vxorps %xmm1, %xmm1, %xmm1
7245 ; FALLBACK24-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
7246 ; FALLBACK24-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
7247 ; FALLBACK24-NEXT: andb $28, %cl
7248 ; FALLBACK24-NEXT: negb %cl
7249 ; FALLBACK24-NEXT: movsbl %cl, %ebx
7250 ; FALLBACK24-NEXT: movl 84(%esp,%ebx), %edi
7251 ; FALLBACK24-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7252 ; FALLBACK24-NEXT: movb %dh, %cl
7253 ; FALLBACK24-NEXT: shll %cl, %edi
7254 ; FALLBACK24-NEXT: movb %dh, %dl
7255 ; FALLBACK24-NEXT: notb %dl
7256 ; FALLBACK24-NEXT: movl 80(%esp,%ebx), %esi
7257 ; FALLBACK24-NEXT: movl %esi, %eax
7258 ; FALLBACK24-NEXT: shrl %eax
7259 ; FALLBACK24-NEXT: movl %edx, %ecx
7260 ; FALLBACK24-NEXT: shrl %cl, %eax
7261 ; FALLBACK24-NEXT: orl %edi, %eax
7262 ; FALLBACK24-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7263 ; FALLBACK24-NEXT: movb %dh, %cl
7264 ; FALLBACK24-NEXT: shll %cl, %esi
7265 ; FALLBACK24-NEXT: movl %ebx, %edi
7266 ; FALLBACK24-NEXT: movl 76(%esp,%ebx), %ebp
7267 ; FALLBACK24-NEXT: movl %ebp, %eax
7268 ; FALLBACK24-NEXT: shrl %eax
7269 ; FALLBACK24-NEXT: movl %edx, %ecx
7270 ; FALLBACK24-NEXT: shrl %cl, %eax
7271 ; FALLBACK24-NEXT: orl %esi, %eax
7272 ; FALLBACK24-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7273 ; FALLBACK24-NEXT: movb %dh, %cl
7274 ; FALLBACK24-NEXT: shll %cl, %ebp
7275 ; FALLBACK24-NEXT: movl 72(%esp,%ebx), %ebx
7276 ; FALLBACK24-NEXT: movl %ebx, %eax
7277 ; FALLBACK24-NEXT: shrl %eax
7278 ; FALLBACK24-NEXT: movl %edx, %ecx
7279 ; FALLBACK24-NEXT: shrl %cl, %eax
7280 ; FALLBACK24-NEXT: orl %ebp, %eax
7281 ; FALLBACK24-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7282 ; FALLBACK24-NEXT: movb %dh, %cl
7283 ; FALLBACK24-NEXT: shll %cl, %ebx
7284 ; FALLBACK24-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7285 ; FALLBACK24-NEXT: movl 68(%esp,%edi), %ebp
7286 ; FALLBACK24-NEXT: movl %ebp, %esi
7287 ; FALLBACK24-NEXT: shrl %esi
7288 ; FALLBACK24-NEXT: movl %edx, %ecx
7289 ; FALLBACK24-NEXT: shrl %cl, %esi
7290 ; FALLBACK24-NEXT: orl %ebx, %esi
7291 ; FALLBACK24-NEXT: movb %dh, %cl
7292 ; FALLBACK24-NEXT: shll %cl, %ebp
7293 ; FALLBACK24-NEXT: movl 64(%esp,%edi), %ebx
7294 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7295 ; FALLBACK24-NEXT: shrl %ebx
7296 ; FALLBACK24-NEXT: movl %edx, %ecx
7297 ; FALLBACK24-NEXT: shrl %cl, %ebx
7298 ; FALLBACK24-NEXT: orl %ebp, %ebx
7299 ; FALLBACK24-NEXT: movl 88(%esp,%edi), %ebp
7300 ; FALLBACK24-NEXT: movl %ebp, %edi
7301 ; FALLBACK24-NEXT: movb %dh, %cl
7302 ; FALLBACK24-NEXT: shll %cl, %edi
7303 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
7304 ; FALLBACK24-NEXT: shrl %eax
7305 ; FALLBACK24-NEXT: movl %edx, %ecx
7306 ; FALLBACK24-NEXT: shrl %cl, %eax
7307 ; FALLBACK24-NEXT: orl %edi, %eax
7308 ; FALLBACK24-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7309 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
7310 ; FALLBACK24-NEXT: movl 92(%esp,%eax), %edi
7311 ; FALLBACK24-NEXT: movb %dh, %cl
7312 ; FALLBACK24-NEXT: shll %cl, %edi
7313 ; FALLBACK24-NEXT: shrl %ebp
7314 ; FALLBACK24-NEXT: movl %edx, %ecx
7315 ; FALLBACK24-NEXT: shrl %cl, %ebp
7316 ; FALLBACK24-NEXT: orl %edi, %ebp
7317 ; FALLBACK24-NEXT: movb %dh, %cl
7318 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
7319 ; FALLBACK24-NEXT: shll %cl, %edx
7320 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %eax
7321 ; FALLBACK24-NEXT: movl %edx, (%eax)
7322 ; FALLBACK24-NEXT: movl %ebp, 28(%eax)
7323 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7324 ; FALLBACK24-NEXT: movl %ecx, 24(%eax)
7325 ; FALLBACK24-NEXT: movl %ebx, 4(%eax)
7326 ; FALLBACK24-NEXT: movl %esi, 8(%eax)
7327 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7328 ; FALLBACK24-NEXT: movl %ecx, 12(%eax)
7329 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7330 ; FALLBACK24-NEXT: movl %ecx, 16(%eax)
7331 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7332 ; FALLBACK24-NEXT: movl %ecx, 20(%eax)
7333 ; FALLBACK24-NEXT: addl $108, %esp
7334 ; FALLBACK24-NEXT: popl %esi
7335 ; FALLBACK24-NEXT: popl %edi
7336 ; FALLBACK24-NEXT: popl %ebx
7337 ; FALLBACK24-NEXT: popl %ebp
7338 ; FALLBACK24-NEXT: vzeroupper
7339 ; FALLBACK24-NEXT: retl
7341 ; FALLBACK25-LABEL: shl_32bytes:
7342 ; FALLBACK25: # %bb.0:
7343 ; FALLBACK25-NEXT: pushl %ebp
7344 ; FALLBACK25-NEXT: pushl %ebx
7345 ; FALLBACK25-NEXT: pushl %edi
7346 ; FALLBACK25-NEXT: pushl %esi
7347 ; FALLBACK25-NEXT: subl $92, %esp
7348 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %eax
7349 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %ecx
7350 ; FALLBACK25-NEXT: vmovups (%ecx), %ymm0
7351 ; FALLBACK25-NEXT: movzbl (%eax), %eax
7352 ; FALLBACK25-NEXT: movl %eax, %ecx
7353 ; FALLBACK25-NEXT: shlb $3, %cl
7354 ; FALLBACK25-NEXT: vxorps %xmm1, %xmm1, %xmm1
7355 ; FALLBACK25-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
7356 ; FALLBACK25-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
7357 ; FALLBACK25-NEXT: andb $28, %al
7358 ; FALLBACK25-NEXT: negb %al
7359 ; FALLBACK25-NEXT: movsbl %al, %ebp
7360 ; FALLBACK25-NEXT: movl 64(%esp,%ebp), %eax
7361 ; FALLBACK25-NEXT: movl 68(%esp,%ebp), %edx
7362 ; FALLBACK25-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7363 ; FALLBACK25-NEXT: shldl %cl, %eax, %edx
7364 ; FALLBACK25-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7365 ; FALLBACK25-NEXT: movl 60(%esp,%ebp), %edx
7366 ; FALLBACK25-NEXT: shldl %cl, %edx, %eax
7367 ; FALLBACK25-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7368 ; FALLBACK25-NEXT: movl 56(%esp,%ebp), %edi
7369 ; FALLBACK25-NEXT: shldl %cl, %edi, %edx
7370 ; FALLBACK25-NEXT: movl %edx, (%esp) # 4-byte Spill
7371 ; FALLBACK25-NEXT: movl 52(%esp,%ebp), %ebx
7372 ; FALLBACK25-NEXT: shldl %cl, %ebx, %edi
7373 ; FALLBACK25-NEXT: movl 72(%esp,%ebp), %edx
7374 ; FALLBACK25-NEXT: movl %edx, %eax
7375 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
7376 ; FALLBACK25-NEXT: shldl %cl, %esi, %eax
7377 ; FALLBACK25-NEXT: movl 48(%esp,%ebp), %esi
7378 ; FALLBACK25-NEXT: movl 76(%esp,%ebp), %ebp
7379 ; FALLBACK25-NEXT: shldl %cl, %edx, %ebp
7380 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %edx
7381 ; FALLBACK25-NEXT: movl %ebp, 28(%edx)
7382 ; FALLBACK25-NEXT: movl %eax, 24(%edx)
7383 ; FALLBACK25-NEXT: movl %esi, %eax
7384 ; FALLBACK25-NEXT: shll %cl, %eax
7385 ; FALLBACK25-NEXT: shldl %cl, %esi, %ebx
7386 ; FALLBACK25-NEXT: movl %ebx, 4(%edx)
7387 ; FALLBACK25-NEXT: movl %edi, 8(%edx)
7388 ; FALLBACK25-NEXT: movl (%esp), %ecx # 4-byte Reload
7389 ; FALLBACK25-NEXT: movl %ecx, 12(%edx)
7390 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7391 ; FALLBACK25-NEXT: movl %ecx, 16(%edx)
7392 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7393 ; FALLBACK25-NEXT: movl %ecx, 20(%edx)
7394 ; FALLBACK25-NEXT: movl %eax, (%edx)
7395 ; FALLBACK25-NEXT: addl $92, %esp
7396 ; FALLBACK25-NEXT: popl %esi
7397 ; FALLBACK25-NEXT: popl %edi
7398 ; FALLBACK25-NEXT: popl %ebx
7399 ; FALLBACK25-NEXT: popl %ebp
7400 ; FALLBACK25-NEXT: vzeroupper
7401 ; FALLBACK25-NEXT: retl
7403 ; FALLBACK26-LABEL: shl_32bytes:
7404 ; FALLBACK26: # %bb.0:
7405 ; FALLBACK26-NEXT: pushl %ebp
7406 ; FALLBACK26-NEXT: pushl %ebx
7407 ; FALLBACK26-NEXT: pushl %edi
7408 ; FALLBACK26-NEXT: pushl %esi
7409 ; FALLBACK26-NEXT: subl $108, %esp
7410 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %eax
7411 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %ecx
7412 ; FALLBACK26-NEXT: vmovups (%ecx), %ymm0
7413 ; FALLBACK26-NEXT: movzbl (%eax), %ecx
7414 ; FALLBACK26-NEXT: movl %ecx, %eax
7415 ; FALLBACK26-NEXT: shlb $3, %al
7416 ; FALLBACK26-NEXT: vxorps %xmm1, %xmm1, %xmm1
7417 ; FALLBACK26-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
7418 ; FALLBACK26-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
7419 ; FALLBACK26-NEXT: andb $28, %cl
7420 ; FALLBACK26-NEXT: negb %cl
7421 ; FALLBACK26-NEXT: movsbl %cl, %edx
7422 ; FALLBACK26-NEXT: movl 84(%esp,%edx), %ecx
7423 ; FALLBACK26-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7424 ; FALLBACK26-NEXT: shlxl %eax, %ecx, %ecx
7425 ; FALLBACK26-NEXT: movl 80(%esp,%edx), %esi
7426 ; FALLBACK26-NEXT: shlxl %eax, %esi, %edi
7427 ; FALLBACK26-NEXT: movl %eax, %ebx
7428 ; FALLBACK26-NEXT: notb %bl
7429 ; FALLBACK26-NEXT: shrl %esi
7430 ; FALLBACK26-NEXT: shrxl %ebx, %esi, %esi
7431 ; FALLBACK26-NEXT: orl %ecx, %esi
7432 ; FALLBACK26-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7433 ; FALLBACK26-NEXT: movl 76(%esp,%edx), %ecx
7434 ; FALLBACK26-NEXT: movl %ecx, %esi
7435 ; FALLBACK26-NEXT: shrl %esi
7436 ; FALLBACK26-NEXT: shrxl %ebx, %esi, %esi
7437 ; FALLBACK26-NEXT: orl %edi, %esi
7438 ; FALLBACK26-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7439 ; FALLBACK26-NEXT: shlxl %eax, %ecx, %ecx
7440 ; FALLBACK26-NEXT: movl 72(%esp,%edx), %esi
7441 ; FALLBACK26-NEXT: movl %esi, %edi
7442 ; FALLBACK26-NEXT: shrl %edi
7443 ; FALLBACK26-NEXT: shrxl %ebx, %edi, %edi
7444 ; FALLBACK26-NEXT: orl %ecx, %edi
7445 ; FALLBACK26-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7446 ; FALLBACK26-NEXT: shlxl %eax, %esi, %ecx
7447 ; FALLBACK26-NEXT: movl 68(%esp,%edx), %esi
7448 ; FALLBACK26-NEXT: movl %esi, %edi
7449 ; FALLBACK26-NEXT: shrl %edi
7450 ; FALLBACK26-NEXT: shrxl %ebx, %edi, %ebp
7451 ; FALLBACK26-NEXT: orl %ecx, %ebp
7452 ; FALLBACK26-NEXT: shlxl %eax, %esi, %edi
7453 ; FALLBACK26-NEXT: movl 64(%esp,%edx), %esi
7454 ; FALLBACK26-NEXT: movl %esi, %ecx
7455 ; FALLBACK26-NEXT: shrl %ecx
7456 ; FALLBACK26-NEXT: shrxl %ebx, %ecx, %ecx
7457 ; FALLBACK26-NEXT: orl %edi, %ecx
7458 ; FALLBACK26-NEXT: shlxl %eax, %esi, %esi
7459 ; FALLBACK26-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7460 ; FALLBACK26-NEXT: shlxl %eax, 92(%esp,%edx), %edi
7461 ; FALLBACK26-NEXT: movl 88(%esp,%edx), %edx
7462 ; FALLBACK26-NEXT: shlxl %eax, %edx, %esi
7463 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
7464 ; FALLBACK26-NEXT: shrl %eax
7465 ; FALLBACK26-NEXT: shrxl %ebx, %eax, %eax
7466 ; FALLBACK26-NEXT: orl %esi, %eax
7467 ; FALLBACK26-NEXT: shrl %edx
7468 ; FALLBACK26-NEXT: shrxl %ebx, %edx, %edx
7469 ; FALLBACK26-NEXT: orl %edi, %edx
7470 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %esi
7471 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
7472 ; FALLBACK26-NEXT: movl %edi, (%esi)
7473 ; FALLBACK26-NEXT: movl %edx, 28(%esi)
7474 ; FALLBACK26-NEXT: movl %eax, 24(%esi)
7475 ; FALLBACK26-NEXT: movl %ecx, 4(%esi)
7476 ; FALLBACK26-NEXT: movl %ebp, 8(%esi)
7477 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
7478 ; FALLBACK26-NEXT: movl %eax, 12(%esi)
7479 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
7480 ; FALLBACK26-NEXT: movl %eax, 16(%esi)
7481 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
7482 ; FALLBACK26-NEXT: movl %eax, 20(%esi)
7483 ; FALLBACK26-NEXT: addl $108, %esp
7484 ; FALLBACK26-NEXT: popl %esi
7485 ; FALLBACK26-NEXT: popl %edi
7486 ; FALLBACK26-NEXT: popl %ebx
7487 ; FALLBACK26-NEXT: popl %ebp
7488 ; FALLBACK26-NEXT: vzeroupper
7489 ; FALLBACK26-NEXT: retl
7491 ; FALLBACK27-LABEL: shl_32bytes:
7492 ; FALLBACK27: # %bb.0:
7493 ; FALLBACK27-NEXT: pushl %ebp
7494 ; FALLBACK27-NEXT: pushl %ebx
7495 ; FALLBACK27-NEXT: pushl %edi
7496 ; FALLBACK27-NEXT: pushl %esi
7497 ; FALLBACK27-NEXT: subl $92, %esp
7498 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %eax
7499 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %ecx
7500 ; FALLBACK27-NEXT: vmovups (%ecx), %ymm0
7501 ; FALLBACK27-NEXT: movzbl (%eax), %eax
7502 ; FALLBACK27-NEXT: movl %eax, %ecx
7503 ; FALLBACK27-NEXT: shlb $3, %cl
7504 ; FALLBACK27-NEXT: vxorps %xmm1, %xmm1, %xmm1
7505 ; FALLBACK27-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
7506 ; FALLBACK27-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
7507 ; FALLBACK27-NEXT: andb $28, %al
7508 ; FALLBACK27-NEXT: negb %al
7509 ; FALLBACK27-NEXT: movsbl %al, %ebx
7510 ; FALLBACK27-NEXT: movl 64(%esp,%ebx), %eax
7511 ; FALLBACK27-NEXT: movl 68(%esp,%ebx), %edx
7512 ; FALLBACK27-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7513 ; FALLBACK27-NEXT: shldl %cl, %eax, %edx
7514 ; FALLBACK27-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7515 ; FALLBACK27-NEXT: movl 60(%esp,%ebx), %edx
7516 ; FALLBACK27-NEXT: shldl %cl, %edx, %eax
7517 ; FALLBACK27-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7518 ; FALLBACK27-NEXT: movl 56(%esp,%ebx), %edi
7519 ; FALLBACK27-NEXT: shldl %cl, %edi, %edx
7520 ; FALLBACK27-NEXT: movl %edx, (%esp) # 4-byte Spill
7521 ; FALLBACK27-NEXT: movl 52(%esp,%ebx), %ebp
7522 ; FALLBACK27-NEXT: shldl %cl, %ebp, %edi
7523 ; FALLBACK27-NEXT: movl 72(%esp,%ebx), %edx
7524 ; FALLBACK27-NEXT: movl %edx, %eax
7525 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
7526 ; FALLBACK27-NEXT: shldl %cl, %esi, %eax
7527 ; FALLBACK27-NEXT: movl 48(%esp,%ebx), %esi
7528 ; FALLBACK27-NEXT: movl 76(%esp,%ebx), %ebx
7529 ; FALLBACK27-NEXT: shldl %cl, %edx, %ebx
7530 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %edx
7531 ; FALLBACK27-NEXT: movl %ebx, 28(%edx)
7532 ; FALLBACK27-NEXT: movl %eax, 24(%edx)
7533 ; FALLBACK27-NEXT: shlxl %ecx, %esi, %eax
7534 ; FALLBACK27-NEXT: # kill: def $cl killed $cl killed $ecx
7535 ; FALLBACK27-NEXT: shldl %cl, %esi, %ebp
7536 ; FALLBACK27-NEXT: movl %ebp, 4(%edx)
7537 ; FALLBACK27-NEXT: movl %edi, 8(%edx)
7538 ; FALLBACK27-NEXT: movl (%esp), %ecx # 4-byte Reload
7539 ; FALLBACK27-NEXT: movl %ecx, 12(%edx)
7540 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7541 ; FALLBACK27-NEXT: movl %ecx, 16(%edx)
7542 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7543 ; FALLBACK27-NEXT: movl %ecx, 20(%edx)
7544 ; FALLBACK27-NEXT: movl %eax, (%edx)
7545 ; FALLBACK27-NEXT: addl $92, %esp
7546 ; FALLBACK27-NEXT: popl %esi
7547 ; FALLBACK27-NEXT: popl %edi
7548 ; FALLBACK27-NEXT: popl %ebx
7549 ; FALLBACK27-NEXT: popl %ebp
7550 ; FALLBACK27-NEXT: vzeroupper
7551 ; FALLBACK27-NEXT: retl
7553 ; FALLBACK28-LABEL: shl_32bytes:
7554 ; FALLBACK28: # %bb.0:
7555 ; FALLBACK28-NEXT: pushl %ebp
7556 ; FALLBACK28-NEXT: pushl %ebx
7557 ; FALLBACK28-NEXT: pushl %edi
7558 ; FALLBACK28-NEXT: pushl %esi
7559 ; FALLBACK28-NEXT: subl $108, %esp
7560 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %eax
7561 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %ecx
7562 ; FALLBACK28-NEXT: vmovups (%ecx), %ymm0
7563 ; FALLBACK28-NEXT: movzbl (%eax), %ecx
7564 ; FALLBACK28-NEXT: movb %cl, %dh
7565 ; FALLBACK28-NEXT: shlb $3, %dh
7566 ; FALLBACK28-NEXT: vxorps %xmm1, %xmm1, %xmm1
7567 ; FALLBACK28-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
7568 ; FALLBACK28-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
7569 ; FALLBACK28-NEXT: andb $28, %cl
7570 ; FALLBACK28-NEXT: negb %cl
7571 ; FALLBACK28-NEXT: movsbl %cl, %ebx
7572 ; FALLBACK28-NEXT: movl 84(%esp,%ebx), %edi
7573 ; FALLBACK28-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7574 ; FALLBACK28-NEXT: movb %dh, %cl
7575 ; FALLBACK28-NEXT: shll %cl, %edi
7576 ; FALLBACK28-NEXT: movb %dh, %dl
7577 ; FALLBACK28-NEXT: notb %dl
7578 ; FALLBACK28-NEXT: movl 80(%esp,%ebx), %esi
7579 ; FALLBACK28-NEXT: movl %esi, %eax
7580 ; FALLBACK28-NEXT: shrl %eax
7581 ; FALLBACK28-NEXT: movl %edx, %ecx
7582 ; FALLBACK28-NEXT: shrl %cl, %eax
7583 ; FALLBACK28-NEXT: orl %edi, %eax
7584 ; FALLBACK28-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7585 ; FALLBACK28-NEXT: movb %dh, %cl
7586 ; FALLBACK28-NEXT: shll %cl, %esi
7587 ; FALLBACK28-NEXT: movl %ebx, %edi
7588 ; FALLBACK28-NEXT: movl 76(%esp,%ebx), %ebp
7589 ; FALLBACK28-NEXT: movl %ebp, %eax
7590 ; FALLBACK28-NEXT: shrl %eax
7591 ; FALLBACK28-NEXT: movl %edx, %ecx
7592 ; FALLBACK28-NEXT: shrl %cl, %eax
7593 ; FALLBACK28-NEXT: orl %esi, %eax
7594 ; FALLBACK28-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7595 ; FALLBACK28-NEXT: movb %dh, %cl
7596 ; FALLBACK28-NEXT: shll %cl, %ebp
7597 ; FALLBACK28-NEXT: movl 72(%esp,%ebx), %ebx
7598 ; FALLBACK28-NEXT: movl %ebx, %eax
7599 ; FALLBACK28-NEXT: shrl %eax
7600 ; FALLBACK28-NEXT: movl %edx, %ecx
7601 ; FALLBACK28-NEXT: shrl %cl, %eax
7602 ; FALLBACK28-NEXT: orl %ebp, %eax
7603 ; FALLBACK28-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7604 ; FALLBACK28-NEXT: movb %dh, %cl
7605 ; FALLBACK28-NEXT: shll %cl, %ebx
7606 ; FALLBACK28-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7607 ; FALLBACK28-NEXT: movl 68(%esp,%edi), %ebp
7608 ; FALLBACK28-NEXT: movl %ebp, %esi
7609 ; FALLBACK28-NEXT: shrl %esi
7610 ; FALLBACK28-NEXT: movl %edx, %ecx
7611 ; FALLBACK28-NEXT: shrl %cl, %esi
7612 ; FALLBACK28-NEXT: orl %ebx, %esi
7613 ; FALLBACK28-NEXT: movb %dh, %cl
7614 ; FALLBACK28-NEXT: shll %cl, %ebp
7615 ; FALLBACK28-NEXT: movl 64(%esp,%edi), %ebx
7616 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7617 ; FALLBACK28-NEXT: shrl %ebx
7618 ; FALLBACK28-NEXT: movl %edx, %ecx
7619 ; FALLBACK28-NEXT: shrl %cl, %ebx
7620 ; FALLBACK28-NEXT: orl %ebp, %ebx
7621 ; FALLBACK28-NEXT: movl 88(%esp,%edi), %ebp
7622 ; FALLBACK28-NEXT: movl %ebp, %edi
7623 ; FALLBACK28-NEXT: movb %dh, %cl
7624 ; FALLBACK28-NEXT: shll %cl, %edi
7625 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
7626 ; FALLBACK28-NEXT: shrl %eax
7627 ; FALLBACK28-NEXT: movl %edx, %ecx
7628 ; FALLBACK28-NEXT: shrl %cl, %eax
7629 ; FALLBACK28-NEXT: orl %edi, %eax
7630 ; FALLBACK28-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7631 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
7632 ; FALLBACK28-NEXT: movl 92(%esp,%eax), %edi
7633 ; FALLBACK28-NEXT: movb %dh, %cl
7634 ; FALLBACK28-NEXT: shll %cl, %edi
7635 ; FALLBACK28-NEXT: shrl %ebp
7636 ; FALLBACK28-NEXT: movl %edx, %ecx
7637 ; FALLBACK28-NEXT: shrl %cl, %ebp
7638 ; FALLBACK28-NEXT: orl %edi, %ebp
7639 ; FALLBACK28-NEXT: movb %dh, %cl
7640 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
7641 ; FALLBACK28-NEXT: shll %cl, %edx
7642 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %eax
7643 ; FALLBACK28-NEXT: movl %edx, (%eax)
7644 ; FALLBACK28-NEXT: movl %ebp, 28(%eax)
7645 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7646 ; FALLBACK28-NEXT: movl %ecx, 24(%eax)
7647 ; FALLBACK28-NEXT: movl %ebx, 4(%eax)
7648 ; FALLBACK28-NEXT: movl %esi, 8(%eax)
7649 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7650 ; FALLBACK28-NEXT: movl %ecx, 12(%eax)
7651 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7652 ; FALLBACK28-NEXT: movl %ecx, 16(%eax)
7653 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7654 ; FALLBACK28-NEXT: movl %ecx, 20(%eax)
7655 ; FALLBACK28-NEXT: addl $108, %esp
7656 ; FALLBACK28-NEXT: popl %esi
7657 ; FALLBACK28-NEXT: popl %edi
7658 ; FALLBACK28-NEXT: popl %ebx
7659 ; FALLBACK28-NEXT: popl %ebp
7660 ; FALLBACK28-NEXT: vzeroupper
7661 ; FALLBACK28-NEXT: retl
7663 ; FALLBACK29-LABEL: shl_32bytes:
7664 ; FALLBACK29: # %bb.0:
7665 ; FALLBACK29-NEXT: pushl %ebp
7666 ; FALLBACK29-NEXT: pushl %ebx
7667 ; FALLBACK29-NEXT: pushl %edi
7668 ; FALLBACK29-NEXT: pushl %esi
7669 ; FALLBACK29-NEXT: subl $92, %esp
7670 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %eax
7671 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %ecx
7672 ; FALLBACK29-NEXT: vmovups (%ecx), %ymm0
7673 ; FALLBACK29-NEXT: movzbl (%eax), %eax
7674 ; FALLBACK29-NEXT: movl %eax, %ecx
7675 ; FALLBACK29-NEXT: shlb $3, %cl
7676 ; FALLBACK29-NEXT: vxorps %xmm1, %xmm1, %xmm1
7677 ; FALLBACK29-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
7678 ; FALLBACK29-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
7679 ; FALLBACK29-NEXT: andb $28, %al
7680 ; FALLBACK29-NEXT: negb %al
7681 ; FALLBACK29-NEXT: movsbl %al, %ebp
7682 ; FALLBACK29-NEXT: movl 64(%esp,%ebp), %eax
7683 ; FALLBACK29-NEXT: movl 68(%esp,%ebp), %edx
7684 ; FALLBACK29-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7685 ; FALLBACK29-NEXT: shldl %cl, %eax, %edx
7686 ; FALLBACK29-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7687 ; FALLBACK29-NEXT: movl 60(%esp,%ebp), %edx
7688 ; FALLBACK29-NEXT: shldl %cl, %edx, %eax
7689 ; FALLBACK29-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7690 ; FALLBACK29-NEXT: movl 56(%esp,%ebp), %edi
7691 ; FALLBACK29-NEXT: shldl %cl, %edi, %edx
7692 ; FALLBACK29-NEXT: movl %edx, (%esp) # 4-byte Spill
7693 ; FALLBACK29-NEXT: movl 52(%esp,%ebp), %ebx
7694 ; FALLBACK29-NEXT: shldl %cl, %ebx, %edi
7695 ; FALLBACK29-NEXT: movl 72(%esp,%ebp), %edx
7696 ; FALLBACK29-NEXT: movl %edx, %eax
7697 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
7698 ; FALLBACK29-NEXT: shldl %cl, %esi, %eax
7699 ; FALLBACK29-NEXT: movl 48(%esp,%ebp), %esi
7700 ; FALLBACK29-NEXT: movl 76(%esp,%ebp), %ebp
7701 ; FALLBACK29-NEXT: shldl %cl, %edx, %ebp
7702 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %edx
7703 ; FALLBACK29-NEXT: movl %ebp, 28(%edx)
7704 ; FALLBACK29-NEXT: movl %eax, 24(%edx)
7705 ; FALLBACK29-NEXT: movl %esi, %eax
7706 ; FALLBACK29-NEXT: shll %cl, %eax
7707 ; FALLBACK29-NEXT: shldl %cl, %esi, %ebx
7708 ; FALLBACK29-NEXT: movl %ebx, 4(%edx)
7709 ; FALLBACK29-NEXT: movl %edi, 8(%edx)
7710 ; FALLBACK29-NEXT: movl (%esp), %ecx # 4-byte Reload
7711 ; FALLBACK29-NEXT: movl %ecx, 12(%edx)
7712 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7713 ; FALLBACK29-NEXT: movl %ecx, 16(%edx)
7714 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7715 ; FALLBACK29-NEXT: movl %ecx, 20(%edx)
7716 ; FALLBACK29-NEXT: movl %eax, (%edx)
7717 ; FALLBACK29-NEXT: addl $92, %esp
7718 ; FALLBACK29-NEXT: popl %esi
7719 ; FALLBACK29-NEXT: popl %edi
7720 ; FALLBACK29-NEXT: popl %ebx
7721 ; FALLBACK29-NEXT: popl %ebp
7722 ; FALLBACK29-NEXT: vzeroupper
7723 ; FALLBACK29-NEXT: retl
7725 ; FALLBACK30-LABEL: shl_32bytes:
7726 ; FALLBACK30: # %bb.0:
7727 ; FALLBACK30-NEXT: pushl %ebp
7728 ; FALLBACK30-NEXT: pushl %ebx
7729 ; FALLBACK30-NEXT: pushl %edi
7730 ; FALLBACK30-NEXT: pushl %esi
7731 ; FALLBACK30-NEXT: subl $108, %esp
7732 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %eax
7733 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %ecx
7734 ; FALLBACK30-NEXT: vmovups (%ecx), %ymm0
7735 ; FALLBACK30-NEXT: movzbl (%eax), %ecx
7736 ; FALLBACK30-NEXT: movl %ecx, %eax
7737 ; FALLBACK30-NEXT: shlb $3, %al
7738 ; FALLBACK30-NEXT: vxorps %xmm1, %xmm1, %xmm1
7739 ; FALLBACK30-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
7740 ; FALLBACK30-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
7741 ; FALLBACK30-NEXT: andb $28, %cl
7742 ; FALLBACK30-NEXT: negb %cl
7743 ; FALLBACK30-NEXT: movsbl %cl, %edx
7744 ; FALLBACK30-NEXT: movl 84(%esp,%edx), %ecx
7745 ; FALLBACK30-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7746 ; FALLBACK30-NEXT: shlxl %eax, %ecx, %ecx
7747 ; FALLBACK30-NEXT: movl 80(%esp,%edx), %esi
7748 ; FALLBACK30-NEXT: shlxl %eax, %esi, %edi
7749 ; FALLBACK30-NEXT: movl %eax, %ebx
7750 ; FALLBACK30-NEXT: notb %bl
7751 ; FALLBACK30-NEXT: shrl %esi
7752 ; FALLBACK30-NEXT: shrxl %ebx, %esi, %esi
7753 ; FALLBACK30-NEXT: orl %ecx, %esi
7754 ; FALLBACK30-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7755 ; FALLBACK30-NEXT: movl 76(%esp,%edx), %ecx
7756 ; FALLBACK30-NEXT: movl %ecx, %esi
7757 ; FALLBACK30-NEXT: shrl %esi
7758 ; FALLBACK30-NEXT: shrxl %ebx, %esi, %esi
7759 ; FALLBACK30-NEXT: orl %edi, %esi
7760 ; FALLBACK30-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7761 ; FALLBACK30-NEXT: shlxl %eax, %ecx, %ecx
7762 ; FALLBACK30-NEXT: movl 72(%esp,%edx), %esi
7763 ; FALLBACK30-NEXT: movl %esi, %edi
7764 ; FALLBACK30-NEXT: shrl %edi
7765 ; FALLBACK30-NEXT: shrxl %ebx, %edi, %edi
7766 ; FALLBACK30-NEXT: orl %ecx, %edi
7767 ; FALLBACK30-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7768 ; FALLBACK30-NEXT: shlxl %eax, %esi, %ecx
7769 ; FALLBACK30-NEXT: movl 68(%esp,%edx), %esi
7770 ; FALLBACK30-NEXT: movl %esi, %edi
7771 ; FALLBACK30-NEXT: shrl %edi
7772 ; FALLBACK30-NEXT: shrxl %ebx, %edi, %ebp
7773 ; FALLBACK30-NEXT: orl %ecx, %ebp
7774 ; FALLBACK30-NEXT: shlxl %eax, %esi, %edi
7775 ; FALLBACK30-NEXT: movl 64(%esp,%edx), %esi
7776 ; FALLBACK30-NEXT: movl %esi, %ecx
7777 ; FALLBACK30-NEXT: shrl %ecx
7778 ; FALLBACK30-NEXT: shrxl %ebx, %ecx, %ecx
7779 ; FALLBACK30-NEXT: orl %edi, %ecx
7780 ; FALLBACK30-NEXT: shlxl %eax, %esi, %esi
7781 ; FALLBACK30-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7782 ; FALLBACK30-NEXT: shlxl %eax, 92(%esp,%edx), %edi
7783 ; FALLBACK30-NEXT: movl 88(%esp,%edx), %edx
7784 ; FALLBACK30-NEXT: shlxl %eax, %edx, %esi
7785 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
7786 ; FALLBACK30-NEXT: shrl %eax
7787 ; FALLBACK30-NEXT: shrxl %ebx, %eax, %eax
7788 ; FALLBACK30-NEXT: orl %esi, %eax
7789 ; FALLBACK30-NEXT: shrl %edx
7790 ; FALLBACK30-NEXT: shrxl %ebx, %edx, %edx
7791 ; FALLBACK30-NEXT: orl %edi, %edx
7792 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %esi
7793 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
7794 ; FALLBACK30-NEXT: movl %edi, (%esi)
7795 ; FALLBACK30-NEXT: movl %edx, 28(%esi)
7796 ; FALLBACK30-NEXT: movl %eax, 24(%esi)
7797 ; FALLBACK30-NEXT: movl %ecx, 4(%esi)
7798 ; FALLBACK30-NEXT: movl %ebp, 8(%esi)
7799 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
7800 ; FALLBACK30-NEXT: movl %eax, 12(%esi)
7801 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
7802 ; FALLBACK30-NEXT: movl %eax, 16(%esi)
7803 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
7804 ; FALLBACK30-NEXT: movl %eax, 20(%esi)
7805 ; FALLBACK30-NEXT: addl $108, %esp
7806 ; FALLBACK30-NEXT: popl %esi
7807 ; FALLBACK30-NEXT: popl %edi
7808 ; FALLBACK30-NEXT: popl %ebx
7809 ; FALLBACK30-NEXT: popl %ebp
7810 ; FALLBACK30-NEXT: vzeroupper
7811 ; FALLBACK30-NEXT: retl
7813 ; FALLBACK31-LABEL: shl_32bytes:
7814 ; FALLBACK31: # %bb.0:
7815 ; FALLBACK31-NEXT: pushl %ebp
7816 ; FALLBACK31-NEXT: pushl %ebx
7817 ; FALLBACK31-NEXT: pushl %edi
7818 ; FALLBACK31-NEXT: pushl %esi
7819 ; FALLBACK31-NEXT: subl $92, %esp
7820 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %eax
7821 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %ecx
7822 ; FALLBACK31-NEXT: vmovups (%ecx), %ymm0
7823 ; FALLBACK31-NEXT: movzbl (%eax), %eax
7824 ; FALLBACK31-NEXT: movl %eax, %ecx
7825 ; FALLBACK31-NEXT: shlb $3, %cl
7826 ; FALLBACK31-NEXT: vxorps %xmm1, %xmm1, %xmm1
7827 ; FALLBACK31-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
7828 ; FALLBACK31-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
7829 ; FALLBACK31-NEXT: andb $28, %al
7830 ; FALLBACK31-NEXT: negb %al
7831 ; FALLBACK31-NEXT: movsbl %al, %ebx
7832 ; FALLBACK31-NEXT: movl 64(%esp,%ebx), %eax
7833 ; FALLBACK31-NEXT: movl 68(%esp,%ebx), %edx
7834 ; FALLBACK31-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7835 ; FALLBACK31-NEXT: shldl %cl, %eax, %edx
7836 ; FALLBACK31-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7837 ; FALLBACK31-NEXT: movl 60(%esp,%ebx), %edx
7838 ; FALLBACK31-NEXT: shldl %cl, %edx, %eax
7839 ; FALLBACK31-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
7840 ; FALLBACK31-NEXT: movl 56(%esp,%ebx), %edi
7841 ; FALLBACK31-NEXT: shldl %cl, %edi, %edx
7842 ; FALLBACK31-NEXT: movl %edx, (%esp) # 4-byte Spill
7843 ; FALLBACK31-NEXT: movl 52(%esp,%ebx), %ebp
7844 ; FALLBACK31-NEXT: shldl %cl, %ebp, %edi
7845 ; FALLBACK31-NEXT: movl 72(%esp,%ebx), %edx
7846 ; FALLBACK31-NEXT: movl %edx, %eax
7847 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
7848 ; FALLBACK31-NEXT: shldl %cl, %esi, %eax
7849 ; FALLBACK31-NEXT: movl 48(%esp,%ebx), %esi
7850 ; FALLBACK31-NEXT: movl 76(%esp,%ebx), %ebx
7851 ; FALLBACK31-NEXT: shldl %cl, %edx, %ebx
7852 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %edx
7853 ; FALLBACK31-NEXT: movl %ebx, 28(%edx)
7854 ; FALLBACK31-NEXT: movl %eax, 24(%edx)
7855 ; FALLBACK31-NEXT: shlxl %ecx, %esi, %eax
7856 ; FALLBACK31-NEXT: # kill: def $cl killed $cl killed $ecx
7857 ; FALLBACK31-NEXT: shldl %cl, %esi, %ebp
7858 ; FALLBACK31-NEXT: movl %ebp, 4(%edx)
7859 ; FALLBACK31-NEXT: movl %edi, 8(%edx)
7860 ; FALLBACK31-NEXT: movl (%esp), %ecx # 4-byte Reload
7861 ; FALLBACK31-NEXT: movl %ecx, 12(%edx)
7862 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7863 ; FALLBACK31-NEXT: movl %ecx, 16(%edx)
7864 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
7865 ; FALLBACK31-NEXT: movl %ecx, 20(%edx)
7866 ; FALLBACK31-NEXT: movl %eax, (%edx)
7867 ; FALLBACK31-NEXT: addl $92, %esp
7868 ; FALLBACK31-NEXT: popl %esi
7869 ; FALLBACK31-NEXT: popl %edi
7870 ; FALLBACK31-NEXT: popl %ebx
7871 ; FALLBACK31-NEXT: popl %ebp
7872 ; FALLBACK31-NEXT: vzeroupper
7873 ; FALLBACK31-NEXT: retl
7874 %src = load i256, ptr %src.ptr, align 1
7875 %byteOff = load i256, ptr %byteOff.ptr, align 1
7876 %bitOff = shl i256 %byteOff, 3
7877 %res = shl i256 %src, %bitOff
7878 store i256 %res, ptr %dst, align 1
; shl_32bytes_dwordOff: left-shift a 256-bit value by a count given in 32-bit
; dwords.
;   %src.ptr      - pointer to the i256 value to shift (loaded with align 1)
;   %dwordOff.ptr - pointer to an i256 shift count in dwords; the IR scales it
;                   to a bit count with `shl ..., 5` before shifting
;   %dst          - pointer that receives the i256 result (stored with align 1)
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing individual lines. Each prefix group has the same layout: a -LABEL
; line, a `# %bb.0:` line, then one -NEXT line per emitted instruction.
7882 define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nounwind {
7883 ; FALLBACK0-LABEL: shl_32bytes_dwordOff:
7884 ; FALLBACK0: # %bb.0:
7885 ; FALLBACK0-NEXT: pushq %rbx
7886 ; FALLBACK0-NEXT: movq (%rdi), %rcx
7887 ; FALLBACK0-NEXT: movq 8(%rdi), %r8
7888 ; FALLBACK0-NEXT: movq 16(%rdi), %r9
7889 ; FALLBACK0-NEXT: movq 24(%rdi), %rdi
7890 ; FALLBACK0-NEXT: movzbl (%rsi), %esi
7891 ; FALLBACK0-NEXT: movl %esi, %eax
7892 ; FALLBACK0-NEXT: shlb $5, %al
7893 ; FALLBACK0-NEXT: xorps %xmm0, %xmm0
7894 ; FALLBACK0-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
7895 ; FALLBACK0-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
7896 ; FALLBACK0-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
7897 ; FALLBACK0-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
7898 ; FALLBACK0-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
7899 ; FALLBACK0-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
7900 ; FALLBACK0-NEXT: shlb $2, %sil
7901 ; FALLBACK0-NEXT: andb $24, %sil
7902 ; FALLBACK0-NEXT: negb %sil
7903 ; FALLBACK0-NEXT: movsbq %sil, %r10
7904 ; FALLBACK0-NEXT: movq -32(%rsp,%r10), %r8
7905 ; FALLBACK0-NEXT: movq -24(%rsp,%r10), %rdi
7906 ; FALLBACK0-NEXT: movq %rdi, %r11
7907 ; FALLBACK0-NEXT: movl %eax, %ecx
7908 ; FALLBACK0-NEXT: shlq %cl, %r11
7909 ; FALLBACK0-NEXT: movl %eax, %esi
7910 ; FALLBACK0-NEXT: notb %sil
7911 ; FALLBACK0-NEXT: movq %r8, %r9
7912 ; FALLBACK0-NEXT: shrq %r9
7913 ; FALLBACK0-NEXT: movl %esi, %ecx
7914 ; FALLBACK0-NEXT: shrq %cl, %r9
7915 ; FALLBACK0-NEXT: orq %r11, %r9
7916 ; FALLBACK0-NEXT: movq -8(%rsp,%r10), %r11
7917 ; FALLBACK0-NEXT: movl %eax, %ecx
7918 ; FALLBACK0-NEXT: shlq %cl, %r11
7919 ; FALLBACK0-NEXT: movq -16(%rsp,%r10), %r10
7920 ; FALLBACK0-NEXT: movq %r10, %rbx
7921 ; FALLBACK0-NEXT: shrq %rbx
7922 ; FALLBACK0-NEXT: movl %esi, %ecx
7923 ; FALLBACK0-NEXT: shrq %cl, %rbx
7924 ; FALLBACK0-NEXT: orq %r11, %rbx
7925 ; FALLBACK0-NEXT: movl %eax, %ecx
7926 ; FALLBACK0-NEXT: shlq %cl, %r10
7927 ; FALLBACK0-NEXT: shrq %rdi
7928 ; FALLBACK0-NEXT: movl %esi, %ecx
7929 ; FALLBACK0-NEXT: shrq %cl, %rdi
7930 ; FALLBACK0-NEXT: orq %r10, %rdi
7931 ; FALLBACK0-NEXT: movl %eax, %ecx
7932 ; FALLBACK0-NEXT: shlq %cl, %r8
7933 ; FALLBACK0-NEXT: movq %r8, (%rdx)
7934 ; FALLBACK0-NEXT: movq %rdi, 16(%rdx)
7935 ; FALLBACK0-NEXT: movq %rbx, 24(%rdx)
7936 ; FALLBACK0-NEXT: movq %r9, 8(%rdx)
7937 ; FALLBACK0-NEXT: popq %rbx
7938 ; FALLBACK0-NEXT: retq
7940 ; FALLBACK1-LABEL: shl_32bytes_dwordOff:
7941 ; FALLBACK1: # %bb.0:
7942 ; FALLBACK1-NEXT: movq (%rdi), %rax
7943 ; FALLBACK1-NEXT: movq 8(%rdi), %r8
7944 ; FALLBACK1-NEXT: movq 16(%rdi), %r9
7945 ; FALLBACK1-NEXT: movq 24(%rdi), %rdi
7946 ; FALLBACK1-NEXT: movzbl (%rsi), %esi
7947 ; FALLBACK1-NEXT: movl %esi, %ecx
7948 ; FALLBACK1-NEXT: shlb $5, %cl
7949 ; FALLBACK1-NEXT: xorps %xmm0, %xmm0
7950 ; FALLBACK1-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
7951 ; FALLBACK1-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
7952 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
7953 ; FALLBACK1-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
7954 ; FALLBACK1-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
7955 ; FALLBACK1-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
7956 ; FALLBACK1-NEXT: shlb $2, %sil
7957 ; FALLBACK1-NEXT: andb $24, %sil
7958 ; FALLBACK1-NEXT: negb %sil
7959 ; FALLBACK1-NEXT: movsbq %sil, %rax
7960 ; FALLBACK1-NEXT: movq -24(%rsp,%rax), %rsi
7961 ; FALLBACK1-NEXT: movq -16(%rsp,%rax), %rdi
7962 ; FALLBACK1-NEXT: shldq %cl, %rsi, %rdi
7963 ; FALLBACK1-NEXT: movq -40(%rsp,%rax), %r8
7964 ; FALLBACK1-NEXT: movq -32(%rsp,%rax), %rax
7965 ; FALLBACK1-NEXT: shldq %cl, %rax, %rsi
7966 ; FALLBACK1-NEXT: shldq %cl, %r8, %rax
7967 ; FALLBACK1-NEXT: shlq %cl, %r8
7968 ; FALLBACK1-NEXT: movq %rsi, 16(%rdx)
7969 ; FALLBACK1-NEXT: movq %rdi, 24(%rdx)
7970 ; FALLBACK1-NEXT: movq %r8, (%rdx)
7971 ; FALLBACK1-NEXT: movq %rax, 8(%rdx)
7972 ; FALLBACK1-NEXT: retq
7974 ; FALLBACK2-LABEL: shl_32bytes_dwordOff:
7975 ; FALLBACK2: # %bb.0:
7976 ; FALLBACK2-NEXT: movq (%rdi), %rcx
7977 ; FALLBACK2-NEXT: movq 8(%rdi), %r8
7978 ; FALLBACK2-NEXT: movq 16(%rdi), %r9
7979 ; FALLBACK2-NEXT: movq 24(%rdi), %rdi
7980 ; FALLBACK2-NEXT: movzbl (%rsi), %esi
7981 ; FALLBACK2-NEXT: movl %esi, %eax
7982 ; FALLBACK2-NEXT: shlb $5, %al
7983 ; FALLBACK2-NEXT: xorps %xmm0, %xmm0
7984 ; FALLBACK2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
7985 ; FALLBACK2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
7986 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
7987 ; FALLBACK2-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
7988 ; FALLBACK2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
7989 ; FALLBACK2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
7990 ; FALLBACK2-NEXT: shlb $2, %sil
7991 ; FALLBACK2-NEXT: andb $24, %sil
7992 ; FALLBACK2-NEXT: negb %sil
7993 ; FALLBACK2-NEXT: movsbq %sil, %rsi
7994 ; FALLBACK2-NEXT: movq -40(%rsp,%rsi), %rdi
7995 ; FALLBACK2-NEXT: movq -32(%rsp,%rsi), %rcx
7996 ; FALLBACK2-NEXT: shlxq %rax, %rcx, %r8
7997 ; FALLBACK2-NEXT: shlxq %rax, -16(%rsp,%rsi), %r9
7998 ; FALLBACK2-NEXT: movq -24(%rsp,%rsi), %rsi
7999 ; FALLBACK2-NEXT: shlxq %rax, %rsi, %r10
8000 ; FALLBACK2-NEXT: shlxq %rax, %rdi, %r11
8001 ; FALLBACK2-NEXT: # kill: def $al killed $al killed $rax def $rax
8002 ; FALLBACK2-NEXT: notb %al
8003 ; FALLBACK2-NEXT: shrq %rdi
8004 ; FALLBACK2-NEXT: shrxq %rax, %rdi, %rdi
8005 ; FALLBACK2-NEXT: orq %r8, %rdi
8006 ; FALLBACK2-NEXT: shrq %rsi
8007 ; FALLBACK2-NEXT: shrxq %rax, %rsi, %rsi
8008 ; FALLBACK2-NEXT: orq %r9, %rsi
8009 ; FALLBACK2-NEXT: shrq %rcx
8010 ; FALLBACK2-NEXT: shrxq %rax, %rcx, %rax
8011 ; FALLBACK2-NEXT: orq %r10, %rax
8012 ; FALLBACK2-NEXT: movq %r11, (%rdx)
8013 ; FALLBACK2-NEXT: movq %rax, 16(%rdx)
8014 ; FALLBACK2-NEXT: movq %rsi, 24(%rdx)
8015 ; FALLBACK2-NEXT: movq %rdi, 8(%rdx)
8016 ; FALLBACK2-NEXT: retq
8018 ; FALLBACK3-LABEL: shl_32bytes_dwordOff:
8019 ; FALLBACK3: # %bb.0:
8020 ; FALLBACK3-NEXT: movq (%rdi), %rax
8021 ; FALLBACK3-NEXT: movq 8(%rdi), %r8
8022 ; FALLBACK3-NEXT: movq 16(%rdi), %r9
8023 ; FALLBACK3-NEXT: movq 24(%rdi), %rdi
8024 ; FALLBACK3-NEXT: movzbl (%rsi), %esi
8025 ; FALLBACK3-NEXT: movl %esi, %ecx
8026 ; FALLBACK3-NEXT: shlb $5, %cl
8027 ; FALLBACK3-NEXT: xorps %xmm0, %xmm0
8028 ; FALLBACK3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
8029 ; FALLBACK3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
8030 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8031 ; FALLBACK3-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
8032 ; FALLBACK3-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
8033 ; FALLBACK3-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
8034 ; FALLBACK3-NEXT: shlb $2, %sil
8035 ; FALLBACK3-NEXT: andb $24, %sil
8036 ; FALLBACK3-NEXT: negb %sil
8037 ; FALLBACK3-NEXT: movsbq %sil, %rax
8038 ; FALLBACK3-NEXT: movq -24(%rsp,%rax), %rsi
8039 ; FALLBACK3-NEXT: movq -16(%rsp,%rax), %rdi
8040 ; FALLBACK3-NEXT: shldq %cl, %rsi, %rdi
8041 ; FALLBACK3-NEXT: movq -40(%rsp,%rax), %r8
8042 ; FALLBACK3-NEXT: movq -32(%rsp,%rax), %rax
8043 ; FALLBACK3-NEXT: shldq %cl, %rax, %rsi
8044 ; FALLBACK3-NEXT: shldq %cl, %r8, %rax
8045 ; FALLBACK3-NEXT: shlxq %rcx, %r8, %rcx
8046 ; FALLBACK3-NEXT: movq %rsi, 16(%rdx)
8047 ; FALLBACK3-NEXT: movq %rdi, 24(%rdx)
8048 ; FALLBACK3-NEXT: movq %rcx, (%rdx)
8049 ; FALLBACK3-NEXT: movq %rax, 8(%rdx)
8050 ; FALLBACK3-NEXT: retq
8052 ; FALLBACK4-LABEL: shl_32bytes_dwordOff:
8053 ; FALLBACK4: # %bb.0:
8054 ; FALLBACK4-NEXT: movups (%rdi), %xmm0
8055 ; FALLBACK4-NEXT: movups 16(%rdi), %xmm1
8056 ; FALLBACK4-NEXT: movzbl (%rsi), %ecx
8057 ; FALLBACK4-NEXT: movl %ecx, %eax
8058 ; FALLBACK4-NEXT: shlb $5, %al
8059 ; FALLBACK4-NEXT: xorps %xmm2, %xmm2
8060 ; FALLBACK4-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
8061 ; FALLBACK4-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
8062 ; FALLBACK4-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
8063 ; FALLBACK4-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
8064 ; FALLBACK4-NEXT: shlb $2, %cl
8065 ; FALLBACK4-NEXT: andb $24, %cl
8066 ; FALLBACK4-NEXT: negb %cl
8067 ; FALLBACK4-NEXT: movsbq %cl, %r8
8068 ; FALLBACK4-NEXT: movq -16(%rsp,%r8), %r9
8069 ; FALLBACK4-NEXT: movl %eax, %ecx
8070 ; FALLBACK4-NEXT: shlq %cl, %r9
8071 ; FALLBACK4-NEXT: movl %eax, %esi
8072 ; FALLBACK4-NEXT: notb %sil
8073 ; FALLBACK4-NEXT: movq -24(%rsp,%r8), %r10
8074 ; FALLBACK4-NEXT: movq %r10, %rdi
8075 ; FALLBACK4-NEXT: shrq %rdi
8076 ; FALLBACK4-NEXT: movl %esi, %ecx
8077 ; FALLBACK4-NEXT: shrq %cl, %rdi
8078 ; FALLBACK4-NEXT: orq %r9, %rdi
8079 ; FALLBACK4-NEXT: movl %eax, %ecx
8080 ; FALLBACK4-NEXT: shlq %cl, %r10
8081 ; FALLBACK4-NEXT: movq -40(%rsp,%r8), %r9
8082 ; FALLBACK4-NEXT: movq -32(%rsp,%r8), %r8
8083 ; FALLBACK4-NEXT: movq %r8, %r11
8084 ; FALLBACK4-NEXT: shrq %r11
8085 ; FALLBACK4-NEXT: movl %esi, %ecx
8086 ; FALLBACK4-NEXT: shrq %cl, %r11
8087 ; FALLBACK4-NEXT: orq %r10, %r11
8088 ; FALLBACK4-NEXT: movl %eax, %ecx
8089 ; FALLBACK4-NEXT: shlq %cl, %r8
8090 ; FALLBACK4-NEXT: movq %r9, %r10
8091 ; FALLBACK4-NEXT: shrq %r10
8092 ; FALLBACK4-NEXT: movl %esi, %ecx
8093 ; FALLBACK4-NEXT: shrq %cl, %r10
8094 ; FALLBACK4-NEXT: orq %r8, %r10
8095 ; FALLBACK4-NEXT: movl %eax, %ecx
8096 ; FALLBACK4-NEXT: shlq %cl, %r9
8097 ; FALLBACK4-NEXT: movq %r9, (%rdx)
8098 ; FALLBACK4-NEXT: movq %r10, 8(%rdx)
8099 ; FALLBACK4-NEXT: movq %r11, 16(%rdx)
8100 ; FALLBACK4-NEXT: movq %rdi, 24(%rdx)
8101 ; FALLBACK4-NEXT: retq
8103 ; FALLBACK5-LABEL: shl_32bytes_dwordOff:
8104 ; FALLBACK5: # %bb.0:
8105 ; FALLBACK5-NEXT: movups (%rdi), %xmm0
8106 ; FALLBACK5-NEXT: movups 16(%rdi), %xmm1
8107 ; FALLBACK5-NEXT: movzbl (%rsi), %eax
8108 ; FALLBACK5-NEXT: movl %eax, %ecx
8109 ; FALLBACK5-NEXT: shlb $5, %cl
8110 ; FALLBACK5-NEXT: xorps %xmm2, %xmm2
8111 ; FALLBACK5-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
8112 ; FALLBACK5-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
8113 ; FALLBACK5-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
8114 ; FALLBACK5-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
8115 ; FALLBACK5-NEXT: shlb $2, %al
8116 ; FALLBACK5-NEXT: andb $24, %al
8117 ; FALLBACK5-NEXT: negb %al
8118 ; FALLBACK5-NEXT: movsbq %al, %rax
8119 ; FALLBACK5-NEXT: movq -24(%rsp,%rax), %rsi
8120 ; FALLBACK5-NEXT: movq -16(%rsp,%rax), %rdi
8121 ; FALLBACK5-NEXT: shldq %cl, %rsi, %rdi
8122 ; FALLBACK5-NEXT: movq -40(%rsp,%rax), %r8
8123 ; FALLBACK5-NEXT: movq -32(%rsp,%rax), %rax
8124 ; FALLBACK5-NEXT: shldq %cl, %rax, %rsi
8125 ; FALLBACK5-NEXT: movq %r8, %r9
8126 ; FALLBACK5-NEXT: shlq %cl, %r9
8127 ; FALLBACK5-NEXT: shldq %cl, %r8, %rax
8128 ; FALLBACK5-NEXT: movq %rax, 8(%rdx)
8129 ; FALLBACK5-NEXT: movq %rsi, 16(%rdx)
8130 ; FALLBACK5-NEXT: movq %rdi, 24(%rdx)
8131 ; FALLBACK5-NEXT: movq %r9, (%rdx)
8132 ; FALLBACK5-NEXT: retq
8134 ; FALLBACK6-LABEL: shl_32bytes_dwordOff:
8135 ; FALLBACK6: # %bb.0:
8136 ; FALLBACK6-NEXT: movups (%rdi), %xmm0
8137 ; FALLBACK6-NEXT: movups 16(%rdi), %xmm1
8138 ; FALLBACK6-NEXT: movzbl (%rsi), %ecx
8139 ; FALLBACK6-NEXT: movl %ecx, %eax
8140 ; FALLBACK6-NEXT: shlb $5, %al
8141 ; FALLBACK6-NEXT: xorps %xmm2, %xmm2
8142 ; FALLBACK6-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
8143 ; FALLBACK6-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
8144 ; FALLBACK6-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
8145 ; FALLBACK6-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
8146 ; FALLBACK6-NEXT: shlb $2, %cl
8147 ; FALLBACK6-NEXT: andb $24, %cl
8148 ; FALLBACK6-NEXT: negb %cl
8149 ; FALLBACK6-NEXT: movsbq %cl, %rcx
8150 ; FALLBACK6-NEXT: shlxq %rax, -16(%rsp,%rcx), %rsi
8151 ; FALLBACK6-NEXT: movq -24(%rsp,%rcx), %rdi
8152 ; FALLBACK6-NEXT: shlxq %rax, %rdi, %r8
8153 ; FALLBACK6-NEXT: movq -40(%rsp,%rcx), %r9
8154 ; FALLBACK6-NEXT: movq -32(%rsp,%rcx), %rcx
8155 ; FALLBACK6-NEXT: shlxq %rax, %rcx, %r10
8156 ; FALLBACK6-NEXT: shlxq %rax, %r9, %r11
8157 ; FALLBACK6-NEXT: # kill: def $al killed $al killed $rax def $rax
8158 ; FALLBACK6-NEXT: notb %al
8159 ; FALLBACK6-NEXT: shrq %rdi
8160 ; FALLBACK6-NEXT: shrxq %rax, %rdi, %rdi
8161 ; FALLBACK6-NEXT: orq %rsi, %rdi
8162 ; FALLBACK6-NEXT: shrq %rcx
8163 ; FALLBACK6-NEXT: shrxq %rax, %rcx, %rcx
8164 ; FALLBACK6-NEXT: orq %r8, %rcx
8165 ; FALLBACK6-NEXT: shrq %r9
8166 ; FALLBACK6-NEXT: shrxq %rax, %r9, %rax
8167 ; FALLBACK6-NEXT: orq %r10, %rax
8168 ; FALLBACK6-NEXT: movq %r11, (%rdx)
8169 ; FALLBACK6-NEXT: movq %rax, 8(%rdx)
8170 ; FALLBACK6-NEXT: movq %rcx, 16(%rdx)
8171 ; FALLBACK6-NEXT: movq %rdi, 24(%rdx)
8172 ; FALLBACK6-NEXT: retq
8174 ; FALLBACK7-LABEL: shl_32bytes_dwordOff:
8175 ; FALLBACK7: # %bb.0:
8176 ; FALLBACK7-NEXT: movups (%rdi), %xmm0
8177 ; FALLBACK7-NEXT: movups 16(%rdi), %xmm1
8178 ; FALLBACK7-NEXT: movzbl (%rsi), %eax
8179 ; FALLBACK7-NEXT: movl %eax, %ecx
8180 ; FALLBACK7-NEXT: shlb $5, %cl
8181 ; FALLBACK7-NEXT: xorps %xmm2, %xmm2
8182 ; FALLBACK7-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
8183 ; FALLBACK7-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
8184 ; FALLBACK7-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
8185 ; FALLBACK7-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
8186 ; FALLBACK7-NEXT: shlb $2, %al
8187 ; FALLBACK7-NEXT: andb $24, %al
8188 ; FALLBACK7-NEXT: negb %al
8189 ; FALLBACK7-NEXT: movsbq %al, %rax
8190 ; FALLBACK7-NEXT: movq -24(%rsp,%rax), %rsi
8191 ; FALLBACK7-NEXT: movq -16(%rsp,%rax), %rdi
8192 ; FALLBACK7-NEXT: shldq %cl, %rsi, %rdi
8193 ; FALLBACK7-NEXT: movq -40(%rsp,%rax), %r8
8194 ; FALLBACK7-NEXT: movq -32(%rsp,%rax), %rax
8195 ; FALLBACK7-NEXT: shldq %cl, %rax, %rsi
8196 ; FALLBACK7-NEXT: shlxq %rcx, %r8, %r9
8197 ; FALLBACK7-NEXT: # kill: def $cl killed $cl killed $rcx
8198 ; FALLBACK7-NEXT: shldq %cl, %r8, %rax
8199 ; FALLBACK7-NEXT: movq %rax, 8(%rdx)
8200 ; FALLBACK7-NEXT: movq %rsi, 16(%rdx)
8201 ; FALLBACK7-NEXT: movq %rdi, 24(%rdx)
8202 ; FALLBACK7-NEXT: movq %r9, (%rdx)
8203 ; FALLBACK7-NEXT: retq
8205 ; FALLBACK8-LABEL: shl_32bytes_dwordOff:
8206 ; FALLBACK8: # %bb.0:
8207 ; FALLBACK8-NEXT: vmovups (%rdi), %ymm0
8208 ; FALLBACK8-NEXT: movzbl (%rsi), %ecx
8209 ; FALLBACK8-NEXT: movl %ecx, %eax
8210 ; FALLBACK8-NEXT: shlb $5, %al
8211 ; FALLBACK8-NEXT: vxorps %xmm1, %xmm1, %xmm1
8212 ; FALLBACK8-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
8213 ; FALLBACK8-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
8214 ; FALLBACK8-NEXT: shlb $2, %cl
8215 ; FALLBACK8-NEXT: andb $24, %cl
8216 ; FALLBACK8-NEXT: negb %cl
8217 ; FALLBACK8-NEXT: movsbq %cl, %r8
8218 ; FALLBACK8-NEXT: movq -16(%rsp,%r8), %r9
8219 ; FALLBACK8-NEXT: movl %eax, %ecx
8220 ; FALLBACK8-NEXT: shlq %cl, %r9
8221 ; FALLBACK8-NEXT: movl %eax, %esi
8222 ; FALLBACK8-NEXT: notb %sil
8223 ; FALLBACK8-NEXT: movq -24(%rsp,%r8), %r10
8224 ; FALLBACK8-NEXT: movq %r10, %rdi
8225 ; FALLBACK8-NEXT: shrq %rdi
8226 ; FALLBACK8-NEXT: movl %esi, %ecx
8227 ; FALLBACK8-NEXT: shrq %cl, %rdi
8228 ; FALLBACK8-NEXT: orq %r9, %rdi
8229 ; FALLBACK8-NEXT: movl %eax, %ecx
8230 ; FALLBACK8-NEXT: shlq %cl, %r10
8231 ; FALLBACK8-NEXT: movq -40(%rsp,%r8), %r9
8232 ; FALLBACK8-NEXT: movq -32(%rsp,%r8), %r8
8233 ; FALLBACK8-NEXT: movq %r8, %r11
8234 ; FALLBACK8-NEXT: shrq %r11
8235 ; FALLBACK8-NEXT: movl %esi, %ecx
8236 ; FALLBACK8-NEXT: shrq %cl, %r11
8237 ; FALLBACK8-NEXT: orq %r10, %r11
8238 ; FALLBACK8-NEXT: movl %eax, %ecx
8239 ; FALLBACK8-NEXT: shlq %cl, %r8
8240 ; FALLBACK8-NEXT: movq %r9, %r10
8241 ; FALLBACK8-NEXT: shrq %r10
8242 ; FALLBACK8-NEXT: movl %esi, %ecx
8243 ; FALLBACK8-NEXT: shrq %cl, %r10
8244 ; FALLBACK8-NEXT: orq %r8, %r10
8245 ; FALLBACK8-NEXT: movl %eax, %ecx
8246 ; FALLBACK8-NEXT: shlq %cl, %r9
8247 ; FALLBACK8-NEXT: movq %r9, (%rdx)
8248 ; FALLBACK8-NEXT: movq %r10, 8(%rdx)
8249 ; FALLBACK8-NEXT: movq %r11, 16(%rdx)
8250 ; FALLBACK8-NEXT: movq %rdi, 24(%rdx)
8251 ; FALLBACK8-NEXT: vzeroupper
8252 ; FALLBACK8-NEXT: retq
8254 ; FALLBACK9-LABEL: shl_32bytes_dwordOff:
8255 ; FALLBACK9: # %bb.0:
8256 ; FALLBACK9-NEXT: vmovups (%rdi), %ymm0
8257 ; FALLBACK9-NEXT: movzbl (%rsi), %eax
8258 ; FALLBACK9-NEXT: movl %eax, %ecx
8259 ; FALLBACK9-NEXT: shlb $5, %cl
8260 ; FALLBACK9-NEXT: vxorps %xmm1, %xmm1, %xmm1
8261 ; FALLBACK9-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
8262 ; FALLBACK9-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
8263 ; FALLBACK9-NEXT: shlb $2, %al
8264 ; FALLBACK9-NEXT: andb $24, %al
8265 ; FALLBACK9-NEXT: negb %al
8266 ; FALLBACK9-NEXT: movsbq %al, %rax
8267 ; FALLBACK9-NEXT: movq -24(%rsp,%rax), %rsi
8268 ; FALLBACK9-NEXT: movq -16(%rsp,%rax), %rdi
8269 ; FALLBACK9-NEXT: shldq %cl, %rsi, %rdi
8270 ; FALLBACK9-NEXT: movq -40(%rsp,%rax), %r8
8271 ; FALLBACK9-NEXT: movq -32(%rsp,%rax), %rax
8272 ; FALLBACK9-NEXT: shldq %cl, %rax, %rsi
8273 ; FALLBACK9-NEXT: movq %r8, %r9
8274 ; FALLBACK9-NEXT: shlq %cl, %r9
8275 ; FALLBACK9-NEXT: shldq %cl, %r8, %rax
8276 ; FALLBACK9-NEXT: movq %rax, 8(%rdx)
8277 ; FALLBACK9-NEXT: movq %rsi, 16(%rdx)
8278 ; FALLBACK9-NEXT: movq %rdi, 24(%rdx)
8279 ; FALLBACK9-NEXT: movq %r9, (%rdx)
8280 ; FALLBACK9-NEXT: vzeroupper
8281 ; FALLBACK9-NEXT: retq
8283 ; FALLBACK10-LABEL: shl_32bytes_dwordOff:
8284 ; FALLBACK10: # %bb.0:
8285 ; FALLBACK10-NEXT: vmovups (%rdi), %ymm0
8286 ; FALLBACK10-NEXT: movzbl (%rsi), %ecx
8287 ; FALLBACK10-NEXT: movl %ecx, %eax
8288 ; FALLBACK10-NEXT: shlb $5, %al
8289 ; FALLBACK10-NEXT: vxorps %xmm1, %xmm1, %xmm1
8290 ; FALLBACK10-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
8291 ; FALLBACK10-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
8292 ; FALLBACK10-NEXT: shlb $2, %cl
8293 ; FALLBACK10-NEXT: andb $24, %cl
8294 ; FALLBACK10-NEXT: negb %cl
8295 ; FALLBACK10-NEXT: movsbq %cl, %rcx
8296 ; FALLBACK10-NEXT: shlxq %rax, -16(%rsp,%rcx), %rsi
8297 ; FALLBACK10-NEXT: movq -24(%rsp,%rcx), %rdi
8298 ; FALLBACK10-NEXT: shlxq %rax, %rdi, %r8
8299 ; FALLBACK10-NEXT: movq -40(%rsp,%rcx), %r9
8300 ; FALLBACK10-NEXT: movq -32(%rsp,%rcx), %rcx
8301 ; FALLBACK10-NEXT: shlxq %rax, %rcx, %r10
8302 ; FALLBACK10-NEXT: shlxq %rax, %r9, %r11
8303 ; FALLBACK10-NEXT: # kill: def $al killed $al killed $rax def $rax
8304 ; FALLBACK10-NEXT: notb %al
8305 ; FALLBACK10-NEXT: shrq %rdi
8306 ; FALLBACK10-NEXT: shrxq %rax, %rdi, %rdi
8307 ; FALLBACK10-NEXT: orq %rsi, %rdi
8308 ; FALLBACK10-NEXT: shrq %rcx
8309 ; FALLBACK10-NEXT: shrxq %rax, %rcx, %rcx
8310 ; FALLBACK10-NEXT: orq %r8, %rcx
8311 ; FALLBACK10-NEXT: shrq %r9
8312 ; FALLBACK10-NEXT: shrxq %rax, %r9, %rax
8313 ; FALLBACK10-NEXT: orq %r10, %rax
8314 ; FALLBACK10-NEXT: movq %r11, (%rdx)
8315 ; FALLBACK10-NEXT: movq %rax, 8(%rdx)
8316 ; FALLBACK10-NEXT: movq %rcx, 16(%rdx)
8317 ; FALLBACK10-NEXT: movq %rdi, 24(%rdx)
8318 ; FALLBACK10-NEXT: vzeroupper
8319 ; FALLBACK10-NEXT: retq
8321 ; FALLBACK11-LABEL: shl_32bytes_dwordOff:
8322 ; FALLBACK11: # %bb.0:
8323 ; FALLBACK11-NEXT: vmovups (%rdi), %ymm0
8324 ; FALLBACK11-NEXT: movzbl (%rsi), %eax
8325 ; FALLBACK11-NEXT: movl %eax, %ecx
8326 ; FALLBACK11-NEXT: shlb $5, %cl
8327 ; FALLBACK11-NEXT: vxorps %xmm1, %xmm1, %xmm1
8328 ; FALLBACK11-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
8329 ; FALLBACK11-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
8330 ; FALLBACK11-NEXT: shlb $2, %al
8331 ; FALLBACK11-NEXT: andb $24, %al
8332 ; FALLBACK11-NEXT: negb %al
8333 ; FALLBACK11-NEXT: movsbq %al, %rax
8334 ; FALLBACK11-NEXT: movq -24(%rsp,%rax), %rsi
8335 ; FALLBACK11-NEXT: movq -16(%rsp,%rax), %rdi
8336 ; FALLBACK11-NEXT: shldq %cl, %rsi, %rdi
8337 ; FALLBACK11-NEXT: movq -40(%rsp,%rax), %r8
8338 ; FALLBACK11-NEXT: movq -32(%rsp,%rax), %rax
8339 ; FALLBACK11-NEXT: shldq %cl, %rax, %rsi
8340 ; FALLBACK11-NEXT: shlxq %rcx, %r8, %r9
8341 ; FALLBACK11-NEXT: # kill: def $cl killed $cl killed $rcx
8342 ; FALLBACK11-NEXT: shldq %cl, %r8, %rax
8343 ; FALLBACK11-NEXT: movq %rax, 8(%rdx)
8344 ; FALLBACK11-NEXT: movq %rsi, 16(%rdx)
8345 ; FALLBACK11-NEXT: movq %rdi, 24(%rdx)
8346 ; FALLBACK11-NEXT: movq %r9, (%rdx)
8347 ; FALLBACK11-NEXT: vzeroupper
8348 ; FALLBACK11-NEXT: retq
8350 ; FALLBACK12-LABEL: shl_32bytes_dwordOff:
8351 ; FALLBACK12: # %bb.0:
8352 ; FALLBACK12-NEXT: vmovups (%rdi), %ymm0
8353 ; FALLBACK12-NEXT: movzbl (%rsi), %ecx
8354 ; FALLBACK12-NEXT: movl %ecx, %eax
8355 ; FALLBACK12-NEXT: shlb $5, %al
8356 ; FALLBACK12-NEXT: vxorps %xmm1, %xmm1, %xmm1
8357 ; FALLBACK12-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
8358 ; FALLBACK12-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
8359 ; FALLBACK12-NEXT: shlb $2, %cl
8360 ; FALLBACK12-NEXT: andb $24, %cl
8361 ; FALLBACK12-NEXT: negb %cl
8362 ; FALLBACK12-NEXT: movsbq %cl, %r8
8363 ; FALLBACK12-NEXT: movq -16(%rsp,%r8), %r9
8364 ; FALLBACK12-NEXT: movl %eax, %ecx
8365 ; FALLBACK12-NEXT: shlq %cl, %r9
8366 ; FALLBACK12-NEXT: movl %eax, %esi
8367 ; FALLBACK12-NEXT: notb %sil
8368 ; FALLBACK12-NEXT: movq -24(%rsp,%r8), %r10
8369 ; FALLBACK12-NEXT: movq %r10, %rdi
8370 ; FALLBACK12-NEXT: shrq %rdi
8371 ; FALLBACK12-NEXT: movl %esi, %ecx
8372 ; FALLBACK12-NEXT: shrq %cl, %rdi
8373 ; FALLBACK12-NEXT: orq %r9, %rdi
8374 ; FALLBACK12-NEXT: movl %eax, %ecx
8375 ; FALLBACK12-NEXT: shlq %cl, %r10
8376 ; FALLBACK12-NEXT: movq -40(%rsp,%r8), %r9
8377 ; FALLBACK12-NEXT: movq -32(%rsp,%r8), %r8
8378 ; FALLBACK12-NEXT: movq %r8, %r11
8379 ; FALLBACK12-NEXT: shrq %r11
8380 ; FALLBACK12-NEXT: movl %esi, %ecx
8381 ; FALLBACK12-NEXT: shrq %cl, %r11
8382 ; FALLBACK12-NEXT: orq %r10, %r11
8383 ; FALLBACK12-NEXT: movl %eax, %ecx
8384 ; FALLBACK12-NEXT: shlq %cl, %r8
8385 ; FALLBACK12-NEXT: movq %r9, %r10
8386 ; FALLBACK12-NEXT: shrq %r10
8387 ; FALLBACK12-NEXT: movl %esi, %ecx
8388 ; FALLBACK12-NEXT: shrq %cl, %r10
8389 ; FALLBACK12-NEXT: orq %r8, %r10
8390 ; FALLBACK12-NEXT: movl %eax, %ecx
8391 ; FALLBACK12-NEXT: shlq %cl, %r9
8392 ; FALLBACK12-NEXT: movq %r9, (%rdx)
8393 ; FALLBACK12-NEXT: movq %r10, 8(%rdx)
8394 ; FALLBACK12-NEXT: movq %r11, 16(%rdx)
8395 ; FALLBACK12-NEXT: movq %rdi, 24(%rdx)
8396 ; FALLBACK12-NEXT: vzeroupper
8397 ; FALLBACK12-NEXT: retq
8399 ; FALLBACK13-LABEL: shl_32bytes_dwordOff:
8400 ; FALLBACK13: # %bb.0:
8401 ; FALLBACK13-NEXT: vmovups (%rdi), %ymm0
8402 ; FALLBACK13-NEXT: movzbl (%rsi), %eax
8403 ; FALLBACK13-NEXT: movl %eax, %ecx
8404 ; FALLBACK13-NEXT: shlb $5, %cl
8405 ; FALLBACK13-NEXT: vxorps %xmm1, %xmm1, %xmm1
8406 ; FALLBACK13-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
8407 ; FALLBACK13-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
8408 ; FALLBACK13-NEXT: shlb $2, %al
8409 ; FALLBACK13-NEXT: andb $24, %al
8410 ; FALLBACK13-NEXT: negb %al
8411 ; FALLBACK13-NEXT: movsbq %al, %rax
8412 ; FALLBACK13-NEXT: movq -24(%rsp,%rax), %rsi
8413 ; FALLBACK13-NEXT: movq -16(%rsp,%rax), %rdi
8414 ; FALLBACK13-NEXT: shldq %cl, %rsi, %rdi
8415 ; FALLBACK13-NEXT: movq -40(%rsp,%rax), %r8
8416 ; FALLBACK13-NEXT: movq -32(%rsp,%rax), %rax
8417 ; FALLBACK13-NEXT: shldq %cl, %rax, %rsi
8418 ; FALLBACK13-NEXT: movq %r8, %r9
8419 ; FALLBACK13-NEXT: shlq %cl, %r9
8420 ; FALLBACK13-NEXT: shldq %cl, %r8, %rax
8421 ; FALLBACK13-NEXT: movq %rax, 8(%rdx)
8422 ; FALLBACK13-NEXT: movq %rsi, 16(%rdx)
8423 ; FALLBACK13-NEXT: movq %rdi, 24(%rdx)
8424 ; FALLBACK13-NEXT: movq %r9, (%rdx)
8425 ; FALLBACK13-NEXT: vzeroupper
8426 ; FALLBACK13-NEXT: retq
8428 ; FALLBACK14-LABEL: shl_32bytes_dwordOff:
8429 ; FALLBACK14: # %bb.0:
8430 ; FALLBACK14-NEXT: vmovups (%rdi), %ymm0
8431 ; FALLBACK14-NEXT: movzbl (%rsi), %ecx
8432 ; FALLBACK14-NEXT: movl %ecx, %eax
8433 ; FALLBACK14-NEXT: shlb $5, %al
8434 ; FALLBACK14-NEXT: vxorps %xmm1, %xmm1, %xmm1
8435 ; FALLBACK14-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
8436 ; FALLBACK14-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
8437 ; FALLBACK14-NEXT: shlb $2, %cl
8438 ; FALLBACK14-NEXT: andb $24, %cl
8439 ; FALLBACK14-NEXT: negb %cl
8440 ; FALLBACK14-NEXT: movsbq %cl, %rcx
8441 ; FALLBACK14-NEXT: shlxq %rax, -16(%rsp,%rcx), %rsi
8442 ; FALLBACK14-NEXT: movq -24(%rsp,%rcx), %rdi
8443 ; FALLBACK14-NEXT: shlxq %rax, %rdi, %r8
8444 ; FALLBACK14-NEXT: movq -40(%rsp,%rcx), %r9
8445 ; FALLBACK14-NEXT: movq -32(%rsp,%rcx), %rcx
8446 ; FALLBACK14-NEXT: shlxq %rax, %rcx, %r10
8447 ; FALLBACK14-NEXT: shlxq %rax, %r9, %r11
8448 ; FALLBACK14-NEXT: # kill: def $al killed $al killed $rax def $rax
8449 ; FALLBACK14-NEXT: notb %al
8450 ; FALLBACK14-NEXT: shrq %rdi
8451 ; FALLBACK14-NEXT: shrxq %rax, %rdi, %rdi
8452 ; FALLBACK14-NEXT: orq %rsi, %rdi
8453 ; FALLBACK14-NEXT: shrq %rcx
8454 ; FALLBACK14-NEXT: shrxq %rax, %rcx, %rcx
8455 ; FALLBACK14-NEXT: orq %r8, %rcx
8456 ; FALLBACK14-NEXT: shrq %r9
8457 ; FALLBACK14-NEXT: shrxq %rax, %r9, %rax
8458 ; FALLBACK14-NEXT: orq %r10, %rax
8459 ; FALLBACK14-NEXT: movq %r11, (%rdx)
8460 ; FALLBACK14-NEXT: movq %rax, 8(%rdx)
8461 ; FALLBACK14-NEXT: movq %rcx, 16(%rdx)
8462 ; FALLBACK14-NEXT: movq %rdi, 24(%rdx)
8463 ; FALLBACK14-NEXT: vzeroupper
8464 ; FALLBACK14-NEXT: retq
8466 ; FALLBACK15-LABEL: shl_32bytes_dwordOff:
8467 ; FALLBACK15: # %bb.0:
8468 ; FALLBACK15-NEXT: vmovups (%rdi), %ymm0
8469 ; FALLBACK15-NEXT: movzbl (%rsi), %eax
8470 ; FALLBACK15-NEXT: movl %eax, %ecx
8471 ; FALLBACK15-NEXT: shlb $5, %cl
8472 ; FALLBACK15-NEXT: vxorps %xmm1, %xmm1, %xmm1
8473 ; FALLBACK15-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
8474 ; FALLBACK15-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
8475 ; FALLBACK15-NEXT: shlb $2, %al
8476 ; FALLBACK15-NEXT: andb $24, %al
8477 ; FALLBACK15-NEXT: negb %al
8478 ; FALLBACK15-NEXT: movsbq %al, %rax
8479 ; FALLBACK15-NEXT: movq -24(%rsp,%rax), %rsi
8480 ; FALLBACK15-NEXT: movq -16(%rsp,%rax), %rdi
8481 ; FALLBACK15-NEXT: shldq %cl, %rsi, %rdi
8482 ; FALLBACK15-NEXT: movq -40(%rsp,%rax), %r8
8483 ; FALLBACK15-NEXT: movq -32(%rsp,%rax), %rax
8484 ; FALLBACK15-NEXT: shldq %cl, %rax, %rsi
8485 ; FALLBACK15-NEXT: shlxq %rcx, %r8, %r9
8486 ; FALLBACK15-NEXT: # kill: def $cl killed $cl killed $rcx
8487 ; FALLBACK15-NEXT: shldq %cl, %r8, %rax
8488 ; FALLBACK15-NEXT: movq %rax, 8(%rdx)
8489 ; FALLBACK15-NEXT: movq %rsi, 16(%rdx)
8490 ; FALLBACK15-NEXT: movq %rdi, 24(%rdx)
8491 ; FALLBACK15-NEXT: movq %r9, (%rdx)
8492 ; FALLBACK15-NEXT: vzeroupper
8493 ; FALLBACK15-NEXT: retq
8495 ; X86-SSE2-LABEL: shl_32bytes_dwordOff:
8496 ; X86-SSE2: # %bb.0:
8497 ; X86-SSE2-NEXT: pushl %ebp
8498 ; X86-SSE2-NEXT: pushl %ebx
8499 ; X86-SSE2-NEXT: pushl %edi
8500 ; X86-SSE2-NEXT: pushl %esi
8501 ; X86-SSE2-NEXT: subl $92, %esp
8502 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
8503 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebp
8504 ; X86-SSE2-NEXT: movl (%ebp), %eax
8505 ; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
8506 ; X86-SSE2-NEXT: movl 4(%ebp), %eax
8507 ; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
8508 ; X86-SSE2-NEXT: movl 8(%ebp), %esi
8509 ; X86-SSE2-NEXT: movl 12(%ebp), %edi
8510 ; X86-SSE2-NEXT: movl 16(%ebp), %ebx
8511 ; X86-SSE2-NEXT: movzbl (%ecx), %ecx
8512 ; X86-SSE2-NEXT: movl 20(%ebp), %edx
8513 ; X86-SSE2-NEXT: movl 24(%ebp), %eax
8514 ; X86-SSE2-NEXT: movl 28(%ebp), %ebp
8515 ; X86-SSE2-NEXT: movl %ebp, {{[0-9]+}}(%esp)
8516 ; X86-SSE2-NEXT: movl %eax, {{[0-9]+}}(%esp)
8517 ; X86-SSE2-NEXT: xorps %xmm0, %xmm0
8518 ; X86-SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
8519 ; X86-SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
8520 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
8521 ; X86-SSE2-NEXT: movl %ebx, {{[0-9]+}}(%esp)
8522 ; X86-SSE2-NEXT: movl %edi, {{[0-9]+}}(%esp)
8523 ; X86-SSE2-NEXT: movl %esi, {{[0-9]+}}(%esp)
8524 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
8525 ; X86-SSE2-NEXT: movl %eax, {{[0-9]+}}(%esp)
8526 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
8527 ; X86-SSE2-NEXT: movl %eax, {{[0-9]+}}(%esp)
8528 ; X86-SSE2-NEXT: shlb $2, %cl
8529 ; X86-SSE2-NEXT: andb $28, %cl
8530 ; X86-SSE2-NEXT: negb %cl
8531 ; X86-SSE2-NEXT: movsbl %cl, %edx
8532 ; X86-SSE2-NEXT: movl 48(%esp,%edx), %eax
8533 ; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
8534 ; X86-SSE2-NEXT: movl 52(%esp,%edx), %eax
8535 ; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
8536 ; X86-SSE2-NEXT: movl 60(%esp,%edx), %esi
8537 ; X86-SSE2-NEXT: movl 56(%esp,%edx), %edi
8538 ; X86-SSE2-NEXT: movl 68(%esp,%edx), %ebx
8539 ; X86-SSE2-NEXT: movl 64(%esp,%edx), %ebp
8540 ; X86-SSE2-NEXT: movl 76(%esp,%edx), %ecx
8541 ; X86-SSE2-NEXT: movl 72(%esp,%edx), %edx
8542 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
8543 ; X86-SSE2-NEXT: movl %edx, 24(%eax)
8544 ; X86-SSE2-NEXT: movl %ecx, 28(%eax)
8545 ; X86-SSE2-NEXT: movl %ebp, 16(%eax)
8546 ; X86-SSE2-NEXT: movl %ebx, 20(%eax)
8547 ; X86-SSE2-NEXT: movl %edi, 8(%eax)
8548 ; X86-SSE2-NEXT: movl %esi, 12(%eax)
8549 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
8550 ; X86-SSE2-NEXT: movl %ecx, (%eax)
8551 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
8552 ; X86-SSE2-NEXT: movl %ecx, 4(%eax)
8553 ; X86-SSE2-NEXT: addl $92, %esp
8554 ; X86-SSE2-NEXT: popl %esi
8555 ; X86-SSE2-NEXT: popl %edi
8556 ; X86-SSE2-NEXT: popl %ebx
8557 ; X86-SSE2-NEXT: popl %ebp
8558 ; X86-SSE2-NEXT: retl
8560 ; X86-SSE42-LABEL: shl_32bytes_dwordOff:
8561 ; X86-SSE42: # %bb.0:
8562 ; X86-SSE42-NEXT: subl $76, %esp
8563 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %eax
8564 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %ecx
8565 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %edx
8566 ; X86-SSE42-NEXT: movups (%edx), %xmm0
8567 ; X86-SSE42-NEXT: movups 16(%edx), %xmm1
8568 ; X86-SSE42-NEXT: movzbl (%ecx), %ecx
8569 ; X86-SSE42-NEXT: xorps %xmm2, %xmm2
8570 ; X86-SSE42-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
8571 ; X86-SSE42-NEXT: movaps %xmm2, (%esp)
8572 ; X86-SSE42-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
8573 ; X86-SSE42-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
8574 ; X86-SSE42-NEXT: shlb $2, %cl
8575 ; X86-SSE42-NEXT: andb $28, %cl
8576 ; X86-SSE42-NEXT: negb %cl
8577 ; X86-SSE42-NEXT: movsbl %cl, %ecx
8578 ; X86-SSE42-NEXT: movups 32(%esp,%ecx), %xmm0
8579 ; X86-SSE42-NEXT: movups 48(%esp,%ecx), %xmm1
8580 ; X86-SSE42-NEXT: movups %xmm1, 16(%eax)
8581 ; X86-SSE42-NEXT: movups %xmm0, (%eax)
8582 ; X86-SSE42-NEXT: addl $76, %esp
8583 ; X86-SSE42-NEXT: retl
8585 ; X86-AVX-LABEL: shl_32bytes_dwordOff:
8586 ; X86-AVX: # %bb.0:
8587 ; X86-AVX-NEXT: subl $76, %esp
8588 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
8589 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
8590 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
8591 ; X86-AVX-NEXT: vmovups (%edx), %ymm0
8592 ; X86-AVX-NEXT: movzbl (%ecx), %ecx
8593 ; X86-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
8594 ; X86-AVX-NEXT: vmovups %ymm1, (%esp)
8595 ; X86-AVX-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
8596 ; X86-AVX-NEXT: shlb $2, %cl
8597 ; X86-AVX-NEXT: andb $28, %cl
8598 ; X86-AVX-NEXT: negb %cl
8599 ; X86-AVX-NEXT: movsbl %cl, %ecx
8600 ; X86-AVX-NEXT: vmovups 32(%esp,%ecx), %xmm0
8601 ; X86-AVX-NEXT: vmovups 48(%esp,%ecx), %xmm1
8602 ; X86-AVX-NEXT: vmovups %xmm1, 16(%eax)
8603 ; X86-AVX-NEXT: vmovups %xmm0, (%eax)
8604 ; X86-AVX-NEXT: addl $76, %esp
8605 ; X86-AVX-NEXT: vzeroupper
8606 ; X86-AVX-NEXT: retl
; IR under test: bitOff = dwordOff << 5 (32 bits per dword), res = src << bitOff.
8607 %src = load i256, ptr %src.ptr, align 1
8608 %dwordOff = load i256, ptr %dwordOff.ptr, align 1
8609 %bitOff = shl i256 %dwordOff, 5
8610 %res = shl i256 %src, %bitOff
8611 store i256 %res, ptr %dst, align 1
8612 ret void
8613 }
8615 define void @shl_32bytes_qwordOff(ptr %src.ptr, ptr %qwordOff.ptr, ptr %dst) nounwind {
8616 ; X64-SSE2-LABEL: shl_32bytes_qwordOff:
8617 ; X64-SSE2: # %bb.0:
8618 ; X64-SSE2-NEXT: movq (%rdi), %rax
8619 ; X64-SSE2-NEXT: movq 8(%rdi), %rcx
8620 ; X64-SSE2-NEXT: movq 16(%rdi), %r8
8621 ; X64-SSE2-NEXT: movq 24(%rdi), %rdi
8622 ; X64-SSE2-NEXT: movzbl (%rsi), %esi
8623 ; X64-SSE2-NEXT: xorps %xmm0, %xmm0
8624 ; X64-SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
8625 ; X64-SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
8626 ; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8627 ; X64-SSE2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
8628 ; X64-SSE2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
8629 ; X64-SSE2-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
8630 ; X64-SSE2-NEXT: shlb $3, %sil
8631 ; X64-SSE2-NEXT: andb $24, %sil
8632 ; X64-SSE2-NEXT: negb %sil
8633 ; X64-SSE2-NEXT: movsbq %sil, %rax
8634 ; X64-SSE2-NEXT: movq -40(%rsp,%rax), %rcx
8635 ; X64-SSE2-NEXT: movq -32(%rsp,%rax), %rsi
8636 ; X64-SSE2-NEXT: movq -16(%rsp,%rax), %rdi
8637 ; X64-SSE2-NEXT: movq -24(%rsp,%rax), %rax
8638 ; X64-SSE2-NEXT: movq %rax, 16(%rdx)
8639 ; X64-SSE2-NEXT: movq %rdi, 24(%rdx)
8640 ; X64-SSE2-NEXT: movq %rcx, (%rdx)
8641 ; X64-SSE2-NEXT: movq %rsi, 8(%rdx)
8642 ; X64-SSE2-NEXT: retq
8644 ; X64-SSE42-LABEL: shl_32bytes_qwordOff:
8645 ; X64-SSE42: # %bb.0:
8646 ; X64-SSE42-NEXT: movups (%rdi), %xmm0
8647 ; X64-SSE42-NEXT: movups 16(%rdi), %xmm1
8648 ; X64-SSE42-NEXT: movzbl (%rsi), %eax
8649 ; X64-SSE42-NEXT: xorps %xmm2, %xmm2
8650 ; X64-SSE42-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
8651 ; X64-SSE42-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
8652 ; X64-SSE42-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
8653 ; X64-SSE42-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
8654 ; X64-SSE42-NEXT: shlb $3, %al
8655 ; X64-SSE42-NEXT: andb $24, %al
8656 ; X64-SSE42-NEXT: negb %al
8657 ; X64-SSE42-NEXT: movsbq %al, %rax
8658 ; X64-SSE42-NEXT: movups -40(%rsp,%rax), %xmm0
8659 ; X64-SSE42-NEXT: movups -24(%rsp,%rax), %xmm1
8660 ; X64-SSE42-NEXT: movups %xmm1, 16(%rdx)
8661 ; X64-SSE42-NEXT: movups %xmm0, (%rdx)
8662 ; X64-SSE42-NEXT: retq
8664 ; X64-AVX-LABEL: shl_32bytes_qwordOff:
8666 ; X64-AVX-NEXT: vmovups (%rdi), %ymm0
8667 ; X64-AVX-NEXT: movzbl (%rsi), %eax
8668 ; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
8669 ; X64-AVX-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
8670 ; X64-AVX-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
8671 ; X64-AVX-NEXT: shlb $3, %al
8672 ; X64-AVX-NEXT: andb $24, %al
8673 ; X64-AVX-NEXT: negb %al
8674 ; X64-AVX-NEXT: movsbq %al, %rax
8675 ; X64-AVX-NEXT: vmovups -40(%rsp,%rax), %xmm0
8676 ; X64-AVX-NEXT: vmovups -24(%rsp,%rax), %xmm1
8677 ; X64-AVX-NEXT: vmovups %xmm1, 16(%rdx)
8678 ; X64-AVX-NEXT: vmovups %xmm0, (%rdx)
8679 ; X64-AVX-NEXT: vzeroupper
8680 ; X64-AVX-NEXT: retq
8682 ; X86-SSE2-LABEL: shl_32bytes_qwordOff:
8683 ; X86-SSE2: # %bb.0:
8684 ; X86-SSE2-NEXT: pushl %ebp
8685 ; X86-SSE2-NEXT: pushl %ebx
8686 ; X86-SSE2-NEXT: pushl %edi
8687 ; X86-SSE2-NEXT: pushl %esi
8688 ; X86-SSE2-NEXT: subl $92, %esp
8689 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
8690 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebp
8691 ; X86-SSE2-NEXT: movl (%ebp), %eax
8692 ; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
8693 ; X86-SSE2-NEXT: movl 4(%ebp), %eax
8694 ; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
8695 ; X86-SSE2-NEXT: movl 8(%ebp), %esi
8696 ; X86-SSE2-NEXT: movl 12(%ebp), %edi
8697 ; X86-SSE2-NEXT: movl 16(%ebp), %ebx
8698 ; X86-SSE2-NEXT: movzbl (%ecx), %ecx
8699 ; X86-SSE2-NEXT: movl 20(%ebp), %edx
8700 ; X86-SSE2-NEXT: movl 24(%ebp), %eax
8701 ; X86-SSE2-NEXT: movl 28(%ebp), %ebp
8702 ; X86-SSE2-NEXT: movl %ebp, {{[0-9]+}}(%esp)
8703 ; X86-SSE2-NEXT: movl %eax, {{[0-9]+}}(%esp)
8704 ; X86-SSE2-NEXT: xorps %xmm0, %xmm0
8705 ; X86-SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
8706 ; X86-SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
8707 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
8708 ; X86-SSE2-NEXT: movl %ebx, {{[0-9]+}}(%esp)
8709 ; X86-SSE2-NEXT: movl %edi, {{[0-9]+}}(%esp)
8710 ; X86-SSE2-NEXT: movl %esi, {{[0-9]+}}(%esp)
8711 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
8712 ; X86-SSE2-NEXT: movl %eax, {{[0-9]+}}(%esp)
8713 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
8714 ; X86-SSE2-NEXT: movl %eax, {{[0-9]+}}(%esp)
8715 ; X86-SSE2-NEXT: shlb $3, %cl
8716 ; X86-SSE2-NEXT: andb $24, %cl
8717 ; X86-SSE2-NEXT: negb %cl
8718 ; X86-SSE2-NEXT: movsbl %cl, %edx
8719 ; X86-SSE2-NEXT: movl 48(%esp,%edx), %eax
8720 ; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
8721 ; X86-SSE2-NEXT: movl 52(%esp,%edx), %eax
8722 ; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
8723 ; X86-SSE2-NEXT: movl 60(%esp,%edx), %esi
8724 ; X86-SSE2-NEXT: movl 56(%esp,%edx), %edi
8725 ; X86-SSE2-NEXT: movl 68(%esp,%edx), %ebx
8726 ; X86-SSE2-NEXT: movl 64(%esp,%edx), %ebp
8727 ; X86-SSE2-NEXT: movl 76(%esp,%edx), %ecx
8728 ; X86-SSE2-NEXT: movl 72(%esp,%edx), %edx
8729 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
8730 ; X86-SSE2-NEXT: movl %edx, 24(%eax)
8731 ; X86-SSE2-NEXT: movl %ecx, 28(%eax)
8732 ; X86-SSE2-NEXT: movl %ebp, 16(%eax)
8733 ; X86-SSE2-NEXT: movl %ebx, 20(%eax)
8734 ; X86-SSE2-NEXT: movl %edi, 8(%eax)
8735 ; X86-SSE2-NEXT: movl %esi, 12(%eax)
8736 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
8737 ; X86-SSE2-NEXT: movl %ecx, (%eax)
8738 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
8739 ; X86-SSE2-NEXT: movl %ecx, 4(%eax)
8740 ; X86-SSE2-NEXT: addl $92, %esp
8741 ; X86-SSE2-NEXT: popl %esi
8742 ; X86-SSE2-NEXT: popl %edi
8743 ; X86-SSE2-NEXT: popl %ebx
8744 ; X86-SSE2-NEXT: popl %ebp
8745 ; X86-SSE2-NEXT: retl
8747 ; X86-SSE42-LABEL: shl_32bytes_qwordOff:
8748 ; X86-SSE42: # %bb.0:
8749 ; X86-SSE42-NEXT: subl $76, %esp
8750 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %eax
8751 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %ecx
8752 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %edx
8753 ; X86-SSE42-NEXT: movups (%edx), %xmm0
8754 ; X86-SSE42-NEXT: movups 16(%edx), %xmm1
8755 ; X86-SSE42-NEXT: movzbl (%ecx), %ecx
8756 ; X86-SSE42-NEXT: xorps %xmm2, %xmm2
8757 ; X86-SSE42-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
8758 ; X86-SSE42-NEXT: movaps %xmm2, (%esp)
8759 ; X86-SSE42-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
8760 ; X86-SSE42-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
8761 ; X86-SSE42-NEXT: shlb $3, %cl
8762 ; X86-SSE42-NEXT: andb $24, %cl
8763 ; X86-SSE42-NEXT: negb %cl
8764 ; X86-SSE42-NEXT: movsbl %cl, %ecx
8765 ; X86-SSE42-NEXT: movups 32(%esp,%ecx), %xmm0
8766 ; X86-SSE42-NEXT: movups 48(%esp,%ecx), %xmm1
8767 ; X86-SSE42-NEXT: movups %xmm1, 16(%eax)
8768 ; X86-SSE42-NEXT: movups %xmm0, (%eax)
8769 ; X86-SSE42-NEXT: addl $76, %esp
8770 ; X86-SSE42-NEXT: retl
8772 ; X86-AVX-LABEL: shl_32bytes_qwordOff:
8774 ; X86-AVX-NEXT: subl $76, %esp
8775 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
8776 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
8777 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
8778 ; X86-AVX-NEXT: vmovups (%edx), %ymm0
8779 ; X86-AVX-NEXT: movzbl (%ecx), %ecx
8780 ; X86-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
8781 ; X86-AVX-NEXT: vmovups %ymm1, (%esp)
8782 ; X86-AVX-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
8783 ; X86-AVX-NEXT: shlb $3, %cl
8784 ; X86-AVX-NEXT: andb $24, %cl
8785 ; X86-AVX-NEXT: negb %cl
8786 ; X86-AVX-NEXT: movsbl %cl, %ecx
8787 ; X86-AVX-NEXT: vmovups 32(%esp,%ecx), %xmm0
8788 ; X86-AVX-NEXT: vmovups 48(%esp,%ecx), %xmm1
8789 ; X86-AVX-NEXT: vmovups %xmm1, 16(%eax)
8790 ; X86-AVX-NEXT: vmovups %xmm0, (%eax)
8791 ; X86-AVX-NEXT: addl $76, %esp
8792 ; X86-AVX-NEXT: vzeroupper
8793 ; X86-AVX-NEXT: retl
8794 %src = load i256, ptr %src.ptr, align 1
8795 %qwordOff = load i256, ptr %qwordOff.ptr, align 1
8796 %bitOff = shl i256 %qwordOff, 6
8797 %res = shl i256 %src, %bitOff
8798 store i256 %res, ptr %dst, align 1
8802 define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
8803 ; FALLBACK0-LABEL: ashr_32bytes:
8804 ; FALLBACK0: # %bb.0:
8805 ; FALLBACK0-NEXT: pushq %rbx
8806 ; FALLBACK0-NEXT: movq (%rdi), %rcx
8807 ; FALLBACK0-NEXT: movq 8(%rdi), %r8
8808 ; FALLBACK0-NEXT: movq 16(%rdi), %r9
8809 ; FALLBACK0-NEXT: movq 24(%rdi), %rdi
8810 ; FALLBACK0-NEXT: movzbl (%rsi), %esi
8811 ; FALLBACK0-NEXT: leal (,%rsi,8), %eax
8812 ; FALLBACK0-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8813 ; FALLBACK0-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
8814 ; FALLBACK0-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
8815 ; FALLBACK0-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
8816 ; FALLBACK0-NEXT: sarq $63, %rdi
8817 ; FALLBACK0-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8818 ; FALLBACK0-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8819 ; FALLBACK0-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8820 ; FALLBACK0-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8821 ; FALLBACK0-NEXT: andb $24, %sil
8822 ; FALLBACK0-NEXT: movzbl %sil, %r9d
8823 ; FALLBACK0-NEXT: movq -64(%rsp,%r9), %r10
8824 ; FALLBACK0-NEXT: movq -56(%rsp,%r9), %rdi
8825 ; FALLBACK0-NEXT: movq %rdi, %r11
8826 ; FALLBACK0-NEXT: movl %eax, %ecx
8827 ; FALLBACK0-NEXT: shrq %cl, %r11
8828 ; FALLBACK0-NEXT: movl %eax, %esi
8829 ; FALLBACK0-NEXT: notb %sil
8830 ; FALLBACK0-NEXT: movq -48(%rsp,%r9), %rbx
8831 ; FALLBACK0-NEXT: leaq (%rbx,%rbx), %r8
8832 ; FALLBACK0-NEXT: movl %esi, %ecx
8833 ; FALLBACK0-NEXT: shlq %cl, %r8
8834 ; FALLBACK0-NEXT: orq %r11, %r8
8835 ; FALLBACK0-NEXT: movl %eax, %ecx
8836 ; FALLBACK0-NEXT: shrq %cl, %r10
8837 ; FALLBACK0-NEXT: addq %rdi, %rdi
8838 ; FALLBACK0-NEXT: movl %esi, %ecx
8839 ; FALLBACK0-NEXT: shlq %cl, %rdi
8840 ; FALLBACK0-NEXT: orq %r10, %rdi
8841 ; FALLBACK0-NEXT: movl %eax, %ecx
8842 ; FALLBACK0-NEXT: shrq %cl, %rbx
8843 ; FALLBACK0-NEXT: movq -40(%rsp,%r9), %r9
8844 ; FALLBACK0-NEXT: leaq (%r9,%r9), %r10
8845 ; FALLBACK0-NEXT: movl %esi, %ecx
8846 ; FALLBACK0-NEXT: shlq %cl, %r10
8847 ; FALLBACK0-NEXT: orq %rbx, %r10
8848 ; FALLBACK0-NEXT: movl %eax, %ecx
8849 ; FALLBACK0-NEXT: sarq %cl, %r9
8850 ; FALLBACK0-NEXT: movq %r9, 24(%rdx)
8851 ; FALLBACK0-NEXT: movq %r10, 16(%rdx)
8852 ; FALLBACK0-NEXT: movq %rdi, (%rdx)
8853 ; FALLBACK0-NEXT: movq %r8, 8(%rdx)
8854 ; FALLBACK0-NEXT: popq %rbx
8855 ; FALLBACK0-NEXT: retq
8857 ; FALLBACK1-LABEL: ashr_32bytes:
8858 ; FALLBACK1: # %bb.0:
8859 ; FALLBACK1-NEXT: movq (%rdi), %rax
8860 ; FALLBACK1-NEXT: movq 8(%rdi), %r8
8861 ; FALLBACK1-NEXT: movq 16(%rdi), %r9
8862 ; FALLBACK1-NEXT: movq 24(%rdi), %rdi
8863 ; FALLBACK1-NEXT: movzbl (%rsi), %esi
8864 ; FALLBACK1-NEXT: leal (,%rsi,8), %ecx
8865 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8866 ; FALLBACK1-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
8867 ; FALLBACK1-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
8868 ; FALLBACK1-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
8869 ; FALLBACK1-NEXT: sarq $63, %rdi
8870 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8871 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8872 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8873 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8874 ; FALLBACK1-NEXT: andb $24, %sil
8875 ; FALLBACK1-NEXT: movzbl %sil, %eax
8876 ; FALLBACK1-NEXT: movq -56(%rsp,%rax), %rsi
8877 ; FALLBACK1-NEXT: movq -72(%rsp,%rax), %rdi
8878 ; FALLBACK1-NEXT: movq -64(%rsp,%rax), %r8
8879 ; FALLBACK1-NEXT: movq %r8, %r9
8880 ; FALLBACK1-NEXT: shrdq %cl, %rsi, %r9
8881 ; FALLBACK1-NEXT: movq -48(%rsp,%rax), %rax
8882 ; FALLBACK1-NEXT: shrdq %cl, %rax, %rsi
8883 ; FALLBACK1-NEXT: shrdq %cl, %r8, %rdi
8884 ; FALLBACK1-NEXT: # kill: def $cl killed $cl killed $ecx
8885 ; FALLBACK1-NEXT: sarq %cl, %rax
8886 ; FALLBACK1-NEXT: movq %rsi, 16(%rdx)
8887 ; FALLBACK1-NEXT: movq %rax, 24(%rdx)
8888 ; FALLBACK1-NEXT: movq %rdi, (%rdx)
8889 ; FALLBACK1-NEXT: movq %r9, 8(%rdx)
8890 ; FALLBACK1-NEXT: retq
8892 ; FALLBACK2-LABEL: ashr_32bytes:
8893 ; FALLBACK2: # %bb.0:
8894 ; FALLBACK2-NEXT: movq (%rdi), %rcx
8895 ; FALLBACK2-NEXT: movq 8(%rdi), %r8
8896 ; FALLBACK2-NEXT: movq 16(%rdi), %r9
8897 ; FALLBACK2-NEXT: movq 24(%rdi), %rdi
8898 ; FALLBACK2-NEXT: movzbl (%rsi), %esi
8899 ; FALLBACK2-NEXT: leal (,%rsi,8), %eax
8900 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8901 ; FALLBACK2-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
8902 ; FALLBACK2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
8903 ; FALLBACK2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
8904 ; FALLBACK2-NEXT: sarq $63, %rdi
8905 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8906 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8907 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8908 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8909 ; FALLBACK2-NEXT: andb $24, %sil
8910 ; FALLBACK2-NEXT: movzbl %sil, %ecx
8911 ; FALLBACK2-NEXT: movq -64(%rsp,%rcx), %rsi
8912 ; FALLBACK2-NEXT: movq -56(%rsp,%rcx), %rdi
8913 ; FALLBACK2-NEXT: shrxq %rax, %rsi, %r8
8914 ; FALLBACK2-NEXT: shrxq %rax, -72(%rsp,%rcx), %r9
8915 ; FALLBACK2-NEXT: shrxq %rax, %rdi, %r10
8916 ; FALLBACK2-NEXT: movq -48(%rsp,%rcx), %rcx
8917 ; FALLBACK2-NEXT: sarxq %rax, %rcx, %r11
8918 ; FALLBACK2-NEXT: # kill: def $al killed $al killed $rax def $rax
8919 ; FALLBACK2-NEXT: notb %al
8920 ; FALLBACK2-NEXT: addq %rdi, %rdi
8921 ; FALLBACK2-NEXT: shlxq %rax, %rdi, %rdi
8922 ; FALLBACK2-NEXT: orq %r8, %rdi
8923 ; FALLBACK2-NEXT: addq %rsi, %rsi
8924 ; FALLBACK2-NEXT: shlxq %rax, %rsi, %rsi
8925 ; FALLBACK2-NEXT: orq %r9, %rsi
8926 ; FALLBACK2-NEXT: addq %rcx, %rcx
8927 ; FALLBACK2-NEXT: shlxq %rax, %rcx, %rax
8928 ; FALLBACK2-NEXT: orq %r10, %rax
8929 ; FALLBACK2-NEXT: movq %r11, 24(%rdx)
8930 ; FALLBACK2-NEXT: movq %rax, 16(%rdx)
8931 ; FALLBACK2-NEXT: movq %rsi, (%rdx)
8932 ; FALLBACK2-NEXT: movq %rdi, 8(%rdx)
8933 ; FALLBACK2-NEXT: retq
8935 ; FALLBACK3-LABEL: ashr_32bytes:
8936 ; FALLBACK3: # %bb.0:
8937 ; FALLBACK3-NEXT: movq (%rdi), %rax
8938 ; FALLBACK3-NEXT: movq 8(%rdi), %r8
8939 ; FALLBACK3-NEXT: movq 16(%rdi), %r9
8940 ; FALLBACK3-NEXT: movq 24(%rdi), %rdi
8941 ; FALLBACK3-NEXT: movzbl (%rsi), %esi
8942 ; FALLBACK3-NEXT: leal (,%rsi,8), %ecx
8943 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8944 ; FALLBACK3-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
8945 ; FALLBACK3-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
8946 ; FALLBACK3-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
8947 ; FALLBACK3-NEXT: sarq $63, %rdi
8948 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8949 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8950 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8951 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8952 ; FALLBACK3-NEXT: andb $24, %sil
8953 ; FALLBACK3-NEXT: movzbl %sil, %eax
8954 ; FALLBACK3-NEXT: movq -56(%rsp,%rax), %rsi
8955 ; FALLBACK3-NEXT: movq -72(%rsp,%rax), %rdi
8956 ; FALLBACK3-NEXT: movq -64(%rsp,%rax), %r8
8957 ; FALLBACK3-NEXT: movq %r8, %r9
8958 ; FALLBACK3-NEXT: shrdq %cl, %rsi, %r9
8959 ; FALLBACK3-NEXT: movq -48(%rsp,%rax), %rax
8960 ; FALLBACK3-NEXT: shrdq %cl, %rax, %rsi
8961 ; FALLBACK3-NEXT: shrdq %cl, %r8, %rdi
8962 ; FALLBACK3-NEXT: sarxq %rcx, %rax, %rax
8963 ; FALLBACK3-NEXT: movq %rsi, 16(%rdx)
8964 ; FALLBACK3-NEXT: movq %rax, 24(%rdx)
8965 ; FALLBACK3-NEXT: movq %rdi, (%rdx)
8966 ; FALLBACK3-NEXT: movq %r9, 8(%rdx)
8967 ; FALLBACK3-NEXT: retq
8969 ; FALLBACK4-LABEL: ashr_32bytes:
8970 ; FALLBACK4: # %bb.0:
8971 ; FALLBACK4-NEXT: pushq %rbx
8972 ; FALLBACK4-NEXT: movups (%rdi), %xmm0
8973 ; FALLBACK4-NEXT: movq 16(%rdi), %rcx
8974 ; FALLBACK4-NEXT: movq 24(%rdi), %rdi
8975 ; FALLBACK4-NEXT: movzbl (%rsi), %esi
8976 ; FALLBACK4-NEXT: leal (,%rsi,8), %eax
8977 ; FALLBACK4-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8978 ; FALLBACK4-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
8979 ; FALLBACK4-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
8980 ; FALLBACK4-NEXT: sarq $63, %rdi
8981 ; FALLBACK4-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8982 ; FALLBACK4-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8983 ; FALLBACK4-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8984 ; FALLBACK4-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
8985 ; FALLBACK4-NEXT: andb $24, %sil
8986 ; FALLBACK4-NEXT: movzbl %sil, %r9d
8987 ; FALLBACK4-NEXT: movq -64(%rsp,%r9), %r10
8988 ; FALLBACK4-NEXT: movq -56(%rsp,%r9), %r8
8989 ; FALLBACK4-NEXT: movl %eax, %ecx
8990 ; FALLBACK4-NEXT: shrq %cl, %r10
8991 ; FALLBACK4-NEXT: movl %eax, %esi
8992 ; FALLBACK4-NEXT: notb %sil
8993 ; FALLBACK4-NEXT: leaq (%r8,%r8), %rdi
8994 ; FALLBACK4-NEXT: movl %esi, %ecx
8995 ; FALLBACK4-NEXT: shlq %cl, %rdi
8996 ; FALLBACK4-NEXT: orq %r10, %rdi
8997 ; FALLBACK4-NEXT: movq -48(%rsp,%r9), %r10
8998 ; FALLBACK4-NEXT: movq %r10, %r11
8999 ; FALLBACK4-NEXT: movl %eax, %ecx
9000 ; FALLBACK4-NEXT: shrq %cl, %r11
9001 ; FALLBACK4-NEXT: movq -40(%rsp,%r9), %r9
9002 ; FALLBACK4-NEXT: leaq (%r9,%r9), %rbx
9003 ; FALLBACK4-NEXT: movl %esi, %ecx
9004 ; FALLBACK4-NEXT: shlq %cl, %rbx
9005 ; FALLBACK4-NEXT: orq %r11, %rbx
9006 ; FALLBACK4-NEXT: movl %eax, %ecx
9007 ; FALLBACK4-NEXT: shrq %cl, %r8
9008 ; FALLBACK4-NEXT: addq %r10, %r10
9009 ; FALLBACK4-NEXT: movl %esi, %ecx
9010 ; FALLBACK4-NEXT: shlq %cl, %r10
9011 ; FALLBACK4-NEXT: orq %r8, %r10
9012 ; FALLBACK4-NEXT: movl %eax, %ecx
9013 ; FALLBACK4-NEXT: sarq %cl, %r9
9014 ; FALLBACK4-NEXT: movq %r9, 24(%rdx)
9015 ; FALLBACK4-NEXT: movq %r10, 8(%rdx)
9016 ; FALLBACK4-NEXT: movq %rbx, 16(%rdx)
9017 ; FALLBACK4-NEXT: movq %rdi, (%rdx)
9018 ; FALLBACK4-NEXT: popq %rbx
9019 ; FALLBACK4-NEXT: retq
9021 ; FALLBACK5-LABEL: ashr_32bytes:
9022 ; FALLBACK5: # %bb.0:
9023 ; FALLBACK5-NEXT: movups (%rdi), %xmm0
9024 ; FALLBACK5-NEXT: movq 16(%rdi), %rax
9025 ; FALLBACK5-NEXT: movq 24(%rdi), %rdi
9026 ; FALLBACK5-NEXT: movzbl (%rsi), %esi
9027 ; FALLBACK5-NEXT: leal (,%rsi,8), %ecx
9028 ; FALLBACK5-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9029 ; FALLBACK5-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
9030 ; FALLBACK5-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
9031 ; FALLBACK5-NEXT: sarq $63, %rdi
9032 ; FALLBACK5-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9033 ; FALLBACK5-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9034 ; FALLBACK5-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9035 ; FALLBACK5-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9036 ; FALLBACK5-NEXT: andb $24, %sil
9037 ; FALLBACK5-NEXT: movzbl %sil, %eax
9038 ; FALLBACK5-NEXT: movq -48(%rsp,%rax), %rsi
9039 ; FALLBACK5-NEXT: movq -56(%rsp,%rax), %rdi
9040 ; FALLBACK5-NEXT: movq %rdi, %r8
9041 ; FALLBACK5-NEXT: shrdq %cl, %rsi, %r8
9042 ; FALLBACK5-NEXT: movq -72(%rsp,%rax), %r9
9043 ; FALLBACK5-NEXT: movq -64(%rsp,%rax), %rax
9044 ; FALLBACK5-NEXT: movq %rax, %r10
9045 ; FALLBACK5-NEXT: shrdq %cl, %rdi, %r10
9046 ; FALLBACK5-NEXT: shrdq %cl, %rax, %r9
9047 ; FALLBACK5-NEXT: # kill: def $cl killed $cl killed $ecx
9048 ; FALLBACK5-NEXT: sarq %cl, %rsi
9049 ; FALLBACK5-NEXT: movq %r10, 8(%rdx)
9050 ; FALLBACK5-NEXT: movq %r8, 16(%rdx)
9051 ; FALLBACK5-NEXT: movq %rsi, 24(%rdx)
9052 ; FALLBACK5-NEXT: movq %r9, (%rdx)
9053 ; FALLBACK5-NEXT: retq
9055 ; FALLBACK6-LABEL: ashr_32bytes:
9056 ; FALLBACK6: # %bb.0:
9057 ; FALLBACK6-NEXT: movups (%rdi), %xmm0
9058 ; FALLBACK6-NEXT: movq 16(%rdi), %rcx
9059 ; FALLBACK6-NEXT: movq 24(%rdi), %rdi
9060 ; FALLBACK6-NEXT: movzbl (%rsi), %esi
9061 ; FALLBACK6-NEXT: leal (,%rsi,8), %eax
9062 ; FALLBACK6-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9063 ; FALLBACK6-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
9064 ; FALLBACK6-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
9065 ; FALLBACK6-NEXT: sarq $63, %rdi
9066 ; FALLBACK6-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9067 ; FALLBACK6-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9068 ; FALLBACK6-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9069 ; FALLBACK6-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9070 ; FALLBACK6-NEXT: andb $24, %sil
9071 ; FALLBACK6-NEXT: movzbl %sil, %ecx
9072 ; FALLBACK6-NEXT: shrxq %rax, -72(%rsp,%rcx), %rsi
9073 ; FALLBACK6-NEXT: movq -64(%rsp,%rcx), %rdi
9074 ; FALLBACK6-NEXT: movq -56(%rsp,%rcx), %r8
9075 ; FALLBACK6-NEXT: shrxq %rax, %r8, %r9
9076 ; FALLBACK6-NEXT: movq -48(%rsp,%rcx), %rcx
9077 ; FALLBACK6-NEXT: shrxq %rax, %rdi, %r10
9078 ; FALLBACK6-NEXT: sarxq %rax, %rcx, %r11
9079 ; FALLBACK6-NEXT: # kill: def $al killed $al killed $rax def $rax
9080 ; FALLBACK6-NEXT: notb %al
9081 ; FALLBACK6-NEXT: addq %rdi, %rdi
9082 ; FALLBACK6-NEXT: shlxq %rax, %rdi, %rdi
9083 ; FALLBACK6-NEXT: orq %rsi, %rdi
9084 ; FALLBACK6-NEXT: addq %rcx, %rcx
9085 ; FALLBACK6-NEXT: shlxq %rax, %rcx, %rcx
9086 ; FALLBACK6-NEXT: orq %r9, %rcx
9087 ; FALLBACK6-NEXT: addq %r8, %r8
9088 ; FALLBACK6-NEXT: shlxq %rax, %r8, %rax
9089 ; FALLBACK6-NEXT: orq %r10, %rax
9090 ; FALLBACK6-NEXT: movq %r11, 24(%rdx)
9091 ; FALLBACK6-NEXT: movq %rax, 8(%rdx)
9092 ; FALLBACK6-NEXT: movq %rcx, 16(%rdx)
9093 ; FALLBACK6-NEXT: movq %rdi, (%rdx)
9094 ; FALLBACK6-NEXT: retq
9096 ; FALLBACK7-LABEL: ashr_32bytes:
9097 ; FALLBACK7: # %bb.0:
9098 ; FALLBACK7-NEXT: movups (%rdi), %xmm0
9099 ; FALLBACK7-NEXT: movq 16(%rdi), %rax
9100 ; FALLBACK7-NEXT: movq 24(%rdi), %rdi
9101 ; FALLBACK7-NEXT: movzbl (%rsi), %esi
9102 ; FALLBACK7-NEXT: leal (,%rsi,8), %ecx
9103 ; FALLBACK7-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9104 ; FALLBACK7-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
9105 ; FALLBACK7-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
9106 ; FALLBACK7-NEXT: sarq $63, %rdi
9107 ; FALLBACK7-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9108 ; FALLBACK7-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9109 ; FALLBACK7-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9110 ; FALLBACK7-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9111 ; FALLBACK7-NEXT: andb $24, %sil
9112 ; FALLBACK7-NEXT: movzbl %sil, %eax
9113 ; FALLBACK7-NEXT: movq -48(%rsp,%rax), %rsi
9114 ; FALLBACK7-NEXT: movq -56(%rsp,%rax), %rdi
9115 ; FALLBACK7-NEXT: movq %rdi, %r8
9116 ; FALLBACK7-NEXT: shrdq %cl, %rsi, %r8
9117 ; FALLBACK7-NEXT: movq -72(%rsp,%rax), %r9
9118 ; FALLBACK7-NEXT: movq -64(%rsp,%rax), %rax
9119 ; FALLBACK7-NEXT: movq %rax, %r10
9120 ; FALLBACK7-NEXT: shrdq %cl, %rdi, %r10
9121 ; FALLBACK7-NEXT: shrdq %cl, %rax, %r9
9122 ; FALLBACK7-NEXT: sarxq %rcx, %rsi, %rax
9123 ; FALLBACK7-NEXT: movq %r10, 8(%rdx)
9124 ; FALLBACK7-NEXT: movq %r8, 16(%rdx)
9125 ; FALLBACK7-NEXT: movq %rax, 24(%rdx)
9126 ; FALLBACK7-NEXT: movq %r9, (%rdx)
9127 ; FALLBACK7-NEXT: retq
9129 ; FALLBACK8-LABEL: ashr_32bytes:
9130 ; FALLBACK8: # %bb.0:
9131 ; FALLBACK8-NEXT: pushq %rbx
9132 ; FALLBACK8-NEXT: vmovups (%rdi), %xmm0
9133 ; FALLBACK8-NEXT: movq 16(%rdi), %rcx
9134 ; FALLBACK8-NEXT: movq 24(%rdi), %rdi
9135 ; FALLBACK8-NEXT: movzbl (%rsi), %esi
9136 ; FALLBACK8-NEXT: leal (,%rsi,8), %eax
9137 ; FALLBACK8-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9138 ; FALLBACK8-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
9139 ; FALLBACK8-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
9140 ; FALLBACK8-NEXT: sarq $63, %rdi
9141 ; FALLBACK8-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9142 ; FALLBACK8-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9143 ; FALLBACK8-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9144 ; FALLBACK8-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9145 ; FALLBACK8-NEXT: andb $24, %sil
9146 ; FALLBACK8-NEXT: movzbl %sil, %r9d
9147 ; FALLBACK8-NEXT: movq -64(%rsp,%r9), %r10
9148 ; FALLBACK8-NEXT: movq -56(%rsp,%r9), %r8
9149 ; FALLBACK8-NEXT: movl %eax, %ecx
9150 ; FALLBACK8-NEXT: shrq %cl, %r10
9151 ; FALLBACK8-NEXT: movl %eax, %esi
9152 ; FALLBACK8-NEXT: notb %sil
9153 ; FALLBACK8-NEXT: leaq (%r8,%r8), %rdi
9154 ; FALLBACK8-NEXT: movl %esi, %ecx
9155 ; FALLBACK8-NEXT: shlq %cl, %rdi
9156 ; FALLBACK8-NEXT: orq %r10, %rdi
9157 ; FALLBACK8-NEXT: movq -48(%rsp,%r9), %r10
9158 ; FALLBACK8-NEXT: movq %r10, %r11
9159 ; FALLBACK8-NEXT: movl %eax, %ecx
9160 ; FALLBACK8-NEXT: shrq %cl, %r11
9161 ; FALLBACK8-NEXT: movq -40(%rsp,%r9), %r9
9162 ; FALLBACK8-NEXT: leaq (%r9,%r9), %rbx
9163 ; FALLBACK8-NEXT: movl %esi, %ecx
9164 ; FALLBACK8-NEXT: shlq %cl, %rbx
9165 ; FALLBACK8-NEXT: orq %r11, %rbx
9166 ; FALLBACK8-NEXT: movl %eax, %ecx
9167 ; FALLBACK8-NEXT: shrq %cl, %r8
9168 ; FALLBACK8-NEXT: addq %r10, %r10
9169 ; FALLBACK8-NEXT: movl %esi, %ecx
9170 ; FALLBACK8-NEXT: shlq %cl, %r10
9171 ; FALLBACK8-NEXT: orq %r8, %r10
9172 ; FALLBACK8-NEXT: movl %eax, %ecx
9173 ; FALLBACK8-NEXT: sarq %cl, %r9
9174 ; FALLBACK8-NEXT: movq %r9, 24(%rdx)
9175 ; FALLBACK8-NEXT: movq %r10, 8(%rdx)
9176 ; FALLBACK8-NEXT: movq %rbx, 16(%rdx)
9177 ; FALLBACK8-NEXT: movq %rdi, (%rdx)
9178 ; FALLBACK8-NEXT: popq %rbx
9179 ; FALLBACK8-NEXT: retq
9181 ; FALLBACK9-LABEL: ashr_32bytes:
9182 ; FALLBACK9: # %bb.0:
9183 ; FALLBACK9-NEXT: vmovups (%rdi), %xmm0
9184 ; FALLBACK9-NEXT: movq 16(%rdi), %rax
9185 ; FALLBACK9-NEXT: movq 24(%rdi), %rdi
9186 ; FALLBACK9-NEXT: movzbl (%rsi), %esi
9187 ; FALLBACK9-NEXT: leal (,%rsi,8), %ecx
9188 ; FALLBACK9-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9189 ; FALLBACK9-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
9190 ; FALLBACK9-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
9191 ; FALLBACK9-NEXT: sarq $63, %rdi
9192 ; FALLBACK9-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9193 ; FALLBACK9-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9194 ; FALLBACK9-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9195 ; FALLBACK9-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9196 ; FALLBACK9-NEXT: andb $24, %sil
9197 ; FALLBACK9-NEXT: movzbl %sil, %eax
9198 ; FALLBACK9-NEXT: movq -48(%rsp,%rax), %rsi
9199 ; FALLBACK9-NEXT: movq -56(%rsp,%rax), %rdi
9200 ; FALLBACK9-NEXT: movq %rdi, %r8
9201 ; FALLBACK9-NEXT: shrdq %cl, %rsi, %r8
9202 ; FALLBACK9-NEXT: movq -72(%rsp,%rax), %r9
9203 ; FALLBACK9-NEXT: movq -64(%rsp,%rax), %rax
9204 ; FALLBACK9-NEXT: movq %rax, %r10
9205 ; FALLBACK9-NEXT: shrdq %cl, %rdi, %r10
9206 ; FALLBACK9-NEXT: shrdq %cl, %rax, %r9
9207 ; FALLBACK9-NEXT: # kill: def $cl killed $cl killed $ecx
9208 ; FALLBACK9-NEXT: sarq %cl, %rsi
9209 ; FALLBACK9-NEXT: movq %r10, 8(%rdx)
9210 ; FALLBACK9-NEXT: movq %r8, 16(%rdx)
9211 ; FALLBACK9-NEXT: movq %rsi, 24(%rdx)
9212 ; FALLBACK9-NEXT: movq %r9, (%rdx)
9213 ; FALLBACK9-NEXT: retq
9215 ; FALLBACK10-LABEL: ashr_32bytes:
9216 ; FALLBACK10: # %bb.0:
9217 ; FALLBACK10-NEXT: vmovups (%rdi), %xmm0
9218 ; FALLBACK10-NEXT: movq 16(%rdi), %rcx
9219 ; FALLBACK10-NEXT: movq 24(%rdi), %rdi
9220 ; FALLBACK10-NEXT: movzbl (%rsi), %esi
9221 ; FALLBACK10-NEXT: leal (,%rsi,8), %eax
9222 ; FALLBACK10-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9223 ; FALLBACK10-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
9224 ; FALLBACK10-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
9225 ; FALLBACK10-NEXT: sarq $63, %rdi
9226 ; FALLBACK10-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9227 ; FALLBACK10-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9228 ; FALLBACK10-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9229 ; FALLBACK10-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9230 ; FALLBACK10-NEXT: andb $24, %sil
9231 ; FALLBACK10-NEXT: movzbl %sil, %ecx
9232 ; FALLBACK10-NEXT: shrxq %rax, -72(%rsp,%rcx), %rsi
9233 ; FALLBACK10-NEXT: movq -64(%rsp,%rcx), %rdi
9234 ; FALLBACK10-NEXT: movq -56(%rsp,%rcx), %r8
9235 ; FALLBACK10-NEXT: shrxq %rax, %r8, %r9
9236 ; FALLBACK10-NEXT: movq -48(%rsp,%rcx), %rcx
9237 ; FALLBACK10-NEXT: shrxq %rax, %rdi, %r10
9238 ; FALLBACK10-NEXT: sarxq %rax, %rcx, %r11
9239 ; FALLBACK10-NEXT: # kill: def $al killed $al killed $rax def $rax
9240 ; FALLBACK10-NEXT: notb %al
9241 ; FALLBACK10-NEXT: addq %rdi, %rdi
9242 ; FALLBACK10-NEXT: shlxq %rax, %rdi, %rdi
9243 ; FALLBACK10-NEXT: orq %rsi, %rdi
9244 ; FALLBACK10-NEXT: addq %rcx, %rcx
9245 ; FALLBACK10-NEXT: shlxq %rax, %rcx, %rcx
9246 ; FALLBACK10-NEXT: orq %r9, %rcx
9247 ; FALLBACK10-NEXT: addq %r8, %r8
9248 ; FALLBACK10-NEXT: shlxq %rax, %r8, %rax
9249 ; FALLBACK10-NEXT: orq %r10, %rax
9250 ; FALLBACK10-NEXT: movq %r11, 24(%rdx)
9251 ; FALLBACK10-NEXT: movq %rax, 8(%rdx)
9252 ; FALLBACK10-NEXT: movq %rcx, 16(%rdx)
9253 ; FALLBACK10-NEXT: movq %rdi, (%rdx)
9254 ; FALLBACK10-NEXT: retq
9256 ; FALLBACK11-LABEL: ashr_32bytes:
9257 ; FALLBACK11: # %bb.0:
9258 ; FALLBACK11-NEXT: vmovups (%rdi), %xmm0
9259 ; FALLBACK11-NEXT: movq 16(%rdi), %rax
9260 ; FALLBACK11-NEXT: movq 24(%rdi), %rdi
9261 ; FALLBACK11-NEXT: movzbl (%rsi), %esi
9262 ; FALLBACK11-NEXT: leal (,%rsi,8), %ecx
9263 ; FALLBACK11-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9264 ; FALLBACK11-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
9265 ; FALLBACK11-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
9266 ; FALLBACK11-NEXT: sarq $63, %rdi
9267 ; FALLBACK11-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9268 ; FALLBACK11-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9269 ; FALLBACK11-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9270 ; FALLBACK11-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9271 ; FALLBACK11-NEXT: andb $24, %sil
9272 ; FALLBACK11-NEXT: movzbl %sil, %eax
9273 ; FALLBACK11-NEXT: movq -48(%rsp,%rax), %rsi
9274 ; FALLBACK11-NEXT: movq -56(%rsp,%rax), %rdi
9275 ; FALLBACK11-NEXT: movq %rdi, %r8
9276 ; FALLBACK11-NEXT: shrdq %cl, %rsi, %r8
9277 ; FALLBACK11-NEXT: movq -72(%rsp,%rax), %r9
9278 ; FALLBACK11-NEXT: movq -64(%rsp,%rax), %rax
9279 ; FALLBACK11-NEXT: movq %rax, %r10
9280 ; FALLBACK11-NEXT: shrdq %cl, %rdi, %r10
9281 ; FALLBACK11-NEXT: shrdq %cl, %rax, %r9
9282 ; FALLBACK11-NEXT: sarxq %rcx, %rsi, %rax
9283 ; FALLBACK11-NEXT: movq %r10, 8(%rdx)
9284 ; FALLBACK11-NEXT: movq %r8, 16(%rdx)
9285 ; FALLBACK11-NEXT: movq %rax, 24(%rdx)
9286 ; FALLBACK11-NEXT: movq %r9, (%rdx)
9287 ; FALLBACK11-NEXT: retq
9289 ; FALLBACK12-LABEL: ashr_32bytes:
9290 ; FALLBACK12: # %bb.0:
9291 ; FALLBACK12-NEXT: pushq %rbx
9292 ; FALLBACK12-NEXT: vmovups (%rdi), %xmm0
9293 ; FALLBACK12-NEXT: movq 16(%rdi), %rcx
9294 ; FALLBACK12-NEXT: movq 24(%rdi), %rdi
9295 ; FALLBACK12-NEXT: movzbl (%rsi), %esi
9296 ; FALLBACK12-NEXT: leal (,%rsi,8), %eax
9297 ; FALLBACK12-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9298 ; FALLBACK12-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
9299 ; FALLBACK12-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
9300 ; FALLBACK12-NEXT: sarq $63, %rdi
9301 ; FALLBACK12-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9302 ; FALLBACK12-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9303 ; FALLBACK12-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9304 ; FALLBACK12-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9305 ; FALLBACK12-NEXT: andb $24, %sil
9306 ; FALLBACK12-NEXT: movzbl %sil, %r9d
9307 ; FALLBACK12-NEXT: movq -64(%rsp,%r9), %r10
9308 ; FALLBACK12-NEXT: movq -56(%rsp,%r9), %r8
9309 ; FALLBACK12-NEXT: movl %eax, %ecx
9310 ; FALLBACK12-NEXT: shrq %cl, %r10
9311 ; FALLBACK12-NEXT: movl %eax, %esi
9312 ; FALLBACK12-NEXT: notb %sil
9313 ; FALLBACK12-NEXT: leaq (%r8,%r8), %rdi
9314 ; FALLBACK12-NEXT: movl %esi, %ecx
9315 ; FALLBACK12-NEXT: shlq %cl, %rdi
9316 ; FALLBACK12-NEXT: orq %r10, %rdi
9317 ; FALLBACK12-NEXT: movq -48(%rsp,%r9), %r10
9318 ; FALLBACK12-NEXT: movq %r10, %r11
9319 ; FALLBACK12-NEXT: movl %eax, %ecx
9320 ; FALLBACK12-NEXT: shrq %cl, %r11
9321 ; FALLBACK12-NEXT: movq -40(%rsp,%r9), %r9
9322 ; FALLBACK12-NEXT: leaq (%r9,%r9), %rbx
9323 ; FALLBACK12-NEXT: movl %esi, %ecx
9324 ; FALLBACK12-NEXT: shlq %cl, %rbx
9325 ; FALLBACK12-NEXT: orq %r11, %rbx
9326 ; FALLBACK12-NEXT: movl %eax, %ecx
9327 ; FALLBACK12-NEXT: shrq %cl, %r8
9328 ; FALLBACK12-NEXT: addq %r10, %r10
9329 ; FALLBACK12-NEXT: movl %esi, %ecx
9330 ; FALLBACK12-NEXT: shlq %cl, %r10
9331 ; FALLBACK12-NEXT: orq %r8, %r10
9332 ; FALLBACK12-NEXT: movl %eax, %ecx
9333 ; FALLBACK12-NEXT: sarq %cl, %r9
9334 ; FALLBACK12-NEXT: movq %r9, 24(%rdx)
9335 ; FALLBACK12-NEXT: movq %r10, 8(%rdx)
9336 ; FALLBACK12-NEXT: movq %rbx, 16(%rdx)
9337 ; FALLBACK12-NEXT: movq %rdi, (%rdx)
9338 ; FALLBACK12-NEXT: popq %rbx
9339 ; FALLBACK12-NEXT: retq
9341 ; FALLBACK13-LABEL: ashr_32bytes:
9342 ; FALLBACK13: # %bb.0:
9343 ; FALLBACK13-NEXT: vmovups (%rdi), %xmm0
9344 ; FALLBACK13-NEXT: movq 16(%rdi), %rax
9345 ; FALLBACK13-NEXT: movq 24(%rdi), %rdi
9346 ; FALLBACK13-NEXT: movzbl (%rsi), %esi
9347 ; FALLBACK13-NEXT: leal (,%rsi,8), %ecx
9348 ; FALLBACK13-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9349 ; FALLBACK13-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
9350 ; FALLBACK13-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
9351 ; FALLBACK13-NEXT: sarq $63, %rdi
9352 ; FALLBACK13-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9353 ; FALLBACK13-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9354 ; FALLBACK13-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9355 ; FALLBACK13-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9356 ; FALLBACK13-NEXT: andb $24, %sil
9357 ; FALLBACK13-NEXT: movzbl %sil, %eax
9358 ; FALLBACK13-NEXT: movq -48(%rsp,%rax), %rsi
9359 ; FALLBACK13-NEXT: movq -56(%rsp,%rax), %rdi
9360 ; FALLBACK13-NEXT: movq %rdi, %r8
9361 ; FALLBACK13-NEXT: shrdq %cl, %rsi, %r8
9362 ; FALLBACK13-NEXT: movq -72(%rsp,%rax), %r9
9363 ; FALLBACK13-NEXT: movq -64(%rsp,%rax), %rax
9364 ; FALLBACK13-NEXT: movq %rax, %r10
9365 ; FALLBACK13-NEXT: shrdq %cl, %rdi, %r10
9366 ; FALLBACK13-NEXT: shrdq %cl, %rax, %r9
9367 ; FALLBACK13-NEXT: # kill: def $cl killed $cl killed $ecx
9368 ; FALLBACK13-NEXT: sarq %cl, %rsi
9369 ; FALLBACK13-NEXT: movq %r10, 8(%rdx)
9370 ; FALLBACK13-NEXT: movq %r8, 16(%rdx)
9371 ; FALLBACK13-NEXT: movq %rsi, 24(%rdx)
9372 ; FALLBACK13-NEXT: movq %r9, (%rdx)
9373 ; FALLBACK13-NEXT: retq
9375 ; FALLBACK14-LABEL: ashr_32bytes:
9376 ; FALLBACK14: # %bb.0:
9377 ; FALLBACK14-NEXT: vmovups (%rdi), %xmm0
9378 ; FALLBACK14-NEXT: movq 16(%rdi), %rcx
9379 ; FALLBACK14-NEXT: movq 24(%rdi), %rdi
9380 ; FALLBACK14-NEXT: movzbl (%rsi), %esi
9381 ; FALLBACK14-NEXT: leal (,%rsi,8), %eax
9382 ; FALLBACK14-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9383 ; FALLBACK14-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
9384 ; FALLBACK14-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
9385 ; FALLBACK14-NEXT: sarq $63, %rdi
9386 ; FALLBACK14-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9387 ; FALLBACK14-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9388 ; FALLBACK14-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9389 ; FALLBACK14-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9390 ; FALLBACK14-NEXT: andb $24, %sil
9391 ; FALLBACK14-NEXT: movzbl %sil, %ecx
9392 ; FALLBACK14-NEXT: shrxq %rax, -72(%rsp,%rcx), %rsi
9393 ; FALLBACK14-NEXT: movq -64(%rsp,%rcx), %rdi
9394 ; FALLBACK14-NEXT: movq -56(%rsp,%rcx), %r8
9395 ; FALLBACK14-NEXT: shrxq %rax, %r8, %r9
9396 ; FALLBACK14-NEXT: movq -48(%rsp,%rcx), %rcx
9397 ; FALLBACK14-NEXT: shrxq %rax, %rdi, %r10
9398 ; FALLBACK14-NEXT: sarxq %rax, %rcx, %r11
9399 ; FALLBACK14-NEXT: # kill: def $al killed $al killed $rax def $rax
9400 ; FALLBACK14-NEXT: notb %al
9401 ; FALLBACK14-NEXT: addq %rdi, %rdi
9402 ; FALLBACK14-NEXT: shlxq %rax, %rdi, %rdi
9403 ; FALLBACK14-NEXT: orq %rsi, %rdi
9404 ; FALLBACK14-NEXT: addq %rcx, %rcx
9405 ; FALLBACK14-NEXT: shlxq %rax, %rcx, %rcx
9406 ; FALLBACK14-NEXT: orq %r9, %rcx
9407 ; FALLBACK14-NEXT: addq %r8, %r8
9408 ; FALLBACK14-NEXT: shlxq %rax, %r8, %rax
9409 ; FALLBACK14-NEXT: orq %r10, %rax
9410 ; FALLBACK14-NEXT: movq %r11, 24(%rdx)
9411 ; FALLBACK14-NEXT: movq %rax, 8(%rdx)
9412 ; FALLBACK14-NEXT: movq %rcx, 16(%rdx)
9413 ; FALLBACK14-NEXT: movq %rdi, (%rdx)
9414 ; FALLBACK14-NEXT: retq
9416 ; FALLBACK15-LABEL: ashr_32bytes:
9417 ; FALLBACK15: # %bb.0:
9418 ; FALLBACK15-NEXT: vmovups (%rdi), %xmm0
9419 ; FALLBACK15-NEXT: movq 16(%rdi), %rax
9420 ; FALLBACK15-NEXT: movq 24(%rdi), %rdi
9421 ; FALLBACK15-NEXT: movzbl (%rsi), %esi
9422 ; FALLBACK15-NEXT: leal (,%rsi,8), %ecx
9423 ; FALLBACK15-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9424 ; FALLBACK15-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
9425 ; FALLBACK15-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
9426 ; FALLBACK15-NEXT: sarq $63, %rdi
9427 ; FALLBACK15-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9428 ; FALLBACK15-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9429 ; FALLBACK15-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9430 ; FALLBACK15-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
9431 ; FALLBACK15-NEXT: andb $24, %sil
9432 ; FALLBACK15-NEXT: movzbl %sil, %eax
9433 ; FALLBACK15-NEXT: movq -48(%rsp,%rax), %rsi
9434 ; FALLBACK15-NEXT: movq -56(%rsp,%rax), %rdi
9435 ; FALLBACK15-NEXT: movq %rdi, %r8
9436 ; FALLBACK15-NEXT: shrdq %cl, %rsi, %r8
9437 ; FALLBACK15-NEXT: movq -72(%rsp,%rax), %r9
9438 ; FALLBACK15-NEXT: movq -64(%rsp,%rax), %rax
9439 ; FALLBACK15-NEXT: movq %rax, %r10
9440 ; FALLBACK15-NEXT: shrdq %cl, %rdi, %r10
9441 ; FALLBACK15-NEXT: shrdq %cl, %rax, %r9
9442 ; FALLBACK15-NEXT: sarxq %rcx, %rsi, %rax
9443 ; FALLBACK15-NEXT: movq %r10, 8(%rdx)
9444 ; FALLBACK15-NEXT: movq %r8, 16(%rdx)
9445 ; FALLBACK15-NEXT: movq %rax, 24(%rdx)
9446 ; FALLBACK15-NEXT: movq %r9, (%rdx)
9447 ; FALLBACK15-NEXT: retq
9449 ; FALLBACK16-LABEL: ashr_32bytes:
9450 ; FALLBACK16: # %bb.0:
9451 ; FALLBACK16-NEXT: pushl %ebp
9452 ; FALLBACK16-NEXT: pushl %ebx
9453 ; FALLBACK16-NEXT: pushl %edi
9454 ; FALLBACK16-NEXT: pushl %esi
9455 ; FALLBACK16-NEXT: subl $108, %esp
9456 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %eax
9457 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %esi
9458 ; FALLBACK16-NEXT: movl (%esi), %ecx
9459 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9460 ; FALLBACK16-NEXT: movl 4(%esi), %ecx
9461 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9462 ; FALLBACK16-NEXT: movl 8(%esi), %ebx
9463 ; FALLBACK16-NEXT: movl 12(%esi), %ebp
9464 ; FALLBACK16-NEXT: movl 16(%esi), %edi
9465 ; FALLBACK16-NEXT: movzbl (%eax), %ecx
9466 ; FALLBACK16-NEXT: movl 20(%esi), %edx
9467 ; FALLBACK16-NEXT: movl 24(%esi), %eax
9468 ; FALLBACK16-NEXT: movl 28(%esi), %esi
9469 ; FALLBACK16-NEXT: movl %eax, {{[0-9]+}}(%esp)
9470 ; FALLBACK16-NEXT: movl %edx, {{[0-9]+}}(%esp)
9471 ; FALLBACK16-NEXT: movl %ecx, %edx
9472 ; FALLBACK16-NEXT: shlb $3, %dl
9473 ; FALLBACK16-NEXT: movl %esi, {{[0-9]+}}(%esp)
9474 ; FALLBACK16-NEXT: movl %edi, {{[0-9]+}}(%esp)
9475 ; FALLBACK16-NEXT: movl %ebp, {{[0-9]+}}(%esp)
9476 ; FALLBACK16-NEXT: movl %ebx, {{[0-9]+}}(%esp)
9477 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
9478 ; FALLBACK16-NEXT: movl %eax, {{[0-9]+}}(%esp)
9479 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
9480 ; FALLBACK16-NEXT: movl %eax, {{[0-9]+}}(%esp)
9481 ; FALLBACK16-NEXT: sarl $31, %esi
9482 ; FALLBACK16-NEXT: movl %esi, {{[0-9]+}}(%esp)
9483 ; FALLBACK16-NEXT: movl %esi, {{[0-9]+}}(%esp)
9484 ; FALLBACK16-NEXT: movl %esi, {{[0-9]+}}(%esp)
9485 ; FALLBACK16-NEXT: movl %esi, {{[0-9]+}}(%esp)
9486 ; FALLBACK16-NEXT: movl %esi, {{[0-9]+}}(%esp)
9487 ; FALLBACK16-NEXT: movl %esi, {{[0-9]+}}(%esp)
9488 ; FALLBACK16-NEXT: movl %esi, {{[0-9]+}}(%esp)
9489 ; FALLBACK16-NEXT: movl %esi, {{[0-9]+}}(%esp)
9490 ; FALLBACK16-NEXT: andb $28, %cl
9491 ; FALLBACK16-NEXT: movzbl %cl, %edi
9492 ; FALLBACK16-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9493 ; FALLBACK16-NEXT: movl 32(%esp,%edi), %esi
9494 ; FALLBACK16-NEXT: movl 36(%esp,%edi), %eax
9495 ; FALLBACK16-NEXT: movl %eax, %ebx
9496 ; FALLBACK16-NEXT: movl %edx, %ecx
9497 ; FALLBACK16-NEXT: shrl %cl, %ebx
9498 ; FALLBACK16-NEXT: movb %dl, %ch
9499 ; FALLBACK16-NEXT: notb %ch
9500 ; FALLBACK16-NEXT: movl 40(%esp,%edi), %edi
9501 ; FALLBACK16-NEXT: leal (%edi,%edi), %ebp
9502 ; FALLBACK16-NEXT: movb %ch, %cl
9503 ; FALLBACK16-NEXT: shll %cl, %ebp
9504 ; FALLBACK16-NEXT: orl %ebx, %ebp
9505 ; FALLBACK16-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9506 ; FALLBACK16-NEXT: movb %dl, %cl
9507 ; FALLBACK16-NEXT: shrl %cl, %esi
9508 ; FALLBACK16-NEXT: addl %eax, %eax
9509 ; FALLBACK16-NEXT: movb %ch, %cl
9510 ; FALLBACK16-NEXT: shll %cl, %eax
9511 ; FALLBACK16-NEXT: orl %esi, %eax
9512 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9513 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
9514 ; FALLBACK16-NEXT: movl 44(%esp,%eax), %ebp
9515 ; FALLBACK16-NEXT: movl %ebp, %esi
9516 ; FALLBACK16-NEXT: movb %dl, %cl
9517 ; FALLBACK16-NEXT: movl %edx, %ebx
9518 ; FALLBACK16-NEXT: shrl %cl, %esi
9519 ; FALLBACK16-NEXT: movl 48(%esp,%eax), %edx
9520 ; FALLBACK16-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9521 ; FALLBACK16-NEXT: leal (%edx,%edx), %eax
9522 ; FALLBACK16-NEXT: movb %ch, %cl
9523 ; FALLBACK16-NEXT: shll %cl, %eax
9524 ; FALLBACK16-NEXT: orl %esi, %eax
9525 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9526 ; FALLBACK16-NEXT: movl %ebx, %edx
9527 ; FALLBACK16-NEXT: movb %bl, %cl
9528 ; FALLBACK16-NEXT: shrl %cl, %edi
9529 ; FALLBACK16-NEXT: addl %ebp, %ebp
9530 ; FALLBACK16-NEXT: movb %ch, %cl
9531 ; FALLBACK16-NEXT: shll %cl, %ebp
9532 ; FALLBACK16-NEXT: orl %edi, %ebp
9533 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
9534 ; FALLBACK16-NEXT: movl 52(%esp,%esi), %edi
9535 ; FALLBACK16-NEXT: movl %edi, %eax
9536 ; FALLBACK16-NEXT: movb %bl, %cl
9537 ; FALLBACK16-NEXT: shrl %cl, %eax
9538 ; FALLBACK16-NEXT: movl 56(%esp,%esi), %ebx
9539 ; FALLBACK16-NEXT: leal (%ebx,%ebx), %esi
9540 ; FALLBACK16-NEXT: movb %ch, %cl
9541 ; FALLBACK16-NEXT: shll %cl, %esi
9542 ; FALLBACK16-NEXT: orl %eax, %esi
9543 ; FALLBACK16-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
9544 ; FALLBACK16-NEXT: movb %dl, %cl
9545 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
9546 ; FALLBACK16-NEXT: shrl %cl, %eax
9547 ; FALLBACK16-NEXT: addl %edi, %edi
9548 ; FALLBACK16-NEXT: movb %ch, %cl
9549 ; FALLBACK16-NEXT: shll %cl, %edi
9550 ; FALLBACK16-NEXT: orl %eax, %edi
9551 ; FALLBACK16-NEXT: movb %dl, %cl
9552 ; FALLBACK16-NEXT: shrl %cl, %ebx
9553 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
9554 ; FALLBACK16-NEXT: movl 60(%esp,%eax), %eax
9555 ; FALLBACK16-NEXT: leal (%eax,%eax), %edx
9556 ; FALLBACK16-NEXT: movb %ch, %cl
9557 ; FALLBACK16-NEXT: shll %cl, %edx
9558 ; FALLBACK16-NEXT: orl %ebx, %edx
9559 ; FALLBACK16-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
9560 ; FALLBACK16-NEXT: sarl %cl, %eax
9561 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %ecx
9562 ; FALLBACK16-NEXT: movl %eax, 28(%ecx)
9563 ; FALLBACK16-NEXT: movl %edx, 24(%ecx)
9564 ; FALLBACK16-NEXT: movl %edi, 16(%ecx)
9565 ; FALLBACK16-NEXT: movl %esi, 20(%ecx)
9566 ; FALLBACK16-NEXT: movl %ebp, 8(%ecx)
9567 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
9568 ; FALLBACK16-NEXT: movl %eax, 12(%ecx)
9569 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
9570 ; FALLBACK16-NEXT: movl %eax, (%ecx)
9571 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
9572 ; FALLBACK16-NEXT: movl %eax, 4(%ecx)
9573 ; FALLBACK16-NEXT: addl $108, %esp
9574 ; FALLBACK16-NEXT: popl %esi
9575 ; FALLBACK16-NEXT: popl %edi
9576 ; FALLBACK16-NEXT: popl %ebx
9577 ; FALLBACK16-NEXT: popl %ebp
9578 ; FALLBACK16-NEXT: retl
9580 ; FALLBACK17-LABEL: ashr_32bytes:
9581 ; FALLBACK17: # %bb.0:
9582 ; FALLBACK17-NEXT: pushl %ebp
9583 ; FALLBACK17-NEXT: pushl %ebx
9584 ; FALLBACK17-NEXT: pushl %edi
9585 ; FALLBACK17-NEXT: pushl %esi
9586 ; FALLBACK17-NEXT: subl $92, %esp
9587 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %eax
9588 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %ecx
9589 ; FALLBACK17-NEXT: movl (%ecx), %edx
9590 ; FALLBACK17-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9591 ; FALLBACK17-NEXT: movl 4(%ecx), %edx
9592 ; FALLBACK17-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9593 ; FALLBACK17-NEXT: movl 8(%ecx), %edx
9594 ; FALLBACK17-NEXT: movl %edx, (%esp) # 4-byte Spill
9595 ; FALLBACK17-NEXT: movl 12(%ecx), %ebp
9596 ; FALLBACK17-NEXT: movl 16(%ecx), %ebx
9597 ; FALLBACK17-NEXT: movzbl (%eax), %eax
9598 ; FALLBACK17-NEXT: movl 20(%ecx), %edi
9599 ; FALLBACK17-NEXT: movl 24(%ecx), %edx
9600 ; FALLBACK17-NEXT: movl 28(%ecx), %esi
9601 ; FALLBACK17-NEXT: movl %edx, {{[0-9]+}}(%esp)
9602 ; FALLBACK17-NEXT: movl %edi, {{[0-9]+}}(%esp)
9603 ; FALLBACK17-NEXT: movl %eax, %ecx
9604 ; FALLBACK17-NEXT: shlb $3, %cl
9605 ; FALLBACK17-NEXT: movl %esi, {{[0-9]+}}(%esp)
9606 ; FALLBACK17-NEXT: movl %ebx, {{[0-9]+}}(%esp)
9607 ; FALLBACK17-NEXT: movl %ebp, {{[0-9]+}}(%esp)
9608 ; FALLBACK17-NEXT: movl (%esp), %edx # 4-byte Reload
9609 ; FALLBACK17-NEXT: movl %edx, {{[0-9]+}}(%esp)
9610 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
9611 ; FALLBACK17-NEXT: movl %edx, {{[0-9]+}}(%esp)
9612 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
9613 ; FALLBACK17-NEXT: movl %edx, {{[0-9]+}}(%esp)
9614 ; FALLBACK17-NEXT: sarl $31, %esi
9615 ; FALLBACK17-NEXT: movl %esi, {{[0-9]+}}(%esp)
9616 ; FALLBACK17-NEXT: movl %esi, {{[0-9]+}}(%esp)
9617 ; FALLBACK17-NEXT: movl %esi, {{[0-9]+}}(%esp)
9618 ; FALLBACK17-NEXT: movl %esi, {{[0-9]+}}(%esp)
9619 ; FALLBACK17-NEXT: movl %esi, {{[0-9]+}}(%esp)
9620 ; FALLBACK17-NEXT: movl %esi, {{[0-9]+}}(%esp)
9621 ; FALLBACK17-NEXT: movl %esi, {{[0-9]+}}(%esp)
9622 ; FALLBACK17-NEXT: movl %esi, {{[0-9]+}}(%esp)
9623 ; FALLBACK17-NEXT: andb $28, %al
9624 ; FALLBACK17-NEXT: movzbl %al, %ebp
9625 ; FALLBACK17-NEXT: movl 24(%esp,%ebp), %edx
9626 ; FALLBACK17-NEXT: movl 20(%esp,%ebp), %eax
9627 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9628 ; FALLBACK17-NEXT: shrdl %cl, %edx, %eax
9629 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9630 ; FALLBACK17-NEXT: movl 32(%esp,%ebp), %ebx
9631 ; FALLBACK17-NEXT: movl 28(%esp,%ebp), %eax
9632 ; FALLBACK17-NEXT: movl %eax, %esi
9633 ; FALLBACK17-NEXT: shrdl %cl, %ebx, %esi
9634 ; FALLBACK17-NEXT: movl %esi, (%esp) # 4-byte Spill
9635 ; FALLBACK17-NEXT: shrdl %cl, %eax, %edx
9636 ; FALLBACK17-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9637 ; FALLBACK17-NEXT: movl 40(%esp,%ebp), %edx
9638 ; FALLBACK17-NEXT: movl 36(%esp,%ebp), %eax
9639 ; FALLBACK17-NEXT: movl %eax, %edi
9640 ; FALLBACK17-NEXT: shrdl %cl, %edx, %edi
9641 ; FALLBACK17-NEXT: shrdl %cl, %eax, %ebx
9642 ; FALLBACK17-NEXT: movl 16(%esp,%ebp), %esi
9643 ; FALLBACK17-NEXT: movl 44(%esp,%ebp), %eax
9644 ; FALLBACK17-NEXT: shrdl %cl, %eax, %edx
9645 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %ebp
9646 ; FALLBACK17-NEXT: movl %edx, 24(%ebp)
9647 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
9648 ; FALLBACK17-NEXT: shrdl %cl, %edx, %esi
9649 ; FALLBACK17-NEXT: sarl %cl, %eax
9650 ; FALLBACK17-NEXT: movl %eax, 28(%ebp)
9651 ; FALLBACK17-NEXT: movl %ebx, 16(%ebp)
9652 ; FALLBACK17-NEXT: movl %edi, 20(%ebp)
9653 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
9654 ; FALLBACK17-NEXT: movl %eax, 8(%ebp)
9655 ; FALLBACK17-NEXT: movl (%esp), %eax # 4-byte Reload
9656 ; FALLBACK17-NEXT: movl %eax, 12(%ebp)
9657 ; FALLBACK17-NEXT: movl %esi, (%ebp)
9658 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
9659 ; FALLBACK17-NEXT: movl %eax, 4(%ebp)
9660 ; FALLBACK17-NEXT: addl $92, %esp
9661 ; FALLBACK17-NEXT: popl %esi
9662 ; FALLBACK17-NEXT: popl %edi
9663 ; FALLBACK17-NEXT: popl %ebx
9664 ; FALLBACK17-NEXT: popl %ebp
9665 ; FALLBACK17-NEXT: retl
9667 ; FALLBACK18-LABEL: ashr_32bytes:
9668 ; FALLBACK18: # %bb.0:
9669 ; FALLBACK18-NEXT: pushl %ebp
9670 ; FALLBACK18-NEXT: pushl %ebx
9671 ; FALLBACK18-NEXT: pushl %edi
9672 ; FALLBACK18-NEXT: pushl %esi
9673 ; FALLBACK18-NEXT: subl $108, %esp
9674 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %ecx
9675 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %esi
9676 ; FALLBACK18-NEXT: movl (%esi), %eax
9677 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9678 ; FALLBACK18-NEXT: movl 4(%esi), %eax
9679 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9680 ; FALLBACK18-NEXT: movl 8(%esi), %ebx
9681 ; FALLBACK18-NEXT: movl 12(%esi), %ebp
9682 ; FALLBACK18-NEXT: movl 16(%esi), %edi
9683 ; FALLBACK18-NEXT: movzbl (%ecx), %ecx
9684 ; FALLBACK18-NEXT: movl 20(%esi), %edx
9685 ; FALLBACK18-NEXT: movl 24(%esi), %eax
9686 ; FALLBACK18-NEXT: movl 28(%esi), %esi
9687 ; FALLBACK18-NEXT: movl %eax, {{[0-9]+}}(%esp)
9688 ; FALLBACK18-NEXT: movl %edx, {{[0-9]+}}(%esp)
9689 ; FALLBACK18-NEXT: movl %edi, {{[0-9]+}}(%esp)
9690 ; FALLBACK18-NEXT: movl %ecx, %eax
9691 ; FALLBACK18-NEXT: shlb $3, %al
9692 ; FALLBACK18-NEXT: movl %esi, {{[0-9]+}}(%esp)
9693 ; FALLBACK18-NEXT: movl %ebp, {{[0-9]+}}(%esp)
9694 ; FALLBACK18-NEXT: movl %ebx, {{[0-9]+}}(%esp)
9695 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
9696 ; FALLBACK18-NEXT: movl %edx, {{[0-9]+}}(%esp)
9697 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
9698 ; FALLBACK18-NEXT: movl %edx, {{[0-9]+}}(%esp)
9699 ; FALLBACK18-NEXT: sarl $31, %esi
9700 ; FALLBACK18-NEXT: movl %esi, {{[0-9]+}}(%esp)
9701 ; FALLBACK18-NEXT: movl %esi, {{[0-9]+}}(%esp)
9702 ; FALLBACK18-NEXT: movl %esi, {{[0-9]+}}(%esp)
9703 ; FALLBACK18-NEXT: movl %esi, {{[0-9]+}}(%esp)
9704 ; FALLBACK18-NEXT: movl %esi, {{[0-9]+}}(%esp)
9705 ; FALLBACK18-NEXT: movl %esi, {{[0-9]+}}(%esp)
9706 ; FALLBACK18-NEXT: movl %esi, {{[0-9]+}}(%esp)
9707 ; FALLBACK18-NEXT: movl %esi, {{[0-9]+}}(%esp)
9708 ; FALLBACK18-NEXT: andb $28, %cl
9709 ; FALLBACK18-NEXT: movzbl %cl, %edi
9710 ; FALLBACK18-NEXT: movl 36(%esp,%edi), %esi
9711 ; FALLBACK18-NEXT: movl 40(%esp,%edi), %ecx
9712 ; FALLBACK18-NEXT: shrxl %eax, %esi, %ebx
9713 ; FALLBACK18-NEXT: movl %eax, %edx
9714 ; FALLBACK18-NEXT: notb %dl
9715 ; FALLBACK18-NEXT: leal (%ecx,%ecx), %ebp
9716 ; FALLBACK18-NEXT: shlxl %edx, %ebp, %ebp
9717 ; FALLBACK18-NEXT: orl %ebx, %ebp
9718 ; FALLBACK18-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9719 ; FALLBACK18-NEXT: shrxl %eax, 32(%esp,%edi), %ebx
9720 ; FALLBACK18-NEXT: addl %esi, %esi
9721 ; FALLBACK18-NEXT: shlxl %edx, %esi, %esi
9722 ; FALLBACK18-NEXT: orl %ebx, %esi
9723 ; FALLBACK18-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9724 ; FALLBACK18-NEXT: movl 48(%esp,%edi), %esi
9725 ; FALLBACK18-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9726 ; FALLBACK18-NEXT: leal (%esi,%esi), %ebx
9727 ; FALLBACK18-NEXT: shlxl %edx, %ebx, %esi
9728 ; FALLBACK18-NEXT: movl 44(%esp,%edi), %ebp
9729 ; FALLBACK18-NEXT: shrxl %eax, %ebp, %ebx
9730 ; FALLBACK18-NEXT: orl %ebx, %esi
9731 ; FALLBACK18-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9732 ; FALLBACK18-NEXT: shrxl %eax, %ecx, %ecx
9733 ; FALLBACK18-NEXT: movl %eax, %ebx
9734 ; FALLBACK18-NEXT: addl %ebp, %ebp
9735 ; FALLBACK18-NEXT: shlxl %edx, %ebp, %eax
9736 ; FALLBACK18-NEXT: orl %ecx, %eax
9737 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9738 ; FALLBACK18-NEXT: movl 56(%esp,%edi), %ebp
9739 ; FALLBACK18-NEXT: leal (%ebp,%ebp), %ecx
9740 ; FALLBACK18-NEXT: shlxl %edx, %ecx, %ecx
9741 ; FALLBACK18-NEXT: movl 52(%esp,%edi), %eax
9742 ; FALLBACK18-NEXT: shrxl %ebx, %eax, %esi
9743 ; FALLBACK18-NEXT: orl %esi, %ecx
9744 ; FALLBACK18-NEXT: shrxl %ebx, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
9745 ; FALLBACK18-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9746 ; FALLBACK18-NEXT: addl %eax, %eax
9747 ; FALLBACK18-NEXT: shlxl %edx, %eax, %esi
9748 ; FALLBACK18-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
9749 ; FALLBACK18-NEXT: shrxl %ebx, %ebp, %eax
9750 ; FALLBACK18-NEXT: movl 60(%esp,%edi), %edi
9751 ; FALLBACK18-NEXT: sarxl %ebx, %edi, %ebx
9752 ; FALLBACK18-NEXT: addl %edi, %edi
9753 ; FALLBACK18-NEXT: shlxl %edx, %edi, %edx
9754 ; FALLBACK18-NEXT: orl %eax, %edx
9755 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %eax
9756 ; FALLBACK18-NEXT: movl %ebx, 28(%eax)
9757 ; FALLBACK18-NEXT: movl %edx, 24(%eax)
9758 ; FALLBACK18-NEXT: movl %esi, 16(%eax)
9759 ; FALLBACK18-NEXT: movl %ecx, 20(%eax)
9760 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
9761 ; FALLBACK18-NEXT: movl %ecx, 8(%eax)
9762 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
9763 ; FALLBACK18-NEXT: movl %ecx, 12(%eax)
9764 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
9765 ; FALLBACK18-NEXT: movl %ecx, (%eax)
9766 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
9767 ; FALLBACK18-NEXT: movl %ecx, 4(%eax)
9768 ; FALLBACK18-NEXT: addl $108, %esp
9769 ; FALLBACK18-NEXT: popl %esi
9770 ; FALLBACK18-NEXT: popl %edi
9771 ; FALLBACK18-NEXT: popl %ebx
9772 ; FALLBACK18-NEXT: popl %ebp
9773 ; FALLBACK18-NEXT: retl
9775 ; FALLBACK19-LABEL: ashr_32bytes:
9776 ; FALLBACK19: # %bb.0:
9777 ; FALLBACK19-NEXT: pushl %ebp
9778 ; FALLBACK19-NEXT: pushl %ebx
9779 ; FALLBACK19-NEXT: pushl %edi
9780 ; FALLBACK19-NEXT: pushl %esi
9781 ; FALLBACK19-NEXT: subl $92, %esp
9782 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %eax
9783 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %ecx
9784 ; FALLBACK19-NEXT: movl (%ecx), %edx
9785 ; FALLBACK19-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9786 ; FALLBACK19-NEXT: movl 4(%ecx), %edx
9787 ; FALLBACK19-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9788 ; FALLBACK19-NEXT: movl 8(%ecx), %edx
9789 ; FALLBACK19-NEXT: movl %edx, (%esp) # 4-byte Spill
9790 ; FALLBACK19-NEXT: movl 12(%ecx), %ebp
9791 ; FALLBACK19-NEXT: movl 16(%ecx), %ebx
9792 ; FALLBACK19-NEXT: movzbl (%eax), %eax
9793 ; FALLBACK19-NEXT: movl 20(%ecx), %edi
9794 ; FALLBACK19-NEXT: movl 24(%ecx), %edx
9795 ; FALLBACK19-NEXT: movl 28(%ecx), %esi
9796 ; FALLBACK19-NEXT: movl %edx, {{[0-9]+}}(%esp)
9797 ; FALLBACK19-NEXT: movl %edi, {{[0-9]+}}(%esp)
9798 ; FALLBACK19-NEXT: movl %ebx, {{[0-9]+}}(%esp)
9799 ; FALLBACK19-NEXT: movl %eax, %ecx
9800 ; FALLBACK19-NEXT: shlb $3, %cl
9801 ; FALLBACK19-NEXT: movl %esi, {{[0-9]+}}(%esp)
9802 ; FALLBACK19-NEXT: movl %ebp, {{[0-9]+}}(%esp)
9803 ; FALLBACK19-NEXT: movl (%esp), %edx # 4-byte Reload
9804 ; FALLBACK19-NEXT: movl %edx, {{[0-9]+}}(%esp)
9805 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
9806 ; FALLBACK19-NEXT: movl %edx, {{[0-9]+}}(%esp)
9807 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
9808 ; FALLBACK19-NEXT: movl %edx, {{[0-9]+}}(%esp)
9809 ; FALLBACK19-NEXT: sarl $31, %esi
9810 ; FALLBACK19-NEXT: movl %esi, {{[0-9]+}}(%esp)
9811 ; FALLBACK19-NEXT: movl %esi, {{[0-9]+}}(%esp)
9812 ; FALLBACK19-NEXT: movl %esi, {{[0-9]+}}(%esp)
9813 ; FALLBACK19-NEXT: movl %esi, {{[0-9]+}}(%esp)
9814 ; FALLBACK19-NEXT: movl %esi, {{[0-9]+}}(%esp)
9815 ; FALLBACK19-NEXT: movl %esi, {{[0-9]+}}(%esp)
9816 ; FALLBACK19-NEXT: movl %esi, {{[0-9]+}}(%esp)
9817 ; FALLBACK19-NEXT: movl %esi, {{[0-9]+}}(%esp)
9818 ; FALLBACK19-NEXT: andb $28, %al
9819 ; FALLBACK19-NEXT: movzbl %al, %ebp
9820 ; FALLBACK19-NEXT: movl 24(%esp,%ebp), %esi
9821 ; FALLBACK19-NEXT: movl 20(%esp,%ebp), %eax
9822 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9823 ; FALLBACK19-NEXT: shrdl %cl, %esi, %eax
9824 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9825 ; FALLBACK19-NEXT: movl 32(%esp,%ebp), %ebx
9826 ; FALLBACK19-NEXT: movl 28(%esp,%ebp), %eax
9827 ; FALLBACK19-NEXT: movl %eax, %edx
9828 ; FALLBACK19-NEXT: shrdl %cl, %ebx, %edx
9829 ; FALLBACK19-NEXT: movl %edx, (%esp) # 4-byte Spill
9830 ; FALLBACK19-NEXT: shrdl %cl, %eax, %esi
9831 ; FALLBACK19-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9832 ; FALLBACK19-NEXT: movl 40(%esp,%ebp), %eax
9833 ; FALLBACK19-NEXT: movl 36(%esp,%ebp), %edx
9834 ; FALLBACK19-NEXT: movl %edx, %esi
9835 ; FALLBACK19-NEXT: shrdl %cl, %eax, %esi
9836 ; FALLBACK19-NEXT: shrdl %cl, %edx, %ebx
9837 ; FALLBACK19-NEXT: movl 16(%esp,%ebp), %edx
9838 ; FALLBACK19-NEXT: movl 44(%esp,%ebp), %edi
9839 ; FALLBACK19-NEXT: shrdl %cl, %edi, %eax
9840 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %ebp
9841 ; FALLBACK19-NEXT: movl %eax, 24(%ebp)
9842 ; FALLBACK19-NEXT: sarxl %ecx, %edi, %eax
9843 ; FALLBACK19-NEXT: movl %eax, 28(%ebp)
9844 ; FALLBACK19-NEXT: movl %ebx, 16(%ebp)
9845 ; FALLBACK19-NEXT: movl %esi, 20(%ebp)
9846 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
9847 ; FALLBACK19-NEXT: movl %eax, 8(%ebp)
9848 ; FALLBACK19-NEXT: movl (%esp), %eax # 4-byte Reload
9849 ; FALLBACK19-NEXT: movl %eax, 12(%ebp)
9850 ; FALLBACK19-NEXT: # kill: def $cl killed $cl killed $ecx
9851 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
9852 ; FALLBACK19-NEXT: shrdl %cl, %eax, %edx
9853 ; FALLBACK19-NEXT: movl %edx, (%ebp)
9854 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
9855 ; FALLBACK19-NEXT: movl %eax, 4(%ebp)
9856 ; FALLBACK19-NEXT: addl $92, %esp
9857 ; FALLBACK19-NEXT: popl %esi
9858 ; FALLBACK19-NEXT: popl %edi
9859 ; FALLBACK19-NEXT: popl %ebx
9860 ; FALLBACK19-NEXT: popl %ebp
9861 ; FALLBACK19-NEXT: retl
9863 ; FALLBACK20-LABEL: ashr_32bytes:
9864 ; FALLBACK20: # %bb.0:
9865 ; FALLBACK20-NEXT: pushl %ebp
9866 ; FALLBACK20-NEXT: pushl %ebx
9867 ; FALLBACK20-NEXT: pushl %edi
9868 ; FALLBACK20-NEXT: pushl %esi
9869 ; FALLBACK20-NEXT: subl $108, %esp
9870 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %eax
9871 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %ecx
9872 ; FALLBACK20-NEXT: movups (%ecx), %xmm0
9873 ; FALLBACK20-NEXT: movl 16(%ecx), %esi
9874 ; FALLBACK20-NEXT: movl 20(%ecx), %edi
9875 ; FALLBACK20-NEXT: movl 24(%ecx), %ebx
9876 ; FALLBACK20-NEXT: movl 28(%ecx), %edx
9877 ; FALLBACK20-NEXT: movzbl (%eax), %eax
9878 ; FALLBACK20-NEXT: movl %eax, %ecx
9879 ; FALLBACK20-NEXT: shlb $3, %cl
9880 ; FALLBACK20-NEXT: movl %edx, {{[0-9]+}}(%esp)
9881 ; FALLBACK20-NEXT: movl %ebx, {{[0-9]+}}(%esp)
9882 ; FALLBACK20-NEXT: movl %edi, {{[0-9]+}}(%esp)
9883 ; FALLBACK20-NEXT: movl %esi, {{[0-9]+}}(%esp)
9884 ; FALLBACK20-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
9885 ; FALLBACK20-NEXT: sarl $31, %edx
9886 ; FALLBACK20-NEXT: movl %edx, {{[0-9]+}}(%esp)
9887 ; FALLBACK20-NEXT: movl %edx, {{[0-9]+}}(%esp)
9888 ; FALLBACK20-NEXT: movl %edx, {{[0-9]+}}(%esp)
9889 ; FALLBACK20-NEXT: movl %edx, {{[0-9]+}}(%esp)
9890 ; FALLBACK20-NEXT: movl %edx, {{[0-9]+}}(%esp)
9891 ; FALLBACK20-NEXT: movl %edx, {{[0-9]+}}(%esp)
9892 ; FALLBACK20-NEXT: movl %edx, {{[0-9]+}}(%esp)
9893 ; FALLBACK20-NEXT: movl %edx, {{[0-9]+}}(%esp)
9894 ; FALLBACK20-NEXT: andb $28, %al
9895 ; FALLBACK20-NEXT: movzbl %al, %edi
9896 ; FALLBACK20-NEXT: movl 32(%esp,%edi), %eax
9897 ; FALLBACK20-NEXT: movl 36(%esp,%edi), %esi
9898 ; FALLBACK20-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9899 ; FALLBACK20-NEXT: shrl %cl, %eax
9900 ; FALLBACK20-NEXT: movl %ecx, %edx
9901 ; FALLBACK20-NEXT: movb %cl, %dh
9902 ; FALLBACK20-NEXT: notb %dl
9903 ; FALLBACK20-NEXT: addl %esi, %esi
9904 ; FALLBACK20-NEXT: movl %edx, %ecx
9905 ; FALLBACK20-NEXT: shll %cl, %esi
9906 ; FALLBACK20-NEXT: orl %eax, %esi
9907 ; FALLBACK20-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9908 ; FALLBACK20-NEXT: movl 44(%esp,%edi), %ebx
9909 ; FALLBACK20-NEXT: movl %ebx, %eax
9910 ; FALLBACK20-NEXT: movb %dh, %cl
9911 ; FALLBACK20-NEXT: shrl %cl, %eax
9912 ; FALLBACK20-NEXT: movl 48(%esp,%edi), %esi
9913 ; FALLBACK20-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9914 ; FALLBACK20-NEXT: addl %esi, %esi
9915 ; FALLBACK20-NEXT: movl %edx, %ecx
9916 ; FALLBACK20-NEXT: shll %cl, %esi
9917 ; FALLBACK20-NEXT: orl %eax, %esi
9918 ; FALLBACK20-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9919 ; FALLBACK20-NEXT: movl 40(%esp,%edi), %esi
9920 ; FALLBACK20-NEXT: movl %esi, %eax
9921 ; FALLBACK20-NEXT: movb %dh, %cl
9922 ; FALLBACK20-NEXT: shrl %cl, %eax
9923 ; FALLBACK20-NEXT: addl %ebx, %ebx
9924 ; FALLBACK20-NEXT: movl %edx, %ecx
9925 ; FALLBACK20-NEXT: shll %cl, %ebx
9926 ; FALLBACK20-NEXT: orl %eax, %ebx
9927 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9928 ; FALLBACK20-NEXT: movl 52(%esp,%edi), %ebp
9929 ; FALLBACK20-NEXT: movl %ebp, %eax
9930 ; FALLBACK20-NEXT: movb %dh, %cl
9931 ; FALLBACK20-NEXT: shrl %cl, %eax
9932 ; FALLBACK20-NEXT: movl 56(%esp,%edi), %ecx
9933 ; FALLBACK20-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9934 ; FALLBACK20-NEXT: leal (%ecx,%ecx), %ebx
9935 ; FALLBACK20-NEXT: movl %edx, %ecx
9936 ; FALLBACK20-NEXT: shll %cl, %ebx
9937 ; FALLBACK20-NEXT: orl %eax, %ebx
9938 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
9939 ; FALLBACK20-NEXT: movb %dh, %cl
9940 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
9941 ; FALLBACK20-NEXT: shrl %cl, %eax
9942 ; FALLBACK20-NEXT: addl %ebp, %ebp
9943 ; FALLBACK20-NEXT: movl %edx, %ecx
9944 ; FALLBACK20-NEXT: shll %cl, %ebp
9945 ; FALLBACK20-NEXT: orl %eax, %ebp
9946 ; FALLBACK20-NEXT: movb %dh, %cl
9947 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
9948 ; FALLBACK20-NEXT: shrl %cl, %ebx
9949 ; FALLBACK20-NEXT: movl 60(%esp,%edi), %eax
9950 ; FALLBACK20-NEXT: leal (%eax,%eax), %edi
9951 ; FALLBACK20-NEXT: movl %edx, %ecx
9952 ; FALLBACK20-NEXT: shll %cl, %edi
9953 ; FALLBACK20-NEXT: orl %ebx, %edi
9954 ; FALLBACK20-NEXT: movb %dh, %cl
9955 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
9956 ; FALLBACK20-NEXT: shrl %cl, %ebx
9957 ; FALLBACK20-NEXT: addl %esi, %esi
9958 ; FALLBACK20-NEXT: movl %edx, %ecx
9959 ; FALLBACK20-NEXT: shll %cl, %esi
9960 ; FALLBACK20-NEXT: orl %ebx, %esi
9961 ; FALLBACK20-NEXT: movb %dh, %cl
9962 ; FALLBACK20-NEXT: sarl %cl, %eax
9963 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %ecx
9964 ; FALLBACK20-NEXT: movl %eax, 28(%ecx)
9965 ; FALLBACK20-NEXT: movl %esi, 4(%ecx)
9966 ; FALLBACK20-NEXT: movl %edi, 24(%ecx)
9967 ; FALLBACK20-NEXT: movl %ebp, 16(%ecx)
9968 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
9969 ; FALLBACK20-NEXT: movl %eax, 20(%ecx)
9970 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
9971 ; FALLBACK20-NEXT: movl %eax, 8(%ecx)
9972 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
9973 ; FALLBACK20-NEXT: movl %eax, 12(%ecx)
9974 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
9975 ; FALLBACK20-NEXT: movl %eax, (%ecx)
9976 ; FALLBACK20-NEXT: addl $108, %esp
9977 ; FALLBACK20-NEXT: popl %esi
9978 ; FALLBACK20-NEXT: popl %edi
9979 ; FALLBACK20-NEXT: popl %ebx
9980 ; FALLBACK20-NEXT: popl %ebp
9981 ; FALLBACK20-NEXT: retl
9983 ; FALLBACK21-LABEL: ashr_32bytes:
9984 ; FALLBACK21: # %bb.0:
9985 ; FALLBACK21-NEXT: pushl %ebp
9986 ; FALLBACK21-NEXT: pushl %ebx
9987 ; FALLBACK21-NEXT: pushl %edi
9988 ; FALLBACK21-NEXT: pushl %esi
9989 ; FALLBACK21-NEXT: subl $108, %esp
9990 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %eax
9991 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %ecx
9992 ; FALLBACK21-NEXT: movups (%ecx), %xmm0
9993 ; FALLBACK21-NEXT: movl 16(%ecx), %esi
9994 ; FALLBACK21-NEXT: movl 20(%ecx), %edi
9995 ; FALLBACK21-NEXT: movl 24(%ecx), %ebx
9996 ; FALLBACK21-NEXT: movl 28(%ecx), %edx
9997 ; FALLBACK21-NEXT: movzbl (%eax), %eax
9998 ; FALLBACK21-NEXT: movl %eax, %ecx
9999 ; FALLBACK21-NEXT: shlb $3, %cl
10000 ; FALLBACK21-NEXT: movl %edx, {{[0-9]+}}(%esp)
10001 ; FALLBACK21-NEXT: movl %ebx, {{[0-9]+}}(%esp)
10002 ; FALLBACK21-NEXT: movl %edi, {{[0-9]+}}(%esp)
10003 ; FALLBACK21-NEXT: movl %esi, {{[0-9]+}}(%esp)
10004 ; FALLBACK21-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
10005 ; FALLBACK21-NEXT: sarl $31, %edx
10006 ; FALLBACK21-NEXT: movl %edx, {{[0-9]+}}(%esp)
10007 ; FALLBACK21-NEXT: movl %edx, {{[0-9]+}}(%esp)
10008 ; FALLBACK21-NEXT: movl %edx, {{[0-9]+}}(%esp)
10009 ; FALLBACK21-NEXT: movl %edx, {{[0-9]+}}(%esp)
10010 ; FALLBACK21-NEXT: movl %edx, {{[0-9]+}}(%esp)
10011 ; FALLBACK21-NEXT: movl %edx, {{[0-9]+}}(%esp)
10012 ; FALLBACK21-NEXT: movl %edx, {{[0-9]+}}(%esp)
10013 ; FALLBACK21-NEXT: movl %edx, {{[0-9]+}}(%esp)
10014 ; FALLBACK21-NEXT: andb $28, %al
10015 ; FALLBACK21-NEXT: movzbl %al, %ebp
10016 ; FALLBACK21-NEXT: movl 48(%esp,%ebp), %esi
10017 ; FALLBACK21-NEXT: movl 44(%esp,%ebp), %eax
10018 ; FALLBACK21-NEXT: movl %eax, %edx
10019 ; FALLBACK21-NEXT: shrdl %cl, %esi, %edx
10020 ; FALLBACK21-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10021 ; FALLBACK21-NEXT: movl 40(%esp,%ebp), %edx
10022 ; FALLBACK21-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10023 ; FALLBACK21-NEXT: shrdl %cl, %eax, %edx
10024 ; FALLBACK21-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10025 ; FALLBACK21-NEXT: movl 56(%esp,%ebp), %ebx
10026 ; FALLBACK21-NEXT: movl 52(%esp,%ebp), %eax
10027 ; FALLBACK21-NEXT: movl %eax, %edx
10028 ; FALLBACK21-NEXT: shrdl %cl, %ebx, %edx
10029 ; FALLBACK21-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10030 ; FALLBACK21-NEXT: shrdl %cl, %eax, %esi
10031 ; FALLBACK21-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10032 ; FALLBACK21-NEXT: movl 60(%esp,%ebp), %eax
10033 ; FALLBACK21-NEXT: shrdl %cl, %eax, %ebx
10034 ; FALLBACK21-NEXT: movl 32(%esp,%ebp), %edx
10035 ; FALLBACK21-NEXT: movl 36(%esp,%ebp), %edi
10036 ; FALLBACK21-NEXT: movl %edi, %esi
10037 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
10038 ; FALLBACK21-NEXT: shrdl %cl, %ebp, %esi
10039 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %ebp
10040 ; FALLBACK21-NEXT: movl %esi, 4(%ebp)
10041 ; FALLBACK21-NEXT: movl %ebx, 24(%ebp)
10042 ; FALLBACK21-NEXT: shrdl %cl, %edi, %edx
10043 ; FALLBACK21-NEXT: sarl %cl, %eax
10044 ; FALLBACK21-NEXT: movl %eax, 28(%ebp)
10045 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10046 ; FALLBACK21-NEXT: movl %eax, 16(%ebp)
10047 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10048 ; FALLBACK21-NEXT: movl %eax, 20(%ebp)
10049 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10050 ; FALLBACK21-NEXT: movl %eax, 8(%ebp)
10051 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10052 ; FALLBACK21-NEXT: movl %eax, 12(%ebp)
10053 ; FALLBACK21-NEXT: movl %edx, (%ebp)
10054 ; FALLBACK21-NEXT: addl $108, %esp
10055 ; FALLBACK21-NEXT: popl %esi
10056 ; FALLBACK21-NEXT: popl %edi
10057 ; FALLBACK21-NEXT: popl %ebx
10058 ; FALLBACK21-NEXT: popl %ebp
10059 ; FALLBACK21-NEXT: retl
10061 ; FALLBACK22-LABEL: ashr_32bytes:
10062 ; FALLBACK22: # %bb.0:
10063 ; FALLBACK22-NEXT: pushl %ebp
10064 ; FALLBACK22-NEXT: pushl %ebx
10065 ; FALLBACK22-NEXT: pushl %edi
10066 ; FALLBACK22-NEXT: pushl %esi
10067 ; FALLBACK22-NEXT: subl $108, %esp
10068 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %eax
10069 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %ecx
10070 ; FALLBACK22-NEXT: movups (%ecx), %xmm0
10071 ; FALLBACK22-NEXT: movl 16(%ecx), %esi
10072 ; FALLBACK22-NEXT: movl 20(%ecx), %edi
10073 ; FALLBACK22-NEXT: movl 24(%ecx), %ebx
10074 ; FALLBACK22-NEXT: movl 28(%ecx), %edx
10075 ; FALLBACK22-NEXT: movzbl (%eax), %ecx
10076 ; FALLBACK22-NEXT: movl %ecx, %eax
10077 ; FALLBACK22-NEXT: shlb $3, %al
10078 ; FALLBACK22-NEXT: movl %edx, {{[0-9]+}}(%esp)
10079 ; FALLBACK22-NEXT: movl %ebx, {{[0-9]+}}(%esp)
10080 ; FALLBACK22-NEXT: movl %edi, {{[0-9]+}}(%esp)
10081 ; FALLBACK22-NEXT: movl %esi, {{[0-9]+}}(%esp)
10082 ; FALLBACK22-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
10083 ; FALLBACK22-NEXT: sarl $31, %edx
10084 ; FALLBACK22-NEXT: movl %edx, {{[0-9]+}}(%esp)
10085 ; FALLBACK22-NEXT: movl %edx, {{[0-9]+}}(%esp)
10086 ; FALLBACK22-NEXT: movl %edx, {{[0-9]+}}(%esp)
10087 ; FALLBACK22-NEXT: movl %edx, {{[0-9]+}}(%esp)
10088 ; FALLBACK22-NEXT: movl %edx, {{[0-9]+}}(%esp)
10089 ; FALLBACK22-NEXT: movl %edx, {{[0-9]+}}(%esp)
10090 ; FALLBACK22-NEXT: movl %edx, {{[0-9]+}}(%esp)
10091 ; FALLBACK22-NEXT: movl %edx, {{[0-9]+}}(%esp)
10092 ; FALLBACK22-NEXT: andb $28, %cl
10093 ; FALLBACK22-NEXT: movzbl %cl, %edi
10094 ; FALLBACK22-NEXT: shrxl %eax, 32(%esp,%edi), %ecx
10095 ; FALLBACK22-NEXT: movl %eax, %edx
10096 ; FALLBACK22-NEXT: notb %dl
10097 ; FALLBACK22-NEXT: movl 36(%esp,%edi), %esi
10098 ; FALLBACK22-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10099 ; FALLBACK22-NEXT: addl %esi, %esi
10100 ; FALLBACK22-NEXT: shlxl %edx, %esi, %esi
10101 ; FALLBACK22-NEXT: orl %ecx, %esi
10102 ; FALLBACK22-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10103 ; FALLBACK22-NEXT: movl 48(%esp,%edi), %ecx
10104 ; FALLBACK22-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10105 ; FALLBACK22-NEXT: addl %ecx, %ecx
10106 ; FALLBACK22-NEXT: shlxl %edx, %ecx, %esi
10107 ; FALLBACK22-NEXT: movl 44(%esp,%edi), %ecx
10108 ; FALLBACK22-NEXT: shrxl %eax, %ecx, %ebx
10109 ; FALLBACK22-NEXT: orl %ebx, %esi
10110 ; FALLBACK22-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10111 ; FALLBACK22-NEXT: addl %ecx, %ecx
10112 ; FALLBACK22-NEXT: shlxl %edx, %ecx, %esi
10113 ; FALLBACK22-NEXT: movl 40(%esp,%edi), %ecx
10114 ; FALLBACK22-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10115 ; FALLBACK22-NEXT: shrxl %eax, %ecx, %ebx
10116 ; FALLBACK22-NEXT: movl %eax, %ecx
10117 ; FALLBACK22-NEXT: orl %ebx, %esi
10118 ; FALLBACK22-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10119 ; FALLBACK22-NEXT: movl 56(%esp,%edi), %esi
10120 ; FALLBACK22-NEXT: leal (%esi,%esi), %ebx
10121 ; FALLBACK22-NEXT: shlxl %edx, %ebx, %eax
10122 ; FALLBACK22-NEXT: movl 52(%esp,%edi), %ebx
10123 ; FALLBACK22-NEXT: shrxl %ecx, %ebx, %ebp
10124 ; FALLBACK22-NEXT: orl %ebp, %eax
10125 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10126 ; FALLBACK22-NEXT: movl %ecx, %eax
10127 ; FALLBACK22-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
10128 ; FALLBACK22-NEXT: addl %ebx, %ebx
10129 ; FALLBACK22-NEXT: shlxl %edx, %ebx, %ebx
10130 ; FALLBACK22-NEXT: orl %ebp, %ebx
10131 ; FALLBACK22-NEXT: shrxl %ecx, %esi, %ecx
10132 ; FALLBACK22-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
10133 ; FALLBACK22-NEXT: movl 60(%esp,%edi), %edi
10134 ; FALLBACK22-NEXT: sarxl %eax, %edi, %eax
10135 ; FALLBACK22-NEXT: addl %edi, %edi
10136 ; FALLBACK22-NEXT: shlxl %edx, %edi, %edi
10137 ; FALLBACK22-NEXT: orl %ecx, %edi
10138 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
10139 ; FALLBACK22-NEXT: addl %ecx, %ecx
10140 ; FALLBACK22-NEXT: shlxl %edx, %ecx, %ecx
10141 ; FALLBACK22-NEXT: orl %esi, %ecx
10142 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %edx
10143 ; FALLBACK22-NEXT: movl %eax, 28(%edx)
10144 ; FALLBACK22-NEXT: movl %ecx, 4(%edx)
10145 ; FALLBACK22-NEXT: movl %edi, 24(%edx)
10146 ; FALLBACK22-NEXT: movl %ebx, 16(%edx)
10147 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10148 ; FALLBACK22-NEXT: movl %eax, 20(%edx)
10149 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10150 ; FALLBACK22-NEXT: movl %eax, 8(%edx)
10151 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10152 ; FALLBACK22-NEXT: movl %eax, 12(%edx)
10153 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10154 ; FALLBACK22-NEXT: movl %eax, (%edx)
10155 ; FALLBACK22-NEXT: addl $108, %esp
10156 ; FALLBACK22-NEXT: popl %esi
10157 ; FALLBACK22-NEXT: popl %edi
10158 ; FALLBACK22-NEXT: popl %ebx
10159 ; FALLBACK22-NEXT: popl %ebp
10160 ; FALLBACK22-NEXT: retl
10162 ; FALLBACK23-LABEL: ashr_32bytes:
10163 ; FALLBACK23: # %bb.0:
10164 ; FALLBACK23-NEXT: pushl %ebp
10165 ; FALLBACK23-NEXT: pushl %ebx
10166 ; FALLBACK23-NEXT: pushl %edi
10167 ; FALLBACK23-NEXT: pushl %esi
10168 ; FALLBACK23-NEXT: subl $108, %esp
10169 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %eax
10170 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %ecx
10171 ; FALLBACK23-NEXT: movups (%ecx), %xmm0
10172 ; FALLBACK23-NEXT: movl 16(%ecx), %esi
10173 ; FALLBACK23-NEXT: movl 20(%ecx), %edi
10174 ; FALLBACK23-NEXT: movl 24(%ecx), %ebx
10175 ; FALLBACK23-NEXT: movl 28(%ecx), %edx
10176 ; FALLBACK23-NEXT: movzbl (%eax), %eax
10177 ; FALLBACK23-NEXT: movl %eax, %ecx
10178 ; FALLBACK23-NEXT: shlb $3, %cl
10179 ; FALLBACK23-NEXT: movl %edx, {{[0-9]+}}(%esp)
10180 ; FALLBACK23-NEXT: movl %ebx, {{[0-9]+}}(%esp)
10181 ; FALLBACK23-NEXT: movl %edi, {{[0-9]+}}(%esp)
10182 ; FALLBACK23-NEXT: movl %esi, {{[0-9]+}}(%esp)
10183 ; FALLBACK23-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
10184 ; FALLBACK23-NEXT: sarl $31, %edx
10185 ; FALLBACK23-NEXT: movl %edx, {{[0-9]+}}(%esp)
10186 ; FALLBACK23-NEXT: movl %edx, {{[0-9]+}}(%esp)
10187 ; FALLBACK23-NEXT: movl %edx, {{[0-9]+}}(%esp)
10188 ; FALLBACK23-NEXT: movl %edx, {{[0-9]+}}(%esp)
10189 ; FALLBACK23-NEXT: movl %edx, {{[0-9]+}}(%esp)
10190 ; FALLBACK23-NEXT: movl %edx, {{[0-9]+}}(%esp)
10191 ; FALLBACK23-NEXT: movl %edx, {{[0-9]+}}(%esp)
10192 ; FALLBACK23-NEXT: movl %edx, {{[0-9]+}}(%esp)
10193 ; FALLBACK23-NEXT: andb $28, %al
10194 ; FALLBACK23-NEXT: movzbl %al, %ebx
10195 ; FALLBACK23-NEXT: movl 48(%esp,%ebx), %esi
10196 ; FALLBACK23-NEXT: movl 44(%esp,%ebx), %eax
10197 ; FALLBACK23-NEXT: movl %eax, %edx
10198 ; FALLBACK23-NEXT: shrdl %cl, %esi, %edx
10199 ; FALLBACK23-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10200 ; FALLBACK23-NEXT: movl 40(%esp,%ebx), %edx
10201 ; FALLBACK23-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10202 ; FALLBACK23-NEXT: shrdl %cl, %eax, %edx
10203 ; FALLBACK23-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10204 ; FALLBACK23-NEXT: movl 56(%esp,%ebx), %ebp
10205 ; FALLBACK23-NEXT: movl 52(%esp,%ebx), %eax
10206 ; FALLBACK23-NEXT: movl %eax, %edi
10207 ; FALLBACK23-NEXT: shrdl %cl, %ebp, %edi
10208 ; FALLBACK23-NEXT: shrdl %cl, %eax, %esi
10209 ; FALLBACK23-NEXT: movl 60(%esp,%ebx), %eax
10210 ; FALLBACK23-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10211 ; FALLBACK23-NEXT: shrdl %cl, %eax, %ebp
10212 ; FALLBACK23-NEXT: movl 32(%esp,%ebx), %edx
10213 ; FALLBACK23-NEXT: movl 36(%esp,%ebx), %ebx
10214 ; FALLBACK23-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10215 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10216 ; FALLBACK23-NEXT: shrdl %cl, %eax, %ebx
10217 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %eax
10218 ; FALLBACK23-NEXT: movl %ebx, 4(%eax)
10219 ; FALLBACK23-NEXT: movl %ebp, 24(%eax)
10220 ; FALLBACK23-NEXT: sarxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
10221 ; FALLBACK23-NEXT: movl %ebx, 28(%eax)
10222 ; FALLBACK23-NEXT: movl %esi, 16(%eax)
10223 ; FALLBACK23-NEXT: movl %edi, 20(%eax)
10224 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
10225 ; FALLBACK23-NEXT: movl %esi, 8(%eax)
10226 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
10227 ; FALLBACK23-NEXT: movl %esi, 12(%eax)
10228 ; FALLBACK23-NEXT: # kill: def $cl killed $cl killed $ecx
10229 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
10230 ; FALLBACK23-NEXT: shrdl %cl, %esi, %edx
10231 ; FALLBACK23-NEXT: movl %edx, (%eax)
10232 ; FALLBACK23-NEXT: addl $108, %esp
10233 ; FALLBACK23-NEXT: popl %esi
10234 ; FALLBACK23-NEXT: popl %edi
10235 ; FALLBACK23-NEXT: popl %ebx
10236 ; FALLBACK23-NEXT: popl %ebp
10237 ; FALLBACK23-NEXT: retl
10239 ; FALLBACK24-LABEL: ashr_32bytes:
10240 ; FALLBACK24: # %bb.0:
10241 ; FALLBACK24-NEXT: pushl %ebp
10242 ; FALLBACK24-NEXT: pushl %ebx
10243 ; FALLBACK24-NEXT: pushl %edi
10244 ; FALLBACK24-NEXT: pushl %esi
10245 ; FALLBACK24-NEXT: subl $108, %esp
10246 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %eax
10247 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %ecx
10248 ; FALLBACK24-NEXT: vmovups (%ecx), %xmm0
10249 ; FALLBACK24-NEXT: movl 16(%ecx), %esi
10250 ; FALLBACK24-NEXT: movl 20(%ecx), %edi
10251 ; FALLBACK24-NEXT: movl 24(%ecx), %ebx
10252 ; FALLBACK24-NEXT: movl 28(%ecx), %edx
10253 ; FALLBACK24-NEXT: movzbl (%eax), %eax
10254 ; FALLBACK24-NEXT: movl %eax, %ecx
10255 ; FALLBACK24-NEXT: shlb $3, %cl
10256 ; FALLBACK24-NEXT: movl %edx, {{[0-9]+}}(%esp)
10257 ; FALLBACK24-NEXT: movl %ebx, {{[0-9]+}}(%esp)
10258 ; FALLBACK24-NEXT: movl %edi, {{[0-9]+}}(%esp)
10259 ; FALLBACK24-NEXT: movl %esi, {{[0-9]+}}(%esp)
10260 ; FALLBACK24-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
10261 ; FALLBACK24-NEXT: sarl $31, %edx
10262 ; FALLBACK24-NEXT: movl %edx, {{[0-9]+}}(%esp)
10263 ; FALLBACK24-NEXT: movl %edx, {{[0-9]+}}(%esp)
10264 ; FALLBACK24-NEXT: movl %edx, {{[0-9]+}}(%esp)
10265 ; FALLBACK24-NEXT: movl %edx, {{[0-9]+}}(%esp)
10266 ; FALLBACK24-NEXT: movl %edx, {{[0-9]+}}(%esp)
10267 ; FALLBACK24-NEXT: movl %edx, {{[0-9]+}}(%esp)
10268 ; FALLBACK24-NEXT: movl %edx, {{[0-9]+}}(%esp)
10269 ; FALLBACK24-NEXT: movl %edx, {{[0-9]+}}(%esp)
10270 ; FALLBACK24-NEXT: andb $28, %al
10271 ; FALLBACK24-NEXT: movzbl %al, %edi
10272 ; FALLBACK24-NEXT: movl 32(%esp,%edi), %eax
10273 ; FALLBACK24-NEXT: movl 36(%esp,%edi), %esi
10274 ; FALLBACK24-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10275 ; FALLBACK24-NEXT: shrl %cl, %eax
10276 ; FALLBACK24-NEXT: movl %ecx, %edx
10277 ; FALLBACK24-NEXT: movb %cl, %dh
10278 ; FALLBACK24-NEXT: notb %dl
10279 ; FALLBACK24-NEXT: addl %esi, %esi
10280 ; FALLBACK24-NEXT: movl %edx, %ecx
10281 ; FALLBACK24-NEXT: shll %cl, %esi
10282 ; FALLBACK24-NEXT: orl %eax, %esi
10283 ; FALLBACK24-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10284 ; FALLBACK24-NEXT: movl 44(%esp,%edi), %ebx
10285 ; FALLBACK24-NEXT: movl %ebx, %eax
10286 ; FALLBACK24-NEXT: movb %dh, %cl
10287 ; FALLBACK24-NEXT: shrl %cl, %eax
10288 ; FALLBACK24-NEXT: movl 48(%esp,%edi), %esi
10289 ; FALLBACK24-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10290 ; FALLBACK24-NEXT: addl %esi, %esi
10291 ; FALLBACK24-NEXT: movl %edx, %ecx
10292 ; FALLBACK24-NEXT: shll %cl, %esi
10293 ; FALLBACK24-NEXT: orl %eax, %esi
10294 ; FALLBACK24-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10295 ; FALLBACK24-NEXT: movl 40(%esp,%edi), %esi
10296 ; FALLBACK24-NEXT: movl %esi, %eax
10297 ; FALLBACK24-NEXT: movb %dh, %cl
10298 ; FALLBACK24-NEXT: shrl %cl, %eax
10299 ; FALLBACK24-NEXT: addl %ebx, %ebx
10300 ; FALLBACK24-NEXT: movl %edx, %ecx
10301 ; FALLBACK24-NEXT: shll %cl, %ebx
10302 ; FALLBACK24-NEXT: orl %eax, %ebx
10303 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10304 ; FALLBACK24-NEXT: movl 52(%esp,%edi), %ebp
10305 ; FALLBACK24-NEXT: movl %ebp, %eax
10306 ; FALLBACK24-NEXT: movb %dh, %cl
10307 ; FALLBACK24-NEXT: shrl %cl, %eax
10308 ; FALLBACK24-NEXT: movl 56(%esp,%edi), %ecx
10309 ; FALLBACK24-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10310 ; FALLBACK24-NEXT: leal (%ecx,%ecx), %ebx
10311 ; FALLBACK24-NEXT: movl %edx, %ecx
10312 ; FALLBACK24-NEXT: shll %cl, %ebx
10313 ; FALLBACK24-NEXT: orl %eax, %ebx
10314 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10315 ; FALLBACK24-NEXT: movb %dh, %cl
10316 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10317 ; FALLBACK24-NEXT: shrl %cl, %eax
10318 ; FALLBACK24-NEXT: addl %ebp, %ebp
10319 ; FALLBACK24-NEXT: movl %edx, %ecx
10320 ; FALLBACK24-NEXT: shll %cl, %ebp
10321 ; FALLBACK24-NEXT: orl %eax, %ebp
10322 ; FALLBACK24-NEXT: movb %dh, %cl
10323 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
10324 ; FALLBACK24-NEXT: shrl %cl, %ebx
10325 ; FALLBACK24-NEXT: movl 60(%esp,%edi), %eax
10326 ; FALLBACK24-NEXT: leal (%eax,%eax), %edi
10327 ; FALLBACK24-NEXT: movl %edx, %ecx
10328 ; FALLBACK24-NEXT: shll %cl, %edi
10329 ; FALLBACK24-NEXT: orl %ebx, %edi
10330 ; FALLBACK24-NEXT: movb %dh, %cl
10331 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
10332 ; FALLBACK24-NEXT: shrl %cl, %ebx
10333 ; FALLBACK24-NEXT: addl %esi, %esi
10334 ; FALLBACK24-NEXT: movl %edx, %ecx
10335 ; FALLBACK24-NEXT: shll %cl, %esi
10336 ; FALLBACK24-NEXT: orl %ebx, %esi
10337 ; FALLBACK24-NEXT: movb %dh, %cl
10338 ; FALLBACK24-NEXT: sarl %cl, %eax
10339 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %ecx
10340 ; FALLBACK24-NEXT: movl %eax, 28(%ecx)
10341 ; FALLBACK24-NEXT: movl %esi, 4(%ecx)
10342 ; FALLBACK24-NEXT: movl %edi, 24(%ecx)
10343 ; FALLBACK24-NEXT: movl %ebp, 16(%ecx)
10344 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10345 ; FALLBACK24-NEXT: movl %eax, 20(%ecx)
10346 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10347 ; FALLBACK24-NEXT: movl %eax, 8(%ecx)
10348 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10349 ; FALLBACK24-NEXT: movl %eax, 12(%ecx)
10350 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10351 ; FALLBACK24-NEXT: movl %eax, (%ecx)
10352 ; FALLBACK24-NEXT: addl $108, %esp
10353 ; FALLBACK24-NEXT: popl %esi
10354 ; FALLBACK24-NEXT: popl %edi
10355 ; FALLBACK24-NEXT: popl %ebx
10356 ; FALLBACK24-NEXT: popl %ebp
10357 ; FALLBACK24-NEXT: retl
10359 ; FALLBACK25-LABEL: ashr_32bytes:
10360 ; FALLBACK25: # %bb.0:
10361 ; FALLBACK25-NEXT: pushl %ebp
10362 ; FALLBACK25-NEXT: pushl %ebx
10363 ; FALLBACK25-NEXT: pushl %edi
10364 ; FALLBACK25-NEXT: pushl %esi
10365 ; FALLBACK25-NEXT: subl $108, %esp
10366 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %eax
10367 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %ecx
10368 ; FALLBACK25-NEXT: vmovups (%ecx), %xmm0
10369 ; FALLBACK25-NEXT: movl 16(%ecx), %esi
10370 ; FALLBACK25-NEXT: movl 20(%ecx), %edi
10371 ; FALLBACK25-NEXT: movl 24(%ecx), %ebx
10372 ; FALLBACK25-NEXT: movl 28(%ecx), %edx
10373 ; FALLBACK25-NEXT: movzbl (%eax), %eax
10374 ; FALLBACK25-NEXT: movl %eax, %ecx
10375 ; FALLBACK25-NEXT: shlb $3, %cl
10376 ; FALLBACK25-NEXT: movl %edx, {{[0-9]+}}(%esp)
10377 ; FALLBACK25-NEXT: movl %ebx, {{[0-9]+}}(%esp)
10378 ; FALLBACK25-NEXT: movl %edi, {{[0-9]+}}(%esp)
10379 ; FALLBACK25-NEXT: movl %esi, {{[0-9]+}}(%esp)
10380 ; FALLBACK25-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
10381 ; FALLBACK25-NEXT: sarl $31, %edx
10382 ; FALLBACK25-NEXT: movl %edx, {{[0-9]+}}(%esp)
10383 ; FALLBACK25-NEXT: movl %edx, {{[0-9]+}}(%esp)
10384 ; FALLBACK25-NEXT: movl %edx, {{[0-9]+}}(%esp)
10385 ; FALLBACK25-NEXT: movl %edx, {{[0-9]+}}(%esp)
10386 ; FALLBACK25-NEXT: movl %edx, {{[0-9]+}}(%esp)
10387 ; FALLBACK25-NEXT: movl %edx, {{[0-9]+}}(%esp)
10388 ; FALLBACK25-NEXT: movl %edx, {{[0-9]+}}(%esp)
10389 ; FALLBACK25-NEXT: movl %edx, {{[0-9]+}}(%esp)
10390 ; FALLBACK25-NEXT: andb $28, %al
10391 ; FALLBACK25-NEXT: movzbl %al, %ebp
10392 ; FALLBACK25-NEXT: movl 48(%esp,%ebp), %esi
10393 ; FALLBACK25-NEXT: movl 44(%esp,%ebp), %eax
10394 ; FALLBACK25-NEXT: movl %eax, %edx
10395 ; FALLBACK25-NEXT: shrdl %cl, %esi, %edx
10396 ; FALLBACK25-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10397 ; FALLBACK25-NEXT: movl 40(%esp,%ebp), %edx
10398 ; FALLBACK25-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10399 ; FALLBACK25-NEXT: shrdl %cl, %eax, %edx
10400 ; FALLBACK25-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10401 ; FALLBACK25-NEXT: movl 56(%esp,%ebp), %ebx
10402 ; FALLBACK25-NEXT: movl 52(%esp,%ebp), %eax
10403 ; FALLBACK25-NEXT: movl %eax, %edx
10404 ; FALLBACK25-NEXT: shrdl %cl, %ebx, %edx
10405 ; FALLBACK25-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10406 ; FALLBACK25-NEXT: shrdl %cl, %eax, %esi
10407 ; FALLBACK25-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10408 ; FALLBACK25-NEXT: movl 60(%esp,%ebp), %eax
10409 ; FALLBACK25-NEXT: shrdl %cl, %eax, %ebx
10410 ; FALLBACK25-NEXT: movl 32(%esp,%ebp), %edx
10411 ; FALLBACK25-NEXT: movl 36(%esp,%ebp), %edi
10412 ; FALLBACK25-NEXT: movl %edi, %esi
10413 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
10414 ; FALLBACK25-NEXT: shrdl %cl, %ebp, %esi
10415 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %ebp
10416 ; FALLBACK25-NEXT: movl %esi, 4(%ebp)
10417 ; FALLBACK25-NEXT: movl %ebx, 24(%ebp)
10418 ; FALLBACK25-NEXT: shrdl %cl, %edi, %edx
10419 ; FALLBACK25-NEXT: sarl %cl, %eax
10420 ; FALLBACK25-NEXT: movl %eax, 28(%ebp)
10421 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10422 ; FALLBACK25-NEXT: movl %eax, 16(%ebp)
10423 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10424 ; FALLBACK25-NEXT: movl %eax, 20(%ebp)
10425 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10426 ; FALLBACK25-NEXT: movl %eax, 8(%ebp)
10427 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10428 ; FALLBACK25-NEXT: movl %eax, 12(%ebp)
10429 ; FALLBACK25-NEXT: movl %edx, (%ebp)
10430 ; FALLBACK25-NEXT: addl $108, %esp
10431 ; FALLBACK25-NEXT: popl %esi
10432 ; FALLBACK25-NEXT: popl %edi
10433 ; FALLBACK25-NEXT: popl %ebx
10434 ; FALLBACK25-NEXT: popl %ebp
10435 ; FALLBACK25-NEXT: retl
10437 ; FALLBACK26-LABEL: ashr_32bytes:
10438 ; FALLBACK26: # %bb.0:
10439 ; FALLBACK26-NEXT: pushl %ebp
10440 ; FALLBACK26-NEXT: pushl %ebx
10441 ; FALLBACK26-NEXT: pushl %edi
10442 ; FALLBACK26-NEXT: pushl %esi
10443 ; FALLBACK26-NEXT: subl $108, %esp
10444 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %eax
10445 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %ecx
10446 ; FALLBACK26-NEXT: vmovups (%ecx), %xmm0
10447 ; FALLBACK26-NEXT: movl 16(%ecx), %esi
10448 ; FALLBACK26-NEXT: movl 20(%ecx), %edi
10449 ; FALLBACK26-NEXT: movl 24(%ecx), %ebx
10450 ; FALLBACK26-NEXT: movl 28(%ecx), %edx
10451 ; FALLBACK26-NEXT: movzbl (%eax), %ecx
10452 ; FALLBACK26-NEXT: movl %ecx, %eax
10453 ; FALLBACK26-NEXT: shlb $3, %al
10454 ; FALLBACK26-NEXT: movl %edx, {{[0-9]+}}(%esp)
10455 ; FALLBACK26-NEXT: movl %ebx, {{[0-9]+}}(%esp)
10456 ; FALLBACK26-NEXT: movl %edi, {{[0-9]+}}(%esp)
10457 ; FALLBACK26-NEXT: movl %esi, {{[0-9]+}}(%esp)
10458 ; FALLBACK26-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
10459 ; FALLBACK26-NEXT: sarl $31, %edx
10460 ; FALLBACK26-NEXT: movl %edx, {{[0-9]+}}(%esp)
10461 ; FALLBACK26-NEXT: movl %edx, {{[0-9]+}}(%esp)
10462 ; FALLBACK26-NEXT: movl %edx, {{[0-9]+}}(%esp)
10463 ; FALLBACK26-NEXT: movl %edx, {{[0-9]+}}(%esp)
10464 ; FALLBACK26-NEXT: movl %edx, {{[0-9]+}}(%esp)
10465 ; FALLBACK26-NEXT: movl %edx, {{[0-9]+}}(%esp)
10466 ; FALLBACK26-NEXT: movl %edx, {{[0-9]+}}(%esp)
10467 ; FALLBACK26-NEXT: movl %edx, {{[0-9]+}}(%esp)
10468 ; FALLBACK26-NEXT: andb $28, %cl
10469 ; FALLBACK26-NEXT: movzbl %cl, %edi
10470 ; FALLBACK26-NEXT: shrxl %eax, 32(%esp,%edi), %ecx
10471 ; FALLBACK26-NEXT: movl %eax, %edx
10472 ; FALLBACK26-NEXT: notb %dl
10473 ; FALLBACK26-NEXT: movl 36(%esp,%edi), %esi
10474 ; FALLBACK26-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10475 ; FALLBACK26-NEXT: addl %esi, %esi
10476 ; FALLBACK26-NEXT: shlxl %edx, %esi, %esi
10477 ; FALLBACK26-NEXT: orl %ecx, %esi
10478 ; FALLBACK26-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10479 ; FALLBACK26-NEXT: movl 48(%esp,%edi), %ecx
10480 ; FALLBACK26-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10481 ; FALLBACK26-NEXT: addl %ecx, %ecx
10482 ; FALLBACK26-NEXT: shlxl %edx, %ecx, %esi
10483 ; FALLBACK26-NEXT: movl 44(%esp,%edi), %ecx
10484 ; FALLBACK26-NEXT: shrxl %eax, %ecx, %ebx
10485 ; FALLBACK26-NEXT: orl %ebx, %esi
10486 ; FALLBACK26-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10487 ; FALLBACK26-NEXT: addl %ecx, %ecx
10488 ; FALLBACK26-NEXT: shlxl %edx, %ecx, %esi
10489 ; FALLBACK26-NEXT: movl 40(%esp,%edi), %ecx
10490 ; FALLBACK26-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10491 ; FALLBACK26-NEXT: shrxl %eax, %ecx, %ebx
10492 ; FALLBACK26-NEXT: movl %eax, %ecx
10493 ; FALLBACK26-NEXT: orl %ebx, %esi
10494 ; FALLBACK26-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10495 ; FALLBACK26-NEXT: movl 56(%esp,%edi), %esi
10496 ; FALLBACK26-NEXT: leal (%esi,%esi), %ebx
10497 ; FALLBACK26-NEXT: shlxl %edx, %ebx, %eax
10498 ; FALLBACK26-NEXT: movl 52(%esp,%edi), %ebx
10499 ; FALLBACK26-NEXT: shrxl %ecx, %ebx, %ebp
10500 ; FALLBACK26-NEXT: orl %ebp, %eax
10501 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10502 ; FALLBACK26-NEXT: movl %ecx, %eax
10503 ; FALLBACK26-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
10504 ; FALLBACK26-NEXT: addl %ebx, %ebx
10505 ; FALLBACK26-NEXT: shlxl %edx, %ebx, %ebx
10506 ; FALLBACK26-NEXT: orl %ebp, %ebx
10507 ; FALLBACK26-NEXT: shrxl %ecx, %esi, %ecx
10508 ; FALLBACK26-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
10509 ; FALLBACK26-NEXT: movl 60(%esp,%edi), %edi
10510 ; FALLBACK26-NEXT: sarxl %eax, %edi, %eax
10511 ; FALLBACK26-NEXT: addl %edi, %edi
10512 ; FALLBACK26-NEXT: shlxl %edx, %edi, %edi
10513 ; FALLBACK26-NEXT: orl %ecx, %edi
10514 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
10515 ; FALLBACK26-NEXT: addl %ecx, %ecx
10516 ; FALLBACK26-NEXT: shlxl %edx, %ecx, %ecx
10517 ; FALLBACK26-NEXT: orl %esi, %ecx
10518 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %edx
10519 ; FALLBACK26-NEXT: movl %eax, 28(%edx)
10520 ; FALLBACK26-NEXT: movl %ecx, 4(%edx)
10521 ; FALLBACK26-NEXT: movl %edi, 24(%edx)
10522 ; FALLBACK26-NEXT: movl %ebx, 16(%edx)
10523 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10524 ; FALLBACK26-NEXT: movl %eax, 20(%edx)
10525 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10526 ; FALLBACK26-NEXT: movl %eax, 8(%edx)
10527 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10528 ; FALLBACK26-NEXT: movl %eax, 12(%edx)
10529 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10530 ; FALLBACK26-NEXT: movl %eax, (%edx)
10531 ; FALLBACK26-NEXT: addl $108, %esp
10532 ; FALLBACK26-NEXT: popl %esi
10533 ; FALLBACK26-NEXT: popl %edi
10534 ; FALLBACK26-NEXT: popl %ebx
10535 ; FALLBACK26-NEXT: popl %ebp
10536 ; FALLBACK26-NEXT: retl
10538 ; FALLBACK27-LABEL: ashr_32bytes:
10539 ; FALLBACK27: # %bb.0:
10540 ; FALLBACK27-NEXT: pushl %ebp
10541 ; FALLBACK27-NEXT: pushl %ebx
10542 ; FALLBACK27-NEXT: pushl %edi
10543 ; FALLBACK27-NEXT: pushl %esi
10544 ; FALLBACK27-NEXT: subl $108, %esp
10545 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %eax
10546 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %ecx
10547 ; FALLBACK27-NEXT: vmovups (%ecx), %xmm0
10548 ; FALLBACK27-NEXT: movl 16(%ecx), %esi
10549 ; FALLBACK27-NEXT: movl 20(%ecx), %edi
10550 ; FALLBACK27-NEXT: movl 24(%ecx), %ebx
10551 ; FALLBACK27-NEXT: movl 28(%ecx), %edx
10552 ; FALLBACK27-NEXT: movzbl (%eax), %eax
10553 ; FALLBACK27-NEXT: movl %eax, %ecx
10554 ; FALLBACK27-NEXT: shlb $3, %cl
10555 ; FALLBACK27-NEXT: movl %edx, {{[0-9]+}}(%esp)
10556 ; FALLBACK27-NEXT: movl %ebx, {{[0-9]+}}(%esp)
10557 ; FALLBACK27-NEXT: movl %edi, {{[0-9]+}}(%esp)
10558 ; FALLBACK27-NEXT: movl %esi, {{[0-9]+}}(%esp)
10559 ; FALLBACK27-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
10560 ; FALLBACK27-NEXT: sarl $31, %edx
10561 ; FALLBACK27-NEXT: movl %edx, {{[0-9]+}}(%esp)
10562 ; FALLBACK27-NEXT: movl %edx, {{[0-9]+}}(%esp)
10563 ; FALLBACK27-NEXT: movl %edx, {{[0-9]+}}(%esp)
10564 ; FALLBACK27-NEXT: movl %edx, {{[0-9]+}}(%esp)
10565 ; FALLBACK27-NEXT: movl %edx, {{[0-9]+}}(%esp)
10566 ; FALLBACK27-NEXT: movl %edx, {{[0-9]+}}(%esp)
10567 ; FALLBACK27-NEXT: movl %edx, {{[0-9]+}}(%esp)
10568 ; FALLBACK27-NEXT: movl %edx, {{[0-9]+}}(%esp)
10569 ; FALLBACK27-NEXT: andb $28, %al
10570 ; FALLBACK27-NEXT: movzbl %al, %ebx
10571 ; FALLBACK27-NEXT: movl 48(%esp,%ebx), %esi
10572 ; FALLBACK27-NEXT: movl 44(%esp,%ebx), %eax
10573 ; FALLBACK27-NEXT: movl %eax, %edx
10574 ; FALLBACK27-NEXT: shrdl %cl, %esi, %edx
10575 ; FALLBACK27-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10576 ; FALLBACK27-NEXT: movl 40(%esp,%ebx), %edx
10577 ; FALLBACK27-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10578 ; FALLBACK27-NEXT: shrdl %cl, %eax, %edx
10579 ; FALLBACK27-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10580 ; FALLBACK27-NEXT: movl 56(%esp,%ebx), %ebp
10581 ; FALLBACK27-NEXT: movl 52(%esp,%ebx), %eax
10582 ; FALLBACK27-NEXT: movl %eax, %edi
10583 ; FALLBACK27-NEXT: shrdl %cl, %ebp, %edi
10584 ; FALLBACK27-NEXT: shrdl %cl, %eax, %esi
10585 ; FALLBACK27-NEXT: movl 60(%esp,%ebx), %eax
10586 ; FALLBACK27-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10587 ; FALLBACK27-NEXT: shrdl %cl, %eax, %ebp
10588 ; FALLBACK27-NEXT: movl 32(%esp,%ebx), %edx
10589 ; FALLBACK27-NEXT: movl 36(%esp,%ebx), %ebx
10590 ; FALLBACK27-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10591 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10592 ; FALLBACK27-NEXT: shrdl %cl, %eax, %ebx
10593 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %eax
10594 ; FALLBACK27-NEXT: movl %ebx, 4(%eax)
10595 ; FALLBACK27-NEXT: movl %ebp, 24(%eax)
10596 ; FALLBACK27-NEXT: sarxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
10597 ; FALLBACK27-NEXT: movl %ebx, 28(%eax)
10598 ; FALLBACK27-NEXT: movl %esi, 16(%eax)
10599 ; FALLBACK27-NEXT: movl %edi, 20(%eax)
10600 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
10601 ; FALLBACK27-NEXT: movl %esi, 8(%eax)
10602 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
10603 ; FALLBACK27-NEXT: movl %esi, 12(%eax)
10604 ; FALLBACK27-NEXT: # kill: def $cl killed $cl killed $ecx
10605 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
10606 ; FALLBACK27-NEXT: shrdl %cl, %esi, %edx
10607 ; FALLBACK27-NEXT: movl %edx, (%eax)
10608 ; FALLBACK27-NEXT: addl $108, %esp
10609 ; FALLBACK27-NEXT: popl %esi
10610 ; FALLBACK27-NEXT: popl %edi
10611 ; FALLBACK27-NEXT: popl %ebx
10612 ; FALLBACK27-NEXT: popl %ebp
10613 ; FALLBACK27-NEXT: retl
10615 ; FALLBACK28-LABEL: ashr_32bytes:
10616 ; FALLBACK28: # %bb.0:
10617 ; FALLBACK28-NEXT: pushl %ebp
10618 ; FALLBACK28-NEXT: pushl %ebx
10619 ; FALLBACK28-NEXT: pushl %edi
10620 ; FALLBACK28-NEXT: pushl %esi
10621 ; FALLBACK28-NEXT: subl $108, %esp
10622 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %eax
10623 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %ecx
10624 ; FALLBACK28-NEXT: vmovups (%ecx), %xmm0
10625 ; FALLBACK28-NEXT: movl 16(%ecx), %esi
10626 ; FALLBACK28-NEXT: movl 20(%ecx), %edi
10627 ; FALLBACK28-NEXT: movl 24(%ecx), %ebx
10628 ; FALLBACK28-NEXT: movl 28(%ecx), %edx
10629 ; FALLBACK28-NEXT: movzbl (%eax), %eax
10630 ; FALLBACK28-NEXT: movl %eax, %ecx
10631 ; FALLBACK28-NEXT: shlb $3, %cl
10632 ; FALLBACK28-NEXT: movl %edx, {{[0-9]+}}(%esp)
10633 ; FALLBACK28-NEXT: movl %ebx, {{[0-9]+}}(%esp)
10634 ; FALLBACK28-NEXT: movl %edi, {{[0-9]+}}(%esp)
10635 ; FALLBACK28-NEXT: movl %esi, {{[0-9]+}}(%esp)
10636 ; FALLBACK28-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
10637 ; FALLBACK28-NEXT: sarl $31, %edx
10638 ; FALLBACK28-NEXT: movl %edx, {{[0-9]+}}(%esp)
10639 ; FALLBACK28-NEXT: movl %edx, {{[0-9]+}}(%esp)
10640 ; FALLBACK28-NEXT: movl %edx, {{[0-9]+}}(%esp)
10641 ; FALLBACK28-NEXT: movl %edx, {{[0-9]+}}(%esp)
10642 ; FALLBACK28-NEXT: movl %edx, {{[0-9]+}}(%esp)
10643 ; FALLBACK28-NEXT: movl %edx, {{[0-9]+}}(%esp)
10644 ; FALLBACK28-NEXT: movl %edx, {{[0-9]+}}(%esp)
10645 ; FALLBACK28-NEXT: movl %edx, {{[0-9]+}}(%esp)
10646 ; FALLBACK28-NEXT: andb $28, %al
10647 ; FALLBACK28-NEXT: movzbl %al, %edi
10648 ; FALLBACK28-NEXT: movl 32(%esp,%edi), %eax
10649 ; FALLBACK28-NEXT: movl 36(%esp,%edi), %esi
10650 ; FALLBACK28-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10651 ; FALLBACK28-NEXT: shrl %cl, %eax
10652 ; FALLBACK28-NEXT: movl %ecx, %edx
10653 ; FALLBACK28-NEXT: movb %cl, %dh
10654 ; FALLBACK28-NEXT: notb %dl
10655 ; FALLBACK28-NEXT: addl %esi, %esi
10656 ; FALLBACK28-NEXT: movl %edx, %ecx
10657 ; FALLBACK28-NEXT: shll %cl, %esi
10658 ; FALLBACK28-NEXT: orl %eax, %esi
10659 ; FALLBACK28-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10660 ; FALLBACK28-NEXT: movl 44(%esp,%edi), %ebx
10661 ; FALLBACK28-NEXT: movl %ebx, %eax
10662 ; FALLBACK28-NEXT: movb %dh, %cl
10663 ; FALLBACK28-NEXT: shrl %cl, %eax
10664 ; FALLBACK28-NEXT: movl 48(%esp,%edi), %esi
10665 ; FALLBACK28-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10666 ; FALLBACK28-NEXT: addl %esi, %esi
10667 ; FALLBACK28-NEXT: movl %edx, %ecx
10668 ; FALLBACK28-NEXT: shll %cl, %esi
10669 ; FALLBACK28-NEXT: orl %eax, %esi
10670 ; FALLBACK28-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10671 ; FALLBACK28-NEXT: movl 40(%esp,%edi), %esi
10672 ; FALLBACK28-NEXT: movl %esi, %eax
10673 ; FALLBACK28-NEXT: movb %dh, %cl
10674 ; FALLBACK28-NEXT: shrl %cl, %eax
10675 ; FALLBACK28-NEXT: addl %ebx, %ebx
10676 ; FALLBACK28-NEXT: movl %edx, %ecx
10677 ; FALLBACK28-NEXT: shll %cl, %ebx
10678 ; FALLBACK28-NEXT: orl %eax, %ebx
10679 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10680 ; FALLBACK28-NEXT: movl 52(%esp,%edi), %ebp
10681 ; FALLBACK28-NEXT: movl %ebp, %eax
10682 ; FALLBACK28-NEXT: movb %dh, %cl
10683 ; FALLBACK28-NEXT: shrl %cl, %eax
10684 ; FALLBACK28-NEXT: movl 56(%esp,%edi), %ecx
10685 ; FALLBACK28-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10686 ; FALLBACK28-NEXT: leal (%ecx,%ecx), %ebx
10687 ; FALLBACK28-NEXT: movl %edx, %ecx
10688 ; FALLBACK28-NEXT: shll %cl, %ebx
10689 ; FALLBACK28-NEXT: orl %eax, %ebx
10690 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10691 ; FALLBACK28-NEXT: movb %dh, %cl
10692 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10693 ; FALLBACK28-NEXT: shrl %cl, %eax
10694 ; FALLBACK28-NEXT: addl %ebp, %ebp
10695 ; FALLBACK28-NEXT: movl %edx, %ecx
10696 ; FALLBACK28-NEXT: shll %cl, %ebp
10697 ; FALLBACK28-NEXT: orl %eax, %ebp
10698 ; FALLBACK28-NEXT: movb %dh, %cl
10699 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
10700 ; FALLBACK28-NEXT: shrl %cl, %ebx
10701 ; FALLBACK28-NEXT: movl 60(%esp,%edi), %eax
10702 ; FALLBACK28-NEXT: leal (%eax,%eax), %edi
10703 ; FALLBACK28-NEXT: movl %edx, %ecx
10704 ; FALLBACK28-NEXT: shll %cl, %edi
10705 ; FALLBACK28-NEXT: orl %ebx, %edi
10706 ; FALLBACK28-NEXT: movb %dh, %cl
10707 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
10708 ; FALLBACK28-NEXT: shrl %cl, %ebx
10709 ; FALLBACK28-NEXT: addl %esi, %esi
10710 ; FALLBACK28-NEXT: movl %edx, %ecx
10711 ; FALLBACK28-NEXT: shll %cl, %esi
10712 ; FALLBACK28-NEXT: orl %ebx, %esi
10713 ; FALLBACK28-NEXT: movb %dh, %cl
10714 ; FALLBACK28-NEXT: sarl %cl, %eax
10715 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %ecx
10716 ; FALLBACK28-NEXT: movl %eax, 28(%ecx)
10717 ; FALLBACK28-NEXT: movl %esi, 4(%ecx)
10718 ; FALLBACK28-NEXT: movl %edi, 24(%ecx)
10719 ; FALLBACK28-NEXT: movl %ebp, 16(%ecx)
10720 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10721 ; FALLBACK28-NEXT: movl %eax, 20(%ecx)
10722 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10723 ; FALLBACK28-NEXT: movl %eax, 8(%ecx)
10724 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10725 ; FALLBACK28-NEXT: movl %eax, 12(%ecx)
10726 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10727 ; FALLBACK28-NEXT: movl %eax, (%ecx)
10728 ; FALLBACK28-NEXT: addl $108, %esp
10729 ; FALLBACK28-NEXT: popl %esi
10730 ; FALLBACK28-NEXT: popl %edi
10731 ; FALLBACK28-NEXT: popl %ebx
10732 ; FALLBACK28-NEXT: popl %ebp
10733 ; FALLBACK28-NEXT: retl
10735 ; FALLBACK29-LABEL: ashr_32bytes:
10736 ; FALLBACK29: # %bb.0:
10737 ; FALLBACK29-NEXT: pushl %ebp
10738 ; FALLBACK29-NEXT: pushl %ebx
10739 ; FALLBACK29-NEXT: pushl %edi
10740 ; FALLBACK29-NEXT: pushl %esi
10741 ; FALLBACK29-NEXT: subl $108, %esp
10742 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %eax
10743 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %ecx
10744 ; FALLBACK29-NEXT: vmovups (%ecx), %xmm0
10745 ; FALLBACK29-NEXT: movl 16(%ecx), %esi
10746 ; FALLBACK29-NEXT: movl 20(%ecx), %edi
10747 ; FALLBACK29-NEXT: movl 24(%ecx), %ebx
10748 ; FALLBACK29-NEXT: movl 28(%ecx), %edx
10749 ; FALLBACK29-NEXT: movzbl (%eax), %eax
10750 ; FALLBACK29-NEXT: movl %eax, %ecx
10751 ; FALLBACK29-NEXT: shlb $3, %cl
10752 ; FALLBACK29-NEXT: movl %edx, {{[0-9]+}}(%esp)
10753 ; FALLBACK29-NEXT: movl %ebx, {{[0-9]+}}(%esp)
10754 ; FALLBACK29-NEXT: movl %edi, {{[0-9]+}}(%esp)
10755 ; FALLBACK29-NEXT: movl %esi, {{[0-9]+}}(%esp)
10756 ; FALLBACK29-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
10757 ; FALLBACK29-NEXT: sarl $31, %edx
10758 ; FALLBACK29-NEXT: movl %edx, {{[0-9]+}}(%esp)
10759 ; FALLBACK29-NEXT: movl %edx, {{[0-9]+}}(%esp)
10760 ; FALLBACK29-NEXT: movl %edx, {{[0-9]+}}(%esp)
10761 ; FALLBACK29-NEXT: movl %edx, {{[0-9]+}}(%esp)
10762 ; FALLBACK29-NEXT: movl %edx, {{[0-9]+}}(%esp)
10763 ; FALLBACK29-NEXT: movl %edx, {{[0-9]+}}(%esp)
10764 ; FALLBACK29-NEXT: movl %edx, {{[0-9]+}}(%esp)
10765 ; FALLBACK29-NEXT: movl %edx, {{[0-9]+}}(%esp)
10766 ; FALLBACK29-NEXT: andb $28, %al
10767 ; FALLBACK29-NEXT: movzbl %al, %ebp
10768 ; FALLBACK29-NEXT: movl 48(%esp,%ebp), %esi
10769 ; FALLBACK29-NEXT: movl 44(%esp,%ebp), %eax
10770 ; FALLBACK29-NEXT: movl %eax, %edx
10771 ; FALLBACK29-NEXT: shrdl %cl, %esi, %edx
10772 ; FALLBACK29-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10773 ; FALLBACK29-NEXT: movl 40(%esp,%ebp), %edx
10774 ; FALLBACK29-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10775 ; FALLBACK29-NEXT: shrdl %cl, %eax, %edx
10776 ; FALLBACK29-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10777 ; FALLBACK29-NEXT: movl 56(%esp,%ebp), %ebx
10778 ; FALLBACK29-NEXT: movl 52(%esp,%ebp), %eax
10779 ; FALLBACK29-NEXT: movl %eax, %edx
10780 ; FALLBACK29-NEXT: shrdl %cl, %ebx, %edx
10781 ; FALLBACK29-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10782 ; FALLBACK29-NEXT: shrdl %cl, %eax, %esi
10783 ; FALLBACK29-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10784 ; FALLBACK29-NEXT: movl 60(%esp,%ebp), %eax
10785 ; FALLBACK29-NEXT: shrdl %cl, %eax, %ebx
10786 ; FALLBACK29-NEXT: movl 32(%esp,%ebp), %edx
10787 ; FALLBACK29-NEXT: movl 36(%esp,%ebp), %edi
10788 ; FALLBACK29-NEXT: movl %edi, %esi
10789 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
10790 ; FALLBACK29-NEXT: shrdl %cl, %ebp, %esi
10791 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %ebp
10792 ; FALLBACK29-NEXT: movl %esi, 4(%ebp)
10793 ; FALLBACK29-NEXT: movl %ebx, 24(%ebp)
10794 ; FALLBACK29-NEXT: shrdl %cl, %edi, %edx
10795 ; FALLBACK29-NEXT: sarl %cl, %eax
10796 ; FALLBACK29-NEXT: movl %eax, 28(%ebp)
10797 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10798 ; FALLBACK29-NEXT: movl %eax, 16(%ebp)
10799 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10800 ; FALLBACK29-NEXT: movl %eax, 20(%ebp)
10801 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10802 ; FALLBACK29-NEXT: movl %eax, 8(%ebp)
10803 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10804 ; FALLBACK29-NEXT: movl %eax, 12(%ebp)
10805 ; FALLBACK29-NEXT: movl %edx, (%ebp)
10806 ; FALLBACK29-NEXT: addl $108, %esp
10807 ; FALLBACK29-NEXT: popl %esi
10808 ; FALLBACK29-NEXT: popl %edi
10809 ; FALLBACK29-NEXT: popl %ebx
10810 ; FALLBACK29-NEXT: popl %ebp
10811 ; FALLBACK29-NEXT: retl
10813 ; FALLBACK30-LABEL: ashr_32bytes:
10814 ; FALLBACK30: # %bb.0:
10815 ; FALLBACK30-NEXT: pushl %ebp
10816 ; FALLBACK30-NEXT: pushl %ebx
10817 ; FALLBACK30-NEXT: pushl %edi
10818 ; FALLBACK30-NEXT: pushl %esi
10819 ; FALLBACK30-NEXT: subl $108, %esp
10820 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %eax
10821 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %ecx
10822 ; FALLBACK30-NEXT: vmovups (%ecx), %xmm0
10823 ; FALLBACK30-NEXT: movl 16(%ecx), %esi
10824 ; FALLBACK30-NEXT: movl 20(%ecx), %edi
10825 ; FALLBACK30-NEXT: movl 24(%ecx), %ebx
10826 ; FALLBACK30-NEXT: movl 28(%ecx), %edx
10827 ; FALLBACK30-NEXT: movzbl (%eax), %ecx
10828 ; FALLBACK30-NEXT: movl %ecx, %eax
10829 ; FALLBACK30-NEXT: shlb $3, %al
10830 ; FALLBACK30-NEXT: movl %edx, {{[0-9]+}}(%esp)
10831 ; FALLBACK30-NEXT: movl %ebx, {{[0-9]+}}(%esp)
10832 ; FALLBACK30-NEXT: movl %edi, {{[0-9]+}}(%esp)
10833 ; FALLBACK30-NEXT: movl %esi, {{[0-9]+}}(%esp)
10834 ; FALLBACK30-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
10835 ; FALLBACK30-NEXT: sarl $31, %edx
10836 ; FALLBACK30-NEXT: movl %edx, {{[0-9]+}}(%esp)
10837 ; FALLBACK30-NEXT: movl %edx, {{[0-9]+}}(%esp)
10838 ; FALLBACK30-NEXT: movl %edx, {{[0-9]+}}(%esp)
10839 ; FALLBACK30-NEXT: movl %edx, {{[0-9]+}}(%esp)
10840 ; FALLBACK30-NEXT: movl %edx, {{[0-9]+}}(%esp)
10841 ; FALLBACK30-NEXT: movl %edx, {{[0-9]+}}(%esp)
10842 ; FALLBACK30-NEXT: movl %edx, {{[0-9]+}}(%esp)
10843 ; FALLBACK30-NEXT: movl %edx, {{[0-9]+}}(%esp)
10844 ; FALLBACK30-NEXT: andb $28, %cl
10845 ; FALLBACK30-NEXT: movzbl %cl, %edi
10846 ; FALLBACK30-NEXT: shrxl %eax, 32(%esp,%edi), %ecx
10847 ; FALLBACK30-NEXT: movl %eax, %edx
10848 ; FALLBACK30-NEXT: notb %dl
10849 ; FALLBACK30-NEXT: movl 36(%esp,%edi), %esi
10850 ; FALLBACK30-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10851 ; FALLBACK30-NEXT: addl %esi, %esi
10852 ; FALLBACK30-NEXT: shlxl %edx, %esi, %esi
10853 ; FALLBACK30-NEXT: orl %ecx, %esi
10854 ; FALLBACK30-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10855 ; FALLBACK30-NEXT: movl 48(%esp,%edi), %ecx
10856 ; FALLBACK30-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10857 ; FALLBACK30-NEXT: addl %ecx, %ecx
10858 ; FALLBACK30-NEXT: shlxl %edx, %ecx, %esi
10859 ; FALLBACK30-NEXT: movl 44(%esp,%edi), %ecx
10860 ; FALLBACK30-NEXT: shrxl %eax, %ecx, %ebx
10861 ; FALLBACK30-NEXT: orl %ebx, %esi
10862 ; FALLBACK30-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10863 ; FALLBACK30-NEXT: addl %ecx, %ecx
10864 ; FALLBACK30-NEXT: shlxl %edx, %ecx, %esi
10865 ; FALLBACK30-NEXT: movl 40(%esp,%edi), %ecx
10866 ; FALLBACK30-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10867 ; FALLBACK30-NEXT: shrxl %eax, %ecx, %ebx
10868 ; FALLBACK30-NEXT: movl %eax, %ecx
10869 ; FALLBACK30-NEXT: orl %ebx, %esi
10870 ; FALLBACK30-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10871 ; FALLBACK30-NEXT: movl 56(%esp,%edi), %esi
10872 ; FALLBACK30-NEXT: leal (%esi,%esi), %ebx
10873 ; FALLBACK30-NEXT: shlxl %edx, %ebx, %eax
10874 ; FALLBACK30-NEXT: movl 52(%esp,%edi), %ebx
10875 ; FALLBACK30-NEXT: shrxl %ecx, %ebx, %ebp
10876 ; FALLBACK30-NEXT: orl %ebp, %eax
10877 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10878 ; FALLBACK30-NEXT: movl %ecx, %eax
10879 ; FALLBACK30-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
10880 ; FALLBACK30-NEXT: addl %ebx, %ebx
10881 ; FALLBACK30-NEXT: shlxl %edx, %ebx, %ebx
10882 ; FALLBACK30-NEXT: orl %ebp, %ebx
10883 ; FALLBACK30-NEXT: shrxl %ecx, %esi, %ecx
10884 ; FALLBACK30-NEXT: shrxl %eax, {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
10885 ; FALLBACK30-NEXT: movl 60(%esp,%edi), %edi
10886 ; FALLBACK30-NEXT: sarxl %eax, %edi, %eax
10887 ; FALLBACK30-NEXT: addl %edi, %edi
10888 ; FALLBACK30-NEXT: shlxl %edx, %edi, %edi
10889 ; FALLBACK30-NEXT: orl %ecx, %edi
10890 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
10891 ; FALLBACK30-NEXT: addl %ecx, %ecx
10892 ; FALLBACK30-NEXT: shlxl %edx, %ecx, %ecx
10893 ; FALLBACK30-NEXT: orl %esi, %ecx
10894 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %edx
10895 ; FALLBACK30-NEXT: movl %eax, 28(%edx)
10896 ; FALLBACK30-NEXT: movl %ecx, 4(%edx)
10897 ; FALLBACK30-NEXT: movl %edi, 24(%edx)
10898 ; FALLBACK30-NEXT: movl %ebx, 16(%edx)
10899 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10900 ; FALLBACK30-NEXT: movl %eax, 20(%edx)
10901 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10902 ; FALLBACK30-NEXT: movl %eax, 8(%edx)
10903 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10904 ; FALLBACK30-NEXT: movl %eax, 12(%edx)
10905 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10906 ; FALLBACK30-NEXT: movl %eax, (%edx)
10907 ; FALLBACK30-NEXT: addl $108, %esp
10908 ; FALLBACK30-NEXT: popl %esi
10909 ; FALLBACK30-NEXT: popl %edi
10910 ; FALLBACK30-NEXT: popl %ebx
10911 ; FALLBACK30-NEXT: popl %ebp
10912 ; FALLBACK30-NEXT: retl
10914 ; FALLBACK31-LABEL: ashr_32bytes:
10915 ; FALLBACK31: # %bb.0:
10916 ; FALLBACK31-NEXT: pushl %ebp
10917 ; FALLBACK31-NEXT: pushl %ebx
10918 ; FALLBACK31-NEXT: pushl %edi
10919 ; FALLBACK31-NEXT: pushl %esi
10920 ; FALLBACK31-NEXT: subl $108, %esp
10921 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %eax
10922 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %ecx
10923 ; FALLBACK31-NEXT: vmovups (%ecx), %xmm0
10924 ; FALLBACK31-NEXT: movl 16(%ecx), %esi
10925 ; FALLBACK31-NEXT: movl 20(%ecx), %edi
10926 ; FALLBACK31-NEXT: movl 24(%ecx), %ebx
10927 ; FALLBACK31-NEXT: movl 28(%ecx), %edx
10928 ; FALLBACK31-NEXT: movzbl (%eax), %eax
10929 ; FALLBACK31-NEXT: movl %eax, %ecx
10930 ; FALLBACK31-NEXT: shlb $3, %cl
10931 ; FALLBACK31-NEXT: movl %edx, {{[0-9]+}}(%esp)
10932 ; FALLBACK31-NEXT: movl %ebx, {{[0-9]+}}(%esp)
10933 ; FALLBACK31-NEXT: movl %edi, {{[0-9]+}}(%esp)
10934 ; FALLBACK31-NEXT: movl %esi, {{[0-9]+}}(%esp)
10935 ; FALLBACK31-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
10936 ; FALLBACK31-NEXT: sarl $31, %edx
10937 ; FALLBACK31-NEXT: movl %edx, {{[0-9]+}}(%esp)
10938 ; FALLBACK31-NEXT: movl %edx, {{[0-9]+}}(%esp)
10939 ; FALLBACK31-NEXT: movl %edx, {{[0-9]+}}(%esp)
10940 ; FALLBACK31-NEXT: movl %edx, {{[0-9]+}}(%esp)
10941 ; FALLBACK31-NEXT: movl %edx, {{[0-9]+}}(%esp)
10942 ; FALLBACK31-NEXT: movl %edx, {{[0-9]+}}(%esp)
10943 ; FALLBACK31-NEXT: movl %edx, {{[0-9]+}}(%esp)
10944 ; FALLBACK31-NEXT: movl %edx, {{[0-9]+}}(%esp)
10945 ; FALLBACK31-NEXT: andb $28, %al
10946 ; FALLBACK31-NEXT: movzbl %al, %ebx
10947 ; FALLBACK31-NEXT: movl 48(%esp,%ebx), %esi
10948 ; FALLBACK31-NEXT: movl 44(%esp,%ebx), %eax
10949 ; FALLBACK31-NEXT: movl %eax, %edx
10950 ; FALLBACK31-NEXT: shrdl %cl, %esi, %edx
10951 ; FALLBACK31-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10952 ; FALLBACK31-NEXT: movl 40(%esp,%ebx), %edx
10953 ; FALLBACK31-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10954 ; FALLBACK31-NEXT: shrdl %cl, %eax, %edx
10955 ; FALLBACK31-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10956 ; FALLBACK31-NEXT: movl 56(%esp,%ebx), %ebp
10957 ; FALLBACK31-NEXT: movl 52(%esp,%ebx), %eax
10958 ; FALLBACK31-NEXT: movl %eax, %edi
10959 ; FALLBACK31-NEXT: shrdl %cl, %ebp, %edi
10960 ; FALLBACK31-NEXT: shrdl %cl, %eax, %esi
10961 ; FALLBACK31-NEXT: movl 60(%esp,%ebx), %eax
10962 ; FALLBACK31-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10963 ; FALLBACK31-NEXT: shrdl %cl, %eax, %ebp
10964 ; FALLBACK31-NEXT: movl 32(%esp,%ebx), %edx
10965 ; FALLBACK31-NEXT: movl 36(%esp,%ebx), %ebx
10966 ; FALLBACK31-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
10967 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
10968 ; FALLBACK31-NEXT: shrdl %cl, %eax, %ebx
10969 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %eax
10970 ; FALLBACK31-NEXT: movl %ebx, 4(%eax)
10971 ; FALLBACK31-NEXT: movl %ebp, 24(%eax)
10972 ; FALLBACK31-NEXT: sarxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
10973 ; FALLBACK31-NEXT: movl %ebx, 28(%eax)
10974 ; FALLBACK31-NEXT: movl %esi, 16(%eax)
10975 ; FALLBACK31-NEXT: movl %edi, 20(%eax)
10976 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
10977 ; FALLBACK31-NEXT: movl %esi, 8(%eax)
10978 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
10979 ; FALLBACK31-NEXT: movl %esi, 12(%eax)
10980 ; FALLBACK31-NEXT: # kill: def $cl killed $cl killed $ecx
10981 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
10982 ; FALLBACK31-NEXT: shrdl %cl, %esi, %edx
10983 ; FALLBACK31-NEXT: movl %edx, (%eax)
10984 ; FALLBACK31-NEXT: addl $108, %esp
10985 ; FALLBACK31-NEXT: popl %esi
10986 ; FALLBACK31-NEXT: popl %edi
10987 ; FALLBACK31-NEXT: popl %ebx
10988 ; FALLBACK31-NEXT: popl %ebp
10989 ; FALLBACK31-NEXT: retl
10990 %src = load i256, ptr %src.ptr, align 1
10991 %byteOff = load i256, ptr %byteOff.ptr, align 1
10992 %bitOff = shl i256 %byteOff, 3
10993 %res = ashr i256 %src, %bitOff
10994 store i256 %res, ptr %dst, align 1
10998 define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nounwind {
10999 ; FALLBACK0-LABEL: ashr_32bytes_dwordOff:
11000 ; FALLBACK0: # %bb.0:
11001 ; FALLBACK0-NEXT: pushq %rbx
11002 ; FALLBACK0-NEXT: movq (%rdi), %rcx
11003 ; FALLBACK0-NEXT: movq 8(%rdi), %r8
11004 ; FALLBACK0-NEXT: movq 16(%rdi), %r9
11005 ; FALLBACK0-NEXT: movq 24(%rdi), %rdi
11006 ; FALLBACK0-NEXT: movzbl (%rsi), %esi
11007 ; FALLBACK0-NEXT: movl %esi, %eax
11008 ; FALLBACK0-NEXT: shlb $5, %al
11009 ; FALLBACK0-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11010 ; FALLBACK0-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
11011 ; FALLBACK0-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
11012 ; FALLBACK0-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
11013 ; FALLBACK0-NEXT: sarq $63, %rdi
11014 ; FALLBACK0-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11015 ; FALLBACK0-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11016 ; FALLBACK0-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11017 ; FALLBACK0-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11018 ; FALLBACK0-NEXT: andb $6, %sil
11019 ; FALLBACK0-NEXT: movzbl %sil, %r9d
11020 ; FALLBACK0-NEXT: movq -64(%rsp,%r9,4), %r10
11021 ; FALLBACK0-NEXT: movq -56(%rsp,%r9,4), %rdi
11022 ; FALLBACK0-NEXT: movq %rdi, %r11
11023 ; FALLBACK0-NEXT: movl %eax, %ecx
11024 ; FALLBACK0-NEXT: shrq %cl, %r11
11025 ; FALLBACK0-NEXT: movl %eax, %esi
11026 ; FALLBACK0-NEXT: notb %sil
11027 ; FALLBACK0-NEXT: movq -48(%rsp,%r9,4), %rbx
11028 ; FALLBACK0-NEXT: leaq (%rbx,%rbx), %r8
11029 ; FALLBACK0-NEXT: movl %esi, %ecx
11030 ; FALLBACK0-NEXT: shlq %cl, %r8
11031 ; FALLBACK0-NEXT: orq %r11, %r8
11032 ; FALLBACK0-NEXT: movl %eax, %ecx
11033 ; FALLBACK0-NEXT: shrq %cl, %r10
11034 ; FALLBACK0-NEXT: addq %rdi, %rdi
11035 ; FALLBACK0-NEXT: movl %esi, %ecx
11036 ; FALLBACK0-NEXT: shlq %cl, %rdi
11037 ; FALLBACK0-NEXT: orq %r10, %rdi
11038 ; FALLBACK0-NEXT: movl %eax, %ecx
11039 ; FALLBACK0-NEXT: shrq %cl, %rbx
11040 ; FALLBACK0-NEXT: movq -40(%rsp,%r9,4), %r9
11041 ; FALLBACK0-NEXT: leaq (%r9,%r9), %r10
11042 ; FALLBACK0-NEXT: movl %esi, %ecx
11043 ; FALLBACK0-NEXT: shlq %cl, %r10
11044 ; FALLBACK0-NEXT: orq %rbx, %r10
11045 ; FALLBACK0-NEXT: movl %eax, %ecx
11046 ; FALLBACK0-NEXT: sarq %cl, %r9
11047 ; FALLBACK0-NEXT: movq %r9, 24(%rdx)
11048 ; FALLBACK0-NEXT: movq %r10, 16(%rdx)
11049 ; FALLBACK0-NEXT: movq %rdi, (%rdx)
11050 ; FALLBACK0-NEXT: movq %r8, 8(%rdx)
11051 ; FALLBACK0-NEXT: popq %rbx
11052 ; FALLBACK0-NEXT: retq
11054 ; FALLBACK1-LABEL: ashr_32bytes_dwordOff:
11055 ; FALLBACK1: # %bb.0:
11056 ; FALLBACK1-NEXT: movq (%rdi), %rax
11057 ; FALLBACK1-NEXT: movq 8(%rdi), %r8
11058 ; FALLBACK1-NEXT: movq 16(%rdi), %r9
11059 ; FALLBACK1-NEXT: movq 24(%rdi), %rdi
11060 ; FALLBACK1-NEXT: movzbl (%rsi), %esi
11061 ; FALLBACK1-NEXT: movl %esi, %ecx
11062 ; FALLBACK1-NEXT: shlb $5, %cl
11063 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11064 ; FALLBACK1-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
11065 ; FALLBACK1-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
11066 ; FALLBACK1-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
11067 ; FALLBACK1-NEXT: sarq $63, %rdi
11068 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11069 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11070 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11071 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11072 ; FALLBACK1-NEXT: andb $6, %sil
11073 ; FALLBACK1-NEXT: movzbl %sil, %eax
11074 ; FALLBACK1-NEXT: movq -56(%rsp,%rax,4), %rsi
11075 ; FALLBACK1-NEXT: movq -72(%rsp,%rax,4), %rdi
11076 ; FALLBACK1-NEXT: movq -64(%rsp,%rax,4), %r8
11077 ; FALLBACK1-NEXT: movq %r8, %r9
11078 ; FALLBACK1-NEXT: shrdq %cl, %rsi, %r9
11079 ; FALLBACK1-NEXT: movq -48(%rsp,%rax,4), %rax
11080 ; FALLBACK1-NEXT: shrdq %cl, %rax, %rsi
11081 ; FALLBACK1-NEXT: shrdq %cl, %r8, %rdi
11082 ; FALLBACK1-NEXT: sarq %cl, %rax
11083 ; FALLBACK1-NEXT: movq %rsi, 16(%rdx)
11084 ; FALLBACK1-NEXT: movq %rax, 24(%rdx)
11085 ; FALLBACK1-NEXT: movq %rdi, (%rdx)
11086 ; FALLBACK1-NEXT: movq %r9, 8(%rdx)
11087 ; FALLBACK1-NEXT: retq
11089 ; FALLBACK2-LABEL: ashr_32bytes_dwordOff:
11090 ; FALLBACK2: # %bb.0:
11091 ; FALLBACK2-NEXT: movq (%rdi), %rcx
11092 ; FALLBACK2-NEXT: movq 8(%rdi), %r8
11093 ; FALLBACK2-NEXT: movq 16(%rdi), %r9
11094 ; FALLBACK2-NEXT: movq 24(%rdi), %rdi
11095 ; FALLBACK2-NEXT: movzbl (%rsi), %esi
11096 ; FALLBACK2-NEXT: movl %esi, %eax
11097 ; FALLBACK2-NEXT: shlb $5, %al
11098 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11099 ; FALLBACK2-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
11100 ; FALLBACK2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
11101 ; FALLBACK2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
11102 ; FALLBACK2-NEXT: sarq $63, %rdi
11103 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11104 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11105 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11106 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11107 ; FALLBACK2-NEXT: andb $6, %sil
11108 ; FALLBACK2-NEXT: movzbl %sil, %ecx
11109 ; FALLBACK2-NEXT: movq -64(%rsp,%rcx,4), %rsi
11110 ; FALLBACK2-NEXT: movq -56(%rsp,%rcx,4), %rdi
11111 ; FALLBACK2-NEXT: shrxq %rax, %rsi, %r8
11112 ; FALLBACK2-NEXT: shrxq %rax, -72(%rsp,%rcx,4), %r9
11113 ; FALLBACK2-NEXT: shrxq %rax, %rdi, %r10
11114 ; FALLBACK2-NEXT: movq -48(%rsp,%rcx,4), %rcx
11115 ; FALLBACK2-NEXT: sarxq %rax, %rcx, %r11
11116 ; FALLBACK2-NEXT: # kill: def $al killed $al killed $rax def $rax
11117 ; FALLBACK2-NEXT: notb %al
11118 ; FALLBACK2-NEXT: addq %rdi, %rdi
11119 ; FALLBACK2-NEXT: shlxq %rax, %rdi, %rdi
11120 ; FALLBACK2-NEXT: orq %r8, %rdi
11121 ; FALLBACK2-NEXT: addq %rsi, %rsi
11122 ; FALLBACK2-NEXT: shlxq %rax, %rsi, %rsi
11123 ; FALLBACK2-NEXT: orq %r9, %rsi
11124 ; FALLBACK2-NEXT: addq %rcx, %rcx
11125 ; FALLBACK2-NEXT: shlxq %rax, %rcx, %rax
11126 ; FALLBACK2-NEXT: orq %r10, %rax
11127 ; FALLBACK2-NEXT: movq %r11, 24(%rdx)
11128 ; FALLBACK2-NEXT: movq %rax, 16(%rdx)
11129 ; FALLBACK2-NEXT: movq %rsi, (%rdx)
11130 ; FALLBACK2-NEXT: movq %rdi, 8(%rdx)
11131 ; FALLBACK2-NEXT: retq
11133 ; FALLBACK3-LABEL: ashr_32bytes_dwordOff:
11134 ; FALLBACK3: # %bb.0:
11135 ; FALLBACK3-NEXT: movq (%rdi), %rax
11136 ; FALLBACK3-NEXT: movq 8(%rdi), %r8
11137 ; FALLBACK3-NEXT: movq 16(%rdi), %r9
11138 ; FALLBACK3-NEXT: movq 24(%rdi), %rdi
11139 ; FALLBACK3-NEXT: movzbl (%rsi), %esi
11140 ; FALLBACK3-NEXT: movl %esi, %ecx
11141 ; FALLBACK3-NEXT: shlb $5, %cl
11142 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11143 ; FALLBACK3-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
11144 ; FALLBACK3-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
11145 ; FALLBACK3-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
11146 ; FALLBACK3-NEXT: sarq $63, %rdi
11147 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11148 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11149 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11150 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11151 ; FALLBACK3-NEXT: andb $6, %sil
11152 ; FALLBACK3-NEXT: movzbl %sil, %eax
11153 ; FALLBACK3-NEXT: movq -56(%rsp,%rax,4), %rsi
11154 ; FALLBACK3-NEXT: movq -72(%rsp,%rax,4), %rdi
11155 ; FALLBACK3-NEXT: movq -64(%rsp,%rax,4), %r8
11156 ; FALLBACK3-NEXT: movq %r8, %r9
11157 ; FALLBACK3-NEXT: shrdq %cl, %rsi, %r9
11158 ; FALLBACK3-NEXT: movq -48(%rsp,%rax,4), %rax
11159 ; FALLBACK3-NEXT: shrdq %cl, %rax, %rsi
11160 ; FALLBACK3-NEXT: shrdq %cl, %r8, %rdi
11161 ; FALLBACK3-NEXT: sarxq %rcx, %rax, %rax
11162 ; FALLBACK3-NEXT: movq %rsi, 16(%rdx)
11163 ; FALLBACK3-NEXT: movq %rax, 24(%rdx)
11164 ; FALLBACK3-NEXT: movq %rdi, (%rdx)
11165 ; FALLBACK3-NEXT: movq %r9, 8(%rdx)
11166 ; FALLBACK3-NEXT: retq
11168 ; FALLBACK4-LABEL: ashr_32bytes_dwordOff:
11169 ; FALLBACK4: # %bb.0:
11170 ; FALLBACK4-NEXT: pushq %rbx
11171 ; FALLBACK4-NEXT: movups (%rdi), %xmm0
11172 ; FALLBACK4-NEXT: movq 16(%rdi), %rcx
11173 ; FALLBACK4-NEXT: movq 24(%rdi), %rdi
11174 ; FALLBACK4-NEXT: movzbl (%rsi), %esi
11175 ; FALLBACK4-NEXT: movl %esi, %eax
11176 ; FALLBACK4-NEXT: shlb $5, %al
11177 ; FALLBACK4-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11178 ; FALLBACK4-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
11179 ; FALLBACK4-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
11180 ; FALLBACK4-NEXT: sarq $63, %rdi
11181 ; FALLBACK4-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11182 ; FALLBACK4-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11183 ; FALLBACK4-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11184 ; FALLBACK4-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11185 ; FALLBACK4-NEXT: andb $6, %sil
11186 ; FALLBACK4-NEXT: movzbl %sil, %r9d
11187 ; FALLBACK4-NEXT: movq -64(%rsp,%r9,4), %r10
11188 ; FALLBACK4-NEXT: movq -56(%rsp,%r9,4), %r8
11189 ; FALLBACK4-NEXT: movl %eax, %ecx
11190 ; FALLBACK4-NEXT: shrq %cl, %r10
11191 ; FALLBACK4-NEXT: movl %eax, %esi
11192 ; FALLBACK4-NEXT: notb %sil
11193 ; FALLBACK4-NEXT: leaq (%r8,%r8), %rdi
11194 ; FALLBACK4-NEXT: movl %esi, %ecx
11195 ; FALLBACK4-NEXT: shlq %cl, %rdi
11196 ; FALLBACK4-NEXT: orq %r10, %rdi
11197 ; FALLBACK4-NEXT: movq -48(%rsp,%r9,4), %r10
11198 ; FALLBACK4-NEXT: movq %r10, %r11
11199 ; FALLBACK4-NEXT: movl %eax, %ecx
11200 ; FALLBACK4-NEXT: shrq %cl, %r11
11201 ; FALLBACK4-NEXT: movq -40(%rsp,%r9,4), %r9
11202 ; FALLBACK4-NEXT: leaq (%r9,%r9), %rbx
11203 ; FALLBACK4-NEXT: movl %esi, %ecx
11204 ; FALLBACK4-NEXT: shlq %cl, %rbx
11205 ; FALLBACK4-NEXT: orq %r11, %rbx
11206 ; FALLBACK4-NEXT: movl %eax, %ecx
11207 ; FALLBACK4-NEXT: shrq %cl, %r8
11208 ; FALLBACK4-NEXT: addq %r10, %r10
11209 ; FALLBACK4-NEXT: movl %esi, %ecx
11210 ; FALLBACK4-NEXT: shlq %cl, %r10
11211 ; FALLBACK4-NEXT: orq %r8, %r10
11212 ; FALLBACK4-NEXT: movl %eax, %ecx
11213 ; FALLBACK4-NEXT: sarq %cl, %r9
11214 ; FALLBACK4-NEXT: movq %r9, 24(%rdx)
11215 ; FALLBACK4-NEXT: movq %r10, 8(%rdx)
11216 ; FALLBACK4-NEXT: movq %rbx, 16(%rdx)
11217 ; FALLBACK4-NEXT: movq %rdi, (%rdx)
11218 ; FALLBACK4-NEXT: popq %rbx
11219 ; FALLBACK4-NEXT: retq
11221 ; FALLBACK5-LABEL: ashr_32bytes_dwordOff:
11222 ; FALLBACK5: # %bb.0:
11223 ; FALLBACK5-NEXT: movups (%rdi), %xmm0
11224 ; FALLBACK5-NEXT: movq 16(%rdi), %rax
11225 ; FALLBACK5-NEXT: movq 24(%rdi), %rdi
11226 ; FALLBACK5-NEXT: movzbl (%rsi), %esi
11227 ; FALLBACK5-NEXT: movl %esi, %ecx
11228 ; FALLBACK5-NEXT: shlb $5, %cl
11229 ; FALLBACK5-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11230 ; FALLBACK5-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
11231 ; FALLBACK5-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
11232 ; FALLBACK5-NEXT: sarq $63, %rdi
11233 ; FALLBACK5-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11234 ; FALLBACK5-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11235 ; FALLBACK5-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11236 ; FALLBACK5-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11237 ; FALLBACK5-NEXT: andb $6, %sil
11238 ; FALLBACK5-NEXT: movzbl %sil, %eax
11239 ; FALLBACK5-NEXT: movq -48(%rsp,%rax,4), %rsi
11240 ; FALLBACK5-NEXT: movq -56(%rsp,%rax,4), %rdi
11241 ; FALLBACK5-NEXT: movq %rdi, %r8
11242 ; FALLBACK5-NEXT: shrdq %cl, %rsi, %r8
11243 ; FALLBACK5-NEXT: movq -72(%rsp,%rax,4), %r9
11244 ; FALLBACK5-NEXT: movq -64(%rsp,%rax,4), %rax
11245 ; FALLBACK5-NEXT: movq %rax, %r10
11246 ; FALLBACK5-NEXT: shrdq %cl, %rdi, %r10
11247 ; FALLBACK5-NEXT: shrdq %cl, %rax, %r9
11248 ; FALLBACK5-NEXT: sarq %cl, %rsi
11249 ; FALLBACK5-NEXT: movq %r10, 8(%rdx)
11250 ; FALLBACK5-NEXT: movq %r8, 16(%rdx)
11251 ; FALLBACK5-NEXT: movq %rsi, 24(%rdx)
11252 ; FALLBACK5-NEXT: movq %r9, (%rdx)
11253 ; FALLBACK5-NEXT: retq
11255 ; FALLBACK6-LABEL: ashr_32bytes_dwordOff:
11256 ; FALLBACK6: # %bb.0:
11257 ; FALLBACK6-NEXT: movups (%rdi), %xmm0
11258 ; FALLBACK6-NEXT: movq 16(%rdi), %rcx
11259 ; FALLBACK6-NEXT: movq 24(%rdi), %rdi
11260 ; FALLBACK6-NEXT: movzbl (%rsi), %esi
11261 ; FALLBACK6-NEXT: movl %esi, %eax
11262 ; FALLBACK6-NEXT: shlb $5, %al
11263 ; FALLBACK6-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11264 ; FALLBACK6-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
11265 ; FALLBACK6-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
11266 ; FALLBACK6-NEXT: sarq $63, %rdi
11267 ; FALLBACK6-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11268 ; FALLBACK6-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11269 ; FALLBACK6-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11270 ; FALLBACK6-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11271 ; FALLBACK6-NEXT: andb $6, %sil
11272 ; FALLBACK6-NEXT: movzbl %sil, %ecx
11273 ; FALLBACK6-NEXT: shrxq %rax, -72(%rsp,%rcx,4), %rsi
11274 ; FALLBACK6-NEXT: movq -64(%rsp,%rcx,4), %rdi
11275 ; FALLBACK6-NEXT: movq -56(%rsp,%rcx,4), %r8
11276 ; FALLBACK6-NEXT: shrxq %rax, %r8, %r9
11277 ; FALLBACK6-NEXT: movq -48(%rsp,%rcx,4), %rcx
11278 ; FALLBACK6-NEXT: shrxq %rax, %rdi, %r10
11279 ; FALLBACK6-NEXT: sarxq %rax, %rcx, %r11
11280 ; FALLBACK6-NEXT: # kill: def $al killed $al killed $rax def $rax
11281 ; FALLBACK6-NEXT: notb %al
11282 ; FALLBACK6-NEXT: addq %rdi, %rdi
11283 ; FALLBACK6-NEXT: shlxq %rax, %rdi, %rdi
11284 ; FALLBACK6-NEXT: orq %rsi, %rdi
11285 ; FALLBACK6-NEXT: addq %rcx, %rcx
11286 ; FALLBACK6-NEXT: shlxq %rax, %rcx, %rcx
11287 ; FALLBACK6-NEXT: orq %r9, %rcx
11288 ; FALLBACK6-NEXT: addq %r8, %r8
11289 ; FALLBACK6-NEXT: shlxq %rax, %r8, %rax
11290 ; FALLBACK6-NEXT: orq %r10, %rax
11291 ; FALLBACK6-NEXT: movq %r11, 24(%rdx)
11292 ; FALLBACK6-NEXT: movq %rax, 8(%rdx)
11293 ; FALLBACK6-NEXT: movq %rcx, 16(%rdx)
11294 ; FALLBACK6-NEXT: movq %rdi, (%rdx)
11295 ; FALLBACK6-NEXT: retq
11297 ; FALLBACK7-LABEL: ashr_32bytes_dwordOff:
11298 ; FALLBACK7: # %bb.0:
11299 ; FALLBACK7-NEXT: movups (%rdi), %xmm0
11300 ; FALLBACK7-NEXT: movq 16(%rdi), %rax
11301 ; FALLBACK7-NEXT: movq 24(%rdi), %rdi
11302 ; FALLBACK7-NEXT: movzbl (%rsi), %esi
11303 ; FALLBACK7-NEXT: movl %esi, %ecx
11304 ; FALLBACK7-NEXT: shlb $5, %cl
11305 ; FALLBACK7-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11306 ; FALLBACK7-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
11307 ; FALLBACK7-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
11308 ; FALLBACK7-NEXT: sarq $63, %rdi
11309 ; FALLBACK7-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11310 ; FALLBACK7-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11311 ; FALLBACK7-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11312 ; FALLBACK7-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11313 ; FALLBACK7-NEXT: andb $6, %sil
11314 ; FALLBACK7-NEXT: movzbl %sil, %eax
11315 ; FALLBACK7-NEXT: movq -48(%rsp,%rax,4), %rsi
11316 ; FALLBACK7-NEXT: movq -56(%rsp,%rax,4), %rdi
11317 ; FALLBACK7-NEXT: movq %rdi, %r8
11318 ; FALLBACK7-NEXT: shrdq %cl, %rsi, %r8
11319 ; FALLBACK7-NEXT: movq -72(%rsp,%rax,4), %r9
11320 ; FALLBACK7-NEXT: movq -64(%rsp,%rax,4), %rax
11321 ; FALLBACK7-NEXT: movq %rax, %r10
11322 ; FALLBACK7-NEXT: shrdq %cl, %rdi, %r10
11323 ; FALLBACK7-NEXT: shrdq %cl, %rax, %r9
11324 ; FALLBACK7-NEXT: sarxq %rcx, %rsi, %rax
11325 ; FALLBACK7-NEXT: movq %r10, 8(%rdx)
11326 ; FALLBACK7-NEXT: movq %r8, 16(%rdx)
11327 ; FALLBACK7-NEXT: movq %rax, 24(%rdx)
11328 ; FALLBACK7-NEXT: movq %r9, (%rdx)
11329 ; FALLBACK7-NEXT: retq
11331 ; FALLBACK8-LABEL: ashr_32bytes_dwordOff:
11332 ; FALLBACK8: # %bb.0:
11333 ; FALLBACK8-NEXT: pushq %rbx
11334 ; FALLBACK8-NEXT: vmovups (%rdi), %xmm0
11335 ; FALLBACK8-NEXT: movq 16(%rdi), %rcx
11336 ; FALLBACK8-NEXT: movq 24(%rdi), %rdi
11337 ; FALLBACK8-NEXT: movzbl (%rsi), %esi
11338 ; FALLBACK8-NEXT: movl %esi, %eax
11339 ; FALLBACK8-NEXT: shlb $5, %al
11340 ; FALLBACK8-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11341 ; FALLBACK8-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
11342 ; FALLBACK8-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
11343 ; FALLBACK8-NEXT: sarq $63, %rdi
11344 ; FALLBACK8-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11345 ; FALLBACK8-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11346 ; FALLBACK8-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11347 ; FALLBACK8-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11348 ; FALLBACK8-NEXT: andb $6, %sil
11349 ; FALLBACK8-NEXT: movzbl %sil, %r9d
11350 ; FALLBACK8-NEXT: movq -64(%rsp,%r9,4), %r10
11351 ; FALLBACK8-NEXT: movq -56(%rsp,%r9,4), %r8
11352 ; FALLBACK8-NEXT: movl %eax, %ecx
11353 ; FALLBACK8-NEXT: shrq %cl, %r10
11354 ; FALLBACK8-NEXT: movl %eax, %esi
11355 ; FALLBACK8-NEXT: notb %sil
11356 ; FALLBACK8-NEXT: leaq (%r8,%r8), %rdi
11357 ; FALLBACK8-NEXT: movl %esi, %ecx
11358 ; FALLBACK8-NEXT: shlq %cl, %rdi
11359 ; FALLBACK8-NEXT: orq %r10, %rdi
11360 ; FALLBACK8-NEXT: movq -48(%rsp,%r9,4), %r10
11361 ; FALLBACK8-NEXT: movq %r10, %r11
11362 ; FALLBACK8-NEXT: movl %eax, %ecx
11363 ; FALLBACK8-NEXT: shrq %cl, %r11
11364 ; FALLBACK8-NEXT: movq -40(%rsp,%r9,4), %r9
11365 ; FALLBACK8-NEXT: leaq (%r9,%r9), %rbx
11366 ; FALLBACK8-NEXT: movl %esi, %ecx
11367 ; FALLBACK8-NEXT: shlq %cl, %rbx
11368 ; FALLBACK8-NEXT: orq %r11, %rbx
11369 ; FALLBACK8-NEXT: movl %eax, %ecx
11370 ; FALLBACK8-NEXT: shrq %cl, %r8
11371 ; FALLBACK8-NEXT: addq %r10, %r10
11372 ; FALLBACK8-NEXT: movl %esi, %ecx
11373 ; FALLBACK8-NEXT: shlq %cl, %r10
11374 ; FALLBACK8-NEXT: orq %r8, %r10
11375 ; FALLBACK8-NEXT: movl %eax, %ecx
11376 ; FALLBACK8-NEXT: sarq %cl, %r9
11377 ; FALLBACK8-NEXT: movq %r9, 24(%rdx)
11378 ; FALLBACK8-NEXT: movq %r10, 8(%rdx)
11379 ; FALLBACK8-NEXT: movq %rbx, 16(%rdx)
11380 ; FALLBACK8-NEXT: movq %rdi, (%rdx)
11381 ; FALLBACK8-NEXT: popq %rbx
11382 ; FALLBACK8-NEXT: retq
11384 ; FALLBACK9-LABEL: ashr_32bytes_dwordOff:
11385 ; FALLBACK9: # %bb.0:
11386 ; FALLBACK9-NEXT: vmovups (%rdi), %xmm0
11387 ; FALLBACK9-NEXT: movq 16(%rdi), %rax
11388 ; FALLBACK9-NEXT: movq 24(%rdi), %rdi
11389 ; FALLBACK9-NEXT: movzbl (%rsi), %esi
11390 ; FALLBACK9-NEXT: movl %esi, %ecx
11391 ; FALLBACK9-NEXT: shlb $5, %cl
11392 ; FALLBACK9-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11393 ; FALLBACK9-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
11394 ; FALLBACK9-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
11395 ; FALLBACK9-NEXT: sarq $63, %rdi
11396 ; FALLBACK9-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11397 ; FALLBACK9-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11398 ; FALLBACK9-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11399 ; FALLBACK9-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11400 ; FALLBACK9-NEXT: andb $6, %sil
11401 ; FALLBACK9-NEXT: movzbl %sil, %eax
11402 ; FALLBACK9-NEXT: movq -48(%rsp,%rax,4), %rsi
11403 ; FALLBACK9-NEXT: movq -56(%rsp,%rax,4), %rdi
11404 ; FALLBACK9-NEXT: movq %rdi, %r8
11405 ; FALLBACK9-NEXT: shrdq %cl, %rsi, %r8
11406 ; FALLBACK9-NEXT: movq -72(%rsp,%rax,4), %r9
11407 ; FALLBACK9-NEXT: movq -64(%rsp,%rax,4), %rax
11408 ; FALLBACK9-NEXT: movq %rax, %r10
11409 ; FALLBACK9-NEXT: shrdq %cl, %rdi, %r10
11410 ; FALLBACK9-NEXT: shrdq %cl, %rax, %r9
11411 ; FALLBACK9-NEXT: sarq %cl, %rsi
11412 ; FALLBACK9-NEXT: movq %r10, 8(%rdx)
11413 ; FALLBACK9-NEXT: movq %r8, 16(%rdx)
11414 ; FALLBACK9-NEXT: movq %rsi, 24(%rdx)
11415 ; FALLBACK9-NEXT: movq %r9, (%rdx)
11416 ; FALLBACK9-NEXT: retq
11418 ; FALLBACK10-LABEL: ashr_32bytes_dwordOff:
11419 ; FALLBACK10: # %bb.0:
11420 ; FALLBACK10-NEXT: vmovups (%rdi), %xmm0
11421 ; FALLBACK10-NEXT: movq 16(%rdi), %rcx
11422 ; FALLBACK10-NEXT: movq 24(%rdi), %rdi
11423 ; FALLBACK10-NEXT: movzbl (%rsi), %esi
11424 ; FALLBACK10-NEXT: movl %esi, %eax
11425 ; FALLBACK10-NEXT: shlb $5, %al
11426 ; FALLBACK10-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11427 ; FALLBACK10-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
11428 ; FALLBACK10-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
11429 ; FALLBACK10-NEXT: sarq $63, %rdi
11430 ; FALLBACK10-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11431 ; FALLBACK10-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11432 ; FALLBACK10-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11433 ; FALLBACK10-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11434 ; FALLBACK10-NEXT: andb $6, %sil
11435 ; FALLBACK10-NEXT: movzbl %sil, %ecx
11436 ; FALLBACK10-NEXT: shrxq %rax, -72(%rsp,%rcx,4), %rsi
11437 ; FALLBACK10-NEXT: movq -64(%rsp,%rcx,4), %rdi
11438 ; FALLBACK10-NEXT: movq -56(%rsp,%rcx,4), %r8
11439 ; FALLBACK10-NEXT: shrxq %rax, %r8, %r9
11440 ; FALLBACK10-NEXT: movq -48(%rsp,%rcx,4), %rcx
11441 ; FALLBACK10-NEXT: shrxq %rax, %rdi, %r10
11442 ; FALLBACK10-NEXT: sarxq %rax, %rcx, %r11
11443 ; FALLBACK10-NEXT: # kill: def $al killed $al killed $rax def $rax
11444 ; FALLBACK10-NEXT: notb %al
11445 ; FALLBACK10-NEXT: addq %rdi, %rdi
11446 ; FALLBACK10-NEXT: shlxq %rax, %rdi, %rdi
11447 ; FALLBACK10-NEXT: orq %rsi, %rdi
11448 ; FALLBACK10-NEXT: addq %rcx, %rcx
11449 ; FALLBACK10-NEXT: shlxq %rax, %rcx, %rcx
11450 ; FALLBACK10-NEXT: orq %r9, %rcx
11451 ; FALLBACK10-NEXT: addq %r8, %r8
11452 ; FALLBACK10-NEXT: shlxq %rax, %r8, %rax
11453 ; FALLBACK10-NEXT: orq %r10, %rax
11454 ; FALLBACK10-NEXT: movq %r11, 24(%rdx)
11455 ; FALLBACK10-NEXT: movq %rax, 8(%rdx)
11456 ; FALLBACK10-NEXT: movq %rcx, 16(%rdx)
11457 ; FALLBACK10-NEXT: movq %rdi, (%rdx)
11458 ; FALLBACK10-NEXT: retq
11460 ; FALLBACK11-LABEL: ashr_32bytes_dwordOff:
11461 ; FALLBACK11: # %bb.0:
11462 ; FALLBACK11-NEXT: vmovups (%rdi), %xmm0
11463 ; FALLBACK11-NEXT: movq 16(%rdi), %rax
11464 ; FALLBACK11-NEXT: movq 24(%rdi), %rdi
11465 ; FALLBACK11-NEXT: movzbl (%rsi), %esi
11466 ; FALLBACK11-NEXT: movl %esi, %ecx
11467 ; FALLBACK11-NEXT: shlb $5, %cl
11468 ; FALLBACK11-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11469 ; FALLBACK11-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
11470 ; FALLBACK11-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
11471 ; FALLBACK11-NEXT: sarq $63, %rdi
11472 ; FALLBACK11-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11473 ; FALLBACK11-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11474 ; FALLBACK11-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11475 ; FALLBACK11-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11476 ; FALLBACK11-NEXT: andb $6, %sil
11477 ; FALLBACK11-NEXT: movzbl %sil, %eax
11478 ; FALLBACK11-NEXT: movq -48(%rsp,%rax,4), %rsi
11479 ; FALLBACK11-NEXT: movq -56(%rsp,%rax,4), %rdi
11480 ; FALLBACK11-NEXT: movq %rdi, %r8
11481 ; FALLBACK11-NEXT: shrdq %cl, %rsi, %r8
11482 ; FALLBACK11-NEXT: movq -72(%rsp,%rax,4), %r9
11483 ; FALLBACK11-NEXT: movq -64(%rsp,%rax,4), %rax
11484 ; FALLBACK11-NEXT: movq %rax, %r10
11485 ; FALLBACK11-NEXT: shrdq %cl, %rdi, %r10
11486 ; FALLBACK11-NEXT: shrdq %cl, %rax, %r9
11487 ; FALLBACK11-NEXT: sarxq %rcx, %rsi, %rax
11488 ; FALLBACK11-NEXT: movq %r10, 8(%rdx)
11489 ; FALLBACK11-NEXT: movq %r8, 16(%rdx)
11490 ; FALLBACK11-NEXT: movq %rax, 24(%rdx)
11491 ; FALLBACK11-NEXT: movq %r9, (%rdx)
11492 ; FALLBACK11-NEXT: retq
11494 ; FALLBACK12-LABEL: ashr_32bytes_dwordOff:
11495 ; FALLBACK12: # %bb.0:
11496 ; FALLBACK12-NEXT: pushq %rbx
11497 ; FALLBACK12-NEXT: vmovups (%rdi), %xmm0
11498 ; FALLBACK12-NEXT: movq 16(%rdi), %rcx
11499 ; FALLBACK12-NEXT: movq 24(%rdi), %rdi
11500 ; FALLBACK12-NEXT: movzbl (%rsi), %esi
11501 ; FALLBACK12-NEXT: movl %esi, %eax
11502 ; FALLBACK12-NEXT: shlb $5, %al
11503 ; FALLBACK12-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11504 ; FALLBACK12-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
11505 ; FALLBACK12-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
11506 ; FALLBACK12-NEXT: sarq $63, %rdi
11507 ; FALLBACK12-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11508 ; FALLBACK12-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11509 ; FALLBACK12-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11510 ; FALLBACK12-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11511 ; FALLBACK12-NEXT: andb $6, %sil
11512 ; FALLBACK12-NEXT: movzbl %sil, %r9d
11513 ; FALLBACK12-NEXT: movq -64(%rsp,%r9,4), %r10
11514 ; FALLBACK12-NEXT: movq -56(%rsp,%r9,4), %r8
11515 ; FALLBACK12-NEXT: movl %eax, %ecx
11516 ; FALLBACK12-NEXT: shrq %cl, %r10
11517 ; FALLBACK12-NEXT: movl %eax, %esi
11518 ; FALLBACK12-NEXT: notb %sil
11519 ; FALLBACK12-NEXT: leaq (%r8,%r8), %rdi
11520 ; FALLBACK12-NEXT: movl %esi, %ecx
11521 ; FALLBACK12-NEXT: shlq %cl, %rdi
11522 ; FALLBACK12-NEXT: orq %r10, %rdi
11523 ; FALLBACK12-NEXT: movq -48(%rsp,%r9,4), %r10
11524 ; FALLBACK12-NEXT: movq %r10, %r11
11525 ; FALLBACK12-NEXT: movl %eax, %ecx
11526 ; FALLBACK12-NEXT: shrq %cl, %r11
11527 ; FALLBACK12-NEXT: movq -40(%rsp,%r9,4), %r9
11528 ; FALLBACK12-NEXT: leaq (%r9,%r9), %rbx
11529 ; FALLBACK12-NEXT: movl %esi, %ecx
11530 ; FALLBACK12-NEXT: shlq %cl, %rbx
11531 ; FALLBACK12-NEXT: orq %r11, %rbx
11532 ; FALLBACK12-NEXT: movl %eax, %ecx
11533 ; FALLBACK12-NEXT: shrq %cl, %r8
11534 ; FALLBACK12-NEXT: addq %r10, %r10
11535 ; FALLBACK12-NEXT: movl %esi, %ecx
11536 ; FALLBACK12-NEXT: shlq %cl, %r10
11537 ; FALLBACK12-NEXT: orq %r8, %r10
11538 ; FALLBACK12-NEXT: movl %eax, %ecx
11539 ; FALLBACK12-NEXT: sarq %cl, %r9
11540 ; FALLBACK12-NEXT: movq %r9, 24(%rdx)
11541 ; FALLBACK12-NEXT: movq %r10, 8(%rdx)
11542 ; FALLBACK12-NEXT: movq %rbx, 16(%rdx)
11543 ; FALLBACK12-NEXT: movq %rdi, (%rdx)
11544 ; FALLBACK12-NEXT: popq %rbx
11545 ; FALLBACK12-NEXT: retq
11547 ; FALLBACK13-LABEL: ashr_32bytes_dwordOff:
11548 ; FALLBACK13: # %bb.0:
11549 ; FALLBACK13-NEXT: vmovups (%rdi), %xmm0
11550 ; FALLBACK13-NEXT: movq 16(%rdi), %rax
11551 ; FALLBACK13-NEXT: movq 24(%rdi), %rdi
11552 ; FALLBACK13-NEXT: movzbl (%rsi), %esi
11553 ; FALLBACK13-NEXT: movl %esi, %ecx
11554 ; FALLBACK13-NEXT: shlb $5, %cl
11555 ; FALLBACK13-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11556 ; FALLBACK13-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
11557 ; FALLBACK13-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
11558 ; FALLBACK13-NEXT: sarq $63, %rdi
11559 ; FALLBACK13-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11560 ; FALLBACK13-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11561 ; FALLBACK13-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11562 ; FALLBACK13-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11563 ; FALLBACK13-NEXT: andb $6, %sil
11564 ; FALLBACK13-NEXT: movzbl %sil, %eax
11565 ; FALLBACK13-NEXT: movq -48(%rsp,%rax,4), %rsi
11566 ; FALLBACK13-NEXT: movq -56(%rsp,%rax,4), %rdi
11567 ; FALLBACK13-NEXT: movq %rdi, %r8
11568 ; FALLBACK13-NEXT: shrdq %cl, %rsi, %r8
11569 ; FALLBACK13-NEXT: movq -72(%rsp,%rax,4), %r9
11570 ; FALLBACK13-NEXT: movq -64(%rsp,%rax,4), %rax
11571 ; FALLBACK13-NEXT: movq %rax, %r10
11572 ; FALLBACK13-NEXT: shrdq %cl, %rdi, %r10
11573 ; FALLBACK13-NEXT: shrdq %cl, %rax, %r9
11574 ; FALLBACK13-NEXT: sarq %cl, %rsi
11575 ; FALLBACK13-NEXT: movq %r10, 8(%rdx)
11576 ; FALLBACK13-NEXT: movq %r8, 16(%rdx)
11577 ; FALLBACK13-NEXT: movq %rsi, 24(%rdx)
11578 ; FALLBACK13-NEXT: movq %r9, (%rdx)
11579 ; FALLBACK13-NEXT: retq
11581 ; FALLBACK14-LABEL: ashr_32bytes_dwordOff:
11582 ; FALLBACK14: # %bb.0:
11583 ; FALLBACK14-NEXT: vmovups (%rdi), %xmm0
11584 ; FALLBACK14-NEXT: movq 16(%rdi), %rcx
11585 ; FALLBACK14-NEXT: movq 24(%rdi), %rdi
11586 ; FALLBACK14-NEXT: movzbl (%rsi), %esi
11587 ; FALLBACK14-NEXT: movl %esi, %eax
11588 ; FALLBACK14-NEXT: shlb $5, %al
11589 ; FALLBACK14-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11590 ; FALLBACK14-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
11591 ; FALLBACK14-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
11592 ; FALLBACK14-NEXT: sarq $63, %rdi
11593 ; FALLBACK14-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11594 ; FALLBACK14-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11595 ; FALLBACK14-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11596 ; FALLBACK14-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11597 ; FALLBACK14-NEXT: andb $6, %sil
11598 ; FALLBACK14-NEXT: movzbl %sil, %ecx
11599 ; FALLBACK14-NEXT: shrxq %rax, -72(%rsp,%rcx,4), %rsi
11600 ; FALLBACK14-NEXT: movq -64(%rsp,%rcx,4), %rdi
11601 ; FALLBACK14-NEXT: movq -56(%rsp,%rcx,4), %r8
11602 ; FALLBACK14-NEXT: shrxq %rax, %r8, %r9
11603 ; FALLBACK14-NEXT: movq -48(%rsp,%rcx,4), %rcx
11604 ; FALLBACK14-NEXT: shrxq %rax, %rdi, %r10
11605 ; FALLBACK14-NEXT: sarxq %rax, %rcx, %r11
11606 ; FALLBACK14-NEXT: # kill: def $al killed $al killed $rax def $rax
11607 ; FALLBACK14-NEXT: notb %al
11608 ; FALLBACK14-NEXT: addq %rdi, %rdi
11609 ; FALLBACK14-NEXT: shlxq %rax, %rdi, %rdi
11610 ; FALLBACK14-NEXT: orq %rsi, %rdi
11611 ; FALLBACK14-NEXT: addq %rcx, %rcx
11612 ; FALLBACK14-NEXT: shlxq %rax, %rcx, %rcx
11613 ; FALLBACK14-NEXT: orq %r9, %rcx
11614 ; FALLBACK14-NEXT: addq %r8, %r8
11615 ; FALLBACK14-NEXT: shlxq %rax, %r8, %rax
11616 ; FALLBACK14-NEXT: orq %r10, %rax
11617 ; FALLBACK14-NEXT: movq %r11, 24(%rdx)
11618 ; FALLBACK14-NEXT: movq %rax, 8(%rdx)
11619 ; FALLBACK14-NEXT: movq %rcx, 16(%rdx)
11620 ; FALLBACK14-NEXT: movq %rdi, (%rdx)
11621 ; FALLBACK14-NEXT: retq
11623 ; FALLBACK15-LABEL: ashr_32bytes_dwordOff:
11624 ; FALLBACK15: # %bb.0:
11625 ; FALLBACK15-NEXT: vmovups (%rdi), %xmm0
11626 ; FALLBACK15-NEXT: movq 16(%rdi), %rax
11627 ; FALLBACK15-NEXT: movq 24(%rdi), %rdi
11628 ; FALLBACK15-NEXT: movzbl (%rsi), %esi
11629 ; FALLBACK15-NEXT: movl %esi, %ecx
11630 ; FALLBACK15-NEXT: shlb $5, %cl
11631 ; FALLBACK15-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11632 ; FALLBACK15-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
11633 ; FALLBACK15-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
11634 ; FALLBACK15-NEXT: sarq $63, %rdi
11635 ; FALLBACK15-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11636 ; FALLBACK15-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11637 ; FALLBACK15-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11638 ; FALLBACK15-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11639 ; FALLBACK15-NEXT: andb $6, %sil
11640 ; FALLBACK15-NEXT: movzbl %sil, %eax
11641 ; FALLBACK15-NEXT: movq -48(%rsp,%rax,4), %rsi
11642 ; FALLBACK15-NEXT: movq -56(%rsp,%rax,4), %rdi
11643 ; FALLBACK15-NEXT: movq %rdi, %r8
11644 ; FALLBACK15-NEXT: shrdq %cl, %rsi, %r8
11645 ; FALLBACK15-NEXT: movq -72(%rsp,%rax,4), %r9
11646 ; FALLBACK15-NEXT: movq -64(%rsp,%rax,4), %rax
11647 ; FALLBACK15-NEXT: movq %rax, %r10
11648 ; FALLBACK15-NEXT: shrdq %cl, %rdi, %r10
11649 ; FALLBACK15-NEXT: shrdq %cl, %rax, %r9
11650 ; FALLBACK15-NEXT: sarxq %rcx, %rsi, %rax
11651 ; FALLBACK15-NEXT: movq %r10, 8(%rdx)
11652 ; FALLBACK15-NEXT: movq %r8, 16(%rdx)
11653 ; FALLBACK15-NEXT: movq %rax, 24(%rdx)
11654 ; FALLBACK15-NEXT: movq %r9, (%rdx)
11655 ; FALLBACK15-NEXT: retq
11657 ; X86-SSE2-LABEL: ashr_32bytes_dwordOff:
11658 ; X86-SSE2: # %bb.0:
11659 ; X86-SSE2-NEXT: pushl %ebp
11660 ; X86-SSE2-NEXT: pushl %ebx
11661 ; X86-SSE2-NEXT: pushl %edi
11662 ; X86-SSE2-NEXT: pushl %esi
11663 ; X86-SSE2-NEXT: subl $92, %esp
11664 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
11665 ; X86-SSE2-NEXT: movl (%eax), %ecx
11666 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
11667 ; X86-SSE2-NEXT: movl 4(%eax), %ecx
11668 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
11669 ; X86-SSE2-NEXT: movl 8(%eax), %edi
11670 ; X86-SSE2-NEXT: movl 12(%eax), %ebx
11671 ; X86-SSE2-NEXT: movl 16(%eax), %ebp
11672 ; X86-SSE2-NEXT: movl 20(%eax), %esi
11673 ; X86-SSE2-NEXT: movl 24(%eax), %edx
11674 ; X86-SSE2-NEXT: movl 28(%eax), %ecx
11675 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
11676 ; X86-SSE2-NEXT: movzbl (%eax), %eax
11677 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
11678 ; X86-SSE2-NEXT: movl %esi, {{[0-9]+}}(%esp)
11679 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
11680 ; X86-SSE2-NEXT: movl %ebp, {{[0-9]+}}(%esp)
11681 ; X86-SSE2-NEXT: movl %ebx, {{[0-9]+}}(%esp)
11682 ; X86-SSE2-NEXT: movl %edi, {{[0-9]+}}(%esp)
11683 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
11684 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
11685 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
11686 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
11687 ; X86-SSE2-NEXT: sarl $31, %ecx
11688 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
11689 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
11690 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
11691 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
11692 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
11693 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
11694 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
11695 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
11696 ; X86-SSE2-NEXT: andl $7, %eax
11697 ; X86-SSE2-NEXT: movl 16(%esp,%eax,4), %ecx
11698 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
11699 ; X86-SSE2-NEXT: movl 20(%esp,%eax,4), %ecx
11700 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
11701 ; X86-SSE2-NEXT: movl 28(%esp,%eax,4), %esi
11702 ; X86-SSE2-NEXT: movl 24(%esp,%eax,4), %edi
11703 ; X86-SSE2-NEXT: movl 36(%esp,%eax,4), %ebx
11704 ; X86-SSE2-NEXT: movl 32(%esp,%eax,4), %ebp
11705 ; X86-SSE2-NEXT: movl 44(%esp,%eax,4), %edx
11706 ; X86-SSE2-NEXT: movl 40(%esp,%eax,4), %ecx
11707 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
11708 ; X86-SSE2-NEXT: movl %ecx, 24(%eax)
11709 ; X86-SSE2-NEXT: movl %edx, 28(%eax)
11710 ; X86-SSE2-NEXT: movl %ebp, 16(%eax)
11711 ; X86-SSE2-NEXT: movl %ebx, 20(%eax)
11712 ; X86-SSE2-NEXT: movl %edi, 8(%eax)
11713 ; X86-SSE2-NEXT: movl %esi, 12(%eax)
11714 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
11715 ; X86-SSE2-NEXT: movl %ecx, (%eax)
11716 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
11717 ; X86-SSE2-NEXT: movl %ecx, 4(%eax)
11718 ; X86-SSE2-NEXT: addl $92, %esp
11719 ; X86-SSE2-NEXT: popl %esi
11720 ; X86-SSE2-NEXT: popl %edi
11721 ; X86-SSE2-NEXT: popl %ebx
11722 ; X86-SSE2-NEXT: popl %ebp
11723 ; X86-SSE2-NEXT: retl
11725 ; X86-SSE42-LABEL: ashr_32bytes_dwordOff:
11726 ; X86-SSE42: # %bb.0:
11727 ; X86-SSE42-NEXT: pushl %ebx
11728 ; X86-SSE42-NEXT: pushl %edi
11729 ; X86-SSE42-NEXT: pushl %esi
11730 ; X86-SSE42-NEXT: subl $64, %esp
11731 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %eax
11732 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %ecx
11733 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %edx
11734 ; X86-SSE42-NEXT: movups (%edx), %xmm0
11735 ; X86-SSE42-NEXT: movl 16(%edx), %esi
11736 ; X86-SSE42-NEXT: movl 20(%edx), %edi
11737 ; X86-SSE42-NEXT: movl 24(%edx), %ebx
11738 ; X86-SSE42-NEXT: movl 28(%edx), %edx
11739 ; X86-SSE42-NEXT: movzbl (%ecx), %ecx
11740 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
11741 ; X86-SSE42-NEXT: movl %ebx, {{[0-9]+}}(%esp)
11742 ; X86-SSE42-NEXT: movl %edi, {{[0-9]+}}(%esp)
11743 ; X86-SSE42-NEXT: movl %esi, {{[0-9]+}}(%esp)
11744 ; X86-SSE42-NEXT: movaps %xmm0, (%esp)
11745 ; X86-SSE42-NEXT: sarl $31, %edx
11746 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
11747 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
11748 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
11749 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
11750 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
11751 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
11752 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
11753 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
11754 ; X86-SSE42-NEXT: andl $7, %ecx
11755 ; X86-SSE42-NEXT: movups (%esp,%ecx,4), %xmm0
11756 ; X86-SSE42-NEXT: movups 16(%esp,%ecx,4), %xmm1
11757 ; X86-SSE42-NEXT: movups %xmm1, 16(%eax)
11758 ; X86-SSE42-NEXT: movups %xmm0, (%eax)
11759 ; X86-SSE42-NEXT: addl $64, %esp
11760 ; X86-SSE42-NEXT: popl %esi
11761 ; X86-SSE42-NEXT: popl %edi
11762 ; X86-SSE42-NEXT: popl %ebx
11763 ; X86-SSE42-NEXT: retl
11765 ; X86-AVX-LABEL: ashr_32bytes_dwordOff:
11766 ; X86-AVX: # %bb.0:
11767 ; X86-AVX-NEXT: pushl %ebx
11768 ; X86-AVX-NEXT: pushl %edi
11769 ; X86-AVX-NEXT: pushl %esi
11770 ; X86-AVX-NEXT: subl $64, %esp
11771 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
11772 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
11773 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
11774 ; X86-AVX-NEXT: vmovups (%edx), %xmm0
11775 ; X86-AVX-NEXT: movl 16(%edx), %esi
11776 ; X86-AVX-NEXT: movl 20(%edx), %edi
11777 ; X86-AVX-NEXT: movl 24(%edx), %ebx
11778 ; X86-AVX-NEXT: movl 28(%edx), %edx
11779 ; X86-AVX-NEXT: movzbl (%ecx), %ecx
11780 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
11781 ; X86-AVX-NEXT: movl %ebx, {{[0-9]+}}(%esp)
11782 ; X86-AVX-NEXT: movl %edi, {{[0-9]+}}(%esp)
11783 ; X86-AVX-NEXT: movl %esi, {{[0-9]+}}(%esp)
11784 ; X86-AVX-NEXT: vmovaps %xmm0, (%esp)
11785 ; X86-AVX-NEXT: sarl $31, %edx
11786 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
11787 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
11788 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
11789 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
11790 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
11791 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
11792 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
11793 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
11794 ; X86-AVX-NEXT: andl $7, %ecx
11795 ; X86-AVX-NEXT: vmovups (%esp,%ecx,4), %xmm0
11796 ; X86-AVX-NEXT: vmovups 16(%esp,%ecx,4), %xmm1
11797 ; X86-AVX-NEXT: vmovups %xmm1, 16(%eax)
11798 ; X86-AVX-NEXT: vmovups %xmm0, (%eax)
11799 ; X86-AVX-NEXT: addl $64, %esp
11800 ; X86-AVX-NEXT: popl %esi
11801 ; X86-AVX-NEXT: popl %edi
11802 ; X86-AVX-NEXT: popl %ebx
11803 ; X86-AVX-NEXT: retl
11804 %src = load i256, ptr %src.ptr, align 1
11805 %dwordOff = load i256, ptr %dwordOff.ptr, align 1
11806 %bitOff = shl i256 %dwordOff, 5
11807 %res = ashr i256 %src, %bitOff
11808 store i256 %res, ptr %dst, align 1
; Test: arithmetic right shift of a 256-bit (32-byte) integer where the shift
; amount is supplied as a count of qwords rather than bits.
;   %src.ptr      - address of the i256 value to shift (loaded with align 1)
;   %qwordOff.ptr - address of the i256 shift amount, counted in qwords
;   %dst          - address that receives the i256 result (stored with align 1)
; The IR at the end of the function turns the qword count into a bit count
; (shl by 6, i.e. multiply by 64) and feeds it to ashr. The expected-output
; groups below are autogenerated (see the NOTE at the top of the file); do not
; edit them by hand.
11812 define void @ashr_32bytes_qwordOff(ptr %src.ptr, ptr %qwordOff.ptr, ptr %dst) nounwind {
; 64-bit, scalar-only expectation: the four source qwords are written to a
; stack buffer together with four copies of the sign word (top qword shifted
; by sarq $63); the offset byte is masked to 0..3 and used as a scale-8 index
; to read the four result qwords back before storing them to the destination.
11813 ; X64-SSE2-LABEL: ashr_32bytes_qwordOff:
11814 ; X64-SSE2: # %bb.0:
11815 ; X64-SSE2-NEXT: movq (%rdi), %rax
11816 ; X64-SSE2-NEXT: movq 8(%rdi), %rcx
11817 ; X64-SSE2-NEXT: movq 16(%rdi), %r8
11818 ; X64-SSE2-NEXT: movq 24(%rdi), %rdi
11819 ; X64-SSE2-NEXT: movzbl (%rsi), %esi
11820 ; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11821 ; X64-SSE2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
11822 ; X64-SSE2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
11823 ; X64-SSE2-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
11824 ; X64-SSE2-NEXT: sarq $63, %rdi
11825 ; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11826 ; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11827 ; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11828 ; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
11829 ; X64-SSE2-NEXT: andl $3, %esi
11830 ; X64-SSE2-NEXT: movq -72(%rsp,%rsi,8), %rax
11831 ; X64-SSE2-NEXT: movq -64(%rsp,%rsi,8), %rcx
11832 ; X64-SSE2-NEXT: movq -48(%rsp,%rsi,8), %rdi
11833 ; X64-SSE2-NEXT: movq -56(%rsp,%rsi,8), %rsi
11834 ; X64-SSE2-NEXT: movq %rsi, 16(%rdx)
11835 ; X64-SSE2-NEXT: movq %rdi, 24(%rdx)
11836 ; X64-SSE2-NEXT: movq %rax, (%rdx)
11837 ; X64-SSE2-NEXT: movq %rcx, 8(%rdx)
11838 ; X64-SSE2-NEXT: retq
; 64-bit SSE4.2 expectation: same stack-buffer scheme, but the low 16 source
; bytes travel through xmm0 and the result is read back and stored as two
; 16-byte movups transfers.
11840 ; X64-SSE42-LABEL: ashr_32bytes_qwordOff:
11841 ; X64-SSE42: # %bb.0:
11842 ; X64-SSE42-NEXT: movups (%rdi), %xmm0
11843 ; X64-SSE42-NEXT: movq 16(%rdi), %rax
11844 ; X64-SSE42-NEXT: movq 24(%rdi), %rcx
11845 ; X64-SSE42-NEXT: movzbl (%rsi), %esi
11846 ; X64-SSE42-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
11847 ; X64-SSE42-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
11848 ; X64-SSE42-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
11849 ; X64-SSE42-NEXT: sarq $63, %rcx
11850 ; X64-SSE42-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
11851 ; X64-SSE42-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
11852 ; X64-SSE42-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
11853 ; X64-SSE42-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
11854 ; X64-SSE42-NEXT: andl $3, %esi
11855 ; X64-SSE42-NEXT: movups -72(%rsp,%rsi,8), %xmm0
11856 ; X64-SSE42-NEXT: movups -56(%rsp,%rsi,8), %xmm1
11857 ; X64-SSE42-NEXT: movups %xmm1, 16(%rdx)
11858 ; X64-SSE42-NEXT: movups %xmm0, (%rdx)
11859 ; X64-SSE42-NEXT: retq
; 64-bit AVX expectation: identical in shape to the SSE4.2 sequence above,
; using the VEX-encoded vmovups/vmovaps forms.
11861 ; X64-AVX-LABEL: ashr_32bytes_qwordOff:
11862 ; X64-AVX: # %bb.0:
11863 ; X64-AVX-NEXT: vmovups (%rdi), %xmm0
11864 ; X64-AVX-NEXT: movq 16(%rdi), %rax
11865 ; X64-AVX-NEXT: movq 24(%rdi), %rcx
11866 ; X64-AVX-NEXT: movzbl (%rsi), %esi
11867 ; X64-AVX-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
11868 ; X64-AVX-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
11869 ; X64-AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
11870 ; X64-AVX-NEXT: sarq $63, %rcx
11871 ; X64-AVX-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
11872 ; X64-AVX-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
11873 ; X64-AVX-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
11874 ; X64-AVX-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
11875 ; X64-AVX-NEXT: andl $3, %esi
11876 ; X64-AVX-NEXT: vmovups -72(%rsp,%rsi,8), %xmm0
11877 ; X64-AVX-NEXT: vmovups -56(%rsp,%rsi,8), %xmm1
11878 ; X64-AVX-NEXT: vmovups %xmm1, 16(%rdx)
11879 ; X64-AVX-NEXT: vmovups %xmm0, (%rdx)
11880 ; X64-AVX-NEXT: retq
; 32-bit, scalar-only expectation: the value is handled as eight dwords. They
; are copied to a stack buffer along with eight copies of the sign dword
; (sarl $31 of the top dword); two of the source dwords and two of the result
; dwords go through spill slots. The offset is masked to 0..3 and applied as a
; scale-8 index when reading the result back.
11882 ; X86-SSE2-LABEL: ashr_32bytes_qwordOff:
11883 ; X86-SSE2: # %bb.0:
11884 ; X86-SSE2-NEXT: pushl %ebp
11885 ; X86-SSE2-NEXT: pushl %ebx
11886 ; X86-SSE2-NEXT: pushl %edi
11887 ; X86-SSE2-NEXT: pushl %esi
11888 ; X86-SSE2-NEXT: subl $92, %esp
11889 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
11890 ; X86-SSE2-NEXT: movl (%eax), %ecx
11891 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
11892 ; X86-SSE2-NEXT: movl 4(%eax), %ecx
11893 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
11894 ; X86-SSE2-NEXT: movl 8(%eax), %edi
11895 ; X86-SSE2-NEXT: movl 12(%eax), %ebx
11896 ; X86-SSE2-NEXT: movl 16(%eax), %ebp
11897 ; X86-SSE2-NEXT: movl 20(%eax), %esi
11898 ; X86-SSE2-NEXT: movl 24(%eax), %edx
11899 ; X86-SSE2-NEXT: movl 28(%eax), %ecx
11900 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
11901 ; X86-SSE2-NEXT: movzbl (%eax), %eax
11902 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
11903 ; X86-SSE2-NEXT: movl %esi, {{[0-9]+}}(%esp)
11904 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
11905 ; X86-SSE2-NEXT: movl %ebp, {{[0-9]+}}(%esp)
11906 ; X86-SSE2-NEXT: movl %ebx, {{[0-9]+}}(%esp)
11907 ; X86-SSE2-NEXT: movl %edi, {{[0-9]+}}(%esp)
11908 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
11909 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
11910 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
11911 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
11912 ; X86-SSE2-NEXT: sarl $31, %ecx
11913 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
11914 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
11915 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
11916 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
11917 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
11918 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
11919 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
11920 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
11921 ; X86-SSE2-NEXT: andl $3, %eax
11922 ; X86-SSE2-NEXT: movl 16(%esp,%eax,8), %ecx
11923 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
11924 ; X86-SSE2-NEXT: movl 20(%esp,%eax,8), %ecx
11925 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
11926 ; X86-SSE2-NEXT: movl 28(%esp,%eax,8), %esi
11927 ; X86-SSE2-NEXT: movl 24(%esp,%eax,8), %edi
11928 ; X86-SSE2-NEXT: movl 36(%esp,%eax,8), %ebx
11929 ; X86-SSE2-NEXT: movl 32(%esp,%eax,8), %ebp
11930 ; X86-SSE2-NEXT: movl 44(%esp,%eax,8), %edx
11931 ; X86-SSE2-NEXT: movl 40(%esp,%eax,8), %ecx
11932 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
11933 ; X86-SSE2-NEXT: movl %ecx, 24(%eax)
11934 ; X86-SSE2-NEXT: movl %edx, 28(%eax)
11935 ; X86-SSE2-NEXT: movl %ebp, 16(%eax)
11936 ; X86-SSE2-NEXT: movl %ebx, 20(%eax)
11937 ; X86-SSE2-NEXT: movl %edi, 8(%eax)
11938 ; X86-SSE2-NEXT: movl %esi, 12(%eax)
11939 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
11940 ; X86-SSE2-NEXT: movl %ecx, (%eax)
11941 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
11942 ; X86-SSE2-NEXT: movl %ecx, 4(%eax)
11943 ; X86-SSE2-NEXT: addl $92, %esp
11944 ; X86-SSE2-NEXT: popl %esi
11945 ; X86-SSE2-NEXT: popl %edi
11946 ; X86-SSE2-NEXT: popl %ebx
11947 ; X86-SSE2-NEXT: popl %ebp
11948 ; X86-SSE2-NEXT: retl
; 32-bit SSE4.2 expectation: a 64-byte stack buffer at (%esp) holds the source
; (low 16 bytes via xmm0, upper four dwords via scalar stores) plus eight sign
; dwords; the result is fetched with two movups loads indexed by the masked
; offset at scale 8.
11950 ; X86-SSE42-LABEL: ashr_32bytes_qwordOff:
11951 ; X86-SSE42: # %bb.0:
11952 ; X86-SSE42-NEXT: pushl %ebx
11953 ; X86-SSE42-NEXT: pushl %edi
11954 ; X86-SSE42-NEXT: pushl %esi
11955 ; X86-SSE42-NEXT: subl $64, %esp
11956 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %eax
11957 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %ecx
11958 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %edx
11959 ; X86-SSE42-NEXT: movups (%edx), %xmm0
11960 ; X86-SSE42-NEXT: movl 16(%edx), %esi
11961 ; X86-SSE42-NEXT: movl 20(%edx), %edi
11962 ; X86-SSE42-NEXT: movl 24(%edx), %ebx
11963 ; X86-SSE42-NEXT: movl 28(%edx), %edx
11964 ; X86-SSE42-NEXT: movzbl (%ecx), %ecx
11965 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
11966 ; X86-SSE42-NEXT: movl %ebx, {{[0-9]+}}(%esp)
11967 ; X86-SSE42-NEXT: movl %edi, {{[0-9]+}}(%esp)
11968 ; X86-SSE42-NEXT: movl %esi, {{[0-9]+}}(%esp)
11969 ; X86-SSE42-NEXT: movaps %xmm0, (%esp)
11970 ; X86-SSE42-NEXT: sarl $31, %edx
11971 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
11972 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
11973 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
11974 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
11975 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
11976 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
11977 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
11978 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
11979 ; X86-SSE42-NEXT: andl $3, %ecx
11980 ; X86-SSE42-NEXT: movups (%esp,%ecx,8), %xmm0
11981 ; X86-SSE42-NEXT: movups 16(%esp,%ecx,8), %xmm1
11982 ; X86-SSE42-NEXT: movups %xmm1, 16(%eax)
11983 ; X86-SSE42-NEXT: movups %xmm0, (%eax)
11984 ; X86-SSE42-NEXT: addl $64, %esp
11985 ; X86-SSE42-NEXT: popl %esi
11986 ; X86-SSE42-NEXT: popl %edi
11987 ; X86-SSE42-NEXT: popl %ebx
11988 ; X86-SSE42-NEXT: retl
; 32-bit AVX expectation: same sequence as the 32-bit SSE4.2 one above with
; VEX-encoded vector moves.
11990 ; X86-AVX-LABEL: ashr_32bytes_qwordOff:
11991 ; X86-AVX: # %bb.0:
11992 ; X86-AVX-NEXT: pushl %ebx
11993 ; X86-AVX-NEXT: pushl %edi
11994 ; X86-AVX-NEXT: pushl %esi
11995 ; X86-AVX-NEXT: subl $64, %esp
11996 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
11997 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
11998 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
11999 ; X86-AVX-NEXT: vmovups (%edx), %xmm0
12000 ; X86-AVX-NEXT: movl 16(%edx), %esi
12001 ; X86-AVX-NEXT: movl 20(%edx), %edi
12002 ; X86-AVX-NEXT: movl 24(%edx), %ebx
12003 ; X86-AVX-NEXT: movl 28(%edx), %edx
12004 ; X86-AVX-NEXT: movzbl (%ecx), %ecx
12005 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
12006 ; X86-AVX-NEXT: movl %ebx, {{[0-9]+}}(%esp)
12007 ; X86-AVX-NEXT: movl %edi, {{[0-9]+}}(%esp)
12008 ; X86-AVX-NEXT: movl %esi, {{[0-9]+}}(%esp)
12009 ; X86-AVX-NEXT: vmovaps %xmm0, (%esp)
12010 ; X86-AVX-NEXT: sarl $31, %edx
12011 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
12012 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
12013 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
12014 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
12015 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
12016 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
12017 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
12018 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
12019 ; X86-AVX-NEXT: andl $3, %ecx
12020 ; X86-AVX-NEXT: vmovups (%esp,%ecx,8), %xmm0
12021 ; X86-AVX-NEXT: vmovups 16(%esp,%ecx,8), %xmm1
12022 ; X86-AVX-NEXT: vmovups %xmm1, 16(%eax)
12023 ; X86-AVX-NEXT: vmovups %xmm0, (%eax)
12024 ; X86-AVX-NEXT: addl $64, %esp
12025 ; X86-AVX-NEXT: popl %esi
12026 ; X86-AVX-NEXT: popl %edi
12027 ; X86-AVX-NEXT: popl %ebx
12028 ; X86-AVX-NEXT: retl
; IR under test: load the value and the qword offset, scale the offset to bits
; (shl by 6 multiplies by 64), shift arithmetically, and store the result.
12029 %src = load i256, ptr %src.ptr, align 1
12030 %qwordOff = load i256, ptr %qwordOff.ptr, align 1
12031 %bitOff = shl i256 %qwordOff, 6
12032 %res = ashr i256 %src, %bitOff
12033 store i256 %res, ptr %dst, align 1
12037 define void @lshr_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
12038 ; FALLBACK0-LABEL: lshr_64bytes:
12039 ; FALLBACK0: # %bb.0:
12040 ; FALLBACK0-NEXT: pushq %r15
12041 ; FALLBACK0-NEXT: pushq %r14
12042 ; FALLBACK0-NEXT: pushq %r13
12043 ; FALLBACK0-NEXT: pushq %r12
12044 ; FALLBACK0-NEXT: pushq %rbx
12045 ; FALLBACK0-NEXT: movq (%rdi), %rax
12046 ; FALLBACK0-NEXT: movq 8(%rdi), %rcx
12047 ; FALLBACK0-NEXT: movq 16(%rdi), %r8
12048 ; FALLBACK0-NEXT: movq 24(%rdi), %r9
12049 ; FALLBACK0-NEXT: movq 32(%rdi), %r10
12050 ; FALLBACK0-NEXT: movq 40(%rdi), %r11
12051 ; FALLBACK0-NEXT: movq 48(%rdi), %rbx
12052 ; FALLBACK0-NEXT: movq 56(%rdi), %r14
12053 ; FALLBACK0-NEXT: movl (%rsi), %edi
12054 ; FALLBACK0-NEXT: xorps %xmm0, %xmm0
12055 ; FALLBACK0-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
12056 ; FALLBACK0-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
12057 ; FALLBACK0-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
12058 ; FALLBACK0-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
12059 ; FALLBACK0-NEXT: movq %r14, -{{[0-9]+}}(%rsp)
12060 ; FALLBACK0-NEXT: movq %rbx, -{{[0-9]+}}(%rsp)
12061 ; FALLBACK0-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
12062 ; FALLBACK0-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
12063 ; FALLBACK0-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
12064 ; FALLBACK0-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
12065 ; FALLBACK0-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
12066 ; FALLBACK0-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
12067 ; FALLBACK0-NEXT: leal (,%rdi,8), %eax
12068 ; FALLBACK0-NEXT: andl $56, %eax
12069 ; FALLBACK0-NEXT: andl $56, %edi
12070 ; FALLBACK0-NEXT: movq -128(%rsp,%rdi), %r10
12071 ; FALLBACK0-NEXT: movq -120(%rsp,%rdi), %r8
12072 ; FALLBACK0-NEXT: movq %r8, %r11
12073 ; FALLBACK0-NEXT: movl %eax, %ecx
12074 ; FALLBACK0-NEXT: shrq %cl, %r11
12075 ; FALLBACK0-NEXT: movl %eax, %esi
12076 ; FALLBACK0-NEXT: notb %sil
12077 ; FALLBACK0-NEXT: movq -112(%rsp,%rdi), %rbx
12078 ; FALLBACK0-NEXT: leaq (%rbx,%rbx), %r9
12079 ; FALLBACK0-NEXT: movl %esi, %ecx
12080 ; FALLBACK0-NEXT: shlq %cl, %r9
12081 ; FALLBACK0-NEXT: orq %r11, %r9
12082 ; FALLBACK0-NEXT: movl %eax, %ecx
12083 ; FALLBACK0-NEXT: shrq %cl, %r10
12084 ; FALLBACK0-NEXT: addq %r8, %r8
12085 ; FALLBACK0-NEXT: movl %esi, %ecx
12086 ; FALLBACK0-NEXT: shlq %cl, %r8
12087 ; FALLBACK0-NEXT: orq %r10, %r8
12088 ; FALLBACK0-NEXT: movq -104(%rsp,%rdi), %r10
12089 ; FALLBACK0-NEXT: movq %r10, %r15
12090 ; FALLBACK0-NEXT: movl %eax, %ecx
12091 ; FALLBACK0-NEXT: shrq %cl, %r15
12092 ; FALLBACK0-NEXT: movq -96(%rsp,%rdi), %r14
12093 ; FALLBACK0-NEXT: leaq (%r14,%r14), %r11
12094 ; FALLBACK0-NEXT: movl %esi, %ecx
12095 ; FALLBACK0-NEXT: shlq %cl, %r11
12096 ; FALLBACK0-NEXT: orq %r15, %r11
12097 ; FALLBACK0-NEXT: movl %eax, %ecx
12098 ; FALLBACK0-NEXT: shrq %cl, %rbx
12099 ; FALLBACK0-NEXT: addq %r10, %r10
12100 ; FALLBACK0-NEXT: movl %esi, %ecx
12101 ; FALLBACK0-NEXT: shlq %cl, %r10
12102 ; FALLBACK0-NEXT: orq %rbx, %r10
12103 ; FALLBACK0-NEXT: movq -88(%rsp,%rdi), %rbx
12104 ; FALLBACK0-NEXT: movq %rbx, %r12
12105 ; FALLBACK0-NEXT: movl %eax, %ecx
12106 ; FALLBACK0-NEXT: shrq %cl, %r12
12107 ; FALLBACK0-NEXT: movq -80(%rsp,%rdi), %r13
12108 ; FALLBACK0-NEXT: leaq (%r13,%r13), %r15
12109 ; FALLBACK0-NEXT: movl %esi, %ecx
12110 ; FALLBACK0-NEXT: shlq %cl, %r15
12111 ; FALLBACK0-NEXT: orq %r12, %r15
12112 ; FALLBACK0-NEXT: movl %eax, %ecx
12113 ; FALLBACK0-NEXT: shrq %cl, %r14
12114 ; FALLBACK0-NEXT: addq %rbx, %rbx
12115 ; FALLBACK0-NEXT: movl %esi, %ecx
12116 ; FALLBACK0-NEXT: shlq %cl, %rbx
12117 ; FALLBACK0-NEXT: orq %r14, %rbx
12118 ; FALLBACK0-NEXT: movl %eax, %ecx
12119 ; FALLBACK0-NEXT: shrq %cl, %r13
12120 ; FALLBACK0-NEXT: movq -72(%rsp,%rdi), %rdi
12121 ; FALLBACK0-NEXT: leaq (%rdi,%rdi), %r14
12122 ; FALLBACK0-NEXT: movl %esi, %ecx
12123 ; FALLBACK0-NEXT: shlq %cl, %r14
12124 ; FALLBACK0-NEXT: orq %r13, %r14
12125 ; FALLBACK0-NEXT: movl %eax, %ecx
12126 ; FALLBACK0-NEXT: shrq %cl, %rdi
12127 ; FALLBACK0-NEXT: movq %rdi, 56(%rdx)
12128 ; FALLBACK0-NEXT: movq %r14, 48(%rdx)
12129 ; FALLBACK0-NEXT: movq %rbx, 32(%rdx)
12130 ; FALLBACK0-NEXT: movq %r15, 40(%rdx)
12131 ; FALLBACK0-NEXT: movq %r10, 16(%rdx)
12132 ; FALLBACK0-NEXT: movq %r11, 24(%rdx)
12133 ; FALLBACK0-NEXT: movq %r8, (%rdx)
12134 ; FALLBACK0-NEXT: movq %r9, 8(%rdx)
12135 ; FALLBACK0-NEXT: popq %rbx
12136 ; FALLBACK0-NEXT: popq %r12
12137 ; FALLBACK0-NEXT: popq %r13
12138 ; FALLBACK0-NEXT: popq %r14
12139 ; FALLBACK0-NEXT: popq %r15
12140 ; FALLBACK0-NEXT: retq
12142 ; FALLBACK1-LABEL: lshr_64bytes:
12143 ; FALLBACK1: # %bb.0:
12144 ; FALLBACK1-NEXT: pushq %r15
12145 ; FALLBACK1-NEXT: pushq %r14
12146 ; FALLBACK1-NEXT: pushq %rbx
12147 ; FALLBACK1-NEXT: movq (%rdi), %rcx
12148 ; FALLBACK1-NEXT: movq 8(%rdi), %r8
12149 ; FALLBACK1-NEXT: movq 16(%rdi), %r9
12150 ; FALLBACK1-NEXT: movq 24(%rdi), %r10
12151 ; FALLBACK1-NEXT: movq 32(%rdi), %r11
12152 ; FALLBACK1-NEXT: movq 40(%rdi), %rbx
12153 ; FALLBACK1-NEXT: movq 48(%rdi), %r14
12154 ; FALLBACK1-NEXT: movq 56(%rdi), %rdi
12155 ; FALLBACK1-NEXT: movl (%rsi), %eax
12156 ; FALLBACK1-NEXT: xorps %xmm0, %xmm0
12157 ; FALLBACK1-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
12158 ; FALLBACK1-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
12159 ; FALLBACK1-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
12160 ; FALLBACK1-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
12161 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
12162 ; FALLBACK1-NEXT: movq %r14, -{{[0-9]+}}(%rsp)
12163 ; FALLBACK1-NEXT: movq %rbx, -{{[0-9]+}}(%rsp)
12164 ; FALLBACK1-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
12165 ; FALLBACK1-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
12166 ; FALLBACK1-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
12167 ; FALLBACK1-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
12168 ; FALLBACK1-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
12169 ; FALLBACK1-NEXT: leal (,%rax,8), %ecx
12170 ; FALLBACK1-NEXT: andl $56, %ecx
12171 ; FALLBACK1-NEXT: andl $56, %eax
12172 ; FALLBACK1-NEXT: movq -112(%rsp,%rax), %rdi
12173 ; FALLBACK1-NEXT: movq -128(%rsp,%rax), %rsi
12174 ; FALLBACK1-NEXT: movq -120(%rsp,%rax), %r9
12175 ; FALLBACK1-NEXT: movq %r9, %r8
12176 ; FALLBACK1-NEXT: shrdq %cl, %rdi, %r8
12177 ; FALLBACK1-NEXT: movq -96(%rsp,%rax), %r10
12178 ; FALLBACK1-NEXT: movq -104(%rsp,%rax), %r11
12179 ; FALLBACK1-NEXT: movq %r11, %rbx
12180 ; FALLBACK1-NEXT: shrdq %cl, %r10, %rbx
12181 ; FALLBACK1-NEXT: shrdq %cl, %r11, %rdi
12182 ; FALLBACK1-NEXT: movq -80(%rsp,%rax), %r11
12183 ; FALLBACK1-NEXT: movq -88(%rsp,%rax), %r14
12184 ; FALLBACK1-NEXT: movq %r14, %r15
12185 ; FALLBACK1-NEXT: shrdq %cl, %r11, %r15
12186 ; FALLBACK1-NEXT: shrdq %cl, %r14, %r10
12187 ; FALLBACK1-NEXT: movq -72(%rsp,%rax), %rax
12188 ; FALLBACK1-NEXT: shrdq %cl, %rax, %r11
12189 ; FALLBACK1-NEXT: shrdq %cl, %r9, %rsi
12190 ; FALLBACK1-NEXT: # kill: def $cl killed $cl killed $ecx
12191 ; FALLBACK1-NEXT: shrq %cl, %rax
12192 ; FALLBACK1-NEXT: movq %r11, 48(%rdx)
12193 ; FALLBACK1-NEXT: movq %rax, 56(%rdx)
12194 ; FALLBACK1-NEXT: movq %r10, 32(%rdx)
12195 ; FALLBACK1-NEXT: movq %r15, 40(%rdx)
12196 ; FALLBACK1-NEXT: movq %rdi, 16(%rdx)
12197 ; FALLBACK1-NEXT: movq %rbx, 24(%rdx)
12198 ; FALLBACK1-NEXT: movq %rsi, (%rdx)
12199 ; FALLBACK1-NEXT: movq %r8, 8(%rdx)
12200 ; FALLBACK1-NEXT: popq %rbx
12201 ; FALLBACK1-NEXT: popq %r14
12202 ; FALLBACK1-NEXT: popq %r15
12203 ; FALLBACK1-NEXT: retq
12205 ; FALLBACK2-LABEL: lshr_64bytes:
12206 ; FALLBACK2: # %bb.0:
12207 ; FALLBACK2-NEXT: pushq %rbp
12208 ; FALLBACK2-NEXT: pushq %r15
12209 ; FALLBACK2-NEXT: pushq %r14
12210 ; FALLBACK2-NEXT: pushq %r13
12211 ; FALLBACK2-NEXT: pushq %r12
12212 ; FALLBACK2-NEXT: pushq %rbx
12213 ; FALLBACK2-NEXT: pushq %rax
12214 ; FALLBACK2-NEXT: movq (%rdi), %rcx
12215 ; FALLBACK2-NEXT: movq 8(%rdi), %r8
12216 ; FALLBACK2-NEXT: movq 16(%rdi), %r9
12217 ; FALLBACK2-NEXT: movq 24(%rdi), %r10
12218 ; FALLBACK2-NEXT: movq 32(%rdi), %r11
12219 ; FALLBACK2-NEXT: movq 40(%rdi), %rbx
12220 ; FALLBACK2-NEXT: movq 48(%rdi), %r14
12221 ; FALLBACK2-NEXT: movq 56(%rdi), %rdi
12222 ; FALLBACK2-NEXT: movl (%rsi), %eax
12223 ; FALLBACK2-NEXT: xorps %xmm0, %xmm0
12224 ; FALLBACK2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
12225 ; FALLBACK2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
12226 ; FALLBACK2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
12227 ; FALLBACK2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
12228 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
12229 ; FALLBACK2-NEXT: movq %r14, -{{[0-9]+}}(%rsp)
12230 ; FALLBACK2-NEXT: movq %rbx, -{{[0-9]+}}(%rsp)
12231 ; FALLBACK2-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
12232 ; FALLBACK2-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
12233 ; FALLBACK2-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
12234 ; FALLBACK2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
12235 ; FALLBACK2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
12236 ; FALLBACK2-NEXT: leal (,%rax,8), %ecx
12237 ; FALLBACK2-NEXT: andl $56, %ecx
12238 ; FALLBACK2-NEXT: andl $56, %eax
12239 ; FALLBACK2-NEXT: movq -120(%rsp,%rax), %rdi
12240 ; FALLBACK2-NEXT: movq -112(%rsp,%rax), %r9
12241 ; FALLBACK2-NEXT: shrxq %rcx, %rdi, %rbx
12242 ; FALLBACK2-NEXT: shrxq %rcx, -128(%rsp,%rax), %r13
12243 ; FALLBACK2-NEXT: movq -104(%rsp,%rax), %rsi
12244 ; FALLBACK2-NEXT: shrxq %rcx, %rsi, %r8
12245 ; FALLBACK2-NEXT: movq -96(%rsp,%rax), %r10
12246 ; FALLBACK2-NEXT: shrxq %rcx, %r9, %r11
12247 ; FALLBACK2-NEXT: movq -88(%rsp,%rax), %r14
12248 ; FALLBACK2-NEXT: shrxq %rcx, %r14, %r15
12249 ; FALLBACK2-NEXT: shrxq %rcx, %r10, %rbp
12250 ; FALLBACK2-NEXT: movl %ecx, %r12d
12251 ; FALLBACK2-NEXT: notb %r12b
12252 ; FALLBACK2-NEXT: addq %r9, %r9
12253 ; FALLBACK2-NEXT: shlxq %r12, %r9, %r9
12254 ; FALLBACK2-NEXT: orq %rbx, %r9
12255 ; FALLBACK2-NEXT: addq %rdi, %rdi
12256 ; FALLBACK2-NEXT: shlxq %r12, %rdi, %rdi
12257 ; FALLBACK2-NEXT: orq %r13, %rdi
12258 ; FALLBACK2-NEXT: movq -80(%rsp,%rax), %rbx
12259 ; FALLBACK2-NEXT: shrxq %rcx, %rbx, %r13
12260 ; FALLBACK2-NEXT: movq -72(%rsp,%rax), %rax
12261 ; FALLBACK2-NEXT: shrxq %rcx, %rax, %rcx
12262 ; FALLBACK2-NEXT: addq %r10, %r10
12263 ; FALLBACK2-NEXT: shlxq %r12, %r10, %r10
12264 ; FALLBACK2-NEXT: orq %r8, %r10
12265 ; FALLBACK2-NEXT: addq %rsi, %rsi
12266 ; FALLBACK2-NEXT: shlxq %r12, %rsi, %rsi
12267 ; FALLBACK2-NEXT: orq %r11, %rsi
12268 ; FALLBACK2-NEXT: leaq (%rbx,%rbx), %r8
12269 ; FALLBACK2-NEXT: shlxq %r12, %r8, %r8
12270 ; FALLBACK2-NEXT: orq %r15, %r8
12271 ; FALLBACK2-NEXT: addq %r14, %r14
12272 ; FALLBACK2-NEXT: shlxq %r12, %r14, %r11
12273 ; FALLBACK2-NEXT: orq %rbp, %r11
12274 ; FALLBACK2-NEXT: addq %rax, %rax
12275 ; FALLBACK2-NEXT: shlxq %r12, %rax, %rax
12276 ; FALLBACK2-NEXT: orq %r13, %rax
12277 ; FALLBACK2-NEXT: movq %rcx, 56(%rdx)
12278 ; FALLBACK2-NEXT: movq %rax, 48(%rdx)
12279 ; FALLBACK2-NEXT: movq %r11, 32(%rdx)
12280 ; FALLBACK2-NEXT: movq %r8, 40(%rdx)
12281 ; FALLBACK2-NEXT: movq %rsi, 16(%rdx)
12282 ; FALLBACK2-NEXT: movq %r10, 24(%rdx)
12283 ; FALLBACK2-NEXT: movq %rdi, (%rdx)
12284 ; FALLBACK2-NEXT: movq %r9, 8(%rdx)
12285 ; FALLBACK2-NEXT: addq $8, %rsp
12286 ; FALLBACK2-NEXT: popq %rbx
12287 ; FALLBACK2-NEXT: popq %r12
12288 ; FALLBACK2-NEXT: popq %r13
12289 ; FALLBACK2-NEXT: popq %r14
12290 ; FALLBACK2-NEXT: popq %r15
12291 ; FALLBACK2-NEXT: popq %rbp
12292 ; FALLBACK2-NEXT: retq
12294 ; FALLBACK3-LABEL: lshr_64bytes:
12295 ; FALLBACK3: # %bb.0:
12296 ; FALLBACK3-NEXT: pushq %r15
12297 ; FALLBACK3-NEXT: pushq %r14
12298 ; FALLBACK3-NEXT: pushq %rbx
12299 ; FALLBACK3-NEXT: movq (%rdi), %rcx
12300 ; FALLBACK3-NEXT: movq 8(%rdi), %r8
12301 ; FALLBACK3-NEXT: movq 16(%rdi), %r9
12302 ; FALLBACK3-NEXT: movq 24(%rdi), %r10
12303 ; FALLBACK3-NEXT: movq 32(%rdi), %r11
12304 ; FALLBACK3-NEXT: movq 40(%rdi), %rbx
12305 ; FALLBACK3-NEXT: movq 48(%rdi), %r14
12306 ; FALLBACK3-NEXT: movq 56(%rdi), %rdi
12307 ; FALLBACK3-NEXT: movl (%rsi), %eax
12308 ; FALLBACK3-NEXT: xorps %xmm0, %xmm0
12309 ; FALLBACK3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
12310 ; FALLBACK3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
12311 ; FALLBACK3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
12312 ; FALLBACK3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
12313 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
12314 ; FALLBACK3-NEXT: movq %r14, -{{[0-9]+}}(%rsp)
12315 ; FALLBACK3-NEXT: movq %rbx, -{{[0-9]+}}(%rsp)
12316 ; FALLBACK3-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
12317 ; FALLBACK3-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
12318 ; FALLBACK3-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
12319 ; FALLBACK3-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
12320 ; FALLBACK3-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
12321 ; FALLBACK3-NEXT: leal (,%rax,8), %ecx
12322 ; FALLBACK3-NEXT: andl $56, %ecx
12323 ; FALLBACK3-NEXT: andl $56, %eax
12324 ; FALLBACK3-NEXT: movq -112(%rsp,%rax), %rdi
12325 ; FALLBACK3-NEXT: movq -128(%rsp,%rax), %rsi
12326 ; FALLBACK3-NEXT: movq -120(%rsp,%rax), %r9
12327 ; FALLBACK3-NEXT: movq %r9, %r8
12328 ; FALLBACK3-NEXT: shrdq %cl, %rdi, %r8
12329 ; FALLBACK3-NEXT: movq -96(%rsp,%rax), %r10
12330 ; FALLBACK3-NEXT: movq -104(%rsp,%rax), %r11
12331 ; FALLBACK3-NEXT: movq %r11, %rbx
12332 ; FALLBACK3-NEXT: shrdq %cl, %r10, %rbx
12333 ; FALLBACK3-NEXT: shrdq %cl, %r11, %rdi
12334 ; FALLBACK3-NEXT: movq -80(%rsp,%rax), %r11
12335 ; FALLBACK3-NEXT: movq -88(%rsp,%rax), %r14
12336 ; FALLBACK3-NEXT: movq %r14, %r15
12337 ; FALLBACK3-NEXT: shrdq %cl, %r11, %r15
12338 ; FALLBACK3-NEXT: shrdq %cl, %r14, %r10
12339 ; FALLBACK3-NEXT: movq -72(%rsp,%rax), %rax
12340 ; FALLBACK3-NEXT: shrdq %cl, %rax, %r11
12341 ; FALLBACK3-NEXT: shrxq %rcx, %rax, %rax
12342 ; FALLBACK3-NEXT: # kill: def $cl killed $cl killed $rcx
12343 ; FALLBACK3-NEXT: shrdq %cl, %r9, %rsi
12344 ; FALLBACK3-NEXT: movq %r11, 48(%rdx)
12345 ; FALLBACK3-NEXT: movq %r10, 32(%rdx)
12346 ; FALLBACK3-NEXT: movq %r15, 40(%rdx)
12347 ; FALLBACK3-NEXT: movq %rdi, 16(%rdx)
12348 ; FALLBACK3-NEXT: movq %rbx, 24(%rdx)
12349 ; FALLBACK3-NEXT: movq %rsi, (%rdx)
12350 ; FALLBACK3-NEXT: movq %r8, 8(%rdx)
12351 ; FALLBACK3-NEXT: movq %rax, 56(%rdx)
12352 ; FALLBACK3-NEXT: popq %rbx
12353 ; FALLBACK3-NEXT: popq %r14
12354 ; FALLBACK3-NEXT: popq %r15
12355 ; FALLBACK3-NEXT: retq
12357 ; FALLBACK4-LABEL: lshr_64bytes:
12358 ; FALLBACK4: # %bb.0:
12359 ; FALLBACK4-NEXT: pushq %rbp
12360 ; FALLBACK4-NEXT: pushq %r15
12361 ; FALLBACK4-NEXT: pushq %r14
12362 ; FALLBACK4-NEXT: pushq %r13
12363 ; FALLBACK4-NEXT: pushq %r12
12364 ; FALLBACK4-NEXT: pushq %rbx
12365 ; FALLBACK4-NEXT: pushq %rax
12366 ; FALLBACK4-NEXT: movups (%rdi), %xmm0
12367 ; FALLBACK4-NEXT: movups 16(%rdi), %xmm1
12368 ; FALLBACK4-NEXT: movups 32(%rdi), %xmm2
12369 ; FALLBACK4-NEXT: movups 48(%rdi), %xmm3
12370 ; FALLBACK4-NEXT: movl (%rsi), %r8d
12371 ; FALLBACK4-NEXT: xorps %xmm4, %xmm4
12372 ; FALLBACK4-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
12373 ; FALLBACK4-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
12374 ; FALLBACK4-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
12375 ; FALLBACK4-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
12376 ; FALLBACK4-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)
12377 ; FALLBACK4-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
12378 ; FALLBACK4-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
12379 ; FALLBACK4-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
12380 ; FALLBACK4-NEXT: leal (,%r8,8), %eax
12381 ; FALLBACK4-NEXT: andl $56, %eax
12382 ; FALLBACK4-NEXT: andl $56, %r8d
12383 ; FALLBACK4-NEXT: movq -128(%rsp,%r8), %r10
12384 ; FALLBACK4-NEXT: movq -120(%rsp,%r8), %r9
12385 ; FALLBACK4-NEXT: movl %eax, %ecx
12386 ; FALLBACK4-NEXT: shrq %cl, %r10
12387 ; FALLBACK4-NEXT: movl %eax, %esi
12388 ; FALLBACK4-NEXT: notb %sil
12389 ; FALLBACK4-NEXT: leaq (%r9,%r9), %rdi
12390 ; FALLBACK4-NEXT: movl %esi, %ecx
12391 ; FALLBACK4-NEXT: shlq %cl, %rdi
12392 ; FALLBACK4-NEXT: orq %r10, %rdi
12393 ; FALLBACK4-NEXT: movq -104(%rsp,%r8), %r10
12394 ; FALLBACK4-NEXT: movq %r10, %rbx
12395 ; FALLBACK4-NEXT: movl %eax, %ecx
12396 ; FALLBACK4-NEXT: shrq %cl, %rbx
12397 ; FALLBACK4-NEXT: movq -96(%rsp,%r8), %r12
12398 ; FALLBACK4-NEXT: leaq (%r12,%r12), %r11
12399 ; FALLBACK4-NEXT: movl %esi, %ecx
12400 ; FALLBACK4-NEXT: shlq %cl, %r11
12401 ; FALLBACK4-NEXT: orq %rbx, %r11
12402 ; FALLBACK4-NEXT: movq -112(%rsp,%r8), %rbx
12403 ; FALLBACK4-NEXT: movq %rbx, %r14
12404 ; FALLBACK4-NEXT: movl %eax, %ecx
12405 ; FALLBACK4-NEXT: shrq %cl, %r14
12406 ; FALLBACK4-NEXT: addq %r10, %r10
12407 ; FALLBACK4-NEXT: movl %esi, %ecx
12408 ; FALLBACK4-NEXT: shlq %cl, %r10
12409 ; FALLBACK4-NEXT: orq %r14, %r10
12410 ; FALLBACK4-NEXT: movq -88(%rsp,%r8), %r14
12411 ; FALLBACK4-NEXT: movq %r14, %r13
12412 ; FALLBACK4-NEXT: movl %eax, %ecx
12413 ; FALLBACK4-NEXT: shrq %cl, %r13
12414 ; FALLBACK4-NEXT: movq -80(%rsp,%r8), %rbp
12415 ; FALLBACK4-NEXT: leaq (%rbp,%rbp), %r15
12416 ; FALLBACK4-NEXT: movl %esi, %ecx
12417 ; FALLBACK4-NEXT: shlq %cl, %r15
12418 ; FALLBACK4-NEXT: orq %r13, %r15
12419 ; FALLBACK4-NEXT: movl %eax, %ecx
12420 ; FALLBACK4-NEXT: shrq %cl, %r12
12421 ; FALLBACK4-NEXT: addq %r14, %r14
12422 ; FALLBACK4-NEXT: movl %esi, %ecx
12423 ; FALLBACK4-NEXT: shlq %cl, %r14
12424 ; FALLBACK4-NEXT: orq %r12, %r14
12425 ; FALLBACK4-NEXT: movl %eax, %ecx
12426 ; FALLBACK4-NEXT: shrq %cl, %rbp
12427 ; FALLBACK4-NEXT: movq -72(%rsp,%r8), %r8
12428 ; FALLBACK4-NEXT: leaq (%r8,%r8), %r12
12429 ; FALLBACK4-NEXT: movl %esi, %ecx
12430 ; FALLBACK4-NEXT: shlq %cl, %r12
12431 ; FALLBACK4-NEXT: orq %rbp, %r12
12432 ; FALLBACK4-NEXT: movl %eax, %ecx
12433 ; FALLBACK4-NEXT: shrq %cl, %r9
12434 ; FALLBACK4-NEXT: addq %rbx, %rbx
12435 ; FALLBACK4-NEXT: movl %esi, %ecx
12436 ; FALLBACK4-NEXT: shlq %cl, %rbx
12437 ; FALLBACK4-NEXT: orq %r9, %rbx
12438 ; FALLBACK4-NEXT: movl %eax, %ecx
12439 ; FALLBACK4-NEXT: shrq %cl, %r8
12440 ; FALLBACK4-NEXT: movq %r8, 56(%rdx)
12441 ; FALLBACK4-NEXT: movq %rbx, 8(%rdx)
12442 ; FALLBACK4-NEXT: movq %r12, 48(%rdx)
12443 ; FALLBACK4-NEXT: movq %r14, 32(%rdx)
12444 ; FALLBACK4-NEXT: movq %r15, 40(%rdx)
12445 ; FALLBACK4-NEXT: movq %r10, 16(%rdx)
12446 ; FALLBACK4-NEXT: movq %r11, 24(%rdx)
12447 ; FALLBACK4-NEXT: movq %rdi, (%rdx)
12448 ; FALLBACK4-NEXT: addq $8, %rsp
12449 ; FALLBACK4-NEXT: popq %rbx
12450 ; FALLBACK4-NEXT: popq %r12
12451 ; FALLBACK4-NEXT: popq %r13
12452 ; FALLBACK4-NEXT: popq %r14
12453 ; FALLBACK4-NEXT: popq %r15
12454 ; FALLBACK4-NEXT: popq %rbp
12455 ; FALLBACK4-NEXT: retq
12457 ; FALLBACK5-LABEL: lshr_64bytes:
12458 ; FALLBACK5: # %bb.0:
12459 ; FALLBACK5-NEXT: pushq %r15
12460 ; FALLBACK5-NEXT: pushq %r14
12461 ; FALLBACK5-NEXT: pushq %rbx
12462 ; FALLBACK5-NEXT: movups (%rdi), %xmm0
12463 ; FALLBACK5-NEXT: movups 16(%rdi), %xmm1
12464 ; FALLBACK5-NEXT: movups 32(%rdi), %xmm2
12465 ; FALLBACK5-NEXT: movups 48(%rdi), %xmm3
12466 ; FALLBACK5-NEXT: movl (%rsi), %eax
12467 ; FALLBACK5-NEXT: xorps %xmm4, %xmm4
12468 ; FALLBACK5-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
12469 ; FALLBACK5-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
12470 ; FALLBACK5-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
12471 ; FALLBACK5-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
12472 ; FALLBACK5-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)
12473 ; FALLBACK5-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
12474 ; FALLBACK5-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
12475 ; FALLBACK5-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
12476 ; FALLBACK5-NEXT: leal (,%rax,8), %ecx
12477 ; FALLBACK5-NEXT: andl $56, %ecx
12478 ; FALLBACK5-NEXT: andl $56, %eax
12479 ; FALLBACK5-NEXT: movq -96(%rsp,%rax), %rdi
12480 ; FALLBACK5-NEXT: movq -104(%rsp,%rax), %r9
12481 ; FALLBACK5-NEXT: movq %r9, %rsi
12482 ; FALLBACK5-NEXT: shrdq %cl, %rdi, %rsi
12483 ; FALLBACK5-NEXT: movq -112(%rsp,%rax), %r10
12484 ; FALLBACK5-NEXT: movq %r10, %r8
12485 ; FALLBACK5-NEXT: shrdq %cl, %r9, %r8
12486 ; FALLBACK5-NEXT: movq -80(%rsp,%rax), %r9
12487 ; FALLBACK5-NEXT: movq -88(%rsp,%rax), %r11
12488 ; FALLBACK5-NEXT: movq %r11, %rbx
12489 ; FALLBACK5-NEXT: shrdq %cl, %r9, %rbx
12490 ; FALLBACK5-NEXT: shrdq %cl, %r11, %rdi
12491 ; FALLBACK5-NEXT: movq -72(%rsp,%rax), %r11
12492 ; FALLBACK5-NEXT: shrdq %cl, %r11, %r9
12493 ; FALLBACK5-NEXT: movq -128(%rsp,%rax), %r14
12494 ; FALLBACK5-NEXT: movq -120(%rsp,%rax), %rax
12495 ; FALLBACK5-NEXT: movq %rax, %r15
12496 ; FALLBACK5-NEXT: shrdq %cl, %r10, %r15
12497 ; FALLBACK5-NEXT: shrdq %cl, %rax, %r14
12498 ; FALLBACK5-NEXT: # kill: def $cl killed $cl killed $ecx
12499 ; FALLBACK5-NEXT: shrq %cl, %r11
12500 ; FALLBACK5-NEXT: movq %r15, 8(%rdx)
12501 ; FALLBACK5-NEXT: movq %r9, 48(%rdx)
12502 ; FALLBACK5-NEXT: movq %r11, 56(%rdx)
12503 ; FALLBACK5-NEXT: movq %rdi, 32(%rdx)
12504 ; FALLBACK5-NEXT: movq %rbx, 40(%rdx)
12505 ; FALLBACK5-NEXT: movq %r8, 16(%rdx)
12506 ; FALLBACK5-NEXT: movq %rsi, 24(%rdx)
12507 ; FALLBACK5-NEXT: movq %r14, (%rdx)
12508 ; FALLBACK5-NEXT: popq %rbx
12509 ; FALLBACK5-NEXT: popq %r14
12510 ; FALLBACK5-NEXT: popq %r15
12511 ; FALLBACK5-NEXT: retq
12513 ; FALLBACK6-LABEL: lshr_64bytes:
12514 ; FALLBACK6: # %bb.0:
12515 ; FALLBACK6-NEXT: pushq %rbp
12516 ; FALLBACK6-NEXT: pushq %r15
12517 ; FALLBACK6-NEXT: pushq %r14
12518 ; FALLBACK6-NEXT: pushq %r13
12519 ; FALLBACK6-NEXT: pushq %r12
12520 ; FALLBACK6-NEXT: pushq %rbx
12521 ; FALLBACK6-NEXT: pushq %rax
12522 ; FALLBACK6-NEXT: movups (%rdi), %xmm0
12523 ; FALLBACK6-NEXT: movups 16(%rdi), %xmm1
12524 ; FALLBACK6-NEXT: movups 32(%rdi), %xmm2
12525 ; FALLBACK6-NEXT: movups 48(%rdi), %xmm3
12526 ; FALLBACK6-NEXT: movl (%rsi), %eax
12527 ; FALLBACK6-NEXT: xorps %xmm4, %xmm4
12528 ; FALLBACK6-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
12529 ; FALLBACK6-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
12530 ; FALLBACK6-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
12531 ; FALLBACK6-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
12532 ; FALLBACK6-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)
12533 ; FALLBACK6-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
12534 ; FALLBACK6-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
12535 ; FALLBACK6-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
12536 ; FALLBACK6-NEXT: leal (,%rax,8), %esi
12537 ; FALLBACK6-NEXT: andl $56, %esi
12538 ; FALLBACK6-NEXT: andl $56, %eax
12539 ; FALLBACK6-NEXT: shrxq %rsi, -128(%rsp,%rax), %r11
12540 ; FALLBACK6-NEXT: movq -112(%rsp,%rax), %rcx
12541 ; FALLBACK6-NEXT: movq -104(%rsp,%rax), %rdi
12542 ; FALLBACK6-NEXT: shrxq %rsi, %rdi, %r12
12543 ; FALLBACK6-NEXT: movq -96(%rsp,%rax), %r13
12544 ; FALLBACK6-NEXT: shrxq %rsi, %rcx, %r9
12545 ; FALLBACK6-NEXT: movq -88(%rsp,%rax), %r10
12546 ; FALLBACK6-NEXT: shrxq %rsi, %r10, %r14
12547 ; FALLBACK6-NEXT: shrxq %rsi, %r13, %r15
12548 ; FALLBACK6-NEXT: movl %esi, %ebx
12549 ; FALLBACK6-NEXT: notb %bl
12550 ; FALLBACK6-NEXT: movq -120(%rsp,%rax), %rbp
12551 ; FALLBACK6-NEXT: leaq (%rbp,%rbp), %r8
12552 ; FALLBACK6-NEXT: shlxq %rbx, %r8, %r8
12553 ; FALLBACK6-NEXT: orq %r11, %r8
12554 ; FALLBACK6-NEXT: leaq (%r13,%r13), %r11
12555 ; FALLBACK6-NEXT: shlxq %rbx, %r11, %r11
12556 ; FALLBACK6-NEXT: orq %r12, %r11
12557 ; FALLBACK6-NEXT: movq -80(%rsp,%rax), %r12
12558 ; FALLBACK6-NEXT: shrxq %rsi, %r12, %r13
12559 ; FALLBACK6-NEXT: shrxq %rsi, %rbp, %rbp
12560 ; FALLBACK6-NEXT: movq -72(%rsp,%rax), %rax
12561 ; FALLBACK6-NEXT: shrxq %rsi, %rax, %rsi
12562 ; FALLBACK6-NEXT: addq %rdi, %rdi
12563 ; FALLBACK6-NEXT: shlxq %rbx, %rdi, %rdi
12564 ; FALLBACK6-NEXT: orq %r9, %rdi
12565 ; FALLBACK6-NEXT: leaq (%r12,%r12), %r9
12566 ; FALLBACK6-NEXT: shlxq %rbx, %r9, %r9
12567 ; FALLBACK6-NEXT: orq %r14, %r9
12568 ; FALLBACK6-NEXT: addq %r10, %r10
12569 ; FALLBACK6-NEXT: shlxq %rbx, %r10, %r10
12570 ; FALLBACK6-NEXT: orq %r15, %r10
12571 ; FALLBACK6-NEXT: addq %rax, %rax
12572 ; FALLBACK6-NEXT: shlxq %rbx, %rax, %rax
12573 ; FALLBACK6-NEXT: orq %r13, %rax
12574 ; FALLBACK6-NEXT: addq %rcx, %rcx
12575 ; FALLBACK6-NEXT: shlxq %rbx, %rcx, %rcx
12576 ; FALLBACK6-NEXT: orq %rbp, %rcx
12577 ; FALLBACK6-NEXT: movq %rsi, 56(%rdx)
12578 ; FALLBACK6-NEXT: movq %rcx, 8(%rdx)
12579 ; FALLBACK6-NEXT: movq %rax, 48(%rdx)
12580 ; FALLBACK6-NEXT: movq %r10, 32(%rdx)
12581 ; FALLBACK6-NEXT: movq %r9, 40(%rdx)
12582 ; FALLBACK6-NEXT: movq %rdi, 16(%rdx)
12583 ; FALLBACK6-NEXT: movq %r11, 24(%rdx)
12584 ; FALLBACK6-NEXT: movq %r8, (%rdx)
12585 ; FALLBACK6-NEXT: addq $8, %rsp
12586 ; FALLBACK6-NEXT: popq %rbx
12587 ; FALLBACK6-NEXT: popq %r12
12588 ; FALLBACK6-NEXT: popq %r13
12589 ; FALLBACK6-NEXT: popq %r14
12590 ; FALLBACK6-NEXT: popq %r15
12591 ; FALLBACK6-NEXT: popq %rbp
12592 ; FALLBACK6-NEXT: retq
12594 ; FALLBACK7-LABEL: lshr_64bytes:
12595 ; FALLBACK7: # %bb.0:
12596 ; FALLBACK7-NEXT: pushq %r15
12597 ; FALLBACK7-NEXT: pushq %r14
12598 ; FALLBACK7-NEXT: pushq %rbx
12599 ; FALLBACK7-NEXT: movups (%rdi), %xmm0
12600 ; FALLBACK7-NEXT: movups 16(%rdi), %xmm1
12601 ; FALLBACK7-NEXT: movups 32(%rdi), %xmm2
12602 ; FALLBACK7-NEXT: movups 48(%rdi), %xmm3
12603 ; FALLBACK7-NEXT: movl (%rsi), %eax
12604 ; FALLBACK7-NEXT: xorps %xmm4, %xmm4
12605 ; FALLBACK7-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
12606 ; FALLBACK7-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
12607 ; FALLBACK7-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
12608 ; FALLBACK7-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
12609 ; FALLBACK7-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)
12610 ; FALLBACK7-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
12611 ; FALLBACK7-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
12612 ; FALLBACK7-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
12613 ; FALLBACK7-NEXT: leal (,%rax,8), %ecx
12614 ; FALLBACK7-NEXT: andl $56, %ecx
12615 ; FALLBACK7-NEXT: andl $56, %eax
12616 ; FALLBACK7-NEXT: movq -96(%rsp,%rax), %rdi
12617 ; FALLBACK7-NEXT: movq -104(%rsp,%rax), %r9
12618 ; FALLBACK7-NEXT: movq %r9, %rsi
12619 ; FALLBACK7-NEXT: shrdq %cl, %rdi, %rsi
12620 ; FALLBACK7-NEXT: movq -112(%rsp,%rax), %r10
12621 ; FALLBACK7-NEXT: movq %r10, %r8
12622 ; FALLBACK7-NEXT: shrdq %cl, %r9, %r8
12623 ; FALLBACK7-NEXT: movq -80(%rsp,%rax), %r9
12624 ; FALLBACK7-NEXT: movq -88(%rsp,%rax), %r11
12625 ; FALLBACK7-NEXT: movq %r11, %rbx
12626 ; FALLBACK7-NEXT: shrdq %cl, %r9, %rbx
12627 ; FALLBACK7-NEXT: shrdq %cl, %r11, %rdi
12628 ; FALLBACK7-NEXT: movq -72(%rsp,%rax), %r11
12629 ; FALLBACK7-NEXT: shrdq %cl, %r11, %r9
12630 ; FALLBACK7-NEXT: movq -128(%rsp,%rax), %r14
12631 ; FALLBACK7-NEXT: movq -120(%rsp,%rax), %rax
12632 ; FALLBACK7-NEXT: movq %rax, %r15
12633 ; FALLBACK7-NEXT: shrdq %cl, %r10, %r15
12634 ; FALLBACK7-NEXT: shrxq %rcx, %r11, %r10
12635 ; FALLBACK7-NEXT: # kill: def $cl killed $cl killed $rcx
12636 ; FALLBACK7-NEXT: shrdq %cl, %rax, %r14
12637 ; FALLBACK7-NEXT: movq %r15, 8(%rdx)
12638 ; FALLBACK7-NEXT: movq %r9, 48(%rdx)
12639 ; FALLBACK7-NEXT: movq %rdi, 32(%rdx)
12640 ; FALLBACK7-NEXT: movq %rbx, 40(%rdx)
12641 ; FALLBACK7-NEXT: movq %r8, 16(%rdx)
12642 ; FALLBACK7-NEXT: movq %rsi, 24(%rdx)
12643 ; FALLBACK7-NEXT: movq %r14, (%rdx)
12644 ; FALLBACK7-NEXT: movq %r10, 56(%rdx)
12645 ; FALLBACK7-NEXT: popq %rbx
12646 ; FALLBACK7-NEXT: popq %r14
12647 ; FALLBACK7-NEXT: popq %r15
12648 ; FALLBACK7-NEXT: retq
12650 ; FALLBACK8-LABEL: lshr_64bytes:
12651 ; FALLBACK8: # %bb.0:
12652 ; FALLBACK8-NEXT: pushq %rbp
12653 ; FALLBACK8-NEXT: pushq %r15
12654 ; FALLBACK8-NEXT: pushq %r14
12655 ; FALLBACK8-NEXT: pushq %r13
12656 ; FALLBACK8-NEXT: pushq %r12
12657 ; FALLBACK8-NEXT: pushq %rbx
12658 ; FALLBACK8-NEXT: pushq %rax
12659 ; FALLBACK8-NEXT: vmovups (%rdi), %ymm0
12660 ; FALLBACK8-NEXT: vmovups 32(%rdi), %ymm1
12661 ; FALLBACK8-NEXT: movl (%rsi), %r9d
12662 ; FALLBACK8-NEXT: vxorps %xmm2, %xmm2, %xmm2
12663 ; FALLBACK8-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
12664 ; FALLBACK8-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
12665 ; FALLBACK8-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
12666 ; FALLBACK8-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
12667 ; FALLBACK8-NEXT: leal (,%r9,8), %eax
12668 ; FALLBACK8-NEXT: andl $56, %eax
12669 ; FALLBACK8-NEXT: andl $56, %r9d
12670 ; FALLBACK8-NEXT: movq -128(%rsp,%r9), %r10
12671 ; FALLBACK8-NEXT: movq -120(%rsp,%r9), %r8
12672 ; FALLBACK8-NEXT: movl %eax, %ecx
12673 ; FALLBACK8-NEXT: shrq %cl, %r10
12674 ; FALLBACK8-NEXT: movl %eax, %esi
12675 ; FALLBACK8-NEXT: notb %sil
12676 ; FALLBACK8-NEXT: leaq (%r8,%r8), %rdi
12677 ; FALLBACK8-NEXT: movl %esi, %ecx
12678 ; FALLBACK8-NEXT: shlq %cl, %rdi
12679 ; FALLBACK8-NEXT: orq %r10, %rdi
12680 ; FALLBACK8-NEXT: movq -104(%rsp,%r9), %r10
12681 ; FALLBACK8-NEXT: movq %r10, %rbx
12682 ; FALLBACK8-NEXT: movl %eax, %ecx
12683 ; FALLBACK8-NEXT: shrq %cl, %rbx
12684 ; FALLBACK8-NEXT: movq -96(%rsp,%r9), %r12
12685 ; FALLBACK8-NEXT: leaq (%r12,%r12), %r11
12686 ; FALLBACK8-NEXT: movl %esi, %ecx
12687 ; FALLBACK8-NEXT: shlq %cl, %r11
12688 ; FALLBACK8-NEXT: orq %rbx, %r11
12689 ; FALLBACK8-NEXT: movq -112(%rsp,%r9), %rbx
12690 ; FALLBACK8-NEXT: movq %rbx, %r14
12691 ; FALLBACK8-NEXT: movl %eax, %ecx
12692 ; FALLBACK8-NEXT: shrq %cl, %r14
12693 ; FALLBACK8-NEXT: addq %r10, %r10
12694 ; FALLBACK8-NEXT: movl %esi, %ecx
12695 ; FALLBACK8-NEXT: shlq %cl, %r10
12696 ; FALLBACK8-NEXT: orq %r14, %r10
12697 ; FALLBACK8-NEXT: movq -88(%rsp,%r9), %r14
12698 ; FALLBACK8-NEXT: movq %r14, %r13
12699 ; FALLBACK8-NEXT: movl %eax, %ecx
12700 ; FALLBACK8-NEXT: shrq %cl, %r13
12701 ; FALLBACK8-NEXT: movq -80(%rsp,%r9), %rbp
12702 ; FALLBACK8-NEXT: leaq (%rbp,%rbp), %r15
12703 ; FALLBACK8-NEXT: movl %esi, %ecx
12704 ; FALLBACK8-NEXT: shlq %cl, %r15
12705 ; FALLBACK8-NEXT: orq %r13, %r15
12706 ; FALLBACK8-NEXT: movl %eax, %ecx
12707 ; FALLBACK8-NEXT: shrq %cl, %r12
12708 ; FALLBACK8-NEXT: addq %r14, %r14
12709 ; FALLBACK8-NEXT: movl %esi, %ecx
12710 ; FALLBACK8-NEXT: shlq %cl, %r14
12711 ; FALLBACK8-NEXT: orq %r12, %r14
12712 ; FALLBACK8-NEXT: movl %eax, %ecx
12713 ; FALLBACK8-NEXT: shrq %cl, %rbp
12714 ; FALLBACK8-NEXT: movq -72(%rsp,%r9), %r9
12715 ; FALLBACK8-NEXT: leaq (%r9,%r9), %r12
12716 ; FALLBACK8-NEXT: movl %esi, %ecx
12717 ; FALLBACK8-NEXT: shlq %cl, %r12
12718 ; FALLBACK8-NEXT: orq %rbp, %r12
12719 ; FALLBACK8-NEXT: movl %eax, %ecx
12720 ; FALLBACK8-NEXT: shrq %cl, %r8
12721 ; FALLBACK8-NEXT: addq %rbx, %rbx
12722 ; FALLBACK8-NEXT: movl %esi, %ecx
12723 ; FALLBACK8-NEXT: shlq %cl, %rbx
12724 ; FALLBACK8-NEXT: orq %r8, %rbx
12725 ; FALLBACK8-NEXT: movl %eax, %ecx
12726 ; FALLBACK8-NEXT: shrq %cl, %r9
12727 ; FALLBACK8-NEXT: movq %r9, 56(%rdx)
12728 ; FALLBACK8-NEXT: movq %rbx, 8(%rdx)
12729 ; FALLBACK8-NEXT: movq %r12, 48(%rdx)
12730 ; FALLBACK8-NEXT: movq %r14, 32(%rdx)
12731 ; FALLBACK8-NEXT: movq %r15, 40(%rdx)
12732 ; FALLBACK8-NEXT: movq %r10, 16(%rdx)
12733 ; FALLBACK8-NEXT: movq %r11, 24(%rdx)
12734 ; FALLBACK8-NEXT: movq %rdi, (%rdx)
12735 ; FALLBACK8-NEXT: addq $8, %rsp
12736 ; FALLBACK8-NEXT: popq %rbx
12737 ; FALLBACK8-NEXT: popq %r12
12738 ; FALLBACK8-NEXT: popq %r13
12739 ; FALLBACK8-NEXT: popq %r14
12740 ; FALLBACK8-NEXT: popq %r15
12741 ; FALLBACK8-NEXT: popq %rbp
12742 ; FALLBACK8-NEXT: vzeroupper
12743 ; FALLBACK8-NEXT: retq
12745 ; FALLBACK9-LABEL: lshr_64bytes:
12746 ; FALLBACK9: # %bb.0:
12747 ; FALLBACK9-NEXT: pushq %r15
12748 ; FALLBACK9-NEXT: pushq %r14
12749 ; FALLBACK9-NEXT: pushq %rbx
12750 ; FALLBACK9-NEXT: vmovups (%rdi), %ymm0
12751 ; FALLBACK9-NEXT: vmovups 32(%rdi), %ymm1
12752 ; FALLBACK9-NEXT: movl (%rsi), %eax
12753 ; FALLBACK9-NEXT: vxorps %xmm2, %xmm2, %xmm2
12754 ; FALLBACK9-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
12755 ; FALLBACK9-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
12756 ; FALLBACK9-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
12757 ; FALLBACK9-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
12758 ; FALLBACK9-NEXT: leal (,%rax,8), %ecx
12759 ; FALLBACK9-NEXT: andl $56, %ecx
12760 ; FALLBACK9-NEXT: andl $56, %eax
12761 ; FALLBACK9-NEXT: movq -96(%rsp,%rax), %rdi
12762 ; FALLBACK9-NEXT: movq -104(%rsp,%rax), %r9
12763 ; FALLBACK9-NEXT: movq %r9, %rsi
12764 ; FALLBACK9-NEXT: shrdq %cl, %rdi, %rsi
12765 ; FALLBACK9-NEXT: movq -112(%rsp,%rax), %r10
12766 ; FALLBACK9-NEXT: movq %r10, %r8
12767 ; FALLBACK9-NEXT: shrdq %cl, %r9, %r8
12768 ; FALLBACK9-NEXT: movq -80(%rsp,%rax), %r9
12769 ; FALLBACK9-NEXT: movq -88(%rsp,%rax), %r11
12770 ; FALLBACK9-NEXT: movq %r11, %rbx
12771 ; FALLBACK9-NEXT: shrdq %cl, %r9, %rbx
12772 ; FALLBACK9-NEXT: shrdq %cl, %r11, %rdi
12773 ; FALLBACK9-NEXT: movq -72(%rsp,%rax), %r11
12774 ; FALLBACK9-NEXT: shrdq %cl, %r11, %r9
12775 ; FALLBACK9-NEXT: movq -128(%rsp,%rax), %r14
12776 ; FALLBACK9-NEXT: movq -120(%rsp,%rax), %rax
12777 ; FALLBACK9-NEXT: movq %rax, %r15
12778 ; FALLBACK9-NEXT: shrdq %cl, %r10, %r15
12779 ; FALLBACK9-NEXT: shrdq %cl, %rax, %r14
12780 ; FALLBACK9-NEXT: # kill: def $cl killed $cl killed $ecx
12781 ; FALLBACK9-NEXT: shrq %cl, %r11
12782 ; FALLBACK9-NEXT: movq %r15, 8(%rdx)
12783 ; FALLBACK9-NEXT: movq %r9, 48(%rdx)
12784 ; FALLBACK9-NEXT: movq %r11, 56(%rdx)
12785 ; FALLBACK9-NEXT: movq %rdi, 32(%rdx)
12786 ; FALLBACK9-NEXT: movq %rbx, 40(%rdx)
12787 ; FALLBACK9-NEXT: movq %r8, 16(%rdx)
12788 ; FALLBACK9-NEXT: movq %rsi, 24(%rdx)
12789 ; FALLBACK9-NEXT: movq %r14, (%rdx)
12790 ; FALLBACK9-NEXT: popq %rbx
12791 ; FALLBACK9-NEXT: popq %r14
12792 ; FALLBACK9-NEXT: popq %r15
12793 ; FALLBACK9-NEXT: vzeroupper
12794 ; FALLBACK9-NEXT: retq
12796 ; FALLBACK10-LABEL: lshr_64bytes:
12797 ; FALLBACK10: # %bb.0:
12798 ; FALLBACK10-NEXT: pushq %rbp
12799 ; FALLBACK10-NEXT: pushq %r15
12800 ; FALLBACK10-NEXT: pushq %r14
12801 ; FALLBACK10-NEXT: pushq %r13
12802 ; FALLBACK10-NEXT: pushq %r12
12803 ; FALLBACK10-NEXT: pushq %rbx
12804 ; FALLBACK10-NEXT: pushq %rax
12805 ; FALLBACK10-NEXT: vmovups (%rdi), %ymm0
12806 ; FALLBACK10-NEXT: vmovups 32(%rdi), %ymm1
12807 ; FALLBACK10-NEXT: movl (%rsi), %eax
12808 ; FALLBACK10-NEXT: vxorps %xmm2, %xmm2, %xmm2
12809 ; FALLBACK10-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
12810 ; FALLBACK10-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
12811 ; FALLBACK10-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
12812 ; FALLBACK10-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
12813 ; FALLBACK10-NEXT: leal (,%rax,8), %esi
12814 ; FALLBACK10-NEXT: andl $56, %esi
12815 ; FALLBACK10-NEXT: andl $56, %eax
12816 ; FALLBACK10-NEXT: shrxq %rsi, -128(%rsp,%rax), %r11
12817 ; FALLBACK10-NEXT: movq -112(%rsp,%rax), %rcx
12818 ; FALLBACK10-NEXT: movq -104(%rsp,%rax), %rdi
12819 ; FALLBACK10-NEXT: shrxq %rsi, %rdi, %r12
12820 ; FALLBACK10-NEXT: movq -96(%rsp,%rax), %r13
12821 ; FALLBACK10-NEXT: shrxq %rsi, %rcx, %r9
12822 ; FALLBACK10-NEXT: movq -88(%rsp,%rax), %r10
12823 ; FALLBACK10-NEXT: shrxq %rsi, %r10, %r14
12824 ; FALLBACK10-NEXT: shrxq %rsi, %r13, %r15
12825 ; FALLBACK10-NEXT: movl %esi, %ebx
12826 ; FALLBACK10-NEXT: notb %bl
12827 ; FALLBACK10-NEXT: movq -120(%rsp,%rax), %rbp
12828 ; FALLBACK10-NEXT: leaq (%rbp,%rbp), %r8
12829 ; FALLBACK10-NEXT: shlxq %rbx, %r8, %r8
12830 ; FALLBACK10-NEXT: orq %r11, %r8
12831 ; FALLBACK10-NEXT: leaq (%r13,%r13), %r11
12832 ; FALLBACK10-NEXT: shlxq %rbx, %r11, %r11
12833 ; FALLBACK10-NEXT: orq %r12, %r11
12834 ; FALLBACK10-NEXT: movq -80(%rsp,%rax), %r12
12835 ; FALLBACK10-NEXT: shrxq %rsi, %r12, %r13
12836 ; FALLBACK10-NEXT: shrxq %rsi, %rbp, %rbp
12837 ; FALLBACK10-NEXT: movq -72(%rsp,%rax), %rax
12838 ; FALLBACK10-NEXT: shrxq %rsi, %rax, %rsi
12839 ; FALLBACK10-NEXT: addq %rdi, %rdi
12840 ; FALLBACK10-NEXT: shlxq %rbx, %rdi, %rdi
12841 ; FALLBACK10-NEXT: orq %r9, %rdi
12842 ; FALLBACK10-NEXT: leaq (%r12,%r12), %r9
12843 ; FALLBACK10-NEXT: shlxq %rbx, %r9, %r9
12844 ; FALLBACK10-NEXT: orq %r14, %r9
12845 ; FALLBACK10-NEXT: addq %r10, %r10
12846 ; FALLBACK10-NEXT: shlxq %rbx, %r10, %r10
12847 ; FALLBACK10-NEXT: orq %r15, %r10
12848 ; FALLBACK10-NEXT: addq %rax, %rax
12849 ; FALLBACK10-NEXT: shlxq %rbx, %rax, %rax
12850 ; FALLBACK10-NEXT: orq %r13, %rax
12851 ; FALLBACK10-NEXT: addq %rcx, %rcx
12852 ; FALLBACK10-NEXT: shlxq %rbx, %rcx, %rcx
12853 ; FALLBACK10-NEXT: orq %rbp, %rcx
12854 ; FALLBACK10-NEXT: movq %rsi, 56(%rdx)
12855 ; FALLBACK10-NEXT: movq %rcx, 8(%rdx)
12856 ; FALLBACK10-NEXT: movq %rax, 48(%rdx)
12857 ; FALLBACK10-NEXT: movq %r10, 32(%rdx)
12858 ; FALLBACK10-NEXT: movq %r9, 40(%rdx)
12859 ; FALLBACK10-NEXT: movq %rdi, 16(%rdx)
12860 ; FALLBACK10-NEXT: movq %r11, 24(%rdx)
12861 ; FALLBACK10-NEXT: movq %r8, (%rdx)
12862 ; FALLBACK10-NEXT: addq $8, %rsp
12863 ; FALLBACK10-NEXT: popq %rbx
12864 ; FALLBACK10-NEXT: popq %r12
12865 ; FALLBACK10-NEXT: popq %r13
12866 ; FALLBACK10-NEXT: popq %r14
12867 ; FALLBACK10-NEXT: popq %r15
12868 ; FALLBACK10-NEXT: popq %rbp
12869 ; FALLBACK10-NEXT: vzeroupper
12870 ; FALLBACK10-NEXT: retq
12872 ; FALLBACK11-LABEL: lshr_64bytes:
12873 ; FALLBACK11: # %bb.0:
12874 ; FALLBACK11-NEXT: pushq %r15
12875 ; FALLBACK11-NEXT: pushq %r14
12876 ; FALLBACK11-NEXT: pushq %rbx
12877 ; FALLBACK11-NEXT: vmovups (%rdi), %ymm0
12878 ; FALLBACK11-NEXT: vmovups 32(%rdi), %ymm1
12879 ; FALLBACK11-NEXT: movl (%rsi), %eax
12880 ; FALLBACK11-NEXT: vxorps %xmm2, %xmm2, %xmm2
12881 ; FALLBACK11-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
12882 ; FALLBACK11-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
12883 ; FALLBACK11-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
12884 ; FALLBACK11-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
12885 ; FALLBACK11-NEXT: leal (,%rax,8), %ecx
12886 ; FALLBACK11-NEXT: andl $56, %ecx
12887 ; FALLBACK11-NEXT: andl $56, %eax
12888 ; FALLBACK11-NEXT: movq -96(%rsp,%rax), %rdi
12889 ; FALLBACK11-NEXT: movq -104(%rsp,%rax), %r9
12890 ; FALLBACK11-NEXT: movq %r9, %rsi
12891 ; FALLBACK11-NEXT: shrdq %cl, %rdi, %rsi
12892 ; FALLBACK11-NEXT: movq -112(%rsp,%rax), %r10
12893 ; FALLBACK11-NEXT: movq %r10, %r8
12894 ; FALLBACK11-NEXT: shrdq %cl, %r9, %r8
12895 ; FALLBACK11-NEXT: movq -80(%rsp,%rax), %r9
12896 ; FALLBACK11-NEXT: movq -88(%rsp,%rax), %r11
12897 ; FALLBACK11-NEXT: movq %r11, %rbx
12898 ; FALLBACK11-NEXT: shrdq %cl, %r9, %rbx
12899 ; FALLBACK11-NEXT: shrdq %cl, %r11, %rdi
12900 ; FALLBACK11-NEXT: movq -72(%rsp,%rax), %r11
12901 ; FALLBACK11-NEXT: shrdq %cl, %r11, %r9
12902 ; FALLBACK11-NEXT: movq -128(%rsp,%rax), %r14
12903 ; FALLBACK11-NEXT: movq -120(%rsp,%rax), %rax
12904 ; FALLBACK11-NEXT: movq %rax, %r15
12905 ; FALLBACK11-NEXT: shrdq %cl, %r10, %r15
12906 ; FALLBACK11-NEXT: shrxq %rcx, %r11, %r10
12907 ; FALLBACK11-NEXT: # kill: def $cl killed $cl killed $rcx
12908 ; FALLBACK11-NEXT: shrdq %cl, %rax, %r14
12909 ; FALLBACK11-NEXT: movq %r15, 8(%rdx)
12910 ; FALLBACK11-NEXT: movq %r9, 48(%rdx)
12911 ; FALLBACK11-NEXT: movq %rdi, 32(%rdx)
12912 ; FALLBACK11-NEXT: movq %rbx, 40(%rdx)
12913 ; FALLBACK11-NEXT: movq %r8, 16(%rdx)
12914 ; FALLBACK11-NEXT: movq %rsi, 24(%rdx)
12915 ; FALLBACK11-NEXT: movq %r14, (%rdx)
12916 ; FALLBACK11-NEXT: movq %r10, 56(%rdx)
12917 ; FALLBACK11-NEXT: popq %rbx
12918 ; FALLBACK11-NEXT: popq %r14
12919 ; FALLBACK11-NEXT: popq %r15
12920 ; FALLBACK11-NEXT: vzeroupper
12921 ; FALLBACK11-NEXT: retq
12923 ; FALLBACK12-LABEL: lshr_64bytes:
12924 ; FALLBACK12: # %bb.0:
12925 ; FALLBACK12-NEXT: pushq %rbp
12926 ; FALLBACK12-NEXT: pushq %r15
12927 ; FALLBACK12-NEXT: pushq %r14
12928 ; FALLBACK12-NEXT: pushq %r13
12929 ; FALLBACK12-NEXT: pushq %r12
12930 ; FALLBACK12-NEXT: pushq %rbx
12931 ; FALLBACK12-NEXT: pushq %rax
12932 ; FALLBACK12-NEXT: vmovups (%rdi), %zmm0
12933 ; FALLBACK12-NEXT: movl (%rsi), %r9d
12934 ; FALLBACK12-NEXT: vxorps %xmm1, %xmm1, %xmm1
12935 ; FALLBACK12-NEXT: vmovups %zmm1, -{{[0-9]+}}(%rsp)
12936 ; FALLBACK12-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp)
12937 ; FALLBACK12-NEXT: leal (,%r9,8), %eax
12938 ; FALLBACK12-NEXT: andl $56, %eax
12939 ; FALLBACK12-NEXT: andl $56, %r9d
12940 ; FALLBACK12-NEXT: movq -128(%rsp,%r9), %r10
12941 ; FALLBACK12-NEXT: movq -120(%rsp,%r9), %r8
12942 ; FALLBACK12-NEXT: movl %eax, %ecx
12943 ; FALLBACK12-NEXT: shrq %cl, %r10
12944 ; FALLBACK12-NEXT: movl %eax, %esi
12945 ; FALLBACK12-NEXT: notb %sil
12946 ; FALLBACK12-NEXT: leaq (%r8,%r8), %rdi
12947 ; FALLBACK12-NEXT: movl %esi, %ecx
12948 ; FALLBACK12-NEXT: shlq %cl, %rdi
12949 ; FALLBACK12-NEXT: orq %r10, %rdi
12950 ; FALLBACK12-NEXT: movq -104(%rsp,%r9), %r10
12951 ; FALLBACK12-NEXT: movq %r10, %rbx
12952 ; FALLBACK12-NEXT: movl %eax, %ecx
12953 ; FALLBACK12-NEXT: shrq %cl, %rbx
12954 ; FALLBACK12-NEXT: movq -96(%rsp,%r9), %r12
12955 ; FALLBACK12-NEXT: leaq (%r12,%r12), %r11
12956 ; FALLBACK12-NEXT: movl %esi, %ecx
12957 ; FALLBACK12-NEXT: shlq %cl, %r11
12958 ; FALLBACK12-NEXT: orq %rbx, %r11
12959 ; FALLBACK12-NEXT: movq -112(%rsp,%r9), %rbx
12960 ; FALLBACK12-NEXT: movq %rbx, %r14
12961 ; FALLBACK12-NEXT: movl %eax, %ecx
12962 ; FALLBACK12-NEXT: shrq %cl, %r14
12963 ; FALLBACK12-NEXT: addq %r10, %r10
12964 ; FALLBACK12-NEXT: movl %esi, %ecx
12965 ; FALLBACK12-NEXT: shlq %cl, %r10
12966 ; FALLBACK12-NEXT: orq %r14, %r10
12967 ; FALLBACK12-NEXT: movq -88(%rsp,%r9), %r14
12968 ; FALLBACK12-NEXT: movq %r14, %r13
12969 ; FALLBACK12-NEXT: movl %eax, %ecx
12970 ; FALLBACK12-NEXT: shrq %cl, %r13
12971 ; FALLBACK12-NEXT: movq -80(%rsp,%r9), %rbp
12972 ; FALLBACK12-NEXT: leaq (%rbp,%rbp), %r15
12973 ; FALLBACK12-NEXT: movl %esi, %ecx
12974 ; FALLBACK12-NEXT: shlq %cl, %r15
12975 ; FALLBACK12-NEXT: orq %r13, %r15
12976 ; FALLBACK12-NEXT: movl %eax, %ecx
12977 ; FALLBACK12-NEXT: shrq %cl, %r12
12978 ; FALLBACK12-NEXT: addq %r14, %r14
12979 ; FALLBACK12-NEXT: movl %esi, %ecx
12980 ; FALLBACK12-NEXT: shlq %cl, %r14
12981 ; FALLBACK12-NEXT: orq %r12, %r14
12982 ; FALLBACK12-NEXT: movl %eax, %ecx
12983 ; FALLBACK12-NEXT: shrq %cl, %rbp
12984 ; FALLBACK12-NEXT: movq -72(%rsp,%r9), %r9
12985 ; FALLBACK12-NEXT: leaq (%r9,%r9), %r12
12986 ; FALLBACK12-NEXT: movl %esi, %ecx
12987 ; FALLBACK12-NEXT: shlq %cl, %r12
12988 ; FALLBACK12-NEXT: orq %rbp, %r12
12989 ; FALLBACK12-NEXT: movl %eax, %ecx
12990 ; FALLBACK12-NEXT: shrq %cl, %r8
12991 ; FALLBACK12-NEXT: addq %rbx, %rbx
12992 ; FALLBACK12-NEXT: movl %esi, %ecx
12993 ; FALLBACK12-NEXT: shlq %cl, %rbx
12994 ; FALLBACK12-NEXT: orq %r8, %rbx
12995 ; FALLBACK12-NEXT: movl %eax, %ecx
12996 ; FALLBACK12-NEXT: shrq %cl, %r9
12997 ; FALLBACK12-NEXT: movq %r9, 56(%rdx)
12998 ; FALLBACK12-NEXT: movq %rbx, 8(%rdx)
12999 ; FALLBACK12-NEXT: movq %r12, 48(%rdx)
13000 ; FALLBACK12-NEXT: movq %r14, 32(%rdx)
13001 ; FALLBACK12-NEXT: movq %r15, 40(%rdx)
13002 ; FALLBACK12-NEXT: movq %r10, 16(%rdx)
13003 ; FALLBACK12-NEXT: movq %r11, 24(%rdx)
13004 ; FALLBACK12-NEXT: movq %rdi, (%rdx)
13005 ; FALLBACK12-NEXT: addq $8, %rsp
13006 ; FALLBACK12-NEXT: popq %rbx
13007 ; FALLBACK12-NEXT: popq %r12
13008 ; FALLBACK12-NEXT: popq %r13
13009 ; FALLBACK12-NEXT: popq %r14
13010 ; FALLBACK12-NEXT: popq %r15
13011 ; FALLBACK12-NEXT: popq %rbp
13012 ; FALLBACK12-NEXT: vzeroupper
13013 ; FALLBACK12-NEXT: retq
13015 ; FALLBACK13-LABEL: lshr_64bytes:
13016 ; FALLBACK13: # %bb.0:
13017 ; FALLBACK13-NEXT: pushq %r15
13018 ; FALLBACK13-NEXT: pushq %r14
13019 ; FALLBACK13-NEXT: pushq %rbx
13020 ; FALLBACK13-NEXT: vmovups (%rdi), %zmm0
13021 ; FALLBACK13-NEXT: movl (%rsi), %edi
13022 ; FALLBACK13-NEXT: vxorps %xmm1, %xmm1, %xmm1
13023 ; FALLBACK13-NEXT: vmovups %zmm1, -{{[0-9]+}}(%rsp)
13024 ; FALLBACK13-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp)
13025 ; FALLBACK13-NEXT: leal (,%rdi,8), %ecx
13026 ; FALLBACK13-NEXT: andl $56, %ecx
13027 ; FALLBACK13-NEXT: andl $56, %edi
13028 ; FALLBACK13-NEXT: movq -96(%rsp,%rdi), %rsi
13029 ; FALLBACK13-NEXT: movq -104(%rsp,%rdi), %r9
13030 ; FALLBACK13-NEXT: movq %r9, %rax
13031 ; FALLBACK13-NEXT: shrdq %cl, %rsi, %rax
13032 ; FALLBACK13-NEXT: movq -112(%rsp,%rdi), %r10
13033 ; FALLBACK13-NEXT: movq %r10, %r8
13034 ; FALLBACK13-NEXT: shrdq %cl, %r9, %r8
13035 ; FALLBACK13-NEXT: movq -80(%rsp,%rdi), %r9
13036 ; FALLBACK13-NEXT: movq -88(%rsp,%rdi), %r11
13037 ; FALLBACK13-NEXT: movq %r11, %rbx
13038 ; FALLBACK13-NEXT: shrdq %cl, %r9, %rbx
13039 ; FALLBACK13-NEXT: shrdq %cl, %r11, %rsi
13040 ; FALLBACK13-NEXT: movq -72(%rsp,%rdi), %r11
13041 ; FALLBACK13-NEXT: shrdq %cl, %r11, %r9
13042 ; FALLBACK13-NEXT: movq -128(%rsp,%rdi), %r14
13043 ; FALLBACK13-NEXT: movq -120(%rsp,%rdi), %rdi
13044 ; FALLBACK13-NEXT: movq %rdi, %r15
13045 ; FALLBACK13-NEXT: shrdq %cl, %r10, %r15
13046 ; FALLBACK13-NEXT: shrdq %cl, %rdi, %r14
13047 ; FALLBACK13-NEXT: # kill: def $cl killed $cl killed $ecx
13048 ; FALLBACK13-NEXT: shrq %cl, %r11
13049 ; FALLBACK13-NEXT: movq %r15, 8(%rdx)
13050 ; FALLBACK13-NEXT: movq %r9, 48(%rdx)
13051 ; FALLBACK13-NEXT: movq %r11, 56(%rdx)
13052 ; FALLBACK13-NEXT: movq %rsi, 32(%rdx)
13053 ; FALLBACK13-NEXT: movq %rbx, 40(%rdx)
13054 ; FALLBACK13-NEXT: movq %r8, 16(%rdx)
13055 ; FALLBACK13-NEXT: movq %rax, 24(%rdx)
13056 ; FALLBACK13-NEXT: movq %r14, (%rdx)
13057 ; FALLBACK13-NEXT: popq %rbx
13058 ; FALLBACK13-NEXT: popq %r14
13059 ; FALLBACK13-NEXT: popq %r15
13060 ; FALLBACK13-NEXT: vzeroupper
13061 ; FALLBACK13-NEXT: retq
13063 ; FALLBACK14-LABEL: lshr_64bytes:
13064 ; FALLBACK14: # %bb.0:
13065 ; FALLBACK14-NEXT: pushq %rbp
13066 ; FALLBACK14-NEXT: pushq %r15
13067 ; FALLBACK14-NEXT: pushq %r14
13068 ; FALLBACK14-NEXT: pushq %r13
13069 ; FALLBACK14-NEXT: pushq %r12
13070 ; FALLBACK14-NEXT: pushq %rbx
13071 ; FALLBACK14-NEXT: pushq %rax
13072 ; FALLBACK14-NEXT: vmovups (%rdi), %zmm0
13073 ; FALLBACK14-NEXT: movl (%rsi), %esi
13074 ; FALLBACK14-NEXT: vxorps %xmm1, %xmm1, %xmm1
13075 ; FALLBACK14-NEXT: vmovups %zmm1, -{{[0-9]+}}(%rsp)
13076 ; FALLBACK14-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp)
13077 ; FALLBACK14-NEXT: leal (,%rsi,8), %ecx
13078 ; FALLBACK14-NEXT: andl $56, %ecx
13079 ; FALLBACK14-NEXT: andl $56, %esi
13080 ; FALLBACK14-NEXT: shrxq %rcx, -128(%rsp,%rsi), %r11
13081 ; FALLBACK14-NEXT: movq -112(%rsp,%rsi), %rax
13082 ; FALLBACK14-NEXT: movq -104(%rsp,%rsi), %rdi
13083 ; FALLBACK14-NEXT: shrxq %rcx, %rdi, %r12
13084 ; FALLBACK14-NEXT: movq -96(%rsp,%rsi), %r13
13085 ; FALLBACK14-NEXT: shrxq %rcx, %rax, %r9
13086 ; FALLBACK14-NEXT: movq -88(%rsp,%rsi), %r10
13087 ; FALLBACK14-NEXT: shrxq %rcx, %r10, %r14
13088 ; FALLBACK14-NEXT: shrxq %rcx, %r13, %r15
13089 ; FALLBACK14-NEXT: movl %ecx, %ebx
13090 ; FALLBACK14-NEXT: notb %bl
13091 ; FALLBACK14-NEXT: movq -120(%rsp,%rsi), %rbp
13092 ; FALLBACK14-NEXT: leaq (%rbp,%rbp), %r8
13093 ; FALLBACK14-NEXT: shlxq %rbx, %r8, %r8
13094 ; FALLBACK14-NEXT: orq %r11, %r8
13095 ; FALLBACK14-NEXT: leaq (%r13,%r13), %r11
13096 ; FALLBACK14-NEXT: shlxq %rbx, %r11, %r11
13097 ; FALLBACK14-NEXT: orq %r12, %r11
13098 ; FALLBACK14-NEXT: movq -80(%rsp,%rsi), %r12
13099 ; FALLBACK14-NEXT: shrxq %rcx, %r12, %r13
13100 ; FALLBACK14-NEXT: shrxq %rcx, %rbp, %rbp
13101 ; FALLBACK14-NEXT: movq -72(%rsp,%rsi), %rsi
13102 ; FALLBACK14-NEXT: shrxq %rcx, %rsi, %rcx
13103 ; FALLBACK14-NEXT: addq %rdi, %rdi
13104 ; FALLBACK14-NEXT: shlxq %rbx, %rdi, %rdi
13105 ; FALLBACK14-NEXT: orq %r9, %rdi
13106 ; FALLBACK14-NEXT: leaq (%r12,%r12), %r9
13107 ; FALLBACK14-NEXT: shlxq %rbx, %r9, %r9
13108 ; FALLBACK14-NEXT: orq %r14, %r9
13109 ; FALLBACK14-NEXT: addq %r10, %r10
13110 ; FALLBACK14-NEXT: shlxq %rbx, %r10, %r10
13111 ; FALLBACK14-NEXT: orq %r15, %r10
13112 ; FALLBACK14-NEXT: addq %rsi, %rsi
13113 ; FALLBACK14-NEXT: shlxq %rbx, %rsi, %rsi
13114 ; FALLBACK14-NEXT: orq %r13, %rsi
13115 ; FALLBACK14-NEXT: addq %rax, %rax
13116 ; FALLBACK14-NEXT: shlxq %rbx, %rax, %rax
13117 ; FALLBACK14-NEXT: orq %rbp, %rax
13118 ; FALLBACK14-NEXT: movq %rcx, 56(%rdx)
13119 ; FALLBACK14-NEXT: movq %rax, 8(%rdx)
13120 ; FALLBACK14-NEXT: movq %rsi, 48(%rdx)
13121 ; FALLBACK14-NEXT: movq %r10, 32(%rdx)
13122 ; FALLBACK14-NEXT: movq %r9, 40(%rdx)
13123 ; FALLBACK14-NEXT: movq %rdi, 16(%rdx)
13124 ; FALLBACK14-NEXT: movq %r11, 24(%rdx)
13125 ; FALLBACK14-NEXT: movq %r8, (%rdx)
13126 ; FALLBACK14-NEXT: addq $8, %rsp
13127 ; FALLBACK14-NEXT: popq %rbx
13128 ; FALLBACK14-NEXT: popq %r12
13129 ; FALLBACK14-NEXT: popq %r13
13130 ; FALLBACK14-NEXT: popq %r14
13131 ; FALLBACK14-NEXT: popq %r15
13132 ; FALLBACK14-NEXT: popq %rbp
13133 ; FALLBACK14-NEXT: vzeroupper
13134 ; FALLBACK14-NEXT: retq
13136 ; FALLBACK15-LABEL: lshr_64bytes:
13137 ; FALLBACK15: # %bb.0:
13138 ; FALLBACK15-NEXT: pushq %r15
13139 ; FALLBACK15-NEXT: pushq %r14
13140 ; FALLBACK15-NEXT: pushq %rbx
13141 ; FALLBACK15-NEXT: vmovups (%rdi), %zmm0
13142 ; FALLBACK15-NEXT: movl (%rsi), %eax
13143 ; FALLBACK15-NEXT: vxorps %xmm1, %xmm1, %xmm1
13144 ; FALLBACK15-NEXT: vmovups %zmm1, -{{[0-9]+}}(%rsp)
13145 ; FALLBACK15-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp)
13146 ; FALLBACK15-NEXT: leal (,%rax,8), %ecx
13147 ; FALLBACK15-NEXT: andl $56, %ecx
13148 ; FALLBACK15-NEXT: andl $56, %eax
13149 ; FALLBACK15-NEXT: movq -96(%rsp,%rax), %rdi
13150 ; FALLBACK15-NEXT: movq -104(%rsp,%rax), %r9
13151 ; FALLBACK15-NEXT: movq %r9, %rsi
13152 ; FALLBACK15-NEXT: shrdq %cl, %rdi, %rsi
13153 ; FALLBACK15-NEXT: movq -112(%rsp,%rax), %r10
13154 ; FALLBACK15-NEXT: movq %r10, %r8
13155 ; FALLBACK15-NEXT: shrdq %cl, %r9, %r8
13156 ; FALLBACK15-NEXT: movq -80(%rsp,%rax), %r9
13157 ; FALLBACK15-NEXT: movq -88(%rsp,%rax), %r11
13158 ; FALLBACK15-NEXT: movq %r11, %rbx
13159 ; FALLBACK15-NEXT: shrdq %cl, %r9, %rbx
13160 ; FALLBACK15-NEXT: shrdq %cl, %r11, %rdi
13161 ; FALLBACK15-NEXT: movq -72(%rsp,%rax), %r11
13162 ; FALLBACK15-NEXT: shrdq %cl, %r11, %r9
13163 ; FALLBACK15-NEXT: movq -128(%rsp,%rax), %r14
13164 ; FALLBACK15-NEXT: movq -120(%rsp,%rax), %rax
13165 ; FALLBACK15-NEXT: movq %rax, %r15
13166 ; FALLBACK15-NEXT: shrdq %cl, %r10, %r15
13167 ; FALLBACK15-NEXT: shrxq %rcx, %r11, %r10
13168 ; FALLBACK15-NEXT: # kill: def $cl killed $cl killed $rcx
13169 ; FALLBACK15-NEXT: shrdq %cl, %rax, %r14
13170 ; FALLBACK15-NEXT: movq %r15, 8(%rdx)
13171 ; FALLBACK15-NEXT: movq %r9, 48(%rdx)
13172 ; FALLBACK15-NEXT: movq %rdi, 32(%rdx)
13173 ; FALLBACK15-NEXT: movq %rbx, 40(%rdx)
13174 ; FALLBACK15-NEXT: movq %r8, 16(%rdx)
13175 ; FALLBACK15-NEXT: movq %rsi, 24(%rdx)
13176 ; FALLBACK15-NEXT: movq %r14, (%rdx)
13177 ; FALLBACK15-NEXT: movq %r10, 56(%rdx)
13178 ; FALLBACK15-NEXT: popq %rbx
13179 ; FALLBACK15-NEXT: popq %r14
13180 ; FALLBACK15-NEXT: popq %r15
13181 ; FALLBACK15-NEXT: vzeroupper
13182 ; FALLBACK15-NEXT: retq
13184 ; FALLBACK16-LABEL: lshr_64bytes:
13185 ; FALLBACK16: # %bb.0:
13186 ; FALLBACK16-NEXT: pushl %ebp
13187 ; FALLBACK16-NEXT: pushl %ebx
13188 ; FALLBACK16-NEXT: pushl %edi
13189 ; FALLBACK16-NEXT: pushl %esi
13190 ; FALLBACK16-NEXT: subl $204, %esp
13191 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %eax
13192 ; FALLBACK16-NEXT: movl (%eax), %ecx
13193 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13194 ; FALLBACK16-NEXT: movl 4(%eax), %ecx
13195 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13196 ; FALLBACK16-NEXT: movl 8(%eax), %ecx
13197 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13198 ; FALLBACK16-NEXT: movl 12(%eax), %ecx
13199 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13200 ; FALLBACK16-NEXT: movl 16(%eax), %ecx
13201 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13202 ; FALLBACK16-NEXT: movl 20(%eax), %ecx
13203 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13204 ; FALLBACK16-NEXT: movl 24(%eax), %ecx
13205 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13206 ; FALLBACK16-NEXT: movl 28(%eax), %ecx
13207 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13208 ; FALLBACK16-NEXT: movl 32(%eax), %ecx
13209 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13210 ; FALLBACK16-NEXT: movl 36(%eax), %ecx
13211 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13212 ; FALLBACK16-NEXT: movl 40(%eax), %ebp
13213 ; FALLBACK16-NEXT: movl 44(%eax), %ebx
13214 ; FALLBACK16-NEXT: movl 48(%eax), %edi
13215 ; FALLBACK16-NEXT: movl 52(%eax), %esi
13216 ; FALLBACK16-NEXT: movl 56(%eax), %edx
13217 ; FALLBACK16-NEXT: movl 60(%eax), %ecx
13218 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %eax
13219 ; FALLBACK16-NEXT: movl (%eax), %eax
13220 ; FALLBACK16-NEXT: xorps %xmm0, %xmm0
13221 ; FALLBACK16-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
13222 ; FALLBACK16-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
13223 ; FALLBACK16-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
13224 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
13225 ; FALLBACK16-NEXT: movl %edx, {{[0-9]+}}(%esp)
13226 ; FALLBACK16-NEXT: movl %esi, {{[0-9]+}}(%esp)
13227 ; FALLBACK16-NEXT: movl %edi, {{[0-9]+}}(%esp)
13228 ; FALLBACK16-NEXT: movl %ebx, {{[0-9]+}}(%esp)
13229 ; FALLBACK16-NEXT: movl %ebp, {{[0-9]+}}(%esp)
13230 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13231 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
13232 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13233 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
13234 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13235 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
13236 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13237 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
13238 ; FALLBACK16-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
13239 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13240 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
13241 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13242 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
13243 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13244 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
13245 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13246 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
13247 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13248 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
13249 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13250 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
13251 ; FALLBACK16-NEXT: movl %eax, %esi
13252 ; FALLBACK16-NEXT: andl $60, %esi
13253 ; FALLBACK16-NEXT: movl 68(%esp,%esi), %edx
13254 ; FALLBACK16-NEXT: shll $3, %eax
13255 ; FALLBACK16-NEXT: andl $24, %eax
13256 ; FALLBACK16-NEXT: movl %edx, %edi
13257 ; FALLBACK16-NEXT: movl %eax, %ecx
13258 ; FALLBACK16-NEXT: shrl %cl, %edi
13259 ; FALLBACK16-NEXT: movl 72(%esp,%esi), %ecx
13260 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13261 ; FALLBACK16-NEXT: leal (%ecx,%ecx), %ebx
13262 ; FALLBACK16-NEXT: movb %al, %ch
13263 ; FALLBACK16-NEXT: notb %ch
13264 ; FALLBACK16-NEXT: movb %ch, %cl
13265 ; FALLBACK16-NEXT: shll %cl, %ebx
13266 ; FALLBACK16-NEXT: orl %edi, %ebx
13267 ; FALLBACK16-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13268 ; FALLBACK16-NEXT: movl 64(%esp,%esi), %edi
13269 ; FALLBACK16-NEXT: movb %al, %cl
13270 ; FALLBACK16-NEXT: shrl %cl, %edi
13271 ; FALLBACK16-NEXT: addl %edx, %edx
13272 ; FALLBACK16-NEXT: movb %ch, %cl
13273 ; FALLBACK16-NEXT: shll %cl, %edx
13274 ; FALLBACK16-NEXT: orl %edi, %edx
13275 ; FALLBACK16-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13276 ; FALLBACK16-NEXT: movl 76(%esp,%esi), %edx
13277 ; FALLBACK16-NEXT: movl %edx, %ebp
13278 ; FALLBACK16-NEXT: movb %al, %cl
13279 ; FALLBACK16-NEXT: shrl %cl, %ebp
13280 ; FALLBACK16-NEXT: movl 80(%esp,%esi), %edi
13281 ; FALLBACK16-NEXT: leal (%edi,%edi), %ebx
13282 ; FALLBACK16-NEXT: movb %ch, %cl
13283 ; FALLBACK16-NEXT: shll %cl, %ebx
13284 ; FALLBACK16-NEXT: orl %ebp, %ebx
13285 ; FALLBACK16-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13286 ; FALLBACK16-NEXT: movb %al, %cl
13287 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
13288 ; FALLBACK16-NEXT: shrl %cl, %ebx
13289 ; FALLBACK16-NEXT: addl %edx, %edx
13290 ; FALLBACK16-NEXT: movb %ch, %cl
13291 ; FALLBACK16-NEXT: shll %cl, %edx
13292 ; FALLBACK16-NEXT: orl %ebx, %edx
13293 ; FALLBACK16-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13294 ; FALLBACK16-NEXT: movl 84(%esp,%esi), %ebx
13295 ; FALLBACK16-NEXT: movl %ebx, %ebp
13296 ; FALLBACK16-NEXT: movl %eax, %edx
13297 ; FALLBACK16-NEXT: movb %dl, %cl
13298 ; FALLBACK16-NEXT: shrl %cl, %ebp
13299 ; FALLBACK16-NEXT: movl 88(%esp,%esi), %eax
13300 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13301 ; FALLBACK16-NEXT: addl %eax, %eax
13302 ; FALLBACK16-NEXT: movb %ch, %cl
13303 ; FALLBACK16-NEXT: shll %cl, %eax
13304 ; FALLBACK16-NEXT: orl %ebp, %eax
13305 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13306 ; FALLBACK16-NEXT: movb %dl, %cl
13307 ; FALLBACK16-NEXT: shrl %cl, %edi
13308 ; FALLBACK16-NEXT: addl %ebx, %ebx
13309 ; FALLBACK16-NEXT: movb %ch, %cl
13310 ; FALLBACK16-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
13311 ; FALLBACK16-NEXT: shll %cl, %ebx
13312 ; FALLBACK16-NEXT: orl %edi, %ebx
13313 ; FALLBACK16-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13314 ; FALLBACK16-NEXT: movl 92(%esp,%esi), %ebx
13315 ; FALLBACK16-NEXT: movl %ebx, %ebp
13316 ; FALLBACK16-NEXT: movb %dl, %cl
13317 ; FALLBACK16-NEXT: shrl %cl, %ebp
13318 ; FALLBACK16-NEXT: movl 96(%esp,%esi), %edi
13319 ; FALLBACK16-NEXT: leal (%edi,%edi), %eax
13320 ; FALLBACK16-NEXT: movb %ch, %cl
13321 ; FALLBACK16-NEXT: shll %cl, %eax
13322 ; FALLBACK16-NEXT: orl %ebp, %eax
13323 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13324 ; FALLBACK16-NEXT: movb %dl, %cl
13325 ; FALLBACK16-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13326 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13327 ; FALLBACK16-NEXT: shrl %cl, %eax
13328 ; FALLBACK16-NEXT: addl %ebx, %ebx
13329 ; FALLBACK16-NEXT: movb %ch, %cl
13330 ; FALLBACK16-NEXT: shll %cl, %ebx
13331 ; FALLBACK16-NEXT: orl %eax, %ebx
13332 ; FALLBACK16-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13333 ; FALLBACK16-NEXT: movl 100(%esp,%esi), %ebx
13334 ; FALLBACK16-NEXT: movl %ebx, %ebp
13335 ; FALLBACK16-NEXT: movb %dl, %cl
13336 ; FALLBACK16-NEXT: shrl %cl, %ebp
13337 ; FALLBACK16-NEXT: movl 104(%esp,%esi), %edx
13338 ; FALLBACK16-NEXT: leal (%edx,%edx), %eax
13339 ; FALLBACK16-NEXT: movb %ch, %cl
13340 ; FALLBACK16-NEXT: shll %cl, %eax
13341 ; FALLBACK16-NEXT: orl %ebp, %eax
13342 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13343 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13344 ; FALLBACK16-NEXT: movb %al, %cl
13345 ; FALLBACK16-NEXT: shrl %cl, %edi
13346 ; FALLBACK16-NEXT: addl %ebx, %ebx
13347 ; FALLBACK16-NEXT: movb %ch, %cl
13348 ; FALLBACK16-NEXT: shll %cl, %ebx
13349 ; FALLBACK16-NEXT: orl %edi, %ebx
13350 ; FALLBACK16-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13351 ; FALLBACK16-NEXT: movl 108(%esp,%esi), %edi
13352 ; FALLBACK16-NEXT: movl %edi, %ebp
13353 ; FALLBACK16-NEXT: movl %eax, %ecx
13354 ; FALLBACK16-NEXT: shrl %cl, %ebp
13355 ; FALLBACK16-NEXT: movl 112(%esp,%esi), %ecx
13356 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13357 ; FALLBACK16-NEXT: leal (%ecx,%ecx), %ebx
13358 ; FALLBACK16-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
13359 ; FALLBACK16-NEXT: movb %ch, %cl
13360 ; FALLBACK16-NEXT: shll %cl, %ebx
13361 ; FALLBACK16-NEXT: orl %ebp, %ebx
13362 ; FALLBACK16-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13363 ; FALLBACK16-NEXT: movb %al, %cl
13364 ; FALLBACK16-NEXT: shrl %cl, %edx
13365 ; FALLBACK16-NEXT: addl %edi, %edi
13366 ; FALLBACK16-NEXT: movb %ch, %cl
13367 ; FALLBACK16-NEXT: shll %cl, %edi
13368 ; FALLBACK16-NEXT: orl %edx, %edi
13369 ; FALLBACK16-NEXT: movl %esi, %edx
13370 ; FALLBACK16-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13371 ; FALLBACK16-NEXT: movl 116(%esp,%esi), %esi
13372 ; FALLBACK16-NEXT: movl %esi, %ebx
13373 ; FALLBACK16-NEXT: movb %al, %cl
13374 ; FALLBACK16-NEXT: shrl %cl, %ebx
13375 ; FALLBACK16-NEXT: movl 120(%esp,%edx), %eax
13376 ; FALLBACK16-NEXT: leal (%eax,%eax), %ebp
13377 ; FALLBACK16-NEXT: movb %ch, %cl
13378 ; FALLBACK16-NEXT: shll %cl, %ebp
13379 ; FALLBACK16-NEXT: orl %ebx, %ebp
13380 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
13381 ; FALLBACK16-NEXT: movb %dl, %cl
13382 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
13383 ; FALLBACK16-NEXT: shrl %cl, %ebx
13384 ; FALLBACK16-NEXT: addl %esi, %esi
13385 ; FALLBACK16-NEXT: movb %ch, %cl
13386 ; FALLBACK16-NEXT: shll %cl, %esi
13387 ; FALLBACK16-NEXT: orl %ebx, %esi
13388 ; FALLBACK16-NEXT: movb %dl, %cl
13389 ; FALLBACK16-NEXT: shrl %cl, %eax
13390 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
13391 ; FALLBACK16-NEXT: movl 124(%esp,%edx), %ebx
13392 ; FALLBACK16-NEXT: leal (%ebx,%ebx), %edx
13393 ; FALLBACK16-NEXT: movb %ch, %cl
13394 ; FALLBACK16-NEXT: shll %cl, %edx
13395 ; FALLBACK16-NEXT: orl %eax, %edx
13396 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13397 ; FALLBACK16-NEXT: # kill: def $cl killed $cl killed $ecx
13398 ; FALLBACK16-NEXT: shrl %cl, %ebx
13399 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %eax
13400 ; FALLBACK16-NEXT: movl %ebx, 60(%eax)
13401 ; FALLBACK16-NEXT: movl %edx, 56(%eax)
13402 ; FALLBACK16-NEXT: movl %esi, 48(%eax)
13403 ; FALLBACK16-NEXT: movl %ebp, 52(%eax)
13404 ; FALLBACK16-NEXT: movl %edi, 40(%eax)
13405 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13406 ; FALLBACK16-NEXT: movl %ecx, 44(%eax)
13407 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13408 ; FALLBACK16-NEXT: movl %ecx, 32(%eax)
13409 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13410 ; FALLBACK16-NEXT: movl %ecx, 36(%eax)
13411 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13412 ; FALLBACK16-NEXT: movl %ecx, 24(%eax)
13413 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13414 ; FALLBACK16-NEXT: movl %ecx, 28(%eax)
13415 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13416 ; FALLBACK16-NEXT: movl %ecx, 16(%eax)
13417 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13418 ; FALLBACK16-NEXT: movl %ecx, 20(%eax)
13419 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13420 ; FALLBACK16-NEXT: movl %ecx, 8(%eax)
13421 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13422 ; FALLBACK16-NEXT: movl %ecx, 12(%eax)
13423 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13424 ; FALLBACK16-NEXT: movl %ecx, (%eax)
13425 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13426 ; FALLBACK16-NEXT: movl %ecx, 4(%eax)
13427 ; FALLBACK16-NEXT: addl $204, %esp
13428 ; FALLBACK16-NEXT: popl %esi
13429 ; FALLBACK16-NEXT: popl %edi
13430 ; FALLBACK16-NEXT: popl %ebx
13431 ; FALLBACK16-NEXT: popl %ebp
13432 ; FALLBACK16-NEXT: retl
13434 ; FALLBACK17-LABEL: lshr_64bytes:
13435 ; FALLBACK17: # %bb.0:
13436 ; FALLBACK17-NEXT: pushl %ebp
13437 ; FALLBACK17-NEXT: pushl %ebx
13438 ; FALLBACK17-NEXT: pushl %edi
13439 ; FALLBACK17-NEXT: pushl %esi
13440 ; FALLBACK17-NEXT: subl $188, %esp
13441 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %ecx
13442 ; FALLBACK17-NEXT: movl (%ecx), %eax
13443 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13444 ; FALLBACK17-NEXT: movl 4(%ecx), %eax
13445 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13446 ; FALLBACK17-NEXT: movl 8(%ecx), %eax
13447 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13448 ; FALLBACK17-NEXT: movl 12(%ecx), %eax
13449 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13450 ; FALLBACK17-NEXT: movl 16(%ecx), %eax
13451 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13452 ; FALLBACK17-NEXT: movl 20(%ecx), %eax
13453 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13454 ; FALLBACK17-NEXT: movl 24(%ecx), %eax
13455 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13456 ; FALLBACK17-NEXT: movl 28(%ecx), %eax
13457 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13458 ; FALLBACK17-NEXT: movl 32(%ecx), %eax
13459 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13460 ; FALLBACK17-NEXT: movl 36(%ecx), %eax
13461 ; FALLBACK17-NEXT: movl %eax, (%esp) # 4-byte Spill
13462 ; FALLBACK17-NEXT: movl 40(%ecx), %ebp
13463 ; FALLBACK17-NEXT: movl 44(%ecx), %ebx
13464 ; FALLBACK17-NEXT: movl 48(%ecx), %edi
13465 ; FALLBACK17-NEXT: movl 52(%ecx), %esi
13466 ; FALLBACK17-NEXT: movl 56(%ecx), %edx
13467 ; FALLBACK17-NEXT: movl 60(%ecx), %eax
13468 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %ecx
13469 ; FALLBACK17-NEXT: movl (%ecx), %ecx
13470 ; FALLBACK17-NEXT: xorps %xmm0, %xmm0
13471 ; FALLBACK17-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
13472 ; FALLBACK17-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
13473 ; FALLBACK17-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
13474 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
13475 ; FALLBACK17-NEXT: movl %edx, {{[0-9]+}}(%esp)
13476 ; FALLBACK17-NEXT: movl %esi, {{[0-9]+}}(%esp)
13477 ; FALLBACK17-NEXT: movl %edi, {{[0-9]+}}(%esp)
13478 ; FALLBACK17-NEXT: movl %ebx, {{[0-9]+}}(%esp)
13479 ; FALLBACK17-NEXT: movl %ebp, {{[0-9]+}}(%esp)
13480 ; FALLBACK17-NEXT: movl (%esp), %eax # 4-byte Reload
13481 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
13482 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13483 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
13484 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13485 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
13486 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13487 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
13488 ; FALLBACK17-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
13489 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13490 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
13491 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13492 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
13493 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13494 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
13495 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13496 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
13497 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13498 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
13499 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13500 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
13501 ; FALLBACK17-NEXT: movl %ecx, %ebp
13502 ; FALLBACK17-NEXT: andl $60, %ebp
13503 ; FALLBACK17-NEXT: movl 56(%esp,%ebp), %edx
13504 ; FALLBACK17-NEXT: movl 52(%esp,%ebp), %eax
13505 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13506 ; FALLBACK17-NEXT: shll $3, %ecx
13507 ; FALLBACK17-NEXT: andl $24, %ecx
13508 ; FALLBACK17-NEXT: shrdl %cl, %edx, %eax
13509 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13510 ; FALLBACK17-NEXT: movl 64(%esp,%ebp), %edi
13511 ; FALLBACK17-NEXT: movl 60(%esp,%ebp), %eax
13512 ; FALLBACK17-NEXT: movl %eax, %esi
13513 ; FALLBACK17-NEXT: shrdl %cl, %edi, %esi
13514 ; FALLBACK17-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13515 ; FALLBACK17-NEXT: shrdl %cl, %eax, %edx
13516 ; FALLBACK17-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13517 ; FALLBACK17-NEXT: movl 72(%esp,%ebp), %esi
13518 ; FALLBACK17-NEXT: movl 68(%esp,%ebp), %eax
13519 ; FALLBACK17-NEXT: movl %eax, %edx
13520 ; FALLBACK17-NEXT: shrdl %cl, %esi, %edx
13521 ; FALLBACK17-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13522 ; FALLBACK17-NEXT: shrdl %cl, %eax, %edi
13523 ; FALLBACK17-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13524 ; FALLBACK17-NEXT: movl 80(%esp,%ebp), %edi
13525 ; FALLBACK17-NEXT: movl 76(%esp,%ebp), %eax
13526 ; FALLBACK17-NEXT: movl %eax, %edx
13527 ; FALLBACK17-NEXT: shrdl %cl, %edi, %edx
13528 ; FALLBACK17-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13529 ; FALLBACK17-NEXT: shrdl %cl, %eax, %esi
13530 ; FALLBACK17-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13531 ; FALLBACK17-NEXT: movl 88(%esp,%ebp), %esi
13532 ; FALLBACK17-NEXT: movl 84(%esp,%ebp), %eax
13533 ; FALLBACK17-NEXT: movl %eax, %edx
13534 ; FALLBACK17-NEXT: shrdl %cl, %esi, %edx
13535 ; FALLBACK17-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13536 ; FALLBACK17-NEXT: movl %esi, %edx
13537 ; FALLBACK17-NEXT: shrdl %cl, %eax, %edi
13538 ; FALLBACK17-NEXT: movl %edi, (%esp) # 4-byte Spill
13539 ; FALLBACK17-NEXT: movl 96(%esp,%ebp), %esi
13540 ; FALLBACK17-NEXT: movl 92(%esp,%ebp), %eax
13541 ; FALLBACK17-NEXT: movl %eax, %edi
13542 ; FALLBACK17-NEXT: shrdl %cl, %esi, %edi
13543 ; FALLBACK17-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13544 ; FALLBACK17-NEXT: shrdl %cl, %eax, %edx
13545 ; FALLBACK17-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13546 ; FALLBACK17-NEXT: movl 104(%esp,%ebp), %edx
13547 ; FALLBACK17-NEXT: movl 100(%esp,%ebp), %eax
13548 ; FALLBACK17-NEXT: movl %eax, %edi
13549 ; FALLBACK17-NEXT: shrdl %cl, %edx, %edi
13550 ; FALLBACK17-NEXT: shrdl %cl, %eax, %esi
13551 ; FALLBACK17-NEXT: movl 48(%esp,%ebp), %ebx
13552 ; FALLBACK17-NEXT: movl 108(%esp,%ebp), %eax
13553 ; FALLBACK17-NEXT: shrdl %cl, %eax, %edx
13554 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %ebp
13555 ; FALLBACK17-NEXT: movl %edx, 56(%ebp)
13556 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
13557 ; FALLBACK17-NEXT: shrdl %cl, %edx, %ebx
13558 ; FALLBACK17-NEXT: # kill: def $cl killed $cl killed $ecx
13559 ; FALLBACK17-NEXT: shrl %cl, %eax
13560 ; FALLBACK17-NEXT: movl %eax, 60(%ebp)
13561 ; FALLBACK17-NEXT: movl %esi, 48(%ebp)
13562 ; FALLBACK17-NEXT: movl %edi, 52(%ebp)
13563 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13564 ; FALLBACK17-NEXT: movl %eax, 40(%ebp)
13565 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13566 ; FALLBACK17-NEXT: movl %eax, 44(%ebp)
13567 ; FALLBACK17-NEXT: movl (%esp), %eax # 4-byte Reload
13568 ; FALLBACK17-NEXT: movl %eax, 32(%ebp)
13569 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13570 ; FALLBACK17-NEXT: movl %eax, 36(%ebp)
13571 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13572 ; FALLBACK17-NEXT: movl %eax, 24(%ebp)
13573 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13574 ; FALLBACK17-NEXT: movl %eax, 28(%ebp)
13575 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13576 ; FALLBACK17-NEXT: movl %eax, 16(%ebp)
13577 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13578 ; FALLBACK17-NEXT: movl %eax, 20(%ebp)
13579 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13580 ; FALLBACK17-NEXT: movl %eax, 8(%ebp)
13581 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13582 ; FALLBACK17-NEXT: movl %eax, 12(%ebp)
13583 ; FALLBACK17-NEXT: movl %ebx, (%ebp)
13584 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13585 ; FALLBACK17-NEXT: movl %eax, 4(%ebp)
13586 ; FALLBACK17-NEXT: addl $188, %esp
13587 ; FALLBACK17-NEXT: popl %esi
13588 ; FALLBACK17-NEXT: popl %edi
13589 ; FALLBACK17-NEXT: popl %ebx
13590 ; FALLBACK17-NEXT: popl %ebp
13591 ; FALLBACK17-NEXT: retl
13593 ; FALLBACK18-LABEL: lshr_64bytes:
13594 ; FALLBACK18: # %bb.0:
13595 ; FALLBACK18-NEXT: pushl %ebp
13596 ; FALLBACK18-NEXT: pushl %ebx
13597 ; FALLBACK18-NEXT: pushl %edi
13598 ; FALLBACK18-NEXT: pushl %esi
13599 ; FALLBACK18-NEXT: subl $204, %esp
13600 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %eax
13601 ; FALLBACK18-NEXT: movl (%eax), %ecx
13602 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13603 ; FALLBACK18-NEXT: movl 4(%eax), %ecx
13604 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13605 ; FALLBACK18-NEXT: movl 8(%eax), %ecx
13606 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13607 ; FALLBACK18-NEXT: movl 12(%eax), %ecx
13608 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13609 ; FALLBACK18-NEXT: movl 16(%eax), %ecx
13610 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13611 ; FALLBACK18-NEXT: movl 20(%eax), %ecx
13612 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13613 ; FALLBACK18-NEXT: movl 24(%eax), %ecx
13614 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13615 ; FALLBACK18-NEXT: movl 28(%eax), %ecx
13616 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13617 ; FALLBACK18-NEXT: movl 32(%eax), %ecx
13618 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13619 ; FALLBACK18-NEXT: movl 36(%eax), %ecx
13620 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13621 ; FALLBACK18-NEXT: movl 40(%eax), %ebp
13622 ; FALLBACK18-NEXT: movl 44(%eax), %ebx
13623 ; FALLBACK18-NEXT: movl 48(%eax), %edi
13624 ; FALLBACK18-NEXT: movl 52(%eax), %esi
13625 ; FALLBACK18-NEXT: movl 56(%eax), %edx
13626 ; FALLBACK18-NEXT: movl 60(%eax), %ecx
13627 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %eax
13628 ; FALLBACK18-NEXT: movl (%eax), %eax
13629 ; FALLBACK18-NEXT: xorps %xmm0, %xmm0
13630 ; FALLBACK18-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
13631 ; FALLBACK18-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
13632 ; FALLBACK18-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
13633 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
13634 ; FALLBACK18-NEXT: movl %edx, {{[0-9]+}}(%esp)
13635 ; FALLBACK18-NEXT: movl %esi, {{[0-9]+}}(%esp)
13636 ; FALLBACK18-NEXT: movl %edi, {{[0-9]+}}(%esp)
13637 ; FALLBACK18-NEXT: movl %ebx, {{[0-9]+}}(%esp)
13638 ; FALLBACK18-NEXT: movl %ebp, {{[0-9]+}}(%esp)
13639 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13640 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
13641 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13642 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
13643 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13644 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
13645 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13646 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
13647 ; FALLBACK18-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
13648 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13649 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
13650 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13651 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
13652 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13653 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
13654 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13655 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
13656 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13657 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
13658 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13659 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
13660 ; FALLBACK18-NEXT: movl %eax, %ecx
13661 ; FALLBACK18-NEXT: leal (,%eax,8), %edx
13662 ; FALLBACK18-NEXT: andl $24, %edx
13663 ; FALLBACK18-NEXT: andl $60, %ecx
13664 ; FALLBACK18-NEXT: movl 68(%esp,%ecx), %esi
13665 ; FALLBACK18-NEXT: movl 72(%esp,%ecx), %eax
13666 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13667 ; FALLBACK18-NEXT: shrxl %edx, %esi, %edi
13668 ; FALLBACK18-NEXT: movl %edx, %ebx
13669 ; FALLBACK18-NEXT: notb %bl
13670 ; FALLBACK18-NEXT: leal (%eax,%eax), %ebp
13671 ; FALLBACK18-NEXT: shlxl %ebx, %ebp, %eax
13672 ; FALLBACK18-NEXT: orl %edi, %eax
13673 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13674 ; FALLBACK18-NEXT: shrxl %edx, 64(%esp,%ecx), %edi
13675 ; FALLBACK18-NEXT: addl %esi, %esi
13676 ; FALLBACK18-NEXT: shlxl %ebx, %esi, %eax
13677 ; FALLBACK18-NEXT: orl %edi, %eax
13678 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13679 ; FALLBACK18-NEXT: movl 80(%esp,%ecx), %esi
13680 ; FALLBACK18-NEXT: leal (%esi,%esi), %edi
13681 ; FALLBACK18-NEXT: shlxl %ebx, %edi, %eax
13682 ; FALLBACK18-NEXT: movl 76(%esp,%ecx), %edi
13683 ; FALLBACK18-NEXT: shrxl %edx, %edi, %ebp
13684 ; FALLBACK18-NEXT: orl %ebp, %eax
13685 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13686 ; FALLBACK18-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
13687 ; FALLBACK18-NEXT: addl %edi, %edi
13688 ; FALLBACK18-NEXT: shlxl %ebx, %edi, %edi
13689 ; FALLBACK18-NEXT: orl %eax, %edi
13690 ; FALLBACK18-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13691 ; FALLBACK18-NEXT: movl 88(%esp,%ecx), %eax
13692 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13693 ; FALLBACK18-NEXT: leal (%eax,%eax), %edi
13694 ; FALLBACK18-NEXT: shlxl %ebx, %edi, %eax
13695 ; FALLBACK18-NEXT: movl 84(%esp,%ecx), %edi
13696 ; FALLBACK18-NEXT: shrxl %edx, %edi, %ebp
13697 ; FALLBACK18-NEXT: orl %ebp, %eax
13698 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13699 ; FALLBACK18-NEXT: shrxl %edx, %esi, %esi
13700 ; FALLBACK18-NEXT: addl %edi, %edi
13701 ; FALLBACK18-NEXT: shlxl %ebx, %edi, %eax
13702 ; FALLBACK18-NEXT: orl %esi, %eax
13703 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13704 ; FALLBACK18-NEXT: movl 96(%esp,%ecx), %esi
13705 ; FALLBACK18-NEXT: leal (%esi,%esi), %edi
13706 ; FALLBACK18-NEXT: shlxl %ebx, %edi, %eax
13707 ; FALLBACK18-NEXT: movl 92(%esp,%ecx), %edi
13708 ; FALLBACK18-NEXT: shrxl %edx, %edi, %ebp
13709 ; FALLBACK18-NEXT: orl %ebp, %eax
13710 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13711 ; FALLBACK18-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
13712 ; FALLBACK18-NEXT: addl %edi, %edi
13713 ; FALLBACK18-NEXT: shlxl %ebx, %edi, %edi
13714 ; FALLBACK18-NEXT: orl %eax, %edi
13715 ; FALLBACK18-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13716 ; FALLBACK18-NEXT: movl 104(%esp,%ecx), %eax
13717 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13718 ; FALLBACK18-NEXT: leal (%eax,%eax), %edi
13719 ; FALLBACK18-NEXT: shlxl %ebx, %edi, %eax
13720 ; FALLBACK18-NEXT: movl 100(%esp,%ecx), %edi
13721 ; FALLBACK18-NEXT: shrxl %edx, %edi, %ebp
13722 ; FALLBACK18-NEXT: orl %ebp, %eax
13723 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13724 ; FALLBACK18-NEXT: shrxl %edx, %esi, %esi
13725 ; FALLBACK18-NEXT: addl %edi, %edi
13726 ; FALLBACK18-NEXT: shlxl %ebx, %edi, %eax
13727 ; FALLBACK18-NEXT: orl %esi, %eax
13728 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13729 ; FALLBACK18-NEXT: movl 112(%esp,%ecx), %eax
13730 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13731 ; FALLBACK18-NEXT: leal (%eax,%eax), %esi
13732 ; FALLBACK18-NEXT: shlxl %ebx, %esi, %eax
13733 ; FALLBACK18-NEXT: movl 108(%esp,%ecx), %esi
13734 ; FALLBACK18-NEXT: movl %ecx, %edi
13735 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13736 ; FALLBACK18-NEXT: shrxl %edx, %esi, %ebp
13737 ; FALLBACK18-NEXT: orl %ebp, %eax
13738 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13739 ; FALLBACK18-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
13740 ; FALLBACK18-NEXT: addl %esi, %esi
13741 ; FALLBACK18-NEXT: shlxl %ebx, %esi, %esi
13742 ; FALLBACK18-NEXT: orl %ecx, %esi
13743 ; FALLBACK18-NEXT: movl 120(%esp,%edi), %ebp
13744 ; FALLBACK18-NEXT: leal (%ebp,%ebp), %ecx
13745 ; FALLBACK18-NEXT: shlxl %ebx, %ecx, %ecx
13746 ; FALLBACK18-NEXT: movl 116(%esp,%edi), %eax
13747 ; FALLBACK18-NEXT: shrxl %edx, %eax, %edi
13748 ; FALLBACK18-NEXT: orl %edi, %ecx
13749 ; FALLBACK18-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
13750 ; FALLBACK18-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13751 ; FALLBACK18-NEXT: addl %eax, %eax
13752 ; FALLBACK18-NEXT: shlxl %ebx, %eax, %edi
13753 ; FALLBACK18-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
13754 ; FALLBACK18-NEXT: shrxl %edx, %ebp, %eax
13755 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
13756 ; FALLBACK18-NEXT: movl 124(%esp,%ebp), %ebp
13757 ; FALLBACK18-NEXT: shrxl %edx, %ebp, %edx
13758 ; FALLBACK18-NEXT: addl %ebp, %ebp
13759 ; FALLBACK18-NEXT: shlxl %ebx, %ebp, %ebx
13760 ; FALLBACK18-NEXT: orl %eax, %ebx
13761 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %eax
13762 ; FALLBACK18-NEXT: movl %edx, 60(%eax)
13763 ; FALLBACK18-NEXT: movl %ebx, 56(%eax)
13764 ; FALLBACK18-NEXT: movl %edi, 48(%eax)
13765 ; FALLBACK18-NEXT: movl %ecx, 52(%eax)
13766 ; FALLBACK18-NEXT: movl %esi, 40(%eax)
13767 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13768 ; FALLBACK18-NEXT: movl %ecx, 44(%eax)
13769 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13770 ; FALLBACK18-NEXT: movl %ecx, 32(%eax)
13771 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13772 ; FALLBACK18-NEXT: movl %ecx, 36(%eax)
13773 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13774 ; FALLBACK18-NEXT: movl %ecx, 24(%eax)
13775 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13776 ; FALLBACK18-NEXT: movl %ecx, 28(%eax)
13777 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13778 ; FALLBACK18-NEXT: movl %ecx, 16(%eax)
13779 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13780 ; FALLBACK18-NEXT: movl %ecx, 20(%eax)
13781 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13782 ; FALLBACK18-NEXT: movl %ecx, 8(%eax)
13783 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13784 ; FALLBACK18-NEXT: movl %ecx, 12(%eax)
13785 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13786 ; FALLBACK18-NEXT: movl %ecx, (%eax)
13787 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13788 ; FALLBACK18-NEXT: movl %ecx, 4(%eax)
13789 ; FALLBACK18-NEXT: addl $204, %esp
13790 ; FALLBACK18-NEXT: popl %esi
13791 ; FALLBACK18-NEXT: popl %edi
13792 ; FALLBACK18-NEXT: popl %ebx
13793 ; FALLBACK18-NEXT: popl %ebp
13794 ; FALLBACK18-NEXT: retl
13796 ; FALLBACK19-LABEL: lshr_64bytes:
13797 ; FALLBACK19: # %bb.0:
13798 ; FALLBACK19-NEXT: pushl %ebp
13799 ; FALLBACK19-NEXT: pushl %ebx
13800 ; FALLBACK19-NEXT: pushl %edi
13801 ; FALLBACK19-NEXT: pushl %esi
13802 ; FALLBACK19-NEXT: subl $188, %esp
13803 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %ecx
13804 ; FALLBACK19-NEXT: movl (%ecx), %eax
13805 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13806 ; FALLBACK19-NEXT: movl 4(%ecx), %eax
13807 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13808 ; FALLBACK19-NEXT: movl 8(%ecx), %eax
13809 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13810 ; FALLBACK19-NEXT: movl 12(%ecx), %eax
13811 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13812 ; FALLBACK19-NEXT: movl 16(%ecx), %eax
13813 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13814 ; FALLBACK19-NEXT: movl 20(%ecx), %eax
13815 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13816 ; FALLBACK19-NEXT: movl 24(%ecx), %eax
13817 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13818 ; FALLBACK19-NEXT: movl 28(%ecx), %eax
13819 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13820 ; FALLBACK19-NEXT: movl 32(%ecx), %eax
13821 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13822 ; FALLBACK19-NEXT: movl 36(%ecx), %eax
13823 ; FALLBACK19-NEXT: movl %eax, (%esp) # 4-byte Spill
13824 ; FALLBACK19-NEXT: movl 40(%ecx), %ebp
13825 ; FALLBACK19-NEXT: movl 44(%ecx), %ebx
13826 ; FALLBACK19-NEXT: movl 48(%ecx), %edi
13827 ; FALLBACK19-NEXT: movl 52(%ecx), %esi
13828 ; FALLBACK19-NEXT: movl 56(%ecx), %edx
13829 ; FALLBACK19-NEXT: movl 60(%ecx), %eax
13830 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %ecx
13831 ; FALLBACK19-NEXT: movl (%ecx), %ecx
13832 ; FALLBACK19-NEXT: xorps %xmm0, %xmm0
13833 ; FALLBACK19-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
13834 ; FALLBACK19-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
13835 ; FALLBACK19-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
13836 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
13837 ; FALLBACK19-NEXT: movl %edx, {{[0-9]+}}(%esp)
13838 ; FALLBACK19-NEXT: movl %esi, {{[0-9]+}}(%esp)
13839 ; FALLBACK19-NEXT: movl %edi, {{[0-9]+}}(%esp)
13840 ; FALLBACK19-NEXT: movl %ebx, {{[0-9]+}}(%esp)
13841 ; FALLBACK19-NEXT: movl %ebp, {{[0-9]+}}(%esp)
13842 ; FALLBACK19-NEXT: movl (%esp), %eax # 4-byte Reload
13843 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
13844 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13845 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
13846 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13847 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
13848 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13849 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
13850 ; FALLBACK19-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
13851 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13852 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
13853 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13854 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
13855 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13856 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
13857 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13858 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
13859 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13860 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
13861 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13862 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
13863 ; FALLBACK19-NEXT: movl %ecx, %ebp
13864 ; FALLBACK19-NEXT: andl $60, %ebp
13865 ; FALLBACK19-NEXT: movl 56(%esp,%ebp), %edx
13866 ; FALLBACK19-NEXT: movl 52(%esp,%ebp), %eax
13867 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13868 ; FALLBACK19-NEXT: shll $3, %ecx
13869 ; FALLBACK19-NEXT: andl $24, %ecx
13870 ; FALLBACK19-NEXT: shrdl %cl, %edx, %eax
13871 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13872 ; FALLBACK19-NEXT: movl 64(%esp,%ebp), %edi
13873 ; FALLBACK19-NEXT: movl 60(%esp,%ebp), %eax
13874 ; FALLBACK19-NEXT: movl %eax, %esi
13875 ; FALLBACK19-NEXT: shrdl %cl, %edi, %esi
13876 ; FALLBACK19-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13877 ; FALLBACK19-NEXT: shrdl %cl, %eax, %edx
13878 ; FALLBACK19-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13879 ; FALLBACK19-NEXT: movl 72(%esp,%ebp), %esi
13880 ; FALLBACK19-NEXT: movl 68(%esp,%ebp), %eax
13881 ; FALLBACK19-NEXT: movl %eax, %edx
13882 ; FALLBACK19-NEXT: shrdl %cl, %esi, %edx
13883 ; FALLBACK19-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13884 ; FALLBACK19-NEXT: shrdl %cl, %eax, %edi
13885 ; FALLBACK19-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13886 ; FALLBACK19-NEXT: movl 80(%esp,%ebp), %edi
13887 ; FALLBACK19-NEXT: movl 76(%esp,%ebp), %eax
13888 ; FALLBACK19-NEXT: movl %eax, %edx
13889 ; FALLBACK19-NEXT: shrdl %cl, %edi, %edx
13890 ; FALLBACK19-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13891 ; FALLBACK19-NEXT: shrdl %cl, %eax, %esi
13892 ; FALLBACK19-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13893 ; FALLBACK19-NEXT: movl 88(%esp,%ebp), %ebx
13894 ; FALLBACK19-NEXT: movl 84(%esp,%ebp), %eax
13895 ; FALLBACK19-NEXT: movl %eax, %edx
13896 ; FALLBACK19-NEXT: shrdl %cl, %ebx, %edx
13897 ; FALLBACK19-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13898 ; FALLBACK19-NEXT: shrdl %cl, %eax, %edi
13899 ; FALLBACK19-NEXT: movl %edi, (%esp) # 4-byte Spill
13900 ; FALLBACK19-NEXT: movl 96(%esp,%ebp), %esi
13901 ; FALLBACK19-NEXT: movl 92(%esp,%ebp), %eax
13902 ; FALLBACK19-NEXT: movl %eax, %edx
13903 ; FALLBACK19-NEXT: shrdl %cl, %esi, %edx
13904 ; FALLBACK19-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13905 ; FALLBACK19-NEXT: shrdl %cl, %eax, %ebx
13906 ; FALLBACK19-NEXT: movl 104(%esp,%ebp), %eax
13907 ; FALLBACK19-NEXT: movl 100(%esp,%ebp), %edi
13908 ; FALLBACK19-NEXT: movl %edi, %edx
13909 ; FALLBACK19-NEXT: shrdl %cl, %eax, %edx
13910 ; FALLBACK19-NEXT: shrdl %cl, %edi, %esi
13911 ; FALLBACK19-NEXT: movl 48(%esp,%ebp), %edi
13912 ; FALLBACK19-NEXT: movl 108(%esp,%ebp), %ebp
13913 ; FALLBACK19-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13914 ; FALLBACK19-NEXT: shrdl %cl, %ebp, %eax
13915 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %ebp
13916 ; FALLBACK19-NEXT: movl %eax, 56(%ebp)
13917 ; FALLBACK19-NEXT: movl %esi, 48(%ebp)
13918 ; FALLBACK19-NEXT: movl %edx, 52(%ebp)
13919 ; FALLBACK19-NEXT: movl %ebx, 40(%ebp)
13920 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13921 ; FALLBACK19-NEXT: movl %eax, 44(%ebp)
13922 ; FALLBACK19-NEXT: movl (%esp), %eax # 4-byte Reload
13923 ; FALLBACK19-NEXT: movl %eax, 32(%ebp)
13924 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13925 ; FALLBACK19-NEXT: movl %eax, 36(%ebp)
13926 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13927 ; FALLBACK19-NEXT: movl %eax, 24(%ebp)
13928 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13929 ; FALLBACK19-NEXT: movl %eax, 28(%ebp)
13930 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13931 ; FALLBACK19-NEXT: movl %eax, 16(%ebp)
13932 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13933 ; FALLBACK19-NEXT: movl %eax, 20(%ebp)
13934 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13935 ; FALLBACK19-NEXT: movl %eax, 8(%ebp)
13936 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
13937 ; FALLBACK19-NEXT: movl %eax, 12(%ebp)
13938 ; FALLBACK19-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
13939 ; FALLBACK19-NEXT: # kill: def $cl killed $cl killed $ecx
13940 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
13941 ; FALLBACK19-NEXT: shrdl %cl, %edx, %edi
13942 ; FALLBACK19-NEXT: movl %edi, (%ebp)
13943 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
13944 ; FALLBACK19-NEXT: movl %ecx, 4(%ebp)
13945 ; FALLBACK19-NEXT: movl %eax, 60(%ebp)
13946 ; FALLBACK19-NEXT: addl $188, %esp
13947 ; FALLBACK19-NEXT: popl %esi
13948 ; FALLBACK19-NEXT: popl %edi
13949 ; FALLBACK19-NEXT: popl %ebx
13950 ; FALLBACK19-NEXT: popl %ebp
13951 ; FALLBACK19-NEXT: retl
13953 ; FALLBACK20-LABEL: lshr_64bytes:
13954 ; FALLBACK20: # %bb.0:
13955 ; FALLBACK20-NEXT: pushl %ebp
13956 ; FALLBACK20-NEXT: pushl %ebx
13957 ; FALLBACK20-NEXT: pushl %edi
13958 ; FALLBACK20-NEXT: pushl %esi
13959 ; FALLBACK20-NEXT: subl $204, %esp
13960 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %eax
13961 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %ecx
13962 ; FALLBACK20-NEXT: movups (%ecx), %xmm0
13963 ; FALLBACK20-NEXT: movups 16(%ecx), %xmm1
13964 ; FALLBACK20-NEXT: movups 32(%ecx), %xmm2
13965 ; FALLBACK20-NEXT: movups 48(%ecx), %xmm3
13966 ; FALLBACK20-NEXT: movl (%eax), %eax
13967 ; FALLBACK20-NEXT: xorps %xmm4, %xmm4
13968 ; FALLBACK20-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
13969 ; FALLBACK20-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
13970 ; FALLBACK20-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
13971 ; FALLBACK20-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
13972 ; FALLBACK20-NEXT: movaps %xmm3, {{[0-9]+}}(%esp)
13973 ; FALLBACK20-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
13974 ; FALLBACK20-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
13975 ; FALLBACK20-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
13976 ; FALLBACK20-NEXT: movl %eax, %esi
13977 ; FALLBACK20-NEXT: andl $60, %esi
13978 ; FALLBACK20-NEXT: movl 68(%esp,%esi), %edx
13979 ; FALLBACK20-NEXT: shll $3, %eax
13980 ; FALLBACK20-NEXT: andl $24, %eax
13981 ; FALLBACK20-NEXT: movl %edx, %edi
13982 ; FALLBACK20-NEXT: movl %eax, %ecx
13983 ; FALLBACK20-NEXT: shrl %cl, %edi
13984 ; FALLBACK20-NEXT: movl 72(%esp,%esi), %ecx
13985 ; FALLBACK20-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13986 ; FALLBACK20-NEXT: leal (%ecx,%ecx), %ebx
13987 ; FALLBACK20-NEXT: movb %al, %ch
13988 ; FALLBACK20-NEXT: notb %ch
13989 ; FALLBACK20-NEXT: movb %ch, %cl
13990 ; FALLBACK20-NEXT: shll %cl, %ebx
13991 ; FALLBACK20-NEXT: orl %edi, %ebx
13992 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
13993 ; FALLBACK20-NEXT: movl 64(%esp,%esi), %edi
13994 ; FALLBACK20-NEXT: movb %al, %cl
13995 ; FALLBACK20-NEXT: shrl %cl, %edi
13996 ; FALLBACK20-NEXT: addl %edx, %edx
13997 ; FALLBACK20-NEXT: movb %ch, %cl
13998 ; FALLBACK20-NEXT: shll %cl, %edx
13999 ; FALLBACK20-NEXT: orl %edi, %edx
14000 ; FALLBACK20-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14001 ; FALLBACK20-NEXT: movl 76(%esp,%esi), %edx
14002 ; FALLBACK20-NEXT: movl %edx, %ebp
14003 ; FALLBACK20-NEXT: movb %al, %cl
14004 ; FALLBACK20-NEXT: shrl %cl, %ebp
14005 ; FALLBACK20-NEXT: movl 80(%esp,%esi), %edi
14006 ; FALLBACK20-NEXT: leal (%edi,%edi), %ebx
14007 ; FALLBACK20-NEXT: movb %ch, %cl
14008 ; FALLBACK20-NEXT: shll %cl, %ebx
14009 ; FALLBACK20-NEXT: orl %ebp, %ebx
14010 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14011 ; FALLBACK20-NEXT: movb %al, %cl
14012 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
14013 ; FALLBACK20-NEXT: shrl %cl, %ebx
14014 ; FALLBACK20-NEXT: addl %edx, %edx
14015 ; FALLBACK20-NEXT: movb %ch, %cl
14016 ; FALLBACK20-NEXT: shll %cl, %edx
14017 ; FALLBACK20-NEXT: orl %ebx, %edx
14018 ; FALLBACK20-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14019 ; FALLBACK20-NEXT: movl 84(%esp,%esi), %ebx
14020 ; FALLBACK20-NEXT: movl %ebx, %ebp
14021 ; FALLBACK20-NEXT: movl %eax, %edx
14022 ; FALLBACK20-NEXT: movb %dl, %cl
14023 ; FALLBACK20-NEXT: shrl %cl, %ebp
14024 ; FALLBACK20-NEXT: movl 88(%esp,%esi), %eax
14025 ; FALLBACK20-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14026 ; FALLBACK20-NEXT: addl %eax, %eax
14027 ; FALLBACK20-NEXT: movb %ch, %cl
14028 ; FALLBACK20-NEXT: shll %cl, %eax
14029 ; FALLBACK20-NEXT: orl %ebp, %eax
14030 ; FALLBACK20-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14031 ; FALLBACK20-NEXT: movb %dl, %cl
14032 ; FALLBACK20-NEXT: shrl %cl, %edi
14033 ; FALLBACK20-NEXT: addl %ebx, %ebx
14034 ; FALLBACK20-NEXT: movb %ch, %cl
14035 ; FALLBACK20-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
14036 ; FALLBACK20-NEXT: shll %cl, %ebx
14037 ; FALLBACK20-NEXT: orl %edi, %ebx
14038 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14039 ; FALLBACK20-NEXT: movl 92(%esp,%esi), %ebx
14040 ; FALLBACK20-NEXT: movl %ebx, %ebp
14041 ; FALLBACK20-NEXT: movb %dl, %cl
14042 ; FALLBACK20-NEXT: shrl %cl, %ebp
14043 ; FALLBACK20-NEXT: movl 96(%esp,%esi), %edi
14044 ; FALLBACK20-NEXT: leal (%edi,%edi), %eax
14045 ; FALLBACK20-NEXT: movb %ch, %cl
14046 ; FALLBACK20-NEXT: shll %cl, %eax
14047 ; FALLBACK20-NEXT: orl %ebp, %eax
14048 ; FALLBACK20-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14049 ; FALLBACK20-NEXT: movb %dl, %cl
14050 ; FALLBACK20-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14051 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14052 ; FALLBACK20-NEXT: shrl %cl, %eax
14053 ; FALLBACK20-NEXT: addl %ebx, %ebx
14054 ; FALLBACK20-NEXT: movb %ch, %cl
14055 ; FALLBACK20-NEXT: shll %cl, %ebx
14056 ; FALLBACK20-NEXT: orl %eax, %ebx
14057 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14058 ; FALLBACK20-NEXT: movl 100(%esp,%esi), %ebx
14059 ; FALLBACK20-NEXT: movl %ebx, %ebp
14060 ; FALLBACK20-NEXT: movb %dl, %cl
14061 ; FALLBACK20-NEXT: shrl %cl, %ebp
14062 ; FALLBACK20-NEXT: movl 104(%esp,%esi), %edx
14063 ; FALLBACK20-NEXT: leal (%edx,%edx), %eax
14064 ; FALLBACK20-NEXT: movb %ch, %cl
14065 ; FALLBACK20-NEXT: shll %cl, %eax
14066 ; FALLBACK20-NEXT: orl %ebp, %eax
14067 ; FALLBACK20-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14068 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14069 ; FALLBACK20-NEXT: movb %al, %cl
14070 ; FALLBACK20-NEXT: shrl %cl, %edi
14071 ; FALLBACK20-NEXT: addl %ebx, %ebx
14072 ; FALLBACK20-NEXT: movb %ch, %cl
14073 ; FALLBACK20-NEXT: shll %cl, %ebx
14074 ; FALLBACK20-NEXT: orl %edi, %ebx
14075 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14076 ; FALLBACK20-NEXT: movl 108(%esp,%esi), %edi
14077 ; FALLBACK20-NEXT: movl %edi, %ebp
14078 ; FALLBACK20-NEXT: movl %eax, %ecx
14079 ; FALLBACK20-NEXT: shrl %cl, %ebp
14080 ; FALLBACK20-NEXT: movl 112(%esp,%esi), %ecx
14081 ; FALLBACK20-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14082 ; FALLBACK20-NEXT: leal (%ecx,%ecx), %ebx
14083 ; FALLBACK20-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
14084 ; FALLBACK20-NEXT: movb %ch, %cl
14085 ; FALLBACK20-NEXT: shll %cl, %ebx
14086 ; FALLBACK20-NEXT: orl %ebp, %ebx
14087 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14088 ; FALLBACK20-NEXT: movb %al, %cl
14089 ; FALLBACK20-NEXT: shrl %cl, %edx
14090 ; FALLBACK20-NEXT: addl %edi, %edi
14091 ; FALLBACK20-NEXT: movb %ch, %cl
14092 ; FALLBACK20-NEXT: shll %cl, %edi
14093 ; FALLBACK20-NEXT: orl %edx, %edi
14094 ; FALLBACK20-NEXT: movl %esi, %edx
14095 ; FALLBACK20-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14096 ; FALLBACK20-NEXT: movl 116(%esp,%esi), %esi
14097 ; FALLBACK20-NEXT: movl %esi, %ebx
14098 ; FALLBACK20-NEXT: movb %al, %cl
14099 ; FALLBACK20-NEXT: shrl %cl, %ebx
14100 ; FALLBACK20-NEXT: movl 120(%esp,%edx), %eax
14101 ; FALLBACK20-NEXT: leal (%eax,%eax), %ebp
14102 ; FALLBACK20-NEXT: movb %ch, %cl
14103 ; FALLBACK20-NEXT: shll %cl, %ebp
14104 ; FALLBACK20-NEXT: orl %ebx, %ebp
14105 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
14106 ; FALLBACK20-NEXT: movb %dl, %cl
14107 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
14108 ; FALLBACK20-NEXT: shrl %cl, %ebx
14109 ; FALLBACK20-NEXT: addl %esi, %esi
14110 ; FALLBACK20-NEXT: movb %ch, %cl
14111 ; FALLBACK20-NEXT: shll %cl, %esi
14112 ; FALLBACK20-NEXT: orl %ebx, %esi
14113 ; FALLBACK20-NEXT: movb %dl, %cl
14114 ; FALLBACK20-NEXT: shrl %cl, %eax
14115 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
14116 ; FALLBACK20-NEXT: movl 124(%esp,%edx), %ebx
14117 ; FALLBACK20-NEXT: leal (%ebx,%ebx), %edx
14118 ; FALLBACK20-NEXT: movb %ch, %cl
14119 ; FALLBACK20-NEXT: shll %cl, %edx
14120 ; FALLBACK20-NEXT: orl %eax, %edx
14121 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14122 ; FALLBACK20-NEXT: # kill: def $cl killed $cl killed $ecx
14123 ; FALLBACK20-NEXT: shrl %cl, %ebx
14124 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %eax
14125 ; FALLBACK20-NEXT: movl %ebx, 60(%eax)
14126 ; FALLBACK20-NEXT: movl %edx, 56(%eax)
14127 ; FALLBACK20-NEXT: movl %esi, 48(%eax)
14128 ; FALLBACK20-NEXT: movl %ebp, 52(%eax)
14129 ; FALLBACK20-NEXT: movl %edi, 40(%eax)
14130 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14131 ; FALLBACK20-NEXT: movl %ecx, 44(%eax)
14132 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14133 ; FALLBACK20-NEXT: movl %ecx, 32(%eax)
14134 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14135 ; FALLBACK20-NEXT: movl %ecx, 36(%eax)
14136 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14137 ; FALLBACK20-NEXT: movl %ecx, 24(%eax)
14138 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14139 ; FALLBACK20-NEXT: movl %ecx, 28(%eax)
14140 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14141 ; FALLBACK20-NEXT: movl %ecx, 16(%eax)
14142 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14143 ; FALLBACK20-NEXT: movl %ecx, 20(%eax)
14144 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14145 ; FALLBACK20-NEXT: movl %ecx, 8(%eax)
14146 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14147 ; FALLBACK20-NEXT: movl %ecx, 12(%eax)
14148 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14149 ; FALLBACK20-NEXT: movl %ecx, (%eax)
14150 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14151 ; FALLBACK20-NEXT: movl %ecx, 4(%eax)
14152 ; FALLBACK20-NEXT: addl $204, %esp
14153 ; FALLBACK20-NEXT: popl %esi
14154 ; FALLBACK20-NEXT: popl %edi
14155 ; FALLBACK20-NEXT: popl %ebx
14156 ; FALLBACK20-NEXT: popl %ebp
14157 ; FALLBACK20-NEXT: retl
14159 ; FALLBACK21-LABEL: lshr_64bytes:
14160 ; FALLBACK21: # %bb.0:
14161 ; FALLBACK21-NEXT: pushl %ebp
14162 ; FALLBACK21-NEXT: pushl %ebx
14163 ; FALLBACK21-NEXT: pushl %edi
14164 ; FALLBACK21-NEXT: pushl %esi
14165 ; FALLBACK21-NEXT: subl $188, %esp
14166 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %eax
14167 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %ecx
14168 ; FALLBACK21-NEXT: movups (%ecx), %xmm0
14169 ; FALLBACK21-NEXT: movups 16(%ecx), %xmm1
14170 ; FALLBACK21-NEXT: movups 32(%ecx), %xmm2
14171 ; FALLBACK21-NEXT: movups 48(%ecx), %xmm3
14172 ; FALLBACK21-NEXT: movl (%eax), %ecx
14173 ; FALLBACK21-NEXT: xorps %xmm4, %xmm4
14174 ; FALLBACK21-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
14175 ; FALLBACK21-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
14176 ; FALLBACK21-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
14177 ; FALLBACK21-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
14178 ; FALLBACK21-NEXT: movaps %xmm3, {{[0-9]+}}(%esp)
14179 ; FALLBACK21-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
14180 ; FALLBACK21-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
14181 ; FALLBACK21-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
14182 ; FALLBACK21-NEXT: movl %ecx, %ebp
14183 ; FALLBACK21-NEXT: andl $60, %ebp
14184 ; FALLBACK21-NEXT: movl 56(%esp,%ebp), %edx
14185 ; FALLBACK21-NEXT: movl 52(%esp,%ebp), %eax
14186 ; FALLBACK21-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14187 ; FALLBACK21-NEXT: shll $3, %ecx
14188 ; FALLBACK21-NEXT: andl $24, %ecx
14189 ; FALLBACK21-NEXT: shrdl %cl, %edx, %eax
14190 ; FALLBACK21-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14191 ; FALLBACK21-NEXT: movl 64(%esp,%ebp), %edi
14192 ; FALLBACK21-NEXT: movl 60(%esp,%ebp), %eax
14193 ; FALLBACK21-NEXT: movl %eax, %esi
14194 ; FALLBACK21-NEXT: shrdl %cl, %edi, %esi
14195 ; FALLBACK21-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14196 ; FALLBACK21-NEXT: shrdl %cl, %eax, %edx
14197 ; FALLBACK21-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14198 ; FALLBACK21-NEXT: movl 72(%esp,%ebp), %esi
14199 ; FALLBACK21-NEXT: movl 68(%esp,%ebp), %eax
14200 ; FALLBACK21-NEXT: movl %eax, %edx
14201 ; FALLBACK21-NEXT: shrdl %cl, %esi, %edx
14202 ; FALLBACK21-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14203 ; FALLBACK21-NEXT: shrdl %cl, %eax, %edi
14204 ; FALLBACK21-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14205 ; FALLBACK21-NEXT: movl 80(%esp,%ebp), %edi
14206 ; FALLBACK21-NEXT: movl 76(%esp,%ebp), %eax
14207 ; FALLBACK21-NEXT: movl %eax, %edx
14208 ; FALLBACK21-NEXT: shrdl %cl, %edi, %edx
14209 ; FALLBACK21-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14210 ; FALLBACK21-NEXT: shrdl %cl, %eax, %esi
14211 ; FALLBACK21-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14212 ; FALLBACK21-NEXT: movl 88(%esp,%ebp), %esi
14213 ; FALLBACK21-NEXT: movl 84(%esp,%ebp), %eax
14214 ; FALLBACK21-NEXT: movl %eax, %edx
14215 ; FALLBACK21-NEXT: shrdl %cl, %esi, %edx
14216 ; FALLBACK21-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14217 ; FALLBACK21-NEXT: movl %esi, %edx
14218 ; FALLBACK21-NEXT: shrdl %cl, %eax, %edi
14219 ; FALLBACK21-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14220 ; FALLBACK21-NEXT: movl 96(%esp,%ebp), %esi
14221 ; FALLBACK21-NEXT: movl 92(%esp,%ebp), %eax
14222 ; FALLBACK21-NEXT: movl %eax, %edi
14223 ; FALLBACK21-NEXT: shrdl %cl, %esi, %edi
14224 ; FALLBACK21-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14225 ; FALLBACK21-NEXT: shrdl %cl, %eax, %edx
14226 ; FALLBACK21-NEXT: movl %edx, (%esp) # 4-byte Spill
14227 ; FALLBACK21-NEXT: movl 104(%esp,%ebp), %edx
14228 ; FALLBACK21-NEXT: movl 100(%esp,%ebp), %eax
14229 ; FALLBACK21-NEXT: movl %eax, %edi
14230 ; FALLBACK21-NEXT: shrdl %cl, %edx, %edi
14231 ; FALLBACK21-NEXT: shrdl %cl, %eax, %esi
14232 ; FALLBACK21-NEXT: movl 48(%esp,%ebp), %ebx
14233 ; FALLBACK21-NEXT: movl 108(%esp,%ebp), %eax
14234 ; FALLBACK21-NEXT: shrdl %cl, %eax, %edx
14235 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %ebp
14236 ; FALLBACK21-NEXT: movl %edx, 56(%ebp)
14237 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
14238 ; FALLBACK21-NEXT: shrdl %cl, %edx, %ebx
14239 ; FALLBACK21-NEXT: # kill: def $cl killed $cl killed $ecx
14240 ; FALLBACK21-NEXT: shrl %cl, %eax
14241 ; FALLBACK21-NEXT: movl %eax, 60(%ebp)
14242 ; FALLBACK21-NEXT: movl %esi, 48(%ebp)
14243 ; FALLBACK21-NEXT: movl %edi, 52(%ebp)
14244 ; FALLBACK21-NEXT: movl (%esp), %eax # 4-byte Reload
14245 ; FALLBACK21-NEXT: movl %eax, 40(%ebp)
14246 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14247 ; FALLBACK21-NEXT: movl %eax, 44(%ebp)
14248 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14249 ; FALLBACK21-NEXT: movl %eax, 32(%ebp)
14250 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14251 ; FALLBACK21-NEXT: movl %eax, 36(%ebp)
14252 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14253 ; FALLBACK21-NEXT: movl %eax, 24(%ebp)
14254 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14255 ; FALLBACK21-NEXT: movl %eax, 28(%ebp)
14256 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14257 ; FALLBACK21-NEXT: movl %eax, 16(%ebp)
14258 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14259 ; FALLBACK21-NEXT: movl %eax, 20(%ebp)
14260 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14261 ; FALLBACK21-NEXT: movl %eax, 8(%ebp)
14262 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14263 ; FALLBACK21-NEXT: movl %eax, 12(%ebp)
14264 ; FALLBACK21-NEXT: movl %ebx, (%ebp)
14265 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14266 ; FALLBACK21-NEXT: movl %eax, 4(%ebp)
14267 ; FALLBACK21-NEXT: addl $188, %esp
14268 ; FALLBACK21-NEXT: popl %esi
14269 ; FALLBACK21-NEXT: popl %edi
14270 ; FALLBACK21-NEXT: popl %ebx
14271 ; FALLBACK21-NEXT: popl %ebp
14272 ; FALLBACK21-NEXT: retl
14274 ; FALLBACK22-LABEL: lshr_64bytes:
14275 ; FALLBACK22: # %bb.0:
14276 ; FALLBACK22-NEXT: pushl %ebp
14277 ; FALLBACK22-NEXT: pushl %ebx
14278 ; FALLBACK22-NEXT: pushl %edi
14279 ; FALLBACK22-NEXT: pushl %esi
14280 ; FALLBACK22-NEXT: subl $204, %esp
14281 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %eax
14282 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %ecx
14283 ; FALLBACK22-NEXT: movups (%ecx), %xmm0
14284 ; FALLBACK22-NEXT: movups 16(%ecx), %xmm1
14285 ; FALLBACK22-NEXT: movups 32(%ecx), %xmm2
14286 ; FALLBACK22-NEXT: movups 48(%ecx), %xmm3
14287 ; FALLBACK22-NEXT: movl (%eax), %ecx
14288 ; FALLBACK22-NEXT: xorps %xmm4, %xmm4
14289 ; FALLBACK22-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
14290 ; FALLBACK22-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
14291 ; FALLBACK22-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
14292 ; FALLBACK22-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
14293 ; FALLBACK22-NEXT: movaps %xmm3, {{[0-9]+}}(%esp)
14294 ; FALLBACK22-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
14295 ; FALLBACK22-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
14296 ; FALLBACK22-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
14297 ; FALLBACK22-NEXT: leal (,%ecx,8), %edx
14298 ; FALLBACK22-NEXT: andl $24, %edx
14299 ; FALLBACK22-NEXT: andl $60, %ecx
14300 ; FALLBACK22-NEXT: movl 68(%esp,%ecx), %esi
14301 ; FALLBACK22-NEXT: movl 72(%esp,%ecx), %eax
14302 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14303 ; FALLBACK22-NEXT: shrxl %edx, %esi, %edi
14304 ; FALLBACK22-NEXT: movl %edx, %ebx
14305 ; FALLBACK22-NEXT: notb %bl
14306 ; FALLBACK22-NEXT: leal (%eax,%eax), %ebp
14307 ; FALLBACK22-NEXT: shlxl %ebx, %ebp, %ebp
14308 ; FALLBACK22-NEXT: orl %edi, %ebp
14309 ; FALLBACK22-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14310 ; FALLBACK22-NEXT: shrxl %edx, 64(%esp,%ecx), %edi
14311 ; FALLBACK22-NEXT: addl %esi, %esi
14312 ; FALLBACK22-NEXT: shlxl %ebx, %esi, %esi
14313 ; FALLBACK22-NEXT: orl %edi, %esi
14314 ; FALLBACK22-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14315 ; FALLBACK22-NEXT: movl 80(%esp,%ecx), %esi
14316 ; FALLBACK22-NEXT: leal (%esi,%esi), %edi
14317 ; FALLBACK22-NEXT: shlxl %ebx, %edi, %eax
14318 ; FALLBACK22-NEXT: movl 76(%esp,%ecx), %edi
14319 ; FALLBACK22-NEXT: shrxl %edx, %edi, %ebp
14320 ; FALLBACK22-NEXT: orl %ebp, %eax
14321 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14322 ; FALLBACK22-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
14323 ; FALLBACK22-NEXT: addl %edi, %edi
14324 ; FALLBACK22-NEXT: shlxl %ebx, %edi, %edi
14325 ; FALLBACK22-NEXT: orl %eax, %edi
14326 ; FALLBACK22-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14327 ; FALLBACK22-NEXT: movl 88(%esp,%ecx), %eax
14328 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14329 ; FALLBACK22-NEXT: leal (%eax,%eax), %edi
14330 ; FALLBACK22-NEXT: shlxl %ebx, %edi, %eax
14331 ; FALLBACK22-NEXT: movl 84(%esp,%ecx), %edi
14332 ; FALLBACK22-NEXT: shrxl %edx, %edi, %ebp
14333 ; FALLBACK22-NEXT: orl %ebp, %eax
14334 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14335 ; FALLBACK22-NEXT: shrxl %edx, %esi, %esi
14336 ; FALLBACK22-NEXT: addl %edi, %edi
14337 ; FALLBACK22-NEXT: shlxl %ebx, %edi, %eax
14338 ; FALLBACK22-NEXT: orl %esi, %eax
14339 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14340 ; FALLBACK22-NEXT: movl 96(%esp,%ecx), %esi
14341 ; FALLBACK22-NEXT: leal (%esi,%esi), %edi
14342 ; FALLBACK22-NEXT: shlxl %ebx, %edi, %eax
14343 ; FALLBACK22-NEXT: movl 92(%esp,%ecx), %edi
14344 ; FALLBACK22-NEXT: shrxl %edx, %edi, %ebp
14345 ; FALLBACK22-NEXT: orl %ebp, %eax
14346 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14347 ; FALLBACK22-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
14348 ; FALLBACK22-NEXT: addl %edi, %edi
14349 ; FALLBACK22-NEXT: shlxl %ebx, %edi, %edi
14350 ; FALLBACK22-NEXT: orl %eax, %edi
14351 ; FALLBACK22-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14352 ; FALLBACK22-NEXT: movl 104(%esp,%ecx), %eax
14353 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14354 ; FALLBACK22-NEXT: leal (%eax,%eax), %edi
14355 ; FALLBACK22-NEXT: shlxl %ebx, %edi, %eax
14356 ; FALLBACK22-NEXT: movl 100(%esp,%ecx), %edi
14357 ; FALLBACK22-NEXT: shrxl %edx, %edi, %ebp
14358 ; FALLBACK22-NEXT: orl %ebp, %eax
14359 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14360 ; FALLBACK22-NEXT: shrxl %edx, %esi, %esi
14361 ; FALLBACK22-NEXT: addl %edi, %edi
14362 ; FALLBACK22-NEXT: shlxl %ebx, %edi, %eax
14363 ; FALLBACK22-NEXT: orl %esi, %eax
14364 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14365 ; FALLBACK22-NEXT: movl %ecx, %eax
14366 ; FALLBACK22-NEXT: movl 112(%esp,%ecx), %ecx
14367 ; FALLBACK22-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14368 ; FALLBACK22-NEXT: leal (%ecx,%ecx), %esi
14369 ; FALLBACK22-NEXT: shlxl %ebx, %esi, %ecx
14370 ; FALLBACK22-NEXT: movl 108(%esp,%eax), %esi
14371 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14372 ; FALLBACK22-NEXT: shrxl %edx, %esi, %ebp
14373 ; FALLBACK22-NEXT: orl %ebp, %ecx
14374 ; FALLBACK22-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14375 ; FALLBACK22-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
14376 ; FALLBACK22-NEXT: addl %esi, %esi
14377 ; FALLBACK22-NEXT: shlxl %ebx, %esi, %esi
14378 ; FALLBACK22-NEXT: orl %ecx, %esi
14379 ; FALLBACK22-NEXT: movl 120(%esp,%eax), %ebp
14380 ; FALLBACK22-NEXT: leal (%ebp,%ebp), %ecx
14381 ; FALLBACK22-NEXT: shlxl %ebx, %ecx, %ecx
14382 ; FALLBACK22-NEXT: movl 116(%esp,%eax), %eax
14383 ; FALLBACK22-NEXT: shrxl %edx, %eax, %edi
14384 ; FALLBACK22-NEXT: orl %edi, %ecx
14385 ; FALLBACK22-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
14386 ; FALLBACK22-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14387 ; FALLBACK22-NEXT: addl %eax, %eax
14388 ; FALLBACK22-NEXT: shlxl %ebx, %eax, %edi
14389 ; FALLBACK22-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
14390 ; FALLBACK22-NEXT: shrxl %edx, %ebp, %eax
14391 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
14392 ; FALLBACK22-NEXT: movl 124(%esp,%ebp), %ebp
14393 ; FALLBACK22-NEXT: shrxl %edx, %ebp, %edx
14394 ; FALLBACK22-NEXT: addl %ebp, %ebp
14395 ; FALLBACK22-NEXT: shlxl %ebx, %ebp, %ebx
14396 ; FALLBACK22-NEXT: orl %eax, %ebx
14397 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %eax
14398 ; FALLBACK22-NEXT: movl %edx, 60(%eax)
14399 ; FALLBACK22-NEXT: movl %ebx, 56(%eax)
14400 ; FALLBACK22-NEXT: movl %edi, 48(%eax)
14401 ; FALLBACK22-NEXT: movl %ecx, 52(%eax)
14402 ; FALLBACK22-NEXT: movl %esi, 40(%eax)
14403 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14404 ; FALLBACK22-NEXT: movl %ecx, 44(%eax)
14405 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14406 ; FALLBACK22-NEXT: movl %ecx, 32(%eax)
14407 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14408 ; FALLBACK22-NEXT: movl %ecx, 36(%eax)
14409 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14410 ; FALLBACK22-NEXT: movl %ecx, 24(%eax)
14411 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14412 ; FALLBACK22-NEXT: movl %ecx, 28(%eax)
14413 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14414 ; FALLBACK22-NEXT: movl %ecx, 16(%eax)
14415 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14416 ; FALLBACK22-NEXT: movl %ecx, 20(%eax)
14417 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14418 ; FALLBACK22-NEXT: movl %ecx, 8(%eax)
14419 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14420 ; FALLBACK22-NEXT: movl %ecx, 12(%eax)
14421 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14422 ; FALLBACK22-NEXT: movl %ecx, (%eax)
14423 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14424 ; FALLBACK22-NEXT: movl %ecx, 4(%eax)
14425 ; FALLBACK22-NEXT: addl $204, %esp
14426 ; FALLBACK22-NEXT: popl %esi
14427 ; FALLBACK22-NEXT: popl %edi
14428 ; FALLBACK22-NEXT: popl %ebx
14429 ; FALLBACK22-NEXT: popl %ebp
14430 ; FALLBACK22-NEXT: retl
14432 ; FALLBACK23-LABEL: lshr_64bytes:
14433 ; FALLBACK23: # %bb.0:
14434 ; FALLBACK23-NEXT: pushl %ebp
14435 ; FALLBACK23-NEXT: pushl %ebx
14436 ; FALLBACK23-NEXT: pushl %edi
14437 ; FALLBACK23-NEXT: pushl %esi
14438 ; FALLBACK23-NEXT: subl $188, %esp
14439 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %eax
14440 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %ecx
14441 ; FALLBACK23-NEXT: movups (%ecx), %xmm0
14442 ; FALLBACK23-NEXT: movups 16(%ecx), %xmm1
14443 ; FALLBACK23-NEXT: movups 32(%ecx), %xmm2
14444 ; FALLBACK23-NEXT: movups 48(%ecx), %xmm3
14445 ; FALLBACK23-NEXT: movl (%eax), %ecx
14446 ; FALLBACK23-NEXT: xorps %xmm4, %xmm4
14447 ; FALLBACK23-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
14448 ; FALLBACK23-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
14449 ; FALLBACK23-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
14450 ; FALLBACK23-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
14451 ; FALLBACK23-NEXT: movaps %xmm3, {{[0-9]+}}(%esp)
14452 ; FALLBACK23-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
14453 ; FALLBACK23-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
14454 ; FALLBACK23-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
14455 ; FALLBACK23-NEXT: movl %ecx, %ebp
14456 ; FALLBACK23-NEXT: andl $60, %ebp
14457 ; FALLBACK23-NEXT: movl 56(%esp,%ebp), %edx
14458 ; FALLBACK23-NEXT: movl 52(%esp,%ebp), %eax
14459 ; FALLBACK23-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14460 ; FALLBACK23-NEXT: shll $3, %ecx
14461 ; FALLBACK23-NEXT: andl $24, %ecx
14462 ; FALLBACK23-NEXT: shrdl %cl, %edx, %eax
14463 ; FALLBACK23-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14464 ; FALLBACK23-NEXT: movl 64(%esp,%ebp), %edi
14465 ; FALLBACK23-NEXT: movl 60(%esp,%ebp), %eax
14466 ; FALLBACK23-NEXT: movl %eax, %esi
14467 ; FALLBACK23-NEXT: shrdl %cl, %edi, %esi
14468 ; FALLBACK23-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14469 ; FALLBACK23-NEXT: shrdl %cl, %eax, %edx
14470 ; FALLBACK23-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14471 ; FALLBACK23-NEXT: movl 72(%esp,%ebp), %esi
14472 ; FALLBACK23-NEXT: movl 68(%esp,%ebp), %eax
14473 ; FALLBACK23-NEXT: movl %eax, %edx
14474 ; FALLBACK23-NEXT: shrdl %cl, %esi, %edx
14475 ; FALLBACK23-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14476 ; FALLBACK23-NEXT: shrdl %cl, %eax, %edi
14477 ; FALLBACK23-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14478 ; FALLBACK23-NEXT: movl 80(%esp,%ebp), %edi
14479 ; FALLBACK23-NEXT: movl 76(%esp,%ebp), %eax
14480 ; FALLBACK23-NEXT: movl %eax, %edx
14481 ; FALLBACK23-NEXT: shrdl %cl, %edi, %edx
14482 ; FALLBACK23-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14483 ; FALLBACK23-NEXT: shrdl %cl, %eax, %esi
14484 ; FALLBACK23-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14485 ; FALLBACK23-NEXT: movl 88(%esp,%ebp), %ebx
14486 ; FALLBACK23-NEXT: movl 84(%esp,%ebp), %eax
14487 ; FALLBACK23-NEXT: movl %eax, %edx
14488 ; FALLBACK23-NEXT: shrdl %cl, %ebx, %edx
14489 ; FALLBACK23-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14490 ; FALLBACK23-NEXT: shrdl %cl, %eax, %edi
14491 ; FALLBACK23-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14492 ; FALLBACK23-NEXT: movl 96(%esp,%ebp), %esi
14493 ; FALLBACK23-NEXT: movl 92(%esp,%ebp), %eax
14494 ; FALLBACK23-NEXT: movl %eax, %edx
14495 ; FALLBACK23-NEXT: shrdl %cl, %esi, %edx
14496 ; FALLBACK23-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14497 ; FALLBACK23-NEXT: shrdl %cl, %eax, %ebx
14498 ; FALLBACK23-NEXT: movl 104(%esp,%ebp), %eax
14499 ; FALLBACK23-NEXT: movl 100(%esp,%ebp), %edi
14500 ; FALLBACK23-NEXT: movl %edi, %edx
14501 ; FALLBACK23-NEXT: shrdl %cl, %eax, %edx
14502 ; FALLBACK23-NEXT: shrdl %cl, %edi, %esi
14503 ; FALLBACK23-NEXT: movl 48(%esp,%ebp), %edi
14504 ; FALLBACK23-NEXT: movl 108(%esp,%ebp), %ebp
14505 ; FALLBACK23-NEXT: movl %ebp, (%esp) # 4-byte Spill
14506 ; FALLBACK23-NEXT: shrdl %cl, %ebp, %eax
14507 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %ebp
14508 ; FALLBACK23-NEXT: movl %eax, 56(%ebp)
14509 ; FALLBACK23-NEXT: movl %esi, 48(%ebp)
14510 ; FALLBACK23-NEXT: movl %edx, 52(%ebp)
14511 ; FALLBACK23-NEXT: movl %ebx, 40(%ebp)
14512 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14513 ; FALLBACK23-NEXT: movl %eax, 44(%ebp)
14514 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14515 ; FALLBACK23-NEXT: movl %eax, 32(%ebp)
14516 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14517 ; FALLBACK23-NEXT: movl %eax, 36(%ebp)
14518 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14519 ; FALLBACK23-NEXT: movl %eax, 24(%ebp)
14520 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14521 ; FALLBACK23-NEXT: movl %eax, 28(%ebp)
14522 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14523 ; FALLBACK23-NEXT: movl %eax, 16(%ebp)
14524 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14525 ; FALLBACK23-NEXT: movl %eax, 20(%ebp)
14526 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14527 ; FALLBACK23-NEXT: movl %eax, 8(%ebp)
14528 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14529 ; FALLBACK23-NEXT: movl %eax, 12(%ebp)
14530 ; FALLBACK23-NEXT: shrxl %ecx, (%esp), %eax # 4-byte Folded Reload
14531 ; FALLBACK23-NEXT: # kill: def $cl killed $cl killed $ecx
14532 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
14533 ; FALLBACK23-NEXT: shrdl %cl, %edx, %edi
14534 ; FALLBACK23-NEXT: movl %edi, (%ebp)
14535 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14536 ; FALLBACK23-NEXT: movl %ecx, 4(%ebp)
14537 ; FALLBACK23-NEXT: movl %eax, 60(%ebp)
14538 ; FALLBACK23-NEXT: addl $188, %esp
14539 ; FALLBACK23-NEXT: popl %esi
14540 ; FALLBACK23-NEXT: popl %edi
14541 ; FALLBACK23-NEXT: popl %ebx
14542 ; FALLBACK23-NEXT: popl %ebp
14543 ; FALLBACK23-NEXT: retl
14545 ; FALLBACK24-LABEL: lshr_64bytes:
14546 ; FALLBACK24: # %bb.0:
14547 ; FALLBACK24-NEXT: pushl %ebp
14548 ; FALLBACK24-NEXT: pushl %ebx
14549 ; FALLBACK24-NEXT: pushl %edi
14550 ; FALLBACK24-NEXT: pushl %esi
14551 ; FALLBACK24-NEXT: subl $204, %esp
14552 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %eax
14553 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %ecx
14554 ; FALLBACK24-NEXT: vmovups (%ecx), %ymm0
14555 ; FALLBACK24-NEXT: vmovups 32(%ecx), %ymm1
14556 ; FALLBACK24-NEXT: movl (%eax), %ecx
14557 ; FALLBACK24-NEXT: vxorps %xmm2, %xmm2, %xmm2
14558 ; FALLBACK24-NEXT: vmovups %ymm2, {{[0-9]+}}(%esp)
14559 ; FALLBACK24-NEXT: vmovups %ymm2, {{[0-9]+}}(%esp)
14560 ; FALLBACK24-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
14561 ; FALLBACK24-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
14562 ; FALLBACK24-NEXT: movl %ecx, %esi
14563 ; FALLBACK24-NEXT: andl $60, %esi
14564 ; FALLBACK24-NEXT: movl 68(%esp,%esi), %edx
14565 ; FALLBACK24-NEXT: shll $3, %ecx
14566 ; FALLBACK24-NEXT: andl $24, %ecx
14567 ; FALLBACK24-NEXT: movl %edx, %edi
14568 ; FALLBACK24-NEXT: shrl %cl, %edi
14569 ; FALLBACK24-NEXT: movl 72(%esp,%esi), %eax
14570 ; FALLBACK24-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14571 ; FALLBACK24-NEXT: leal (%eax,%eax), %ebx
14572 ; FALLBACK24-NEXT: movl %ecx, %ebp
14573 ; FALLBACK24-NEXT: movb %cl, %ch
14574 ; FALLBACK24-NEXT: notb %ch
14575 ; FALLBACK24-NEXT: movb %ch, %cl
14576 ; FALLBACK24-NEXT: shll %cl, %ebx
14577 ; FALLBACK24-NEXT: orl %edi, %ebx
14578 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14579 ; FALLBACK24-NEXT: movl 64(%esp,%esi), %edi
14580 ; FALLBACK24-NEXT: movl %ebp, %eax
14581 ; FALLBACK24-NEXT: movb %al, %cl
14582 ; FALLBACK24-NEXT: shrl %cl, %edi
14583 ; FALLBACK24-NEXT: addl %edx, %edx
14584 ; FALLBACK24-NEXT: movb %ch, %cl
14585 ; FALLBACK24-NEXT: shll %cl, %edx
14586 ; FALLBACK24-NEXT: orl %edi, %edx
14587 ; FALLBACK24-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14588 ; FALLBACK24-NEXT: movl 76(%esp,%esi), %edx
14589 ; FALLBACK24-NEXT: movl %edx, %ebp
14590 ; FALLBACK24-NEXT: movb %al, %cl
14591 ; FALLBACK24-NEXT: shrl %cl, %ebp
14592 ; FALLBACK24-NEXT: movl 80(%esp,%esi), %edi
14593 ; FALLBACK24-NEXT: leal (%edi,%edi), %ebx
14594 ; FALLBACK24-NEXT: movb %ch, %cl
14595 ; FALLBACK24-NEXT: shll %cl, %ebx
14596 ; FALLBACK24-NEXT: orl %ebp, %ebx
14597 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14598 ; FALLBACK24-NEXT: movb %al, %cl
14599 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
14600 ; FALLBACK24-NEXT: shrl %cl, %ebx
14601 ; FALLBACK24-NEXT: addl %edx, %edx
14602 ; FALLBACK24-NEXT: movb %ch, %cl
14603 ; FALLBACK24-NEXT: shll %cl, %edx
14604 ; FALLBACK24-NEXT: orl %ebx, %edx
14605 ; FALLBACK24-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14606 ; FALLBACK24-NEXT: movl 84(%esp,%esi), %ebx
14607 ; FALLBACK24-NEXT: movl %ebx, %ebp
14608 ; FALLBACK24-NEXT: movl %eax, %edx
14609 ; FALLBACK24-NEXT: movb %dl, %cl
14610 ; FALLBACK24-NEXT: shrl %cl, %ebp
14611 ; FALLBACK24-NEXT: movl 88(%esp,%esi), %eax
14612 ; FALLBACK24-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14613 ; FALLBACK24-NEXT: addl %eax, %eax
14614 ; FALLBACK24-NEXT: movb %ch, %cl
14615 ; FALLBACK24-NEXT: shll %cl, %eax
14616 ; FALLBACK24-NEXT: orl %ebp, %eax
14617 ; FALLBACK24-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14618 ; FALLBACK24-NEXT: movb %dl, %cl
14619 ; FALLBACK24-NEXT: shrl %cl, %edi
14620 ; FALLBACK24-NEXT: addl %ebx, %ebx
14621 ; FALLBACK24-NEXT: movb %ch, %cl
14622 ; FALLBACK24-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
14623 ; FALLBACK24-NEXT: shll %cl, %ebx
14624 ; FALLBACK24-NEXT: orl %edi, %ebx
14625 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14626 ; FALLBACK24-NEXT: movl 92(%esp,%esi), %ebx
14627 ; FALLBACK24-NEXT: movl %ebx, %ebp
14628 ; FALLBACK24-NEXT: movb %dl, %cl
14629 ; FALLBACK24-NEXT: shrl %cl, %ebp
14630 ; FALLBACK24-NEXT: movl 96(%esp,%esi), %edi
14631 ; FALLBACK24-NEXT: leal (%edi,%edi), %eax
14632 ; FALLBACK24-NEXT: movb %ch, %cl
14633 ; FALLBACK24-NEXT: shll %cl, %eax
14634 ; FALLBACK24-NEXT: orl %ebp, %eax
14635 ; FALLBACK24-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14636 ; FALLBACK24-NEXT: movb %dl, %cl
14637 ; FALLBACK24-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14638 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14639 ; FALLBACK24-NEXT: shrl %cl, %eax
14640 ; FALLBACK24-NEXT: addl %ebx, %ebx
14641 ; FALLBACK24-NEXT: movb %ch, %cl
14642 ; FALLBACK24-NEXT: shll %cl, %ebx
14643 ; FALLBACK24-NEXT: orl %eax, %ebx
14644 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14645 ; FALLBACK24-NEXT: movl 100(%esp,%esi), %ebx
14646 ; FALLBACK24-NEXT: movl %ebx, %ebp
14647 ; FALLBACK24-NEXT: movb %dl, %cl
14648 ; FALLBACK24-NEXT: shrl %cl, %ebp
14649 ; FALLBACK24-NEXT: movl 104(%esp,%esi), %edx
14650 ; FALLBACK24-NEXT: leal (%edx,%edx), %eax
14651 ; FALLBACK24-NEXT: movb %ch, %cl
14652 ; FALLBACK24-NEXT: shll %cl, %eax
14653 ; FALLBACK24-NEXT: orl %ebp, %eax
14654 ; FALLBACK24-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14655 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14656 ; FALLBACK24-NEXT: movb %al, %cl
14657 ; FALLBACK24-NEXT: shrl %cl, %edi
14658 ; FALLBACK24-NEXT: addl %ebx, %ebx
14659 ; FALLBACK24-NEXT: movb %ch, %cl
14660 ; FALLBACK24-NEXT: shll %cl, %ebx
14661 ; FALLBACK24-NEXT: orl %edi, %ebx
14662 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14663 ; FALLBACK24-NEXT: movl 108(%esp,%esi), %edi
14664 ; FALLBACK24-NEXT: movl %edi, %ebp
14665 ; FALLBACK24-NEXT: movl %eax, %ecx
14666 ; FALLBACK24-NEXT: shrl %cl, %ebp
14667 ; FALLBACK24-NEXT: movl 112(%esp,%esi), %ecx
14668 ; FALLBACK24-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14669 ; FALLBACK24-NEXT: leal (%ecx,%ecx), %ebx
14670 ; FALLBACK24-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
14671 ; FALLBACK24-NEXT: movb %ch, %cl
14672 ; FALLBACK24-NEXT: shll %cl, %ebx
14673 ; FALLBACK24-NEXT: orl %ebp, %ebx
14674 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14675 ; FALLBACK24-NEXT: movb %al, %cl
14676 ; FALLBACK24-NEXT: shrl %cl, %edx
14677 ; FALLBACK24-NEXT: addl %edi, %edi
14678 ; FALLBACK24-NEXT: movb %ch, %cl
14679 ; FALLBACK24-NEXT: shll %cl, %edi
14680 ; FALLBACK24-NEXT: orl %edx, %edi
14681 ; FALLBACK24-NEXT: movl %esi, %edx
14682 ; FALLBACK24-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14683 ; FALLBACK24-NEXT: movl 116(%esp,%esi), %esi
14684 ; FALLBACK24-NEXT: movl %esi, %ebx
14685 ; FALLBACK24-NEXT: movb %al, %cl
14686 ; FALLBACK24-NEXT: shrl %cl, %ebx
14687 ; FALLBACK24-NEXT: movl 120(%esp,%edx), %eax
14688 ; FALLBACK24-NEXT: leal (%eax,%eax), %ebp
14689 ; FALLBACK24-NEXT: movb %ch, %cl
14690 ; FALLBACK24-NEXT: shll %cl, %ebp
14691 ; FALLBACK24-NEXT: orl %ebx, %ebp
14692 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
14693 ; FALLBACK24-NEXT: movb %dl, %cl
14694 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
14695 ; FALLBACK24-NEXT: shrl %cl, %ebx
14696 ; FALLBACK24-NEXT: addl %esi, %esi
14697 ; FALLBACK24-NEXT: movb %ch, %cl
14698 ; FALLBACK24-NEXT: shll %cl, %esi
14699 ; FALLBACK24-NEXT: orl %ebx, %esi
14700 ; FALLBACK24-NEXT: movb %dl, %cl
14701 ; FALLBACK24-NEXT: shrl %cl, %eax
14702 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
14703 ; FALLBACK24-NEXT: movl 124(%esp,%edx), %ebx
14704 ; FALLBACK24-NEXT: leal (%ebx,%ebx), %edx
14705 ; FALLBACK24-NEXT: movb %ch, %cl
14706 ; FALLBACK24-NEXT: shll %cl, %edx
14707 ; FALLBACK24-NEXT: orl %eax, %edx
14708 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14709 ; FALLBACK24-NEXT: # kill: def $cl killed $cl killed $ecx
14710 ; FALLBACK24-NEXT: shrl %cl, %ebx
14711 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %eax
14712 ; FALLBACK24-NEXT: movl %ebx, 60(%eax)
14713 ; FALLBACK24-NEXT: movl %edx, 56(%eax)
14714 ; FALLBACK24-NEXT: movl %esi, 48(%eax)
14715 ; FALLBACK24-NEXT: movl %ebp, 52(%eax)
14716 ; FALLBACK24-NEXT: movl %edi, 40(%eax)
14717 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14718 ; FALLBACK24-NEXT: movl %ecx, 44(%eax)
14719 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14720 ; FALLBACK24-NEXT: movl %ecx, 32(%eax)
14721 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14722 ; FALLBACK24-NEXT: movl %ecx, 36(%eax)
14723 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14724 ; FALLBACK24-NEXT: movl %ecx, 24(%eax)
14725 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14726 ; FALLBACK24-NEXT: movl %ecx, 28(%eax)
14727 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14728 ; FALLBACK24-NEXT: movl %ecx, 16(%eax)
14729 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14730 ; FALLBACK24-NEXT: movl %ecx, 20(%eax)
14731 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14732 ; FALLBACK24-NEXT: movl %ecx, 8(%eax)
14733 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14734 ; FALLBACK24-NEXT: movl %ecx, 12(%eax)
14735 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14736 ; FALLBACK24-NEXT: movl %ecx, (%eax)
14737 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
14738 ; FALLBACK24-NEXT: movl %ecx, 4(%eax)
14739 ; FALLBACK24-NEXT: addl $204, %esp
14740 ; FALLBACK24-NEXT: popl %esi
14741 ; FALLBACK24-NEXT: popl %edi
14742 ; FALLBACK24-NEXT: popl %ebx
14743 ; FALLBACK24-NEXT: popl %ebp
14744 ; FALLBACK24-NEXT: vzeroupper
14745 ; FALLBACK24-NEXT: retl
14747 ; FALLBACK25-LABEL: lshr_64bytes:
14748 ; FALLBACK25: # %bb.0:
14749 ; FALLBACK25-NEXT: pushl %ebp
14750 ; FALLBACK25-NEXT: pushl %ebx
14751 ; FALLBACK25-NEXT: pushl %edi
14752 ; FALLBACK25-NEXT: pushl %esi
14753 ; FALLBACK25-NEXT: subl $188, %esp
14754 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %eax
14755 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %ecx
14756 ; FALLBACK25-NEXT: vmovups (%ecx), %ymm0
14757 ; FALLBACK25-NEXT: vmovups 32(%ecx), %ymm1
14758 ; FALLBACK25-NEXT: movl (%eax), %ecx
14759 ; FALLBACK25-NEXT: vxorps %xmm2, %xmm2, %xmm2
14760 ; FALLBACK25-NEXT: vmovups %ymm2, {{[0-9]+}}(%esp)
14761 ; FALLBACK25-NEXT: vmovups %ymm2, {{[0-9]+}}(%esp)
14762 ; FALLBACK25-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
14763 ; FALLBACK25-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
14764 ; FALLBACK25-NEXT: movl %ecx, %ebp
14765 ; FALLBACK25-NEXT: andl $60, %ebp
14766 ; FALLBACK25-NEXT: movl 56(%esp,%ebp), %edx
14767 ; FALLBACK25-NEXT: movl 52(%esp,%ebp), %eax
14768 ; FALLBACK25-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14769 ; FALLBACK25-NEXT: shll $3, %ecx
14770 ; FALLBACK25-NEXT: andl $24, %ecx
14771 ; FALLBACK25-NEXT: shrdl %cl, %edx, %eax
14772 ; FALLBACK25-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14773 ; FALLBACK25-NEXT: movl 64(%esp,%ebp), %edi
14774 ; FALLBACK25-NEXT: movl 60(%esp,%ebp), %eax
14775 ; FALLBACK25-NEXT: movl %eax, %esi
14776 ; FALLBACK25-NEXT: shrdl %cl, %edi, %esi
14777 ; FALLBACK25-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14778 ; FALLBACK25-NEXT: shrdl %cl, %eax, %edx
14779 ; FALLBACK25-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14780 ; FALLBACK25-NEXT: movl 72(%esp,%ebp), %esi
14781 ; FALLBACK25-NEXT: movl 68(%esp,%ebp), %eax
14782 ; FALLBACK25-NEXT: movl %eax, %edx
14783 ; FALLBACK25-NEXT: shrdl %cl, %esi, %edx
14784 ; FALLBACK25-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14785 ; FALLBACK25-NEXT: shrdl %cl, %eax, %edi
14786 ; FALLBACK25-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14787 ; FALLBACK25-NEXT: movl 80(%esp,%ebp), %edi
14788 ; FALLBACK25-NEXT: movl 76(%esp,%ebp), %eax
14789 ; FALLBACK25-NEXT: movl %eax, %edx
14790 ; FALLBACK25-NEXT: shrdl %cl, %edi, %edx
14791 ; FALLBACK25-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14792 ; FALLBACK25-NEXT: shrdl %cl, %eax, %esi
14793 ; FALLBACK25-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14794 ; FALLBACK25-NEXT: movl 88(%esp,%ebp), %esi
14795 ; FALLBACK25-NEXT: movl 84(%esp,%ebp), %eax
14796 ; FALLBACK25-NEXT: movl %eax, %edx
14797 ; FALLBACK25-NEXT: shrdl %cl, %esi, %edx
14798 ; FALLBACK25-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14799 ; FALLBACK25-NEXT: movl %esi, %edx
14800 ; FALLBACK25-NEXT: shrdl %cl, %eax, %edi
14801 ; FALLBACK25-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14802 ; FALLBACK25-NEXT: movl 96(%esp,%ebp), %esi
14803 ; FALLBACK25-NEXT: movl 92(%esp,%ebp), %eax
14804 ; FALLBACK25-NEXT: movl %eax, %edi
14805 ; FALLBACK25-NEXT: shrdl %cl, %esi, %edi
14806 ; FALLBACK25-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14807 ; FALLBACK25-NEXT: shrdl %cl, %eax, %edx
14808 ; FALLBACK25-NEXT: movl %edx, (%esp) # 4-byte Spill
14809 ; FALLBACK25-NEXT: movl 104(%esp,%ebp), %edx
14810 ; FALLBACK25-NEXT: movl 100(%esp,%ebp), %eax
14811 ; FALLBACK25-NEXT: movl %eax, %edi
14812 ; FALLBACK25-NEXT: shrdl %cl, %edx, %edi
14813 ; FALLBACK25-NEXT: shrdl %cl, %eax, %esi
14814 ; FALLBACK25-NEXT: movl 48(%esp,%ebp), %ebx
14815 ; FALLBACK25-NEXT: movl 108(%esp,%ebp), %eax
14816 ; FALLBACK25-NEXT: shrdl %cl, %eax, %edx
14817 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %ebp
14818 ; FALLBACK25-NEXT: movl %edx, 56(%ebp)
14819 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
14820 ; FALLBACK25-NEXT: shrdl %cl, %edx, %ebx
14821 ; FALLBACK25-NEXT: # kill: def $cl killed $cl killed $ecx
14822 ; FALLBACK25-NEXT: shrl %cl, %eax
14823 ; FALLBACK25-NEXT: movl %eax, 60(%ebp)
14824 ; FALLBACK25-NEXT: movl %esi, 48(%ebp)
14825 ; FALLBACK25-NEXT: movl %edi, 52(%ebp)
14826 ; FALLBACK25-NEXT: movl (%esp), %eax # 4-byte Reload
14827 ; FALLBACK25-NEXT: movl %eax, 40(%ebp)
14828 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14829 ; FALLBACK25-NEXT: movl %eax, 44(%ebp)
14830 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14831 ; FALLBACK25-NEXT: movl %eax, 32(%ebp)
14832 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14833 ; FALLBACK25-NEXT: movl %eax, 36(%ebp)
14834 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14835 ; FALLBACK25-NEXT: movl %eax, 24(%ebp)
14836 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14837 ; FALLBACK25-NEXT: movl %eax, 28(%ebp)
14838 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14839 ; FALLBACK25-NEXT: movl %eax, 16(%ebp)
14840 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14841 ; FALLBACK25-NEXT: movl %eax, 20(%ebp)
14842 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14843 ; FALLBACK25-NEXT: movl %eax, 8(%ebp)
14844 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14845 ; FALLBACK25-NEXT: movl %eax, 12(%ebp)
14846 ; FALLBACK25-NEXT: movl %ebx, (%ebp)
14847 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14848 ; FALLBACK25-NEXT: movl %eax, 4(%ebp)
14849 ; FALLBACK25-NEXT: addl $188, %esp
14850 ; FALLBACK25-NEXT: popl %esi
14851 ; FALLBACK25-NEXT: popl %edi
14852 ; FALLBACK25-NEXT: popl %ebx
14853 ; FALLBACK25-NEXT: popl %ebp
14854 ; FALLBACK25-NEXT: vzeroupper
14855 ; FALLBACK25-NEXT: retl
14857 ; FALLBACK26-LABEL: lshr_64bytes:
14858 ; FALLBACK26: # %bb.0:
14859 ; FALLBACK26-NEXT: pushl %ebp
14860 ; FALLBACK26-NEXT: pushl %ebx
14861 ; FALLBACK26-NEXT: pushl %edi
14862 ; FALLBACK26-NEXT: pushl %esi
14863 ; FALLBACK26-NEXT: subl $204, %esp
14864 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %eax
14865 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %ecx
14866 ; FALLBACK26-NEXT: vmovups (%ecx), %ymm0
14867 ; FALLBACK26-NEXT: vmovups 32(%ecx), %ymm1
14868 ; FALLBACK26-NEXT: movl (%eax), %ecx
14869 ; FALLBACK26-NEXT: vxorps %xmm2, %xmm2, %xmm2
14870 ; FALLBACK26-NEXT: vmovups %ymm2, {{[0-9]+}}(%esp)
14871 ; FALLBACK26-NEXT: vmovups %ymm2, {{[0-9]+}}(%esp)
14872 ; FALLBACK26-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
14873 ; FALLBACK26-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
14874 ; FALLBACK26-NEXT: leal (,%ecx,8), %edx
14875 ; FALLBACK26-NEXT: andl $24, %edx
14876 ; FALLBACK26-NEXT: andl $60, %ecx
14877 ; FALLBACK26-NEXT: movl 68(%esp,%ecx), %esi
14878 ; FALLBACK26-NEXT: movl 72(%esp,%ecx), %eax
14879 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14880 ; FALLBACK26-NEXT: shrxl %edx, %esi, %edi
14881 ; FALLBACK26-NEXT: movl %edx, %ebx
14882 ; FALLBACK26-NEXT: notb %bl
14883 ; FALLBACK26-NEXT: leal (%eax,%eax), %ebp
14884 ; FALLBACK26-NEXT: shlxl %ebx, %ebp, %ebp
14885 ; FALLBACK26-NEXT: orl %edi, %ebp
14886 ; FALLBACK26-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14887 ; FALLBACK26-NEXT: shrxl %edx, 64(%esp,%ecx), %edi
14888 ; FALLBACK26-NEXT: addl %esi, %esi
14889 ; FALLBACK26-NEXT: shlxl %ebx, %esi, %esi
14890 ; FALLBACK26-NEXT: orl %edi, %esi
14891 ; FALLBACK26-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14892 ; FALLBACK26-NEXT: movl 80(%esp,%ecx), %esi
14893 ; FALLBACK26-NEXT: leal (%esi,%esi), %edi
14894 ; FALLBACK26-NEXT: shlxl %ebx, %edi, %eax
14895 ; FALLBACK26-NEXT: movl 76(%esp,%ecx), %edi
14896 ; FALLBACK26-NEXT: shrxl %edx, %edi, %ebp
14897 ; FALLBACK26-NEXT: orl %ebp, %eax
14898 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14899 ; FALLBACK26-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
14900 ; FALLBACK26-NEXT: addl %edi, %edi
14901 ; FALLBACK26-NEXT: shlxl %ebx, %edi, %edi
14902 ; FALLBACK26-NEXT: orl %eax, %edi
14903 ; FALLBACK26-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14904 ; FALLBACK26-NEXT: movl 88(%esp,%ecx), %eax
14905 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14906 ; FALLBACK26-NEXT: leal (%eax,%eax), %edi
14907 ; FALLBACK26-NEXT: shlxl %ebx, %edi, %eax
14908 ; FALLBACK26-NEXT: movl 84(%esp,%ecx), %edi
14909 ; FALLBACK26-NEXT: shrxl %edx, %edi, %ebp
14910 ; FALLBACK26-NEXT: orl %ebp, %eax
14911 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14912 ; FALLBACK26-NEXT: shrxl %edx, %esi, %esi
14913 ; FALLBACK26-NEXT: addl %edi, %edi
14914 ; FALLBACK26-NEXT: shlxl %ebx, %edi, %eax
14915 ; FALLBACK26-NEXT: orl %esi, %eax
14916 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14917 ; FALLBACK26-NEXT: movl 96(%esp,%ecx), %esi
14918 ; FALLBACK26-NEXT: leal (%esi,%esi), %edi
14919 ; FALLBACK26-NEXT: shlxl %ebx, %edi, %eax
14920 ; FALLBACK26-NEXT: movl 92(%esp,%ecx), %edi
14921 ; FALLBACK26-NEXT: shrxl %edx, %edi, %ebp
14922 ; FALLBACK26-NEXT: orl %ebp, %eax
14923 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14924 ; FALLBACK26-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
14925 ; FALLBACK26-NEXT: addl %edi, %edi
14926 ; FALLBACK26-NEXT: shlxl %ebx, %edi, %edi
14927 ; FALLBACK26-NEXT: orl %eax, %edi
14928 ; FALLBACK26-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14929 ; FALLBACK26-NEXT: movl 104(%esp,%ecx), %eax
14930 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14931 ; FALLBACK26-NEXT: leal (%eax,%eax), %edi
14932 ; FALLBACK26-NEXT: shlxl %ebx, %edi, %eax
14933 ; FALLBACK26-NEXT: movl 100(%esp,%ecx), %edi
14934 ; FALLBACK26-NEXT: shrxl %edx, %edi, %ebp
14935 ; FALLBACK26-NEXT: orl %ebp, %eax
14936 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14937 ; FALLBACK26-NEXT: shrxl %edx, %esi, %esi
14938 ; FALLBACK26-NEXT: addl %edi, %edi
14939 ; FALLBACK26-NEXT: shlxl %ebx, %edi, %eax
14940 ; FALLBACK26-NEXT: orl %esi, %eax
14941 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14942 ; FALLBACK26-NEXT: movl 112(%esp,%ecx), %eax
14943 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14944 ; FALLBACK26-NEXT: leal (%eax,%eax), %esi
14945 ; FALLBACK26-NEXT: shlxl %ebx, %esi, %eax
14946 ; FALLBACK26-NEXT: movl 108(%esp,%ecx), %esi
14947 ; FALLBACK26-NEXT: shrxl %edx, %esi, %ebp
14948 ; FALLBACK26-NEXT: orl %ebp, %eax
14949 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14950 ; FALLBACK26-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
14951 ; FALLBACK26-NEXT: addl %esi, %esi
14952 ; FALLBACK26-NEXT: shlxl %ebx, %esi, %esi
14953 ; FALLBACK26-NEXT: orl %eax, %esi
14954 ; FALLBACK26-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14955 ; FALLBACK26-NEXT: movl 120(%esp,%ecx), %ebp
14956 ; FALLBACK26-NEXT: leal (%ebp,%ebp), %eax
14957 ; FALLBACK26-NEXT: shlxl %ebx, %eax, %esi
14958 ; FALLBACK26-NEXT: movl 116(%esp,%ecx), %eax
14959 ; FALLBACK26-NEXT: shrxl %edx, %eax, %edi
14960 ; FALLBACK26-NEXT: orl %edi, %esi
14961 ; FALLBACK26-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
14962 ; FALLBACK26-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
14963 ; FALLBACK26-NEXT: addl %eax, %eax
14964 ; FALLBACK26-NEXT: shlxl %ebx, %eax, %edi
14965 ; FALLBACK26-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
14966 ; FALLBACK26-NEXT: shrxl %edx, %ebp, %eax
14967 ; FALLBACK26-NEXT: movl 124(%esp,%ecx), %ecx
14968 ; FALLBACK26-NEXT: shrxl %edx, %ecx, %edx
14969 ; FALLBACK26-NEXT: addl %ecx, %ecx
14970 ; FALLBACK26-NEXT: shlxl %ebx, %ecx, %ebx
14971 ; FALLBACK26-NEXT: orl %eax, %ebx
14972 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %ecx
14973 ; FALLBACK26-NEXT: movl %edx, 60(%ecx)
14974 ; FALLBACK26-NEXT: movl %ebx, 56(%ecx)
14975 ; FALLBACK26-NEXT: movl %edi, 48(%ecx)
14976 ; FALLBACK26-NEXT: movl %esi, 52(%ecx)
14977 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14978 ; FALLBACK26-NEXT: movl %eax, 40(%ecx)
14979 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14980 ; FALLBACK26-NEXT: movl %eax, 44(%ecx)
14981 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14982 ; FALLBACK26-NEXT: movl %eax, 32(%ecx)
14983 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14984 ; FALLBACK26-NEXT: movl %eax, 36(%ecx)
14985 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14986 ; FALLBACK26-NEXT: movl %eax, 24(%ecx)
14987 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14988 ; FALLBACK26-NEXT: movl %eax, 28(%ecx)
14989 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14990 ; FALLBACK26-NEXT: movl %eax, 16(%ecx)
14991 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14992 ; FALLBACK26-NEXT: movl %eax, 20(%ecx)
14993 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14994 ; FALLBACK26-NEXT: movl %eax, 8(%ecx)
14995 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14996 ; FALLBACK26-NEXT: movl %eax, 12(%ecx)
14997 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
14998 ; FALLBACK26-NEXT: movl %eax, (%ecx)
14999 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15000 ; FALLBACK26-NEXT: movl %eax, 4(%ecx)
15001 ; FALLBACK26-NEXT: addl $204, %esp
15002 ; FALLBACK26-NEXT: popl %esi
15003 ; FALLBACK26-NEXT: popl %edi
15004 ; FALLBACK26-NEXT: popl %ebx
15005 ; FALLBACK26-NEXT: popl %ebp
15006 ; FALLBACK26-NEXT: vzeroupper
15007 ; FALLBACK26-NEXT: retl
15009 ; FALLBACK27-LABEL: lshr_64bytes:
15010 ; FALLBACK27: # %bb.0:
15011 ; FALLBACK27-NEXT: pushl %ebp
15012 ; FALLBACK27-NEXT: pushl %ebx
15013 ; FALLBACK27-NEXT: pushl %edi
15014 ; FALLBACK27-NEXT: pushl %esi
15015 ; FALLBACK27-NEXT: subl $188, %esp
15016 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %eax
15017 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %ecx
15018 ; FALLBACK27-NEXT: vmovups (%ecx), %ymm0
15019 ; FALLBACK27-NEXT: vmovups 32(%ecx), %ymm1
15020 ; FALLBACK27-NEXT: movl (%eax), %ecx
15021 ; FALLBACK27-NEXT: vxorps %xmm2, %xmm2, %xmm2
15022 ; FALLBACK27-NEXT: vmovups %ymm2, {{[0-9]+}}(%esp)
15023 ; FALLBACK27-NEXT: vmovups %ymm2, {{[0-9]+}}(%esp)
15024 ; FALLBACK27-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
15025 ; FALLBACK27-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
15026 ; FALLBACK27-NEXT: movl %ecx, %ebp
15027 ; FALLBACK27-NEXT: andl $60, %ebp
15028 ; FALLBACK27-NEXT: movl 56(%esp,%ebp), %edx
15029 ; FALLBACK27-NEXT: movl 52(%esp,%ebp), %eax
15030 ; FALLBACK27-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15031 ; FALLBACK27-NEXT: shll $3, %ecx
15032 ; FALLBACK27-NEXT: andl $24, %ecx
15033 ; FALLBACK27-NEXT: shrdl %cl, %edx, %eax
15034 ; FALLBACK27-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15035 ; FALLBACK27-NEXT: movl 64(%esp,%ebp), %edi
15036 ; FALLBACK27-NEXT: movl 60(%esp,%ebp), %eax
15037 ; FALLBACK27-NEXT: movl %eax, %esi
15038 ; FALLBACK27-NEXT: shrdl %cl, %edi, %esi
15039 ; FALLBACK27-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15040 ; FALLBACK27-NEXT: shrdl %cl, %eax, %edx
15041 ; FALLBACK27-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15042 ; FALLBACK27-NEXT: movl 72(%esp,%ebp), %esi
15043 ; FALLBACK27-NEXT: movl 68(%esp,%ebp), %eax
15044 ; FALLBACK27-NEXT: movl %eax, %edx
15045 ; FALLBACK27-NEXT: shrdl %cl, %esi, %edx
15046 ; FALLBACK27-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15047 ; FALLBACK27-NEXT: shrdl %cl, %eax, %edi
15048 ; FALLBACK27-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15049 ; FALLBACK27-NEXT: movl 80(%esp,%ebp), %edi
15050 ; FALLBACK27-NEXT: movl 76(%esp,%ebp), %eax
15051 ; FALLBACK27-NEXT: movl %eax, %edx
15052 ; FALLBACK27-NEXT: shrdl %cl, %edi, %edx
15053 ; FALLBACK27-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15054 ; FALLBACK27-NEXT: shrdl %cl, %eax, %esi
15055 ; FALLBACK27-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15056 ; FALLBACK27-NEXT: movl 88(%esp,%ebp), %ebx
15057 ; FALLBACK27-NEXT: movl 84(%esp,%ebp), %eax
15058 ; FALLBACK27-NEXT: movl %eax, %edx
15059 ; FALLBACK27-NEXT: shrdl %cl, %ebx, %edx
15060 ; FALLBACK27-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15061 ; FALLBACK27-NEXT: shrdl %cl, %eax, %edi
15062 ; FALLBACK27-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15063 ; FALLBACK27-NEXT: movl 96(%esp,%ebp), %esi
15064 ; FALLBACK27-NEXT: movl 92(%esp,%ebp), %eax
15065 ; FALLBACK27-NEXT: movl %eax, %edx
15066 ; FALLBACK27-NEXT: shrdl %cl, %esi, %edx
15067 ; FALLBACK27-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15068 ; FALLBACK27-NEXT: shrdl %cl, %eax, %ebx
15069 ; FALLBACK27-NEXT: movl 104(%esp,%ebp), %eax
15070 ; FALLBACK27-NEXT: movl 100(%esp,%ebp), %edi
15071 ; FALLBACK27-NEXT: movl %edi, %edx
15072 ; FALLBACK27-NEXT: shrdl %cl, %eax, %edx
15073 ; FALLBACK27-NEXT: shrdl %cl, %edi, %esi
15074 ; FALLBACK27-NEXT: movl 48(%esp,%ebp), %edi
15075 ; FALLBACK27-NEXT: movl 108(%esp,%ebp), %ebp
15076 ; FALLBACK27-NEXT: movl %ebp, (%esp) # 4-byte Spill
15077 ; FALLBACK27-NEXT: shrdl %cl, %ebp, %eax
15078 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %ebp
15079 ; FALLBACK27-NEXT: movl %eax, 56(%ebp)
15080 ; FALLBACK27-NEXT: movl %esi, 48(%ebp)
15081 ; FALLBACK27-NEXT: movl %edx, 52(%ebp)
15082 ; FALLBACK27-NEXT: movl %ebx, 40(%ebp)
15083 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15084 ; FALLBACK27-NEXT: movl %eax, 44(%ebp)
15085 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15086 ; FALLBACK27-NEXT: movl %eax, 32(%ebp)
15087 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15088 ; FALLBACK27-NEXT: movl %eax, 36(%ebp)
15089 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15090 ; FALLBACK27-NEXT: movl %eax, 24(%ebp)
15091 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15092 ; FALLBACK27-NEXT: movl %eax, 28(%ebp)
15093 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15094 ; FALLBACK27-NEXT: movl %eax, 16(%ebp)
15095 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15096 ; FALLBACK27-NEXT: movl %eax, 20(%ebp)
15097 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15098 ; FALLBACK27-NEXT: movl %eax, 8(%ebp)
15099 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15100 ; FALLBACK27-NEXT: movl %eax, 12(%ebp)
15101 ; FALLBACK27-NEXT: shrxl %ecx, (%esp), %eax # 4-byte Folded Reload
15102 ; FALLBACK27-NEXT: # kill: def $cl killed $cl killed $ecx
15103 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
15104 ; FALLBACK27-NEXT: shrdl %cl, %edx, %edi
15105 ; FALLBACK27-NEXT: movl %edi, (%ebp)
15106 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15107 ; FALLBACK27-NEXT: movl %ecx, 4(%ebp)
15108 ; FALLBACK27-NEXT: movl %eax, 60(%ebp)
15109 ; FALLBACK27-NEXT: addl $188, %esp
15110 ; FALLBACK27-NEXT: popl %esi
15111 ; FALLBACK27-NEXT: popl %edi
15112 ; FALLBACK27-NEXT: popl %ebx
15113 ; FALLBACK27-NEXT: popl %ebp
15114 ; FALLBACK27-NEXT: vzeroupper
15115 ; FALLBACK27-NEXT: retl
15117 ; FALLBACK28-LABEL: lshr_64bytes:
15118 ; FALLBACK28: # %bb.0:
15119 ; FALLBACK28-NEXT: pushl %ebp
15120 ; FALLBACK28-NEXT: pushl %ebx
15121 ; FALLBACK28-NEXT: pushl %edi
15122 ; FALLBACK28-NEXT: pushl %esi
15123 ; FALLBACK28-NEXT: subl $204, %esp
15124 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %eax
15125 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %ecx
15126 ; FALLBACK28-NEXT: vmovups (%ecx), %zmm0
15127 ; FALLBACK28-NEXT: movl (%eax), %ecx
15128 ; FALLBACK28-NEXT: vxorps %xmm1, %xmm1, %xmm1
15129 ; FALLBACK28-NEXT: vmovups %zmm1, {{[0-9]+}}(%esp)
15130 ; FALLBACK28-NEXT: vmovups %zmm0, {{[0-9]+}}(%esp)
15131 ; FALLBACK28-NEXT: movl %ecx, %esi
15132 ; FALLBACK28-NEXT: andl $60, %esi
15133 ; FALLBACK28-NEXT: movl 68(%esp,%esi), %edx
15134 ; FALLBACK28-NEXT: shll $3, %ecx
15135 ; FALLBACK28-NEXT: andl $24, %ecx
15136 ; FALLBACK28-NEXT: movl %edx, %edi
15137 ; FALLBACK28-NEXT: shrl %cl, %edi
15138 ; FALLBACK28-NEXT: movl 72(%esp,%esi), %eax
15139 ; FALLBACK28-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15140 ; FALLBACK28-NEXT: leal (%eax,%eax), %ebx
15141 ; FALLBACK28-NEXT: movl %ecx, %ebp
15142 ; FALLBACK28-NEXT: movb %cl, %ch
15143 ; FALLBACK28-NEXT: notb %ch
15144 ; FALLBACK28-NEXT: movb %ch, %cl
15145 ; FALLBACK28-NEXT: shll %cl, %ebx
15146 ; FALLBACK28-NEXT: orl %edi, %ebx
15147 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15148 ; FALLBACK28-NEXT: movl 64(%esp,%esi), %edi
15149 ; FALLBACK28-NEXT: movl %ebp, %eax
15150 ; FALLBACK28-NEXT: movb %al, %cl
15151 ; FALLBACK28-NEXT: shrl %cl, %edi
15152 ; FALLBACK28-NEXT: addl %edx, %edx
15153 ; FALLBACK28-NEXT: movb %ch, %cl
15154 ; FALLBACK28-NEXT: shll %cl, %edx
15155 ; FALLBACK28-NEXT: orl %edi, %edx
15156 ; FALLBACK28-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15157 ; FALLBACK28-NEXT: movl 76(%esp,%esi), %edx
15158 ; FALLBACK28-NEXT: movl %edx, %ebp
15159 ; FALLBACK28-NEXT: movb %al, %cl
15160 ; FALLBACK28-NEXT: shrl %cl, %ebp
15161 ; FALLBACK28-NEXT: movl 80(%esp,%esi), %edi
15162 ; FALLBACK28-NEXT: leal (%edi,%edi), %ebx
15163 ; FALLBACK28-NEXT: movb %ch, %cl
15164 ; FALLBACK28-NEXT: shll %cl, %ebx
15165 ; FALLBACK28-NEXT: orl %ebp, %ebx
15166 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15167 ; FALLBACK28-NEXT: movb %al, %cl
15168 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
15169 ; FALLBACK28-NEXT: shrl %cl, %ebx
15170 ; FALLBACK28-NEXT: addl %edx, %edx
15171 ; FALLBACK28-NEXT: movb %ch, %cl
15172 ; FALLBACK28-NEXT: shll %cl, %edx
15173 ; FALLBACK28-NEXT: orl %ebx, %edx
15174 ; FALLBACK28-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15175 ; FALLBACK28-NEXT: movl 84(%esp,%esi), %ebx
15176 ; FALLBACK28-NEXT: movl %ebx, %ebp
15177 ; FALLBACK28-NEXT: movl %eax, %edx
15178 ; FALLBACK28-NEXT: movb %dl, %cl
15179 ; FALLBACK28-NEXT: shrl %cl, %ebp
15180 ; FALLBACK28-NEXT: movl 88(%esp,%esi), %eax
15181 ; FALLBACK28-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15182 ; FALLBACK28-NEXT: addl %eax, %eax
15183 ; FALLBACK28-NEXT: movb %ch, %cl
15184 ; FALLBACK28-NEXT: shll %cl, %eax
15185 ; FALLBACK28-NEXT: orl %ebp, %eax
15186 ; FALLBACK28-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15187 ; FALLBACK28-NEXT: movb %dl, %cl
15188 ; FALLBACK28-NEXT: shrl %cl, %edi
15189 ; FALLBACK28-NEXT: addl %ebx, %ebx
15190 ; FALLBACK28-NEXT: movb %ch, %cl
15191 ; FALLBACK28-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
15192 ; FALLBACK28-NEXT: shll %cl, %ebx
15193 ; FALLBACK28-NEXT: orl %edi, %ebx
15194 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15195 ; FALLBACK28-NEXT: movl 92(%esp,%esi), %ebx
15196 ; FALLBACK28-NEXT: movl %ebx, %ebp
15197 ; FALLBACK28-NEXT: movb %dl, %cl
15198 ; FALLBACK28-NEXT: shrl %cl, %ebp
15199 ; FALLBACK28-NEXT: movl 96(%esp,%esi), %edi
15200 ; FALLBACK28-NEXT: leal (%edi,%edi), %eax
15201 ; FALLBACK28-NEXT: movb %ch, %cl
15202 ; FALLBACK28-NEXT: shll %cl, %eax
15203 ; FALLBACK28-NEXT: orl %ebp, %eax
15204 ; FALLBACK28-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15205 ; FALLBACK28-NEXT: movb %dl, %cl
15206 ; FALLBACK28-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15207 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15208 ; FALLBACK28-NEXT: shrl %cl, %eax
15209 ; FALLBACK28-NEXT: addl %ebx, %ebx
15210 ; FALLBACK28-NEXT: movb %ch, %cl
15211 ; FALLBACK28-NEXT: shll %cl, %ebx
15212 ; FALLBACK28-NEXT: orl %eax, %ebx
15213 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15214 ; FALLBACK28-NEXT: movl 100(%esp,%esi), %ebx
15215 ; FALLBACK28-NEXT: movl %ebx, %ebp
15216 ; FALLBACK28-NEXT: movb %dl, %cl
15217 ; FALLBACK28-NEXT: shrl %cl, %ebp
15218 ; FALLBACK28-NEXT: movl 104(%esp,%esi), %edx
15219 ; FALLBACK28-NEXT: leal (%edx,%edx), %eax
15220 ; FALLBACK28-NEXT: movb %ch, %cl
15221 ; FALLBACK28-NEXT: shll %cl, %eax
15222 ; FALLBACK28-NEXT: orl %ebp, %eax
15223 ; FALLBACK28-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15224 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15225 ; FALLBACK28-NEXT: movb %al, %cl
15226 ; FALLBACK28-NEXT: shrl %cl, %edi
15227 ; FALLBACK28-NEXT: addl %ebx, %ebx
15228 ; FALLBACK28-NEXT: movb %ch, %cl
15229 ; FALLBACK28-NEXT: shll %cl, %ebx
15230 ; FALLBACK28-NEXT: orl %edi, %ebx
15231 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15232 ; FALLBACK28-NEXT: movl 108(%esp,%esi), %edi
15233 ; FALLBACK28-NEXT: movl %edi, %ebp
15234 ; FALLBACK28-NEXT: movl %eax, %ecx
15235 ; FALLBACK28-NEXT: shrl %cl, %ebp
15236 ; FALLBACK28-NEXT: movl 112(%esp,%esi), %ecx
15237 ; FALLBACK28-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15238 ; FALLBACK28-NEXT: leal (%ecx,%ecx), %ebx
15239 ; FALLBACK28-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
15240 ; FALLBACK28-NEXT: movb %ch, %cl
15241 ; FALLBACK28-NEXT: shll %cl, %ebx
15242 ; FALLBACK28-NEXT: orl %ebp, %ebx
15243 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15244 ; FALLBACK28-NEXT: movb %al, %cl
15245 ; FALLBACK28-NEXT: shrl %cl, %edx
15246 ; FALLBACK28-NEXT: addl %edi, %edi
15247 ; FALLBACK28-NEXT: movb %ch, %cl
15248 ; FALLBACK28-NEXT: shll %cl, %edi
15249 ; FALLBACK28-NEXT: orl %edx, %edi
15250 ; FALLBACK28-NEXT: movl %esi, %edx
15251 ; FALLBACK28-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15252 ; FALLBACK28-NEXT: movl 116(%esp,%esi), %esi
15253 ; FALLBACK28-NEXT: movl %esi, %ebx
15254 ; FALLBACK28-NEXT: movb %al, %cl
15255 ; FALLBACK28-NEXT: shrl %cl, %ebx
15256 ; FALLBACK28-NEXT: movl 120(%esp,%edx), %eax
15257 ; FALLBACK28-NEXT: leal (%eax,%eax), %ebp
15258 ; FALLBACK28-NEXT: movb %ch, %cl
15259 ; FALLBACK28-NEXT: shll %cl, %ebp
15260 ; FALLBACK28-NEXT: orl %ebx, %ebp
15261 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
15262 ; FALLBACK28-NEXT: movb %dl, %cl
15263 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
15264 ; FALLBACK28-NEXT: shrl %cl, %ebx
15265 ; FALLBACK28-NEXT: addl %esi, %esi
15266 ; FALLBACK28-NEXT: movb %ch, %cl
15267 ; FALLBACK28-NEXT: shll %cl, %esi
15268 ; FALLBACK28-NEXT: orl %ebx, %esi
15269 ; FALLBACK28-NEXT: movb %dl, %cl
15270 ; FALLBACK28-NEXT: shrl %cl, %eax
15271 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
15272 ; FALLBACK28-NEXT: movl 124(%esp,%edx), %ebx
15273 ; FALLBACK28-NEXT: leal (%ebx,%ebx), %edx
15274 ; FALLBACK28-NEXT: movb %ch, %cl
15275 ; FALLBACK28-NEXT: shll %cl, %edx
15276 ; FALLBACK28-NEXT: orl %eax, %edx
15277 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15278 ; FALLBACK28-NEXT: # kill: def $cl killed $cl killed $ecx
15279 ; FALLBACK28-NEXT: shrl %cl, %ebx
15280 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %eax
15281 ; FALLBACK28-NEXT: movl %ebx, 60(%eax)
15282 ; FALLBACK28-NEXT: movl %edx, 56(%eax)
15283 ; FALLBACK28-NEXT: movl %esi, 48(%eax)
15284 ; FALLBACK28-NEXT: movl %ebp, 52(%eax)
15285 ; FALLBACK28-NEXT: movl %edi, 40(%eax)
15286 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15287 ; FALLBACK28-NEXT: movl %ecx, 44(%eax)
15288 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15289 ; FALLBACK28-NEXT: movl %ecx, 32(%eax)
15290 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15291 ; FALLBACK28-NEXT: movl %ecx, 36(%eax)
15292 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15293 ; FALLBACK28-NEXT: movl %ecx, 24(%eax)
15294 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15295 ; FALLBACK28-NEXT: movl %ecx, 28(%eax)
15296 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15297 ; FALLBACK28-NEXT: movl %ecx, 16(%eax)
15298 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15299 ; FALLBACK28-NEXT: movl %ecx, 20(%eax)
15300 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15301 ; FALLBACK28-NEXT: movl %ecx, 8(%eax)
15302 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15303 ; FALLBACK28-NEXT: movl %ecx, 12(%eax)
15304 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15305 ; FALLBACK28-NEXT: movl %ecx, (%eax)
15306 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15307 ; FALLBACK28-NEXT: movl %ecx, 4(%eax)
15308 ; FALLBACK28-NEXT: addl $204, %esp
15309 ; FALLBACK28-NEXT: popl %esi
15310 ; FALLBACK28-NEXT: popl %edi
15311 ; FALLBACK28-NEXT: popl %ebx
15312 ; FALLBACK28-NEXT: popl %ebp
15313 ; FALLBACK28-NEXT: vzeroupper
15314 ; FALLBACK28-NEXT: retl
15316 ; FALLBACK29-LABEL: lshr_64bytes:
15317 ; FALLBACK29: # %bb.0:
15318 ; FALLBACK29-NEXT: pushl %ebp
15319 ; FALLBACK29-NEXT: pushl %ebx
15320 ; FALLBACK29-NEXT: pushl %edi
15321 ; FALLBACK29-NEXT: pushl %esi
15322 ; FALLBACK29-NEXT: subl $188, %esp
15323 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %eax
15324 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %ecx
15325 ; FALLBACK29-NEXT: vmovups (%ecx), %zmm0
15326 ; FALLBACK29-NEXT: movl (%eax), %ecx
15327 ; FALLBACK29-NEXT: vxorps %xmm1, %xmm1, %xmm1
15328 ; FALLBACK29-NEXT: vmovups %zmm1, {{[0-9]+}}(%esp)
15329 ; FALLBACK29-NEXT: vmovups %zmm0, {{[0-9]+}}(%esp)
15330 ; FALLBACK29-NEXT: movl %ecx, %ebp
15331 ; FALLBACK29-NEXT: andl $60, %ebp
15332 ; FALLBACK29-NEXT: movl 56(%esp,%ebp), %edx
15333 ; FALLBACK29-NEXT: movl 52(%esp,%ebp), %eax
15334 ; FALLBACK29-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15335 ; FALLBACK29-NEXT: shll $3, %ecx
15336 ; FALLBACK29-NEXT: andl $24, %ecx
15337 ; FALLBACK29-NEXT: shrdl %cl, %edx, %eax
15338 ; FALLBACK29-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15339 ; FALLBACK29-NEXT: movl 64(%esp,%ebp), %edi
15340 ; FALLBACK29-NEXT: movl 60(%esp,%ebp), %eax
15341 ; FALLBACK29-NEXT: movl %eax, %esi
15342 ; FALLBACK29-NEXT: shrdl %cl, %edi, %esi
15343 ; FALLBACK29-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15344 ; FALLBACK29-NEXT: shrdl %cl, %eax, %edx
15345 ; FALLBACK29-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15346 ; FALLBACK29-NEXT: movl 72(%esp,%ebp), %esi
15347 ; FALLBACK29-NEXT: movl 68(%esp,%ebp), %eax
15348 ; FALLBACK29-NEXT: movl %eax, %edx
15349 ; FALLBACK29-NEXT: shrdl %cl, %esi, %edx
15350 ; FALLBACK29-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15351 ; FALLBACK29-NEXT: shrdl %cl, %eax, %edi
15352 ; FALLBACK29-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15353 ; FALLBACK29-NEXT: movl 80(%esp,%ebp), %edi
15354 ; FALLBACK29-NEXT: movl 76(%esp,%ebp), %eax
15355 ; FALLBACK29-NEXT: movl %eax, %edx
15356 ; FALLBACK29-NEXT: shrdl %cl, %edi, %edx
15357 ; FALLBACK29-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15358 ; FALLBACK29-NEXT: shrdl %cl, %eax, %esi
15359 ; FALLBACK29-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15360 ; FALLBACK29-NEXT: movl 88(%esp,%ebp), %esi
15361 ; FALLBACK29-NEXT: movl 84(%esp,%ebp), %eax
15362 ; FALLBACK29-NEXT: movl %eax, %edx
15363 ; FALLBACK29-NEXT: shrdl %cl, %esi, %edx
15364 ; FALLBACK29-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15365 ; FALLBACK29-NEXT: movl %esi, %edx
15366 ; FALLBACK29-NEXT: shrdl %cl, %eax, %edi
15367 ; FALLBACK29-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15368 ; FALLBACK29-NEXT: movl 96(%esp,%ebp), %esi
15369 ; FALLBACK29-NEXT: movl 92(%esp,%ebp), %eax
15370 ; FALLBACK29-NEXT: movl %eax, %edi
15371 ; FALLBACK29-NEXT: shrdl %cl, %esi, %edi
15372 ; FALLBACK29-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15373 ; FALLBACK29-NEXT: shrdl %cl, %eax, %edx
15374 ; FALLBACK29-NEXT: movl %edx, (%esp) # 4-byte Spill
15375 ; FALLBACK29-NEXT: movl 104(%esp,%ebp), %edx
15376 ; FALLBACK29-NEXT: movl 100(%esp,%ebp), %eax
15377 ; FALLBACK29-NEXT: movl %eax, %edi
15378 ; FALLBACK29-NEXT: shrdl %cl, %edx, %edi
15379 ; FALLBACK29-NEXT: shrdl %cl, %eax, %esi
15380 ; FALLBACK29-NEXT: movl 48(%esp,%ebp), %ebx
15381 ; FALLBACK29-NEXT: movl 108(%esp,%ebp), %eax
15382 ; FALLBACK29-NEXT: shrdl %cl, %eax, %edx
15383 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %ebp
15384 ; FALLBACK29-NEXT: movl %edx, 56(%ebp)
15385 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
15386 ; FALLBACK29-NEXT: shrdl %cl, %edx, %ebx
15387 ; FALLBACK29-NEXT: # kill: def $cl killed $cl killed $ecx
15388 ; FALLBACK29-NEXT: shrl %cl, %eax
15389 ; FALLBACK29-NEXT: movl %eax, 60(%ebp)
15390 ; FALLBACK29-NEXT: movl %esi, 48(%ebp)
15391 ; FALLBACK29-NEXT: movl %edi, 52(%ebp)
15392 ; FALLBACK29-NEXT: movl (%esp), %eax # 4-byte Reload
15393 ; FALLBACK29-NEXT: movl %eax, 40(%ebp)
15394 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15395 ; FALLBACK29-NEXT: movl %eax, 44(%ebp)
15396 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15397 ; FALLBACK29-NEXT: movl %eax, 32(%ebp)
15398 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15399 ; FALLBACK29-NEXT: movl %eax, 36(%ebp)
15400 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15401 ; FALLBACK29-NEXT: movl %eax, 24(%ebp)
15402 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15403 ; FALLBACK29-NEXT: movl %eax, 28(%ebp)
15404 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15405 ; FALLBACK29-NEXT: movl %eax, 16(%ebp)
15406 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15407 ; FALLBACK29-NEXT: movl %eax, 20(%ebp)
15408 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15409 ; FALLBACK29-NEXT: movl %eax, 8(%ebp)
15410 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15411 ; FALLBACK29-NEXT: movl %eax, 12(%ebp)
15412 ; FALLBACK29-NEXT: movl %ebx, (%ebp)
15413 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15414 ; FALLBACK29-NEXT: movl %eax, 4(%ebp)
15415 ; FALLBACK29-NEXT: addl $188, %esp
15416 ; FALLBACK29-NEXT: popl %esi
15417 ; FALLBACK29-NEXT: popl %edi
15418 ; FALLBACK29-NEXT: popl %ebx
15419 ; FALLBACK29-NEXT: popl %ebp
15420 ; FALLBACK29-NEXT: vzeroupper
15421 ; FALLBACK29-NEXT: retl
15423 ; FALLBACK30-LABEL: lshr_64bytes:
15424 ; FALLBACK30: # %bb.0:
15425 ; FALLBACK30-NEXT: pushl %ebp
15426 ; FALLBACK30-NEXT: pushl %ebx
15427 ; FALLBACK30-NEXT: pushl %edi
15428 ; FALLBACK30-NEXT: pushl %esi
15429 ; FALLBACK30-NEXT: subl $204, %esp
15430 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %eax
15431 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %ecx
15432 ; FALLBACK30-NEXT: vmovups (%ecx), %zmm0
15433 ; FALLBACK30-NEXT: movl (%eax), %edx
15434 ; FALLBACK30-NEXT: vxorps %xmm1, %xmm1, %xmm1
15435 ; FALLBACK30-NEXT: vmovups %zmm1, {{[0-9]+}}(%esp)
15436 ; FALLBACK30-NEXT: vmovups %zmm0, {{[0-9]+}}(%esp)
15437 ; FALLBACK30-NEXT: leal (,%edx,8), %ecx
15438 ; FALLBACK30-NEXT: andl $24, %ecx
15439 ; FALLBACK30-NEXT: andl $60, %edx
15440 ; FALLBACK30-NEXT: movl 68(%esp,%edx), %esi
15441 ; FALLBACK30-NEXT: movl 72(%esp,%edx), %eax
15442 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15443 ; FALLBACK30-NEXT: shrxl %ecx, %esi, %edi
15444 ; FALLBACK30-NEXT: movl %ecx, %ebx
15445 ; FALLBACK30-NEXT: notb %bl
15446 ; FALLBACK30-NEXT: leal (%eax,%eax), %ebp
15447 ; FALLBACK30-NEXT: shlxl %ebx, %ebp, %ebp
15448 ; FALLBACK30-NEXT: orl %edi, %ebp
15449 ; FALLBACK30-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15450 ; FALLBACK30-NEXT: shrxl %ecx, 64(%esp,%edx), %edi
15451 ; FALLBACK30-NEXT: addl %esi, %esi
15452 ; FALLBACK30-NEXT: shlxl %ebx, %esi, %esi
15453 ; FALLBACK30-NEXT: orl %edi, %esi
15454 ; FALLBACK30-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15455 ; FALLBACK30-NEXT: movl 80(%esp,%edx), %esi
15456 ; FALLBACK30-NEXT: leal (%esi,%esi), %edi
15457 ; FALLBACK30-NEXT: shlxl %ebx, %edi, %eax
15458 ; FALLBACK30-NEXT: movl 76(%esp,%edx), %edi
15459 ; FALLBACK30-NEXT: shrxl %ecx, %edi, %ebp
15460 ; FALLBACK30-NEXT: orl %ebp, %eax
15461 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15462 ; FALLBACK30-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
15463 ; FALLBACK30-NEXT: addl %edi, %edi
15464 ; FALLBACK30-NEXT: shlxl %ebx, %edi, %edi
15465 ; FALLBACK30-NEXT: orl %eax, %edi
15466 ; FALLBACK30-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15467 ; FALLBACK30-NEXT: movl 88(%esp,%edx), %eax
15468 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15469 ; FALLBACK30-NEXT: leal (%eax,%eax), %edi
15470 ; FALLBACK30-NEXT: shlxl %ebx, %edi, %eax
15471 ; FALLBACK30-NEXT: movl 84(%esp,%edx), %edi
15472 ; FALLBACK30-NEXT: shrxl %ecx, %edi, %ebp
15473 ; FALLBACK30-NEXT: orl %ebp, %eax
15474 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15475 ; FALLBACK30-NEXT: shrxl %ecx, %esi, %esi
15476 ; FALLBACK30-NEXT: addl %edi, %edi
15477 ; FALLBACK30-NEXT: shlxl %ebx, %edi, %eax
15478 ; FALLBACK30-NEXT: orl %esi, %eax
15479 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15480 ; FALLBACK30-NEXT: movl 96(%esp,%edx), %esi
15481 ; FALLBACK30-NEXT: leal (%esi,%esi), %edi
15482 ; FALLBACK30-NEXT: shlxl %ebx, %edi, %eax
15483 ; FALLBACK30-NEXT: movl 92(%esp,%edx), %edi
15484 ; FALLBACK30-NEXT: shrxl %ecx, %edi, %ebp
15485 ; FALLBACK30-NEXT: orl %ebp, %eax
15486 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15487 ; FALLBACK30-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
15488 ; FALLBACK30-NEXT: addl %edi, %edi
15489 ; FALLBACK30-NEXT: shlxl %ebx, %edi, %edi
15490 ; FALLBACK30-NEXT: orl %eax, %edi
15491 ; FALLBACK30-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15492 ; FALLBACK30-NEXT: movl 104(%esp,%edx), %eax
15493 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15494 ; FALLBACK30-NEXT: leal (%eax,%eax), %edi
15495 ; FALLBACK30-NEXT: shlxl %ebx, %edi, %eax
15496 ; FALLBACK30-NEXT: movl 100(%esp,%edx), %edi
15497 ; FALLBACK30-NEXT: shrxl %ecx, %edi, %ebp
15498 ; FALLBACK30-NEXT: orl %ebp, %eax
15499 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15500 ; FALLBACK30-NEXT: shrxl %ecx, %esi, %esi
15501 ; FALLBACK30-NEXT: addl %edi, %edi
15502 ; FALLBACK30-NEXT: shlxl %ebx, %edi, %eax
15503 ; FALLBACK30-NEXT: orl %esi, %eax
15504 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15505 ; FALLBACK30-NEXT: movl 112(%esp,%edx), %eax
15506 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15507 ; FALLBACK30-NEXT: leal (%eax,%eax), %esi
15508 ; FALLBACK30-NEXT: shlxl %ebx, %esi, %eax
15509 ; FALLBACK30-NEXT: movl 108(%esp,%edx), %esi
15510 ; FALLBACK30-NEXT: shrxl %ecx, %esi, %ebp
15511 ; FALLBACK30-NEXT: orl %ebp, %eax
15512 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15513 ; FALLBACK30-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
15514 ; FALLBACK30-NEXT: addl %esi, %esi
15515 ; FALLBACK30-NEXT: shlxl %ebx, %esi, %esi
15516 ; FALLBACK30-NEXT: orl %eax, %esi
15517 ; FALLBACK30-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15518 ; FALLBACK30-NEXT: movl 120(%esp,%edx), %ebp
15519 ; FALLBACK30-NEXT: leal (%ebp,%ebp), %eax
15520 ; FALLBACK30-NEXT: shlxl %ebx, %eax, %esi
15521 ; FALLBACK30-NEXT: movl 116(%esp,%edx), %eax
15522 ; FALLBACK30-NEXT: shrxl %ecx, %eax, %edi
15523 ; FALLBACK30-NEXT: orl %edi, %esi
15524 ; FALLBACK30-NEXT: shrxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
15525 ; FALLBACK30-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15526 ; FALLBACK30-NEXT: addl %eax, %eax
15527 ; FALLBACK30-NEXT: shlxl %ebx, %eax, %edi
15528 ; FALLBACK30-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
15529 ; FALLBACK30-NEXT: shrxl %ecx, %ebp, %eax
15530 ; FALLBACK30-NEXT: movl 124(%esp,%edx), %edx
15531 ; FALLBACK30-NEXT: shrxl %ecx, %edx, %ebp
15532 ; FALLBACK30-NEXT: leal (%edx,%edx), %ecx
15533 ; FALLBACK30-NEXT: shlxl %ebx, %ecx, %edx
15534 ; FALLBACK30-NEXT: orl %eax, %edx
15535 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %ecx
15536 ; FALLBACK30-NEXT: movl %ebp, 60(%ecx)
15537 ; FALLBACK30-NEXT: movl %edx, 56(%ecx)
15538 ; FALLBACK30-NEXT: movl %edi, 48(%ecx)
15539 ; FALLBACK30-NEXT: movl %esi, 52(%ecx)
15540 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15541 ; FALLBACK30-NEXT: movl %eax, 40(%ecx)
15542 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15543 ; FALLBACK30-NEXT: movl %eax, 44(%ecx)
15544 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15545 ; FALLBACK30-NEXT: movl %eax, 32(%ecx)
15546 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15547 ; FALLBACK30-NEXT: movl %eax, 36(%ecx)
15548 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15549 ; FALLBACK30-NEXT: movl %eax, 24(%ecx)
15550 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15551 ; FALLBACK30-NEXT: movl %eax, 28(%ecx)
15552 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15553 ; FALLBACK30-NEXT: movl %eax, 16(%ecx)
15554 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15555 ; FALLBACK30-NEXT: movl %eax, 20(%ecx)
15556 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15557 ; FALLBACK30-NEXT: movl %eax, 8(%ecx)
15558 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15559 ; FALLBACK30-NEXT: movl %eax, 12(%ecx)
15560 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15561 ; FALLBACK30-NEXT: movl %eax, (%ecx)
15562 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15563 ; FALLBACK30-NEXT: movl %eax, 4(%ecx)
15564 ; FALLBACK30-NEXT: addl $204, %esp
15565 ; FALLBACK30-NEXT: popl %esi
15566 ; FALLBACK30-NEXT: popl %edi
15567 ; FALLBACK30-NEXT: popl %ebx
15568 ; FALLBACK30-NEXT: popl %ebp
15569 ; FALLBACK30-NEXT: vzeroupper
15570 ; FALLBACK30-NEXT: retl
15572 ; FALLBACK31-LABEL: lshr_64bytes:
15573 ; FALLBACK31: # %bb.0:
15574 ; FALLBACK31-NEXT: pushl %ebp
15575 ; FALLBACK31-NEXT: pushl %ebx
15576 ; FALLBACK31-NEXT: pushl %edi
15577 ; FALLBACK31-NEXT: pushl %esi
15578 ; FALLBACK31-NEXT: subl $188, %esp
15579 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %eax
15580 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %ecx
15581 ; FALLBACK31-NEXT: vmovups (%ecx), %zmm0
15582 ; FALLBACK31-NEXT: movl (%eax), %ecx
15583 ; FALLBACK31-NEXT: vxorps %xmm1, %xmm1, %xmm1
15584 ; FALLBACK31-NEXT: vmovups %zmm1, {{[0-9]+}}(%esp)
15585 ; FALLBACK31-NEXT: vmovups %zmm0, {{[0-9]+}}(%esp)
15586 ; FALLBACK31-NEXT: movl %ecx, %ebp
15587 ; FALLBACK31-NEXT: andl $60, %ebp
15588 ; FALLBACK31-NEXT: movl 56(%esp,%ebp), %edx
15589 ; FALLBACK31-NEXT: movl 52(%esp,%ebp), %eax
15590 ; FALLBACK31-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15591 ; FALLBACK31-NEXT: shll $3, %ecx
15592 ; FALLBACK31-NEXT: andl $24, %ecx
15593 ; FALLBACK31-NEXT: shrdl %cl, %edx, %eax
15594 ; FALLBACK31-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15595 ; FALLBACK31-NEXT: movl 64(%esp,%ebp), %edi
15596 ; FALLBACK31-NEXT: movl 60(%esp,%ebp), %eax
15597 ; FALLBACK31-NEXT: movl %eax, %esi
15598 ; FALLBACK31-NEXT: shrdl %cl, %edi, %esi
15599 ; FALLBACK31-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15600 ; FALLBACK31-NEXT: shrdl %cl, %eax, %edx
15601 ; FALLBACK31-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15602 ; FALLBACK31-NEXT: movl 72(%esp,%ebp), %esi
15603 ; FALLBACK31-NEXT: movl 68(%esp,%ebp), %eax
15604 ; FALLBACK31-NEXT: movl %eax, %edx
15605 ; FALLBACK31-NEXT: shrdl %cl, %esi, %edx
15606 ; FALLBACK31-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15607 ; FALLBACK31-NEXT: shrdl %cl, %eax, %edi
15608 ; FALLBACK31-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15609 ; FALLBACK31-NEXT: movl 80(%esp,%ebp), %edi
15610 ; FALLBACK31-NEXT: movl 76(%esp,%ebp), %eax
15611 ; FALLBACK31-NEXT: movl %eax, %edx
15612 ; FALLBACK31-NEXT: shrdl %cl, %edi, %edx
15613 ; FALLBACK31-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15614 ; FALLBACK31-NEXT: shrdl %cl, %eax, %esi
15615 ; FALLBACK31-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15616 ; FALLBACK31-NEXT: movl 88(%esp,%ebp), %ebx
15617 ; FALLBACK31-NEXT: movl 84(%esp,%ebp), %eax
15618 ; FALLBACK31-NEXT: movl %eax, %edx
15619 ; FALLBACK31-NEXT: shrdl %cl, %ebx, %edx
15620 ; FALLBACK31-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15621 ; FALLBACK31-NEXT: shrdl %cl, %eax, %edi
15622 ; FALLBACK31-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15623 ; FALLBACK31-NEXT: movl 96(%esp,%ebp), %esi
15624 ; FALLBACK31-NEXT: movl 92(%esp,%ebp), %eax
15625 ; FALLBACK31-NEXT: movl %eax, %edx
15626 ; FALLBACK31-NEXT: shrdl %cl, %esi, %edx
15627 ; FALLBACK31-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15628 ; FALLBACK31-NEXT: shrdl %cl, %eax, %ebx
15629 ; FALLBACK31-NEXT: movl 104(%esp,%ebp), %eax
15630 ; FALLBACK31-NEXT: movl 100(%esp,%ebp), %edi
15631 ; FALLBACK31-NEXT: movl %edi, %edx
15632 ; FALLBACK31-NEXT: shrdl %cl, %eax, %edx
15633 ; FALLBACK31-NEXT: shrdl %cl, %edi, %esi
15634 ; FALLBACK31-NEXT: movl 48(%esp,%ebp), %edi
15635 ; FALLBACK31-NEXT: movl 108(%esp,%ebp), %ebp
15636 ; FALLBACK31-NEXT: movl %ebp, (%esp) # 4-byte Spill
15637 ; FALLBACK31-NEXT: shrdl %cl, %ebp, %eax
15638 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %ebp
15639 ; FALLBACK31-NEXT: movl %eax, 56(%ebp)
15640 ; FALLBACK31-NEXT: movl %esi, 48(%ebp)
15641 ; FALLBACK31-NEXT: movl %edx, 52(%ebp)
15642 ; FALLBACK31-NEXT: movl %ebx, 40(%ebp)
15643 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15644 ; FALLBACK31-NEXT: movl %eax, 44(%ebp)
15645 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15646 ; FALLBACK31-NEXT: movl %eax, 32(%ebp)
15647 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15648 ; FALLBACK31-NEXT: movl %eax, 36(%ebp)
15649 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15650 ; FALLBACK31-NEXT: movl %eax, 24(%ebp)
15651 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15652 ; FALLBACK31-NEXT: movl %eax, 28(%ebp)
15653 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15654 ; FALLBACK31-NEXT: movl %eax, 16(%ebp)
15655 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15656 ; FALLBACK31-NEXT: movl %eax, 20(%ebp)
15657 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15658 ; FALLBACK31-NEXT: movl %eax, 8(%ebp)
15659 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
15660 ; FALLBACK31-NEXT: movl %eax, 12(%ebp)
15661 ; FALLBACK31-NEXT: shrxl %ecx, (%esp), %eax # 4-byte Folded Reload
15662 ; FALLBACK31-NEXT: # kill: def $cl killed $cl killed $ecx
15663 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
15664 ; FALLBACK31-NEXT: shrdl %cl, %edx, %edi
15665 ; FALLBACK31-NEXT: movl %edi, (%ebp)
15666 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15667 ; FALLBACK31-NEXT: movl %ecx, 4(%ebp)
15668 ; FALLBACK31-NEXT: movl %eax, 60(%ebp)
15669 ; FALLBACK31-NEXT: addl $188, %esp
15670 ; FALLBACK31-NEXT: popl %esi
15671 ; FALLBACK31-NEXT: popl %edi
15672 ; FALLBACK31-NEXT: popl %ebx
15673 ; FALLBACK31-NEXT: popl %ebp
15674 ; FALLBACK31-NEXT: vzeroupper
15675 ; FALLBACK31-NEXT: retl
15676 %src = load i512, ptr %src.ptr, align 1
15677 %byteOff = load i512, ptr %byteOff.ptr, align 1
15678 %bitOff = shl i512 %byteOff, 3
15679 %res = lshr i512 %src, %bitOff
15680 store i512 %res, ptr %dst, align 1
; lshr_64bytes_qwordOff - logical right shift of a 64-byte (i512) value where
; the shift amount is given in units of 8-byte words (qwords).
;
; Arguments:
;   %src.ptr      - pointer to the i512 value to shift (loaded with align 1)
;   %qwordOff.ptr - pointer to the i512 offset, in qwords (loaded with align 1)
;   %dst          - pointer that receives the i512 result (stored with align 1)
;
; The IR at the bottom of the function scales the offset by 64 (shl by 6) to
; get a bit count, performs the lshr, and stores the result.
;
; The assertions below are autogenerated (see the header of this file), so
; they should be regenerated rather than edited by hand. In every variant
; listed here the expected code writes the source bytes and 64 zero bytes to
; the stack, loads only the low 32 bits of the offset, masks it with
; `andl $7`, and reads the result back through an index scaled by 8; no shift
; instruction is expected for this function.
15684 define void @lshr_64bytes_qwordOff(ptr %src.ptr, ptr %qwordOff.ptr, ptr %dst) nounwind {
15685 ; X64-SSE2-LABEL: lshr_64bytes_qwordOff:
15686 ; X64-SSE2: # %bb.0:
15687 ; X64-SSE2-NEXT: pushq %rbx
15688 ; X64-SSE2-NEXT: movq (%rdi), %rax
15689 ; X64-SSE2-NEXT: movq 8(%rdi), %rcx
15690 ; X64-SSE2-NEXT: movq 16(%rdi), %r8
15691 ; X64-SSE2-NEXT: movq 24(%rdi), %r9
15692 ; X64-SSE2-NEXT: movq 32(%rdi), %r10
15693 ; X64-SSE2-NEXT: movq 40(%rdi), %r11
15694 ; X64-SSE2-NEXT: movq 48(%rdi), %rbx
15695 ; X64-SSE2-NEXT: movq 56(%rdi), %rdi
15696 ; X64-SSE2-NEXT: movl (%rsi), %esi
15697 ; X64-SSE2-NEXT: xorps %xmm0, %xmm0
15698 ; X64-SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
15699 ; X64-SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
15700 ; X64-SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
15701 ; X64-SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
15702 ; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
15703 ; X64-SSE2-NEXT: movq %rbx, -{{[0-9]+}}(%rsp)
15704 ; X64-SSE2-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
15705 ; X64-SSE2-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
15706 ; X64-SSE2-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
15707 ; X64-SSE2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
15708 ; X64-SSE2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
15709 ; X64-SSE2-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
15710 ; X64-SSE2-NEXT: andl $7, %esi
15711 ; X64-SSE2-NEXT: movq -128(%rsp,%rsi,8), %rax
15712 ; X64-SSE2-NEXT: movq -120(%rsp,%rsi,8), %rcx
15713 ; X64-SSE2-NEXT: movq -104(%rsp,%rsi,8), %rdi
15714 ; X64-SSE2-NEXT: movq -112(%rsp,%rsi,8), %r8
15715 ; X64-SSE2-NEXT: movq -88(%rsp,%rsi,8), %r9
15716 ; X64-SSE2-NEXT: movq -96(%rsp,%rsi,8), %r10
15717 ; X64-SSE2-NEXT: movq -72(%rsp,%rsi,8), %r11
15718 ; X64-SSE2-NEXT: movq -80(%rsp,%rsi,8), %rsi
15719 ; X64-SSE2-NEXT: movq %rsi, 48(%rdx)
15720 ; X64-SSE2-NEXT: movq %r11, 56(%rdx)
15721 ; X64-SSE2-NEXT: movq %r10, 32(%rdx)
15722 ; X64-SSE2-NEXT: movq %r9, 40(%rdx)
15723 ; X64-SSE2-NEXT: movq %r8, 16(%rdx)
15724 ; X64-SSE2-NEXT: movq %rdi, 24(%rdx)
15725 ; X64-SSE2-NEXT: movq %rax, (%rdx)
15726 ; X64-SSE2-NEXT: movq %rcx, 8(%rdx)
15727 ; X64-SSE2-NEXT: popq %rbx
15728 ; X64-SSE2-NEXT: retq
15730 ; X64-SSE42-LABEL: lshr_64bytes_qwordOff:
15731 ; X64-SSE42: # %bb.0:
15732 ; X64-SSE42-NEXT: pushq %rax
15733 ; X64-SSE42-NEXT: movups (%rdi), %xmm0
15734 ; X64-SSE42-NEXT: movups 16(%rdi), %xmm1
15735 ; X64-SSE42-NEXT: movups 32(%rdi), %xmm2
15736 ; X64-SSE42-NEXT: movups 48(%rdi), %xmm3
15737 ; X64-SSE42-NEXT: movl (%rsi), %eax
15738 ; X64-SSE42-NEXT: xorps %xmm4, %xmm4
15739 ; X64-SSE42-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
15740 ; X64-SSE42-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
15741 ; X64-SSE42-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
15742 ; X64-SSE42-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
15743 ; X64-SSE42-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)
15744 ; X64-SSE42-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
15745 ; X64-SSE42-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
15746 ; X64-SSE42-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
15747 ; X64-SSE42-NEXT: andl $7, %eax
15748 ; X64-SSE42-NEXT: movups -128(%rsp,%rax,8), %xmm0
15749 ; X64-SSE42-NEXT: movups -112(%rsp,%rax,8), %xmm1
15750 ; X64-SSE42-NEXT: movups -96(%rsp,%rax,8), %xmm2
15751 ; X64-SSE42-NEXT: movups -80(%rsp,%rax,8), %xmm3
15752 ; X64-SSE42-NEXT: movups %xmm3, 48(%rdx)
15753 ; X64-SSE42-NEXT: movups %xmm1, 16(%rdx)
15754 ; X64-SSE42-NEXT: movups %xmm2, 32(%rdx)
15755 ; X64-SSE42-NEXT: movups %xmm0, (%rdx)
15756 ; X64-SSE42-NEXT: popq %rax
15757 ; X64-SSE42-NEXT: retq
15759 ; X64-AVX1-LABEL: lshr_64bytes_qwordOff:
15760 ; X64-AVX1: # %bb.0:
15761 ; X64-AVX1-NEXT: pushq %rax
15762 ; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
15763 ; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1
15764 ; X64-AVX1-NEXT: movl (%rsi), %eax
15765 ; X64-AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
15766 ; X64-AVX1-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
15767 ; X64-AVX1-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
15768 ; X64-AVX1-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
15769 ; X64-AVX1-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
15770 ; X64-AVX1-NEXT: andl $7, %eax
15771 ; X64-AVX1-NEXT: vmovups -128(%rsp,%rax,8), %xmm0
15772 ; X64-AVX1-NEXT: vmovups -112(%rsp,%rax,8), %xmm1
15773 ; X64-AVX1-NEXT: vmovups -96(%rsp,%rax,8), %xmm2
15774 ; X64-AVX1-NEXT: vmovups -80(%rsp,%rax,8), %xmm3
15775 ; X64-AVX1-NEXT: vmovups %xmm3, 48(%rdx)
15776 ; X64-AVX1-NEXT: vmovups %xmm1, 16(%rdx)
15777 ; X64-AVX1-NEXT: vmovups %xmm2, 32(%rdx)
15778 ; X64-AVX1-NEXT: vmovups %xmm0, (%rdx)
15779 ; X64-AVX1-NEXT: popq %rax
15780 ; X64-AVX1-NEXT: vzeroupper
15781 ; X64-AVX1-NEXT: retq
15783 ; X64-AVX512-LABEL: lshr_64bytes_qwordOff:
15784 ; X64-AVX512: # %bb.0:
15785 ; X64-AVX512-NEXT: pushq %rax
15786 ; X64-AVX512-NEXT: vmovups (%rdi), %zmm0
15787 ; X64-AVX512-NEXT: movl (%rsi), %eax
15788 ; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
15789 ; X64-AVX512-NEXT: vmovups %zmm1, -{{[0-9]+}}(%rsp)
15790 ; X64-AVX512-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp)
15791 ; X64-AVX512-NEXT: andl $7, %eax
15792 ; X64-AVX512-NEXT: vmovups -128(%rsp,%rax,8), %xmm0
15793 ; X64-AVX512-NEXT: vmovups -112(%rsp,%rax,8), %xmm1
15794 ; X64-AVX512-NEXT: vmovups -96(%rsp,%rax,8), %xmm2
15795 ; X64-AVX512-NEXT: vmovups -80(%rsp,%rax,8), %xmm3
15796 ; X64-AVX512-NEXT: vmovups %xmm3, 48(%rdx)
15797 ; X64-AVX512-NEXT: vmovups %xmm1, 16(%rdx)
15798 ; X64-AVX512-NEXT: vmovups %xmm2, 32(%rdx)
15799 ; X64-AVX512-NEXT: vmovups %xmm0, (%rdx)
15800 ; X64-AVX512-NEXT: popq %rax
15801 ; X64-AVX512-NEXT: vzeroupper
15802 ; X64-AVX512-NEXT: retq
15804 ; X86-SSE2-LABEL: lshr_64bytes_qwordOff:
15805 ; X86-SSE2: # %bb.0:
15806 ; X86-SSE2-NEXT: pushl %ebp
15807 ; X86-SSE2-NEXT: pushl %ebx
15808 ; X86-SSE2-NEXT: pushl %edi
15809 ; X86-SSE2-NEXT: pushl %esi
15810 ; X86-SSE2-NEXT: subl $188, %esp
15811 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
15812 ; X86-SSE2-NEXT: movl (%eax), %ecx
15813 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15814 ; X86-SSE2-NEXT: movl 4(%eax), %ecx
15815 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15816 ; X86-SSE2-NEXT: movl 8(%eax), %ecx
15817 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15818 ; X86-SSE2-NEXT: movl 12(%eax), %ecx
15819 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15820 ; X86-SSE2-NEXT: movl 16(%eax), %ecx
15821 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15822 ; X86-SSE2-NEXT: movl 20(%eax), %ecx
15823 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15824 ; X86-SSE2-NEXT: movl 24(%eax), %ecx
15825 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15826 ; X86-SSE2-NEXT: movl 28(%eax), %ecx
15827 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15828 ; X86-SSE2-NEXT: movl 32(%eax), %ecx
15829 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15830 ; X86-SSE2-NEXT: movl 36(%eax), %ecx
15831 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15832 ; X86-SSE2-NEXT: movl 40(%eax), %ebp
15833 ; X86-SSE2-NEXT: movl 44(%eax), %ebx
15834 ; X86-SSE2-NEXT: movl 48(%eax), %edi
15835 ; X86-SSE2-NEXT: movl 52(%eax), %esi
15836 ; X86-SSE2-NEXT: movl 56(%eax), %edx
15837 ; X86-SSE2-NEXT: movl 60(%eax), %ecx
15838 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
15839 ; X86-SSE2-NEXT: movl (%eax), %eax
15840 ; X86-SSE2-NEXT: xorps %xmm0, %xmm0
15841 ; X86-SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
15842 ; X86-SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
15843 ; X86-SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
15844 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
15845 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
15846 ; X86-SSE2-NEXT: movl %esi, {{[0-9]+}}(%esp)
15847 ; X86-SSE2-NEXT: movl %edi, {{[0-9]+}}(%esp)
15848 ; X86-SSE2-NEXT: movl %ebx, {{[0-9]+}}(%esp)
15849 ; X86-SSE2-NEXT: movl %ebp, {{[0-9]+}}(%esp)
15850 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15851 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
15852 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15853 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
15854 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15855 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
15856 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15857 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
15858 ; X86-SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
15859 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15860 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
15861 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15862 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
15863 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15864 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
15865 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15866 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
15867 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15868 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
15869 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15870 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
15871 ; X86-SSE2-NEXT: andl $7, %eax
15872 ; X86-SSE2-NEXT: movl 48(%esp,%eax,8), %ecx
15873 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15874 ; X86-SSE2-NEXT: movl 52(%esp,%eax,8), %ecx
15875 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15876 ; X86-SSE2-NEXT: movl 60(%esp,%eax,8), %ecx
15877 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15878 ; X86-SSE2-NEXT: movl 56(%esp,%eax,8), %ecx
15879 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15880 ; X86-SSE2-NEXT: movl 68(%esp,%eax,8), %ecx
15881 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15882 ; X86-SSE2-NEXT: movl 64(%esp,%eax,8), %ecx
15883 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15884 ; X86-SSE2-NEXT: movl 76(%esp,%eax,8), %ecx
15885 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15886 ; X86-SSE2-NEXT: movl 72(%esp,%eax,8), %ecx
15887 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15888 ; X86-SSE2-NEXT: movl 84(%esp,%eax,8), %ecx
15889 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15890 ; X86-SSE2-NEXT: movl 80(%esp,%eax,8), %ecx
15891 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
15892 ; X86-SSE2-NEXT: movl 92(%esp,%eax,8), %ebp
15893 ; X86-SSE2-NEXT: movl 88(%esp,%eax,8), %ebx
15894 ; X86-SSE2-NEXT: movl 100(%esp,%eax,8), %edi
15895 ; X86-SSE2-NEXT: movl 96(%esp,%eax,8), %esi
15896 ; X86-SSE2-NEXT: movl 108(%esp,%eax,8), %edx
15897 ; X86-SSE2-NEXT: movl 104(%esp,%eax,8), %ecx
15898 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
15899 ; X86-SSE2-NEXT: movl %ecx, 56(%eax)
15900 ; X86-SSE2-NEXT: movl %edx, 60(%eax)
15901 ; X86-SSE2-NEXT: movl %esi, 48(%eax)
15902 ; X86-SSE2-NEXT: movl %edi, 52(%eax)
15903 ; X86-SSE2-NEXT: movl %ebx, 40(%eax)
15904 ; X86-SSE2-NEXT: movl %ebp, 44(%eax)
15905 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15906 ; X86-SSE2-NEXT: movl %ecx, 32(%eax)
15907 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15908 ; X86-SSE2-NEXT: movl %ecx, 36(%eax)
15909 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15910 ; X86-SSE2-NEXT: movl %ecx, 24(%eax)
15911 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15912 ; X86-SSE2-NEXT: movl %ecx, 28(%eax)
15913 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15914 ; X86-SSE2-NEXT: movl %ecx, 16(%eax)
15915 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15916 ; X86-SSE2-NEXT: movl %ecx, 20(%eax)
15917 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15918 ; X86-SSE2-NEXT: movl %ecx, 8(%eax)
15919 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15920 ; X86-SSE2-NEXT: movl %ecx, 12(%eax)
15921 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15922 ; X86-SSE2-NEXT: movl %ecx, (%eax)
15923 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
15924 ; X86-SSE2-NEXT: movl %ecx, 4(%eax)
15925 ; X86-SSE2-NEXT: addl $188, %esp
15926 ; X86-SSE2-NEXT: popl %esi
15927 ; X86-SSE2-NEXT: popl %edi
15928 ; X86-SSE2-NEXT: popl %ebx
15929 ; X86-SSE2-NEXT: popl %ebp
15930 ; X86-SSE2-NEXT: retl
15932 ; X86-SSE42-LABEL: lshr_64bytes_qwordOff:
15933 ; X86-SSE42: # %bb.0:
15934 ; X86-SSE42-NEXT: subl $140, %esp
15935 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %eax
15936 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %ecx
15937 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %edx
15938 ; X86-SSE42-NEXT: movups (%edx), %xmm0
15939 ; X86-SSE42-NEXT: movups 16(%edx), %xmm1
15940 ; X86-SSE42-NEXT: movups 32(%edx), %xmm2
15941 ; X86-SSE42-NEXT: movups 48(%edx), %xmm3
15942 ; X86-SSE42-NEXT: movl (%ecx), %ecx
15943 ; X86-SSE42-NEXT: xorps %xmm4, %xmm4
15944 ; X86-SSE42-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
15945 ; X86-SSE42-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
15946 ; X86-SSE42-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
15947 ; X86-SSE42-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
15948 ; X86-SSE42-NEXT: movaps %xmm3, {{[0-9]+}}(%esp)
15949 ; X86-SSE42-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
15950 ; X86-SSE42-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
15951 ; X86-SSE42-NEXT: movaps %xmm0, (%esp)
15952 ; X86-SSE42-NEXT: andl $7, %ecx
15953 ; X86-SSE42-NEXT: movups (%esp,%ecx,8), %xmm0
15954 ; X86-SSE42-NEXT: movups 16(%esp,%ecx,8), %xmm1
15955 ; X86-SSE42-NEXT: movups 32(%esp,%ecx,8), %xmm2
15956 ; X86-SSE42-NEXT: movups 48(%esp,%ecx,8), %xmm3
15957 ; X86-SSE42-NEXT: movups %xmm3, 48(%eax)
15958 ; X86-SSE42-NEXT: movups %xmm2, 32(%eax)
15959 ; X86-SSE42-NEXT: movups %xmm1, 16(%eax)
15960 ; X86-SSE42-NEXT: movups %xmm0, (%eax)
15961 ; X86-SSE42-NEXT: addl $140, %esp
15962 ; X86-SSE42-NEXT: retl
15964 ; X86-AVX1-LABEL: lshr_64bytes_qwordOff:
15965 ; X86-AVX1: # %bb.0:
15966 ; X86-AVX1-NEXT: subl $140, %esp
15967 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
15968 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx
15969 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %edx
15970 ; X86-AVX1-NEXT: vmovups (%edx), %ymm0
15971 ; X86-AVX1-NEXT: vmovups 32(%edx), %ymm1
15972 ; X86-AVX1-NEXT: movl (%ecx), %ecx
15973 ; X86-AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
15974 ; X86-AVX1-NEXT: vmovups %ymm2, {{[0-9]+}}(%esp)
15975 ; X86-AVX1-NEXT: vmovups %ymm2, {{[0-9]+}}(%esp)
15976 ; X86-AVX1-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
15977 ; X86-AVX1-NEXT: vmovups %ymm0, (%esp)
15978 ; X86-AVX1-NEXT: andl $7, %ecx
15979 ; X86-AVX1-NEXT: vmovups (%esp,%ecx,8), %xmm0
15980 ; X86-AVX1-NEXT: vmovups 16(%esp,%ecx,8), %xmm1
15981 ; X86-AVX1-NEXT: vmovups 32(%esp,%ecx,8), %xmm2
15982 ; X86-AVX1-NEXT: vmovups 48(%esp,%ecx,8), %xmm3
15983 ; X86-AVX1-NEXT: vmovups %xmm3, 48(%eax)
15984 ; X86-AVX1-NEXT: vmovups %xmm2, 32(%eax)
15985 ; X86-AVX1-NEXT: vmovups %xmm1, 16(%eax)
15986 ; X86-AVX1-NEXT: vmovups %xmm0, (%eax)
15987 ; X86-AVX1-NEXT: addl $140, %esp
15988 ; X86-AVX1-NEXT: vzeroupper
15989 ; X86-AVX1-NEXT: retl
15991 ; X86-AVX512-LABEL: lshr_64bytes_qwordOff:
15992 ; X86-AVX512: # %bb.0:
15993 ; X86-AVX512-NEXT: subl $140, %esp
15994 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
15995 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
15996 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx
15997 ; X86-AVX512-NEXT: vmovups (%edx), %zmm0
15998 ; X86-AVX512-NEXT: movl (%ecx), %ecx
15999 ; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
16000 ; X86-AVX512-NEXT: vmovups %zmm1, {{[0-9]+}}(%esp)
16001 ; X86-AVX512-NEXT: vmovups %zmm0, (%esp)
16002 ; X86-AVX512-NEXT: andl $7, %ecx
16003 ; X86-AVX512-NEXT: vmovups (%esp,%ecx,8), %xmm0
16004 ; X86-AVX512-NEXT: vmovups 16(%esp,%ecx,8), %xmm1
16005 ; X86-AVX512-NEXT: vmovups 32(%esp,%ecx,8), %xmm2
16006 ; X86-AVX512-NEXT: vmovups 48(%esp,%ecx,8), %xmm3
16007 ; X86-AVX512-NEXT: vmovups %xmm3, 48(%eax)
16008 ; X86-AVX512-NEXT: vmovups %xmm2, 32(%eax)
16009 ; X86-AVX512-NEXT: vmovups %xmm1, 16(%eax)
16010 ; X86-AVX512-NEXT: vmovups %xmm0, (%eax)
16011 ; X86-AVX512-NEXT: addl $140, %esp
16012 ; X86-AVX512-NEXT: vzeroupper
16013 ; X86-AVX512-NEXT: retl
; IR under test: both operands are read as unaligned (align 1) i512 values.
16014 %src = load i512, ptr %src.ptr, align 1
16015 %qwordOff = load i512, ptr %qwordOff.ptr, align 1
; Convert the qword offset to a bit offset: one qword is 64 bits, i.e. 1 << 6.
16016 %bitOff = shl i512 %qwordOff, 6
; Logical (zero-filling) right shift by the bit offset, then unaligned store.
16017 %res = lshr i512 %src, %bitOff
16018 store i512 %res, ptr %dst, align 1
16022 define void @shl_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
16023 ; FALLBACK0-LABEL: shl_64bytes:
16024 ; FALLBACK0: # %bb.0:
16025 ; FALLBACK0-NEXT: pushq %r15
16026 ; FALLBACK0-NEXT: pushq %r14
16027 ; FALLBACK0-NEXT: pushq %r13
16028 ; FALLBACK0-NEXT: pushq %r12
16029 ; FALLBACK0-NEXT: pushq %rbx
16030 ; FALLBACK0-NEXT: movq (%rdi), %rax
16031 ; FALLBACK0-NEXT: movq 8(%rdi), %rcx
16032 ; FALLBACK0-NEXT: movq 16(%rdi), %r8
16033 ; FALLBACK0-NEXT: movq 24(%rdi), %r9
16034 ; FALLBACK0-NEXT: movq 32(%rdi), %r10
16035 ; FALLBACK0-NEXT: movq 40(%rdi), %r11
16036 ; FALLBACK0-NEXT: movq 48(%rdi), %rbx
16037 ; FALLBACK0-NEXT: movq 56(%rdi), %rdi
16038 ; FALLBACK0-NEXT: movl (%rsi), %esi
16039 ; FALLBACK0-NEXT: xorps %xmm0, %xmm0
16040 ; FALLBACK0-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
16041 ; FALLBACK0-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
16042 ; FALLBACK0-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
16043 ; FALLBACK0-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
16044 ; FALLBACK0-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
16045 ; FALLBACK0-NEXT: movq %rbx, -{{[0-9]+}}(%rsp)
16046 ; FALLBACK0-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
16047 ; FALLBACK0-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
16048 ; FALLBACK0-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
16049 ; FALLBACK0-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
16050 ; FALLBACK0-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
16051 ; FALLBACK0-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
16052 ; FALLBACK0-NEXT: leal (,%rsi,8), %eax
16053 ; FALLBACK0-NEXT: andl $56, %eax
16054 ; FALLBACK0-NEXT: andl $56, %esi
16055 ; FALLBACK0-NEXT: negl %esi
16056 ; FALLBACK0-NEXT: movslq %esi, %rbx
16057 ; FALLBACK0-NEXT: movq -64(%rsp,%rbx), %r8
16058 ; FALLBACK0-NEXT: movq -56(%rsp,%rbx), %rdi
16059 ; FALLBACK0-NEXT: movq %rdi, %r10
16060 ; FALLBACK0-NEXT: movl %eax, %ecx
16061 ; FALLBACK0-NEXT: shlq %cl, %r10
16062 ; FALLBACK0-NEXT: movl %eax, %esi
16063 ; FALLBACK0-NEXT: notb %sil
16064 ; FALLBACK0-NEXT: movq %r8, %r9
16065 ; FALLBACK0-NEXT: shrq %r9
16066 ; FALLBACK0-NEXT: movl %esi, %ecx
16067 ; FALLBACK0-NEXT: shrq %cl, %r9
16068 ; FALLBACK0-NEXT: orq %r10, %r9
16069 ; FALLBACK0-NEXT: movq -40(%rsp,%rbx), %r10
16070 ; FALLBACK0-NEXT: movq %r10, %r14
16071 ; FALLBACK0-NEXT: movl %eax, %ecx
16072 ; FALLBACK0-NEXT: shlq %cl, %r14
16073 ; FALLBACK0-NEXT: movq -48(%rsp,%rbx), %r15
16074 ; FALLBACK0-NEXT: movq %r15, %r11
16075 ; FALLBACK0-NEXT: shrq %r11
16076 ; FALLBACK0-NEXT: movl %esi, %ecx
16077 ; FALLBACK0-NEXT: shrq %cl, %r11
16078 ; FALLBACK0-NEXT: orq %r14, %r11
16079 ; FALLBACK0-NEXT: movl %eax, %ecx
16080 ; FALLBACK0-NEXT: shlq %cl, %r15
16081 ; FALLBACK0-NEXT: shrq %rdi
16082 ; FALLBACK0-NEXT: movl %esi, %ecx
16083 ; FALLBACK0-NEXT: shrq %cl, %rdi
16084 ; FALLBACK0-NEXT: orq %r15, %rdi
16085 ; FALLBACK0-NEXT: movq -24(%rsp,%rbx), %r14
16086 ; FALLBACK0-NEXT: movq %r14, %r12
16087 ; FALLBACK0-NEXT: movl %eax, %ecx
16088 ; FALLBACK0-NEXT: shlq %cl, %r12
16089 ; FALLBACK0-NEXT: movq -32(%rsp,%rbx), %r13
16090 ; FALLBACK0-NEXT: movq %r13, %r15
16091 ; FALLBACK0-NEXT: shrq %r15
16092 ; FALLBACK0-NEXT: movl %esi, %ecx
16093 ; FALLBACK0-NEXT: shrq %cl, %r15
16094 ; FALLBACK0-NEXT: orq %r12, %r15
16095 ; FALLBACK0-NEXT: movl %eax, %ecx
16096 ; FALLBACK0-NEXT: shlq %cl, %r13
16097 ; FALLBACK0-NEXT: shrq %r10
16098 ; FALLBACK0-NEXT: movl %esi, %ecx
16099 ; FALLBACK0-NEXT: shrq %cl, %r10
16100 ; FALLBACK0-NEXT: orq %r13, %r10
16101 ; FALLBACK0-NEXT: movq -8(%rsp,%rbx), %r12
16102 ; FALLBACK0-NEXT: movl %eax, %ecx
16103 ; FALLBACK0-NEXT: shlq %cl, %r12
16104 ; FALLBACK0-NEXT: movq -16(%rsp,%rbx), %rbx
16105 ; FALLBACK0-NEXT: movq %rbx, %r13
16106 ; FALLBACK0-NEXT: shrq %r13
16107 ; FALLBACK0-NEXT: movl %esi, %ecx
16108 ; FALLBACK0-NEXT: shrq %cl, %r13
16109 ; FALLBACK0-NEXT: orq %r12, %r13
16110 ; FALLBACK0-NEXT: movl %eax, %ecx
16111 ; FALLBACK0-NEXT: shlq %cl, %rbx
16112 ; FALLBACK0-NEXT: shrq %r14
16113 ; FALLBACK0-NEXT: movl %esi, %ecx
16114 ; FALLBACK0-NEXT: shrq %cl, %r14
16115 ; FALLBACK0-NEXT: orq %rbx, %r14
16116 ; FALLBACK0-NEXT: movl %eax, %ecx
16117 ; FALLBACK0-NEXT: shlq %cl, %r8
16118 ; FALLBACK0-NEXT: movq %r8, (%rdx)
16119 ; FALLBACK0-NEXT: movq %r14, 48(%rdx)
16120 ; FALLBACK0-NEXT: movq %r13, 56(%rdx)
16121 ; FALLBACK0-NEXT: movq %r10, 32(%rdx)
16122 ; FALLBACK0-NEXT: movq %r15, 40(%rdx)
16123 ; FALLBACK0-NEXT: movq %rdi, 16(%rdx)
16124 ; FALLBACK0-NEXT: movq %r11, 24(%rdx)
16125 ; FALLBACK0-NEXT: movq %r9, 8(%rdx)
16126 ; FALLBACK0-NEXT: popq %rbx
16127 ; FALLBACK0-NEXT: popq %r12
16128 ; FALLBACK0-NEXT: popq %r13
16129 ; FALLBACK0-NEXT: popq %r14
16130 ; FALLBACK0-NEXT: popq %r15
16131 ; FALLBACK0-NEXT: retq
16133 ; FALLBACK1-LABEL: shl_64bytes:
16134 ; FALLBACK1: # %bb.0:
16135 ; FALLBACK1-NEXT: pushq %r14
16136 ; FALLBACK1-NEXT: pushq %rbx
16137 ; FALLBACK1-NEXT: pushq %rax
16138 ; FALLBACK1-NEXT: movq (%rdi), %rax
16139 ; FALLBACK1-NEXT: movq 8(%rdi), %rcx
16140 ; FALLBACK1-NEXT: movq 16(%rdi), %r8
16141 ; FALLBACK1-NEXT: movq 24(%rdi), %r9
16142 ; FALLBACK1-NEXT: movq 32(%rdi), %r10
16143 ; FALLBACK1-NEXT: movq 40(%rdi), %r11
16144 ; FALLBACK1-NEXT: movq 48(%rdi), %rbx
16145 ; FALLBACK1-NEXT: movq 56(%rdi), %rdi
16146 ; FALLBACK1-NEXT: movl (%rsi), %esi
16147 ; FALLBACK1-NEXT: xorps %xmm0, %xmm0
16148 ; FALLBACK1-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
16149 ; FALLBACK1-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
16150 ; FALLBACK1-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
16151 ; FALLBACK1-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
16152 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
16153 ; FALLBACK1-NEXT: movq %rbx, -{{[0-9]+}}(%rsp)
16154 ; FALLBACK1-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
16155 ; FALLBACK1-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
16156 ; FALLBACK1-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
16157 ; FALLBACK1-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
16158 ; FALLBACK1-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
16159 ; FALLBACK1-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
16160 ; FALLBACK1-NEXT: leal (,%rsi,8), %ecx
16161 ; FALLBACK1-NEXT: andl $56, %ecx
16162 ; FALLBACK1-NEXT: andl $56, %esi
16163 ; FALLBACK1-NEXT: negl %esi
16164 ; FALLBACK1-NEXT: movslq %esi, %r9
16165 ; FALLBACK1-NEXT: movq -48(%rsp,%r9), %rax
16166 ; FALLBACK1-NEXT: movq -40(%rsp,%r9), %r10
16167 ; FALLBACK1-NEXT: movq %r10, %rsi
16168 ; FALLBACK1-NEXT: shldq %cl, %rax, %rsi
16169 ; FALLBACK1-NEXT: movq -64(%rsp,%r9), %r8
16170 ; FALLBACK1-NEXT: movq -56(%rsp,%r9), %rdi
16171 ; FALLBACK1-NEXT: shldq %cl, %rdi, %rax
16172 ; FALLBACK1-NEXT: movq -32(%rsp,%r9), %r11
16173 ; FALLBACK1-NEXT: movq -24(%rsp,%r9), %rbx
16174 ; FALLBACK1-NEXT: movq %rbx, %r14
16175 ; FALLBACK1-NEXT: shldq %cl, %r11, %r14
16176 ; FALLBACK1-NEXT: shldq %cl, %r10, %r11
16177 ; FALLBACK1-NEXT: movq -16(%rsp,%r9), %r10
16178 ; FALLBACK1-NEXT: movq -8(%rsp,%r9), %r9
16179 ; FALLBACK1-NEXT: shldq %cl, %r10, %r9
16180 ; FALLBACK1-NEXT: shldq %cl, %rbx, %r10
16181 ; FALLBACK1-NEXT: shldq %cl, %r8, %rdi
16182 ; FALLBACK1-NEXT: # kill: def $cl killed $cl killed $ecx
16183 ; FALLBACK1-NEXT: shlq %cl, %r8
16184 ; FALLBACK1-NEXT: movq %r10, 48(%rdx)
16185 ; FALLBACK1-NEXT: movq %r9, 56(%rdx)
16186 ; FALLBACK1-NEXT: movq %r11, 32(%rdx)
16187 ; FALLBACK1-NEXT: movq %r14, 40(%rdx)
16188 ; FALLBACK1-NEXT: movq %rax, 16(%rdx)
16189 ; FALLBACK1-NEXT: movq %rsi, 24(%rdx)
16190 ; FALLBACK1-NEXT: movq %r8, (%rdx)
16191 ; FALLBACK1-NEXT: movq %rdi, 8(%rdx)
16192 ; FALLBACK1-NEXT: addq $8, %rsp
16193 ; FALLBACK1-NEXT: popq %rbx
16194 ; FALLBACK1-NEXT: popq %r14
16195 ; FALLBACK1-NEXT: retq
16197 ; FALLBACK2-LABEL: shl_64bytes:
16198 ; FALLBACK2: # %bb.0:
16199 ; FALLBACK2-NEXT: pushq %rbp
16200 ; FALLBACK2-NEXT: pushq %r15
16201 ; FALLBACK2-NEXT: pushq %r14
16202 ; FALLBACK2-NEXT: pushq %r13
16203 ; FALLBACK2-NEXT: pushq %r12
16204 ; FALLBACK2-NEXT: pushq %rbx
16205 ; FALLBACK2-NEXT: pushq %rax
16206 ; FALLBACK2-NEXT: movq (%rdi), %rax
16207 ; FALLBACK2-NEXT: movq 8(%rdi), %rcx
16208 ; FALLBACK2-NEXT: movq 16(%rdi), %r8
16209 ; FALLBACK2-NEXT: movq 24(%rdi), %r9
16210 ; FALLBACK2-NEXT: movq 32(%rdi), %r10
16211 ; FALLBACK2-NEXT: movq 40(%rdi), %r11
16212 ; FALLBACK2-NEXT: movq 48(%rdi), %rbx
16213 ; FALLBACK2-NEXT: movq 56(%rdi), %rdi
16214 ; FALLBACK2-NEXT: movl (%rsi), %esi
16215 ; FALLBACK2-NEXT: xorps %xmm0, %xmm0
16216 ; FALLBACK2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
16217 ; FALLBACK2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
16218 ; FALLBACK2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
16219 ; FALLBACK2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
16220 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
16221 ; FALLBACK2-NEXT: movq %rbx, -{{[0-9]+}}(%rsp)
16222 ; FALLBACK2-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
16223 ; FALLBACK2-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
16224 ; FALLBACK2-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
16225 ; FALLBACK2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
16226 ; FALLBACK2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
16227 ; FALLBACK2-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
16228 ; FALLBACK2-NEXT: leal (,%rsi,8), %eax
16229 ; FALLBACK2-NEXT: andl $56, %eax
16230 ; FALLBACK2-NEXT: andl $56, %esi
16231 ; FALLBACK2-NEXT: negl %esi
16232 ; FALLBACK2-NEXT: movslq %esi, %rsi
16233 ; FALLBACK2-NEXT: movq -64(%rsp,%rsi), %r10
16234 ; FALLBACK2-NEXT: movq -56(%rsp,%rsi), %rcx
16235 ; FALLBACK2-NEXT: shlxq %rax, %rcx, %r9
16236 ; FALLBACK2-NEXT: movq -40(%rsp,%rsi), %rdi
16237 ; FALLBACK2-NEXT: shlxq %rax, %rdi, %r11
16238 ; FALLBACK2-NEXT: movq -48(%rsp,%rsi), %r14
16239 ; FALLBACK2-NEXT: shlxq %rax, %r14, %rbx
16240 ; FALLBACK2-NEXT: movq -24(%rsp,%rsi), %r8
16241 ; FALLBACK2-NEXT: shlxq %rax, %r8, %r15
16242 ; FALLBACK2-NEXT: shlxq %rax, %r10, %r12
16243 ; FALLBACK2-NEXT: movl %eax, %r13d
16244 ; FALLBACK2-NEXT: notb %r13b
16245 ; FALLBACK2-NEXT: shrq %r10
16246 ; FALLBACK2-NEXT: shrxq %r13, %r10, %r10
16247 ; FALLBACK2-NEXT: orq %r9, %r10
16248 ; FALLBACK2-NEXT: movq -32(%rsp,%rsi), %r9
16249 ; FALLBACK2-NEXT: shlxq %rax, %r9, %rbp
16250 ; FALLBACK2-NEXT: shrq %r14
16251 ; FALLBACK2-NEXT: shrxq %r13, %r14, %r14
16252 ; FALLBACK2-NEXT: orq %r11, %r14
16253 ; FALLBACK2-NEXT: shlxq %rax, -8(%rsp,%rsi), %r11
16254 ; FALLBACK2-NEXT: movq -16(%rsp,%rsi), %rsi
16255 ; FALLBACK2-NEXT: shlxq %rax, %rsi, %rax
16256 ; FALLBACK2-NEXT: shrq %rcx
16257 ; FALLBACK2-NEXT: shrxq %r13, %rcx, %rcx
16258 ; FALLBACK2-NEXT: orq %rbx, %rcx
16259 ; FALLBACK2-NEXT: shrq %r9
16260 ; FALLBACK2-NEXT: shrxq %r13, %r9, %r9
16261 ; FALLBACK2-NEXT: orq %r15, %r9
16262 ; FALLBACK2-NEXT: shrq %rdi
16263 ; FALLBACK2-NEXT: shrxq %r13, %rdi, %rdi
16264 ; FALLBACK2-NEXT: orq %rbp, %rdi
16265 ; FALLBACK2-NEXT: shrq %rsi
16266 ; FALLBACK2-NEXT: shrxq %r13, %rsi, %rsi
16267 ; FALLBACK2-NEXT: orq %r11, %rsi
16268 ; FALLBACK2-NEXT: shrq %r8
16269 ; FALLBACK2-NEXT: shrxq %r13, %r8, %r8
16270 ; FALLBACK2-NEXT: orq %rax, %r8
16271 ; FALLBACK2-NEXT: movq %r12, (%rdx)
16272 ; FALLBACK2-NEXT: movq %r8, 48(%rdx)
16273 ; FALLBACK2-NEXT: movq %rsi, 56(%rdx)
16274 ; FALLBACK2-NEXT: movq %rdi, 32(%rdx)
16275 ; FALLBACK2-NEXT: movq %r9, 40(%rdx)
16276 ; FALLBACK2-NEXT: movq %rcx, 16(%rdx)
16277 ; FALLBACK2-NEXT: movq %r14, 24(%rdx)
16278 ; FALLBACK2-NEXT: movq %r10, 8(%rdx)
16279 ; FALLBACK2-NEXT: addq $8, %rsp
16280 ; FALLBACK2-NEXT: popq %rbx
16281 ; FALLBACK2-NEXT: popq %r12
16282 ; FALLBACK2-NEXT: popq %r13
16283 ; FALLBACK2-NEXT: popq %r14
16284 ; FALLBACK2-NEXT: popq %r15
16285 ; FALLBACK2-NEXT: popq %rbp
16286 ; FALLBACK2-NEXT: retq
16288 ; FALLBACK3-LABEL: shl_64bytes:
16289 ; FALLBACK3: # %bb.0:
16290 ; FALLBACK3-NEXT: pushq %r14
16291 ; FALLBACK3-NEXT: pushq %rbx
16292 ; FALLBACK3-NEXT: pushq %rax
16293 ; FALLBACK3-NEXT: movq (%rdi), %rax
16294 ; FALLBACK3-NEXT: movq 8(%rdi), %rcx
16295 ; FALLBACK3-NEXT: movq 16(%rdi), %r8
16296 ; FALLBACK3-NEXT: movq 24(%rdi), %r9
16297 ; FALLBACK3-NEXT: movq 32(%rdi), %r10
16298 ; FALLBACK3-NEXT: movq 40(%rdi), %r11
16299 ; FALLBACK3-NEXT: movq 48(%rdi), %rbx
16300 ; FALLBACK3-NEXT: movq 56(%rdi), %rdi
16301 ; FALLBACK3-NEXT: movl (%rsi), %esi
16302 ; FALLBACK3-NEXT: xorps %xmm0, %xmm0
16303 ; FALLBACK3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
16304 ; FALLBACK3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
16305 ; FALLBACK3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
16306 ; FALLBACK3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
16307 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
16308 ; FALLBACK3-NEXT: movq %rbx, -{{[0-9]+}}(%rsp)
16309 ; FALLBACK3-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
16310 ; FALLBACK3-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
16311 ; FALLBACK3-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
16312 ; FALLBACK3-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
16313 ; FALLBACK3-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
16314 ; FALLBACK3-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
16315 ; FALLBACK3-NEXT: leal (,%rsi,8), %ecx
16316 ; FALLBACK3-NEXT: andl $56, %ecx
16317 ; FALLBACK3-NEXT: andl $56, %esi
16318 ; FALLBACK3-NEXT: negl %esi
16319 ; FALLBACK3-NEXT: movslq %esi, %r8
16320 ; FALLBACK3-NEXT: movq -48(%rsp,%r8), %rax
16321 ; FALLBACK3-NEXT: movq -40(%rsp,%r8), %r9
16322 ; FALLBACK3-NEXT: movq %r9, %rsi
16323 ; FALLBACK3-NEXT: shldq %cl, %rax, %rsi
16324 ; FALLBACK3-NEXT: movq -64(%rsp,%r8), %r10
16325 ; FALLBACK3-NEXT: movq -56(%rsp,%r8), %rdi
16326 ; FALLBACK3-NEXT: shldq %cl, %rdi, %rax
16327 ; FALLBACK3-NEXT: movq -32(%rsp,%r8), %r11
16328 ; FALLBACK3-NEXT: movq -24(%rsp,%r8), %rbx
16329 ; FALLBACK3-NEXT: movq %rbx, %r14
16330 ; FALLBACK3-NEXT: shldq %cl, %r11, %r14
16331 ; FALLBACK3-NEXT: shldq %cl, %r9, %r11
16332 ; FALLBACK3-NEXT: movq -16(%rsp,%r8), %r9
16333 ; FALLBACK3-NEXT: movq -8(%rsp,%r8), %r8
16334 ; FALLBACK3-NEXT: shldq %cl, %r9, %r8
16335 ; FALLBACK3-NEXT: shldq %cl, %rbx, %r9
16336 ; FALLBACK3-NEXT: shldq %cl, %r10, %rdi
16337 ; FALLBACK3-NEXT: shlxq %rcx, %r10, %rcx
16338 ; FALLBACK3-NEXT: movq %r9, 48(%rdx)
16339 ; FALLBACK3-NEXT: movq %r8, 56(%rdx)
16340 ; FALLBACK3-NEXT: movq %r11, 32(%rdx)
16341 ; FALLBACK3-NEXT: movq %r14, 40(%rdx)
16342 ; FALLBACK3-NEXT: movq %rax, 16(%rdx)
16343 ; FALLBACK3-NEXT: movq %rsi, 24(%rdx)
16344 ; FALLBACK3-NEXT: movq %rcx, (%rdx)
16345 ; FALLBACK3-NEXT: movq %rdi, 8(%rdx)
16346 ; FALLBACK3-NEXT: addq $8, %rsp
16347 ; FALLBACK3-NEXT: popq %rbx
16348 ; FALLBACK3-NEXT: popq %r14
16349 ; FALLBACK3-NEXT: retq
16351 ; FALLBACK4-LABEL: shl_64bytes:
16352 ; FALLBACK4: # %bb.0:
16353 ; FALLBACK4-NEXT: pushq %r15
16354 ; FALLBACK4-NEXT: pushq %r14
16355 ; FALLBACK4-NEXT: pushq %r13
16356 ; FALLBACK4-NEXT: pushq %r12
16357 ; FALLBACK4-NEXT: pushq %rbx
16358 ; FALLBACK4-NEXT: movups (%rdi), %xmm0
16359 ; FALLBACK4-NEXT: movups 16(%rdi), %xmm1
16360 ; FALLBACK4-NEXT: movups 32(%rdi), %xmm2
16361 ; FALLBACK4-NEXT: movups 48(%rdi), %xmm3
16362 ; FALLBACK4-NEXT: movl (%rsi), %ecx
16363 ; FALLBACK4-NEXT: xorps %xmm4, %xmm4
16364 ; FALLBACK4-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
16365 ; FALLBACK4-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
16366 ; FALLBACK4-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
16367 ; FALLBACK4-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
16368 ; FALLBACK4-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)
16369 ; FALLBACK4-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
16370 ; FALLBACK4-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
16371 ; FALLBACK4-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
16372 ; FALLBACK4-NEXT: leal (,%rcx,8), %eax
16373 ; FALLBACK4-NEXT: andl $56, %eax
16374 ; FALLBACK4-NEXT: andl $56, %ecx
16375 ; FALLBACK4-NEXT: negl %ecx
16376 ; FALLBACK4-NEXT: movslq %ecx, %r9
16377 ; FALLBACK4-NEXT: movq -24(%rsp,%r9), %rdi
16378 ; FALLBACK4-NEXT: movq %rdi, %r10
16379 ; FALLBACK4-NEXT: movl %eax, %ecx
16380 ; FALLBACK4-NEXT: shlq %cl, %r10
16381 ; FALLBACK4-NEXT: movl %eax, %esi
16382 ; FALLBACK4-NEXT: notb %sil
16383 ; FALLBACK4-NEXT: movq -32(%rsp,%r9), %r11
16384 ; FALLBACK4-NEXT: movq %r11, %r8
16385 ; FALLBACK4-NEXT: shrq %r8
16386 ; FALLBACK4-NEXT: movl %esi, %ecx
16387 ; FALLBACK4-NEXT: shrq %cl, %r8
16388 ; FALLBACK4-NEXT: orq %r10, %r8
16389 ; FALLBACK4-NEXT: movl %eax, %ecx
16390 ; FALLBACK4-NEXT: shlq %cl, %r11
16391 ; FALLBACK4-NEXT: movq -40(%rsp,%r9), %rbx
16392 ; FALLBACK4-NEXT: movq %rbx, %r10
16393 ; FALLBACK4-NEXT: shrq %r10
16394 ; FALLBACK4-NEXT: movl %esi, %ecx
16395 ; FALLBACK4-NEXT: shrq %cl, %r10
16396 ; FALLBACK4-NEXT: orq %r11, %r10
16397 ; FALLBACK4-NEXT: movl %eax, %ecx
16398 ; FALLBACK4-NEXT: shlq %cl, %rbx
16399 ; FALLBACK4-NEXT: movq -48(%rsp,%r9), %r15
16400 ; FALLBACK4-NEXT: movq %r15, %r11
16401 ; FALLBACK4-NEXT: shrq %r11
16402 ; FALLBACK4-NEXT: movl %esi, %ecx
16403 ; FALLBACK4-NEXT: shrq %cl, %r11
16404 ; FALLBACK4-NEXT: orq %rbx, %r11
16405 ; FALLBACK4-NEXT: movl %eax, %ecx
16406 ; FALLBACK4-NEXT: shlq %cl, %r15
16407 ; FALLBACK4-NEXT: movq -64(%rsp,%r9), %r14
16408 ; FALLBACK4-NEXT: movq -56(%rsp,%r9), %r12
16409 ; FALLBACK4-NEXT: movq %r12, %rbx
16410 ; FALLBACK4-NEXT: shrq %rbx
16411 ; FALLBACK4-NEXT: movl %esi, %ecx
16412 ; FALLBACK4-NEXT: shrq %cl, %rbx
16413 ; FALLBACK4-NEXT: orq %r15, %rbx
16414 ; FALLBACK4-NEXT: movl %eax, %ecx
16415 ; FALLBACK4-NEXT: shlq %cl, %r12
16416 ; FALLBACK4-NEXT: movq %r14, %r15
16417 ; FALLBACK4-NEXT: shrq %r15
16418 ; FALLBACK4-NEXT: movl %esi, %ecx
16419 ; FALLBACK4-NEXT: shrq %cl, %r15
16420 ; FALLBACK4-NEXT: orq %r12, %r15
16421 ; FALLBACK4-NEXT: movq -16(%rsp,%r9), %r12
16422 ; FALLBACK4-NEXT: movq %r12, %r13
16423 ; FALLBACK4-NEXT: movl %eax, %ecx
16424 ; FALLBACK4-NEXT: shlq %cl, %r13
16425 ; FALLBACK4-NEXT: shrq %rdi
16426 ; FALLBACK4-NEXT: movl %esi, %ecx
16427 ; FALLBACK4-NEXT: shrq %cl, %rdi
16428 ; FALLBACK4-NEXT: orq %r13, %rdi
16429 ; FALLBACK4-NEXT: movq -8(%rsp,%r9), %r9
16430 ; FALLBACK4-NEXT: movl %eax, %ecx
16431 ; FALLBACK4-NEXT: shlq %cl, %r9
16432 ; FALLBACK4-NEXT: shrq %r12
16433 ; FALLBACK4-NEXT: movl %esi, %ecx
16434 ; FALLBACK4-NEXT: shrq %cl, %r12
16435 ; FALLBACK4-NEXT: orq %r9, %r12
16436 ; FALLBACK4-NEXT: movl %eax, %ecx
16437 ; FALLBACK4-NEXT: shlq %cl, %r14
16438 ; FALLBACK4-NEXT: movq %r14, (%rdx)
16439 ; FALLBACK4-NEXT: movq %r12, 56(%rdx)
16440 ; FALLBACK4-NEXT: movq %rdi, 48(%rdx)
16441 ; FALLBACK4-NEXT: movq %r15, 8(%rdx)
16442 ; FALLBACK4-NEXT: movq %rbx, 16(%rdx)
16443 ; FALLBACK4-NEXT: movq %r11, 24(%rdx)
16444 ; FALLBACK4-NEXT: movq %r10, 32(%rdx)
16445 ; FALLBACK4-NEXT: movq %r8, 40(%rdx)
16446 ; FALLBACK4-NEXT: popq %rbx
16447 ; FALLBACK4-NEXT: popq %r12
16448 ; FALLBACK4-NEXT: popq %r13
16449 ; FALLBACK4-NEXT: popq %r14
16450 ; FALLBACK4-NEXT: popq %r15
16451 ; FALLBACK4-NEXT: retq
16453 ; FALLBACK5-LABEL: shl_64bytes:
16454 ; FALLBACK5: # %bb.0:
16455 ; FALLBACK5-NEXT: pushq %r15
16456 ; FALLBACK5-NEXT: pushq %r14
16457 ; FALLBACK5-NEXT: pushq %rbx
16458 ; FALLBACK5-NEXT: movups (%rdi), %xmm0
16459 ; FALLBACK5-NEXT: movups 16(%rdi), %xmm1
16460 ; FALLBACK5-NEXT: movups 32(%rdi), %xmm2
16461 ; FALLBACK5-NEXT: movups 48(%rdi), %xmm3
16462 ; FALLBACK5-NEXT: movl (%rsi), %eax
16463 ; FALLBACK5-NEXT: xorps %xmm4, %xmm4
16464 ; FALLBACK5-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
16465 ; FALLBACK5-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
16466 ; FALLBACK5-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
16467 ; FALLBACK5-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
16468 ; FALLBACK5-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)
16469 ; FALLBACK5-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
16470 ; FALLBACK5-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
16471 ; FALLBACK5-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
16472 ; FALLBACK5-NEXT: leal (,%rax,8), %ecx
16473 ; FALLBACK5-NEXT: andl $56, %ecx
16474 ; FALLBACK5-NEXT: andl $56, %eax
16475 ; FALLBACK5-NEXT: negl %eax
16476 ; FALLBACK5-NEXT: movslq %eax, %r8
16477 ; FALLBACK5-NEXT: movq -32(%rsp,%r8), %rax
16478 ; FALLBACK5-NEXT: movq -24(%rsp,%r8), %r9
16479 ; FALLBACK5-NEXT: movq %r9, %rsi
16480 ; FALLBACK5-NEXT: shldq %cl, %rax, %rsi
16481 ; FALLBACK5-NEXT: movq -40(%rsp,%r8), %rdi
16482 ; FALLBACK5-NEXT: shldq %cl, %rdi, %rax
16483 ; FALLBACK5-NEXT: movq -48(%rsp,%r8), %r10
16484 ; FALLBACK5-NEXT: shldq %cl, %r10, %rdi
16485 ; FALLBACK5-NEXT: movq -64(%rsp,%r8), %r11
16486 ; FALLBACK5-NEXT: movq -56(%rsp,%r8), %rbx
16487 ; FALLBACK5-NEXT: shldq %cl, %rbx, %r10
16488 ; FALLBACK5-NEXT: movq -16(%rsp,%r8), %r14
16489 ; FALLBACK5-NEXT: movq %r14, %r15
16490 ; FALLBACK5-NEXT: shldq %cl, %r9, %r15
16491 ; FALLBACK5-NEXT: movq -8(%rsp,%r8), %r8
16492 ; FALLBACK5-NEXT: shldq %cl, %r14, %r8
16493 ; FALLBACK5-NEXT: movq %r11, %r9
16494 ; FALLBACK5-NEXT: shlq %cl, %r9
16495 ; FALLBACK5-NEXT: # kill: def $cl killed $cl killed $ecx
16496 ; FALLBACK5-NEXT: shldq %cl, %r11, %rbx
16497 ; FALLBACK5-NEXT: movq %r8, 56(%rdx)
16498 ; FALLBACK5-NEXT: movq %r15, 48(%rdx)
16499 ; FALLBACK5-NEXT: movq %rbx, 8(%rdx)
16500 ; FALLBACK5-NEXT: movq %r10, 16(%rdx)
16501 ; FALLBACK5-NEXT: movq %rdi, 24(%rdx)
16502 ; FALLBACK5-NEXT: movq %rax, 32(%rdx)
16503 ; FALLBACK5-NEXT: movq %rsi, 40(%rdx)
16504 ; FALLBACK5-NEXT: movq %r9, (%rdx)
16505 ; FALLBACK5-NEXT: popq %rbx
16506 ; FALLBACK5-NEXT: popq %r14
16507 ; FALLBACK5-NEXT: popq %r15
16508 ; FALLBACK5-NEXT: retq
16510 ; FALLBACK6-LABEL: shl_64bytes:
16511 ; FALLBACK6: # %bb.0:
16512 ; FALLBACK6-NEXT: pushq %rbp
16513 ; FALLBACK6-NEXT: pushq %r15
16514 ; FALLBACK6-NEXT: pushq %r14
16515 ; FALLBACK6-NEXT: pushq %r13
16516 ; FALLBACK6-NEXT: pushq %r12
16517 ; FALLBACK6-NEXT: pushq %rbx
16518 ; FALLBACK6-NEXT: subq $24, %rsp
16519 ; FALLBACK6-NEXT: movups (%rdi), %xmm0
16520 ; FALLBACK6-NEXT: movups 16(%rdi), %xmm1
16521 ; FALLBACK6-NEXT: movups 32(%rdi), %xmm2
16522 ; FALLBACK6-NEXT: movups 48(%rdi), %xmm3
16523 ; FALLBACK6-NEXT: movl (%rsi), %eax
16524 ; FALLBACK6-NEXT: xorps %xmm4, %xmm4
16525 ; FALLBACK6-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
16526 ; FALLBACK6-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
16527 ; FALLBACK6-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
16528 ; FALLBACK6-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
16529 ; FALLBACK6-NEXT: movaps %xmm3, (%rsp)
16530 ; FALLBACK6-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
16531 ; FALLBACK6-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
16532 ; FALLBACK6-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
16533 ; FALLBACK6-NEXT: leal (,%rax,8), %ecx
16534 ; FALLBACK6-NEXT: andl $56, %ecx
16535 ; FALLBACK6-NEXT: andl $56, %eax
16536 ; FALLBACK6-NEXT: negl %eax
16537 ; FALLBACK6-NEXT: movslq %eax, %rsi
16538 ; FALLBACK6-NEXT: movq -8(%rsp,%rsi), %rax
16539 ; FALLBACK6-NEXT: shlxq %rcx, %rax, %r12
16540 ; FALLBACK6-NEXT: movq -16(%rsp,%rsi), %rdi
16541 ; FALLBACK6-NEXT: shlxq %rcx, %rdi, %r15
16542 ; FALLBACK6-NEXT: movq -24(%rsp,%rsi), %r13
16543 ; FALLBACK6-NEXT: shlxq %rcx, %r13, %r8
16544 ; FALLBACK6-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
16545 ; FALLBACK6-NEXT: movq -32(%rsp,%rsi), %r11
16546 ; FALLBACK6-NEXT: shlxq %rcx, %r11, %r10
16547 ; FALLBACK6-NEXT: movq -40(%rsp,%rsi), %r14
16548 ; FALLBACK6-NEXT: shlxq %rcx, %r14, %rbx
16549 ; FALLBACK6-NEXT: movl %ecx, %r9d
16550 ; FALLBACK6-NEXT: notb %r9b
16551 ; FALLBACK6-NEXT: shrq %rdi
16552 ; FALLBACK6-NEXT: shrxq %r9, %rdi, %rdi
16553 ; FALLBACK6-NEXT: orq %r12, %rdi
16554 ; FALLBACK6-NEXT: movq (%rsp,%rsi), %rbp
16555 ; FALLBACK6-NEXT: shlxq %rcx, %rbp, %r8
16556 ; FALLBACK6-NEXT: shrq %r13
16557 ; FALLBACK6-NEXT: shrxq %r9, %r13, %r12
16558 ; FALLBACK6-NEXT: orq %r15, %r12
16559 ; FALLBACK6-NEXT: shlxq %rcx, 8(%rsp,%rsi), %r15
16560 ; FALLBACK6-NEXT: movq -48(%rsp,%rsi), %rsi
16561 ; FALLBACK6-NEXT: shlxq %rcx, %rsi, %rcx
16562 ; FALLBACK6-NEXT: shrq %r11
16563 ; FALLBACK6-NEXT: shrxq %r9, %r11, %r11
16564 ; FALLBACK6-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
16565 ; FALLBACK6-NEXT: shrq %r14
16566 ; FALLBACK6-NEXT: shrxq %r9, %r14, %r14
16567 ; FALLBACK6-NEXT: orq %r10, %r14
16568 ; FALLBACK6-NEXT: shrq %rsi
16569 ; FALLBACK6-NEXT: shrxq %r9, %rsi, %rsi
16570 ; FALLBACK6-NEXT: orq %rbx, %rsi
16571 ; FALLBACK6-NEXT: shrq %rax
16572 ; FALLBACK6-NEXT: shrxq %r9, %rax, %rax
16573 ; FALLBACK6-NEXT: orq %r8, %rax
16574 ; FALLBACK6-NEXT: shrq %rbp
16575 ; FALLBACK6-NEXT: shrxq %r9, %rbp, %r8
16576 ; FALLBACK6-NEXT: orq %r15, %r8
16577 ; FALLBACK6-NEXT: movq %rcx, (%rdx)
16578 ; FALLBACK6-NEXT: movq %r8, 56(%rdx)
16579 ; FALLBACK6-NEXT: movq %rax, 48(%rdx)
16580 ; FALLBACK6-NEXT: movq %rsi, 8(%rdx)
16581 ; FALLBACK6-NEXT: movq %r14, 16(%rdx)
16582 ; FALLBACK6-NEXT: movq %r11, 24(%rdx)
16583 ; FALLBACK6-NEXT: movq %r12, 32(%rdx)
16584 ; FALLBACK6-NEXT: movq %rdi, 40(%rdx)
16585 ; FALLBACK6-NEXT: addq $24, %rsp
16586 ; FALLBACK6-NEXT: popq %rbx
16587 ; FALLBACK6-NEXT: popq %r12
16588 ; FALLBACK6-NEXT: popq %r13
16589 ; FALLBACK6-NEXT: popq %r14
16590 ; FALLBACK6-NEXT: popq %r15
16591 ; FALLBACK6-NEXT: popq %rbp
16592 ; FALLBACK6-NEXT: retq
16594 ; FALLBACK7-LABEL: shl_64bytes:
16595 ; FALLBACK7: # %bb.0:
16596 ; FALLBACK7-NEXT: pushq %r15
16597 ; FALLBACK7-NEXT: pushq %r14
16598 ; FALLBACK7-NEXT: pushq %rbx
16599 ; FALLBACK7-NEXT: movups (%rdi), %xmm0
16600 ; FALLBACK7-NEXT: movups 16(%rdi), %xmm1
16601 ; FALLBACK7-NEXT: movups 32(%rdi), %xmm2
16602 ; FALLBACK7-NEXT: movups 48(%rdi), %xmm3
16603 ; FALLBACK7-NEXT: movl (%rsi), %eax
16604 ; FALLBACK7-NEXT: xorps %xmm4, %xmm4
16605 ; FALLBACK7-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
16606 ; FALLBACK7-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
16607 ; FALLBACK7-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
16608 ; FALLBACK7-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
16609 ; FALLBACK7-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)
16610 ; FALLBACK7-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
16611 ; FALLBACK7-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
16612 ; FALLBACK7-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
16613 ; FALLBACK7-NEXT: leal (,%rax,8), %ecx
16614 ; FALLBACK7-NEXT: andl $56, %ecx
16615 ; FALLBACK7-NEXT: andl $56, %eax
16616 ; FALLBACK7-NEXT: negl %eax
16617 ; FALLBACK7-NEXT: movslq %eax, %r8
16618 ; FALLBACK7-NEXT: movq -32(%rsp,%r8), %rax
16619 ; FALLBACK7-NEXT: movq -24(%rsp,%r8), %r9
16620 ; FALLBACK7-NEXT: movq %r9, %rsi
16621 ; FALLBACK7-NEXT: shldq %cl, %rax, %rsi
16622 ; FALLBACK7-NEXT: movq -40(%rsp,%r8), %rdi
16623 ; FALLBACK7-NEXT: shldq %cl, %rdi, %rax
16624 ; FALLBACK7-NEXT: movq -48(%rsp,%r8), %r10
16625 ; FALLBACK7-NEXT: shldq %cl, %r10, %rdi
16626 ; FALLBACK7-NEXT: movq -64(%rsp,%r8), %r11
16627 ; FALLBACK7-NEXT: movq -56(%rsp,%r8), %rbx
16628 ; FALLBACK7-NEXT: shldq %cl, %rbx, %r10
16629 ; FALLBACK7-NEXT: movq -16(%rsp,%r8), %r14
16630 ; FALLBACK7-NEXT: movq %r14, %r15
16631 ; FALLBACK7-NEXT: shldq %cl, %r9, %r15
16632 ; FALLBACK7-NEXT: movq -8(%rsp,%r8), %r8
16633 ; FALLBACK7-NEXT: shldq %cl, %r14, %r8
16634 ; FALLBACK7-NEXT: shlxq %rcx, %r11, %r9
16635 ; FALLBACK7-NEXT: # kill: def $cl killed $cl killed $rcx
16636 ; FALLBACK7-NEXT: shldq %cl, %r11, %rbx
16637 ; FALLBACK7-NEXT: movq %r8, 56(%rdx)
16638 ; FALLBACK7-NEXT: movq %r15, 48(%rdx)
16639 ; FALLBACK7-NEXT: movq %rbx, 8(%rdx)
16640 ; FALLBACK7-NEXT: movq %r10, 16(%rdx)
16641 ; FALLBACK7-NEXT: movq %rdi, 24(%rdx)
16642 ; FALLBACK7-NEXT: movq %rax, 32(%rdx)
16643 ; FALLBACK7-NEXT: movq %rsi, 40(%rdx)
16644 ; FALLBACK7-NEXT: movq %r9, (%rdx)
16645 ; FALLBACK7-NEXT: popq %rbx
16646 ; FALLBACK7-NEXT: popq %r14
16647 ; FALLBACK7-NEXT: popq %r15
16648 ; FALLBACK7-NEXT: retq
16650 ; FALLBACK8-LABEL: shl_64bytes:
16651 ; FALLBACK8: # %bb.0:
16652 ; FALLBACK8-NEXT: pushq %r15
16653 ; FALLBACK8-NEXT: pushq %r14
16654 ; FALLBACK8-NEXT: pushq %r13
16655 ; FALLBACK8-NEXT: pushq %r12
16656 ; FALLBACK8-NEXT: pushq %rbx
16657 ; FALLBACK8-NEXT: vmovups (%rdi), %ymm0
16658 ; FALLBACK8-NEXT: vmovups 32(%rdi), %ymm1
16659 ; FALLBACK8-NEXT: movl (%rsi), %ecx
16660 ; FALLBACK8-NEXT: vxorps %xmm2, %xmm2, %xmm2
16661 ; FALLBACK8-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
16662 ; FALLBACK8-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
16663 ; FALLBACK8-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
16664 ; FALLBACK8-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
16665 ; FALLBACK8-NEXT: leal (,%rcx,8), %eax
16666 ; FALLBACK8-NEXT: andl $56, %eax
16667 ; FALLBACK8-NEXT: andl $56, %ecx
16668 ; FALLBACK8-NEXT: negl %ecx
16669 ; FALLBACK8-NEXT: movslq %ecx, %r9
16670 ; FALLBACK8-NEXT: movq -24(%rsp,%r9), %rdi
16671 ; FALLBACK8-NEXT: movq %rdi, %r10
16672 ; FALLBACK8-NEXT: movl %eax, %ecx
16673 ; FALLBACK8-NEXT: shlq %cl, %r10
16674 ; FALLBACK8-NEXT: movl %eax, %esi
16675 ; FALLBACK8-NEXT: notb %sil
16676 ; FALLBACK8-NEXT: movq -32(%rsp,%r9), %r11
16677 ; FALLBACK8-NEXT: movq %r11, %r8
16678 ; FALLBACK8-NEXT: shrq %r8
16679 ; FALLBACK8-NEXT: movl %esi, %ecx
16680 ; FALLBACK8-NEXT: shrq %cl, %r8
16681 ; FALLBACK8-NEXT: orq %r10, %r8
16682 ; FALLBACK8-NEXT: movl %eax, %ecx
16683 ; FALLBACK8-NEXT: shlq %cl, %r11
16684 ; FALLBACK8-NEXT: movq -40(%rsp,%r9), %rbx
16685 ; FALLBACK8-NEXT: movq %rbx, %r10
16686 ; FALLBACK8-NEXT: shrq %r10
16687 ; FALLBACK8-NEXT: movl %esi, %ecx
16688 ; FALLBACK8-NEXT: shrq %cl, %r10
16689 ; FALLBACK8-NEXT: orq %r11, %r10
16690 ; FALLBACK8-NEXT: movl %eax, %ecx
16691 ; FALLBACK8-NEXT: shlq %cl, %rbx
16692 ; FALLBACK8-NEXT: movq -48(%rsp,%r9), %r15
16693 ; FALLBACK8-NEXT: movq %r15, %r11
16694 ; FALLBACK8-NEXT: shrq %r11
16695 ; FALLBACK8-NEXT: movl %esi, %ecx
16696 ; FALLBACK8-NEXT: shrq %cl, %r11
16697 ; FALLBACK8-NEXT: orq %rbx, %r11
16698 ; FALLBACK8-NEXT: movl %eax, %ecx
16699 ; FALLBACK8-NEXT: shlq %cl, %r15
16700 ; FALLBACK8-NEXT: movq -64(%rsp,%r9), %r14
16701 ; FALLBACK8-NEXT: movq -56(%rsp,%r9), %r12
16702 ; FALLBACK8-NEXT: movq %r12, %rbx
16703 ; FALLBACK8-NEXT: shrq %rbx
16704 ; FALLBACK8-NEXT: movl %esi, %ecx
16705 ; FALLBACK8-NEXT: shrq %cl, %rbx
16706 ; FALLBACK8-NEXT: orq %r15, %rbx
16707 ; FALLBACK8-NEXT: movl %eax, %ecx
16708 ; FALLBACK8-NEXT: shlq %cl, %r12
16709 ; FALLBACK8-NEXT: movq %r14, %r15
16710 ; FALLBACK8-NEXT: shrq %r15
16711 ; FALLBACK8-NEXT: movl %esi, %ecx
16712 ; FALLBACK8-NEXT: shrq %cl, %r15
16713 ; FALLBACK8-NEXT: orq %r12, %r15
16714 ; FALLBACK8-NEXT: movq -16(%rsp,%r9), %r12
16715 ; FALLBACK8-NEXT: movq %r12, %r13
16716 ; FALLBACK8-NEXT: movl %eax, %ecx
16717 ; FALLBACK8-NEXT: shlq %cl, %r13
16718 ; FALLBACK8-NEXT: shrq %rdi
16719 ; FALLBACK8-NEXT: movl %esi, %ecx
16720 ; FALLBACK8-NEXT: shrq %cl, %rdi
16721 ; FALLBACK8-NEXT: orq %r13, %rdi
16722 ; FALLBACK8-NEXT: movq -8(%rsp,%r9), %r9
16723 ; FALLBACK8-NEXT: movl %eax, %ecx
16724 ; FALLBACK8-NEXT: shlq %cl, %r9
16725 ; FALLBACK8-NEXT: shrq %r12
16726 ; FALLBACK8-NEXT: movl %esi, %ecx
16727 ; FALLBACK8-NEXT: shrq %cl, %r12
16728 ; FALLBACK8-NEXT: orq %r9, %r12
16729 ; FALLBACK8-NEXT: movl %eax, %ecx
16730 ; FALLBACK8-NEXT: shlq %cl, %r14
16731 ; FALLBACK8-NEXT: movq %r14, (%rdx)
16732 ; FALLBACK8-NEXT: movq %r12, 56(%rdx)
16733 ; FALLBACK8-NEXT: movq %rdi, 48(%rdx)
16734 ; FALLBACK8-NEXT: movq %r15, 8(%rdx)
16735 ; FALLBACK8-NEXT: movq %rbx, 16(%rdx)
16736 ; FALLBACK8-NEXT: movq %r11, 24(%rdx)
16737 ; FALLBACK8-NEXT: movq %r10, 32(%rdx)
16738 ; FALLBACK8-NEXT: movq %r8, 40(%rdx)
16739 ; FALLBACK8-NEXT: popq %rbx
16740 ; FALLBACK8-NEXT: popq %r12
16741 ; FALLBACK8-NEXT: popq %r13
16742 ; FALLBACK8-NEXT: popq %r14
16743 ; FALLBACK8-NEXT: popq %r15
16744 ; FALLBACK8-NEXT: vzeroupper
16745 ; FALLBACK8-NEXT: retq
16747 ; FALLBACK9-LABEL: shl_64bytes:
16748 ; FALLBACK9: # %bb.0:
16749 ; FALLBACK9-NEXT: pushq %r15
16750 ; FALLBACK9-NEXT: pushq %r14
16751 ; FALLBACK9-NEXT: pushq %rbx
16752 ; FALLBACK9-NEXT: vmovups (%rdi), %ymm0
16753 ; FALLBACK9-NEXT: vmovups 32(%rdi), %ymm1
16754 ; FALLBACK9-NEXT: movl (%rsi), %eax
16755 ; FALLBACK9-NEXT: vxorps %xmm2, %xmm2, %xmm2
16756 ; FALLBACK9-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
16757 ; FALLBACK9-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
16758 ; FALLBACK9-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
16759 ; FALLBACK9-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
16760 ; FALLBACK9-NEXT: leal (,%rax,8), %ecx
16761 ; FALLBACK9-NEXT: andl $56, %ecx
16762 ; FALLBACK9-NEXT: andl $56, %eax
16763 ; FALLBACK9-NEXT: negl %eax
16764 ; FALLBACK9-NEXT: movslq %eax, %r8
16765 ; FALLBACK9-NEXT: movq -32(%rsp,%r8), %rax
16766 ; FALLBACK9-NEXT: movq -24(%rsp,%r8), %r9
16767 ; FALLBACK9-NEXT: movq %r9, %rsi
16768 ; FALLBACK9-NEXT: shldq %cl, %rax, %rsi
16769 ; FALLBACK9-NEXT: movq -40(%rsp,%r8), %rdi
16770 ; FALLBACK9-NEXT: shldq %cl, %rdi, %rax
16771 ; FALLBACK9-NEXT: movq -48(%rsp,%r8), %r10
16772 ; FALLBACK9-NEXT: shldq %cl, %r10, %rdi
16773 ; FALLBACK9-NEXT: movq -64(%rsp,%r8), %r11
16774 ; FALLBACK9-NEXT: movq -56(%rsp,%r8), %rbx
16775 ; FALLBACK9-NEXT: shldq %cl, %rbx, %r10
16776 ; FALLBACK9-NEXT: movq -16(%rsp,%r8), %r14
16777 ; FALLBACK9-NEXT: movq %r14, %r15
16778 ; FALLBACK9-NEXT: shldq %cl, %r9, %r15
16779 ; FALLBACK9-NEXT: movq -8(%rsp,%r8), %r8
16780 ; FALLBACK9-NEXT: shldq %cl, %r14, %r8
16781 ; FALLBACK9-NEXT: movq %r11, %r9
16782 ; FALLBACK9-NEXT: shlq %cl, %r9
16783 ; FALLBACK9-NEXT: # kill: def $cl killed $cl killed $ecx
16784 ; FALLBACK9-NEXT: shldq %cl, %r11, %rbx
16785 ; FALLBACK9-NEXT: movq %r8, 56(%rdx)
16786 ; FALLBACK9-NEXT: movq %r15, 48(%rdx)
16787 ; FALLBACK9-NEXT: movq %rbx, 8(%rdx)
16788 ; FALLBACK9-NEXT: movq %r10, 16(%rdx)
16789 ; FALLBACK9-NEXT: movq %rdi, 24(%rdx)
16790 ; FALLBACK9-NEXT: movq %rax, 32(%rdx)
16791 ; FALLBACK9-NEXT: movq %rsi, 40(%rdx)
16792 ; FALLBACK9-NEXT: movq %r9, (%rdx)
16793 ; FALLBACK9-NEXT: popq %rbx
16794 ; FALLBACK9-NEXT: popq %r14
16795 ; FALLBACK9-NEXT: popq %r15
16796 ; FALLBACK9-NEXT: vzeroupper
16797 ; FALLBACK9-NEXT: retq
16799 ; FALLBACK10-LABEL: shl_64bytes:
16800 ; FALLBACK10: # %bb.0:
16801 ; FALLBACK10-NEXT: pushq %rbp
16802 ; FALLBACK10-NEXT: pushq %r15
16803 ; FALLBACK10-NEXT: pushq %r14
16804 ; FALLBACK10-NEXT: pushq %r13
16805 ; FALLBACK10-NEXT: pushq %r12
16806 ; FALLBACK10-NEXT: pushq %rbx
16807 ; FALLBACK10-NEXT: subq $24, %rsp
16808 ; FALLBACK10-NEXT: vmovups (%rdi), %ymm0
16809 ; FALLBACK10-NEXT: vmovups 32(%rdi), %ymm1
16810 ; FALLBACK10-NEXT: movl (%rsi), %eax
16811 ; FALLBACK10-NEXT: vxorps %xmm2, %xmm2, %xmm2
16812 ; FALLBACK10-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
16813 ; FALLBACK10-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
16814 ; FALLBACK10-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
16815 ; FALLBACK10-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
16816 ; FALLBACK10-NEXT: leal (,%rax,8), %ecx
16817 ; FALLBACK10-NEXT: andl $56, %ecx
16818 ; FALLBACK10-NEXT: andl $56, %eax
16819 ; FALLBACK10-NEXT: negl %eax
16820 ; FALLBACK10-NEXT: movslq %eax, %rsi
16821 ; FALLBACK10-NEXT: movq -8(%rsp,%rsi), %rax
16822 ; FALLBACK10-NEXT: shlxq %rcx, %rax, %r12
16823 ; FALLBACK10-NEXT: movq -16(%rsp,%rsi), %rdi
16824 ; FALLBACK10-NEXT: shlxq %rcx, %rdi, %r15
16825 ; FALLBACK10-NEXT: movq -24(%rsp,%rsi), %r13
16826 ; FALLBACK10-NEXT: shlxq %rcx, %r13, %r8
16827 ; FALLBACK10-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
16828 ; FALLBACK10-NEXT: movq -32(%rsp,%rsi), %r11
16829 ; FALLBACK10-NEXT: shlxq %rcx, %r11, %r10
16830 ; FALLBACK10-NEXT: movq -40(%rsp,%rsi), %r14
16831 ; FALLBACK10-NEXT: shlxq %rcx, %r14, %rbx
16832 ; FALLBACK10-NEXT: movl %ecx, %r9d
16833 ; FALLBACK10-NEXT: notb %r9b
16834 ; FALLBACK10-NEXT: shrq %rdi
16835 ; FALLBACK10-NEXT: shrxq %r9, %rdi, %rdi
16836 ; FALLBACK10-NEXT: orq %r12, %rdi
16837 ; FALLBACK10-NEXT: movq (%rsp,%rsi), %rbp
16838 ; FALLBACK10-NEXT: shlxq %rcx, %rbp, %r8
16839 ; FALLBACK10-NEXT: shrq %r13
16840 ; FALLBACK10-NEXT: shrxq %r9, %r13, %r12
16841 ; FALLBACK10-NEXT: orq %r15, %r12
16842 ; FALLBACK10-NEXT: shlxq %rcx, 8(%rsp,%rsi), %r15
16843 ; FALLBACK10-NEXT: movq -48(%rsp,%rsi), %rsi
16844 ; FALLBACK10-NEXT: shlxq %rcx, %rsi, %rcx
16845 ; FALLBACK10-NEXT: shrq %r11
16846 ; FALLBACK10-NEXT: shrxq %r9, %r11, %r11
16847 ; FALLBACK10-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
16848 ; FALLBACK10-NEXT: shrq %r14
16849 ; FALLBACK10-NEXT: shrxq %r9, %r14, %r14
16850 ; FALLBACK10-NEXT: orq %r10, %r14
16851 ; FALLBACK10-NEXT: shrq %rsi
16852 ; FALLBACK10-NEXT: shrxq %r9, %rsi, %rsi
16853 ; FALLBACK10-NEXT: orq %rbx, %rsi
16854 ; FALLBACK10-NEXT: shrq %rax
16855 ; FALLBACK10-NEXT: shrxq %r9, %rax, %rax
16856 ; FALLBACK10-NEXT: orq %r8, %rax
16857 ; FALLBACK10-NEXT: shrq %rbp
16858 ; FALLBACK10-NEXT: shrxq %r9, %rbp, %r8
16859 ; FALLBACK10-NEXT: orq %r15, %r8
16860 ; FALLBACK10-NEXT: movq %rcx, (%rdx)
16861 ; FALLBACK10-NEXT: movq %r8, 56(%rdx)
16862 ; FALLBACK10-NEXT: movq %rax, 48(%rdx)
16863 ; FALLBACK10-NEXT: movq %rsi, 8(%rdx)
16864 ; FALLBACK10-NEXT: movq %r14, 16(%rdx)
16865 ; FALLBACK10-NEXT: movq %r11, 24(%rdx)
16866 ; FALLBACK10-NEXT: movq %r12, 32(%rdx)
16867 ; FALLBACK10-NEXT: movq %rdi, 40(%rdx)
16868 ; FALLBACK10-NEXT: addq $24, %rsp
16869 ; FALLBACK10-NEXT: popq %rbx
16870 ; FALLBACK10-NEXT: popq %r12
16871 ; FALLBACK10-NEXT: popq %r13
16872 ; FALLBACK10-NEXT: popq %r14
16873 ; FALLBACK10-NEXT: popq %r15
16874 ; FALLBACK10-NEXT: popq %rbp
16875 ; FALLBACK10-NEXT: vzeroupper
16876 ; FALLBACK10-NEXT: retq
16878 ; FALLBACK11-LABEL: shl_64bytes:
16879 ; FALLBACK11: # %bb.0:
16880 ; FALLBACK11-NEXT: pushq %r15
16881 ; FALLBACK11-NEXT: pushq %r14
16882 ; FALLBACK11-NEXT: pushq %rbx
16883 ; FALLBACK11-NEXT: vmovups (%rdi), %ymm0
16884 ; FALLBACK11-NEXT: vmovups 32(%rdi), %ymm1
16885 ; FALLBACK11-NEXT: movl (%rsi), %eax
16886 ; FALLBACK11-NEXT: vxorps %xmm2, %xmm2, %xmm2
16887 ; FALLBACK11-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
16888 ; FALLBACK11-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
16889 ; FALLBACK11-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
16890 ; FALLBACK11-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
16891 ; FALLBACK11-NEXT: leal (,%rax,8), %ecx
16892 ; FALLBACK11-NEXT: andl $56, %ecx
16893 ; FALLBACK11-NEXT: andl $56, %eax
16894 ; FALLBACK11-NEXT: negl %eax
16895 ; FALLBACK11-NEXT: movslq %eax, %r8
16896 ; FALLBACK11-NEXT: movq -32(%rsp,%r8), %rax
16897 ; FALLBACK11-NEXT: movq -24(%rsp,%r8), %r9
16898 ; FALLBACK11-NEXT: movq %r9, %rsi
16899 ; FALLBACK11-NEXT: shldq %cl, %rax, %rsi
16900 ; FALLBACK11-NEXT: movq -40(%rsp,%r8), %rdi
16901 ; FALLBACK11-NEXT: shldq %cl, %rdi, %rax
16902 ; FALLBACK11-NEXT: movq -48(%rsp,%r8), %r10
16903 ; FALLBACK11-NEXT: shldq %cl, %r10, %rdi
16904 ; FALLBACK11-NEXT: movq -64(%rsp,%r8), %r11
16905 ; FALLBACK11-NEXT: movq -56(%rsp,%r8), %rbx
16906 ; FALLBACK11-NEXT: shldq %cl, %rbx, %r10
16907 ; FALLBACK11-NEXT: movq -16(%rsp,%r8), %r14
16908 ; FALLBACK11-NEXT: movq %r14, %r15
16909 ; FALLBACK11-NEXT: shldq %cl, %r9, %r15
16910 ; FALLBACK11-NEXT: movq -8(%rsp,%r8), %r8
16911 ; FALLBACK11-NEXT: shldq %cl, %r14, %r8
16912 ; FALLBACK11-NEXT: shlxq %rcx, %r11, %r9
16913 ; FALLBACK11-NEXT: # kill: def $cl killed $cl killed $rcx
16914 ; FALLBACK11-NEXT: shldq %cl, %r11, %rbx
16915 ; FALLBACK11-NEXT: movq %r8, 56(%rdx)
16916 ; FALLBACK11-NEXT: movq %r15, 48(%rdx)
16917 ; FALLBACK11-NEXT: movq %rbx, 8(%rdx)
16918 ; FALLBACK11-NEXT: movq %r10, 16(%rdx)
16919 ; FALLBACK11-NEXT: movq %rdi, 24(%rdx)
16920 ; FALLBACK11-NEXT: movq %rax, 32(%rdx)
16921 ; FALLBACK11-NEXT: movq %rsi, 40(%rdx)
16922 ; FALLBACK11-NEXT: movq %r9, (%rdx)
16923 ; FALLBACK11-NEXT: popq %rbx
16924 ; FALLBACK11-NEXT: popq %r14
16925 ; FALLBACK11-NEXT: popq %r15
16926 ; FALLBACK11-NEXT: vzeroupper
16927 ; FALLBACK11-NEXT: retq
16929 ; FALLBACK12-LABEL: shl_64bytes:
16930 ; FALLBACK12: # %bb.0:
16931 ; FALLBACK12-NEXT: pushq %r15
16932 ; FALLBACK12-NEXT: pushq %r14
16933 ; FALLBACK12-NEXT: pushq %r13
16934 ; FALLBACK12-NEXT: pushq %r12
16935 ; FALLBACK12-NEXT: pushq %rbx
16936 ; FALLBACK12-NEXT: vmovups (%rdi), %zmm0
16937 ; FALLBACK12-NEXT: movl (%rsi), %ecx
16938 ; FALLBACK12-NEXT: vxorps %xmm1, %xmm1, %xmm1
16939 ; FALLBACK12-NEXT: vmovups %zmm1, -{{[0-9]+}}(%rsp)
16940 ; FALLBACK12-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp)
16941 ; FALLBACK12-NEXT: leal (,%rcx,8), %eax
16942 ; FALLBACK12-NEXT: andl $56, %eax
16943 ; FALLBACK12-NEXT: andl $56, %ecx
16944 ; FALLBACK12-NEXT: negl %ecx
16945 ; FALLBACK12-NEXT: movslq %ecx, %r9
16946 ; FALLBACK12-NEXT: movq -24(%rsp,%r9), %rdi
16947 ; FALLBACK12-NEXT: movq %rdi, %r10
16948 ; FALLBACK12-NEXT: movl %eax, %ecx
16949 ; FALLBACK12-NEXT: shlq %cl, %r10
16950 ; FALLBACK12-NEXT: movl %eax, %esi
16951 ; FALLBACK12-NEXT: notb %sil
16952 ; FALLBACK12-NEXT: movq -32(%rsp,%r9), %r11
16953 ; FALLBACK12-NEXT: movq %r11, %r8
16954 ; FALLBACK12-NEXT: shrq %r8
16955 ; FALLBACK12-NEXT: movl %esi, %ecx
16956 ; FALLBACK12-NEXT: shrq %cl, %r8
16957 ; FALLBACK12-NEXT: orq %r10, %r8
16958 ; FALLBACK12-NEXT: movl %eax, %ecx
16959 ; FALLBACK12-NEXT: shlq %cl, %r11
16960 ; FALLBACK12-NEXT: movq -40(%rsp,%r9), %rbx
16961 ; FALLBACK12-NEXT: movq %rbx, %r10
16962 ; FALLBACK12-NEXT: shrq %r10
16963 ; FALLBACK12-NEXT: movl %esi, %ecx
16964 ; FALLBACK12-NEXT: shrq %cl, %r10
16965 ; FALLBACK12-NEXT: orq %r11, %r10
16966 ; FALLBACK12-NEXT: movl %eax, %ecx
16967 ; FALLBACK12-NEXT: shlq %cl, %rbx
16968 ; FALLBACK12-NEXT: movq -48(%rsp,%r9), %r15
16969 ; FALLBACK12-NEXT: movq %r15, %r11
16970 ; FALLBACK12-NEXT: shrq %r11
16971 ; FALLBACK12-NEXT: movl %esi, %ecx
16972 ; FALLBACK12-NEXT: shrq %cl, %r11
16973 ; FALLBACK12-NEXT: orq %rbx, %r11
16974 ; FALLBACK12-NEXT: movl %eax, %ecx
16975 ; FALLBACK12-NEXT: shlq %cl, %r15
16976 ; FALLBACK12-NEXT: movq -64(%rsp,%r9), %r14
16977 ; FALLBACK12-NEXT: movq -56(%rsp,%r9), %r12
16978 ; FALLBACK12-NEXT: movq %r12, %rbx
16979 ; FALLBACK12-NEXT: shrq %rbx
16980 ; FALLBACK12-NEXT: movl %esi, %ecx
16981 ; FALLBACK12-NEXT: shrq %cl, %rbx
16982 ; FALLBACK12-NEXT: orq %r15, %rbx
16983 ; FALLBACK12-NEXT: movl %eax, %ecx
16984 ; FALLBACK12-NEXT: shlq %cl, %r12
16985 ; FALLBACK12-NEXT: movq %r14, %r15
16986 ; FALLBACK12-NEXT: shrq %r15
16987 ; FALLBACK12-NEXT: movl %esi, %ecx
16988 ; FALLBACK12-NEXT: shrq %cl, %r15
16989 ; FALLBACK12-NEXT: orq %r12, %r15
16990 ; FALLBACK12-NEXT: movq -16(%rsp,%r9), %r12
16991 ; FALLBACK12-NEXT: movq %r12, %r13
16992 ; FALLBACK12-NEXT: movl %eax, %ecx
16993 ; FALLBACK12-NEXT: shlq %cl, %r13
16994 ; FALLBACK12-NEXT: shrq %rdi
16995 ; FALLBACK12-NEXT: movl %esi, %ecx
16996 ; FALLBACK12-NEXT: shrq %cl, %rdi
16997 ; FALLBACK12-NEXT: orq %r13, %rdi
16998 ; FALLBACK12-NEXT: movq -8(%rsp,%r9), %r9
16999 ; FALLBACK12-NEXT: movl %eax, %ecx
17000 ; FALLBACK12-NEXT: shlq %cl, %r9
17001 ; FALLBACK12-NEXT: shrq %r12
17002 ; FALLBACK12-NEXT: movl %esi, %ecx
17003 ; FALLBACK12-NEXT: shrq %cl, %r12
17004 ; FALLBACK12-NEXT: orq %r9, %r12
17005 ; FALLBACK12-NEXT: movl %eax, %ecx
17006 ; FALLBACK12-NEXT: shlq %cl, %r14
17007 ; FALLBACK12-NEXT: movq %r14, (%rdx)
17008 ; FALLBACK12-NEXT: movq %r12, 56(%rdx)
17009 ; FALLBACK12-NEXT: movq %rdi, 48(%rdx)
17010 ; FALLBACK12-NEXT: movq %r15, 8(%rdx)
17011 ; FALLBACK12-NEXT: movq %rbx, 16(%rdx)
17012 ; FALLBACK12-NEXT: movq %r11, 24(%rdx)
17013 ; FALLBACK12-NEXT: movq %r10, 32(%rdx)
17014 ; FALLBACK12-NEXT: movq %r8, 40(%rdx)
17015 ; FALLBACK12-NEXT: popq %rbx
17016 ; FALLBACK12-NEXT: popq %r12
17017 ; FALLBACK12-NEXT: popq %r13
17018 ; FALLBACK12-NEXT: popq %r14
17019 ; FALLBACK12-NEXT: popq %r15
17020 ; FALLBACK12-NEXT: vzeroupper
17021 ; FALLBACK12-NEXT: retq
17023 ; FALLBACK13-LABEL: shl_64bytes:
17024 ; FALLBACK13: # %bb.0:
17025 ; FALLBACK13-NEXT: pushq %r15
17026 ; FALLBACK13-NEXT: pushq %r14
17027 ; FALLBACK13-NEXT: pushq %rbx
17028 ; FALLBACK13-NEXT: vmovups (%rdi), %zmm0
17029 ; FALLBACK13-NEXT: movl (%rsi), %eax
17030 ; FALLBACK13-NEXT: vxorps %xmm1, %xmm1, %xmm1
17031 ; FALLBACK13-NEXT: vmovups %zmm1, -{{[0-9]+}}(%rsp)
17032 ; FALLBACK13-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp)
17033 ; FALLBACK13-NEXT: leal (,%rax,8), %ecx
17034 ; FALLBACK13-NEXT: andl $56, %ecx
17035 ; FALLBACK13-NEXT: andl $56, %eax
17036 ; FALLBACK13-NEXT: negl %eax
17037 ; FALLBACK13-NEXT: movslq %eax, %r8
17038 ; FALLBACK13-NEXT: movq -32(%rsp,%r8), %rax
17039 ; FALLBACK13-NEXT: movq -24(%rsp,%r8), %r9
17040 ; FALLBACK13-NEXT: movq %r9, %rsi
17041 ; FALLBACK13-NEXT: shldq %cl, %rax, %rsi
17042 ; FALLBACK13-NEXT: movq -40(%rsp,%r8), %rdi
17043 ; FALLBACK13-NEXT: shldq %cl, %rdi, %rax
17044 ; FALLBACK13-NEXT: movq -48(%rsp,%r8), %r10
17045 ; FALLBACK13-NEXT: shldq %cl, %r10, %rdi
17046 ; FALLBACK13-NEXT: movq -64(%rsp,%r8), %r11
17047 ; FALLBACK13-NEXT: movq -56(%rsp,%r8), %rbx
17048 ; FALLBACK13-NEXT: shldq %cl, %rbx, %r10
17049 ; FALLBACK13-NEXT: movq -16(%rsp,%r8), %r14
17050 ; FALLBACK13-NEXT: movq %r14, %r15
17051 ; FALLBACK13-NEXT: shldq %cl, %r9, %r15
17052 ; FALLBACK13-NEXT: movq -8(%rsp,%r8), %r8
17053 ; FALLBACK13-NEXT: shldq %cl, %r14, %r8
17054 ; FALLBACK13-NEXT: movq %r11, %r9
17055 ; FALLBACK13-NEXT: shlq %cl, %r9
17056 ; FALLBACK13-NEXT: # kill: def $cl killed $cl killed $ecx
17057 ; FALLBACK13-NEXT: shldq %cl, %r11, %rbx
17058 ; FALLBACK13-NEXT: movq %r8, 56(%rdx)
17059 ; FALLBACK13-NEXT: movq %r15, 48(%rdx)
17060 ; FALLBACK13-NEXT: movq %rbx, 8(%rdx)
17061 ; FALLBACK13-NEXT: movq %r10, 16(%rdx)
17062 ; FALLBACK13-NEXT: movq %rdi, 24(%rdx)
17063 ; FALLBACK13-NEXT: movq %rax, 32(%rdx)
17064 ; FALLBACK13-NEXT: movq %rsi, 40(%rdx)
17065 ; FALLBACK13-NEXT: movq %r9, (%rdx)
17066 ; FALLBACK13-NEXT: popq %rbx
17067 ; FALLBACK13-NEXT: popq %r14
17068 ; FALLBACK13-NEXT: popq %r15
17069 ; FALLBACK13-NEXT: vzeroupper
17070 ; FALLBACK13-NEXT: retq
17072 ; FALLBACK14-LABEL: shl_64bytes:
17073 ; FALLBACK14: # %bb.0:
17074 ; FALLBACK14-NEXT: pushq %rbp
17075 ; FALLBACK14-NEXT: pushq %r15
17076 ; FALLBACK14-NEXT: pushq %r14
17077 ; FALLBACK14-NEXT: pushq %r13
17078 ; FALLBACK14-NEXT: pushq %r12
17079 ; FALLBACK14-NEXT: pushq %rbx
17080 ; FALLBACK14-NEXT: subq $24, %rsp
17081 ; FALLBACK14-NEXT: vmovups (%rdi), %zmm0
17082 ; FALLBACK14-NEXT: movl (%rsi), %eax
17083 ; FALLBACK14-NEXT: vxorps %xmm1, %xmm1, %xmm1
17084 ; FALLBACK14-NEXT: vmovups %zmm1, -{{[0-9]+}}(%rsp)
17085 ; FALLBACK14-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp)
17086 ; FALLBACK14-NEXT: leal (,%rax,8), %ecx
17087 ; FALLBACK14-NEXT: andl $56, %ecx
17088 ; FALLBACK14-NEXT: andl $56, %eax
17089 ; FALLBACK14-NEXT: negl %eax
17090 ; FALLBACK14-NEXT: movslq %eax, %rsi
17091 ; FALLBACK14-NEXT: movq -8(%rsp,%rsi), %rax
17092 ; FALLBACK14-NEXT: shlxq %rcx, %rax, %r12
17093 ; FALLBACK14-NEXT: movq -16(%rsp,%rsi), %rdi
17094 ; FALLBACK14-NEXT: shlxq %rcx, %rdi, %r15
17095 ; FALLBACK14-NEXT: movq -24(%rsp,%rsi), %r13
17096 ; FALLBACK14-NEXT: shlxq %rcx, %r13, %r8
17097 ; FALLBACK14-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
17098 ; FALLBACK14-NEXT: movq -32(%rsp,%rsi), %r11
17099 ; FALLBACK14-NEXT: shlxq %rcx, %r11, %r10
17100 ; FALLBACK14-NEXT: movq -40(%rsp,%rsi), %r14
17101 ; FALLBACK14-NEXT: shlxq %rcx, %r14, %rbx
17102 ; FALLBACK14-NEXT: movl %ecx, %r9d
17103 ; FALLBACK14-NEXT: notb %r9b
17104 ; FALLBACK14-NEXT: shrq %rdi
17105 ; FALLBACK14-NEXT: shrxq %r9, %rdi, %rdi
17106 ; FALLBACK14-NEXT: orq %r12, %rdi
17107 ; FALLBACK14-NEXT: movq (%rsp,%rsi), %rbp
17108 ; FALLBACK14-NEXT: shlxq %rcx, %rbp, %r8
17109 ; FALLBACK14-NEXT: shrq %r13
17110 ; FALLBACK14-NEXT: shrxq %r9, %r13, %r12
17111 ; FALLBACK14-NEXT: orq %r15, %r12
17112 ; FALLBACK14-NEXT: shlxq %rcx, 8(%rsp,%rsi), %r15
17113 ; FALLBACK14-NEXT: movq -48(%rsp,%rsi), %rsi
17114 ; FALLBACK14-NEXT: shlxq %rcx, %rsi, %rcx
17115 ; FALLBACK14-NEXT: shrq %r11
17116 ; FALLBACK14-NEXT: shrxq %r9, %r11, %r11
17117 ; FALLBACK14-NEXT: orq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload
17118 ; FALLBACK14-NEXT: shrq %r14
17119 ; FALLBACK14-NEXT: shrxq %r9, %r14, %r14
17120 ; FALLBACK14-NEXT: orq %r10, %r14
17121 ; FALLBACK14-NEXT: shrq %rsi
17122 ; FALLBACK14-NEXT: shrxq %r9, %rsi, %rsi
17123 ; FALLBACK14-NEXT: orq %rbx, %rsi
17124 ; FALLBACK14-NEXT: shrq %rax
17125 ; FALLBACK14-NEXT: shrxq %r9, %rax, %rax
17126 ; FALLBACK14-NEXT: orq %r8, %rax
17127 ; FALLBACK14-NEXT: shrq %rbp
17128 ; FALLBACK14-NEXT: shrxq %r9, %rbp, %r8
17129 ; FALLBACK14-NEXT: orq %r15, %r8
17130 ; FALLBACK14-NEXT: movq %rcx, (%rdx)
17131 ; FALLBACK14-NEXT: movq %r8, 56(%rdx)
17132 ; FALLBACK14-NEXT: movq %rax, 48(%rdx)
17133 ; FALLBACK14-NEXT: movq %rsi, 8(%rdx)
17134 ; FALLBACK14-NEXT: movq %r14, 16(%rdx)
17135 ; FALLBACK14-NEXT: movq %r11, 24(%rdx)
17136 ; FALLBACK14-NEXT: movq %r12, 32(%rdx)
17137 ; FALLBACK14-NEXT: movq %rdi, 40(%rdx)
17138 ; FALLBACK14-NEXT: addq $24, %rsp
17139 ; FALLBACK14-NEXT: popq %rbx
17140 ; FALLBACK14-NEXT: popq %r12
17141 ; FALLBACK14-NEXT: popq %r13
17142 ; FALLBACK14-NEXT: popq %r14
17143 ; FALLBACK14-NEXT: popq %r15
17144 ; FALLBACK14-NEXT: popq %rbp
17145 ; FALLBACK14-NEXT: vzeroupper
17146 ; FALLBACK14-NEXT: retq
17148 ; FALLBACK15-LABEL: shl_64bytes:
17149 ; FALLBACK15: # %bb.0:
17150 ; FALLBACK15-NEXT: pushq %r15
17151 ; FALLBACK15-NEXT: pushq %r14
17152 ; FALLBACK15-NEXT: pushq %rbx
17153 ; FALLBACK15-NEXT: vmovups (%rdi), %zmm0
17154 ; FALLBACK15-NEXT: movl (%rsi), %eax
17155 ; FALLBACK15-NEXT: vxorps %xmm1, %xmm1, %xmm1
17156 ; FALLBACK15-NEXT: vmovups %zmm1, -{{[0-9]+}}(%rsp)
17157 ; FALLBACK15-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp)
17158 ; FALLBACK15-NEXT: leal (,%rax,8), %ecx
17159 ; FALLBACK15-NEXT: andl $56, %ecx
17160 ; FALLBACK15-NEXT: andl $56, %eax
17161 ; FALLBACK15-NEXT: negl %eax
17162 ; FALLBACK15-NEXT: movslq %eax, %r8
17163 ; FALLBACK15-NEXT: movq -32(%rsp,%r8), %rax
17164 ; FALLBACK15-NEXT: movq -24(%rsp,%r8), %r9
17165 ; FALLBACK15-NEXT: movq %r9, %rsi
17166 ; FALLBACK15-NEXT: shldq %cl, %rax, %rsi
17167 ; FALLBACK15-NEXT: movq -40(%rsp,%r8), %rdi
17168 ; FALLBACK15-NEXT: shldq %cl, %rdi, %rax
17169 ; FALLBACK15-NEXT: movq -48(%rsp,%r8), %r10
17170 ; FALLBACK15-NEXT: shldq %cl, %r10, %rdi
17171 ; FALLBACK15-NEXT: movq -64(%rsp,%r8), %r11
17172 ; FALLBACK15-NEXT: movq -56(%rsp,%r8), %rbx
17173 ; FALLBACK15-NEXT: shldq %cl, %rbx, %r10
17174 ; FALLBACK15-NEXT: movq -16(%rsp,%r8), %r14
17175 ; FALLBACK15-NEXT: movq %r14, %r15
17176 ; FALLBACK15-NEXT: shldq %cl, %r9, %r15
17177 ; FALLBACK15-NEXT: movq -8(%rsp,%r8), %r8
17178 ; FALLBACK15-NEXT: shldq %cl, %r14, %r8
17179 ; FALLBACK15-NEXT: shlxq %rcx, %r11, %r9
17180 ; FALLBACK15-NEXT: # kill: def $cl killed $cl killed $rcx
17181 ; FALLBACK15-NEXT: shldq %cl, %r11, %rbx
17182 ; FALLBACK15-NEXT: movq %r8, 56(%rdx)
17183 ; FALLBACK15-NEXT: movq %r15, 48(%rdx)
17184 ; FALLBACK15-NEXT: movq %rbx, 8(%rdx)
17185 ; FALLBACK15-NEXT: movq %r10, 16(%rdx)
17186 ; FALLBACK15-NEXT: movq %rdi, 24(%rdx)
17187 ; FALLBACK15-NEXT: movq %rax, 32(%rdx)
17188 ; FALLBACK15-NEXT: movq %rsi, 40(%rdx)
17189 ; FALLBACK15-NEXT: movq %r9, (%rdx)
17190 ; FALLBACK15-NEXT: popq %rbx
17191 ; FALLBACK15-NEXT: popq %r14
17192 ; FALLBACK15-NEXT: popq %r15
17193 ; FALLBACK15-NEXT: vzeroupper
17194 ; FALLBACK15-NEXT: retq
17196 ; FALLBACK16-LABEL: shl_64bytes:
17197 ; FALLBACK16: # %bb.0:
17198 ; FALLBACK16-NEXT: pushl %ebp
17199 ; FALLBACK16-NEXT: pushl %ebx
17200 ; FALLBACK16-NEXT: pushl %edi
17201 ; FALLBACK16-NEXT: pushl %esi
17202 ; FALLBACK16-NEXT: subl $204, %esp
17203 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %eax
17204 ; FALLBACK16-NEXT: movl (%eax), %ecx
17205 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17206 ; FALLBACK16-NEXT: movl 4(%eax), %ecx
17207 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17208 ; FALLBACK16-NEXT: movl 8(%eax), %ecx
17209 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17210 ; FALLBACK16-NEXT: movl 12(%eax), %ecx
17211 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17212 ; FALLBACK16-NEXT: movl 16(%eax), %ecx
17213 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17214 ; FALLBACK16-NEXT: movl 20(%eax), %ecx
17215 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17216 ; FALLBACK16-NEXT: movl 24(%eax), %ecx
17217 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17218 ; FALLBACK16-NEXT: movl 28(%eax), %ecx
17219 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17220 ; FALLBACK16-NEXT: movl 32(%eax), %ecx
17221 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17222 ; FALLBACK16-NEXT: movl 36(%eax), %ecx
17223 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17224 ; FALLBACK16-NEXT: movl 40(%eax), %ebp
17225 ; FALLBACK16-NEXT: movl 44(%eax), %ebx
17226 ; FALLBACK16-NEXT: movl 48(%eax), %edi
17227 ; FALLBACK16-NEXT: movl 52(%eax), %esi
17228 ; FALLBACK16-NEXT: movl 56(%eax), %edx
17229 ; FALLBACK16-NEXT: movl 60(%eax), %ecx
17230 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %eax
17231 ; FALLBACK16-NEXT: movl (%eax), %eax
17232 ; FALLBACK16-NEXT: xorps %xmm0, %xmm0
17233 ; FALLBACK16-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
17234 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
17235 ; FALLBACK16-NEXT: movl %edx, {{[0-9]+}}(%esp)
17236 ; FALLBACK16-NEXT: movl %esi, {{[0-9]+}}(%esp)
17237 ; FALLBACK16-NEXT: movl %edi, {{[0-9]+}}(%esp)
17238 ; FALLBACK16-NEXT: movl %ebx, {{[0-9]+}}(%esp)
17239 ; FALLBACK16-NEXT: movl %ebp, {{[0-9]+}}(%esp)
17240 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17241 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
17242 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17243 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
17244 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17245 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
17246 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17247 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
17248 ; FALLBACK16-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
17249 ; FALLBACK16-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
17250 ; FALLBACK16-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
17251 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17252 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
17253 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17254 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
17255 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17256 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
17257 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17258 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
17259 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17260 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
17261 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17262 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
17263 ; FALLBACK16-NEXT: movl %eax, %edx
17264 ; FALLBACK16-NEXT: andl $60, %edx
17265 ; FALLBACK16-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17266 ; FALLBACK16-NEXT: leal {{[0-9]+}}(%esp), %ecx
17267 ; FALLBACK16-NEXT: subl %edx, %ecx
17268 ; FALLBACK16-NEXT: movl (%ecx), %edi
17269 ; FALLBACK16-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17270 ; FALLBACK16-NEXT: movl 4(%ecx), %edx
17271 ; FALLBACK16-NEXT: movl %ecx, %ebp
17272 ; FALLBACK16-NEXT: shll $3, %eax
17273 ; FALLBACK16-NEXT: andl $24, %eax
17274 ; FALLBACK16-NEXT: movl %edx, %esi
17275 ; FALLBACK16-NEXT: movl %eax, %ecx
17276 ; FALLBACK16-NEXT: shll %cl, %esi
17277 ; FALLBACK16-NEXT: shrl %edi
17278 ; FALLBACK16-NEXT: movb %al, %ch
17279 ; FALLBACK16-NEXT: notb %ch
17280 ; FALLBACK16-NEXT: movb %ch, %cl
17281 ; FALLBACK16-NEXT: shrl %cl, %edi
17282 ; FALLBACK16-NEXT: orl %esi, %edi
17283 ; FALLBACK16-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17284 ; FALLBACK16-NEXT: movl 12(%ebp), %ebx
17285 ; FALLBACK16-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17286 ; FALLBACK16-NEXT: movb %al, %cl
17287 ; FALLBACK16-NEXT: shll %cl, %ebx
17288 ; FALLBACK16-NEXT: movl 8(%ebp), %esi
17289 ; FALLBACK16-NEXT: movl %ebp, %edi
17290 ; FALLBACK16-NEXT: movl %esi, %ebp
17291 ; FALLBACK16-NEXT: shrl %ebp
17292 ; FALLBACK16-NEXT: movb %ch, %cl
17293 ; FALLBACK16-NEXT: shrl %cl, %ebp
17294 ; FALLBACK16-NEXT: orl %ebx, %ebp
17295 ; FALLBACK16-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17296 ; FALLBACK16-NEXT: movb %al, %cl
17297 ; FALLBACK16-NEXT: shll %cl, %esi
17298 ; FALLBACK16-NEXT: shrl %edx
17299 ; FALLBACK16-NEXT: movb %ch, %cl
17300 ; FALLBACK16-NEXT: shrl %cl, %edx
17301 ; FALLBACK16-NEXT: orl %esi, %edx
17302 ; FALLBACK16-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17303 ; FALLBACK16-NEXT: movl %edi, %ebp
17304 ; FALLBACK16-NEXT: movl 20(%edi), %ebx
17305 ; FALLBACK16-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17306 ; FALLBACK16-NEXT: movb %al, %cl
17307 ; FALLBACK16-NEXT: shll %cl, %ebx
17308 ; FALLBACK16-NEXT: movl 16(%edi), %esi
17309 ; FALLBACK16-NEXT: movl %esi, %edx
17310 ; FALLBACK16-NEXT: shrl %edx
17311 ; FALLBACK16-NEXT: movb %ch, %cl
17312 ; FALLBACK16-NEXT: shrl %cl, %edx
17313 ; FALLBACK16-NEXT: orl %ebx, %edx
17314 ; FALLBACK16-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17315 ; FALLBACK16-NEXT: movb %al, %cl
17316 ; FALLBACK16-NEXT: shll %cl, %esi
17317 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
17318 ; FALLBACK16-NEXT: shrl %edi
17319 ; FALLBACK16-NEXT: movb %ch, %cl
17320 ; FALLBACK16-NEXT: shrl %cl, %edi
17321 ; FALLBACK16-NEXT: orl %esi, %edi
17322 ; FALLBACK16-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17323 ; FALLBACK16-NEXT: movl %ebp, %edx
17324 ; FALLBACK16-NEXT: movl 28(%ebp), %ebx
17325 ; FALLBACK16-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17326 ; FALLBACK16-NEXT: movb %al, %cl
17327 ; FALLBACK16-NEXT: shll %cl, %ebx
17328 ; FALLBACK16-NEXT: movl 24(%ebp), %esi
17329 ; FALLBACK16-NEXT: movl %esi, %edi
17330 ; FALLBACK16-NEXT: shrl %edi
17331 ; FALLBACK16-NEXT: movb %ch, %cl
17332 ; FALLBACK16-NEXT: shrl %cl, %edi
17333 ; FALLBACK16-NEXT: orl %ebx, %edi
17334 ; FALLBACK16-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17335 ; FALLBACK16-NEXT: movb %al, %cl
17336 ; FALLBACK16-NEXT: shll %cl, %esi
17337 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
17338 ; FALLBACK16-NEXT: shrl %ebp
17339 ; FALLBACK16-NEXT: movb %ch, %cl
17340 ; FALLBACK16-NEXT: shrl %cl, %ebp
17341 ; FALLBACK16-NEXT: orl %esi, %ebp
17342 ; FALLBACK16-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17343 ; FALLBACK16-NEXT: movl 36(%edx), %ebx
17344 ; FALLBACK16-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17345 ; FALLBACK16-NEXT: movb %al, %cl
17346 ; FALLBACK16-NEXT: shll %cl, %ebx
17347 ; FALLBACK16-NEXT: movl 32(%edx), %esi
17348 ; FALLBACK16-NEXT: movl %edx, %ebp
17349 ; FALLBACK16-NEXT: movl %esi, %edi
17350 ; FALLBACK16-NEXT: shrl %edi
17351 ; FALLBACK16-NEXT: movb %ch, %cl
17352 ; FALLBACK16-NEXT: shrl %cl, %edi
17353 ; FALLBACK16-NEXT: orl %ebx, %edi
17354 ; FALLBACK16-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17355 ; FALLBACK16-NEXT: movb %al, %cl
17356 ; FALLBACK16-NEXT: shll %cl, %esi
17357 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
17358 ; FALLBACK16-NEXT: shrl %edx
17359 ; FALLBACK16-NEXT: movb %ch, %cl
17360 ; FALLBACK16-NEXT: shrl %cl, %edx
17361 ; FALLBACK16-NEXT: orl %esi, %edx
17362 ; FALLBACK16-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17363 ; FALLBACK16-NEXT: movl 44(%ebp), %ebx
17364 ; FALLBACK16-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17365 ; FALLBACK16-NEXT: movb %al, %cl
17366 ; FALLBACK16-NEXT: shll %cl, %ebx
17367 ; FALLBACK16-NEXT: movl 40(%ebp), %esi
17368 ; FALLBACK16-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17369 ; FALLBACK16-NEXT: movl %esi, %edx
17370 ; FALLBACK16-NEXT: shrl %edx
17371 ; FALLBACK16-NEXT: movb %ch, %cl
17372 ; FALLBACK16-NEXT: shrl %cl, %edx
17373 ; FALLBACK16-NEXT: orl %ebx, %edx
17374 ; FALLBACK16-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17375 ; FALLBACK16-NEXT: movb %al, %cl
17376 ; FALLBACK16-NEXT: shll %cl, %esi
17377 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
17378 ; FALLBACK16-NEXT: shrl %edx
17379 ; FALLBACK16-NEXT: movb %ch, %cl
17380 ; FALLBACK16-NEXT: shrl %cl, %edx
17381 ; FALLBACK16-NEXT: orl %esi, %edx
17382 ; FALLBACK16-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17383 ; FALLBACK16-NEXT: movl 52(%ebp), %esi
17384 ; FALLBACK16-NEXT: movl %esi, %edi
17385 ; FALLBACK16-NEXT: movb %al, %cl
17386 ; FALLBACK16-NEXT: shll %cl, %edi
17387 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
17388 ; FALLBACK16-NEXT: negl %edx
17389 ; FALLBACK16-NEXT: movl 176(%esp,%edx), %ebx
17390 ; FALLBACK16-NEXT: movl %ebx, %ebp
17391 ; FALLBACK16-NEXT: shrl %ebp
17392 ; FALLBACK16-NEXT: movb %ch, %cl
17393 ; FALLBACK16-NEXT: shrl %cl, %ebp
17394 ; FALLBACK16-NEXT: orl %edi, %ebp
17395 ; FALLBACK16-NEXT: movb %al, %cl
17396 ; FALLBACK16-NEXT: shll %cl, %ebx
17397 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
17398 ; FALLBACK16-NEXT: shrl %edx
17399 ; FALLBACK16-NEXT: movb %ch, %cl
17400 ; FALLBACK16-NEXT: shrl %cl, %edx
17401 ; FALLBACK16-NEXT: orl %ebx, %edx
17402 ; FALLBACK16-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17403 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
17404 ; FALLBACK16-NEXT: movl 60(%edi), %edx
17405 ; FALLBACK16-NEXT: movb %al, %cl
17406 ; FALLBACK16-NEXT: shll %cl, %edx
17407 ; FALLBACK16-NEXT: movl 56(%edi), %ebx
17408 ; FALLBACK16-NEXT: movl %ebx, %edi
17409 ; FALLBACK16-NEXT: shrl %edi
17410 ; FALLBACK16-NEXT: movb %ch, %cl
17411 ; FALLBACK16-NEXT: shrl %cl, %edi
17412 ; FALLBACK16-NEXT: orl %edx, %edi
17413 ; FALLBACK16-NEXT: movb %al, %cl
17414 ; FALLBACK16-NEXT: shll %cl, %ebx
17415 ; FALLBACK16-NEXT: shrl %esi
17416 ; FALLBACK16-NEXT: movb %ch, %cl
17417 ; FALLBACK16-NEXT: shrl %cl, %esi
17418 ; FALLBACK16-NEXT: orl %ebx, %esi
17419 ; FALLBACK16-NEXT: movl %eax, %ecx
17420 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
17421 ; FALLBACK16-NEXT: shll %cl, %edx
17422 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %eax
17423 ; FALLBACK16-NEXT: movl %edx, (%eax)
17424 ; FALLBACK16-NEXT: movl %esi, 56(%eax)
17425 ; FALLBACK16-NEXT: movl %edi, 60(%eax)
17426 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17427 ; FALLBACK16-NEXT: movl %ecx, 48(%eax)
17428 ; FALLBACK16-NEXT: movl %ebp, 52(%eax)
17429 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17430 ; FALLBACK16-NEXT: movl %ecx, 40(%eax)
17431 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17432 ; FALLBACK16-NEXT: movl %ecx, 44(%eax)
17433 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17434 ; FALLBACK16-NEXT: movl %ecx, 32(%eax)
17435 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17436 ; FALLBACK16-NEXT: movl %ecx, 36(%eax)
17437 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17438 ; FALLBACK16-NEXT: movl %ecx, 24(%eax)
17439 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17440 ; FALLBACK16-NEXT: movl %ecx, 28(%eax)
17441 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17442 ; FALLBACK16-NEXT: movl %ecx, 16(%eax)
17443 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17444 ; FALLBACK16-NEXT: movl %ecx, 20(%eax)
17445 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17446 ; FALLBACK16-NEXT: movl %ecx, 8(%eax)
17447 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17448 ; FALLBACK16-NEXT: movl %ecx, 12(%eax)
17449 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17450 ; FALLBACK16-NEXT: movl %ecx, 4(%eax)
17451 ; FALLBACK16-NEXT: addl $204, %esp
17452 ; FALLBACK16-NEXT: popl %esi
17453 ; FALLBACK16-NEXT: popl %edi
17454 ; FALLBACK16-NEXT: popl %ebx
17455 ; FALLBACK16-NEXT: popl %ebp
17456 ; FALLBACK16-NEXT: retl
17458 ; FALLBACK17-LABEL: shl_64bytes:
17459 ; FALLBACK17: # %bb.0:
17460 ; FALLBACK17-NEXT: pushl %ebp
17461 ; FALLBACK17-NEXT: pushl %ebx
17462 ; FALLBACK17-NEXT: pushl %edi
17463 ; FALLBACK17-NEXT: pushl %esi
17464 ; FALLBACK17-NEXT: subl $188, %esp
17465 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %ecx
17466 ; FALLBACK17-NEXT: movl (%ecx), %eax
17467 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17468 ; FALLBACK17-NEXT: movl 4(%ecx), %eax
17469 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17470 ; FALLBACK17-NEXT: movl 8(%ecx), %eax
17471 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17472 ; FALLBACK17-NEXT: movl 12(%ecx), %eax
17473 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17474 ; FALLBACK17-NEXT: movl 16(%ecx), %eax
17475 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17476 ; FALLBACK17-NEXT: movl 20(%ecx), %eax
17477 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17478 ; FALLBACK17-NEXT: movl 24(%ecx), %eax
17479 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17480 ; FALLBACK17-NEXT: movl 28(%ecx), %eax
17481 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17482 ; FALLBACK17-NEXT: movl 32(%ecx), %eax
17483 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17484 ; FALLBACK17-NEXT: movl 36(%ecx), %eax
17485 ; FALLBACK17-NEXT: movl %eax, (%esp) # 4-byte Spill
17486 ; FALLBACK17-NEXT: movl 40(%ecx), %ebp
17487 ; FALLBACK17-NEXT: movl 44(%ecx), %ebx
17488 ; FALLBACK17-NEXT: movl 48(%ecx), %edi
17489 ; FALLBACK17-NEXT: movl 52(%ecx), %esi
17490 ; FALLBACK17-NEXT: movl 56(%ecx), %edx
17491 ; FALLBACK17-NEXT: movl 60(%ecx), %eax
17492 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %ecx
17493 ; FALLBACK17-NEXT: movl (%ecx), %ecx
17494 ; FALLBACK17-NEXT: xorps %xmm0, %xmm0
17495 ; FALLBACK17-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
17496 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
17497 ; FALLBACK17-NEXT: movl %edx, {{[0-9]+}}(%esp)
17498 ; FALLBACK17-NEXT: movl %esi, {{[0-9]+}}(%esp)
17499 ; FALLBACK17-NEXT: movl %edi, {{[0-9]+}}(%esp)
17500 ; FALLBACK17-NEXT: movl %ebx, {{[0-9]+}}(%esp)
17501 ; FALLBACK17-NEXT: movl %ebp, {{[0-9]+}}(%esp)
17502 ; FALLBACK17-NEXT: movl (%esp), %eax # 4-byte Reload
17503 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
17504 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17505 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
17506 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17507 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
17508 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17509 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
17510 ; FALLBACK17-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
17511 ; FALLBACK17-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
17512 ; FALLBACK17-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
17513 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17514 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
17515 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17516 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
17517 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17518 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
17519 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17520 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
17521 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17522 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
17523 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17524 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
17525 ; FALLBACK17-NEXT: movl %ecx, %ebp
17526 ; FALLBACK17-NEXT: andl $60, %ebp
17527 ; FALLBACK17-NEXT: leal {{[0-9]+}}(%esp), %eax
17528 ; FALLBACK17-NEXT: subl %ebp, %eax
17529 ; FALLBACK17-NEXT: movl 8(%eax), %esi
17530 ; FALLBACK17-NEXT: movl 12(%eax), %edx
17531 ; FALLBACK17-NEXT: shll $3, %ecx
17532 ; FALLBACK17-NEXT: andl $24, %ecx
17533 ; FALLBACK17-NEXT: movl %edx, %edi
17534 ; FALLBACK17-NEXT: shldl %cl, %esi, %edi
17535 ; FALLBACK17-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17536 ; FALLBACK17-NEXT: movl 4(%eax), %edi
17537 ; FALLBACK17-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17538 ; FALLBACK17-NEXT: shldl %cl, %edi, %esi
17539 ; FALLBACK17-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17540 ; FALLBACK17-NEXT: movl 16(%eax), %edi
17541 ; FALLBACK17-NEXT: movl 20(%eax), %esi
17542 ; FALLBACK17-NEXT: movl %esi, %ebx
17543 ; FALLBACK17-NEXT: shldl %cl, %edi, %ebx
17544 ; FALLBACK17-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17545 ; FALLBACK17-NEXT: shldl %cl, %edx, %edi
17546 ; FALLBACK17-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17547 ; FALLBACK17-NEXT: movl 24(%eax), %edi
17548 ; FALLBACK17-NEXT: movl 28(%eax), %edx
17549 ; FALLBACK17-NEXT: movl %edx, %ebx
17550 ; FALLBACK17-NEXT: shldl %cl, %edi, %ebx
17551 ; FALLBACK17-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17552 ; FALLBACK17-NEXT: shldl %cl, %esi, %edi
17553 ; FALLBACK17-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17554 ; FALLBACK17-NEXT: movl 32(%eax), %edi
17555 ; FALLBACK17-NEXT: movl 36(%eax), %esi
17556 ; FALLBACK17-NEXT: movl %esi, %ebx
17557 ; FALLBACK17-NEXT: shldl %cl, %edi, %ebx
17558 ; FALLBACK17-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17559 ; FALLBACK17-NEXT: shldl %cl, %edx, %edi
17560 ; FALLBACK17-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17561 ; FALLBACK17-NEXT: movl 40(%eax), %edx
17562 ; FALLBACK17-NEXT: movl 44(%eax), %edi
17563 ; FALLBACK17-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17564 ; FALLBACK17-NEXT: shldl %cl, %edx, %edi
17565 ; FALLBACK17-NEXT: movl %edi, (%esp) # 4-byte Spill
17566 ; FALLBACK17-NEXT: shldl %cl, %esi, %edx
17567 ; FALLBACK17-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17568 ; FALLBACK17-NEXT: movl 56(%eax), %edx
17569 ; FALLBACK17-NEXT: movl 60(%eax), %edi
17570 ; FALLBACK17-NEXT: shldl %cl, %edx, %edi
17571 ; FALLBACK17-NEXT: movl (%eax), %ebx
17572 ; FALLBACK17-NEXT: movl 52(%eax), %esi
17573 ; FALLBACK17-NEXT: shldl %cl, %esi, %edx
17574 ; FALLBACK17-NEXT: negl %ebp
17575 ; FALLBACK17-NEXT: movl 160(%esp,%ebp), %eax
17576 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %ebp
17577 ; FALLBACK17-NEXT: movl %edx, 56(%ebp)
17578 ; FALLBACK17-NEXT: movl %edi, 60(%ebp)
17579 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
17580 ; FALLBACK17-NEXT: shldl %cl, %ebx, %edx
17581 ; FALLBACK17-NEXT: shll %cl, %ebx
17582 ; FALLBACK17-NEXT: shldl %cl, %eax, %esi
17583 ; FALLBACK17-NEXT: # kill: def $cl killed $cl killed $ecx
17584 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
17585 ; FALLBACK17-NEXT: shldl %cl, %edi, %eax
17586 ; FALLBACK17-NEXT: movl %eax, 48(%ebp)
17587 ; FALLBACK17-NEXT: movl %esi, 52(%ebp)
17588 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17589 ; FALLBACK17-NEXT: movl %eax, 40(%ebp)
17590 ; FALLBACK17-NEXT: movl (%esp), %eax # 4-byte Reload
17591 ; FALLBACK17-NEXT: movl %eax, 44(%ebp)
17592 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17593 ; FALLBACK17-NEXT: movl %eax, 32(%ebp)
17594 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17595 ; FALLBACK17-NEXT: movl %eax, 36(%ebp)
17596 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17597 ; FALLBACK17-NEXT: movl %eax, 24(%ebp)
17598 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17599 ; FALLBACK17-NEXT: movl %eax, 28(%ebp)
17600 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17601 ; FALLBACK17-NEXT: movl %eax, 16(%ebp)
17602 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17603 ; FALLBACK17-NEXT: movl %eax, 20(%ebp)
17604 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17605 ; FALLBACK17-NEXT: movl %eax, 8(%ebp)
17606 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17607 ; FALLBACK17-NEXT: movl %eax, 12(%ebp)
17608 ; FALLBACK17-NEXT: movl %ebx, (%ebp)
17609 ; FALLBACK17-NEXT: movl %edx, 4(%ebp)
17610 ; FALLBACK17-NEXT: addl $188, %esp
17611 ; FALLBACK17-NEXT: popl %esi
17612 ; FALLBACK17-NEXT: popl %edi
17613 ; FALLBACK17-NEXT: popl %ebx
17614 ; FALLBACK17-NEXT: popl %ebp
17615 ; FALLBACK17-NEXT: retl
17617 ; FALLBACK18-LABEL: shl_64bytes:
17618 ; FALLBACK18: # %bb.0:
17619 ; FALLBACK18-NEXT: pushl %ebp
17620 ; FALLBACK18-NEXT: pushl %ebx
17621 ; FALLBACK18-NEXT: pushl %edi
17622 ; FALLBACK18-NEXT: pushl %esi
17623 ; FALLBACK18-NEXT: subl $204, %esp
17624 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %eax
17625 ; FALLBACK18-NEXT: movl (%eax), %ecx
17626 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17627 ; FALLBACK18-NEXT: movl 4(%eax), %ecx
17628 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17629 ; FALLBACK18-NEXT: movl 8(%eax), %ecx
17630 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17631 ; FALLBACK18-NEXT: movl 12(%eax), %ecx
17632 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17633 ; FALLBACK18-NEXT: movl 16(%eax), %ecx
17634 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17635 ; FALLBACK18-NEXT: movl 20(%eax), %ecx
17636 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17637 ; FALLBACK18-NEXT: movl 24(%eax), %ecx
17638 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17639 ; FALLBACK18-NEXT: movl 28(%eax), %ecx
17640 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17641 ; FALLBACK18-NEXT: movl 32(%eax), %ecx
17642 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17643 ; FALLBACK18-NEXT: movl 36(%eax), %ecx
17644 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17645 ; FALLBACK18-NEXT: movl 40(%eax), %ebx
17646 ; FALLBACK18-NEXT: movl 44(%eax), %edi
17647 ; FALLBACK18-NEXT: movl 48(%eax), %esi
17648 ; FALLBACK18-NEXT: movl 52(%eax), %edx
17649 ; FALLBACK18-NEXT: movl 56(%eax), %ecx
17650 ; FALLBACK18-NEXT: movl 60(%eax), %eax
17651 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %ebp
17652 ; FALLBACK18-NEXT: movl (%ebp), %ebp
17653 ; FALLBACK18-NEXT: xorps %xmm0, %xmm0
17654 ; FALLBACK18-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
17655 ; FALLBACK18-NEXT: movl %eax, {{[0-9]+}}(%esp)
17656 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
17657 ; FALLBACK18-NEXT: movl %edx, {{[0-9]+}}(%esp)
17658 ; FALLBACK18-NEXT: movl %esi, {{[0-9]+}}(%esp)
17659 ; FALLBACK18-NEXT: movl %edi, {{[0-9]+}}(%esp)
17660 ; FALLBACK18-NEXT: movl %ebx, {{[0-9]+}}(%esp)
17661 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17662 ; FALLBACK18-NEXT: movl %eax, {{[0-9]+}}(%esp)
17663 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17664 ; FALLBACK18-NEXT: movl %eax, {{[0-9]+}}(%esp)
17665 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17666 ; FALLBACK18-NEXT: movl %eax, {{[0-9]+}}(%esp)
17667 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17668 ; FALLBACK18-NEXT: movl %eax, {{[0-9]+}}(%esp)
17669 ; FALLBACK18-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
17670 ; FALLBACK18-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
17671 ; FALLBACK18-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
17672 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17673 ; FALLBACK18-NEXT: movl %eax, {{[0-9]+}}(%esp)
17674 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17675 ; FALLBACK18-NEXT: movl %eax, {{[0-9]+}}(%esp)
17676 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17677 ; FALLBACK18-NEXT: movl %eax, {{[0-9]+}}(%esp)
17678 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17679 ; FALLBACK18-NEXT: movl %eax, {{[0-9]+}}(%esp)
17680 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17681 ; FALLBACK18-NEXT: movl %eax, {{[0-9]+}}(%esp)
17682 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17683 ; FALLBACK18-NEXT: movl %eax, {{[0-9]+}}(%esp)
17684 ; FALLBACK18-NEXT: leal (,%ebp,8), %edx
17685 ; FALLBACK18-NEXT: andl $24, %edx
17686 ; FALLBACK18-NEXT: andl $60, %ebp
17687 ; FALLBACK18-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17688 ; FALLBACK18-NEXT: leal {{[0-9]+}}(%esp), %edi
17689 ; FALLBACK18-NEXT: subl %ebp, %edi
17690 ; FALLBACK18-NEXT: movl (%edi), %ecx
17691 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17692 ; FALLBACK18-NEXT: movl 4(%edi), %eax
17693 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17694 ; FALLBACK18-NEXT: movl %edx, %ebx
17695 ; FALLBACK18-NEXT: notb %bl
17696 ; FALLBACK18-NEXT: shrl %ecx
17697 ; FALLBACK18-NEXT: shrxl %ebx, %ecx, %esi
17698 ; FALLBACK18-NEXT: shlxl %edx, %eax, %ecx
17699 ; FALLBACK18-NEXT: orl %ecx, %esi
17700 ; FALLBACK18-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17701 ; FALLBACK18-NEXT: movl 8(%edi), %esi
17702 ; FALLBACK18-NEXT: movl %esi, %ecx
17703 ; FALLBACK18-NEXT: shrl %ecx
17704 ; FALLBACK18-NEXT: shrxl %ebx, %ecx, %eax
17705 ; FALLBACK18-NEXT: movl 12(%edi), %ecx
17706 ; FALLBACK18-NEXT: shlxl %edx, %ecx, %ebp
17707 ; FALLBACK18-NEXT: orl %ebp, %eax
17708 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17709 ; FALLBACK18-NEXT: shlxl %edx, %esi, %esi
17710 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17711 ; FALLBACK18-NEXT: shrl %eax
17712 ; FALLBACK18-NEXT: shrxl %ebx, %eax, %eax
17713 ; FALLBACK18-NEXT: orl %esi, %eax
17714 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17715 ; FALLBACK18-NEXT: movl 16(%edi), %eax
17716 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17717 ; FALLBACK18-NEXT: shrl %eax
17718 ; FALLBACK18-NEXT: shrxl %ebx, %eax, %eax
17719 ; FALLBACK18-NEXT: movl 20(%edi), %esi
17720 ; FALLBACK18-NEXT: shlxl %edx, %esi, %ebp
17721 ; FALLBACK18-NEXT: orl %ebp, %eax
17722 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17723 ; FALLBACK18-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
17724 ; FALLBACK18-NEXT: shrl %ecx
17725 ; FALLBACK18-NEXT: shrxl %ebx, %ecx, %ecx
17726 ; FALLBACK18-NEXT: orl %eax, %ecx
17727 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17728 ; FALLBACK18-NEXT: movl 24(%edi), %ecx
17729 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17730 ; FALLBACK18-NEXT: shrl %ecx
17731 ; FALLBACK18-NEXT: shrxl %ebx, %ecx, %eax
17732 ; FALLBACK18-NEXT: movl 28(%edi), %ecx
17733 ; FALLBACK18-NEXT: shlxl %edx, %ecx, %ebp
17734 ; FALLBACK18-NEXT: orl %ebp, %eax
17735 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17736 ; FALLBACK18-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
17737 ; FALLBACK18-NEXT: shrl %esi
17738 ; FALLBACK18-NEXT: shrxl %ebx, %esi, %esi
17739 ; FALLBACK18-NEXT: orl %eax, %esi
17740 ; FALLBACK18-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17741 ; FALLBACK18-NEXT: movl 32(%edi), %eax
17742 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17743 ; FALLBACK18-NEXT: shrl %eax
17744 ; FALLBACK18-NEXT: shrxl %ebx, %eax, %eax
17745 ; FALLBACK18-NEXT: movl 36(%edi), %esi
17746 ; FALLBACK18-NEXT: shlxl %edx, %esi, %ebp
17747 ; FALLBACK18-NEXT: orl %ebp, %eax
17748 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17749 ; FALLBACK18-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
17750 ; FALLBACK18-NEXT: shrl %ecx
17751 ; FALLBACK18-NEXT: shrxl %ebx, %ecx, %ecx
17752 ; FALLBACK18-NEXT: orl %eax, %ecx
17753 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17754 ; FALLBACK18-NEXT: movl 40(%edi), %ecx
17755 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17756 ; FALLBACK18-NEXT: shrl %ecx
17757 ; FALLBACK18-NEXT: shrxl %ebx, %ecx, %eax
17758 ; FALLBACK18-NEXT: movl 44(%edi), %ecx
17759 ; FALLBACK18-NEXT: shlxl %edx, %ecx, %ebp
17760 ; FALLBACK18-NEXT: orl %ebp, %eax
17761 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17762 ; FALLBACK18-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
17763 ; FALLBACK18-NEXT: shrl %esi
17764 ; FALLBACK18-NEXT: shrxl %ebx, %esi, %esi
17765 ; FALLBACK18-NEXT: orl %eax, %esi
17766 ; FALLBACK18-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17767 ; FALLBACK18-NEXT: movl 48(%edi), %esi
17768 ; FALLBACK18-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17769 ; FALLBACK18-NEXT: shrl %esi
17770 ; FALLBACK18-NEXT: shrxl %ebx, %esi, %eax
17771 ; FALLBACK18-NEXT: movl 52(%edi), %esi
17772 ; FALLBACK18-NEXT: shlxl %edx, %esi, %ebp
17773 ; FALLBACK18-NEXT: orl %ebp, %eax
17774 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17775 ; FALLBACK18-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
17776 ; FALLBACK18-NEXT: shrl %ecx
17777 ; FALLBACK18-NEXT: shrxl %ebx, %ecx, %ebp
17778 ; FALLBACK18-NEXT: orl %eax, %ebp
17779 ; FALLBACK18-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
17780 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17781 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17782 ; FALLBACK18-NEXT: negl %eax
17783 ; FALLBACK18-NEXT: shlxl %edx, 188(%esp,%eax), %ecx
17784 ; FALLBACK18-NEXT: movl 56(%edi), %eax
17785 ; FALLBACK18-NEXT: shlxl %edx, %eax, %edx
17786 ; FALLBACK18-NEXT: shrl %esi
17787 ; FALLBACK18-NEXT: shrxl %ebx, %esi, %esi
17788 ; FALLBACK18-NEXT: orl %edx, %esi
17789 ; FALLBACK18-NEXT: shrl %eax
17790 ; FALLBACK18-NEXT: shrxl %ebx, %eax, %eax
17791 ; FALLBACK18-NEXT: orl %eax, %ecx
17792 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %eax
17793 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
17794 ; FALLBACK18-NEXT: movl %edx, (%eax)
17795 ; FALLBACK18-NEXT: movl %esi, 56(%eax)
17796 ; FALLBACK18-NEXT: movl %ecx, 60(%eax)
17797 ; FALLBACK18-NEXT: movl %ebp, 48(%eax)
17798 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17799 ; FALLBACK18-NEXT: movl %ecx, 52(%eax)
17800 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17801 ; FALLBACK18-NEXT: movl %ecx, 40(%eax)
17802 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17803 ; FALLBACK18-NEXT: movl %ecx, 44(%eax)
17804 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17805 ; FALLBACK18-NEXT: movl %ecx, 32(%eax)
17806 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17807 ; FALLBACK18-NEXT: movl %ecx, 36(%eax)
17808 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17809 ; FALLBACK18-NEXT: movl %ecx, 24(%eax)
17810 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17811 ; FALLBACK18-NEXT: movl %ecx, 28(%eax)
17812 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17813 ; FALLBACK18-NEXT: movl %ecx, 16(%eax)
17814 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17815 ; FALLBACK18-NEXT: movl %ecx, 20(%eax)
17816 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17817 ; FALLBACK18-NEXT: movl %ecx, 8(%eax)
17818 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17819 ; FALLBACK18-NEXT: movl %ecx, 12(%eax)
17820 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17821 ; FALLBACK18-NEXT: movl %ecx, 4(%eax)
17822 ; FALLBACK18-NEXT: addl $204, %esp
17823 ; FALLBACK18-NEXT: popl %esi
17824 ; FALLBACK18-NEXT: popl %edi
17825 ; FALLBACK18-NEXT: popl %ebx
17826 ; FALLBACK18-NEXT: popl %ebp
17827 ; FALLBACK18-NEXT: retl
17829 ; FALLBACK19-LABEL: shl_64bytes:
17830 ; FALLBACK19: # %bb.0:
17831 ; FALLBACK19-NEXT: pushl %ebp
17832 ; FALLBACK19-NEXT: pushl %ebx
17833 ; FALLBACK19-NEXT: pushl %edi
17834 ; FALLBACK19-NEXT: pushl %esi
17835 ; FALLBACK19-NEXT: subl $204, %esp
17836 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %ebp
17837 ; FALLBACK19-NEXT: movl (%ebp), %eax
17838 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17839 ; FALLBACK19-NEXT: movl 4(%ebp), %eax
17840 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17841 ; FALLBACK19-NEXT: movl 8(%ebp), %eax
17842 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17843 ; FALLBACK19-NEXT: movl 12(%ebp), %eax
17844 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17845 ; FALLBACK19-NEXT: movl 16(%ebp), %eax
17846 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17847 ; FALLBACK19-NEXT: movl 20(%ebp), %eax
17848 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17849 ; FALLBACK19-NEXT: movl 24(%ebp), %eax
17850 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17851 ; FALLBACK19-NEXT: movl 28(%ebp), %eax
17852 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17853 ; FALLBACK19-NEXT: movl 32(%ebp), %eax
17854 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17855 ; FALLBACK19-NEXT: movl 36(%ebp), %eax
17856 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17857 ; FALLBACK19-NEXT: movl 40(%ebp), %ebx
17858 ; FALLBACK19-NEXT: movl 44(%ebp), %edi
17859 ; FALLBACK19-NEXT: movl 48(%ebp), %esi
17860 ; FALLBACK19-NEXT: movl 52(%ebp), %edx
17861 ; FALLBACK19-NEXT: movl 56(%ebp), %ecx
17862 ; FALLBACK19-NEXT: movl 60(%ebp), %eax
17863 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %ebp
17864 ; FALLBACK19-NEXT: movl (%ebp), %ebp
17865 ; FALLBACK19-NEXT: xorps %xmm0, %xmm0
17866 ; FALLBACK19-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
17867 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
17868 ; FALLBACK19-NEXT: movl %ecx, {{[0-9]+}}(%esp)
17869 ; FALLBACK19-NEXT: movl %edx, {{[0-9]+}}(%esp)
17870 ; FALLBACK19-NEXT: movl %esi, {{[0-9]+}}(%esp)
17871 ; FALLBACK19-NEXT: movl %edi, {{[0-9]+}}(%esp)
17872 ; FALLBACK19-NEXT: movl %ebx, {{[0-9]+}}(%esp)
17873 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17874 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
17875 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17876 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
17877 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17878 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
17879 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17880 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
17881 ; FALLBACK19-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
17882 ; FALLBACK19-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
17883 ; FALLBACK19-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
17884 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17885 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
17886 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17887 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
17888 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17889 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
17890 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17891 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
17892 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17893 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
17894 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
17895 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
17896 ; FALLBACK19-NEXT: leal (,%ebp,8), %ecx
17897 ; FALLBACK19-NEXT: andl $24, %ecx
17898 ; FALLBACK19-NEXT: andl $60, %ebp
17899 ; FALLBACK19-NEXT: leal {{[0-9]+}}(%esp), %eax
17900 ; FALLBACK19-NEXT: subl %ebp, %eax
17901 ; FALLBACK19-NEXT: movl 4(%eax), %esi
17902 ; FALLBACK19-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17903 ; FALLBACK19-NEXT: movl 8(%eax), %edi
17904 ; FALLBACK19-NEXT: movl 12(%eax), %edx
17905 ; FALLBACK19-NEXT: movl %edx, %ebx
17906 ; FALLBACK19-NEXT: shldl %cl, %edi, %ebx
17907 ; FALLBACK19-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17908 ; FALLBACK19-NEXT: shldl %cl, %esi, %edi
17909 ; FALLBACK19-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17910 ; FALLBACK19-NEXT: movl 16(%eax), %edi
17911 ; FALLBACK19-NEXT: movl 20(%eax), %esi
17912 ; FALLBACK19-NEXT: movl %esi, %ebx
17913 ; FALLBACK19-NEXT: shldl %cl, %edi, %ebx
17914 ; FALLBACK19-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17915 ; FALLBACK19-NEXT: shldl %cl, %edx, %edi
17916 ; FALLBACK19-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17917 ; FALLBACK19-NEXT: movl 24(%eax), %edi
17918 ; FALLBACK19-NEXT: movl 28(%eax), %edx
17919 ; FALLBACK19-NEXT: movl %edx, %ebx
17920 ; FALLBACK19-NEXT: shldl %cl, %edi, %ebx
17921 ; FALLBACK19-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17922 ; FALLBACK19-NEXT: shldl %cl, %esi, %edi
17923 ; FALLBACK19-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17924 ; FALLBACK19-NEXT: movl 32(%eax), %edi
17925 ; FALLBACK19-NEXT: movl 36(%eax), %esi
17926 ; FALLBACK19-NEXT: movl %esi, %ebx
17927 ; FALLBACK19-NEXT: shldl %cl, %edi, %ebx
17928 ; FALLBACK19-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17929 ; FALLBACK19-NEXT: shldl %cl, %edx, %edi
17930 ; FALLBACK19-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17931 ; FALLBACK19-NEXT: movl 40(%eax), %ebx
17932 ; FALLBACK19-NEXT: movl 44(%eax), %edx
17933 ; FALLBACK19-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17934 ; FALLBACK19-NEXT: shldl %cl, %ebx, %edx
17935 ; FALLBACK19-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17936 ; FALLBACK19-NEXT: shldl %cl, %esi, %ebx
17937 ; FALLBACK19-NEXT: movl 56(%eax), %edx
17938 ; FALLBACK19-NEXT: movl 60(%eax), %edi
17939 ; FALLBACK19-NEXT: shldl %cl, %edx, %edi
17940 ; FALLBACK19-NEXT: movl (%eax), %esi
17941 ; FALLBACK19-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17942 ; FALLBACK19-NEXT: movl 52(%eax), %esi
17943 ; FALLBACK19-NEXT: shldl %cl, %esi, %edx
17944 ; FALLBACK19-NEXT: negl %ebp
17945 ; FALLBACK19-NEXT: movl 176(%esp,%ebp), %ebp
17946 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %eax
17947 ; FALLBACK19-NEXT: movl %edx, 56(%eax)
17948 ; FALLBACK19-NEXT: movl %edi, 60(%eax)
17949 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
17950 ; FALLBACK19-NEXT: shlxl %ecx, %edx, %edi
17951 ; FALLBACK19-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
17952 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
17953 ; FALLBACK19-NEXT: shldl %cl, %edx, %edi
17954 ; FALLBACK19-NEXT: shldl %cl, %ebp, %esi
17955 ; FALLBACK19-NEXT: # kill: def $cl killed $cl killed $ecx
17956 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
17957 ; FALLBACK19-NEXT: shldl %cl, %edx, %ebp
17958 ; FALLBACK19-NEXT: movl %ebp, 48(%eax)
17959 ; FALLBACK19-NEXT: movl %esi, 52(%eax)
17960 ; FALLBACK19-NEXT: movl %ebx, 40(%eax)
17961 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17962 ; FALLBACK19-NEXT: movl %ecx, 44(%eax)
17963 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17964 ; FALLBACK19-NEXT: movl %ecx, 32(%eax)
17965 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17966 ; FALLBACK19-NEXT: movl %ecx, 36(%eax)
17967 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17968 ; FALLBACK19-NEXT: movl %ecx, 24(%eax)
17969 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17970 ; FALLBACK19-NEXT: movl %ecx, 28(%eax)
17971 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17972 ; FALLBACK19-NEXT: movl %ecx, 16(%eax)
17973 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17974 ; FALLBACK19-NEXT: movl %ecx, 20(%eax)
17975 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17976 ; FALLBACK19-NEXT: movl %ecx, 8(%eax)
17977 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17978 ; FALLBACK19-NEXT: movl %ecx, 12(%eax)
17979 ; FALLBACK19-NEXT: movl %edi, 4(%eax)
17980 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
17981 ; FALLBACK19-NEXT: movl %ecx, (%eax)
17982 ; FALLBACK19-NEXT: addl $204, %esp
17983 ; FALLBACK19-NEXT: popl %esi
17984 ; FALLBACK19-NEXT: popl %edi
17985 ; FALLBACK19-NEXT: popl %ebx
17986 ; FALLBACK19-NEXT: popl %ebp
17987 ; FALLBACK19-NEXT: retl
17989 ; FALLBACK20-LABEL: shl_64bytes:
17990 ; FALLBACK20: # %bb.0:
17991 ; FALLBACK20-NEXT: pushl %ebp
17992 ; FALLBACK20-NEXT: pushl %ebx
17993 ; FALLBACK20-NEXT: pushl %edi
17994 ; FALLBACK20-NEXT: pushl %esi
17995 ; FALLBACK20-NEXT: subl $204, %esp
17996 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %eax
17997 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %ecx
17998 ; FALLBACK20-NEXT: movups (%ecx), %xmm0
17999 ; FALLBACK20-NEXT: movups 16(%ecx), %xmm1
18000 ; FALLBACK20-NEXT: movups 32(%ecx), %xmm2
18001 ; FALLBACK20-NEXT: movups 48(%ecx), %xmm3
18002 ; FALLBACK20-NEXT: movl (%eax), %eax
18003 ; FALLBACK20-NEXT: xorps %xmm4, %xmm4
18004 ; FALLBACK20-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
18005 ; FALLBACK20-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
18006 ; FALLBACK20-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
18007 ; FALLBACK20-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
18008 ; FALLBACK20-NEXT: movaps %xmm3, {{[0-9]+}}(%esp)
18009 ; FALLBACK20-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
18010 ; FALLBACK20-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
18011 ; FALLBACK20-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
18012 ; FALLBACK20-NEXT: movl %eax, %edx
18013 ; FALLBACK20-NEXT: andl $60, %edx
18014 ; FALLBACK20-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18015 ; FALLBACK20-NEXT: leal {{[0-9]+}}(%esp), %ecx
18016 ; FALLBACK20-NEXT: subl %edx, %ecx
18017 ; FALLBACK20-NEXT: movl (%ecx), %edi
18018 ; FALLBACK20-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18019 ; FALLBACK20-NEXT: movl 4(%ecx), %edx
18020 ; FALLBACK20-NEXT: movl %ecx, %ebp
18021 ; FALLBACK20-NEXT: shll $3, %eax
18022 ; FALLBACK20-NEXT: andl $24, %eax
18023 ; FALLBACK20-NEXT: movl %edx, %esi
18024 ; FALLBACK20-NEXT: movl %eax, %ecx
18025 ; FALLBACK20-NEXT: shll %cl, %esi
18026 ; FALLBACK20-NEXT: shrl %edi
18027 ; FALLBACK20-NEXT: movb %al, %ch
18028 ; FALLBACK20-NEXT: notb %ch
18029 ; FALLBACK20-NEXT: movb %ch, %cl
18030 ; FALLBACK20-NEXT: shrl %cl, %edi
18031 ; FALLBACK20-NEXT: orl %esi, %edi
18032 ; FALLBACK20-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18033 ; FALLBACK20-NEXT: movl 12(%ebp), %ebx
18034 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18035 ; FALLBACK20-NEXT: movb %al, %cl
18036 ; FALLBACK20-NEXT: shll %cl, %ebx
18037 ; FALLBACK20-NEXT: movl 8(%ebp), %esi
18038 ; FALLBACK20-NEXT: movl %ebp, %edi
18039 ; FALLBACK20-NEXT: movl %esi, %ebp
18040 ; FALLBACK20-NEXT: shrl %ebp
18041 ; FALLBACK20-NEXT: movb %ch, %cl
18042 ; FALLBACK20-NEXT: shrl %cl, %ebp
18043 ; FALLBACK20-NEXT: orl %ebx, %ebp
18044 ; FALLBACK20-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18045 ; FALLBACK20-NEXT: movb %al, %cl
18046 ; FALLBACK20-NEXT: shll %cl, %esi
18047 ; FALLBACK20-NEXT: shrl %edx
18048 ; FALLBACK20-NEXT: movb %ch, %cl
18049 ; FALLBACK20-NEXT: shrl %cl, %edx
18050 ; FALLBACK20-NEXT: orl %esi, %edx
18051 ; FALLBACK20-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18052 ; FALLBACK20-NEXT: movl %edi, %ebp
18053 ; FALLBACK20-NEXT: movl 20(%edi), %ebx
18054 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18055 ; FALLBACK20-NEXT: movb %al, %cl
18056 ; FALLBACK20-NEXT: shll %cl, %ebx
18057 ; FALLBACK20-NEXT: movl 16(%edi), %esi
18058 ; FALLBACK20-NEXT: movl %esi, %edx
18059 ; FALLBACK20-NEXT: shrl %edx
18060 ; FALLBACK20-NEXT: movb %ch, %cl
18061 ; FALLBACK20-NEXT: shrl %cl, %edx
18062 ; FALLBACK20-NEXT: orl %ebx, %edx
18063 ; FALLBACK20-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18064 ; FALLBACK20-NEXT: movb %al, %cl
18065 ; FALLBACK20-NEXT: shll %cl, %esi
18066 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
18067 ; FALLBACK20-NEXT: shrl %edi
18068 ; FALLBACK20-NEXT: movb %ch, %cl
18069 ; FALLBACK20-NEXT: shrl %cl, %edi
18070 ; FALLBACK20-NEXT: orl %esi, %edi
18071 ; FALLBACK20-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18072 ; FALLBACK20-NEXT: movl %ebp, %edx
18073 ; FALLBACK20-NEXT: movl 28(%ebp), %ebx
18074 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18075 ; FALLBACK20-NEXT: movb %al, %cl
18076 ; FALLBACK20-NEXT: shll %cl, %ebx
18077 ; FALLBACK20-NEXT: movl 24(%ebp), %esi
18078 ; FALLBACK20-NEXT: movl %esi, %edi
18079 ; FALLBACK20-NEXT: shrl %edi
18080 ; FALLBACK20-NEXT: movb %ch, %cl
18081 ; FALLBACK20-NEXT: shrl %cl, %edi
18082 ; FALLBACK20-NEXT: orl %ebx, %edi
18083 ; FALLBACK20-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18084 ; FALLBACK20-NEXT: movb %al, %cl
18085 ; FALLBACK20-NEXT: shll %cl, %esi
18086 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
18087 ; FALLBACK20-NEXT: shrl %ebp
18088 ; FALLBACK20-NEXT: movb %ch, %cl
18089 ; FALLBACK20-NEXT: shrl %cl, %ebp
18090 ; FALLBACK20-NEXT: orl %esi, %ebp
18091 ; FALLBACK20-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18092 ; FALLBACK20-NEXT: movl 36(%edx), %ebx
18093 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18094 ; FALLBACK20-NEXT: movb %al, %cl
18095 ; FALLBACK20-NEXT: shll %cl, %ebx
18096 ; FALLBACK20-NEXT: movl 32(%edx), %esi
18097 ; FALLBACK20-NEXT: movl %edx, %ebp
18098 ; FALLBACK20-NEXT: movl %esi, %edi
18099 ; FALLBACK20-NEXT: shrl %edi
18100 ; FALLBACK20-NEXT: movb %ch, %cl
18101 ; FALLBACK20-NEXT: shrl %cl, %edi
18102 ; FALLBACK20-NEXT: orl %ebx, %edi
18103 ; FALLBACK20-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18104 ; FALLBACK20-NEXT: movb %al, %cl
18105 ; FALLBACK20-NEXT: shll %cl, %esi
18106 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
18107 ; FALLBACK20-NEXT: shrl %edx
18108 ; FALLBACK20-NEXT: movb %ch, %cl
18109 ; FALLBACK20-NEXT: shrl %cl, %edx
18110 ; FALLBACK20-NEXT: orl %esi, %edx
18111 ; FALLBACK20-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18112 ; FALLBACK20-NEXT: movl 44(%ebp), %ebx
18113 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18114 ; FALLBACK20-NEXT: movb %al, %cl
18115 ; FALLBACK20-NEXT: shll %cl, %ebx
18116 ; FALLBACK20-NEXT: movl 40(%ebp), %esi
18117 ; FALLBACK20-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18118 ; FALLBACK20-NEXT: movl %esi, %edx
18119 ; FALLBACK20-NEXT: shrl %edx
18120 ; FALLBACK20-NEXT: movb %ch, %cl
18121 ; FALLBACK20-NEXT: shrl %cl, %edx
18122 ; FALLBACK20-NEXT: orl %ebx, %edx
18123 ; FALLBACK20-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18124 ; FALLBACK20-NEXT: movb %al, %cl
18125 ; FALLBACK20-NEXT: shll %cl, %esi
18126 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
18127 ; FALLBACK20-NEXT: shrl %edx
18128 ; FALLBACK20-NEXT: movb %ch, %cl
18129 ; FALLBACK20-NEXT: shrl %cl, %edx
18130 ; FALLBACK20-NEXT: orl %esi, %edx
18131 ; FALLBACK20-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18132 ; FALLBACK20-NEXT: movl 52(%ebp), %esi
18133 ; FALLBACK20-NEXT: movl %esi, %edi
18134 ; FALLBACK20-NEXT: movb %al, %cl
18135 ; FALLBACK20-NEXT: shll %cl, %edi
18136 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
18137 ; FALLBACK20-NEXT: negl %edx
18138 ; FALLBACK20-NEXT: movl 176(%esp,%edx), %ebx
18139 ; FALLBACK20-NEXT: movl %ebx, %ebp
18140 ; FALLBACK20-NEXT: shrl %ebp
18141 ; FALLBACK20-NEXT: movb %ch, %cl
18142 ; FALLBACK20-NEXT: shrl %cl, %ebp
18143 ; FALLBACK20-NEXT: orl %edi, %ebp
18144 ; FALLBACK20-NEXT: movb %al, %cl
18145 ; FALLBACK20-NEXT: shll %cl, %ebx
18146 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
18147 ; FALLBACK20-NEXT: shrl %edx
18148 ; FALLBACK20-NEXT: movb %ch, %cl
18149 ; FALLBACK20-NEXT: shrl %cl, %edx
18150 ; FALLBACK20-NEXT: orl %ebx, %edx
18151 ; FALLBACK20-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18152 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
18153 ; FALLBACK20-NEXT: movl 60(%edi), %edx
18154 ; FALLBACK20-NEXT: movb %al, %cl
18155 ; FALLBACK20-NEXT: shll %cl, %edx
18156 ; FALLBACK20-NEXT: movl 56(%edi), %ebx
18157 ; FALLBACK20-NEXT: movl %ebx, %edi
18158 ; FALLBACK20-NEXT: shrl %edi
18159 ; FALLBACK20-NEXT: movb %ch, %cl
18160 ; FALLBACK20-NEXT: shrl %cl, %edi
18161 ; FALLBACK20-NEXT: orl %edx, %edi
18162 ; FALLBACK20-NEXT: movb %al, %cl
18163 ; FALLBACK20-NEXT: shll %cl, %ebx
18164 ; FALLBACK20-NEXT: shrl %esi
18165 ; FALLBACK20-NEXT: movb %ch, %cl
18166 ; FALLBACK20-NEXT: shrl %cl, %esi
18167 ; FALLBACK20-NEXT: orl %ebx, %esi
18168 ; FALLBACK20-NEXT: movl %eax, %ecx
18169 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
18170 ; FALLBACK20-NEXT: shll %cl, %edx
18171 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %eax
18172 ; FALLBACK20-NEXT: movl %edx, (%eax)
18173 ; FALLBACK20-NEXT: movl %esi, 56(%eax)
18174 ; FALLBACK20-NEXT: movl %edi, 60(%eax)
18175 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18176 ; FALLBACK20-NEXT: movl %ecx, 48(%eax)
18177 ; FALLBACK20-NEXT: movl %ebp, 52(%eax)
18178 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18179 ; FALLBACK20-NEXT: movl %ecx, 40(%eax)
18180 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18181 ; FALLBACK20-NEXT: movl %ecx, 44(%eax)
18182 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18183 ; FALLBACK20-NEXT: movl %ecx, 32(%eax)
18184 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18185 ; FALLBACK20-NEXT: movl %ecx, 36(%eax)
18186 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18187 ; FALLBACK20-NEXT: movl %ecx, 24(%eax)
18188 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18189 ; FALLBACK20-NEXT: movl %ecx, 28(%eax)
18190 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18191 ; FALLBACK20-NEXT: movl %ecx, 16(%eax)
18192 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18193 ; FALLBACK20-NEXT: movl %ecx, 20(%eax)
18194 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18195 ; FALLBACK20-NEXT: movl %ecx, 8(%eax)
18196 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18197 ; FALLBACK20-NEXT: movl %ecx, 12(%eax)
18198 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18199 ; FALLBACK20-NEXT: movl %ecx, 4(%eax)
18200 ; FALLBACK20-NEXT: addl $204, %esp
18201 ; FALLBACK20-NEXT: popl %esi
18202 ; FALLBACK20-NEXT: popl %edi
18203 ; FALLBACK20-NEXT: popl %ebx
18204 ; FALLBACK20-NEXT: popl %ebp
18205 ; FALLBACK20-NEXT: retl
18207 ; FALLBACK21-LABEL: shl_64bytes:
18208 ; FALLBACK21: # %bb.0:
18209 ; FALLBACK21-NEXT: pushl %ebp
18210 ; FALLBACK21-NEXT: pushl %ebx
18211 ; FALLBACK21-NEXT: pushl %edi
18212 ; FALLBACK21-NEXT: pushl %esi
18213 ; FALLBACK21-NEXT: subl $188, %esp
18214 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %eax
18215 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %ecx
18216 ; FALLBACK21-NEXT: movups (%ecx), %xmm0
18217 ; FALLBACK21-NEXT: movups 16(%ecx), %xmm1
18218 ; FALLBACK21-NEXT: movups 32(%ecx), %xmm2
18219 ; FALLBACK21-NEXT: movups 48(%ecx), %xmm3
18220 ; FALLBACK21-NEXT: movl (%eax), %ecx
18221 ; FALLBACK21-NEXT: xorps %xmm4, %xmm4
18222 ; FALLBACK21-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
18223 ; FALLBACK21-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
18224 ; FALLBACK21-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
18225 ; FALLBACK21-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
18226 ; FALLBACK21-NEXT: movaps %xmm3, {{[0-9]+}}(%esp)
18227 ; FALLBACK21-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
18228 ; FALLBACK21-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
18229 ; FALLBACK21-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
18230 ; FALLBACK21-NEXT: movl %ecx, %ebp
18231 ; FALLBACK21-NEXT: andl $60, %ebp
18232 ; FALLBACK21-NEXT: leal {{[0-9]+}}(%esp), %eax
18233 ; FALLBACK21-NEXT: subl %ebp, %eax
18234 ; FALLBACK21-NEXT: movl 8(%eax), %esi
18235 ; FALLBACK21-NEXT: movl 12(%eax), %edx
18236 ; FALLBACK21-NEXT: shll $3, %ecx
18237 ; FALLBACK21-NEXT: andl $24, %ecx
18238 ; FALLBACK21-NEXT: movl %edx, %edi
18239 ; FALLBACK21-NEXT: shldl %cl, %esi, %edi
18240 ; FALLBACK21-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18241 ; FALLBACK21-NEXT: movl 4(%eax), %edi
18242 ; FALLBACK21-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18243 ; FALLBACK21-NEXT: shldl %cl, %edi, %esi
18244 ; FALLBACK21-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18245 ; FALLBACK21-NEXT: movl 16(%eax), %edi
18246 ; FALLBACK21-NEXT: movl 20(%eax), %esi
18247 ; FALLBACK21-NEXT: movl %esi, %ebx
18248 ; FALLBACK21-NEXT: shldl %cl, %edi, %ebx
18249 ; FALLBACK21-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18250 ; FALLBACK21-NEXT: shldl %cl, %edx, %edi
18251 ; FALLBACK21-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18252 ; FALLBACK21-NEXT: movl 24(%eax), %edi
18253 ; FALLBACK21-NEXT: movl 28(%eax), %edx
18254 ; FALLBACK21-NEXT: movl %edx, %ebx
18255 ; FALLBACK21-NEXT: shldl %cl, %edi, %ebx
18256 ; FALLBACK21-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18257 ; FALLBACK21-NEXT: shldl %cl, %esi, %edi
18258 ; FALLBACK21-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18259 ; FALLBACK21-NEXT: movl 32(%eax), %edi
18260 ; FALLBACK21-NEXT: movl 36(%eax), %esi
18261 ; FALLBACK21-NEXT: movl %esi, %ebx
18262 ; FALLBACK21-NEXT: shldl %cl, %edi, %ebx
18263 ; FALLBACK21-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18264 ; FALLBACK21-NEXT: shldl %cl, %edx, %edi
18265 ; FALLBACK21-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18266 ; FALLBACK21-NEXT: movl 40(%eax), %edx
18267 ; FALLBACK21-NEXT: movl 44(%eax), %edi
18268 ; FALLBACK21-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18269 ; FALLBACK21-NEXT: shldl %cl, %edx, %edi
18270 ; FALLBACK21-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18271 ; FALLBACK21-NEXT: shldl %cl, %esi, %edx
18272 ; FALLBACK21-NEXT: movl %edx, (%esp) # 4-byte Spill
18273 ; FALLBACK21-NEXT: movl 56(%eax), %edx
18274 ; FALLBACK21-NEXT: movl 60(%eax), %edi
18275 ; FALLBACK21-NEXT: shldl %cl, %edx, %edi
18276 ; FALLBACK21-NEXT: movl (%eax), %ebx
18277 ; FALLBACK21-NEXT: movl 52(%eax), %esi
18278 ; FALLBACK21-NEXT: shldl %cl, %esi, %edx
18279 ; FALLBACK21-NEXT: negl %ebp
18280 ; FALLBACK21-NEXT: movl 160(%esp,%ebp), %eax
18281 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %ebp
18282 ; FALLBACK21-NEXT: movl %edx, 56(%ebp)
18283 ; FALLBACK21-NEXT: movl %edi, 60(%ebp)
18284 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
18285 ; FALLBACK21-NEXT: shldl %cl, %ebx, %edx
18286 ; FALLBACK21-NEXT: shll %cl, %ebx
18287 ; FALLBACK21-NEXT: shldl %cl, %eax, %esi
18288 ; FALLBACK21-NEXT: # kill: def $cl killed $cl killed $ecx
18289 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
18290 ; FALLBACK21-NEXT: shldl %cl, %edi, %eax
18291 ; FALLBACK21-NEXT: movl %eax, 48(%ebp)
18292 ; FALLBACK21-NEXT: movl %esi, 52(%ebp)
18293 ; FALLBACK21-NEXT: movl (%esp), %eax # 4-byte Reload
18294 ; FALLBACK21-NEXT: movl %eax, 40(%ebp)
18295 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
18296 ; FALLBACK21-NEXT: movl %eax, 44(%ebp)
18297 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
18298 ; FALLBACK21-NEXT: movl %eax, 32(%ebp)
18299 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
18300 ; FALLBACK21-NEXT: movl %eax, 36(%ebp)
18301 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
18302 ; FALLBACK21-NEXT: movl %eax, 24(%ebp)
18303 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
18304 ; FALLBACK21-NEXT: movl %eax, 28(%ebp)
18305 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
18306 ; FALLBACK21-NEXT: movl %eax, 16(%ebp)
18307 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
18308 ; FALLBACK21-NEXT: movl %eax, 20(%ebp)
18309 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
18310 ; FALLBACK21-NEXT: movl %eax, 8(%ebp)
18311 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
18312 ; FALLBACK21-NEXT: movl %eax, 12(%ebp)
18313 ; FALLBACK21-NEXT: movl %ebx, (%ebp)
18314 ; FALLBACK21-NEXT: movl %edx, 4(%ebp)
18315 ; FALLBACK21-NEXT: addl $188, %esp
18316 ; FALLBACK21-NEXT: popl %esi
18317 ; FALLBACK21-NEXT: popl %edi
18318 ; FALLBACK21-NEXT: popl %ebx
18319 ; FALLBACK21-NEXT: popl %ebp
18320 ; FALLBACK21-NEXT: retl
18322 ; FALLBACK22-LABEL: shl_64bytes:
18323 ; FALLBACK22: # %bb.0:
18324 ; FALLBACK22-NEXT: pushl %ebp
18325 ; FALLBACK22-NEXT: pushl %ebx
18326 ; FALLBACK22-NEXT: pushl %edi
18327 ; FALLBACK22-NEXT: pushl %esi
18328 ; FALLBACK22-NEXT: subl $204, %esp
18329 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %eax
18330 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %ecx
18331 ; FALLBACK22-NEXT: movups (%ecx), %xmm0
18332 ; FALLBACK22-NEXT: movups 16(%ecx), %xmm1
18333 ; FALLBACK22-NEXT: movups 32(%ecx), %xmm2
18334 ; FALLBACK22-NEXT: movups 48(%ecx), %xmm3
18335 ; FALLBACK22-NEXT: movl (%eax), %eax
18336 ; FALLBACK22-NEXT: xorps %xmm4, %xmm4
18337 ; FALLBACK22-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
18338 ; FALLBACK22-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
18339 ; FALLBACK22-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
18340 ; FALLBACK22-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
18341 ; FALLBACK22-NEXT: movaps %xmm3, {{[0-9]+}}(%esp)
18342 ; FALLBACK22-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
18343 ; FALLBACK22-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
18344 ; FALLBACK22-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
18345 ; FALLBACK22-NEXT: leal (,%eax,8), %edx
18346 ; FALLBACK22-NEXT: andl $24, %edx
18347 ; FALLBACK22-NEXT: andl $60, %eax
18348 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18349 ; FALLBACK22-NEXT: leal {{[0-9]+}}(%esp), %edi
18350 ; FALLBACK22-NEXT: subl %eax, %edi
18351 ; FALLBACK22-NEXT: movl (%edi), %ecx
18352 ; FALLBACK22-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18353 ; FALLBACK22-NEXT: movl 4(%edi), %eax
18354 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18355 ; FALLBACK22-NEXT: movl %edx, %ebx
18356 ; FALLBACK22-NEXT: notb %bl
18357 ; FALLBACK22-NEXT: shrl %ecx
18358 ; FALLBACK22-NEXT: shrxl %ebx, %ecx, %esi
18359 ; FALLBACK22-NEXT: shlxl %edx, %eax, %ecx
18360 ; FALLBACK22-NEXT: orl %ecx, %esi
18361 ; FALLBACK22-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18362 ; FALLBACK22-NEXT: movl 8(%edi), %esi
18363 ; FALLBACK22-NEXT: movl %esi, %ecx
18364 ; FALLBACK22-NEXT: shrl %ecx
18365 ; FALLBACK22-NEXT: shrxl %ebx, %ecx, %eax
18366 ; FALLBACK22-NEXT: movl 12(%edi), %ecx
18367 ; FALLBACK22-NEXT: shlxl %edx, %ecx, %ebp
18368 ; FALLBACK22-NEXT: orl %ebp, %eax
18369 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18370 ; FALLBACK22-NEXT: shlxl %edx, %esi, %esi
18371 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
18372 ; FALLBACK22-NEXT: shrl %eax
18373 ; FALLBACK22-NEXT: shrxl %ebx, %eax, %eax
18374 ; FALLBACK22-NEXT: orl %esi, %eax
18375 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18376 ; FALLBACK22-NEXT: movl 16(%edi), %eax
18377 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18378 ; FALLBACK22-NEXT: shrl %eax
18379 ; FALLBACK22-NEXT: shrxl %ebx, %eax, %eax
18380 ; FALLBACK22-NEXT: movl 20(%edi), %esi
18381 ; FALLBACK22-NEXT: shlxl %edx, %esi, %ebp
18382 ; FALLBACK22-NEXT: orl %ebp, %eax
18383 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18384 ; FALLBACK22-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
18385 ; FALLBACK22-NEXT: shrl %ecx
18386 ; FALLBACK22-NEXT: shrxl %ebx, %ecx, %ecx
18387 ; FALLBACK22-NEXT: orl %eax, %ecx
18388 ; FALLBACK22-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18389 ; FALLBACK22-NEXT: movl 24(%edi), %ecx
18390 ; FALLBACK22-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18391 ; FALLBACK22-NEXT: shrl %ecx
18392 ; FALLBACK22-NEXT: shrxl %ebx, %ecx, %eax
18393 ; FALLBACK22-NEXT: movl 28(%edi), %ecx
18394 ; FALLBACK22-NEXT: shlxl %edx, %ecx, %ebp
18395 ; FALLBACK22-NEXT: orl %ebp, %eax
18396 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18397 ; FALLBACK22-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
18398 ; FALLBACK22-NEXT: shrl %esi
18399 ; FALLBACK22-NEXT: shrxl %ebx, %esi, %esi
18400 ; FALLBACK22-NEXT: orl %eax, %esi
18401 ; FALLBACK22-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18402 ; FALLBACK22-NEXT: movl 32(%edi), %eax
18403 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18404 ; FALLBACK22-NEXT: shrl %eax
18405 ; FALLBACK22-NEXT: shrxl %ebx, %eax, %eax
18406 ; FALLBACK22-NEXT: movl 36(%edi), %esi
18407 ; FALLBACK22-NEXT: shlxl %edx, %esi, %ebp
18408 ; FALLBACK22-NEXT: orl %ebp, %eax
18409 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18410 ; FALLBACK22-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
18411 ; FALLBACK22-NEXT: shrl %ecx
18412 ; FALLBACK22-NEXT: shrxl %ebx, %ecx, %ecx
18413 ; FALLBACK22-NEXT: orl %eax, %ecx
18414 ; FALLBACK22-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18415 ; FALLBACK22-NEXT: movl 40(%edi), %ecx
18416 ; FALLBACK22-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18417 ; FALLBACK22-NEXT: shrl %ecx
18418 ; FALLBACK22-NEXT: shrxl %ebx, %ecx, %eax
18419 ; FALLBACK22-NEXT: movl 44(%edi), %ecx
18420 ; FALLBACK22-NEXT: shlxl %edx, %ecx, %ebp
18421 ; FALLBACK22-NEXT: orl %ebp, %eax
18422 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18423 ; FALLBACK22-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
18424 ; FALLBACK22-NEXT: shrl %esi
18425 ; FALLBACK22-NEXT: shrxl %ebx, %esi, %esi
18426 ; FALLBACK22-NEXT: orl %eax, %esi
18427 ; FALLBACK22-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18428 ; FALLBACK22-NEXT: movl 48(%edi), %esi
18429 ; FALLBACK22-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18430 ; FALLBACK22-NEXT: shrl %esi
18431 ; FALLBACK22-NEXT: shrxl %ebx, %esi, %eax
18432 ; FALLBACK22-NEXT: movl 52(%edi), %esi
18433 ; FALLBACK22-NEXT: shlxl %edx, %esi, %ebp
18434 ; FALLBACK22-NEXT: orl %ebp, %eax
18435 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18436 ; FALLBACK22-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
18437 ; FALLBACK22-NEXT: shrl %ecx
18438 ; FALLBACK22-NEXT: shrxl %ebx, %ecx, %ebp
18439 ; FALLBACK22-NEXT: orl %eax, %ebp
18440 ; FALLBACK22-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
18441 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18442 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
18443 ; FALLBACK22-NEXT: negl %eax
18444 ; FALLBACK22-NEXT: shlxl %edx, 188(%esp,%eax), %ecx
18445 ; FALLBACK22-NEXT: movl 56(%edi), %eax
18446 ; FALLBACK22-NEXT: shlxl %edx, %eax, %edx
18447 ; FALLBACK22-NEXT: shrl %esi
18448 ; FALLBACK22-NEXT: shrxl %ebx, %esi, %esi
18449 ; FALLBACK22-NEXT: orl %edx, %esi
18450 ; FALLBACK22-NEXT: shrl %eax
18451 ; FALLBACK22-NEXT: shrxl %ebx, %eax, %eax
18452 ; FALLBACK22-NEXT: orl %eax, %ecx
18453 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %eax
18454 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
18455 ; FALLBACK22-NEXT: movl %edx, (%eax)
18456 ; FALLBACK22-NEXT: movl %esi, 56(%eax)
18457 ; FALLBACK22-NEXT: movl %ecx, 60(%eax)
18458 ; FALLBACK22-NEXT: movl %ebp, 48(%eax)
18459 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18460 ; FALLBACK22-NEXT: movl %ecx, 52(%eax)
18461 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18462 ; FALLBACK22-NEXT: movl %ecx, 40(%eax)
18463 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18464 ; FALLBACK22-NEXT: movl %ecx, 44(%eax)
18465 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18466 ; FALLBACK22-NEXT: movl %ecx, 32(%eax)
18467 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18468 ; FALLBACK22-NEXT: movl %ecx, 36(%eax)
18469 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18470 ; FALLBACK22-NEXT: movl %ecx, 24(%eax)
18471 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18472 ; FALLBACK22-NEXT: movl %ecx, 28(%eax)
18473 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18474 ; FALLBACK22-NEXT: movl %ecx, 16(%eax)
18475 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18476 ; FALLBACK22-NEXT: movl %ecx, 20(%eax)
18477 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18478 ; FALLBACK22-NEXT: movl %ecx, 8(%eax)
18479 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18480 ; FALLBACK22-NEXT: movl %ecx, 12(%eax)
18481 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18482 ; FALLBACK22-NEXT: movl %ecx, 4(%eax)
18483 ; FALLBACK22-NEXT: addl $204, %esp
18484 ; FALLBACK22-NEXT: popl %esi
18485 ; FALLBACK22-NEXT: popl %edi
18486 ; FALLBACK22-NEXT: popl %ebx
18487 ; FALLBACK22-NEXT: popl %ebp
18488 ; FALLBACK22-NEXT: retl
18490 ; FALLBACK23-LABEL: shl_64bytes:
18491 ; FALLBACK23: # %bb.0:
18492 ; FALLBACK23-NEXT: pushl %ebp
18493 ; FALLBACK23-NEXT: pushl %ebx
18494 ; FALLBACK23-NEXT: pushl %edi
18495 ; FALLBACK23-NEXT: pushl %esi
18496 ; FALLBACK23-NEXT: subl $204, %esp
18497 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %eax
18498 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %ecx
18499 ; FALLBACK23-NEXT: movups (%ecx), %xmm0
18500 ; FALLBACK23-NEXT: movups 16(%ecx), %xmm1
18501 ; FALLBACK23-NEXT: movups 32(%ecx), %xmm2
18502 ; FALLBACK23-NEXT: movups 48(%ecx), %xmm3
18503 ; FALLBACK23-NEXT: movl (%eax), %ebp
18504 ; FALLBACK23-NEXT: xorps %xmm4, %xmm4
18505 ; FALLBACK23-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
18506 ; FALLBACK23-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
18507 ; FALLBACK23-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
18508 ; FALLBACK23-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
18509 ; FALLBACK23-NEXT: movaps %xmm3, {{[0-9]+}}(%esp)
18510 ; FALLBACK23-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
18511 ; FALLBACK23-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
18512 ; FALLBACK23-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
18513 ; FALLBACK23-NEXT: leal (,%ebp,8), %ecx
18514 ; FALLBACK23-NEXT: andl $24, %ecx
18515 ; FALLBACK23-NEXT: andl $60, %ebp
18516 ; FALLBACK23-NEXT: leal {{[0-9]+}}(%esp), %eax
18517 ; FALLBACK23-NEXT: subl %ebp, %eax
18518 ; FALLBACK23-NEXT: movl 4(%eax), %esi
18519 ; FALLBACK23-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18520 ; FALLBACK23-NEXT: movl 8(%eax), %edi
18521 ; FALLBACK23-NEXT: movl 12(%eax), %edx
18522 ; FALLBACK23-NEXT: movl %edx, %ebx
18523 ; FALLBACK23-NEXT: shldl %cl, %edi, %ebx
18524 ; FALLBACK23-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18525 ; FALLBACK23-NEXT: shldl %cl, %esi, %edi
18526 ; FALLBACK23-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18527 ; FALLBACK23-NEXT: movl 16(%eax), %edi
18528 ; FALLBACK23-NEXT: movl 20(%eax), %esi
18529 ; FALLBACK23-NEXT: movl %esi, %ebx
18530 ; FALLBACK23-NEXT: shldl %cl, %edi, %ebx
18531 ; FALLBACK23-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18532 ; FALLBACK23-NEXT: shldl %cl, %edx, %edi
18533 ; FALLBACK23-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18534 ; FALLBACK23-NEXT: movl 24(%eax), %edi
18535 ; FALLBACK23-NEXT: movl 28(%eax), %edx
18536 ; FALLBACK23-NEXT: movl %edx, %ebx
18537 ; FALLBACK23-NEXT: shldl %cl, %edi, %ebx
18538 ; FALLBACK23-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18539 ; FALLBACK23-NEXT: shldl %cl, %esi, %edi
18540 ; FALLBACK23-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18541 ; FALLBACK23-NEXT: movl 32(%eax), %edi
18542 ; FALLBACK23-NEXT: movl 36(%eax), %esi
18543 ; FALLBACK23-NEXT: movl %esi, %ebx
18544 ; FALLBACK23-NEXT: shldl %cl, %edi, %ebx
18545 ; FALLBACK23-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18546 ; FALLBACK23-NEXT: shldl %cl, %edx, %edi
18547 ; FALLBACK23-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18548 ; FALLBACK23-NEXT: movl 40(%eax), %ebx
18549 ; FALLBACK23-NEXT: movl 44(%eax), %edx
18550 ; FALLBACK23-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18551 ; FALLBACK23-NEXT: shldl %cl, %ebx, %edx
18552 ; FALLBACK23-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18553 ; FALLBACK23-NEXT: shldl %cl, %esi, %ebx
18554 ; FALLBACK23-NEXT: movl 56(%eax), %edx
18555 ; FALLBACK23-NEXT: movl 60(%eax), %edi
18556 ; FALLBACK23-NEXT: shldl %cl, %edx, %edi
18557 ; FALLBACK23-NEXT: movl (%eax), %esi
18558 ; FALLBACK23-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18559 ; FALLBACK23-NEXT: movl 52(%eax), %esi
18560 ; FALLBACK23-NEXT: shldl %cl, %esi, %edx
18561 ; FALLBACK23-NEXT: negl %ebp
18562 ; FALLBACK23-NEXT: movl 176(%esp,%ebp), %ebp
18563 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %eax
18564 ; FALLBACK23-NEXT: movl %edx, 56(%eax)
18565 ; FALLBACK23-NEXT: movl %edi, 60(%eax)
18566 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
18567 ; FALLBACK23-NEXT: shlxl %ecx, %edx, %edi
18568 ; FALLBACK23-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18569 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
18570 ; FALLBACK23-NEXT: shldl %cl, %edx, %edi
18571 ; FALLBACK23-NEXT: shldl %cl, %ebp, %esi
18572 ; FALLBACK23-NEXT: # kill: def $cl killed $cl killed $ecx
18573 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
18574 ; FALLBACK23-NEXT: shldl %cl, %edx, %ebp
18575 ; FALLBACK23-NEXT: movl %ebp, 48(%eax)
18576 ; FALLBACK23-NEXT: movl %esi, 52(%eax)
18577 ; FALLBACK23-NEXT: movl %ebx, 40(%eax)
18578 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18579 ; FALLBACK23-NEXT: movl %ecx, 44(%eax)
18580 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18581 ; FALLBACK23-NEXT: movl %ecx, 32(%eax)
18582 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18583 ; FALLBACK23-NEXT: movl %ecx, 36(%eax)
18584 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18585 ; FALLBACK23-NEXT: movl %ecx, 24(%eax)
18586 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18587 ; FALLBACK23-NEXT: movl %ecx, 28(%eax)
18588 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18589 ; FALLBACK23-NEXT: movl %ecx, 16(%eax)
18590 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18591 ; FALLBACK23-NEXT: movl %ecx, 20(%eax)
18592 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18593 ; FALLBACK23-NEXT: movl %ecx, 8(%eax)
18594 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18595 ; FALLBACK23-NEXT: movl %ecx, 12(%eax)
18596 ; FALLBACK23-NEXT: movl %edi, 4(%eax)
18597 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18598 ; FALLBACK23-NEXT: movl %ecx, (%eax)
18599 ; FALLBACK23-NEXT: addl $204, %esp
18600 ; FALLBACK23-NEXT: popl %esi
18601 ; FALLBACK23-NEXT: popl %edi
18602 ; FALLBACK23-NEXT: popl %ebx
18603 ; FALLBACK23-NEXT: popl %ebp
18604 ; FALLBACK23-NEXT: retl
18606 ; FALLBACK24-LABEL: shl_64bytes:
18607 ; FALLBACK24: # %bb.0:
18608 ; FALLBACK24-NEXT: pushl %ebp
18609 ; FALLBACK24-NEXT: pushl %ebx
18610 ; FALLBACK24-NEXT: pushl %edi
18611 ; FALLBACK24-NEXT: pushl %esi
18612 ; FALLBACK24-NEXT: subl $204, %esp
18613 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %eax
18614 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %ecx
18615 ; FALLBACK24-NEXT: vmovups (%ecx), %ymm0
18616 ; FALLBACK24-NEXT: vmovups 32(%ecx), %ymm1
18617 ; FALLBACK24-NEXT: movl (%eax), %eax
18618 ; FALLBACK24-NEXT: vxorps %xmm2, %xmm2, %xmm2
18619 ; FALLBACK24-NEXT: vmovups %ymm2, {{[0-9]+}}(%esp)
18620 ; FALLBACK24-NEXT: vmovups %ymm2, {{[0-9]+}}(%esp)
18621 ; FALLBACK24-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
18622 ; FALLBACK24-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
18623 ; FALLBACK24-NEXT: movl %eax, %edx
18624 ; FALLBACK24-NEXT: andl $60, %edx
18625 ; FALLBACK24-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18626 ; FALLBACK24-NEXT: leal {{[0-9]+}}(%esp), %ecx
18627 ; FALLBACK24-NEXT: subl %edx, %ecx
18628 ; FALLBACK24-NEXT: movl (%ecx), %edi
18629 ; FALLBACK24-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18630 ; FALLBACK24-NEXT: movl 4(%ecx), %edx
18631 ; FALLBACK24-NEXT: movl %ecx, %ebp
18632 ; FALLBACK24-NEXT: shll $3, %eax
18633 ; FALLBACK24-NEXT: andl $24, %eax
18634 ; FALLBACK24-NEXT: movl %edx, %esi
18635 ; FALLBACK24-NEXT: movl %eax, %ecx
18636 ; FALLBACK24-NEXT: shll %cl, %esi
18637 ; FALLBACK24-NEXT: shrl %edi
18638 ; FALLBACK24-NEXT: movb %al, %ch
18639 ; FALLBACK24-NEXT: notb %ch
18640 ; FALLBACK24-NEXT: movb %ch, %cl
18641 ; FALLBACK24-NEXT: shrl %cl, %edi
18642 ; FALLBACK24-NEXT: orl %esi, %edi
18643 ; FALLBACK24-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18644 ; FALLBACK24-NEXT: movl 12(%ebp), %ebx
18645 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18646 ; FALLBACK24-NEXT: movb %al, %cl
18647 ; FALLBACK24-NEXT: shll %cl, %ebx
18648 ; FALLBACK24-NEXT: movl 8(%ebp), %esi
18649 ; FALLBACK24-NEXT: movl %ebp, %edi
18650 ; FALLBACK24-NEXT: movl %esi, %ebp
18651 ; FALLBACK24-NEXT: shrl %ebp
18652 ; FALLBACK24-NEXT: movb %ch, %cl
18653 ; FALLBACK24-NEXT: shrl %cl, %ebp
18654 ; FALLBACK24-NEXT: orl %ebx, %ebp
18655 ; FALLBACK24-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18656 ; FALLBACK24-NEXT: movb %al, %cl
18657 ; FALLBACK24-NEXT: shll %cl, %esi
18658 ; FALLBACK24-NEXT: shrl %edx
18659 ; FALLBACK24-NEXT: movb %ch, %cl
18660 ; FALLBACK24-NEXT: shrl %cl, %edx
18661 ; FALLBACK24-NEXT: orl %esi, %edx
18662 ; FALLBACK24-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18663 ; FALLBACK24-NEXT: movl %edi, %ebp
18664 ; FALLBACK24-NEXT: movl 20(%edi), %ebx
18665 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18666 ; FALLBACK24-NEXT: movb %al, %cl
18667 ; FALLBACK24-NEXT: shll %cl, %ebx
18668 ; FALLBACK24-NEXT: movl 16(%edi), %esi
18669 ; FALLBACK24-NEXT: movl %esi, %edx
18670 ; FALLBACK24-NEXT: shrl %edx
18671 ; FALLBACK24-NEXT: movb %ch, %cl
18672 ; FALLBACK24-NEXT: shrl %cl, %edx
18673 ; FALLBACK24-NEXT: orl %ebx, %edx
18674 ; FALLBACK24-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18675 ; FALLBACK24-NEXT: movb %al, %cl
18676 ; FALLBACK24-NEXT: shll %cl, %esi
18677 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
18678 ; FALLBACK24-NEXT: shrl %edi
18679 ; FALLBACK24-NEXT: movb %ch, %cl
18680 ; FALLBACK24-NEXT: shrl %cl, %edi
18681 ; FALLBACK24-NEXT: orl %esi, %edi
18682 ; FALLBACK24-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18683 ; FALLBACK24-NEXT: movl %ebp, %edx
18684 ; FALLBACK24-NEXT: movl 28(%ebp), %ebx
18685 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18686 ; FALLBACK24-NEXT: movb %al, %cl
18687 ; FALLBACK24-NEXT: shll %cl, %ebx
18688 ; FALLBACK24-NEXT: movl 24(%ebp), %esi
18689 ; FALLBACK24-NEXT: movl %esi, %edi
18690 ; FALLBACK24-NEXT: shrl %edi
18691 ; FALLBACK24-NEXT: movb %ch, %cl
18692 ; FALLBACK24-NEXT: shrl %cl, %edi
18693 ; FALLBACK24-NEXT: orl %ebx, %edi
18694 ; FALLBACK24-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18695 ; FALLBACK24-NEXT: movb %al, %cl
18696 ; FALLBACK24-NEXT: shll %cl, %esi
18697 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
18698 ; FALLBACK24-NEXT: shrl %ebp
18699 ; FALLBACK24-NEXT: movb %ch, %cl
18700 ; FALLBACK24-NEXT: shrl %cl, %ebp
18701 ; FALLBACK24-NEXT: orl %esi, %ebp
18702 ; FALLBACK24-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18703 ; FALLBACK24-NEXT: movl 36(%edx), %ebx
18704 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18705 ; FALLBACK24-NEXT: movb %al, %cl
18706 ; FALLBACK24-NEXT: shll %cl, %ebx
18707 ; FALLBACK24-NEXT: movl 32(%edx), %esi
18708 ; FALLBACK24-NEXT: movl %edx, %ebp
18709 ; FALLBACK24-NEXT: movl %esi, %edi
18710 ; FALLBACK24-NEXT: shrl %edi
18711 ; FALLBACK24-NEXT: movb %ch, %cl
18712 ; FALLBACK24-NEXT: shrl %cl, %edi
18713 ; FALLBACK24-NEXT: orl %ebx, %edi
18714 ; FALLBACK24-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18715 ; FALLBACK24-NEXT: movb %al, %cl
18716 ; FALLBACK24-NEXT: shll %cl, %esi
18717 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
18718 ; FALLBACK24-NEXT: shrl %edx
18719 ; FALLBACK24-NEXT: movb %ch, %cl
18720 ; FALLBACK24-NEXT: shrl %cl, %edx
18721 ; FALLBACK24-NEXT: orl %esi, %edx
18722 ; FALLBACK24-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18723 ; FALLBACK24-NEXT: movl 44(%ebp), %ebx
18724 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18725 ; FALLBACK24-NEXT: movb %al, %cl
18726 ; FALLBACK24-NEXT: shll %cl, %ebx
18727 ; FALLBACK24-NEXT: movl 40(%ebp), %esi
18728 ; FALLBACK24-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18729 ; FALLBACK24-NEXT: movl %esi, %edx
18730 ; FALLBACK24-NEXT: shrl %edx
18731 ; FALLBACK24-NEXT: movb %ch, %cl
18732 ; FALLBACK24-NEXT: shrl %cl, %edx
18733 ; FALLBACK24-NEXT: orl %ebx, %edx
18734 ; FALLBACK24-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18735 ; FALLBACK24-NEXT: movb %al, %cl
18736 ; FALLBACK24-NEXT: shll %cl, %esi
18737 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
18738 ; FALLBACK24-NEXT: shrl %edx
18739 ; FALLBACK24-NEXT: movb %ch, %cl
18740 ; FALLBACK24-NEXT: shrl %cl, %edx
18741 ; FALLBACK24-NEXT: orl %esi, %edx
18742 ; FALLBACK24-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18743 ; FALLBACK24-NEXT: movl 52(%ebp), %esi
18744 ; FALLBACK24-NEXT: movl %esi, %edi
18745 ; FALLBACK24-NEXT: movb %al, %cl
18746 ; FALLBACK24-NEXT: shll %cl, %edi
18747 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
18748 ; FALLBACK24-NEXT: negl %edx
18749 ; FALLBACK24-NEXT: movl 176(%esp,%edx), %ebx
18750 ; FALLBACK24-NEXT: movl %ebx, %ebp
18751 ; FALLBACK24-NEXT: shrl %ebp
18752 ; FALLBACK24-NEXT: movb %ch, %cl
18753 ; FALLBACK24-NEXT: shrl %cl, %ebp
18754 ; FALLBACK24-NEXT: orl %edi, %ebp
18755 ; FALLBACK24-NEXT: movb %al, %cl
18756 ; FALLBACK24-NEXT: shll %cl, %ebx
18757 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
18758 ; FALLBACK24-NEXT: shrl %edx
18759 ; FALLBACK24-NEXT: movb %ch, %cl
18760 ; FALLBACK24-NEXT: shrl %cl, %edx
18761 ; FALLBACK24-NEXT: orl %ebx, %edx
18762 ; FALLBACK24-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18763 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
18764 ; FALLBACK24-NEXT: movl 60(%edi), %edx
18765 ; FALLBACK24-NEXT: movb %al, %cl
18766 ; FALLBACK24-NEXT: shll %cl, %edx
18767 ; FALLBACK24-NEXT: movl 56(%edi), %ebx
18768 ; FALLBACK24-NEXT: movl %ebx, %edi
18769 ; FALLBACK24-NEXT: shrl %edi
18770 ; FALLBACK24-NEXT: movb %ch, %cl
18771 ; FALLBACK24-NEXT: shrl %cl, %edi
18772 ; FALLBACK24-NEXT: orl %edx, %edi
18773 ; FALLBACK24-NEXT: movb %al, %cl
18774 ; FALLBACK24-NEXT: shll %cl, %ebx
18775 ; FALLBACK24-NEXT: shrl %esi
18776 ; FALLBACK24-NEXT: movb %ch, %cl
18777 ; FALLBACK24-NEXT: shrl %cl, %esi
18778 ; FALLBACK24-NEXT: orl %ebx, %esi
18779 ; FALLBACK24-NEXT: movl %eax, %ecx
18780 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
18781 ; FALLBACK24-NEXT: shll %cl, %edx
18782 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %eax
18783 ; FALLBACK24-NEXT: movl %edx, (%eax)
18784 ; FALLBACK24-NEXT: movl %esi, 56(%eax)
18785 ; FALLBACK24-NEXT: movl %edi, 60(%eax)
18786 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18787 ; FALLBACK24-NEXT: movl %ecx, 48(%eax)
18788 ; FALLBACK24-NEXT: movl %ebp, 52(%eax)
18789 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18790 ; FALLBACK24-NEXT: movl %ecx, 40(%eax)
18791 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18792 ; FALLBACK24-NEXT: movl %ecx, 44(%eax)
18793 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18794 ; FALLBACK24-NEXT: movl %ecx, 32(%eax)
18795 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18796 ; FALLBACK24-NEXT: movl %ecx, 36(%eax)
18797 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18798 ; FALLBACK24-NEXT: movl %ecx, 24(%eax)
18799 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18800 ; FALLBACK24-NEXT: movl %ecx, 28(%eax)
18801 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18802 ; FALLBACK24-NEXT: movl %ecx, 16(%eax)
18803 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18804 ; FALLBACK24-NEXT: movl %ecx, 20(%eax)
18805 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18806 ; FALLBACK24-NEXT: movl %ecx, 8(%eax)
18807 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18808 ; FALLBACK24-NEXT: movl %ecx, 12(%eax)
18809 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
18810 ; FALLBACK24-NEXT: movl %ecx, 4(%eax)
18811 ; FALLBACK24-NEXT: addl $204, %esp
18812 ; FALLBACK24-NEXT: popl %esi
18813 ; FALLBACK24-NEXT: popl %edi
18814 ; FALLBACK24-NEXT: popl %ebx
18815 ; FALLBACK24-NEXT: popl %ebp
18816 ; FALLBACK24-NEXT: vzeroupper
18817 ; FALLBACK24-NEXT: retl
18819 ; FALLBACK25-LABEL: shl_64bytes:
18820 ; FALLBACK25: # %bb.0:
18821 ; FALLBACK25-NEXT: pushl %ebp
18822 ; FALLBACK25-NEXT: pushl %ebx
18823 ; FALLBACK25-NEXT: pushl %edi
18824 ; FALLBACK25-NEXT: pushl %esi
18825 ; FALLBACK25-NEXT: subl $188, %esp
18826 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %eax
18827 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %ecx
18828 ; FALLBACK25-NEXT: vmovups (%ecx), %ymm0
18829 ; FALLBACK25-NEXT: vmovups 32(%ecx), %ymm1
18830 ; FALLBACK25-NEXT: movl (%eax), %ecx
18831 ; FALLBACK25-NEXT: vxorps %xmm2, %xmm2, %xmm2
18832 ; FALLBACK25-NEXT: vmovups %ymm2, {{[0-9]+}}(%esp)
18833 ; FALLBACK25-NEXT: vmovups %ymm2, {{[0-9]+}}(%esp)
18834 ; FALLBACK25-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
18835 ; FALLBACK25-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
18836 ; FALLBACK25-NEXT: movl %ecx, %ebp
18837 ; FALLBACK25-NEXT: andl $60, %ebp
18838 ; FALLBACK25-NEXT: leal {{[0-9]+}}(%esp), %eax
18839 ; FALLBACK25-NEXT: subl %ebp, %eax
18840 ; FALLBACK25-NEXT: movl 8(%eax), %esi
18841 ; FALLBACK25-NEXT: movl 12(%eax), %edx
18842 ; FALLBACK25-NEXT: shll $3, %ecx
18843 ; FALLBACK25-NEXT: andl $24, %ecx
18844 ; FALLBACK25-NEXT: movl %edx, %edi
18845 ; FALLBACK25-NEXT: shldl %cl, %esi, %edi
18846 ; FALLBACK25-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18847 ; FALLBACK25-NEXT: movl 4(%eax), %edi
18848 ; FALLBACK25-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18849 ; FALLBACK25-NEXT: shldl %cl, %edi, %esi
18850 ; FALLBACK25-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18851 ; FALLBACK25-NEXT: movl 16(%eax), %edi
18852 ; FALLBACK25-NEXT: movl 20(%eax), %esi
18853 ; FALLBACK25-NEXT: movl %esi, %ebx
18854 ; FALLBACK25-NEXT: shldl %cl, %edi, %ebx
18855 ; FALLBACK25-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18856 ; FALLBACK25-NEXT: shldl %cl, %edx, %edi
18857 ; FALLBACK25-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18858 ; FALLBACK25-NEXT: movl 24(%eax), %edi
18859 ; FALLBACK25-NEXT: movl 28(%eax), %edx
18860 ; FALLBACK25-NEXT: movl %edx, %ebx
18861 ; FALLBACK25-NEXT: shldl %cl, %edi, %ebx
18862 ; FALLBACK25-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18863 ; FALLBACK25-NEXT: shldl %cl, %esi, %edi
18864 ; FALLBACK25-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18865 ; FALLBACK25-NEXT: movl 32(%eax), %edi
18866 ; FALLBACK25-NEXT: movl 36(%eax), %esi
18867 ; FALLBACK25-NEXT: movl %esi, %ebx
18868 ; FALLBACK25-NEXT: shldl %cl, %edi, %ebx
18869 ; FALLBACK25-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18870 ; FALLBACK25-NEXT: shldl %cl, %edx, %edi
18871 ; FALLBACK25-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18872 ; FALLBACK25-NEXT: movl 40(%eax), %edx
18873 ; FALLBACK25-NEXT: movl 44(%eax), %edi
18874 ; FALLBACK25-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18875 ; FALLBACK25-NEXT: shldl %cl, %edx, %edi
18876 ; FALLBACK25-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18877 ; FALLBACK25-NEXT: shldl %cl, %esi, %edx
18878 ; FALLBACK25-NEXT: movl %edx, (%esp) # 4-byte Spill
18879 ; FALLBACK25-NEXT: movl 56(%eax), %edx
18880 ; FALLBACK25-NEXT: movl 60(%eax), %edi
18881 ; FALLBACK25-NEXT: shldl %cl, %edx, %edi
18882 ; FALLBACK25-NEXT: movl (%eax), %ebx
18883 ; FALLBACK25-NEXT: movl 52(%eax), %esi
18884 ; FALLBACK25-NEXT: shldl %cl, %esi, %edx
18885 ; FALLBACK25-NEXT: negl %ebp
18886 ; FALLBACK25-NEXT: movl 160(%esp,%ebp), %eax
18887 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %ebp
18888 ; FALLBACK25-NEXT: movl %edx, 56(%ebp)
18889 ; FALLBACK25-NEXT: movl %edi, 60(%ebp)
18890 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
18891 ; FALLBACK25-NEXT: shldl %cl, %ebx, %edx
18892 ; FALLBACK25-NEXT: shll %cl, %ebx
18893 ; FALLBACK25-NEXT: shldl %cl, %eax, %esi
18894 ; FALLBACK25-NEXT: # kill: def $cl killed $cl killed $ecx
18895 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
18896 ; FALLBACK25-NEXT: shldl %cl, %edi, %eax
18897 ; FALLBACK25-NEXT: movl %eax, 48(%ebp)
18898 ; FALLBACK25-NEXT: movl %esi, 52(%ebp)
18899 ; FALLBACK25-NEXT: movl (%esp), %eax # 4-byte Reload
18900 ; FALLBACK25-NEXT: movl %eax, 40(%ebp)
18901 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
18902 ; FALLBACK25-NEXT: movl %eax, 44(%ebp)
18903 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
18904 ; FALLBACK25-NEXT: movl %eax, 32(%ebp)
18905 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
18906 ; FALLBACK25-NEXT: movl %eax, 36(%ebp)
18907 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
18908 ; FALLBACK25-NEXT: movl %eax, 24(%ebp)
18909 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
18910 ; FALLBACK25-NEXT: movl %eax, 28(%ebp)
18911 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
18912 ; FALLBACK25-NEXT: movl %eax, 16(%ebp)
18913 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
18914 ; FALLBACK25-NEXT: movl %eax, 20(%ebp)
18915 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
18916 ; FALLBACK25-NEXT: movl %eax, 8(%ebp)
18917 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
18918 ; FALLBACK25-NEXT: movl %eax, 12(%ebp)
18919 ; FALLBACK25-NEXT: movl %ebx, (%ebp)
18920 ; FALLBACK25-NEXT: movl %edx, 4(%ebp)
18921 ; FALLBACK25-NEXT: addl $188, %esp
18922 ; FALLBACK25-NEXT: popl %esi
18923 ; FALLBACK25-NEXT: popl %edi
18924 ; FALLBACK25-NEXT: popl %ebx
18925 ; FALLBACK25-NEXT: popl %ebp
18926 ; FALLBACK25-NEXT: vzeroupper
18927 ; FALLBACK25-NEXT: retl
18929 ; FALLBACK26-LABEL: shl_64bytes:
18930 ; FALLBACK26: # %bb.0:
18931 ; FALLBACK26-NEXT: pushl %ebp
18932 ; FALLBACK26-NEXT: pushl %ebx
18933 ; FALLBACK26-NEXT: pushl %edi
18934 ; FALLBACK26-NEXT: pushl %esi
18935 ; FALLBACK26-NEXT: subl $204, %esp
18936 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %eax
18937 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %ecx
18938 ; FALLBACK26-NEXT: vmovups (%ecx), %ymm0
18939 ; FALLBACK26-NEXT: vmovups 32(%ecx), %ymm1
18940 ; FALLBACK26-NEXT: movl (%eax), %eax
18941 ; FALLBACK26-NEXT: vxorps %xmm2, %xmm2, %xmm2
18942 ; FALLBACK26-NEXT: vmovups %ymm2, {{[0-9]+}}(%esp)
18943 ; FALLBACK26-NEXT: vmovups %ymm2, {{[0-9]+}}(%esp)
18944 ; FALLBACK26-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
18945 ; FALLBACK26-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
18946 ; FALLBACK26-NEXT: leal (,%eax,8), %edx
18947 ; FALLBACK26-NEXT: andl $24, %edx
18948 ; FALLBACK26-NEXT: andl $60, %eax
18949 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18950 ; FALLBACK26-NEXT: leal {{[0-9]+}}(%esp), %edi
18951 ; FALLBACK26-NEXT: subl %eax, %edi
18952 ; FALLBACK26-NEXT: movl (%edi), %ecx
18953 ; FALLBACK26-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18954 ; FALLBACK26-NEXT: movl 4(%edi), %eax
18955 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18956 ; FALLBACK26-NEXT: movl %edx, %ebx
18957 ; FALLBACK26-NEXT: notb %bl
18958 ; FALLBACK26-NEXT: shrl %ecx
18959 ; FALLBACK26-NEXT: shrxl %ebx, %ecx, %esi
18960 ; FALLBACK26-NEXT: shlxl %edx, %eax, %ecx
18961 ; FALLBACK26-NEXT: orl %ecx, %esi
18962 ; FALLBACK26-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18963 ; FALLBACK26-NEXT: movl 8(%edi), %esi
18964 ; FALLBACK26-NEXT: movl %esi, %ecx
18965 ; FALLBACK26-NEXT: shrl %ecx
18966 ; FALLBACK26-NEXT: shrxl %ebx, %ecx, %eax
18967 ; FALLBACK26-NEXT: movl 12(%edi), %ecx
18968 ; FALLBACK26-NEXT: shlxl %edx, %ecx, %ebp
18969 ; FALLBACK26-NEXT: orl %ebp, %eax
18970 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18971 ; FALLBACK26-NEXT: shlxl %edx, %esi, %esi
18972 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
18973 ; FALLBACK26-NEXT: shrl %eax
18974 ; FALLBACK26-NEXT: shrxl %ebx, %eax, %eax
18975 ; FALLBACK26-NEXT: orl %esi, %eax
18976 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18977 ; FALLBACK26-NEXT: movl 16(%edi), %eax
18978 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18979 ; FALLBACK26-NEXT: shrl %eax
18980 ; FALLBACK26-NEXT: shrxl %ebx, %eax, %eax
18981 ; FALLBACK26-NEXT: movl 20(%edi), %esi
18982 ; FALLBACK26-NEXT: shlxl %edx, %esi, %ebp
18983 ; FALLBACK26-NEXT: orl %ebp, %eax
18984 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18985 ; FALLBACK26-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
18986 ; FALLBACK26-NEXT: shrl %ecx
18987 ; FALLBACK26-NEXT: shrxl %ebx, %ecx, %ecx
18988 ; FALLBACK26-NEXT: orl %eax, %ecx
18989 ; FALLBACK26-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18990 ; FALLBACK26-NEXT: movl 24(%edi), %ecx
18991 ; FALLBACK26-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18992 ; FALLBACK26-NEXT: shrl %ecx
18993 ; FALLBACK26-NEXT: shrxl %ebx, %ecx, %eax
18994 ; FALLBACK26-NEXT: movl 28(%edi), %ecx
18995 ; FALLBACK26-NEXT: shlxl %edx, %ecx, %ebp
18996 ; FALLBACK26-NEXT: orl %ebp, %eax
18997 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
18998 ; FALLBACK26-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
18999 ; FALLBACK26-NEXT: shrl %esi
19000 ; FALLBACK26-NEXT: shrxl %ebx, %esi, %esi
19001 ; FALLBACK26-NEXT: orl %eax, %esi
19002 ; FALLBACK26-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19003 ; FALLBACK26-NEXT: movl 32(%edi), %eax
19004 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19005 ; FALLBACK26-NEXT: shrl %eax
19006 ; FALLBACK26-NEXT: shrxl %ebx, %eax, %eax
19007 ; FALLBACK26-NEXT: movl 36(%edi), %esi
19008 ; FALLBACK26-NEXT: shlxl %edx, %esi, %ebp
19009 ; FALLBACK26-NEXT: orl %ebp, %eax
19010 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19011 ; FALLBACK26-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
19012 ; FALLBACK26-NEXT: shrl %ecx
19013 ; FALLBACK26-NEXT: shrxl %ebx, %ecx, %ecx
19014 ; FALLBACK26-NEXT: orl %eax, %ecx
19015 ; FALLBACK26-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19016 ; FALLBACK26-NEXT: movl 40(%edi), %ecx
19017 ; FALLBACK26-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19018 ; FALLBACK26-NEXT: shrl %ecx
19019 ; FALLBACK26-NEXT: shrxl %ebx, %ecx, %eax
19020 ; FALLBACK26-NEXT: movl 44(%edi), %ecx
19021 ; FALLBACK26-NEXT: shlxl %edx, %ecx, %ebp
19022 ; FALLBACK26-NEXT: orl %ebp, %eax
19023 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19024 ; FALLBACK26-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
19025 ; FALLBACK26-NEXT: shrl %esi
19026 ; FALLBACK26-NEXT: shrxl %ebx, %esi, %esi
19027 ; FALLBACK26-NEXT: orl %eax, %esi
19028 ; FALLBACK26-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19029 ; FALLBACK26-NEXT: movl 48(%edi), %esi
19030 ; FALLBACK26-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19031 ; FALLBACK26-NEXT: shrl %esi
19032 ; FALLBACK26-NEXT: shrxl %ebx, %esi, %eax
19033 ; FALLBACK26-NEXT: movl 52(%edi), %esi
19034 ; FALLBACK26-NEXT: shlxl %edx, %esi, %ebp
19035 ; FALLBACK26-NEXT: orl %ebp, %eax
19036 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19037 ; FALLBACK26-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
19038 ; FALLBACK26-NEXT: shrl %ecx
19039 ; FALLBACK26-NEXT: shrxl %ebx, %ecx, %ebp
19040 ; FALLBACK26-NEXT: orl %eax, %ebp
19041 ; FALLBACK26-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
19042 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19043 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
19044 ; FALLBACK26-NEXT: negl %eax
19045 ; FALLBACK26-NEXT: shlxl %edx, 188(%esp,%eax), %ecx
19046 ; FALLBACK26-NEXT: movl 56(%edi), %eax
19047 ; FALLBACK26-NEXT: shlxl %edx, %eax, %edx
19048 ; FALLBACK26-NEXT: shrl %esi
19049 ; FALLBACK26-NEXT: shrxl %ebx, %esi, %esi
19050 ; FALLBACK26-NEXT: orl %edx, %esi
19051 ; FALLBACK26-NEXT: shrl %eax
19052 ; FALLBACK26-NEXT: shrxl %ebx, %eax, %eax
19053 ; FALLBACK26-NEXT: orl %eax, %ecx
19054 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %eax
19055 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
19056 ; FALLBACK26-NEXT: movl %edx, (%eax)
19057 ; FALLBACK26-NEXT: movl %esi, 56(%eax)
19058 ; FALLBACK26-NEXT: movl %ecx, 60(%eax)
19059 ; FALLBACK26-NEXT: movl %ebp, 48(%eax)
19060 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19061 ; FALLBACK26-NEXT: movl %ecx, 52(%eax)
19062 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19063 ; FALLBACK26-NEXT: movl %ecx, 40(%eax)
19064 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19065 ; FALLBACK26-NEXT: movl %ecx, 44(%eax)
19066 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19067 ; FALLBACK26-NEXT: movl %ecx, 32(%eax)
19068 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19069 ; FALLBACK26-NEXT: movl %ecx, 36(%eax)
19070 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19071 ; FALLBACK26-NEXT: movl %ecx, 24(%eax)
19072 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19073 ; FALLBACK26-NEXT: movl %ecx, 28(%eax)
19074 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19075 ; FALLBACK26-NEXT: movl %ecx, 16(%eax)
19076 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19077 ; FALLBACK26-NEXT: movl %ecx, 20(%eax)
19078 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19079 ; FALLBACK26-NEXT: movl %ecx, 8(%eax)
19080 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19081 ; FALLBACK26-NEXT: movl %ecx, 12(%eax)
19082 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19083 ; FALLBACK26-NEXT: movl %ecx, 4(%eax)
19084 ; FALLBACK26-NEXT: addl $204, %esp
19085 ; FALLBACK26-NEXT: popl %esi
19086 ; FALLBACK26-NEXT: popl %edi
19087 ; FALLBACK26-NEXT: popl %ebx
19088 ; FALLBACK26-NEXT: popl %ebp
19089 ; FALLBACK26-NEXT: vzeroupper
19090 ; FALLBACK26-NEXT: retl
19092 ; FALLBACK27-LABEL: shl_64bytes:
19093 ; FALLBACK27: # %bb.0:
19094 ; FALLBACK27-NEXT: pushl %ebp
19095 ; FALLBACK27-NEXT: pushl %ebx
19096 ; FALLBACK27-NEXT: pushl %edi
19097 ; FALLBACK27-NEXT: pushl %esi
19098 ; FALLBACK27-NEXT: subl $204, %esp
19099 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %eax
19100 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %ecx
19101 ; FALLBACK27-NEXT: vmovups (%ecx), %ymm0
19102 ; FALLBACK27-NEXT: vmovups 32(%ecx), %ymm1
19103 ; FALLBACK27-NEXT: movl (%eax), %ebx
19104 ; FALLBACK27-NEXT: vxorps %xmm2, %xmm2, %xmm2
19105 ; FALLBACK27-NEXT: vmovups %ymm2, {{[0-9]+}}(%esp)
19106 ; FALLBACK27-NEXT: vmovups %ymm2, {{[0-9]+}}(%esp)
19107 ; FALLBACK27-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
19108 ; FALLBACK27-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
19109 ; FALLBACK27-NEXT: leal (,%ebx,8), %ecx
19110 ; FALLBACK27-NEXT: andl $24, %ecx
19111 ; FALLBACK27-NEXT: andl $60, %ebx
19112 ; FALLBACK27-NEXT: leal {{[0-9]+}}(%esp), %eax
19113 ; FALLBACK27-NEXT: subl %ebx, %eax
19114 ; FALLBACK27-NEXT: movl 4(%eax), %esi
19115 ; FALLBACK27-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19116 ; FALLBACK27-NEXT: movl 8(%eax), %edi
19117 ; FALLBACK27-NEXT: movl 12(%eax), %edx
19118 ; FALLBACK27-NEXT: movl %edx, %ebp
19119 ; FALLBACK27-NEXT: shldl %cl, %edi, %ebp
19120 ; FALLBACK27-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19121 ; FALLBACK27-NEXT: shldl %cl, %esi, %edi
19122 ; FALLBACK27-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19123 ; FALLBACK27-NEXT: movl 16(%eax), %edi
19124 ; FALLBACK27-NEXT: movl 20(%eax), %esi
19125 ; FALLBACK27-NEXT: movl %esi, %ebp
19126 ; FALLBACK27-NEXT: shldl %cl, %edi, %ebp
19127 ; FALLBACK27-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19128 ; FALLBACK27-NEXT: shldl %cl, %edx, %edi
19129 ; FALLBACK27-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19130 ; FALLBACK27-NEXT: movl 24(%eax), %edi
19131 ; FALLBACK27-NEXT: movl 28(%eax), %edx
19132 ; FALLBACK27-NEXT: movl %edx, %ebp
19133 ; FALLBACK27-NEXT: shldl %cl, %edi, %ebp
19134 ; FALLBACK27-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19135 ; FALLBACK27-NEXT: shldl %cl, %esi, %edi
19136 ; FALLBACK27-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19137 ; FALLBACK27-NEXT: movl 32(%eax), %edi
19138 ; FALLBACK27-NEXT: movl 36(%eax), %esi
19139 ; FALLBACK27-NEXT: movl %esi, %ebp
19140 ; FALLBACK27-NEXT: shldl %cl, %edi, %ebp
19141 ; FALLBACK27-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19142 ; FALLBACK27-NEXT: shldl %cl, %edx, %edi
19143 ; FALLBACK27-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19144 ; FALLBACK27-NEXT: movl 40(%eax), %ebp
19145 ; FALLBACK27-NEXT: movl 44(%eax), %edx
19146 ; FALLBACK27-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19147 ; FALLBACK27-NEXT: shldl %cl, %ebp, %edx
19148 ; FALLBACK27-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19149 ; FALLBACK27-NEXT: shldl %cl, %esi, %ebp
19150 ; FALLBACK27-NEXT: movl 56(%eax), %edx
19151 ; FALLBACK27-NEXT: movl 60(%eax), %edi
19152 ; FALLBACK27-NEXT: shldl %cl, %edx, %edi
19153 ; FALLBACK27-NEXT: movl (%eax), %esi
19154 ; FALLBACK27-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19155 ; FALLBACK27-NEXT: movl 52(%eax), %esi
19156 ; FALLBACK27-NEXT: shldl %cl, %esi, %edx
19157 ; FALLBACK27-NEXT: negl %ebx
19158 ; FALLBACK27-NEXT: movl 176(%esp,%ebx), %ebx
19159 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %eax
19160 ; FALLBACK27-NEXT: movl %edx, 56(%eax)
19161 ; FALLBACK27-NEXT: movl %edi, 60(%eax)
19162 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
19163 ; FALLBACK27-NEXT: shlxl %ecx, %edx, %edi
19164 ; FALLBACK27-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19165 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
19166 ; FALLBACK27-NEXT: shldl %cl, %edx, %edi
19167 ; FALLBACK27-NEXT: shldl %cl, %ebx, %esi
19168 ; FALLBACK27-NEXT: # kill: def $cl killed $cl killed $ecx
19169 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
19170 ; FALLBACK27-NEXT: shldl %cl, %edx, %ebx
19171 ; FALLBACK27-NEXT: movl %ebx, 48(%eax)
19172 ; FALLBACK27-NEXT: movl %esi, 52(%eax)
19173 ; FALLBACK27-NEXT: movl %ebp, 40(%eax)
19174 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19175 ; FALLBACK27-NEXT: movl %ecx, 44(%eax)
19176 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19177 ; FALLBACK27-NEXT: movl %ecx, 32(%eax)
19178 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19179 ; FALLBACK27-NEXT: movl %ecx, 36(%eax)
19180 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19181 ; FALLBACK27-NEXT: movl %ecx, 24(%eax)
19182 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19183 ; FALLBACK27-NEXT: movl %ecx, 28(%eax)
19184 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19185 ; FALLBACK27-NEXT: movl %ecx, 16(%eax)
19186 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19187 ; FALLBACK27-NEXT: movl %ecx, 20(%eax)
19188 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19189 ; FALLBACK27-NEXT: movl %ecx, 8(%eax)
19190 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19191 ; FALLBACK27-NEXT: movl %ecx, 12(%eax)
19192 ; FALLBACK27-NEXT: movl %edi, 4(%eax)
19193 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19194 ; FALLBACK27-NEXT: movl %ecx, (%eax)
19195 ; FALLBACK27-NEXT: addl $204, %esp
19196 ; FALLBACK27-NEXT: popl %esi
19197 ; FALLBACK27-NEXT: popl %edi
19198 ; FALLBACK27-NEXT: popl %ebx
19199 ; FALLBACK27-NEXT: popl %ebp
19200 ; FALLBACK27-NEXT: vzeroupper
19201 ; FALLBACK27-NEXT: retl
19203 ; FALLBACK28-LABEL: shl_64bytes:
19204 ; FALLBACK28: # %bb.0:
19205 ; FALLBACK28-NEXT: pushl %ebp
19206 ; FALLBACK28-NEXT: pushl %ebx
19207 ; FALLBACK28-NEXT: pushl %edi
19208 ; FALLBACK28-NEXT: pushl %esi
19209 ; FALLBACK28-NEXT: subl $204, %esp
19210 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %eax
19211 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %ecx
19212 ; FALLBACK28-NEXT: vmovups (%ecx), %zmm0
19213 ; FALLBACK28-NEXT: movl (%eax), %eax
19214 ; FALLBACK28-NEXT: vxorps %xmm1, %xmm1, %xmm1
19215 ; FALLBACK28-NEXT: vmovups %zmm1, {{[0-9]+}}(%esp)
19216 ; FALLBACK28-NEXT: vmovups %zmm0, {{[0-9]+}}(%esp)
19217 ; FALLBACK28-NEXT: movl %eax, %edx
19218 ; FALLBACK28-NEXT: andl $60, %edx
19219 ; FALLBACK28-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19220 ; FALLBACK28-NEXT: leal {{[0-9]+}}(%esp), %ecx
19221 ; FALLBACK28-NEXT: subl %edx, %ecx
19222 ; FALLBACK28-NEXT: movl (%ecx), %edi
19223 ; FALLBACK28-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19224 ; FALLBACK28-NEXT: movl 4(%ecx), %edx
19225 ; FALLBACK28-NEXT: movl %ecx, %ebp
19226 ; FALLBACK28-NEXT: shll $3, %eax
19227 ; FALLBACK28-NEXT: andl $24, %eax
19228 ; FALLBACK28-NEXT: movl %edx, %esi
19229 ; FALLBACK28-NEXT: movl %eax, %ecx
19230 ; FALLBACK28-NEXT: shll %cl, %esi
19231 ; FALLBACK28-NEXT: shrl %edi
19232 ; FALLBACK28-NEXT: movb %al, %ch
19233 ; FALLBACK28-NEXT: notb %ch
19234 ; FALLBACK28-NEXT: movb %ch, %cl
19235 ; FALLBACK28-NEXT: shrl %cl, %edi
19236 ; FALLBACK28-NEXT: orl %esi, %edi
19237 ; FALLBACK28-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19238 ; FALLBACK28-NEXT: movl 12(%ebp), %ebx
19239 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19240 ; FALLBACK28-NEXT: movb %al, %cl
19241 ; FALLBACK28-NEXT: shll %cl, %ebx
19242 ; FALLBACK28-NEXT: movl 8(%ebp), %esi
19243 ; FALLBACK28-NEXT: movl %ebp, %edi
19244 ; FALLBACK28-NEXT: movl %esi, %ebp
19245 ; FALLBACK28-NEXT: shrl %ebp
19246 ; FALLBACK28-NEXT: movb %ch, %cl
19247 ; FALLBACK28-NEXT: shrl %cl, %ebp
19248 ; FALLBACK28-NEXT: orl %ebx, %ebp
19249 ; FALLBACK28-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19250 ; FALLBACK28-NEXT: movb %al, %cl
19251 ; FALLBACK28-NEXT: shll %cl, %esi
19252 ; FALLBACK28-NEXT: shrl %edx
19253 ; FALLBACK28-NEXT: movb %ch, %cl
19254 ; FALLBACK28-NEXT: shrl %cl, %edx
19255 ; FALLBACK28-NEXT: orl %esi, %edx
19256 ; FALLBACK28-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19257 ; FALLBACK28-NEXT: movl %edi, %ebp
19258 ; FALLBACK28-NEXT: movl 20(%edi), %ebx
19259 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19260 ; FALLBACK28-NEXT: movb %al, %cl
19261 ; FALLBACK28-NEXT: shll %cl, %ebx
19262 ; FALLBACK28-NEXT: movl 16(%edi), %esi
19263 ; FALLBACK28-NEXT: movl %esi, %edx
19264 ; FALLBACK28-NEXT: shrl %edx
19265 ; FALLBACK28-NEXT: movb %ch, %cl
19266 ; FALLBACK28-NEXT: shrl %cl, %edx
19267 ; FALLBACK28-NEXT: orl %ebx, %edx
19268 ; FALLBACK28-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19269 ; FALLBACK28-NEXT: movb %al, %cl
19270 ; FALLBACK28-NEXT: shll %cl, %esi
19271 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
19272 ; FALLBACK28-NEXT: shrl %edi
19273 ; FALLBACK28-NEXT: movb %ch, %cl
19274 ; FALLBACK28-NEXT: shrl %cl, %edi
19275 ; FALLBACK28-NEXT: orl %esi, %edi
19276 ; FALLBACK28-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19277 ; FALLBACK28-NEXT: movl %ebp, %edx
19278 ; FALLBACK28-NEXT: movl 28(%ebp), %ebx
19279 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19280 ; FALLBACK28-NEXT: movb %al, %cl
19281 ; FALLBACK28-NEXT: shll %cl, %ebx
19282 ; FALLBACK28-NEXT: movl 24(%ebp), %esi
19283 ; FALLBACK28-NEXT: movl %esi, %edi
19284 ; FALLBACK28-NEXT: shrl %edi
19285 ; FALLBACK28-NEXT: movb %ch, %cl
19286 ; FALLBACK28-NEXT: shrl %cl, %edi
19287 ; FALLBACK28-NEXT: orl %ebx, %edi
19288 ; FALLBACK28-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19289 ; FALLBACK28-NEXT: movb %al, %cl
19290 ; FALLBACK28-NEXT: shll %cl, %esi
19291 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
19292 ; FALLBACK28-NEXT: shrl %ebp
19293 ; FALLBACK28-NEXT: movb %ch, %cl
19294 ; FALLBACK28-NEXT: shrl %cl, %ebp
19295 ; FALLBACK28-NEXT: orl %esi, %ebp
19296 ; FALLBACK28-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19297 ; FALLBACK28-NEXT: movl 36(%edx), %ebx
19298 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19299 ; FALLBACK28-NEXT: movb %al, %cl
19300 ; FALLBACK28-NEXT: shll %cl, %ebx
19301 ; FALLBACK28-NEXT: movl 32(%edx), %esi
19302 ; FALLBACK28-NEXT: movl %edx, %ebp
19303 ; FALLBACK28-NEXT: movl %esi, %edi
19304 ; FALLBACK28-NEXT: shrl %edi
19305 ; FALLBACK28-NEXT: movb %ch, %cl
19306 ; FALLBACK28-NEXT: shrl %cl, %edi
19307 ; FALLBACK28-NEXT: orl %ebx, %edi
19308 ; FALLBACK28-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19309 ; FALLBACK28-NEXT: movb %al, %cl
19310 ; FALLBACK28-NEXT: shll %cl, %esi
19311 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
19312 ; FALLBACK28-NEXT: shrl %edx
19313 ; FALLBACK28-NEXT: movb %ch, %cl
19314 ; FALLBACK28-NEXT: shrl %cl, %edx
19315 ; FALLBACK28-NEXT: orl %esi, %edx
19316 ; FALLBACK28-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19317 ; FALLBACK28-NEXT: movl 44(%ebp), %ebx
19318 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19319 ; FALLBACK28-NEXT: movb %al, %cl
19320 ; FALLBACK28-NEXT: shll %cl, %ebx
19321 ; FALLBACK28-NEXT: movl 40(%ebp), %esi
19322 ; FALLBACK28-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19323 ; FALLBACK28-NEXT: movl %esi, %edx
19324 ; FALLBACK28-NEXT: shrl %edx
19325 ; FALLBACK28-NEXT: movb %ch, %cl
19326 ; FALLBACK28-NEXT: shrl %cl, %edx
19327 ; FALLBACK28-NEXT: orl %ebx, %edx
19328 ; FALLBACK28-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19329 ; FALLBACK28-NEXT: movb %al, %cl
19330 ; FALLBACK28-NEXT: shll %cl, %esi
19331 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
19332 ; FALLBACK28-NEXT: shrl %edx
19333 ; FALLBACK28-NEXT: movb %ch, %cl
19334 ; FALLBACK28-NEXT: shrl %cl, %edx
19335 ; FALLBACK28-NEXT: orl %esi, %edx
19336 ; FALLBACK28-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19337 ; FALLBACK28-NEXT: movl 52(%ebp), %esi
19338 ; FALLBACK28-NEXT: movl %esi, %edi
19339 ; FALLBACK28-NEXT: movb %al, %cl
19340 ; FALLBACK28-NEXT: shll %cl, %edi
19341 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
19342 ; FALLBACK28-NEXT: negl %edx
19343 ; FALLBACK28-NEXT: movl 176(%esp,%edx), %ebx
19344 ; FALLBACK28-NEXT: movl %ebx, %ebp
19345 ; FALLBACK28-NEXT: shrl %ebp
19346 ; FALLBACK28-NEXT: movb %ch, %cl
19347 ; FALLBACK28-NEXT: shrl %cl, %ebp
19348 ; FALLBACK28-NEXT: orl %edi, %ebp
19349 ; FALLBACK28-NEXT: movb %al, %cl
19350 ; FALLBACK28-NEXT: shll %cl, %ebx
19351 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
19352 ; FALLBACK28-NEXT: shrl %edx
19353 ; FALLBACK28-NEXT: movb %ch, %cl
19354 ; FALLBACK28-NEXT: shrl %cl, %edx
19355 ; FALLBACK28-NEXT: orl %ebx, %edx
19356 ; FALLBACK28-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19357 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
19358 ; FALLBACK28-NEXT: movl 60(%edi), %edx
19359 ; FALLBACK28-NEXT: movb %al, %cl
19360 ; FALLBACK28-NEXT: shll %cl, %edx
19361 ; FALLBACK28-NEXT: movl 56(%edi), %ebx
19362 ; FALLBACK28-NEXT: movl %ebx, %edi
19363 ; FALLBACK28-NEXT: shrl %edi
19364 ; FALLBACK28-NEXT: movb %ch, %cl
19365 ; FALLBACK28-NEXT: shrl %cl, %edi
19366 ; FALLBACK28-NEXT: orl %edx, %edi
19367 ; FALLBACK28-NEXT: movb %al, %cl
19368 ; FALLBACK28-NEXT: shll %cl, %ebx
19369 ; FALLBACK28-NEXT: shrl %esi
19370 ; FALLBACK28-NEXT: movb %ch, %cl
19371 ; FALLBACK28-NEXT: shrl %cl, %esi
19372 ; FALLBACK28-NEXT: orl %ebx, %esi
19373 ; FALLBACK28-NEXT: movl %eax, %ecx
19374 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
19375 ; FALLBACK28-NEXT: shll %cl, %edx
19376 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %eax
19377 ; FALLBACK28-NEXT: movl %edx, (%eax)
19378 ; FALLBACK28-NEXT: movl %esi, 56(%eax)
19379 ; FALLBACK28-NEXT: movl %edi, 60(%eax)
19380 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19381 ; FALLBACK28-NEXT: movl %ecx, 48(%eax)
19382 ; FALLBACK28-NEXT: movl %ebp, 52(%eax)
19383 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19384 ; FALLBACK28-NEXT: movl %ecx, 40(%eax)
19385 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19386 ; FALLBACK28-NEXT: movl %ecx, 44(%eax)
19387 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19388 ; FALLBACK28-NEXT: movl %ecx, 32(%eax)
19389 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19390 ; FALLBACK28-NEXT: movl %ecx, 36(%eax)
19391 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19392 ; FALLBACK28-NEXT: movl %ecx, 24(%eax)
19393 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19394 ; FALLBACK28-NEXT: movl %ecx, 28(%eax)
19395 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19396 ; FALLBACK28-NEXT: movl %ecx, 16(%eax)
19397 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19398 ; FALLBACK28-NEXT: movl %ecx, 20(%eax)
19399 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19400 ; FALLBACK28-NEXT: movl %ecx, 8(%eax)
19401 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19402 ; FALLBACK28-NEXT: movl %ecx, 12(%eax)
19403 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19404 ; FALLBACK28-NEXT: movl %ecx, 4(%eax)
19405 ; FALLBACK28-NEXT: addl $204, %esp
19406 ; FALLBACK28-NEXT: popl %esi
19407 ; FALLBACK28-NEXT: popl %edi
19408 ; FALLBACK28-NEXT: popl %ebx
19409 ; FALLBACK28-NEXT: popl %ebp
19410 ; FALLBACK28-NEXT: vzeroupper
19411 ; FALLBACK28-NEXT: retl
19413 ; FALLBACK29-LABEL: shl_64bytes:
19414 ; FALLBACK29: # %bb.0:
19415 ; FALLBACK29-NEXT: pushl %ebp
19416 ; FALLBACK29-NEXT: pushl %ebx
19417 ; FALLBACK29-NEXT: pushl %edi
19418 ; FALLBACK29-NEXT: pushl %esi
19419 ; FALLBACK29-NEXT: subl $188, %esp
19420 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %eax
19421 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %ecx
19422 ; FALLBACK29-NEXT: vmovups (%ecx), %zmm0
19423 ; FALLBACK29-NEXT: movl (%eax), %ecx
19424 ; FALLBACK29-NEXT: vxorps %xmm1, %xmm1, %xmm1
19425 ; FALLBACK29-NEXT: vmovups %zmm1, {{[0-9]+}}(%esp)
19426 ; FALLBACK29-NEXT: vmovups %zmm0, {{[0-9]+}}(%esp)
19427 ; FALLBACK29-NEXT: movl %ecx, %ebp
19428 ; FALLBACK29-NEXT: andl $60, %ebp
19429 ; FALLBACK29-NEXT: leal {{[0-9]+}}(%esp), %eax
19430 ; FALLBACK29-NEXT: subl %ebp, %eax
19431 ; FALLBACK29-NEXT: movl 8(%eax), %esi
19432 ; FALLBACK29-NEXT: movl 12(%eax), %edx
19433 ; FALLBACK29-NEXT: shll $3, %ecx
19434 ; FALLBACK29-NEXT: andl $24, %ecx
19435 ; FALLBACK29-NEXT: movl %edx, %edi
19436 ; FALLBACK29-NEXT: shldl %cl, %esi, %edi
19437 ; FALLBACK29-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19438 ; FALLBACK29-NEXT: movl 4(%eax), %edi
19439 ; FALLBACK29-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19440 ; FALLBACK29-NEXT: shldl %cl, %edi, %esi
19441 ; FALLBACK29-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19442 ; FALLBACK29-NEXT: movl 16(%eax), %edi
19443 ; FALLBACK29-NEXT: movl 20(%eax), %esi
19444 ; FALLBACK29-NEXT: movl %esi, %ebx
19445 ; FALLBACK29-NEXT: shldl %cl, %edi, %ebx
19446 ; FALLBACK29-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19447 ; FALLBACK29-NEXT: shldl %cl, %edx, %edi
19448 ; FALLBACK29-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19449 ; FALLBACK29-NEXT: movl 24(%eax), %edi
19450 ; FALLBACK29-NEXT: movl 28(%eax), %edx
19451 ; FALLBACK29-NEXT: movl %edx, %ebx
19452 ; FALLBACK29-NEXT: shldl %cl, %edi, %ebx
19453 ; FALLBACK29-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19454 ; FALLBACK29-NEXT: shldl %cl, %esi, %edi
19455 ; FALLBACK29-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19456 ; FALLBACK29-NEXT: movl 32(%eax), %edi
19457 ; FALLBACK29-NEXT: movl 36(%eax), %esi
19458 ; FALLBACK29-NEXT: movl %esi, %ebx
19459 ; FALLBACK29-NEXT: shldl %cl, %edi, %ebx
19460 ; FALLBACK29-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19461 ; FALLBACK29-NEXT: shldl %cl, %edx, %edi
19462 ; FALLBACK29-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19463 ; FALLBACK29-NEXT: movl 40(%eax), %edx
19464 ; FALLBACK29-NEXT: movl 44(%eax), %edi
19465 ; FALLBACK29-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19466 ; FALLBACK29-NEXT: shldl %cl, %edx, %edi
19467 ; FALLBACK29-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19468 ; FALLBACK29-NEXT: shldl %cl, %esi, %edx
19469 ; FALLBACK29-NEXT: movl %edx, (%esp) # 4-byte Spill
19470 ; FALLBACK29-NEXT: movl 56(%eax), %edx
19471 ; FALLBACK29-NEXT: movl 60(%eax), %edi
19472 ; FALLBACK29-NEXT: shldl %cl, %edx, %edi
19473 ; FALLBACK29-NEXT: movl (%eax), %ebx
19474 ; FALLBACK29-NEXT: movl 52(%eax), %esi
19475 ; FALLBACK29-NEXT: shldl %cl, %esi, %edx
19476 ; FALLBACK29-NEXT: negl %ebp
19477 ; FALLBACK29-NEXT: movl 160(%esp,%ebp), %eax
19478 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %ebp
19479 ; FALLBACK29-NEXT: movl %edx, 56(%ebp)
19480 ; FALLBACK29-NEXT: movl %edi, 60(%ebp)
19481 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
19482 ; FALLBACK29-NEXT: shldl %cl, %ebx, %edx
19483 ; FALLBACK29-NEXT: shll %cl, %ebx
19484 ; FALLBACK29-NEXT: shldl %cl, %eax, %esi
19485 ; FALLBACK29-NEXT: # kill: def $cl killed $cl killed $ecx
19486 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
19487 ; FALLBACK29-NEXT: shldl %cl, %edi, %eax
19488 ; FALLBACK29-NEXT: movl %eax, 48(%ebp)
19489 ; FALLBACK29-NEXT: movl %esi, 52(%ebp)
19490 ; FALLBACK29-NEXT: movl (%esp), %eax # 4-byte Reload
19491 ; FALLBACK29-NEXT: movl %eax, 40(%ebp)
19492 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
19493 ; FALLBACK29-NEXT: movl %eax, 44(%ebp)
19494 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
19495 ; FALLBACK29-NEXT: movl %eax, 32(%ebp)
19496 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
19497 ; FALLBACK29-NEXT: movl %eax, 36(%ebp)
19498 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
19499 ; FALLBACK29-NEXT: movl %eax, 24(%ebp)
19500 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
19501 ; FALLBACK29-NEXT: movl %eax, 28(%ebp)
19502 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
19503 ; FALLBACK29-NEXT: movl %eax, 16(%ebp)
19504 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
19505 ; FALLBACK29-NEXT: movl %eax, 20(%ebp)
19506 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
19507 ; FALLBACK29-NEXT: movl %eax, 8(%ebp)
19508 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
19509 ; FALLBACK29-NEXT: movl %eax, 12(%ebp)
19510 ; FALLBACK29-NEXT: movl %ebx, (%ebp)
19511 ; FALLBACK29-NEXT: movl %edx, 4(%ebp)
19512 ; FALLBACK29-NEXT: addl $188, %esp
19513 ; FALLBACK29-NEXT: popl %esi
19514 ; FALLBACK29-NEXT: popl %edi
19515 ; FALLBACK29-NEXT: popl %ebx
19516 ; FALLBACK29-NEXT: popl %ebp
19517 ; FALLBACK29-NEXT: vzeroupper
19518 ; FALLBACK29-NEXT: retl
19520 ; FALLBACK30-LABEL: shl_64bytes:
19521 ; FALLBACK30: # %bb.0:
19522 ; FALLBACK30-NEXT: pushl %ebp
19523 ; FALLBACK30-NEXT: pushl %ebx
19524 ; FALLBACK30-NEXT: pushl %edi
19525 ; FALLBACK30-NEXT: pushl %esi
19526 ; FALLBACK30-NEXT: subl $204, %esp
19527 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %eax
19528 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %ecx
19529 ; FALLBACK30-NEXT: vmovups (%ecx), %zmm0
19530 ; FALLBACK30-NEXT: movl (%eax), %eax
19531 ; FALLBACK30-NEXT: vxorps %xmm1, %xmm1, %xmm1
19532 ; FALLBACK30-NEXT: vmovups %zmm1, {{[0-9]+}}(%esp)
19533 ; FALLBACK30-NEXT: vmovups %zmm0, {{[0-9]+}}(%esp)
19534 ; FALLBACK30-NEXT: leal (,%eax,8), %edx
19535 ; FALLBACK30-NEXT: andl $24, %edx
19536 ; FALLBACK30-NEXT: andl $60, %eax
19537 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19538 ; FALLBACK30-NEXT: leal {{[0-9]+}}(%esp), %edi
19539 ; FALLBACK30-NEXT: subl %eax, %edi
19540 ; FALLBACK30-NEXT: movl (%edi), %ecx
19541 ; FALLBACK30-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19542 ; FALLBACK30-NEXT: movl 4(%edi), %eax
19543 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19544 ; FALLBACK30-NEXT: movl %edx, %ebx
19545 ; FALLBACK30-NEXT: notb %bl
19546 ; FALLBACK30-NEXT: shrl %ecx
19547 ; FALLBACK30-NEXT: shrxl %ebx, %ecx, %esi
19548 ; FALLBACK30-NEXT: shlxl %edx, %eax, %ecx
19549 ; FALLBACK30-NEXT: orl %ecx, %esi
19550 ; FALLBACK30-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19551 ; FALLBACK30-NEXT: movl 8(%edi), %esi
19552 ; FALLBACK30-NEXT: movl %esi, %ecx
19553 ; FALLBACK30-NEXT: shrl %ecx
19554 ; FALLBACK30-NEXT: shrxl %ebx, %ecx, %eax
19555 ; FALLBACK30-NEXT: movl 12(%edi), %ecx
19556 ; FALLBACK30-NEXT: shlxl %edx, %ecx, %ebp
19557 ; FALLBACK30-NEXT: orl %ebp, %eax
19558 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19559 ; FALLBACK30-NEXT: shlxl %edx, %esi, %esi
19560 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
19561 ; FALLBACK30-NEXT: shrl %eax
19562 ; FALLBACK30-NEXT: shrxl %ebx, %eax, %eax
19563 ; FALLBACK30-NEXT: orl %esi, %eax
19564 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19565 ; FALLBACK30-NEXT: movl 16(%edi), %eax
19566 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19567 ; FALLBACK30-NEXT: shrl %eax
19568 ; FALLBACK30-NEXT: shrxl %ebx, %eax, %eax
19569 ; FALLBACK30-NEXT: movl 20(%edi), %esi
19570 ; FALLBACK30-NEXT: shlxl %edx, %esi, %ebp
19571 ; FALLBACK30-NEXT: orl %ebp, %eax
19572 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19573 ; FALLBACK30-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
19574 ; FALLBACK30-NEXT: shrl %ecx
19575 ; FALLBACK30-NEXT: shrxl %ebx, %ecx, %ecx
19576 ; FALLBACK30-NEXT: orl %eax, %ecx
19577 ; FALLBACK30-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19578 ; FALLBACK30-NEXT: movl 24(%edi), %ecx
19579 ; FALLBACK30-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19580 ; FALLBACK30-NEXT: shrl %ecx
19581 ; FALLBACK30-NEXT: shrxl %ebx, %ecx, %eax
19582 ; FALLBACK30-NEXT: movl 28(%edi), %ecx
19583 ; FALLBACK30-NEXT: shlxl %edx, %ecx, %ebp
19584 ; FALLBACK30-NEXT: orl %ebp, %eax
19585 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19586 ; FALLBACK30-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
19587 ; FALLBACK30-NEXT: shrl %esi
19588 ; FALLBACK30-NEXT: shrxl %ebx, %esi, %esi
19589 ; FALLBACK30-NEXT: orl %eax, %esi
19590 ; FALLBACK30-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19591 ; FALLBACK30-NEXT: movl 32(%edi), %eax
19592 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19593 ; FALLBACK30-NEXT: shrl %eax
19594 ; FALLBACK30-NEXT: shrxl %ebx, %eax, %eax
19595 ; FALLBACK30-NEXT: movl 36(%edi), %esi
19596 ; FALLBACK30-NEXT: shlxl %edx, %esi, %ebp
19597 ; FALLBACK30-NEXT: orl %ebp, %eax
19598 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19599 ; FALLBACK30-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
19600 ; FALLBACK30-NEXT: shrl %ecx
19601 ; FALLBACK30-NEXT: shrxl %ebx, %ecx, %ecx
19602 ; FALLBACK30-NEXT: orl %eax, %ecx
19603 ; FALLBACK30-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19604 ; FALLBACK30-NEXT: movl 40(%edi), %ecx
19605 ; FALLBACK30-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19606 ; FALLBACK30-NEXT: shrl %ecx
19607 ; FALLBACK30-NEXT: shrxl %ebx, %ecx, %eax
19608 ; FALLBACK30-NEXT: movl 44(%edi), %ecx
19609 ; FALLBACK30-NEXT: shlxl %edx, %ecx, %ebp
19610 ; FALLBACK30-NEXT: orl %ebp, %eax
19611 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19612 ; FALLBACK30-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
19613 ; FALLBACK30-NEXT: shrl %esi
19614 ; FALLBACK30-NEXT: shrxl %ebx, %esi, %esi
19615 ; FALLBACK30-NEXT: orl %eax, %esi
19616 ; FALLBACK30-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19617 ; FALLBACK30-NEXT: movl 48(%edi), %esi
19618 ; FALLBACK30-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19619 ; FALLBACK30-NEXT: shrl %esi
19620 ; FALLBACK30-NEXT: shrxl %ebx, %esi, %eax
19621 ; FALLBACK30-NEXT: movl 52(%edi), %esi
19622 ; FALLBACK30-NEXT: shlxl %edx, %esi, %ebp
19623 ; FALLBACK30-NEXT: orl %ebp, %eax
19624 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19625 ; FALLBACK30-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
19626 ; FALLBACK30-NEXT: shrl %ecx
19627 ; FALLBACK30-NEXT: shrxl %ebx, %ecx, %ebp
19628 ; FALLBACK30-NEXT: orl %eax, %ebp
19629 ; FALLBACK30-NEXT: shlxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
19630 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19631 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
19632 ; FALLBACK30-NEXT: negl %eax
19633 ; FALLBACK30-NEXT: shlxl %edx, 188(%esp,%eax), %ecx
19634 ; FALLBACK30-NEXT: movl 56(%edi), %eax
19635 ; FALLBACK30-NEXT: shlxl %edx, %eax, %edx
19636 ; FALLBACK30-NEXT: shrl %esi
19637 ; FALLBACK30-NEXT: shrxl %ebx, %esi, %esi
19638 ; FALLBACK30-NEXT: orl %edx, %esi
19639 ; FALLBACK30-NEXT: shrl %eax
19640 ; FALLBACK30-NEXT: shrxl %ebx, %eax, %eax
19641 ; FALLBACK30-NEXT: orl %eax, %ecx
19642 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %eax
19643 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
19644 ; FALLBACK30-NEXT: movl %edx, (%eax)
19645 ; FALLBACK30-NEXT: movl %esi, 56(%eax)
19646 ; FALLBACK30-NEXT: movl %ecx, 60(%eax)
19647 ; FALLBACK30-NEXT: movl %ebp, 48(%eax)
19648 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19649 ; FALLBACK30-NEXT: movl %ecx, 52(%eax)
19650 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19651 ; FALLBACK30-NEXT: movl %ecx, 40(%eax)
19652 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19653 ; FALLBACK30-NEXT: movl %ecx, 44(%eax)
19654 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19655 ; FALLBACK30-NEXT: movl %ecx, 32(%eax)
19656 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19657 ; FALLBACK30-NEXT: movl %ecx, 36(%eax)
19658 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19659 ; FALLBACK30-NEXT: movl %ecx, 24(%eax)
19660 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19661 ; FALLBACK30-NEXT: movl %ecx, 28(%eax)
19662 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19663 ; FALLBACK30-NEXT: movl %ecx, 16(%eax)
19664 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19665 ; FALLBACK30-NEXT: movl %ecx, 20(%eax)
19666 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19667 ; FALLBACK30-NEXT: movl %ecx, 8(%eax)
19668 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19669 ; FALLBACK30-NEXT: movl %ecx, 12(%eax)
19670 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19671 ; FALLBACK30-NEXT: movl %ecx, 4(%eax)
19672 ; FALLBACK30-NEXT: addl $204, %esp
19673 ; FALLBACK30-NEXT: popl %esi
19674 ; FALLBACK30-NEXT: popl %edi
19675 ; FALLBACK30-NEXT: popl %ebx
19676 ; FALLBACK30-NEXT: popl %ebp
19677 ; FALLBACK30-NEXT: vzeroupper
19678 ; FALLBACK30-NEXT: retl
19680 ; FALLBACK31-LABEL: shl_64bytes:
19681 ; FALLBACK31: # %bb.0:
19682 ; FALLBACK31-NEXT: pushl %ebp
19683 ; FALLBACK31-NEXT: pushl %ebx
19684 ; FALLBACK31-NEXT: pushl %edi
19685 ; FALLBACK31-NEXT: pushl %esi
19686 ; FALLBACK31-NEXT: subl $204, %esp
19687 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %eax
19688 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %ecx
19689 ; FALLBACK31-NEXT: vmovups (%ecx), %zmm0
19690 ; FALLBACK31-NEXT: movl (%eax), %ebx
19691 ; FALLBACK31-NEXT: vxorps %xmm1, %xmm1, %xmm1
19692 ; FALLBACK31-NEXT: vmovups %zmm1, {{[0-9]+}}(%esp)
19693 ; FALLBACK31-NEXT: vmovups %zmm0, {{[0-9]+}}(%esp)
19694 ; FALLBACK31-NEXT: leal (,%ebx,8), %ecx
19695 ; FALLBACK31-NEXT: andl $24, %ecx
19696 ; FALLBACK31-NEXT: andl $60, %ebx
19697 ; FALLBACK31-NEXT: leal {{[0-9]+}}(%esp), %eax
19698 ; FALLBACK31-NEXT: subl %ebx, %eax
19699 ; FALLBACK31-NEXT: movl 4(%eax), %esi
19700 ; FALLBACK31-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19701 ; FALLBACK31-NEXT: movl 8(%eax), %edi
19702 ; FALLBACK31-NEXT: movl 12(%eax), %edx
19703 ; FALLBACK31-NEXT: movl %edx, %ebp
19704 ; FALLBACK31-NEXT: shldl %cl, %edi, %ebp
19705 ; FALLBACK31-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19706 ; FALLBACK31-NEXT: shldl %cl, %esi, %edi
19707 ; FALLBACK31-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19708 ; FALLBACK31-NEXT: movl 16(%eax), %edi
19709 ; FALLBACK31-NEXT: movl 20(%eax), %esi
19710 ; FALLBACK31-NEXT: movl %esi, %ebp
19711 ; FALLBACK31-NEXT: shldl %cl, %edi, %ebp
19712 ; FALLBACK31-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19713 ; FALLBACK31-NEXT: shldl %cl, %edx, %edi
19714 ; FALLBACK31-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19715 ; FALLBACK31-NEXT: movl 24(%eax), %edi
19716 ; FALLBACK31-NEXT: movl 28(%eax), %edx
19717 ; FALLBACK31-NEXT: movl %edx, %ebp
19718 ; FALLBACK31-NEXT: shldl %cl, %edi, %ebp
19719 ; FALLBACK31-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19720 ; FALLBACK31-NEXT: shldl %cl, %esi, %edi
19721 ; FALLBACK31-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19722 ; FALLBACK31-NEXT: movl 32(%eax), %edi
19723 ; FALLBACK31-NEXT: movl 36(%eax), %esi
19724 ; FALLBACK31-NEXT: movl %esi, %ebp
19725 ; FALLBACK31-NEXT: shldl %cl, %edi, %ebp
19726 ; FALLBACK31-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19727 ; FALLBACK31-NEXT: shldl %cl, %edx, %edi
19728 ; FALLBACK31-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19729 ; FALLBACK31-NEXT: movl 40(%eax), %ebp
19730 ; FALLBACK31-NEXT: movl 44(%eax), %edx
19731 ; FALLBACK31-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19732 ; FALLBACK31-NEXT: shldl %cl, %ebp, %edx
19733 ; FALLBACK31-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19734 ; FALLBACK31-NEXT: shldl %cl, %esi, %ebp
19735 ; FALLBACK31-NEXT: movl 56(%eax), %edx
19736 ; FALLBACK31-NEXT: movl 60(%eax), %edi
19737 ; FALLBACK31-NEXT: shldl %cl, %edx, %edi
19738 ; FALLBACK31-NEXT: movl (%eax), %esi
19739 ; FALLBACK31-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19740 ; FALLBACK31-NEXT: movl 52(%eax), %esi
19741 ; FALLBACK31-NEXT: shldl %cl, %esi, %edx
19742 ; FALLBACK31-NEXT: negl %ebx
19743 ; FALLBACK31-NEXT: movl 176(%esp,%ebx), %ebx
19744 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %eax
19745 ; FALLBACK31-NEXT: movl %edx, 56(%eax)
19746 ; FALLBACK31-NEXT: movl %edi, 60(%eax)
19747 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
19748 ; FALLBACK31-NEXT: shlxl %ecx, %edx, %edi
19749 ; FALLBACK31-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19750 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
19751 ; FALLBACK31-NEXT: shldl %cl, %edx, %edi
19752 ; FALLBACK31-NEXT: shldl %cl, %ebx, %esi
19753 ; FALLBACK31-NEXT: # kill: def $cl killed $cl killed $ecx
19754 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
19755 ; FALLBACK31-NEXT: shldl %cl, %edx, %ebx
19756 ; FALLBACK31-NEXT: movl %ebx, 48(%eax)
19757 ; FALLBACK31-NEXT: movl %esi, 52(%eax)
19758 ; FALLBACK31-NEXT: movl %ebp, 40(%eax)
19759 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19760 ; FALLBACK31-NEXT: movl %ecx, 44(%eax)
19761 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19762 ; FALLBACK31-NEXT: movl %ecx, 32(%eax)
19763 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19764 ; FALLBACK31-NEXT: movl %ecx, 36(%eax)
19765 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19766 ; FALLBACK31-NEXT: movl %ecx, 24(%eax)
19767 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19768 ; FALLBACK31-NEXT: movl %ecx, 28(%eax)
19769 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19770 ; FALLBACK31-NEXT: movl %ecx, 16(%eax)
19771 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19772 ; FALLBACK31-NEXT: movl %ecx, 20(%eax)
19773 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19774 ; FALLBACK31-NEXT: movl %ecx, 8(%eax)
19775 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19776 ; FALLBACK31-NEXT: movl %ecx, 12(%eax)
19777 ; FALLBACK31-NEXT: movl %edi, 4(%eax)
19778 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
19779 ; FALLBACK31-NEXT: movl %ecx, (%eax)
19780 ; FALLBACK31-NEXT: addl $204, %esp
19781 ; FALLBACK31-NEXT: popl %esi
19782 ; FALLBACK31-NEXT: popl %edi
19783 ; FALLBACK31-NEXT: popl %ebx
19784 ; FALLBACK31-NEXT: popl %ebp
19785 ; FALLBACK31-NEXT: vzeroupper
19786 ; FALLBACK31-NEXT: retl
19787 %src = load i512, ptr %src.ptr, align 1
19788 %byteOff = load i512, ptr %byteOff.ptr, align 1
19789 %bitOff = shl i512 %byteOff, 3
19790 %res = shl i512 %src, %bitOff
19791 store i512 %res, ptr %dst, align 1
; shl_64bytes_qwordOff - 64-byte (i512) left shift whose amount is supplied in
; 8-byte (qword) units rather than in bytes or bits.
;   %src.ptr      - pointer to the 64-byte value to shift (loaded with align 1)
;   %qwordOff.ptr - pointer to the i512 shift amount, counted in qwords
;   %dst          - pointer that receives the 64-byte result (stored with align 1)
;
; The assertions below are autogenerated (see the note on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of editing
; them by hand. One group of assertions exists per check prefix (64-bit and
; 32-bit targets, each for SSE2, SSE4.2, AVX1 and AVX512).
;
; In every expected sequence the source is written to the stack next to a zeroed
; region, the offset is converted to a byte count masked to a multiple of 8
; (shll $3 followed by andl $56), and the result is read back from the stack at
; an address moved down by that byte count. No shld/shlx word-combining
; instructions are expected for this function.
19795 define void @shl_64bytes_qwordOff(ptr %src.ptr, ptr %qwordOff.ptr, ptr %dst) nounwind {
19796 ; X64-SSE2-LABEL: shl_64bytes_qwordOff:
19797 ; X64-SSE2: # %bb.0:
19798 ; X64-SSE2-NEXT: pushq %rbx
19799 ; X64-SSE2-NEXT: movq (%rdi), %rax
19800 ; X64-SSE2-NEXT: movq 8(%rdi), %rcx
19801 ; X64-SSE2-NEXT: movq 16(%rdi), %r8
19802 ; X64-SSE2-NEXT: movq 24(%rdi), %r9
19803 ; X64-SSE2-NEXT: movq 32(%rdi), %r10
19804 ; X64-SSE2-NEXT: movq 40(%rdi), %r11
19805 ; X64-SSE2-NEXT: movq 48(%rdi), %rbx
19806 ; X64-SSE2-NEXT: movq 56(%rdi), %rdi
19807 ; X64-SSE2-NEXT: movl (%rsi), %esi
19808 ; X64-SSE2-NEXT: xorps %xmm0, %xmm0
19809 ; X64-SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
19810 ; X64-SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
19811 ; X64-SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
19812 ; X64-SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
19813 ; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
19814 ; X64-SSE2-NEXT: movq %rbx, -{{[0-9]+}}(%rsp)
19815 ; X64-SSE2-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
19816 ; X64-SSE2-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
19817 ; X64-SSE2-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
19818 ; X64-SSE2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
19819 ; X64-SSE2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
19820 ; X64-SSE2-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
19821 ; X64-SSE2-NEXT: shll $3, %esi
19822 ; X64-SSE2-NEXT: andl $56, %esi
19823 ; X64-SSE2-NEXT: negl %esi
19824 ; X64-SSE2-NEXT: movslq %esi, %rax
19825 ; X64-SSE2-NEXT: movq -64(%rsp,%rax), %rcx
19826 ; X64-SSE2-NEXT: movq -56(%rsp,%rax), %rsi
19827 ; X64-SSE2-NEXT: movq -40(%rsp,%rax), %rdi
19828 ; X64-SSE2-NEXT: movq -48(%rsp,%rax), %r8
19829 ; X64-SSE2-NEXT: movq -24(%rsp,%rax), %r9
19830 ; X64-SSE2-NEXT: movq -32(%rsp,%rax), %r10
19831 ; X64-SSE2-NEXT: movq -8(%rsp,%rax), %r11
19832 ; X64-SSE2-NEXT: movq -16(%rsp,%rax), %rax
19833 ; X64-SSE2-NEXT: movq %rax, 48(%rdx)
19834 ; X64-SSE2-NEXT: movq %r11, 56(%rdx)
19835 ; X64-SSE2-NEXT: movq %r10, 32(%rdx)
19836 ; X64-SSE2-NEXT: movq %r9, 40(%rdx)
19837 ; X64-SSE2-NEXT: movq %r8, 16(%rdx)
19838 ; X64-SSE2-NEXT: movq %rdi, 24(%rdx)
19839 ; X64-SSE2-NEXT: movq %rcx, (%rdx)
19840 ; X64-SSE2-NEXT: movq %rsi, 8(%rdx)
19841 ; X64-SSE2-NEXT: popq %rbx
19842 ; X64-SSE2-NEXT: retq
19844 ; X64-SSE42-LABEL: shl_64bytes_qwordOff:
19845 ; X64-SSE42: # %bb.0:
19846 ; X64-SSE42-NEXT: pushq %rax
19847 ; X64-SSE42-NEXT: movups (%rdi), %xmm0
19848 ; X64-SSE42-NEXT: movups 16(%rdi), %xmm1
19849 ; X64-SSE42-NEXT: movups 32(%rdi), %xmm2
19850 ; X64-SSE42-NEXT: movups 48(%rdi), %xmm3
19851 ; X64-SSE42-NEXT: movl (%rsi), %eax
19852 ; X64-SSE42-NEXT: xorps %xmm4, %xmm4
19853 ; X64-SSE42-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
19854 ; X64-SSE42-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
19855 ; X64-SSE42-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
19856 ; X64-SSE42-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
19857 ; X64-SSE42-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)
19858 ; X64-SSE42-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
19859 ; X64-SSE42-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
19860 ; X64-SSE42-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
19861 ; X64-SSE42-NEXT: shll $3, %eax
19862 ; X64-SSE42-NEXT: andl $56, %eax
19863 ; X64-SSE42-NEXT: negl %eax
19864 ; X64-SSE42-NEXT: cltq
19865 ; X64-SSE42-NEXT: movups -64(%rsp,%rax), %xmm0
19866 ; X64-SSE42-NEXT: movups -48(%rsp,%rax), %xmm1
19867 ; X64-SSE42-NEXT: movups -32(%rsp,%rax), %xmm2
19868 ; X64-SSE42-NEXT: movups -16(%rsp,%rax), %xmm3
19869 ; X64-SSE42-NEXT: movups %xmm3, 48(%rdx)
19870 ; X64-SSE42-NEXT: movups %xmm1, 16(%rdx)
19871 ; X64-SSE42-NEXT: movups %xmm2, 32(%rdx)
19872 ; X64-SSE42-NEXT: movups %xmm0, (%rdx)
19873 ; X64-SSE42-NEXT: popq %rax
19874 ; X64-SSE42-NEXT: retq
19876 ; X64-AVX1-LABEL: shl_64bytes_qwordOff:
19877 ; X64-AVX1: # %bb.0:
19878 ; X64-AVX1-NEXT: pushq %rax
19879 ; X64-AVX1-NEXT: vmovups (%rdi), %ymm0
19880 ; X64-AVX1-NEXT: vmovups 32(%rdi), %ymm1
19881 ; X64-AVX1-NEXT: movl (%rsi), %eax
19882 ; X64-AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
19883 ; X64-AVX1-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
19884 ; X64-AVX1-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
19885 ; X64-AVX1-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
19886 ; X64-AVX1-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
19887 ; X64-AVX1-NEXT: shll $3, %eax
19888 ; X64-AVX1-NEXT: andl $56, %eax
19889 ; X64-AVX1-NEXT: negl %eax
19890 ; X64-AVX1-NEXT: cltq
19891 ; X64-AVX1-NEXT: vmovups -64(%rsp,%rax), %xmm0
19892 ; X64-AVX1-NEXT: vmovups -48(%rsp,%rax), %xmm1
19893 ; X64-AVX1-NEXT: vmovups -32(%rsp,%rax), %xmm2
19894 ; X64-AVX1-NEXT: vmovups -16(%rsp,%rax), %xmm3
19895 ; X64-AVX1-NEXT: vmovups %xmm3, 48(%rdx)
19896 ; X64-AVX1-NEXT: vmovups %xmm1, 16(%rdx)
19897 ; X64-AVX1-NEXT: vmovups %xmm2, 32(%rdx)
19898 ; X64-AVX1-NEXT: vmovups %xmm0, (%rdx)
19899 ; X64-AVX1-NEXT: popq %rax
19900 ; X64-AVX1-NEXT: vzeroupper
19901 ; X64-AVX1-NEXT: retq
19903 ; X64-AVX512-LABEL: shl_64bytes_qwordOff:
19904 ; X64-AVX512: # %bb.0:
19905 ; X64-AVX512-NEXT: pushq %rax
19906 ; X64-AVX512-NEXT: vmovups (%rdi), %zmm0
19907 ; X64-AVX512-NEXT: movl (%rsi), %eax
19908 ; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
19909 ; X64-AVX512-NEXT: vmovups %zmm1, -{{[0-9]+}}(%rsp)
19910 ; X64-AVX512-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp)
19911 ; X64-AVX512-NEXT: shll $3, %eax
19912 ; X64-AVX512-NEXT: andl $56, %eax
19913 ; X64-AVX512-NEXT: negl %eax
19914 ; X64-AVX512-NEXT: cltq
19915 ; X64-AVX512-NEXT: vmovups -64(%rsp,%rax), %xmm0
19916 ; X64-AVX512-NEXT: vmovups -48(%rsp,%rax), %xmm1
19917 ; X64-AVX512-NEXT: vmovups -32(%rsp,%rax), %xmm2
19918 ; X64-AVX512-NEXT: vmovups -16(%rsp,%rax), %xmm3
19919 ; X64-AVX512-NEXT: vmovups %xmm3, 48(%rdx)
19920 ; X64-AVX512-NEXT: vmovups %xmm1, 16(%rdx)
19921 ; X64-AVX512-NEXT: vmovups %xmm2, 32(%rdx)
19922 ; X64-AVX512-NEXT: vmovups %xmm0, (%rdx)
19923 ; X64-AVX512-NEXT: popq %rax
19924 ; X64-AVX512-NEXT: vzeroupper
19925 ; X64-AVX512-NEXT: retq
19927 ; X86-SSE2-LABEL: shl_64bytes_qwordOff:
19928 ; X86-SSE2: # %bb.0:
19929 ; X86-SSE2-NEXT: pushl %ebp
19930 ; X86-SSE2-NEXT: pushl %ebx
19931 ; X86-SSE2-NEXT: pushl %edi
19932 ; X86-SSE2-NEXT: pushl %esi
19933 ; X86-SSE2-NEXT: subl $188, %esp
19934 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
19935 ; X86-SSE2-NEXT: movl (%ecx), %eax
19936 ; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19937 ; X86-SSE2-NEXT: movl 4(%ecx), %eax
19938 ; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19939 ; X86-SSE2-NEXT: movl 8(%ecx), %eax
19940 ; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19941 ; X86-SSE2-NEXT: movl 12(%ecx), %eax
19942 ; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19943 ; X86-SSE2-NEXT: movl 16(%ecx), %eax
19944 ; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19945 ; X86-SSE2-NEXT: movl 20(%ecx), %eax
19946 ; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19947 ; X86-SSE2-NEXT: movl 24(%ecx), %eax
19948 ; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19949 ; X86-SSE2-NEXT: movl 28(%ecx), %eax
19950 ; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19951 ; X86-SSE2-NEXT: movl 32(%ecx), %eax
19952 ; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19953 ; X86-SSE2-NEXT: movl 36(%ecx), %eax
19954 ; X86-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
19955 ; X86-SSE2-NEXT: movl 40(%ecx), %ebp
19956 ; X86-SSE2-NEXT: movl 44(%ecx), %ebx
19957 ; X86-SSE2-NEXT: movl 48(%ecx), %edi
19958 ; X86-SSE2-NEXT: movl 52(%ecx), %esi
19959 ; X86-SSE2-NEXT: movl 56(%ecx), %edx
19960 ; X86-SSE2-NEXT: movl 60(%ecx), %eax
19961 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
19962 ; X86-SSE2-NEXT: movl (%ecx), %ecx
19963 ; X86-SSE2-NEXT: xorps %xmm0, %xmm0
19964 ; X86-SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
19965 ; X86-SSE2-NEXT: movl %eax, {{[0-9]+}}(%esp)
19966 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
19967 ; X86-SSE2-NEXT: movl %esi, {{[0-9]+}}(%esp)
19968 ; X86-SSE2-NEXT: movl %edi, {{[0-9]+}}(%esp)
19969 ; X86-SSE2-NEXT: movl %ebx, {{[0-9]+}}(%esp)
19970 ; X86-SSE2-NEXT: movl %ebp, {{[0-9]+}}(%esp)
19971 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
19972 ; X86-SSE2-NEXT: movl %eax, {{[0-9]+}}(%esp)
19973 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
19974 ; X86-SSE2-NEXT: movl %eax, {{[0-9]+}}(%esp)
19975 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
19976 ; X86-SSE2-NEXT: movl %eax, {{[0-9]+}}(%esp)
19977 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
19978 ; X86-SSE2-NEXT: movl %eax, {{[0-9]+}}(%esp)
19979 ; X86-SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
19980 ; X86-SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
19981 ; X86-SSE2-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
19982 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
19983 ; X86-SSE2-NEXT: movl %eax, {{[0-9]+}}(%esp)
19984 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
19985 ; X86-SSE2-NEXT: movl %eax, {{[0-9]+}}(%esp)
19986 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
19987 ; X86-SSE2-NEXT: movl %eax, {{[0-9]+}}(%esp)
19988 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
19989 ; X86-SSE2-NEXT: movl %eax, {{[0-9]+}}(%esp)
19990 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
19991 ; X86-SSE2-NEXT: movl %eax, {{[0-9]+}}(%esp)
19992 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
19993 ; X86-SSE2-NEXT: movl %eax, {{[0-9]+}}(%esp)
19994 ; X86-SSE2-NEXT: shll $3, %ecx
19995 ; X86-SSE2-NEXT: andl $56, %ecx
19996 ; X86-SSE2-NEXT: leal {{[0-9]+}}(%esp), %eax
19997 ; X86-SSE2-NEXT: subl %ecx, %eax
19998 ; X86-SSE2-NEXT: movl (%eax), %edx
19999 ; X86-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
20000 ; X86-SSE2-NEXT: movl 4(%eax), %edx
20001 ; X86-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
20002 ; X86-SSE2-NEXT: movl 12(%eax), %edx
20003 ; X86-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
20004 ; X86-SSE2-NEXT: movl 8(%eax), %edx
20005 ; X86-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
20006 ; X86-SSE2-NEXT: movl 20(%eax), %edx
20007 ; X86-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
20008 ; X86-SSE2-NEXT: movl 16(%eax), %edx
20009 ; X86-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
20010 ; X86-SSE2-NEXT: movl 28(%eax), %edx
20011 ; X86-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
20012 ; X86-SSE2-NEXT: movl 24(%eax), %edx
20013 ; X86-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
20014 ; X86-SSE2-NEXT: movl 36(%eax), %edx
20015 ; X86-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
20016 ; X86-SSE2-NEXT: movl 32(%eax), %edx
20017 ; X86-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
20018 ; X86-SSE2-NEXT: movl 44(%eax), %ebp
20019 ; X86-SSE2-NEXT: movl 40(%eax), %ebx
20020 ; X86-SSE2-NEXT: movl 52(%eax), %edi
20021 ; X86-SSE2-NEXT: movl 60(%eax), %esi
20022 ; X86-SSE2-NEXT: movl 56(%eax), %edx
20023 ; X86-SSE2-NEXT: negl %ecx
20024 ; X86-SSE2-NEXT: movl 160(%esp,%ecx), %ecx
20025 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
20026 ; X86-SSE2-NEXT: movl %edx, 56(%eax)
20027 ; X86-SSE2-NEXT: movl %esi, 60(%eax)
20028 ; X86-SSE2-NEXT: movl %ecx, 48(%eax)
20029 ; X86-SSE2-NEXT: movl %edi, 52(%eax)
20030 ; X86-SSE2-NEXT: movl %ebx, 40(%eax)
20031 ; X86-SSE2-NEXT: movl %ebp, 44(%eax)
20032 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
20033 ; X86-SSE2-NEXT: movl %ecx, 32(%eax)
20034 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
20035 ; X86-SSE2-NEXT: movl %ecx, 36(%eax)
20036 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
20037 ; X86-SSE2-NEXT: movl %ecx, 24(%eax)
20038 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
20039 ; X86-SSE2-NEXT: movl %ecx, 28(%eax)
20040 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
20041 ; X86-SSE2-NEXT: movl %ecx, 16(%eax)
20042 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
20043 ; X86-SSE2-NEXT: movl %ecx, 20(%eax)
20044 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
20045 ; X86-SSE2-NEXT: movl %ecx, 8(%eax)
20046 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
20047 ; X86-SSE2-NEXT: movl %ecx, 12(%eax)
20048 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
20049 ; X86-SSE2-NEXT: movl %ecx, (%eax)
20050 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
20051 ; X86-SSE2-NEXT: movl %ecx, 4(%eax)
20052 ; X86-SSE2-NEXT: addl $188, %esp
20053 ; X86-SSE2-NEXT: popl %esi
20054 ; X86-SSE2-NEXT: popl %edi
20055 ; X86-SSE2-NEXT: popl %ebx
20056 ; X86-SSE2-NEXT: popl %ebp
20057 ; X86-SSE2-NEXT: retl
20059 ; X86-SSE42-LABEL: shl_64bytes_qwordOff:
20060 ; X86-SSE42: # %bb.0:
20061 ; X86-SSE42-NEXT: subl $140, %esp
20062 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %eax
20063 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %ecx
20064 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %edx
20065 ; X86-SSE42-NEXT: movups (%edx), %xmm0
20066 ; X86-SSE42-NEXT: movups 16(%edx), %xmm1
20067 ; X86-SSE42-NEXT: movups 32(%edx), %xmm2
20068 ; X86-SSE42-NEXT: movups 48(%edx), %xmm3
20069 ; X86-SSE42-NEXT: movl (%ecx), %ecx
20070 ; X86-SSE42-NEXT: xorps %xmm4, %xmm4
20071 ; X86-SSE42-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
20072 ; X86-SSE42-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
20073 ; X86-SSE42-NEXT: movaps %xmm4, {{[0-9]+}}(%esp)
20074 ; X86-SSE42-NEXT: movaps %xmm4, (%esp)
20075 ; X86-SSE42-NEXT: movaps %xmm3, {{[0-9]+}}(%esp)
20076 ; X86-SSE42-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
20077 ; X86-SSE42-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
20078 ; X86-SSE42-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
20079 ; X86-SSE42-NEXT: shll $3, %ecx
20080 ; X86-SSE42-NEXT: andl $56, %ecx
20081 ; X86-SSE42-NEXT: leal {{[0-9]+}}(%esp), %edx
20082 ; X86-SSE42-NEXT: subl %ecx, %edx
20083 ; X86-SSE42-NEXT: movups (%edx), %xmm0
20084 ; X86-SSE42-NEXT: movups 16(%edx), %xmm1
20085 ; X86-SSE42-NEXT: movups 32(%edx), %xmm2
20086 ; X86-SSE42-NEXT: negl %ecx
20087 ; X86-SSE42-NEXT: movups 112(%esp,%ecx), %xmm3
20088 ; X86-SSE42-NEXT: movups %xmm3, 48(%eax)
20089 ; X86-SSE42-NEXT: movups %xmm2, 32(%eax)
20090 ; X86-SSE42-NEXT: movups %xmm1, 16(%eax)
20091 ; X86-SSE42-NEXT: movups %xmm0, (%eax)
20092 ; X86-SSE42-NEXT: addl $140, %esp
20093 ; X86-SSE42-NEXT: retl
20095 ; X86-AVX1-LABEL: shl_64bytes_qwordOff:
20096 ; X86-AVX1: # %bb.0:
20097 ; X86-AVX1-NEXT: subl $140, %esp
20098 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
20099 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx
20100 ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %edx
20101 ; X86-AVX1-NEXT: vmovups (%edx), %ymm0
20102 ; X86-AVX1-NEXT: vmovups 32(%edx), %ymm1
20103 ; X86-AVX1-NEXT: movl (%ecx), %ecx
20104 ; X86-AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
20105 ; X86-AVX1-NEXT: vmovups %ymm2, {{[0-9]+}}(%esp)
20106 ; X86-AVX1-NEXT: vmovups %ymm2, (%esp)
20107 ; X86-AVX1-NEXT: vmovups %ymm1, {{[0-9]+}}(%esp)
20108 ; X86-AVX1-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
20109 ; X86-AVX1-NEXT: shll $3, %ecx
20110 ; X86-AVX1-NEXT: andl $56, %ecx
20111 ; X86-AVX1-NEXT: leal {{[0-9]+}}(%esp), %edx
20112 ; X86-AVX1-NEXT: subl %ecx, %edx
20113 ; X86-AVX1-NEXT: vmovups (%edx), %xmm0
20114 ; X86-AVX1-NEXT: vmovups 16(%edx), %xmm1
20115 ; X86-AVX1-NEXT: vmovups 32(%edx), %xmm2
20116 ; X86-AVX1-NEXT: negl %ecx
20117 ; X86-AVX1-NEXT: vmovups 112(%esp,%ecx), %xmm3
20118 ; X86-AVX1-NEXT: vmovups %xmm3, 48(%eax)
20119 ; X86-AVX1-NEXT: vmovups %xmm2, 32(%eax)
20120 ; X86-AVX1-NEXT: vmovups %xmm1, 16(%eax)
20121 ; X86-AVX1-NEXT: vmovups %xmm0, (%eax)
20122 ; X86-AVX1-NEXT: addl $140, %esp
20123 ; X86-AVX1-NEXT: vzeroupper
20124 ; X86-AVX1-NEXT: retl
20126 ; X86-AVX512-LABEL: shl_64bytes_qwordOff:
20127 ; X86-AVX512: # %bb.0:
20128 ; X86-AVX512-NEXT: subl $140, %esp
20129 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
20130 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
20131 ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx
20132 ; X86-AVX512-NEXT: vmovups (%edx), %zmm0
20133 ; X86-AVX512-NEXT: movl (%ecx), %ecx
20134 ; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
20135 ; X86-AVX512-NEXT: vmovups %zmm1, (%esp)
20136 ; X86-AVX512-NEXT: vmovups %zmm0, {{[0-9]+}}(%esp)
20137 ; X86-AVX512-NEXT: shll $3, %ecx
20138 ; X86-AVX512-NEXT: andl $56, %ecx
20139 ; X86-AVX512-NEXT: leal {{[0-9]+}}(%esp), %edx
20140 ; X86-AVX512-NEXT: subl %ecx, %edx
20141 ; X86-AVX512-NEXT: vmovups (%edx), %xmm0
20142 ; X86-AVX512-NEXT: vmovups 16(%edx), %xmm1
20143 ; X86-AVX512-NEXT: vmovups 32(%edx), %xmm2
20144 ; X86-AVX512-NEXT: negl %ecx
20145 ; X86-AVX512-NEXT: vmovups 112(%esp,%ecx), %xmm3
20146 ; X86-AVX512-NEXT: vmovups %xmm3, 48(%eax)
20147 ; X86-AVX512-NEXT: vmovups %xmm2, 32(%eax)
20148 ; X86-AVX512-NEXT: vmovups %xmm1, 16(%eax)
20149 ; X86-AVX512-NEXT: vmovups %xmm0, (%eax)
20150 ; X86-AVX512-NEXT: addl $140, %esp
20151 ; X86-AVX512-NEXT: vzeroupper
20152 ; X86-AVX512-NEXT: retl
; Load the 512-bit source and the 512-bit offset; "align 1" means neither
; pointer is assumed to be aligned.
20153 %src = load i512, ptr %src.ptr, align 1
20154 %qwordOff = load i512, ptr %qwordOff.ptr, align 1
; Convert the qword count into a bit count: shifting left by 6 multiplies by 64.
20155 %bitOff = shl i512 %qwordOff, 6
; The operation under test: shift the whole 512-bit value left by that bit count.
20156 %res = shl i512 %src, %bitOff
; Write the shifted value to the destination, again with no alignment assumed.
20157 store i512 %res, ptr %dst, align 1
20161 define void @ashr_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
20162 ; FALLBACK0-LABEL: ashr_64bytes:
20163 ; FALLBACK0: # %bb.0:
20164 ; FALLBACK0-NEXT: pushq %r15
20165 ; FALLBACK0-NEXT: pushq %r14
20166 ; FALLBACK0-NEXT: pushq %r13
20167 ; FALLBACK0-NEXT: pushq %r12
20168 ; FALLBACK0-NEXT: pushq %rbx
20169 ; FALLBACK0-NEXT: movq (%rdi), %rax
20170 ; FALLBACK0-NEXT: movq 8(%rdi), %rcx
20171 ; FALLBACK0-NEXT: movq 16(%rdi), %r8
20172 ; FALLBACK0-NEXT: movq 24(%rdi), %r9
20173 ; FALLBACK0-NEXT: movq 32(%rdi), %r10
20174 ; FALLBACK0-NEXT: movq 40(%rdi), %r11
20175 ; FALLBACK0-NEXT: movq 48(%rdi), %rbx
20176 ; FALLBACK0-NEXT: movq 56(%rdi), %r14
20177 ; FALLBACK0-NEXT: movl (%rsi), %edi
20178 ; FALLBACK0-NEXT: movq %r14, -{{[0-9]+}}(%rsp)
20179 ; FALLBACK0-NEXT: movq %rbx, -{{[0-9]+}}(%rsp)
20180 ; FALLBACK0-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
20181 ; FALLBACK0-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
20182 ; FALLBACK0-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
20183 ; FALLBACK0-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
20184 ; FALLBACK0-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20185 ; FALLBACK0-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
20186 ; FALLBACK0-NEXT: sarq $63, %r14
20187 ; FALLBACK0-NEXT: movq %r14, -{{[0-9]+}}(%rsp)
20188 ; FALLBACK0-NEXT: movq %r14, -{{[0-9]+}}(%rsp)
20189 ; FALLBACK0-NEXT: movq %r14, -{{[0-9]+}}(%rsp)
20190 ; FALLBACK0-NEXT: movq %r14, -{{[0-9]+}}(%rsp)
20191 ; FALLBACK0-NEXT: movq %r14, -{{[0-9]+}}(%rsp)
20192 ; FALLBACK0-NEXT: movq %r14, -{{[0-9]+}}(%rsp)
20193 ; FALLBACK0-NEXT: movq %r14, -{{[0-9]+}}(%rsp)
20194 ; FALLBACK0-NEXT: movq %r14, -{{[0-9]+}}(%rsp)
20195 ; FALLBACK0-NEXT: leal (,%rdi,8), %eax
20196 ; FALLBACK0-NEXT: andl $56, %eax
20197 ; FALLBACK0-NEXT: andl $56, %edi
20198 ; FALLBACK0-NEXT: movq -128(%rsp,%rdi), %r10
20199 ; FALLBACK0-NEXT: movq -120(%rsp,%rdi), %r8
20200 ; FALLBACK0-NEXT: movq %r8, %r11
20201 ; FALLBACK0-NEXT: movl %eax, %ecx
20202 ; FALLBACK0-NEXT: shrq %cl, %r11
20203 ; FALLBACK0-NEXT: movl %eax, %esi
20204 ; FALLBACK0-NEXT: notb %sil
20205 ; FALLBACK0-NEXT: movq -112(%rsp,%rdi), %rbx
20206 ; FALLBACK0-NEXT: leaq (%rbx,%rbx), %r9
20207 ; FALLBACK0-NEXT: movl %esi, %ecx
20208 ; FALLBACK0-NEXT: shlq %cl, %r9
20209 ; FALLBACK0-NEXT: orq %r11, %r9
20210 ; FALLBACK0-NEXT: movl %eax, %ecx
20211 ; FALLBACK0-NEXT: shrq %cl, %r10
20212 ; FALLBACK0-NEXT: addq %r8, %r8
20213 ; FALLBACK0-NEXT: movl %esi, %ecx
20214 ; FALLBACK0-NEXT: shlq %cl, %r8
20215 ; FALLBACK0-NEXT: orq %r10, %r8
20216 ; FALLBACK0-NEXT: movq -104(%rsp,%rdi), %r10
20217 ; FALLBACK0-NEXT: movq %r10, %r15
20218 ; FALLBACK0-NEXT: movl %eax, %ecx
20219 ; FALLBACK0-NEXT: shrq %cl, %r15
20220 ; FALLBACK0-NEXT: movq -96(%rsp,%rdi), %r14
20221 ; FALLBACK0-NEXT: leaq (%r14,%r14), %r11
20222 ; FALLBACK0-NEXT: movl %esi, %ecx
20223 ; FALLBACK0-NEXT: shlq %cl, %r11
20224 ; FALLBACK0-NEXT: orq %r15, %r11
20225 ; FALLBACK0-NEXT: movl %eax, %ecx
20226 ; FALLBACK0-NEXT: shrq %cl, %rbx
20227 ; FALLBACK0-NEXT: addq %r10, %r10
20228 ; FALLBACK0-NEXT: movl %esi, %ecx
20229 ; FALLBACK0-NEXT: shlq %cl, %r10
20230 ; FALLBACK0-NEXT: orq %rbx, %r10
20231 ; FALLBACK0-NEXT: movq -88(%rsp,%rdi), %rbx
20232 ; FALLBACK0-NEXT: movq %rbx, %r12
20233 ; FALLBACK0-NEXT: movl %eax, %ecx
20234 ; FALLBACK0-NEXT: shrq %cl, %r12
20235 ; FALLBACK0-NEXT: movq -80(%rsp,%rdi), %r13
20236 ; FALLBACK0-NEXT: leaq (%r13,%r13), %r15
20237 ; FALLBACK0-NEXT: movl %esi, %ecx
20238 ; FALLBACK0-NEXT: shlq %cl, %r15
20239 ; FALLBACK0-NEXT: orq %r12, %r15
20240 ; FALLBACK0-NEXT: movl %eax, %ecx
20241 ; FALLBACK0-NEXT: shrq %cl, %r14
20242 ; FALLBACK0-NEXT: addq %rbx, %rbx
20243 ; FALLBACK0-NEXT: movl %esi, %ecx
20244 ; FALLBACK0-NEXT: shlq %cl, %rbx
20245 ; FALLBACK0-NEXT: orq %r14, %rbx
20246 ; FALLBACK0-NEXT: movl %eax, %ecx
20247 ; FALLBACK0-NEXT: shrq %cl, %r13
20248 ; FALLBACK0-NEXT: movq -72(%rsp,%rdi), %rdi
20249 ; FALLBACK0-NEXT: leaq (%rdi,%rdi), %r14
20250 ; FALLBACK0-NEXT: movl %esi, %ecx
20251 ; FALLBACK0-NEXT: shlq %cl, %r14
20252 ; FALLBACK0-NEXT: orq %r13, %r14
20253 ; FALLBACK0-NEXT: movl %eax, %ecx
20254 ; FALLBACK0-NEXT: sarq %cl, %rdi
20255 ; FALLBACK0-NEXT: movq %rdi, 56(%rdx)
20256 ; FALLBACK0-NEXT: movq %r14, 48(%rdx)
20257 ; FALLBACK0-NEXT: movq %rbx, 32(%rdx)
20258 ; FALLBACK0-NEXT: movq %r15, 40(%rdx)
20259 ; FALLBACK0-NEXT: movq %r10, 16(%rdx)
20260 ; FALLBACK0-NEXT: movq %r11, 24(%rdx)
20261 ; FALLBACK0-NEXT: movq %r8, (%rdx)
20262 ; FALLBACK0-NEXT: movq %r9, 8(%rdx)
20263 ; FALLBACK0-NEXT: popq %rbx
20264 ; FALLBACK0-NEXT: popq %r12
20265 ; FALLBACK0-NEXT: popq %r13
20266 ; FALLBACK0-NEXT: popq %r14
20267 ; FALLBACK0-NEXT: popq %r15
20268 ; FALLBACK0-NEXT: retq
20270 ; FALLBACK1-LABEL: ashr_64bytes:
20271 ; FALLBACK1: # %bb.0:
20272 ; FALLBACK1-NEXT: pushq %r15
20273 ; FALLBACK1-NEXT: pushq %r14
20274 ; FALLBACK1-NEXT: pushq %rbx
20275 ; FALLBACK1-NEXT: movq (%rdi), %rcx
20276 ; FALLBACK1-NEXT: movq 8(%rdi), %r8
20277 ; FALLBACK1-NEXT: movq 16(%rdi), %r9
20278 ; FALLBACK1-NEXT: movq 24(%rdi), %r10
20279 ; FALLBACK1-NEXT: movq 32(%rdi), %r11
20280 ; FALLBACK1-NEXT: movq 40(%rdi), %rbx
20281 ; FALLBACK1-NEXT: movq 48(%rdi), %r14
20282 ; FALLBACK1-NEXT: movq 56(%rdi), %rdi
20283 ; FALLBACK1-NEXT: movl (%rsi), %eax
20284 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20285 ; FALLBACK1-NEXT: movq %r14, -{{[0-9]+}}(%rsp)
20286 ; FALLBACK1-NEXT: movq %rbx, -{{[0-9]+}}(%rsp)
20287 ; FALLBACK1-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
20288 ; FALLBACK1-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
20289 ; FALLBACK1-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
20290 ; FALLBACK1-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
20291 ; FALLBACK1-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20292 ; FALLBACK1-NEXT: sarq $63, %rdi
20293 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20294 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20295 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20296 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20297 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20298 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20299 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20300 ; FALLBACK1-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20301 ; FALLBACK1-NEXT: leal (,%rax,8), %ecx
20302 ; FALLBACK1-NEXT: andl $56, %ecx
20303 ; FALLBACK1-NEXT: andl $56, %eax
20304 ; FALLBACK1-NEXT: movq -112(%rsp,%rax), %rdi
20305 ; FALLBACK1-NEXT: movq -128(%rsp,%rax), %rsi
20306 ; FALLBACK1-NEXT: movq -120(%rsp,%rax), %r9
20307 ; FALLBACK1-NEXT: movq %r9, %r8
20308 ; FALLBACK1-NEXT: shrdq %cl, %rdi, %r8
20309 ; FALLBACK1-NEXT: movq -96(%rsp,%rax), %r10
20310 ; FALLBACK1-NEXT: movq -104(%rsp,%rax), %r11
20311 ; FALLBACK1-NEXT: movq %r11, %rbx
20312 ; FALLBACK1-NEXT: shrdq %cl, %r10, %rbx
20313 ; FALLBACK1-NEXT: shrdq %cl, %r11, %rdi
20314 ; FALLBACK1-NEXT: movq -80(%rsp,%rax), %r11
20315 ; FALLBACK1-NEXT: movq -88(%rsp,%rax), %r14
20316 ; FALLBACK1-NEXT: movq %r14, %r15
20317 ; FALLBACK1-NEXT: shrdq %cl, %r11, %r15
20318 ; FALLBACK1-NEXT: shrdq %cl, %r14, %r10
20319 ; FALLBACK1-NEXT: movq -72(%rsp,%rax), %rax
20320 ; FALLBACK1-NEXT: shrdq %cl, %rax, %r11
20321 ; FALLBACK1-NEXT: shrdq %cl, %r9, %rsi
20322 ; FALLBACK1-NEXT: # kill: def $cl killed $cl killed $ecx
20323 ; FALLBACK1-NEXT: sarq %cl, %rax
20324 ; FALLBACK1-NEXT: movq %r11, 48(%rdx)
20325 ; FALLBACK1-NEXT: movq %rax, 56(%rdx)
20326 ; FALLBACK1-NEXT: movq %r10, 32(%rdx)
20327 ; FALLBACK1-NEXT: movq %r15, 40(%rdx)
20328 ; FALLBACK1-NEXT: movq %rdi, 16(%rdx)
20329 ; FALLBACK1-NEXT: movq %rbx, 24(%rdx)
20330 ; FALLBACK1-NEXT: movq %rsi, (%rdx)
20331 ; FALLBACK1-NEXT: movq %r8, 8(%rdx)
20332 ; FALLBACK1-NEXT: popq %rbx
20333 ; FALLBACK1-NEXT: popq %r14
20334 ; FALLBACK1-NEXT: popq %r15
20335 ; FALLBACK1-NEXT: retq
20337 ; FALLBACK2-LABEL: ashr_64bytes:
20338 ; FALLBACK2: # %bb.0:
20339 ; FALLBACK2-NEXT: pushq %rbp
20340 ; FALLBACK2-NEXT: pushq %r15
20341 ; FALLBACK2-NEXT: pushq %r14
20342 ; FALLBACK2-NEXT: pushq %r13
20343 ; FALLBACK2-NEXT: pushq %r12
20344 ; FALLBACK2-NEXT: pushq %rbx
20345 ; FALLBACK2-NEXT: pushq %rax
20346 ; FALLBACK2-NEXT: movq (%rdi), %rcx
20347 ; FALLBACK2-NEXT: movq 8(%rdi), %r8
20348 ; FALLBACK2-NEXT: movq 16(%rdi), %r9
20349 ; FALLBACK2-NEXT: movq 24(%rdi), %r10
20350 ; FALLBACK2-NEXT: movq 32(%rdi), %r11
20351 ; FALLBACK2-NEXT: movq 40(%rdi), %rbx
20352 ; FALLBACK2-NEXT: movq 48(%rdi), %r14
20353 ; FALLBACK2-NEXT: movq 56(%rdi), %rdi
20354 ; FALLBACK2-NEXT: movl (%rsi), %eax
20355 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20356 ; FALLBACK2-NEXT: movq %r14, -{{[0-9]+}}(%rsp)
20357 ; FALLBACK2-NEXT: movq %rbx, -{{[0-9]+}}(%rsp)
20358 ; FALLBACK2-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
20359 ; FALLBACK2-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
20360 ; FALLBACK2-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
20361 ; FALLBACK2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
20362 ; FALLBACK2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20363 ; FALLBACK2-NEXT: sarq $63, %rdi
20364 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20365 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20366 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20367 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20368 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20369 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20370 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20371 ; FALLBACK2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20372 ; FALLBACK2-NEXT: leal (,%rax,8), %ecx
20373 ; FALLBACK2-NEXT: andl $56, %ecx
20374 ; FALLBACK2-NEXT: andl $56, %eax
20375 ; FALLBACK2-NEXT: movq -120(%rsp,%rax), %rdi
20376 ; FALLBACK2-NEXT: movq -112(%rsp,%rax), %r9
20377 ; FALLBACK2-NEXT: shrxq %rcx, %rdi, %rbx
20378 ; FALLBACK2-NEXT: shrxq %rcx, -128(%rsp,%rax), %r13
20379 ; FALLBACK2-NEXT: movq -104(%rsp,%rax), %rsi
20380 ; FALLBACK2-NEXT: shrxq %rcx, %rsi, %r8
20381 ; FALLBACK2-NEXT: movq -96(%rsp,%rax), %r10
20382 ; FALLBACK2-NEXT: shrxq %rcx, %r9, %r11
20383 ; FALLBACK2-NEXT: movq -88(%rsp,%rax), %r14
20384 ; FALLBACK2-NEXT: shrxq %rcx, %r14, %r15
20385 ; FALLBACK2-NEXT: shrxq %rcx, %r10, %rbp
20386 ; FALLBACK2-NEXT: movl %ecx, %r12d
20387 ; FALLBACK2-NEXT: notb %r12b
20388 ; FALLBACK2-NEXT: addq %r9, %r9
20389 ; FALLBACK2-NEXT: shlxq %r12, %r9, %r9
20390 ; FALLBACK2-NEXT: orq %rbx, %r9
20391 ; FALLBACK2-NEXT: addq %rdi, %rdi
20392 ; FALLBACK2-NEXT: shlxq %r12, %rdi, %rdi
20393 ; FALLBACK2-NEXT: orq %r13, %rdi
20394 ; FALLBACK2-NEXT: movq -80(%rsp,%rax), %rbx
20395 ; FALLBACK2-NEXT: shrxq %rcx, %rbx, %r13
20396 ; FALLBACK2-NEXT: movq -72(%rsp,%rax), %rax
20397 ; FALLBACK2-NEXT: sarxq %rcx, %rax, %rcx
20398 ; FALLBACK2-NEXT: addq %r10, %r10
20399 ; FALLBACK2-NEXT: shlxq %r12, %r10, %r10
20400 ; FALLBACK2-NEXT: orq %r8, %r10
20401 ; FALLBACK2-NEXT: addq %rsi, %rsi
20402 ; FALLBACK2-NEXT: shlxq %r12, %rsi, %rsi
20403 ; FALLBACK2-NEXT: orq %r11, %rsi
20404 ; FALLBACK2-NEXT: leaq (%rbx,%rbx), %r8
20405 ; FALLBACK2-NEXT: shlxq %r12, %r8, %r8
20406 ; FALLBACK2-NEXT: orq %r15, %r8
20407 ; FALLBACK2-NEXT: addq %r14, %r14
20408 ; FALLBACK2-NEXT: shlxq %r12, %r14, %r11
20409 ; FALLBACK2-NEXT: orq %rbp, %r11
20410 ; FALLBACK2-NEXT: addq %rax, %rax
20411 ; FALLBACK2-NEXT: shlxq %r12, %rax, %rax
20412 ; FALLBACK2-NEXT: orq %r13, %rax
20413 ; FALLBACK2-NEXT: movq %rcx, 56(%rdx)
20414 ; FALLBACK2-NEXT: movq %rax, 48(%rdx)
20415 ; FALLBACK2-NEXT: movq %r11, 32(%rdx)
20416 ; FALLBACK2-NEXT: movq %r8, 40(%rdx)
20417 ; FALLBACK2-NEXT: movq %rsi, 16(%rdx)
20418 ; FALLBACK2-NEXT: movq %r10, 24(%rdx)
20419 ; FALLBACK2-NEXT: movq %rdi, (%rdx)
20420 ; FALLBACK2-NEXT: movq %r9, 8(%rdx)
20421 ; FALLBACK2-NEXT: addq $8, %rsp
20422 ; FALLBACK2-NEXT: popq %rbx
20423 ; FALLBACK2-NEXT: popq %r12
20424 ; FALLBACK2-NEXT: popq %r13
20425 ; FALLBACK2-NEXT: popq %r14
20426 ; FALLBACK2-NEXT: popq %r15
20427 ; FALLBACK2-NEXT: popq %rbp
20428 ; FALLBACK2-NEXT: retq
20430 ; FALLBACK3-LABEL: ashr_64bytes:
20431 ; FALLBACK3: # %bb.0:
20432 ; FALLBACK3-NEXT: pushq %r15
20433 ; FALLBACK3-NEXT: pushq %r14
20434 ; FALLBACK3-NEXT: pushq %rbx
20435 ; FALLBACK3-NEXT: movq (%rdi), %rcx
20436 ; FALLBACK3-NEXT: movq 8(%rdi), %r8
20437 ; FALLBACK3-NEXT: movq 16(%rdi), %r9
20438 ; FALLBACK3-NEXT: movq 24(%rdi), %r10
20439 ; FALLBACK3-NEXT: movq 32(%rdi), %r11
20440 ; FALLBACK3-NEXT: movq 40(%rdi), %rbx
20441 ; FALLBACK3-NEXT: movq 48(%rdi), %r14
20442 ; FALLBACK3-NEXT: movq 56(%rdi), %rdi
20443 ; FALLBACK3-NEXT: movl (%rsi), %eax
20444 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20445 ; FALLBACK3-NEXT: movq %r14, -{{[0-9]+}}(%rsp)
20446 ; FALLBACK3-NEXT: movq %rbx, -{{[0-9]+}}(%rsp)
20447 ; FALLBACK3-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
20448 ; FALLBACK3-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
20449 ; FALLBACK3-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
20450 ; FALLBACK3-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
20451 ; FALLBACK3-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20452 ; FALLBACK3-NEXT: sarq $63, %rdi
20453 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20454 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20455 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20456 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20457 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20458 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20459 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20460 ; FALLBACK3-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20461 ; FALLBACK3-NEXT: leal (,%rax,8), %ecx
20462 ; FALLBACK3-NEXT: andl $56, %ecx
20463 ; FALLBACK3-NEXT: andl $56, %eax
20464 ; FALLBACK3-NEXT: movq -112(%rsp,%rax), %rdi
20465 ; FALLBACK3-NEXT: movq -128(%rsp,%rax), %rsi
20466 ; FALLBACK3-NEXT: movq -120(%rsp,%rax), %r9
20467 ; FALLBACK3-NEXT: movq %r9, %r8
20468 ; FALLBACK3-NEXT: shrdq %cl, %rdi, %r8
20469 ; FALLBACK3-NEXT: movq -96(%rsp,%rax), %r10
20470 ; FALLBACK3-NEXT: movq -104(%rsp,%rax), %r11
20471 ; FALLBACK3-NEXT: movq %r11, %rbx
20472 ; FALLBACK3-NEXT: shrdq %cl, %r10, %rbx
20473 ; FALLBACK3-NEXT: shrdq %cl, %r11, %rdi
20474 ; FALLBACK3-NEXT: movq -80(%rsp,%rax), %r11
20475 ; FALLBACK3-NEXT: movq -88(%rsp,%rax), %r14
20476 ; FALLBACK3-NEXT: movq %r14, %r15
20477 ; FALLBACK3-NEXT: shrdq %cl, %r11, %r15
20478 ; FALLBACK3-NEXT: shrdq %cl, %r14, %r10
20479 ; FALLBACK3-NEXT: movq -72(%rsp,%rax), %rax
20480 ; FALLBACK3-NEXT: shrdq %cl, %rax, %r11
20481 ; FALLBACK3-NEXT: sarxq %rcx, %rax, %rax
20482 ; FALLBACK3-NEXT: # kill: def $cl killed $cl killed $rcx
20483 ; FALLBACK3-NEXT: shrdq %cl, %r9, %rsi
20484 ; FALLBACK3-NEXT: movq %r11, 48(%rdx)
20485 ; FALLBACK3-NEXT: movq %r10, 32(%rdx)
20486 ; FALLBACK3-NEXT: movq %r15, 40(%rdx)
20487 ; FALLBACK3-NEXT: movq %rdi, 16(%rdx)
20488 ; FALLBACK3-NEXT: movq %rbx, 24(%rdx)
20489 ; FALLBACK3-NEXT: movq %rsi, (%rdx)
20490 ; FALLBACK3-NEXT: movq %r8, 8(%rdx)
20491 ; FALLBACK3-NEXT: movq %rax, 56(%rdx)
20492 ; FALLBACK3-NEXT: popq %rbx
20493 ; FALLBACK3-NEXT: popq %r14
20494 ; FALLBACK3-NEXT: popq %r15
20495 ; FALLBACK3-NEXT: retq
20497 ; FALLBACK4-LABEL: ashr_64bytes:
20498 ; FALLBACK4: # %bb.0:
20499 ; FALLBACK4-NEXT: pushq %rbp
20500 ; FALLBACK4-NEXT: pushq %r15
20501 ; FALLBACK4-NEXT: pushq %r14
20502 ; FALLBACK4-NEXT: pushq %r13
20503 ; FALLBACK4-NEXT: pushq %r12
20504 ; FALLBACK4-NEXT: pushq %rbx
20505 ; FALLBACK4-NEXT: pushq %rax
20506 ; FALLBACK4-NEXT: movups (%rdi), %xmm0
20507 ; FALLBACK4-NEXT: movups 16(%rdi), %xmm1
20508 ; FALLBACK4-NEXT: movups 32(%rdi), %xmm2
20509 ; FALLBACK4-NEXT: movq 48(%rdi), %rax
20510 ; FALLBACK4-NEXT: movq 56(%rdi), %rcx
20511 ; FALLBACK4-NEXT: movl (%rsi), %edi
20512 ; FALLBACK4-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20513 ; FALLBACK4-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
20514 ; FALLBACK4-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
20515 ; FALLBACK4-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
20516 ; FALLBACK4-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
20517 ; FALLBACK4-NEXT: sarq $63, %rcx
20518 ; FALLBACK4-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20519 ; FALLBACK4-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20520 ; FALLBACK4-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20521 ; FALLBACK4-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20522 ; FALLBACK4-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20523 ; FALLBACK4-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20524 ; FALLBACK4-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20525 ; FALLBACK4-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20526 ; FALLBACK4-NEXT: leal (,%rdi,8), %eax
20527 ; FALLBACK4-NEXT: andl $56, %eax
20528 ; FALLBACK4-NEXT: andl $56, %edi
20529 ; FALLBACK4-NEXT: movq -128(%rsp,%rdi), %r10
20530 ; FALLBACK4-NEXT: movq -120(%rsp,%rdi), %r9
20531 ; FALLBACK4-NEXT: movl %eax, %ecx
20532 ; FALLBACK4-NEXT: shrq %cl, %r10
20533 ; FALLBACK4-NEXT: movl %eax, %esi
20534 ; FALLBACK4-NEXT: notb %sil
20535 ; FALLBACK4-NEXT: leaq (%r9,%r9), %r8
20536 ; FALLBACK4-NEXT: movl %esi, %ecx
20537 ; FALLBACK4-NEXT: shlq %cl, %r8
20538 ; FALLBACK4-NEXT: orq %r10, %r8
20539 ; FALLBACK4-NEXT: movq -104(%rsp,%rdi), %r10
20540 ; FALLBACK4-NEXT: movq %r10, %rbx
20541 ; FALLBACK4-NEXT: movl %eax, %ecx
20542 ; FALLBACK4-NEXT: shrq %cl, %rbx
20543 ; FALLBACK4-NEXT: movq -96(%rsp,%rdi), %r12
20544 ; FALLBACK4-NEXT: leaq (%r12,%r12), %r11
20545 ; FALLBACK4-NEXT: movl %esi, %ecx
20546 ; FALLBACK4-NEXT: shlq %cl, %r11
20547 ; FALLBACK4-NEXT: orq %rbx, %r11
20548 ; FALLBACK4-NEXT: movq -112(%rsp,%rdi), %rbx
20549 ; FALLBACK4-NEXT: movq %rbx, %r14
20550 ; FALLBACK4-NEXT: movl %eax, %ecx
20551 ; FALLBACK4-NEXT: shrq %cl, %r14
20552 ; FALLBACK4-NEXT: addq %r10, %r10
20553 ; FALLBACK4-NEXT: movl %esi, %ecx
20554 ; FALLBACK4-NEXT: shlq %cl, %r10
20555 ; FALLBACK4-NEXT: orq %r14, %r10
20556 ; FALLBACK4-NEXT: movq -88(%rsp,%rdi), %r14
20557 ; FALLBACK4-NEXT: movq %r14, %r13
20558 ; FALLBACK4-NEXT: movl %eax, %ecx
20559 ; FALLBACK4-NEXT: shrq %cl, %r13
20560 ; FALLBACK4-NEXT: movq -80(%rsp,%rdi), %rbp
20561 ; FALLBACK4-NEXT: leaq (%rbp,%rbp), %r15
20562 ; FALLBACK4-NEXT: movl %esi, %ecx
20563 ; FALLBACK4-NEXT: shlq %cl, %r15
20564 ; FALLBACK4-NEXT: orq %r13, %r15
20565 ; FALLBACK4-NEXT: movl %eax, %ecx
20566 ; FALLBACK4-NEXT: shrq %cl, %r12
20567 ; FALLBACK4-NEXT: addq %r14, %r14
20568 ; FALLBACK4-NEXT: movl %esi, %ecx
20569 ; FALLBACK4-NEXT: shlq %cl, %r14
20570 ; FALLBACK4-NEXT: orq %r12, %r14
20571 ; FALLBACK4-NEXT: movl %eax, %ecx
20572 ; FALLBACK4-NEXT: shrq %cl, %rbp
20573 ; FALLBACK4-NEXT: movq -72(%rsp,%rdi), %rdi
20574 ; FALLBACK4-NEXT: leaq (%rdi,%rdi), %r12
20575 ; FALLBACK4-NEXT: movl %esi, %ecx
20576 ; FALLBACK4-NEXT: shlq %cl, %r12
20577 ; FALLBACK4-NEXT: orq %rbp, %r12
20578 ; FALLBACK4-NEXT: movl %eax, %ecx
20579 ; FALLBACK4-NEXT: shrq %cl, %r9
20580 ; FALLBACK4-NEXT: addq %rbx, %rbx
20581 ; FALLBACK4-NEXT: movl %esi, %ecx
20582 ; FALLBACK4-NEXT: shlq %cl, %rbx
20583 ; FALLBACK4-NEXT: orq %r9, %rbx
20584 ; FALLBACK4-NEXT: movl %eax, %ecx
20585 ; FALLBACK4-NEXT: sarq %cl, %rdi
20586 ; FALLBACK4-NEXT: movq %rdi, 56(%rdx)
20587 ; FALLBACK4-NEXT: movq %rbx, 8(%rdx)
20588 ; FALLBACK4-NEXT: movq %r12, 48(%rdx)
20589 ; FALLBACK4-NEXT: movq %r14, 32(%rdx)
20590 ; FALLBACK4-NEXT: movq %r15, 40(%rdx)
20591 ; FALLBACK4-NEXT: movq %r10, 16(%rdx)
20592 ; FALLBACK4-NEXT: movq %r11, 24(%rdx)
20593 ; FALLBACK4-NEXT: movq %r8, (%rdx)
20594 ; FALLBACK4-NEXT: addq $8, %rsp
20595 ; FALLBACK4-NEXT: popq %rbx
20596 ; FALLBACK4-NEXT: popq %r12
20597 ; FALLBACK4-NEXT: popq %r13
20598 ; FALLBACK4-NEXT: popq %r14
20599 ; FALLBACK4-NEXT: popq %r15
20600 ; FALLBACK4-NEXT: popq %rbp
20601 ; FALLBACK4-NEXT: retq
20603 ; FALLBACK5-LABEL: ashr_64bytes:
20604 ; FALLBACK5: # %bb.0:
20605 ; FALLBACK5-NEXT: pushq %r15
20606 ; FALLBACK5-NEXT: pushq %r14
20607 ; FALLBACK5-NEXT: pushq %rbx
20608 ; FALLBACK5-NEXT: movups (%rdi), %xmm0
20609 ; FALLBACK5-NEXT: movups 16(%rdi), %xmm1
20610 ; FALLBACK5-NEXT: movups 32(%rdi), %xmm2
20611 ; FALLBACK5-NEXT: movq 48(%rdi), %rcx
20612 ; FALLBACK5-NEXT: movq 56(%rdi), %rdi
20613 ; FALLBACK5-NEXT: movl (%rsi), %eax
20614 ; FALLBACK5-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20615 ; FALLBACK5-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20616 ; FALLBACK5-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
20617 ; FALLBACK5-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
20618 ; FALLBACK5-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
20619 ; FALLBACK5-NEXT: sarq $63, %rdi
20620 ; FALLBACK5-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20621 ; FALLBACK5-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20622 ; FALLBACK5-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20623 ; FALLBACK5-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20624 ; FALLBACK5-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20625 ; FALLBACK5-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20626 ; FALLBACK5-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20627 ; FALLBACK5-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20628 ; FALLBACK5-NEXT: leal (,%rax,8), %ecx
20629 ; FALLBACK5-NEXT: andl $56, %ecx
20630 ; FALLBACK5-NEXT: andl $56, %eax
20631 ; FALLBACK5-NEXT: movq -96(%rsp,%rax), %rdi
20632 ; FALLBACK5-NEXT: movq -104(%rsp,%rax), %r9
20633 ; FALLBACK5-NEXT: movq %r9, %rsi
20634 ; FALLBACK5-NEXT: shrdq %cl, %rdi, %rsi
20635 ; FALLBACK5-NEXT: movq -112(%rsp,%rax), %r10
20636 ; FALLBACK5-NEXT: movq %r10, %r8
20637 ; FALLBACK5-NEXT: shrdq %cl, %r9, %r8
20638 ; FALLBACK5-NEXT: movq -80(%rsp,%rax), %r9
20639 ; FALLBACK5-NEXT: movq -88(%rsp,%rax), %r11
20640 ; FALLBACK5-NEXT: movq %r11, %rbx
20641 ; FALLBACK5-NEXT: shrdq %cl, %r9, %rbx
20642 ; FALLBACK5-NEXT: shrdq %cl, %r11, %rdi
20643 ; FALLBACK5-NEXT: movq -72(%rsp,%rax), %r11
20644 ; FALLBACK5-NEXT: shrdq %cl, %r11, %r9
20645 ; FALLBACK5-NEXT: movq -128(%rsp,%rax), %r14
20646 ; FALLBACK5-NEXT: movq -120(%rsp,%rax), %rax
20647 ; FALLBACK5-NEXT: movq %rax, %r15
20648 ; FALLBACK5-NEXT: shrdq %cl, %r10, %r15
20649 ; FALLBACK5-NEXT: shrdq %cl, %rax, %r14
20650 ; FALLBACK5-NEXT: # kill: def $cl killed $cl killed $ecx
20651 ; FALLBACK5-NEXT: sarq %cl, %r11
20652 ; FALLBACK5-NEXT: movq %r15, 8(%rdx)
20653 ; FALLBACK5-NEXT: movq %r9, 48(%rdx)
20654 ; FALLBACK5-NEXT: movq %r11, 56(%rdx)
20655 ; FALLBACK5-NEXT: movq %rdi, 32(%rdx)
20656 ; FALLBACK5-NEXT: movq %rbx, 40(%rdx)
20657 ; FALLBACK5-NEXT: movq %r8, 16(%rdx)
20658 ; FALLBACK5-NEXT: movq %rsi, 24(%rdx)
20659 ; FALLBACK5-NEXT: movq %r14, (%rdx)
20660 ; FALLBACK5-NEXT: popq %rbx
20661 ; FALLBACK5-NEXT: popq %r14
20662 ; FALLBACK5-NEXT: popq %r15
20663 ; FALLBACK5-NEXT: retq
20665 ; FALLBACK6-LABEL: ashr_64bytes:
20666 ; FALLBACK6: # %bb.0:
20667 ; FALLBACK6-NEXT: pushq %rbp
20668 ; FALLBACK6-NEXT: pushq %r15
20669 ; FALLBACK6-NEXT: pushq %r14
20670 ; FALLBACK6-NEXT: pushq %r13
20671 ; FALLBACK6-NEXT: pushq %r12
20672 ; FALLBACK6-NEXT: pushq %rbx
20673 ; FALLBACK6-NEXT: pushq %rax
20674 ; FALLBACK6-NEXT: movups (%rdi), %xmm0
20675 ; FALLBACK6-NEXT: movups 16(%rdi), %xmm1
20676 ; FALLBACK6-NEXT: movups 32(%rdi), %xmm2
20677 ; FALLBACK6-NEXT: movq 48(%rdi), %rcx
20678 ; FALLBACK6-NEXT: movq 56(%rdi), %rdi
20679 ; FALLBACK6-NEXT: movl (%rsi), %eax
20680 ; FALLBACK6-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20681 ; FALLBACK6-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20682 ; FALLBACK6-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
20683 ; FALLBACK6-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
20684 ; FALLBACK6-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
20685 ; FALLBACK6-NEXT: sarq $63, %rdi
20686 ; FALLBACK6-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20687 ; FALLBACK6-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20688 ; FALLBACK6-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20689 ; FALLBACK6-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20690 ; FALLBACK6-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20691 ; FALLBACK6-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20692 ; FALLBACK6-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20693 ; FALLBACK6-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20694 ; FALLBACK6-NEXT: leal (,%rax,8), %esi
20695 ; FALLBACK6-NEXT: andl $56, %esi
20696 ; FALLBACK6-NEXT: andl $56, %eax
20697 ; FALLBACK6-NEXT: shrxq %rsi, -128(%rsp,%rax), %r11
20698 ; FALLBACK6-NEXT: movq -112(%rsp,%rax), %rcx
20699 ; FALLBACK6-NEXT: movq -104(%rsp,%rax), %rdi
20700 ; FALLBACK6-NEXT: shrxq %rsi, %rdi, %r12
20701 ; FALLBACK6-NEXT: movq -96(%rsp,%rax), %r13
20702 ; FALLBACK6-NEXT: shrxq %rsi, %rcx, %r9
20703 ; FALLBACK6-NEXT: movq -88(%rsp,%rax), %r10
20704 ; FALLBACK6-NEXT: shrxq %rsi, %r10, %r14
20705 ; FALLBACK6-NEXT: shrxq %rsi, %r13, %r15
20706 ; FALLBACK6-NEXT: movl %esi, %ebx
20707 ; FALLBACK6-NEXT: notb %bl
20708 ; FALLBACK6-NEXT: movq -120(%rsp,%rax), %rbp
20709 ; FALLBACK6-NEXT: leaq (%rbp,%rbp), %r8
20710 ; FALLBACK6-NEXT: shlxq %rbx, %r8, %r8
20711 ; FALLBACK6-NEXT: orq %r11, %r8
20712 ; FALLBACK6-NEXT: leaq (%r13,%r13), %r11
20713 ; FALLBACK6-NEXT: shlxq %rbx, %r11, %r11
20714 ; FALLBACK6-NEXT: orq %r12, %r11
20715 ; FALLBACK6-NEXT: movq -80(%rsp,%rax), %r12
20716 ; FALLBACK6-NEXT: shrxq %rsi, %r12, %r13
20717 ; FALLBACK6-NEXT: shrxq %rsi, %rbp, %rbp
20718 ; FALLBACK6-NEXT: movq -72(%rsp,%rax), %rax
20719 ; FALLBACK6-NEXT: sarxq %rsi, %rax, %rsi
20720 ; FALLBACK6-NEXT: addq %rdi, %rdi
20721 ; FALLBACK6-NEXT: shlxq %rbx, %rdi, %rdi
20722 ; FALLBACK6-NEXT: orq %r9, %rdi
20723 ; FALLBACK6-NEXT: leaq (%r12,%r12), %r9
20724 ; FALLBACK6-NEXT: shlxq %rbx, %r9, %r9
20725 ; FALLBACK6-NEXT: orq %r14, %r9
20726 ; FALLBACK6-NEXT: addq %r10, %r10
20727 ; FALLBACK6-NEXT: shlxq %rbx, %r10, %r10
20728 ; FALLBACK6-NEXT: orq %r15, %r10
20729 ; FALLBACK6-NEXT: addq %rax, %rax
20730 ; FALLBACK6-NEXT: shlxq %rbx, %rax, %rax
20731 ; FALLBACK6-NEXT: orq %r13, %rax
20732 ; FALLBACK6-NEXT: addq %rcx, %rcx
20733 ; FALLBACK6-NEXT: shlxq %rbx, %rcx, %rcx
20734 ; FALLBACK6-NEXT: orq %rbp, %rcx
20735 ; FALLBACK6-NEXT: movq %rsi, 56(%rdx)
20736 ; FALLBACK6-NEXT: movq %rcx, 8(%rdx)
20737 ; FALLBACK6-NEXT: movq %rax, 48(%rdx)
20738 ; FALLBACK6-NEXT: movq %r10, 32(%rdx)
20739 ; FALLBACK6-NEXT: movq %r9, 40(%rdx)
20740 ; FALLBACK6-NEXT: movq %rdi, 16(%rdx)
20741 ; FALLBACK6-NEXT: movq %r11, 24(%rdx)
20742 ; FALLBACK6-NEXT: movq %r8, (%rdx)
20743 ; FALLBACK6-NEXT: addq $8, %rsp
20744 ; FALLBACK6-NEXT: popq %rbx
20745 ; FALLBACK6-NEXT: popq %r12
20746 ; FALLBACK6-NEXT: popq %r13
20747 ; FALLBACK6-NEXT: popq %r14
20748 ; FALLBACK6-NEXT: popq %r15
20749 ; FALLBACK6-NEXT: popq %rbp
20750 ; FALLBACK6-NEXT: retq
20752 ; FALLBACK7-LABEL: ashr_64bytes:
20753 ; FALLBACK7: # %bb.0:
20754 ; FALLBACK7-NEXT: pushq %r15
20755 ; FALLBACK7-NEXT: pushq %r14
20756 ; FALLBACK7-NEXT: pushq %rbx
20757 ; FALLBACK7-NEXT: movups (%rdi), %xmm0
20758 ; FALLBACK7-NEXT: movups 16(%rdi), %xmm1
20759 ; FALLBACK7-NEXT: movups 32(%rdi), %xmm2
20760 ; FALLBACK7-NEXT: movq 48(%rdi), %rcx
20761 ; FALLBACK7-NEXT: movq 56(%rdi), %rdi
20762 ; FALLBACK7-NEXT: movl (%rsi), %eax
20763 ; FALLBACK7-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20764 ; FALLBACK7-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20765 ; FALLBACK7-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
20766 ; FALLBACK7-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
20767 ; FALLBACK7-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
20768 ; FALLBACK7-NEXT: sarq $63, %rdi
20769 ; FALLBACK7-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20770 ; FALLBACK7-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20771 ; FALLBACK7-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20772 ; FALLBACK7-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20773 ; FALLBACK7-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20774 ; FALLBACK7-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20775 ; FALLBACK7-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20776 ; FALLBACK7-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20777 ; FALLBACK7-NEXT: leal (,%rax,8), %ecx
20778 ; FALLBACK7-NEXT: andl $56, %ecx
20779 ; FALLBACK7-NEXT: andl $56, %eax
20780 ; FALLBACK7-NEXT: movq -96(%rsp,%rax), %rdi
20781 ; FALLBACK7-NEXT: movq -104(%rsp,%rax), %r9
20782 ; FALLBACK7-NEXT: movq %r9, %rsi
20783 ; FALLBACK7-NEXT: shrdq %cl, %rdi, %rsi
20784 ; FALLBACK7-NEXT: movq -112(%rsp,%rax), %r10
20785 ; FALLBACK7-NEXT: movq %r10, %r8
20786 ; FALLBACK7-NEXT: shrdq %cl, %r9, %r8
20787 ; FALLBACK7-NEXT: movq -80(%rsp,%rax), %r9
20788 ; FALLBACK7-NEXT: movq -88(%rsp,%rax), %r11
20789 ; FALLBACK7-NEXT: movq %r11, %rbx
20790 ; FALLBACK7-NEXT: shrdq %cl, %r9, %rbx
20791 ; FALLBACK7-NEXT: shrdq %cl, %r11, %rdi
20792 ; FALLBACK7-NEXT: movq -72(%rsp,%rax), %r11
20793 ; FALLBACK7-NEXT: shrdq %cl, %r11, %r9
20794 ; FALLBACK7-NEXT: movq -128(%rsp,%rax), %r14
20795 ; FALLBACK7-NEXT: movq -120(%rsp,%rax), %rax
20796 ; FALLBACK7-NEXT: movq %rax, %r15
20797 ; FALLBACK7-NEXT: shrdq %cl, %r10, %r15
20798 ; FALLBACK7-NEXT: sarxq %rcx, %r11, %r10
20799 ; FALLBACK7-NEXT: # kill: def $cl killed $cl killed $rcx
20800 ; FALLBACK7-NEXT: shrdq %cl, %rax, %r14
20801 ; FALLBACK7-NEXT: movq %r15, 8(%rdx)
20802 ; FALLBACK7-NEXT: movq %r9, 48(%rdx)
20803 ; FALLBACK7-NEXT: movq %rdi, 32(%rdx)
20804 ; FALLBACK7-NEXT: movq %rbx, 40(%rdx)
20805 ; FALLBACK7-NEXT: movq %r8, 16(%rdx)
20806 ; FALLBACK7-NEXT: movq %rsi, 24(%rdx)
20807 ; FALLBACK7-NEXT: movq %r14, (%rdx)
20808 ; FALLBACK7-NEXT: movq %r10, 56(%rdx)
20809 ; FALLBACK7-NEXT: popq %rbx
20810 ; FALLBACK7-NEXT: popq %r14
20811 ; FALLBACK7-NEXT: popq %r15
20812 ; FALLBACK7-NEXT: retq
20814 ; FALLBACK8-LABEL: ashr_64bytes:
20815 ; FALLBACK8: # %bb.0:
20816 ; FALLBACK8-NEXT: pushq %rbp
20817 ; FALLBACK8-NEXT: pushq %r15
20818 ; FALLBACK8-NEXT: pushq %r14
20819 ; FALLBACK8-NEXT: pushq %r13
20820 ; FALLBACK8-NEXT: pushq %r12
20821 ; FALLBACK8-NEXT: pushq %rbx
20822 ; FALLBACK8-NEXT: pushq %rax
20823 ; FALLBACK8-NEXT: vmovups (%rdi), %ymm0
20824 ; FALLBACK8-NEXT: vmovups 32(%rdi), %xmm1
20825 ; FALLBACK8-NEXT: movq 48(%rdi), %rax
20826 ; FALLBACK8-NEXT: movq 56(%rdi), %rcx
20827 ; FALLBACK8-NEXT: movl (%rsi), %edi
20828 ; FALLBACK8-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20829 ; FALLBACK8-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
20830 ; FALLBACK8-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp)
20831 ; FALLBACK8-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
20832 ; FALLBACK8-NEXT: sarq $63, %rcx
20833 ; FALLBACK8-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20834 ; FALLBACK8-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20835 ; FALLBACK8-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20836 ; FALLBACK8-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20837 ; FALLBACK8-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20838 ; FALLBACK8-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20839 ; FALLBACK8-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20840 ; FALLBACK8-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20841 ; FALLBACK8-NEXT: leal (,%rdi,8), %eax
20842 ; FALLBACK8-NEXT: andl $56, %eax
20843 ; FALLBACK8-NEXT: andl $56, %edi
20844 ; FALLBACK8-NEXT: movq -128(%rsp,%rdi), %r10
20845 ; FALLBACK8-NEXT: movq -120(%rsp,%rdi), %r9
20846 ; FALLBACK8-NEXT: movl %eax, %ecx
20847 ; FALLBACK8-NEXT: shrq %cl, %r10
20848 ; FALLBACK8-NEXT: movl %eax, %esi
20849 ; FALLBACK8-NEXT: notb %sil
20850 ; FALLBACK8-NEXT: leaq (%r9,%r9), %r8
20851 ; FALLBACK8-NEXT: movl %esi, %ecx
20852 ; FALLBACK8-NEXT: shlq %cl, %r8
20853 ; FALLBACK8-NEXT: orq %r10, %r8
20854 ; FALLBACK8-NEXT: movq -104(%rsp,%rdi), %r10
20855 ; FALLBACK8-NEXT: movq %r10, %rbx
20856 ; FALLBACK8-NEXT: movl %eax, %ecx
20857 ; FALLBACK8-NEXT: shrq %cl, %rbx
20858 ; FALLBACK8-NEXT: movq -96(%rsp,%rdi), %r12
20859 ; FALLBACK8-NEXT: leaq (%r12,%r12), %r11
20860 ; FALLBACK8-NEXT: movl %esi, %ecx
20861 ; FALLBACK8-NEXT: shlq %cl, %r11
20862 ; FALLBACK8-NEXT: orq %rbx, %r11
20863 ; FALLBACK8-NEXT: movq -112(%rsp,%rdi), %rbx
20864 ; FALLBACK8-NEXT: movq %rbx, %r14
20865 ; FALLBACK8-NEXT: movl %eax, %ecx
20866 ; FALLBACK8-NEXT: shrq %cl, %r14
20867 ; FALLBACK8-NEXT: addq %r10, %r10
20868 ; FALLBACK8-NEXT: movl %esi, %ecx
20869 ; FALLBACK8-NEXT: shlq %cl, %r10
20870 ; FALLBACK8-NEXT: orq %r14, %r10
20871 ; FALLBACK8-NEXT: movq -88(%rsp,%rdi), %r14
20872 ; FALLBACK8-NEXT: movq %r14, %r13
20873 ; FALLBACK8-NEXT: movl %eax, %ecx
20874 ; FALLBACK8-NEXT: shrq %cl, %r13
20875 ; FALLBACK8-NEXT: movq -80(%rsp,%rdi), %rbp
20876 ; FALLBACK8-NEXT: leaq (%rbp,%rbp), %r15
20877 ; FALLBACK8-NEXT: movl %esi, %ecx
20878 ; FALLBACK8-NEXT: shlq %cl, %r15
20879 ; FALLBACK8-NEXT: orq %r13, %r15
20880 ; FALLBACK8-NEXT: movl %eax, %ecx
20881 ; FALLBACK8-NEXT: shrq %cl, %r12
20882 ; FALLBACK8-NEXT: addq %r14, %r14
20883 ; FALLBACK8-NEXT: movl %esi, %ecx
20884 ; FALLBACK8-NEXT: shlq %cl, %r14
20885 ; FALLBACK8-NEXT: orq %r12, %r14
20886 ; FALLBACK8-NEXT: movl %eax, %ecx
20887 ; FALLBACK8-NEXT: shrq %cl, %rbp
20888 ; FALLBACK8-NEXT: movq -72(%rsp,%rdi), %rdi
20889 ; FALLBACK8-NEXT: leaq (%rdi,%rdi), %r12
20890 ; FALLBACK8-NEXT: movl %esi, %ecx
20891 ; FALLBACK8-NEXT: shlq %cl, %r12
20892 ; FALLBACK8-NEXT: orq %rbp, %r12
20893 ; FALLBACK8-NEXT: movl %eax, %ecx
20894 ; FALLBACK8-NEXT: shrq %cl, %r9
20895 ; FALLBACK8-NEXT: addq %rbx, %rbx
20896 ; FALLBACK8-NEXT: movl %esi, %ecx
20897 ; FALLBACK8-NEXT: shlq %cl, %rbx
20898 ; FALLBACK8-NEXT: orq %r9, %rbx
20899 ; FALLBACK8-NEXT: movl %eax, %ecx
20900 ; FALLBACK8-NEXT: sarq %cl, %rdi
20901 ; FALLBACK8-NEXT: movq %rdi, 56(%rdx)
20902 ; FALLBACK8-NEXT: movq %rbx, 8(%rdx)
20903 ; FALLBACK8-NEXT: movq %r12, 48(%rdx)
20904 ; FALLBACK8-NEXT: movq %r14, 32(%rdx)
20905 ; FALLBACK8-NEXT: movq %r15, 40(%rdx)
20906 ; FALLBACK8-NEXT: movq %r10, 16(%rdx)
20907 ; FALLBACK8-NEXT: movq %r11, 24(%rdx)
20908 ; FALLBACK8-NEXT: movq %r8, (%rdx)
20909 ; FALLBACK8-NEXT: addq $8, %rsp
20910 ; FALLBACK8-NEXT: popq %rbx
20911 ; FALLBACK8-NEXT: popq %r12
20912 ; FALLBACK8-NEXT: popq %r13
20913 ; FALLBACK8-NEXT: popq %r14
20914 ; FALLBACK8-NEXT: popq %r15
20915 ; FALLBACK8-NEXT: popq %rbp
20916 ; FALLBACK8-NEXT: vzeroupper
20917 ; FALLBACK8-NEXT: retq
20919 ; FALLBACK9-LABEL: ashr_64bytes:
20920 ; FALLBACK9: # %bb.0:
20921 ; FALLBACK9-NEXT: pushq %r15
20922 ; FALLBACK9-NEXT: pushq %r14
20923 ; FALLBACK9-NEXT: pushq %rbx
20924 ; FALLBACK9-NEXT: vmovups (%rdi), %ymm0
20925 ; FALLBACK9-NEXT: vmovups 32(%rdi), %xmm1
20926 ; FALLBACK9-NEXT: movq 48(%rdi), %rcx
20927 ; FALLBACK9-NEXT: movq 56(%rdi), %rdi
20928 ; FALLBACK9-NEXT: movl (%rsi), %eax
20929 ; FALLBACK9-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20930 ; FALLBACK9-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20931 ; FALLBACK9-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp)
20932 ; FALLBACK9-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
20933 ; FALLBACK9-NEXT: sarq $63, %rdi
20934 ; FALLBACK9-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20935 ; FALLBACK9-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20936 ; FALLBACK9-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20937 ; FALLBACK9-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20938 ; FALLBACK9-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20939 ; FALLBACK9-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20940 ; FALLBACK9-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20941 ; FALLBACK9-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20942 ; FALLBACK9-NEXT: leal (,%rax,8), %ecx
20943 ; FALLBACK9-NEXT: andl $56, %ecx
20944 ; FALLBACK9-NEXT: andl $56, %eax
20945 ; FALLBACK9-NEXT: movq -96(%rsp,%rax), %rdi
20946 ; FALLBACK9-NEXT: movq -104(%rsp,%rax), %r9
20947 ; FALLBACK9-NEXT: movq %r9, %rsi
20948 ; FALLBACK9-NEXT: shrdq %cl, %rdi, %rsi
20949 ; FALLBACK9-NEXT: movq -112(%rsp,%rax), %r10
20950 ; FALLBACK9-NEXT: movq %r10, %r8
20951 ; FALLBACK9-NEXT: shrdq %cl, %r9, %r8
20952 ; FALLBACK9-NEXT: movq -80(%rsp,%rax), %r9
20953 ; FALLBACK9-NEXT: movq -88(%rsp,%rax), %r11
20954 ; FALLBACK9-NEXT: movq %r11, %rbx
20955 ; FALLBACK9-NEXT: shrdq %cl, %r9, %rbx
20956 ; FALLBACK9-NEXT: shrdq %cl, %r11, %rdi
20957 ; FALLBACK9-NEXT: movq -72(%rsp,%rax), %r11
20958 ; FALLBACK9-NEXT: shrdq %cl, %r11, %r9
20959 ; FALLBACK9-NEXT: movq -128(%rsp,%rax), %r14
20960 ; FALLBACK9-NEXT: movq -120(%rsp,%rax), %rax
20961 ; FALLBACK9-NEXT: movq %rax, %r15
20962 ; FALLBACK9-NEXT: shrdq %cl, %r10, %r15
20963 ; FALLBACK9-NEXT: shrdq %cl, %rax, %r14
20964 ; FALLBACK9-NEXT: # kill: def $cl killed $cl killed $ecx
20965 ; FALLBACK9-NEXT: sarq %cl, %r11
20966 ; FALLBACK9-NEXT: movq %r15, 8(%rdx)
20967 ; FALLBACK9-NEXT: movq %r9, 48(%rdx)
20968 ; FALLBACK9-NEXT: movq %r11, 56(%rdx)
20969 ; FALLBACK9-NEXT: movq %rdi, 32(%rdx)
20970 ; FALLBACK9-NEXT: movq %rbx, 40(%rdx)
20971 ; FALLBACK9-NEXT: movq %r8, 16(%rdx)
20972 ; FALLBACK9-NEXT: movq %rsi, 24(%rdx)
20973 ; FALLBACK9-NEXT: movq %r14, (%rdx)
20974 ; FALLBACK9-NEXT: popq %rbx
20975 ; FALLBACK9-NEXT: popq %r14
20976 ; FALLBACK9-NEXT: popq %r15
20977 ; FALLBACK9-NEXT: vzeroupper
20978 ; FALLBACK9-NEXT: retq
20980 ; FALLBACK10-LABEL: ashr_64bytes:
20981 ; FALLBACK10: # %bb.0:
20982 ; FALLBACK10-NEXT: pushq %rbp
20983 ; FALLBACK10-NEXT: pushq %r15
20984 ; FALLBACK10-NEXT: pushq %r14
20985 ; FALLBACK10-NEXT: pushq %r13
20986 ; FALLBACK10-NEXT: pushq %r12
20987 ; FALLBACK10-NEXT: pushq %rbx
20988 ; FALLBACK10-NEXT: pushq %rax
20989 ; FALLBACK10-NEXT: vmovups (%rdi), %ymm0
20990 ; FALLBACK10-NEXT: vmovups 32(%rdi), %xmm1
20991 ; FALLBACK10-NEXT: movq 48(%rdi), %rcx
20992 ; FALLBACK10-NEXT: movq 56(%rdi), %rdi
20993 ; FALLBACK10-NEXT: movl (%rsi), %eax
20994 ; FALLBACK10-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
20995 ; FALLBACK10-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
20996 ; FALLBACK10-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp)
20997 ; FALLBACK10-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
20998 ; FALLBACK10-NEXT: sarq $63, %rdi
20999 ; FALLBACK10-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21000 ; FALLBACK10-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21001 ; FALLBACK10-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21002 ; FALLBACK10-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21003 ; FALLBACK10-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21004 ; FALLBACK10-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21005 ; FALLBACK10-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21006 ; FALLBACK10-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21007 ; FALLBACK10-NEXT: leal (,%rax,8), %esi
21008 ; FALLBACK10-NEXT: andl $56, %esi
21009 ; FALLBACK10-NEXT: andl $56, %eax
21010 ; FALLBACK10-NEXT: shrxq %rsi, -128(%rsp,%rax), %r11
21011 ; FALLBACK10-NEXT: movq -112(%rsp,%rax), %rcx
21012 ; FALLBACK10-NEXT: movq -104(%rsp,%rax), %rdi
21013 ; FALLBACK10-NEXT: shrxq %rsi, %rdi, %r12
21014 ; FALLBACK10-NEXT: movq -96(%rsp,%rax), %r13
21015 ; FALLBACK10-NEXT: shrxq %rsi, %rcx, %r9
21016 ; FALLBACK10-NEXT: movq -88(%rsp,%rax), %r10
21017 ; FALLBACK10-NEXT: shrxq %rsi, %r10, %r14
21018 ; FALLBACK10-NEXT: shrxq %rsi, %r13, %r15
21019 ; FALLBACK10-NEXT: movl %esi, %ebx
21020 ; FALLBACK10-NEXT: notb %bl
21021 ; FALLBACK10-NEXT: movq -120(%rsp,%rax), %rbp
21022 ; FALLBACK10-NEXT: leaq (%rbp,%rbp), %r8
21023 ; FALLBACK10-NEXT: shlxq %rbx, %r8, %r8
21024 ; FALLBACK10-NEXT: orq %r11, %r8
21025 ; FALLBACK10-NEXT: leaq (%r13,%r13), %r11
21026 ; FALLBACK10-NEXT: shlxq %rbx, %r11, %r11
21027 ; FALLBACK10-NEXT: orq %r12, %r11
21028 ; FALLBACK10-NEXT: movq -80(%rsp,%rax), %r12
21029 ; FALLBACK10-NEXT: shrxq %rsi, %r12, %r13
21030 ; FALLBACK10-NEXT: shrxq %rsi, %rbp, %rbp
21031 ; FALLBACK10-NEXT: movq -72(%rsp,%rax), %rax
21032 ; FALLBACK10-NEXT: sarxq %rsi, %rax, %rsi
21033 ; FALLBACK10-NEXT: addq %rdi, %rdi
21034 ; FALLBACK10-NEXT: shlxq %rbx, %rdi, %rdi
21035 ; FALLBACK10-NEXT: orq %r9, %rdi
21036 ; FALLBACK10-NEXT: leaq (%r12,%r12), %r9
21037 ; FALLBACK10-NEXT: shlxq %rbx, %r9, %r9
21038 ; FALLBACK10-NEXT: orq %r14, %r9
21039 ; FALLBACK10-NEXT: addq %r10, %r10
21040 ; FALLBACK10-NEXT: shlxq %rbx, %r10, %r10
21041 ; FALLBACK10-NEXT: orq %r15, %r10
21042 ; FALLBACK10-NEXT: addq %rax, %rax
21043 ; FALLBACK10-NEXT: shlxq %rbx, %rax, %rax
21044 ; FALLBACK10-NEXT: orq %r13, %rax
21045 ; FALLBACK10-NEXT: addq %rcx, %rcx
21046 ; FALLBACK10-NEXT: shlxq %rbx, %rcx, %rcx
21047 ; FALLBACK10-NEXT: orq %rbp, %rcx
21048 ; FALLBACK10-NEXT: movq %rsi, 56(%rdx)
21049 ; FALLBACK10-NEXT: movq %rcx, 8(%rdx)
21050 ; FALLBACK10-NEXT: movq %rax, 48(%rdx)
21051 ; FALLBACK10-NEXT: movq %r10, 32(%rdx)
21052 ; FALLBACK10-NEXT: movq %r9, 40(%rdx)
21053 ; FALLBACK10-NEXT: movq %rdi, 16(%rdx)
21054 ; FALLBACK10-NEXT: movq %r11, 24(%rdx)
21055 ; FALLBACK10-NEXT: movq %r8, (%rdx)
21056 ; FALLBACK10-NEXT: addq $8, %rsp
21057 ; FALLBACK10-NEXT: popq %rbx
21058 ; FALLBACK10-NEXT: popq %r12
21059 ; FALLBACK10-NEXT: popq %r13
21060 ; FALLBACK10-NEXT: popq %r14
21061 ; FALLBACK10-NEXT: popq %r15
21062 ; FALLBACK10-NEXT: popq %rbp
21063 ; FALLBACK10-NEXT: vzeroupper
21064 ; FALLBACK10-NEXT: retq
21066 ; FALLBACK11-LABEL: ashr_64bytes:
21067 ; FALLBACK11: # %bb.0:
21068 ; FALLBACK11-NEXT: pushq %r15
21069 ; FALLBACK11-NEXT: pushq %r14
21070 ; FALLBACK11-NEXT: pushq %rbx
21071 ; FALLBACK11-NEXT: vmovups (%rdi), %ymm0
21072 ; FALLBACK11-NEXT: vmovups 32(%rdi), %xmm1
21073 ; FALLBACK11-NEXT: movq 48(%rdi), %rcx
21074 ; FALLBACK11-NEXT: movq 56(%rdi), %rdi
21075 ; FALLBACK11-NEXT: movl (%rsi), %eax
21076 ; FALLBACK11-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21077 ; FALLBACK11-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
21078 ; FALLBACK11-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp)
21079 ; FALLBACK11-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
21080 ; FALLBACK11-NEXT: sarq $63, %rdi
21081 ; FALLBACK11-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21082 ; FALLBACK11-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21083 ; FALLBACK11-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21084 ; FALLBACK11-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21085 ; FALLBACK11-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21086 ; FALLBACK11-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21087 ; FALLBACK11-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21088 ; FALLBACK11-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21089 ; FALLBACK11-NEXT: leal (,%rax,8), %ecx
21090 ; FALLBACK11-NEXT: andl $56, %ecx
21091 ; FALLBACK11-NEXT: andl $56, %eax
21092 ; FALLBACK11-NEXT: movq -96(%rsp,%rax), %rdi
21093 ; FALLBACK11-NEXT: movq -104(%rsp,%rax), %r9
21094 ; FALLBACK11-NEXT: movq %r9, %rsi
21095 ; FALLBACK11-NEXT: shrdq %cl, %rdi, %rsi
21096 ; FALLBACK11-NEXT: movq -112(%rsp,%rax), %r10
21097 ; FALLBACK11-NEXT: movq %r10, %r8
21098 ; FALLBACK11-NEXT: shrdq %cl, %r9, %r8
21099 ; FALLBACK11-NEXT: movq -80(%rsp,%rax), %r9
21100 ; FALLBACK11-NEXT: movq -88(%rsp,%rax), %r11
21101 ; FALLBACK11-NEXT: movq %r11, %rbx
21102 ; FALLBACK11-NEXT: shrdq %cl, %r9, %rbx
21103 ; FALLBACK11-NEXT: shrdq %cl, %r11, %rdi
21104 ; FALLBACK11-NEXT: movq -72(%rsp,%rax), %r11
21105 ; FALLBACK11-NEXT: shrdq %cl, %r11, %r9
21106 ; FALLBACK11-NEXT: movq -128(%rsp,%rax), %r14
21107 ; FALLBACK11-NEXT: movq -120(%rsp,%rax), %rax
21108 ; FALLBACK11-NEXT: movq %rax, %r15
21109 ; FALLBACK11-NEXT: shrdq %cl, %r10, %r15
21110 ; FALLBACK11-NEXT: sarxq %rcx, %r11, %r10
21111 ; FALLBACK11-NEXT: # kill: def $cl killed $cl killed $rcx
21112 ; FALLBACK11-NEXT: shrdq %cl, %rax, %r14
21113 ; FALLBACK11-NEXT: movq %r15, 8(%rdx)
21114 ; FALLBACK11-NEXT: movq %r9, 48(%rdx)
21115 ; FALLBACK11-NEXT: movq %rdi, 32(%rdx)
21116 ; FALLBACK11-NEXT: movq %rbx, 40(%rdx)
21117 ; FALLBACK11-NEXT: movq %r8, 16(%rdx)
21118 ; FALLBACK11-NEXT: movq %rsi, 24(%rdx)
21119 ; FALLBACK11-NEXT: movq %r14, (%rdx)
21120 ; FALLBACK11-NEXT: movq %r10, 56(%rdx)
21121 ; FALLBACK11-NEXT: popq %rbx
21122 ; FALLBACK11-NEXT: popq %r14
21123 ; FALLBACK11-NEXT: popq %r15
21124 ; FALLBACK11-NEXT: vzeroupper
21125 ; FALLBACK11-NEXT: retq
21127 ; FALLBACK12-LABEL: ashr_64bytes:
21128 ; FALLBACK12: # %bb.0:
21129 ; FALLBACK12-NEXT: pushq %rbp
21130 ; FALLBACK12-NEXT: pushq %r15
21131 ; FALLBACK12-NEXT: pushq %r14
21132 ; FALLBACK12-NEXT: pushq %r13
21133 ; FALLBACK12-NEXT: pushq %r12
21134 ; FALLBACK12-NEXT: pushq %rbx
21135 ; FALLBACK12-NEXT: pushq %rax
21136 ; FALLBACK12-NEXT: vmovups (%rdi), %ymm0
21137 ; FALLBACK12-NEXT: vmovups 32(%rdi), %xmm1
21138 ; FALLBACK12-NEXT: movq 48(%rdi), %rax
21139 ; FALLBACK12-NEXT: movq 56(%rdi), %rcx
21140 ; FALLBACK12-NEXT: movl (%rsi), %edi
21141 ; FALLBACK12-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
21142 ; FALLBACK12-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
21143 ; FALLBACK12-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp)
21144 ; FALLBACK12-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
21145 ; FALLBACK12-NEXT: sarq $63, %rcx
21146 ; FALLBACK12-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
21147 ; FALLBACK12-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
21148 ; FALLBACK12-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
21149 ; FALLBACK12-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
21150 ; FALLBACK12-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
21151 ; FALLBACK12-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
21152 ; FALLBACK12-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
21153 ; FALLBACK12-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
21154 ; FALLBACK12-NEXT: leal (,%rdi,8), %eax
21155 ; FALLBACK12-NEXT: andl $56, %eax
21156 ; FALLBACK12-NEXT: andl $56, %edi
21157 ; FALLBACK12-NEXT: movq -128(%rsp,%rdi), %r10
21158 ; FALLBACK12-NEXT: movq -120(%rsp,%rdi), %r9
21159 ; FALLBACK12-NEXT: movl %eax, %ecx
21160 ; FALLBACK12-NEXT: shrq %cl, %r10
21161 ; FALLBACK12-NEXT: movl %eax, %esi
21162 ; FALLBACK12-NEXT: notb %sil
21163 ; FALLBACK12-NEXT: leaq (%r9,%r9), %r8
21164 ; FALLBACK12-NEXT: movl %esi, %ecx
21165 ; FALLBACK12-NEXT: shlq %cl, %r8
21166 ; FALLBACK12-NEXT: orq %r10, %r8
21167 ; FALLBACK12-NEXT: movq -104(%rsp,%rdi), %r10
21168 ; FALLBACK12-NEXT: movq %r10, %rbx
21169 ; FALLBACK12-NEXT: movl %eax, %ecx
21170 ; FALLBACK12-NEXT: shrq %cl, %rbx
21171 ; FALLBACK12-NEXT: movq -96(%rsp,%rdi), %r12
21172 ; FALLBACK12-NEXT: leaq (%r12,%r12), %r11
21173 ; FALLBACK12-NEXT: movl %esi, %ecx
21174 ; FALLBACK12-NEXT: shlq %cl, %r11
21175 ; FALLBACK12-NEXT: orq %rbx, %r11
21176 ; FALLBACK12-NEXT: movq -112(%rsp,%rdi), %rbx
21177 ; FALLBACK12-NEXT: movq %rbx, %r14
21178 ; FALLBACK12-NEXT: movl %eax, %ecx
21179 ; FALLBACK12-NEXT: shrq %cl, %r14
21180 ; FALLBACK12-NEXT: addq %r10, %r10
21181 ; FALLBACK12-NEXT: movl %esi, %ecx
21182 ; FALLBACK12-NEXT: shlq %cl, %r10
21183 ; FALLBACK12-NEXT: orq %r14, %r10
21184 ; FALLBACK12-NEXT: movq -88(%rsp,%rdi), %r14
21185 ; FALLBACK12-NEXT: movq %r14, %r13
21186 ; FALLBACK12-NEXT: movl %eax, %ecx
21187 ; FALLBACK12-NEXT: shrq %cl, %r13
21188 ; FALLBACK12-NEXT: movq -80(%rsp,%rdi), %rbp
21189 ; FALLBACK12-NEXT: leaq (%rbp,%rbp), %r15
21190 ; FALLBACK12-NEXT: movl %esi, %ecx
21191 ; FALLBACK12-NEXT: shlq %cl, %r15
21192 ; FALLBACK12-NEXT: orq %r13, %r15
21193 ; FALLBACK12-NEXT: movl %eax, %ecx
21194 ; FALLBACK12-NEXT: shrq %cl, %r12
21195 ; FALLBACK12-NEXT: addq %r14, %r14
21196 ; FALLBACK12-NEXT: movl %esi, %ecx
21197 ; FALLBACK12-NEXT: shlq %cl, %r14
21198 ; FALLBACK12-NEXT: orq %r12, %r14
21199 ; FALLBACK12-NEXT: movl %eax, %ecx
21200 ; FALLBACK12-NEXT: shrq %cl, %rbp
21201 ; FALLBACK12-NEXT: movq -72(%rsp,%rdi), %rdi
21202 ; FALLBACK12-NEXT: leaq (%rdi,%rdi), %r12
21203 ; FALLBACK12-NEXT: movl %esi, %ecx
21204 ; FALLBACK12-NEXT: shlq %cl, %r12
21205 ; FALLBACK12-NEXT: orq %rbp, %r12
21206 ; FALLBACK12-NEXT: movl %eax, %ecx
21207 ; FALLBACK12-NEXT: shrq %cl, %r9
21208 ; FALLBACK12-NEXT: addq %rbx, %rbx
21209 ; FALLBACK12-NEXT: movl %esi, %ecx
21210 ; FALLBACK12-NEXT: shlq %cl, %rbx
21211 ; FALLBACK12-NEXT: orq %r9, %rbx
21212 ; FALLBACK12-NEXT: movl %eax, %ecx
21213 ; FALLBACK12-NEXT: sarq %cl, %rdi
21214 ; FALLBACK12-NEXT: movq %rdi, 56(%rdx)
21215 ; FALLBACK12-NEXT: movq %rbx, 8(%rdx)
21216 ; FALLBACK12-NEXT: movq %r12, 48(%rdx)
21217 ; FALLBACK12-NEXT: movq %r14, 32(%rdx)
21218 ; FALLBACK12-NEXT: movq %r15, 40(%rdx)
21219 ; FALLBACK12-NEXT: movq %r10, 16(%rdx)
21220 ; FALLBACK12-NEXT: movq %r11, 24(%rdx)
21221 ; FALLBACK12-NEXT: movq %r8, (%rdx)
21222 ; FALLBACK12-NEXT: addq $8, %rsp
21223 ; FALLBACK12-NEXT: popq %rbx
21224 ; FALLBACK12-NEXT: popq %r12
21225 ; FALLBACK12-NEXT: popq %r13
21226 ; FALLBACK12-NEXT: popq %r14
21227 ; FALLBACK12-NEXT: popq %r15
21228 ; FALLBACK12-NEXT: popq %rbp
21229 ; FALLBACK12-NEXT: vzeroupper
21230 ; FALLBACK12-NEXT: retq
21232 ; FALLBACK13-LABEL: ashr_64bytes:
21233 ; FALLBACK13: # %bb.0:
21234 ; FALLBACK13-NEXT: pushq %r15
21235 ; FALLBACK13-NEXT: pushq %r14
21236 ; FALLBACK13-NEXT: pushq %rbx
21237 ; FALLBACK13-NEXT: vmovups (%rdi), %ymm0
21238 ; FALLBACK13-NEXT: vmovups 32(%rdi), %xmm1
21239 ; FALLBACK13-NEXT: movq 48(%rdi), %rcx
21240 ; FALLBACK13-NEXT: movq 56(%rdi), %rdi
21241 ; FALLBACK13-NEXT: movl (%rsi), %eax
21242 ; FALLBACK13-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21243 ; FALLBACK13-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
21244 ; FALLBACK13-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp)
21245 ; FALLBACK13-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
21246 ; FALLBACK13-NEXT: sarq $63, %rdi
21247 ; FALLBACK13-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21248 ; FALLBACK13-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21249 ; FALLBACK13-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21250 ; FALLBACK13-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21251 ; FALLBACK13-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21252 ; FALLBACK13-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21253 ; FALLBACK13-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21254 ; FALLBACK13-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21255 ; FALLBACK13-NEXT: leal (,%rax,8), %ecx
21256 ; FALLBACK13-NEXT: andl $56, %ecx
21257 ; FALLBACK13-NEXT: andl $56, %eax
21258 ; FALLBACK13-NEXT: movq -96(%rsp,%rax), %rdi
21259 ; FALLBACK13-NEXT: movq -104(%rsp,%rax), %r9
21260 ; FALLBACK13-NEXT: movq %r9, %rsi
21261 ; FALLBACK13-NEXT: shrdq %cl, %rdi, %rsi
21262 ; FALLBACK13-NEXT: movq -112(%rsp,%rax), %r10
21263 ; FALLBACK13-NEXT: movq %r10, %r8
21264 ; FALLBACK13-NEXT: shrdq %cl, %r9, %r8
21265 ; FALLBACK13-NEXT: movq -80(%rsp,%rax), %r9
21266 ; FALLBACK13-NEXT: movq -88(%rsp,%rax), %r11
21267 ; FALLBACK13-NEXT: movq %r11, %rbx
21268 ; FALLBACK13-NEXT: shrdq %cl, %r9, %rbx
21269 ; FALLBACK13-NEXT: shrdq %cl, %r11, %rdi
21270 ; FALLBACK13-NEXT: movq -72(%rsp,%rax), %r11
21271 ; FALLBACK13-NEXT: shrdq %cl, %r11, %r9
21272 ; FALLBACK13-NEXT: movq -128(%rsp,%rax), %r14
21273 ; FALLBACK13-NEXT: movq -120(%rsp,%rax), %rax
21274 ; FALLBACK13-NEXT: movq %rax, %r15
21275 ; FALLBACK13-NEXT: shrdq %cl, %r10, %r15
21276 ; FALLBACK13-NEXT: shrdq %cl, %rax, %r14
21277 ; FALLBACK13-NEXT: # kill: def $cl killed $cl killed $ecx
21278 ; FALLBACK13-NEXT: sarq %cl, %r11
21279 ; FALLBACK13-NEXT: movq %r15, 8(%rdx)
21280 ; FALLBACK13-NEXT: movq %r9, 48(%rdx)
21281 ; FALLBACK13-NEXT: movq %r11, 56(%rdx)
21282 ; FALLBACK13-NEXT: movq %rdi, 32(%rdx)
21283 ; FALLBACK13-NEXT: movq %rbx, 40(%rdx)
21284 ; FALLBACK13-NEXT: movq %r8, 16(%rdx)
21285 ; FALLBACK13-NEXT: movq %rsi, 24(%rdx)
21286 ; FALLBACK13-NEXT: movq %r14, (%rdx)
21287 ; FALLBACK13-NEXT: popq %rbx
21288 ; FALLBACK13-NEXT: popq %r14
21289 ; FALLBACK13-NEXT: popq %r15
21290 ; FALLBACK13-NEXT: vzeroupper
21291 ; FALLBACK13-NEXT: retq
21293 ; FALLBACK14-LABEL: ashr_64bytes:
21294 ; FALLBACK14: # %bb.0:
21295 ; FALLBACK14-NEXT: pushq %rbp
21296 ; FALLBACK14-NEXT: pushq %r15
21297 ; FALLBACK14-NEXT: pushq %r14
21298 ; FALLBACK14-NEXT: pushq %r13
21299 ; FALLBACK14-NEXT: pushq %r12
21300 ; FALLBACK14-NEXT: pushq %rbx
21301 ; FALLBACK14-NEXT: pushq %rax
21302 ; FALLBACK14-NEXT: vmovups (%rdi), %ymm0
21303 ; FALLBACK14-NEXT: vmovups 32(%rdi), %xmm1
21304 ; FALLBACK14-NEXT: movq 48(%rdi), %rcx
21305 ; FALLBACK14-NEXT: movq 56(%rdi), %rdi
21306 ; FALLBACK14-NEXT: movl (%rsi), %eax
21307 ; FALLBACK14-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21308 ; FALLBACK14-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
21309 ; FALLBACK14-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp)
21310 ; FALLBACK14-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
21311 ; FALLBACK14-NEXT: sarq $63, %rdi
21312 ; FALLBACK14-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21313 ; FALLBACK14-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21314 ; FALLBACK14-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21315 ; FALLBACK14-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21316 ; FALLBACK14-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21317 ; FALLBACK14-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21318 ; FALLBACK14-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21319 ; FALLBACK14-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21320 ; FALLBACK14-NEXT: leal (,%rax,8), %esi
21321 ; FALLBACK14-NEXT: andl $56, %esi
21322 ; FALLBACK14-NEXT: andl $56, %eax
21323 ; FALLBACK14-NEXT: shrxq %rsi, -128(%rsp,%rax), %r11
21324 ; FALLBACK14-NEXT: movq -112(%rsp,%rax), %rcx
21325 ; FALLBACK14-NEXT: movq -104(%rsp,%rax), %rdi
21326 ; FALLBACK14-NEXT: shrxq %rsi, %rdi, %r12
21327 ; FALLBACK14-NEXT: movq -96(%rsp,%rax), %r13
21328 ; FALLBACK14-NEXT: shrxq %rsi, %rcx, %r9
21329 ; FALLBACK14-NEXT: movq -88(%rsp,%rax), %r10
21330 ; FALLBACK14-NEXT: shrxq %rsi, %r10, %r14
21331 ; FALLBACK14-NEXT: shrxq %rsi, %r13, %r15
21332 ; FALLBACK14-NEXT: movl %esi, %ebx
21333 ; FALLBACK14-NEXT: notb %bl
21334 ; FALLBACK14-NEXT: movq -120(%rsp,%rax), %rbp
21335 ; FALLBACK14-NEXT: leaq (%rbp,%rbp), %r8
21336 ; FALLBACK14-NEXT: shlxq %rbx, %r8, %r8
21337 ; FALLBACK14-NEXT: orq %r11, %r8
21338 ; FALLBACK14-NEXT: leaq (%r13,%r13), %r11
21339 ; FALLBACK14-NEXT: shlxq %rbx, %r11, %r11
21340 ; FALLBACK14-NEXT: orq %r12, %r11
21341 ; FALLBACK14-NEXT: movq -80(%rsp,%rax), %r12
21342 ; FALLBACK14-NEXT: shrxq %rsi, %r12, %r13
21343 ; FALLBACK14-NEXT: shrxq %rsi, %rbp, %rbp
21344 ; FALLBACK14-NEXT: movq -72(%rsp,%rax), %rax
21345 ; FALLBACK14-NEXT: sarxq %rsi, %rax, %rsi
21346 ; FALLBACK14-NEXT: addq %rdi, %rdi
21347 ; FALLBACK14-NEXT: shlxq %rbx, %rdi, %rdi
21348 ; FALLBACK14-NEXT: orq %r9, %rdi
21349 ; FALLBACK14-NEXT: leaq (%r12,%r12), %r9
21350 ; FALLBACK14-NEXT: shlxq %rbx, %r9, %r9
21351 ; FALLBACK14-NEXT: orq %r14, %r9
21352 ; FALLBACK14-NEXT: addq %r10, %r10
21353 ; FALLBACK14-NEXT: shlxq %rbx, %r10, %r10
21354 ; FALLBACK14-NEXT: orq %r15, %r10
21355 ; FALLBACK14-NEXT: addq %rax, %rax
21356 ; FALLBACK14-NEXT: shlxq %rbx, %rax, %rax
21357 ; FALLBACK14-NEXT: orq %r13, %rax
21358 ; FALLBACK14-NEXT: addq %rcx, %rcx
21359 ; FALLBACK14-NEXT: shlxq %rbx, %rcx, %rcx
21360 ; FALLBACK14-NEXT: orq %rbp, %rcx
21361 ; FALLBACK14-NEXT: movq %rsi, 56(%rdx)
21362 ; FALLBACK14-NEXT: movq %rcx, 8(%rdx)
21363 ; FALLBACK14-NEXT: movq %rax, 48(%rdx)
21364 ; FALLBACK14-NEXT: movq %r10, 32(%rdx)
21365 ; FALLBACK14-NEXT: movq %r9, 40(%rdx)
21366 ; FALLBACK14-NEXT: movq %rdi, 16(%rdx)
21367 ; FALLBACK14-NEXT: movq %r11, 24(%rdx)
21368 ; FALLBACK14-NEXT: movq %r8, (%rdx)
21369 ; FALLBACK14-NEXT: addq $8, %rsp
21370 ; FALLBACK14-NEXT: popq %rbx
21371 ; FALLBACK14-NEXT: popq %r12
21372 ; FALLBACK14-NEXT: popq %r13
21373 ; FALLBACK14-NEXT: popq %r14
21374 ; FALLBACK14-NEXT: popq %r15
21375 ; FALLBACK14-NEXT: popq %rbp
21376 ; FALLBACK14-NEXT: vzeroupper
21377 ; FALLBACK14-NEXT: retq
21379 ; FALLBACK15-LABEL: ashr_64bytes:
21380 ; FALLBACK15: # %bb.0:
21381 ; FALLBACK15-NEXT: pushq %r15
21382 ; FALLBACK15-NEXT: pushq %r14
21383 ; FALLBACK15-NEXT: pushq %rbx
21384 ; FALLBACK15-NEXT: vmovups (%rdi), %ymm0
21385 ; FALLBACK15-NEXT: vmovups 32(%rdi), %xmm1
21386 ; FALLBACK15-NEXT: movq 48(%rdi), %rcx
21387 ; FALLBACK15-NEXT: movq 56(%rdi), %rdi
21388 ; FALLBACK15-NEXT: movl (%rsi), %eax
21389 ; FALLBACK15-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21390 ; FALLBACK15-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
21391 ; FALLBACK15-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp)
21392 ; FALLBACK15-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
21393 ; FALLBACK15-NEXT: sarq $63, %rdi
21394 ; FALLBACK15-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21395 ; FALLBACK15-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21396 ; FALLBACK15-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21397 ; FALLBACK15-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21398 ; FALLBACK15-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21399 ; FALLBACK15-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21400 ; FALLBACK15-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21401 ; FALLBACK15-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
21402 ; FALLBACK15-NEXT: leal (,%rax,8), %ecx
21403 ; FALLBACK15-NEXT: andl $56, %ecx
21404 ; FALLBACK15-NEXT: andl $56, %eax
21405 ; FALLBACK15-NEXT: movq -96(%rsp,%rax), %rdi
21406 ; FALLBACK15-NEXT: movq -104(%rsp,%rax), %r9
21407 ; FALLBACK15-NEXT: movq %r9, %rsi
21408 ; FALLBACK15-NEXT: shrdq %cl, %rdi, %rsi
21409 ; FALLBACK15-NEXT: movq -112(%rsp,%rax), %r10
21410 ; FALLBACK15-NEXT: movq %r10, %r8
21411 ; FALLBACK15-NEXT: shrdq %cl, %r9, %r8
21412 ; FALLBACK15-NEXT: movq -80(%rsp,%rax), %r9
21413 ; FALLBACK15-NEXT: movq -88(%rsp,%rax), %r11
21414 ; FALLBACK15-NEXT: movq %r11, %rbx
21415 ; FALLBACK15-NEXT: shrdq %cl, %r9, %rbx
21416 ; FALLBACK15-NEXT: shrdq %cl, %r11, %rdi
21417 ; FALLBACK15-NEXT: movq -72(%rsp,%rax), %r11
21418 ; FALLBACK15-NEXT: shrdq %cl, %r11, %r9
21419 ; FALLBACK15-NEXT: movq -128(%rsp,%rax), %r14
21420 ; FALLBACK15-NEXT: movq -120(%rsp,%rax), %rax
21421 ; FALLBACK15-NEXT: movq %rax, %r15
21422 ; FALLBACK15-NEXT: shrdq %cl, %r10, %r15
21423 ; FALLBACK15-NEXT: sarxq %rcx, %r11, %r10
21424 ; FALLBACK15-NEXT: # kill: def $cl killed $cl killed $rcx
21425 ; FALLBACK15-NEXT: shrdq %cl, %rax, %r14
21426 ; FALLBACK15-NEXT: movq %r15, 8(%rdx)
21427 ; FALLBACK15-NEXT: movq %r9, 48(%rdx)
21428 ; FALLBACK15-NEXT: movq %rdi, 32(%rdx)
21429 ; FALLBACK15-NEXT: movq %rbx, 40(%rdx)
21430 ; FALLBACK15-NEXT: movq %r8, 16(%rdx)
21431 ; FALLBACK15-NEXT: movq %rsi, 24(%rdx)
21432 ; FALLBACK15-NEXT: movq %r14, (%rdx)
21433 ; FALLBACK15-NEXT: movq %r10, 56(%rdx)
21434 ; FALLBACK15-NEXT: popq %rbx
21435 ; FALLBACK15-NEXT: popq %r14
21436 ; FALLBACK15-NEXT: popq %r15
21437 ; FALLBACK15-NEXT: vzeroupper
21438 ; FALLBACK15-NEXT: retq
21440 ; FALLBACK16-LABEL: ashr_64bytes:
21441 ; FALLBACK16: # %bb.0:
21442 ; FALLBACK16-NEXT: pushl %ebp
21443 ; FALLBACK16-NEXT: pushl %ebx
21444 ; FALLBACK16-NEXT: pushl %edi
21445 ; FALLBACK16-NEXT: pushl %esi
21446 ; FALLBACK16-NEXT: subl $204, %esp
21447 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %ecx
21448 ; FALLBACK16-NEXT: movl (%ecx), %eax
21449 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21450 ; FALLBACK16-NEXT: movl 4(%ecx), %eax
21451 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21452 ; FALLBACK16-NEXT: movl 8(%ecx), %eax
21453 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21454 ; FALLBACK16-NEXT: movl 12(%ecx), %eax
21455 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21456 ; FALLBACK16-NEXT: movl 16(%ecx), %eax
21457 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21458 ; FALLBACK16-NEXT: movl 20(%ecx), %eax
21459 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21460 ; FALLBACK16-NEXT: movl 24(%ecx), %eax
21461 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21462 ; FALLBACK16-NEXT: movl 28(%ecx), %eax
21463 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21464 ; FALLBACK16-NEXT: movl 32(%ecx), %eax
21465 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21466 ; FALLBACK16-NEXT: movl 36(%ecx), %eax
21467 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21468 ; FALLBACK16-NEXT: movl 40(%ecx), %ebx
21469 ; FALLBACK16-NEXT: movl 44(%ecx), %edi
21470 ; FALLBACK16-NEXT: movl 48(%ecx), %esi
21471 ; FALLBACK16-NEXT: movl 52(%ecx), %edx
21472 ; FALLBACK16-NEXT: movl 56(%ecx), %eax
21473 ; FALLBACK16-NEXT: movl 60(%ecx), %ecx
21474 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %ebp
21475 ; FALLBACK16-NEXT: movl (%ebp), %ebp
21476 ; FALLBACK16-NEXT: movl %eax, {{[0-9]+}}(%esp)
21477 ; FALLBACK16-NEXT: movl %edx, {{[0-9]+}}(%esp)
21478 ; FALLBACK16-NEXT: movl %esi, {{[0-9]+}}(%esp)
21479 ; FALLBACK16-NEXT: movl %edi, {{[0-9]+}}(%esp)
21480 ; FALLBACK16-NEXT: movl %ebx, {{[0-9]+}}(%esp)
21481 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
21482 ; FALLBACK16-NEXT: movl %eax, {{[0-9]+}}(%esp)
21483 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
21484 ; FALLBACK16-NEXT: movl %eax, {{[0-9]+}}(%esp)
21485 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
21486 ; FALLBACK16-NEXT: movl %eax, {{[0-9]+}}(%esp)
21487 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
21488 ; FALLBACK16-NEXT: movl %eax, {{[0-9]+}}(%esp)
21489 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
21490 ; FALLBACK16-NEXT: movl %eax, {{[0-9]+}}(%esp)
21491 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21492 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
21493 ; FALLBACK16-NEXT: movl %eax, {{[0-9]+}}(%esp)
21494 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
21495 ; FALLBACK16-NEXT: movl %eax, {{[0-9]+}}(%esp)
21496 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
21497 ; FALLBACK16-NEXT: movl %eax, {{[0-9]+}}(%esp)
21498 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
21499 ; FALLBACK16-NEXT: movl %eax, {{[0-9]+}}(%esp)
21500 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
21501 ; FALLBACK16-NEXT: movl %eax, {{[0-9]+}}(%esp)
21502 ; FALLBACK16-NEXT: sarl $31, %ecx
21503 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21504 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21505 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21506 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21507 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21508 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21509 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21510 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21511 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21512 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21513 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21514 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21515 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21516 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21517 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21518 ; FALLBACK16-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21519 ; FALLBACK16-NEXT: movl %ebp, %ecx
21520 ; FALLBACK16-NEXT: movl %ebp, %esi
21521 ; FALLBACK16-NEXT: andl $60, %esi
21522 ; FALLBACK16-NEXT: movl 68(%esp,%esi), %edx
21523 ; FALLBACK16-NEXT: shll $3, %ecx
21524 ; FALLBACK16-NEXT: andl $24, %ecx
21525 ; FALLBACK16-NEXT: movl %edx, %eax
21526 ; FALLBACK16-NEXT: shrl %cl, %eax
21527 ; FALLBACK16-NEXT: movl 72(%esp,%esi), %edi
21528 ; FALLBACK16-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21529 ; FALLBACK16-NEXT: addl %edi, %edi
21530 ; FALLBACK16-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
21531 ; FALLBACK16-NEXT: movl %ecx, %ebx
21532 ; FALLBACK16-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
21533 ; FALLBACK16-NEXT: notb %ch
21534 ; FALLBACK16-NEXT: movb %ch, %cl
21535 ; FALLBACK16-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
21536 ; FALLBACK16-NEXT: shll %cl, %edi
21537 ; FALLBACK16-NEXT: orl %eax, %edi
21538 ; FALLBACK16-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21539 ; FALLBACK16-NEXT: movl 64(%esp,%esi), %eax
21540 ; FALLBACK16-NEXT: movb %bl, %cl
21541 ; FALLBACK16-NEXT: shrl %cl, %eax
21542 ; FALLBACK16-NEXT: addl %edx, %edx
21543 ; FALLBACK16-NEXT: movb %ch, %cl
21544 ; FALLBACK16-NEXT: shll %cl, %edx
21545 ; FALLBACK16-NEXT: orl %eax, %edx
21546 ; FALLBACK16-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21547 ; FALLBACK16-NEXT: movl 76(%esp,%esi), %ebp
21548 ; FALLBACK16-NEXT: movl %ebp, %edx
21549 ; FALLBACK16-NEXT: movb %bl, %cl
21550 ; FALLBACK16-NEXT: shrl %cl, %edx
21551 ; FALLBACK16-NEXT: movl 80(%esp,%esi), %edi
21552 ; FALLBACK16-NEXT: leal (%edi,%edi), %eax
21553 ; FALLBACK16-NEXT: movb %ch, %cl
21554 ; FALLBACK16-NEXT: shll %cl, %eax
21555 ; FALLBACK16-NEXT: orl %edx, %eax
21556 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21557 ; FALLBACK16-NEXT: movb %bl, %cl
21558 ; FALLBACK16-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21559 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
21560 ; FALLBACK16-NEXT: shrl %cl, %eax
21561 ; FALLBACK16-NEXT: addl %ebp, %ebp
21562 ; FALLBACK16-NEXT: movb %ch, %cl
21563 ; FALLBACK16-NEXT: shll %cl, %ebp
21564 ; FALLBACK16-NEXT: orl %eax, %ebp
21565 ; FALLBACK16-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21566 ; FALLBACK16-NEXT: movl %esi, %edx
21567 ; FALLBACK16-NEXT: movl 84(%esp,%esi), %eax
21568 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21569 ; FALLBACK16-NEXT: movb %bl, %cl
21570 ; FALLBACK16-NEXT: shrl %cl, %eax
21571 ; FALLBACK16-NEXT: movl 88(%esp,%esi), %esi
21572 ; FALLBACK16-NEXT: leal (%esi,%esi), %ebp
21573 ; FALLBACK16-NEXT: movb %ch, %cl
21574 ; FALLBACK16-NEXT: shll %cl, %ebp
21575 ; FALLBACK16-NEXT: orl %eax, %ebp
21576 ; FALLBACK16-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21577 ; FALLBACK16-NEXT: movb %bl, %cl
21578 ; FALLBACK16-NEXT: shrl %cl, %edi
21579 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
21580 ; FALLBACK16-NEXT: addl %ebx, %ebx
21581 ; FALLBACK16-NEXT: movb %ch, %cl
21582 ; FALLBACK16-NEXT: shll %cl, %ebx
21583 ; FALLBACK16-NEXT: orl %edi, %ebx
21584 ; FALLBACK16-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21585 ; FALLBACK16-NEXT: movl %edx, %eax
21586 ; FALLBACK16-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21587 ; FALLBACK16-NEXT: movl 92(%esp,%edx), %ebp
21588 ; FALLBACK16-NEXT: movl %ebp, %edx
21589 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
21590 ; FALLBACK16-NEXT: movb %bl, %cl
21591 ; FALLBACK16-NEXT: shrl %cl, %edx
21592 ; FALLBACK16-NEXT: movl 96(%esp,%eax), %edi
21593 ; FALLBACK16-NEXT: leal (%edi,%edi), %eax
21594 ; FALLBACK16-NEXT: movb %ch, %cl
21595 ; FALLBACK16-NEXT: shll %cl, %eax
21596 ; FALLBACK16-NEXT: orl %edx, %eax
21597 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21598 ; FALLBACK16-NEXT: movb %bl, %cl
21599 ; FALLBACK16-NEXT: shrl %cl, %esi
21600 ; FALLBACK16-NEXT: addl %ebp, %ebp
21601 ; FALLBACK16-NEXT: movb %ch, %cl
21602 ; FALLBACK16-NEXT: shll %cl, %ebp
21603 ; FALLBACK16-NEXT: orl %esi, %ebp
21604 ; FALLBACK16-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21605 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
21606 ; FALLBACK16-NEXT: movl 100(%esp,%edx), %eax
21607 ; FALLBACK16-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21608 ; FALLBACK16-NEXT: movb %bl, %cl
21609 ; FALLBACK16-NEXT: shrl %cl, %eax
21610 ; FALLBACK16-NEXT: movl 104(%esp,%edx), %esi
21611 ; FALLBACK16-NEXT: leal (%esi,%esi), %ebp
21612 ; FALLBACK16-NEXT: movb %ch, %cl
21613 ; FALLBACK16-NEXT: shll %cl, %ebp
21614 ; FALLBACK16-NEXT: orl %eax, %ebp
21615 ; FALLBACK16-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21616 ; FALLBACK16-NEXT: movl %ebx, %edx
21617 ; FALLBACK16-NEXT: movb %dl, %cl
21618 ; FALLBACK16-NEXT: shrl %cl, %edi
21619 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
21620 ; FALLBACK16-NEXT: addl %ebx, %ebx
21621 ; FALLBACK16-NEXT: movb %ch, %cl
21622 ; FALLBACK16-NEXT: shll %cl, %ebx
21623 ; FALLBACK16-NEXT: orl %edi, %ebx
21624 ; FALLBACK16-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21625 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
21626 ; FALLBACK16-NEXT: movl 108(%esp,%ebp), %edi
21627 ; FALLBACK16-NEXT: movl %edi, %eax
21628 ; FALLBACK16-NEXT: movl %edx, %ebx
21629 ; FALLBACK16-NEXT: movl %ebx, %ecx
21630 ; FALLBACK16-NEXT: shrl %cl, %eax
21631 ; FALLBACK16-NEXT: movl 112(%esp,%ebp), %ecx
21632 ; FALLBACK16-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21633 ; FALLBACK16-NEXT: movl %ebp, %edx
21634 ; FALLBACK16-NEXT: leal (%ecx,%ecx), %ebp
21635 ; FALLBACK16-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
21636 ; FALLBACK16-NEXT: movb %ch, %cl
21637 ; FALLBACK16-NEXT: shll %cl, %ebp
21638 ; FALLBACK16-NEXT: orl %eax, %ebp
21639 ; FALLBACK16-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21640 ; FALLBACK16-NEXT: movb %bl, %cl
21641 ; FALLBACK16-NEXT: shrl %cl, %esi
21642 ; FALLBACK16-NEXT: addl %edi, %edi
21643 ; FALLBACK16-NEXT: movb %ch, %cl
21644 ; FALLBACK16-NEXT: shll %cl, %edi
21645 ; FALLBACK16-NEXT: orl %esi, %edi
21646 ; FALLBACK16-NEXT: movl 116(%esp,%edx), %esi
21647 ; FALLBACK16-NEXT: movl %esi, %eax
21648 ; FALLBACK16-NEXT: movl %ebx, %ecx
21649 ; FALLBACK16-NEXT: shrl %cl, %eax
21650 ; FALLBACK16-NEXT: movl 120(%esp,%edx), %edx
21651 ; FALLBACK16-NEXT: leal (%edx,%edx), %ebp
21652 ; FALLBACK16-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
21653 ; FALLBACK16-NEXT: movb %ch, %cl
21654 ; FALLBACK16-NEXT: shll %cl, %ebp
21655 ; FALLBACK16-NEXT: orl %eax, %ebp
21656 ; FALLBACK16-NEXT: movb %bl, %cl
21657 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
21658 ; FALLBACK16-NEXT: shrl %cl, %eax
21659 ; FALLBACK16-NEXT: addl %esi, %esi
21660 ; FALLBACK16-NEXT: movb %ch, %cl
21661 ; FALLBACK16-NEXT: shll %cl, %esi
21662 ; FALLBACK16-NEXT: orl %eax, %esi
21663 ; FALLBACK16-NEXT: movb %bl, %cl
21664 ; FALLBACK16-NEXT: movl %edx, %eax
21665 ; FALLBACK16-NEXT: shrl %cl, %eax
21666 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
21667 ; FALLBACK16-NEXT: movl 124(%esp,%edx), %ebx
21668 ; FALLBACK16-NEXT: leal (%ebx,%ebx), %edx
21669 ; FALLBACK16-NEXT: movb %ch, %cl
21670 ; FALLBACK16-NEXT: shll %cl, %edx
21671 ; FALLBACK16-NEXT: orl %eax, %edx
21672 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
21673 ; FALLBACK16-NEXT: # kill: def $cl killed $cl killed $ecx
21674 ; FALLBACK16-NEXT: sarl %cl, %ebx
21675 ; FALLBACK16-NEXT: movl {{[0-9]+}}(%esp), %eax
21676 ; FALLBACK16-NEXT: movl %ebx, 60(%eax)
21677 ; FALLBACK16-NEXT: movl %edx, 56(%eax)
21678 ; FALLBACK16-NEXT: movl %esi, 48(%eax)
21679 ; FALLBACK16-NEXT: movl %ebp, 52(%eax)
21680 ; FALLBACK16-NEXT: movl %edi, 40(%eax)
21681 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
21682 ; FALLBACK16-NEXT: movl %ecx, 44(%eax)
21683 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
21684 ; FALLBACK16-NEXT: movl %ecx, 32(%eax)
21685 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
21686 ; FALLBACK16-NEXT: movl %ecx, 36(%eax)
21687 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
21688 ; FALLBACK16-NEXT: movl %ecx, 24(%eax)
21689 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
21690 ; FALLBACK16-NEXT: movl %ecx, 28(%eax)
21691 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
21692 ; FALLBACK16-NEXT: movl %ecx, 16(%eax)
21693 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
21694 ; FALLBACK16-NEXT: movl %ecx, 20(%eax)
21695 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
21696 ; FALLBACK16-NEXT: movl %ecx, 8(%eax)
21697 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
21698 ; FALLBACK16-NEXT: movl %ecx, 12(%eax)
21699 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
21700 ; FALLBACK16-NEXT: movl %ecx, (%eax)
21701 ; FALLBACK16-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
21702 ; FALLBACK16-NEXT: movl %ecx, 4(%eax)
21703 ; FALLBACK16-NEXT: addl $204, %esp
21704 ; FALLBACK16-NEXT: popl %esi
21705 ; FALLBACK16-NEXT: popl %edi
21706 ; FALLBACK16-NEXT: popl %ebx
21707 ; FALLBACK16-NEXT: popl %ebp
21708 ; FALLBACK16-NEXT: retl
21710 ; FALLBACK17-LABEL: ashr_64bytes:
21711 ; FALLBACK17: # %bb.0:
21712 ; FALLBACK17-NEXT: pushl %ebp
21713 ; FALLBACK17-NEXT: pushl %ebx
21714 ; FALLBACK17-NEXT: pushl %edi
21715 ; FALLBACK17-NEXT: pushl %esi
21716 ; FALLBACK17-NEXT: subl $188, %esp
21717 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %eax
21718 ; FALLBACK17-NEXT: movl (%eax), %ecx
21719 ; FALLBACK17-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21720 ; FALLBACK17-NEXT: movl 4(%eax), %ecx
21721 ; FALLBACK17-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21722 ; FALLBACK17-NEXT: movl 8(%eax), %ecx
21723 ; FALLBACK17-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21724 ; FALLBACK17-NEXT: movl 12(%eax), %ecx
21725 ; FALLBACK17-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21726 ; FALLBACK17-NEXT: movl 16(%eax), %ecx
21727 ; FALLBACK17-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21728 ; FALLBACK17-NEXT: movl 20(%eax), %ecx
21729 ; FALLBACK17-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21730 ; FALLBACK17-NEXT: movl 24(%eax), %ecx
21731 ; FALLBACK17-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21732 ; FALLBACK17-NEXT: movl 28(%eax), %ecx
21733 ; FALLBACK17-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21734 ; FALLBACK17-NEXT: movl 32(%eax), %ecx
21735 ; FALLBACK17-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21736 ; FALLBACK17-NEXT: movl 36(%eax), %ecx
21737 ; FALLBACK17-NEXT: movl %ecx, (%esp) # 4-byte Spill
21738 ; FALLBACK17-NEXT: movl 40(%eax), %ebp
21739 ; FALLBACK17-NEXT: movl 44(%eax), %ebx
21740 ; FALLBACK17-NEXT: movl 48(%eax), %edi
21741 ; FALLBACK17-NEXT: movl 52(%eax), %esi
21742 ; FALLBACK17-NEXT: movl 56(%eax), %edx
21743 ; FALLBACK17-NEXT: movl 60(%eax), %eax
21744 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %ecx
21745 ; FALLBACK17-NEXT: movl (%ecx), %ecx
21746 ; FALLBACK17-NEXT: movl %edx, {{[0-9]+}}(%esp)
21747 ; FALLBACK17-NEXT: movl %esi, {{[0-9]+}}(%esp)
21748 ; FALLBACK17-NEXT: movl %edi, {{[0-9]+}}(%esp)
21749 ; FALLBACK17-NEXT: movl %ebx, {{[0-9]+}}(%esp)
21750 ; FALLBACK17-NEXT: movl %ebp, {{[0-9]+}}(%esp)
21751 ; FALLBACK17-NEXT: movl (%esp), %edx # 4-byte Reload
21752 ; FALLBACK17-NEXT: movl %edx, {{[0-9]+}}(%esp)
21753 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
21754 ; FALLBACK17-NEXT: movl %edx, {{[0-9]+}}(%esp)
21755 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
21756 ; FALLBACK17-NEXT: movl %edx, {{[0-9]+}}(%esp)
21757 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
21758 ; FALLBACK17-NEXT: movl %edx, {{[0-9]+}}(%esp)
21759 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
21760 ; FALLBACK17-NEXT: movl %edx, {{[0-9]+}}(%esp)
21761 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
21762 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
21763 ; FALLBACK17-NEXT: movl %edx, {{[0-9]+}}(%esp)
21764 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
21765 ; FALLBACK17-NEXT: movl %edx, {{[0-9]+}}(%esp)
21766 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
21767 ; FALLBACK17-NEXT: movl %edx, {{[0-9]+}}(%esp)
21768 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
21769 ; FALLBACK17-NEXT: movl %edx, {{[0-9]+}}(%esp)
21770 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
21771 ; FALLBACK17-NEXT: movl %edx, {{[0-9]+}}(%esp)
21772 ; FALLBACK17-NEXT: sarl $31, %eax
21773 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
21774 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
21775 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
21776 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
21777 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
21778 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
21779 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
21780 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
21781 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
21782 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
21783 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
21784 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
21785 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
21786 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
21787 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
21788 ; FALLBACK17-NEXT: movl %eax, {{[0-9]+}}(%esp)
21789 ; FALLBACK17-NEXT: movl %ecx, %ebp
21790 ; FALLBACK17-NEXT: andl $60, %ebp
21791 ; FALLBACK17-NEXT: movl 56(%esp,%ebp), %edx
21792 ; FALLBACK17-NEXT: movl 52(%esp,%ebp), %eax
21793 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21794 ; FALLBACK17-NEXT: shll $3, %ecx
21795 ; FALLBACK17-NEXT: andl $24, %ecx
21796 ; FALLBACK17-NEXT: shrdl %cl, %edx, %eax
21797 ; FALLBACK17-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21798 ; FALLBACK17-NEXT: movl 64(%esp,%ebp), %edi
21799 ; FALLBACK17-NEXT: movl 60(%esp,%ebp), %eax
21800 ; FALLBACK17-NEXT: movl %eax, %esi
21801 ; FALLBACK17-NEXT: shrdl %cl, %edi, %esi
21802 ; FALLBACK17-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21803 ; FALLBACK17-NEXT: shrdl %cl, %eax, %edx
21804 ; FALLBACK17-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21805 ; FALLBACK17-NEXT: movl 72(%esp,%ebp), %esi
21806 ; FALLBACK17-NEXT: movl 68(%esp,%ebp), %eax
21807 ; FALLBACK17-NEXT: movl %eax, %edx
21808 ; FALLBACK17-NEXT: shrdl %cl, %esi, %edx
21809 ; FALLBACK17-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21810 ; FALLBACK17-NEXT: shrdl %cl, %eax, %edi
21811 ; FALLBACK17-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21812 ; FALLBACK17-NEXT: movl 80(%esp,%ebp), %edi
21813 ; FALLBACK17-NEXT: movl 76(%esp,%ebp), %eax
21814 ; FALLBACK17-NEXT: movl %eax, %edx
21815 ; FALLBACK17-NEXT: shrdl %cl, %edi, %edx
21816 ; FALLBACK17-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21817 ; FALLBACK17-NEXT: shrdl %cl, %eax, %esi
21818 ; FALLBACK17-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21819 ; FALLBACK17-NEXT: movl 88(%esp,%ebp), %esi
21820 ; FALLBACK17-NEXT: movl 84(%esp,%ebp), %eax
21821 ; FALLBACK17-NEXT: movl %eax, %edx
21822 ; FALLBACK17-NEXT: shrdl %cl, %esi, %edx
21823 ; FALLBACK17-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21824 ; FALLBACK17-NEXT: movl %esi, %edx
21825 ; FALLBACK17-NEXT: shrdl %cl, %eax, %edi
21826 ; FALLBACK17-NEXT: movl %edi, (%esp) # 4-byte Spill
21827 ; FALLBACK17-NEXT: movl 96(%esp,%ebp), %esi
21828 ; FALLBACK17-NEXT: movl 92(%esp,%ebp), %eax
21829 ; FALLBACK17-NEXT: movl %eax, %edi
21830 ; FALLBACK17-NEXT: shrdl %cl, %esi, %edi
21831 ; FALLBACK17-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21832 ; FALLBACK17-NEXT: shrdl %cl, %eax, %edx
21833 ; FALLBACK17-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21834 ; FALLBACK17-NEXT: movl 104(%esp,%ebp), %edx
21835 ; FALLBACK17-NEXT: movl 100(%esp,%ebp), %eax
21836 ; FALLBACK17-NEXT: movl %eax, %edi
21837 ; FALLBACK17-NEXT: shrdl %cl, %edx, %edi
21838 ; FALLBACK17-NEXT: shrdl %cl, %eax, %esi
21839 ; FALLBACK17-NEXT: movl 48(%esp,%ebp), %ebx
21840 ; FALLBACK17-NEXT: movl 108(%esp,%ebp), %eax
21841 ; FALLBACK17-NEXT: shrdl %cl, %eax, %edx
21842 ; FALLBACK17-NEXT: movl {{[0-9]+}}(%esp), %ebp
21843 ; FALLBACK17-NEXT: movl %edx, 56(%ebp)
21844 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
21845 ; FALLBACK17-NEXT: shrdl %cl, %edx, %ebx
21846 ; FALLBACK17-NEXT: # kill: def $cl killed $cl killed $ecx
21847 ; FALLBACK17-NEXT: sarl %cl, %eax
21848 ; FALLBACK17-NEXT: movl %eax, 60(%ebp)
21849 ; FALLBACK17-NEXT: movl %esi, 48(%ebp)
21850 ; FALLBACK17-NEXT: movl %edi, 52(%ebp)
21851 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
21852 ; FALLBACK17-NEXT: movl %eax, 40(%ebp)
21853 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
21854 ; FALLBACK17-NEXT: movl %eax, 44(%ebp)
21855 ; FALLBACK17-NEXT: movl (%esp), %eax # 4-byte Reload
21856 ; FALLBACK17-NEXT: movl %eax, 32(%ebp)
21857 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
21858 ; FALLBACK17-NEXT: movl %eax, 36(%ebp)
21859 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
21860 ; FALLBACK17-NEXT: movl %eax, 24(%ebp)
21861 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
21862 ; FALLBACK17-NEXT: movl %eax, 28(%ebp)
21863 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
21864 ; FALLBACK17-NEXT: movl %eax, 16(%ebp)
21865 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
21866 ; FALLBACK17-NEXT: movl %eax, 20(%ebp)
21867 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
21868 ; FALLBACK17-NEXT: movl %eax, 8(%ebp)
21869 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
21870 ; FALLBACK17-NEXT: movl %eax, 12(%ebp)
21871 ; FALLBACK17-NEXT: movl %ebx, (%ebp)
21872 ; FALLBACK17-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
21873 ; FALLBACK17-NEXT: movl %eax, 4(%ebp)
21874 ; FALLBACK17-NEXT: addl $188, %esp
21875 ; FALLBACK17-NEXT: popl %esi
21876 ; FALLBACK17-NEXT: popl %edi
21877 ; FALLBACK17-NEXT: popl %ebx
21878 ; FALLBACK17-NEXT: popl %ebp
21879 ; FALLBACK17-NEXT: retl
21881 ; FALLBACK18-LABEL: ashr_64bytes:
21882 ; FALLBACK18: # %bb.0:
21883 ; FALLBACK18-NEXT: pushl %ebp
21884 ; FALLBACK18-NEXT: pushl %ebx
21885 ; FALLBACK18-NEXT: pushl %edi
21886 ; FALLBACK18-NEXT: pushl %esi
21887 ; FALLBACK18-NEXT: subl $204, %esp
21888 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %eax
21889 ; FALLBACK18-NEXT: movl (%eax), %ecx
21890 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21891 ; FALLBACK18-NEXT: movl 4(%eax), %ecx
21892 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21893 ; FALLBACK18-NEXT: movl 8(%eax), %ecx
21894 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21895 ; FALLBACK18-NEXT: movl 12(%eax), %ecx
21896 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21897 ; FALLBACK18-NEXT: movl 16(%eax), %ecx
21898 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21899 ; FALLBACK18-NEXT: movl 20(%eax), %ecx
21900 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21901 ; FALLBACK18-NEXT: movl 24(%eax), %ecx
21902 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21903 ; FALLBACK18-NEXT: movl 28(%eax), %ecx
21904 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21905 ; FALLBACK18-NEXT: movl 32(%eax), %ecx
21906 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21907 ; FALLBACK18-NEXT: movl 36(%eax), %ecx
21908 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21909 ; FALLBACK18-NEXT: movl 40(%eax), %ebp
21910 ; FALLBACK18-NEXT: movl 44(%eax), %ebx
21911 ; FALLBACK18-NEXT: movl 48(%eax), %edi
21912 ; FALLBACK18-NEXT: movl 52(%eax), %esi
21913 ; FALLBACK18-NEXT: movl 56(%eax), %edx
21914 ; FALLBACK18-NEXT: movl 60(%eax), %ecx
21915 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %eax
21916 ; FALLBACK18-NEXT: movl (%eax), %eax
21917 ; FALLBACK18-NEXT: movl %edx, {{[0-9]+}}(%esp)
21918 ; FALLBACK18-NEXT: movl %esi, {{[0-9]+}}(%esp)
21919 ; FALLBACK18-NEXT: movl %edi, {{[0-9]+}}(%esp)
21920 ; FALLBACK18-NEXT: movl %ebx, {{[0-9]+}}(%esp)
21921 ; FALLBACK18-NEXT: movl %ebp, {{[0-9]+}}(%esp)
21922 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
21923 ; FALLBACK18-NEXT: movl %edx, {{[0-9]+}}(%esp)
21924 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
21925 ; FALLBACK18-NEXT: movl %edx, {{[0-9]+}}(%esp)
21926 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
21927 ; FALLBACK18-NEXT: movl %edx, {{[0-9]+}}(%esp)
21928 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
21929 ; FALLBACK18-NEXT: movl %edx, {{[0-9]+}}(%esp)
21930 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
21931 ; FALLBACK18-NEXT: movl %edx, {{[0-9]+}}(%esp)
21932 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21933 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
21934 ; FALLBACK18-NEXT: movl %edx, {{[0-9]+}}(%esp)
21935 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
21936 ; FALLBACK18-NEXT: movl %edx, {{[0-9]+}}(%esp)
21937 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
21938 ; FALLBACK18-NEXT: movl %edx, {{[0-9]+}}(%esp)
21939 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
21940 ; FALLBACK18-NEXT: movl %edx, {{[0-9]+}}(%esp)
21941 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
21942 ; FALLBACK18-NEXT: movl %edx, {{[0-9]+}}(%esp)
21943 ; FALLBACK18-NEXT: sarl $31, %ecx
21944 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21945 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21946 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21947 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21948 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21949 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21950 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21951 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21952 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21953 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21954 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21955 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21956 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21957 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21958 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21959 ; FALLBACK18-NEXT: movl %ecx, {{[0-9]+}}(%esp)
21960 ; FALLBACK18-NEXT: movl %eax, %ecx
21961 ; FALLBACK18-NEXT: leal (,%eax,8), %edx
21962 ; FALLBACK18-NEXT: andl $24, %edx
21963 ; FALLBACK18-NEXT: andl $60, %ecx
21964 ; FALLBACK18-NEXT: movl 68(%esp,%ecx), %esi
21965 ; FALLBACK18-NEXT: movl 72(%esp,%ecx), %edi
21966 ; FALLBACK18-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21967 ; FALLBACK18-NEXT: shrxl %edx, %esi, %eax
21968 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21969 ; FALLBACK18-NEXT: movl %edx, %ebx
21970 ; FALLBACK18-NEXT: notb %bl
21971 ; FALLBACK18-NEXT: leal (%edi,%edi), %ebp
21972 ; FALLBACK18-NEXT: shlxl %ebx, %ebp, %eax
21973 ; FALLBACK18-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
21974 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21975 ; FALLBACK18-NEXT: shrxl %edx, 64(%esp,%ecx), %edi
21976 ; FALLBACK18-NEXT: addl %esi, %esi
21977 ; FALLBACK18-NEXT: shlxl %ebx, %esi, %eax
21978 ; FALLBACK18-NEXT: orl %edi, %eax
21979 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21980 ; FALLBACK18-NEXT: movl 80(%esp,%ecx), %esi
21981 ; FALLBACK18-NEXT: leal (%esi,%esi), %edi
21982 ; FALLBACK18-NEXT: shlxl %ebx, %edi, %eax
21983 ; FALLBACK18-NEXT: movl 76(%esp,%ecx), %edi
21984 ; FALLBACK18-NEXT: shrxl %edx, %edi, %ebp
21985 ; FALLBACK18-NEXT: orl %ebp, %eax
21986 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21987 ; FALLBACK18-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
21988 ; FALLBACK18-NEXT: addl %edi, %edi
21989 ; FALLBACK18-NEXT: shlxl %ebx, %edi, %edi
21990 ; FALLBACK18-NEXT: orl %eax, %edi
21991 ; FALLBACK18-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21992 ; FALLBACK18-NEXT: movl 88(%esp,%ecx), %eax
21993 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
21994 ; FALLBACK18-NEXT: leal (%eax,%eax), %edi
21995 ; FALLBACK18-NEXT: shlxl %ebx, %edi, %eax
21996 ; FALLBACK18-NEXT: movl 84(%esp,%ecx), %edi
21997 ; FALLBACK18-NEXT: shrxl %edx, %edi, %ebp
21998 ; FALLBACK18-NEXT: orl %ebp, %eax
21999 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22000 ; FALLBACK18-NEXT: shrxl %edx, %esi, %esi
22001 ; FALLBACK18-NEXT: addl %edi, %edi
22002 ; FALLBACK18-NEXT: shlxl %ebx, %edi, %eax
22003 ; FALLBACK18-NEXT: orl %esi, %eax
22004 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22005 ; FALLBACK18-NEXT: movl 96(%esp,%ecx), %esi
22006 ; FALLBACK18-NEXT: leal (%esi,%esi), %edi
22007 ; FALLBACK18-NEXT: shlxl %ebx, %edi, %eax
22008 ; FALLBACK18-NEXT: movl 92(%esp,%ecx), %edi
22009 ; FALLBACK18-NEXT: shrxl %edx, %edi, %ebp
22010 ; FALLBACK18-NEXT: orl %ebp, %eax
22011 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22012 ; FALLBACK18-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
22013 ; FALLBACK18-NEXT: addl %edi, %edi
22014 ; FALLBACK18-NEXT: shlxl %ebx, %edi, %edi
22015 ; FALLBACK18-NEXT: orl %eax, %edi
22016 ; FALLBACK18-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22017 ; FALLBACK18-NEXT: movl 104(%esp,%ecx), %eax
22018 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22019 ; FALLBACK18-NEXT: leal (%eax,%eax), %edi
22020 ; FALLBACK18-NEXT: shlxl %ebx, %edi, %eax
22021 ; FALLBACK18-NEXT: movl 100(%esp,%ecx), %edi
22022 ; FALLBACK18-NEXT: shrxl %edx, %edi, %ebp
22023 ; FALLBACK18-NEXT: orl %ebp, %eax
22024 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22025 ; FALLBACK18-NEXT: shrxl %edx, %esi, %esi
22026 ; FALLBACK18-NEXT: addl %edi, %edi
22027 ; FALLBACK18-NEXT: shlxl %ebx, %edi, %eax
22028 ; FALLBACK18-NEXT: orl %esi, %eax
22029 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22030 ; FALLBACK18-NEXT: movl 112(%esp,%ecx), %eax
22031 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22032 ; FALLBACK18-NEXT: leal (%eax,%eax), %esi
22033 ; FALLBACK18-NEXT: shlxl %ebx, %esi, %eax
22034 ; FALLBACK18-NEXT: movl 108(%esp,%ecx), %esi
22035 ; FALLBACK18-NEXT: movl %ecx, %edi
22036 ; FALLBACK18-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22037 ; FALLBACK18-NEXT: shrxl %edx, %esi, %ebp
22038 ; FALLBACK18-NEXT: orl %ebp, %eax
22039 ; FALLBACK18-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22040 ; FALLBACK18-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
22041 ; FALLBACK18-NEXT: addl %esi, %esi
22042 ; FALLBACK18-NEXT: shlxl %ebx, %esi, %esi
22043 ; FALLBACK18-NEXT: orl %ecx, %esi
22044 ; FALLBACK18-NEXT: movl 120(%esp,%edi), %ebp
22045 ; FALLBACK18-NEXT: leal (%ebp,%ebp), %ecx
22046 ; FALLBACK18-NEXT: shlxl %ebx, %ecx, %ecx
22047 ; FALLBACK18-NEXT: movl 116(%esp,%edi), %eax
22048 ; FALLBACK18-NEXT: shrxl %edx, %eax, %edi
22049 ; FALLBACK18-NEXT: orl %edi, %ecx
22050 ; FALLBACK18-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
22051 ; FALLBACK18-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22052 ; FALLBACK18-NEXT: addl %eax, %eax
22053 ; FALLBACK18-NEXT: shlxl %ebx, %eax, %edi
22054 ; FALLBACK18-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
22055 ; FALLBACK18-NEXT: shrxl %edx, %ebp, %eax
22056 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
22057 ; FALLBACK18-NEXT: movl 124(%esp,%ebp), %ebp
22058 ; FALLBACK18-NEXT: sarxl %edx, %ebp, %edx
22059 ; FALLBACK18-NEXT: addl %ebp, %ebp
22060 ; FALLBACK18-NEXT: shlxl %ebx, %ebp, %ebx
22061 ; FALLBACK18-NEXT: orl %eax, %ebx
22062 ; FALLBACK18-NEXT: movl {{[0-9]+}}(%esp), %eax
22063 ; FALLBACK18-NEXT: movl %edx, 60(%eax)
22064 ; FALLBACK18-NEXT: movl %ebx, 56(%eax)
22065 ; FALLBACK18-NEXT: movl %edi, 48(%eax)
22066 ; FALLBACK18-NEXT: movl %ecx, 52(%eax)
22067 ; FALLBACK18-NEXT: movl %esi, 40(%eax)
22068 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22069 ; FALLBACK18-NEXT: movl %ecx, 44(%eax)
22070 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22071 ; FALLBACK18-NEXT: movl %ecx, 32(%eax)
22072 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22073 ; FALLBACK18-NEXT: movl %ecx, 36(%eax)
22074 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22075 ; FALLBACK18-NEXT: movl %ecx, 24(%eax)
22076 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22077 ; FALLBACK18-NEXT: movl %ecx, 28(%eax)
22078 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22079 ; FALLBACK18-NEXT: movl %ecx, 16(%eax)
22080 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22081 ; FALLBACK18-NEXT: movl %ecx, 20(%eax)
22082 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22083 ; FALLBACK18-NEXT: movl %ecx, 8(%eax)
22084 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22085 ; FALLBACK18-NEXT: movl %ecx, 12(%eax)
22086 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22087 ; FALLBACK18-NEXT: movl %ecx, (%eax)
22088 ; FALLBACK18-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22089 ; FALLBACK18-NEXT: movl %ecx, 4(%eax)
22090 ; FALLBACK18-NEXT: addl $204, %esp
22091 ; FALLBACK18-NEXT: popl %esi
22092 ; FALLBACK18-NEXT: popl %edi
22093 ; FALLBACK18-NEXT: popl %ebx
22094 ; FALLBACK18-NEXT: popl %ebp
22095 ; FALLBACK18-NEXT: retl
22097 ; FALLBACK19-LABEL: ashr_64bytes:
22098 ; FALLBACK19: # %bb.0:
22099 ; FALLBACK19-NEXT: pushl %ebp
22100 ; FALLBACK19-NEXT: pushl %ebx
22101 ; FALLBACK19-NEXT: pushl %edi
22102 ; FALLBACK19-NEXT: pushl %esi
22103 ; FALLBACK19-NEXT: subl $188, %esp
22104 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %eax
22105 ; FALLBACK19-NEXT: movl (%eax), %ecx
22106 ; FALLBACK19-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22107 ; FALLBACK19-NEXT: movl 4(%eax), %ecx
22108 ; FALLBACK19-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22109 ; FALLBACK19-NEXT: movl 8(%eax), %ecx
22110 ; FALLBACK19-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22111 ; FALLBACK19-NEXT: movl 12(%eax), %ecx
22112 ; FALLBACK19-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22113 ; FALLBACK19-NEXT: movl 16(%eax), %ecx
22114 ; FALLBACK19-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22115 ; FALLBACK19-NEXT: movl 20(%eax), %ecx
22116 ; FALLBACK19-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22117 ; FALLBACK19-NEXT: movl 24(%eax), %ecx
22118 ; FALLBACK19-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22119 ; FALLBACK19-NEXT: movl 28(%eax), %ecx
22120 ; FALLBACK19-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22121 ; FALLBACK19-NEXT: movl 32(%eax), %ecx
22122 ; FALLBACK19-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22123 ; FALLBACK19-NEXT: movl 36(%eax), %ecx
22124 ; FALLBACK19-NEXT: movl %ecx, (%esp) # 4-byte Spill
22125 ; FALLBACK19-NEXT: movl 40(%eax), %ebp
22126 ; FALLBACK19-NEXT: movl 44(%eax), %ebx
22127 ; FALLBACK19-NEXT: movl 48(%eax), %edi
22128 ; FALLBACK19-NEXT: movl 52(%eax), %esi
22129 ; FALLBACK19-NEXT: movl 56(%eax), %edx
22130 ; FALLBACK19-NEXT: movl 60(%eax), %eax
22131 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %ecx
22132 ; FALLBACK19-NEXT: movl (%ecx), %ecx
22133 ; FALLBACK19-NEXT: movl %edx, {{[0-9]+}}(%esp)
22134 ; FALLBACK19-NEXT: movl %esi, {{[0-9]+}}(%esp)
22135 ; FALLBACK19-NEXT: movl %edi, {{[0-9]+}}(%esp)
22136 ; FALLBACK19-NEXT: movl %ebx, {{[0-9]+}}(%esp)
22137 ; FALLBACK19-NEXT: movl %ebp, {{[0-9]+}}(%esp)
22138 ; FALLBACK19-NEXT: movl (%esp), %edx # 4-byte Reload
22139 ; FALLBACK19-NEXT: movl %edx, {{[0-9]+}}(%esp)
22140 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
22141 ; FALLBACK19-NEXT: movl %edx, {{[0-9]+}}(%esp)
22142 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
22143 ; FALLBACK19-NEXT: movl %edx, {{[0-9]+}}(%esp)
22144 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
22145 ; FALLBACK19-NEXT: movl %edx, {{[0-9]+}}(%esp)
22146 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
22147 ; FALLBACK19-NEXT: movl %edx, {{[0-9]+}}(%esp)
22148 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
22149 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
22150 ; FALLBACK19-NEXT: movl %edx, {{[0-9]+}}(%esp)
22151 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
22152 ; FALLBACK19-NEXT: movl %edx, {{[0-9]+}}(%esp)
22153 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
22154 ; FALLBACK19-NEXT: movl %edx, {{[0-9]+}}(%esp)
22155 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
22156 ; FALLBACK19-NEXT: movl %edx, {{[0-9]+}}(%esp)
22157 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
22158 ; FALLBACK19-NEXT: movl %edx, {{[0-9]+}}(%esp)
22159 ; FALLBACK19-NEXT: sarl $31, %eax
22160 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
22161 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
22162 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
22163 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
22164 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
22165 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
22166 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
22167 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
22168 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
22169 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
22170 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
22171 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
22172 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
22173 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
22174 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
22175 ; FALLBACK19-NEXT: movl %eax, {{[0-9]+}}(%esp)
22176 ; FALLBACK19-NEXT: movl %ecx, %ebp
22177 ; FALLBACK19-NEXT: andl $60, %ebp
22178 ; FALLBACK19-NEXT: movl 56(%esp,%ebp), %edx
22179 ; FALLBACK19-NEXT: movl 52(%esp,%ebp), %eax
22180 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22181 ; FALLBACK19-NEXT: shll $3, %ecx
22182 ; FALLBACK19-NEXT: andl $24, %ecx
22183 ; FALLBACK19-NEXT: shrdl %cl, %edx, %eax
22184 ; FALLBACK19-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22185 ; FALLBACK19-NEXT: movl 64(%esp,%ebp), %edi
22186 ; FALLBACK19-NEXT: movl 60(%esp,%ebp), %eax
22187 ; FALLBACK19-NEXT: movl %eax, %esi
22188 ; FALLBACK19-NEXT: shrdl %cl, %edi, %esi
22189 ; FALLBACK19-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22190 ; FALLBACK19-NEXT: shrdl %cl, %eax, %edx
22191 ; FALLBACK19-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22192 ; FALLBACK19-NEXT: movl 72(%esp,%ebp), %esi
22193 ; FALLBACK19-NEXT: movl 68(%esp,%ebp), %eax
22194 ; FALLBACK19-NEXT: movl %eax, %edx
22195 ; FALLBACK19-NEXT: shrdl %cl, %esi, %edx
22196 ; FALLBACK19-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22197 ; FALLBACK19-NEXT: shrdl %cl, %eax, %edi
22198 ; FALLBACK19-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22199 ; FALLBACK19-NEXT: movl 80(%esp,%ebp), %edi
22200 ; FALLBACK19-NEXT: movl 76(%esp,%ebp), %eax
22201 ; FALLBACK19-NEXT: movl %eax, %edx
22202 ; FALLBACK19-NEXT: shrdl %cl, %edi, %edx
22203 ; FALLBACK19-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22204 ; FALLBACK19-NEXT: shrdl %cl, %eax, %esi
22205 ; FALLBACK19-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22206 ; FALLBACK19-NEXT: movl 88(%esp,%ebp), %ebx
22207 ; FALLBACK19-NEXT: movl 84(%esp,%ebp), %eax
22208 ; FALLBACK19-NEXT: movl %eax, %edx
22209 ; FALLBACK19-NEXT: shrdl %cl, %ebx, %edx
22210 ; FALLBACK19-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22211 ; FALLBACK19-NEXT: shrdl %cl, %eax, %edi
22212 ; FALLBACK19-NEXT: movl %edi, (%esp) # 4-byte Spill
22213 ; FALLBACK19-NEXT: movl 96(%esp,%ebp), %esi
22214 ; FALLBACK19-NEXT: movl 92(%esp,%ebp), %eax
22215 ; FALLBACK19-NEXT: movl %eax, %edx
22216 ; FALLBACK19-NEXT: shrdl %cl, %esi, %edx
22217 ; FALLBACK19-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22218 ; FALLBACK19-NEXT: shrdl %cl, %eax, %ebx
22219 ; FALLBACK19-NEXT: movl 104(%esp,%ebp), %eax
22220 ; FALLBACK19-NEXT: movl 100(%esp,%ebp), %edi
22221 ; FALLBACK19-NEXT: movl %edi, %edx
22222 ; FALLBACK19-NEXT: shrdl %cl, %eax, %edx
22223 ; FALLBACK19-NEXT: shrdl %cl, %edi, %esi
22224 ; FALLBACK19-NEXT: movl 48(%esp,%ebp), %edi
22225 ; FALLBACK19-NEXT: movl 108(%esp,%ebp), %ebp
22226 ; FALLBACK19-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22227 ; FALLBACK19-NEXT: shrdl %cl, %ebp, %eax
22228 ; FALLBACK19-NEXT: movl {{[0-9]+}}(%esp), %ebp
22229 ; FALLBACK19-NEXT: movl %eax, 56(%ebp)
22230 ; FALLBACK19-NEXT: movl %esi, 48(%ebp)
22231 ; FALLBACK19-NEXT: movl %edx, 52(%ebp)
22232 ; FALLBACK19-NEXT: movl %ebx, 40(%ebp)
22233 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22234 ; FALLBACK19-NEXT: movl %eax, 44(%ebp)
22235 ; FALLBACK19-NEXT: movl (%esp), %eax # 4-byte Reload
22236 ; FALLBACK19-NEXT: movl %eax, 32(%ebp)
22237 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22238 ; FALLBACK19-NEXT: movl %eax, 36(%ebp)
22239 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22240 ; FALLBACK19-NEXT: movl %eax, 24(%ebp)
22241 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22242 ; FALLBACK19-NEXT: movl %eax, 28(%ebp)
22243 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22244 ; FALLBACK19-NEXT: movl %eax, 16(%ebp)
22245 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22246 ; FALLBACK19-NEXT: movl %eax, 20(%ebp)
22247 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22248 ; FALLBACK19-NEXT: movl %eax, 8(%ebp)
22249 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22250 ; FALLBACK19-NEXT: movl %eax, 12(%ebp)
22251 ; FALLBACK19-NEXT: sarxl %ecx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
22252 ; FALLBACK19-NEXT: # kill: def $cl killed $cl killed $ecx
22253 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
22254 ; FALLBACK19-NEXT: shrdl %cl, %edx, %edi
22255 ; FALLBACK19-NEXT: movl %edi, (%ebp)
22256 ; FALLBACK19-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22257 ; FALLBACK19-NEXT: movl %ecx, 4(%ebp)
22258 ; FALLBACK19-NEXT: movl %eax, 60(%ebp)
22259 ; FALLBACK19-NEXT: addl $188, %esp
22260 ; FALLBACK19-NEXT: popl %esi
22261 ; FALLBACK19-NEXT: popl %edi
22262 ; FALLBACK19-NEXT: popl %ebx
22263 ; FALLBACK19-NEXT: popl %ebp
22264 ; FALLBACK19-NEXT: retl
22266 ; FALLBACK20-LABEL: ashr_64bytes:
22267 ; FALLBACK20: # %bb.0:
22268 ; FALLBACK20-NEXT: pushl %ebp
22269 ; FALLBACK20-NEXT: pushl %ebx
22270 ; FALLBACK20-NEXT: pushl %edi
22271 ; FALLBACK20-NEXT: pushl %esi
22272 ; FALLBACK20-NEXT: subl $204, %esp
22273 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %eax
22274 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %ecx
22275 ; FALLBACK20-NEXT: movups (%ecx), %xmm0
22276 ; FALLBACK20-NEXT: movups 16(%ecx), %xmm1
22277 ; FALLBACK20-NEXT: movups 32(%ecx), %xmm2
22278 ; FALLBACK20-NEXT: movl 48(%ecx), %edx
22279 ; FALLBACK20-NEXT: movl 52(%ecx), %esi
22280 ; FALLBACK20-NEXT: movl 56(%ecx), %edi
22281 ; FALLBACK20-NEXT: movl 60(%ecx), %ecx
22282 ; FALLBACK20-NEXT: movl (%eax), %eax
22283 ; FALLBACK20-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22284 ; FALLBACK20-NEXT: movl %edi, {{[0-9]+}}(%esp)
22285 ; FALLBACK20-NEXT: movl %esi, {{[0-9]+}}(%esp)
22286 ; FALLBACK20-NEXT: movl %edx, {{[0-9]+}}(%esp)
22287 ; FALLBACK20-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
22288 ; FALLBACK20-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
22289 ; FALLBACK20-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
22290 ; FALLBACK20-NEXT: sarl $31, %ecx
22291 ; FALLBACK20-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22292 ; FALLBACK20-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22293 ; FALLBACK20-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22294 ; FALLBACK20-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22295 ; FALLBACK20-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22296 ; FALLBACK20-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22297 ; FALLBACK20-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22298 ; FALLBACK20-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22299 ; FALLBACK20-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22300 ; FALLBACK20-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22301 ; FALLBACK20-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22302 ; FALLBACK20-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22303 ; FALLBACK20-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22304 ; FALLBACK20-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22305 ; FALLBACK20-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22306 ; FALLBACK20-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22307 ; FALLBACK20-NEXT: movl %eax, %esi
22308 ; FALLBACK20-NEXT: andl $60, %esi
22309 ; FALLBACK20-NEXT: movl 68(%esp,%esi), %edx
22310 ; FALLBACK20-NEXT: shll $3, %eax
22311 ; FALLBACK20-NEXT: andl $24, %eax
22312 ; FALLBACK20-NEXT: movl %edx, %edi
22313 ; FALLBACK20-NEXT: movl %eax, %ecx
22314 ; FALLBACK20-NEXT: shrl %cl, %edi
22315 ; FALLBACK20-NEXT: movl 72(%esp,%esi), %ecx
22316 ; FALLBACK20-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22317 ; FALLBACK20-NEXT: leal (%ecx,%ecx), %ebx
22318 ; FALLBACK20-NEXT: movb %al, %ch
22319 ; FALLBACK20-NEXT: notb %ch
22320 ; FALLBACK20-NEXT: movb %ch, %cl
22321 ; FALLBACK20-NEXT: shll %cl, %ebx
22322 ; FALLBACK20-NEXT: orl %edi, %ebx
22323 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22324 ; FALLBACK20-NEXT: movl 64(%esp,%esi), %edi
22325 ; FALLBACK20-NEXT: movb %al, %cl
22326 ; FALLBACK20-NEXT: shrl %cl, %edi
22327 ; FALLBACK20-NEXT: addl %edx, %edx
22328 ; FALLBACK20-NEXT: movb %ch, %cl
22329 ; FALLBACK20-NEXT: shll %cl, %edx
22330 ; FALLBACK20-NEXT: orl %edi, %edx
22331 ; FALLBACK20-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22332 ; FALLBACK20-NEXT: movl 76(%esp,%esi), %edx
22333 ; FALLBACK20-NEXT: movl %edx, %ebp
22334 ; FALLBACK20-NEXT: movb %al, %cl
22335 ; FALLBACK20-NEXT: shrl %cl, %ebp
22336 ; FALLBACK20-NEXT: movl 80(%esp,%esi), %edi
22337 ; FALLBACK20-NEXT: leal (%edi,%edi), %ebx
22338 ; FALLBACK20-NEXT: movb %ch, %cl
22339 ; FALLBACK20-NEXT: shll %cl, %ebx
22340 ; FALLBACK20-NEXT: orl %ebp, %ebx
22341 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22342 ; FALLBACK20-NEXT: movb %al, %cl
22343 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
22344 ; FALLBACK20-NEXT: shrl %cl, %ebx
22345 ; FALLBACK20-NEXT: addl %edx, %edx
22346 ; FALLBACK20-NEXT: movb %ch, %cl
22347 ; FALLBACK20-NEXT: shll %cl, %edx
22348 ; FALLBACK20-NEXT: orl %ebx, %edx
22349 ; FALLBACK20-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22350 ; FALLBACK20-NEXT: movl 84(%esp,%esi), %ebx
22351 ; FALLBACK20-NEXT: movl %ebx, %ebp
22352 ; FALLBACK20-NEXT: movl %eax, %edx
22353 ; FALLBACK20-NEXT: movb %dl, %cl
22354 ; FALLBACK20-NEXT: shrl %cl, %ebp
22355 ; FALLBACK20-NEXT: movl 88(%esp,%esi), %eax
22356 ; FALLBACK20-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22357 ; FALLBACK20-NEXT: addl %eax, %eax
22358 ; FALLBACK20-NEXT: movb %ch, %cl
22359 ; FALLBACK20-NEXT: shll %cl, %eax
22360 ; FALLBACK20-NEXT: orl %ebp, %eax
22361 ; FALLBACK20-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22362 ; FALLBACK20-NEXT: movb %dl, %cl
22363 ; FALLBACK20-NEXT: shrl %cl, %edi
22364 ; FALLBACK20-NEXT: addl %ebx, %ebx
22365 ; FALLBACK20-NEXT: movb %ch, %cl
22366 ; FALLBACK20-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
22367 ; FALLBACK20-NEXT: shll %cl, %ebx
22368 ; FALLBACK20-NEXT: orl %edi, %ebx
22369 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22370 ; FALLBACK20-NEXT: movl 92(%esp,%esi), %ebx
22371 ; FALLBACK20-NEXT: movl %ebx, %ebp
22372 ; FALLBACK20-NEXT: movb %dl, %cl
22373 ; FALLBACK20-NEXT: shrl %cl, %ebp
22374 ; FALLBACK20-NEXT: movl 96(%esp,%esi), %edi
22375 ; FALLBACK20-NEXT: leal (%edi,%edi), %eax
22376 ; FALLBACK20-NEXT: movb %ch, %cl
22377 ; FALLBACK20-NEXT: shll %cl, %eax
22378 ; FALLBACK20-NEXT: orl %ebp, %eax
22379 ; FALLBACK20-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22380 ; FALLBACK20-NEXT: movb %dl, %cl
22381 ; FALLBACK20-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22382 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22383 ; FALLBACK20-NEXT: shrl %cl, %eax
22384 ; FALLBACK20-NEXT: addl %ebx, %ebx
22385 ; FALLBACK20-NEXT: movb %ch, %cl
22386 ; FALLBACK20-NEXT: shll %cl, %ebx
22387 ; FALLBACK20-NEXT: orl %eax, %ebx
22388 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22389 ; FALLBACK20-NEXT: movl 100(%esp,%esi), %ebx
22390 ; FALLBACK20-NEXT: movl %ebx, %ebp
22391 ; FALLBACK20-NEXT: movb %dl, %cl
22392 ; FALLBACK20-NEXT: shrl %cl, %ebp
22393 ; FALLBACK20-NEXT: movl 104(%esp,%esi), %edx
22394 ; FALLBACK20-NEXT: leal (%edx,%edx), %eax
22395 ; FALLBACK20-NEXT: movb %ch, %cl
22396 ; FALLBACK20-NEXT: shll %cl, %eax
22397 ; FALLBACK20-NEXT: orl %ebp, %eax
22398 ; FALLBACK20-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22399 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22400 ; FALLBACK20-NEXT: movb %al, %cl
22401 ; FALLBACK20-NEXT: shrl %cl, %edi
22402 ; FALLBACK20-NEXT: addl %ebx, %ebx
22403 ; FALLBACK20-NEXT: movb %ch, %cl
22404 ; FALLBACK20-NEXT: shll %cl, %ebx
22405 ; FALLBACK20-NEXT: orl %edi, %ebx
22406 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22407 ; FALLBACK20-NEXT: movl 108(%esp,%esi), %edi
22408 ; FALLBACK20-NEXT: movl %edi, %ebp
22409 ; FALLBACK20-NEXT: movl %eax, %ecx
22410 ; FALLBACK20-NEXT: shrl %cl, %ebp
22411 ; FALLBACK20-NEXT: movl 112(%esp,%esi), %ecx
22412 ; FALLBACK20-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22413 ; FALLBACK20-NEXT: leal (%ecx,%ecx), %ebx
22414 ; FALLBACK20-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
22415 ; FALLBACK20-NEXT: movb %ch, %cl
22416 ; FALLBACK20-NEXT: shll %cl, %ebx
22417 ; FALLBACK20-NEXT: orl %ebp, %ebx
22418 ; FALLBACK20-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22419 ; FALLBACK20-NEXT: movb %al, %cl
22420 ; FALLBACK20-NEXT: shrl %cl, %edx
22421 ; FALLBACK20-NEXT: addl %edi, %edi
22422 ; FALLBACK20-NEXT: movb %ch, %cl
22423 ; FALLBACK20-NEXT: shll %cl, %edi
22424 ; FALLBACK20-NEXT: orl %edx, %edi
22425 ; FALLBACK20-NEXT: movl %esi, %edx
22426 ; FALLBACK20-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22427 ; FALLBACK20-NEXT: movl 116(%esp,%esi), %esi
22428 ; FALLBACK20-NEXT: movl %esi, %ebx
22429 ; FALLBACK20-NEXT: movb %al, %cl
22430 ; FALLBACK20-NEXT: shrl %cl, %ebx
22431 ; FALLBACK20-NEXT: movl 120(%esp,%edx), %eax
22432 ; FALLBACK20-NEXT: leal (%eax,%eax), %ebp
22433 ; FALLBACK20-NEXT: movb %ch, %cl
22434 ; FALLBACK20-NEXT: shll %cl, %ebp
22435 ; FALLBACK20-NEXT: orl %ebx, %ebp
22436 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
22437 ; FALLBACK20-NEXT: movb %dl, %cl
22438 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
22439 ; FALLBACK20-NEXT: shrl %cl, %ebx
22440 ; FALLBACK20-NEXT: addl %esi, %esi
22441 ; FALLBACK20-NEXT: movb %ch, %cl
22442 ; FALLBACK20-NEXT: shll %cl, %esi
22443 ; FALLBACK20-NEXT: orl %ebx, %esi
22444 ; FALLBACK20-NEXT: movb %dl, %cl
22445 ; FALLBACK20-NEXT: shrl %cl, %eax
22446 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
22447 ; FALLBACK20-NEXT: movl 124(%esp,%edx), %ebx
22448 ; FALLBACK20-NEXT: leal (%ebx,%ebx), %edx
22449 ; FALLBACK20-NEXT: movb %ch, %cl
22450 ; FALLBACK20-NEXT: shll %cl, %edx
22451 ; FALLBACK20-NEXT: orl %eax, %edx
22452 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22453 ; FALLBACK20-NEXT: # kill: def $cl killed $cl killed $ecx
22454 ; FALLBACK20-NEXT: sarl %cl, %ebx
22455 ; FALLBACK20-NEXT: movl {{[0-9]+}}(%esp), %eax
22456 ; FALLBACK20-NEXT: movl %ebx, 60(%eax)
22457 ; FALLBACK20-NEXT: movl %edx, 56(%eax)
22458 ; FALLBACK20-NEXT: movl %esi, 48(%eax)
22459 ; FALLBACK20-NEXT: movl %ebp, 52(%eax)
22460 ; FALLBACK20-NEXT: movl %edi, 40(%eax)
22461 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22462 ; FALLBACK20-NEXT: movl %ecx, 44(%eax)
22463 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22464 ; FALLBACK20-NEXT: movl %ecx, 32(%eax)
22465 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22466 ; FALLBACK20-NEXT: movl %ecx, 36(%eax)
22467 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22468 ; FALLBACK20-NEXT: movl %ecx, 24(%eax)
22469 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22470 ; FALLBACK20-NEXT: movl %ecx, 28(%eax)
22471 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22472 ; FALLBACK20-NEXT: movl %ecx, 16(%eax)
22473 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22474 ; FALLBACK20-NEXT: movl %ecx, 20(%eax)
22475 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22476 ; FALLBACK20-NEXT: movl %ecx, 8(%eax)
22477 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22478 ; FALLBACK20-NEXT: movl %ecx, 12(%eax)
22479 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22480 ; FALLBACK20-NEXT: movl %ecx, (%eax)
22481 ; FALLBACK20-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22482 ; FALLBACK20-NEXT: movl %ecx, 4(%eax)
22483 ; FALLBACK20-NEXT: addl $204, %esp
22484 ; FALLBACK20-NEXT: popl %esi
22485 ; FALLBACK20-NEXT: popl %edi
22486 ; FALLBACK20-NEXT: popl %ebx
22487 ; FALLBACK20-NEXT: popl %ebp
22488 ; FALLBACK20-NEXT: retl
22490 ; FALLBACK21-LABEL: ashr_64bytes:
22491 ; FALLBACK21: # %bb.0:
22492 ; FALLBACK21-NEXT: pushl %ebp
22493 ; FALLBACK21-NEXT: pushl %ebx
22494 ; FALLBACK21-NEXT: pushl %edi
22495 ; FALLBACK21-NEXT: pushl %esi
22496 ; FALLBACK21-NEXT: subl $188, %esp
22497 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %ecx
22498 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %eax
22499 ; FALLBACK21-NEXT: movups (%eax), %xmm0
22500 ; FALLBACK21-NEXT: movups 16(%eax), %xmm1
22501 ; FALLBACK21-NEXT: movups 32(%eax), %xmm2
22502 ; FALLBACK21-NEXT: movl 48(%eax), %edx
22503 ; FALLBACK21-NEXT: movl 52(%eax), %esi
22504 ; FALLBACK21-NEXT: movl 56(%eax), %edi
22505 ; FALLBACK21-NEXT: movl 60(%eax), %eax
22506 ; FALLBACK21-NEXT: movl (%ecx), %ecx
22507 ; FALLBACK21-NEXT: movl %eax, {{[0-9]+}}(%esp)
22508 ; FALLBACK21-NEXT: movl %edi, {{[0-9]+}}(%esp)
22509 ; FALLBACK21-NEXT: movl %esi, {{[0-9]+}}(%esp)
22510 ; FALLBACK21-NEXT: movl %edx, {{[0-9]+}}(%esp)
22511 ; FALLBACK21-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
22512 ; FALLBACK21-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
22513 ; FALLBACK21-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
22514 ; FALLBACK21-NEXT: sarl $31, %eax
22515 ; FALLBACK21-NEXT: movl %eax, {{[0-9]+}}(%esp)
22516 ; FALLBACK21-NEXT: movl %eax, {{[0-9]+}}(%esp)
22517 ; FALLBACK21-NEXT: movl %eax, {{[0-9]+}}(%esp)
22518 ; FALLBACK21-NEXT: movl %eax, {{[0-9]+}}(%esp)
22519 ; FALLBACK21-NEXT: movl %eax, {{[0-9]+}}(%esp)
22520 ; FALLBACK21-NEXT: movl %eax, {{[0-9]+}}(%esp)
22521 ; FALLBACK21-NEXT: movl %eax, {{[0-9]+}}(%esp)
22522 ; FALLBACK21-NEXT: movl %eax, {{[0-9]+}}(%esp)
22523 ; FALLBACK21-NEXT: movl %eax, {{[0-9]+}}(%esp)
22524 ; FALLBACK21-NEXT: movl %eax, {{[0-9]+}}(%esp)
22525 ; FALLBACK21-NEXT: movl %eax, {{[0-9]+}}(%esp)
22526 ; FALLBACK21-NEXT: movl %eax, {{[0-9]+}}(%esp)
22527 ; FALLBACK21-NEXT: movl %eax, {{[0-9]+}}(%esp)
22528 ; FALLBACK21-NEXT: movl %eax, {{[0-9]+}}(%esp)
22529 ; FALLBACK21-NEXT: movl %eax, {{[0-9]+}}(%esp)
22530 ; FALLBACK21-NEXT: movl %eax, {{[0-9]+}}(%esp)
22531 ; FALLBACK21-NEXT: movl %ecx, %ebp
22532 ; FALLBACK21-NEXT: andl $60, %ebp
22533 ; FALLBACK21-NEXT: movl 56(%esp,%ebp), %edx
22534 ; FALLBACK21-NEXT: movl 52(%esp,%ebp), %eax
22535 ; FALLBACK21-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22536 ; FALLBACK21-NEXT: shll $3, %ecx
22537 ; FALLBACK21-NEXT: andl $24, %ecx
22538 ; FALLBACK21-NEXT: shrdl %cl, %edx, %eax
22539 ; FALLBACK21-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22540 ; FALLBACK21-NEXT: movl 64(%esp,%ebp), %edi
22541 ; FALLBACK21-NEXT: movl 60(%esp,%ebp), %eax
22542 ; FALLBACK21-NEXT: movl %eax, %esi
22543 ; FALLBACK21-NEXT: shrdl %cl, %edi, %esi
22544 ; FALLBACK21-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22545 ; FALLBACK21-NEXT: shrdl %cl, %eax, %edx
22546 ; FALLBACK21-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22547 ; FALLBACK21-NEXT: movl 72(%esp,%ebp), %esi
22548 ; FALLBACK21-NEXT: movl 68(%esp,%ebp), %eax
22549 ; FALLBACK21-NEXT: movl %eax, %edx
22550 ; FALLBACK21-NEXT: shrdl %cl, %esi, %edx
22551 ; FALLBACK21-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22552 ; FALLBACK21-NEXT: shrdl %cl, %eax, %edi
22553 ; FALLBACK21-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22554 ; FALLBACK21-NEXT: movl 80(%esp,%ebp), %edi
22555 ; FALLBACK21-NEXT: movl 76(%esp,%ebp), %eax
22556 ; FALLBACK21-NEXT: movl %eax, %edx
22557 ; FALLBACK21-NEXT: shrdl %cl, %edi, %edx
22558 ; FALLBACK21-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22559 ; FALLBACK21-NEXT: shrdl %cl, %eax, %esi
22560 ; FALLBACK21-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22561 ; FALLBACK21-NEXT: movl 88(%esp,%ebp), %esi
22562 ; FALLBACK21-NEXT: movl 84(%esp,%ebp), %eax
22563 ; FALLBACK21-NEXT: movl %eax, %edx
22564 ; FALLBACK21-NEXT: shrdl %cl, %esi, %edx
22565 ; FALLBACK21-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22566 ; FALLBACK21-NEXT: movl %esi, %edx
22567 ; FALLBACK21-NEXT: shrdl %cl, %eax, %edi
22568 ; FALLBACK21-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22569 ; FALLBACK21-NEXT: movl 96(%esp,%ebp), %esi
22570 ; FALLBACK21-NEXT: movl 92(%esp,%ebp), %eax
22571 ; FALLBACK21-NEXT: movl %eax, %edi
22572 ; FALLBACK21-NEXT: shrdl %cl, %esi, %edi
22573 ; FALLBACK21-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22574 ; FALLBACK21-NEXT: shrdl %cl, %eax, %edx
22575 ; FALLBACK21-NEXT: movl %edx, (%esp) # 4-byte Spill
22576 ; FALLBACK21-NEXT: movl 104(%esp,%ebp), %edx
22577 ; FALLBACK21-NEXT: movl 100(%esp,%ebp), %eax
22578 ; FALLBACK21-NEXT: movl %eax, %edi
22579 ; FALLBACK21-NEXT: shrdl %cl, %edx, %edi
22580 ; FALLBACK21-NEXT: shrdl %cl, %eax, %esi
22581 ; FALLBACK21-NEXT: movl 48(%esp,%ebp), %ebx
22582 ; FALLBACK21-NEXT: movl 108(%esp,%ebp), %eax
22583 ; FALLBACK21-NEXT: shrdl %cl, %eax, %edx
22584 ; FALLBACK21-NEXT: movl {{[0-9]+}}(%esp), %ebp
22585 ; FALLBACK21-NEXT: movl %edx, 56(%ebp)
22586 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
22587 ; FALLBACK21-NEXT: shrdl %cl, %edx, %ebx
22588 ; FALLBACK21-NEXT: # kill: def $cl killed $cl killed $ecx
22589 ; FALLBACK21-NEXT: sarl %cl, %eax
22590 ; FALLBACK21-NEXT: movl %eax, 60(%ebp)
22591 ; FALLBACK21-NEXT: movl %esi, 48(%ebp)
22592 ; FALLBACK21-NEXT: movl %edi, 52(%ebp)
22593 ; FALLBACK21-NEXT: movl (%esp), %eax # 4-byte Reload
22594 ; FALLBACK21-NEXT: movl %eax, 40(%ebp)
22595 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22596 ; FALLBACK21-NEXT: movl %eax, 44(%ebp)
22597 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22598 ; FALLBACK21-NEXT: movl %eax, 32(%ebp)
22599 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22600 ; FALLBACK21-NEXT: movl %eax, 36(%ebp)
22601 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22602 ; FALLBACK21-NEXT: movl %eax, 24(%ebp)
22603 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22604 ; FALLBACK21-NEXT: movl %eax, 28(%ebp)
22605 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22606 ; FALLBACK21-NEXT: movl %eax, 16(%ebp)
22607 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22608 ; FALLBACK21-NEXT: movl %eax, 20(%ebp)
22609 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22610 ; FALLBACK21-NEXT: movl %eax, 8(%ebp)
22611 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22612 ; FALLBACK21-NEXT: movl %eax, 12(%ebp)
22613 ; FALLBACK21-NEXT: movl %ebx, (%ebp)
22614 ; FALLBACK21-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22615 ; FALLBACK21-NEXT: movl %eax, 4(%ebp)
22616 ; FALLBACK21-NEXT: addl $188, %esp
22617 ; FALLBACK21-NEXT: popl %esi
22618 ; FALLBACK21-NEXT: popl %edi
22619 ; FALLBACK21-NEXT: popl %ebx
22620 ; FALLBACK21-NEXT: popl %ebp
22621 ; FALLBACK21-NEXT: retl
22623 ; FALLBACK22-LABEL: ashr_64bytes:
22624 ; FALLBACK22: # %bb.0:
22625 ; FALLBACK22-NEXT: pushl %ebp
22626 ; FALLBACK22-NEXT: pushl %ebx
22627 ; FALLBACK22-NEXT: pushl %edi
22628 ; FALLBACK22-NEXT: pushl %esi
22629 ; FALLBACK22-NEXT: subl $204, %esp
22630 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %eax
22631 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %ecx
22632 ; FALLBACK22-NEXT: movups (%ecx), %xmm0
22633 ; FALLBACK22-NEXT: movups 16(%ecx), %xmm1
22634 ; FALLBACK22-NEXT: movups 32(%ecx), %xmm2
22635 ; FALLBACK22-NEXT: movl 48(%ecx), %edx
22636 ; FALLBACK22-NEXT: movl 52(%ecx), %esi
22637 ; FALLBACK22-NEXT: movl 56(%ecx), %edi
22638 ; FALLBACK22-NEXT: movl 60(%ecx), %ecx
22639 ; FALLBACK22-NEXT: movl (%eax), %eax
22640 ; FALLBACK22-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22641 ; FALLBACK22-NEXT: movl %edi, {{[0-9]+}}(%esp)
22642 ; FALLBACK22-NEXT: movl %esi, {{[0-9]+}}(%esp)
22643 ; FALLBACK22-NEXT: movl %edx, {{[0-9]+}}(%esp)
22644 ; FALLBACK22-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
22645 ; FALLBACK22-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
22646 ; FALLBACK22-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
22647 ; FALLBACK22-NEXT: sarl $31, %ecx
22648 ; FALLBACK22-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22649 ; FALLBACK22-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22650 ; FALLBACK22-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22651 ; FALLBACK22-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22652 ; FALLBACK22-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22653 ; FALLBACK22-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22654 ; FALLBACK22-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22655 ; FALLBACK22-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22656 ; FALLBACK22-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22657 ; FALLBACK22-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22658 ; FALLBACK22-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22659 ; FALLBACK22-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22660 ; FALLBACK22-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22661 ; FALLBACK22-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22662 ; FALLBACK22-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22663 ; FALLBACK22-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22664 ; FALLBACK22-NEXT: movl %eax, %ecx
22665 ; FALLBACK22-NEXT: leal (,%eax,8), %edx
22666 ; FALLBACK22-NEXT: andl $24, %edx
22667 ; FALLBACK22-NEXT: andl $60, %ecx
22668 ; FALLBACK22-NEXT: movl 68(%esp,%ecx), %esi
22669 ; FALLBACK22-NEXT: movl 72(%esp,%ecx), %edi
22670 ; FALLBACK22-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22671 ; FALLBACK22-NEXT: shrxl %edx, %esi, %eax
22672 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22673 ; FALLBACK22-NEXT: movl %edx, %ebx
22674 ; FALLBACK22-NEXT: notb %bl
22675 ; FALLBACK22-NEXT: leal (%edi,%edi), %ebp
22676 ; FALLBACK22-NEXT: shlxl %ebx, %ebp, %eax
22677 ; FALLBACK22-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
22678 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22679 ; FALLBACK22-NEXT: shrxl %edx, 64(%esp,%ecx), %edi
22680 ; FALLBACK22-NEXT: addl %esi, %esi
22681 ; FALLBACK22-NEXT: shlxl %ebx, %esi, %eax
22682 ; FALLBACK22-NEXT: orl %edi, %eax
22683 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22684 ; FALLBACK22-NEXT: movl 80(%esp,%ecx), %esi
22685 ; FALLBACK22-NEXT: leal (%esi,%esi), %edi
22686 ; FALLBACK22-NEXT: shlxl %ebx, %edi, %eax
22687 ; FALLBACK22-NEXT: movl 76(%esp,%ecx), %edi
22688 ; FALLBACK22-NEXT: shrxl %edx, %edi, %ebp
22689 ; FALLBACK22-NEXT: orl %ebp, %eax
22690 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22691 ; FALLBACK22-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
22692 ; FALLBACK22-NEXT: addl %edi, %edi
22693 ; FALLBACK22-NEXT: shlxl %ebx, %edi, %edi
22694 ; FALLBACK22-NEXT: orl %eax, %edi
22695 ; FALLBACK22-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22696 ; FALLBACK22-NEXT: movl 88(%esp,%ecx), %eax
22697 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22698 ; FALLBACK22-NEXT: leal (%eax,%eax), %edi
22699 ; FALLBACK22-NEXT: shlxl %ebx, %edi, %eax
22700 ; FALLBACK22-NEXT: movl 84(%esp,%ecx), %edi
22701 ; FALLBACK22-NEXT: shrxl %edx, %edi, %ebp
22702 ; FALLBACK22-NEXT: orl %ebp, %eax
22703 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22704 ; FALLBACK22-NEXT: shrxl %edx, %esi, %esi
22705 ; FALLBACK22-NEXT: addl %edi, %edi
22706 ; FALLBACK22-NEXT: shlxl %ebx, %edi, %eax
22707 ; FALLBACK22-NEXT: orl %esi, %eax
22708 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22709 ; FALLBACK22-NEXT: movl 96(%esp,%ecx), %esi
22710 ; FALLBACK22-NEXT: leal (%esi,%esi), %edi
22711 ; FALLBACK22-NEXT: shlxl %ebx, %edi, %eax
22712 ; FALLBACK22-NEXT: movl 92(%esp,%ecx), %edi
22713 ; FALLBACK22-NEXT: shrxl %edx, %edi, %ebp
22714 ; FALLBACK22-NEXT: orl %ebp, %eax
22715 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22716 ; FALLBACK22-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
22717 ; FALLBACK22-NEXT: addl %edi, %edi
22718 ; FALLBACK22-NEXT: shlxl %ebx, %edi, %edi
22719 ; FALLBACK22-NEXT: orl %eax, %edi
22720 ; FALLBACK22-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22721 ; FALLBACK22-NEXT: movl 104(%esp,%ecx), %eax
22722 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22723 ; FALLBACK22-NEXT: leal (%eax,%eax), %edi
22724 ; FALLBACK22-NEXT: shlxl %ebx, %edi, %eax
22725 ; FALLBACK22-NEXT: movl 100(%esp,%ecx), %edi
22726 ; FALLBACK22-NEXT: shrxl %edx, %edi, %ebp
22727 ; FALLBACK22-NEXT: orl %ebp, %eax
22728 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22729 ; FALLBACK22-NEXT: shrxl %edx, %esi, %esi
22730 ; FALLBACK22-NEXT: addl %edi, %edi
22731 ; FALLBACK22-NEXT: shlxl %ebx, %edi, %eax
22732 ; FALLBACK22-NEXT: orl %esi, %eax
22733 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22734 ; FALLBACK22-NEXT: movl 112(%esp,%ecx), %eax
22735 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22736 ; FALLBACK22-NEXT: leal (%eax,%eax), %esi
22737 ; FALLBACK22-NEXT: shlxl %ebx, %esi, %eax
22738 ; FALLBACK22-NEXT: movl 108(%esp,%ecx), %esi
22739 ; FALLBACK22-NEXT: movl %ecx, %edi
22740 ; FALLBACK22-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22741 ; FALLBACK22-NEXT: shrxl %edx, %esi, %ebp
22742 ; FALLBACK22-NEXT: orl %ebp, %eax
22743 ; FALLBACK22-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22744 ; FALLBACK22-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
22745 ; FALLBACK22-NEXT: addl %esi, %esi
22746 ; FALLBACK22-NEXT: shlxl %ebx, %esi, %esi
22747 ; FALLBACK22-NEXT: orl %ecx, %esi
22748 ; FALLBACK22-NEXT: movl 120(%esp,%edi), %ebp
22749 ; FALLBACK22-NEXT: leal (%ebp,%ebp), %ecx
22750 ; FALLBACK22-NEXT: shlxl %ebx, %ecx, %ecx
22751 ; FALLBACK22-NEXT: movl 116(%esp,%edi), %eax
22752 ; FALLBACK22-NEXT: shrxl %edx, %eax, %edi
22753 ; FALLBACK22-NEXT: orl %edi, %ecx
22754 ; FALLBACK22-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
22755 ; FALLBACK22-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22756 ; FALLBACK22-NEXT: addl %eax, %eax
22757 ; FALLBACK22-NEXT: shlxl %ebx, %eax, %edi
22758 ; FALLBACK22-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
22759 ; FALLBACK22-NEXT: shrxl %edx, %ebp, %eax
22760 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
22761 ; FALLBACK22-NEXT: movl 124(%esp,%ebp), %ebp
22762 ; FALLBACK22-NEXT: sarxl %edx, %ebp, %edx
22763 ; FALLBACK22-NEXT: addl %ebp, %ebp
22764 ; FALLBACK22-NEXT: shlxl %ebx, %ebp, %ebx
22765 ; FALLBACK22-NEXT: orl %eax, %ebx
22766 ; FALLBACK22-NEXT: movl {{[0-9]+}}(%esp), %eax
22767 ; FALLBACK22-NEXT: movl %edx, 60(%eax)
22768 ; FALLBACK22-NEXT: movl %ebx, 56(%eax)
22769 ; FALLBACK22-NEXT: movl %edi, 48(%eax)
22770 ; FALLBACK22-NEXT: movl %ecx, 52(%eax)
22771 ; FALLBACK22-NEXT: movl %esi, 40(%eax)
22772 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22773 ; FALLBACK22-NEXT: movl %ecx, 44(%eax)
22774 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22775 ; FALLBACK22-NEXT: movl %ecx, 32(%eax)
22776 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22777 ; FALLBACK22-NEXT: movl %ecx, 36(%eax)
22778 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22779 ; FALLBACK22-NEXT: movl %ecx, 24(%eax)
22780 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22781 ; FALLBACK22-NEXT: movl %ecx, 28(%eax)
22782 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22783 ; FALLBACK22-NEXT: movl %ecx, 16(%eax)
22784 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22785 ; FALLBACK22-NEXT: movl %ecx, 20(%eax)
22786 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22787 ; FALLBACK22-NEXT: movl %ecx, 8(%eax)
22788 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22789 ; FALLBACK22-NEXT: movl %ecx, 12(%eax)
22790 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22791 ; FALLBACK22-NEXT: movl %ecx, (%eax)
22792 ; FALLBACK22-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22793 ; FALLBACK22-NEXT: movl %ecx, 4(%eax)
22794 ; FALLBACK22-NEXT: addl $204, %esp
22795 ; FALLBACK22-NEXT: popl %esi
22796 ; FALLBACK22-NEXT: popl %edi
22797 ; FALLBACK22-NEXT: popl %ebx
22798 ; FALLBACK22-NEXT: popl %ebp
22799 ; FALLBACK22-NEXT: retl
22801 ; FALLBACK23-LABEL: ashr_64bytes:
22802 ; FALLBACK23: # %bb.0:
22803 ; FALLBACK23-NEXT: pushl %ebp
22804 ; FALLBACK23-NEXT: pushl %ebx
22805 ; FALLBACK23-NEXT: pushl %edi
22806 ; FALLBACK23-NEXT: pushl %esi
22807 ; FALLBACK23-NEXT: subl $188, %esp
22808 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %ecx
22809 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %eax
22810 ; FALLBACK23-NEXT: movups (%eax), %xmm0
22811 ; FALLBACK23-NEXT: movups 16(%eax), %xmm1
22812 ; FALLBACK23-NEXT: movups 32(%eax), %xmm2
22813 ; FALLBACK23-NEXT: movl 48(%eax), %edx
22814 ; FALLBACK23-NEXT: movl 52(%eax), %esi
22815 ; FALLBACK23-NEXT: movl 56(%eax), %edi
22816 ; FALLBACK23-NEXT: movl 60(%eax), %eax
22817 ; FALLBACK23-NEXT: movl (%ecx), %ecx
22818 ; FALLBACK23-NEXT: movl %eax, {{[0-9]+}}(%esp)
22819 ; FALLBACK23-NEXT: movl %edi, {{[0-9]+}}(%esp)
22820 ; FALLBACK23-NEXT: movl %esi, {{[0-9]+}}(%esp)
22821 ; FALLBACK23-NEXT: movl %edx, {{[0-9]+}}(%esp)
22822 ; FALLBACK23-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
22823 ; FALLBACK23-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
22824 ; FALLBACK23-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
22825 ; FALLBACK23-NEXT: sarl $31, %eax
22826 ; FALLBACK23-NEXT: movl %eax, {{[0-9]+}}(%esp)
22827 ; FALLBACK23-NEXT: movl %eax, {{[0-9]+}}(%esp)
22828 ; FALLBACK23-NEXT: movl %eax, {{[0-9]+}}(%esp)
22829 ; FALLBACK23-NEXT: movl %eax, {{[0-9]+}}(%esp)
22830 ; FALLBACK23-NEXT: movl %eax, {{[0-9]+}}(%esp)
22831 ; FALLBACK23-NEXT: movl %eax, {{[0-9]+}}(%esp)
22832 ; FALLBACK23-NEXT: movl %eax, {{[0-9]+}}(%esp)
22833 ; FALLBACK23-NEXT: movl %eax, {{[0-9]+}}(%esp)
22834 ; FALLBACK23-NEXT: movl %eax, {{[0-9]+}}(%esp)
22835 ; FALLBACK23-NEXT: movl %eax, {{[0-9]+}}(%esp)
22836 ; FALLBACK23-NEXT: movl %eax, {{[0-9]+}}(%esp)
22837 ; FALLBACK23-NEXT: movl %eax, {{[0-9]+}}(%esp)
22838 ; FALLBACK23-NEXT: movl %eax, {{[0-9]+}}(%esp)
22839 ; FALLBACK23-NEXT: movl %eax, {{[0-9]+}}(%esp)
22840 ; FALLBACK23-NEXT: movl %eax, {{[0-9]+}}(%esp)
22841 ; FALLBACK23-NEXT: movl %eax, {{[0-9]+}}(%esp)
22842 ; FALLBACK23-NEXT: movl %ecx, %ebp
22843 ; FALLBACK23-NEXT: andl $60, %ebp
22844 ; FALLBACK23-NEXT: movl 56(%esp,%ebp), %edx
22845 ; FALLBACK23-NEXT: movl 52(%esp,%ebp), %eax
22846 ; FALLBACK23-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22847 ; FALLBACK23-NEXT: shll $3, %ecx
22848 ; FALLBACK23-NEXT: andl $24, %ecx
22849 ; FALLBACK23-NEXT: shrdl %cl, %edx, %eax
22850 ; FALLBACK23-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22851 ; FALLBACK23-NEXT: movl 64(%esp,%ebp), %edi
22852 ; FALLBACK23-NEXT: movl 60(%esp,%ebp), %eax
22853 ; FALLBACK23-NEXT: movl %eax, %esi
22854 ; FALLBACK23-NEXT: shrdl %cl, %edi, %esi
22855 ; FALLBACK23-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22856 ; FALLBACK23-NEXT: shrdl %cl, %eax, %edx
22857 ; FALLBACK23-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22858 ; FALLBACK23-NEXT: movl 72(%esp,%ebp), %esi
22859 ; FALLBACK23-NEXT: movl 68(%esp,%ebp), %eax
22860 ; FALLBACK23-NEXT: movl %eax, %edx
22861 ; FALLBACK23-NEXT: shrdl %cl, %esi, %edx
22862 ; FALLBACK23-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22863 ; FALLBACK23-NEXT: shrdl %cl, %eax, %edi
22864 ; FALLBACK23-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22865 ; FALLBACK23-NEXT: movl 80(%esp,%ebp), %edi
22866 ; FALLBACK23-NEXT: movl 76(%esp,%ebp), %eax
22867 ; FALLBACK23-NEXT: movl %eax, %edx
22868 ; FALLBACK23-NEXT: shrdl %cl, %edi, %edx
22869 ; FALLBACK23-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22870 ; FALLBACK23-NEXT: shrdl %cl, %eax, %esi
22871 ; FALLBACK23-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22872 ; FALLBACK23-NEXT: movl 88(%esp,%ebp), %ebx
22873 ; FALLBACK23-NEXT: movl 84(%esp,%ebp), %eax
22874 ; FALLBACK23-NEXT: movl %eax, %edx
22875 ; FALLBACK23-NEXT: shrdl %cl, %ebx, %edx
22876 ; FALLBACK23-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22877 ; FALLBACK23-NEXT: shrdl %cl, %eax, %edi
22878 ; FALLBACK23-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22879 ; FALLBACK23-NEXT: movl 96(%esp,%ebp), %esi
22880 ; FALLBACK23-NEXT: movl 92(%esp,%ebp), %eax
22881 ; FALLBACK23-NEXT: movl %eax, %edx
22882 ; FALLBACK23-NEXT: shrdl %cl, %esi, %edx
22883 ; FALLBACK23-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22884 ; FALLBACK23-NEXT: shrdl %cl, %eax, %ebx
22885 ; FALLBACK23-NEXT: movl 104(%esp,%ebp), %eax
22886 ; FALLBACK23-NEXT: movl 100(%esp,%ebp), %edi
22887 ; FALLBACK23-NEXT: movl %edi, %edx
22888 ; FALLBACK23-NEXT: shrdl %cl, %eax, %edx
22889 ; FALLBACK23-NEXT: shrdl %cl, %edi, %esi
22890 ; FALLBACK23-NEXT: movl 48(%esp,%ebp), %edi
22891 ; FALLBACK23-NEXT: movl 108(%esp,%ebp), %ebp
22892 ; FALLBACK23-NEXT: movl %ebp, (%esp) # 4-byte Spill
22893 ; FALLBACK23-NEXT: shrdl %cl, %ebp, %eax
22894 ; FALLBACK23-NEXT: movl {{[0-9]+}}(%esp), %ebp
22895 ; FALLBACK23-NEXT: movl %eax, 56(%ebp)
22896 ; FALLBACK23-NEXT: movl %esi, 48(%ebp)
22897 ; FALLBACK23-NEXT: movl %edx, 52(%ebp)
22898 ; FALLBACK23-NEXT: movl %ebx, 40(%ebp)
22899 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22900 ; FALLBACK23-NEXT: movl %eax, 44(%ebp)
22901 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22902 ; FALLBACK23-NEXT: movl %eax, 32(%ebp)
22903 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22904 ; FALLBACK23-NEXT: movl %eax, 36(%ebp)
22905 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22906 ; FALLBACK23-NEXT: movl %eax, 24(%ebp)
22907 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22908 ; FALLBACK23-NEXT: movl %eax, 28(%ebp)
22909 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22910 ; FALLBACK23-NEXT: movl %eax, 16(%ebp)
22911 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22912 ; FALLBACK23-NEXT: movl %eax, 20(%ebp)
22913 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22914 ; FALLBACK23-NEXT: movl %eax, 8(%ebp)
22915 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
22916 ; FALLBACK23-NEXT: movl %eax, 12(%ebp)
22917 ; FALLBACK23-NEXT: sarxl %ecx, (%esp), %eax # 4-byte Folded Reload
22918 ; FALLBACK23-NEXT: # kill: def $cl killed $cl killed $ecx
22919 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
22920 ; FALLBACK23-NEXT: shrdl %cl, %edx, %edi
22921 ; FALLBACK23-NEXT: movl %edi, (%ebp)
22922 ; FALLBACK23-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
22923 ; FALLBACK23-NEXT: movl %ecx, 4(%ebp)
22924 ; FALLBACK23-NEXT: movl %eax, 60(%ebp)
22925 ; FALLBACK23-NEXT: addl $188, %esp
22926 ; FALLBACK23-NEXT: popl %esi
22927 ; FALLBACK23-NEXT: popl %edi
22928 ; FALLBACK23-NEXT: popl %ebx
22929 ; FALLBACK23-NEXT: popl %ebp
22930 ; FALLBACK23-NEXT: retl
22932 ; FALLBACK24-LABEL: ashr_64bytes:
22933 ; FALLBACK24: # %bb.0:
22934 ; FALLBACK24-NEXT: pushl %ebp
22935 ; FALLBACK24-NEXT: pushl %ebx
22936 ; FALLBACK24-NEXT: pushl %edi
22937 ; FALLBACK24-NEXT: pushl %esi
22938 ; FALLBACK24-NEXT: subl $204, %esp
22939 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %eax
22940 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %ecx
22941 ; FALLBACK24-NEXT: vmovups (%ecx), %ymm0
22942 ; FALLBACK24-NEXT: vmovups 32(%ecx), %xmm1
22943 ; FALLBACK24-NEXT: movl 48(%ecx), %edx
22944 ; FALLBACK24-NEXT: movl 52(%ecx), %esi
22945 ; FALLBACK24-NEXT: movl 56(%ecx), %edi
22946 ; FALLBACK24-NEXT: movl 60(%ecx), %ecx
22947 ; FALLBACK24-NEXT: movl (%eax), %eax
22948 ; FALLBACK24-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22949 ; FALLBACK24-NEXT: movl %edi, {{[0-9]+}}(%esp)
22950 ; FALLBACK24-NEXT: movl %esi, {{[0-9]+}}(%esp)
22951 ; FALLBACK24-NEXT: movl %edx, {{[0-9]+}}(%esp)
22952 ; FALLBACK24-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp)
22953 ; FALLBACK24-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
22954 ; FALLBACK24-NEXT: sarl $31, %ecx
22955 ; FALLBACK24-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22956 ; FALLBACK24-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22957 ; FALLBACK24-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22958 ; FALLBACK24-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22959 ; FALLBACK24-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22960 ; FALLBACK24-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22961 ; FALLBACK24-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22962 ; FALLBACK24-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22963 ; FALLBACK24-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22964 ; FALLBACK24-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22965 ; FALLBACK24-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22966 ; FALLBACK24-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22967 ; FALLBACK24-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22968 ; FALLBACK24-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22969 ; FALLBACK24-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22970 ; FALLBACK24-NEXT: movl %ecx, {{[0-9]+}}(%esp)
22971 ; FALLBACK24-NEXT: movl %eax, %esi
22972 ; FALLBACK24-NEXT: andl $60, %esi
22973 ; FALLBACK24-NEXT: movl 68(%esp,%esi), %edx
22974 ; FALLBACK24-NEXT: shll $3, %eax
22975 ; FALLBACK24-NEXT: andl $24, %eax
22976 ; FALLBACK24-NEXT: movl %edx, %edi
22977 ; FALLBACK24-NEXT: movl %eax, %ecx
22978 ; FALLBACK24-NEXT: shrl %cl, %edi
22979 ; FALLBACK24-NEXT: movl 72(%esp,%esi), %ecx
22980 ; FALLBACK24-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22981 ; FALLBACK24-NEXT: leal (%ecx,%ecx), %ebx
22982 ; FALLBACK24-NEXT: movb %al, %ch
22983 ; FALLBACK24-NEXT: notb %ch
22984 ; FALLBACK24-NEXT: movb %ch, %cl
22985 ; FALLBACK24-NEXT: shll %cl, %ebx
22986 ; FALLBACK24-NEXT: orl %edi, %ebx
22987 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22988 ; FALLBACK24-NEXT: movl 64(%esp,%esi), %edi
22989 ; FALLBACK24-NEXT: movb %al, %cl
22990 ; FALLBACK24-NEXT: shrl %cl, %edi
22991 ; FALLBACK24-NEXT: addl %edx, %edx
22992 ; FALLBACK24-NEXT: movb %ch, %cl
22993 ; FALLBACK24-NEXT: shll %cl, %edx
22994 ; FALLBACK24-NEXT: orl %edi, %edx
22995 ; FALLBACK24-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
22996 ; FALLBACK24-NEXT: movl 76(%esp,%esi), %edx
22997 ; FALLBACK24-NEXT: movl %edx, %ebp
22998 ; FALLBACK24-NEXT: movb %al, %cl
22999 ; FALLBACK24-NEXT: shrl %cl, %ebp
23000 ; FALLBACK24-NEXT: movl 80(%esp,%esi), %edi
23001 ; FALLBACK24-NEXT: leal (%edi,%edi), %ebx
23002 ; FALLBACK24-NEXT: movb %ch, %cl
23003 ; FALLBACK24-NEXT: shll %cl, %ebx
23004 ; FALLBACK24-NEXT: orl %ebp, %ebx
23005 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23006 ; FALLBACK24-NEXT: movb %al, %cl
23007 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
23008 ; FALLBACK24-NEXT: shrl %cl, %ebx
23009 ; FALLBACK24-NEXT: addl %edx, %edx
23010 ; FALLBACK24-NEXT: movb %ch, %cl
23011 ; FALLBACK24-NEXT: shll %cl, %edx
23012 ; FALLBACK24-NEXT: orl %ebx, %edx
23013 ; FALLBACK24-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23014 ; FALLBACK24-NEXT: movl 84(%esp,%esi), %ebx
23015 ; FALLBACK24-NEXT: movl %ebx, %ebp
23016 ; FALLBACK24-NEXT: movl %eax, %edx
23017 ; FALLBACK24-NEXT: movb %dl, %cl
23018 ; FALLBACK24-NEXT: shrl %cl, %ebp
23019 ; FALLBACK24-NEXT: movl 88(%esp,%esi), %eax
23020 ; FALLBACK24-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23021 ; FALLBACK24-NEXT: addl %eax, %eax
23022 ; FALLBACK24-NEXT: movb %ch, %cl
23023 ; FALLBACK24-NEXT: shll %cl, %eax
23024 ; FALLBACK24-NEXT: orl %ebp, %eax
23025 ; FALLBACK24-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23026 ; FALLBACK24-NEXT: movb %dl, %cl
23027 ; FALLBACK24-NEXT: shrl %cl, %edi
23028 ; FALLBACK24-NEXT: addl %ebx, %ebx
23029 ; FALLBACK24-NEXT: movb %ch, %cl
23030 ; FALLBACK24-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
23031 ; FALLBACK24-NEXT: shll %cl, %ebx
23032 ; FALLBACK24-NEXT: orl %edi, %ebx
23033 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23034 ; FALLBACK24-NEXT: movl 92(%esp,%esi), %ebx
23035 ; FALLBACK24-NEXT: movl %ebx, %ebp
23036 ; FALLBACK24-NEXT: movb %dl, %cl
23037 ; FALLBACK24-NEXT: shrl %cl, %ebp
23038 ; FALLBACK24-NEXT: movl 96(%esp,%esi), %edi
23039 ; FALLBACK24-NEXT: leal (%edi,%edi), %eax
23040 ; FALLBACK24-NEXT: movb %ch, %cl
23041 ; FALLBACK24-NEXT: shll %cl, %eax
23042 ; FALLBACK24-NEXT: orl %ebp, %eax
23043 ; FALLBACK24-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23044 ; FALLBACK24-NEXT: movb %dl, %cl
23045 ; FALLBACK24-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23046 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23047 ; FALLBACK24-NEXT: shrl %cl, %eax
23048 ; FALLBACK24-NEXT: addl %ebx, %ebx
23049 ; FALLBACK24-NEXT: movb %ch, %cl
23050 ; FALLBACK24-NEXT: shll %cl, %ebx
23051 ; FALLBACK24-NEXT: orl %eax, %ebx
23052 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23053 ; FALLBACK24-NEXT: movl 100(%esp,%esi), %ebx
23054 ; FALLBACK24-NEXT: movl %ebx, %ebp
23055 ; FALLBACK24-NEXT: movb %dl, %cl
23056 ; FALLBACK24-NEXT: shrl %cl, %ebp
23057 ; FALLBACK24-NEXT: movl 104(%esp,%esi), %edx
23058 ; FALLBACK24-NEXT: leal (%edx,%edx), %eax
23059 ; FALLBACK24-NEXT: movb %ch, %cl
23060 ; FALLBACK24-NEXT: shll %cl, %eax
23061 ; FALLBACK24-NEXT: orl %ebp, %eax
23062 ; FALLBACK24-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23063 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23064 ; FALLBACK24-NEXT: movb %al, %cl
23065 ; FALLBACK24-NEXT: shrl %cl, %edi
23066 ; FALLBACK24-NEXT: addl %ebx, %ebx
23067 ; FALLBACK24-NEXT: movb %ch, %cl
23068 ; FALLBACK24-NEXT: shll %cl, %ebx
23069 ; FALLBACK24-NEXT: orl %edi, %ebx
23070 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23071 ; FALLBACK24-NEXT: movl 108(%esp,%esi), %edi
23072 ; FALLBACK24-NEXT: movl %edi, %ebp
23073 ; FALLBACK24-NEXT: movl %eax, %ecx
23074 ; FALLBACK24-NEXT: shrl %cl, %ebp
23075 ; FALLBACK24-NEXT: movl 112(%esp,%esi), %ecx
23076 ; FALLBACK24-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23077 ; FALLBACK24-NEXT: leal (%ecx,%ecx), %ebx
23078 ; FALLBACK24-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
23079 ; FALLBACK24-NEXT: movb %ch, %cl
23080 ; FALLBACK24-NEXT: shll %cl, %ebx
23081 ; FALLBACK24-NEXT: orl %ebp, %ebx
23082 ; FALLBACK24-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23083 ; FALLBACK24-NEXT: movb %al, %cl
23084 ; FALLBACK24-NEXT: shrl %cl, %edx
23085 ; FALLBACK24-NEXT: addl %edi, %edi
23086 ; FALLBACK24-NEXT: movb %ch, %cl
23087 ; FALLBACK24-NEXT: shll %cl, %edi
23088 ; FALLBACK24-NEXT: orl %edx, %edi
23089 ; FALLBACK24-NEXT: movl %esi, %edx
23090 ; FALLBACK24-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23091 ; FALLBACK24-NEXT: movl 116(%esp,%esi), %esi
23092 ; FALLBACK24-NEXT: movl %esi, %ebx
23093 ; FALLBACK24-NEXT: movb %al, %cl
23094 ; FALLBACK24-NEXT: shrl %cl, %ebx
23095 ; FALLBACK24-NEXT: movl 120(%esp,%edx), %eax
23096 ; FALLBACK24-NEXT: leal (%eax,%eax), %ebp
23097 ; FALLBACK24-NEXT: movb %ch, %cl
23098 ; FALLBACK24-NEXT: shll %cl, %ebp
23099 ; FALLBACK24-NEXT: orl %ebx, %ebp
23100 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
23101 ; FALLBACK24-NEXT: movb %dl, %cl
23102 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
23103 ; FALLBACK24-NEXT: shrl %cl, %ebx
23104 ; FALLBACK24-NEXT: addl %esi, %esi
23105 ; FALLBACK24-NEXT: movb %ch, %cl
23106 ; FALLBACK24-NEXT: shll %cl, %esi
23107 ; FALLBACK24-NEXT: orl %ebx, %esi
23108 ; FALLBACK24-NEXT: movb %dl, %cl
23109 ; FALLBACK24-NEXT: shrl %cl, %eax
23110 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
23111 ; FALLBACK24-NEXT: movl 124(%esp,%edx), %ebx
23112 ; FALLBACK24-NEXT: leal (%ebx,%ebx), %edx
23113 ; FALLBACK24-NEXT: movb %ch, %cl
23114 ; FALLBACK24-NEXT: shll %cl, %edx
23115 ; FALLBACK24-NEXT: orl %eax, %edx
23116 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23117 ; FALLBACK24-NEXT: # kill: def $cl killed $cl killed $ecx
23118 ; FALLBACK24-NEXT: sarl %cl, %ebx
23119 ; FALLBACK24-NEXT: movl {{[0-9]+}}(%esp), %eax
23120 ; FALLBACK24-NEXT: movl %ebx, 60(%eax)
23121 ; FALLBACK24-NEXT: movl %edx, 56(%eax)
23122 ; FALLBACK24-NEXT: movl %esi, 48(%eax)
23123 ; FALLBACK24-NEXT: movl %ebp, 52(%eax)
23124 ; FALLBACK24-NEXT: movl %edi, 40(%eax)
23125 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23126 ; FALLBACK24-NEXT: movl %ecx, 44(%eax)
23127 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23128 ; FALLBACK24-NEXT: movl %ecx, 32(%eax)
23129 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23130 ; FALLBACK24-NEXT: movl %ecx, 36(%eax)
23131 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23132 ; FALLBACK24-NEXT: movl %ecx, 24(%eax)
23133 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23134 ; FALLBACK24-NEXT: movl %ecx, 28(%eax)
23135 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23136 ; FALLBACK24-NEXT: movl %ecx, 16(%eax)
23137 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23138 ; FALLBACK24-NEXT: movl %ecx, 20(%eax)
23139 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23140 ; FALLBACK24-NEXT: movl %ecx, 8(%eax)
23141 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23142 ; FALLBACK24-NEXT: movl %ecx, 12(%eax)
23143 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23144 ; FALLBACK24-NEXT: movl %ecx, (%eax)
23145 ; FALLBACK24-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23146 ; FALLBACK24-NEXT: movl %ecx, 4(%eax)
23147 ; FALLBACK24-NEXT: addl $204, %esp
23148 ; FALLBACK24-NEXT: popl %esi
23149 ; FALLBACK24-NEXT: popl %edi
23150 ; FALLBACK24-NEXT: popl %ebx
23151 ; FALLBACK24-NEXT: popl %ebp
23152 ; FALLBACK24-NEXT: vzeroupper
23153 ; FALLBACK24-NEXT: retl
23155 ; FALLBACK25-LABEL: ashr_64bytes:
23156 ; FALLBACK25: # %bb.0:
23157 ; FALLBACK25-NEXT: pushl %ebp
23158 ; FALLBACK25-NEXT: pushl %ebx
23159 ; FALLBACK25-NEXT: pushl %edi
23160 ; FALLBACK25-NEXT: pushl %esi
23161 ; FALLBACK25-NEXT: subl $188, %esp
23162 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %ecx
23163 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %eax
23164 ; FALLBACK25-NEXT: vmovups (%eax), %ymm0
23165 ; FALLBACK25-NEXT: vmovups 32(%eax), %xmm1
23166 ; FALLBACK25-NEXT: movl 48(%eax), %edx
23167 ; FALLBACK25-NEXT: movl 52(%eax), %esi
23168 ; FALLBACK25-NEXT: movl 56(%eax), %edi
23169 ; FALLBACK25-NEXT: movl 60(%eax), %eax
23170 ; FALLBACK25-NEXT: movl (%ecx), %ecx
23171 ; FALLBACK25-NEXT: movl %eax, {{[0-9]+}}(%esp)
23172 ; FALLBACK25-NEXT: movl %edi, {{[0-9]+}}(%esp)
23173 ; FALLBACK25-NEXT: movl %esi, {{[0-9]+}}(%esp)
23174 ; FALLBACK25-NEXT: movl %edx, {{[0-9]+}}(%esp)
23175 ; FALLBACK25-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp)
23176 ; FALLBACK25-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
23177 ; FALLBACK25-NEXT: sarl $31, %eax
23178 ; FALLBACK25-NEXT: movl %eax, {{[0-9]+}}(%esp)
23179 ; FALLBACK25-NEXT: movl %eax, {{[0-9]+}}(%esp)
23180 ; FALLBACK25-NEXT: movl %eax, {{[0-9]+}}(%esp)
23181 ; FALLBACK25-NEXT: movl %eax, {{[0-9]+}}(%esp)
23182 ; FALLBACK25-NEXT: movl %eax, {{[0-9]+}}(%esp)
23183 ; FALLBACK25-NEXT: movl %eax, {{[0-9]+}}(%esp)
23184 ; FALLBACK25-NEXT: movl %eax, {{[0-9]+}}(%esp)
23185 ; FALLBACK25-NEXT: movl %eax, {{[0-9]+}}(%esp)
23186 ; FALLBACK25-NEXT: movl %eax, {{[0-9]+}}(%esp)
23187 ; FALLBACK25-NEXT: movl %eax, {{[0-9]+}}(%esp)
23188 ; FALLBACK25-NEXT: movl %eax, {{[0-9]+}}(%esp)
23189 ; FALLBACK25-NEXT: movl %eax, {{[0-9]+}}(%esp)
23190 ; FALLBACK25-NEXT: movl %eax, {{[0-9]+}}(%esp)
23191 ; FALLBACK25-NEXT: movl %eax, {{[0-9]+}}(%esp)
23192 ; FALLBACK25-NEXT: movl %eax, {{[0-9]+}}(%esp)
23193 ; FALLBACK25-NEXT: movl %eax, {{[0-9]+}}(%esp)
23194 ; FALLBACK25-NEXT: movl %ecx, %ebp
23195 ; FALLBACK25-NEXT: andl $60, %ebp
23196 ; FALLBACK25-NEXT: movl 56(%esp,%ebp), %edx
23197 ; FALLBACK25-NEXT: movl 52(%esp,%ebp), %eax
23198 ; FALLBACK25-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23199 ; FALLBACK25-NEXT: shll $3, %ecx
23200 ; FALLBACK25-NEXT: andl $24, %ecx
23201 ; FALLBACK25-NEXT: shrdl %cl, %edx, %eax
23202 ; FALLBACK25-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23203 ; FALLBACK25-NEXT: movl 64(%esp,%ebp), %edi
23204 ; FALLBACK25-NEXT: movl 60(%esp,%ebp), %eax
23205 ; FALLBACK25-NEXT: movl %eax, %esi
23206 ; FALLBACK25-NEXT: shrdl %cl, %edi, %esi
23207 ; FALLBACK25-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23208 ; FALLBACK25-NEXT: shrdl %cl, %eax, %edx
23209 ; FALLBACK25-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23210 ; FALLBACK25-NEXT: movl 72(%esp,%ebp), %esi
23211 ; FALLBACK25-NEXT: movl 68(%esp,%ebp), %eax
23212 ; FALLBACK25-NEXT: movl %eax, %edx
23213 ; FALLBACK25-NEXT: shrdl %cl, %esi, %edx
23214 ; FALLBACK25-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23215 ; FALLBACK25-NEXT: shrdl %cl, %eax, %edi
23216 ; FALLBACK25-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23217 ; FALLBACK25-NEXT: movl 80(%esp,%ebp), %edi
23218 ; FALLBACK25-NEXT: movl 76(%esp,%ebp), %eax
23219 ; FALLBACK25-NEXT: movl %eax, %edx
23220 ; FALLBACK25-NEXT: shrdl %cl, %edi, %edx
23221 ; FALLBACK25-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23222 ; FALLBACK25-NEXT: shrdl %cl, %eax, %esi
23223 ; FALLBACK25-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23224 ; FALLBACK25-NEXT: movl 88(%esp,%ebp), %esi
23225 ; FALLBACK25-NEXT: movl 84(%esp,%ebp), %eax
23226 ; FALLBACK25-NEXT: movl %eax, %edx
23227 ; FALLBACK25-NEXT: shrdl %cl, %esi, %edx
23228 ; FALLBACK25-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23229 ; FALLBACK25-NEXT: movl %esi, %edx
23230 ; FALLBACK25-NEXT: shrdl %cl, %eax, %edi
23231 ; FALLBACK25-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23232 ; FALLBACK25-NEXT: movl 96(%esp,%ebp), %esi
23233 ; FALLBACK25-NEXT: movl 92(%esp,%ebp), %eax
23234 ; FALLBACK25-NEXT: movl %eax, %edi
23235 ; FALLBACK25-NEXT: shrdl %cl, %esi, %edi
23236 ; FALLBACK25-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23237 ; FALLBACK25-NEXT: shrdl %cl, %eax, %edx
23238 ; FALLBACK25-NEXT: movl %edx, (%esp) # 4-byte Spill
23239 ; FALLBACK25-NEXT: movl 104(%esp,%ebp), %edx
23240 ; FALLBACK25-NEXT: movl 100(%esp,%ebp), %eax
23241 ; FALLBACK25-NEXT: movl %eax, %edi
23242 ; FALLBACK25-NEXT: shrdl %cl, %edx, %edi
23243 ; FALLBACK25-NEXT: shrdl %cl, %eax, %esi
23244 ; FALLBACK25-NEXT: movl 48(%esp,%ebp), %ebx
23245 ; FALLBACK25-NEXT: movl 108(%esp,%ebp), %eax
23246 ; FALLBACK25-NEXT: shrdl %cl, %eax, %edx
23247 ; FALLBACK25-NEXT: movl {{[0-9]+}}(%esp), %ebp
23248 ; FALLBACK25-NEXT: movl %edx, 56(%ebp)
23249 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
23250 ; FALLBACK25-NEXT: shrdl %cl, %edx, %ebx
23251 ; FALLBACK25-NEXT: # kill: def $cl killed $cl killed $ecx
23252 ; FALLBACK25-NEXT: sarl %cl, %eax
23253 ; FALLBACK25-NEXT: movl %eax, 60(%ebp)
23254 ; FALLBACK25-NEXT: movl %esi, 48(%ebp)
23255 ; FALLBACK25-NEXT: movl %edi, 52(%ebp)
23256 ; FALLBACK25-NEXT: movl (%esp), %eax # 4-byte Reload
23257 ; FALLBACK25-NEXT: movl %eax, 40(%ebp)
23258 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23259 ; FALLBACK25-NEXT: movl %eax, 44(%ebp)
23260 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23261 ; FALLBACK25-NEXT: movl %eax, 32(%ebp)
23262 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23263 ; FALLBACK25-NEXT: movl %eax, 36(%ebp)
23264 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23265 ; FALLBACK25-NEXT: movl %eax, 24(%ebp)
23266 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23267 ; FALLBACK25-NEXT: movl %eax, 28(%ebp)
23268 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23269 ; FALLBACK25-NEXT: movl %eax, 16(%ebp)
23270 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23271 ; FALLBACK25-NEXT: movl %eax, 20(%ebp)
23272 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23273 ; FALLBACK25-NEXT: movl %eax, 8(%ebp)
23274 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23275 ; FALLBACK25-NEXT: movl %eax, 12(%ebp)
23276 ; FALLBACK25-NEXT: movl %ebx, (%ebp)
23277 ; FALLBACK25-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23278 ; FALLBACK25-NEXT: movl %eax, 4(%ebp)
23279 ; FALLBACK25-NEXT: addl $188, %esp
23280 ; FALLBACK25-NEXT: popl %esi
23281 ; FALLBACK25-NEXT: popl %edi
23282 ; FALLBACK25-NEXT: popl %ebx
23283 ; FALLBACK25-NEXT: popl %ebp
23284 ; FALLBACK25-NEXT: vzeroupper
23285 ; FALLBACK25-NEXT: retl
23287 ; FALLBACK26-LABEL: ashr_64bytes:
23288 ; FALLBACK26: # %bb.0:
23289 ; FALLBACK26-NEXT: pushl %ebp
23290 ; FALLBACK26-NEXT: pushl %ebx
23291 ; FALLBACK26-NEXT: pushl %edi
23292 ; FALLBACK26-NEXT: pushl %esi
23293 ; FALLBACK26-NEXT: subl $204, %esp
23294 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %eax
23295 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %ecx
23296 ; FALLBACK26-NEXT: vmovups (%ecx), %ymm0
23297 ; FALLBACK26-NEXT: vmovups 32(%ecx), %xmm1
23298 ; FALLBACK26-NEXT: movl 48(%ecx), %edx
23299 ; FALLBACK26-NEXT: movl 52(%ecx), %esi
23300 ; FALLBACK26-NEXT: movl 56(%ecx), %edi
23301 ; FALLBACK26-NEXT: movl 60(%ecx), %ecx
23302 ; FALLBACK26-NEXT: movl (%eax), %eax
23303 ; FALLBACK26-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23304 ; FALLBACK26-NEXT: movl %edi, {{[0-9]+}}(%esp)
23305 ; FALLBACK26-NEXT: movl %esi, {{[0-9]+}}(%esp)
23306 ; FALLBACK26-NEXT: movl %edx, {{[0-9]+}}(%esp)
23307 ; FALLBACK26-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp)
23308 ; FALLBACK26-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
23309 ; FALLBACK26-NEXT: sarl $31, %ecx
23310 ; FALLBACK26-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23311 ; FALLBACK26-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23312 ; FALLBACK26-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23313 ; FALLBACK26-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23314 ; FALLBACK26-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23315 ; FALLBACK26-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23316 ; FALLBACK26-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23317 ; FALLBACK26-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23318 ; FALLBACK26-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23319 ; FALLBACK26-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23320 ; FALLBACK26-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23321 ; FALLBACK26-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23322 ; FALLBACK26-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23323 ; FALLBACK26-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23324 ; FALLBACK26-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23325 ; FALLBACK26-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23326 ; FALLBACK26-NEXT: movl %eax, %ecx
23327 ; FALLBACK26-NEXT: leal (,%eax,8), %edx
23328 ; FALLBACK26-NEXT: andl $24, %edx
23329 ; FALLBACK26-NEXT: andl $60, %ecx
23330 ; FALLBACK26-NEXT: movl 68(%esp,%ecx), %esi
23331 ; FALLBACK26-NEXT: movl 72(%esp,%ecx), %edi
23332 ; FALLBACK26-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23333 ; FALLBACK26-NEXT: shrxl %edx, %esi, %eax
23334 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23335 ; FALLBACK26-NEXT: movl %edx, %ebx
23336 ; FALLBACK26-NEXT: notb %bl
23337 ; FALLBACK26-NEXT: leal (%edi,%edi), %ebp
23338 ; FALLBACK26-NEXT: shlxl %ebx, %ebp, %eax
23339 ; FALLBACK26-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
23340 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23341 ; FALLBACK26-NEXT: shrxl %edx, 64(%esp,%ecx), %edi
23342 ; FALLBACK26-NEXT: addl %esi, %esi
23343 ; FALLBACK26-NEXT: shlxl %ebx, %esi, %eax
23344 ; FALLBACK26-NEXT: orl %edi, %eax
23345 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23346 ; FALLBACK26-NEXT: movl 80(%esp,%ecx), %esi
23347 ; FALLBACK26-NEXT: leal (%esi,%esi), %edi
23348 ; FALLBACK26-NEXT: shlxl %ebx, %edi, %eax
23349 ; FALLBACK26-NEXT: movl 76(%esp,%ecx), %edi
23350 ; FALLBACK26-NEXT: shrxl %edx, %edi, %ebp
23351 ; FALLBACK26-NEXT: orl %ebp, %eax
23352 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23353 ; FALLBACK26-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
23354 ; FALLBACK26-NEXT: addl %edi, %edi
23355 ; FALLBACK26-NEXT: shlxl %ebx, %edi, %edi
23356 ; FALLBACK26-NEXT: orl %eax, %edi
23357 ; FALLBACK26-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23358 ; FALLBACK26-NEXT: movl 88(%esp,%ecx), %eax
23359 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23360 ; FALLBACK26-NEXT: leal (%eax,%eax), %edi
23361 ; FALLBACK26-NEXT: shlxl %ebx, %edi, %eax
23362 ; FALLBACK26-NEXT: movl 84(%esp,%ecx), %edi
23363 ; FALLBACK26-NEXT: shrxl %edx, %edi, %ebp
23364 ; FALLBACK26-NEXT: orl %ebp, %eax
23365 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23366 ; FALLBACK26-NEXT: shrxl %edx, %esi, %esi
23367 ; FALLBACK26-NEXT: addl %edi, %edi
23368 ; FALLBACK26-NEXT: shlxl %ebx, %edi, %eax
23369 ; FALLBACK26-NEXT: orl %esi, %eax
23370 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23371 ; FALLBACK26-NEXT: movl 96(%esp,%ecx), %esi
23372 ; FALLBACK26-NEXT: leal (%esi,%esi), %edi
23373 ; FALLBACK26-NEXT: shlxl %ebx, %edi, %eax
23374 ; FALLBACK26-NEXT: movl 92(%esp,%ecx), %edi
23375 ; FALLBACK26-NEXT: shrxl %edx, %edi, %ebp
23376 ; FALLBACK26-NEXT: orl %ebp, %eax
23377 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23378 ; FALLBACK26-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
23379 ; FALLBACK26-NEXT: addl %edi, %edi
23380 ; FALLBACK26-NEXT: shlxl %ebx, %edi, %edi
23381 ; FALLBACK26-NEXT: orl %eax, %edi
23382 ; FALLBACK26-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23383 ; FALLBACK26-NEXT: movl 104(%esp,%ecx), %eax
23384 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23385 ; FALLBACK26-NEXT: leal (%eax,%eax), %edi
23386 ; FALLBACK26-NEXT: shlxl %ebx, %edi, %eax
23387 ; FALLBACK26-NEXT: movl 100(%esp,%ecx), %edi
23388 ; FALLBACK26-NEXT: shrxl %edx, %edi, %ebp
23389 ; FALLBACK26-NEXT: orl %ebp, %eax
23390 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23391 ; FALLBACK26-NEXT: shrxl %edx, %esi, %esi
23392 ; FALLBACK26-NEXT: addl %edi, %edi
23393 ; FALLBACK26-NEXT: shlxl %ebx, %edi, %eax
23394 ; FALLBACK26-NEXT: orl %esi, %eax
23395 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23396 ; FALLBACK26-NEXT: movl 112(%esp,%ecx), %eax
23397 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23398 ; FALLBACK26-NEXT: leal (%eax,%eax), %esi
23399 ; FALLBACK26-NEXT: shlxl %ebx, %esi, %eax
23400 ; FALLBACK26-NEXT: movl 108(%esp,%ecx), %esi
23401 ; FALLBACK26-NEXT: movl %ecx, %edi
23402 ; FALLBACK26-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23403 ; FALLBACK26-NEXT: shrxl %edx, %esi, %ebp
23404 ; FALLBACK26-NEXT: orl %ebp, %eax
23405 ; FALLBACK26-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23406 ; FALLBACK26-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
23407 ; FALLBACK26-NEXT: addl %esi, %esi
23408 ; FALLBACK26-NEXT: shlxl %ebx, %esi, %esi
23409 ; FALLBACK26-NEXT: orl %ecx, %esi
23410 ; FALLBACK26-NEXT: movl 120(%esp,%edi), %ebp
23411 ; FALLBACK26-NEXT: leal (%ebp,%ebp), %ecx
23412 ; FALLBACK26-NEXT: shlxl %ebx, %ecx, %ecx
23413 ; FALLBACK26-NEXT: movl 116(%esp,%edi), %eax
23414 ; FALLBACK26-NEXT: shrxl %edx, %eax, %edi
23415 ; FALLBACK26-NEXT: orl %edi, %ecx
23416 ; FALLBACK26-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
23417 ; FALLBACK26-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23418 ; FALLBACK26-NEXT: addl %eax, %eax
23419 ; FALLBACK26-NEXT: shlxl %ebx, %eax, %edi
23420 ; FALLBACK26-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
23421 ; FALLBACK26-NEXT: shrxl %edx, %ebp, %eax
23422 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
23423 ; FALLBACK26-NEXT: movl 124(%esp,%ebp), %ebp
23424 ; FALLBACK26-NEXT: sarxl %edx, %ebp, %edx
23425 ; FALLBACK26-NEXT: addl %ebp, %ebp
23426 ; FALLBACK26-NEXT: shlxl %ebx, %ebp, %ebx
23427 ; FALLBACK26-NEXT: orl %eax, %ebx
23428 ; FALLBACK26-NEXT: movl {{[0-9]+}}(%esp), %eax
23429 ; FALLBACK26-NEXT: movl %edx, 60(%eax)
23430 ; FALLBACK26-NEXT: movl %ebx, 56(%eax)
23431 ; FALLBACK26-NEXT: movl %edi, 48(%eax)
23432 ; FALLBACK26-NEXT: movl %ecx, 52(%eax)
23433 ; FALLBACK26-NEXT: movl %esi, 40(%eax)
23434 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23435 ; FALLBACK26-NEXT: movl %ecx, 44(%eax)
23436 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23437 ; FALLBACK26-NEXT: movl %ecx, 32(%eax)
23438 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23439 ; FALLBACK26-NEXT: movl %ecx, 36(%eax)
23440 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23441 ; FALLBACK26-NEXT: movl %ecx, 24(%eax)
23442 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23443 ; FALLBACK26-NEXT: movl %ecx, 28(%eax)
23444 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23445 ; FALLBACK26-NEXT: movl %ecx, 16(%eax)
23446 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23447 ; FALLBACK26-NEXT: movl %ecx, 20(%eax)
23448 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23449 ; FALLBACK26-NEXT: movl %ecx, 8(%eax)
23450 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23451 ; FALLBACK26-NEXT: movl %ecx, 12(%eax)
23452 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23453 ; FALLBACK26-NEXT: movl %ecx, (%eax)
23454 ; FALLBACK26-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23455 ; FALLBACK26-NEXT: movl %ecx, 4(%eax)
23456 ; FALLBACK26-NEXT: addl $204, %esp
23457 ; FALLBACK26-NEXT: popl %esi
23458 ; FALLBACK26-NEXT: popl %edi
23459 ; FALLBACK26-NEXT: popl %ebx
23460 ; FALLBACK26-NEXT: popl %ebp
23461 ; FALLBACK26-NEXT: vzeroupper
23462 ; FALLBACK26-NEXT: retl
23464 ; FALLBACK27-LABEL: ashr_64bytes:
23465 ; FALLBACK27: # %bb.0:
23466 ; FALLBACK27-NEXT: pushl %ebp
23467 ; FALLBACK27-NEXT: pushl %ebx
23468 ; FALLBACK27-NEXT: pushl %edi
23469 ; FALLBACK27-NEXT: pushl %esi
23470 ; FALLBACK27-NEXT: subl $188, %esp
23471 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %ecx
23472 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %eax
23473 ; FALLBACK27-NEXT: vmovups (%eax), %ymm0
23474 ; FALLBACK27-NEXT: vmovups 32(%eax), %xmm1
23475 ; FALLBACK27-NEXT: movl 48(%eax), %edx
23476 ; FALLBACK27-NEXT: movl 52(%eax), %esi
23477 ; FALLBACK27-NEXT: movl 56(%eax), %edi
23478 ; FALLBACK27-NEXT: movl 60(%eax), %eax
23479 ; FALLBACK27-NEXT: movl (%ecx), %ecx
23480 ; FALLBACK27-NEXT: movl %eax, {{[0-9]+}}(%esp)
23481 ; FALLBACK27-NEXT: movl %edi, {{[0-9]+}}(%esp)
23482 ; FALLBACK27-NEXT: movl %esi, {{[0-9]+}}(%esp)
23483 ; FALLBACK27-NEXT: movl %edx, {{[0-9]+}}(%esp)
23484 ; FALLBACK27-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp)
23485 ; FALLBACK27-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
23486 ; FALLBACK27-NEXT: sarl $31, %eax
23487 ; FALLBACK27-NEXT: movl %eax, {{[0-9]+}}(%esp)
23488 ; FALLBACK27-NEXT: movl %eax, {{[0-9]+}}(%esp)
23489 ; FALLBACK27-NEXT: movl %eax, {{[0-9]+}}(%esp)
23490 ; FALLBACK27-NEXT: movl %eax, {{[0-9]+}}(%esp)
23491 ; FALLBACK27-NEXT: movl %eax, {{[0-9]+}}(%esp)
23492 ; FALLBACK27-NEXT: movl %eax, {{[0-9]+}}(%esp)
23493 ; FALLBACK27-NEXT: movl %eax, {{[0-9]+}}(%esp)
23494 ; FALLBACK27-NEXT: movl %eax, {{[0-9]+}}(%esp)
23495 ; FALLBACK27-NEXT: movl %eax, {{[0-9]+}}(%esp)
23496 ; FALLBACK27-NEXT: movl %eax, {{[0-9]+}}(%esp)
23497 ; FALLBACK27-NEXT: movl %eax, {{[0-9]+}}(%esp)
23498 ; FALLBACK27-NEXT: movl %eax, {{[0-9]+}}(%esp)
23499 ; FALLBACK27-NEXT: movl %eax, {{[0-9]+}}(%esp)
23500 ; FALLBACK27-NEXT: movl %eax, {{[0-9]+}}(%esp)
23501 ; FALLBACK27-NEXT: movl %eax, {{[0-9]+}}(%esp)
23502 ; FALLBACK27-NEXT: movl %eax, {{[0-9]+}}(%esp)
23503 ; FALLBACK27-NEXT: movl %ecx, %ebp
23504 ; FALLBACK27-NEXT: andl $60, %ebp
23505 ; FALLBACK27-NEXT: movl 56(%esp,%ebp), %edx
23506 ; FALLBACK27-NEXT: movl 52(%esp,%ebp), %eax
23507 ; FALLBACK27-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23508 ; FALLBACK27-NEXT: shll $3, %ecx
23509 ; FALLBACK27-NEXT: andl $24, %ecx
23510 ; FALLBACK27-NEXT: shrdl %cl, %edx, %eax
23511 ; FALLBACK27-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23512 ; FALLBACK27-NEXT: movl 64(%esp,%ebp), %edi
23513 ; FALLBACK27-NEXT: movl 60(%esp,%ebp), %eax
23514 ; FALLBACK27-NEXT: movl %eax, %esi
23515 ; FALLBACK27-NEXT: shrdl %cl, %edi, %esi
23516 ; FALLBACK27-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23517 ; FALLBACK27-NEXT: shrdl %cl, %eax, %edx
23518 ; FALLBACK27-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23519 ; FALLBACK27-NEXT: movl 72(%esp,%ebp), %esi
23520 ; FALLBACK27-NEXT: movl 68(%esp,%ebp), %eax
23521 ; FALLBACK27-NEXT: movl %eax, %edx
23522 ; FALLBACK27-NEXT: shrdl %cl, %esi, %edx
23523 ; FALLBACK27-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23524 ; FALLBACK27-NEXT: shrdl %cl, %eax, %edi
23525 ; FALLBACK27-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23526 ; FALLBACK27-NEXT: movl 80(%esp,%ebp), %edi
23527 ; FALLBACK27-NEXT: movl 76(%esp,%ebp), %eax
23528 ; FALLBACK27-NEXT: movl %eax, %edx
23529 ; FALLBACK27-NEXT: shrdl %cl, %edi, %edx
23530 ; FALLBACK27-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23531 ; FALLBACK27-NEXT: shrdl %cl, %eax, %esi
23532 ; FALLBACK27-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23533 ; FALLBACK27-NEXT: movl 88(%esp,%ebp), %ebx
23534 ; FALLBACK27-NEXT: movl 84(%esp,%ebp), %eax
23535 ; FALLBACK27-NEXT: movl %eax, %edx
23536 ; FALLBACK27-NEXT: shrdl %cl, %ebx, %edx
23537 ; FALLBACK27-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23538 ; FALLBACK27-NEXT: shrdl %cl, %eax, %edi
23539 ; FALLBACK27-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23540 ; FALLBACK27-NEXT: movl 96(%esp,%ebp), %esi
23541 ; FALLBACK27-NEXT: movl 92(%esp,%ebp), %eax
23542 ; FALLBACK27-NEXT: movl %eax, %edx
23543 ; FALLBACK27-NEXT: shrdl %cl, %esi, %edx
23544 ; FALLBACK27-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23545 ; FALLBACK27-NEXT: shrdl %cl, %eax, %ebx
23546 ; FALLBACK27-NEXT: movl 104(%esp,%ebp), %eax
23547 ; FALLBACK27-NEXT: movl 100(%esp,%ebp), %edi
23548 ; FALLBACK27-NEXT: movl %edi, %edx
23549 ; FALLBACK27-NEXT: shrdl %cl, %eax, %edx
23550 ; FALLBACK27-NEXT: shrdl %cl, %edi, %esi
23551 ; FALLBACK27-NEXT: movl 48(%esp,%ebp), %edi
23552 ; FALLBACK27-NEXT: movl 108(%esp,%ebp), %ebp
23553 ; FALLBACK27-NEXT: movl %ebp, (%esp) # 4-byte Spill
23554 ; FALLBACK27-NEXT: shrdl %cl, %ebp, %eax
23555 ; FALLBACK27-NEXT: movl {{[0-9]+}}(%esp), %ebp
23556 ; FALLBACK27-NEXT: movl %eax, 56(%ebp)
23557 ; FALLBACK27-NEXT: movl %esi, 48(%ebp)
23558 ; FALLBACK27-NEXT: movl %edx, 52(%ebp)
23559 ; FALLBACK27-NEXT: movl %ebx, 40(%ebp)
23560 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23561 ; FALLBACK27-NEXT: movl %eax, 44(%ebp)
23562 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23563 ; FALLBACK27-NEXT: movl %eax, 32(%ebp)
23564 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23565 ; FALLBACK27-NEXT: movl %eax, 36(%ebp)
23566 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23567 ; FALLBACK27-NEXT: movl %eax, 24(%ebp)
23568 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23569 ; FALLBACK27-NEXT: movl %eax, 28(%ebp)
23570 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23571 ; FALLBACK27-NEXT: movl %eax, 16(%ebp)
23572 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23573 ; FALLBACK27-NEXT: movl %eax, 20(%ebp)
23574 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23575 ; FALLBACK27-NEXT: movl %eax, 8(%ebp)
23576 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23577 ; FALLBACK27-NEXT: movl %eax, 12(%ebp)
23578 ; FALLBACK27-NEXT: sarxl %ecx, (%esp), %eax # 4-byte Folded Reload
23579 ; FALLBACK27-NEXT: # kill: def $cl killed $cl killed $ecx
23580 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
23581 ; FALLBACK27-NEXT: shrdl %cl, %edx, %edi
23582 ; FALLBACK27-NEXT: movl %edi, (%ebp)
23583 ; FALLBACK27-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23584 ; FALLBACK27-NEXT: movl %ecx, 4(%ebp)
23585 ; FALLBACK27-NEXT: movl %eax, 60(%ebp)
23586 ; FALLBACK27-NEXT: addl $188, %esp
23587 ; FALLBACK27-NEXT: popl %esi
23588 ; FALLBACK27-NEXT: popl %edi
23589 ; FALLBACK27-NEXT: popl %ebx
23590 ; FALLBACK27-NEXT: popl %ebp
23591 ; FALLBACK27-NEXT: vzeroupper
23592 ; FALLBACK27-NEXT: retl
23594 ; FALLBACK28-LABEL: ashr_64bytes:
23595 ; FALLBACK28: # %bb.0:
23596 ; FALLBACK28-NEXT: pushl %ebp
23597 ; FALLBACK28-NEXT: pushl %ebx
23598 ; FALLBACK28-NEXT: pushl %edi
23599 ; FALLBACK28-NEXT: pushl %esi
23600 ; FALLBACK28-NEXT: subl $204, %esp
23601 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %eax
23602 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %ecx
23603 ; FALLBACK28-NEXT: vmovups (%ecx), %ymm0
23604 ; FALLBACK28-NEXT: vmovups 32(%ecx), %xmm1
23605 ; FALLBACK28-NEXT: movl 48(%ecx), %edx
23606 ; FALLBACK28-NEXT: movl 52(%ecx), %esi
23607 ; FALLBACK28-NEXT: movl 56(%ecx), %edi
23608 ; FALLBACK28-NEXT: movl 60(%ecx), %ecx
23609 ; FALLBACK28-NEXT: movl (%eax), %eax
23610 ; FALLBACK28-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23611 ; FALLBACK28-NEXT: movl %edi, {{[0-9]+}}(%esp)
23612 ; FALLBACK28-NEXT: movl %esi, {{[0-9]+}}(%esp)
23613 ; FALLBACK28-NEXT: movl %edx, {{[0-9]+}}(%esp)
23614 ; FALLBACK28-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp)
23615 ; FALLBACK28-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
23616 ; FALLBACK28-NEXT: sarl $31, %ecx
23617 ; FALLBACK28-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23618 ; FALLBACK28-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23619 ; FALLBACK28-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23620 ; FALLBACK28-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23621 ; FALLBACK28-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23622 ; FALLBACK28-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23623 ; FALLBACK28-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23624 ; FALLBACK28-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23625 ; FALLBACK28-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23626 ; FALLBACK28-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23627 ; FALLBACK28-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23628 ; FALLBACK28-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23629 ; FALLBACK28-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23630 ; FALLBACK28-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23631 ; FALLBACK28-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23632 ; FALLBACK28-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23633 ; FALLBACK28-NEXT: movl %eax, %esi
23634 ; FALLBACK28-NEXT: andl $60, %esi
23635 ; FALLBACK28-NEXT: movl 68(%esp,%esi), %edx
23636 ; FALLBACK28-NEXT: shll $3, %eax
23637 ; FALLBACK28-NEXT: andl $24, %eax
23638 ; FALLBACK28-NEXT: movl %edx, %edi
23639 ; FALLBACK28-NEXT: movl %eax, %ecx
23640 ; FALLBACK28-NEXT: shrl %cl, %edi
23641 ; FALLBACK28-NEXT: movl 72(%esp,%esi), %ecx
23642 ; FALLBACK28-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23643 ; FALLBACK28-NEXT: leal (%ecx,%ecx), %ebx
23644 ; FALLBACK28-NEXT: movb %al, %ch
23645 ; FALLBACK28-NEXT: notb %ch
23646 ; FALLBACK28-NEXT: movb %ch, %cl
23647 ; FALLBACK28-NEXT: shll %cl, %ebx
23648 ; FALLBACK28-NEXT: orl %edi, %ebx
23649 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23650 ; FALLBACK28-NEXT: movl 64(%esp,%esi), %edi
23651 ; FALLBACK28-NEXT: movb %al, %cl
23652 ; FALLBACK28-NEXT: shrl %cl, %edi
23653 ; FALLBACK28-NEXT: addl %edx, %edx
23654 ; FALLBACK28-NEXT: movb %ch, %cl
23655 ; FALLBACK28-NEXT: shll %cl, %edx
23656 ; FALLBACK28-NEXT: orl %edi, %edx
23657 ; FALLBACK28-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23658 ; FALLBACK28-NEXT: movl 76(%esp,%esi), %edx
23659 ; FALLBACK28-NEXT: movl %edx, %ebp
23660 ; FALLBACK28-NEXT: movb %al, %cl
23661 ; FALLBACK28-NEXT: shrl %cl, %ebp
23662 ; FALLBACK28-NEXT: movl 80(%esp,%esi), %edi
23663 ; FALLBACK28-NEXT: leal (%edi,%edi), %ebx
23664 ; FALLBACK28-NEXT: movb %ch, %cl
23665 ; FALLBACK28-NEXT: shll %cl, %ebx
23666 ; FALLBACK28-NEXT: orl %ebp, %ebx
23667 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23668 ; FALLBACK28-NEXT: movb %al, %cl
23669 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
23670 ; FALLBACK28-NEXT: shrl %cl, %ebx
23671 ; FALLBACK28-NEXT: addl %edx, %edx
23672 ; FALLBACK28-NEXT: movb %ch, %cl
23673 ; FALLBACK28-NEXT: shll %cl, %edx
23674 ; FALLBACK28-NEXT: orl %ebx, %edx
23675 ; FALLBACK28-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23676 ; FALLBACK28-NEXT: movl 84(%esp,%esi), %ebx
23677 ; FALLBACK28-NEXT: movl %ebx, %ebp
23678 ; FALLBACK28-NEXT: movl %eax, %edx
23679 ; FALLBACK28-NEXT: movb %dl, %cl
23680 ; FALLBACK28-NEXT: shrl %cl, %ebp
23681 ; FALLBACK28-NEXT: movl 88(%esp,%esi), %eax
23682 ; FALLBACK28-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23683 ; FALLBACK28-NEXT: addl %eax, %eax
23684 ; FALLBACK28-NEXT: movb %ch, %cl
23685 ; FALLBACK28-NEXT: shll %cl, %eax
23686 ; FALLBACK28-NEXT: orl %ebp, %eax
23687 ; FALLBACK28-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23688 ; FALLBACK28-NEXT: movb %dl, %cl
23689 ; FALLBACK28-NEXT: shrl %cl, %edi
23690 ; FALLBACK28-NEXT: addl %ebx, %ebx
23691 ; FALLBACK28-NEXT: movb %ch, %cl
23692 ; FALLBACK28-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
23693 ; FALLBACK28-NEXT: shll %cl, %ebx
23694 ; FALLBACK28-NEXT: orl %edi, %ebx
23695 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23696 ; FALLBACK28-NEXT: movl 92(%esp,%esi), %ebx
23697 ; FALLBACK28-NEXT: movl %ebx, %ebp
23698 ; FALLBACK28-NEXT: movb %dl, %cl
23699 ; FALLBACK28-NEXT: shrl %cl, %ebp
23700 ; FALLBACK28-NEXT: movl 96(%esp,%esi), %edi
23701 ; FALLBACK28-NEXT: leal (%edi,%edi), %eax
23702 ; FALLBACK28-NEXT: movb %ch, %cl
23703 ; FALLBACK28-NEXT: shll %cl, %eax
23704 ; FALLBACK28-NEXT: orl %ebp, %eax
23705 ; FALLBACK28-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23706 ; FALLBACK28-NEXT: movb %dl, %cl
23707 ; FALLBACK28-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23708 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23709 ; FALLBACK28-NEXT: shrl %cl, %eax
23710 ; FALLBACK28-NEXT: addl %ebx, %ebx
23711 ; FALLBACK28-NEXT: movb %ch, %cl
23712 ; FALLBACK28-NEXT: shll %cl, %ebx
23713 ; FALLBACK28-NEXT: orl %eax, %ebx
23714 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23715 ; FALLBACK28-NEXT: movl 100(%esp,%esi), %ebx
23716 ; FALLBACK28-NEXT: movl %ebx, %ebp
23717 ; FALLBACK28-NEXT: movb %dl, %cl
23718 ; FALLBACK28-NEXT: shrl %cl, %ebp
23719 ; FALLBACK28-NEXT: movl 104(%esp,%esi), %edx
23720 ; FALLBACK28-NEXT: leal (%edx,%edx), %eax
23721 ; FALLBACK28-NEXT: movb %ch, %cl
23722 ; FALLBACK28-NEXT: shll %cl, %eax
23723 ; FALLBACK28-NEXT: orl %ebp, %eax
23724 ; FALLBACK28-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23725 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23726 ; FALLBACK28-NEXT: movb %al, %cl
23727 ; FALLBACK28-NEXT: shrl %cl, %edi
23728 ; FALLBACK28-NEXT: addl %ebx, %ebx
23729 ; FALLBACK28-NEXT: movb %ch, %cl
23730 ; FALLBACK28-NEXT: shll %cl, %ebx
23731 ; FALLBACK28-NEXT: orl %edi, %ebx
23732 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23733 ; FALLBACK28-NEXT: movl 108(%esp,%esi), %edi
23734 ; FALLBACK28-NEXT: movl %edi, %ebp
23735 ; FALLBACK28-NEXT: movl %eax, %ecx
23736 ; FALLBACK28-NEXT: shrl %cl, %ebp
23737 ; FALLBACK28-NEXT: movl 112(%esp,%esi), %ecx
23738 ; FALLBACK28-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23739 ; FALLBACK28-NEXT: leal (%ecx,%ecx), %ebx
23740 ; FALLBACK28-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
23741 ; FALLBACK28-NEXT: movb %ch, %cl
23742 ; FALLBACK28-NEXT: shll %cl, %ebx
23743 ; FALLBACK28-NEXT: orl %ebp, %ebx
23744 ; FALLBACK28-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23745 ; FALLBACK28-NEXT: movb %al, %cl
23746 ; FALLBACK28-NEXT: shrl %cl, %edx
23747 ; FALLBACK28-NEXT: addl %edi, %edi
23748 ; FALLBACK28-NEXT: movb %ch, %cl
23749 ; FALLBACK28-NEXT: shll %cl, %edi
23750 ; FALLBACK28-NEXT: orl %edx, %edi
23751 ; FALLBACK28-NEXT: movl %esi, %edx
23752 ; FALLBACK28-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23753 ; FALLBACK28-NEXT: movl 116(%esp,%esi), %esi
23754 ; FALLBACK28-NEXT: movl %esi, %ebx
23755 ; FALLBACK28-NEXT: movb %al, %cl
23756 ; FALLBACK28-NEXT: shrl %cl, %ebx
23757 ; FALLBACK28-NEXT: movl 120(%esp,%edx), %eax
23758 ; FALLBACK28-NEXT: leal (%eax,%eax), %ebp
23759 ; FALLBACK28-NEXT: movb %ch, %cl
23760 ; FALLBACK28-NEXT: shll %cl, %ebp
23761 ; FALLBACK28-NEXT: orl %ebx, %ebp
23762 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
23763 ; FALLBACK28-NEXT: movb %dl, %cl
23764 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
23765 ; FALLBACK28-NEXT: shrl %cl, %ebx
23766 ; FALLBACK28-NEXT: addl %esi, %esi
23767 ; FALLBACK28-NEXT: movb %ch, %cl
23768 ; FALLBACK28-NEXT: shll %cl, %esi
23769 ; FALLBACK28-NEXT: orl %ebx, %esi
23770 ; FALLBACK28-NEXT: movb %dl, %cl
23771 ; FALLBACK28-NEXT: shrl %cl, %eax
23772 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
23773 ; FALLBACK28-NEXT: movl 124(%esp,%edx), %ebx
23774 ; FALLBACK28-NEXT: leal (%ebx,%ebx), %edx
23775 ; FALLBACK28-NEXT: movb %ch, %cl
23776 ; FALLBACK28-NEXT: shll %cl, %edx
23777 ; FALLBACK28-NEXT: orl %eax, %edx
23778 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23779 ; FALLBACK28-NEXT: # kill: def $cl killed $cl killed $ecx
23780 ; FALLBACK28-NEXT: sarl %cl, %ebx
23781 ; FALLBACK28-NEXT: movl {{[0-9]+}}(%esp), %eax
23782 ; FALLBACK28-NEXT: movl %ebx, 60(%eax)
23783 ; FALLBACK28-NEXT: movl %edx, 56(%eax)
23784 ; FALLBACK28-NEXT: movl %esi, 48(%eax)
23785 ; FALLBACK28-NEXT: movl %ebp, 52(%eax)
23786 ; FALLBACK28-NEXT: movl %edi, 40(%eax)
23787 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23788 ; FALLBACK28-NEXT: movl %ecx, 44(%eax)
23789 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23790 ; FALLBACK28-NEXT: movl %ecx, 32(%eax)
23791 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23792 ; FALLBACK28-NEXT: movl %ecx, 36(%eax)
23793 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23794 ; FALLBACK28-NEXT: movl %ecx, 24(%eax)
23795 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23796 ; FALLBACK28-NEXT: movl %ecx, 28(%eax)
23797 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23798 ; FALLBACK28-NEXT: movl %ecx, 16(%eax)
23799 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23800 ; FALLBACK28-NEXT: movl %ecx, 20(%eax)
23801 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23802 ; FALLBACK28-NEXT: movl %ecx, 8(%eax)
23803 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23804 ; FALLBACK28-NEXT: movl %ecx, 12(%eax)
23805 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23806 ; FALLBACK28-NEXT: movl %ecx, (%eax)
23807 ; FALLBACK28-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
23808 ; FALLBACK28-NEXT: movl %ecx, 4(%eax)
23809 ; FALLBACK28-NEXT: addl $204, %esp
23810 ; FALLBACK28-NEXT: popl %esi
23811 ; FALLBACK28-NEXT: popl %edi
23812 ; FALLBACK28-NEXT: popl %ebx
23813 ; FALLBACK28-NEXT: popl %ebp
23814 ; FALLBACK28-NEXT: vzeroupper
23815 ; FALLBACK28-NEXT: retl
23817 ; FALLBACK29-LABEL: ashr_64bytes:
23818 ; FALLBACK29: # %bb.0:
23819 ; FALLBACK29-NEXT: pushl %ebp
23820 ; FALLBACK29-NEXT: pushl %ebx
23821 ; FALLBACK29-NEXT: pushl %edi
23822 ; FALLBACK29-NEXT: pushl %esi
23823 ; FALLBACK29-NEXT: subl $188, %esp
23824 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %ecx
23825 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %eax
23826 ; FALLBACK29-NEXT: vmovups (%eax), %ymm0
23827 ; FALLBACK29-NEXT: vmovups 32(%eax), %xmm1
23828 ; FALLBACK29-NEXT: movl 48(%eax), %edx
23829 ; FALLBACK29-NEXT: movl 52(%eax), %esi
23830 ; FALLBACK29-NEXT: movl 56(%eax), %edi
23831 ; FALLBACK29-NEXT: movl 60(%eax), %eax
23832 ; FALLBACK29-NEXT: movl (%ecx), %ecx
23833 ; FALLBACK29-NEXT: movl %eax, {{[0-9]+}}(%esp)
23834 ; FALLBACK29-NEXT: movl %edi, {{[0-9]+}}(%esp)
23835 ; FALLBACK29-NEXT: movl %esi, {{[0-9]+}}(%esp)
23836 ; FALLBACK29-NEXT: movl %edx, {{[0-9]+}}(%esp)
23837 ; FALLBACK29-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp)
23838 ; FALLBACK29-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
23839 ; FALLBACK29-NEXT: sarl $31, %eax
23840 ; FALLBACK29-NEXT: movl %eax, {{[0-9]+}}(%esp)
23841 ; FALLBACK29-NEXT: movl %eax, {{[0-9]+}}(%esp)
23842 ; FALLBACK29-NEXT: movl %eax, {{[0-9]+}}(%esp)
23843 ; FALLBACK29-NEXT: movl %eax, {{[0-9]+}}(%esp)
23844 ; FALLBACK29-NEXT: movl %eax, {{[0-9]+}}(%esp)
23845 ; FALLBACK29-NEXT: movl %eax, {{[0-9]+}}(%esp)
23846 ; FALLBACK29-NEXT: movl %eax, {{[0-9]+}}(%esp)
23847 ; FALLBACK29-NEXT: movl %eax, {{[0-9]+}}(%esp)
23848 ; FALLBACK29-NEXT: movl %eax, {{[0-9]+}}(%esp)
23849 ; FALLBACK29-NEXT: movl %eax, {{[0-9]+}}(%esp)
23850 ; FALLBACK29-NEXT: movl %eax, {{[0-9]+}}(%esp)
23851 ; FALLBACK29-NEXT: movl %eax, {{[0-9]+}}(%esp)
23852 ; FALLBACK29-NEXT: movl %eax, {{[0-9]+}}(%esp)
23853 ; FALLBACK29-NEXT: movl %eax, {{[0-9]+}}(%esp)
23854 ; FALLBACK29-NEXT: movl %eax, {{[0-9]+}}(%esp)
23855 ; FALLBACK29-NEXT: movl %eax, {{[0-9]+}}(%esp)
23856 ; FALLBACK29-NEXT: movl %ecx, %ebp
23857 ; FALLBACK29-NEXT: andl $60, %ebp
23858 ; FALLBACK29-NEXT: movl 56(%esp,%ebp), %edx
23859 ; FALLBACK29-NEXT: movl 52(%esp,%ebp), %eax
23860 ; FALLBACK29-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23861 ; FALLBACK29-NEXT: shll $3, %ecx
23862 ; FALLBACK29-NEXT: andl $24, %ecx
23863 ; FALLBACK29-NEXT: shrdl %cl, %edx, %eax
23864 ; FALLBACK29-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23865 ; FALLBACK29-NEXT: movl 64(%esp,%ebp), %edi
23866 ; FALLBACK29-NEXT: movl 60(%esp,%ebp), %eax
23867 ; FALLBACK29-NEXT: movl %eax, %esi
23868 ; FALLBACK29-NEXT: shrdl %cl, %edi, %esi
23869 ; FALLBACK29-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23870 ; FALLBACK29-NEXT: shrdl %cl, %eax, %edx
23871 ; FALLBACK29-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23872 ; FALLBACK29-NEXT: movl 72(%esp,%ebp), %esi
23873 ; FALLBACK29-NEXT: movl 68(%esp,%ebp), %eax
23874 ; FALLBACK29-NEXT: movl %eax, %edx
23875 ; FALLBACK29-NEXT: shrdl %cl, %esi, %edx
23876 ; FALLBACK29-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23877 ; FALLBACK29-NEXT: shrdl %cl, %eax, %edi
23878 ; FALLBACK29-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23879 ; FALLBACK29-NEXT: movl 80(%esp,%ebp), %edi
23880 ; FALLBACK29-NEXT: movl 76(%esp,%ebp), %eax
23881 ; FALLBACK29-NEXT: movl %eax, %edx
23882 ; FALLBACK29-NEXT: shrdl %cl, %edi, %edx
23883 ; FALLBACK29-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23884 ; FALLBACK29-NEXT: shrdl %cl, %eax, %esi
23885 ; FALLBACK29-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23886 ; FALLBACK29-NEXT: movl 88(%esp,%ebp), %esi
23887 ; FALLBACK29-NEXT: movl 84(%esp,%ebp), %eax
23888 ; FALLBACK29-NEXT: movl %eax, %edx
23889 ; FALLBACK29-NEXT: shrdl %cl, %esi, %edx
23890 ; FALLBACK29-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23891 ; FALLBACK29-NEXT: movl %esi, %edx
23892 ; FALLBACK29-NEXT: shrdl %cl, %eax, %edi
23893 ; FALLBACK29-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23894 ; FALLBACK29-NEXT: movl 96(%esp,%ebp), %esi
23895 ; FALLBACK29-NEXT: movl 92(%esp,%ebp), %eax
23896 ; FALLBACK29-NEXT: movl %eax, %edi
23897 ; FALLBACK29-NEXT: shrdl %cl, %esi, %edi
23898 ; FALLBACK29-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23899 ; FALLBACK29-NEXT: shrdl %cl, %eax, %edx
23900 ; FALLBACK29-NEXT: movl %edx, (%esp) # 4-byte Spill
23901 ; FALLBACK29-NEXT: movl 104(%esp,%ebp), %edx
23902 ; FALLBACK29-NEXT: movl 100(%esp,%ebp), %eax
23903 ; FALLBACK29-NEXT: movl %eax, %edi
23904 ; FALLBACK29-NEXT: shrdl %cl, %edx, %edi
23905 ; FALLBACK29-NEXT: shrdl %cl, %eax, %esi
23906 ; FALLBACK29-NEXT: movl 48(%esp,%ebp), %ebx
23907 ; FALLBACK29-NEXT: movl 108(%esp,%ebp), %eax
23908 ; FALLBACK29-NEXT: shrdl %cl, %eax, %edx
23909 ; FALLBACK29-NEXT: movl {{[0-9]+}}(%esp), %ebp
23910 ; FALLBACK29-NEXT: movl %edx, 56(%ebp)
23911 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
23912 ; FALLBACK29-NEXT: shrdl %cl, %edx, %ebx
23913 ; FALLBACK29-NEXT: # kill: def $cl killed $cl killed $ecx
23914 ; FALLBACK29-NEXT: sarl %cl, %eax
23915 ; FALLBACK29-NEXT: movl %eax, 60(%ebp)
23916 ; FALLBACK29-NEXT: movl %esi, 48(%ebp)
23917 ; FALLBACK29-NEXT: movl %edi, 52(%ebp)
23918 ; FALLBACK29-NEXT: movl (%esp), %eax # 4-byte Reload
23919 ; FALLBACK29-NEXT: movl %eax, 40(%ebp)
23920 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23921 ; FALLBACK29-NEXT: movl %eax, 44(%ebp)
23922 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23923 ; FALLBACK29-NEXT: movl %eax, 32(%ebp)
23924 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23925 ; FALLBACK29-NEXT: movl %eax, 36(%ebp)
23926 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23927 ; FALLBACK29-NEXT: movl %eax, 24(%ebp)
23928 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23929 ; FALLBACK29-NEXT: movl %eax, 28(%ebp)
23930 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23931 ; FALLBACK29-NEXT: movl %eax, 16(%ebp)
23932 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23933 ; FALLBACK29-NEXT: movl %eax, 20(%ebp)
23934 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23935 ; FALLBACK29-NEXT: movl %eax, 8(%ebp)
23936 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23937 ; FALLBACK29-NEXT: movl %eax, 12(%ebp)
23938 ; FALLBACK29-NEXT: movl %ebx, (%ebp)
23939 ; FALLBACK29-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
23940 ; FALLBACK29-NEXT: movl %eax, 4(%ebp)
23941 ; FALLBACK29-NEXT: addl $188, %esp
23942 ; FALLBACK29-NEXT: popl %esi
23943 ; FALLBACK29-NEXT: popl %edi
23944 ; FALLBACK29-NEXT: popl %ebx
23945 ; FALLBACK29-NEXT: popl %ebp
23946 ; FALLBACK29-NEXT: vzeroupper
23947 ; FALLBACK29-NEXT: retl
23949 ; FALLBACK30-LABEL: ashr_64bytes:
23950 ; FALLBACK30: # %bb.0:
23951 ; FALLBACK30-NEXT: pushl %ebp
23952 ; FALLBACK30-NEXT: pushl %ebx
23953 ; FALLBACK30-NEXT: pushl %edi
23954 ; FALLBACK30-NEXT: pushl %esi
23955 ; FALLBACK30-NEXT: subl $204, %esp
23956 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %eax
23957 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %ecx
23958 ; FALLBACK30-NEXT: vmovups (%ecx), %ymm0
23959 ; FALLBACK30-NEXT: vmovups 32(%ecx), %xmm1
23960 ; FALLBACK30-NEXT: movl 48(%ecx), %edx
23961 ; FALLBACK30-NEXT: movl 52(%ecx), %esi
23962 ; FALLBACK30-NEXT: movl 56(%ecx), %edi
23963 ; FALLBACK30-NEXT: movl 60(%ecx), %ecx
23964 ; FALLBACK30-NEXT: movl (%eax), %eax
23965 ; FALLBACK30-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23966 ; FALLBACK30-NEXT: movl %edi, {{[0-9]+}}(%esp)
23967 ; FALLBACK30-NEXT: movl %esi, {{[0-9]+}}(%esp)
23968 ; FALLBACK30-NEXT: movl %edx, {{[0-9]+}}(%esp)
23969 ; FALLBACK30-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp)
23970 ; FALLBACK30-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
23971 ; FALLBACK30-NEXT: sarl $31, %ecx
23972 ; FALLBACK30-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23973 ; FALLBACK30-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23974 ; FALLBACK30-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23975 ; FALLBACK30-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23976 ; FALLBACK30-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23977 ; FALLBACK30-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23978 ; FALLBACK30-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23979 ; FALLBACK30-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23980 ; FALLBACK30-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23981 ; FALLBACK30-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23982 ; FALLBACK30-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23983 ; FALLBACK30-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23984 ; FALLBACK30-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23985 ; FALLBACK30-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23986 ; FALLBACK30-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23987 ; FALLBACK30-NEXT: movl %ecx, {{[0-9]+}}(%esp)
23988 ; FALLBACK30-NEXT: movl %eax, %ecx
23989 ; FALLBACK30-NEXT: leal (,%eax,8), %edx
23990 ; FALLBACK30-NEXT: andl $24, %edx
23991 ; FALLBACK30-NEXT: andl $60, %ecx
23992 ; FALLBACK30-NEXT: movl 68(%esp,%ecx), %esi
23993 ; FALLBACK30-NEXT: movl 72(%esp,%ecx), %edi
23994 ; FALLBACK30-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23995 ; FALLBACK30-NEXT: shrxl %edx, %esi, %eax
23996 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
23997 ; FALLBACK30-NEXT: movl %edx, %ebx
23998 ; FALLBACK30-NEXT: notb %bl
23999 ; FALLBACK30-NEXT: leal (%edi,%edi), %ebp
24000 ; FALLBACK30-NEXT: shlxl %ebx, %ebp, %eax
24001 ; FALLBACK30-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
24002 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24003 ; FALLBACK30-NEXT: shrxl %edx, 64(%esp,%ecx), %edi
24004 ; FALLBACK30-NEXT: addl %esi, %esi
24005 ; FALLBACK30-NEXT: shlxl %ebx, %esi, %eax
24006 ; FALLBACK30-NEXT: orl %edi, %eax
24007 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24008 ; FALLBACK30-NEXT: movl 80(%esp,%ecx), %esi
24009 ; FALLBACK30-NEXT: leal (%esi,%esi), %edi
24010 ; FALLBACK30-NEXT: shlxl %ebx, %edi, %eax
24011 ; FALLBACK30-NEXT: movl 76(%esp,%ecx), %edi
24012 ; FALLBACK30-NEXT: shrxl %edx, %edi, %ebp
24013 ; FALLBACK30-NEXT: orl %ebp, %eax
24014 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24015 ; FALLBACK30-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
24016 ; FALLBACK30-NEXT: addl %edi, %edi
24017 ; FALLBACK30-NEXT: shlxl %ebx, %edi, %edi
24018 ; FALLBACK30-NEXT: orl %eax, %edi
24019 ; FALLBACK30-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24020 ; FALLBACK30-NEXT: movl 88(%esp,%ecx), %eax
24021 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24022 ; FALLBACK30-NEXT: leal (%eax,%eax), %edi
24023 ; FALLBACK30-NEXT: shlxl %ebx, %edi, %eax
24024 ; FALLBACK30-NEXT: movl 84(%esp,%ecx), %edi
24025 ; FALLBACK30-NEXT: shrxl %edx, %edi, %ebp
24026 ; FALLBACK30-NEXT: orl %ebp, %eax
24027 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24028 ; FALLBACK30-NEXT: shrxl %edx, %esi, %esi
24029 ; FALLBACK30-NEXT: addl %edi, %edi
24030 ; FALLBACK30-NEXT: shlxl %ebx, %edi, %eax
24031 ; FALLBACK30-NEXT: orl %esi, %eax
24032 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24033 ; FALLBACK30-NEXT: movl 96(%esp,%ecx), %esi
24034 ; FALLBACK30-NEXT: leal (%esi,%esi), %edi
24035 ; FALLBACK30-NEXT: shlxl %ebx, %edi, %eax
24036 ; FALLBACK30-NEXT: movl 92(%esp,%ecx), %edi
24037 ; FALLBACK30-NEXT: shrxl %edx, %edi, %ebp
24038 ; FALLBACK30-NEXT: orl %ebp, %eax
24039 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24040 ; FALLBACK30-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
24041 ; FALLBACK30-NEXT: addl %edi, %edi
24042 ; FALLBACK30-NEXT: shlxl %ebx, %edi, %edi
24043 ; FALLBACK30-NEXT: orl %eax, %edi
24044 ; FALLBACK30-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24045 ; FALLBACK30-NEXT: movl 104(%esp,%ecx), %eax
24046 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24047 ; FALLBACK30-NEXT: leal (%eax,%eax), %edi
24048 ; FALLBACK30-NEXT: shlxl %ebx, %edi, %eax
24049 ; FALLBACK30-NEXT: movl 100(%esp,%ecx), %edi
24050 ; FALLBACK30-NEXT: shrxl %edx, %edi, %ebp
24051 ; FALLBACK30-NEXT: orl %ebp, %eax
24052 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24053 ; FALLBACK30-NEXT: shrxl %edx, %esi, %esi
24054 ; FALLBACK30-NEXT: addl %edi, %edi
24055 ; FALLBACK30-NEXT: shlxl %ebx, %edi, %eax
24056 ; FALLBACK30-NEXT: orl %esi, %eax
24057 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24058 ; FALLBACK30-NEXT: movl 112(%esp,%ecx), %eax
24059 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24060 ; FALLBACK30-NEXT: leal (%eax,%eax), %esi
24061 ; FALLBACK30-NEXT: shlxl %ebx, %esi, %eax
24062 ; FALLBACK30-NEXT: movl 108(%esp,%ecx), %esi
24063 ; FALLBACK30-NEXT: movl %ecx, %edi
24064 ; FALLBACK30-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24065 ; FALLBACK30-NEXT: shrxl %edx, %esi, %ebp
24066 ; FALLBACK30-NEXT: orl %ebp, %eax
24067 ; FALLBACK30-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24068 ; FALLBACK30-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
24069 ; FALLBACK30-NEXT: addl %esi, %esi
24070 ; FALLBACK30-NEXT: shlxl %ebx, %esi, %esi
24071 ; FALLBACK30-NEXT: orl %ecx, %esi
24072 ; FALLBACK30-NEXT: movl 120(%esp,%edi), %ebp
24073 ; FALLBACK30-NEXT: leal (%ebp,%ebp), %ecx
24074 ; FALLBACK30-NEXT: shlxl %ebx, %ecx, %ecx
24075 ; FALLBACK30-NEXT: movl 116(%esp,%edi), %eax
24076 ; FALLBACK30-NEXT: shrxl %edx, %eax, %edi
24077 ; FALLBACK30-NEXT: orl %edi, %ecx
24078 ; FALLBACK30-NEXT: shrxl %edx, {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
24079 ; FALLBACK30-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24080 ; FALLBACK30-NEXT: addl %eax, %eax
24081 ; FALLBACK30-NEXT: shlxl %ebx, %eax, %edi
24082 ; FALLBACK30-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
24083 ; FALLBACK30-NEXT: shrxl %edx, %ebp, %eax
24084 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
24085 ; FALLBACK30-NEXT: movl 124(%esp,%ebp), %ebp
24086 ; FALLBACK30-NEXT: sarxl %edx, %ebp, %edx
24087 ; FALLBACK30-NEXT: addl %ebp, %ebp
24088 ; FALLBACK30-NEXT: shlxl %ebx, %ebp, %ebx
24089 ; FALLBACK30-NEXT: orl %eax, %ebx
24090 ; FALLBACK30-NEXT: movl {{[0-9]+}}(%esp), %eax
24091 ; FALLBACK30-NEXT: movl %edx, 60(%eax)
24092 ; FALLBACK30-NEXT: movl %ebx, 56(%eax)
24093 ; FALLBACK30-NEXT: movl %edi, 48(%eax)
24094 ; FALLBACK30-NEXT: movl %ecx, 52(%eax)
24095 ; FALLBACK30-NEXT: movl %esi, 40(%eax)
24096 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
24097 ; FALLBACK30-NEXT: movl %ecx, 44(%eax)
24098 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
24099 ; FALLBACK30-NEXT: movl %ecx, 32(%eax)
24100 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
24101 ; FALLBACK30-NEXT: movl %ecx, 36(%eax)
24102 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
24103 ; FALLBACK30-NEXT: movl %ecx, 24(%eax)
24104 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
24105 ; FALLBACK30-NEXT: movl %ecx, 28(%eax)
24106 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
24107 ; FALLBACK30-NEXT: movl %ecx, 16(%eax)
24108 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
24109 ; FALLBACK30-NEXT: movl %ecx, 20(%eax)
24110 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
24111 ; FALLBACK30-NEXT: movl %ecx, 8(%eax)
24112 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
24113 ; FALLBACK30-NEXT: movl %ecx, 12(%eax)
24114 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
24115 ; FALLBACK30-NEXT: movl %ecx, (%eax)
24116 ; FALLBACK30-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
24117 ; FALLBACK30-NEXT: movl %ecx, 4(%eax)
24118 ; FALLBACK30-NEXT: addl $204, %esp
24119 ; FALLBACK30-NEXT: popl %esi
24120 ; FALLBACK30-NEXT: popl %edi
24121 ; FALLBACK30-NEXT: popl %ebx
24122 ; FALLBACK30-NEXT: popl %ebp
24123 ; FALLBACK30-NEXT: vzeroupper
24124 ; FALLBACK30-NEXT: retl
24126 ; FALLBACK31-LABEL: ashr_64bytes:
24127 ; FALLBACK31: # %bb.0:
24128 ; FALLBACK31-NEXT: pushl %ebp
24129 ; FALLBACK31-NEXT: pushl %ebx
24130 ; FALLBACK31-NEXT: pushl %edi
24131 ; FALLBACK31-NEXT: pushl %esi
24132 ; FALLBACK31-NEXT: subl $188, %esp
24133 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %ecx
24134 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %eax
24135 ; FALLBACK31-NEXT: vmovups (%eax), %ymm0
24136 ; FALLBACK31-NEXT: vmovups 32(%eax), %xmm1
24137 ; FALLBACK31-NEXT: movl 48(%eax), %edx
24138 ; FALLBACK31-NEXT: movl 52(%eax), %esi
24139 ; FALLBACK31-NEXT: movl 56(%eax), %edi
24140 ; FALLBACK31-NEXT: movl 60(%eax), %eax
24141 ; FALLBACK31-NEXT: movl (%ecx), %ecx
24142 ; FALLBACK31-NEXT: movl %eax, {{[0-9]+}}(%esp)
24143 ; FALLBACK31-NEXT: movl %edi, {{[0-9]+}}(%esp)
24144 ; FALLBACK31-NEXT: movl %esi, {{[0-9]+}}(%esp)
24145 ; FALLBACK31-NEXT: movl %edx, {{[0-9]+}}(%esp)
24146 ; FALLBACK31-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp)
24147 ; FALLBACK31-NEXT: vmovups %ymm0, {{[0-9]+}}(%esp)
24148 ; FALLBACK31-NEXT: sarl $31, %eax
24149 ; FALLBACK31-NEXT: movl %eax, {{[0-9]+}}(%esp)
24150 ; FALLBACK31-NEXT: movl %eax, {{[0-9]+}}(%esp)
24151 ; FALLBACK31-NEXT: movl %eax, {{[0-9]+}}(%esp)
24152 ; FALLBACK31-NEXT: movl %eax, {{[0-9]+}}(%esp)
24153 ; FALLBACK31-NEXT: movl %eax, {{[0-9]+}}(%esp)
24154 ; FALLBACK31-NEXT: movl %eax, {{[0-9]+}}(%esp)
24155 ; FALLBACK31-NEXT: movl %eax, {{[0-9]+}}(%esp)
24156 ; FALLBACK31-NEXT: movl %eax, {{[0-9]+}}(%esp)
24157 ; FALLBACK31-NEXT: movl %eax, {{[0-9]+}}(%esp)
24158 ; FALLBACK31-NEXT: movl %eax, {{[0-9]+}}(%esp)
24159 ; FALLBACK31-NEXT: movl %eax, {{[0-9]+}}(%esp)
24160 ; FALLBACK31-NEXT: movl %eax, {{[0-9]+}}(%esp)
24161 ; FALLBACK31-NEXT: movl %eax, {{[0-9]+}}(%esp)
24162 ; FALLBACK31-NEXT: movl %eax, {{[0-9]+}}(%esp)
24163 ; FALLBACK31-NEXT: movl %eax, {{[0-9]+}}(%esp)
24164 ; FALLBACK31-NEXT: movl %eax, {{[0-9]+}}(%esp)
24165 ; FALLBACK31-NEXT: movl %ecx, %ebp
24166 ; FALLBACK31-NEXT: andl $60, %ebp
24167 ; FALLBACK31-NEXT: movl 56(%esp,%ebp), %edx
24168 ; FALLBACK31-NEXT: movl 52(%esp,%ebp), %eax
24169 ; FALLBACK31-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24170 ; FALLBACK31-NEXT: shll $3, %ecx
24171 ; FALLBACK31-NEXT: andl $24, %ecx
24172 ; FALLBACK31-NEXT: shrdl %cl, %edx, %eax
24173 ; FALLBACK31-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24174 ; FALLBACK31-NEXT: movl 64(%esp,%ebp), %edi
24175 ; FALLBACK31-NEXT: movl 60(%esp,%ebp), %eax
24176 ; FALLBACK31-NEXT: movl %eax, %esi
24177 ; FALLBACK31-NEXT: shrdl %cl, %edi, %esi
24178 ; FALLBACK31-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24179 ; FALLBACK31-NEXT: shrdl %cl, %eax, %edx
24180 ; FALLBACK31-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24181 ; FALLBACK31-NEXT: movl 72(%esp,%ebp), %esi
24182 ; FALLBACK31-NEXT: movl 68(%esp,%ebp), %eax
24183 ; FALLBACK31-NEXT: movl %eax, %edx
24184 ; FALLBACK31-NEXT: shrdl %cl, %esi, %edx
24185 ; FALLBACK31-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24186 ; FALLBACK31-NEXT: shrdl %cl, %eax, %edi
24187 ; FALLBACK31-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24188 ; FALLBACK31-NEXT: movl 80(%esp,%ebp), %edi
24189 ; FALLBACK31-NEXT: movl 76(%esp,%ebp), %eax
24190 ; FALLBACK31-NEXT: movl %eax, %edx
24191 ; FALLBACK31-NEXT: shrdl %cl, %edi, %edx
24192 ; FALLBACK31-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24193 ; FALLBACK31-NEXT: shrdl %cl, %eax, %esi
24194 ; FALLBACK31-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24195 ; FALLBACK31-NEXT: movl 88(%esp,%ebp), %ebx
24196 ; FALLBACK31-NEXT: movl 84(%esp,%ebp), %eax
24197 ; FALLBACK31-NEXT: movl %eax, %edx
24198 ; FALLBACK31-NEXT: shrdl %cl, %ebx, %edx
24199 ; FALLBACK31-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24200 ; FALLBACK31-NEXT: shrdl %cl, %eax, %edi
24201 ; FALLBACK31-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24202 ; FALLBACK31-NEXT: movl 96(%esp,%ebp), %esi
24203 ; FALLBACK31-NEXT: movl 92(%esp,%ebp), %eax
24204 ; FALLBACK31-NEXT: movl %eax, %edx
24205 ; FALLBACK31-NEXT: shrdl %cl, %esi, %edx
24206 ; FALLBACK31-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24207 ; FALLBACK31-NEXT: shrdl %cl, %eax, %ebx
24208 ; FALLBACK31-NEXT: movl 104(%esp,%ebp), %eax
24209 ; FALLBACK31-NEXT: movl 100(%esp,%ebp), %edi
24210 ; FALLBACK31-NEXT: movl %edi, %edx
24211 ; FALLBACK31-NEXT: shrdl %cl, %eax, %edx
24212 ; FALLBACK31-NEXT: shrdl %cl, %edi, %esi
24213 ; FALLBACK31-NEXT: movl 48(%esp,%ebp), %edi
24214 ; FALLBACK31-NEXT: movl 108(%esp,%ebp), %ebp
24215 ; FALLBACK31-NEXT: movl %ebp, (%esp) # 4-byte Spill
24216 ; FALLBACK31-NEXT: shrdl %cl, %ebp, %eax
24217 ; FALLBACK31-NEXT: movl {{[0-9]+}}(%esp), %ebp
24218 ; FALLBACK31-NEXT: movl %eax, 56(%ebp)
24219 ; FALLBACK31-NEXT: movl %esi, 48(%ebp)
24220 ; FALLBACK31-NEXT: movl %edx, 52(%ebp)
24221 ; FALLBACK31-NEXT: movl %ebx, 40(%ebp)
24222 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
24223 ; FALLBACK31-NEXT: movl %eax, 44(%ebp)
24224 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
24225 ; FALLBACK31-NEXT: movl %eax, 32(%ebp)
24226 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
24227 ; FALLBACK31-NEXT: movl %eax, 36(%ebp)
24228 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
24229 ; FALLBACK31-NEXT: movl %eax, 24(%ebp)
24230 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
24231 ; FALLBACK31-NEXT: movl %eax, 28(%ebp)
24232 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
24233 ; FALLBACK31-NEXT: movl %eax, 16(%ebp)
24234 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
24235 ; FALLBACK31-NEXT: movl %eax, 20(%ebp)
24236 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
24237 ; FALLBACK31-NEXT: movl %eax, 8(%ebp)
24238 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
24239 ; FALLBACK31-NEXT: movl %eax, 12(%ebp)
24240 ; FALLBACK31-NEXT: sarxl %ecx, (%esp), %eax # 4-byte Folded Reload
24241 ; FALLBACK31-NEXT: # kill: def $cl killed $cl killed $ecx
24242 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
24243 ; FALLBACK31-NEXT: shrdl %cl, %edx, %edi
24244 ; FALLBACK31-NEXT: movl %edi, (%ebp)
24245 ; FALLBACK31-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
24246 ; FALLBACK31-NEXT: movl %ecx, 4(%ebp)
24247 ; FALLBACK31-NEXT: movl %eax, 60(%ebp)
24248 ; FALLBACK31-NEXT: addl $188, %esp
24249 ; FALLBACK31-NEXT: popl %esi
24250 ; FALLBACK31-NEXT: popl %edi
24251 ; FALLBACK31-NEXT: popl %ebx
24252 ; FALLBACK31-NEXT: popl %ebp
24253 ; FALLBACK31-NEXT: vzeroupper
24254 ; FALLBACK31-NEXT: retl
24255 %src = load i512, ptr %src.ptr, align 1
24256 %byteOff = load i512, ptr %byteOff.ptr, align 1
24257 %bitOff = shl i512 %byteOff, 3
24258 %res = ashr i512 %src, %bitOff
24259 store i512 %res, ptr %dst, align 1
; ashr_64bytes_qwordOff - arithmetic right shift of a 64-byte (i512) value
; where the shift amount is supplied as a count of 64-bit qwords.
;
; Arguments:
;   %src.ptr      - address the i512 value to shift is loaded from (align 1)
;   %qwordOff.ptr - address the i512 shift count, in qwords, is loaded from (align 1)
;   %dst          - address the shifted i512 result is stored to (align 1)
;
; The IR scales the count by 64 (shl by 6) before the ashr, so the shift is
; always a whole number of qwords. The assertions below pin the resulting
; lowering for the X64-SSE2, X64-SSE42, X64-AVX, X86-SSE2, X86-SSE42 and
; X86-AVX check groups:
;   * the source is copied into a stack buffer;
;   * further stack slots (8 qwords in the X64 groups, 16 dwords in the X86
;     groups) are filled with the sign word produced by sarq $63 / sarl $31;
;   * only the low 32 bits of the count are loaded, then masked with "andl $7";
;   * the result is read back with an index scaled by 8, so no per-word shift
;     instructions appear in the expected output.
; The X86 groups check code that uses 32-bit registers; their RUN lines are not
; part of this excerpt - presumably 32-bit configurations, TODO confirm.
;
; The assertions are autogenerated (see the first line of the file): regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.
24263 define void @ashr_64bytes_qwordOff(ptr %src.ptr, ptr %qwordOff.ptr, ptr %dst) nounwind {
24264 ; X64-SSE2-LABEL: ashr_64bytes_qwordOff:
24265 ; X64-SSE2: # %bb.0:
24266 ; X64-SSE2-NEXT: pushq %rbx
24267 ; X64-SSE2-NEXT: movq (%rdi), %rax
24268 ; X64-SSE2-NEXT: movq 8(%rdi), %rcx
24269 ; X64-SSE2-NEXT: movq 16(%rdi), %r8
24270 ; X64-SSE2-NEXT: movq 24(%rdi), %r9
24271 ; X64-SSE2-NEXT: movq 32(%rdi), %r10
24272 ; X64-SSE2-NEXT: movq 40(%rdi), %r11
24273 ; X64-SSE2-NEXT: movq 48(%rdi), %rbx
24274 ; X64-SSE2-NEXT: movq 56(%rdi), %rdi
24275 ; X64-SSE2-NEXT: movl (%rsi), %esi
24276 ; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
24277 ; X64-SSE2-NEXT: movq %rbx, -{{[0-9]+}}(%rsp)
24278 ; X64-SSE2-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
24279 ; X64-SSE2-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
24280 ; X64-SSE2-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
24281 ; X64-SSE2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
24282 ; X64-SSE2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
24283 ; X64-SSE2-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
24284 ; X64-SSE2-NEXT: sarq $63, %rdi
24285 ; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
24286 ; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
24287 ; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
24288 ; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
24289 ; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
24290 ; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
24291 ; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
24292 ; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
24293 ; X64-SSE2-NEXT: andl $7, %esi
24294 ; X64-SSE2-NEXT: movq -128(%rsp,%rsi,8), %rax
24295 ; X64-SSE2-NEXT: movq -120(%rsp,%rsi,8), %rcx
24296 ; X64-SSE2-NEXT: movq -104(%rsp,%rsi,8), %rdi
24297 ; X64-SSE2-NEXT: movq -112(%rsp,%rsi,8), %r8
24298 ; X64-SSE2-NEXT: movq -88(%rsp,%rsi,8), %r9
24299 ; X64-SSE2-NEXT: movq -96(%rsp,%rsi,8), %r10
24300 ; X64-SSE2-NEXT: movq -72(%rsp,%rsi,8), %r11
24301 ; X64-SSE2-NEXT: movq -80(%rsp,%rsi,8), %rsi
24302 ; X64-SSE2-NEXT: movq %rsi, 48(%rdx)
24303 ; X64-SSE2-NEXT: movq %r11, 56(%rdx)
24304 ; X64-SSE2-NEXT: movq %r10, 32(%rdx)
24305 ; X64-SSE2-NEXT: movq %r9, 40(%rdx)
24306 ; X64-SSE2-NEXT: movq %r8, 16(%rdx)
24307 ; X64-SSE2-NEXT: movq %rdi, 24(%rdx)
24308 ; X64-SSE2-NEXT: movq %rax, (%rdx)
24309 ; X64-SSE2-NEXT: movq %rcx, 8(%rdx)
24310 ; X64-SSE2-NEXT: popq %rbx
24311 ; X64-SSE2-NEXT: retq
24313 ; X64-SSE42-LABEL: ashr_64bytes_qwordOff:
24314 ; X64-SSE42: # %bb.0:
24315 ; X64-SSE42-NEXT: pushq %rax
24316 ; X64-SSE42-NEXT: movups (%rdi), %xmm0
24317 ; X64-SSE42-NEXT: movups 16(%rdi), %xmm1
24318 ; X64-SSE42-NEXT: movups 32(%rdi), %xmm2
24319 ; X64-SSE42-NEXT: movq 48(%rdi), %rax
24320 ; X64-SSE42-NEXT: movq 56(%rdi), %rcx
24321 ; X64-SSE42-NEXT: movl (%rsi), %esi
24322 ; X64-SSE42-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
24323 ; X64-SSE42-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
24324 ; X64-SSE42-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
24325 ; X64-SSE42-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
24326 ; X64-SSE42-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
24327 ; X64-SSE42-NEXT: sarq $63, %rcx
24328 ; X64-SSE42-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
24329 ; X64-SSE42-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
24330 ; X64-SSE42-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
24331 ; X64-SSE42-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
24332 ; X64-SSE42-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
24333 ; X64-SSE42-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
24334 ; X64-SSE42-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
24335 ; X64-SSE42-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
24336 ; X64-SSE42-NEXT: andl $7, %esi
24337 ; X64-SSE42-NEXT: movups -128(%rsp,%rsi,8), %xmm0
24338 ; X64-SSE42-NEXT: movups -112(%rsp,%rsi,8), %xmm1
24339 ; X64-SSE42-NEXT: movups -96(%rsp,%rsi,8), %xmm2
24340 ; X64-SSE42-NEXT: movups -80(%rsp,%rsi,8), %xmm3
24341 ; X64-SSE42-NEXT: movups %xmm3, 48(%rdx)
24342 ; X64-SSE42-NEXT: movups %xmm1, 16(%rdx)
24343 ; X64-SSE42-NEXT: movups %xmm2, 32(%rdx)
24344 ; X64-SSE42-NEXT: movups %xmm0, (%rdx)
24345 ; X64-SSE42-NEXT: popq %rax
24346 ; X64-SSE42-NEXT: retq
24348 ; X64-AVX-LABEL: ashr_64bytes_qwordOff:
24349 ; X64-AVX: # %bb.0:
24350 ; X64-AVX-NEXT: pushq %rax
24351 ; X64-AVX-NEXT: vmovups (%rdi), %ymm0
24352 ; X64-AVX-NEXT: vmovups 32(%rdi), %xmm1
24353 ; X64-AVX-NEXT: movq 48(%rdi), %rax
24354 ; X64-AVX-NEXT: movq 56(%rdi), %rcx
24355 ; X64-AVX-NEXT: movl (%rsi), %esi
24356 ; X64-AVX-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
24357 ; X64-AVX-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
24358 ; X64-AVX-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp)
24359 ; X64-AVX-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
24360 ; X64-AVX-NEXT: sarq $63, %rcx
24361 ; X64-AVX-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
24362 ; X64-AVX-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
24363 ; X64-AVX-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
24364 ; X64-AVX-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
24365 ; X64-AVX-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
24366 ; X64-AVX-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
24367 ; X64-AVX-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
24368 ; X64-AVX-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
24369 ; X64-AVX-NEXT: andl $7, %esi
24370 ; X64-AVX-NEXT: vmovups -128(%rsp,%rsi,8), %xmm0
24371 ; X64-AVX-NEXT: vmovups -112(%rsp,%rsi,8), %xmm1
24372 ; X64-AVX-NEXT: vmovups -96(%rsp,%rsi,8), %xmm2
24373 ; X64-AVX-NEXT: vmovups -80(%rsp,%rsi,8), %xmm3
24374 ; X64-AVX-NEXT: vmovups %xmm3, 48(%rdx)
24375 ; X64-AVX-NEXT: vmovups %xmm1, 16(%rdx)
24376 ; X64-AVX-NEXT: vmovups %xmm2, 32(%rdx)
24377 ; X64-AVX-NEXT: vmovups %xmm0, (%rdx)
24378 ; X64-AVX-NEXT: popq %rax
24379 ; X64-AVX-NEXT: vzeroupper
24380 ; X64-AVX-NEXT: retq
24382 ; X86-SSE2-LABEL: ashr_64bytes_qwordOff:
24383 ; X86-SSE2: # %bb.0:
24384 ; X86-SSE2-NEXT: pushl %ebp
24385 ; X86-SSE2-NEXT: pushl %ebx
24386 ; X86-SSE2-NEXT: pushl %edi
24387 ; X86-SSE2-NEXT: pushl %esi
24388 ; X86-SSE2-NEXT: subl $188, %esp
24389 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
24390 ; X86-SSE2-NEXT: movl (%eax), %ecx
24391 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24392 ; X86-SSE2-NEXT: movl 4(%eax), %ecx
24393 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24394 ; X86-SSE2-NEXT: movl 8(%eax), %ecx
24395 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24396 ; X86-SSE2-NEXT: movl 12(%eax), %ecx
24397 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24398 ; X86-SSE2-NEXT: movl 16(%eax), %ecx
24399 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24400 ; X86-SSE2-NEXT: movl 20(%eax), %ecx
24401 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24402 ; X86-SSE2-NEXT: movl 24(%eax), %ecx
24403 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24404 ; X86-SSE2-NEXT: movl 28(%eax), %ecx
24405 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24406 ; X86-SSE2-NEXT: movl 32(%eax), %ecx
24407 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24408 ; X86-SSE2-NEXT: movl 36(%eax), %ecx
24409 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24410 ; X86-SSE2-NEXT: movl 40(%eax), %ebp
24411 ; X86-SSE2-NEXT: movl 44(%eax), %ebx
24412 ; X86-SSE2-NEXT: movl 48(%eax), %edi
24413 ; X86-SSE2-NEXT: movl 52(%eax), %esi
24414 ; X86-SSE2-NEXT: movl 56(%eax), %edx
24415 ; X86-SSE2-NEXT: movl 60(%eax), %ecx
24416 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
24417 ; X86-SSE2-NEXT: movl (%eax), %eax
24418 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
24419 ; X86-SSE2-NEXT: movl %esi, {{[0-9]+}}(%esp)
24420 ; X86-SSE2-NEXT: movl %edi, {{[0-9]+}}(%esp)
24421 ; X86-SSE2-NEXT: movl %ebx, {{[0-9]+}}(%esp)
24422 ; X86-SSE2-NEXT: movl %ebp, {{[0-9]+}}(%esp)
24423 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
24424 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
24425 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
24426 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
24427 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
24428 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
24429 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
24430 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
24431 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
24432 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
24433 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
24434 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
24435 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
24436 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
24437 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
24438 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
24439 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
24440 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
24441 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
24442 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
24443 ; X86-SSE2-NEXT: movl %edx, {{[0-9]+}}(%esp)
24444 ; X86-SSE2-NEXT: sarl $31, %ecx
24445 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
24446 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
24447 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
24448 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
24449 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
24450 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
24451 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
24452 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
24453 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
24454 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
24455 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
24456 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
24457 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
24458 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
24459 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
24460 ; X86-SSE2-NEXT: movl %ecx, {{[0-9]+}}(%esp)
24461 ; X86-SSE2-NEXT: andl $7, %eax
24462 ; X86-SSE2-NEXT: movl 48(%esp,%eax,8), %ecx
24463 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24464 ; X86-SSE2-NEXT: movl 52(%esp,%eax,8), %ecx
24465 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24466 ; X86-SSE2-NEXT: movl 60(%esp,%eax,8), %ecx
24467 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24468 ; X86-SSE2-NEXT: movl 56(%esp,%eax,8), %ecx
24469 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24470 ; X86-SSE2-NEXT: movl 68(%esp,%eax,8), %ecx
24471 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24472 ; X86-SSE2-NEXT: movl 64(%esp,%eax,8), %ecx
24473 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24474 ; X86-SSE2-NEXT: movl 76(%esp,%eax,8), %ecx
24475 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24476 ; X86-SSE2-NEXT: movl 72(%esp,%eax,8), %ecx
24477 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24478 ; X86-SSE2-NEXT: movl 84(%esp,%eax,8), %ecx
24479 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24480 ; X86-SSE2-NEXT: movl 80(%esp,%eax,8), %ecx
24481 ; X86-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
24482 ; X86-SSE2-NEXT: movl 92(%esp,%eax,8), %ebp
24483 ; X86-SSE2-NEXT: movl 88(%esp,%eax,8), %ebx
24484 ; X86-SSE2-NEXT: movl 100(%esp,%eax,8), %edi
24485 ; X86-SSE2-NEXT: movl 96(%esp,%eax,8), %esi
24486 ; X86-SSE2-NEXT: movl 108(%esp,%eax,8), %edx
24487 ; X86-SSE2-NEXT: movl 104(%esp,%eax,8), %ecx
24488 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
24489 ; X86-SSE2-NEXT: movl %ecx, 56(%eax)
24490 ; X86-SSE2-NEXT: movl %edx, 60(%eax)
24491 ; X86-SSE2-NEXT: movl %esi, 48(%eax)
24492 ; X86-SSE2-NEXT: movl %edi, 52(%eax)
24493 ; X86-SSE2-NEXT: movl %ebx, 40(%eax)
24494 ; X86-SSE2-NEXT: movl %ebp, 44(%eax)
24495 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
24496 ; X86-SSE2-NEXT: movl %ecx, 32(%eax)
24497 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
24498 ; X86-SSE2-NEXT: movl %ecx, 36(%eax)
24499 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
24500 ; X86-SSE2-NEXT: movl %ecx, 24(%eax)
24501 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
24502 ; X86-SSE2-NEXT: movl %ecx, 28(%eax)
24503 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
24504 ; X86-SSE2-NEXT: movl %ecx, 16(%eax)
24505 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
24506 ; X86-SSE2-NEXT: movl %ecx, 20(%eax)
24507 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
24508 ; X86-SSE2-NEXT: movl %ecx, 8(%eax)
24509 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
24510 ; X86-SSE2-NEXT: movl %ecx, 12(%eax)
24511 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
24512 ; X86-SSE2-NEXT: movl %ecx, (%eax)
24513 ; X86-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
24514 ; X86-SSE2-NEXT: movl %ecx, 4(%eax)
24515 ; X86-SSE2-NEXT: addl $188, %esp
24516 ; X86-SSE2-NEXT: popl %esi
24517 ; X86-SSE2-NEXT: popl %edi
24518 ; X86-SSE2-NEXT: popl %ebx
24519 ; X86-SSE2-NEXT: popl %ebp
24520 ; X86-SSE2-NEXT: retl
24522 ; X86-SSE42-LABEL: ashr_64bytes_qwordOff:
24523 ; X86-SSE42: # %bb.0:
24524 ; X86-SSE42-NEXT: pushl %ebx
24525 ; X86-SSE42-NEXT: pushl %edi
24526 ; X86-SSE42-NEXT: pushl %esi
24527 ; X86-SSE42-NEXT: subl $128, %esp
24528 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %eax
24529 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %ecx
24530 ; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %edx
24531 ; X86-SSE42-NEXT: movups (%edx), %xmm0
24532 ; X86-SSE42-NEXT: movups 16(%edx), %xmm1
24533 ; X86-SSE42-NEXT: movups 32(%edx), %xmm2
24534 ; X86-SSE42-NEXT: movl 48(%edx), %esi
24535 ; X86-SSE42-NEXT: movl 52(%edx), %edi
24536 ; X86-SSE42-NEXT: movl 56(%edx), %ebx
24537 ; X86-SSE42-NEXT: movl 60(%edx), %edx
24538 ; X86-SSE42-NEXT: movl (%ecx), %ecx
24539 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
24540 ; X86-SSE42-NEXT: movl %ebx, {{[0-9]+}}(%esp)
24541 ; X86-SSE42-NEXT: movl %edi, {{[0-9]+}}(%esp)
24542 ; X86-SSE42-NEXT: movl %esi, {{[0-9]+}}(%esp)
24543 ; X86-SSE42-NEXT: movaps %xmm2, {{[0-9]+}}(%esp)
24544 ; X86-SSE42-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
24545 ; X86-SSE42-NEXT: movaps %xmm0, (%esp)
24546 ; X86-SSE42-NEXT: sarl $31, %edx
24547 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
24548 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
24549 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
24550 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
24551 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
24552 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
24553 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
24554 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
24555 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
24556 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
24557 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
24558 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
24559 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
24560 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
24561 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
24562 ; X86-SSE42-NEXT: movl %edx, {{[0-9]+}}(%esp)
24563 ; X86-SSE42-NEXT: andl $7, %ecx
24564 ; X86-SSE42-NEXT: movups (%esp,%ecx,8), %xmm0
24565 ; X86-SSE42-NEXT: movups 16(%esp,%ecx,8), %xmm1
24566 ; X86-SSE42-NEXT: movups 32(%esp,%ecx,8), %xmm2
24567 ; X86-SSE42-NEXT: movups 48(%esp,%ecx,8), %xmm3
24568 ; X86-SSE42-NEXT: movups %xmm3, 48(%eax)
24569 ; X86-SSE42-NEXT: movups %xmm2, 32(%eax)
24570 ; X86-SSE42-NEXT: movups %xmm1, 16(%eax)
24571 ; X86-SSE42-NEXT: movups %xmm0, (%eax)
24572 ; X86-SSE42-NEXT: addl $128, %esp
24573 ; X86-SSE42-NEXT: popl %esi
24574 ; X86-SSE42-NEXT: popl %edi
24575 ; X86-SSE42-NEXT: popl %ebx
24576 ; X86-SSE42-NEXT: retl
24578 ; X86-AVX-LABEL: ashr_64bytes_qwordOff:
24579 ; X86-AVX: # %bb.0:
24580 ; X86-AVX-NEXT: pushl %ebx
24581 ; X86-AVX-NEXT: pushl %edi
24582 ; X86-AVX-NEXT: pushl %esi
24583 ; X86-AVX-NEXT: subl $128, %esp
24584 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
24585 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
24586 ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
24587 ; X86-AVX-NEXT: vmovups (%edx), %ymm0
24588 ; X86-AVX-NEXT: vmovups 32(%edx), %xmm1
24589 ; X86-AVX-NEXT: movl 48(%edx), %esi
24590 ; X86-AVX-NEXT: movl 52(%edx), %edi
24591 ; X86-AVX-NEXT: movl 56(%edx), %ebx
24592 ; X86-AVX-NEXT: movl 60(%edx), %edx
24593 ; X86-AVX-NEXT: movl (%ecx), %ecx
24594 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
24595 ; X86-AVX-NEXT: movl %ebx, {{[0-9]+}}(%esp)
24596 ; X86-AVX-NEXT: movl %edi, {{[0-9]+}}(%esp)
24597 ; X86-AVX-NEXT: movl %esi, {{[0-9]+}}(%esp)
24598 ; X86-AVX-NEXT: vmovaps %xmm1, {{[0-9]+}}(%esp)
24599 ; X86-AVX-NEXT: vmovups %ymm0, (%esp)
24600 ; X86-AVX-NEXT: sarl $31, %edx
24601 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
24602 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
24603 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
24604 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
24605 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
24606 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
24607 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
24608 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
24609 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
24610 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
24611 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
24612 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
24613 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
24614 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
24615 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
24616 ; X86-AVX-NEXT: movl %edx, {{[0-9]+}}(%esp)
24617 ; X86-AVX-NEXT: andl $7, %ecx
24618 ; X86-AVX-NEXT: vmovups (%esp,%ecx,8), %xmm0
24619 ; X86-AVX-NEXT: vmovups 16(%esp,%ecx,8), %xmm1
24620 ; X86-AVX-NEXT: vmovups 32(%esp,%ecx,8), %xmm2
24621 ; X86-AVX-NEXT: vmovups 48(%esp,%ecx,8), %xmm3
24622 ; X86-AVX-NEXT: vmovups %xmm3, 48(%eax)
24623 ; X86-AVX-NEXT: vmovups %xmm2, 32(%eax)
24624 ; X86-AVX-NEXT: vmovups %xmm1, 16(%eax)
24625 ; X86-AVX-NEXT: vmovups %xmm0, (%eax)
24626 ; X86-AVX-NEXT: addl $128, %esp
24627 ; X86-AVX-NEXT: popl %esi
24628 ; X86-AVX-NEXT: popl %edi
24629 ; X86-AVX-NEXT: popl %ebx
24630 ; X86-AVX-NEXT: vzeroupper
24631 ; X86-AVX-NEXT: retl
; Both operands are read as full 512-bit integers with byte alignment.
24632 %src = load i512, ptr %src.ptr, align 1
24633 %qwordOff = load i512, ptr %qwordOff.ptr, align 1
; Convert the qword count into a bit count: 64 bits per qword, hence << 6.
24634 %bitOff = shl i512 %qwordOff, 6
; Arithmetic shift: the vacated high bits take the sign of %src.
24635 %res = ashr i512 %src, %bitOff
; The result is written back as one byte-aligned 512-bit store.
24636 store i512 %res, ptr %dst, align 1
24640 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: