/* Pentium optimized __mpn_rshift --
   Copyright (C) 1992,94,95,96,97,98,2000,2005 Free Software Foundation, Inc.
   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
   the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
   MA 02111-1307, USA.  */
#include "asm-syntax.h"

#define PARMS	LINKAGE+16		/* space for 4 saved regs */
#define RES	PARMS
#define S	RES+PTR_SIZE
#define SIZE	S+PTR_SIZE
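/* The offsets above address the arguments on the stack.  As a sketch,
   they correspond to the usual GMP prototype

     mp_limb_t __mpn_rshift (mp_ptr res_ptr, mp_srcptr s_ptr,
                             mp_size_t size, unsigned int cnt);

   RES, S and SIZE are the first three arguments; the shift count cnt
   follows SIZE on the stack.  */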
ENTRY (BP_SYM (__mpn_rshift))

	cfi_adjust_cfa_offset (4)
	cfi_adjust_cfa_offset (4)
	cfi_adjust_cfa_offset (4)
	cfi_rel_offset (ebp, 0)
	cfi_adjust_cfa_offset (4)
	cfi_rel_offset (edi, 12)
	cfi_rel_offset (esi, 8)
	cfi_rel_offset (ebx, 0)
#if __BOUNDED_POINTERS__
	shll	$2, %ebx		/* convert limbs to bytes */
	CHECK_BOUNDS_BOTH_WIDE (%edi, RES(%esp), %ebx)
	CHECK_BOUNDS_BOTH_WIDE (%esi, S(%esp), %ebx)
	shrl	$2, %ebx		/* convert bytes back to limbs */
#endif
/* We can use faster code for shift-by-1 under certain conditions.  */
	jnc	L(special)		/* jump if res_ptr + 1 >= s_ptr */
	leal	(%edi,%ebx,4),%eax
	jnc	L(special)		/* jump if s_ptr >= res_ptr + size */
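/* The two branches above take the shift-by-1 path only when writing the
   result cannot clobber source limbs that have not been read yet.
   Roughly, as a C sketch (the compares feeding the jnc's are not shown
   at this point):

     if (cnt == 1
         && (res_ptr + 1 >= s_ptr || s_ptr >= res_ptr + size))
       goto special;
*/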
	shrdl	%cl,%edx,%eax		/* compute carry limb */
	pushl	%eax			/* push carry limb onto stack */
	cfi_adjust_cfa_offset (4)
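/* The limb just pushed becomes the function's return value: the bits
   shifted out of the low end of the operand, left-justified in a limb.
   Roughly, as a C sketch assuming 32-bit limbs:

     retval = s_ptr[0] << (32 - cnt);
*/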
	cfi_adjust_cfa_offset (4)

	movl	(%edi),%eax		/* fetch destination cache line */
L(oop):	movl	28(%edi),%eax		/* fetch destination cache line */
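/* The loads from the destination above are effectively prefetches: the
   Pentium does not allocate a cache line on a write miss, so touching a
   word of the line that is about to be written brings it into the cache
   and lets the following stores hit.  */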
	cfi_adjust_cfa_offset (-4)

	shrdl	%cl,%eax,%edx		/* compute result limb */

	shrl	%cl,%edx		/* compute most significant limb */
	movl	%edx,(%edi)		/* store it */
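/* Per-limb operation of the main loop and of the tail above, roughly,
   as a C sketch assuming 32-bit limbs (i runs over the low limbs; the
   last line handles the most significant limb):

     res_ptr[i] = (s_ptr[i] >> cnt) | (s_ptr[i + 1] << (32 - cnt));
     res_ptr[size - 1] = s_ptr[size - 1] >> cnt;
*/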
	popl	%eax			/* pop carry limb */
	cfi_adjust_cfa_offset (-4)

	cfi_adjust_cfa_offset (-4)

	cfi_adjust_cfa_offset (-4)

	cfi_adjust_cfa_offset (-4)

	cfi_adjust_cfa_offset (-4)
/* We loop from the most significant end of the arrays, which is only
   permissible if the source and destination don't overlap, since the
   function is documented to work for overlapping source and destination.
*/
	cfi_adjust_cfa_offset (16)
	cfi_rel_offset (edi, 12)
	cfi_rel_offset (esi, 8)
	cfi_rel_offset (ebp, 4)
	cfi_rel_offset (ebx, 0)

	leal	-4(%edi,%ebx,4),%edi
	leal	-4(%esi,%ebx,4),%esi
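/* The leal instructions above point %edi and %esi at the most
   significant limb of each operand: the shift-by-1 path walks downward,
   passing the bit shifted out of each limb into the next lower limb
   through the carry flag.  Roughly, as a C sketch assuming 32-bit limbs:

     mp_size_t i;
     res_ptr[size - 1] = s_ptr[size - 1] >> 1;
     for (i = size - 2; i >= 0; i--)
       res_ptr[i] = (s_ptr[i] >> 1) | (s_ptr[i + 1] << 31);
     retval = s_ptr[0] << 31;
*/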
	cfi_adjust_cfa_offset (4)

	movl	(%edi),%eax		/* fetch destination cache line */

	movl	-28(%edi),%eax		/* fetch destination cache line */
	leal	-32(%esi),%esi		/* use leal not to clobber carry */

	cfi_adjust_cfa_offset (-4)
	sbbl	%eax,%eax		/* save carry in %eax */

	addl	%eax,%eax		/* restore carry from eax */
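/* The carry flag holds the bit being passed between limbs, so it must
   survive pointer updates and loop control: leal is used instead of
   addl/subl because it does not touch the flags, and around instructions
   that do, the flag is materialized with sbbl %eax,%eax (%eax becomes 0
   or -1) and regenerated with addl %eax,%eax, whose carry-out equals the
   saved bit.  */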
	leal	-4(%esi),%esi		/* use leal not to clobber carry */

	addl	%eax,%eax		/* restore carry from eax */
L(L1):	movl	%edx,(%edi)		/* store last limb */
	cfi_adjust_cfa_offset (-4)

	cfi_adjust_cfa_offset (-4)

	cfi_adjust_cfa_offset (-4)

	cfi_adjust_cfa_offset (-4)

END (BP_SYM (__mpn_rshift))