2 Copyright (c) 1990-2001 Info-ZIP. All rights reserved.
4 See the accompanying file LICENSE, version 2000-Apr-09 or later
5 (the contents of which are also included in zip.h) for terms of use.
6 If, for some reason, all these files are missing, the Info-ZIP license
7 also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html
10 * crc_i386.S, optimized CRC calculation function for Zip and UnZip,
11 * created by Paul Kienitz and Christian Spieler. Last revised 24 Dec 98.
13 * GRR 961110: incorporated Scott Field optimizations from win32/crc_i386.asm
14 * => overall 6% speedup in "unzip -tq" on 9MB zipfile (486-66)
16 * SPC 970402: revised for Rodney Brown's optimizations (32-bit-wide
17 * aligned reads for most of the data from buffer), can be
18 * disabled by defining the macro NO_32_BIT_LOADS
20 * SPC 971012: added Rodney Brown's additional tweaks for 32-bit-optimized
21 * CPUs (like the Pentium Pro, Pentium II, and probably some
22 * Pentium clones). This optimization is controlled by the
23 * preprocessor switch "__686" and is disabled by default.
24 * (This default is based on the assumption that most users
25 * do not yet work on a Pentium Pro or Pentium II machine ...)
27 * FLAT memory model assumed. Calling interface:
28 * - args are pushed onto the stack from right to left,
29 * - return value is given in the EAX register,
30 * - all other registers (with the exception of EFLAGS) are preserved. (With
31 * GNU C 2.7.x, %edx and %ecx are `scratch' registers, but preserving
32 * them nevertheless adds only 4 single byte instructions.)
34 * This source generates the function
35 * ulg crc32(ulg crc, ZCONST uch *buf, extent len).
37 * The loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS.
38 * This results in shorter code at the expense of reduced performance.
41 /* This file is NOT used in conjunction with zlib. */
44 /* Preprocess with -DNO_UNDERLINE if your C compiler does not prefix
45 * external symbols with an underline character '_'.
47 #if defined(NO_UNDERLINE) || defined(__ELF__) /* external symbols carry no leading '_' */
49 # define _get_crc_table get_crc_table /* map the '_'-prefixed name used in this file onto the unprefixed symbol */
51 /* Use 16-byte alignment if your assembler supports it. Warning: gas
52 * uses a log2(x) parameter (.align 4 means 16-byte alignment). On SVR4
53 * the parameter is a number of bytes.
56 # define ALIGNMENT .align 4,0x90 /* gas form: 2^4 = 16-byte boundary, padding filled with 0x90 bytes */
59 #if defined(i386) || defined(_i386) || defined(_I386) || defined(__i386)
61 /* This version is for 386 Unix, OS/2, MSDOS in 32 bit mode (gcc & gas).
62 * Warning: it uses the AT&T syntax: mov source,dest
63 * This file is optional. If you want to use the C version instead,
64 * remove -DASM_CRC from CFLAGS in Makefile and set OBJA to an empty string.
69 #if defined(NO_STD_STACKFRAME) /* an explicit opt-out overrides any USE_STD_STACKFRAME request */
70 # undef USE_STD_STACKFRAME /* this is the macro the frame/argument selection tests */
72 /* The default is to use standard stack frame entry, because it
73 * results in smaller code!
75 # ifndef USE_STD_STACKFRAME
76 # define USE_STD_STACKFRAME
80 #ifdef USE_STD_STACKFRAME
81 # define _STD_ENTRY pushl %ebp ; movl %esp,%ebp /* save caller's %ebp and make %ebp the frame pointer */
83 # define arg2 12(%ebp) /* 2nd argument (buf), addressed via the frame pointer */
84 # define arg3 16(%ebp) /* 3rd argument (len), addressed via the frame pointer */
85 # define _STD_LEAVE popl %ebp /* restore the %ebp pushed by _STD_ENTRY */
86 #else /* !USE_STD_STACKFRAME */
88 # define arg1 24(%esp) /* 1st argument (crc), addressed relative to %esp */
89 # define arg2 28(%esp) /* 2nd argument (buf) - NOTE(review): offsets presumably cover the return */
90 # define arg3 32(%esp) /* 3rd argument (len) - address plus the saved registers, confirm against the pushes */
92 #endif /* ?USE_STD_STACKFRAME */
95 * These two (three) macros make up the loop body of the CRC32 cruncher.
98 * esi : pointer to next data byte (or lword) "buf++"
100 * edi : pointer to base of crc_table array
102 * ebx : index into crc_table array
103 * (requires upper three bytes = 0 when __686 is undefined)
105 #ifndef __686 /* optimize for 386, 486, Pentium */
106 #define Do_CRC /* c = (c >> 8) ^ table[c & 0xFF] */\
107 movb %al, %bl ;/* tmp = c & 0xFF (writes bl only, so ebx bits 8..31 must already be 0) */\
108 shrl $8, %eax ;/* c = (c >> 8) */\
109 xorl (%edi, %ebx, 4), %eax ;/* c ^= table[tmp] (edi = table base, 4-byte entries) */
110 #else /* __686 : optimize for Pentium Pro and compatible CPUs */
111 #define Do_CRC /* c = (c >> 8) ^ table[c & 0xFF] */\
112 movzbl %al, %ebx ;/* tmp = c & 0xFF (zero-extended into the whole of ebx) */\
113 shrl $8, %eax ;/* c = (c >> 8) */\
114 xorl (%edi, %ebx, 4), %eax ;/* c ^= table[tmp] (edi = table base, 4-byte entries) */
117 #define Do_CRC_byte /* c = (c >> 8) ^ table[(c^*buf++)&0xFF] */\
118 xorb (%esi), %al ;/* c ^= *buf (only the low byte of c is touched) */\
119 incl %esi ;/* buf++ */\
122 #ifndef NO_32_BIT_LOADS
123 #define Do_CRC_lword /* mix the next four buffer bytes into c with a single 32-bit load */\
124 xorl (%esi), %eax ;/* c ^= *(ulg *)buf */\
125 addl $4, %esi ;/* ((ulg *)buf)++ */\
130 #endif /* !NO_32_BIT_LOADS */
137 _crc32: /* ulg crc32(ulg crc, uch *buf, extent len) - stack arguments, result in eax */
145 movl arg2, %esi /* 2nd arg: uch *buf */
146 subl %eax, %eax /* > if (!buf) - eax = 0 is the result for a NULL buf */
147 testl %esi, %esi /* > return 0; */
148 jz .L_fine /* > else { */
151 movl arg1, %eax /* 1st arg: ulg crc */
153 subl %ebx, %ebx /* ebx=0; bl usable as dword (the non-__686 Do_CRC writes bl but indexes with ebx) */
155 movl arg3, %ecx /* 3rd arg: extent len */
156 notl %eax /* > c = ~crc; */
159 #ifndef NO_UNROLLED_LOOPS
161 # ifndef NO_32_BIT_LOADS
162 /* Assertion: the length is known to be positive at this point */
164 testl $3, %esi /* Align buf on lword boundary: any of the low two address bits set means not yet 4-byte aligned */
170 # endif /* !NO_32_BIT_LOADS */
171 movl %ecx, %edx /* save len in edx, ecx is about to be overwritten by the quotient */
172 shrl $3, %ecx /* ecx = len / 8 */
174 /* align loop head at start of 486 internal cache line !! */
177 # ifndef NO_32_BIT_LOADS
179 xorl (%esi), %eax ;/* c ^= *(ulg *)buf (first lword) */
180 addl $4, %esi ;/* ((ulg *)buf)++ */
186 xorl (%esi), %eax ;/* c ^= *(ulg *)buf (second lword) */
187 addl $4, %esi ;/* ((ulg *)buf)++ */
192 # else /* NO_32_BIT_LOADS */
201 # endif /* ?NO_32_BIT_LOADS */
207 andl $7, %ecx /* ecx = len % 8, the zero flag set here feeds the jz that follows the #endif */
208 #endif /* !NO_UNROLLED_LOOPS */
209 jz .L_bail /* > if (len) */
210 /* align loop head at start of 486 internal cache line !! */
212 .L_loupe: /* > do { */
213 Do_CRC_byte /* c = CRC32(c, *buf++); */
214 decl %ecx /* > } while (--len); */
218 notl %eax /* > return ~c; */
229 #error this asm version is for 386 only
230 #endif /* i386 || _i386 || _I386 || __i386 */
232 #endif /* !USE_ZLIB */