1 /* inffas86.c is a hand tuned assembler version of
3 * inffast.c -- fast decoding
4 * Copyright (C) 1995-2003 Mark Adler
5 * For conditions of distribution and use, see copyright notice in zlib.h
7 * Copyright (C) 2003 Chris Anderson <christop@charm.net>
8 * Please use the copyright conditions above.
10 * Dec-29-2003 -- I added AMD64 inflate asm support. This version is also
11 * slightly quicker on x86 systems because, instead of using rep movsb to copy
12 * data, it uses rep movsw, which moves data in 2-byte chunks instead of single
13 * bytes. I've tested the AMD64 code on a Fedora Core 1 + the x86_64 updates
14 * from http://fedora.linux.duke.edu/fc1_x86_64
15 * which is running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with
16 * 1GB ram. The 64-bit version is about 4% faster than the 32-bit version,
17 * when decompressing mozilla-source-1.3.tar.gz.
19 * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
20 * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
21 * the moment. I have successfully compiled and tested this code with gcc2.96,
22 * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
23 * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
24 * enabled. I will attempt to merge the MMX code into this version. Newer
25 * versions of this and inffast.S can be found at
26 * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
34 /* Mark Adler's comments from inffast.c: */
37 Decode literal, length, and distance codes and write out the resulting
38 literal and match bytes until either not enough input or output is
39 available, an end-of-block is encountered, or a data error is encountered.
40 When large enough input and output buffers are supplied to inflate(), for
41 example, a 16K input buffer and a 64K output buffer, more than 95% of the
42 inflate execution time is spent in this routine.
48 strm->avail_out >= 258
49 start >= strm->avail_out
52 On return, state->mode is one of:
54 LEN -- not enough output space or available input left to continue
55 TYPE -- reached end of block code, inflate() to interpret next block
56 BAD -- error in block data
60 - The maximum input bits used by a length/distance pair is 15 bits for the
61 length code, 5 bits for the length extra, 15 bits for the distance code,
62 and 13 bits for the distance extra. This totals 48 bits, or six bytes.
63 Therefore if strm->avail_in >= 6, then there is enough input to avoid
64 checking for available input while decoding.
66 - The maximum bytes that a single length/distance pair can output is 258
67 bytes, which is the maximum length that can be coded. inflate_fast()
68 requires strm->avail_out >= 258 for each loop to avoid checking for output space while decoding.
71 void inflate_fast(strm
, start
)
73 unsigned start
; /* inflate()'s starting value for strm->avail_out */
75 struct inflate_state FAR
*state
;
77 /* 64 32 x86 x86_64 */
78 /* ar offset register */
79 /* 0 0 */ void *esp
; /* esp save */
80 /* 8 4 */ void *ebp
; /* ebp save */
81 /* 16 8 */ unsigned char FAR
*in
; /* esi rsi local strm->next_in */
82 /* 24 12 */ unsigned char FAR
*last
; /* r9 while in < last */
83 /* 32 16 */ unsigned char FAR
*out
; /* edi rdi local strm->next_out */
84 /* 40 20 */ unsigned char FAR
*beg
; /* inflate()'s init next_out */
85 /* 48 24 */ unsigned char FAR
*end
; /* r10 while out < end */
86 /* 56 28 */ unsigned char FAR
*window
;/* sliding window base pointer (its size is wsize), used if wsize != 0 */
87 /* 64 32 */ code
const FAR
*lcode
; /* ebp rbp local strm->lencode */
88 /* 72 36 */ code
const FAR
*dcode
; /* r11 local strm->distcode */
89 /* 80 40 */ unsigned long hold
; /* edx rdx local strm->hold */
90 /* 88 44 */ unsigned bits
; /* ebx rbx local strm->bits */
91 /* 92 48 */ unsigned wsize
; /* window size */
92 /* 96 52 */ unsigned write
; /* window write index */
93 /*100 56 */ unsigned lmask
; /* r12 mask for lcode */
94 /*104 60 */ unsigned dmask
; /* r13 mask for dcode */
95 /*108 64 */ unsigned len
; /* r14 match length */
96 /*112 68 */ unsigned dist
; /* r15 match distance */
97 /*116 72 */ unsigned status
; /* set when state chng*/
100 #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
101 #define PAD_AVAIL_IN 6
102 #define PAD_AVAIL_OUT 258
104 #define PAD_AVAIL_IN 5
105 #define PAD_AVAIL_OUT 257
108 /* copy state to local variables */
109 state
= (struct inflate_state FAR
*)strm
->state
;
110 ar
.in
= strm
->next_in
;
111 ar
.last
= ar
.in
+ (strm
->avail_in
- PAD_AVAIL_IN
);
112 ar
.out
= strm
->next_out
;
113 ar
.beg
= ar
.out
- (start
- strm
->avail_out
);
114 ar
.end
= ar
.out
+ (strm
->avail_out
- PAD_AVAIL_OUT
);
115 ar
.wsize
= state
->wsize
;
116 ar
.write
= state
->wnext
;
117 ar
.window
= state
->window
;
118 ar
.hold
= state
->hold
;
119 ar
.bits
= state
->bits
;
120 ar
.lcode
= state
->lencode
;
121 ar
.dcode
= state
->distcode
;
122 ar
.lmask
= (1U << state
->lenbits
) - 1;
123 ar
.dmask
= (1U << state
->distbits
) - 1;
125 /* decode literals and length/distances until end-of-block or not enough
126 input data or output space */
128 /* align in on 1/2 hold size boundary */
129 while (((unsigned long)(void *)ar
.in
& (sizeof(ar
.hold
) / 2 - 1)) != 0) {
130 ar
.hold
+= (unsigned long)*ar
.in
++ << ar
.bits
;
134 #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
135 __asm__
__volatile__ (
137 " movq %%rbp, 8(%%rax)\n" /* save regs rbp and rsp */
138 " movq %%rsp, (%%rax)\n"
139 " movq %%rax, %%rsp\n" /* make rsp point to &ar */
140 " movq 16(%%rsp), %%rsi\n" /* rsi = in */
141 " movq 32(%%rsp), %%rdi\n" /* rdi = out */
142 " movq 24(%%rsp), %%r9\n" /* r9 = last */
143 " movq 48(%%rsp), %%r10\n" /* r10 = end */
144 " movq 64(%%rsp), %%rbp\n" /* rbp = lcode */
145 " movq 72(%%rsp), %%r11\n" /* r11 = dcode */
146 " movq 80(%%rsp), %%rdx\n" /* rdx = hold */
147 " movl 88(%%rsp), %%ebx\n" /* ebx = bits */
148 " movl 100(%%rsp), %%r12d\n" /* r12d = lmask */
149 " movl 104(%%rsp), %%r13d\n" /* r13d = dmask */
153 " cmpq %%rdi, %%r10\n"
154 " je .L_one_time\n" /* if only one decode left */
155 " cmpq %%rsi, %%r9\n"
160 " movq %%r12, %%r8\n" /* r8 = lmask */
162 " ja .L_get_length_code_one_time\n"
164 " lodsl\n" /* eax = *(uint *)in++ */
165 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
166 " addb $32, %%bl\n" /* bits += 32 */
167 " shlq %%cl, %%rax\n"
168 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
169 " jmp .L_get_length_code_one_time\n"
173 " cmpq %%rdi, %%r10\n"
174 " jbe .L_break_loop\n"
175 " cmpq %%rsi, %%r9\n"
176 " jbe .L_break_loop\n"
179 " movq %%r12, %%r8\n" /* r8 = lmask */
181 " ja .L_get_length_code\n" /* if (32 < bits) */
183 " lodsl\n" /* eax = *(uint *)in++ */
184 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
185 " addb $32, %%bl\n" /* bits += 32 */
186 " shlq %%cl, %%rax\n"
187 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
189 ".L_get_length_code:\n"
190 " andq %%rdx, %%r8\n" /* r8 &= hold */
191 " movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
193 " movb %%ah, %%cl\n" /* cl = this.bits */
194 " subb %%ah, %%bl\n" /* bits -= this.bits */
195 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
197 " testb %%al, %%al\n"
198 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
200 " movq %%r12, %%r8\n" /* r8 = lmask */
201 " shrl $16, %%eax\n" /* output this.val char */
204 ".L_get_length_code_one_time:\n"
205 " andq %%rdx, %%r8\n" /* r8 &= hold */
206 " movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
209 " movb %%ah, %%cl\n" /* cl = this.bits */
210 " subb %%ah, %%bl\n" /* bits -= this.bits */
211 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
213 " testb %%al, %%al\n"
214 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
216 " shrl $16, %%eax\n" /* output this.val char */
218 " jmp .L_while_test\n"
221 ".L_test_for_length_base:\n"
222 " movl %%eax, %%r14d\n" /* len = this */
223 " shrl $16, %%r14d\n" /* len = this.val */
227 " jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
228 " andb $15, %%cl\n" /* op &= 15 */
229 " jz .L_decode_distance\n" /* if (!op) */
231 ".L_add_bits_to_len:\n"
233 " xorl %%eax, %%eax\n"
235 " shll %%cl, %%eax\n"
237 " andl %%edx, %%eax\n" /* eax &= hold */
238 " shrq %%cl, %%rdx\n"
239 " addl %%eax, %%r14d\n" /* len += hold & mask[op] */
241 ".L_decode_distance:\n"
242 " movq %%r13, %%r8\n" /* r8 = dmask */
244 " ja .L_get_distance_code\n" /* if (32 < bits) */
246 " lodsl\n" /* eax = *(uint *)in++ */
247 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
248 " addb $32, %%bl\n" /* bits += 32 */
249 " shlq %%cl, %%rax\n"
250 " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
252 ".L_get_distance_code:\n"
253 " andq %%rdx, %%r8\n" /* r8 &= hold */
254 " movl (%%r11,%%r8,4), %%eax\n" /* eax = dcode[hold & dmask] */
257 " movl %%eax, %%r15d\n" /* dist = this */
258 " shrl $16, %%r15d\n" /* dist = this.val */
260 " subb %%ah, %%bl\n" /* bits -= this.bits */
261 " shrq %%cl, %%rdx\n" /* hold >>= this.bits */
262 " movb %%al, %%cl\n" /* cl = this.op */
264 " testb $16, %%al\n" /* if ((op & 16) == 0) */
265 " jz .L_test_for_second_level_dist\n"
266 " andb $15, %%cl\n" /* op &= 15 */
267 " jz .L_check_dist_one\n"
269 ".L_add_bits_to_dist:\n"
271 " xorl %%eax, %%eax\n"
273 " shll %%cl, %%eax\n"
274 " decl %%eax\n" /* (1 << op) - 1 */
275 " andl %%edx, %%eax\n" /* eax &= hold */
276 " shrq %%cl, %%rdx\n"
277 " addl %%eax, %%r15d\n" /* dist += hold & ((1 << op) - 1) */
280 " movq %%rsi, %%r8\n" /* save in so from can use it's reg */
281 " movq %%rdi, %%rax\n"
282 " subq 40(%%rsp), %%rax\n" /* nbytes = out - beg */
284 " cmpl %%r15d, %%eax\n"
285 " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
287 " movl %%r14d, %%ecx\n" /* ecx = len */
288 " movq %%rdi, %%rsi\n"
289 " subq %%r15, %%rsi\n" /* from = out - dist */
292 " jnc .L_copy_two\n" /* if len % 2 == 0 */
295 " movb (%%rsi), %%al\n"
296 " movb %%al, (%%rdi)\n"
299 " movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */
300 " jmp .L_while_test\n"
304 " movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */
305 " jmp .L_while_test\n"
308 ".L_check_dist_one:\n"
309 " cmpl $1, %%r15d\n" /* if dist 1, is a memset */
310 " jne .L_check_window\n"
311 " cmpq %%rdi, 40(%%rsp)\n" /* if out == beg, outside window */
312 " je .L_check_window\n"
314 " movl %%r14d, %%ecx\n" /* ecx = len */
315 " movb -1(%%rdi), %%al\n"
320 " movb %%al, (%%rdi)\n"
325 " jmp .L_while_test\n"
328 ".L_test_for_second_level_length:\n"
330 " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
332 " xorl %%eax, %%eax\n"
334 " shll %%cl, %%eax\n"
336 " andl %%edx, %%eax\n" /* eax &= hold */
337 " addl %%r14d, %%eax\n" /* eax += len */
338 " movl (%%rbp,%%rax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
342 ".L_test_for_second_level_dist:\n"
344 " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
346 " xorl %%eax, %%eax\n"
348 " shll %%cl, %%eax\n"
350 " andl %%edx, %%eax\n" /* eax &= hold */
351 " addl %%r15d, %%eax\n" /* eax += dist */
352 " movl (%%r11,%%rax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
357 " movl %%eax, %%ecx\n" /* ecx = nbytes */
358 " movl 92(%%rsp), %%eax\n" /* eax = wsize, prepare for dist cmp */
359 " negl %%ecx\n" /* nbytes = -nbytes */
361 " cmpl %%r15d, %%eax\n"
362 " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
364 " addl %%r15d, %%ecx\n" /* nbytes = dist - nbytes */
365 " cmpl $0, 96(%%rsp)\n"
366 " jne .L_wrap_around_window\n" /* if (write != 0) */
368 " movq 56(%%rsp), %%rsi\n" /* from = window */
369 " subl %%ecx, %%eax\n" /* eax -= nbytes */
370 " addq %%rax, %%rsi\n" /* from += wsize - nbytes */
372 " movl %%r14d, %%eax\n" /* eax = len */
373 " cmpl %%ecx, %%r14d\n"
374 " jbe .L_do_copy\n" /* if (nbytes >= len) */
376 " subl %%ecx, %%eax\n" /* eax -= nbytes */
378 " movq %%rdi, %%rsi\n"
379 " subq %%r15, %%rsi\n" /* from = &out[ -dist ] */
383 ".L_wrap_around_window:\n"
384 " movl 96(%%rsp), %%eax\n" /* eax = write */
385 " cmpl %%eax, %%ecx\n"
386 " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
388 " movl 92(%%rsp), %%esi\n" /* from = wsize */
389 " addq 56(%%rsp), %%rsi\n" /* from += window */
390 " addq %%rax, %%rsi\n" /* from += write */
391 " subq %%rcx, %%rsi\n" /* from -= nbytes */
392 " subl %%eax, %%ecx\n" /* nbytes -= write */
394 " movl %%r14d, %%eax\n" /* eax = len */
395 " cmpl %%ecx, %%eax\n"
396 " jbe .L_do_copy\n" /* if (nbytes >= len) */
398 " subl %%ecx, %%eax\n" /* len -= nbytes */
400 " movq 56(%%rsp), %%rsi\n" /* from = window */
401 " movl 96(%%rsp), %%ecx\n" /* nbytes = write */
402 " cmpl %%ecx, %%eax\n"
403 " jbe .L_do_copy\n" /* if (nbytes >= len) */
405 " subl %%ecx, %%eax\n" /* len -= nbytes */
407 " movq %%rdi, %%rsi\n"
408 " subq %%r15, %%rsi\n" /* from = out - dist */
412 ".L_contiguous_in_window:\n"
413 " movq 56(%%rsp), %%rsi\n" /* rsi = window */
414 " addq %%rax, %%rsi\n"
415 " subq %%rcx, %%rsi\n" /* from += write - nbytes */
417 " movl %%r14d, %%eax\n" /* eax = len */
418 " cmpl %%ecx, %%eax\n"
419 " jbe .L_do_copy\n" /* if (nbytes >= len) */
421 " subl %%ecx, %%eax\n" /* len -= nbytes */
423 " movq %%rdi, %%rsi\n"
424 " subq %%r15, %%rsi\n" /* from = out - dist */
425 " jmp .L_do_copy\n" /* if (nbytes >= len) */
429 " movl %%eax, %%ecx\n" /* ecx = len */
432 " movq %%r8, %%rsi\n" /* move in back to %esi, toss from */
433 " jmp .L_while_test\n"
435 ".L_test_for_end_of_block:\n"
437 " jz .L_invalid_literal_length_code\n"
438 " movl $1, 116(%%rsp)\n"
439 " jmp .L_break_loop_with_status\n"
441 ".L_invalid_literal_length_code:\n"
442 " movl $2, 116(%%rsp)\n"
443 " jmp .L_break_loop_with_status\n"
445 ".L_invalid_distance_code:\n"
446 " movl $3, 116(%%rsp)\n"
447 " jmp .L_break_loop_with_status\n"
449 ".L_invalid_distance_too_far:\n"
450 " movl $4, 116(%%rsp)\n"
451 " jmp .L_break_loop_with_status\n"
454 " movl $0, 116(%%rsp)\n"
456 ".L_break_loop_with_status:\n"
457 /* put in, out, bits, and hold back into ar and pop esp */
458 " movq %%rsi, 16(%%rsp)\n" /* in */
459 " movq %%rdi, 32(%%rsp)\n" /* out */
460 " movl %%ebx, 88(%%rsp)\n" /* bits */
461 " movq %%rdx, 80(%%rsp)\n" /* hold */
462 " movq (%%rsp), %%rax\n" /* restore rbp and rsp */
463 " movq 8(%%rsp), %%rbp\n"
464 " movq %%rax, %%rsp\n"
467 : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi",
468 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
470 #elif ( defined( __GNUC__ ) || defined( __ICC ) ) && defined( __i386 )
471 __asm__
__volatile__ (
473 " movl %%esp, (%%eax)\n" /* save esp, ebp */
474 " movl %%ebp, 4(%%eax)\n"
475 " movl %%eax, %%esp\n"
476 " movl 8(%%esp), %%esi\n" /* esi = in */
477 " movl 16(%%esp), %%edi\n" /* edi = out */
478 " movl 40(%%esp), %%edx\n" /* edx = hold */
479 " movl 44(%%esp), %%ebx\n" /* ebx = bits */
480 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
487 " cmpl %%edi, 24(%%esp)\n" /* out < end */
488 " jbe .L_break_loop\n"
489 " cmpl %%esi, 12(%%esp)\n" /* in < last */
490 " jbe .L_break_loop\n"
494 " ja .L_get_length_code\n" /* if (15 < bits) */
496 " xorl %%eax, %%eax\n"
497 " lodsw\n" /* al = *(ushort *)in++ */
498 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
499 " addb $16, %%bl\n" /* bits += 16 */
500 " shll %%cl, %%eax\n"
501 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
503 ".L_get_length_code:\n"
504 " movl 56(%%esp), %%eax\n" /* eax = lmask */
505 " andl %%edx, %%eax\n" /* eax &= hold */
506 " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[hold & lmask] */
509 " movb %%ah, %%cl\n" /* cl = this.bits */
510 " subb %%ah, %%bl\n" /* bits -= this.bits */
511 " shrl %%cl, %%edx\n" /* hold >>= this.bits */
513 " testb %%al, %%al\n"
514 " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
516 " shrl $16, %%eax\n" /* output this.val char */
518 " jmp .L_while_test\n"
521 ".L_test_for_length_base:\n"
522 " movl %%eax, %%ecx\n" /* len = this */
523 " shrl $16, %%ecx\n" /* len = this.val */
524 " movl %%ecx, 64(%%esp)\n" /* save len */
528 " jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
529 " andb $15, %%cl\n" /* op &= 15 */
530 " jz .L_decode_distance\n" /* if (!op) */
532 " jae .L_add_bits_to_len\n" /* if (op <= bits) */
534 " movb %%cl, %%ch\n" /* stash op in ch, freeing cl */
535 " xorl %%eax, %%eax\n"
536 " lodsw\n" /* al = *(ushort *)in++ */
537 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
538 " addb $16, %%bl\n" /* bits += 16 */
539 " shll %%cl, %%eax\n"
540 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
541 " movb %%ch, %%cl\n" /* move op back to ecx */
543 ".L_add_bits_to_len:\n"
545 " xorl %%eax, %%eax\n"
547 " shll %%cl, %%eax\n"
549 " andl %%edx, %%eax\n" /* eax &= hold */
550 " shrl %%cl, %%edx\n"
551 " addl %%eax, 64(%%esp)\n" /* len += hold & mask[op] */
553 ".L_decode_distance:\n"
555 " ja .L_get_distance_code\n" /* if (15 < bits) */
557 " xorl %%eax, %%eax\n"
558 " lodsw\n" /* al = *(ushort *)in++ */
559 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
560 " addb $16, %%bl\n" /* bits += 16 */
561 " shll %%cl, %%eax\n"
562 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
564 ".L_get_distance_code:\n"
565 " movl 60(%%esp), %%eax\n" /* eax = dmask */
566 " movl 36(%%esp), %%ecx\n" /* ecx = dcode */
567 " andl %%edx, %%eax\n" /* eax &= hold */
568 " movl (%%ecx,%%eax,4), %%eax\n"/* eax = dcode[hold & dmask] */
571 " movl %%eax, %%ebp\n" /* dist = this */
572 " shrl $16, %%ebp\n" /* dist = this.val */
574 " subb %%ah, %%bl\n" /* bits -= this.bits */
575 " shrl %%cl, %%edx\n" /* hold >>= this.bits */
576 " movb %%al, %%cl\n" /* cl = this.op */
578 " testb $16, %%al\n" /* if ((op & 16) == 0) */
579 " jz .L_test_for_second_level_dist\n"
580 " andb $15, %%cl\n" /* op &= 15 */
581 " jz .L_check_dist_one\n"
583 " jae .L_add_bits_to_dist\n" /* if (op <= bits) 97.6% */
585 " movb %%cl, %%ch\n" /* stash op in ch, freeing cl */
586 " xorl %%eax, %%eax\n"
587 " lodsw\n" /* al = *(ushort *)in++ */
588 " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
589 " addb $16, %%bl\n" /* bits += 16 */
590 " shll %%cl, %%eax\n"
591 " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
592 " movb %%ch, %%cl\n" /* move op back to ecx */
594 ".L_add_bits_to_dist:\n"
596 " xorl %%eax, %%eax\n"
598 " shll %%cl, %%eax\n"
599 " decl %%eax\n" /* (1 << op) - 1 */
600 " andl %%edx, %%eax\n" /* eax &= hold */
601 " shrl %%cl, %%edx\n"
602 " addl %%eax, %%ebp\n" /* dist += hold & ((1 << op) - 1) */
605 " movl %%esi, 8(%%esp)\n" /* save in so from can use it's reg */
606 " movl %%edi, %%eax\n"
607 " subl 20(%%esp), %%eax\n" /* nbytes = out - beg */
609 " cmpl %%ebp, %%eax\n"
610 " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
612 " movl 64(%%esp), %%ecx\n" /* ecx = len */
613 " movl %%edi, %%esi\n"
614 " subl %%ebp, %%esi\n" /* from = out - dist */
617 " jnc .L_copy_two\n" /* if len % 2 == 0 */
620 " movb (%%esi), %%al\n"
621 " movb %%al, (%%edi)\n"
624 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
625 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
626 " jmp .L_while_test\n"
630 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
631 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
632 " jmp .L_while_test\n"
635 ".L_check_dist_one:\n"
636 " cmpl $1, %%ebp\n" /* if dist 1, is a memset */
637 " jne .L_check_window\n"
638 " cmpl %%edi, 20(%%esp)\n"
639 " je .L_check_window\n" /* out == beg, if outside window */
641 " movl 64(%%esp), %%ecx\n" /* ecx = len */
642 " movb -1(%%edi), %%al\n"
647 " movb %%al, (%%edi)\n"
652 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
653 " jmp .L_while_test\n"
656 ".L_test_for_second_level_length:\n"
658 " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
660 " xorl %%eax, %%eax\n"
662 " shll %%cl, %%eax\n"
664 " andl %%edx, %%eax\n" /* eax &= hold */
665 " addl 64(%%esp), %%eax\n" /* eax += len */
666 " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
670 ".L_test_for_second_level_dist:\n"
672 " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
674 " xorl %%eax, %%eax\n"
676 " shll %%cl, %%eax\n"
678 " andl %%edx, %%eax\n" /* eax &= hold */
679 " addl %%ebp, %%eax\n" /* eax += dist */
680 " movl 36(%%esp), %%ecx\n" /* ecx = dcode */
681 " movl (%%ecx,%%eax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
686 " movl %%eax, %%ecx\n"
687 " movl 48(%%esp), %%eax\n" /* eax = wsize */
688 " negl %%ecx\n" /* nbytes = -nbytes */
689 " movl 28(%%esp), %%esi\n" /* from = window */
691 " cmpl %%ebp, %%eax\n"
692 " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
694 " addl %%ebp, %%ecx\n" /* nbytes = dist - nbytes */
695 " cmpl $0, 52(%%esp)\n"
696 " jne .L_wrap_around_window\n" /* if (write != 0) */
698 " subl %%ecx, %%eax\n"
699 " addl %%eax, %%esi\n" /* from += wsize - nbytes */
701 " movl 64(%%esp), %%eax\n" /* eax = len */
702 " cmpl %%ecx, %%eax\n"
703 " jbe .L_do_copy\n" /* if (nbytes >= len) */
705 " subl %%ecx, %%eax\n" /* len -= nbytes */
707 " movl %%edi, %%esi\n"
708 " subl %%ebp, %%esi\n" /* from = out - dist */
712 ".L_wrap_around_window:\n"
713 " movl 52(%%esp), %%eax\n" /* eax = write */
714 " cmpl %%eax, %%ecx\n"
715 " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
717 " addl 48(%%esp), %%esi\n" /* from += wsize */
718 " addl %%eax, %%esi\n" /* from += write */
719 " subl %%ecx, %%esi\n" /* from -= nbytes */
720 " subl %%eax, %%ecx\n" /* nbytes -= write */
722 " movl 64(%%esp), %%eax\n" /* eax = len */
723 " cmpl %%ecx, %%eax\n"
724 " jbe .L_do_copy\n" /* if (nbytes >= len) */
726 " subl %%ecx, %%eax\n" /* len -= nbytes */
728 " movl 28(%%esp), %%esi\n" /* from = window */
729 " movl 52(%%esp), %%ecx\n" /* nbytes = write */
730 " cmpl %%ecx, %%eax\n"
731 " jbe .L_do_copy\n" /* if (nbytes >= len) */
733 " subl %%ecx, %%eax\n" /* len -= nbytes */
735 " movl %%edi, %%esi\n"
736 " subl %%ebp, %%esi\n" /* from = out - dist */
740 ".L_contiguous_in_window:\n"
741 " addl %%eax, %%esi\n"
742 " subl %%ecx, %%esi\n" /* from += write - nbytes */
744 " movl 64(%%esp), %%eax\n" /* eax = len */
745 " cmpl %%ecx, %%eax\n"
746 " jbe .L_do_copy\n" /* if (nbytes >= len) */
748 " subl %%ecx, %%eax\n" /* len -= nbytes */
750 " movl %%edi, %%esi\n"
751 " subl %%ebp, %%esi\n" /* from = out - dist */
752 " jmp .L_do_copy\n" /* if (nbytes >= len) */
756 " movl %%eax, %%ecx\n"
759 " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
760 " movl 32(%%esp), %%ebp\n" /* ebp = lcode */
761 " jmp .L_while_test\n"
763 ".L_test_for_end_of_block:\n"
765 " jz .L_invalid_literal_length_code\n"
766 " movl $1, 72(%%esp)\n"
767 " jmp .L_break_loop_with_status\n"
769 ".L_invalid_literal_length_code:\n"
770 " movl $2, 72(%%esp)\n"
771 " jmp .L_break_loop_with_status\n"
773 ".L_invalid_distance_code:\n"
774 " movl $3, 72(%%esp)\n"
775 " jmp .L_break_loop_with_status\n"
777 ".L_invalid_distance_too_far:\n"
778 " movl 8(%%esp), %%esi\n"
779 " movl $4, 72(%%esp)\n"
780 " jmp .L_break_loop_with_status\n"
783 " movl $0, 72(%%esp)\n"
785 ".L_break_loop_with_status:\n"
786 /* put in, out, bits, and hold back into ar and pop esp */
787 " movl %%esi, 8(%%esp)\n" /* save in */
788 " movl %%edi, 16(%%esp)\n" /* save out */
789 " movl %%ebx, 44(%%esp)\n" /* save bits */
790 " movl %%edx, 40(%%esp)\n" /* save hold */
791 " movl 4(%%esp), %%ebp\n" /* restore esp, ebp */
792 " movl (%%esp), %%esp\n"
795 : "memory", "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi"
797 #elif defined( _MSC_VER ) && ! defined( _M_AMD64 )
800 mov
[eax
], esp
/* save esp, ebp */
803 mov esi
, [esp
+8] /* esi = in */
804 mov edi
, [esp
+16] /* edi = out */
805 mov edx
, [esp
+40] /* edx = hold */
806 mov ebx
, [esp
+44] /* ebx = bits */
807 mov ebp
, [esp
+32] /* ebp = lcode */
821 ja L_get_length_code
/* if (15 < bits) */
824 lodsw
/* al = *(ushort *)in++ */
825 mov cl
, bl
/* cl = bits, needs it for shifting */
826 add bl
, 16 /* bits += 16 */
828 or edx
, eax
/* hold |= *((ushort *)in)++ << bits */
831 mov eax
, [esp
+56] /* eax = lmask */
832 and eax
, edx
/* eax &= hold */
833 mov eax
, [ebp
+eax
*4] /* eax = lcode[hold & lmask] */
836 mov cl
, ah
/* cl = this.bits */
837 sub bl
, ah
/* bits -= this.bits */
838 shr edx
, cl
/* hold >>= this.bits */
841 jnz L_test_for_length_base
/* if (op != 0) 45.7% */
843 shr eax
, 16 /* output this.val char */
848 L_test_for_length_base
:
849 mov ecx
, eax
/* len = this */
850 shr ecx
, 16 /* len = this.val */
851 mov
[esp
+64], ecx
/* save len */
855 jz L_test_for_second_level_length
/* if ((op & 16) == 0) 8% */
856 and cl
, 15 /* op &= 15 */
857 jz L_decode_distance
/* if (!op) */
859 jae L_add_bits_to_len
/* if (op <= bits) */
861 mov ch
, cl
/* stash op in ch, freeing cl */
863 lodsw
/* al = *(ushort *)in++ */
864 mov cl
, bl
/* cl = bits, needs it for shifting */
865 add bl
, 16 /* bits += 16 */
867 or edx
, eax
/* hold |= *((ushort *)in)++ << bits */
868 mov cl
, ch
/* move op back to ecx */
876 and eax
, edx
/* eax &= hold */
878 add
[esp
+64], eax
/* len += hold & mask[op] */
882 ja L_get_distance_code
/* if (15 < bits) */
885 lodsw
/* al = *(ushort *)in++ */
886 mov cl
, bl
/* cl = bits, needs it for shifting */
887 add bl
, 16 /* bits += 16 */
889 or edx
, eax
/* hold |= *((ushort *)in)++ << bits */
892 mov eax
, [esp
+60] /* eax = dmask */
893 mov ecx
, [esp
+36] /* ecx = dcode */
894 and eax
, edx
/* eax &= hold */
895 mov eax
, [ecx
+eax
*4]/* eax = dcode[hold & dmask] */
898 mov ebp
, eax
/* dist = this */
899 shr ebp
, 16 /* dist = this.val */
901 sub bl
, ah
/* bits -= this.bits */
902 shr edx
, cl
/* hold >>= this.bits */
903 mov cl
, al
/* cl = this.op */
905 test al
, 16 /* if ((op & 16) == 0) */
906 jz L_test_for_second_level_dist
907 and cl
, 15 /* op &= 15 */
910 jae L_add_bits_to_dist
/* if (op <= bits) 97.6% */
912 mov ch
, cl
/* stash op in ch, freeing cl */
914 lodsw
/* al = *(ushort *)in++ */
915 mov cl
, bl
/* cl = bits, needs it for shifting */
916 add bl
, 16 /* bits += 16 */
918 or edx
, eax
/* hold |= *((ushort *)in)++ << bits */
919 mov cl
, ch
/* move op back to ecx */
926 dec eax
/* (1 << op) - 1 */
927 and eax
, edx
/* eax &= hold */
929 add ebp
, eax
/* dist += hold & ((1 << op) - 1) */
932 mov
[esp
+8], esi
/* save in so from can use it's reg */
934 sub eax
, [esp
+20] /* nbytes = out - beg */
937 jb L_clip_window
/* if (dist > nbytes) 4.2% */
939 mov ecx
, [esp
+64] /* ecx = len */
941 sub esi
, ebp
/* from = out - dist */
951 mov esi
, [esp
+8] /* move in back to %esi, toss from */
952 mov ebp
, [esp
+32] /* ebp = lcode */
957 mov esi
, [esp
+8] /* move in back to %esi, toss from */
958 mov ebp
, [esp
+32] /* ebp = lcode */
963 cmp ebp
, 1 /* if dist 1, is a memset */
966 je L_check_window
/* out == beg, if outside window */
968 mov ecx
, [esp
+64] /* ecx = len */
974 mov
[edi
], al
/* memset out with from[-1] */
979 mov ebp
, [esp
+32] /* ebp = lcode */
983 L_test_for_second_level_length
:
985 jnz L_test_for_end_of_block
/* if ((op & 64) != 0) */
991 and eax
, edx
/* eax &= hold */
992 add eax
, [esp
+64] /* eax += len */
993 mov eax
, [ebp
+eax
*4] /* eax = lcode[val+(hold&mask[op])]*/
997 L_test_for_second_level_dist
:
999 jnz L_invalid_distance_code
/* if ((op & 64) != 0) */
1005 and eax
, edx
/* eax &= hold */
1006 add eax
, ebp
/* eax += dist */
1007 mov ecx
, [esp
+36] /* ecx = dcode */
1008 mov eax
, [ecx
+eax
*4] /* eax = dcode[val+(hold&mask[op])]*/
1014 mov eax
, [esp
+48] /* eax = wsize */
1015 neg ecx
/* nbytes = -nbytes */
1016 mov esi
, [esp
+28] /* from = window */
1019 jb L_invalid_distance_too_far
/* if (dist > wsize) */
1021 add ecx
, ebp
/* nbytes = dist - nbytes */
1022 cmp dword ptr
[esp
+52], 0
1023 jne L_wrap_around_window
/* if (write != 0) */
1026 add esi
, eax
/* from += wsize - nbytes */
1028 mov eax
, [esp
+64] /* eax = len */
1030 jbe L_do_copy
/* if (nbytes >= len) */
1032 sub eax
, ecx
/* len -= nbytes */
1035 sub esi
, ebp
/* from = out - dist */
1039 L_wrap_around_window
:
1040 mov eax
, [esp
+52] /* eax = write */
1042 jbe L_contiguous_in_window
/* if (write >= nbytes) */
1044 add esi
, [esp
+48] /* from += wsize */
1045 add esi
, eax
/* from += write */
1046 sub esi
, ecx
/* from -= nbytes */
1047 sub ecx
, eax
/* nbytes -= write */
1049 mov eax
, [esp
+64] /* eax = len */
1051 jbe L_do_copy
/* if (nbytes >= len) */
1053 sub eax
, ecx
/* len -= nbytes */
1055 mov esi
, [esp
+28] /* from = window */
1056 mov ecx
, [esp
+52] /* nbytes = write */
1058 jbe L_do_copy
/* if (nbytes >= len) */
1060 sub eax
, ecx
/* len -= nbytes */
1063 sub esi
, ebp
/* from = out - dist */
1067 L_contiguous_in_window
:
1069 sub esi
, ecx
/* from += write - nbytes */
1071 mov eax
, [esp
+64] /* eax = len */
1073 jbe L_do_copy
/* if (nbytes >= len) */
1075 sub eax
, ecx
/* len -= nbytes */
1078 sub esi
, ebp
/* from = out - dist */
1086 mov esi
, [esp
+8] /* move in back to %esi, toss from */
1087 mov ebp
, [esp
+32] /* ebp = lcode */
1090 L_test_for_end_of_block
:
1092 jz L_invalid_literal_length_code
1093 mov dword ptr
[esp
+72], 1
1094 jmp L_break_loop_with_status
1096 L_invalid_literal_length_code
:
1097 mov dword ptr
[esp
+72], 2
1098 jmp L_break_loop_with_status
1100 L_invalid_distance_code
:
1101 mov dword ptr
[esp
+72], 3
1102 jmp L_break_loop_with_status
1104 L_invalid_distance_too_far
:
1106 mov dword ptr
[esp
+72], 4
1107 jmp L_break_loop_with_status
1110 mov dword ptr
[esp
+72], 0
1112 L_break_loop_with_status
:
1113 /* put in, out, bits, and hold back into ar and pop esp */
1114 mov
[esp
+8], esi
/* save in */
1115 mov
[esp
+16], edi
/* save out */
1116 mov
[esp
+44], ebx
/* save bits */
1117 mov
[esp
+40], edx
/* save hold */
1118 mov ebp
, [esp
+4] /* restore esp, ebp */
1122 #error "x86 architecture not defined"
1125 if (ar
.status
> 1) {
1127 strm
->msg
= "invalid literal/length code";
1128 else if (ar
.status
== 3)
1129 strm
->msg
= "invalid distance code";
1131 strm
->msg
= "invalid distance too far back";
1134 else if ( ar
.status
== 1 ) {
1138 /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
1139 ar
.len
= ar
.bits
>> 3;
1141 ar
.bits
-= ar
.len
<< 3;
1142 ar
.hold
&= (1U << ar
.bits
) - 1;
1144 /* update state and return */
1145 strm
->next_in
= ar
.in
;
1146 strm
->next_out
= ar
.out
;
1147 strm
->avail_in
= (unsigned)(ar
.in
< ar
.last
?
1148 PAD_AVAIL_IN
+ (ar
.last
- ar
.in
) :
1149 PAD_AVAIL_IN
- (ar
.in
- ar
.last
));
1150 strm
->avail_out
= (unsigned)(ar
.out
< ar
.end
?
1151 PAD_AVAIL_OUT
+ (ar
.end
- ar
.out
) :
1152 PAD_AVAIL_OUT
- (ar
.out
- ar
.end
));
1153 state
->hold
= ar
.hold
;
1154 state
->bits
= ar
.bits
;