1 ;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
3 ; * inffas32.asm is derived from inffas86.c, with translation of assembly code
5 ; * Copyright (C) 1995-2003 Mark Adler
6 ; * For conditions of distribution and use, see copyright notice in zlib.h
8 ; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
9 ; * Please use the copyright conditions above.
11 ; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
12 ; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
13 ; * the moment. I have successfully compiled and tested this code with gcc2.96,
14 ; * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
15 ; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
16 ; * enabled. I will attempt to merge the MMX code into this version. Newer
17 ; * versions of this and inffast.S can be found at
18 ; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
20 ; * 2005 : modification by Gilles Vollant
22 ; For Visual C++ 4.x and higher and ML 6.x and higher
23 ; ml.exe is in directory \MASM611C of Win95 DDK
24 ; ml.exe is also distributed in http://www.masm32.com/masmdl.htm
25 ; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
28 ; compile with command line option
29 ; ml /coff /Zi /c /Flinffas32.lst inffas32.asm
31 ; if you define NO_GZIP (see inflate.h), compile with
32 ; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
35 ; zlib1222sup is 0 for zlib 1.2.2.1 and lower
36 ; zlib1222sup is 8 for zlib 1.2.2.2 and more (with addition of dmax and head
37 ; in inflate_state in inflate.h)
; Numeric values of the inflate mode states referenced by this file.
; INFLATE_MODE_BAD is loaded into edx by the three error exits, and
; INFLATE_MODE_TYPE by the L_test_for_end_of_block path; L_update_stream_state
; then stores edx to [eax+mode_state].
; NOTE(review): three alternative pairs are listed (11/26, 11/26 and 3/17).
; The conditional-assembly directives that select one of them are not visible
; in this excerpt -- presumably keyed on the NO_GUNZIP define mentioned in the
; file header; confirm against the full file.
42 INFLATE_MODE_TYPE
equ 11
43 INFLATE_MODE_BAD
equ 26
46 INFLATE_MODE_TYPE
equ 11
47 INFLATE_MODE_BAD
equ 26
49 INFLATE_MODE_TYPE
equ 3
50 INFLATE_MODE_BAD
equ 17
58 ;;;GLOBAL _inflate_fast
; Unconditional branch to inflate_fast_entry; the db data below comes after
; this jump in the file.
; NOTE(review): the inflate_fast_entry label itself is not visible in this
; excerpt.
80 jmp inflate_fast_entry
; Identification string embedded in the object code.
85 db 'Fast decoding Code from Chris Anderson'
; Error message strings. Each error exit (L_invalid_literal_length_code,
; L_invalid_distance_code, L_invalid_distance_too_far) loads the address of
; its label into ecx before jumping to L_update_stream_state.
; NOTE(review): string terminators, if any, are not visible in this excerpt.
89 invalid_literal_length_code_msg:
90 db 'invalid literal/length code'
94 invalid_distance_code_msg:
95 db 'invalid distance code'
99 invalid_distance_too_far_msg:
100 db 'invalid distance too far back'
; Byte offsets of the inflate state fields accessed by this file; the field
; each offset refers to is named in its trailing comment. They are used as
; displacements from a register holding the state pointer, e.g.
; [edi+lencode_state] on entry and [edx+hold_state] on exit.
; Every offset except mode_state adds zlib1222sup, which the file header
; describes as 0 or 8 depending on the zlib version being targeted.
; NOTE(review): the definition of zlib1222sup is not visible in this excerpt.
141 mode_state
equ 0 ;/* state->mode */
142 wsize_state
equ (32+zlib1222sup
) ;/* state->wsize */
143 write_state
equ (36+4+zlib1222sup
) ;/* state->write */
144 window_state
equ (40+4+zlib1222sup
) ;/* state->window */
145 hold_state
equ (44+4+zlib1222sup
) ;/* state->hold */
146 bits_state
equ (48+4+zlib1222sup
) ;/* state->bits */
147 lencode_state
equ (64+4+zlib1222sup
) ;/* state->lencode */
148 distcode_state
equ (68+4+zlib1222sup
) ;/* state->distcode */
149 lenbits_state
equ (72+4+zlib1222sup
) ;/* state->lenbits */
150 distbits_state
equ (76+4+zlib1222sup
) ;/* state->distbits */
155 ;GLOBAL inflate_fast_use_mmx
160 ; GLOBAL inflate_fast_use_mmx:object
161 ;.size inflate_fast_use_mmx, 4
; Read the decoder state fields through edi, which is used here as the base
; register for all of the *_state offsets.
; NOTE(review): ecx is reloaded several times in a row; the instructions that
; consume each value (presumably stores to stack locals) are not visible in
; this excerpt.
; eax = state->lencode, ecx = state->distcode
211 mov eax, [edi+lencode_state
]
212 mov ecx, [edi+distcode_state
]
; ecx = state->lenbits
218 mov ecx, [edi+lenbits_state
]
; ecx = state->distbits
224 mov ecx, [edi+distbits_state
]
; eax = state->wsize, ecx = state->write, edx = state->window
229 mov eax, [edi+wsize_state
]
230 mov ecx, [edi+write_state
]
231 mov edx, [edi+window_state
]
; ebp = state->hold, ebx = state->bits; L_update_stream_state later writes
; ebp and ebx back to these same two fields.
237 mov ebp, [edi+hold_state
]
238 mov ebx, [edi+bits_state
]
; Code path selection driven by the inflate_fast_use_mmx variable.
; NOTE(review): only four instructions of this sequence are visible here.
; Compare the variable against 2; the branch consuming the flags is not shown.
275 cmp dword ptr [inflate_fast_use_mmx
],2
; Flip bit 21 (0200000h) of the dword on top of the stack.
; NOTE(review): presumably a pushed EFLAGS image whose ID flag is toggled as
; part of CPUID detection -- the surrounding pushfd/popfd are not visible;
; confirm against the full file.
285 xor dword ptr [esp],0200000h
; Record the outcome in inflate_fast_use_mmx: 2 on one path, 3 on another.
; NOTE(review): which value means "MMX available" is not visible in this
; excerpt; the later compares test the variable against 2.
313 mov dword ptr [inflate_fast_use_mmx
],2
316 mov dword ptr [inflate_fast_use_mmx
],3
; Branch skeleton of the decode path whose labels carry no _mmx suffix.
; NOTE(review): the compare/test instructions that set the flags for these
; branches, and several of the target labels (L_add_bits_to_len,
; L_get_distance_code, L_add_bits_to_dist), are not visible in this excerpt.
360 jnz L_test_for_length_base
375 L_test_for_length_base:
382 jz L_test_for_second_level_length
; jae/ja/jb/jbe below are unsigned condition codes.
386 jae L_add_bits_to_len
413 ja L_get_distance_code
439 jz L_test_for_second_level_dist
443 jae L_add_bits_to_dist
512 L_test_for_second_level_length:
; Falls into end-of-block / invalid-code handling shared with the MMX path.
518 jnz L_test_for_end_of_block
530 L_test_for_second_level_dist:
536 jnz L_invalid_distance_code
556 jb L_invalid_distance_too_far
; Test the dword stack local at [esp+48] against zero and take the
; wrap-around path when it is non-zero.
; NOTE(review): what is stored at esp+48 is not visible here -- presumably the
; window write position; confirm against the frame layout.
559 cmp dword ptr [esp+48],0
560 jne L_wrap_around_window
584 L_wrap_around_window:
588 jbe L_contiguous_in_window
613 L_contiguous_in_window:
; Branch skeleton of the decode path that uses MMX registers (labels with the
; _mmx suffix). It shares the L_test_for_end_of_block, L_invalid_distance_code
; and L_invalid_distance_too_far exits with the non-MMX path.
; NOTE(review): most instructions between the visible lines, including the
; ones that set the flags for each branch, are missing from this excerpt.
; Load two dword stack locals into mm4 and mm5.
; NOTE(review): the meaning of [esp+0] and [esp+4] is not visible here.
647 movd mm4
,dword ptr [esp+0]
649 movd mm5
,dword ptr [esp+4]
660 ja L_get_length_code_mmx
; Load the dword addressed by esi into mm7.
663 movd mm7
,dword ptr [esi]
669 L_get_length_code_mmx:
681 jnz L_test_for_length_base_mmx
696 L_test_for_length_base_mmx:
; The two jz instructions are on original lines 702 and 704; the instruction
; between them is not shown.
702 jz L_test_for_second_level_length_mmx
704 jz L_decode_distance_mmx
; Mask ecx with the dword entry of inflate_fast_mask selected by eax
; (index scaled by 4).
710 and ecx, [inflate_fast_mask
+eax*4]
713 L_decode_distance_mmx:
717 ja L_get_dist_code_mmx
; Load the dword addressed by esi into mm7.
720 movd mm7
,dword ptr [esi]
742 jz L_test_for_second_level_dist_mmx
744 jz L_check_dist_one_mmx
746 L_add_bits_to_dist_mmx:
751 and ecx, [inflate_fast_mask
+eax*4]
782 L_check_dist_one_mmx:
; Both branches target L_check_window_mmx, on opposite conditions; they are
; on original lines 784 and 786, and the instruction between them is not
; shown.
784 jne L_check_window_mmx
786 je L_check_window_mmx
803 L_test_for_second_level_length_mmx:
805 jnz L_test_for_end_of_block
810 and ecx, [inflate_fast_mask
+eax*4]
816 L_test_for_second_level_dist_mmx:
818 jnz L_invalid_distance_code
823 and ecx, [inflate_fast_mask
+eax*4]
838 jb L_invalid_distance_too_far
; Same test of the stack local at [esp+48] as in the non-MMX path: non-zero
; selects the wrap-around handling.
841 cmp dword ptr [esp+48],0
842 jne L_wrap_around_window_mmx
865 L_wrap_around_window_mmx:
869 jbe L_contiguous_in_window_mmx
893 L_contiguous_in_window_mmx:
; Exit stubs. Each one loads the mode value to record into edx and then jumps
; to L_update_stream_state, which stores edx to the state's mode field. The
; three error stubs also load the address of their message string into ecx.
; NOTE(review): the instructions between each label and the visible mov lines
; are missing from this excerpt, as is the code that consumes ecx.
917 L_invalid_distance_code:
923 mov ecx, invalid_distance_code_msg
924 mov edx,INFLATE_MODE_BAD
925 jmp L_update_stream_state
; Reached from both the MMX and non-MMX second-level length tests.
927 L_test_for_end_of_block:
; Branches to the invalid-code stub when the preceding test (not shown) sets ZF.
934 jz L_invalid_literal_length_code
; Otherwise record INFLATE_MODE_TYPE; no message address is loaded on this path.
937 mov edx,INFLATE_MODE_TYPE
938 jmp L_update_stream_state
940 L_invalid_literal_length_code:
946 mov ecx, invalid_literal_length_code_msg
947 mov edx,INFLATE_MODE_BAD
948 jmp L_update_stream_state
950 L_invalid_distance_too_far:
955 mov ecx, invalid_distance_too_far_msg
956 mov edx,INFLATE_MODE_BAD
; NOTE(review): L_update_stream_state is the next visible label, so this jump
; looks redundant unless original line 958 holds something not shown here.
957 jmp L_update_stream_state
; Common exit: write the decoder's working values back to the state structure.
; NOTE(review): only part of this sequence is visible; the instructions that
; load eax and edx with the state pointer are not shown.
959 L_update_stream_state:
; Store the mode value chosen by the exit stub (in edx) to state->mode.
967 mov [eax+mode_state
],edx
; The code checks inflate_fast_use_mmx against 2 twice on the way out; the
; branches that consume these flags are not visible here.
973 cmp dword ptr [inflate_fast_use_mmx
],2
; state->bits = ebx (edx is the base register for the state here).
990 mov [edx+bits_state
],ebx
1016 cmp dword ptr [inflate_fast_use_mmx
],2
; state->hold = ebp.
1031 mov [edx+hold_state
],ebp
; Unsigned below-or-equal branches.
; NOTE(review): the compares feeding them and the L_last_is_smaller /
; L_end_is_smaller labels are not visible in this excerpt.
1038 jbe L_last_is_smaller
1057 jbe L_end_is_smaller