2 * Fill Window with SSE2-optimized hash shifting
4 * Copyright (C) 2013 Intel Corporation
6 * Arjan van de Ven <arjan@linux.intel.com>
7 * Jim Kukunas <james.t.kukunas@linux.intel.com>
9 * For conditions of distribution and use, see copyright notice in zlib.h
12 #include <immintrin.h>
15 #define UPDATE_HASH(s,h,i) \
18 h = (3483 * (s->window[i]) +\
19 23081* (s->window[i+1]) +\
20 6954 * (s->window[i+2]) +\
21 20947* (s->window[i+3])) & s->hash_mask;\
23 h = (25881* (s->window[i]) +\
24 24674* (s->window[i+1]) +\
25 25811* (s->window[i+2])) & s->hash_mask;\
29 extern int read_buf OF((z_streamp strm, Bytef *buf, unsigned size));
31 void fill_window_sse(deflate_state
*s
)
33 const __m128i xmm_wsize
= _mm_set1_epi16(s
->w_size
);
37 unsigned more
; /* Amount of free space at the end of the window. */
38 uInt wsize
= s
->w_size
;
40 Assert(s
->lookahead
< MIN_LOOKAHEAD
, "already enough lookahead");
43 more
= (unsigned)(s
->window_size
-(ulg
)s
->lookahead
-(ulg
)s
->strstart
);
45 /* Deal with !@#$% 64K limit: */
46 if (sizeof(int) <= 2) {
47 if (more
== 0 && s
->strstart
== 0 && s
->lookahead
== 0) {
50 } else if (more
== (unsigned)(-1)) {
51 /* Very unlikely, but possible on 16 bit machine if
52 * strstart == 0 && lookahead == 1 (input done a byte at time)
58 /* If the window is almost full and there is insufficient lookahead,
59 * move the upper half to the lower one to make room in the upper half.
61 if (s
->strstart
>= wsize
+MAX_DIST(s
)) {
63 zmemcpy(s
->window
, s
->window
+wsize
, (unsigned)wsize
);
64 s
->match_start
-= wsize
;
65 s
->strstart
-= wsize
; /* we now have strstart >= MAX_DIST */
66 s
->block_start
-= (long) wsize
;
68 /* Slide the hash table (could be avoided with 32 bit values
69 at the expense of memory usage). We slide even when level == 0
70 to keep the hash table consistent if we switch back to level > 0
71 later. (Using level 0 permanently is not an optimal usage of
72 zlib, so we don't care about this pathological case.)
78 __m128i value
, result
;
80 value
= _mm_loadu_si128((__m128i
*)p
);
81 result
= _mm_subs_epu16(value
, xmm_wsize
);
82 _mm_storeu_si128((__m128i
*)p
, result
);
93 __m128i value
, result
;
95 value
= _mm_loadu_si128((__m128i
*)p
);
96 result
= _mm_subs_epu16(value
, xmm_wsize
);
97 _mm_storeu_si128((__m128i
*)p
, result
);
105 if (s
->strm
->avail_in
== 0) break;
107 /* If there was no sliding:
108 * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
109 * more == window_size - lookahead - strstart
110 * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
111 * => more >= window_size - 2*WSIZE + 2
112 * In the BIG_MEM or MMAP case (not yet supported),
113 * window_size == input_size + MIN_LOOKAHEAD &&
114 * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
115 * Otherwise, window_size == 2*WSIZE so more >= 2.
116 * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
118 Assert(more
>= 2, "more < 2");
120 n
= read_buf(s
->strm
, s
->window
+ s
->strstart
+ s
->lookahead
, more
);
123 /* Initialize the hash value now that we have some input: */
124 if (s
->lookahead
>= MIN_MATCH
) {
125 uInt str
= s
->strstart
;
126 s
->ins_h
= s
->window
[str
];
128 UPDATE_HASH(s
, s
->ins_h
, str
+ 1 - (MIN_MATCH
-1));
130 Call
UPDATE_HASH() MIN_MATCH
-3 more times
133 /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
134 * but this is not important since only literal bytes will be emitted.
137 } while (s
->lookahead
< MIN_LOOKAHEAD
&& s
->strm
->avail_in
!= 0);
139 /* If the WIN_INIT bytes after the end of the current data have never been
140 * written, then zero those bytes in order to avoid memory check reports of
141 * the use of uninitialized (or uninitialised as Julian writes) bytes by
142 * the longest match routines. Update the high water mark for the next
143 * time through here. WIN_INIT is set to MAX_MATCH since the longest match
144 * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
146 if (s
->high_water
< s
->window_size
) {
147 ulg curr
= s
->strstart
+ (ulg
)(s
->lookahead
);
150 if (s
->high_water
< curr
) {
151 /* Previous high water mark below current data -- zero WIN_INIT
152 * bytes or up to end of window, whichever is less.
154 init
= s
->window_size
- curr
;
157 zmemzero(s
->window
+ curr
, (unsigned)init
);
158 s
->high_water
= curr
+ init
;
160 else if (s
->high_water
< (ulg
)curr
+ WIN_INIT
) {
161 /* High water mark at or above current data, but below current data
162 * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
163 * to end of window, whichever is less.
165 init
= (ulg
)curr
+ WIN_INIT
- s
->high_water
;
166 if (init
> s
->window_size
- s
->high_water
)
167 init
= s
->window_size
- s
->high_water
;
168 zmemzero(s
->window
+ s
->high_water
, (unsigned)init
);
169 s
->high_water
+= init
;
173 Assert((ulg
)s
->strstart
<= s
->window_size
- MIN_LOOKAHEAD
,
174 "not enough room for search");