1 /* deflate/gzip encoding backend */
19 #include "encoding/deflate.h"
20 #include "encoding/encoding.h"
21 #include "util/memory.h"
23 /* How many bytes of compressed data to read before decompressing. */
24 #define ELINKS_DEFLATE_BUFFER_LENGTH 5000
/* Per-stream decoder state: the zlib inflate state, three one-bit
 * status flags, and a staging buffer for compressed input. */
26 struct deflate_enc_data
{
27 z_stream deflate_stream
;
29 /* The file descriptor from which we read. */
/* NOTE(review): no member declaration follows the comment above in
 * this view, although deflate_read and deflate_close use
 * data->fdread -- confirm against the full file. */
/* Tested at the top of deflate_read, which returns 0 when it is set. */
32 unsigned int last_read
:1;
/* Cleared by deflate_open and set by deflate_read after it has called
 * inflate(); deflate_read only attempts its raw-DEFLATE fallback while
 * this is still clear. */
33 unsigned int after_first_read
:1;
/* Set by deflate_decode_buffer after its Z_STREAM_END check;
 * deflate_close calls inflateEnd() only while this is clear. */
34 unsigned int after_end
:1;
36 /* A buffer for data that has been read from the file but not
37 * yet decompressed. z_stream.next_in and z_stream.avail_in
38 * refer to this buffer. */
39 unsigned char buf
[ELINKS_DEFLATE_BUFFER_LENGTH
];
/* Shared open routine used by deflate_raw_open and deflate_gzip_open.
 * The visible steps are: allocate a struct deflate_enc_data with
 * mem_alloc(), reset its z_stream from a zero-initialized template,
 * clear after_first_read, and call inflateInit2() with the
 * caller-supplied window_size.
 * NOTE(review): the return type, braces and the handling of err are
 * not visible in this view -- confirm against the full file. */
43 deflate_open(int window_size
, struct stream_encoded
*stream
, int fd
)
45 /* A zero-initialized z_stream. The compiler ensures that all
46 * pointer members in it are null. (Can't do this with memset
47 * because C99 does not require all-bits-zero to be a null
49 static const z_stream null_z_stream
= {0};
52 struct deflate_enc_data
*data
= mem_alloc(sizeof(*data
));
59 /* Initialize all members of *data, except data->buf[], which
60 * will be initialized on demand by deflate_read. */
61 copy_struct(&data
->deflate_stream
, &null_z_stream
);
/* Cleared here; deflate_read sets it to 1 after calling inflate(). */
64 data
->after_first_read
= 0;
/* window_size is handed to zlib unchanged; the wrappers in this file
 * pass -MAX_WBITS (raw DEFLATE) or MAX_WBITS + 32 (gzip/zlib
 * auto-detection). */
66 err
= inflateInit2(&data
->deflate_stream
, window_size
);
/* Opens a decoder for headerless DEFLATE data: forwards stream and fd
 * to deflate_open with a window size of -MAX_WBITS and returns its
 * result. */
78 deflate_raw_open(struct stream_encoded
*stream
, int fd
)
80 /* raw DEFLATE with neither zlib nor gzip header */
81 return deflate_open(-MAX_WBITS
, stream
, fd
);
/* Opens a decoder that accepts either a gzip or a zlib header:
 * forwards stream and fd to deflate_open with a window size of
 * MAX_WBITS + 32 and returns its result. */
86 deflate_gzip_open(struct stream_encoded
*stream
, int fd
)
88 /* detect gzip header, else assume zlib header */
89 return deflate_open(MAX_WBITS
+ 32, stream
, fd
);
/* Decompresses data from the stream into buf, producing at most len
 * bytes.
 *
 * Returns 0 immediately when data->last_read is set, -1 on the path
 * marked as an I/O error, and otherwise len minus the remaining
 * avail_out, i.e. the number of bytes written to buf.
 *
 * NOTE(review): the return type, local declarations, the loop head,
 * the `restart` label and the bodies of several branches are not
 * visible in this view -- confirm against the full file. */
93 deflate_read(struct stream_encoded
*stream
, unsigned char *buf
, int len
)
95 struct deflate_enc_data
*data
= (struct deflate_enc_data
*) stream
->data
;
103 if (data
->last_read
) return 0;
/* Point zlib's output window at the caller's buffer. */
105 data
->deflate_stream
.avail_out
= len
;
106 data
->deflate_stream
.next_out
= buf
;
/* Refill the staging buffer only once zlib has consumed all of the
 * previously read input. */
109 if (data
->deflate_stream
.avail_in
== 0) {
110 l
= safe_read(data
->fdread
, data
->buf
,
111 ELINKS_DEFLATE_BUFFER_LENGTH
);
117 return -1; /* I/O error */
119 /* EOF. This is an error: more bytes were expected */
/* Hand the freshly read block of l bytes to zlib. */
123 data
->deflate_stream
.next_in
= data
->buf
;
124 data
->deflate_stream
.avail_in
= l
;
127 err
= inflate(&data
->deflate_stream
, Z_SYNC_FLUSH
);
/* The fallback below is attempted only if after_first_read is still
 * clear and no output has been written yet (next_out still equals
 * buf). */
128 if (err
== Z_DATA_ERROR
&& !data
->after_first_read
129 && data
->deflate_stream
.next_out
== buf
) {
130 /* RFC 2616 requires a zlib header for
131 * "Content-Encoding: deflate", but some HTTP
132 * servers (Microsoft-IIS/6.0 at blogs.msdn.com,
133 * and reportedly Apache with mod_deflate) omit
134 * that, causing Z_DATA_ERROR. Clarification of
135 * the term "deflate" has been requested for the
136 * next version of HTTP:
137 * http://www3.tools.ietf.org/wg/httpbis/trac/ticket/73
139 * Try to recover by telling zlib not to expect
140 * the header. If the error does not happen on
141 * the first inflate() call, then it is too late
142 * to recover because ELinks may already have
143 * discarded part of the input data.
145 * TODO: This fallback to raw DEFLATE is currently
146 * enabled for "Content-Encoding: gzip" too. It
147 * might be better to fall back to no compression
148 * at all, because Apache can send that header for
149 * uncompressed *.gz.md5 files. */
150 data
->after_first_read
= 1;
151 inflateEnd(&data
->deflate_stream
);
/* Rewind both the output window and the input window so the same
 * block of l bytes in data->buf is decoded again from its start. */
152 data
->deflate_stream
.avail_out
= len
;
153 data
->deflate_stream
.next_out
= buf
;
154 data
->deflate_stream
.next_in
= data
->buf
;
155 data
->deflate_stream
.avail_in
= l
;
/* Reinitialize zlib for raw DEFLATE (-MAX_WBITS) and retry. */
156 err
= inflateInit2(&data
->deflate_stream
, -MAX_WBITS
);
157 if (err
== Z_OK
) goto restart
;
159 data
->after_first_read
= 1;
160 if (err
== Z_STREAM_END
) {
163 } else if (err
!= Z_OK
) {
/* Keep going while there is still room in the caller's buffer. */
167 } while (data
->deflate_stream
.avail_out
> 0);
/* Sanity check: the byte count derived from avail_out must equal the
 * distance next_out has advanced from buf. */
169 assert(len
- data
->deflate_stream
.avail_out
== data
->deflate_stream
.next_out
- buf
);
170 return len
- data
->deflate_stream
.avail_out
;
/* Inflates the len bytes at data using the z_stream stored in
 * st->data, collecting the output in a buffer that is resized with
 * mem_realloc() in steps of MAX_STR_LEN.
 *
 * *new_len is set to 0 up front and later to stream->total_out.
 * Returns NULL when len is 0.
 *
 * NOTE(review): window_size is not referenced in the visible lines.
 * The loop head, the handling of a failed mem_realloc(), and the
 * final return statements are not visible either -- confirm against
 * the full file. */
173 static unsigned char *
174 deflate_decode_buffer(struct stream_encoded
*st
, int window_size
, unsigned char *data
, int len
, int *new_len
)
176 struct deflate_enc_data
*enc_data
= (struct deflate_enc_data
*) st
->data
;
177 z_stream
*stream
= &enc_data
->deflate_stream
;
178 unsigned char *buffer
= NULL
;
181 *new_len
= 0; /* default, left there if an error occurs */
183 if (!len
) return NULL
;
/* Feed the whole input block to zlib and restart the output count. */
184 stream
->next_in
= data
;
185 stream
->avail_in
= len
;
186 stream
->total_out
= 0;
189 unsigned char *new_buffer
;
/* Request room for everything produced so far plus one more
 * MAX_STR_LEN-sized chunk. */
190 size_t size
= stream
->total_out
+ MAX_STR_LEN
;
192 new_buffer
= mem_realloc(buffer
, size
);
/* Write the next chunk directly after the output produced so far. */
199 stream
->next_out
= buffer
+ stream
->total_out
;
200 stream
->avail_out
= MAX_STR_LEN
;
202 error
= inflate(stream
, Z_SYNC_FLUSH
);
203 if (error
== Z_STREAM_END
) {
/* Continue only while zlib reports Z_OK and input remains. */
206 } while (error
== Z_OK
&& stream
->avail_in
> 0);
208 if (error
== Z_STREAM_END
) {
/* Recorded so that deflate_close skips its own inflateEnd() call. */
210 enc_data
->after_end
= 1;
215 *new_len
= stream
->total_out
;
218 if (buffer
) mem_free(buffer
);
/* Buffer-decoding entry point for headerless DEFLATE data: forwards
 * all arguments to deflate_decode_buffer with a window size of
 * -MAX_WBITS and returns its result. */
223 static unsigned char *
224 deflate_raw_decode_buffer(struct stream_encoded
*st
, unsigned char *data
, int len
, int *new_len
)
226 /* raw DEFLATE with neither zlib nor gzip header */
227 return deflate_decode_buffer(st
, -MAX_WBITS
, data
, len
, new_len
);
/* Buffer-decoding entry point for gzip- or zlib-wrapped data: forwards
 * all arguments to deflate_decode_buffer with a window size of
 * MAX_WBITS + 32 and returns its result. */
230 static unsigned char *
231 deflate_gzip_decode_buffer(struct stream_encoded
*st
, unsigned char *data
, int len
, int *new_len
)
233 /* detect gzip header, else assume zlib header */
234 return deflate_decode_buffer(st
, MAX_WBITS
+ 32, data
, len
, new_len
);
/* Releases the decoder state attached to stream->data.
 * NOTE(review): the return type, the body of the fdread branch and
 * any freeing of data are not visible in this view -- confirm
 * against the full file. */
238 deflate_close(struct stream_encoded
*stream
)
240 struct deflate_enc_data
*data
= (struct deflate_enc_data
*) stream
->data
;
/* after_end is set by deflate_decode_buffer after its Z_STREAM_END
 * check; inflateEnd() is called here only when it is clear. */
243 if (!data
->after_end
) {
244 inflateEnd(&data
->deflate_stream
);
/* A value of -1 in fdread is treated as "nothing to do" here. */
246 if (data
->fdread
!= -1) {
/* Extension list for the deflate backend; it holds only the
 * terminating NULL, i.e. it lists no extensions. */
254 static const unsigned char *const deflate_extensions
[] = { NULL
};
/* Backend descriptor for deflate decoding.
 * NOTE(review): only the deflate_raw_decode_buffer entry of the
 * initializer is visible in this view; the remaining members and the
 * closing brace are missing. */
256 const struct decoding_backend deflate_decoding_backend
= {
261 deflate_raw_decode_buffer
,
/* NULL-terminated extension list for the gzip backend: ".gz" and
 * ".tgz". */
265 static const unsigned char *const gzip_extensions
[] = { ".gz", ".tgz", NULL
};
267 const struct decoding_backend gzip_decoding_backend
= {
272 deflate_gzip_decode_buffer
,