iconv: Bail out of the loop when an illegal sequence of bytes occurs.
[elinks/elinks-j605.git] / src / encoding / deflate.c
blob a62e123d1e33d97d24cd71f2d210f06ed0e62849
1 /* deflate/gzip encoding backend */
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
7 #include <stdio.h>
8 #include <string.h>
9 #ifdef HAVE_UNISTD_H
10 #include <unistd.h>
11 #endif
12 #ifdef HAVE_ZLIB_H
13 #include <zlib.h>
14 #endif
15 #include <errno.h>
17 #include "elinks.h"
19 #include "encoding/deflate.h"
20 #include "encoding/encoding.h"
21 #include "util/memory.h"
23 /* How many bytes of compressed data to read before decompressing. */
24 #define ELINKS_DEFLATE_BUFFER_LENGTH 5000
26 struct deflate_enc_data {
27 z_stream deflate_stream;
29 /* The file descriptor from which we read. */
30 int fdread;
32 unsigned int last_read:1;
33 unsigned int after_first_read:1;
34 unsigned int after_end:1;
36 /* A buffer for data that has been read from the file but not
37 * yet decompressed. z_stream.next_in and z_stream.avail_in
38 * refer to this buffer. */
39 unsigned char buf[ELINKS_DEFLATE_BUFFER_LENGTH];
42 static int
43 deflate_open(int window_size, struct stream_encoded *stream, int fd)
45 /* A zero-initialized z_stream. The compiler ensures that all
46 * pointer members in it are null. (Can't do this with memset
47 * because C99 does not require all-bits-zero to be a null
48 * pointer.) */
49 static const z_stream null_z_stream = {0};
50 int err;
52 struct deflate_enc_data *data = mem_alloc(sizeof(*data));
54 stream->data = NULL;
55 if (!data) {
56 return -1;
59 /* Initialize all members of *data, except data->buf[], which
60 * will be initialized on demand by deflate_read. */
61 copy_struct(&data->deflate_stream, &null_z_stream);
62 data->fdread = fd;
63 data->last_read = 0;
64 data->after_first_read = 0;
66 err = inflateInit2(&data->deflate_stream, window_size);
67 if (err != Z_OK) {
68 mem_free(data);
69 return -1;
71 stream->data = data;
73 return 0;
#if 0
/* Currently compiled out.  Would open @stream for raw DEFLATE data,
 * i.e. data carrying neither a zlib nor a gzip header. */
static int
deflate_raw_open(struct stream_encoded *stream, int fd)
{
	const int raw_window_bits = -MAX_WBITS;

	return deflate_open(raw_window_bits, stream, fd);
}
#endif
85 static int
86 deflate_gzip_open(struct stream_encoded *stream, int fd)
88 /* detect gzip header, else assume zlib header */
89 return deflate_open(MAX_WBITS + 32, stream, fd);
92 static int
93 deflate_read(struct stream_encoded *stream, unsigned char *buf, int len)
95 struct deflate_enc_data *data = (struct deflate_enc_data *) stream->data;
96 int err = 0;
97 int l = 0;
99 if (!data) return -1;
101 assert(len > 0);
103 if (data->last_read) return 0;
105 data->deflate_stream.avail_out = len;
106 data->deflate_stream.next_out = buf;
108 do {
109 if (data->deflate_stream.avail_in == 0) {
110 l = safe_read(data->fdread, data->buf,
111 ELINKS_DEFLATE_BUFFER_LENGTH);
113 if (l == -1) {
114 if (errno == EAGAIN)
115 break;
116 else
117 return -1; /* I/O error */
118 } else if (l == 0) {
119 /* EOF. It is error: we wait for more bytes */
120 return -1;
123 data->deflate_stream.next_in = data->buf;
124 data->deflate_stream.avail_in = l;
126 restart:
127 err = inflate(&data->deflate_stream, Z_SYNC_FLUSH);
128 if (err == Z_DATA_ERROR && !data->after_first_read
129 && data->deflate_stream.next_out == buf) {
130 /* RFC 2616 requires a zlib header for
131 * "Content-Encoding: deflate", but some HTTP
132 * servers (Microsoft-IIS/6.0 at blogs.msdn.com,
133 * and reportedly Apache with mod_deflate) omit
134 * that, causing Z_DATA_ERROR. Clarification of
135 * the term "deflate" has been requested for the
136 * next version of HTTP:
137 * http://www3.tools.ietf.org/wg/httpbis/trac/ticket/73
139 * Try to recover by telling zlib not to expect
140 * the header. If the error does not happen on
141 * the first inflate() call, then it is too late
142 * to recover because ELinks may already have
143 * discarded part of the input data.
145 * TODO: This fallback to raw DEFLATE is currently
146 * enabled for "Content-Encoding: gzip" too. It
147 * might be better to fall back to no compression
148 * at all, because Apache can send that header for
149 * uncompressed *.gz.md5 files. */
150 data->after_first_read = 1;
151 inflateEnd(&data->deflate_stream);
152 data->deflate_stream.avail_out = len;
153 data->deflate_stream.next_out = buf;
154 data->deflate_stream.next_in = data->buf;
155 data->deflate_stream.avail_in = l;
156 err = inflateInit2(&data->deflate_stream, -MAX_WBITS);
157 if (err == Z_OK) goto restart;
159 data->after_first_read = 1;
160 if (err == Z_STREAM_END) {
161 data->last_read = 1;
162 break;
163 } else if (err != Z_OK) {
164 data->last_read = 1;
165 break;
167 } while (data->deflate_stream.avail_out > 0);
169 assert(len - data->deflate_stream.avail_out == data->deflate_stream.next_out - buf);
170 return len - data->deflate_stream.avail_out;
173 static unsigned char *
174 deflate_decode_buffer(struct stream_encoded *st, int window_size, unsigned char *data, int len, int *new_len)
176 struct deflate_enc_data *enc_data = (struct deflate_enc_data *) st->data;
177 z_stream *stream = &enc_data->deflate_stream;
178 unsigned char *buffer = NULL;
179 int error;
181 *new_len = 0; /* default, left there if an error occurs */
183 if (!len) return NULL;
184 stream->next_in = data;
185 stream->avail_in = len;
186 stream->total_out = 0;
188 do {
189 unsigned char *new_buffer;
190 size_t size = stream->total_out + MAX_STR_LEN;
192 new_buffer = mem_realloc(buffer, size);
193 if (!new_buffer) {
194 error = Z_MEM_ERROR;
195 break;
198 buffer = new_buffer;
199 stream->next_out = buffer + stream->total_out;
200 stream->avail_out = MAX_STR_LEN;
202 error = inflate(stream, Z_SYNC_FLUSH);
203 if (error == Z_STREAM_END) {
204 break;
206 } while (error == Z_OK && stream->avail_in > 0);
208 if (error == Z_STREAM_END) {
209 inflateEnd(stream);
210 enc_data->after_end = 1;
211 error = Z_OK;
214 if (error == Z_OK) {
215 *new_len = stream->total_out;
216 return buffer;
217 } else {
218 if (buffer) mem_free(buffer);
219 return NULL;
223 static unsigned char *
224 deflate_raw_decode_buffer(struct stream_encoded *st, unsigned char *data, int len, int *new_len)
226 /* raw DEFLATE with neither zlib nor gzip header */
227 return deflate_decode_buffer(st, -MAX_WBITS, data, len, new_len);
230 static unsigned char *
231 deflate_gzip_decode_buffer(struct stream_encoded *st, unsigned char *data, int len, int *new_len)
233 /* detect gzip header, else assume zlib header */
234 return deflate_decode_buffer(st, MAX_WBITS + 32, data, len, new_len);
237 static void
238 deflate_close(struct stream_encoded *stream)
240 struct deflate_enc_data *data = (struct deflate_enc_data *) stream->data;
242 if (data) {
243 if (!data->after_end) {
244 inflateEnd(&data->deflate_stream);
246 if (data->fdread != -1) {
247 close(data->fdread);
249 mem_free(data);
250 stream->data = 0;
254 static const unsigned char *const deflate_extensions[] = { NULL };
256 const struct decoding_backend deflate_decoding_backend = {
257 "deflate",
258 deflate_extensions,
259 deflate_gzip_open,
260 deflate_read,
261 deflate_raw_decode_buffer,
262 deflate_close,
265 static const unsigned char *const gzip_extensions[] = { ".gz", ".tgz", NULL };
267 const struct decoding_backend gzip_decoding_backend = {
268 "gzip",
269 gzip_extensions,
270 deflate_gzip_open,
271 deflate_read,
272 deflate_gzip_decode_buffer,
273 deflate_close,