12 #include "igzip_lib.h"
14 // exported from zlib.h
23 #ifndef Z_STREAM_ERROR
24 #define Z_STREAM_ERROR (-2)
32 #define BUF_SIZE (1<<22)
36 #define HDR_SIZE (1<<16)
47 #ifndef COM_LVL_DEFAULT
48 #define COM_LVL_DEFAULT 3 // was 2
51 const int com_lvls
[4] = {
52 ISAL_DEF_LVL0_DEFAULT
,
53 ISAL_DEF_LVL1_DEFAULT
,
54 ISAL_DEF_LVL2_DEFAULT
,
64 struct isal_gzip_header
*gzip_header
;
65 struct inflate_state
*state
;
66 struct isal_zstream
*zstream
;
77 typedef gzFile_t
* gzFile
;
82 inline int is_gz(FILE* fp
);
83 inline int is_plain(FILE* fp
);
84 inline uint32_t get_posix_filetime(FILE* fp
);
85 inline int ingest_gzip_header(gzFile fp
);
86 inline gzFile
gzopen(const char *in
, const char *mode
);
87 inline gzFile
gzdopen(int fd
, const char *mode
);
88 inline int gzread(gzFile fp
, void *buf
, size_t len
);
89 inline char* gzgets(gzFile fp
, char *buf
, int len
);
90 inline int gzwrite(gzFile fp
, const void *buf
, size_t len
);
91 inline int gzputc(gzFile fp
, int c
);
92 inline int gzputs(gzFile fp
, const char *s
);
93 inline int gzeof(gzFile fp
);
94 inline int64_t gzoffset(gzFile fp
);
95 inline int set_compress_level(gzFile fp
, int level
);
96 inline int gzclose(gzFile fp
);
106 if (fread(buf
, 1, 2, fp
) == 2)
107 if (((int)buf
[0] == 0x1f) && ((int)(buf
[1]&0xFF) == 0x8b))
108 gzip
= 1; // normal gzip
109 fseek(fp
, 12, SEEK_SET
);
110 if (fread(buf
, 1, 2, fp
) == 2)
111 if (gzip
== 1 && (int)buf
[0] == 0x42 && (int)(buf
[1]&0xFF) == 0x43)
112 gzip
= 2; // bgzf format, need to require the normal gzip header
113 fseek(fp
, 0, SEEK_SET
);
117 int is_plain(FILE* fp
)
129 uint32_t get_posix_filetime(FILE* fp
)
131 struct stat file_stats
;
132 fstat(fileno(fp
), &file_stats
);
133 return file_stats
.st_mtime
;
136 int ingest_gzip_header(gzFile fp
) {
137 // assume fp->state->avail_in > 0
138 int status
= isal_read_gzip_header(fp
->state
, fp
->gzip_header
);
139 while (status
== ISAL_END_INPUT
&& !feof(fp
->fp
)) {
140 fp
->state
->next_in
= fp
->buf_in
;
141 fp
->state
->avail_in
= fread(fp
->state
->next_in
, 1, fp
->buf_in_size
, fp
->fp
);
142 status
= isal_read_gzip_header(fp
->state
, fp
->gzip_header
);
147 gzFile
gzopen(const char *in
, const char *mode
)
149 gzFile fp
= (gzFile_t
*)calloc(1, sizeof(gzFile_t
));
151 fp
->fd
= open(in
, O_RDONLY
);
152 else if (*mode
== 'w')
153 fp
->fd
= open(in
, O_WRONLY
| O_CREAT
| O_TRUNC
, S_IRUSR
|S_IWUSR
|S_IRGRP
|S_IROTH
);
159 fp
->fp
= fdopen(fp
->fd
, mode
);
165 fp
->mode
= strdup(mode
);
167 if (*mode
== 'r' && (fp
->is_plain
= !is_gz(fp
->fp
)))
170 fp
->gzip_header
= (struct isal_gzip_header
*)calloc(1, sizeof(struct isal_gzip_header
));
171 isal_gzip_header_init(fp
->gzip_header
);
172 if (*mode
== 'r') // read
174 fp
->state
= (struct inflate_state
*)calloc(1, sizeof(struct inflate_state
));
175 fp
->buf_in_size
= BUF_SIZE
;
176 fp
->buf_in
= (uint8_t *)malloc(fp
->buf_in_size
* sizeof(uint8_t));
177 fp
->buf_get_size
= BUF_SIZE
;
178 fp
->buf_get
= (char*)malloc(fp
->buf_get_size
* sizeof(char));
179 isal_inflate_init(fp
->state
);
180 fp
->state
->crc_flag
= ISAL_GZIP_NO_HDR_VER
;
181 fp
->state
->next_in
= fp
->buf_in
;
182 fp
->state
->avail_in
= fread(fp
->state
->next_in
, 1, fp
->buf_in_size
, fp
->fp
);
183 if (ingest_gzip_header(fp
) != ISAL_DECOMP_OK
)
189 else if (*mode
== 'w') // write
191 fp
->gzip_header
->os
= UNIX
; // FIXME auto parse OS
192 fp
->gzip_header
->time
= get_posix_filetime(fp
->fp
);
193 fp
->gzip_header
->name
= strdup(in
);
194 fp
->gzip_header
->name_buf_len
= strlen(fp
->gzip_header
->name
) + 1;
195 fp
->buf_out_size
= BUF_SIZE
;
196 fp
->buf_out
= (uint8_t *)calloc(fp
->buf_out_size
, sizeof(uint8_t));
197 fp
->zstream
= (struct isal_zstream
*)calloc(1, sizeof(struct isal_zstream
));
198 isal_deflate_init(fp
->zstream
);
199 fp
->zstream
->avail_in
= 0;
200 fp
->zstream
->flush
= NO_FLUSH
;
201 fp
->zstream
->level
= COM_LVL_DEFAULT
;
202 fp
->zstream
->level_buf_size
= com_lvls
[fp
->zstream
->level
];
203 fp
->zstream
->level_buf
= (uint8_t *)calloc(fp
->zstream
->level_buf_size
, sizeof(uint8_t));
204 fp
->zstream
->gzip_flag
= IGZIP_GZIP_NO_HDR
;
205 fp
->zstream
->avail_out
= fp
->buf_out_size
;
206 fp
->zstream
->next_out
= fp
->buf_out
;
207 if (isal_write_gzip_header(fp
->zstream
, fp
->gzip_header
) != ISAL_DECOMP_OK
)
216 gzFile
gzdopen(int fd
, const char *mode
)
218 char path
[10]; /* identifier for error messages */
221 gzFile fp
= (gzFile_t
*)calloc(1, sizeof(gzFile_t
));
223 if (!(fp
->fp
= fdopen(fd
, mode
)))
228 fp
->mode
= strdup(mode
);
230 if (*mode
== 'r' && (fp
->is_plain
= is_plain(fp
->fp
)))
233 fp
->gzip_header
= (struct isal_gzip_header
*)calloc(1, sizeof(struct isal_gzip_header
));
234 isal_gzip_header_init(fp
->gzip_header
);
235 if (*mode
== 'r') // read
237 fp
->state
= (struct inflate_state
*)calloc(1, sizeof(struct inflate_state
));
238 fp
->buf_in_size
= BUF_SIZE
;
239 fp
->buf_in
= (uint8_t *)malloc(fp
->buf_in_size
* sizeof(uint8_t));
240 isal_inflate_init(fp
->state
);
241 fp
->state
->crc_flag
= ISAL_GZIP_NO_HDR_VER
;
242 fp
->state
->next_in
= fp
->buf_in
;
243 fp
->state
->avail_in
= fread(fp
->state
->next_in
, 1, fp
->buf_in_size
, fp
->fp
);
244 if (ingest_gzip_header(fp
) != ISAL_DECOMP_OK
)
250 else if (*mode
== 'w') // write
252 fp
->gzip_header
->os
= UNIX
; // FIXME auto parse OS
253 fp
->gzip_header
->time
= get_posix_filetime(fp
->fp
);
254 fp
->gzip_header
->name
= strdup(path
);
255 fp
->gzip_header
->name_buf_len
= strlen(fp
->gzip_header
->name
) + 1;
256 fp
->buf_out_size
= BUF_SIZE
;
257 fp
->buf_out
= (uint8_t *)calloc(fp
->buf_out_size
, sizeof(uint8_t));
258 fp
->zstream
= (struct isal_zstream
*)calloc(1, sizeof(struct isal_zstream
));
259 isal_deflate_init(fp
->zstream
);
260 fp
->zstream
->avail_in
= 0;
261 fp
->zstream
->flush
= NO_FLUSH
;
262 fp
->zstream
->level
= COM_LVL_DEFAULT
;
263 fp
->zstream
->level_buf_size
= com_lvls
[fp
->zstream
->level
];
264 fp
->zstream
->level_buf
= (uint8_t *)calloc(fp
->zstream
->level_buf_size
, sizeof(uint8_t));
265 fp
->zstream
->gzip_flag
= IGZIP_GZIP_NO_HDR
;
266 fp
->zstream
->avail_out
= fp
->buf_out_size
;
267 fp
->zstream
->next_out
= fp
->buf_out
;
268 if (isal_write_gzip_header(fp
->zstream
, fp
->gzip_header
) != ISAL_DECOMP_OK
)
277 int gzclose(gzFile fp
)
280 if (!fp
) return Z_STREAM_ERROR
;
281 if (fp
->mode
&& fp
->mode
[0] != 'r' && fp
->mode
[0] != 'w') return Z_STREAM_ERROR
;
282 if (fp
->mode
) free(fp
->mode
);
283 if (fp
->zstream
&& fp
->fp
) gzwrite(fp
, NULL
, 0);
286 if (fp
->gzip_header
->extra
) free(fp
->gzip_header
->extra
);
287 if (fp
->gzip_header
->name
) free(fp
->gzip_header
->name
);
288 if (fp
->gzip_header
->comment
) free(fp
->gzip_header
->comment
);
289 free(fp
->gzip_header
);
291 if (fp
->state
) free(fp
->state
);
292 if (fp
->buf_in
) free(fp
->buf_in
);
293 if (fp
->buf_get
) free(fp
->buf_get
);
294 if (fp
->buf_out
) free(fp
->buf_out
);
297 if (fp
->zstream
->level_buf
) free(fp
->zstream
->level_buf
);
300 if (fp
->fp
&& fclose(fp
->fp
)) ret
= Z_ERRNO
;
301 if (fp
->fd
&& close(fp
->fd
)) ret
= Z_ERRNO
;
306 int gzread(gzFile fp
, void *buf
, size_t len
)
308 int buf_data_len
= 0;
312 buf_data_len
= fread((uint8_t *)buf
, 1, len
, fp
->fp
);
315 do // Start reading in compressed data and decompress
317 if (!feof(fp
->fp
) && !fp
->state
->avail_in
)
319 fp
->state
->next_in
= fp
->buf_in
;
320 fp
->state
->avail_in
= fread(fp
->state
->next_in
, 1, fp
->buf_in_size
, fp
->fp
);
322 fp
->state
->next_out
= (uint8_t *)buf
;
323 fp
->state
->avail_out
= len
;
324 if (isal_inflate(fp
->state
) != ISAL_DECOMP_OK
)
326 if ((buf_data_len
= fp
->state
->next_out
- (uint8_t *)buf
))
328 } while (fp
->state
->block_state
!= ISAL_BLOCK_FINISH
// while not done
329 && (!feof(fp
->fp
) || !fp
->state
->avail_out
)); // and work to do
330 // Add the following to look for and decode additional concatenated files
331 if (!feof(fp
->fp
) && !fp
->state
->avail_in
)
333 fp
->state
->next_in
= fp
->buf_in
;
334 fp
->state
->avail_in
= fread(fp
->state
->next_in
, 1, fp
->buf_in_size
, fp
->fp
);
336 while (fp
->state
->avail_in
&& fp
->state
->next_in
[0] == 31) // 0x1f
338 // Look for magic numbers for gzip header. Follows the gzread() decision
339 // whether to treat as trailing junk
340 if (fp
->state
->avail_in
> 1 && fp
->state
->next_in
[1] != 139) // 0x8b
342 isal_inflate_reset(fp
->state
);
343 isal_gzip_header_init(fp
->gzip_header
);
344 fp
->state
->crc_flag
= ISAL_GZIP_NO_HDR_VER
;
345 if (ingest_gzip_header(fp
) != ISAL_DECOMP_OK
)
346 return -3; // fail to parse header
349 if (!feof(fp
->fp
) && !fp
->state
->avail_in
)
351 fp
->state
->next_in
= fp
->buf_in
;
352 fp
->state
->avail_in
= fread(fp
->state
->next_in
, 1, fp
->buf_in_size
, fp
->fp
);
354 fp
->state
->next_out
= (uint8_t *)buf
;
355 fp
->state
->avail_out
= len
;
356 if (isal_inflate(fp
->state
) != ISAL_DECOMP_OK
)
358 if ((buf_data_len
= fp
->state
->next_out
- (uint8_t *)buf
))
360 } while (fp
->state
->block_state
!= ISAL_BLOCK_FINISH
361 && (!feof(fp
->fp
) || !fp
->state
->avail_out
));
366 char* gzgets(gzFile fp
, char *buf
, int len
)
371 return fgets(buf
, len
, fp
->fp
);
374 if (len
> fp
->buf_get_size
)
376 fp
->buf_get_size
= 2 * len
;
377 fp
->buf_get
= (char *)realloc(fp
->buf_get
, 2*len
*sizeof(char));
382 if ((xlen
= fp
->buf_get_len
- fp
->buf_get_out
) > 0)
384 char* fbo
= fp
->buf_get
+ fp
->buf_get_out
;
385 if ((pn
= strchr(fbo
, '\n')))
387 if (pn
- fbo
< len
- 1)
389 memcpy(buf
, fbo
, (pn
- fbo
+ 1) * sizeof(char));
390 buf
[pn
- fbo
+ 1] = '\0';
391 fp
->buf_get_out
+= pn
- fbo
+ 1;
396 memcpy(buf
, fbo
, len
- 1);
398 fp
->buf_get_out
+= len
- 1;
402 else if (xlen
>= len
- 1)
404 memcpy(buf
, fbo
, len
- 1);
406 fp
->buf_get_out
+= len
- 1;
411 memcpy(buf
, fbo
, xlen
);
414 int rlen
= gzread(fp
, buf
+ xlen
, len
- xlen
- 1);
421 buf
[xlen
+rlen
] = '\0';
422 pn
= strchr(buf
+ xlen
, '\n');
425 fp
->buf_get_len
= xlen
+ rlen
- (pn
- buf
+ 1);
426 memcpy(fp
->buf_get
, pn
+ 1, fp
->buf_get_len
);
427 fp
->buf_get
[fp
->buf_get_len
] = '\0';
439 int rlen
= gzread(fp
, buf
, len
- 1);
445 pn
= strchr(buf
, '\n');
448 fp
->buf_get_len
= rlen
- (pn
- buf
+ 1);
449 memcpy(fp
->buf_get
, pn
+ 1, fp
->buf_get_len
);
450 fp
->buf_get
[fp
->buf_get_len
] = '\0';
461 int set_compress_level(gzFile fp
, int level
)
463 if (!fp
|| !fp
->mode
|| *fp
->mode
!= 'w') return -1;
464 if (level
< MIN_COM_LVL
|| level
> MAX_COM_LVL
) return -1;
465 if (fp
->zstream
->level
!= level
)
467 fp
->zstream
->level
= level
;
468 fp
->zstream
->level_buf_size
= com_lvls
[fp
->zstream
->level
];
469 fp
->zstream
->level_buf
= (uint8_t *)realloc(fp
->zstream
->level_buf
,
470 fp
->zstream
->level_buf_size
* sizeof(uint8_t));
475 int gzwrite(gzFile fp
, const void *buf
, size_t _len
)
477 fp
->zstream
->next_in
= (uint8_t *)buf
;
478 fp
->zstream
->avail_in
= _len
;
479 fp
->zstream
->end_of_stream
= !buf
;
483 if (!fp
->zstream
->next_out
)
485 fp
->zstream
->next_out
= fp
->buf_out
;
486 fp
->zstream
->avail_out
= fp
->buf_out_size
;
488 int ret
= isal_deflate(fp
->zstream
);
489 if (ret
!= ISAL_DECOMP_OK
) return -3;
490 len
+= fwrite(fp
->buf_out
, 1, fp
->zstream
->next_out
- fp
->buf_out
, fp
->fp
);
491 fp
->zstream
->next_out
= NULL
;
492 } while (!fp
->zstream
->avail_out
);
496 int gzputc(gzFile fp
, int c
)
498 return gzwrite(fp
, &c
, 1);
501 int gzputs(gzFile fp
, const char *s
)
503 return gzwrite(fp
, s
, strlen(s
));
510 return fp
->mode
[0] == 'r' ? feof(fp
->fp
) : 0;
513 int64_t gzoffset(gzFile fp
)
517 if (fp
->mode
[0] == 'w')
518 return lseek(fp
->fd
, 0, SEEK_CUR
);
519 else if (fp
->mode
[0] == 'r')
520 return lseek(fp
->fd
, 0, SEEK_CUR
) - fp
->state
->avail_in
;