1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // gzip compressed file format
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
// Module declaration for de_module_gzip, which is defined at the end of this
// file. NOTE(review): presumably a forward declaration; the macro itself is
// defined outside this file.
10 DE_DECLARE_MODULE(de_module_gzip
);
// Bit masks for the flags byte of a gzip member header (the byte read at
// offset 3 of the member in do_gzip_read_member).
13 #define GZIPFLAG_FTEXT 0x01
14 #define GZIPFLAG_FHCRC 0x02
15 #define GZIPFLAG_FEXTRA 0x04
16 #define GZIPFLAG_FNAME 0x08
17 #define GZIPFLAG_FCOMMENT 0x10
23 struct de_timestamp mod_time_ts
;
// Module-local context: allocated in de_run_gzip and passed to
// do_gzip_read_member for every member.
// NOTE(review): code elsewhere in this file also uses d->output_file, so this
// excerpt does not show every member of the struct.
26 typedef struct lctx_struct
{
// CRC object; de_run_gzip creates it with DE_CRCOBJ_CRC32_IEEE, and
// do_gzip_read_member attaches it to the output file as a write listener.
28 struct de_crcobj
*crco
;
31 static const char *get_os_name(u8 n
)
33 const char *names
[14] = { "FAT", "Amiga", "VMS", "Unix",
34 "VM/CMS", "Atari", "HPFS", "Mac", "Z-System", "CP/M",
35 "TOPS-20", "NTFS", "QDOS", "RISCOS" };
36 const char *name
= "?";
38 if((unsigned int)n
<=13) {
39 name
= names
[(unsigned int)n
];
// Reads one gzip member that starts at offset pos1 of the input file:
// parses the header fields, decompresses the deflate data into
// d->output_file (creating that file first if it does not exist yet), and
// then reads the CRC-32 and size fields that follow the compressed data.
// Where it is assigned, *member_size is the number of input bytes from pos1
// to the current position (pos - pos1).
// NOTE(review): the return-value convention and the error/cleanup paths are
// not visible in this excerpt; the caller tests the result with '!', so
// presumably nonzero means success -- confirm.
44 static int do_gzip_read_member(deark
*c
, lctx
*d
, i64 pos1
, i64
*member_size
)
54 de_ucstring
*member_name
= NULL
;
55 int saved_indent_level
;
57 struct member_data
*md
= NULL
;
// Allocate the per-member state.
60 md
= de_malloc(c
, sizeof(struct member_data
));
// Remember the debug indentation level; it is restored at the end of the
// function.
62 de_dbg_indent_save(c
, &saved_indent_level
);
64 de_dbg(c
, "gzip member at %d", (int)pos1
);
// The member must begin with the two signature bytes 0x1f 0x8b.
68 b0
= de_getbyte(pos
+0);
69 b1
= de_getbyte(pos
+1);
70 if(b0
!=0x1f || b1
!=0x8b) {
71 de_err(c
, "Invalid gzip signature at %d. This is not a valid gzip file.",
// Compression type byte: anything other than 0x08 is reported as
// unsupported. The data is later handed to the deflate decompressor.
76 md
->cmpr_code
= de_getbyte(pos
+2);
77 if(md
->cmpr_code
!=0x08) {
78 de_err(c
, "Unsupported compression type (%d)", (int)md
->cmpr_code
);
// Flags byte; its bits are tested below with the GZIPFLAG_* masks.
82 md
->flags
= de_getbyte(pos
+3);
83 de_dbg(c
, "flags: 0x%02x", (unsigned int)md
->flags
);
// Modification time: a 32-bit little-endian value that is converted as a
// Unix time. It is logged only if the conversion yields a valid timestamp.
86 mod_time_unix
= de_getu32le(pos
);
87 de_unix_time_to_timestamp(mod_time_unix
, &md
->mod_time_ts
, 0x1);
88 if(md
->mod_time_ts
.is_valid
) {
89 char timestamp_buf
[64];
90 de_timestamp_to_string(&md
->mod_time_ts
, timestamp_buf
, sizeof(timestamp_buf
), 0);
91 de_dbg(c
, "mod time: %" I64_FMT
" (%s)", mod_time_unix
, timestamp_buf
);
// Extra-flags byte: read and logged here.
95 b0
= de_getbyte(pos
++);
96 de_dbg(c
, "extra flags: 0x%02x", (unsigned int)b0
);
// OS/filesystem code byte, logged together with its name from
// get_os_name().
98 b0
= de_getbyte(pos
++);
99 de_dbg(c
, "OS or filesystem: %d (%s)", (int)b0
, get_os_name(b0
));
// Optional extra field: a 16-bit little-endian length (XLEN) is read at pos;
// the debug message reports the data as starting at pos+2 with length n.
101 if(md
->flags
& GZIPFLAG_FEXTRA
) {
102 n
= de_getu16le(pos
); // XLEN
103 // TODO: It might be interesting to dissect these extra fields, but it's
104 // hard to find even a single file that uses them.
105 de_dbg(c
, "[extra fields at %d, dpos=%d, dlen=%d]",
106 (int)pos
, (int)(pos
+2), (int)n
);
// Optional file name: located by searching for a 0x00 byte between pos and
// the end of the input file.
111 if(md
->flags
& GZIPFLAG_FNAME
) {
112 ret
= dbuf_search_byte(c
->infile
, 0x00, pos
, c
->infile
->len
- pos
,
115 de_err(c
, "Invalid NAME field");
// Length of the name, i.e. the distance from pos to the byte that was found.
119 string_len
= foundpos
- pos
;
// Read the name as Latin-1 text.
// NOTE(review): DE_GZIP_MAX_FNLEN presumably caps how many bytes are
// converted -- confirm against dbuf_read_to_ucstring_n.
121 member_name
= ucstring_create(c
);
122 #define DE_GZIP_MAX_FNLEN 300
123 dbuf_read_to_ucstring_n(c
->infile
, pos
, string_len
, DE_GZIP_MAX_FNLEN
,
124 member_name
, 0, DE_ENCODING_LATIN1
);
125 de_dbg(c
, "file name at %d, len=%d: \"%s\"", (int)pos
, (int)string_len
,
126 ucstring_getpsz_d(member_name
));
// Optional comment: located with the same 0x00 search as the file name.
130 if(md
->flags
& GZIPFLAG_FCOMMENT
) {
131 ret
= dbuf_search_byte(c
->infile
, 0x00, pos
, c
->infile
->len
- pos
,
134 de_err(c
, "Invalid COMMENT field");
// Optional 16-bit little-endian header CRC: read and logged here.
140 if(md
->flags
& GZIPFLAG_FHCRC
) {
141 md
->crc16_reported
= (u32
)de_getu16le(pos
);
142 de_dbg(c
, "crc16 (reported): 0x%04x", (unsigned int)md
->crc16_reported
);
146 de_dbg(c
, "compressed blocks at %d", (int)pos
);
// Create the output file only if the context does not have one yet.
148 if(!d
->output_file
) {
149 // Although any member can have a name and mod time, this metadata
150 // is ignored for members after the first one.
153 fi
= de_finfo_create(c
);
// Use the stored name only when one was read and c->filenames_from_file is
// set.
155 if(member_name
&& c
->filenames_from_file
) {
156 de_finfo_set_name_from_ucstring(c
, fi
, member_name
, 0);
157 fi
->original_filename_flag
= 1;
// Carry the modification time over to the output file's metadata.
160 if(md
->mod_time_ts
.is_valid
) {
161 fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
] = md
->mod_time_ts
;
// Pass "bin" as the extension argument only when there is no member name;
// otherwise pass NULL.
164 d
->output_file
= dbuf_create_output_file(c
, member_name
?NULL
:"bin", fi
, 0);
165 dbuf_enable_wbuffer(d
->output_file
);
167 de_finfo_destroy(c
, fi
);
// Attach the CRC object as a write listener on the output file and reset
// it, so that the CRC covers what is written for this member.
170 dbuf_set_writelistener(d
->output_file
, de_writelistener_for_crc
, (void*)d
->crco
);
171 de_crcobj_reset(d
->crco
);
// Decompress, offering everything from pos to the end of the input.
// cmpr_data_len is passed by address and is used below to advance pos.
173 ret
= fmtutil_decompress_deflate(c
->infile
, pos
, c
->infile
->len
- pos
, d
->output_file
,
174 0, &cmpr_data_len
, 0);
// Flush before reading the CRC value.
// NOTE(review): presumably so that buffered output reaches the write
// listener first -- confirm.
175 dbuf_flush(d
->output_file
);
// Fetch the CRC value, then detach the listener.
177 crc_calculated
= de_crcobj_getval(d
->crco
);
178 dbuf_set_writelistener(d
->output_file
, NULL
, NULL
);
// Advance past the compressed data.
181 pos
+= cmpr_data_len
;
183 de_dbg(c
, "crc32 (calculated): 0x%08x", (unsigned int)crc_calculated
);
// Stored CRC-32 (32-bit little-endian). A mismatch with the calculated value
// only produces a warning.
185 md
->crc32_reported
= (u32
)de_getu32le(pos
);
186 de_dbg(c
, "crc32 (reported) : 0x%08x", (unsigned int)md
->crc32_reported
);
189 if(crc_calculated
!= md
->crc32_reported
) {
190 de_warn(c
, "CRC check failed: Expected 0x%08x, got 0x%08x",
191 (unsigned int)md
->crc32_reported
, (unsigned int)crc_calculated
);
// Stored uncompressed size (32-bit little-endian); logged here.
194 md
->isize
= de_getu32le(pos
);
195 de_dbg(c
, "uncompressed size (mod 2^32): %u", (unsigned int)md
->isize
);
// Report how many input bytes this member occupied.
202 *member_size
= pos
- pos1
;
// Cleanup: release the name string and restore the debug indentation.
205 ucstring_destroy(member_name
);
207 de_dbg_indent_restore(c
, saved_indent_level
);
// Module run function. Allocates the context and a CRC object of type
// DE_CRCOBJ_CRC32_IEEE, then reads gzip members with do_gzip_read_member().
// Reading stops when pos reaches the end of the input file or when the
// member size is not positive. Finally the output file is closed and the
// CRC object is destroyed.
// NOTE(review): the loop construct, the handling of a zero return from
// do_gzip_read_member(), and the update of pos are not visible in this
// excerpt.
211 static void de_run_gzip(deark
*c
, de_module_params
*mparams
)
217 d
= de_malloc(c
, sizeof(lctx
));
218 d
->crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC32_IEEE
);
// Stop once the whole input has been consumed.
222 if(pos
>= c
->infile
->len
) break;
223 if(!do_gzip_read_member(c
, d
, pos
, &member_size
)) {
// A non-positive member size also ends the loop.
// NOTE(review): presumably a guard against making no forward progress --
// confirm.
226 if(member_size
<=0) break;
// All members are written to the same output file, which is closed here.
230 dbuf_close(d
->output_file
);
233 de_crcobj_destroy(d
->crco
);
// Module identify function. Returns 100 when buf holds the gzip signature
// bytes 0x1f 0x8b followed by the compression type 0x08.
// NOTE(review): how buf is filled and what is returned in the other cases
// are not visible in this excerpt.
238 static int de_identify_gzip(deark
*c
)
243 if(buf
[0]==0x1f && buf
[1]==0x8b) {
244 if(buf
[2]==0x08) return 100;
// Module registration function: fills in the module's description and its
// run and identify callbacks in *mi.
// NOTE(review): any other fields set here are not visible in this excerpt.
250 void de_module_gzip(deark
*c
, struct deark_module_info
*mi
)
253 mi
->desc
= "gzip compressed file";
254 mi
->run_fn
= de_run_gzip
;
255 mi
->identify_fn
= de_identify_gzip
;