1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // gzip compressed file format
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
// Forward declaration of this module's registration function, which is
// defined at the end of this file.
10 DE_DECLARE_MODULE(de_module_gzip
);
// Bit masks that are tested against the member header's flags byte
// (md->flags) when a gzip member is read.
// FTEXT is not referenced by the visible code; per RFC 1952 it is a hint
// that the compressed data is probably text.
13 #define GZIPFLAG_FTEXT 0x01
// A 16-bit header CRC is present.
14 #define GZIPFLAG_FHCRC 0x02
// "Extra" fields, preceded by a 16-bit length (XLEN), are present.
15 #define GZIPFLAG_FEXTRA 0x04
// A 0x00-terminated original file name is present.
16 #define GZIPFLAG_FNAME 0x08
// A 0x00-terminated comment is present.
17 #define GZIPFLAG_FCOMMENT 0x10
// NOTE(review): these two declarations are used elsewhere in this file as
// fields of struct member_data (md->mod_time_ts, md->crco); the struct's
// opening line and its other fields are not visible in this view.
// Modification time taken from the member header, converted from Unix time.
23 struct de_timestamp mod_time_ts
;
// CRC object that the write listener feeds with the decompressed bytes.
25 struct de_crcobj
*crco
; // A copy of lctx->crco
// State for one run of the module (accessed below through "lctx *d").
// NOTE(review): only the crco field is visible in this view; the code below
// also uses d->output_file, whose declaration is not shown here.
28 typedef struct lctx_struct
{
// CRC-32 object created in de_run_gzip and destroyed there when the run ends.
30 struct de_crcobj
*crco
;
33 static const char *get_os_name(u8 n
)
35 const char *names
[14] = { "FAT", "Amiga", "VMS", "Unix",
36 "VM/CMS", "Atari", "HPFS", "Mac", "Z-System", "CP/M",
37 "TOPS-20", "NTFS", "QDOS", "RISCOS" };
38 const char *name
= "?";
40 if((unsigned int)n
<=13) {
41 name
= names
[(unsigned int)n
];
46 static void our_writelistener_cb(dbuf
*f
, void *userdata
, const u8
*buf
, i64 buf_len
)
48 struct member_data
*md
= (struct member_data
*)userdata
;
49 de_crcobj_addbuf(md
->crco
, buf
, buf_len
);
// Process the gzip member that starts at offset pos1 of c->infile: log its
// header fields, decompress its deflate data into d->output_file (creating
// that file first if d->output_file is not set yet), compare the CRC-32 of
// the decompressed bytes with the value stored after the compressed data,
// and store the member's total size in *member_size.
//
// c:           the deark context.
// d:           module state (output file, shared CRC object).
// pos1:        offset of the member's first byte in the input file.
// member_size: receives (end position - pos1) for the member.
//
// NOTE(review): several lines of this function (local declarations, position
// updates, error exits and the return statement) are not present in this
// view; the comments below describe only the statements that are visible.
// The caller treats a zero return value as failure.
52 static int do_gzip_read_member(deark
*c
, lctx
*d
, i64 pos1
, i64
*member_size
)
62 de_ucstring
*member_name
= NULL
;
63 int saved_indent_level
;
65 struct member_data
*md
= NULL
;
// Per-member scratch data; it is also handed to the write listener below.
68 md
= de_malloc(c
, sizeof(struct member_data
));
// Remember the debug indentation level so it can be restored on exit.
70 de_dbg_indent_save(c
, &saved_indent_level
);
72 de_dbg(c
, "gzip member at %d", (int)pos1
);
// A member must begin with the two signature bytes 0x1f 0x8b.
76 b0
= de_getbyte(pos
+0);
77 b1
= de_getbyte(pos
+1);
78 if(b0
!=0x1f || b1
!=0x8b) {
79 de_err(c
, "Invalid gzip signature at %d. This is not a valid gzip file.",
// Only compression code 0x08 is accepted; the data is later handed to the
// deflate decompressor.
84 md
->cmpr_code
= de_getbyte(pos
+2);
85 if(md
->cmpr_code
!=0x08) {
86 de_err(c
, "Unsupported compression type (%d)", (int)md
->cmpr_code
);
// Flags byte; its bits are tested against the GZIPFLAG_* masks below.
90 md
->flags
= de_getbyte(pos
+3);
91 de_dbg(c
, "flags: 0x%02x", (unsigned int)md
->flags
);
// Modification time: a 32-bit little-endian value, converted from Unix time.
94 mod_time_unix
= de_getu32le(pos
);
95 de_unix_time_to_timestamp(mod_time_unix
, &md
->mod_time_ts
, 0x1);
// The time is only printed when the conversion produced a valid timestamp.
96 if(md
->mod_time_ts
.is_valid
) {
97 char timestamp_buf
[64];
98 de_timestamp_to_string(&md
->mod_time_ts
, timestamp_buf
, sizeof(timestamp_buf
), 0);
99 de_dbg(c
, "mod time: %" I64_FMT
" (%s)", mod_time_unix
, timestamp_buf
);
// Two single-byte fields follow: "extra flags", then the OS/filesystem code.
// Both are only logged.
103 b0
= de_getbyte(pos
++);
104 de_dbg(c
, "extra flags: 0x%02x", (unsigned int)b0
);
106 b0
= de_getbyte(pos
++);
107 de_dbg(c
, "OS or filesystem: %d (%s)", (int)b0
, get_os_name(b0
));
// Optional extra fields: a 16-bit length followed by the data. Only their
// position and length are logged.
109 if(md
->flags
& GZIPFLAG_FEXTRA
) {
110 n
= de_getu16le(pos
); // XLEN
111 // TODO: It might be interesting to dissect these extra fields, but it's
112 // hard to find even a single file that uses them.
113 de_dbg(c
, "[extra fields at %d, dpos=%d, dlen=%d]",
114 (int)pos
, (int)(pos
+2), (int)n
);
// Optional original file name: the terminating 0x00 byte is searched for
// from pos up to the end of the input file.
119 if(md
->flags
& GZIPFLAG_FNAME
) {
120 ret
= dbuf_search_byte(c
->infile
, 0x00, pos
, c
->infile
->len
- pos
,
123 de_err(c
, "Invalid NAME field");
// Length of the name, not counting the terminator.
127 string_len
= foundpos
- pos
;
// The name is decoded as Latin-1. DE_GZIP_MAX_FNLEN is passed as a limit
// (presumably the maximum number of bytes to convert -- confirm against
// dbuf_read_to_ucstring_n).
129 member_name
= ucstring_create(c
);
130 #define DE_GZIP_MAX_FNLEN 300
131 dbuf_read_to_ucstring_n(c
->infile
, pos
, string_len
, DE_GZIP_MAX_FNLEN
,
132 member_name
, 0, DE_ENCODING_LATIN1
);
133 de_dbg(c
, "file name at %d, len=%d: \"%s\"", (int)pos
, (int)string_len
,
134 ucstring_getpsz_d(member_name
));
// Optional comment: located the same way as the file name, by searching for
// its 0x00 terminator.
138 if(md
->flags
& GZIPFLAG_FCOMMENT
) {
139 ret
= dbuf_search_byte(c
->infile
, 0x00, pos
, c
->infile
->len
- pos
,
142 de_err(c
, "Invalid COMMENT field");
// Optional 16-bit header CRC. It is read and logged; no comparison against a
// calculated value is visible in this view.
148 if(md
->flags
& GZIPFLAG_FHCRC
) {
149 md
->crc16_reported
= (u32
)de_getu16le(pos
);
150 de_dbg(c
, "crc16 (reported): 0x%04x", (unsigned int)md
->crc16_reported
);
// pos is now the start of the compressed (deflate) data.
154 de_dbg(c
, "compressed blocks at %d", (int)pos
);
// The output file is created only if one does not exist yet.
156 if(!d
->output_file
) {
157 // Although any member can have a name and mod time, this metadata
158 // is ignored for members after the first one.
161 fi
= de_finfo_create(c
);
// The stored name is used only if one was read and c->filenames_from_file
// is set.
163 if(member_name
&& c
->filenames_from_file
) {
164 de_finfo_set_name_from_ucstring(c
, fi
, member_name
, 0);
165 fi
->original_filename_flag
= 1;
// Carry the header's modification time over to the output file's metadata.
168 if(md
->mod_time_ts
.is_valid
) {
169 fi
->timestamp
[DE_TIMESTAMPIDX_MODIFY
] = md
->mod_time_ts
;
// "bin" is passed as the extension only when the member has no stored name.
172 d
->output_file
= dbuf_create_output_file(c
, member_name
?NULL
:"bin", fi
, 0);
174 de_finfo_destroy(c
, fi
);
// While decompressing, every write to the output file is also passed to
// our_writelistener_cb, which accumulates the CRC over the decompressed bytes.
177 dbuf_set_writelistener(d
->output_file
, our_writelistener_cb
, (void*)md
);
// Start the CRC from a clean state for this member.
179 de_crcobj_reset(md
->crco
);
// Decompress from pos to (at most) the end of the input; the number of
// compressed bytes consumed is returned in cmpr_data_len.
181 ret
= fmtutil_decompress_deflate(c
->infile
, pos
, c
->infile
->len
- pos
, d
->output_file
,
182 0, &cmpr_data_len
, 0);
// Fetch the accumulated CRC, then detach the listener.
184 crc_calculated
= de_crcobj_getval(md
->crco
);
185 dbuf_set_writelistener(d
->output_file
, NULL
, NULL
);
// Skip over the compressed data to reach the fields that follow it.
188 pos
+= cmpr_data_len
;
190 de_dbg(c
, "crc32 (calculated): 0x%08x", (unsigned int)crc_calculated
);
// CRC-32 as stored in the file (32-bit little-endian).
192 md
->crc32_reported
= (u32
)de_getu32le(pos
);
193 de_dbg(c
, "crc32 (reported) : 0x%08x", (unsigned int)md
->crc32_reported
);
// A mismatch is reported with de_warn (a warning rather than an error).
196 if(crc_calculated
!= md
->crc32_reported
) {
197 de_warn(c
, "CRC check failed: Expected 0x%08x, got 0x%08x",
198 (unsigned int)md
->crc32_reported
, (unsigned int)crc_calculated
);
// Stored size field, logged as the uncompressed size modulo 2^32.
201 md
->isize
= de_getu32le(pos
);
202 de_dbg(c
, "uncompressed size (mod 2^32): %u", (unsigned int)md
->isize
);
// Report how many input bytes this member occupied, so the caller can find
// the next member.
209 *member_size
= pos
- pos1
;
// Cleanup: release the name string and restore the debug indentation level.
212 ucstring_destroy(member_name
);
214 de_dbg_indent_restore(c
, saved_indent_level
);
// Module "run" callback: processes the input file as a sequence of gzip
// members by calling do_gzip_read_member() for each one.
// NOTE(review): the local declarations, the loop header and some cleanup
// lines are not present in this view; the loop is implied by the visible
// "break" statements.
218 static void de_run_gzip(deark
*c
, de_module_params
*mparams
)
// Allocate the module state and create the CRC-32 object stored in it.
224 d
= de_malloc(c
, sizeof(lctx
));
225 d
->crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC32_IEEE
);
// Stop once the read position has reached the end of the input file.
229 if(pos
>= c
->infile
->len
) break;
// A zero return value from do_gzip_read_member() means the member could not
// be processed.
230 if(!do_gzip_read_member(c
, d
, pos
, &member_size
)) {
// A non-positive member size also ends the loop.
233 if(member_size
<=0) break;
// Cleanup: close the output file and destroy the CRC object.
237 dbuf_close(d
->output_file
);
240 de_crcobj_destroy(d
->crco
);
// Module "identify" callback: returns a confidence value for the input being
// a gzip file. 100 is returned when the first two bytes are the signature
// 0x1f 0x8b and the third byte is 0x08 (the only compression code this module
// accepts).
// NOTE(review): the code that fills buf and the other return paths are not
// present in this view.
245 static int de_identify_gzip(deark
*c
)
250 if(buf
[0]==0x1f && buf
[1]==0x8b) {
251 if(buf
[2]==0x08) return 100;
// Module registration function: fills in the module-info structure with this
// module's description and its run/identify callbacks.
// NOTE(review): only part of the body is present in this view.
257 void de_module_gzip(deark
*c
, struct deark_module_info
*mi
)
260 mi
->desc
= "gzip compressed file";
261 mi
->run_fn
= de_run_gzip
;
262 mi
->identify_fn
= de_identify_gzip
;