New "ea_data" module
[deark.git] / modules / rpm.c
bloba8251974d65db97439d7dd49ed8dbeb52ce51032
1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // RPM package manager
7 #include <deark-config.h>
8 #include <deark-private.h>
9 DE_DECLARE_MODULE(de_module_rpm);
// Tag data type code that marks a tag's value as a string.
enum {
	DE_RPM_STRING_TYPE = 6
};

// IDs of the header-section tags that this module looks at.
enum {
	DE_RPMTAG_NAME              = 1000,
	DE_RPMTAG_VERSION           = 1001,
	DE_RPMTAG_RELEASE           = 1002,
	DE_RPMTAG_PAYLOADFORMAT     = 1124,
	DE_RPMTAG_PAYLOADCOMPRESSOR = 1125
};

// Internal codes for the payload compression scheme (lctx::cmpr_type).
enum {
	DE_RPM_CMPR_UNKNOWN = 0,
	DE_RPM_CMPR_GZIP    = 1,
	DE_RPM_CMPR_BZIP2   = 2,
	DE_RPM_CMPR_LZMA    = 3,
	DE_RPM_CMPR_XZ      = 4
};
25 typedef struct localctx_struct {
26 u8 ver_major, ver_minor;
27 int cmpr_type;
29 struct de_stringreaderdata *name_srd;
30 struct de_stringreaderdata *version_srd;
31 struct de_stringreaderdata *release_srd;
32 } lctx;
34 static int do_lead_section(deark *c, lctx *d)
36 int retval = 0;
38 de_dbg(c, "lead section at %d", 0);
39 de_dbg_indent(c, 1);
41 d->ver_major = de_getbyte(4);
42 d->ver_minor = de_getbyte(5);
43 de_dbg(c, "RPM format version: %d.%d", (int)d->ver_major, (int)d->ver_minor);
44 if(d->ver_major < 3) {
45 de_err(c, "Unsupported RPM version (%d.%d)", (int)d->ver_major, (int)d->ver_minor);
46 goto done;
49 retval = 1;
50 done:
51 de_dbg_indent(c, -1);
52 return retval;
55 static void read_compression_type(deark *c, lctx *d, i64 pos)
57 u8 buf[16];
59 de_dbg(c, "compression type at %d", (int)pos);
61 de_read(buf, pos, sizeof(buf));
63 if(!de_memcmp(buf, "lzma\0", 5)) {
64 d->cmpr_type = DE_RPM_CMPR_LZMA;
66 // Other valid compression types are "gzip", "bzip2", and "xz".
67 // We'll autodetect most of them, but lzma is hard to detect.
70 // Note that a header *structure* is distinct from the header *section*.
71 // Both the signature section and the header section use a header structure.
72 static int do_header_structure(deark *c, lctx *d, int is_sig, i64 pos1,
73 i64 *section_size)
75 i64 pos;
76 i64 indexcount;
77 i64 storesize;
78 u8 buf[4];
79 u8 header_ver;
80 i64 i;
81 i64 tag_id, tag_type, tag_offset, tag_count;
82 i64 data_store_pos;
83 const char *hdrname;
84 int retval = 0;
86 hdrname = is_sig?"sig":"hdr";
87 pos = pos1;
88 de_dbg(c, "%s section at %d", hdrname, (int)pos1);
89 de_dbg_indent(c, 1);
91 de_read(buf, pos, 4);
92 if(buf[0]!=0x8e || buf[1]!=0xad || buf[2]!=0xe8) {
93 de_err(c, "Bad header signature at %d", (int)pos);
94 goto done;
96 header_ver = buf[3];
97 if(header_ver != 1) {
98 de_err(c, "Unsupported header version");
99 goto done;
101 pos += 8;
103 indexcount = de_getu32be(pos);
104 storesize = de_getu32be(pos+4);
105 de_dbg(c, "%s: pos=%d indexcount=%d storesize=%d", hdrname,
106 (int)pos, (int)indexcount, (int)storesize);
107 pos += 8;
109 if(indexcount>1000) goto done;
111 data_store_pos = pos + 16*indexcount;
113 de_dbg(c, "%s: tag table at %d", hdrname, (int)pos);
114 de_dbg_indent(c, 1);
116 for(i=0; i<indexcount; i++) {
117 tag_id = de_getu32be(pos);
118 tag_type = de_getu32be(pos+4);
119 tag_offset = de_getu32be(pos+8);
120 tag_count = de_getu32be(pos+12);
122 de_dbg2(c, "tag #%d type=%d offset=%d count=%d", (int)tag_id,
123 (int)tag_type, (int)tag_offset, (int)tag_count);
126 if(is_sig==0 && tag_id==DE_RPMTAG_PAYLOADCOMPRESSOR && tag_type==DE_RPM_STRING_TYPE) {
127 read_compression_type(c, d, data_store_pos+tag_offset);
129 else if(is_sig==0 && tag_id==DE_RPMTAG_NAME && tag_type==DE_RPM_STRING_TYPE) {
130 if(!d->name_srd) {
131 d->name_srd = dbuf_read_string(c->infile, data_store_pos+tag_offset,
132 DE_DBG_MAX_STRLEN, DE_DBG_MAX_STRLEN,
133 DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_ASCII);
134 de_dbg(c, "name: \"%s\"", ucstring_getpsz(d->name_srd->str));
137 else if(is_sig==0 && tag_id==DE_RPMTAG_VERSION && tag_type==DE_RPM_STRING_TYPE) {
138 if(!d->version_srd) {
139 d->version_srd = dbuf_read_string(c->infile, data_store_pos+tag_offset,
140 DE_DBG_MAX_STRLEN, DE_DBG_MAX_STRLEN,
141 DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_ASCII);
142 de_dbg(c, "version: \"%s\"", ucstring_getpsz(d->version_srd->str));
145 else if(is_sig==0 && tag_id==DE_RPMTAG_RELEASE && tag_type==DE_RPM_STRING_TYPE) {
146 if(!d->release_srd) {
147 d->release_srd = dbuf_read_string(c->infile, data_store_pos+tag_offset,
148 DE_DBG_MAX_STRLEN, DE_DBG_MAX_STRLEN,
149 DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_ASCII);
150 de_dbg(c, "release: \"%s\"", ucstring_getpsz(d->release_srd->str));
154 pos += 16;
157 de_dbg_indent(c, -1);
159 pos = data_store_pos;
160 de_dbg(c, "%s: data store at %d", hdrname, (int)pos);
161 pos += storesize;
163 *section_size = pos - pos1;
164 retval = 1;
165 done:
166 de_dbg_indent(c, -1);
167 return retval;
170 static void de_run_rpm(deark *c, de_module_params *mparams)
172 lctx *d = NULL;
173 i64 pos;
174 u8 buf[8];
175 const char *ext;
176 i64 section_size = 0;
177 de_finfo *fi = NULL;
178 char filename[128];
180 d = de_malloc(c, sizeof(lctx));
182 if(!do_lead_section(c, d)) {
183 goto done;
186 pos = 96;
188 if(!do_header_structure(c, d, 1, pos, &section_size)) {
189 goto done;
191 pos += section_size;
193 // Header structures are 8-byte aligned. The first one always starts at
194 // offset 96, so we don't have to worry about it. But we need to make
195 // sure the second one is aligned.
196 pos = ((pos + 7)/8)*8;
198 if(!do_header_structure(c, d, 0, pos, &section_size)) {
199 goto done;
201 pos += section_size;
203 de_dbg(c, "data pos: %d", (int)pos);
204 if(pos > c->infile->len) goto done;
206 // There is usually a tag that indicates the compression format, but we
207 // primarily figure out the format by sniffing its magic number, on the
208 // theory that that's more reliable.
210 // TODO: I think it's also theoretically possible that it could use an archive
211 // format other than cpio.
213 de_read(buf, pos, 8);
215 if(buf[0]==0x1f && buf[1]==0x8b) {
216 ext = "cpio.gz";
218 else if(buf[0]==0x42 && buf[1]==0x5a && buf[2]==0x68) {
219 ext = "cpio.bz2";
221 else if(buf[0]==0xfd && buf[1]==0x37 && buf[2]==0x7a) {
222 ext = "cpio.xz";
224 else if(d->cmpr_type==DE_RPM_CMPR_LZMA || buf[0]==0x5d) {
225 ext = "cpio.lzma";
227 else {
228 de_warn(c, "Unidentified compression or archive format");
229 ext = "cpio.bin";
232 if(d->name_srd && c->filenames_from_file) {
233 const char *version2 = "x";
234 const char *release2 = "x";
236 if(d->version_srd) version2 = d->version_srd->sz;
237 if(d->release_srd) release2 = d->release_srd->sz;
239 fi = de_finfo_create(c);
240 de_snprintf(filename, sizeof(filename), "%s-%s.%s",
241 d->name_srd->sz, version2, release2);
242 de_finfo_set_name_from_sz(c, fi, filename, 0, DE_ENCODING_ASCII);
245 dbuf_create_file_from_slice(c->infile, pos, c->infile->len - pos, ext, fi, 0);
247 done:
248 de_finfo_destroy(c, fi);
249 if(d) {
250 de_destroy_stringreaderdata(c, d->name_srd);
251 de_destroy_stringreaderdata(c, d->release_srd);
252 de_destroy_stringreaderdata(c, d->version_srd);
253 de_free(c, d);
257 static int de_identify_rpm(deark *c)
259 if(!dbuf_memcmp(c->infile, 0, "\xed\xab\xee\xdb", 4))
260 return 100;
261 return 0;
264 void de_module_rpm(deark *c, struct deark_module_info *mi)
266 mi->id = "rpm";
267 mi->desc = "RPM Package Manager";
268 mi->run_fn = de_run_rpm;
269 mi->identify_fn = de_identify_rpm;