zip: Better parsing of Info-ZIP type 1 extra field
[deark.git] / modules / unsupported.c
blob1974c278f4533d161383414d2e826e8badc9d475
1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // File types we recognize but don't support.
6 // The point is to print a better error message than "unknown format".
7 // This is most useful for file types that might be mistaken for one we
8 // do support.
10 #include <deark-config.h>
11 #include <deark-private.h>
12 DE_DECLARE_MODULE(de_module_unsupported);
// Result of checking the start of the input file against the known
// signatures of formats that are recognized but not supported.
struct fmtinfo_struct {
	// Identification confidence. 0 means the format was not identified.
	int confidence;
	// If nonzero, the error message is printed without the
	// "which is not a supported format" suffix.
	int special_message;
	// Human-readable description, including its article (e.g. "a PDF file").
	// It is inserted into the message "This looks like %s...".
	const char *descr;
};
20 // fmti is allocated by the caller.
21 // get_fmt initializes it. If a format is unidentified,
22 // it sets ->confidence to 0.
23 static void get_fmt(deark *c, struct fmtinfo_struct *fmti)
25 u8 b[32];
27 de_zeromem(fmti, sizeof(struct fmtinfo_struct));
29 de_read(b, 0, sizeof(b));
31 if(!de_memcmp(b, "\x42\x5a\x68", 3) &&
32 !de_memcmp(&b[4], "\x31\x41\x59\x26\x53\x59", 6) )
34 fmti->confidence = 90;
35 fmti->descr = "a bzip2-compressed file";
36 return;
39 if(!de_memcmp(b, "7z\xbc\xaf\x27\x1c", 6)) {
40 fmti->confidence = 90;
41 fmti->descr = "a 7z file";
42 return;
45 // Note - Make sure BSAVE has lower confidence.
46 if(!de_memcmp(b, "\xfd\x37\x7a\x58\x5a\x00", 6)) {
47 fmti->confidence = 90;
48 fmti->descr = "an xz-compressed file";
49 return;
52 if(!de_memcmp(b, "LZIP", 4)) {
53 fmti->confidence = 50;
54 fmti->descr = "an lzip-compressed file";
55 return;
58 if(!de_memcmp(b, "<?xpacket", 9)) {
59 fmti->confidence = 20;
60 fmti->descr = "an XMP file";
61 return;
64 if(!de_memcmp(b, "ISc(", 4)) {
65 fmti->confidence = 40;
66 fmti->descr = "an InstallShield CAB file";
67 return;
70 if(b[0]=='H' && b[1]=='P' && b[2]=='H' && b[3]=='P' &&
71 b[4]=='4' && (b[5]=='8' || b[5]=='9'))
73 fmti->confidence = 90;
74 fmti->descr = "a non-GROB HP-48/49 file";
75 return;
78 if(!de_memcmp(b, "%PDF-", 5)) {
79 fmti->confidence = 90;
80 fmti->descr = "a PDF file";
81 return;
84 if(!de_memcmp(b, "\x7f" "ELF", 4)) {
85 fmti->confidence = 40;
86 fmti->descr = "an ELF binary";
87 return;
90 if(!de_memcmp(b, "\xff" "WPC", 4)) {
91 fmti->confidence = 40;
92 fmti->descr = "a WordPerfect document";
93 return;
96 if(!de_memcmp(b, "Rar!\x1a\x07", 6)) {
97 fmti->confidence = 90;
98 fmti->descr = "a RAR archive";
99 return;
102 if((!de_memcmp(b, "StuffIt", 7)) && (b[7]=='!' || b[7]=='?')) {
103 fmti->confidence = 90;
104 fmti->descr = "a StuffIt X archive";
105 return;
108 if(!de_memcmp(b, "ICE!", 4) ||
109 !de_memcmp(b, "Ice!", 4))
111 fmti->confidence = 75;
112 fmti->descr = "a Pack-Ice compressed file";
113 return;
116 // Note - Make sure Atari CAS has lower confidence.
117 if(!de_memcmp(b, "FUJIFILMCCD-RAW", 15)) {
118 fmti->confidence = 100;
119 fmti->descr = "a Fujifilm RAF file";
120 return;
123 if(!de_memcmp(b, "AutoCAD Slide\r\n\x1a", 16)) {
124 fmti->confidence = 100;
125 fmti->descr = "an AutoCAD Slide file";
126 return;
129 if(!de_memcmp(b, "Top!", 4)) {
130 // A format often found alongside RISC OS Draw files
131 fmti->confidence = 9;
132 fmti->descr = "an ArtWorks image";
133 return;
136 if(!de_memcmp(b, "CPT", 3) &&
137 (b[3]>='7' && b[3]<='9') &&
138 !de_memcmp(&b[4], "FILE", 4))
140 fmti->confidence = 91;
141 fmti->descr = "a Corel Photo-Paint image";
142 return;
145 // We're not trying to detect every HTML file, but we want to make sure
146 // we can detect the ones we generate.
147 if(!de_memcmp(b, "<!DOCTYPE html", 14) ||
148 !de_memcmp(b, "\xef\xbb\xbf<!DOCTYPE html", 17) ||
149 !de_memcmp(b, "<html", 5))
151 fmti->confidence = 20;
152 fmti->descr = "an HTML file";
153 return;
157 static void de_run_unsupported(deark *c, de_module_params *mparams)
159 struct fmtinfo_struct fmti;
160 get_fmt(c, &fmti);
161 if(fmti.confidence>0 && fmti.descr) {
162 if(fmti.special_message) {
163 de_err(c, "This looks like %s", fmti.descr);
165 else {
166 de_err(c, "This looks like %s, which is not a supported format.", fmti.descr);
171 static int de_identify_unsupported(deark *c)
173 struct fmtinfo_struct fmti;
174 get_fmt(c, &fmti);
175 return fmti.confidence;
178 void de_module_unsupported(deark *c, struct deark_module_info *mi)
180 mi->id = "unsupported";
181 mi->desc = "Identify some unsupported formats";
182 mi->run_fn = de_run_unsupported;
183 mi->identify_fn = de_identify_unsupported;
184 mi->flags |= DE_MODFLAG_HIDDEN | DE_MODFLAG_NOEXTRACT;
185 mi->unique_id = 1;