iconv: Bail out of the loop when an illegal sequence of bytes occurs.
[elinks/elinks-j605.git] / src / mime / mime.c
blob1c8268cc0fa0c554ccb964af7306245fa169bc05
1 /* Functionality for handling mime types */
3 #ifndef _GNU_SOURCE
4 #define _GNU_SOURCE /* XXX: we _WANT_ strcasestr() ! */
5 #endif
7 #ifdef HAVE_CONFIG_H
8 #include "config.h"
9 #endif
11 #include <string.h>
13 #include "elinks.h"
15 #include "cache/cache.h"
16 #include "config/options.h"
17 #include "encoding/encoding.h"
18 #include "intl/gettext/libintl.h"
19 #include "main/module.h"
20 #include "mime/backend/common.h"
21 #include "mime/mime.h"
22 #include "protocol/header.h" /* For parse_header() */
23 #include "protocol/uri.h"
24 #include "util/conv.h"
25 #include "util/file.h"
26 #include "util/memory.h"
27 #include "util/string.h"
/* Positions of the entries in the mime_options[] table below; they are used
 * as indexes by get_opt_mime(), so the order here has to follow the order of
 * the initializers in that table. */
enum mime_options {
	MIME_TREE,		/* the "mime" option tree */
	MIME_DEFAULT_TYPE,	/* the "default_type" string option */

	MIME_OPTIONS,		/* equals the number of entries above */
};
37 static union option_info mime_options[] = {
38 INIT_OPT_TREE("", N_("MIME"),
39 "mime", OPT_SORT,
40 N_("MIME-related options (handlers of various MIME types).")),
42 INIT_OPT_STRING("mime", N_("Default MIME-type"),
43 "default_type", 0, DEFAULT_MIME_TYPE,
44 N_("Document MIME-type to assume by default "
45 "(when we are unable to guess it properly "
46 "from known information about the document).")),
48 NULL_OPTION_INFO,
/* The option storage behind entry @which (an enum mime_options value) of
 * mime_options[]. */
#define get_opt_mime(which)	(mime_options[(which)].option)

/* The string currently stored in the "default_type" option. */
#define get_default_mime_type()	(get_opt_mime(MIME_DEFAULT_TYPE).value.string)
54 /* Checks protocols headers for a suitable filename */
55 static unsigned char *
56 get_content_filename(struct uri *uri, struct cache_entry *cached)
58 unsigned char *filename, *pos;
60 if (!cached) cached = find_in_cache(uri);
62 if (!cached || !cached->head)
63 return NULL;
65 pos = parse_header(cached->head, "Content-Disposition", NULL);
66 if (!pos) return NULL;
68 parse_header_param(pos, "filename", &filename);
69 mem_free(pos);
70 if (!filename) return NULL;
72 /* Remove start and ending quotes. */
73 if (filename[0] == '"') {
74 int len = strlen(filename);
76 if (len > 1 && filename[len - 1] == '"') {
77 filename[len - 1] = 0;
78 memmove(filename, filename + 1, len);
81 /* It was an empty quotation: "" */
82 if (!filename[1]) {
83 mem_free(filename);
84 return NULL;
88 /* We don't want to add any directories from the path so make sure we
89 * only add the filename. */
90 pos = get_filename_position(filename);
91 if (!*pos) {
92 mem_free(filename);
93 return NULL;
96 if (pos > filename)
97 memmove(filename, pos, strlen(pos) + 1);
99 return filename;
/* Checks if application/x-<extension> has any handlers.
 *
 * Only the text after the last '.' in @extension is used. Returns the
 * allocated "application/x-..." string when get_mime_type_handler() finds a
 * handler for it, NULL otherwise. */
static inline unsigned char *
check_extension_type(unsigned char *extension)
{
	unsigned char *dot = strrchr(extension, '.');
	unsigned char *type;
	struct mime_handler *probe;

	/* Without a dot there is nothing to build a type from. */
	if (!dot)
		return NULL;

	type = straconcat("application/x-", dot + 1,
			  (unsigned char *) NULL);
	if (!type)
		return NULL;

	probe = get_mime_type_handler(type, 1);
	if (!probe) {
		mem_free(type);
		return NULL;
	}

	/* The handler was only looked up to see whether one exists. */
	mem_free(probe);
	return type;
}
129 /* Check if part of the extension coresponds to a supported encoding and if it
130 * has any handlers. */
131 static inline unsigned char *
132 check_encoding_type(unsigned char *extension)
134 enum stream_encoding encoding = guess_encoding(extension);
135 const unsigned char *const *extension_list;
136 unsigned char *last_extension = strrchr(extension, '.');
138 if (encoding == ENCODING_NONE || !last_extension)
139 return NULL;
141 for (extension_list = listext_encoded(encoding);
142 extension_list && *extension_list;
143 extension_list++) {
144 unsigned char *content_type;
146 if (strcmp(*extension_list, last_extension))
147 continue;
149 *last_extension = '\0';
150 content_type = get_content_type_backends(extension);
151 *last_extension = '.';
153 return content_type;
156 return NULL;
/* Turn the "#if 0" into "#if 1" to define DEBUG_CONTENT_TYPE and get DBG()
 * output from the content type detection below. */
#if 0
#define DEBUG_CONTENT_TYPE
#endif

#ifdef DEBUG_CONTENT_TYPE

#define debug_get_content_type_params(cached) \
	DBG("get_content_type(head, url)\n=== head ===\n%s\n=== url ===\n%s\n", \
	    (cached)->head, struri((cached)->uri))
#define debug_ctype(ctype__)		DBG("ctype= %s", (ctype__))
#define debug_extension(extension__)	DBG("extension= %s", (extension__))

#else

/* Debugging disabled: the hooks expand to nothing. */
#define debug_get_content_type_params(cached)
#define debug_ctype(ctype__)
#define debug_extension(extension__)

#endif
/* Determines a content type for @extension, which must be a non-empty string.
 *
 * Three sources are consulted in order and the first result wins:
 * get_content_type_backends(), check_encoding_type() and
 * check_extension_type(). Returns NULL if none of them yields a type. */
unsigned char *
get_extension_content_type(unsigned char *extension)
{
	unsigned char *ctype;

	assert(extension && *extension);

	ctype = get_content_type_backends(extension);
	debug_ctype(ctype);

	if (!ctype) {
		ctype = check_encoding_type(extension);
		debug_ctype(ctype);
	}

	if (!ctype) {
		ctype = check_extension_type(extension);
		debug_ctype(ctype);
	}

	return ctype;
}
194 unsigned char *
195 get_cache_header_content_type(struct cache_entry *cached)
197 unsigned char *extension, *ctype;
199 ctype = parse_header(cached->head, "Content-Type", NULL);
200 if (ctype) {
201 unsigned char *end = strchr(ctype, ';');
202 int ctypelen;
204 if (end) *end = '\0';
206 ctypelen = strlen(ctype);
207 while (ctypelen && ctype[--ctypelen] <= ' ')
208 ctype[ctypelen] = '\0';
210 debug_ctype(ctype);
212 if (*ctype) {
213 return ctype;
216 mem_free(ctype);
219 /* This searches cached->head for filename so put here */
220 extension = get_content_filename(cached->uri, cached);
221 debug_extension(extension);
222 if (extension) {
223 ctype = get_extension_content_type(extension);
224 mem_free(extension);
225 if (ctype) {
226 return ctype;
230 return NULL;
233 static unsigned char *
234 get_fragment_content_type(struct cache_entry *cached)
236 struct fragment *fragment;
237 size_t length;
238 unsigned char *sample;
239 unsigned char *ctype = NULL;
241 if (list_empty(cached->frag))
242 return NULL;
244 fragment = cached->frag.next;
245 if (fragment->offset)
246 return NULL;
248 length = fragment->length > 1024 ? 1024 : fragment->length;
249 sample = memacpy(fragment->data, length);
250 if (!sample)
251 return NULL;
253 if (c_strcasestr(sample, "<html>"))
254 ctype = stracpy("text/html");
256 mem_free(sample);
258 return ctype;
261 unsigned char *
262 get_content_type(struct cache_entry *cached)
264 unsigned char *extension, *ctype;
266 debug_get_content_type_params(cached);
268 if (cached->content_type)
269 return cached->content_type;
271 /* If there's one in header, it's simple.. */
272 if (cached->head) {
273 ctype = get_cache_header_content_type(cached);
274 if (ctype && *ctype) {
275 cached->content_type = ctype;
276 return ctype;
278 mem_free_if(ctype);
281 /* We can't use the extension string we are getting below, because we
282 * want to support also things like "ps.gz" - that'd never work, as we
283 * would always compare only to "gz". */
284 /* Guess type accordingly to the extension */
285 extension = get_extension_from_uri(cached->uri);
286 debug_extension(extension);
288 if (extension) {
289 /* XXX: A little hack for making extension handling case
290 * insensitive. We could probably do it better by making
291 * guess_encoding() case independent the real problem however
292 * is with default (via option system) and mimetypes resolving
293 * doing that option and hash lookup will not be easy to
294 * convert. --jonas */
295 convert_to_lowercase_locale_indep(extension, strlen(extension));
297 ctype = get_extension_content_type(extension);
298 mem_free(extension);
299 if (ctype && *ctype) {
300 cached->content_type = ctype;
301 return ctype;
303 mem_free_if(ctype);
306 ctype = get_fragment_content_type(cached);
307 if (ctype && *ctype) {
308 cached->content_type = ctype;
309 return ctype;
312 debug_ctype(get_default_mime_type());
314 /* text/plain for pager mode */
315 if (cached->uri && cached->uri->string
316 && !strcmp(cached->uri->string, "file:///dev/stdin")) {
317 cached->content_type = stracpy("text/plain");
318 } else
319 /* Fallback.. use some hardwired default */
320 cached->content_type = stracpy(get_default_mime_type());
322 return cached->content_type;
/* Looks up a handler for @content_type. Both arguments are passed unchanged
 * to get_mime_handler_backends() and its result is returned as is. */
struct mime_handler *
get_mime_type_handler(unsigned char *content_type, int xwin)
{
	struct mime_handler *handler;

	handler = get_mime_handler_backends(content_type, xwin);

	return handler;
}
331 struct string *
332 add_mime_filename_to_string(struct string *string, struct uri *uri)
334 unsigned char *filename = get_content_filename(uri, NULL);
336 assert(uri->data);
338 if (filename) {
339 add_shell_safe_to_string(string, filename, strlen(filename));
340 mem_free(filename);
342 return string;
345 return add_uri_to_string(string, uri, URI_FILENAME);
348 /* Backends dynamic area: */
350 #include "mime/backend/default.h"
351 #include "mime/backend/mailcap.h"
352 #include "mime/backend/mimetypes.h"
354 static struct module *mime_submodules[] = {
355 &default_mime_module,
356 #ifdef CONFIG_MAILCAP
357 &mailcap_mime_module,
358 #endif
359 #ifdef CONFIG_MIMETYPES
360 &mimetypes_mime_module,
361 #endif
362 NULL,
365 struct module mime_module = struct_module(
366 /* name: */ N_("MIME"),
367 /* options: */ mime_options,
368 /* hooks: */ NULL,
369 /* submodules: */ mime_submodules,
370 /* data: */ NULL,
371 /* init: */ NULL,
372 /* done: */ NULL