r4888: Don't try to thumbnail zero-byte files. They're either empty, or special and
[rox-filer/translations.git] / ROX-Filer / src / xdgmimemagic.c
blob19f563cb62c9a945b003dc4ca4575f5c23f8f8d5
1 /* -*- mode: C; c-file-style: "gnu" -*- */
2 /* xdgmimemagic.c: Private file. Data structure for storing magic files.
4 * More info can be found at http://www.freedesktop.org/standards/
6 * Copyright (C) 2003 Red Hat, Inc.
7 * Copyright (C) 2003 Jonathan Blandford <jrb@alum.mit.edu>
9 * Licensed under the Academic Free License version 2.0
10 * Or under the following terms:
12 * This library is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2 of the License, or (at your option) any later version.
17 * This library is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with this library; if not, write to the
24 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25 * Boston, MA 02111-1307, USA.
28 #ifdef HAVE_CONFIG_H
29 #include <config.h>
30 #endif
32 #include <assert.h>
33 #include <glib.h>
34 #include "xdgmimemagic.h"
35 #include "xdgmimeint.h"
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <ctype.h>
40 #include <errno.h>
41 #include <limits.h>
/* Boolean constants used throughout this file; only defined here when no
 * earlier header has already provided them. */
#ifndef FALSE
#define FALSE (0)
#endif

#ifndef TRUE
#define TRUE (!FALSE)
#endif

/* errno comes from <errno.h>, which is included above.  It must not be
 * redeclared with "extern int errno;": the header is allowed to define errno
 * as a macro, and declaring an identifier with that name is undefined
 * behaviour. */
typedef struct XdgMimeMagicMatch XdgMimeMagicMatch;
typedef struct XdgMimeMagicMatchlet XdgMimeMagicMatchlet;

/* States of the magic-file reader loop. */
typedef enum
{
  XDG_MIME_MAGIC_SECTION, /* next input is a "[priority:mime-type]" header */
  XDG_MIME_MAGIC_MAGIC,   /* next input is a magic line of the current section */
  XDG_MIME_MAGIC_ERROR,   /* malformed input; skip to the end of the line */
  XDG_MIME_MAGIC_EOF      /* input exhausted; the reader loop stops */
} XdgMimeMagicState;

/* One section of a magic file: a MIME type and the rules that identify it. */
struct XdgMimeMagicMatch
{
  const char *mime_type;          /* heap string, released with the match */
  int priority;                   /* number read from the section header */
  XdgMimeMagicMatchlet *matchlet; /* head of this section's rule list */
  XdgMimeMagicMatch *next;        /* next section in the priority-ordered list */
};

/* One magic line: a byte pattern looked for in a window of the data. */
struct XdgMimeMagicMatchlet
{
  int indent;                 /* nesting depth; 0 for top-level rules */
  int offset;                 /* first data offset at which the value is tried */
  unsigned int value_length;  /* number of bytes in value (and in mask) */
  unsigned char *value;       /* bytes to compare against the data */
  unsigned char *mask;        /* optional per-byte AND mask, or NULL */
  unsigned int range_length;  /* number of consecutive start offsets tried */
  unsigned int word_size;     /* unit used when byte swapping the value */
  XdgMimeMagicMatchlet *next; /* next rule of the same section */
};

/* The complete set of magic rules loaded so far. */
struct XdgMimeMagic
{
  XdgMimeMagicMatch *match_list; /* sections, highest priority first */
  int max_extent;                /* largest offset + range + value length of any rule */
};
92 static XdgMimeMagicMatch *
93 _xdg_mime_magic_match_new (void)
95 return calloc (1, sizeof (XdgMimeMagicMatch));
99 static XdgMimeMagicMatchlet *
100 _xdg_mime_magic_matchlet_new (void)
102 XdgMimeMagicMatchlet *matchlet;
104 matchlet = malloc (sizeof (XdgMimeMagicMatchlet));
106 matchlet->indent = 0;
107 matchlet->offset = 0;
108 matchlet->value_length = 0;
109 matchlet->value = NULL;
110 matchlet->mask = NULL;
111 matchlet->range_length = 1;
112 matchlet->word_size = 1;
113 matchlet->next = NULL;
115 return matchlet;
119 static void
120 _xdg_mime_magic_matchlet_free (XdgMimeMagicMatchlet *mime_magic_matchlet)
122 if (mime_magic_matchlet)
124 if (mime_magic_matchlet->next)
125 _xdg_mime_magic_matchlet_free (mime_magic_matchlet->next);
126 if (mime_magic_matchlet->value)
127 free (mime_magic_matchlet->value);
128 if (mime_magic_matchlet->mask)
129 free (mime_magic_matchlet->mask);
130 free (mime_magic_matchlet);
135 /* Frees mime_magic_match and the remainder of its list
137 static void
138 _xdg_mime_magic_match_free (XdgMimeMagicMatch *mime_magic_match)
140 XdgMimeMagicMatch *ptr, *next;
142 ptr = mime_magic_match;
143 while (ptr)
145 next = ptr->next;
147 if (ptr->mime_type)
148 free ((void *) ptr->mime_type);
149 if (ptr->matchlet)
150 _xdg_mime_magic_matchlet_free (ptr->matchlet);
151 free (ptr);
153 ptr = next;
157 /* Reads in a hunk of data until a newline character or a '\000' is hit. The
158 * returned string is null terminated, and doesn't include the newline.
160 static unsigned char *
161 _xdg_mime_magic_read_to_newline (FILE *magic_file,
162 int *end_of_file)
164 unsigned char *retval;
165 int c;
166 int len, pos;
168 len = 128;
169 pos = 0;
170 retval = malloc (len);
171 *end_of_file = FALSE;
173 while (TRUE)
175 c = getc_unlocked (magic_file);
176 if (c == EOF)
178 *end_of_file = TRUE;
179 break;
181 if (c == '\n' || c == '\000')
182 break;
183 retval[pos++] = (unsigned char) c;
184 if (pos % 128 == 127)
186 len = len + 128;
187 retval = realloc (retval, len);
191 retval[pos] = '\000';
192 return retval;
/* Returns the number read from the file, or -1 if no number could be read.
 *
 * Decimal digits are consumed from magic_file; the first non-digit is pushed
 * back onto the stream.  *end_of_file is set to non-zero if the stream ran
 * out while reading and is otherwise left untouched.  Values that do not fit
 * in an int also yield -1.
 */
static int
_xdg_mime_magic_read_a_number (FILE *magic_file,
                               int  *end_of_file)
{
  /* LONG_MAX is about 20 characters on my system */
#define MAX_NUMBER_SIZE 30
  char digits[MAX_NUMBER_SIZE + 1];
  int count = 0;
  int ch;
  long parsed;

  for (;;)
    {
      ch = getc_unlocked (magic_file);
      if (ch == EOF || !isdigit (ch))
        break;

      digits[count++] = (char) ch;
      if (count == MAX_NUMBER_SIZE)
        break;
    }

  if (ch == EOF)
    *end_of_file = 1;
  else if (!isdigit (ch))
    ungetc (ch, magic_file); /* leave the delimiter for the caller */

  if (count == 0)
    return -1;

  digits[count] = '\000';
  errno = 0;
  parsed = strtol (digits, NULL, 10);
  if (errno != 0 || parsed < INT_MIN || parsed > INT_MAX)
    return -1;

  return (int) parsed;
}
240 /* Headers are of the format:
241 * [<priority>:<mime-type>]
243 static XdgMimeMagicState
244 _xdg_mime_magic_parse_header (FILE *magic_file, XdgMimeMagicMatch *match)
246 int c;
247 char *buffer;
248 char *end_ptr;
249 int end_of_file = 0;
251 assert (magic_file != NULL);
252 assert (match != NULL);
254 c = getc_unlocked (magic_file);
255 if (c == EOF)
256 return XDG_MIME_MAGIC_EOF;
257 if (c != '[')
258 return XDG_MIME_MAGIC_ERROR;
260 match->priority = _xdg_mime_magic_read_a_number (magic_file, &end_of_file);
261 if (end_of_file)
262 return XDG_MIME_MAGIC_EOF;
263 if (match->priority == -1)
264 return XDG_MIME_MAGIC_ERROR;
266 c = getc_unlocked (magic_file);
267 if (c == EOF)
268 return XDG_MIME_MAGIC_EOF;
269 if (c != ':')
270 return XDG_MIME_MAGIC_ERROR;
272 buffer = (char *)_xdg_mime_magic_read_to_newline (magic_file, &end_of_file);
273 if (end_of_file)
274 return XDG_MIME_MAGIC_EOF;
276 end_ptr = buffer;
277 while (*end_ptr != ']' && *end_ptr != '\000' && *end_ptr != '\n')
278 end_ptr++;
279 if (*end_ptr != ']')
281 free (buffer);
282 return XDG_MIME_MAGIC_ERROR;
284 *end_ptr = '\000';
286 match->mime_type = strdup (buffer);
287 free (buffer);
289 return XDG_MIME_MAGIC_MAGIC;
292 static XdgMimeMagicState
293 _xdg_mime_magic_parse_error (FILE *magic_file)
295 int c;
297 while (1)
299 c = getc_unlocked (magic_file);
300 if (c == EOF)
301 return XDG_MIME_MAGIC_EOF;
302 if (c == '\n')
303 return XDG_MIME_MAGIC_SECTION;
307 /* Headers are of the format:
308 * [ indent ] ">" start-offset "=" value
309 * [ "&" mask ] [ "~" word-size ] [ "+" range-length ] "\n"
311 static XdgMimeMagicState
312 _xdg_mime_magic_parse_magic_line (FILE *magic_file,
313 XdgMimeMagicMatch *match)
315 XdgMimeMagicMatchlet *matchlet;
316 int c;
317 int end_of_file;
318 int indent = 0;
319 int bytes_read;
321 assert (magic_file != NULL);
323 /* Sniff the buffer to make sure it's a valid line */
324 c = getc_unlocked (magic_file);
325 if (c == EOF)
326 return XDG_MIME_MAGIC_EOF;
327 else if (c == '[')
329 ungetc (c, magic_file);
330 return XDG_MIME_MAGIC_SECTION;
332 else if (c == '\n')
333 return XDG_MIME_MAGIC_MAGIC;
335 /* At this point, it must be a digit or a '>' */
336 end_of_file = FALSE;
337 if (isdigit (c))
339 ungetc (c, magic_file);
340 indent = _xdg_mime_magic_read_a_number (magic_file, &end_of_file);
341 if (end_of_file)
342 return XDG_MIME_MAGIC_EOF;
343 if (indent == -1)
344 return XDG_MIME_MAGIC_ERROR;
345 c = getc_unlocked (magic_file);
346 if (c == EOF)
347 return XDG_MIME_MAGIC_EOF;
350 if (c != '>')
351 return XDG_MIME_MAGIC_ERROR;
353 matchlet = _xdg_mime_magic_matchlet_new ();
354 matchlet->indent = indent;
355 matchlet->offset = _xdg_mime_magic_read_a_number (magic_file, &end_of_file);
356 if (end_of_file)
358 _xdg_mime_magic_matchlet_free (matchlet);
359 return XDG_MIME_MAGIC_EOF;
361 if (matchlet->offset == -1)
363 _xdg_mime_magic_matchlet_free (matchlet);
364 return XDG_MIME_MAGIC_ERROR;
366 c = getc_unlocked (magic_file);
367 if (c == EOF)
369 _xdg_mime_magic_matchlet_free (matchlet);
370 return XDG_MIME_MAGIC_EOF;
372 else if (c != '=')
374 _xdg_mime_magic_matchlet_free (matchlet);
375 return XDG_MIME_MAGIC_ERROR;
378 /* Next two bytes determine how long the value is */
379 matchlet->value_length = 0;
380 c = getc_unlocked (magic_file);
381 if (c == EOF)
383 _xdg_mime_magic_matchlet_free (matchlet);
384 return XDG_MIME_MAGIC_EOF;
386 matchlet->value_length = c & 0xFF;
387 matchlet->value_length = matchlet->value_length << 8;
389 c = getc_unlocked (magic_file);
390 if (c == EOF)
392 _xdg_mime_magic_matchlet_free (matchlet);
393 return XDG_MIME_MAGIC_EOF;
395 matchlet->value_length = matchlet->value_length + (c & 0xFF);
397 matchlet->value = malloc (matchlet->value_length);
399 /* OOM */
400 if (matchlet->value == NULL)
402 _xdg_mime_magic_matchlet_free (matchlet);
403 return XDG_MIME_MAGIC_ERROR;
405 bytes_read = fread (matchlet->value, 1, matchlet->value_length, magic_file);
406 if (bytes_read != matchlet->value_length)
408 _xdg_mime_magic_matchlet_free (matchlet);
409 if (feof (magic_file))
410 return XDG_MIME_MAGIC_EOF;
411 else
412 return XDG_MIME_MAGIC_ERROR;
415 c = getc_unlocked (magic_file);
416 if (c == '&')
418 matchlet->mask = malloc (matchlet->value_length);
419 /* OOM */
420 if (matchlet->mask == NULL)
422 _xdg_mime_magic_matchlet_free (matchlet);
423 return XDG_MIME_MAGIC_ERROR;
425 bytes_read = fread (matchlet->mask, 1, matchlet->value_length, magic_file);
426 if (bytes_read != matchlet->value_length)
428 _xdg_mime_magic_matchlet_free (matchlet);
429 if (feof (magic_file))
430 return XDG_MIME_MAGIC_EOF;
431 else
432 return XDG_MIME_MAGIC_ERROR;
434 c = getc_unlocked (magic_file);
437 if (c == '~')
439 matchlet->word_size = _xdg_mime_magic_read_a_number (magic_file, &end_of_file);
440 if (end_of_file)
442 _xdg_mime_magic_matchlet_free (matchlet);
443 return XDG_MIME_MAGIC_EOF;
445 if (matchlet->word_size != 0 &&
446 matchlet->word_size != 1 &&
447 matchlet->word_size != 2 &&
448 matchlet->word_size != 4)
450 _xdg_mime_magic_matchlet_free (matchlet);
451 return XDG_MIME_MAGIC_ERROR;
453 c = getc_unlocked (magic_file);
456 if (c == '+')
458 matchlet->range_length = _xdg_mime_magic_read_a_number (magic_file, &end_of_file);
459 if (end_of_file)
461 _xdg_mime_magic_matchlet_free (matchlet);
462 return XDG_MIME_MAGIC_EOF;
464 if (matchlet->range_length == -1)
466 _xdg_mime_magic_matchlet_free (matchlet);
467 return XDG_MIME_MAGIC_ERROR;
469 c = getc_unlocked (magic_file);
473 if (c == '\n')
475 /* We clean up the matchlet, byte swapping if needed */
476 if (matchlet->word_size > 1)
478 int i;
479 if (matchlet->value_length % matchlet->word_size != 0)
481 _xdg_mime_magic_matchlet_free (matchlet);
482 return XDG_MIME_MAGIC_ERROR;
484 /* FIXME: need to get this defined in a <config.h> style file */
485 #if LITTLE_ENDIAN
486 for (i = 0; i < matchlet->value_length; i = i + matchlet->word_size)
488 if (matchlet->word_size == 2)
489 *((xdg_uint16_t *) matchlet->value + i) = SWAP_BE16_TO_LE16 (*((xdg_uint16_t *) (matchlet->value + i)));
490 else if (matchlet->word_size == 4)
491 *((xdg_uint32_t *) matchlet->value + i) = SWAP_BE32_TO_LE32 (*((xdg_uint32_t *) (matchlet->value + i)));
492 if (matchlet->mask)
494 if (matchlet->word_size == 2)
495 *((xdg_uint16_t *) matchlet->mask + i) = SWAP_BE16_TO_LE16 (*((xdg_uint16_t *) (matchlet->mask + i)));
496 else if (matchlet->word_size == 4)
497 *((xdg_uint32_t *) matchlet->mask + i) = SWAP_BE32_TO_LE32 (*((xdg_uint32_t *) (matchlet->mask + i)));
501 #endif
504 matchlet->next = match->matchlet;
505 match->matchlet = matchlet;
508 return XDG_MIME_MAGIC_MAGIC;
511 _xdg_mime_magic_matchlet_free (matchlet);
512 if (c == EOF)
513 return XDG_MIME_MAGIC_EOF;
515 return XDG_MIME_MAGIC_ERROR;
518 static int
519 _xdg_mime_magic_matchlet_compare_to_data (XdgMimeMagicMatchlet *matchlet,
520 const void *data,
521 size_t len)
523 int i, j;
524 for (i = matchlet->offset; i < matchlet->offset + matchlet->range_length; i++)
526 int valid_matchlet = TRUE;
528 if (i + matchlet->value_length > len)
529 return FALSE;
531 if (matchlet->mask)
533 for (j = 0; j < matchlet->value_length; j++)
535 if ((matchlet->value[j] & matchlet->mask[j]) !=
536 ((((unsigned char *) data)[j + i]) & matchlet->mask[j]))
538 valid_matchlet = FALSE;
539 break;
543 else
545 for (j = 0; j < matchlet->value_length; j++)
547 if (matchlet->value[j] != ((unsigned char *) data)[j + i])
549 valid_matchlet = FALSE;
550 break;
554 if (valid_matchlet)
555 return TRUE;
557 return FALSE;
560 static int
561 _xdg_mime_magic_matchlet_compare_level (XdgMimeMagicMatchlet *matchlet,
562 const void *data,
563 size_t len,
564 int indent)
566 while ((matchlet != NULL) && (matchlet->indent == indent))
568 if (_xdg_mime_magic_matchlet_compare_to_data (matchlet, data, len))
570 if ((matchlet->next == NULL) ||
571 (matchlet->next->indent <= indent))
572 return TRUE;
574 if (_xdg_mime_magic_matchlet_compare_level (matchlet->next,
575 data,
576 len,
577 indent + 1))
578 return TRUE;
583 matchlet = matchlet->next;
585 while (matchlet && matchlet->indent > indent);
588 return FALSE;
591 static int
592 _xdg_mime_magic_match_compare_to_data (XdgMimeMagicMatch *match,
593 const void *data,
594 size_t len)
596 return _xdg_mime_magic_matchlet_compare_level (match->matchlet, data, len, 0);
599 static void
600 _xdg_mime_magic_insert_match (XdgMimeMagic *mime_magic,
601 XdgMimeMagicMatch *match)
603 XdgMimeMagicMatch *list;
605 if (mime_magic->match_list == NULL)
607 mime_magic->match_list = match;
608 return;
611 if (match->priority > mime_magic->match_list->priority)
613 match->next = mime_magic->match_list;
614 mime_magic->match_list = match;
615 return;
618 list = mime_magic->match_list;
619 while (list->next != NULL)
621 if (list->next->priority < match->priority)
623 match->next = list->next;
624 list->next = match;
625 return;
627 list = list->next;
629 list->next = match;
630 match->next = NULL;
633 XdgMimeMagic *
634 _xdg_mime_magic_new (void)
636 return calloc (1, sizeof (XdgMimeMagic));
639 void
640 _xdg_mime_magic_free (XdgMimeMagic *mime_magic)
642 if (mime_magic) {
643 _xdg_mime_magic_match_free (mime_magic->match_list);
644 free (mime_magic);
649 _xdg_mime_magic_get_buffer_extents (XdgMimeMagic *mime_magic)
651 return mime_magic->max_extent;
654 static gboolean buffer_looks_like_text (const void *data, const size_t len)
656 gchar *end;
658 if (g_utf8_validate (data, len, (const gchar**)&end))
660 /* g_utf8_validate allows control characters */
661 int i;
662 for (i = 0; i < len; i++)
664 unsigned char c = ((const guchar *) data)[i];
665 if (c < 32 && c != '\r' && c != '\n' && c != '\t')
666 return FALSE;
668 return TRUE;
669 } else {
670 /* Check whether the string was truncated in the middle of
671 * a valid UTF8 char, or if we really have an invalid
672 * UTF8 string
674 gint remaining_bytes = len;
676 remaining_bytes -= (end-((gchar*)data));
678 if (g_utf8_get_char_validated(end, remaining_bytes) == -2)
679 return TRUE;
680 #if defined(HAVE_WCTYPE_H) && defined (HAVE_MBRTOWC)
681 else {
682 size_t wlen;
683 wchar_t wc;
684 gchar *src, *end;
685 mbstate_t state;
687 src = data;
688 end = data+len;
690 memset (&state, 0, sizeof (state));
691 while (src < end) {
692 /* Don't allow embedded zeros in textfiles */
693 if (*src == 0)
694 return FALSE;
696 wlen = mbrtowc(&wc, src, end - src, &state);
698 if (wlen == (size_t)(-1)) {
699 /* Illegal mb sequence */
700 return FALSE;
703 if (wlen == (size_t)(-2)) {
704 /* No complete mb char before end
705 * Probably a cut off char which is ok */
706 return TRUE;
709 if (wlen == 0) {
710 /* Don't allow embedded zeros in textfiles */
711 return FALSE;
714 if (!iswspace (wc) && !iswprint(wc)) {
715 /* Not a printable or whitspace
716 * Probably not a text file */
717 return FALSE;
720 src += wlen;
722 return TRUE;
724 #endif /* defined(HAVE_WCTYPE_H) && defined (HAVE_MBRTOWC) */
726 return FALSE;
730 const char *
731 _xdg_mime_magic_lookup_data (XdgMimeMagic *mime_magic,
732 const void *data,
733 size_t len)
735 XdgMimeMagicMatch *match;
736 const char *mime_type;
738 mime_type = NULL;
739 for (match = mime_magic->match_list; match; match = match->next)
741 if (_xdg_mime_magic_match_compare_to_data (match, data, len))
743 if ((mime_type == NULL) || (xdg_mime_mime_type_subclass (match->mime_type, mime_type))) {
744 mime_type = match->mime_type;
749 if (mime_type == NULL)
750 if (buffer_looks_like_text(data, len))
751 mime_type = XDG_MIME_TYPE_UNKNOWN_TEXT;
753 return mime_type;
756 static void
757 _xdg_mime_update_mime_magic_extents (XdgMimeMagic *mime_magic)
759 XdgMimeMagicMatch *match;
760 int max_extent = 0;
762 for (match = mime_magic->match_list; match; match = match->next)
764 XdgMimeMagicMatchlet *matchlet;
766 for (matchlet = match->matchlet; matchlet; matchlet = matchlet->next)
768 int extent;
770 extent = matchlet->value_length + matchlet->offset + matchlet->range_length;
771 if (max_extent < extent)
772 max_extent = extent;
776 mime_magic->max_extent = max_extent;
779 static XdgMimeMagicMatchlet *
780 _xdg_mime_magic_matchlet_mirror (XdgMimeMagicMatchlet *matchlets)
782 XdgMimeMagicMatchlet *new_list;
783 XdgMimeMagicMatchlet *tmp;
785 if ((matchlets == NULL) || (matchlets->next == NULL))
786 return matchlets;
788 new_list = NULL;
789 tmp = matchlets;
790 while (tmp != NULL)
792 XdgMimeMagicMatchlet *matchlet;
794 matchlet = tmp;
795 tmp = tmp->next;
796 matchlet->next = new_list;
797 new_list = matchlet;
800 return new_list;
804 static void
805 _xdg_mime_magic_read_magic_file (XdgMimeMagic *mime_magic,
806 FILE *magic_file)
808 XdgMimeMagicState state;
809 XdgMimeMagicMatch *match = NULL; /* Quiet compiler */
811 state = XDG_MIME_MAGIC_SECTION;
813 while (state != XDG_MIME_MAGIC_EOF)
815 switch (state)
817 case XDG_MIME_MAGIC_SECTION:
818 match = _xdg_mime_magic_match_new ();
819 state = _xdg_mime_magic_parse_header (magic_file, match);
820 if (state == XDG_MIME_MAGIC_EOF || state == XDG_MIME_MAGIC_ERROR)
821 _xdg_mime_magic_match_free (match);
822 break;
823 case XDG_MIME_MAGIC_MAGIC:
824 state = _xdg_mime_magic_parse_magic_line (magic_file, match);
825 if (state == XDG_MIME_MAGIC_SECTION ||
826 (state == XDG_MIME_MAGIC_EOF && match->mime_type))
828 match->matchlet = _xdg_mime_magic_matchlet_mirror (match->matchlet);
829 _xdg_mime_magic_insert_match (mime_magic, match);
831 else if (state == XDG_MIME_MAGIC_EOF || state == XDG_MIME_MAGIC_ERROR)
832 _xdg_mime_magic_match_free (match);
833 break;
834 case XDG_MIME_MAGIC_ERROR:
835 state = _xdg_mime_magic_parse_error (magic_file);
836 break;
837 case XDG_MIME_MAGIC_EOF:
838 default:
839 /* Make the compiler happy */
840 assert (0);
843 _xdg_mime_update_mime_magic_extents (mime_magic);
846 void
847 _xdg_mime_magic_read_from_file (XdgMimeMagic *mime_magic,
848 const char *file_name)
850 FILE *magic_file;
851 char header[12];
853 magic_file = fopen (file_name, "r");
855 if (magic_file == NULL)
856 return;
858 if (fread (header, 1, 12, magic_file) == 12)
860 if (memcmp ("MIME-Magic\0\n", header, 12) == 0)
861 _xdg_mime_magic_read_magic_file (mime_magic, magic_file);
864 fclose (magic_file);