1 /* -*- mode: C; c-file-style: "gnu" -*- */
2 /* xdgmimemagic.c: Private file. Data structure for storing magic files.
4 * More info can be found at http://www.freedesktop.org/standards/
6 * Copyright (C) 2003 Red Hat, Inc.
7 * Copyright (C) 2003 Jonathan Blandford <jrb@alum.mit.edu>
9 * Licensed under the Academic Free License version 2.0
10 * Or under the following terms:
12 * This library is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2 of the License, or (at your option) any later version.
17 * This library is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with this library; if not, write to the
24 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25 * Boston, MA 02111-1307, USA.
34 #include "xdgmimemagic.h"
35 #include "xdgmimeint.h"
53 typedef struct XdgMimeMagicMatch XdgMimeMagicMatch
;
54 typedef struct XdgMimeMagicMatchlet XdgMimeMagicMatchlet
;
58 XDG_MIME_MAGIC_SECTION
,
64 struct XdgMimeMagicMatch
66 const char *mime_type
;
68 XdgMimeMagicMatchlet
*matchlet
;
69 XdgMimeMagicMatch
*next
;
73 struct XdgMimeMagicMatchlet
77 unsigned int value_length
;
80 unsigned int range_length
;
81 unsigned int word_size
;
82 XdgMimeMagicMatchlet
*next
;
88 XdgMimeMagicMatch
*match_list
;
92 static XdgMimeMagicMatch
*
93 _xdg_mime_magic_match_new (void)
95 return calloc (1, sizeof (XdgMimeMagicMatch
));
99 static XdgMimeMagicMatchlet
*
100 _xdg_mime_magic_matchlet_new (void)
102 XdgMimeMagicMatchlet
*matchlet
;
104 matchlet
= malloc (sizeof (XdgMimeMagicMatchlet
));
106 matchlet
->indent
= 0;
107 matchlet
->offset
= 0;
108 matchlet
->value_length
= 0;
109 matchlet
->value
= NULL
;
110 matchlet
->mask
= NULL
;
111 matchlet
->range_length
= 1;
112 matchlet
->word_size
= 1;
113 matchlet
->next
= NULL
;
120 _xdg_mime_magic_matchlet_free (XdgMimeMagicMatchlet
*mime_magic_matchlet
)
122 if (mime_magic_matchlet
)
124 if (mime_magic_matchlet
->next
)
125 _xdg_mime_magic_matchlet_free (mime_magic_matchlet
->next
);
126 if (mime_magic_matchlet
->value
)
127 free (mime_magic_matchlet
->value
);
128 if (mime_magic_matchlet
->mask
)
129 free (mime_magic_matchlet
->mask
);
130 free (mime_magic_matchlet
);
135 /* Frees mime_magic_match and the remainder of its list
138 _xdg_mime_magic_match_free (XdgMimeMagicMatch
*mime_magic_match
)
140 XdgMimeMagicMatch
*ptr
, *next
;
142 ptr
= mime_magic_match
;
148 free ((void *) ptr
->mime_type
);
150 _xdg_mime_magic_matchlet_free (ptr
->matchlet
);
157 /* Reads in a hunk of data until a newline character or a '\000' is hit. The
158 * returned string is null terminated, and doesn't include the newline.
160 static unsigned char *
161 _xdg_mime_magic_read_to_newline (FILE *magic_file
,
164 unsigned char *retval
;
170 retval
= malloc (len
);
171 *end_of_file
= FALSE
;
175 c
= getc_unlocked (magic_file
);
181 if (c
== '\n' || c
== '\000')
183 retval
[pos
++] = (unsigned char) c
;
184 if (pos
% 128 == 127)
187 retval
= realloc (retval
, len
);
191 retval
[pos
] = '\000';
195 /* Returns the number read from the file, or -1 if no number could be read.
198 _xdg_mime_magic_read_a_number (FILE *magic_file
,
201 /* LONG_MAX is about 20 characters on my system */
202 #define MAX_NUMBER_SIZE 30
203 char number_string
[MAX_NUMBER_SIZE
+ 1];
210 c
= getc_unlocked (magic_file
);
219 ungetc (c
, magic_file
);
222 number_string
[pos
] = (char) c
;
224 if (pos
== MAX_NUMBER_SIZE
)
229 number_string
[pos
] = '\000';
231 retval
= strtol (number_string
, NULL
, 10);
233 if ((retval
< INT_MIN
) || (retval
> INT_MAX
) || (errno
!= 0))
240 /* Headers are of the format:
241 * [<priority>:<mime-type>]
243 static XdgMimeMagicState
244 _xdg_mime_magic_parse_header (FILE *magic_file
, XdgMimeMagicMatch
*match
)
251 assert (magic_file
!= NULL
);
252 assert (match
!= NULL
);
254 c
= getc_unlocked (magic_file
);
256 return XDG_MIME_MAGIC_EOF
;
258 return XDG_MIME_MAGIC_ERROR
;
260 match
->priority
= _xdg_mime_magic_read_a_number (magic_file
, &end_of_file
);
262 return XDG_MIME_MAGIC_EOF
;
263 if (match
->priority
== -1)
264 return XDG_MIME_MAGIC_ERROR
;
266 c
= getc_unlocked (magic_file
);
268 return XDG_MIME_MAGIC_EOF
;
270 return XDG_MIME_MAGIC_ERROR
;
272 buffer
= (char *)_xdg_mime_magic_read_to_newline (magic_file
, &end_of_file
);
274 return XDG_MIME_MAGIC_EOF
;
277 while (*end_ptr
!= ']' && *end_ptr
!= '\000' && *end_ptr
!= '\n')
282 return XDG_MIME_MAGIC_ERROR
;
286 match
->mime_type
= strdup (buffer
);
289 return XDG_MIME_MAGIC_MAGIC
;
292 static XdgMimeMagicState
293 _xdg_mime_magic_parse_error (FILE *magic_file
)
299 c
= getc_unlocked (magic_file
);
301 return XDG_MIME_MAGIC_EOF
;
303 return XDG_MIME_MAGIC_SECTION
;
307 /* Magic lines are of the format:
308 * [ indent ] ">" start-offset "=" value
309 * [ "&" mask ] [ "~" word-size ] [ "+" range-length ] "\n"
311 static XdgMimeMagicState
312 _xdg_mime_magic_parse_magic_line (FILE *magic_file
,
313 XdgMimeMagicMatch
*match
)
315 XdgMimeMagicMatchlet
*matchlet
;
321 assert (magic_file
!= NULL
);
323 /* Sniff the buffer to make sure it's a valid line */
324 c
= getc_unlocked (magic_file
);
326 return XDG_MIME_MAGIC_EOF
;
329 ungetc (c
, magic_file
);
330 return XDG_MIME_MAGIC_SECTION
;
333 return XDG_MIME_MAGIC_MAGIC
;
335 /* At this point, it must be a digit or a '>' */
339 ungetc (c
, magic_file
);
340 indent
= _xdg_mime_magic_read_a_number (magic_file
, &end_of_file
);
342 return XDG_MIME_MAGIC_EOF
;
344 return XDG_MIME_MAGIC_ERROR
;
345 c
= getc_unlocked (magic_file
);
347 return XDG_MIME_MAGIC_EOF
;
351 return XDG_MIME_MAGIC_ERROR
;
353 matchlet
= _xdg_mime_magic_matchlet_new ();
354 matchlet
->indent
= indent
;
355 matchlet
->offset
= _xdg_mime_magic_read_a_number (magic_file
, &end_of_file
);
358 _xdg_mime_magic_matchlet_free (matchlet
);
359 return XDG_MIME_MAGIC_EOF
;
361 if (matchlet
->offset
== -1)
363 _xdg_mime_magic_matchlet_free (matchlet
);
364 return XDG_MIME_MAGIC_ERROR
;
366 c
= getc_unlocked (magic_file
);
369 _xdg_mime_magic_matchlet_free (matchlet
);
370 return XDG_MIME_MAGIC_EOF
;
374 _xdg_mime_magic_matchlet_free (matchlet
);
375 return XDG_MIME_MAGIC_ERROR
;
378 /* Next two bytes determine how long the value is */
379 matchlet
->value_length
= 0;
380 c
= getc_unlocked (magic_file
);
383 _xdg_mime_magic_matchlet_free (matchlet
);
384 return XDG_MIME_MAGIC_EOF
;
386 matchlet
->value_length
= c
& 0xFF;
387 matchlet
->value_length
= matchlet
->value_length
<< 8;
389 c
= getc_unlocked (magic_file
);
392 _xdg_mime_magic_matchlet_free (matchlet
);
393 return XDG_MIME_MAGIC_EOF
;
395 matchlet
->value_length
= matchlet
->value_length
+ (c
& 0xFF);
397 matchlet
->value
= malloc (matchlet
->value_length
);
400 if (matchlet
->value
== NULL
)
402 _xdg_mime_magic_matchlet_free (matchlet
);
403 return XDG_MIME_MAGIC_ERROR
;
405 bytes_read
= fread (matchlet
->value
, 1, matchlet
->value_length
, magic_file
);
406 if (bytes_read
!= matchlet
->value_length
)
408 _xdg_mime_magic_matchlet_free (matchlet
);
409 if (feof (magic_file
))
410 return XDG_MIME_MAGIC_EOF
;
412 return XDG_MIME_MAGIC_ERROR
;
415 c
= getc_unlocked (magic_file
);
418 matchlet
->mask
= malloc (matchlet
->value_length
);
420 if (matchlet
->mask
== NULL
)
422 _xdg_mime_magic_matchlet_free (matchlet
);
423 return XDG_MIME_MAGIC_ERROR
;
425 bytes_read
= fread (matchlet
->mask
, 1, matchlet
->value_length
, magic_file
);
426 if (bytes_read
!= matchlet
->value_length
)
428 _xdg_mime_magic_matchlet_free (matchlet
);
429 if (feof (magic_file
))
430 return XDG_MIME_MAGIC_EOF
;
432 return XDG_MIME_MAGIC_ERROR
;
434 c
= getc_unlocked (magic_file
);
439 matchlet
->word_size
= _xdg_mime_magic_read_a_number (magic_file
, &end_of_file
);
442 _xdg_mime_magic_matchlet_free (matchlet
);
443 return XDG_MIME_MAGIC_EOF
;
445 if (matchlet
->word_size
!= 0 &&
446 matchlet
->word_size
!= 1 &&
447 matchlet
->word_size
!= 2 &&
448 matchlet
->word_size
!= 4)
450 _xdg_mime_magic_matchlet_free (matchlet
);
451 return XDG_MIME_MAGIC_ERROR
;
453 c
= getc_unlocked (magic_file
);
458 matchlet
->range_length
= _xdg_mime_magic_read_a_number (magic_file
, &end_of_file
);
461 _xdg_mime_magic_matchlet_free (matchlet
);
462 return XDG_MIME_MAGIC_EOF
;
464 if (matchlet
->range_length
== -1)
466 _xdg_mime_magic_matchlet_free (matchlet
);
467 return XDG_MIME_MAGIC_ERROR
;
469 c
= getc_unlocked (magic_file
);
475 /* We clean up the matchlet, byte swapping if needed */
476 if (matchlet
->word_size
> 1)
479 if (matchlet
->value_length
% matchlet
->word_size
!= 0)
481 _xdg_mime_magic_matchlet_free (matchlet
);
482 return XDG_MIME_MAGIC_ERROR
;
484 /* FIXME: need to get this defined in a <config.h> style file */
486 for (i
= 0; i
< matchlet
->value_length
; i
= i
+ matchlet
->word_size
)
488 if (matchlet
->word_size
== 2)
489 *((xdg_uint16_t
*) matchlet
->value
+ i
) = SWAP_BE16_TO_LE16 (*((xdg_uint16_t
*) (matchlet
->value
+ i
)));
490 else if (matchlet
->word_size
== 4)
491 *((xdg_uint32_t
*) matchlet
->value
+ i
) = SWAP_BE32_TO_LE32 (*((xdg_uint32_t
*) (matchlet
->value
+ i
)));
494 if (matchlet
->word_size
== 2)
495 *((xdg_uint16_t
*) matchlet
->mask
+ i
) = SWAP_BE16_TO_LE16 (*((xdg_uint16_t
*) (matchlet
->mask
+ i
)));
496 else if (matchlet
->word_size
== 4)
497 *((xdg_uint32_t
*) matchlet
->mask
+ i
) = SWAP_BE32_TO_LE32 (*((xdg_uint32_t
*) (matchlet
->mask
+ i
)));
504 matchlet
->next
= match
->matchlet
;
505 match
->matchlet
= matchlet
;
508 return XDG_MIME_MAGIC_MAGIC
;
511 _xdg_mime_magic_matchlet_free (matchlet
);
513 return XDG_MIME_MAGIC_EOF
;
515 return XDG_MIME_MAGIC_ERROR
;
519 _xdg_mime_magic_matchlet_compare_to_data (XdgMimeMagicMatchlet
*matchlet
,
524 for (i
= matchlet
->offset
; i
< matchlet
->offset
+ matchlet
->range_length
; i
++)
526 int valid_matchlet
= TRUE
;
528 if (i
+ matchlet
->value_length
> len
)
533 for (j
= 0; j
< matchlet
->value_length
; j
++)
535 if ((matchlet
->value
[j
] & matchlet
->mask
[j
]) !=
536 ((((unsigned char *) data
)[j
+ i
]) & matchlet
->mask
[j
]))
538 valid_matchlet
= FALSE
;
545 for (j
= 0; j
< matchlet
->value_length
; j
++)
547 if (matchlet
->value
[j
] != ((unsigned char *) data
)[j
+ i
])
549 valid_matchlet
= FALSE
;
561 _xdg_mime_magic_matchlet_compare_level (XdgMimeMagicMatchlet
*matchlet
,
566 while ((matchlet
!= NULL
) && (matchlet
->indent
== indent
))
568 if (_xdg_mime_magic_matchlet_compare_to_data (matchlet
, data
, len
))
570 if ((matchlet
->next
== NULL
) ||
571 (matchlet
->next
->indent
<= indent
))
574 if (_xdg_mime_magic_matchlet_compare_level (matchlet
->next
,
583 matchlet
= matchlet
->next
;
585 while (matchlet
&& matchlet
->indent
> indent
);
592 _xdg_mime_magic_match_compare_to_data (XdgMimeMagicMatch
*match
,
596 return _xdg_mime_magic_matchlet_compare_level (match
->matchlet
, data
, len
, 0);
600 _xdg_mime_magic_insert_match (XdgMimeMagic
*mime_magic
,
601 XdgMimeMagicMatch
*match
)
603 XdgMimeMagicMatch
*list
;
605 if (mime_magic
->match_list
== NULL
)
607 mime_magic
->match_list
= match
;
611 if (match
->priority
> mime_magic
->match_list
->priority
)
613 match
->next
= mime_magic
->match_list
;
614 mime_magic
->match_list
= match
;
618 list
= mime_magic
->match_list
;
619 while (list
->next
!= NULL
)
621 if (list
->next
->priority
< match
->priority
)
623 match
->next
= list
->next
;
634 _xdg_mime_magic_new (void)
636 return calloc (1, sizeof (XdgMimeMagic
));
640 _xdg_mime_magic_free (XdgMimeMagic
*mime_magic
)
643 _xdg_mime_magic_match_free (mime_magic
->match_list
);
649 _xdg_mime_magic_get_buffer_extents (XdgMimeMagic
*mime_magic
)
651 return mime_magic
->max_extent
;
654 static gboolean
buffer_looks_like_text (const void *data
, const size_t len
)
658 if (g_utf8_validate (data
, len
, (const gchar
**)&end
))
660 /* g_utf8_validate allows control characters */
662 for (i
= 0; i
< len
; i
++)
664 unsigned char c
= ((const guchar
*) data
)[i
];
665 if (c
< 32 && c
!= '\r' && c
!= '\n' && c
!= '\t')
670 /* Check whether the string was truncated in the middle of
671 * a valid UTF8 char, or if we really have an invalid
674 gint remaining_bytes
= len
;
676 remaining_bytes
-= (end
-((gchar
*)data
));
678 if (g_utf8_get_char_validated(end
, remaining_bytes
) == -2)
680 #if defined(HAVE_WCTYPE_H) && defined (HAVE_MBRTOWC)
690 memset (&state
, 0, sizeof (state
));
692 /* Don't allow embedded zeros in textfiles */
696 wlen
= mbrtowc(&wc
, src
, end
- src
, &state
);
698 if (wlen
== (size_t)(-1)) {
699 /* Illegal mb sequence */
703 if (wlen
== (size_t)(-2)) {
704 /* No complete mb char before end
705 * Probably a cut off char which is ok */
710 /* Don't allow embedded zeros in textfiles */
714 if (!iswspace (wc
) && !iswprint(wc
)) {
715 /* Not a printable or whitespace character
716 * Probably not a text file */
724 #endif /* defined(HAVE_WCTYPE_H) && defined (HAVE_MBRTOWC) */
731 _xdg_mime_magic_lookup_data (XdgMimeMagic
*mime_magic
,
735 XdgMimeMagicMatch
*match
;
736 const char *mime_type
;
739 for (match
= mime_magic
->match_list
; match
; match
= match
->next
)
741 if (_xdg_mime_magic_match_compare_to_data (match
, data
, len
))
743 if ((mime_type
== NULL
) || (xdg_mime_mime_type_subclass (match
->mime_type
, mime_type
))) {
744 mime_type
= match
->mime_type
;
749 if (mime_type
== NULL
)
750 if (buffer_looks_like_text(data
, len
))
751 mime_type
= XDG_MIME_TYPE_UNKNOWN_TEXT
;
757 _xdg_mime_update_mime_magic_extents (XdgMimeMagic
*mime_magic
)
759 XdgMimeMagicMatch
*match
;
762 for (match
= mime_magic
->match_list
; match
; match
= match
->next
)
764 XdgMimeMagicMatchlet
*matchlet
;
766 for (matchlet
= match
->matchlet
; matchlet
; matchlet
= matchlet
->next
)
770 extent
= matchlet
->value_length
+ matchlet
->offset
+ matchlet
->range_length
;
771 if (max_extent
< extent
)
776 mime_magic
->max_extent
= max_extent
;
779 static XdgMimeMagicMatchlet
*
780 _xdg_mime_magic_matchlet_mirror (XdgMimeMagicMatchlet
*matchlets
)
782 XdgMimeMagicMatchlet
*new_list
;
783 XdgMimeMagicMatchlet
*tmp
;
785 if ((matchlets
== NULL
) || (matchlets
->next
== NULL
))
792 XdgMimeMagicMatchlet
*matchlet
;
796 matchlet
->next
= new_list
;
805 _xdg_mime_magic_read_magic_file (XdgMimeMagic
*mime_magic
,
808 XdgMimeMagicState state
;
809 XdgMimeMagicMatch
*match
= NULL
; /* Quiet compiler */
811 state
= XDG_MIME_MAGIC_SECTION
;
813 while (state
!= XDG_MIME_MAGIC_EOF
)
817 case XDG_MIME_MAGIC_SECTION
:
818 match
= _xdg_mime_magic_match_new ();
819 state
= _xdg_mime_magic_parse_header (magic_file
, match
);
820 if (state
== XDG_MIME_MAGIC_EOF
|| state
== XDG_MIME_MAGIC_ERROR
)
821 _xdg_mime_magic_match_free (match
);
823 case XDG_MIME_MAGIC_MAGIC
:
824 state
= _xdg_mime_magic_parse_magic_line (magic_file
, match
);
825 if (state
== XDG_MIME_MAGIC_SECTION
||
826 (state
== XDG_MIME_MAGIC_EOF
&& match
->mime_type
))
828 match
->matchlet
= _xdg_mime_magic_matchlet_mirror (match
->matchlet
);
829 _xdg_mime_magic_insert_match (mime_magic
, match
);
831 else if (state
== XDG_MIME_MAGIC_EOF
|| state
== XDG_MIME_MAGIC_ERROR
)
832 _xdg_mime_magic_match_free (match
);
834 case XDG_MIME_MAGIC_ERROR
:
835 state
= _xdg_mime_magic_parse_error (magic_file
);
837 case XDG_MIME_MAGIC_EOF
:
839 /* Make the compiler happy */
843 _xdg_mime_update_mime_magic_extents (mime_magic
);
847 _xdg_mime_magic_read_from_file (XdgMimeMagic
*mime_magic
,
848 const char *file_name
)
853 magic_file
= fopen (file_name
, "r");
855 if (magic_file
== NULL
)
858 if (fread (header
, 1, 12, magic_file
) == 12)
860 if (memcmp ("MIME-Magic\0\n", header
, 12) == 0)
861 _xdg_mime_magic_read_magic_file (mime_magic
, magic_file
);