cvsimport
[beagle.git] / glue / xdgmime / xdgmimecache.c
blobd08c89b1cd71b588fd3ccf28d6e31036389bfe9c
1 /* -*- mode: C; c-file-style: "gnu" -*- */
2 /* xdgmimecache.c: Private file. mmappable caches for mime data
4 * More info can be found at http://www.freedesktop.org/standards/
6 * Copyright (C) 2005 Matthias Clasen <mclasen@redhat.com>
8 * Licensed under the Academic Free License version 2.0
9 * Or under the following terms:
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2 of the License, or (at your option) any later version.
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with this library; if not, write to the
23 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 * Boston, MA 02111-1307, USA.
27 #ifdef HAVE_CONFIG_H
28 #include <config.h>
29 #endif
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
35 #include <fcntl.h>
36 #include <unistd.h>
37 #include <fnmatch.h>
38 #include <assert.h>
40 #include <netinet/in.h> /* for ntohl/ntohs */
42 #ifdef HAVE_MMAP
43 #include <sys/mman.h>
44 #endif
46 #include <sys/stat.h>
47 #include <sys/types.h>
49 #include "xdgmimecache.h"
50 #include "xdgmimeint.h"
52 #ifndef MAX
53 #define MAX(a,b) ((a) > (b) ? (a) : (b))
54 #endif
56 #ifndef FALSE
57 #define FALSE (0)
58 #endif
60 #ifndef TRUE
61 #define TRUE (!FALSE)
62 #endif
64 #ifndef _O_BINARY
65 #define _O_BINARY 0
66 #endif
68 #ifndef MAP_FAILED
69 #define MAP_FAILED ((void *) -1)
70 #endif
72 #define MAJOR_VERSION 1
73 #define MINOR_VERSION 0
/* One memory-mapped cache file together with the bookkeeping needed to
 * share it between users and release it again. */
struct _XdgMimeCache
{
  int ref_count;  /* live references; the cache is released when this hits 0 */

  size_t size;    /* length in bytes of the mapping that starts at buffer */
  char *buffer;   /* start of the mapped cache contents */
};
/* Fetch an unsigned 16-/32-bit integer stored in network byte order at
 * OFFSET bytes into the byte buffer CACHE, converted to host order. */
#define GET_UINT16(cache,offset) (ntohs (*(xdg_uint16_t *) ((cache) + (offset))))
#define GET_UINT32(cache,offset) (ntohl (*(xdg_uint32_t *) ((cache) + (offset))))
86 XdgMimeCache *
87 _xdg_mime_cache_ref (XdgMimeCache *cache)
89 cache->ref_count++;
90 return cache;
93 void
94 _xdg_mime_cache_unref (XdgMimeCache *cache)
96 cache->ref_count--;
98 if (cache->ref_count == 0)
100 #ifdef HAVE_MMAP
101 munmap (cache->buffer, cache->size);
102 #endif
103 free (cache);
107 XdgMimeCache *
108 _xdg_mime_cache_new_from_file (const char *file_name)
110 XdgMimeCache *cache = NULL;
112 #ifdef HAVE_MMAP
113 int fd = -1;
114 struct stat st;
115 char *buffer = NULL;
117 /* Open the file and map it into memory */
118 fd = open (file_name, O_RDONLY|_O_BINARY, 0);
120 if (fd < 0)
121 return NULL;
123 if (fstat (fd, &st) < 0 || st.st_size < 4)
124 goto done;
126 buffer = (char *) mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
128 if (buffer == MAP_FAILED)
129 goto done;
131 /* Verify version */
132 if (GET_UINT16 (buffer, 0) != MAJOR_VERSION ||
133 GET_UINT16 (buffer, 2) != MINOR_VERSION)
135 munmap (buffer, st.st_size);
137 goto done;
140 cache = (XdgMimeCache *) malloc (sizeof (XdgMimeCache));
141 cache->ref_count = 1;
142 cache->buffer = buffer;
143 cache->size = st.st_size;
145 done:
146 if (fd != -1)
147 close (fd);
149 #endif /* HAVE_MMAP */
151 return cache;
154 static int
155 cache_magic_matchlet_compare_to_data (XdgMimeCache *cache,
156 xdg_uint32_t offset,
157 const void *data,
158 size_t len)
160 xdg_uint32_t range_start = GET_UINT32 (cache->buffer, offset);
161 xdg_uint32_t range_length = GET_UINT32 (cache->buffer, offset + 4);
162 xdg_uint32_t data_length = GET_UINT32 (cache->buffer, offset + 12);
163 xdg_uint32_t data_offset = GET_UINT32 (cache->buffer, offset + 16);
164 xdg_uint32_t mask_offset = GET_UINT32 (cache->buffer, offset + 20);
166 int i, j;
168 for (i = range_start; i <= range_start + range_length; i++)
170 int valid_matchlet = TRUE;
172 if (i + data_length > len)
173 return FALSE;
175 if (mask_offset)
177 for (j = 0; j < data_length; j++)
179 if ((((unsigned char *)cache->buffer)[data_offset + j] & ((unsigned char *)cache->buffer)[mask_offset + j]) !=
180 ((((unsigned char *) data)[j + i]) & ((unsigned char *)cache->buffer)[mask_offset + j]))
182 valid_matchlet = FALSE;
183 break;
187 else
189 for (j = 0; j < data_length; j++)
191 if (((unsigned char *)cache->buffer)[data_offset + j] != ((unsigned char *) data)[j + i])
193 valid_matchlet = FALSE;
194 break;
199 if (valid_matchlet)
200 return TRUE;
203 return FALSE;
206 static int
207 cache_magic_matchlet_compare (XdgMimeCache *cache,
208 xdg_uint32_t offset,
209 const void *data,
210 size_t len)
212 xdg_uint32_t n_children = GET_UINT32 (cache->buffer, offset + 24);
213 xdg_uint32_t child_offset = GET_UINT32 (cache->buffer, offset + 28);
215 int i;
217 if (cache_magic_matchlet_compare_to_data (cache, offset, data, len))
219 if (n_children == 0)
220 return TRUE;
222 for (i = 0; i < n_children; i++)
224 if (cache_magic_matchlet_compare (cache, child_offset + 32 * i,
225 data, len))
226 return TRUE;
230 return FALSE;
233 static const char *
234 cache_magic_compare_to_data (XdgMimeCache *cache,
235 xdg_uint32_t offset,
236 const void *data,
237 size_t len,
238 int *prio)
240 xdg_uint32_t priority = GET_UINT32 (cache->buffer, offset);
241 xdg_uint32_t mimetype_offset = GET_UINT32 (cache->buffer, offset + 4);
242 xdg_uint32_t n_matchlets = GET_UINT32 (cache->buffer, offset + 8);
243 xdg_uint32_t matchlet_offset = GET_UINT32 (cache->buffer, offset + 12);
245 int i;
247 for (i = 0; i < n_matchlets; i++)
249 if (cache_magic_matchlet_compare (cache, matchlet_offset + i * 32,
250 data, len))
252 *prio = priority;
254 return cache->buffer + mimetype_offset;
258 return NULL;
261 static const char *
262 cache_magic_lookup_data (XdgMimeCache *cache,
263 const void *data,
264 size_t len,
265 int *prio,
266 const char *mime_types[],
267 int n_mime_types)
269 xdg_uint32_t list_offset;
270 xdg_uint32_t n_entries;
271 xdg_uint32_t offset;
273 int j, n;
275 *prio = 0;
277 list_offset = GET_UINT32 (cache->buffer, 24);
278 n_entries = GET_UINT32 (cache->buffer, list_offset);
279 offset = GET_UINT32 (cache->buffer, list_offset + 8);
281 for (j = 0; j < n_entries; j++)
283 const char *match;
285 match = cache_magic_compare_to_data (cache, offset + 16 * j,
286 data, len, prio);
287 if (match)
288 return match;
289 else
291 xdg_uint32_t mimetype_offset;
292 const char *non_match;
294 mimetype_offset = GET_UINT32 (cache->buffer, offset + 16 * j + 4);
295 non_match = cache->buffer + mimetype_offset;
297 for (n = 0; n < n_mime_types; n++)
299 if (mime_types[n] &&
300 xdg_mime_mime_type_equal (mime_types[n], non_match))
301 mime_types[n] = NULL;
306 return NULL;
309 static const char *
310 cache_alias_lookup (const char *alias)
312 const char *ptr;
313 int i, min, max, mid, cmp;
315 for (i = 0; _caches[i]; i++)
317 XdgMimeCache *cache = _caches[i];
318 xdg_uint32_t list_offset = GET_UINT32 (cache->buffer, 4);
319 xdg_uint32_t n_entries = GET_UINT32 (cache->buffer, list_offset);
320 xdg_uint32_t offset;
322 min = 0;
323 max = n_entries - 1;
324 while (max >= min)
326 mid = (min + max) / 2;
328 offset = GET_UINT32 (cache->buffer, list_offset + 4 + 8 * mid);
329 ptr = cache->buffer + offset;
330 cmp = strcmp (ptr, alias);
332 if (cmp < 0)
333 min = mid + 1;
334 else if (cmp > 0)
335 max = mid - 1;
336 else
338 offset = GET_UINT32 (cache->buffer, list_offset + 4 + 8 * mid + 4);
339 return cache->buffer + offset;
344 return NULL;
347 static int
348 cache_glob_lookup_literal (const char *file_name,
349 const char *mime_types[],
350 int n_mime_types)
352 const char *ptr;
353 int i, min, max, mid, cmp;
355 for (i = 0; _caches[i]; i++)
357 XdgMimeCache *cache = _caches[i];
358 xdg_uint32_t list_offset = GET_UINT32 (cache->buffer, 12);
359 xdg_uint32_t n_entries = GET_UINT32 (cache->buffer, list_offset);
360 xdg_uint32_t offset;
362 min = 0;
363 max = n_entries - 1;
364 while (max >= min)
366 mid = (min + max) / 2;
368 offset = GET_UINT32 (cache->buffer, list_offset + 4 + 8 * mid);
369 ptr = cache->buffer + offset;
370 cmp = strcmp (ptr, file_name);
372 if (cmp < 0)
373 min = mid + 1;
374 else if (cmp > 0)
375 max = mid - 1;
376 else
378 offset = GET_UINT32 (cache->buffer, list_offset + 4 + 8 * mid + 4);
379 mime_types[0] = (const char *)(cache->buffer + offset);
381 return 1;
386 return 0;
389 static int
390 cache_glob_lookup_fnmatch (const char *file_name,
391 const char *mime_types[],
392 int n_mime_types)
394 const char *mime_type;
395 const char *ptr;
397 int i, j, n;
399 n = 0;
400 for (i = 0; _caches[i]; i++)
402 XdgMimeCache *cache = _caches[i];
404 xdg_uint32_t list_offset = GET_UINT32 (cache->buffer, 20);
405 xdg_uint32_t n_entries = GET_UINT32 (cache->buffer, list_offset);
407 for (j = 0; j < n_entries && n < n_mime_types; j++)
409 xdg_uint32_t offset = GET_UINT32 (cache->buffer, list_offset + 4 + 8 * j);
410 xdg_uint32_t mimetype_offset = GET_UINT32 (cache->buffer, list_offset + 4 + 8 * j + 4);
411 ptr = cache->buffer + offset;
412 mime_type = cache->buffer + mimetype_offset;
414 /* FIXME: Not UTF-8 safe */
415 if (fnmatch (ptr, file_name, 0) == 0)
416 mime_types[n++] = mime_type;
419 if (n > 0)
420 return n;
423 return 0;
426 static int
427 cache_glob_node_lookup_suffix (XdgMimeCache *cache,
428 xdg_uint32_t n_entries,
429 xdg_uint32_t offset,
430 const char *suffix,
431 int ignore_case,
432 const char *mime_types[],
433 int n_mime_types)
435 xdg_unichar_t character;
436 xdg_unichar_t match_char;
437 xdg_uint32_t mimetype_offset;
438 xdg_uint32_t n_children;
439 xdg_uint32_t child_offset;
441 int min, max, mid, n, i;
443 character = _xdg_utf8_to_ucs4 (suffix);
444 if (ignore_case)
445 character = _xdg_ucs4_to_lower (character);
447 min = 0;
448 max = n_entries - 1;
449 while (max >= min)
451 mid = (min + max) / 2;
453 match_char = GET_UINT32 (cache->buffer, offset + 16 * mid);
455 if (match_char < character)
456 min = mid + 1;
457 else if (match_char > character)
458 max = mid - 1;
459 else
461 suffix = _xdg_utf8_next_char (suffix);
462 if (*suffix == '\0')
464 mimetype_offset = GET_UINT32 (cache->buffer, offset + 16 * mid + 4);
465 n = 0;
466 mime_types[n++] = cache->buffer + mimetype_offset;
468 n_children = GET_UINT32 (cache->buffer, offset + 16 * mid + 8);
469 child_offset = GET_UINT32 (cache->buffer, offset + 16 * mid + 12);
470 i = 0;
471 while (n < n_mime_types && i < n_children)
473 match_char = GET_UINT32 (cache->buffer, child_offset + 16 * i);
474 mimetype_offset = GET_UINT32 (cache->buffer, offset + 16 * i + 4);
475 if (match_char != 0)
476 break;
478 mime_types[n++] = cache->buffer + mimetype_offset;
479 i++;
482 return n;
484 else
486 n_children = GET_UINT32 (cache->buffer, offset + 16 * mid + 8);
487 child_offset = GET_UINT32 (cache->buffer, offset + 16 * mid + 12);
489 return cache_glob_node_lookup_suffix (cache,
490 n_children, child_offset,
491 suffix, ignore_case,
492 mime_types,
493 n_mime_types);
498 return 0;
501 static int
502 cache_glob_lookup_suffix (const char *suffix,
503 int ignore_case,
504 const char *mime_types[],
505 int n_mime_types)
507 int i, n;
509 for (i = 0; _caches[i]; i++)
511 XdgMimeCache *cache = _caches[i];
513 xdg_uint32_t list_offset = GET_UINT32 (cache->buffer, 16);
514 xdg_uint32_t n_entries = GET_UINT32 (cache->buffer, list_offset);
515 xdg_uint32_t offset = GET_UINT32 (cache->buffer, list_offset + 4);
517 n = cache_glob_node_lookup_suffix (cache,
518 n_entries, offset,
519 suffix, ignore_case,
520 mime_types,
521 n_mime_types);
522 if (n > 0)
523 return n;
526 return 0;
529 static void
530 find_stopchars (char *stopchars)
532 int i, j, k, l;
534 k = 0;
535 for (i = 0; _caches[i]; i++)
537 XdgMimeCache *cache = _caches[i];
539 xdg_uint32_t list_offset = GET_UINT32 (cache->buffer, 16);
540 xdg_uint32_t n_entries = GET_UINT32 (cache->buffer, list_offset);
541 xdg_uint32_t offset = GET_UINT32 (cache->buffer, list_offset + 4);
543 for (j = 0; j < n_entries; j++)
545 xdg_uint32_t match_char = GET_UINT32 (cache->buffer, offset);
547 if (match_char < 128)
549 for (l = 0; l < k; l++)
550 if (stopchars[l] == match_char)
551 break;
552 if (l == k)
554 stopchars[k] = (char) match_char;
555 k++;
559 offset += 16;
563 stopchars[k] = '\0';
566 static int
567 cache_glob_lookup_file_name (const char *file_name,
568 const char *mime_types[],
569 int n_mime_types)
571 const char *ptr;
572 char stopchars[128];
573 int n;
575 assert (file_name != NULL);
577 /* First, check the literals */
578 n = cache_glob_lookup_literal (file_name, mime_types, n_mime_types);
579 if (n > 0)
580 return n;
582 find_stopchars (stopchars);
584 /* Next, check suffixes */
585 ptr = strpbrk (file_name, stopchars);
586 while (ptr)
588 n = cache_glob_lookup_suffix (ptr, FALSE, mime_types, n_mime_types);
589 if (n > 0)
590 return n;
592 n = cache_glob_lookup_suffix (ptr, TRUE, mime_types, n_mime_types);
593 if (n > 0)
594 return n;
596 ptr = strpbrk (ptr + 1, stopchars);
599 /* Last, try fnmatch */
600 return cache_glob_lookup_fnmatch (file_name, mime_types, n_mime_types);
604 _xdg_mime_cache_get_max_buffer_extents (void)
606 xdg_uint32_t offset;
607 xdg_uint32_t max_extent;
608 int i;
610 max_extent = 0;
611 for (i = 0; _caches[i]; i++)
613 XdgMimeCache *cache = _caches[i];
615 offset = GET_UINT32 (cache->buffer, 24);
616 max_extent = MAX (max_extent, GET_UINT32 (cache->buffer, offset + 4));
619 return max_extent;
622 static const char *
623 cache_get_mime_type_for_data (const void *data,
624 size_t len,
625 const char *mime_types[],
626 int n_mime_types)
628 const char *mime_type;
629 int i, n, priority;
631 priority = 0;
632 mime_type = NULL;
633 for (i = 0; _caches[i]; i++)
635 XdgMimeCache *cache = _caches[i];
637 int prio;
638 const char *match;
640 match = cache_magic_lookup_data (cache, data, len, &prio,
641 mime_types, n_mime_types);
642 if (prio > priority)
644 priority = prio;
645 mime_type = match;
649 if (priority > 0)
650 return mime_type;
652 for (n = 0; n < n_mime_types; n++)
654 if (mime_types[n])
655 return mime_types[n];
658 return XDG_MIME_TYPE_UNKNOWN;
/* Guess the mime type of the LEN bytes at DATA from content alone, with
 * no file-name candidates.  Returns XDG_MIME_TYPE_UNKNOWN if no magic
 * rule matches. */
const char *
_xdg_mime_cache_get_mime_type_for_data (const void *data,
                                        size_t      len)
{
  const char **no_candidates = NULL;

  return cache_get_mime_type_for_data (data, len, no_candidates, 0);
}
668 const char *
669 _xdg_mime_cache_get_mime_type_for_file (const char *file_name,
670 struct stat *statbuf)
672 const char *mime_type;
673 const char *mime_types[2];
674 FILE *file;
675 unsigned char *data;
676 int max_extent;
677 int bytes_read;
678 struct stat buf;
679 const char *base_name;
680 int n;
682 if (file_name == NULL)
683 return NULL;
685 if (! _xdg_utf8_validate (file_name))
686 return NULL;
688 base_name = _xdg_get_base_name (file_name);
689 n = cache_glob_lookup_file_name (base_name, mime_types, 2);
691 if (n == 1)
692 return mime_types[0];
694 if (!statbuf)
696 if (stat (file_name, &buf) != 0)
697 return XDG_MIME_TYPE_UNKNOWN;
699 statbuf = &buf;
702 if (!S_ISREG (statbuf->st_mode))
703 return XDG_MIME_TYPE_UNKNOWN;
705 /* FIXME: Need to make sure that max_extent isn't totally broken. This could
706 * be large and need getting from a stream instead of just reading it all
707 * in. */
708 max_extent = _xdg_mime_cache_get_max_buffer_extents ();
709 data = malloc (max_extent);
710 if (data == NULL)
711 return XDG_MIME_TYPE_UNKNOWN;
713 file = fopen (file_name, "r");
714 if (file == NULL)
716 free (data);
717 return XDG_MIME_TYPE_UNKNOWN;
720 bytes_read = fread (data, 1, max_extent, file);
721 if (ferror (file))
723 free (data);
724 fclose (file);
725 return XDG_MIME_TYPE_UNKNOWN;
728 mime_type = cache_get_mime_type_for_data (data, bytes_read,
729 mime_types, n);
731 free (data);
732 fclose (file);
734 return mime_type;
737 const char *
738 _xdg_mime_cache_get_mime_type_from_file_name (const char *file_name)
740 const char *mime_type;
742 if (cache_glob_lookup_file_name (file_name, &mime_type, 1))
743 return mime_type;
744 else
745 return XDG_MIME_TYPE_UNKNOWN;
748 #if 1
/* Return 1 if MIME names a whole media type, i.e. its last two characters
 * are a slash followed by an asterisk (as in "text" + slash + star), and
 * 0 otherwise.  Strings shorter than two characters are never super
 * types; checking the length first keeps the suffix comparison from
 * reading before the start of the string. */
static int
is_super_type (const char *mime)
{
  size_t length = strlen (mime);

  if (length < 2)
    return 0;

  return strcmp (mime + length - 2, "/*") == 0;
}
763 #endif
766 _xdg_mime_cache_mime_type_subclass (const char *mime,
767 const char *base)
769 const char *umime, *ubase;
771 int i, j, min, max, med, cmp;
773 umime = _xdg_mime_cache_unalias_mime_type (mime);
774 ubase = _xdg_mime_cache_unalias_mime_type (base);
776 if (strcmp (umime, ubase) == 0)
777 return 1;
779 /* We really want to handle text/ * in GtkFileFilter, so we just
780 * turn on the supertype matching
782 #if 1
783 /* Handle supertypes */
784 if (is_super_type (ubase) &&
785 xdg_mime_media_type_equal (umime, ubase))
786 return 1;
787 #endif
789 /* Handle special cases text/plain and application/octet-stream */
790 if (strcmp (ubase, "text/plain") == 0 &&
791 strncmp (umime, "text/", 5) == 0)
792 return 1;
794 if (strcmp (ubase, "application/octet-stream") == 0)
795 return 1;
797 for (i = 0; _caches[i]; i++)
799 XdgMimeCache *cache = _caches[i];
801 xdg_uint32_t list_offset = GET_UINT32 (cache->buffer, 8);
802 xdg_uint32_t n_entries = GET_UINT32 (cache->buffer, list_offset);
803 xdg_uint32_t offset, n_parents, parent_offset;
805 min = 0;
806 max = n_entries - 1;
807 while (max >= min)
809 med = (min + max)/2;
811 offset = GET_UINT32 (cache->buffer, list_offset + 4 + 8 * med);
812 cmp = strcmp (cache->buffer + offset, umime);
813 if (cmp < 0)
814 min = med + 1;
815 else if (cmp > 0)
816 max = med - 1;
817 else
819 offset = GET_UINT32 (cache->buffer, list_offset + 4 + 8 * med + 4);
820 n_parents = GET_UINT32 (cache->buffer, offset);
822 for (j = 0; j < n_parents; j++)
824 parent_offset = GET_UINT32 (cache->buffer, offset + 4 + 4 * j);
825 if (_xdg_mime_cache_mime_type_subclass (cache->buffer + parent_offset, ubase))
826 return 1;
829 break;
834 return 0;
/* Resolve MIME through the alias lists of the loaded caches.  Returns the
 * alias target when MIME is a known alias, otherwise MIME itself. */
const char *
_xdg_mime_cache_unalias_mime_type (const char *mime)
{
  const char *target = cache_alias_lookup (mime);

  return target ? target : mime;
}
850 char **
851 _xdg_mime_cache_list_mime_parents (const char *mime)
853 int i, j, p;
854 char *all_parents[128]; /* we'll stop at 128 */
855 char **result;
857 p = 0;
858 for (i = 0; _caches[i]; i++)
860 XdgMimeCache *cache = _caches[i];
862 xdg_uint32_t list_offset = GET_UINT32 (cache->buffer, 8);
863 xdg_uint32_t n_entries = GET_UINT32 (cache->buffer, list_offset);
865 for (j = 0; j < n_entries; j++)
867 xdg_uint32_t mimetype_offset = GET_UINT32 (cache->buffer, list_offset + 4 + 8 * i);
868 xdg_uint32_t parents_offset = GET_UINT32 (cache->buffer, list_offset + 4 + 8 * i + 4);
870 if (strcmp (cache->buffer + mimetype_offset, mime) == 0)
872 xdg_uint32_t n_parents = GET_UINT32 (cache->buffer, parents_offset);
874 for (j = 0; j < n_parents; j++)
875 all_parents[p++] = cache->buffer + parents_offset + 4 + 4 * j;
877 break;
881 all_parents[p++] = 0;
883 result = (char **) malloc (p * sizeof (char *));
884 memcpy (result, all_parents, p * sizeof (char *));
886 return result;