utf8: add unit test for g_utf8_make_valid
[glib.git] / gio / gvdb / gvdb-reader.c
blob1b80e70040688cb35b190a635d7211ae1d65607b
1 /*
2 * Copyright © 2010 Codethink Limited
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the licence, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
17 * Author: Ryan Lortie <desrt@desrt.ca>
20 #include "gvdb-reader.h"
21 #include "gvdb-format.h"
23 #include <string.h>
25 struct _GvdbTable {
26 gint ref_count;
28 const gchar *data;
29 gsize size;
31 gpointer user_data;
32 GvdbRefFunc ref_user_data;
33 GDestroyNotify unref_user_data;
35 gboolean byteswapped;
36 gboolean trusted;
38 const guint32_le *bloom_words;
39 guint32 n_bloom_words;
40 guint bloom_shift;
42 const guint32_le *hash_buckets;
43 guint32 n_buckets;
45 struct gvdb_hash_item *hash_items;
46 guint32 n_hash_items;
49 static const gchar *
50 gvdb_table_item_get_key (GvdbTable *file,
51 const struct gvdb_hash_item *item,
52 gsize *size)
54 guint32 start, end;
56 start = guint32_from_le (item->key_start);
57 *size = guint16_from_le (item->key_size);
58 end = start + *size;
60 if G_UNLIKELY (start > end || end > file->size)
61 return NULL;
63 return file->data + start;
66 static gconstpointer
67 gvdb_table_dereference (GvdbTable *file,
68 const struct gvdb_pointer *pointer,
69 gint alignment,
70 gsize *size)
72 guint32 start, end;
74 start = guint32_from_le (pointer->start);
75 end = guint32_from_le (pointer->end);
77 if G_UNLIKELY (start > end || end > file->size || start & (alignment - 1))
78 return NULL;
80 *size = end - start;
82 return file->data + start;
85 static void
86 gvdb_table_setup_root (GvdbTable *file,
87 const struct gvdb_pointer *pointer)
89 const struct gvdb_hash_header *header;
90 guint32 n_bloom_words;
91 guint32 n_buckets;
92 gsize size;
94 header = gvdb_table_dereference (file, pointer, 4, &size);
96 if G_UNLIKELY (header == NULL || size < sizeof *header)
97 return;
99 size -= sizeof *header;
101 n_bloom_words = guint32_from_le (header->n_bloom_words);
102 n_buckets = guint32_from_le (header->n_buckets);
103 n_bloom_words &= (1u << 27) - 1;
105 if G_UNLIKELY (n_bloom_words * sizeof (guint32_le) > size)
106 return;
108 file->bloom_words = (gpointer) (header + 1);
109 size -= n_bloom_words * sizeof (guint32_le);
110 file->n_bloom_words = n_bloom_words;
112 if G_UNLIKELY (n_buckets > G_MAXUINT / sizeof (guint32_le) ||
113 n_buckets * sizeof (guint32_le) > size)
114 return;
116 file->hash_buckets = file->bloom_words + file->n_bloom_words;
117 size -= n_buckets * sizeof (guint32_le);
118 file->n_buckets = n_buckets;
120 if G_UNLIKELY (size % sizeof (struct gvdb_hash_item))
121 return;
123 file->hash_items = (gpointer) (file->hash_buckets + n_buckets);
124 file->n_hash_items = size / sizeof (struct gvdb_hash_item);
127 static GvdbTable *
128 new_from_data (const void *data,
129 gsize data_len,
130 gboolean trusted,
131 gpointer user_data,
132 GvdbRefFunc ref,
133 GDestroyNotify unref,
134 const char *filename,
135 GError **error)
137 GvdbTable *file;
139 file = g_slice_new0 (GvdbTable);
140 file->data = data;
141 file->size = data_len;
142 file->trusted = trusted;
143 file->ref_count = 1;
144 file->ref_user_data = ref;
145 file->unref_user_data = unref;
146 file->user_data = user_data;
148 if (sizeof (struct gvdb_header) <= file->size)
150 const struct gvdb_header *header = (gpointer) file->data;
152 if (header->signature[0] == GVDB_SIGNATURE0 &&
153 header->signature[1] == GVDB_SIGNATURE1 &&
154 guint32_from_le (header->version) == 0)
155 file->byteswapped = FALSE;
157 else if (header->signature[0] == GVDB_SWAPPED_SIGNATURE0 &&
158 header->signature[1] == GVDB_SWAPPED_SIGNATURE1 &&
159 guint32_from_le (header->version) == 0)
160 file->byteswapped = TRUE;
162 else
164 if (filename)
165 g_set_error (error, G_FILE_ERROR, G_FILE_ERROR_INVAL,
166 "%s: invalid header", filename);
167 else
168 g_set_error (error, G_FILE_ERROR, G_FILE_ERROR_INVAL,
169 "invalid gvdb header");
170 g_slice_free (GvdbTable, file);
171 if (unref)
172 unref (user_data);
174 return NULL;
177 gvdb_table_setup_root (file, &header->root);
180 return file;
184 * gvdb_table_new:
185 * @filename: the path to the hash file
186 * @trusted: if the contents of @filename are trusted
187 * @error: %NULL, or a pointer to a %NULL #GError
189 * Creates a new #GvdbTable from the contents of the file found at
190 * @filename.
192 * The only time this function fails is if the file cannot be opened.
193 * In that case, the #GError that is returned will be an error from
194 * g_mapped_file_new().
196 * An empty or otherwise corrupted file is considered to be a valid
197 * #GvdbTable with no entries.
199 * You should call gvdb_table_unref() on the return result when you no
200 * longer require it.
202 * Returns: a new #GvdbTable
204 GvdbTable *
205 gvdb_table_new (const gchar *filename,
206 gboolean trusted,
207 GError **error)
209 GMappedFile *mapped;
211 if ((mapped = g_mapped_file_new (filename, FALSE, error)) == NULL)
212 return NULL;
214 return new_from_data (g_mapped_file_get_contents (mapped),
215 g_mapped_file_get_length (mapped),
216 trusted,
217 mapped,
218 (GvdbRefFunc)g_mapped_file_ref,
219 (GDestroyNotify)g_mapped_file_unref,
220 filename,
221 error);
225 * gvdb_table_new_from_data:
226 * @data: the data
227 * @data_len: the length of @data in bytes
228 * @trusted: if the contents of @data are trusted
229 * @user_data: User supplied data that owns @data
230 * @ref: Ref function for @user_data
231 * @unref: Unref function for @user_data
233 * Creates a new #GvdbTable from the data in @data.
235 * An empty or otherwise corrupted data is considered to be a valid
236 * #GvdbTable with no entries.
238 * You should call gvdb_table_unref() on the return result when you no
239 * longer require it.
241 * Returns: a new #GvdbTable
243 GvdbTable *
244 gvdb_table_new_from_data (const void *data,
245 gsize data_len,
246 gboolean trusted,
247 gpointer user_data,
248 GvdbRefFunc ref,
249 GDestroyNotify unref,
250 GError **error)
252 return new_from_data (data, data_len,
253 trusted,
254 user_data, ref, unref,
255 NULL,
256 error);
259 static gboolean
260 gvdb_table_bloom_filter (GvdbTable *file,
261 guint32 hash_value)
263 guint32 word, mask;
265 if (file->n_bloom_words == 0)
266 return TRUE;
268 word = (hash_value / 32) % file->n_bloom_words;
269 mask = 1 << (hash_value & 31);
270 mask |= 1 << ((hash_value >> file->bloom_shift) & 31);
272 return (guint32_from_le (file->bloom_words[word]) & mask) == mask;
275 static gboolean
276 gvdb_table_check_name (GvdbTable *file,
277 struct gvdb_hash_item *item,
278 const gchar *key,
279 guint key_length)
281 const gchar *this_key;
282 gsize this_size;
283 guint32 parent;
285 this_key = gvdb_table_item_get_key (file, item, &this_size);
287 if G_UNLIKELY (this_key == NULL || this_size > key_length)
288 return FALSE;
290 key_length -= this_size;
292 if G_UNLIKELY (memcmp (this_key, key + key_length, this_size) != 0)
293 return FALSE;
295 parent = guint32_from_le (item->parent);
296 if (key_length == 0 && parent == 0xffffffffu)
297 return TRUE;
299 if G_LIKELY (parent < file->n_hash_items && this_size > 0)
300 return gvdb_table_check_name (file,
301 &file->hash_items[parent],
302 key, key_length);
304 return FALSE;
307 static const struct gvdb_hash_item *
308 gvdb_table_lookup (GvdbTable *file,
309 const gchar *key,
310 gchar type)
312 guint32 hash_value = 5381;
313 guint key_length;
314 guint32 bucket;
315 guint32 lastno;
316 guint32 itemno;
318 if G_UNLIKELY (file->n_buckets == 0 || file->n_hash_items == 0)
319 return NULL;
321 for (key_length = 0; key[key_length]; key_length++)
322 hash_value = (hash_value * 33) + ((signed char *) key)[key_length];
324 if (!gvdb_table_bloom_filter (file, hash_value))
325 return NULL;
327 bucket = hash_value % file->n_buckets;
328 itemno = guint32_from_le (file->hash_buckets[bucket]);
330 if (bucket == file->n_buckets - 1 ||
331 (lastno = guint32_from_le(file->hash_buckets[bucket + 1])) > file->n_hash_items)
332 lastno = file->n_hash_items;
334 while G_LIKELY (itemno < lastno)
336 struct gvdb_hash_item *item = &file->hash_items[itemno];
338 if (hash_value == guint32_from_le (item->hash_value))
339 if G_LIKELY (gvdb_table_check_name (file, item, key, key_length))
340 if G_LIKELY (item->type == type)
341 return item;
343 itemno++;
346 return NULL;
349 static const struct gvdb_hash_item *
350 gvdb_table_get_item (GvdbTable *table,
351 guint32_le item_no)
353 guint32 item_no_native = guint32_from_le (item_no);
355 if G_LIKELY (item_no_native < table->n_hash_items)
356 return table->hash_items + item_no_native;
358 return NULL;
361 static gboolean
362 gvdb_table_list_from_item (GvdbTable *table,
363 const struct gvdb_hash_item *item,
364 const guint32_le **list,
365 guint *length)
367 gsize size;
369 *list = gvdb_table_dereference (table, &item->value.pointer, 4, &size);
371 if G_LIKELY (*list == NULL || size % 4)
372 return FALSE;
374 *length = size / 4;
376 return TRUE;
381 * gvdb_table_list:
382 * @file: a #GvdbTable
383 * @key: a string
385 * List all of the keys that appear below @key. The nesting of keys
386 * within the hash file is defined by the program that created the hash
387 * file. One thing is constant: each item in the returned array can be
388 * concatenated to @key to obtain the full name of that key.
390 * It is not possible to tell from this function if a given key is
391 * itself a path, a value, or another hash table; you are expected to
392 * know this for yourself.
394 * You should call g_strfreev() on the return result when you no longer
395 * require it.
397 * Returns: a %NULL-terminated string array
399 gchar **
400 gvdb_table_list (GvdbTable *file,
401 const gchar *key)
403 const struct gvdb_hash_item *item;
404 const guint32_le *list;
405 gchar **strv;
406 guint length;
407 guint i;
409 if ((item = gvdb_table_lookup (file, key, 'L')) == NULL)
410 return NULL;
412 if (!gvdb_table_list_from_item (file, item, &list, &length))
413 return NULL;
415 strv = g_new (gchar *, length + 1);
416 for (i = 0; i < length; i++)
418 guint32 itemno = guint32_from_le (list[i]);
420 if (itemno < file->n_hash_items)
422 const struct gvdb_hash_item *item;
423 const gchar *string;
424 gsize strsize;
426 item = file->hash_items + itemno;
428 string = gvdb_table_item_get_key (file, item, &strsize);
430 if (string != NULL)
431 strv[i] = g_strndup (string, strsize);
432 else
433 strv[i] = g_malloc0 (1);
435 else
436 strv[i] = g_malloc0 (1);
439 strv[i] = NULL;
441 return strv;
445 * gvdb_table_has_value:
446 * @file: a #GvdbTable
447 * @key: a string
449 * Checks for a value named @key in @file.
451 * Note: this function does not consider non-value nodes (other hash
452 * tables, for example).
454 * Returns: %TRUE if @key is in the table
456 gboolean
457 gvdb_table_has_value (GvdbTable *file,
458 const gchar *key)
460 return gvdb_table_lookup (file, key, 'v') != NULL;
463 static GVariant *
464 gvdb_table_value_from_item (GvdbTable *table,
465 const struct gvdb_hash_item *item)
467 GVariant *variant, *value;
468 gconstpointer data;
469 gsize size;
471 data = gvdb_table_dereference (table, &item->value.pointer, 8, &size);
473 if G_UNLIKELY (data == NULL)
474 return NULL;
476 variant = g_variant_new_from_data (G_VARIANT_TYPE_VARIANT,
477 data, size, table->trusted,
478 table->unref_user_data,
479 table->ref_user_data ? table->ref_user_data (table->user_data) : table->user_data);
480 value = g_variant_get_variant (variant);
481 g_variant_unref (variant);
483 return value;
487 * gvdb_table_get_value:
488 * @file: a #GvdbTable
489 * @key: a string
491 * Looks up a value named @key in @file.
493 * If the value is not found then %NULL is returned. Otherwise, a new
494 * #GVariant instance is returned. The #GVariant does not depend on the
495 * continued existence of @file.
497 * You should call g_variant_unref() on the return result when you no
498 * longer require it.
500 * Returns: a #GVariant, or %NULL
502 GVariant *
503 gvdb_table_get_value (GvdbTable *file,
504 const gchar *key)
506 const struct gvdb_hash_item *item;
507 GVariant *value;
509 if ((item = gvdb_table_lookup (file, key, 'v')) == NULL)
510 return NULL;
512 value = gvdb_table_value_from_item (file, item);
514 if (value && file->byteswapped)
516 GVariant *tmp;
518 tmp = g_variant_byteswap (value);
519 g_variant_unref (value);
520 value = tmp;
523 return value;
527 * gvdb_table_get_raw_value:
528 * @table: a #GvdbTable
529 * @key: a string
531 * Looks up a value named @key in @file.
533 * This call is equivalent to gvdb_table_get_value() except that it
534 * never byteswaps the value.
536 * Returns: a #GVariant, or %NULL
538 GVariant *
539 gvdb_table_get_raw_value (GvdbTable *table,
540 const gchar *key)
542 const struct gvdb_hash_item *item;
544 if ((item = gvdb_table_lookup (table, key, 'v')) == NULL)
545 return NULL;
547 return gvdb_table_value_from_item (table, item);
551 * gvdb_table_get_table:
552 * @file: a #GvdbTable
553 * @key: a string
555 * Looks up the hash table named @key in @file.
557 * The toplevel hash table in a #GvdbTable can contain reference to
558 * child hash tables (and those can contain further references...).
560 * If @key is not found in @file then %NULL is returned. Otherwise, a
561 * new #GvdbTable is returned, referring to the child hashtable as
562 * contained in the file. This newly-created #GvdbTable does not depend
563 * on the continued existence of @file.
565 * You should call gvdb_table_unref() on the return result when you no
566 * longer require it.
568 * Returns: a new #GvdbTable, or %NULL
570 GvdbTable *
571 gvdb_table_get_table (GvdbTable *file,
572 const gchar *key)
574 const struct gvdb_hash_item *item;
575 GvdbTable *new;
577 item = gvdb_table_lookup (file, key, 'H');
579 if (item == NULL)
580 return NULL;
582 new = g_slice_new0 (GvdbTable);
583 new->user_data = file->ref_user_data ? file->ref_user_data (file->user_data) : file->user_data;
584 new->ref_user_data = file->ref_user_data;
585 new->unref_user_data = file->unref_user_data;
586 new->byteswapped = file->byteswapped;
587 new->trusted = file->trusted;
588 new->data = file->data;
589 new->size = file->size;
590 new->ref_count = 1;
592 gvdb_table_setup_root (new, &item->value.pointer);
594 return new;
598 * gvdb_table_ref:
599 * @file: a #GvdbTable
601 * Increases the reference count on @file.
603 * Returns: a new reference on @file
605 GvdbTable *
606 gvdb_table_ref (GvdbTable *file)
608 g_atomic_int_inc (&file->ref_count);
610 return file;
614 * gvdb_table_unref:
615 * @file: a #GvdbTable
617 * Decreases the reference count on @file, possibly freeing it.
619 * Since: 2.26
621 void
622 gvdb_table_unref (GvdbTable *file)
624 if (g_atomic_int_dec_and_test (&file->ref_count))
626 if (file->unref_user_data)
627 file->unref_user_data (file->user_data);
628 g_slice_free (GvdbTable, file);
633 * gvdb_table_is_valid:
634 * @table: a #GvdbTable
636 * Checks if the table is still valid.
638 * An on-disk GVDB can be marked as invalid. This happens when the file
639 * has been replaced. The appropriate action is typically to reopen the
640 * file.
642 * Returns: %TRUE if @table is still valid
644 gboolean
645 gvdb_table_is_valid (GvdbTable *table)
647 return !!*table->data;
651 * gvdb_table_walk:
652 * @table: a #GvdbTable
653 * @key: a key corresponding to a list
654 * @open_func: the #GvdbWalkOpenFunc
655 * @value_func: the #GvdbWalkValueFunc
656 * @close_func: the #GvdbWalkCloseFunc
657 * @user_data: data to pass to the callbacks
659 * Looks up the list at @key and iterate over the items in it.
661 * First, @open_func is called to signal that we are starting to iterate over
662 * the list. Then the list is iterated. When all items in the list have been
663 * iterated over, the @close_func is called.
665 * When iterating, if a given item in the list is a value then @value_func is
666 * called.
668 * If a given item in the list is itself a list then @open_func is called. If
669 * that function returns %TRUE then the walk begins iterating the items in the
670 * sublist, until there are no more items, at which point a matching
671 * @close_func call is made. If @open_func returns %FALSE then no iteration of
672 * the sublist occurs and no corresponding @close_func call is made.
674 void
675 gvdb_table_walk (GvdbTable *table,
676 const gchar *key,
677 GvdbWalkOpenFunc open_func,
678 GvdbWalkValueFunc value_func,
679 GvdbWalkCloseFunc close_func,
680 gpointer user_data)
682 const struct gvdb_hash_item *item;
683 const guint32_le *pointers[64];
684 const guint32_le *enders[64];
685 gsize name_lengths[64];
686 gint index = 0;
688 item = gvdb_table_lookup (table, key, 'L');
689 name_lengths[0] = 0;
690 pointers[0] = NULL;
691 enders[0] = NULL;
692 goto start_here;
694 while (index)
696 close_func (name_lengths[index], user_data);
697 index--;
699 while (pointers[index] < enders[index])
701 const gchar *name;
702 gsize name_len;
704 item = gvdb_table_get_item (table, *pointers[index]++);
705 start_here:
707 if (item != NULL &&
708 (name = gvdb_table_item_get_key (table, item, &name_len)))
710 if (item->type == 'L')
712 if (open_func (name, name_len, user_data))
714 guint length = 0;
716 index++;
717 g_assert (index < 64);
719 gvdb_table_list_from_item (table, item,
720 &pointers[index],
721 &length);
722 enders[index] = pointers[index] + length;
723 name_lengths[index] = name_len;
726 else if (item->type == 'v')
728 GVariant *value;
730 value = gvdb_table_value_from_item (table, item);
732 if (value != NULL)
734 if (table->byteswapped)
736 GVariant *tmp;
738 tmp = g_variant_byteswap (value);
739 g_variant_unref (value);
740 value = tmp;
743 value_func (name, name_len, value, user_data);
744 g_variant_unref (value);