4 * Purple is the legal property of its developers, whose names are too
5 * numerous to list here. Please refer to the COPYRIGHT file distributed
6 * with this source distribution
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or (at
11 * your option) any later version.
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA
28 #include "memorypool.h"
30 /* A single internal (that don't have any children) consists
31 * of 256 + 4 pointers. That's 1040 bytes on 32-bit machine or 2080 bytes
34 * Thus, in 10500-byte pool block we can hold about 5-10 internal states.
35 * Threshold of 100 states means, we'd need 10-20 "small" blocks before
36 * switching to ~1-2 large blocks.
38 #define PURPLE_TRIE_LARGE_THRESHOLD 100
39 #define PURPLE_TRIE_STATES_SMALL_POOL_BLOCK_SIZE 10880
40 #define PURPLE_TRIE_STATES_LARGE_POOL_BLOCK_SIZE 102400
42 typedef struct _PurpleTrieRecord PurpleTrieRecord
;
43 typedef struct _PurpleTrieState PurpleTrieState
;
44 typedef struct _PurpleTrieRecordList PurpleTrieRecordList
;
49 * The trie object instance.
58 gboolean reset_on_match
;
60 PurpleMemoryPool
*records_str_mempool
;
61 PurpleMemoryPool
*records_obj_mempool
;
62 PurpleTrieRecordList
*records
;
63 GHashTable
*records_map
;
64 gsize records_total_size
;
66 PurpleMemoryPool
*states_mempool
;
67 PurpleTrieState
*root_state
;
70 struct _PurpleTrieRecord
77 struct _PurpleTrieRecordList
79 PurpleTrieRecord
*rec
;
80 PurpleTrieRecordList
*next
;
81 PurpleTrieRecordList
*prev
;
86 struct _PurpleTrieState
88 PurpleTrieState
*parent
;
89 PurpleTrieState
**children
;
91 PurpleTrieState
*longest_suffix
;
93 PurpleTrieRecord
*found_word
;
98 PurpleTrieState
*state
;
100 PurpleTrieState
*root_state
;
101 gboolean reset_on_match
;
103 PurpleTrieReplaceCb replace_cb
;
104 PurpleTrieFindCb find_cb
;
108 /* TODO: an option to make it eager or lazy (now, it's eager) */
116 static GParamSpec
*properties
[PROP_LAST
];
118 G_DEFINE_TYPE_WITH_PRIVATE(PurpleTrie
, purple_trie
, G_TYPE_OBJECT
);
120 /*******************************************************************************
122 ******************************************************************************/
124 static PurpleTrieRecordList
*
125 purple_record_list_new(PurpleMemoryPool
*mpool
, PurpleTrieRecord
*rec
)
127 PurpleTrieRecordList
*node
;
129 node
= purple_memory_pool_alloc0(mpool
,
130 sizeof(PurpleTrieRecordList
), sizeof(gpointer
));
131 g_return_val_if_fail(node
!= NULL
, NULL
);
138 static PurpleTrieRecordList
*
139 purple_record_list_prepend(PurpleMemoryPool
*mpool
,
140 PurpleTrieRecordList
*old_head
, PurpleTrieRecord
*rec
)
142 PurpleTrieRecordList
*new_head
;
144 new_head
= purple_record_list_new(mpool
, rec
);
145 g_return_val_if_fail(new_head
!= NULL
, NULL
);
147 new_head
->next
= old_head
;
149 old_head
->prev
= new_head
;
154 static PurpleTrieRecordList
*
155 purple_record_list_copy(PurpleMemoryPool
*mpool
,
156 const PurpleTrieRecordList
*head
)
158 PurpleTrieRecordList
*new_head
= NULL
, *new_tail
= NULL
;
161 PurpleTrieRecordList
*node
;
163 node
= purple_record_list_new(mpool
, head
->rec
);
164 g_return_val_if_fail(node
!= NULL
, NULL
); /* there is no leak */
166 node
->prev
= new_tail
;
168 new_tail
->next
= node
;
179 static PurpleTrieRecordList
*
180 purple_record_list_remove(PurpleTrieRecordList
*head
,
181 PurpleTrieRecordList
*node
)
183 g_return_val_if_fail(head
!= NULL
, NULL
);
184 g_return_val_if_fail(node
!= NULL
, head
);
185 g_return_val_if_fail(head
->prev
== NULL
, NULL
);
188 if (head
->next
!= NULL
)
189 head
->next
->prev
= NULL
;
192 g_return_val_if_fail(node
->prev
!= NULL
, NULL
);
193 node
->prev
->next
= node
->next
;
194 if (node
->next
!= NULL
)
195 node
->next
->prev
= node
->prev
;
201 /*******************************************************************************
203 ******************************************************************************/
206 purple_trie_states_cleanup(PurpleTriePrivate
*priv
)
208 if (priv
->root_state
!= NULL
) {
209 purple_memory_pool_cleanup(priv
->states_mempool
);
210 priv
->root_state
= NULL
;
214 /* Allocates a state and binds it to the parent. */
215 static PurpleTrieState
*
216 purple_trie_state_new(PurpleTriePrivate
*priv
, PurpleTrieState
*parent
,
219 PurpleTrieState
*state
;
221 state
= purple_memory_pool_alloc0(priv
->states_mempool
,
222 sizeof(PurpleTrieState
), sizeof(gpointer
));
223 g_return_val_if_fail(state
!= NULL
, NULL
);
228 state
->parent
= parent
;
229 if (parent
->children
== NULL
) {
230 parent
->children
= purple_memory_pool_alloc0(
231 priv
->states_mempool
,
232 /* PurpleTrieState *children[G_MAXUCHAR + 1] */
233 256 * sizeof(gpointer
),
237 if (parent
->children
== NULL
) {
238 purple_memory_pool_free(priv
->states_mempool
, state
);
243 parent
->children
[character
] = state
;
249 purple_trie_states_build(PurpleTriePrivate
*priv
)
251 PurpleTrieState
*root
;
252 PurpleMemoryPool
*reclist_mpool
;
253 PurpleTrieRecordList
*reclist
, *it
;
256 if (priv
->root_state
!= NULL
)
259 if (priv
->records_total_size
< PURPLE_TRIE_LARGE_THRESHOLD
) {
260 purple_memory_pool_set_block_size(priv
->states_mempool
,
261 PURPLE_TRIE_STATES_SMALL_POOL_BLOCK_SIZE
);
263 purple_memory_pool_set_block_size(priv
->states_mempool
,
264 PURPLE_TRIE_STATES_LARGE_POOL_BLOCK_SIZE
);
267 priv
->root_state
= root
= purple_trie_state_new(priv
, NULL
, '\0');
268 g_return_val_if_fail(root
!= NULL
, FALSE
);
269 g_assert(root
->longest_suffix
== NULL
);
271 /* reclist is a list of words not yet added to the trie. Shorter words
272 * are removed from the list, when they are fully added to the trie. */
273 reclist_mpool
= purple_memory_pool_new();
274 reclist
= purple_record_list_copy(reclist_mpool
, priv
->records
);
276 /* extra_data on every element of reclist will be a pointer to a trie
277 * node -- the prefix of the word with len of cur_len */
278 for (it
= reclist
; it
!= NULL
; it
= it
->next
) {
279 it
->extra_data
= root
;
282 /* Iterate over indexes of words -- every loop iteration checks certain
283 * index of all remaining words. Loop finishes when there are no words
284 * longer than cur_len. */
285 for (cur_len
= 0; reclist
!= NULL
; cur_len
++) {
286 for (it
= reclist
; it
; it
= it
->next
) {
287 PurpleTrieRecord
*rec
= it
->rec
;
288 guchar character
= rec
->word
[cur_len
];
289 PurpleTrieState
*prefix
= it
->extra_data
;
290 PurpleTrieState
*lon_suf_parent
;
292 g_assert(character
!= '\0');
294 if (prefix
->children
&& prefix
->children
[character
]) {
295 /* Word's prefix is already in the trie, added
296 * by the other word. */
297 prefix
= prefix
->children
[character
];
299 /* We need to create a new branch of trie. */
300 prefix
= purple_trie_state_new(priv
, prefix
,
304 g_object_unref(reclist_mpool
);
308 it
->extra_data
= prefix
;
309 /* prefix is now of length increased by one character. */
311 /* The whole word is now added to the trie. */
312 if (rec
->word
[cur_len
+ 1] == '\0') {
313 if (prefix
->found_word
== NULL
)
314 prefix
->found_word
= rec
;
316 purple_debug_warning("trie", "found "
317 "a collision of \"%s\" words",
321 /* "it" is not modified here, so it->next is
323 reclist
= purple_record_list_remove(reclist
, it
);
326 /* We need to fill the longest_suffix field -- a longest
327 * complete suffix of the prefix we created. We look for
328 * that suffix in any path starting in root and ending
329 * in the (cur_len - 1) level of trie. */
330 if (prefix
->longest_suffix
!= NULL
)
332 lon_suf_parent
= prefix
->parent
->longest_suffix
;
333 while (lon_suf_parent
) {
334 if (lon_suf_parent
->children
&&
335 lon_suf_parent
->children
[character
])
337 prefix
->longest_suffix
= lon_suf_parent
->
341 lon_suf_parent
= lon_suf_parent
->longest_suffix
;
343 if (prefix
->longest_suffix
== NULL
)
344 prefix
->longest_suffix
= root
;
345 if (prefix
->found_word
== NULL
) {
347 prefix
->longest_suffix
->found_word
;
352 g_object_unref(reclist_mpool
);
357 /*******************************************************************************
359 ******************************************************************************/
362 purple_trie_advance(PurpleTrieMachine
*m
, const guchar character
)
364 /* change state after processing a character */
366 /* Perfect fit - next character is the same, as the child of the
367 * prefix we reached so far. */
368 if (m
->state
->children
&& m
->state
->children
[character
]) {
369 m
->state
= m
->state
->children
[character
];
373 /* We reached root, that's a pity. */
374 if (m
->state
== m
->root_state
)
377 /* Let's try a bit shorter suffix. */
378 m
->state
= m
->state
->longest_suffix
;
383 purple_trie_replace_do_replacement(PurpleTrieMachine
*m
, GString
*out
)
385 gboolean was_replaced
= FALSE
;
388 /* if we reached a "found" state, let's process it */
389 if (!m
->state
->found_word
)
392 /* let's get back to the beginning of the word */
393 g_assert(out
->len
>= m
->state
->found_word
->word_len
- 1);
394 str_old_len
= out
->len
;
395 out
->len
-= m
->state
->found_word
->word_len
- 1;
397 was_replaced
= m
->replace_cb(out
, m
->state
->found_word
->word
,
398 m
->state
->found_word
->data
, m
->user_data
);
400 /* output was untouched, revert to the previous position */
402 out
->len
= str_old_len
;
405 if (was_replaced
|| m
->reset_on_match
)
406 m
->state
= m
->root_state
;
412 purple_trie_find_do_discovery(PurpleTrieMachine
*m
)
414 gboolean was_accepted
;
416 /* if we reached a "found" state, let's process it */
417 if (!m
->state
->found_word
)
421 was_accepted
= m
->find_cb(m
->state
->found_word
->word
,
422 m
->state
->found_word
->data
, m
->user_data
);
427 if (was_accepted
&& m
->reset_on_match
)
428 m
->state
= m
->root_state
;
434 purple_trie_replace(PurpleTrie
*trie
, const gchar
*src
,
435 PurpleTrieReplaceCb replace_cb
, gpointer user_data
)
437 PurpleTriePrivate
*priv
= NULL
;
438 PurpleTrieMachine machine
;
445 g_return_val_if_fail(replace_cb
!= NULL
, g_strdup(src
));
446 g_return_val_if_fail(PURPLE_IS_TRIE(trie
), NULL
);
448 priv
= purple_trie_get_instance_private(trie
);
450 purple_trie_states_build(priv
);
452 machine
.state
= priv
->root_state
;
453 machine
.root_state
= priv
->root_state
;
454 machine
.reset_on_match
= priv
->reset_on_match
;
455 machine
.replace_cb
= replace_cb
;
456 machine
.user_data
= user_data
;
458 out
= g_string_new(NULL
);
460 while (src
[i
] != '\0') {
461 guchar character
= src
[i
++];
462 gboolean was_replaced
;
464 purple_trie_advance(&machine
, character
);
465 was_replaced
= purple_trie_replace_do_replacement(&machine
, out
);
467 /* We skipped a character without finding any records,
468 * let's just copy it to the output. */
470 g_string_append_c(out
, character
);
473 return g_string_free(out
, FALSE
);
477 purple_trie_multi_replace(const GSList
*tries
, const gchar
*src
,
478 PurpleTrieReplaceCb replace_cb
, gpointer user_data
)
480 guint tries_count
, m_idx
;
481 PurpleTrieMachine
*machines
;
488 g_return_val_if_fail(replace_cb
!= NULL
, g_strdup(src
));
490 tries_count
= g_slist_length((GSList
*)tries
);
491 if (tries_count
== 0)
492 return g_strdup(src
);
494 /* Initialize all machines. */
495 machines
= g_new(PurpleTrieMachine
, tries_count
);
496 for (i
= 0; i
< tries_count
; i
++, tries
= tries
->next
) {
497 PurpleTrie
*trie
= tries
->data
;
498 PurpleTriePrivate
*priv
= NULL
;
500 if (!PURPLE_TRIE(trie
)) {
506 priv
= purple_trie_get_instance_private(trie
);
508 purple_trie_states_build(priv
);
510 machines
[i
].state
= priv
->root_state
;
511 machines
[i
].root_state
= priv
->root_state
;
512 machines
[i
].reset_on_match
= priv
->reset_on_match
;
513 machines
[i
].replace_cb
= replace_cb
;
514 machines
[i
].user_data
= user_data
;
517 out
= g_string_new(NULL
);
519 while (src
[i
] != '\0') {
520 guchar character
= src
[i
++];
521 gboolean was_replaced
= FALSE
;
523 /* Advance every machine and possibly perform a replacement. */
524 for (m_idx
= 0; m_idx
< tries_count
; m_idx
++) {
525 purple_trie_advance(&machines
[m_idx
], character
);
528 was_replaced
= purple_trie_replace_do_replacement(
529 &machines
[m_idx
], out
);
532 /* We skipped a character without finding any records,
533 * let's just copy it to the output. */
535 g_string_append_c(out
, character
);
537 /* If we replaced a word, reset _all_ machines */
539 for (m_idx
= 0; m_idx
< tries_count
; m_idx
++) {
540 machines
[m_idx
].state
=
541 machines
[m_idx
].root_state
;
547 return g_string_free(out
, FALSE
);
551 purple_trie_find(PurpleTrie
*trie
, const gchar
*src
,
552 PurpleTrieFindCb find_cb
, gpointer user_data
)
554 PurpleTriePrivate
*priv
= NULL
;
555 PurpleTrieMachine machine
;
556 gulong found_count
= 0;
562 g_return_val_if_fail(PURPLE_IS_TRIE(trie
), 0);
564 priv
= purple_trie_get_instance_private(trie
);
566 purple_trie_states_build(priv
);
568 machine
.state
= priv
->root_state
;
569 machine
.root_state
= priv
->root_state
;
570 machine
.reset_on_match
= priv
->reset_on_match
;
571 machine
.find_cb
= find_cb
;
572 machine
.user_data
= user_data
;
575 while (src
[i
] != '\0') {
576 guchar character
= src
[i
++];
579 purple_trie_advance(&machine
, character
);
581 was_found
= purple_trie_find_do_discovery(&machine
);
591 purple_trie_multi_find(const GSList
*tries
, const gchar
*src
,
592 PurpleTrieFindCb find_cb
, gpointer user_data
)
594 guint tries_count
, m_idx
;
595 PurpleTrieMachine
*machines
;
596 gulong found_count
= 0;
602 tries_count
= g_slist_length((GSList
*)tries
);
603 if (tries_count
== 0)
606 /* Initialize all machines. */
607 machines
= g_new(PurpleTrieMachine
, tries_count
);
608 for (i
= 0; i
< tries_count
; i
++, tries
= tries
->next
) {
609 PurpleTrie
*trie
= tries
->data
;
610 PurpleTriePrivate
*priv
= NULL
;
612 if (!PURPLE_IS_TRIE(trie
)) {
618 priv
= purple_trie_get_instance_private(trie
);
620 purple_trie_states_build(priv
);
622 machines
[i
].state
= priv
->root_state
;
623 machines
[i
].root_state
= priv
->root_state
;
624 machines
[i
].reset_on_match
= priv
->reset_on_match
;
625 machines
[i
].find_cb
= find_cb
;
626 machines
[i
].user_data
= user_data
;
630 while (src
[i
] != '\0') {
631 guchar character
= src
[i
++];
632 gboolean was_found
= FALSE
;
634 /* Advance every machine and possibly perform a replacement. */
635 for (m_idx
= 0; m_idx
< tries_count
; m_idx
++) {
636 purple_trie_advance(&machines
[m_idx
], character
);
640 purple_trie_find_do_discovery(&machines
[m_idx
]);
645 /* If we replaced a word, reset _all_ machines */
647 for (m_idx
= 0; m_idx
< tries_count
; m_idx
++) {
648 if (!machines
[m_idx
].reset_on_match
)
650 machines
[m_idx
].state
=
651 machines
[m_idx
].root_state
;
661 /*******************************************************************************
663 ******************************************************************************/
666 purple_trie_add(PurpleTrie
*trie
, const gchar
*word
, gpointer data
)
668 PurpleTriePrivate
*priv
= NULL
;
669 PurpleTrieRecord
*rec
;
671 g_return_val_if_fail(PURPLE_IS_TRIE(trie
), FALSE
);
672 g_return_val_if_fail(word
!= NULL
, FALSE
);
673 g_return_val_if_fail(word
[0] != '\0', FALSE
);
675 priv
= purple_trie_get_instance_private(trie
);
677 if (g_hash_table_lookup(priv
->records_map
, word
) != NULL
) {
678 purple_debug_warning("trie", "record exists: %s", word
);
682 /* Every change in a trie invalidates longest_suffix map.
683 * These prefixes could be updated instead of cleaning the whole graph.
685 purple_trie_states_cleanup(priv
);
687 rec
= purple_memory_pool_alloc(priv
->records_obj_mempool
,
688 sizeof(PurpleTrieRecord
), sizeof(gpointer
));
689 rec
->word
= purple_memory_pool_strdup(priv
->records_str_mempool
, word
);
690 rec
->word_len
= strlen(word
);
691 g_assert(rec
->word_len
> 0);
694 priv
->records_total_size
+= rec
->word_len
;
695 priv
->records
= purple_record_list_prepend(priv
->records_obj_mempool
,
697 g_hash_table_insert(priv
->records_map
, rec
->word
, priv
->records
);
703 purple_trie_remove(PurpleTrie
*trie
, const gchar
*word
)
705 PurpleTriePrivate
*priv
= NULL
;
706 PurpleTrieRecordList
*it
;
708 g_return_if_fail(PURPLE_IS_TRIE(trie
));
709 g_return_if_fail(word
!= NULL
);
710 g_return_if_fail(word
[0] != '\0');
712 priv
= purple_trie_get_instance_private(trie
);
714 it
= g_hash_table_lookup(priv
->records_map
, word
);
718 /* see purple_trie_add */
719 purple_trie_states_cleanup(priv
);
721 priv
->records_total_size
-= it
->rec
->word_len
;
722 priv
->records
= purple_record_list_remove(priv
->records
, it
);
723 g_hash_table_remove(priv
->records_map
, it
->rec
->word
);
725 purple_memory_pool_free(priv
->records_str_mempool
, it
->rec
->word
);
726 purple_memory_pool_free(priv
->records_obj_mempool
, it
->rec
);
727 purple_memory_pool_free(priv
->records_obj_mempool
, it
);
731 purple_trie_get_size(PurpleTrie
*trie
)
733 PurpleTriePrivate
*priv
= NULL
;
735 g_return_val_if_fail(PURPLE_IS_TRIE(trie
), 0);
737 priv
= purple_trie_get_instance_private(trie
);
738 return g_hash_table_size(priv
->records_map
);
742 /*******************************************************************************
744 ******************************************************************************/
747 purple_trie_get_reset_on_match(PurpleTrie
*trie
)
749 PurpleTriePrivate
*priv
= NULL
;
751 g_return_val_if_fail(PURPLE_IS_TRIE(trie
), FALSE
);
753 priv
= purple_trie_get_instance_private(trie
);
754 return priv
->reset_on_match
;
758 purple_trie_set_reset_on_match(PurpleTrie
*trie
, gboolean reset
)
760 PurpleTriePrivate
*priv
= NULL
;
762 g_return_if_fail(PURPLE_IS_TRIE(trie
));
764 priv
= purple_trie_get_instance_private(trie
);
765 priv
->reset_on_match
= reset
;
766 g_object_notify_by_pspec(G_OBJECT(trie
), properties
[PROP_RESET_ON_MATCH
]);
769 /*******************************************************************************
771 ******************************************************************************/
774 purple_trie_new(void)
776 return g_object_new(PURPLE_TYPE_TRIE
, NULL
);
780 purple_trie_init(PurpleTrie
*trie
)
782 PurpleTriePrivate
*priv
= purple_trie_get_instance_private(trie
);
784 priv
->records_obj_mempool
= purple_memory_pool_new();
785 priv
->records_str_mempool
= purple_memory_pool_new();
786 priv
->states_mempool
= purple_memory_pool_new();
787 purple_memory_pool_set_block_size(priv
->states_mempool
,
788 PURPLE_TRIE_STATES_SMALL_POOL_BLOCK_SIZE
);
790 priv
->records_map
= g_hash_table_new(g_str_hash
, g_str_equal
);
794 purple_trie_finalize(GObject
*obj
)
796 PurpleTriePrivate
*priv
=
797 purple_trie_get_instance_private(PURPLE_TRIE(obj
));
799 g_hash_table_destroy(priv
->records_map
);
800 g_object_unref(priv
->records_obj_mempool
);
801 g_object_unref(priv
->records_str_mempool
);
802 g_object_unref(priv
->states_mempool
);
804 G_OBJECT_CLASS(purple_trie_parent_class
)->finalize(obj
);
808 purple_trie_get_property(GObject
*obj
, guint param_id
, GValue
*value
,
811 PurpleTrie
*trie
= PURPLE_TRIE(obj
);
812 PurpleTriePrivate
*priv
= purple_trie_get_instance_private(trie
);
815 case PROP_RESET_ON_MATCH
:
816 g_value_set_boolean(value
, priv
->reset_on_match
);
819 G_OBJECT_WARN_INVALID_PROPERTY_ID(obj
, param_id
, pspec
);
824 purple_trie_set_property(GObject
*obj
, guint param_id
,
825 const GValue
*value
, GParamSpec
*pspec
)
827 PurpleTrie
*trie
= PURPLE_TRIE(obj
);
828 PurpleTriePrivate
*priv
= purple_trie_get_instance_private(trie
);
831 case PROP_RESET_ON_MATCH
:
832 priv
->reset_on_match
= g_value_get_boolean(value
);
835 G_OBJECT_WARN_INVALID_PROPERTY_ID(obj
, param_id
, pspec
);
840 purple_trie_class_init(PurpleTrieClass
*klass
)
842 GObjectClass
*obj_class
= G_OBJECT_CLASS(klass
);
844 obj_class
->finalize
= purple_trie_finalize
;
845 obj_class
->get_property
= purple_trie_get_property
;
846 obj_class
->set_property
= purple_trie_set_property
;
848 properties
[PROP_RESET_ON_MATCH
] = g_param_spec_boolean("reset-on-match",
849 "Reset on match", "Determines, if the search state machine "
850 "should be reset to the initial state on every match. This "
851 "ensures, that every match is distinct from each other. "
852 "Please note, that it's not well-defined for a replace "
853 "operation, so it's better to leave this value default, unless "
854 "you perform only find operations.", TRUE
,
855 G_PARAM_READWRITE
| G_PARAM_CONSTRUCT
| G_PARAM_STATIC_STRINGS
);
857 g_object_class_install_properties(obj_class
, PROP_LAST
, properties
);