4 * Purple is the legal property of its developers, whose names are too
5 * numerous to list here. Please refer to the COPYRIGHT file distributed
6 * with this source distribution
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or (at
11 * your option) any later version.
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA
28 * @section_id: libpurple-trie
29 * @short_description: a structure for linear-time text searching
32 * A #PurpleTrie is a structure for quick searching of multiple phrases within
33 * a text. It's intended for repeated searches of the same set of patterns
34 * within multiple source texts (or a single, big one).
36 * It's preparation time is <literal>O(p)</literal>, where <literal>p</literal>
37 * is the total length of searched phrases. In current implementation, the
38 * internal structure is invalidated after every modification of the
39 * #PurpleTrie's contents, so it's not efficient to do alternating modifications
40 * and searches. Search time does not depend on patterns being stored within
41 * a trie and is always <literal>O(n)</literal>, where <literal>n</literal> is
44 * Its main drawback is a significant memory usage - every internal trie node
45 * needs about 1kB of memory on 32-bit machine and 2kB on 64-bit. Fortunately,
46 * the trie grows slower when more words (with common prefixes) are added.
47 * We could avoid invalidating the whole tree when altering it, but it would
48 * require figuring out, how to update <literal>longest_suffix</literal> fields
52 #include <glib-object.h>
54 #define PURPLE_TYPE_TRIE (purple_trie_get_type())
55 #define PURPLE_TRIE(obj) \
56 (G_TYPE_CHECK_INSTANCE_CAST((obj), PURPLE_TYPE_TRIE, PurpleTrie))
57 #define PURPLE_TRIE_CLASS(klass) \
58 (G_TYPE_CHECK_CLASS_CAST((klass), PURPLE_TYPE_TRIE, PurpleTrieClass))
59 #define PURPLE_IS_TRIE(obj) \
60 (G_TYPE_CHECK_INSTANCE_TYPE((obj), PURPLE_TYPE_TRIE))
61 #define PURPLE_IS_TRIE_CLASS(klass) \
62 (G_TYPE_CHECK_CLASS_TYPE((klass), PURPLE_TYPE_TRIE))
63 #define PURPLE_TRIE_GET_CLASS(obj) \
64 (G_TYPE_INSTANCE_GET_CLASS((obj), PURPLE_TYPE_TRIE, PurpleTrieClass))
66 typedef struct _PurpleTrie PurpleTrie
;
67 typedef struct _PurpleTrieClass PurpleTrieClass
;
72 * The trie object instance.
77 GObject parent_instance
;
83 * Base class for #PurpleTrie objects.
85 struct _PurpleTrieClass
88 GObjectClass parent_class
;
90 void (*purple_reserved1
)(void);
91 void (*purple_reserved2
)(void);
92 void (*purple_reserved3
)(void);
93 void (*purple_reserved4
)(void);
97 * PurpleTrieReplaceCb:
98 * @out: currently built output string, append replacement to it.
100 * @word_data: the user data bound with this word, when added with
102 * @user_data: the user supplied data passed when calling #purple_trie_replace.
104 * A funtion called on every matching substring to be replaced.
106 * If you decide to replace the word, append your text to @out and return %TRUE.
107 * Otherwise, you must not touch @out. In both cases, you must not do any
108 * operations on @out other than appending text to it.
110 * Returns: %TRUE if the word was replaced, %FALSE otherwise.
112 typedef gboolean (*PurpleTrieReplaceCb
)(GString
*out
, const gchar
*word
,
113 gpointer word_data
, gpointer user_data
);
118 * @word_data: the user data bound with this word, when added with
120 * @user_data: the user data passed when calling #purple_trie_find.
122 * A function called on every matching substring.
124 * You can decide to count the match or not (for the total number of found
125 * words, that is returned by #purple_trie_find). In both cases you can
126 * obviously do some processing outside the #PurpleTrie object.
128 * If you decide to count the word and #PurpleTrie:reset-on-match property
129 * is set, no overlapping words will be found - the processing will skip after
130 * the end of this word.
132 * Returns: %TRUE if the word should be counter, %FALSE otherwise.
134 typedef gboolean (*PurpleTrieFindCb
)(const gchar
*word
, gpointer word_data
,
140 * purple_trie_get_type:
142 * Returns: the #GType for a #PurpleTrie.
145 purple_trie_get_type(void);
150 * Creates a new trie.
152 * Returns: the new #PurpleTrie.
155 purple_trie_new(void);
158 * purple_trie_get_reset_on_match:
161 * Checks, if the trie will reset its internal state after every match.
163 * Returns: %TRUE, if trie will reset, %FALSE otherwise.
166 purple_trie_get_reset_on_match(PurpleTrie
*trie
);
169 * purple_trie_set_reset_on_match:
171 * @reset: %TRUE, if trie should reset, %FALSE otherwise.
173 * Enables or disables a feature of resetting trie's state after every match.
174 * When enabled, it will not search for overlapping matches.
176 * It's well defined for #purple_trie_find, but not for replace operations.
177 * Thus, for the latter, it's better to stay with this option enabled, because
178 * its behavior may be changed in future.
181 purple_trie_set_reset_on_match(PurpleTrie
*trie
, gboolean reset
);
187 * @data: the word-related data (may be %NULL).
189 * Adds a word to the trie. Current implementation doesn't allow for duplicates,
190 * so please avoid adding those.
192 * Please note, that altering a trie invalidates its internal structure, so by
193 * the occasion of next search, it will be rebuilt. It's done in
194 * <literal>O(n)</literal>, where n is the total length of strings
197 * Returns: %TRUE if succeeded, %FALSE otherwise.
200 purple_trie_add(PurpleTrie
*trie
, const gchar
*word
, gpointer data
);
203 * purple_trie_remove:
207 * Removes a word from the trie. Depending on used memory pool, this may not
208 * free allocated memory (that will be freed when destroying the whole
209 * collection), so use it wisely. See #purple_memory_pool_free.
211 * Please note, that altering a trie invalidates its internal structure.
212 * See #purple_trie_add.
215 purple_trie_remove(PurpleTrie
*trie
, const gchar
*word
);
218 * purple_trie_get_size:
221 * Returns the number of elements contained in the #PurpleTrie.
223 * Returns: the number of stored words in @trie.
226 purple_trie_get_size(PurpleTrie
*trie
);
229 * purple_trie_replace:
231 * @src: the source string.
232 * @replace_cb: (scope call): the replacement function.
233 * @user_data: custom data to be passed to @replace_cb.
235 * Processes @src string and replaces all occuriences of words added to @trie.
236 * It's <literal>O(strlen(src))</literal>, if @replace_cb runs in
237 * <literal>O(strlen(word))</literal> and #PurpleTrie:reset-on-match is set.
239 * Returns: resulting string. Must be #g_free'd when you are done using it.
242 purple_trie_replace(PurpleTrie
*trie
, const gchar
*src
,
243 PurpleTrieReplaceCb replace_cb
, gpointer user_data
);
246 * purple_trie_multi_replace:
247 * @tries: (element-type PurpleTrie): the list of tries.
248 * @src: the source string.
249 * @replace_cb: (scope call): the replacement function.
250 * @user_data: custom data to be passed to @replace_cb.
252 * Processes @src and replaces all occuriences of words added to tries in list
253 * @tries. Entries added to tries on the beginning of the list have higher
254 * priority, than ones added further.
256 * Different #GSList's can be combined to possess common parts, so you can create
259 * Returns: resulting string. Must be #g_free'd when you are done using it.
262 purple_trie_multi_replace(const GSList
*tries
, const gchar
*src
,
263 PurpleTrieReplaceCb replace_cb
, gpointer user_data
);
268 * @src: the source string.
269 * @find_cb: (nullable) (scope call): the callback for the found entries (may be %NULL).
270 * @user_data: custom data to be passed to @find_cb.
272 * Processes @src string and finds all occuriences of words added to @trie.
273 * It's <literal>O(strlen(src))</literal>, if find_cb runs
274 * in <literal>O(1)</literal>.
276 * The word is counted as found if it's found and the callback returns %TRUE.
278 * Returns: the number of found words.
281 purple_trie_find(PurpleTrie
*trie
, const gchar
*src
,
282 PurpleTrieFindCb find_cb
, gpointer user_data
);
285 * purple_trie_multi_find:
286 * @tries: (element-type PurpleTrie): the list of tries.
287 * @src: the source string.
288 * @find_cb: (nullable) (scope call): the callback for the found entries (may be %NULL).
289 * @user_data: custom data to be passed to @find_cb.
291 * Processes @src and replaces all occuriences of words added to tries in
292 * list @tries. Entries added to tries on the beginning of the list have higher
293 * priority, than ones added further.
295 * Different #GSList's can be combined to possess common parts, so you can create
298 * Returns: the number of found words.
301 purple_trie_multi_find(const GSList
*tries
, const gchar
*src
,
302 PurpleTrieFindCb find_cb
, gpointer user_data
);
306 #endif /* PURPLE_MEMORY_POOL_H */