2 * Copyright (c) 2004 Tim J. Robbins.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * "Set of characters" ADT implemented as a splay tree of extents, with
28 * a lookup table cache to simplify looking up the first bunch of
29 * characters (which are presumably more common than others).
39 static struct csnode
*cset_delete(struct csnode
*, wchar_t);
40 static int cset_rangecmp(struct csnode
*, wchar_t);
41 static struct csnode
*cset_splay(struct csnode
*, wchar_t);
45 * Allocate a set of characters.
52 if ((cs
= malloc(sizeof (*cs
))) == NULL
)
55 cs
->cs_classes
= NULL
;
56 cs
->cs_havecache
= false;
57 cs
->cs_invert
= false;
63 * Add a character to the set.
66 cset_add(struct cset
*cs
, wchar_t ch
)
68 struct csnode
*csn
, *ncsn
;
71 cs
->cs_havecache
= false;
74 * Inserting into empty tree; new item becomes the root.
76 if (cs
->cs_root
== NULL
) {
77 csn
= malloc(sizeof (*cs
->cs_root
));
80 csn
->csn_left
= csn
->csn_right
= NULL
;
81 csn
->csn_min
= csn
->csn_max
= ch
;
87 * Splay to check whether the item already exists, and otherwise,
88 * where we should put it.
90 csn
= cs
->cs_root
= cset_splay(cs
->cs_root
, ch
);
93 * Avoid adding duplicate nodes.
95 if (cset_rangecmp(csn
, ch
) == 0)
99 * Allocate a new node and make it the new root.
101 ncsn
= malloc(sizeof (*ncsn
));
104 ncsn
->csn_min
= ncsn
->csn_max
= ch
;
105 if (cset_rangecmp(csn
, ch
) < 0) {
106 ncsn
->csn_left
= csn
->csn_left
;
107 ncsn
->csn_right
= csn
;
108 csn
->csn_left
= NULL
;
110 ncsn
->csn_right
= csn
->csn_right
;
111 ncsn
->csn_left
= csn
;
112 csn
->csn_right
= NULL
;
117 * Coalesce with left and right neighbours if possible.
119 if (ncsn
->csn_left
!= NULL
) {
120 ncsn
->csn_left
= cset_splay(ncsn
->csn_left
, ncsn
->csn_min
- 1);
121 if (ncsn
->csn_left
->csn_max
== ncsn
->csn_min
- 1) {
122 oval
= ncsn
->csn_left
->csn_min
;
123 ncsn
->csn_left
= cset_delete(ncsn
->csn_left
,
124 ncsn
->csn_left
->csn_min
);
125 ncsn
->csn_min
= oval
;
128 if (ncsn
->csn_right
!= NULL
) {
129 ncsn
->csn_right
= cset_splay(ncsn
->csn_right
,
131 if (ncsn
->csn_right
->csn_min
== ncsn
->csn_max
+ 1) {
132 oval
= ncsn
->csn_right
->csn_max
;
133 ncsn
->csn_right
= cset_delete(ncsn
->csn_right
,
134 ncsn
->csn_right
->csn_min
);
135 ncsn
->csn_max
= oval
;
144 * Determine whether a character is in the set without using
148 cset_in_hard(struct cset
*cs
, wchar_t ch
)
152 for (csc
= cs
->cs_classes
; csc
!= NULL
; csc
= csc
->csc_next
)
153 if (csc
->csc_invert
^ (iswctype(ch
, csc
->csc_type
) != 0))
154 return (cs
->cs_invert
^ true);
155 if (cs
->cs_root
!= NULL
) {
156 cs
->cs_root
= cset_splay(cs
->cs_root
, ch
);
157 return (cs
->cs_invert
^ (cset_rangecmp(cs
->cs_root
, ch
) == 0));
159 return (cs
->cs_invert
^ false);
167 cset_cache(struct cset
*cs
)
171 for (i
= 0; i
< CS_CACHE_SIZE
; i
++)
172 cs
->cs_cache
[i
] = cset_in_hard(cs
, i
);
174 cs
->cs_havecache
= true;
179 * Invert the character set.
182 cset_invert(struct cset
*cs
)
185 cs
->cs_invert
^= true;
186 cs
->cs_havecache
= false;
191 * Add a wctype()-style character class to the set, optionally
195 cset_addclass(struct cset
*cs
, wctype_t type
, bool invert
)
199 csc
= malloc(sizeof (*csc
));
202 csc
->csc_type
= type
;
203 csc
->csc_invert
= invert
;
204 csc
->csc_next
= cs
->cs_classes
;
205 cs
->cs_classes
= csc
;
206 cs
->cs_havecache
= false;
211 cset_rangecmp(struct csnode
*t
, wchar_t ch
)
221 static struct csnode
*
222 cset_splay(struct csnode
*t
, wchar_t ch
)
224 struct csnode N
, *l
, *r
, *y
;
227 * Based on public domain code from Sleator.
232 N
.csn_left
= N
.csn_right
= NULL
;
235 if (cset_rangecmp(t
, ch
) < 0) {
236 if (t
->csn_left
!= NULL
&&
237 cset_rangecmp(t
->csn_left
, ch
) < 0) {
239 t
->csn_left
= y
->csn_right
;
243 if (t
->csn_left
== NULL
)
248 } else if (cset_rangecmp(t
, ch
) > 0) {
249 if (t
->csn_right
!= NULL
&&
250 cset_rangecmp(t
->csn_right
, ch
) > 0) {
252 t
->csn_right
= y
->csn_left
;
256 if (t
->csn_right
== NULL
)
264 l
->csn_right
= t
->csn_left
;
265 r
->csn_left
= t
->csn_right
;
266 t
->csn_left
= N
.csn_right
;
267 t
->csn_right
= N
.csn_left
;
271 static struct csnode
*
272 cset_delete(struct csnode
*t
, wchar_t ch
)
277 t
= cset_splay(t
, ch
);
278 assert(cset_rangecmp(t
, ch
) == 0);
279 if (t
->csn_left
== NULL
)
282 x
= cset_splay(t
->csn_left
, ch
);
283 x
->csn_right
= t
->csn_right
;