2 * Copyright © 2012 Google, Inc.
4 * This is part of HarfBuzz, a text shaping library.
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24 * Google Author(s): Behdad Esfahbod
27 #ifndef HB_SET_PRIVATE_HH
28 #define HB_SET_PRIVATE_HH
30 #include "hb-private.hh"
32 #include "hb-object-private.hh"
36 * The set digests here implement various "filters" that support
37 * "approximate member query". Conceptually these are like Bloom
38 * Filter and Quotient Filter, however, much smaller, faster, and
39 * designed to fit the requirements of our uses for glyph coverage
40 * queries. As a result, our filters have much higher.
43 template <typename mask_t
, unsigned int shift
>
44 struct hb_set_digest_lowest_bits_t
48 static const unsigned int mask_bytes
= sizeof (mask_t
);
49 static const unsigned int mask_bits
= sizeof (mask_t
) * 8;
50 static const unsigned int num_bits
= 0
51 + (mask_bytes
>= 1 ? 3 : 0)
52 + (mask_bytes
>= 2 ? 1 : 0)
53 + (mask_bytes
>= 4 ? 1 : 0)
54 + (mask_bytes
>= 8 ? 1 : 0)
55 + (mask_bytes
>= 16? 1 : 0)
58 ASSERT_STATIC (shift
< sizeof (hb_codepoint_t
) * 8);
59 ASSERT_STATIC (shift
+ num_bits
<= sizeof (hb_codepoint_t
) * 8);
61 inline void init (void) {
65 inline void add (hb_codepoint_t g
) {
69 inline void add_range (hb_codepoint_t a
, hb_codepoint_t b
) {
70 if ((b
>> shift
) - (a
>> shift
) >= mask_bits
- 1)
73 mask_t ma
= mask_for (a
);
74 mask_t mb
= mask_for (b
);
75 mask
|= mb
+ (mb
- ma
) - (mb
< ma
);
79 inline bool may_have (hb_codepoint_t g
) const {
80 return !!(mask
& mask_for (g
));
85 static inline mask_t
mask_for (hb_codepoint_t g
) {
86 return ((mask_t
) 1) << ((g
>> shift
) & (mask_bits
- 1));
91 template <typename head_t
, typename tail_t
>
92 struct hb_set_digest_combiner_t
96 inline void init (void) {
101 inline void add (hb_codepoint_t g
) {
106 inline void add_range (hb_codepoint_t a
, hb_codepoint_t b
) {
107 head
.add_range (a
, b
);
108 tail
.add_range (a
, b
);
111 inline bool may_have (hb_codepoint_t g
) const {
112 return head
.may_have (g
) && tail
.may_have (g
);
124 * This is a combination of digests that performs "best".
125 * There is not much science to this: it's a result of intuition
128 typedef hb_set_digest_combiner_t
130 hb_set_digest_lowest_bits_t
<unsigned long, 4>,
131 hb_set_digest_combiner_t
133 hb_set_digest_lowest_bits_t
<unsigned long, 0>,
134 hb_set_digest_lowest_bits_t
<unsigned long, 9>
145 /* TODO Make this faster and memmory efficient. */
149 hb_object_header_t header
;
153 inline void init (void) {
157 inline void fini (void) {
159 inline void clear (void) {
160 if (unlikely (hb_object_is_inert (this)))
163 memset (elts
, 0, sizeof elts
);
165 inline bool is_empty (void) const {
166 for (unsigned int i
= 0; i
< ARRAY_LENGTH (elts
); i
++)
171 inline void add (hb_codepoint_t g
)
173 if (unlikely (in_error
)) return;
174 if (unlikely (g
== INVALID
)) return;
175 if (unlikely (g
> MAX_G
)) return;
178 inline void add_range (hb_codepoint_t a
, hb_codepoint_t b
)
180 if (unlikely (in_error
)) return;
182 for (unsigned int i
= a
; i
< b
+ 1; i
++)
185 inline void del (hb_codepoint_t g
)
187 if (unlikely (in_error
)) return;
188 if (unlikely (g
> MAX_G
)) return;
189 elt (g
) &= ~mask (g
);
191 inline void del_range (hb_codepoint_t a
, hb_codepoint_t b
)
193 if (unlikely (in_error
)) return;
195 for (unsigned int i
= a
; i
< b
+ 1; i
++)
198 inline bool has (hb_codepoint_t g
) const
200 if (unlikely (g
> MAX_G
)) return false;
201 return !!(elt (g
) & mask (g
));
203 inline bool intersects (hb_codepoint_t first
,
204 hb_codepoint_t last
) const
206 if (unlikely (first
> MAX_G
)) return false;
207 if (unlikely (last
> MAX_G
)) last
= MAX_G
;
208 unsigned int end
= last
+ 1;
209 for (hb_codepoint_t i
= first
; i
< end
; i
++)
214 inline bool is_equal (const hb_set_t
*other
) const
216 for (unsigned int i
= 0; i
< ELTS
; i
++)
217 if (elts
[i
] != other
->elts
[i
])
221 inline void set (const hb_set_t
*other
)
223 if (unlikely (in_error
)) return;
224 for (unsigned int i
= 0; i
< ELTS
; i
++)
225 elts
[i
] = other
->elts
[i
];
227 inline void union_ (const hb_set_t
*other
)
229 if (unlikely (in_error
)) return;
230 for (unsigned int i
= 0; i
< ELTS
; i
++)
231 elts
[i
] |= other
->elts
[i
];
233 inline void intersect (const hb_set_t
*other
)
235 if (unlikely (in_error
)) return;
236 for (unsigned int i
= 0; i
< ELTS
; i
++)
237 elts
[i
] &= other
->elts
[i
];
239 inline void subtract (const hb_set_t
*other
)
241 if (unlikely (in_error
)) return;
242 for (unsigned int i
= 0; i
< ELTS
; i
++)
243 elts
[i
] &= ~other
->elts
[i
];
245 inline void symmetric_difference (const hb_set_t
*other
)
247 if (unlikely (in_error
)) return;
248 for (unsigned int i
= 0; i
< ELTS
; i
++)
249 elts
[i
] ^= other
->elts
[i
];
251 inline void invert (void)
253 if (unlikely (in_error
)) return;
254 for (unsigned int i
= 0; i
< ELTS
; i
++)
257 inline bool next (hb_codepoint_t
*codepoint
) const
259 if (unlikely (*codepoint
== INVALID
)) {
260 hb_codepoint_t i
= get_min ();
265 *codepoint
= INVALID
;
269 for (hb_codepoint_t i
= *codepoint
+ 1; i
< MAX_G
+ 1; i
++)
274 *codepoint
= INVALID
;
277 inline bool next_range (hb_codepoint_t
*first
, hb_codepoint_t
*last
) const
284 *last
= *first
= INVALID
;
289 while (next (&i
) && i
== *last
+ 1)
295 inline unsigned int get_population (void) const
297 unsigned int count
= 0;
298 for (unsigned int i
= 0; i
< ELTS
; i
++)
299 count
+= _hb_popcount32 (elts
[i
]);
302 inline hb_codepoint_t
get_min (void) const
304 for (unsigned int i
= 0; i
< ELTS
; i
++)
306 for (unsigned int j
= 0; j
< BITS
; j
++)
307 if (elts
[i
] & (1 << j
))
311 inline hb_codepoint_t
get_max (void) const
313 for (unsigned int i
= ELTS
; i
; i
--)
315 for (unsigned int j
= BITS
; j
; j
--)
316 if (elts
[i
- 1] & (1 << (j
- 1)))
317 return (i
- 1) * BITS
+ (j
- 1);
321 typedef uint32_t elt_t
;
322 static const unsigned int MAX_G
= 65536 - 1; /* XXX Fix this... */
323 static const unsigned int SHIFT
= 5;
324 static const unsigned int BITS
= (1 << SHIFT
);
325 static const unsigned int MASK
= BITS
- 1;
326 static const unsigned int ELTS
= (MAX_G
+ 1 + (BITS
- 1)) / BITS
;
327 static const hb_codepoint_t INVALID
= HB_SET_VALUE_INVALID
;
329 elt_t
&elt (hb_codepoint_t g
) { return elts
[g
>> SHIFT
]; }
330 elt_t
elt (hb_codepoint_t g
) const { return elts
[g
>> SHIFT
]; }
331 elt_t
mask (hb_codepoint_t g
) const { return elt_t (1) << (g
& MASK
); }
333 elt_t elts
[ELTS
]; /* XXX 8kb */
335 ASSERT_STATIC (sizeof (elt_t
) * 8 == BITS
);
336 ASSERT_STATIC (sizeof (elt_t
) * 8 * ELTS
> MAX_G
);
341 #endif /* HB_SET_PRIVATE_HH */