2 * Copyright © 2012 Google, Inc.
4 * This is part of HarfBuzz, a text shaping library.
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24 * Google Author(s): Behdad Esfahbod
27 #ifndef HB_SET_PRIVATE_HH
28 #define HB_SET_PRIVATE_HH
30 #include "hb-private.hh"
31 #include "hb-object-private.hh"
35 * The set digests here implement various "filters" that support
36 * "approximate member query". Conceptually these are like Bloom
37 * Filter and Quotient Filter, however, much smaller, faster, and
 * designed to fit the requirements of our uses for glyph coverage
 * queries.
 *
 * Our filters are highly accurate if the lookup covers fairly local
 * set of glyphs, but fully flooded and ineffective if coverage is
 * all over the place.
45 * The frozen-set can be used instead of a digest, to trade more
46 * memory for 100% accuracy, but in practice, that doesn't look like
47 * an attractive trade-off.
50 template <typename mask_t
, unsigned int shift
>
51 struct hb_set_digest_lowest_bits_t
55 static const unsigned int mask_bytes
= sizeof (mask_t
);
56 static const unsigned int mask_bits
= sizeof (mask_t
) * 8;
57 static const unsigned int num_bits
= 0
58 + (mask_bytes
>= 1 ? 3 : 0)
59 + (mask_bytes
>= 2 ? 1 : 0)
60 + (mask_bytes
>= 4 ? 1 : 0)
61 + (mask_bytes
>= 8 ? 1 : 0)
62 + (mask_bytes
>= 16? 1 : 0)
65 ASSERT_STATIC (shift
< sizeof (hb_codepoint_t
) * 8);
66 ASSERT_STATIC (shift
+ num_bits
<= sizeof (hb_codepoint_t
) * 8);
68 inline void init (void) {
72 inline void add (hb_codepoint_t g
) {
76 inline void add_range (hb_codepoint_t a
, hb_codepoint_t b
) {
77 if ((b
>> shift
) - (a
>> shift
) >= mask_bits
- 1)
80 mask_t ma
= mask_for (a
);
81 mask_t mb
= mask_for (b
);
82 mask
|= mb
+ (mb
- ma
) - (mb
< ma
);
86 inline bool may_have (hb_codepoint_t g
) const {
87 return !!(mask
& mask_for (g
));
92 static inline mask_t
mask_for (hb_codepoint_t g
) {
93 return ((mask_t
) 1) << ((g
>> shift
) & (mask_bits
- 1));
98 template <typename head_t
, typename tail_t
>
99 struct hb_set_digest_combiner_t
103 inline void init (void) {
108 inline void add (hb_codepoint_t g
) {
113 inline void add_range (hb_codepoint_t a
, hb_codepoint_t b
) {
114 head
.add_range (a
, b
);
115 tail
.add_range (a
, b
);
118 inline bool may_have (hb_codepoint_t g
) const {
119 return head
.may_have (g
) && tail
.may_have (g
);
131 * This is a combination of digests that performs "best".
132 * There is not much science to this: it's a result of intuition
135 typedef hb_set_digest_combiner_t
137 hb_set_digest_lowest_bits_t
<unsigned long, 4>,
138 hb_set_digest_combiner_t
140 hb_set_digest_lowest_bits_t
<unsigned long, 0>,
141 hb_set_digest_lowest_bits_t
<unsigned long, 9>
152 /* TODO Make this faster and memmory efficient. */
156 friend struct hb_frozen_set_t
;
158 hb_object_header_t header
;
162 inline void init (void) {
163 hb_object_init (this);
166 inline void fini (void) {
168 inline void clear (void) {
169 if (unlikely (hb_object_is_inert (this)))
172 memset (elts
, 0, sizeof elts
);
174 inline bool is_empty (void) const {
175 for (unsigned int i
= 0; i
< ARRAY_LENGTH (elts
); i
++)
180 inline void add (hb_codepoint_t g
)
182 if (unlikely (in_error
)) return;
183 if (unlikely (g
== INVALID
)) return;
184 if (unlikely (g
> MAX_G
)) return;
187 inline void add_range (hb_codepoint_t a
, hb_codepoint_t b
)
189 if (unlikely (in_error
)) return;
191 for (unsigned int i
= a
; i
< b
+ 1; i
++)
194 inline void del (hb_codepoint_t g
)
196 if (unlikely (in_error
)) return;
197 if (unlikely (g
> MAX_G
)) return;
198 elt (g
) &= ~mask (g
);
200 inline void del_range (hb_codepoint_t a
, hb_codepoint_t b
)
202 if (unlikely (in_error
)) return;
204 for (unsigned int i
= a
; i
< b
+ 1; i
++)
207 inline bool has (hb_codepoint_t g
) const
209 if (unlikely (g
> MAX_G
)) return false;
210 return !!(elt (g
) & mask (g
));
212 inline bool intersects (hb_codepoint_t first
,
213 hb_codepoint_t last
) const
215 if (unlikely (first
> MAX_G
)) return false;
216 if (unlikely (last
> MAX_G
)) last
= MAX_G
;
217 unsigned int end
= last
+ 1;
218 for (hb_codepoint_t i
= first
; i
< end
; i
++)
223 inline bool is_equal (const hb_set_t
*other
) const
225 for (unsigned int i
= 0; i
< ELTS
; i
++)
226 if (elts
[i
] != other
->elts
[i
])
230 inline void set (const hb_set_t
*other
)
232 if (unlikely (in_error
)) return;
233 for (unsigned int i
= 0; i
< ELTS
; i
++)
234 elts
[i
] = other
->elts
[i
];
236 inline void union_ (const hb_set_t
*other
)
238 if (unlikely (in_error
)) return;
239 for (unsigned int i
= 0; i
< ELTS
; i
++)
240 elts
[i
] |= other
->elts
[i
];
242 inline void intersect (const hb_set_t
*other
)
244 if (unlikely (in_error
)) return;
245 for (unsigned int i
= 0; i
< ELTS
; i
++)
246 elts
[i
] &= other
->elts
[i
];
248 inline void subtract (const hb_set_t
*other
)
250 if (unlikely (in_error
)) return;
251 for (unsigned int i
= 0; i
< ELTS
; i
++)
252 elts
[i
] &= ~other
->elts
[i
];
254 inline void symmetric_difference (const hb_set_t
*other
)
256 if (unlikely (in_error
)) return;
257 for (unsigned int i
= 0; i
< ELTS
; i
++)
258 elts
[i
] ^= other
->elts
[i
];
260 inline void invert (void)
262 if (unlikely (in_error
)) return;
263 for (unsigned int i
= 0; i
< ELTS
; i
++)
266 inline bool next (hb_codepoint_t
*codepoint
) const
268 if (unlikely (*codepoint
== INVALID
)) {
269 hb_codepoint_t i
= get_min ();
274 *codepoint
= INVALID
;
278 for (hb_codepoint_t i
= *codepoint
+ 1; i
< MAX_G
+ 1; i
++)
283 *codepoint
= INVALID
;
286 inline bool next_range (hb_codepoint_t
*first
, hb_codepoint_t
*last
) const
293 *last
= *first
= INVALID
;
298 while (next (&i
) && i
== *last
+ 1)
304 inline unsigned int get_population (void) const
306 unsigned int count
= 0;
307 for (unsigned int i
= 0; i
< ELTS
; i
++)
308 count
+= _hb_popcount32 (elts
[i
]);
311 inline hb_codepoint_t
get_min (void) const
313 for (unsigned int i
= 0; i
< ELTS
; i
++)
315 for (unsigned int j
= 0; j
< BITS
; j
++)
316 if (elts
[i
] & (1 << j
))
320 inline hb_codepoint_t
get_max (void) const
322 for (unsigned int i
= ELTS
; i
; i
--)
324 for (unsigned int j
= BITS
; j
; j
--)
325 if (elts
[i
- 1] & (1 << (j
- 1)))
326 return (i
- 1) * BITS
+ (j
- 1);
330 typedef uint32_t elt_t
;
331 static const unsigned int MAX_G
= 65536 - 1; /* XXX Fix this... */
332 static const unsigned int SHIFT
= 5;
333 static const unsigned int BITS
= (1 << SHIFT
);
334 static const unsigned int MASK
= BITS
- 1;
335 static const unsigned int ELTS
= (MAX_G
+ 1 + (BITS
- 1)) / BITS
;
336 static const hb_codepoint_t INVALID
= HB_SET_VALUE_INVALID
;
338 elt_t
&elt (hb_codepoint_t g
) { return elts
[g
>> SHIFT
]; }
339 elt_t
const &elt (hb_codepoint_t g
) const { return elts
[g
>> SHIFT
]; }
340 elt_t
mask (hb_codepoint_t g
) const { return elt_t (1) << (g
& MASK
); }
342 elt_t elts
[ELTS
]; /* XXX 8kb */
344 ASSERT_STATIC (sizeof (elt_t
) * 8 == BITS
);
345 ASSERT_STATIC (sizeof (elt_t
) * 8 * ELTS
> MAX_G
);
348 struct hb_frozen_set_t
350 static const unsigned int SHIFT
= hb_set_t::SHIFT
;
351 static const unsigned int BITS
= hb_set_t::BITS
;
352 static const unsigned int MASK
= hb_set_t::MASK
;
353 typedef hb_set_t::elt_t elt_t
;
355 inline void init (const hb_set_t
&set
)
360 unsigned int max
= set
.get_max ();
361 if (max
== set
.INVALID
)
363 unsigned int min
= set
.get_min ();
364 const elt_t
&min_elt
= set
.elt (min
);
367 count
= max
- start
+ 1;
368 unsigned int num_elts
= (count
+ BITS
- 1) / BITS
;
369 unsigned int elts_size
= num_elts
* sizeof (elt_t
);
370 elts
= (elt_t
*) malloc (elts_size
);
371 if (unlikely (!elts
))
376 memcpy (elts
, &min_elt
, elts_size
);
379 inline void fini (void)
385 inline bool has (hb_codepoint_t g
) const
387 /* hb_codepoint_t is unsigned. */
389 if (unlikely (g
> count
)) return false;
390 return !!(elt (g
) & mask (g
));
393 elt_t
const &elt (hb_codepoint_t g
) const { return elts
[g
>> SHIFT
]; }
394 elt_t
mask (hb_codepoint_t g
) const { return elt_t (1) << (g
& MASK
); }
397 hb_codepoint_t start
, count
;
402 #endif /* HB_SET_PRIVATE_HH */