Add ICU message format support
[chromium-blink-merge.git] / third_party / harfbuzz-ng / src / hb-common.cc
blobd510c9be4f676e8c4eeae2064112231b36bfaaec
1 /*
2 * Copyright © 2009,2010 Red Hat, Inc.
3 * Copyright © 2011,2012 Google, Inc.
5 * This is part of HarfBuzz, a text shaping library.
7 * Permission is hereby granted, without written agreement and without
8 * license or royalty fees, to use, copy, modify, and distribute this
9 * software and its documentation for any purpose, provided that the
10 * above copyright notice and the following two paragraphs appear in
11 * all copies of this software.
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
17 * DAMAGE.
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25 * Red Hat Author(s): Behdad Esfahbod
26 * Google Author(s): Behdad Esfahbod
29 #include "hb-private.hh"
31 #include "hb-mutex-private.hh"
32 #include "hb-object-private.hh"
34 #include <locale.h>
37 /* hb_options_t */
39 hb_options_union_t _hb_options;
41 void
42 _hb_options_init (void)
44 hb_options_union_t u;
45 u.i = 0;
46 u.opts.initialized = 1;
48 char *c = getenv ("HB_OPTIONS");
49 u.opts.uniscribe_bug_compatible = c && strstr (c, "uniscribe-bug-compatible");
51 /* This is idempotent and threadsafe. */
52 _hb_options = u;
56 /* hb_tag_t */
58 /**
59 * hb_tag_from_string:
60 * @str: (array length=len) (element-type uint8_t):
61 * @len:
65 * Return value:
67 * Since: 1.0
68 **/
69 hb_tag_t
70 hb_tag_from_string (const char *str, int len)
72 char tag[4];
73 unsigned int i;
75 if (!str || !len || !*str)
76 return HB_TAG_NONE;
78 if (len < 0 || len > 4)
79 len = 4;
80 for (i = 0; i < (unsigned) len && str[i]; i++)
81 tag[i] = str[i];
82 for (; i < 4; i++)
83 tag[i] = ' ';
85 return HB_TAG_CHAR4 (tag);
88 /**
89 * hb_tag_to_string:
90 * @tag:
91 * @buf: (array fixed-size=4):
95 * Since: 0.9.5
96 **/
97 void
98 hb_tag_to_string (hb_tag_t tag, char *buf)
100 buf[0] = (char) (uint8_t) (tag >> 24);
101 buf[1] = (char) (uint8_t) (tag >> 16);
102 buf[2] = (char) (uint8_t) (tag >> 8);
103 buf[3] = (char) (uint8_t) (tag >> 0);
107 /* hb_direction_t */
/* Direction names, indexed by (direction - HB_DIRECTION_LTR). */
const char direction_strings[][4] = {"ltr", "rtl", "ttb", "btt"};
117 * hb_direction_from_string:
118 * @str: (array length=len) (element-type uint8_t):
119 * @len:
123 * Return value:
125 * Since: 1.0
127 hb_direction_t
128 hb_direction_from_string (const char *str, int len)
130 if (unlikely (!str || !len || !*str))
131 return HB_DIRECTION_INVALID;
133 /* Lets match loosely: just match the first letter, such that
134 * all of "ltr", "left-to-right", etc work!
136 char c = TOLOWER (str[0]);
137 for (unsigned int i = 0; i < ARRAY_LENGTH (direction_strings); i++)
138 if (c == direction_strings[i][0])
139 return (hb_direction_t) (HB_DIRECTION_LTR + i);
141 return HB_DIRECTION_INVALID;
145 * hb_direction_to_string:
146 * @direction:
150 * Return value: (transfer none):
152 * Since: 1.0
154 const char *
155 hb_direction_to_string (hb_direction_t direction)
157 if (likely ((unsigned int) (direction - HB_DIRECTION_LTR)
158 < ARRAY_LENGTH (direction_strings)))
159 return direction_strings[direction - HB_DIRECTION_LTR];
161 return "invalid";
165 /* hb_language_t */
/*
 * Storage view of a language string.  The single-element array stands for
 * the first byte of the string; hb_language_to_string() hands out `s`
 * as the string pointer.
 */
struct hb_language_impl_t
{
  const char s[1];
};
/*
 * Byte canonicalization table for language strings: ASCII letters map to
 * lowercase, digits map to themselves, and '-', '@' and '_' map to '-'.
 * Every other byte (including 0x80..0xFF, left zero-initialized) maps to 0.
 */
static const char canon_map[256] = {
  /* 0x00 */   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x10 */   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x20 */   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, '-',   0,   0,
  /* 0x30 */ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',   0,   0,   0,   0,   0,   0,
  /* 0x40 */ '-', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x50 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',   0,   0,   0,   0, '-',
  /* 0x60 */   0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x70 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',   0,   0,   0,   0,   0
};
182 static bool
183 lang_equal (hb_language_t v1,
184 const void *v2)
186 const unsigned char *p1 = (const unsigned char *) v1;
187 const unsigned char *p2 = (const unsigned char *) v2;
189 while (*p1 && *p1 == canon_map[*p2])
190 p1++, p2++;
192 return *p1 == canon_map[*p2];
#if 0
/*
 * Currently compiled out.  Hashes the canon_map image of the string at
 * key, stopping at the first byte that maps to 0; each step computes
 * h * 31 + mapped byte.
 */
static unsigned int
lang_hash (const void *key)
{
  /* Explicit cast: C++ has no implicit conversion from const void *. */
  const unsigned char *p = (const unsigned char *) key;
  unsigned int h = 0;
  while (canon_map[*p])
    {
      h = (h << 5) - h + canon_map[*p];
      p++;
    }

  return h;
}
#endif
212 struct hb_language_item_t {
214 struct hb_language_item_t *next;
215 hb_language_t lang;
217 inline bool operator == (const char *s) const {
218 return lang_equal (lang, s);
221 inline hb_language_item_t & operator = (const char *s) {
222 lang = (hb_language_t) strdup (s);
223 for (unsigned char *p = (unsigned char *) lang; *p; p++)
224 *p = canon_map[*p];
226 return *this;
229 void finish (void) { free ((void *) lang); }
233 /* Thread-safe lock-free language list */
235 static hb_language_item_t *langs;
237 #ifdef HB_USE_ATEXIT
238 static
239 void free_langs (void)
241 while (langs) {
242 hb_language_item_t *next = langs->next;
243 langs->finish ();
244 free (langs);
245 langs = next;
248 #endif
250 static hb_language_item_t *
251 lang_find_or_insert (const char *key)
253 retry:
254 hb_language_item_t *first_lang = (hb_language_item_t *) hb_atomic_ptr_get (&langs);
256 for (hb_language_item_t *lang = first_lang; lang; lang = lang->next)
257 if (*lang == key)
258 return lang;
260 /* Not found; allocate one. */
261 hb_language_item_t *lang = (hb_language_item_t *) calloc (1, sizeof (hb_language_item_t));
262 if (unlikely (!lang))
263 return NULL;
264 lang->next = first_lang;
265 *lang = key;
267 if (!hb_atomic_ptr_cmpexch (&langs, first_lang, lang)) {
268 lang->finish ();
269 free (lang);
270 goto retry;
273 #ifdef HB_USE_ATEXIT
274 if (!first_lang)
275 atexit (free_langs); /* First person registers atexit() callback. */
276 #endif
278 return lang;
283 * hb_language_from_string:
284 * @str: (array length=len) (element-type uint8_t):
285 * @len:
289 * Return value: (transfer none):
291 * Since: 1.0
293 hb_language_t
294 hb_language_from_string (const char *str, int len)
296 char strbuf[64];
298 if (!str || !len || !*str)
299 return HB_LANGUAGE_INVALID;
301 if (len >= 0)
303 /* NUL-terminate it. */
304 len = MIN (len, (int) sizeof (strbuf) - 1);
305 memcpy (strbuf, str, len);
306 strbuf[len] = '\0';
307 str = strbuf;
310 hb_language_item_t *item = lang_find_or_insert (str);
312 return likely (item) ? item->lang : HB_LANGUAGE_INVALID;
316 * hb_language_to_string:
317 * @language:
321 * Return value: (transfer none):
323 * Since: 1.0
325 const char *
326 hb_language_to_string (hb_language_t language)
328 /* This is actually NULL-safe! */
329 return language->s;
333 * hb_language_get_default:
337 * Return value: (transfer none):
339 * Since: 1.0
341 hb_language_t
342 hb_language_get_default (void)
344 static hb_language_t default_language = HB_LANGUAGE_INVALID;
346 hb_language_t language = (hb_language_t) hb_atomic_ptr_get (&default_language);
347 if (unlikely (language == HB_LANGUAGE_INVALID)) {
348 language = hb_language_from_string (setlocale (LC_CTYPE, NULL), -1);
349 (void) hb_atomic_ptr_cmpexch (&default_language, HB_LANGUAGE_INVALID, language);
352 return default_language;
356 /* hb_script_t */
359 * hb_script_from_iso15924_tag:
360 * @tag:
364 * Return value:
366 * Since: 1.0
368 hb_script_t
369 hb_script_from_iso15924_tag (hb_tag_t tag)
371 if (unlikely (tag == HB_TAG_NONE))
372 return HB_SCRIPT_INVALID;
374 /* Be lenient, adjust case (one capital letter followed by three small letters) */
375 tag = (tag & 0xDFDFDFDFu) | 0x00202020u;
377 switch (tag) {
379 /* These graduated from the 'Q' private-area codes, but
380 * the old code is still aliased by Unicode, and the Qaai
381 * one in use by ICU. */
382 case HB_TAG('Q','a','a','i'): return HB_SCRIPT_INHERITED;
383 case HB_TAG('Q','a','a','c'): return HB_SCRIPT_COPTIC;
385 /* Script variants from http://unicode.org/iso15924/ */
386 case HB_TAG('C','y','r','s'): return HB_SCRIPT_CYRILLIC;
387 case HB_TAG('L','a','t','f'): return HB_SCRIPT_LATIN;
388 case HB_TAG('L','a','t','g'): return HB_SCRIPT_LATIN;
389 case HB_TAG('S','y','r','e'): return HB_SCRIPT_SYRIAC;
390 case HB_TAG('S','y','r','j'): return HB_SCRIPT_SYRIAC;
391 case HB_TAG('S','y','r','n'): return HB_SCRIPT_SYRIAC;
394 /* If it looks right, just use the tag as a script */
395 if (((uint32_t) tag & 0xE0E0E0E0u) == 0x40606060u)
396 return (hb_script_t) tag;
398 /* Otherwise, return unknown */
399 return HB_SCRIPT_UNKNOWN;
403 * hb_script_from_string:
404 * @s: (array length=len) (element-type uint8_t):
405 * @len:
409 * Return value:
411 * Since: 1.0
413 hb_script_t
414 hb_script_from_string (const char *s, int len)
416 return hb_script_from_iso15924_tag (hb_tag_from_string (s, len));
420 * hb_script_to_iso15924_tag:
421 * @script:
425 * Return value:
427 * Since: 1.0
429 hb_tag_t
430 hb_script_to_iso15924_tag (hb_script_t script)
432 return (hb_tag_t) script;
436 * hb_script_get_horizontal_direction:
437 * @script:
441 * Return value:
443 * Since: 1.0
445 hb_direction_t
446 hb_script_get_horizontal_direction (hb_script_t script)
448 /* http://goo.gl/x9ilM */
449 switch ((hb_tag_t) script)
451 /* Unicode-1.1 additions */
452 case HB_SCRIPT_ARABIC:
453 case HB_SCRIPT_HEBREW:
455 /* Unicode-3.0 additions */
456 case HB_SCRIPT_SYRIAC:
457 case HB_SCRIPT_THAANA:
459 /* Unicode-4.0 additions */
460 case HB_SCRIPT_CYPRIOT:
462 /* Unicode-4.1 additions */
463 case HB_SCRIPT_KHAROSHTHI:
465 /* Unicode-5.0 additions */
466 case HB_SCRIPT_PHOENICIAN:
467 case HB_SCRIPT_NKO:
469 /* Unicode-5.1 additions */
470 case HB_SCRIPT_LYDIAN:
472 /* Unicode-5.2 additions */
473 case HB_SCRIPT_AVESTAN:
474 case HB_SCRIPT_IMPERIAL_ARAMAIC:
475 case HB_SCRIPT_INSCRIPTIONAL_PAHLAVI:
476 case HB_SCRIPT_INSCRIPTIONAL_PARTHIAN:
477 case HB_SCRIPT_OLD_SOUTH_ARABIAN:
478 case HB_SCRIPT_OLD_TURKIC:
479 case HB_SCRIPT_SAMARITAN:
481 /* Unicode-6.0 additions */
482 case HB_SCRIPT_MANDAIC:
484 /* Unicode-6.1 additions */
485 case HB_SCRIPT_MEROITIC_CURSIVE:
486 case HB_SCRIPT_MEROITIC_HIEROGLYPHS:
488 /* Unicode-7.0 additions */
489 case HB_SCRIPT_MANICHAEAN:
490 case HB_SCRIPT_MENDE_KIKAKUI:
491 case HB_SCRIPT_NABATAEAN:
492 case HB_SCRIPT_OLD_NORTH_ARABIAN:
493 case HB_SCRIPT_PALMYRENE:
494 case HB_SCRIPT_PSALTER_PAHLAVI:
496 /* Unicode-8.0 additions */
497 case HB_SCRIPT_OLD_HUNGARIAN:
499 return HB_DIRECTION_RTL;
502 return HB_DIRECTION_LTR;
506 /* hb_user_data_array_t */
508 bool
509 hb_user_data_array_t::set (hb_user_data_key_t *key,
510 void * data,
511 hb_destroy_func_t destroy,
512 hb_bool_t replace)
514 if (!key)
515 return false;
517 if (replace) {
518 if (!data && !destroy) {
519 items.remove (key, lock);
520 return true;
523 hb_user_data_item_t item = {key, data, destroy};
524 bool ret = !!items.replace_or_insert (item, lock, replace);
526 return ret;
529 void *
530 hb_user_data_array_t::get (hb_user_data_key_t *key)
532 hb_user_data_item_t item = {NULL };
534 return items.find (key, &item, lock) ? item.data : NULL;
538 /* hb_version */
541 * hb_version:
542 * @major: (out): Library major version component.
543 * @minor: (out): Library minor version component.
544 * @micro: (out): Library micro version component.
546 * Returns library version as three integer components.
548 * Since: 1.0
550 void
551 hb_version (unsigned int *major,
552 unsigned int *minor,
553 unsigned int *micro)
555 *major = HB_VERSION_MAJOR;
556 *minor = HB_VERSION_MINOR;
557 *micro = HB_VERSION_MICRO;
561 * hb_version_string:
563 * Returns library version as a string with three components.
565 * Return value: library version string.
567 * Since: 1.0
569 const char *
570 hb_version_string (void)
572 return HB_VERSION_STRING;
576 * hb_version_atleast:
577 * @major:
578 * @minor:
579 * @micro:
583 * Return value:
585 * Since: 0.9.30
587 hb_bool_t
588 hb_version_atleast (unsigned int major,
589 unsigned int minor,
590 unsigned int micro)
592 return HB_VERSION_ATLEAST (major, minor, micro);