1 /* Categories of Unicode characters.
2 Copyright (C) 2002, 2006-2007, 2011-2024 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2011.
5 This file is free software.
6 It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
7 You can redistribute it and/or modify it under either
8 - the terms of the GNU Lesser General Public License as published
9 by the Free Software Foundation, either version 3, or (at your
10 option) any later version, or
11 - the terms of the GNU General Public License as published by the
12 Free Software Foundation; either version 2, or (at your option) any later version, or
14 - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
16 This file is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License and the GNU General Public License for more details.
22 You should have received a copy of the GNU Lesser General Public
23 License and of the GNU General Public License along with this
24 program. If not, see <https://www.gnu.org/licenses/>. */
/* Table of long names of Unicode general categories.
   It is declared with 30 rows, each a fixed-width buffer of 22 bytes
   (room for a name of up to 21 characters plus the terminating NUL).
   uc_general_category_long_name indexes this table with the bit number
   it derives from a single-bit category bitmask.  */
31 static const char u_category_long_name
[30][22] =
44 "Connector Punctuation",
48 "Initial Punctuation",
57 "Paragraph Separator",
66 uc_general_category_long_name (uc_general_category_t category
)
68 uint32_t bitmask
= category
.bitmask
;
69 /* bitmask should consist of a single bit. */
72 if ((bitmask
& (bitmask
- 1)) == 0)
75 /* Take log2 using a variant of Robert Harley's method.
76 Found by Bruno Haible 1996. */
78 static const char ord2_tab
[64] =
80 -1, 0, 1, 12, 2, 6, -1, 13, 3, -1, 7, -1, -1, -1, -1, 14,
81 10, 4, -1, -1, 8, -1, -1, 25, -1, -1, -1, -1, -1, 21, 27, 15,
82 31, 11, 5, -1, -1, -1, -1, -1, 9, -1, -1, 24, -1, -1, 20, 26,
83 30, -1, -1, -1, -1, 23, -1, 19, 29, -1, 22, 18, 28, 17, 16, -1
88 bit
= ord2_tab
[n
>> 26];
90 if (bit
< sizeof (u_category_long_name
) / sizeof (u_category_long_name
[0]))
91 return u_category_long_name
[bit
];
95 if (bitmask
== UC_CATEGORY_MASK_L
)
97 if (bitmask
== UC_CATEGORY_MASK_LC
)
98 return "Cased Letter";
99 if (bitmask
== UC_CATEGORY_MASK_M
)
101 if (bitmask
== UC_CATEGORY_MASK_N
)
103 if (bitmask
== UC_CATEGORY_MASK_P
)
104 return "Punctuation";
105 if (bitmask
== UC_CATEGORY_MASK_S
)
107 if (bitmask
== UC_CATEGORY_MASK_Z
)
109 if (bitmask
== UC_CATEGORY_MASK_C
)