1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: unichars.c,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
32 #include "osl/diagnose.h"
33 #include "sal/types.h"
35 int ImplIsNoncharacter(sal_uInt32 nUtf32
)
37 /* All code points that are noncharacters, as of Unicode 3.1.1. */
38 return (nUtf32
>= 0xFDD0 && nUtf32
<= 0xFDEF)
39 || (nUtf32
& 0xFFFF) >= 0xFFFE
43 int ImplIsControlOrFormat(sal_uInt32 nUtf32
)
45 /* All code points of <http://www.unicode.org/Public/UNIDATA/
46 UnicodeData.txt>, Version 3.1.1, that have a General Category of Cc
47 (Other, Control) or Cf (Other, Format).
49 return nUtf32
<= 0x001F
50 || (nUtf32
>= 0x007F && nUtf32
<= 0x009F)
51 || nUtf32
== 0x070F /* SYRIAC ABBREVIATION MARK */
52 || nUtf32
== 0x180B /* MONGOLIAN FREE VARIATION SELECTOR ONE */
53 || nUtf32
== 0x180C /* MONGOLIAN FREE VARIATION SELECTOR TWO */
54 || nUtf32
== 0x180D /* MONGOLIAN FREE VARIATION SELECTOR THREE */
55 || nUtf32
== 0x180E /* MONGOLIAN VOWEL SEPARATOR */
56 || nUtf32
== 0x200C /* ZERO WIDTH NON-JOINER */
57 || nUtf32
== 0x200D /* ZERO WIDTH JOINER */
58 || nUtf32
== 0x200E /* LEFT-TO-RIGHT MARK */
59 || nUtf32
== 0x200F /* RIGHT-TO-LEFT MARK */
60 || nUtf32
== 0x202A /* LEFT-TO-RIGHT EMBEDDING */
61 || nUtf32
== 0x202B /* RIGHT-TO-LEFT EMBEDDING */
62 || nUtf32
== 0x202C /* POP DIRECTIONAL FORMATTING */
63 || nUtf32
== 0x202D /* LEFT-TO-RIGHT OVERRIDE */
64 || nUtf32
== 0x202E /* RIGHT-TO-LEFT OVERRIDE */
65 || nUtf32
== 0x206A /* INHIBIT SYMMETRIC SWAPPING */
66 || nUtf32
== 0x206B /* ACTIVATE SYMMETRIC SWAPPING */
67 || nUtf32
== 0x206C /* INHIBIT ARABIC FORM SHAPING */
68 || nUtf32
== 0x206D /* ACTIVATE ARABIC FORM SHAPING */
69 || nUtf32
== 0x206E /* NATIONAL DIGIT SHAPES */
70 || nUtf32
== 0x206F /* NOMINAL DIGIT SHAPES */
71 || nUtf32
== 0xFEFF /* ZERO WIDTH NO-BREAK SPACE */
72 || nUtf32
== 0xFFF9 /* INTERLINEAR ANNOTATION ANCHOR */
73 || nUtf32
== 0xFFFA /* INTERLINEAR ANNOTATION SEPARATOR */
74 || nUtf32
== 0xFFFB /* INTERLINEAR ANNOTATION TERMINATOR */
75 || nUtf32
== 0x1D173 /* MUSICAL SYMBOL BEGIN BEAM */
76 || nUtf32
== 0x1D174 /* MUSICAL SYMBOL END BEAM */
77 || nUtf32
== 0x1D175 /* MUSICAL SYMBOL BEGIN TIE */
78 || nUtf32
== 0x1D176 /* MUSICAL SYMBOL END TIE */
79 || nUtf32
== 0x1D177 /* MUSICAL SYMBOL BEGIN SLUR */
80 || nUtf32
== 0x1D178 /* MUSICAL SYMBOL END SLUR */
81 || nUtf32
== 0x1D179 /* MUSICAL SYMBOL BEGIN PHRASE */
82 || nUtf32
== 0x1D17A /* MUSICAL SYMBOL END PHRASE */
83 || nUtf32
== 0xE0001 /* LANGUAGE TAG */
84 || (nUtf32
>= 0xE0020 && nUtf32
<= 0xE007F);
87 int ImplIsHighSurrogate(sal_uInt32 nUtf32
)
89 /* All code points that are high-surrogates, as of Unicode 3.1.1. */
90 return nUtf32
>= 0xD800 && nUtf32
<= 0xDBFF;
93 int ImplIsLowSurrogate(sal_uInt32 nUtf32
)
95 /* All code points that are low-surrogates, as of Unicode 3.1.1. */
96 return nUtf32
>= 0xDC00 && nUtf32
<= 0xDFFF;
99 int ImplIsPrivateUse(sal_uInt32 nUtf32
)
101 /* All code points of <http://www.unicode.org/Public/UNIDATA/
102 UnicodeData.txt>, Version 3.1.1, that have a General Category of Co
103 (Other, Private Use).
105 return (nUtf32
>= 0xE000 && nUtf32
<= 0xF8FF)
106 || (nUtf32
>= 0xF0000 && nUtf32
<= 0xFFFFD)
107 || (nUtf32
>= 0x100000 && nUtf32
<= 0x10FFFD);
110 int ImplIsZeroWidth(sal_uInt32 nUtf32
)
112 /* All code points of <http://www.unicode.org/Public/UNIDATA/
113 UnicodeData.txt>, Version 3.1.1, that have "ZERO WIDTH" in their
116 return nUtf32
== 0x200B /* ZERO WIDTH SPACE */
117 || nUtf32
== 0x200C /* ZERO WIDTH NON-JOINER */
118 || nUtf32
== 0x200D /* ZERO WIDTH JOINER */
119 || nUtf32
== 0xFEFF; /* ZEOR WIDTH NO-BREAK SPACE */
122 sal_uInt32
ImplGetHighSurrogate(sal_uInt32 nUtf32
)
124 OSL_ENSURE(nUtf32
>= 0x10000, "specification violation");
125 return ((nUtf32
- 0x10000) >> 10) | 0xD800;
128 sal_uInt32
ImplGetLowSurrogate(sal_uInt32 nUtf32
)
130 OSL_ENSURE(nUtf32
>= 0x10000, "specification violation");
131 return ((nUtf32
- 0x10000) & 0x3FF) | 0xDC00;
134 sal_uInt32
ImplCombineSurrogates(sal_uInt32 nHigh
, sal_uInt32 nLow
)
136 OSL_ENSURE(ImplIsHighSurrogate(nHigh
) && ImplIsLowSurrogate(nLow
),
137 "specification violation");
138 return (((nHigh
& 0x3FF) << 10) | (nLow
& 0x3FF)) + 0x10000;