/*
 * Copyright (C) 2014 Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
34 #include "platform/PlatformExport.h"
35 #include "platform/text/TextDirection.h"
36 #include "platform/text/TextPath.h"
37 #include "platform/text/TextRun.h"
38 #include "wtf/HashSet.h"
39 #include "wtf/text/CharacterNames.h"
40 #include "wtf/text/WTFString.h"
44 class PLATFORM_EXPORT Character
{
46 static CodePath
characterRangeCodePath(const LChar
*, unsigned) { return SimplePath
; }
47 static CodePath
characterRangeCodePath(const UChar
*, unsigned len
);
49 static inline bool isInRange(UChar32 character
, UChar32 lowerBound
, UChar32 upperBound
)
51 return character
>= lowerBound
&& character
<= upperBound
;
54 static inline bool isUnicodeVariationSelector(UChar32 character
)
56 // http://www.unicode.org/Public/UCD/latest/ucd/StandardizedVariants.html
57 return isInRange(character
, 0x180B, 0x180D) // MONGOLIAN FREE VARIATION SELECTOR ONE to THREE
58 || isInRange(character
, 0xFE00, 0xFE0F) // VARIATION SELECTOR-1 to 16
59 || isInRange(character
, 0xE0100, 0xE01EF); // VARIATION SELECTOR-17 to 256
62 static bool isCJKIdeograph(UChar32
);
63 static bool isCJKIdeographOrSymbol(UChar32
);
65 static unsigned expansionOpportunityCount(const LChar
*, size_t length
, TextDirection
, bool& isAfterExpansion
, const TextJustify
);
66 static unsigned expansionOpportunityCount(const UChar
*, size_t length
, TextDirection
, bool& isAfterExpansion
, const TextJustify
);
68 static bool isUprightInMixedVertical(UChar32 character
);
70 static bool treatAsSpace(UChar c
)
72 return c
== spaceCharacter
73 || c
== tabulationCharacter
74 || c
== newlineCharacter
75 || c
== noBreakSpaceCharacter
;
77 static bool treatAsZeroWidthSpace(UChar c
)
79 return treatAsZeroWidthSpaceInComplexScript(c
)
80 || c
== zeroWidthNonJoinerCharacter
81 || c
== zeroWidthJoinerCharacter
;
83 static bool treatAsZeroWidthSpaceInComplexScript(UChar c
)
85 return c
< 0x20 // ASCII Control Characters
86 || (c
>= 0x7F && c
< 0xA0) // ASCII Delete .. No-break spaceCharacter
87 || c
== softHyphenCharacter
88 || c
== zeroWidthSpaceCharacter
89 || (c
>= leftToRightMarkCharacter
&& c
<= rightToLeftMarkCharacter
)
90 || (c
>= leftToRightEmbedCharacter
&& c
<= rightToLeftOverrideCharacter
)
91 || c
== zeroWidthNoBreakSpaceCharacter
92 || c
== objectReplacementCharacter
;
94 static bool canReceiveTextEmphasis(UChar32
);
96 static inline UChar
normalizeSpaces(UChar character
)
98 if (treatAsSpace(character
))
99 return spaceCharacter
;
101 if (treatAsZeroWidthSpace(character
))
102 return zeroWidthSpaceCharacter
;
107 static inline bool isNormalizedCanvasSpaceCharacter(UChar c
)
109 // According to specification all space characters should be replaced with 0x0020 space character.
110 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-canvas-element.html#text-preparation-algorithm
111 // The space characters according to specification are : U+0020, U+0009, U+000A, U+000C, and U+000D.
112 // http://www.whatwg.org/specs/web-apps/current-work/multipage/common-microsyntaxes.html#space-character
113 // This function returns true for 0x000B also, so that this is backward compatible.
114 // Otherwise, the test LayoutTests/canvas/philip/tests/2d.text.draw.space.collapse.space.html will fail
115 return c
== 0x0009 || (c
>= 0x000A && c
<= 0x000D);
118 static String
normalizeSpaces(const LChar
*, unsigned length
);
119 static String
normalizeSpaces(const UChar
*, unsigned length
);