1 // Copyright 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "ui/gfx/text_utils.h"
7 #include "base/i18n/char_iterator.h"
8 #include "base/logging.h"
9 #include "base/numerics/safe_conversions.h"
10 #include "third_party/icu/source/common/unicode/uchar.h"
11 #include "third_party/icu/source/common/unicode/utf16.h"
17 // Returns true if the code point |c| is a combining mark character in Unicode.
18 bool CharIsMark(UChar32 c
) {
19 int8_t char_type
= u_charType(c
);
20 return char_type
== U_NON_SPACING_MARK
|| char_type
== U_ENCLOSING_MARK
||
21 char_type
== U_COMBINING_SPACING_MARK
;
24 // Gets the code point of |str| at the given code unit position |index|. If
25 // |index| is a surrogate code unit, returns the whole code point (unless the
26 // code unit is unpaired, in which case it just returns the surrogate value).
27 UChar32
GetCodePointAt(const base::string16
& str
, size_t index
) {
29 U16_GET(str
.data(), 0, index
, str
.size(), c
);
35 base::string16
RemoveAcceleratorChar(const base::string16
& s
,
36 base::char16 accelerator_char
,
37 int* accelerated_char_pos
,
38 int* accelerated_char_span
) {
40 ptrdiff_t last_char_pos
= -1;
41 int last_char_span
= 0;
42 base::i18n::UTF16CharIterator
chars(&s
);
43 base::string16 accelerator_removed
;
45 accelerator_removed
.reserve(s
.size());
46 while (!chars
.end()) {
47 int32 c
= chars
.get();
48 int array_pos
= chars
.array_pos();
51 if (c
!= accelerator_char
|| escaped
) {
52 int span
= chars
.array_pos() - array_pos
;
53 if (escaped
&& c
!= accelerator_char
) {
54 last_char_pos
= accelerator_removed
.size();
55 last_char_span
= span
;
57 for (int i
= 0; i
< span
; i
++)
58 accelerator_removed
.push_back(s
[array_pos
+ i
]);
65 if (accelerated_char_pos
)
66 *accelerated_char_pos
= last_char_pos
;
67 if (accelerated_char_span
)
68 *accelerated_char_span
= last_char_span
;
70 return accelerator_removed
;
73 size_t FindValidBoundaryBefore(const base::string16
& text
, size_t index
) {
74 size_t length
= text
.length();
75 DCHECK_LE(index
, length
);
79 // If |index| straddles a combining character sequence, go back until we find
81 while (index
> 0 && CharIsMark(GetCodePointAt(text
, index
)))
84 // If |index| straddles a UTF-16 surrogate pair, go back.
85 U16_SET_CP_START(text
.data(), 0, index
);
89 size_t FindValidBoundaryAfter(const base::string16
& text
, size_t index
) {
90 DCHECK_LE(index
, text
.length());
91 if (index
== text
.length())
94 int32_t text_index
= base::checked_cast
<int32_t>(index
);
95 int32_t text_length
= base::checked_cast
<int32_t>(text
.length());
97 // If |index| straddles a combining character sequence, go forward until we
98 // find a base character.
99 while (text_index
< text_length
&&
100 CharIsMark(GetCodePointAt(text
, text_index
))) {
104 // If |index| straddles a UTF-16 surrogate pair, go forward.
105 U16_SET_CP_LIMIT(text
.data(), 0, text_index
, text_length
);
106 return static_cast<size_t>(text_index
);