Disable ContentSettingBubbleModelTest.RPHAllow which is flaky.
[chromium-blink-merge.git] / content / renderer / hyphenator / hyphenator.cc
blob564fe46bf69ead353dfff98f749a11f61c125a0a
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/renderer/hyphenator/hyphenator.h"
7 #include "base/files/memory_mapped_file.h"
8 #include "base/logging.h"
9 #include "base/memory/scoped_ptr.h"
10 #include "base/string_util.h"
11 #include "base/utf_string_conversions.h"
12 #include "content/common/hyphenator_messages.h"
13 #include "content/public/renderer/render_thread.h"
14 #include "third_party/hyphen/hyphen.h"
15 #include "third_party/icu/public/common/unicode/uscript.h"
17 namespace {
19 // A class that converts a sequence of UTF-8 characters to UTF-16 ones and holds
20 // only the length of converted UTF-16 characters. This class is used for
21 // creating a mapping from the position of a UTF-8 string to a position of a
22 // UTF-16 string without unnecessary conversions. Even though the following
23 // snippet produces the same mapping, it needs to convert same characters many
24 // times. This class incrementally counts the number of converted UTF-16
25 // characters to avoid this problem.
27 // scoped_ptr<size_t[]> position(new size_t[text.length()]);
28 // for (size_t i = 0; i < text.length(); ++i)
29 // position[i] = UTF8ToUTF16(text.substr(0, i)).length();
31 class UTF16TextLength {
32 public:
33 UTF16TextLength();
34 ~UTF16TextLength();
36 // Returns the current position.
37 int utf16_length() const { return utf16_length_; }
39 // Appends one UTF-8 character to this converter and advances the converted
40 // position. This converter increases the position by one when it finishes
41 // reading a BMP character and increases by two when it finish reading a
42 // non-BMP character.
43 void Append(char c);
45 private:
46 // The length of the converted UTF-16 text.
47 int utf16_length_;
49 // The buffer that stores UTF-8 characters being converted.
50 std::string utf8_text_;
52 DISALLOW_COPY_AND_ASSIGN(UTF16TextLength);
55 UTF16TextLength::UTF16TextLength()
56 : utf16_length_(0) {
59 UTF16TextLength::~UTF16TextLength() {
62 void UTF16TextLength::Append(char c) {
63 // Append the given character and try converting the UTF-8 characters in this
64 // buffer to Unicode codepoints. If this buffer includes a Unicode codepoint,
65 // get the number of UTF-16 characters representing this codepoint and advance
66 // the position.
67 int code = 0;
68 int index = 0;
69 utf8_text_.push_back(c);
70 U8_NEXT(utf8_text_.data(), index, static_cast<int>(utf8_text_.length()),
71 code);
72 if (code != U_SENTINEL) {
73 utf8_text_.clear();
74 utf16_length_ += U16_LENGTH(code);
78 // A class that encapsulates a hyphenation query. This class owns resources
79 // temporarily needed for hyphenating one word, and deletes them when it is
80 // deleted as listed in the following snippet.
82 // std::vector<int> hyphens;
83 // QUery query(UTF8ToUTF16("hyphenate"));
84 // query.Hyphenate(dict, &hyphens);
86 class Query {
87 public:
88 explicit Query(const string16& word);
89 ~Query();
91 // Hyphenates a word with the specified dictionary. This function hyphenates
92 // the word provided to its constructor and returns a list of hyphenation
93 // points, positions where we can insert hyphens.
94 bool Hyphenate(HyphenDict* dictionary, std::vector<int>* hyphen_offsets);
96 private:
97 // A word to be hyphenated.
98 std::string word_utf8_;
100 // Return variables from the hyphen library.
101 scoped_ptr<char[]> hyphen_vector_;
102 char** rep_;
103 int* pos_;
104 int* cut_;
106 DISALLOW_COPY_AND_ASSIGN(Query);
109 Query::Query(const string16& word)
110 : rep_(NULL),
111 pos_(NULL),
112 cut_(NULL) {
113 // Remove trailing punctuation characters. WebKit does not remove these
114 // characters when it hyphenates a word. These characters prevent the hyphen
115 // library from applying some rules, i.e. they prevent the library from adding
116 // hyphens.
117 DCHECK(!word.empty());
118 const char16* data = word.data();
119 int length = static_cast<int>(word.length());
120 while (length > 0) {
121 int previous = length;
122 int code = 0;
123 U16_PREV(data, 0, previous, code);
124 UErrorCode error = U_ZERO_ERROR;
125 if (uscript_getScript(code, &error) != USCRIPT_COMMON)
126 break;
127 length = previous;
129 UTF16ToUTF8(word.c_str(), length, &word_utf8_);
130 // Create a hyphen vector used by hnj_hyphen_hyphenate2(). We allocate a
131 // buffer of |word_.length()| + 5 as written in Line 112 of
132 // <http://cs.chromium.org/src/third_party/hyphen/hyphen.h>.
133 hyphen_vector_.reset(new char[word_utf8_.length() + 5]);
136 Query::~Query() {
137 if (rep_) {
138 for (size_t i = 0; i < word_utf8_.length(); ++i) {
139 if (rep_[i])
140 free(rep_[i]);
142 free(rep_);
144 if (pos_)
145 free(pos_);
146 if (cut_)
147 free(cut_);
150 bool Query::Hyphenate(HyphenDict* dictionary,
151 std::vector<int>* hyphen_offsets) {
152 DCHECK(dictionary);
153 DCHECK(hyphen_offsets);
155 int error_code = hnj_hyphen_hyphenate2(dictionary,
156 word_utf8_.data(),
157 static_cast<int>(word_utf8_.length()),
158 hyphen_vector_.get(),
159 NULL,
160 &rep_,
161 &pos_,
162 &cut_);
163 if (error_code)
164 return false;
166 // WebKit needs hyphenation points counted in UTF-16 characters. On the other
167 // hand, the hyphen library returns hyphenation points counted in UTF-8
168 // characters. We increamentally convert hyphenation points in UTF-8
169 // characters to hyphenation points in UTF-16 characters and write the
170 // converted hyphenation points to the output vector.
171 UTF16TextLength text_length;
172 hyphen_offsets->clear();
173 for (size_t i = 0; i < word_utf8_.length(); ++i) {
174 text_length.Append(word_utf8_[i]);
175 if (hyphen_vector_[i] & 1)
176 hyphen_offsets->push_back(text_length.utf16_length());
178 return !hyphen_offsets->empty();
181 } // namespace
183 namespace content {
185 Hyphenator::Hyphenator(base::PlatformFile file)
186 : dictionary_(NULL),
187 dictionary_file_(base::FdopenPlatformFile(file, "r")),
188 result_(0) {
191 Hyphenator::~Hyphenator() {
192 if (dictionary_)
193 hnj_hyphen_free(dictionary_);
196 bool Hyphenator::Initialize() {
197 if (dictionary_)
198 return true;
200 if (!dictionary_file_.get())
201 return false;
202 dictionary_ = hnj_hyphen_load_file(dictionary_file_.get());
203 return !!dictionary_;
206 bool Hyphenator::Attach(RenderThread* thread, const string16& locale) {
207 if (!thread)
208 return false;
209 locale_.assign(locale);
210 thread->AddObserver(this);
211 return thread->Send(new HyphenatorHostMsg_OpenDictionary(locale));
214 bool Hyphenator::CanHyphenate(const string16& locale) {
215 return !locale_.compare(locale);
218 size_t Hyphenator::ComputeLastHyphenLocation(const string16& word,
219 size_t before_index) {
220 if (!Initialize() || word.empty())
221 return 0;
223 // Call the hyphen library to get all hyphenation points, i.e. positions where
224 // we can insert hyphens. When WebKit finds a line-break, it calls this
225 // function twice or more with the same word to find the best hyphenation
226 // point. To avoid calling the hyphen library twice or more with the same
227 // word, we cache the last query.
228 if (word_ != word) {
229 word_ = word;
230 Query query(word);
231 result_ = query.Hyphenate(dictionary_, &hyphen_offsets_);
233 if (!result_)
234 return 0;
235 for (std::vector<int>::reverse_iterator it = hyphen_offsets_.rbegin();
236 it != hyphen_offsets_.rend(); ++it) {
237 if (static_cast<size_t>(*it) < before_index)
238 return *it;
240 return 0;
243 bool Hyphenator::OnControlMessageReceived(const IPC::Message& message) {
244 bool handled = true;
245 IPC_BEGIN_MESSAGE_MAP(Hyphenator, message)
246 IPC_MESSAGE_HANDLER(HyphenatorMsg_SetDictionary, OnSetDictionary)
247 IPC_MESSAGE_UNHANDLED(handled = false)
248 IPC_END_MESSAGE_MAP()
249 return handled;
252 void Hyphenator::OnSetDictionary(IPC::PlatformFileForTransit file) {
253 base::PlatformFile rule_file =
254 IPC::PlatformFileForTransitToPlatformFile(file);
255 if (rule_file == base::kInvalidPlatformFileValue)
256 return;
257 // Delete the current dictionary and save the given file to this object. We
258 // initialize the hyphen library the first time when WebKit actually
259 // hyphenates a word, i.e. when WebKit calls the ComputeLastHyphenLocation
260 // function. (WebKit does not always hyphenate words even when it calls the
261 // CanHyphenate function, e.g. WebKit does not have to hyphenate words when it
262 // does not have to break text into lines.)
263 if (dictionary_) {
264 hnj_hyphen_free(dictionary_);
265 dictionary_ = NULL;
267 dictionary_file_.Set(base::FdopenPlatformFile(rule_file, "r"));
270 } // namespace content