Roll src/third_party/WebKit d9c6159:8139f33 (svn 201974:201975)
[chromium-blink-merge.git] / chrome / renderer / spellchecker / spellcheck_worditerator_unittest.cc
blob94f71695ddf1d519af9cfa1de090bda28f86ed56
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <string>
6 #include <vector>
8 #include "base/format_macros.h"
9 #include "base/i18n/break_iterator.h"
10 #include "base/strings/string_split.h"
11 #include "base/strings/stringprintf.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h"
14 #include "testing/gtest/include/gtest/gtest.h"
16 using base::i18n::BreakIterator;
18 namespace {
20 struct TestCase {
21 const char* language;
22 bool allow_contraction;
23 const wchar_t* expected_words;
26 base::string16 GetRulesForLanguage(const std::string& language) {
27 SpellcheckCharAttribute attribute;
28 attribute.SetDefaultLanguage(language);
29 return attribute.GetRuleSet(true);
32 } // namespace
34 // Tests whether or not our SpellcheckWordIterator can extract words used by the
35 // specified language from a multi-language text.
36 TEST(SpellcheckWordIteratorTest, SplitWord) {
37 // An input text. This text includes words of several languages. (Some words
38 // are not separated with whitespace characters.) Our SpellcheckWordIterator
39 // should extract the words used by the specified language from this text and
40 // normalize them so our spell-checker can check their spellings. If
41 // characters are found that are not from the specified language the test
42 // skips them.
43 const wchar_t kTestText[] =
44 // Graphic characters
45 L"!@#$%^&*()"
46 // Latin (including a contraction character and a ligature).
47 L"hello:hello a\xFB03x"
48 // Greek
49 L"\x03B3\x03B5\x03B9\x03AC\x0020\x03C3\x03BF\x03C5"
50 // Cyrillic
51 L"\x0437\x0434\x0440\x0430\x0432\x0441\x0442\x0432"
52 L"\x0443\x0439\x0442\x0435"
53 // Hebrew (including niqquds)
54 L"\x05e9\x05c1\x05b8\x05dc\x05d5\x05b9\x05dd "
55 // Hebrew words with U+0027 and U+05F3
56 L"\x05e6\x0027\x05d9\x05e4\x05e1 \x05e6\x05F3\x05d9\x05e4\x05e1 "
57 // Hebrew words with U+0022 and U+05F4
58 L"\x05e6\x05d4\x0022\x05dc \x05e6\x05d4\x05f4\x05dc "
59 // Hebrew words enclosed with ASCII quotes.
60 L"\"\x05e6\x05d4\x0022\x05dc\" '\x05e9\x05c1\x05b8\x05dc\x05d5'"
61 // Arabic (including vowel marks)
62 L"\x0627\x064e\x0644\x0633\x064e\x0651\x0644\x0627"
63 L"\x0645\x064f\x0020\x0639\x064e\x0644\x064e\x064a"
64 L"\x0652\x0643\x064f\x0645\x0652"
65 // Hindi
66 L"\x0930\x093E\x091C\x0927\x093E\x0928"
67 // Thai
68 L"\x0e2a\x0e27\x0e31\x0e2a\x0e14\x0e35\x0020\x0e04"
69 L"\x0e23\x0e31\x0e1a"
70 // Hiraganas
71 L"\x3053\x3093\x306B\x3061\x306F"
72 // CJKV ideographs
73 L"\x4F60\x597D"
74 // Hangul Syllables
75 L"\xC548\xB155\xD558\xC138\xC694"
76 // Full-width latin : Hello
77 L"\xFF28\xFF45\xFF4C\xFF4C\xFF4F "
78 L"e.g.,";
80 // The languages and expected results used in this test.
81 static const TestCase kTestCases[] = {
83 // English (keep contraction words)
84 "en-US", true, L"hello:hello affix Hello e.g"
85 }, {
86 // English (split contraction words)
87 "en-US", false, L"hello hello affix Hello e g"
88 }, {
89 // Greek
90 "el-GR", true,
91 L"\x03B3\x03B5\x03B9\x03AC\x0020\x03C3\x03BF\x03C5"
92 }, {
93 // Russian
94 "ru-RU", true,
95 L"\x0437\x0434\x0440\x0430\x0432\x0441\x0442\x0432"
96 L"\x0443\x0439\x0442\x0435"
97 }, {
98 // Hebrew
99 "he-IL", true,
100 L"\x05e9\x05dc\x05d5\x05dd "
101 L"\x05e6\x0027\x05d9\x05e4\x05e1 \x05e6\x05F3\x05d9\x05e4\x05e1 "
102 L"\x05e6\x05d4\x0022\x05dc \x05e6\x05d4\x05f4\x05dc "
103 L"\x05e6\x05d4\x0022\x05dc \x05e9\x05dc\x05d5"
104 }, {
105 // Arabic
106 "ar", true,
107 L"\x0627\x0644\x0633\x0644\x0627\x0645\x0020\x0639"
108 L"\x0644\x064a\x0643\x0645"
109 }, {
110 // Hindi
111 "hi-IN", true,
112 L"\x0930\x093E\x091C\x0927\x093E\x0928"
113 }, {
114 // Thai
115 "th-TH", true,
116 L"\x0e2a\x0e27\x0e31\x0e2a\x0e14\x0e35\x0020\x0e04"
117 L"\x0e23\x0e31\x0e1a"
118 }, {
119 // Korean
120 "ko-KR", true,
121 L"\x110b\x1161\x11ab\x1102\x1167\x11bc\x1112\x1161"
122 L"\x1109\x1166\x110b\x116d"
126 for (size_t i = 0; i < arraysize(kTestCases); ++i) {
127 SCOPED_TRACE(base::StringPrintf("kTestCases[%" PRIuS "]: language=%s", i,
128 kTestCases[i].language));
130 SpellcheckCharAttribute attributes;
131 attributes.SetDefaultLanguage(kTestCases[i].language);
133 base::string16 input(base::WideToUTF16(kTestText));
134 SpellcheckWordIterator iterator;
135 EXPECT_TRUE(iterator.Initialize(&attributes,
136 kTestCases[i].allow_contraction));
137 EXPECT_TRUE(iterator.SetText(input.c_str(), input.length()));
139 std::vector<base::string16> expected_words = base::SplitString(
140 base::WideToUTF16(kTestCases[i].expected_words),
141 base::string16(1, ' '), base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
143 base::string16 actual_word;
144 int actual_start, actual_end;
145 size_t index = 0;
146 for (SpellcheckWordIterator::WordIteratorStatus status =
147 iterator.GetNextWord(&actual_word, &actual_start, &actual_end);
148 status != SpellcheckWordIterator::IS_END_OF_TEXT;
149 status =
150 iterator.GetNextWord(&actual_word, &actual_start, &actual_end)) {
151 if (status == SpellcheckWordIterator::WordIteratorStatus::IS_SKIPPABLE)
152 continue;
154 EXPECT_TRUE(index < expected_words.size());
155 if (index < expected_words.size())
156 EXPECT_EQ(expected_words[index], actual_word);
157 ++index;
162 // Tests whether our SpellcheckWordIterator extracts an empty word without
163 // getting stuck in an infinite loop when inputting a Khmer text. (This is a
164 // regression test for Issue 46278.)
165 TEST(SpellcheckWordIteratorTest, RuleSetConsistency) {
166 SpellcheckCharAttribute attributes;
167 attributes.SetDefaultLanguage("en-US");
169 const wchar_t kTestText[] = L"\x1791\x17c1\x002e";
170 base::string16 input(base::WideToUTF16(kTestText));
172 SpellcheckWordIterator iterator;
173 EXPECT_TRUE(iterator.Initialize(&attributes, true));
174 EXPECT_TRUE(iterator.SetText(input.c_str(), input.length()));
176 // When SpellcheckWordIterator uses an inconsistent ICU ruleset, the following
177 // iterator.GetNextWord() calls get stuck in an infinite loop. Therefore, this
178 // test succeeds if this call returns without timeouts.
179 base::string16 actual_word;
180 int actual_start, actual_end;
181 SpellcheckWordIterator::WordIteratorStatus status;
182 for (status = iterator.GetNextWord(&actual_word, &actual_start, &actual_end);
183 status == SpellcheckWordIterator::IS_SKIPPABLE;
184 status =
185 iterator.GetNextWord(&actual_word, &actual_start, &actual_end)) {
186 continue;
189 EXPECT_EQ(SpellcheckWordIterator::WordIteratorStatus::IS_END_OF_TEXT, status);
190 EXPECT_EQ(0, actual_start);
191 EXPECT_EQ(0, actual_end);
194 // Vertify our SpellcheckWordIterator can treat ASCII numbers as word characters
195 // on LTR languages. On the other hand, it should not treat ASCII numbers as
196 // word characters on RTL languages because they change the text direction from
197 // RTL to LTR.
198 TEST(SpellcheckWordIteratorTest, TreatNumbersAsWordCharacters) {
199 // A set of a language, a dummy word, and a text direction used in this test.
200 // For each language, this test splits a dummy word, which consists of ASCII
201 // numbers and an alphabet of the language, into words. When ASCII numbers are
202 // treated as word characters, the split word becomes equal to the dummy word.
203 // Otherwise, the split word does not include ASCII numbers.
204 static const struct {
205 const char* language;
206 const wchar_t* text;
207 bool left_to_right;
208 } kTestCases[] = {
210 // English
211 "en-US", L"0123456789" L"a", true,
212 }, {
213 // Greek
214 "el-GR", L"0123456789" L"\x03B1", true,
215 }, {
216 // Russian
217 "ru-RU", L"0123456789" L"\x0430", true,
218 }, {
219 // Hebrew
220 "he-IL", L"0123456789" L"\x05D0", false,
221 }, {
222 // Arabic
223 "ar", L"0123456789" L"\x0627", false,
224 }, {
225 // Hindi
226 "hi-IN", L"0123456789" L"\x0905", true,
227 }, {
228 // Thai
229 "th-TH", L"0123456789" L"\x0e01", true,
230 }, {
231 // Korean
232 "ko-KR", L"0123456789" L"\x1100\x1161", true,
236 for (size_t i = 0; i < arraysize(kTestCases); ++i) {
237 SCOPED_TRACE(base::StringPrintf("kTestCases[%" PRIuS "]: language=%s", i,
238 kTestCases[i].language));
240 SpellcheckCharAttribute attributes;
241 attributes.SetDefaultLanguage(kTestCases[i].language);
243 base::string16 input_word(base::WideToUTF16(kTestCases[i].text));
244 SpellcheckWordIterator iterator;
245 EXPECT_TRUE(iterator.Initialize(&attributes, true));
246 EXPECT_TRUE(iterator.SetText(input_word.c_str(), input_word.length()));
248 base::string16 actual_word;
249 int actual_start, actual_end;
250 SpellcheckWordIterator::WordIteratorStatus status;
251 for (status =
252 iterator.GetNextWord(&actual_word, &actual_start, &actual_end);
253 status == SpellcheckWordIterator::IS_SKIPPABLE;
254 status =
255 iterator.GetNextWord(&actual_word, &actual_start, &actual_end)) {
256 continue;
259 EXPECT_EQ(SpellcheckWordIterator::WordIteratorStatus::IS_WORD, status);
260 if (kTestCases[i].left_to_right)
261 EXPECT_EQ(input_word, actual_word);
262 else
263 EXPECT_NE(input_word, actual_word);
267 // Vertify SpellcheckWordIterator treats typographical apostrophe as a part of
268 // the word.
269 TEST(SpellcheckWordIteratorTest, TypographicalApostropheIsPartOfWord) {
270 static const struct {
271 const char* language;
272 const wchar_t* word;
273 } kTestCases[] = {
274 // Typewriter apostrophe:
276 "en-AU", L"you're"
277 }, {
278 "en-CA", L"you're"
279 }, {
280 "en-GB", L"you're"
281 }, {
282 "en-US", L"you're"
284 // Typographical apostrophe:
286 "en-AU", L"you\x2019re"
287 }, {
288 "en-CA", L"you\x2019re"
289 }, {
290 "en-GB", L"you\x2019re"
291 }, {
292 "en-US", L"you\x2019re"
296 for (size_t i = 0; i < arraysize(kTestCases); ++i) {
297 SpellcheckCharAttribute attributes;
298 attributes.SetDefaultLanguage(kTestCases[i].language);
300 base::string16 input_word(base::WideToUTF16(kTestCases[i].word));
301 SpellcheckWordIterator iterator;
302 EXPECT_TRUE(iterator.Initialize(&attributes, true));
303 EXPECT_TRUE(iterator.SetText(input_word.c_str(), input_word.length()));
305 base::string16 actual_word;
306 int actual_start, actual_end;
307 SpellcheckWordIterator::WordIteratorStatus status;
308 for (status =
309 iterator.GetNextWord(&actual_word, &actual_start, &actual_end);
310 status == SpellcheckWordIterator::IS_SKIPPABLE;
311 iterator.GetNextWord(&actual_word, &actual_start, &actual_end)) {
312 continue;
315 EXPECT_EQ(SpellcheckWordIterator::WordIteratorStatus::IS_WORD, status);
316 EXPECT_EQ(input_word, actual_word);
317 EXPECT_EQ(0, actual_start);
318 EXPECT_EQ(input_word.length(),
319 static_cast<base::string16::size_type>(actual_end));
323 TEST(SpellcheckWordIteratorTest, Initialization) {
324 // Test initialization works when a default language is set.
326 SpellcheckCharAttribute attributes;
327 attributes.SetDefaultLanguage("en-US");
329 SpellcheckWordIterator iterator;
330 EXPECT_TRUE(iterator.Initialize(&attributes, true));
333 // Test initialization fails when no default language is set.
335 SpellcheckCharAttribute attributes;
337 SpellcheckWordIterator iterator;
338 EXPECT_FALSE(iterator.Initialize(&attributes, true));
342 // This test uses English rules to check that different character set
343 // combinations properly find word breaks and skippable characters.
344 TEST(SpellcheckWordIteratorTest, FindSkippableWordsEnglish) {
345 // A string containing the English word "foo", followed by two Khmer
346 // characters, the English word "Can", and then two Russian characters and
347 // punctuation.
348 base::string16 text(
349 base::WideToUTF16(L"foo \x1791\x17C1 Can \x041C\x0438..."));
350 BreakIterator iter(text, GetRulesForLanguage("en-US"));
351 ASSERT_TRUE(iter.Init());
353 EXPECT_TRUE(iter.Advance());
354 // Finds "foo".
355 EXPECT_EQ(base::UTF8ToUTF16("foo"), iter.GetString());
356 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK);
357 EXPECT_TRUE(iter.Advance());
358 // Finds the space and then the Khmer characters.
359 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
360 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
361 EXPECT_TRUE(iter.Advance());
362 EXPECT_EQ(base::WideToUTF16(L"\x1791\x17C1"), iter.GetString());
363 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
364 EXPECT_TRUE(iter.Advance());
365 // Finds the next space and "Can".
366 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
367 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
368 EXPECT_TRUE(iter.Advance());
369 EXPECT_EQ(base::UTF8ToUTF16("Can"), iter.GetString());
370 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK);
371 EXPECT_TRUE(iter.Advance());
372 // Finds the next space and each Russian character.
373 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
374 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
375 EXPECT_TRUE(iter.Advance());
376 EXPECT_EQ(base::WideToUTF16(L"\x041C"), iter.GetString());
377 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
378 EXPECT_TRUE(iter.Advance());
379 EXPECT_EQ(base::WideToUTF16(L"\x0438"), iter.GetString());
380 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
381 EXPECT_TRUE(iter.Advance());
382 // Finds the periods at the end.
383 EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
384 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
385 EXPECT_TRUE(iter.Advance());
386 EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
387 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
388 EXPECT_TRUE(iter.Advance());
389 EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
390 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
391 EXPECT_FALSE(iter.Advance());
394 // This test uses Russian rules to check that different character set
395 // combinations properly find word breaks and skippable characters.
396 TEST(SpellcheckWordIteratorTest, FindSkippableWordsRussian) {
397 // A string containing punctuation followed by two Russian characters, the
398 // English word "Can", and then two Khmer characters.
399 base::string16 text(base::WideToUTF16(L".;\x041C\x0438 Can \x1791\x17C1 "));
400 BreakIterator iter(text, GetRulesForLanguage("ru-RU"));
401 ASSERT_TRUE(iter.Init());
403 EXPECT_TRUE(iter.Advance());
404 // Finds the period and semicolon.
405 EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
406 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
407 EXPECT_TRUE(iter.Advance());
408 EXPECT_EQ(base::UTF8ToUTF16(";"), iter.GetString());
409 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
410 EXPECT_TRUE(iter.Advance());
411 // Finds all the Russian characters.
412 EXPECT_EQ(base::WideToUTF16(L"\x041C\x0438"), iter.GetString());
413 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK);
414 EXPECT_TRUE(iter.Advance());
415 // Finds the space and each character in "Can".
416 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
417 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
418 EXPECT_TRUE(iter.Advance());
419 EXPECT_EQ(base::UTF8ToUTF16("C"), iter.GetString());
420 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
421 EXPECT_TRUE(iter.Advance());
422 EXPECT_EQ(base::UTF8ToUTF16("a"), iter.GetString());
423 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
424 EXPECT_TRUE(iter.Advance());
425 EXPECT_EQ(base::UTF8ToUTF16("n"), iter.GetString());
426 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
427 EXPECT_TRUE(iter.Advance());
428 // Finds the next space, the Khmer characters, and the last two spaces.
429 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
430 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
431 EXPECT_TRUE(iter.Advance());
432 EXPECT_EQ(base::WideToUTF16(L"\x1791\x17C1"), iter.GetString());
433 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
434 EXPECT_TRUE(iter.Advance());
435 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
436 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
437 EXPECT_TRUE(iter.Advance());
438 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
439 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
440 EXPECT_FALSE(iter.Advance());
443 // This test uses Khmer rules to check that different character set combinations
444 // properly find word breaks and skippable characters. Khmer does not use spaces
445 // between words and uses a dictionary to determine word breaks instead.
446 TEST(SpellcheckWordIteratorTest, FindSkippableWordsKhmer) {
447 // A string containing two Russian characters followed by two, three, and
448 // two-character Khmer words, and then English characters and punctuation.
449 base::string16 text(base::WideToUTF16(
450 L"\x041C\x0438 \x178F\x17BE\x179B\x17C4\x1780\x1798\x1780zoo. ,"));
451 BreakIterator iter(text, GetRulesForLanguage("km"));
452 ASSERT_TRUE(iter.Init());
454 EXPECT_TRUE(iter.Advance());
455 // Finds each Russian character and the space.
456 EXPECT_EQ(base::WideToUTF16(L"\x041C"), iter.GetString());
457 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
458 EXPECT_TRUE(iter.Advance());
459 EXPECT_EQ(base::WideToUTF16(L"\x0438"), iter.GetString());
460 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
461 EXPECT_TRUE(iter.Advance());
462 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
463 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
464 EXPECT_TRUE(iter.Advance());
465 // Finds the first two-character Khmer word.
466 EXPECT_EQ(base::WideToUTF16(L"\x178F\x17BE"), iter.GetString());
467 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK);
468 EXPECT_TRUE(iter.Advance());
469 // Finds the three-character Khmer word and then the next two-character word.
470 // Note: Technically these are two different Khmer words so the Khmer language
471 // rule should find a break between them but due to the heuristic/statistical
472 // nature of the Khmer word breaker it does not.
473 EXPECT_EQ(base::WideToUTF16(L"\x179B\x17C4\x1780\x1798\x1780"),
474 iter.GetString());
475 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_WORD_BREAK);
476 EXPECT_TRUE(iter.Advance());
477 // Finds each character in "zoo".
478 EXPECT_EQ(base::UTF8ToUTF16("z"), iter.GetString());
479 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
480 EXPECT_TRUE(iter.Advance());
481 EXPECT_EQ(base::UTF8ToUTF16("o"), iter.GetString());
482 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
483 EXPECT_TRUE(iter.Advance());
484 EXPECT_EQ(base::UTF8ToUTF16("o"), iter.GetString());
485 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
486 EXPECT_TRUE(iter.Advance());
487 // Finds the period, space, and comma.
488 EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());
489 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
490 EXPECT_TRUE(iter.Advance());
491 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());
492 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
493 EXPECT_TRUE(iter.Advance());
494 EXPECT_EQ(base::UTF8ToUTF16(","), iter.GetString());
495 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);
496 EXPECT_FALSE(iter.Advance());