Backed out changeset 9d8b4c0b99ed (bug 1945683) for causing btime failures. CLOSED...
[gecko.git] / dom / base / nsLineBreaker.cpp
blob5d3c8e4183b9f3721f14470d18d2cf14157bde1f
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "nsLineBreaker.h"
8 #include "nsContentUtils.h"
9 #include "gfxTextRun.h" // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values
10 #include "nsHyphenationManager.h"
11 #include "nsHyphenator.h"
12 #include "mozilla/AutoRestore.h"
13 #include "mozilla/ClearOnShutdown.h"
14 #include "mozilla/gfx/2D.h"
15 #include "mozilla/intl/LineBreaker.h" // for LineBreaker::ComputeBreakPositions
16 #include "mozilla/intl/Locale.h"
17 #include "mozilla/intl/UnicodeProperties.h"
18 #include "mozilla/ScopeExit.h"
19 #include "mozilla/StaticPrefs_intl.h"
21 using mozilla::AutoRestore;
22 using mozilla::intl::LineBreaker;
23 using mozilla::intl::LineBreakRule;
24 using mozilla::intl::Locale;
25 using mozilla::intl::LocaleParser;
26 using mozilla::intl::UnicodeProperties;
27 using mozilla::intl::WordBreakRule;
// There is no break opportunity between any pair of characters that has line
// break class of either AL (Alphabetic), IS (Infix Numeric Separator), NU
// (Numeric), or QU (Quotation). See
// https://www.unicode.org/Public/UCD/latest/ucd/LineBreak.txt for Unicode code
// point and line break class mapping.
//
// The table covers the code points 0x20..0x7f and is indexed by
// (code point - 0x20). A value of 1 marks a character that is treated as
// non-breakable; 0 marks one that may take part in a break opportunity.
static constexpr uint8_t kNonBreakableASCII[] = {
    // clang-format off
    // 0x20-0x2f
    0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0,
    // 0x30-0x3f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
    // 0x40-0x4f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x50-0x5f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
    // 0x60-0x6f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x70-0x7f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
    // clang-format on
};
// Lookups subtract 0x20 from a code point in 0x20..0x7f, so the table must
// hold exactly one entry per code point in that range.
static_assert(sizeof(kNonBreakableASCII) == 0x7f - 0x20 + 1,
              "kNonBreakableASCII must cover exactly 0x20..0x7f");
51 template <typename T>
52 static constexpr bool IsNonBreakableChar(T aChar, bool aLegacyBehavior) {
53 if (aLegacyBehavior) {
54 // If not using ICU4X, line break rules aren't compatible with UAX#14. Use
55 // old way.
56 return (0x0030 <= aChar && aChar <= 0x0039) ||
57 (0x0041 <= aChar && aChar <= 0x005A) ||
58 (0x0061 <= aChar && aChar <= 0x007A) || (0x000a == aChar);
60 if (aChar < 0x20 || aChar > 0x7f) {
61 return false;
63 return !!kNonBreakableASCII[aChar - 0x20];
66 nsLineBreaker::nsLineBreaker()
67 : mLegacyBehavior(!mozilla::StaticPrefs::intl_icu4x_segmenter_enabled()) {}
69 nsLineBreaker::~nsLineBreaker() {
70 NS_ASSERTION(mCurrentWord.Length() == 0,
71 "Should have Reset() before destruction!");
74 /* static */
75 bool nsLineBreaker::ShouldCapitalize(uint32_t aChar, bool& aCapitalizeNext) {
76 using mozilla::intl::GeneralCategory;
77 auto category = UnicodeProperties::CharType(aChar);
78 switch (category) {
79 case GeneralCategory::Uppercase_Letter:
80 case GeneralCategory::Lowercase_Letter:
81 case GeneralCategory::Titlecase_Letter:
82 case GeneralCategory::Modifier_Letter:
83 case GeneralCategory::Other_Letter:
84 case GeneralCategory::Decimal_Number:
85 case GeneralCategory::Letter_Number:
86 case GeneralCategory::Other_Number:
87 if (aCapitalizeNext) {
88 aCapitalizeNext = false;
89 return true;
91 break;
92 case GeneralCategory::Space_Separator:
93 case GeneralCategory::Line_Separator:
94 case GeneralCategory::Paragraph_Separator:
95 case GeneralCategory::Dash_Punctuation:
96 case GeneralCategory::Initial_Punctuation:
97 /* These punctuation categories are excluded, for examples like
98 * "what colo[u]r" -> "What Colo[u]r?" (rather than "What Colo[U]R?")
99 * and
100 * "snake_case" -> "Snake_case" (to match word selection behavior)
101 case GeneralCategory::Open_Punctuation:
102 case GeneralCategory::Close_Punctuation:
103 case GeneralCategory::Connector_Punctuation:
105 aCapitalizeNext = true;
106 break;
107 case GeneralCategory::Final_Punctuation:
108 /* Special-case: exclude Unicode single-close-quote/apostrophe,
109 for examples like "Lowe’s" etc. */
110 if (aChar != 0x2019) {
111 aCapitalizeNext = true;
113 break;
114 case GeneralCategory::Other_Punctuation:
115 /* Special-case: exclude ASCII apostrophe, for "Lowe's" etc.,
116 and MIDDLE DOT, for Catalan "l·l". */
117 if (aChar != '\'' && aChar != 0x00B7) {
118 aCapitalizeNext = true;
120 break;
121 default:
122 break;
124 return false;
127 static void SetupCapitalization(const char16_t* aWord, uint32_t aLength,
128 bool* aCapitalization) {
129 // Capitalize the first alphanumeric character after a space or punctuation.
130 bool capitalizeNextChar = true;
131 for (uint32_t i = 0; i < aLength; ++i) {
132 uint32_t ch = aWord[i];
133 if (i + 1 < aLength && NS_IS_SURROGATE_PAIR(ch, aWord[i + 1])) {
134 ch = SURROGATE_TO_UCS4(ch, aWord[i + 1]);
136 aCapitalization[i] =
137 nsLineBreaker::ShouldCapitalize(ch, capitalizeNextChar);
139 if (!IS_IN_BMP(ch)) {
140 ++i;
145 nsresult nsLineBreaker::FlushCurrentWord() {
146 auto cleanup = mozilla::MakeScopeExit([&] {
147 mCurrentWord.Clear();
148 mTextItems.Clear();
149 mCurrentWordMightBeBreakable = false;
150 mCurrentWordContainsMixedLang = false;
151 mCurrentWordLanguage = nullptr;
152 mWordContinuation = false;
155 uint32_t length = mCurrentWord.Length();
156 AutoTArray<uint8_t, 4000> breakState;
157 if (!breakState.AppendElements(length, mozilla::fallible)) {
158 return NS_ERROR_OUT_OF_MEMORY;
161 if (mLineBreak == LineBreakRule::Anywhere) {
162 memset(breakState.Elements(),
163 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL,
164 length * sizeof(uint8_t));
165 } else if (!mCurrentWordMightBeBreakable &&
166 mWordBreak != WordBreakRule::BreakAll) {
167 // word-break: normal or keep-all has no break opportunity if the word
168 // is non-breakable. (See the comment of kNonBreakableASCII).
169 memset(breakState.Elements(),
170 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
171 length * sizeof(uint8_t));
172 } else {
173 LineBreaker::ComputeBreakPositions(
174 mCurrentWord.Elements(), length, mWordBreak, mLineBreak,
175 mScriptIsChineseOrJapanese, breakState.Elements());
178 bool autoHyphenate = mCurrentWordLanguage && !mCurrentWordContainsMixedLang;
179 uint32_t i;
180 for (i = 0; autoHyphenate && i < mTextItems.Length(); ++i) {
181 TextItem* ti = &mTextItems[i];
182 if (!(ti->mFlags & BREAK_USE_AUTO_HYPHENATION)) {
183 autoHyphenate = false;
186 if (autoHyphenate) {
187 RefPtr<nsHyphenator> hyphenator =
188 nsHyphenationManager::Instance()->GetHyphenator(mCurrentWordLanguage);
189 if (hyphenator) {
190 FindHyphenationPoints(hyphenator, mCurrentWord.Elements(),
191 mCurrentWord.Elements() + length,
192 breakState.Elements());
196 nsTArray<bool> capitalizationState;
197 uint32_t offset = 0;
198 for (i = 0; i < mTextItems.Length(); ++i) {
199 TextItem* ti = &mTextItems[i];
200 NS_ASSERTION(ti->mLength > 0, "Zero length word contribution?");
202 if ((ti->mFlags & BREAK_SUPPRESS_INITIAL) && ti->mSinkOffset == 0) {
203 breakState[offset] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
205 if (ti->mFlags & BREAK_SUPPRESS_INSIDE) {
206 uint32_t exclude = ti->mSinkOffset == 0 ? 1 : 0;
207 memset(breakState.Elements() + offset + exclude,
208 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
209 (ti->mLength - exclude) * sizeof(uint8_t));
212 // Don't set the break state for the first character of the word, because
213 // it was already set correctly earlier and we don't know what the true
214 // value should be.
215 uint32_t skipSet = i == 0 ? 1 : 0;
216 if (ti->mSink) {
217 ti->mSink->SetBreaks(ti->mSinkOffset + skipSet, ti->mLength - skipSet,
218 breakState.Elements() + offset + skipSet);
220 if (!mWordContinuation && (ti->mFlags & BREAK_NEED_CAPITALIZATION)) {
221 if (capitalizationState.Length() == 0) {
222 if (!capitalizationState.AppendElements(length, mozilla::fallible)) {
223 return NS_ERROR_OUT_OF_MEMORY;
225 memset(capitalizationState.Elements(), false, length * sizeof(bool));
226 SetupCapitalization(mCurrentWord.Elements(), length,
227 capitalizationState.Elements());
229 ti->mSink->SetCapitalization(ti->mSinkOffset, ti->mLength,
230 capitalizationState.Elements() + offset);
234 offset += ti->mLength;
237 return NS_OK;
// If the aFlags parameter to AppendText has all these bits set,
// then we don't need to worry about finding break opportunities
// in the appended text.
#define NO_BREAKS_NEEDED_FLAGS                      \
  (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE | \
   BREAK_SKIP_SETTING_NO_BREAKS)
247 nsresult nsLineBreaker::AppendText(nsAtom* aHyphenationLanguage,
248 const char16_t* aText, uint32_t aLength,
249 uint32_t aFlags, nsILineBreakSink* aSink) {
250 NS_ASSERTION(aLength > 0, "Appending empty text...");
252 uint32_t offset = 0;
254 // Continue the current word
255 if (mCurrentWord.Length() > 0) {
256 NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere,
257 "These should not be set");
259 while (offset < aLength && !IsSegmentSpace(aText[offset])) {
260 mCurrentWord.AppendElement(aText[offset]);
261 if (!mCurrentWordMightBeBreakable &&
262 !IsNonBreakableChar<char16_t>(aText[offset], mLegacyBehavior)) {
263 mCurrentWordMightBeBreakable = true;
265 UpdateCurrentWordLanguage(aHyphenationLanguage);
266 ++offset;
269 if (offset > 0) {
270 mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
273 if (offset == aLength) {
274 return NS_OK;
277 // We encountered whitespace, so we're done with this word
278 nsresult rv = FlushCurrentWord();
279 if (NS_FAILED(rv)) {
280 return rv;
284 AutoTArray<uint8_t, 4000> breakState;
285 if (aSink) {
286 if (!breakState.AppendElements(aLength, mozilla::fallible)) {
287 return NS_ERROR_OUT_OF_MEMORY;
291 bool noCapitalizationNeeded = true;
292 nsTArray<bool> capitalizationState;
293 if (aSink && (aFlags & BREAK_NEED_CAPITALIZATION)) {
294 if (!capitalizationState.AppendElements(aLength, mozilla::fallible)) {
295 return NS_ERROR_OUT_OF_MEMORY;
297 memset(capitalizationState.Elements(), false, aLength * sizeof(bool));
298 noCapitalizationNeeded = false;
301 uint32_t start = offset;
302 bool noBreaksNeeded =
303 !aSink || ((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS &&
304 !mBreakHere && !mAfterBreakableSpace);
305 if (noBreaksNeeded && noCapitalizationNeeded) {
306 // Skip to the space before the last word, since either the break data
307 // here is not needed, or no breaks are set in the sink and there cannot
308 // be any breaks in this chunk; and we don't need to do word-initial
309 // capitalization. All we need is the context for the next chunk (if any).
310 offset = aLength;
311 while (offset > start) {
312 --offset;
313 if (IsSegmentSpace(aText[offset])) {
314 break;
318 uint32_t wordStart = offset;
319 bool wordMightBeBreakable = false;
321 RefPtr<nsHyphenator> hyphenator;
322 if ((aFlags & BREAK_USE_AUTO_HYPHENATION) &&
323 !(aFlags & BREAK_SUPPRESS_INSIDE) && aHyphenationLanguage) {
324 hyphenator =
325 nsHyphenationManager::Instance()->GetHyphenator(aHyphenationLanguage);
328 for (;;) {
329 char16_t ch = aText[offset];
330 bool isSpace = IsSegmentSpace(ch);
331 bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);
333 if (aSink && !noBreaksNeeded) {
334 breakState[offset] =
335 mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
336 mWordBreak == WordBreakRule::BreakAll ||
337 mLineBreak == LineBreakRule::Anywhere
338 ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
339 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
341 mBreakHere = false;
342 mAfterBreakableSpace = isBreakableSpace;
344 if (isSpace || ch == '\n') {
345 if (offset > wordStart && aSink) {
346 if (!(aFlags & BREAK_SUPPRESS_INSIDE)) {
347 if (mLineBreak == LineBreakRule::Anywhere) {
348 memset(breakState.Elements() + wordStart,
349 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL,
350 offset - wordStart);
351 } else if (wordMightBeBreakable) {
352 // Save current start-of-word state because ComputeBreakPositions()
353 // will set it to false.
354 AutoRestore<uint8_t> saveWordStartBreakState(breakState[wordStart]);
355 LineBreaker::ComputeBreakPositions(
356 aText + wordStart, offset - wordStart, mWordBreak, mLineBreak,
357 mScriptIsChineseOrJapanese, breakState.Elements() + wordStart);
359 if (hyphenator) {
360 FindHyphenationPoints(hyphenator, aText + wordStart, aText + offset,
361 breakState.Elements() + wordStart);
364 if (!mWordContinuation && !noCapitalizationNeeded) {
365 SetupCapitalization(aText + wordStart, offset - wordStart,
366 capitalizationState.Elements() + wordStart);
369 wordMightBeBreakable = false;
370 mWordContinuation = false;
371 ++offset;
372 if (offset >= aLength) {
373 break;
375 wordStart = offset;
376 continue;
379 if (!wordMightBeBreakable &&
380 !IsNonBreakableChar<char16_t>(ch, mLegacyBehavior)) {
381 wordMightBeBreakable = true;
383 ++offset;
384 if (offset >= aLength) {
385 // Save this word
386 mCurrentWordMightBeBreakable = wordMightBeBreakable;
387 uint32_t len = offset - wordStart;
388 char16_t* elems = mCurrentWord.AppendElements(len, mozilla::fallible);
389 if (!elems) {
390 return NS_ERROR_OUT_OF_MEMORY;
392 memcpy(elems, aText + wordStart, sizeof(char16_t) * len);
393 mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
394 // Ensure that the break-before for this word is written out
395 offset = wordStart + 1;
396 UpdateCurrentWordLanguage(aHyphenationLanguage);
397 break;
401 if (aSink) {
402 if (!noBreaksNeeded) {
403 aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
405 if (!noCapitalizationNeeded) {
406 aSink->SetCapitalization(start, offset - start,
407 capitalizationState.Elements() + start);
410 return NS_OK;
413 void nsLineBreaker::FindHyphenationPoints(nsHyphenator* aHyphenator,
414 const char16_t* aTextStart,
415 const char16_t* aTextLimit,
416 uint8_t* aBreakState) {
417 // Early-return for words that are definitely too short to hyphenate.
418 if (aTextLimit - aTextStart < mHyphenateLimitWord) {
419 return;
422 nsDependentSubstring string(aTextStart, aTextLimit);
423 AutoTArray<bool, 200> hyphens;
424 if (NS_FAILED(aHyphenator->Hyphenate(string, hyphens))) {
425 return;
428 // Keep track of the length seen so far, in terms of characters that are
429 // countable for hyphenate-limit-chars purposes.
430 uint32_t length = 0;
431 // When setting a potential break in aBreakState, we record the previous
432 // value in case we need to restore it because the position turns out to
433 // be too close to the end of the word.
434 struct BreakInfo {
435 uint32_t mPosition;
436 uint32_t mLength;
437 uint8_t mState;
439 AutoTArray<BreakInfo, 16> oldBreaks;
440 // Don't consider setting any breaks where i >= endLimit, as they will
441 // definitely be too near the end of the word to be accepted.
442 uint32_t endLimit =
443 string.Length() - std::max<uint32_t>(1u, mHyphenateLimitEnd);
444 for (uint32_t i = 0; i < string.Length(); ++i) {
445 // Get current character, converting surrogate pairs to UCS4 for char
446 // category lookup.
447 uint32_t ch = string[i];
448 if (NS_IS_HIGH_SURROGATE(ch) && i + 1 < string.Length() &&
449 NS_IS_LOW_SURROGATE(string[i + 1])) {
450 ch = SURROGATE_TO_UCS4(ch, string[i + 1]);
453 // According to CSS Text, "Nonspacing combining marks (Unicode General
454 // Category Mn) and intra-word punctuation (Unicode General Category P*)
455 // do not count towards the minimum."
456 // (https://drafts.csswg.org/css-text-4/#hyphenate-char-limits)
457 // We also don't count Control or Format categories.
458 using mozilla::intl::GeneralCategory;
459 switch (UnicodeProperties::CharType(ch)) {
460 case GeneralCategory::Nonspacing_Mark:
461 case GeneralCategory::Dash_Punctuation:
462 case GeneralCategory::Open_Punctuation:
463 case GeneralCategory::Close_Punctuation:
464 case GeneralCategory::Connector_Punctuation:
465 case GeneralCategory::Other_Punctuation:
466 case GeneralCategory::Initial_Punctuation:
467 case GeneralCategory::Final_Punctuation:
468 case GeneralCategory::Control:
469 case GeneralCategory::Format:
470 case GeneralCategory::Surrogate:
471 break;
472 default:
473 ++length;
474 break;
477 // Don't accept any breaks until we're far enough into the word, or if
478 // we're too near the end for it to possibly be accepted. (Note that the
479 // check against endLimit is just an initial worst-case check that assumes
480 // all the remaining characters are countable; if there are combining
481 // marks, etc., in the trailing part of the word we may need to reset the
482 // potential break later, after we've fully counted length.)
483 if (hyphens[i] && length >= mHyphenateLimitStart && i < endLimit) {
484 // Keep track of hyphen position and "countable" length of the word.
485 oldBreaks.AppendElement(BreakInfo{i + 1, length, aBreakState[i + 1]});
486 aBreakState[i + 1] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
489 // If the character was outside the BMP, skip past the low surrogate.
490 if (!IS_IN_BMP(ch)) {
491 ++i;
495 if (length < mHyphenateLimitWord) {
496 // After discounting combining marks, punctuation, controls, etc., the word
497 // was too short for hyphenate-limit-chars. If we've set any hyphen breaks,
498 // forget them.
499 while (!oldBreaks.IsEmpty()) {
500 auto lastBreak = oldBreaks.PopLastElement();
501 aBreakState[lastBreak.mPosition] = lastBreak.mState;
503 } else {
504 // Check if trailing fragment is too short; if so, remove the last hyphen
505 // break(s) that we set, until the fragment will be long enough.
506 while (!oldBreaks.IsEmpty()) {
507 auto lastBreak = oldBreaks.PopLastElement();
508 if (length - lastBreak.mLength >= mHyphenateLimitEnd) {
509 break;
511 aBreakState[lastBreak.mPosition] = lastBreak.mState;
516 nsresult nsLineBreaker::AppendText(nsAtom* aHyphenationLanguage,
517 const uint8_t* aText, uint32_t aLength,
518 uint32_t aFlags, nsILineBreakSink* aSink) {
519 NS_ASSERTION(aLength > 0, "Appending empty text...");
521 if (aFlags & (BREAK_NEED_CAPITALIZATION | BREAK_USE_AUTO_HYPHENATION)) {
522 // Defer to the Unicode path if capitalization or hyphenation is required
523 nsAutoString str;
524 const char* cp = reinterpret_cast<const char*>(aText);
525 CopyASCIItoUTF16(nsDependentCSubstring(cp, cp + aLength), str);
526 return AppendText(aHyphenationLanguage, str.get(), aLength, aFlags, aSink);
529 uint32_t offset = 0;
531 // Continue the current word
532 if (mCurrentWord.Length() > 0) {
533 NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere,
534 "These should not be set");
536 while (offset < aLength && !IsSegmentSpace(aText[offset])) {
537 mCurrentWord.AppendElement(aText[offset]);
538 if (!mCurrentWordMightBeBreakable &&
539 !IsNonBreakableChar<uint8_t>(aText[offset], mLegacyBehavior)) {
540 mCurrentWordMightBeBreakable = true;
542 ++offset;
545 if (offset > 0) {
546 mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
549 if (offset == aLength) {
550 // We did not encounter whitespace so the word hasn't finished yet.
551 return NS_OK;
554 // We encountered whitespace, so we're done with this word
555 nsresult rv = FlushCurrentWord();
556 if (NS_FAILED(rv)) {
557 return rv;
561 AutoTArray<uint8_t, 4000> breakState;
562 if (aSink) {
563 if (!breakState.AppendElements(aLength, mozilla::fallible)) {
564 return NS_ERROR_OUT_OF_MEMORY;
568 uint32_t start = offset;
569 bool noBreaksNeeded =
570 !aSink || ((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS &&
571 !mBreakHere && !mAfterBreakableSpace);
572 if (noBreaksNeeded) {
573 // Skip to the space before the last word, since either the break data
574 // here is not needed, or no breaks are set in the sink and there cannot
575 // be any breaks in this chunk; all we need is the context for the next
576 // chunk (if any)
577 offset = aLength;
578 while (offset > start) {
579 --offset;
580 if (IsSegmentSpace(aText[offset])) {
581 break;
585 uint32_t wordStart = offset;
586 bool wordMightBeBreakable = false;
588 for (;;) {
589 uint8_t ch = aText[offset];
590 bool isSpace = IsSegmentSpace(ch);
591 bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);
593 if (aSink) {
594 // Consider word-break style. Since the break position of CJK scripts
595 // will be set by nsILineBreaker, we don't consider CJK at this point.
596 breakState[offset] =
597 mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
598 mWordBreak == WordBreakRule::BreakAll ||
599 mLineBreak == LineBreakRule::Anywhere
600 ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
601 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
603 mBreakHere = false;
604 mAfterBreakableSpace = isBreakableSpace;
606 if (isSpace) {
607 if (offset > wordStart && aSink && !(aFlags & BREAK_SUPPRESS_INSIDE)) {
608 if (mLineBreak == LineBreakRule::Anywhere) {
609 memset(breakState.Elements() + wordStart,
610 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL,
611 offset - wordStart);
612 } else if (wordMightBeBreakable) {
613 // Save current start-of-word state because ComputeBreakPositions()
614 // will set it to false.
615 AutoRestore<uint8_t> saveWordStartBreakState(breakState[wordStart]);
616 LineBreaker::ComputeBreakPositions(
617 aText + wordStart, offset - wordStart, mWordBreak, mLineBreak,
618 mScriptIsChineseOrJapanese, breakState.Elements() + wordStart);
622 wordMightBeBreakable = false;
623 mWordContinuation = false;
624 ++offset;
625 if (offset >= aLength) {
626 break;
628 wordStart = offset;
629 continue;
632 if (!wordMightBeBreakable &&
633 !IsNonBreakableChar<uint8_t>(ch, mLegacyBehavior)) {
634 wordMightBeBreakable = true;
636 ++offset;
637 if (offset >= aLength) {
638 // Save this word
639 mCurrentWordMightBeBreakable = wordMightBeBreakable;
640 uint32_t len = offset - wordStart;
641 char16_t* elems = mCurrentWord.AppendElements(len, mozilla::fallible);
642 if (!elems) {
643 return NS_ERROR_OUT_OF_MEMORY;
645 uint32_t i;
646 for (i = wordStart; i < offset; ++i) {
647 elems[i - wordStart] = aText[i];
649 mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
650 // Ensure that the break-before for this word is written out
651 offset = wordStart + 1;
652 break;
656 if (!noBreaksNeeded) {
657 aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
659 return NS_OK;
662 void nsLineBreaker::UpdateCurrentWordLanguage(nsAtom* aHyphenationLanguage) {
663 if (mCurrentWordLanguage && mCurrentWordLanguage != aHyphenationLanguage) {
664 mCurrentWordContainsMixedLang = true;
665 mScriptIsChineseOrJapanese = false;
666 return;
669 if (aHyphenationLanguage && !mCurrentWordLanguage) {
670 static mozilla::StaticRefPtr<nsAtom> sLastHyphenationLanguage;
671 static bool sLastScriptIsChineseOrJapanese = false;
672 static bool sInit = false;
674 if (!sInit) {
675 mozilla::ClearOnShutdown(&sLastHyphenationLanguage);
676 sInit = true;
679 if (sLastHyphenationLanguage == aHyphenationLanguage) {
680 MOZ_ASSERT(nsAtomString(sLastHyphenationLanguage)
681 .Equals(nsAtomString(aHyphenationLanguage)));
682 mScriptIsChineseOrJapanese = sLastScriptIsChineseOrJapanese;
683 } else {
684 Locale loc;
685 auto result =
686 LocaleParser::TryParse(nsAtomCString(aHyphenationLanguage), loc);
688 if (result.isErr()) {
689 return;
691 if (loc.Script().Missing() && loc.AddLikelySubtags().isErr()) {
692 return;
694 mScriptIsChineseOrJapanese =
695 loc.Script().EqualTo("Hans") || loc.Script().EqualTo("Hant") ||
696 loc.Script().EqualTo("Jpan") || loc.Script().EqualTo("Hrkt");
698 sLastHyphenationLanguage = aHyphenationLanguage;
699 sLastScriptIsChineseOrJapanese = mScriptIsChineseOrJapanese;
702 mCurrentWordLanguage = aHyphenationLanguage;
705 nsresult nsLineBreaker::AppendInvisibleWhitespace(uint32_t aFlags) {
706 nsresult rv = FlushCurrentWord();
707 if (NS_FAILED(rv)) {
708 return rv;
711 bool isBreakableSpace = !(aFlags & BREAK_SUPPRESS_INSIDE);
712 if (mAfterBreakableSpace && !isBreakableSpace) {
713 mBreakHere = true;
715 mAfterBreakableSpace = isBreakableSpace;
716 mWordContinuation = false;
717 return NS_OK;
720 nsresult nsLineBreaker::Reset(bool* aTrailingBreak) {
721 nsresult rv = FlushCurrentWord();
722 if (NS_FAILED(rv)) {
723 return rv;
726 *aTrailingBreak = mBreakHere || mAfterBreakableSpace;
727 mBreakHere = false;
728 mAfterBreakableSpace = false;
729 return NS_OK;