1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "nsLineBreaker.h"
8 #include "nsContentUtils.h"
9 #include "gfxTextRun.h" // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values
10 #include "nsHyphenationManager.h"
11 #include "nsHyphenator.h"
12 #include "mozilla/AutoRestore.h"
13 #include "mozilla/ClearOnShutdown.h"
14 #include "mozilla/gfx/2D.h"
15 #include "mozilla/intl/LineBreaker.h" // for LineBreaker::ComputeBreakPositions
16 #include "mozilla/intl/Locale.h"
17 #include "mozilla/intl/UnicodeProperties.h"
18 #include "mozilla/ScopeExit.h"
19 #include "mozilla/StaticPrefs_intl.h"
21 using mozilla::AutoRestore
;
22 using mozilla::intl::LineBreaker
;
23 using mozilla::intl::LineBreakRule
;
24 using mozilla::intl::Locale
;
25 using mozilla::intl::LocaleParser
;
26 using mozilla::intl::UnicodeProperties
;
27 using mozilla::intl::WordBreakRule
;
29 // There is no break opportunity between any pair of characters that has line
30 // break class of either AL (Alphabetic), IS (Infix Numeric Separator), NU
31 // (Numeric), or QU (Quotation). See
32 // https://www.unicode.org/Public/UCD/latest/ucd/LineBreak.txt for Unicode code
33 // point and line break class mapping.
// One entry per code unit in the range 0x20..0x7f, indexed by (aChar - 0x20).
// A value of 1 marks a character that IsNonBreakableChar() reports as
// non-breakable; 0 marks a character after which the word has to be handed to
// the full line breaker.
static constexpr uint8_t kNonBreakableASCII[] = {
    // clang-format off
    // 0x20-0x2f
    0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0,
    // 0x30-0x3f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
    // 0x40-0x4f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x50-0x5f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
    // 0x60-0x6f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x70-0x7f
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
    // clang-format on
};

// The lookup in IsNonBreakableChar() subtracts 0x20 and relies on the table
// covering exactly 0x20..0x7f; fail the build if a row is added or dropped.
static_assert(sizeof(kNonBreakableASCII) / sizeof(kNonBreakableASCII[0]) ==
                  0x7f - 0x20 + 1,
              "kNonBreakableASCII must have one entry per code unit in "
              "0x20..0x7f");

/**
 * Returns true if aChar is a character that cannot, on its own, make a word
 * breakable.
 *
 * @param aChar            The code unit to test (uint8_t or char16_t in this
 *                         file).
 * @param aLegacyBehavior  When true, use the legacy rule instead of the
 *                         kNonBreakableASCII table.
 * @return true for a non-breakable character, false otherwise. With the
 *         table-based rule, anything outside 0x20..0x7f yields false.
 */
template <typename T>
static constexpr bool IsNonBreakableChar(T aChar, bool aLegacyBehavior) {
  if (aLegacyBehavior) {
    // If not using ICU4X, line break rules aren't compatible with UAX#14. Use
    // the legacy rule: only ASCII digits, ASCII letters and U+000A count as
    // non-breakable.
    return (0x0030 <= aChar && aChar <= 0x0039) ||
           (0x0041 <= aChar && aChar <= 0x005A) ||
           (0x0061 <= aChar && aChar <= 0x007A) || (0x000a == aChar);
  }
  if (aChar < 0x20 || aChar > 0x7f) {
    // Outside the range covered by the table.
    return false;
  }
  return !!kNonBreakableASCII[aChar - 0x20];
}
// Constructor. mLegacyBehavior is initialized to the negation of the
// intl_icu4x_segmenter_enabled static pref, so the legacy character
// classification is used whenever that pref is off.
66 nsLineBreaker::nsLineBreaker()
67 : mLegacyBehavior(!mozilla::StaticPrefs::intl_icu4x_segmenter_enabled()) {}
// Destructor. Asserts that no partially accumulated word is left in
// mCurrentWord, i.e. that the caller invoked Reset() before destroying the
// breaker.
69 nsLineBreaker::~nsLineBreaker() {
70 NS_ASSERTION(mCurrentWord
.Length() == 0,
71 "Should have Reset() before destruction!");
// Classifies aChar by its Unicode general category (looked up with
// UnicodeProperties::CharType) and updates the in/out flag aCapitalizeNext:
//  - letter and number categories clear the flag when it is set;
//  - space/line/paragraph separators, dash punctuation and initial
//    punctuation set the flag;
//  - final punctuation sets it unless aChar is U+2019;
//  - other punctuation sets it unless aChar is an ASCII apostrophe or U+00B7.
// NOTE(review): the switch header, the return statements and the closing
// braces are not visible in this view of the file, so the exact return value
// per branch should be confirmed against the full source.
75 bool nsLineBreaker::ShouldCapitalize(uint32_t aChar
, bool& aCapitalizeNext
) {
76 using mozilla::intl::GeneralCategory
;
77 auto category
= UnicodeProperties::CharType(aChar
);
79 case GeneralCategory::Uppercase_Letter
:
80 case GeneralCategory::Lowercase_Letter
:
81 case GeneralCategory::Titlecase_Letter
:
82 case GeneralCategory::Modifier_Letter
:
83 case GeneralCategory::Other_Letter
:
84 case GeneralCategory::Decimal_Number
:
85 case GeneralCategory::Letter_Number
:
86 case GeneralCategory::Other_Number
:
87 if (aCapitalizeNext
) {
88 aCapitalizeNext
= false;
92 case GeneralCategory::Space_Separator
:
93 case GeneralCategory::Line_Separator
:
94 case GeneralCategory::Paragraph_Separator
:
95 case GeneralCategory::Dash_Punctuation
:
96 case GeneralCategory::Initial_Punctuation
:
97 /* These punctuation categories are excluded, for examples like
98 * "what colo[u]r" -> "What Colo[u]r?" (rather than "What Colo[U]R?")
100 * "snake_case" -> "Snake_case" (to match word selection behavior)
101 case GeneralCategory::Open_Punctuation:
102 case GeneralCategory::Close_Punctuation:
103 case GeneralCategory::Connector_Punctuation:
105 aCapitalizeNext
= true;
107 case GeneralCategory::Final_Punctuation
:
108 /* Special-case: exclude Unicode single-close-quote/apostrophe,
109 for examples like "Lowe’s" etc. */
110 if (aChar
!= 0x2019) {
111 aCapitalizeNext
= true;
114 case GeneralCategory::Other_Punctuation
:
115 /* Special-case: exclude ASCII apostrophe, for "Lowe's" etc.,
116 and MIDDLE DOT, for Catalan "l·l". */
117 if (aChar
!= '\'' && aChar
!= 0x00B7) {
118 aCapitalizeNext
= true;
// Walks the aLength UTF-16 code units of aWord and calls
// nsLineBreaker::ShouldCapitalize() for each character, starting with the
// "capitalize next" flag set so that the first alphanumeric character
// qualifies. A surrogate pair is combined into a single UCS4 value before the
// call.
// NOTE(review): the statement that stores the result into aCapitalization and
// the body of the non-BMP check are not visible in this view; presumably the
// result is written to aCapitalization[i] and the low surrogate is skipped.
127 static void SetupCapitalization(const char16_t
* aWord
, uint32_t aLength
,
128 bool* aCapitalization
) {
129 // Capitalize the first alphanumeric character after a space or punctuation.
130 bool capitalizeNextChar
= true;
131 for (uint32_t i
= 0; i
< aLength
; ++i
) {
132 uint32_t ch
= aWord
[i
];
133 if (i
+ 1 < aLength
&& NS_IS_SURROGATE_PAIR(ch
, aWord
[i
+ 1])) {
134 ch
= SURROGATE_TO_UCS4(ch
, aWord
[i
+ 1]);
137 nsLineBreaker::ShouldCapitalize(ch
, capitalizeNextChar
);
139 if (!IS_IN_BMP(ch
)) {
// Finishes the word accumulated in mCurrentWord: computes a break-state byte
// for each of its code units and hands the relevant slice to the sink of every
// TextItem in mTextItems, together with capitalization data for items that
// asked for it.
//
// A scope-exit lambda clears mCurrentWord and resets the per-word members
// (mCurrentWordMightBeBreakable, mCurrentWordContainsMixedLang,
// mCurrentWordLanguage, mWordContinuation) on every exit path.
//
// Returns NS_ERROR_OUT_OF_MEMORY if a fallible array allocation fails.
// NOTE(review): several lines (declarations of `i` and `offset`, some braces
// and branch headers, the success return) are not visible in this view.
145 nsresult
nsLineBreaker::FlushCurrentWord() {
146 auto cleanup
= mozilla::MakeScopeExit([&] {
147 mCurrentWord
.Clear();
149 mCurrentWordMightBeBreakable
= false;
150 mCurrentWordContainsMixedLang
= false;
151 mCurrentWordLanguage
= nullptr;
152 mWordContinuation
= false;
155 uint32_t length
= mCurrentWord
.Length();
// One break-state byte per code unit of the current word.
156 AutoTArray
<uint8_t, 4000> breakState
;
157 if (!breakState
.AppendElements(length
, mozilla::fallible
)) {
158 return NS_ERROR_OUT_OF_MEMORY
;
// LineBreakRule::Anywhere: every position gets FLAG_BREAK_TYPE_NORMAL.
161 if (mLineBreak
== LineBreakRule::Anywhere
) {
162 memset(breakState
.Elements(),
163 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
,
164 length
* sizeof(uint8_t));
165 } else if (!mCurrentWordMightBeBreakable
&&
166 mWordBreak
!= WordBreakRule::BreakAll
) {
167 // word-break: normal or keep-all has no break opportunity if the word
168 // is non-breakable. (See the comment of kNonBreakableASCII).
169 memset(breakState
.Elements(),
170 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE
,
171 length
* sizeof(uint8_t));
// Presumably the remaining (else) case: ask the line breaker for the actual
// break positions of the whole word.
173 LineBreaker::ComputeBreakPositions(
174 mCurrentWord
.Elements(), length
, mWordBreak
, mLineBreak
,
175 mScriptIsChineseOrJapanese
, breakState
.Elements());
// Auto-hyphenation needs a known word language that is not mixed, and is
// turned off as soon as one text item lacks BREAK_USE_AUTO_HYPHENATION.
178 bool autoHyphenate
= mCurrentWordLanguage
&& !mCurrentWordContainsMixedLang
;
180 for (i
= 0; autoHyphenate
&& i
< mTextItems
.Length(); ++i
) {
181 TextItem
* ti
= &mTextItems
[i
];
182 if (!(ti
->mFlags
& BREAK_USE_AUTO_HYPHENATION
)) {
183 autoHyphenate
= false;
187 RefPtr
<nsHyphenator
> hyphenator
=
188 nsHyphenationManager::Instance()->GetHyphenator(mCurrentWordLanguage
);
190 FindHyphenationPoints(hyphenator
, mCurrentWord
.Elements(),
191 mCurrentWord
.Elements() + length
,
192 breakState
.Elements());
// Filled lazily, the first time a text item requests capitalization.
196 nsTArray
<bool> capitalizationState
;
// Distribute the per-word results to each contributing text item; `offset`
// advances by ti->mLength at the end of every iteration.
198 for (i
= 0; i
< mTextItems
.Length(); ++i
) {
199 TextItem
* ti
= &mTextItems
[i
];
200 NS_ASSERTION(ti
->mLength
> 0, "Zero length word contribution?");
202 if ((ti
->mFlags
& BREAK_SUPPRESS_INITIAL
) && ti
->mSinkOffset
== 0) {
203 breakState
[offset
] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE
;
205 if (ti
->mFlags
& BREAK_SUPPRESS_INSIDE
) {
// When the item starts at sink offset 0 its first position is left out of
// the range that is cleared here.
206 uint32_t exclude
= ti
->mSinkOffset
== 0 ? 1 : 0;
207 memset(breakState
.Elements() + offset
+ exclude
,
208 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE
,
209 (ti
->mLength
- exclude
) * sizeof(uint8_t));
212 // Don't set the break state for the first character of the word, because
213 // it was already set correctly earlier and we don't know what the true
215 uint32_t skipSet
= i
== 0 ? 1 : 0;
217 ti
->mSink
->SetBreaks(ti
->mSinkOffset
+ skipSet
, ti
->mLength
- skipSet
,
218 breakState
.Elements() + offset
+ skipSet
);
220 if (!mWordContinuation
&& (ti
->mFlags
& BREAK_NEED_CAPITALIZATION
)) {
221 if (capitalizationState
.Length() == 0) {
222 if (!capitalizationState
.AppendElements(length
, mozilla::fallible
)) {
223 return NS_ERROR_OUT_OF_MEMORY
;
225 memset(capitalizationState
.Elements(), false, length
* sizeof(bool));
226 SetupCapitalization(mCurrentWord
.Elements(), length
,
227 capitalizationState
.Elements());
229 ti
->mSink
->SetCapitalization(ti
->mSinkOffset
, ti
->mLength
,
230 capitalizationState
.Elements() + offset
);
234 offset
+= ti
->mLength
;
240 // If the aFlags parameter to AppendText has all these bits set,
241 // then we don't need to worry about finding break opportunities
242 // in the appended text.
// (AppendText additionally requires mBreakHere and mAfterBreakableSpace to be
// clear before it treats a chunk as needing no breaks.)
243 #define NO_BREAKS_NEEDED_FLAGS \
244 (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE | \
245 BREAK_SKIP_SETTING_NO_BREAKS)
// Appends a chunk of UTF-16 text to the line breaker.
//
// aHyphenationLanguage  Language atom used for auto-hyphenation and passed to
//                       UpdateCurrentWordLanguage(); may be null.
// aText, aLength        The text; aLength is asserted to be non-zero.
// aFlags                BREAK_* flags for this chunk.
// aSink                 Receives SetBreaks()/SetCapitalization() calls; the
//                       code tests it for null before use.
//
// Outline, as far as visible here:
//  1. If a word is pending in mCurrentWord, non-space characters at the start
//     of aText are appended to it; once whitespace is reached the word is
//     flushed with FlushCurrentWord().
//  2. The rest is scanned character by character. The break-before state of
//     each position is derived from mBreakHere, mAfterBreakableSpace,
//     mWordBreak and mLineBreak. At each space or '\n' the preceding word is
//     run through ComputeBreakPositions(), FindHyphenationPoints() and
//     SetupCapitalization() as applicable.
//  3. A word still open at the end of the chunk is copied into mCurrentWord
//     and recorded as a TextItem so a later call can continue it.
//  4. Break and capitalization data for [start, offset) are sent to aSink.
//
// Returns NS_ERROR_OUT_OF_MEMORY when a fallible allocation fails.
// NOTE(review): the declaration of `offset`, the loop header and the
// increment/break/continue/return statements are not visible in this view.
247 nsresult
nsLineBreaker::AppendText(nsAtom
* aHyphenationLanguage
,
248 const char16_t
* aText
, uint32_t aLength
,
249 uint32_t aFlags
, nsILineBreakSink
* aSink
) {
250 NS_ASSERTION(aLength
> 0, "Appending empty text...");
254 // Continue the current word
255 if (mCurrentWord
.Length() > 0) {
256 NS_ASSERTION(!mAfterBreakableSpace
&& !mBreakHere
,
257 "These should not be set");
259 while (offset
< aLength
&& !IsSegmentSpace(aText
[offset
])) {
260 mCurrentWord
.AppendElement(aText
[offset
]);
261 if (!mCurrentWordMightBeBreakable
&&
262 !IsNonBreakableChar
<char16_t
>(aText
[offset
], mLegacyBehavior
)) {
263 mCurrentWordMightBeBreakable
= true;
265 UpdateCurrentWordLanguage(aHyphenationLanguage
);
// Record this chunk's contribution (sink offset 0, `offset` code units) to the
// pending word.
270 mTextItems
.AppendElement(TextItem(aSink
, 0, offset
, aFlags
));
273 if (offset
== aLength
) {
277 // We encountered whitespace, so we're done with this word
278 nsresult rv
= FlushCurrentWord();
// Per-code-unit break state for this chunk.
284 AutoTArray
<uint8_t, 4000> breakState
;
286 if (!breakState
.AppendElements(aLength
, mozilla::fallible
)) {
287 return NS_ERROR_OUT_OF_MEMORY
;
// Capitalization data is only allocated when there is a sink and the caller
// set BREAK_NEED_CAPITALIZATION.
291 bool noCapitalizationNeeded
= true;
292 nsTArray
<bool> capitalizationState
;
293 if (aSink
&& (aFlags
& BREAK_NEED_CAPITALIZATION
)) {
294 if (!capitalizationState
.AppendElements(aLength
, mozilla::fallible
)) {
295 return NS_ERROR_OUT_OF_MEMORY
;
297 memset(capitalizationState
.Elements(), false, aLength
* sizeof(bool));
298 noCapitalizationNeeded
= false;
301 uint32_t start
= offset
;
302 bool noBreaksNeeded
=
303 !aSink
|| ((aFlags
& NO_BREAKS_NEEDED_FLAGS
) == NO_BREAKS_NEEDED_FLAGS
&&
304 !mBreakHere
&& !mAfterBreakableSpace
);
305 if (noBreaksNeeded
&& noCapitalizationNeeded
) {
306 // Skip to the space before the last word, since either the break data
307 // here is not needed, or no breaks are set in the sink and there cannot
308 // be any breaks in this chunk; and we don't need to do word-initial
309 // capitalization. All we need is the context for the next chunk (if any).
311 while (offset
> start
) {
313 if (IsSegmentSpace(aText
[offset
])) {
318 uint32_t wordStart
= offset
;
319 bool wordMightBeBreakable
= false;
// A hyphenator is only looked up when auto-hyphenation is requested, breaks
// inside the text are not suppressed, and a language is known.
321 RefPtr
<nsHyphenator
> hyphenator
;
322 if ((aFlags
& BREAK_USE_AUTO_HYPHENATION
) &&
323 !(aFlags
& BREAK_SUPPRESS_INSIDE
) && aHyphenationLanguage
) {
325 nsHyphenationManager::Instance()->GetHyphenator(aHyphenationLanguage
);
329 char16_t ch
= aText
[offset
];
330 bool isSpace
= IsSegmentSpace(ch
);
331 bool isBreakableSpace
= isSpace
&& !(aFlags
& BREAK_SUPPRESS_INSIDE
);
333 if (aSink
&& !noBreaksNeeded
) {
335 mBreakHere
|| (mAfterBreakableSpace
&& !isBreakableSpace
) ||
336 mWordBreak
== WordBreakRule::BreakAll
||
337 mLineBreak
== LineBreakRule::Anywhere
338 ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
339 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE
;
342 mAfterBreakableSpace
= isBreakableSpace
;
// A space or newline ends the current word: compute the breaks inside
// [wordStart, offset).
344 if (isSpace
|| ch
== '\n') {
345 if (offset
> wordStart
&& aSink
) {
346 if (!(aFlags
& BREAK_SUPPRESS_INSIDE
)) {
347 if (mLineBreak
== LineBreakRule::Anywhere
) {
348 memset(breakState
.Elements() + wordStart
,
349 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
,
351 } else if (wordMightBeBreakable
) {
352 // Save current start-of-word state because ComputeBreakPositions()
353 // will set it to false.
354 AutoRestore
<uint8_t> saveWordStartBreakState(breakState
[wordStart
]);
355 LineBreaker::ComputeBreakPositions(
356 aText
+ wordStart
, offset
- wordStart
, mWordBreak
, mLineBreak
,
357 mScriptIsChineseOrJapanese
, breakState
.Elements() + wordStart
);
360 FindHyphenationPoints(hyphenator
, aText
+ wordStart
, aText
+ offset
,
361 breakState
.Elements() + wordStart
);
364 if (!mWordContinuation
&& !noCapitalizationNeeded
) {
365 SetupCapitalization(aText
+ wordStart
, offset
- wordStart
,
366 capitalizationState
.Elements() + wordStart
);
369 wordMightBeBreakable
= false;
370 mWordContinuation
= false;
372 if (offset
>= aLength
) {
379 if (!wordMightBeBreakable
&&
380 !IsNonBreakableChar
<char16_t
>(ch
, mLegacyBehavior
)) {
381 wordMightBeBreakable
= true;
// The chunk ended in the middle of a word: stash it in mCurrentWord so that a
// later call can continue or flush it.
384 if (offset
>= aLength
) {
386 mCurrentWordMightBeBreakable
= wordMightBeBreakable
;
387 uint32_t len
= offset
- wordStart
;
388 char16_t
* elems
= mCurrentWord
.AppendElements(len
, mozilla::fallible
);
390 return NS_ERROR_OUT_OF_MEMORY
;
392 memcpy(elems
, aText
+ wordStart
, sizeof(char16_t
) * len
);
393 mTextItems
.AppendElement(TextItem(aSink
, wordStart
, len
, aFlags
));
394 // Ensure that the break-before for this word is written out
395 offset
= wordStart
+ 1;
396 UpdateCurrentWordLanguage(aHyphenationLanguage
);
402 if (!noBreaksNeeded
) {
403 aSink
->SetBreaks(start
, offset
- start
, breakState
.Elements() + start
);
405 if (!noCapitalizationNeeded
) {
406 aSink
->SetCapitalization(start
, offset
- start
,
407 capitalizationState
.Elements() + start
);
// Marks auto-hyphenation break opportunities for the word
// [aTextStart, aTextLimit) in aBreakState, which is indexed by code unit
// relative to aTextStart.
//
// The word is passed to aHyphenator->Hyphenate(), which fills `hyphens`.
// For a position i accepted by the limit checks, aBreakState[i + 1] is set to
// FLAG_BREAK_TYPE_HYPHEN and the overwritten value is remembered in
// oldBreaks. Once the "countable" length of the word is known (see the switch
// below for the categories that are treated specially), breaks are undone
// again if the word is shorter than mHyphenateLimitWord, or from the end
// while the trailing fragment is checked against mHyphenateLimitEnd.
// NOTE(review): the declarations of `length`, `endLimit` and BreakInfo, the
// bodies of the switch cases and the early returns are not visible in this
// view of the file.
413 void nsLineBreaker::FindHyphenationPoints(nsHyphenator
* aHyphenator
,
414 const char16_t
* aTextStart
,
415 const char16_t
* aTextLimit
,
416 uint8_t* aBreakState
) {
417 // Early-return for words that are definitely too short to hyphenate.
418 if (aTextLimit
- aTextStart
< mHyphenateLimitWord
) {
422 nsDependentSubstring
string(aTextStart
, aTextLimit
);
423 AutoTArray
<bool, 200> hyphens
;
424 if (NS_FAILED(aHyphenator
->Hyphenate(string
, hyphens
))) {
428 // Keep track of the length seen so far, in terms of characters that are
429 // countable for hyphenate-limit-chars purposes.
431 // When setting a potential break in aBreakState, we record the previous
432 // value in case we need to restore it because the position turns out to
433 // be too close to the end of the word.
439 AutoTArray
<BreakInfo
, 16> oldBreaks
;
440 // Don't consider setting any breaks where i >= endLimit, as they will
441 // definitely be too near the end of the word to be accepted.
443 string
.Length() - std::max
<uint32_t>(1u, mHyphenateLimitEnd
);
444 for (uint32_t i
= 0; i
< string
.Length(); ++i
) {
445 // Get current character, converting surrogate pairs to UCS4 for char
447 uint32_t ch
= string
[i
];
448 if (NS_IS_HIGH_SURROGATE(ch
) && i
+ 1 < string
.Length() &&
449 NS_IS_LOW_SURROGATE(string
[i
+ 1])) {
450 ch
= SURROGATE_TO_UCS4(ch
, string
[i
+ 1]);
453 // According to CSS Text, "Nonspacing combining marks (Unicode General
454 // Category Mn) and intra-word punctuation (Unicode General Category P*)
455 // do not count towards the minimum."
456 // (https://drafts.csswg.org/css-text-4/#hyphenate-char-limits)
457 // We also don't count Control or Format categories.
458 using mozilla::intl::GeneralCategory
;
459 switch (UnicodeProperties::CharType(ch
)) {
460 case GeneralCategory::Nonspacing_Mark
:
461 case GeneralCategory::Dash_Punctuation
:
462 case GeneralCategory::Open_Punctuation
:
463 case GeneralCategory::Close_Punctuation
:
464 case GeneralCategory::Connector_Punctuation
:
465 case GeneralCategory::Other_Punctuation
:
466 case GeneralCategory::Initial_Punctuation
:
467 case GeneralCategory::Final_Punctuation
:
468 case GeneralCategory::Control
:
469 case GeneralCategory::Format
:
470 case GeneralCategory::Surrogate
:
477 // Don't accept any breaks until we're far enough into the word, or if
478 // we're too near the end for it to possibly be accepted. (Note that the
479 // check against endLimit is just an initial worst-case check that assumes
480 // all the remaining characters are countable; if there are combining
481 // marks, etc., in the trailing part of the word we may need to reset the
482 // potential break later, after we've fully counted length.)
483 if (hyphens
[i
] && length
>= mHyphenateLimitStart
&& i
< endLimit
) {
484 // Keep track of hyphen position and "countable" length of the word.
485 oldBreaks
.AppendElement(BreakInfo
{i
+ 1, length
, aBreakState
[i
+ 1]});
486 aBreakState
[i
+ 1] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN
;
489 // If the character was outside the BMP, skip past the low surrogate.
490 if (!IS_IN_BMP(ch
)) {
495 if (length
< mHyphenateLimitWord
) {
496 // After discounting combining marks, punctuation, controls, etc., the word
497 // was too short for hyphenate-limit-chars. If we've set any hyphen breaks,
499 while (!oldBreaks
.IsEmpty()) {
500 auto lastBreak
= oldBreaks
.PopLastElement();
501 aBreakState
[lastBreak
.mPosition
] = lastBreak
.mState
;
504 // Check if trailing fragment is too short; if so, remove the last hyphen
505 // break(s) that we set, until the fragment will be long enough.
506 while (!oldBreaks
.IsEmpty()) {
507 auto lastBreak
= oldBreaks
.PopLastElement();
508 if (length
- lastBreak
.mLength
>= mHyphenateLimitEnd
) {
511 aBreakState
[lastBreak
.mPosition
] = lastBreak
.mState
;
// Appends a chunk of 8-bit text to the line breaker. This mirrors the
// char16_t overload, without the hyphenation and capitalization steps.
//
// aHyphenationLanguage  Only used when the call is forwarded to the char16_t
//                       overload.
// aText, aLength        The text; aLength is asserted to be non-zero.
// aFlags                BREAK_* flags. If BREAK_NEED_CAPITALIZATION or
//                       BREAK_USE_AUTO_HYPHENATION is set, the text is
//                       converted with CopyASCIItoUTF16 and the call is
//                       forwarded to the char16_t overload.
// aSink                 Receives SetBreaks() calls; the code tests it for
//                       null before use.
//
// Returns NS_ERROR_OUT_OF_MEMORY when a fallible allocation fails.
// NOTE(review): the declarations of `str`, `offset` and `i`, the loop header
// and the increment/break/continue/return statements are not visible in this
// view of the file.
516 nsresult
nsLineBreaker::AppendText(nsAtom
* aHyphenationLanguage
,
517 const uint8_t* aText
, uint32_t aLength
,
518 uint32_t aFlags
, nsILineBreakSink
* aSink
) {
519 NS_ASSERTION(aLength
> 0, "Appending empty text...");
521 if (aFlags
& (BREAK_NEED_CAPITALIZATION
| BREAK_USE_AUTO_HYPHENATION
)) {
522 // Defer to the Unicode path if capitalization or hyphenation is required
524 const char* cp
= reinterpret_cast<const char*>(aText
);
525 CopyASCIItoUTF16(nsDependentCSubstring(cp
, cp
+ aLength
), str
);
526 return AppendText(aHyphenationLanguage
, str
.get(), aLength
, aFlags
, aSink
);
531 // Continue the current word
532 if (mCurrentWord
.Length() > 0) {
533 NS_ASSERTION(!mAfterBreakableSpace
&& !mBreakHere
,
534 "These should not be set");
536 while (offset
< aLength
&& !IsSegmentSpace(aText
[offset
])) {
537 mCurrentWord
.AppendElement(aText
[offset
]);
538 if (!mCurrentWordMightBeBreakable
&&
539 !IsNonBreakableChar
<uint8_t>(aText
[offset
], mLegacyBehavior
)) {
540 mCurrentWordMightBeBreakable
= true;
// Record this chunk's contribution (sink offset 0, `offset` code units) to the
// pending word.
546 mTextItems
.AppendElement(TextItem(aSink
, 0, offset
, aFlags
));
549 if (offset
== aLength
) {
550 // We did not encounter whitespace so the word hasn't finished yet.
554 // We encountered whitespace, so we're done with this word
555 nsresult rv
= FlushCurrentWord();
// Per-code-unit break state for this chunk.
561 AutoTArray
<uint8_t, 4000> breakState
;
563 if (!breakState
.AppendElements(aLength
, mozilla::fallible
)) {
564 return NS_ERROR_OUT_OF_MEMORY
;
568 uint32_t start
= offset
;
569 bool noBreaksNeeded
=
570 !aSink
|| ((aFlags
& NO_BREAKS_NEEDED_FLAGS
) == NO_BREAKS_NEEDED_FLAGS
&&
571 !mBreakHere
&& !mAfterBreakableSpace
);
572 if (noBreaksNeeded
) {
573 // Skip to the space before the last word, since either the break data
574 // here is not needed, or no breaks are set in the sink and there cannot
575 // be any breaks in this chunk; all we need is the context for the next
578 while (offset
> start
) {
580 if (IsSegmentSpace(aText
[offset
])) {
585 uint32_t wordStart
= offset
;
586 bool wordMightBeBreakable
= false;
589 uint8_t ch
= aText
[offset
];
590 bool isSpace
= IsSegmentSpace(ch
);
591 bool isBreakableSpace
= isSpace
&& !(aFlags
& BREAK_SUPPRESS_INSIDE
);
594 // Consider word-break style. Since the break position of CJK scripts
595 // will be set by nsILineBreaker, we don't consider CJK at this point.
597 mBreakHere
|| (mAfterBreakableSpace
&& !isBreakableSpace
) ||
598 mWordBreak
== WordBreakRule::BreakAll
||
599 mLineBreak
== LineBreakRule::Anywhere
600 ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
601 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE
;
604 mAfterBreakableSpace
= isBreakableSpace
;
// Compute the breaks inside the word [wordStart, offset) unless breaks inside
// the text are suppressed.
607 if (offset
> wordStart
&& aSink
&& !(aFlags
& BREAK_SUPPRESS_INSIDE
)) {
608 if (mLineBreak
== LineBreakRule::Anywhere
) {
609 memset(breakState
.Elements() + wordStart
,
610 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
,
612 } else if (wordMightBeBreakable
) {
613 // Save current start-of-word state because ComputeBreakPositions()
614 // will set it to false.
615 AutoRestore
<uint8_t> saveWordStartBreakState(breakState
[wordStart
]);
616 LineBreaker::ComputeBreakPositions(
617 aText
+ wordStart
, offset
- wordStart
, mWordBreak
, mLineBreak
,
618 mScriptIsChineseOrJapanese
, breakState
.Elements() + wordStart
);
622 wordMightBeBreakable
= false;
623 mWordContinuation
= false;
625 if (offset
>= aLength
) {
632 if (!wordMightBeBreakable
&&
633 !IsNonBreakableChar
<uint8_t>(ch
, mLegacyBehavior
)) {
634 wordMightBeBreakable
= true;
// The chunk ended in the middle of a word: widen it into mCurrentWord
// (char16_t storage) so that a later call can continue or flush it.
637 if (offset
>= aLength
) {
639 mCurrentWordMightBeBreakable
= wordMightBeBreakable
;
640 uint32_t len
= offset
- wordStart
;
641 char16_t
* elems
= mCurrentWord
.AppendElements(len
, mozilla::fallible
);
643 return NS_ERROR_OUT_OF_MEMORY
;
646 for (i
= wordStart
; i
< offset
; ++i
) {
647 elems
[i
- wordStart
] = aText
[i
];
649 mTextItems
.AppendElement(TextItem(aSink
, wordStart
, len
, aFlags
));
650 // Ensure that the break-before for this word is written out
651 offset
= wordStart
+ 1;
656 if (!noBreaksNeeded
) {
657 aSink
->SetBreaks(start
, offset
- start
, breakState
.Elements() + start
);
// Records the language of the word being accumulated and derives
// mScriptIsChineseOrJapanese from it.
//
// If the word already has a language that differs from aHyphenationLanguage,
// it is flagged as mixed-language and mScriptIsChineseOrJapanese is cleared.
// If a language is supplied and none is recorded yet, the language tag is
// parsed as a locale (filling in likely subtags when the script is missing)
// and the script is compared against Hans, Hant, Jpan and Hrkt. The most
// recently resolved language atom and its result are kept in function-local
// statics so that a repeated language can reuse the previous answer.
// NOTE(review): the early returns, the `sInit` guard, the `else` header and
// the declarations of `loc` and `result` are not visible in this view.
662 void nsLineBreaker::UpdateCurrentWordLanguage(nsAtom
* aHyphenationLanguage
) {
663 if (mCurrentWordLanguage
&& mCurrentWordLanguage
!= aHyphenationLanguage
) {
664 mCurrentWordContainsMixedLang
= true;
665 mScriptIsChineseOrJapanese
= false;
669 if (aHyphenationLanguage
&& !mCurrentWordLanguage
) {
// Single-entry cache of the last language that was resolved and its result.
670 static mozilla::StaticRefPtr
<nsAtom
> sLastHyphenationLanguage
;
671 static bool sLastScriptIsChineseOrJapanese
= false;
672 static bool sInit
= false;
675 mozilla::ClearOnShutdown(&sLastHyphenationLanguage
);
679 if (sLastHyphenationLanguage
== aHyphenationLanguage
) {
680 MOZ_ASSERT(nsAtomString(sLastHyphenationLanguage
)
681 .Equals(nsAtomString(aHyphenationLanguage
)));
682 mScriptIsChineseOrJapanese
= sLastScriptIsChineseOrJapanese
;
686 LocaleParser::TryParse(nsAtomCString(aHyphenationLanguage
), loc
);
688 if (result
.isErr()) {
691 if (loc
.Script().Missing() && loc
.AddLikelySubtags().isErr()) {
694 mScriptIsChineseOrJapanese
=
695 loc
.Script().EqualTo("Hans") || loc
.Script().EqualTo("Hant") ||
696 loc
.Script().EqualTo("Jpan") || loc
.Script().EqualTo("Hrkt");
698 sLastHyphenationLanguage
= aHyphenationLanguage
;
699 sLastScriptIsChineseOrJapanese
= mScriptIsChineseOrJapanese
;
702 mCurrentWordLanguage
= aHyphenationLanguage
;
// Tells the breaker that whitespace which is not part of any sink's text was
// encountered. The pending word is flushed, mAfterBreakableSpace is set
// according to whether BREAK_SUPPRESS_INSIDE is absent from aFlags, and
// mWordContinuation is cleared.
// NOTE(review): the error check after FlushCurrentWord(), the body of the
// `mAfterBreakableSpace && !isBreakableSpace` branch and the return statement
// are not visible in this view of the file.
705 nsresult
nsLineBreaker::AppendInvisibleWhitespace(uint32_t aFlags
) {
706 nsresult rv
= FlushCurrentWord();
711 bool isBreakableSpace
= !(aFlags
& BREAK_SUPPRESS_INSIDE
);
712 if (mAfterBreakableSpace
&& !isBreakableSpace
) {
715 mAfterBreakableSpace
= isBreakableSpace
;
716 mWordContinuation
= false;
720 nsresult
nsLineBreaker::Reset(bool* aTrailingBreak
) {
721 nsresult rv
= FlushCurrentWord();
726 *aTrailingBreak
= mBreakHere
|| mAfterBreakableSpace
;
728 mAfterBreakableSpace
= false;