dom/base/nsLineBreaker.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 #include "nsLineBreaker.h"
   8 #include "nsContentUtils.h"
   9 #include "gfxTextRun.h"  // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values
  10 #include "nsHyphenationManager.h"
  11 #include "nsHyphenator.h"
  12 #include "mozilla/AutoRestore.h"
  13 #include "mozilla/ClearOnShutdown.h"
  14 #include "mozilla/gfx/2D.h"
  15 #include "mozilla/intl/LineBreaker.h"  // for LineBreaker::ComputeBreakPositions
  16 #include "mozilla/intl/Locale.h"
  17 #include "mozilla/intl/UnicodeProperties.h"
  18 #include "mozilla/ScopeExit.h"
  19 #include "mozilla/StaticPrefs_intl.h"
  20
  21 using mozilla::AutoRestore;
  22 using mozilla::intl::LineBreaker;
  23 using mozilla::intl::LineBreakRule;
  24 using mozilla::intl::Locale;
  25 using mozilla::intl::LocaleParser;
  26 using mozilla::intl::UnicodeProperties;
  27 using mozilla::intl::WordBreakRule;
  28
  29 // There is no break opportunity between any pair of characters that has line
  30 // break class of either AL (Alphabetic), IS (Infix Numeric Separator), NU
  31 // (Numeric), or QU (Quotation). See
  32 // https://www.unicode.org/Public/UCD/latest/ucd/LineBreak.txt for Unicode code
  33 // point and line break class mapping.
  34 static constexpr uint8_t kNonBreakableASCII[] = {
  35     // clang-format off
  36 // 0x20-0x2f
  37 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0,
  38 // 0x30-0x3f
  39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
  40 // 0x40-0x4f
  41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  42 // 0x50-0x5f
  43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
  44 // 0x60-0x6f
  45 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  46 // 0x70-0x7f
  47 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
  48     // clang-format on
  49 };
  50
  51 template <typename T>
  52 static constexpr bool IsNonBreakableChar(T aChar, bool aLegacyBehavior) {
  53   if (aLegacyBehavior) {
  54     // If not using ICU4X, line break rules aren't compatible with UAX#14. Use
  55     // old way.
  56     return (0x0030 <= aChar && aChar <= 0x0039) ||
  57            (0x0041 <= aChar && aChar <= 0x005A) ||
  58            (0x0061 <= aChar && aChar <= 0x007A) || (0x000a == aChar);
  59   }
  60   if (aChar < 0x20 || aChar > 0x7f) {
  61     return false;
  62   }
  63   return !!kNonBreakableASCII[aChar - 0x20];
  64 }
  65
// Constructs a line breaker. mLegacyBehavior is the negation of the
// intl_icu4x_segmenter_enabled static pref, sampled once here; it is later
// passed to IsNonBreakableChar() to select which "non-breakable" definition
// applies.
nsLineBreaker::nsLineBreaker()
    : mLegacyBehavior(!mozilla::StaticPrefs::intl_icu4x_segmenter_enabled()) {}
  68
// Destructor. Asserts that no partially accumulated word is still buffered in
// mCurrentWord; Reset() flushes (and thereby clears) the buffered word, so it
// is expected to have been called before the breaker is destroyed.
nsLineBreaker::~nsLineBreaker() {
  NS_ASSERTION(mCurrentWord.Length() == 0,
               "Should have Reset() before destruction!");
}
  73
  74 /* static */
  75 bool nsLineBreaker::ShouldCapitalize(uint32_t aChar, bool& aCapitalizeNext) {
  76   using mozilla::intl::GeneralCategory;
  77   auto category = UnicodeProperties::CharType(aChar);
  78   switch (category) {
  79     case GeneralCategory::Uppercase_Letter:
  80     case GeneralCategory::Lowercase_Letter:
  81     case GeneralCategory::Titlecase_Letter:
  82     case GeneralCategory::Modifier_Letter:
  83     case GeneralCategory::Other_Letter:
  84     case GeneralCategory::Decimal_Number:
  85     case GeneralCategory::Letter_Number:
  86     case GeneralCategory::Other_Number:
  87       if (aCapitalizeNext) {
  88         aCapitalizeNext = false;
  89         return true;
  90       }
  91       break;
  92     case GeneralCategory::Space_Separator:
  93     case GeneralCategory::Line_Separator:
  94     case GeneralCategory::Paragraph_Separator:
  95     case GeneralCategory::Dash_Punctuation:
  96     case GeneralCategory::Initial_Punctuation:
  97       /* These punctuation categories are excluded, for examples like
  98        *   "what colo[u]r" -> "What Colo[u]r?" (rather than "What Colo[U]R?")
  99        * and
 100        *   "snake_case" -> "Snake_case" (to match word selection behavior)
 101       case GeneralCategory::Open_Punctuation:
 102       case GeneralCategory::Close_Punctuation:
 103       case GeneralCategory::Connector_Punctuation:
 104        */
 105       aCapitalizeNext = true;
 106       break;
 107     case GeneralCategory::Final_Punctuation:
 108       /* Special-case: exclude Unicode single-close-quote/apostrophe,
 109          for examples like "Lowe’s" etc. */
 110       if (aChar != 0x2019) {
 111         aCapitalizeNext = true;
 112       }
 113       break;
 114     case GeneralCategory::Other_Punctuation:
 115       /* Special-case: exclude ASCII apostrophe, for "Lowe's" etc.,
 116          and MIDDLE DOT, for Catalan "l·l". */
 117       if (aChar != '\'' && aChar != 0x00B7) {
 118         aCapitalizeNext = true;
 119       }
 120       break;
 121     default:
 122       break;
 123   }
 124   return false;
 125 }
 126
 127 static void SetupCapitalization(const char16_t* aWord, uint32_t aLength,
 128                                 bool* aCapitalization) {
 129   // Capitalize the first alphanumeric character after a space or punctuation.
 130   bool capitalizeNextChar = true;
 131   for (uint32_t i = 0; i < aLength; ++i) {
 132     uint32_t ch = aWord[i];
 133     if (i + 1 < aLength && NS_IS_SURROGATE_PAIR(ch, aWord[i + 1])) {
 134       ch = SURROGATE_TO_UCS4(ch, aWord[i + 1]);
 135     }
 136     aCapitalization[i] =
 137         nsLineBreaker::ShouldCapitalize(ch, capitalizeNextChar);
 138
 139     if (!IS_IN_BMP(ch)) {
 140       ++i;
 141     }
 142   }
 143 }
 144
 145 nsresult nsLineBreaker::FlushCurrentWord() {
 146   auto cleanup = mozilla::MakeScopeExit([&] {
 147     mCurrentWord.Clear();
 148     mTextItems.Clear();
 149     mCurrentWordMightBeBreakable = false;
 150     mCurrentWordContainsMixedLang = false;
 151     mCurrentWordLanguage = nullptr;
 152     mWordContinuation = false;
 153   });
 154
 155   uint32_t length = mCurrentWord.Length();
 156   AutoTArray<uint8_t, 4000> breakState;
 157   if (!breakState.AppendElements(length, mozilla::fallible)) {
 158     return NS_ERROR_OUT_OF_MEMORY;
 159   }
 160
 161   if (mLineBreak == LineBreakRule::Anywhere) {
 162     memset(breakState.Elements(),
 163            gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL,
 164            length * sizeof(uint8_t));
 165   } else if (!mCurrentWordMightBeBreakable &&
 166              mWordBreak != WordBreakRule::BreakAll) {
 167     // word-break: normal or keep-all has no break opportunity if the word
 168     // is non-breakable. (See the comment of kNonBreakableASCII).
 169     memset(breakState.Elements(),
 170            gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
 171            length * sizeof(uint8_t));
 172   } else {
 173     LineBreaker::ComputeBreakPositions(
 174         mCurrentWord.Elements(), length, mWordBreak, mLineBreak,
 175         mScriptIsChineseOrJapanese, breakState.Elements());
 176   }
 177
 178   bool autoHyphenate = mCurrentWordLanguage && !mCurrentWordContainsMixedLang;
 179   uint32_t i;
 180   for (i = 0; autoHyphenate && i < mTextItems.Length(); ++i) {
 181     TextItem* ti = &mTextItems[i];
 182     if (!(ti->mFlags & BREAK_USE_AUTO_HYPHENATION)) {
 183       autoHyphenate = false;
 184     }
 185   }
 186   if (autoHyphenate) {
 187     RefPtr<nsHyphenator> hyphenator =
 188         nsHyphenationManager::Instance()->GetHyphenator(mCurrentWordLanguage);
 189     if (hyphenator) {
 190       FindHyphenationPoints(hyphenator, mCurrentWord.Elements(),
 191                             mCurrentWord.Elements() + length,
 192                             breakState.Elements());
 193     }
 194   }
 195
 196   nsTArray<bool> capitalizationState;
 197   uint32_t offset = 0;
 198   for (i = 0; i < mTextItems.Length(); ++i) {
 199     TextItem* ti = &mTextItems[i];
 200     NS_ASSERTION(ti->mLength > 0, "Zero length word contribution?");
 201
 202     if ((ti->mFlags & BREAK_SUPPRESS_INITIAL) && ti->mSinkOffset == 0) {
 203       breakState[offset] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
 204     }
 205     if (ti->mFlags & BREAK_SUPPRESS_INSIDE) {
 206       uint32_t exclude = ti->mSinkOffset == 0 ? 1 : 0;
 207       memset(breakState.Elements() + offset + exclude,
 208              gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
 209              (ti->mLength - exclude) * sizeof(uint8_t));
 210     }
 211
 212     // Don't set the break state for the first character of the word, because
 213     // it was already set correctly earlier and we don't know what the true
 214     // value should be.
 215     uint32_t skipSet = i == 0 ? 1 : 0;
 216     if (ti->mSink) {
 217       ti->mSink->SetBreaks(ti->mSinkOffset + skipSet, ti->mLength - skipSet,
 218                            breakState.Elements() + offset + skipSet);
 219
 220       if (!mWordContinuation && (ti->mFlags & BREAK_NEED_CAPITALIZATION)) {
 221         if (capitalizationState.Length() == 0) {
 222           if (!capitalizationState.AppendElements(length, mozilla::fallible)) {
 223             return NS_ERROR_OUT_OF_MEMORY;
 224           }
 225           memset(capitalizationState.Elements(), false, length * sizeof(bool));
 226           SetupCapitalization(mCurrentWord.Elements(), length,
 227                               capitalizationState.Elements());
 228         }
 229         ti->mSink->SetCapitalization(ti->mSinkOffset, ti->mLength,
 230                                      capitalizationState.Elements() + offset);
 231       }
 232     }
 233
 234     offset += ti->mLength;
 235   }
 236
 237   return NS_OK;
 238 }
 239
// If the aFlags parameter to AppendText has all these bits set,
// then we don't need to worry about finding break opportunities
// in the appended text. (AppendText additionally requires that neither
// mBreakHere nor mAfterBreakableSpace is pending, and always takes the
// no-breaks path when there is no sink to report to.)
#define NO_BREAKS_NEEDED_FLAGS                      \
  (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE | \
   BREAK_SKIP_SETTING_NO_BREAKS)
 246
// Feeds a chunk of UTF-16 text to the line breaker.
//
// Complete words inside the chunk are analysed immediately. A trailing word
// that is not yet terminated by a segment space is copied into mCurrentWord
// (with a TextItem remembering the sink and offsets) and is only resolved by a
// later FlushCurrentWord(), so that a word spanning several chunks is analysed
// as a whole.
//
// @param aHyphenationLanguage Language used to look up a hyphenator and to
//                             track the language of the buffered word; may be
//                             null.
// @param aText                The text; aLength code units are read.
// @param aLength              Number of code units; asserted to be non-zero.
// @param aFlags               BREAK_* flags controlling break suppression,
//                             capitalization and auto-hyphenation.
// @param aSink                Receives SetBreaks()/SetCapitalization() calls;
//                             may be null, in which case nothing is reported.
// @return NS_OK, NS_ERROR_OUT_OF_MEMORY, or the failure from
//         FlushCurrentWord().
nsresult nsLineBreaker::AppendText(nsAtom* aHyphenationLanguage,
                                   const char16_t* aText, uint32_t aLength,
                                   uint32_t aFlags, nsILineBreakSink* aSink) {
  NS_ASSERTION(aLength > 0, "Appending empty text...");

  uint32_t offset = 0;

  // Continue the current word
  if (mCurrentWord.Length() > 0) {
    NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere,
                 "These should not be set");

    // Extend the buffered word with the leading non-space run of this chunk.
    while (offset < aLength && !IsSegmentSpace(aText[offset])) {
      mCurrentWord.AppendElement(aText[offset]);
      if (!mCurrentWordMightBeBreakable &&
          !IsNonBreakableChar<char16_t>(aText[offset], mLegacyBehavior)) {
        mCurrentWordMightBeBreakable = true;
      }
      UpdateCurrentWordLanguage(aHyphenationLanguage);
      ++offset;
    }

    // Record this chunk's contribution so FlushCurrentWord() can report the
    // word's state back to aSink.
    if (offset > 0) {
      mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
    }

    // The whole chunk was consumed without finding a space: the word is still
    // open.
    if (offset == aLength) {
      return NS_OK;
    }

    // We encountered whitespace, so we're done with this word
    nsresult rv = FlushCurrentWord();
    if (NS_FAILED(rv)) {
      return rv;
    }
  }

  // Per-code-unit break state for this chunk; only needed when there is a
  // sink to report to.
  AutoTArray<uint8_t, 4000> breakState;
  if (aSink) {
    if (!breakState.AppendElements(aLength, mozilla::fallible)) {
      return NS_ERROR_OUT_OF_MEMORY;
    }
  }

  // Per-code-unit capitalization state, zero-initialized; only allocated when
  // the caller asked for capitalization and there is a sink.
  bool noCapitalizationNeeded = true;
  nsTArray<bool> capitalizationState;
  if (aSink && (aFlags & BREAK_NEED_CAPITALIZATION)) {
    if (!capitalizationState.AppendElements(aLength, mozilla::fallible)) {
      return NS_ERROR_OUT_OF_MEMORY;
    }
    memset(capitalizationState.Elements(), false, aLength * sizeof(bool));
    noCapitalizationNeeded = false;
  }

  // First offset of this chunk that was not absorbed into the previous word.
  uint32_t start = offset;
  bool noBreaksNeeded =
      !aSink || ((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS &&
                 !mBreakHere && !mAfterBreakableSpace);
  if (noBreaksNeeded && noCapitalizationNeeded) {
    // Skip to the space before the last word, since either the break data
    // here is not needed, or no breaks are set in the sink and there cannot
    // be any breaks in this chunk; and we don't need to do word-initial
    // capitalization. All we need is the context for the next chunk (if any).
    offset = aLength;
    while (offset > start) {
      --offset;
      if (IsSegmentSpace(aText[offset])) {
        break;
      }
    }
  }
  uint32_t wordStart = offset;
  bool wordMightBeBreakable = false;

  // A hyphenator is only looked up when auto-hyphenation is requested, breaks
  // inside the text are not suppressed, and a language is available.
  RefPtr<nsHyphenator> hyphenator;
  if ((aFlags & BREAK_USE_AUTO_HYPHENATION) &&
      !(aFlags & BREAK_SUPPRESS_INSIDE) && aHyphenationLanguage) {
    hyphenator =
        nsHyphenationManager::Instance()->GetHyphenator(aHyphenationLanguage);
  }

  // Walk the chunk one code unit at a time, resolving each word when the
  // space (or line feed) that terminates it is reached.
  for (;;) {
    char16_t ch = aText[offset];
    bool isSpace = IsSegmentSpace(ch);
    bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);

    if (aSink && !noBreaksNeeded) {
      // Provisional break-before state for this position: a break is allowed
      // if one is pending, if we are leaving a run of breakable space, or if
      // the style permits breaking everywhere.
      breakState[offset] =
          mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
                  mWordBreak == WordBreakRule::BreakAll ||
                  mLineBreak == LineBreakRule::Anywhere
              ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
              : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
    }
    mBreakHere = false;
    mAfterBreakableSpace = isBreakableSpace;

    // Note that a line feed ends a word here even if IsSegmentSpace() did not
    // classify it as a space.
    if (isSpace || ch == '\n') {
      if (offset > wordStart && aSink) {
        if (!(aFlags & BREAK_SUPPRESS_INSIDE)) {
          if (mLineBreak == LineBreakRule::Anywhere) {
            memset(breakState.Elements() + wordStart,
                   gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL,
                   offset - wordStart);
          } else if (wordMightBeBreakable) {
            // Save current start-of-word state because ComputeBreakPositions()
            // will set it to false.
            AutoRestore<uint8_t> saveWordStartBreakState(breakState[wordStart]);
            LineBreaker::ComputeBreakPositions(
                aText + wordStart, offset - wordStart, mWordBreak, mLineBreak,
                mScriptIsChineseOrJapanese, breakState.Elements() + wordStart);
          }
          if (hyphenator) {
            FindHyphenationPoints(hyphenator, aText + wordStart, aText + offset,
                                  breakState.Elements() + wordStart);
          }
        }
        if (!mWordContinuation && !noCapitalizationNeeded) {
          SetupCapitalization(aText + wordStart, offset - wordStart,
                              capitalizationState.Elements() + wordStart);
        }
      }
      // Start a fresh word after the separator.
      wordMightBeBreakable = false;
      mWordContinuation = false;
      ++offset;
      if (offset >= aLength) {
        break;
      }
      wordStart = offset;
      continue;
    }

    if (!wordMightBeBreakable &&
        !IsNonBreakableChar<char16_t>(ch, mLegacyBehavior)) {
      wordMightBeBreakable = true;
    }
    ++offset;
    if (offset >= aLength) {
      // Save this word
      mCurrentWordMightBeBreakable = wordMightBeBreakable;
      uint32_t len = offset - wordStart;
      char16_t* elems = mCurrentWord.AppendElements(len, mozilla::fallible);
      if (!elems) {
        return NS_ERROR_OUT_OF_MEMORY;
      }
      memcpy(elems, aText + wordStart, sizeof(char16_t) * len);
      mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
      // Ensure that the break-before for this word is written out
      offset = wordStart + 1;
      UpdateCurrentWordLanguage(aHyphenationLanguage);
      break;
    }
  }

  // Report everything resolved in this chunk: [start, offset). The interior
  // of a still-open trailing word is excluded; FlushCurrentWord() reports it.
  if (aSink) {
    if (!noBreaksNeeded) {
      aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
    }
    if (!noCapitalizationNeeded) {
      aSink->SetCapitalization(start, offset - start,
                               capitalizationState.Elements() + start);
    }
  }
  return NS_OK;
}
 412
 413 void nsLineBreaker::FindHyphenationPoints(nsHyphenator* aHyphenator,
 414                                           const char16_t* aTextStart,
 415                                           const char16_t* aTextLimit,
 416                                           uint8_t* aBreakState) {
 417   // Early-return for words that are definitely too short to hyphenate.
 418   if (aTextLimit - aTextStart < mHyphenateLimitWord) {
 419     return;
 420   }
 421
 422   nsDependentSubstring string(aTextStart, aTextLimit);
 423   AutoTArray<bool, 200> hyphens;
 424   if (NS_FAILED(aHyphenator->Hyphenate(string, hyphens))) {
 425     return;
 426   }
 427
 428   // Keep track of the length seen so far, in terms of characters that are
 429   // countable for hyphenate-limit-chars purposes.
 430   uint32_t length = 0;
 431   // When setting a potential break in aBreakState, we record the previous
 432   // value in case we need to restore it because the position turns out to
 433   // be too close to the end of the word.
 434   struct BreakInfo {
 435     uint32_t mPosition;
 436     uint32_t mLength;
 437     uint8_t mState;
 438   };
 439   AutoTArray<BreakInfo, 16> oldBreaks;
 440   // Don't consider setting any breaks where i >= endLimit, as they will
 441   // definitely be too near the end of the word to be accepted.
 442   uint32_t endLimit =
 443       string.Length() - std::max<uint32_t>(1u, mHyphenateLimitEnd);
 444   for (uint32_t i = 0; i < string.Length(); ++i) {
 445     // Get current character, converting surrogate pairs to UCS4 for char
 446     // category lookup.
 447     uint32_t ch = string[i];
 448     if (NS_IS_HIGH_SURROGATE(ch) && i + 1 < string.Length() &&
 449         NS_IS_LOW_SURROGATE(string[i + 1])) {
 450       ch = SURROGATE_TO_UCS4(ch, string[i + 1]);
 451     }
 452
 453     // According to CSS Text, "Nonspacing combining marks (Unicode General
 454     // Category Mn) and intra-word punctuation (Unicode General Category P*)
 455     // do not count towards the minimum."
 456     // (https://drafts.csswg.org/css-text-4/#hyphenate-char-limits)
 457     // We also don't count Control or Format categories.
 458     using mozilla::intl::GeneralCategory;
 459     switch (UnicodeProperties::CharType(ch)) {
 460       case GeneralCategory::Nonspacing_Mark:
 461       case GeneralCategory::Dash_Punctuation:
 462       case GeneralCategory::Open_Punctuation:
 463       case GeneralCategory::Close_Punctuation:
 464       case GeneralCategory::Connector_Punctuation:
 465       case GeneralCategory::Other_Punctuation:
 466       case GeneralCategory::Initial_Punctuation:
 467       case GeneralCategory::Final_Punctuation:
 468       case GeneralCategory::Control:
 469       case GeneralCategory::Format:
 470       case GeneralCategory::Surrogate:
 471         break;
 472       default:
 473         ++length;
 474         break;
 475     }
 476
 477     // Don't accept any breaks until we're far enough into the word, or if
 478     // we're too near the end for it to possibly be accepted. (Note that the
 479     // check against endLimit is just an initial worst-case check that assumes
 480     // all the remaining characters are countable; if there are combining
 481     // marks, etc., in the trailing part of the word we may need to reset the
 482     // potential break later, after we've fully counted length.)
 483     if (hyphens[i] && length >= mHyphenateLimitStart && i < endLimit) {
 484       // Keep track of hyphen position and "countable" length of the word.
 485       oldBreaks.AppendElement(BreakInfo{i + 1, length, aBreakState[i + 1]});
 486       aBreakState[i + 1] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
 487     }
 488
 489     // If the character was outside the BMP, skip past the low surrogate.
 490     if (!IS_IN_BMP(ch)) {
 491       ++i;
 492     }
 493   }
 494
 495   if (length < mHyphenateLimitWord) {
 496     // After discounting combining marks, punctuation, controls, etc., the word
 497     // was too short for hyphenate-limit-chars. If we've set any hyphen breaks,
 498     // forget them.
 499     while (!oldBreaks.IsEmpty()) {
 500       auto lastBreak = oldBreaks.PopLastElement();
 501       aBreakState[lastBreak.mPosition] = lastBreak.mState;
 502     }
 503   } else {
 504     // Check if trailing fragment is too short; if so, remove the last hyphen
 505     // break(s) that we set, until the fragment will be long enough.
 506     while (!oldBreaks.IsEmpty()) {
 507       auto lastBreak = oldBreaks.PopLastElement();
 508       if (length - lastBreak.mLength >= mHyphenateLimitEnd) {
 509         break;
 510       }
 511       aBreakState[lastBreak.mPosition] = lastBreak.mState;
 512     }
 513   }
 514 }
 515
// Feeds a chunk of 8-bit text to the line breaker.
//
// If capitalization or automatic hyphenation is requested, the text is widened
// to UTF-16 and handed to the char16_t overload. Otherwise this follows the
// same scheme as that overload, minus hyphenation and capitalization: complete
// words are analysed immediately, and an unterminated trailing word is
// buffered in mCurrentWord until a later FlushCurrentWord().
//
// @param aHyphenationLanguage Only used when deferring to the char16_t
//                             overload.
// @param aText                The text; aLength bytes are read.
// @param aLength              Number of bytes; asserted to be non-zero.
// @param aFlags               BREAK_* flags.
// @param aSink                Receives SetBreaks() calls; may be null.
// @return NS_OK, NS_ERROR_OUT_OF_MEMORY, or the failure from
//         FlushCurrentWord() / the char16_t overload.
nsresult nsLineBreaker::AppendText(nsAtom* aHyphenationLanguage,
                                   const uint8_t* aText, uint32_t aLength,
                                   uint32_t aFlags, nsILineBreakSink* aSink) {
  NS_ASSERTION(aLength > 0, "Appending empty text...");

  if (aFlags & (BREAK_NEED_CAPITALIZATION | BREAK_USE_AUTO_HYPHENATION)) {
    // Defer to the Unicode path if capitalization or hyphenation is required
    // NOTE(review): the bytes are widened with CopyASCIItoUTF16; confirm how
    // that helper treats bytes >= 0x80 if aText can contain them.
    nsAutoString str;
    const char* cp = reinterpret_cast<const char*>(aText);
    CopyASCIItoUTF16(nsDependentCSubstring(cp, cp + aLength), str);
    return AppendText(aHyphenationLanguage, str.get(), aLength, aFlags, aSink);
  }

  uint32_t offset = 0;

  // Continue the current word
  if (mCurrentWord.Length() > 0) {
    NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere,
                 "These should not be set");

    // Extend the buffered word with the leading non-space run of this chunk.
    while (offset < aLength && !IsSegmentSpace(aText[offset])) {
      mCurrentWord.AppendElement(aText[offset]);
      if (!mCurrentWordMightBeBreakable &&
          !IsNonBreakableChar<uint8_t>(aText[offset], mLegacyBehavior)) {
        mCurrentWordMightBeBreakable = true;
      }
      ++offset;
    }

    // Record this chunk's contribution so FlushCurrentWord() can report the
    // word's state back to aSink.
    if (offset > 0) {
      mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
    }

    if (offset == aLength) {
      // We did not encounter whitespace so the word hasn't finished yet.
      return NS_OK;
    }

    // We encountered whitespace, so we're done with this word
    nsresult rv = FlushCurrentWord();
    if (NS_FAILED(rv)) {
      return rv;
    }
  }

  // Per-byte break state for this chunk; only needed when there is a sink.
  AutoTArray<uint8_t, 4000> breakState;
  if (aSink) {
    if (!breakState.AppendElements(aLength, mozilla::fallible)) {
      return NS_ERROR_OUT_OF_MEMORY;
    }
  }

  // First offset of this chunk that was not absorbed into the previous word.
  uint32_t start = offset;
  bool noBreaksNeeded =
      !aSink || ((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS &&
                 !mBreakHere && !mAfterBreakableSpace);
  if (noBreaksNeeded) {
    // Skip to the space before the last word, since either the break data
    // here is not needed, or no breaks are set in the sink and there cannot
    // be any breaks in this chunk; all we need is the context for the next
    // chunk (if any)
    offset = aLength;
    while (offset > start) {
      --offset;
      if (IsSegmentSpace(aText[offset])) {
        break;
      }
    }
  }
  uint32_t wordStart = offset;
  bool wordMightBeBreakable = false;

  // Walk the chunk one byte at a time, resolving each word when the space
  // that terminates it is reached.
  for (;;) {
    uint8_t ch = aText[offset];
    bool isSpace = IsSegmentSpace(ch);
    bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);

    // Unlike the char16_t overload, this is written whenever there is a sink,
    // even if noBreaksNeeded; in that case the value is simply never reported.
    if (aSink) {
      // Consider word-break style.  Since the break position of CJK scripts
      // will be set by nsILineBreaker, we don't consider CJK at this point.
      breakState[offset] =
          mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
                  mWordBreak == WordBreakRule::BreakAll ||
                  mLineBreak == LineBreakRule::Anywhere
              ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
              : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
    }
    mBreakHere = false;
    mAfterBreakableSpace = isBreakableSpace;

    if (isSpace) {
      if (offset > wordStart && aSink && !(aFlags & BREAK_SUPPRESS_INSIDE)) {
        if (mLineBreak == LineBreakRule::Anywhere) {
          memset(breakState.Elements() + wordStart,
                 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL,
                 offset - wordStart);
        } else if (wordMightBeBreakable) {
          // Save current start-of-word state because ComputeBreakPositions()
          // will set it to false.
          AutoRestore<uint8_t> saveWordStartBreakState(breakState[wordStart]);
          LineBreaker::ComputeBreakPositions(
              aText + wordStart, offset - wordStart, mWordBreak, mLineBreak,
              mScriptIsChineseOrJapanese, breakState.Elements() + wordStart);
        }
      }

      // Start a fresh word after the separator.
      wordMightBeBreakable = false;
      mWordContinuation = false;
      ++offset;
      if (offset >= aLength) {
        break;
      }
      wordStart = offset;
      continue;
    }

    if (!wordMightBeBreakable &&
        !IsNonBreakableChar<uint8_t>(ch, mLegacyBehavior)) {
      wordMightBeBreakable = true;
    }
    ++offset;
    if (offset >= aLength) {
      // Save this word
      mCurrentWordMightBeBreakable = wordMightBeBreakable;
      uint32_t len = offset - wordStart;
      char16_t* elems = mCurrentWord.AppendElements(len, mozilla::fallible);
      if (!elems) {
        return NS_ERROR_OUT_OF_MEMORY;
      }
      // Widen each byte to a UTF-16 code unit while copying into the buffer.
      uint32_t i;
      for (i = wordStart; i < offset; ++i) {
        elems[i - wordStart] = aText[i];
      }
      mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
      // Ensure that the break-before for this word is written out
      offset = wordStart + 1;
      break;
    }
  }

  // noBreaksNeeded is always true when aSink is null, so aSink is non-null
  // whenever this branch is taken.
  if (!noBreaksNeeded) {
    aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
  }
  return NS_OK;
}
 661
 662 void nsLineBreaker::UpdateCurrentWordLanguage(nsAtom* aHyphenationLanguage) {
 663   if (mCurrentWordLanguage && mCurrentWordLanguage != aHyphenationLanguage) {
 664     mCurrentWordContainsMixedLang = true;
 665     mScriptIsChineseOrJapanese = false;
 666     return;
 667   }
 668
 669   if (aHyphenationLanguage && !mCurrentWordLanguage) {
 670     static mozilla::StaticRefPtr<nsAtom> sLastHyphenationLanguage;
 671     static bool sLastScriptIsChineseOrJapanese = false;
 672     static bool sInit = false;
 673
 674     if (!sInit) {
 675       mozilla::ClearOnShutdown(&sLastHyphenationLanguage);
 676       sInit = true;
 677     }
 678
 679     if (sLastHyphenationLanguage == aHyphenationLanguage) {
 680       MOZ_ASSERT(nsAtomString(sLastHyphenationLanguage)
 681                      .Equals(nsAtomString(aHyphenationLanguage)));
 682       mScriptIsChineseOrJapanese = sLastScriptIsChineseOrJapanese;
 683     } else {
 684       Locale loc;
 685       auto result =
 686           LocaleParser::TryParse(nsAtomCString(aHyphenationLanguage), loc);
 687
 688       if (result.isErr()) {
 689         return;
 690       }
 691       if (loc.Script().Missing() && loc.AddLikelySubtags().isErr()) {
 692         return;
 693       }
 694       mScriptIsChineseOrJapanese =
 695           loc.Script().EqualTo("Hans") || loc.Script().EqualTo("Hant") ||
 696           loc.Script().EqualTo("Jpan") || loc.Script().EqualTo("Hrkt");
 697
 698       sLastHyphenationLanguage = aHyphenationLanguage;
 699       sLastScriptIsChineseOrJapanese = mScriptIsChineseOrJapanese;
 700     }
 701   }
 702   mCurrentWordLanguage = aHyphenationLanguage;
 703 }
 704
 705 nsresult nsLineBreaker::AppendInvisibleWhitespace(uint32_t aFlags) {
 706   nsresult rv = FlushCurrentWord();
 707   if (NS_FAILED(rv)) {
 708     return rv;
 709   }
 710
 711   bool isBreakableSpace = !(aFlags & BREAK_SUPPRESS_INSIDE);
 712   if (mAfterBreakableSpace && !isBreakableSpace) {
 713     mBreakHere = true;
 714   }
 715   mAfterBreakableSpace = isBreakableSpace;
 716   mWordContinuation = false;
 717   return NS_OK;
 718 }
 719
 720 nsresult nsLineBreaker::Reset(bool* aTrailingBreak) {
 721   nsresult rv = FlushCurrentWord();
 722   if (NS_FAILED(rv)) {
 723     return rv;
 724   }
 725
 726   *aTrailingBreak = mBreakHere || mAfterBreakableSpace;
 727   mBreakHere = false;
 728   mAfterBreakableSpace = false;
 729   return NS_OK;
 730 }