dom/serializers/nsPlainTextSerializer.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 /*
   8  * nsIContentSerializer implementation that can be used with an
   9  * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
  10  * (eg for copy/paste as plaintext).
  11  */
  12
  13 #include "nsPlainTextSerializer.h"
  14
  15 #include <limits>
  16
  17 #include "nsPrintfCString.h"
  18 #include "nsDebug.h"
  19 #include "nsGkAtoms.h"
  20 #include "nsNameSpaceManager.h"
  21 #include "nsTextFragment.h"
  22 #include "nsContentUtils.h"
  23 #include "nsReadableUtils.h"
  24 #include "nsUnicharUtils.h"
  25 #include "nsCRT.h"
  26 #include "mozilla/Casting.h"
  27 #include "mozilla/TextEditor.h"
  28 #include "mozilla/dom/CharacterData.h"
  29 #include "mozilla/dom/Element.h"
  30 #include "mozilla/dom/HTMLBRElement.h"
  31 #include "mozilla/dom/Text.h"
  32 #include "mozilla/intl/Segmenter.h"
  33 #include "mozilla/intl/UnicodeProperties.h"
  34 #include "mozilla/dom/AbstractRange.h"
  35 #include "nsUnicodeProperties.h"
  36 #include "mozilla/Span.h"
  37 #include "mozilla/Preferences.h"
  38 #include "mozilla/StaticPrefs_converter.h"
  39 #include "nsComputedDOMStyle.h"
  40
  41 namespace mozilla {
  42 class Encoding;
  43 }
  44
  45 using namespace mozilla;
  46 using namespace mozilla::dom;
  47
  48 #define PREF_STRUCTS "converter.html2txt.structs"
  49 #define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"
  50
  51 static const int32_t kTabSize = 4;
  52 static const int32_t kIndentSizeHeaders =
  53     2; /* Indention of h1, if
  54         mHeaderStrategy = kIndentIncreasedWithHeaderLevel
  55         or = kNumberHeadingsAndIndentSlightly. Indention of
  56         other headers is derived from that. */
  57 static const int32_t kIndentIncrementHeaders =
  58     2; /* If mHeaderStrategy = kIndentIncreasedWithHeaderLevel,
  59    indent h(x+1) this many
  60    columns more than h(x) */
  61 static const int32_t kIndentSizeList = kTabSize;
  62 // Indention of non-first lines of ul and ol
  63 static const int32_t kIndentSizeDD = kTabSize;  // Indention of <dd>
  64 static const char16_t kNBSP = 160;
  65 static const char16_t kSPACE = ' ';
  66
  67 static int32_t HeaderLevel(const nsAtom* aTag);
  68 static int32_t GetUnicharWidth(char32_t ucs);
  69 static int32_t GetUnicharStringWidth(Span<const char16_t> aString);
  70
  71 // Someday may want to make this non-const:
  72 static const uint32_t TagStackSize = 500;
  73
  74 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsPlainTextSerializer)
  75 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsPlainTextSerializer)
  76
  77 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsPlainTextSerializer)
  78   NS_INTERFACE_MAP_ENTRY(nsIContentSerializer)
  79   NS_INTERFACE_MAP_ENTRY(nsISupports)
  80 NS_INTERFACE_MAP_END
  81
  82 NS_IMPL_CYCLE_COLLECTION(nsPlainTextSerializer, mElement)
  83
  84 nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer) {
  85   RefPtr<nsPlainTextSerializer> it = new nsPlainTextSerializer();
  86   it.forget(aSerializer);
  87   return NS_OK;
  88 }
  89
  90 // @param aFlags As defined in nsIDocumentEncoder.idl.
  91 static void DetermineLineBreak(const int32_t aFlags, nsAString& aLineBreak) {
  92   // Set the line break character:
  93   if ((aFlags & nsIDocumentEncoder::OutputCRLineBreak) &&
  94       (aFlags & nsIDocumentEncoder::OutputLFLineBreak)) {
  95     // Windows
  96     aLineBreak.AssignLiteral(u"\r\n");
  97   } else if (aFlags & nsIDocumentEncoder::OutputCRLineBreak) {
  98     // Mac
  99     aLineBreak.AssignLiteral(u"\r");
 100   } else if (aFlags & nsIDocumentEncoder::OutputLFLineBreak) {
 101     // Unix/DOM
 102     aLineBreak.AssignLiteral(u"\n");
 103   } else {
 104     // Platform/default
 105     aLineBreak.AssignLiteral(NS_ULINEBREAK);
 106   }
 107 }
 108
 109 void nsPlainTextSerializer::CurrentLine::MaybeReplaceNbspsInContent(
 110     const int32_t aFlags) {
 111   if (!(aFlags & nsIDocumentEncoder::OutputPersistNBSP)) {
 112     // First, replace all nbsp characters with spaces,
 113     // which the unicode encoder won't do for us.
 114     mContent.ReplaceChar(kNBSP, kSPACE);
 115   }
 116 }
 117
 118 void nsPlainTextSerializer::CurrentLine::ResetContentAndIndentationHeader() {
 119   mContent.Truncate();
 120   mIndentation.mHeader.Truncate();
 121 }
 122
 123 int32_t nsPlainTextSerializer::CurrentLine::FindWrapIndexForContent(
 124     const uint32_t aWrapColumn, bool aUseLineBreaker) const {
 125   MOZ_ASSERT(!mContent.IsEmpty());
 126
 127   const uint32_t prefixwidth = DeterminePrefixWidth();
 128   int32_t goodSpace = 0;
 129
 130   if (aUseLineBreaker) {
 131     // We advance one line break point at a time from the beginning of the
 132     // mContent until we find a width less than or equal to wrap column.
 133     uint32_t width = 0;
 134     intl::LineBreakIteratorUtf16 lineBreakIter(mContent);
 135     while (Maybe<uint32_t> nextGoodSpace = lineBreakIter.Next()) {
 136       // Trim space at the tail. UAX#14 doesn't have break opportunity for
 137       // ASCII space at the tail.
 138       const Maybe<uint32_t> originalNextGoodSpace = nextGoodSpace;
 139       while (*nextGoodSpace > 0 &&
 140              mContent.CharAt(*nextGoodSpace - 1) == 0x20) {
 141         nextGoodSpace = Some(*nextGoodSpace - 1);
 142       }
 143       if (*nextGoodSpace == 0) {
 144         // Restore the original nextGoodSpace.
 145         nextGoodSpace = originalNextGoodSpace;
 146       }
 147
 148       width += GetUnicharStringWidth(Span<const char16_t>(
 149           mContent.get() + goodSpace, *nextGoodSpace - goodSpace));
 150       if (prefixwidth + width > aWrapColumn) {
 151         // The next break point makes the width exceeding the wrap column, so
 152         // goodSpace is what we want.
 153         break;
 154       }
 155       goodSpace = AssertedCast<int32_t>(*nextGoodSpace);
 156     }
 157
 158     return goodSpace;
 159   }
 160
 161   // In this case we don't want strings, especially CJK-ones, to be split. See
 162   // bug 333064 for more information. We break only at ASCII spaces.
 163   if (aWrapColumn >= prefixwidth) {
 164     // Search backward from the adjusted wrap column or from the text end.
 165     goodSpace =
 166         std::min<int32_t>(aWrapColumn - prefixwidth, mContent.Length() - 1);
 167     while (goodSpace >= 0) {
 168       if (nsCRT::IsAsciiSpace(mContent.CharAt(goodSpace))) {
 169         return goodSpace;
 170       }
 171       goodSpace--;
 172     }
 173   }
 174
 175   // Search forward from the adjusted wrap column.
 176   goodSpace = (prefixwidth > aWrapColumn) ? 1 : aWrapColumn - prefixwidth;
 177   const int32_t contentLength = mContent.Length();
 178   while (goodSpace < contentLength &&
 179          !nsCRT::IsAsciiSpace(mContent.CharAt(goodSpace))) {
 180     goodSpace++;
 181   }
 182
 183   return goodSpace;
 184 }
 185
 186 nsPlainTextSerializer::OutputManager::OutputManager(const int32_t aFlags,
 187                                                     nsAString& aOutput)
 188     : mFlags{aFlags}, mOutput{aOutput}, mAtFirstColumn{true} {
 189   MOZ_ASSERT(aOutput.IsEmpty());
 190
 191   DetermineLineBreak(mFlags, mLineBreak);
 192 }
 193
 194 void nsPlainTextSerializer::OutputManager::Append(
 195     const CurrentLine& aCurrentLine,
 196     const StripTrailingWhitespaces aStripTrailingWhitespaces) {
 197   if (IsAtFirstColumn()) {
 198     nsAutoString quotesAndIndent;
 199     aCurrentLine.CreateQuotesAndIndent(quotesAndIndent);
 200
 201     if ((aStripTrailingWhitespaces == StripTrailingWhitespaces::kMaybe)) {
 202       const bool stripTrailingSpaces = aCurrentLine.mContent.IsEmpty();
 203       if (stripTrailingSpaces) {
 204         quotesAndIndent.Trim(" ", false, true, false);
 205       }
 206     }
 207
 208     Append(quotesAndIndent);
 209   }
 210
 211   Append(aCurrentLine.mContent);
 212 }
 213
 214 void nsPlainTextSerializer::OutputManager::Append(const nsAString& aString) {
 215   if (!aString.IsEmpty()) {
 216     mOutput.Append(aString);
 217     mAtFirstColumn = false;
 218   }
 219 }
 220
 221 void nsPlainTextSerializer::OutputManager::AppendLineBreak() {
 222   mOutput.Append(mLineBreak);
 223   mAtFirstColumn = true;
 224 }
 225
 226 uint32_t nsPlainTextSerializer::OutputManager::GetOutputLength() const {
 227   return mOutput.Length();
 228 }
 229
 230 nsPlainTextSerializer::nsPlainTextSerializer()
 231     : mFloatingLines(-1),
 232       mLineBreakDue(false),
 233       kSpace(u" "_ns)  // Init of "constant"
 234 {
 235   mHeadLevel = 0;
 236   mHasWrittenCiteBlockquote = false;
 237   mSpanLevel = 0;
 238   for (int32_t i = 0; i <= 6; i++) {
 239     mHeaderCounter[i] = 0;
 240   }
 241
 242   // Flow
 243   mEmptyLines = 1;  // The start of the document is an "empty line" in itself,
 244   mInWhitespace = false;
 245   mPreFormattedMail = false;
 246
 247   mPreformattedBlockBoundary = false;
 248
 249   // initialize the tag stack to zero:
 250   // The stack only ever contains pointers to static atoms, so they don't
 251   // need refcounting.
 252   mTagStack = new const nsAtom*[TagStackSize];
 253   mTagStackIndex = 0;
 254   mIgnoreAboveIndex = (uint32_t)kNotFound;
 255
 256   mULCount = 0;
 257
 258   mIgnoredChildNodeLevel = 0;
 259 }
 260
 261 nsPlainTextSerializer::~nsPlainTextSerializer() {
 262   delete[] mTagStack;
 263   NS_WARNING_ASSERTION(mHeadLevel == 0, "Wrong head level!");
 264 }
 265
 266 nsPlainTextSerializer::Settings::HeaderStrategy
 267 nsPlainTextSerializer::Settings::Convert(const int32_t aPrefHeaderStrategy) {
 268   HeaderStrategy result{HeaderStrategy::kIndentIncreasedWithHeaderLevel};
 269
 270   switch (aPrefHeaderStrategy) {
 271     case 0: {
 272       result = HeaderStrategy::kNoIndentation;
 273       break;
 274     }
 275     case 1: {
 276       result = HeaderStrategy::kIndentIncreasedWithHeaderLevel;
 277       break;
 278     }
 279     case 2: {
 280       result = HeaderStrategy::kNumberHeadingsAndIndentSlightly;
 281       break;
 282     }
 283     default: {
 284       NS_WARNING(
 285           nsPrintfCString("Header strategy pref contains undefined value: %i",
 286                           aPrefHeaderStrategy)
 287               .get());
 288     }
 289   }
 290
 291   return result;
 292 }
 293
 294 const int32_t kDefaultHeaderStrategy = 1;
 295
 296 void nsPlainTextSerializer::Settings::Init(const int32_t aFlags,
 297                                            const uint32_t aWrapColumn) {
 298   mFlags = aFlags;
 299
 300   if (mFlags & nsIDocumentEncoder::OutputFormatted) {
 301     // Get some prefs that controls how we do formatted output
 302     mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs);
 303
 304     int32_t headerStrategy =
 305         Preferences::GetInt(PREF_HEADER_STRATEGY, kDefaultHeaderStrategy);
 306     mHeaderStrategy = Convert(headerStrategy);
 307   }
 308
 309   mWithRubyAnnotation = StaticPrefs::converter_html2txt_always_include_ruby() ||
 310                         (mFlags & nsIDocumentEncoder::OutputRubyAnnotation);
 311
 312   // XXX We should let the caller decide whether to do this or not
 313   mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent;
 314
 315   mWrapColumn = aWrapColumn;
 316 }
 317
 318 NS_IMETHODIMP
 319 nsPlainTextSerializer::Init(const uint32_t aFlags, uint32_t aWrapColumn,
 320                             const Encoding* aEncoding, bool aIsCopying,
 321                             bool aIsWholeDocument,
 322                             bool* aNeedsPreformatScanning, nsAString& aOutput) {
 323 #ifdef DEBUG
 324   // Check if the major control flags are set correctly.
 325   if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) {
 326     // One of OutputFormatted or OutputWrap must be set, but not both.
 327     NS_ASSERTION((aFlags & nsIDocumentEncoder::OutputFormatted) !=
 328                      (aFlags & nsIDocumentEncoder::OutputWrap),
 329                  "If you want format=flowed, you must combine it "
 330                  "with either nsIDocumentEncoder::OutputFormatted "
 331                  "or nsIDocumentEncoder::OutputWrap");
 332   }
 333
 334   if (aFlags & nsIDocumentEncoder::OutputFormatted) {
 335     NS_ASSERTION(
 336         !(aFlags & nsIDocumentEncoder::OutputPreformatted),
 337         "Can't do formatted and preformatted output at the same time!");
 338   }
 339 #endif
 340   MOZ_ASSERT(!(aFlags & nsIDocumentEncoder::OutputFormatDelSp) ||
 341              (aFlags & nsIDocumentEncoder::OutputFormatFlowed));
 342
 343   *aNeedsPreformatScanning = true;
 344   mSettings.Init(aFlags, aWrapColumn);
 345   mOutputManager.emplace(mSettings.GetFlags(), aOutput);
 346
 347   mUseLineBreaker = mSettings.MayWrap() && mSettings.MayBreakLines();
 348
 349   mLineBreakDue = false;
 350   mFloatingLines = -1;
 351
 352   mPreformattedBlockBoundary = false;
 353
 354   MOZ_ASSERT(mOLStack.IsEmpty());
 355
 356   return NS_OK;
 357 }
 358
 359 bool nsPlainTextSerializer::GetLastBool(const nsTArray<bool>& aStack) {
 360   uint32_t size = aStack.Length();
 361   if (size == 0) {
 362     return false;
 363   }
 364   return aStack.ElementAt(size - 1);
 365 }
 366
 367 void nsPlainTextSerializer::SetLastBool(nsTArray<bool>& aStack, bool aValue) {
 368   uint32_t size = aStack.Length();
 369   if (size > 0) {
 370     aStack.ElementAt(size - 1) = aValue;
 371   } else {
 372     NS_ERROR("There is no \"Last\" value");
 373   }
 374 }
 375
 376 void nsPlainTextSerializer::PushBool(nsTArray<bool>& aStack, bool aValue) {
 377   aStack.AppendElement(bool(aValue));
 378 }
 379
 380 bool nsPlainTextSerializer::PopBool(nsTArray<bool>& aStack) {
 381   return aStack.Length() ? aStack.PopLastElement() : false;
 382 }
 383
 384 bool nsPlainTextSerializer::IsIgnorableRubyAnnotation(
 385     const nsAtom* aTag) const {
 386   if (mSettings.GetWithRubyAnnotation()) {
 387     return false;
 388   }
 389
 390   return aTag == nsGkAtoms::rp || aTag == nsGkAtoms::rt ||
 391          aTag == nsGkAtoms::rtc;
 392 }
 393
 394 // Return true if aElement has 'display:none' or if we just don't know.
 395 static bool IsDisplayNone(Element* aElement) {
 396   RefPtr<const ComputedStyle> computedStyle =
 397       nsComputedDOMStyle::GetComputedStyleNoFlush(aElement);
 398   return !computedStyle ||
 399          computedStyle->StyleDisplay()->mDisplay == StyleDisplay::None;
 400 }
 401
 402 static bool IsIgnorableScriptOrStyle(Element* aElement) {
 403   return aElement->IsAnyOfHTMLElements(nsGkAtoms::script, nsGkAtoms::style) &&
 404          IsDisplayNone(aElement);
 405 }
 406
 407 NS_IMETHODIMP
 408 nsPlainTextSerializer::AppendText(nsIContent* aText, int32_t aStartOffset,
 409                                   int32_t aEndOffset) {
 410   if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
 411     return NS_OK;
 412   }
 413
 414   NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!");
 415   if (aStartOffset < 0) return NS_ERROR_INVALID_ARG;
 416
 417   NS_ENSURE_ARG(aText);
 418
 419   nsresult rv = NS_OK;
 420
 421   nsIContent* content = aText;
 422   const nsTextFragment* frag;
 423   if (!content || !(frag = content->GetText())) {
 424     return NS_ERROR_FAILURE;
 425   }
 426
 427   int32_t fragLength = frag->GetLength();
 428   int32_t endoffset =
 429       (aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength);
 430   NS_ASSERTION(aStartOffset <= endoffset,
 431                "A start offset is beyond the end of the text fragment!");
 432
 433   int32_t length = endoffset - aStartOffset;
 434   if (length <= 0) {
 435     return NS_OK;
 436   }
 437
 438   nsAutoString textstr;
 439   if (frag->Is2b()) {
 440     textstr.Assign(frag->Get2b() + aStartOffset, length);
 441   } else {
 442     // AssignASCII is for 7-bit character only, so don't use it
 443     const char* data = frag->Get1b();
 444     CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr);
 445   }
 446
 447   // Mask the text if the text node is in a password field.
 448   if (content->HasFlag(NS_MAYBE_MASKED)) {
 449     TextEditor::MaskString(textstr, *content->AsText(), 0, aStartOffset);
 450   }
 451
 452   // We have to split the string across newlines
 453   // to match parser behavior
 454   int32_t start = 0;
 455   int32_t offset = textstr.FindCharInSet(u"\n\r");
 456   while (offset != kNotFound) {
 457     if (offset > start) {
 458       // Pass in the line
 459       DoAddText(false, Substring(textstr, start, offset - start));
 460     }
 461
 462     // Pass in a newline
 463     DoAddText();
 464
 465     start = offset + 1;
 466     offset = textstr.FindCharInSet(u"\n\r", start);
 467   }
 468
 469   // Consume the last bit of the string if there's any left
 470   if (start < length) {
 471     if (start) {
 472       DoAddText(false, Substring(textstr, start, length - start));
 473     } else {
 474       DoAddText(false, textstr);
 475     }
 476   }
 477
 478   return rv;
 479 }
 480
 481 NS_IMETHODIMP
 482 nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection,
 483                                           int32_t aStartOffset,
 484                                           int32_t aEndOffset) {
 485   return AppendText(aCDATASection, aStartOffset, aEndOffset);
 486 }
 487
 488 NS_IMETHODIMP
 489 nsPlainTextSerializer::ScanElementForPreformat(Element* aElement) {
 490   mPreformatStack.push(IsElementPreformatted(aElement));
 491   return NS_OK;
 492 }
 493
 494 NS_IMETHODIMP
 495 nsPlainTextSerializer::ForgetElementForPreformat(Element* aElement) {
 496   MOZ_RELEASE_ASSERT(!mPreformatStack.empty(),
 497                      "Tried to pop without previous push.");
 498   mPreformatStack.pop();
 499   return NS_OK;
 500 }
 501
 502 NS_IMETHODIMP
 503 nsPlainTextSerializer::AppendElementStart(Element* aElement,
 504                                           Element* aOriginalElement) {
 505   NS_ENSURE_ARG(aElement);
 506
 507   mElement = aElement;
 508
 509   nsresult rv;
 510   nsAtom* id = GetIdForContent(mElement);
 511
 512   bool isContainer = !FragmentOrElement::IsHTMLVoid(id);
 513
 514   if (isContainer) {
 515     rv = DoOpenContainer(id);
 516   } else {
 517     rv = DoAddLeaf(id);
 518   }
 519
 520   mElement = nullptr;
 521
 522   if (id == nsGkAtoms::head) {
 523     ++mHeadLevel;
 524   }
 525
 526   return rv;
 527 }
 528
 529 NS_IMETHODIMP
 530 nsPlainTextSerializer::AppendElementEnd(Element* aElement,
 531                                         Element* aOriginalElement) {
 532   NS_ENSURE_ARG(aElement);
 533
 534   mElement = aElement;
 535
 536   nsresult rv;
 537   nsAtom* id = GetIdForContent(mElement);
 538
 539   bool isContainer = !FragmentOrElement::IsHTMLVoid(id);
 540
 541   rv = NS_OK;
 542   if (isContainer) {
 543     rv = DoCloseContainer(id);
 544   }
 545
 546   mElement = nullptr;
 547
 548   if (id == nsGkAtoms::head) {
 549     NS_ASSERTION(mHeadLevel != 0, "mHeadLevel being decremented below 0");
 550     --mHeadLevel;
 551   }
 552
 553   return rv;
 554 }
 555
 556 NS_IMETHODIMP
 557 nsPlainTextSerializer::FlushAndFinish() {
 558   MOZ_ASSERT(mOutputManager);
 559
 560   mOutputManager->Flush(mCurrentLine);
 561   return Finish();
 562 }
 563
// Ends the serialization run by destroying the output manager.
// Always returns NS_OK.
NS_IMETHODIMP
nsPlainTextSerializer::Finish() {
  // Drops the manager that Init() emplaced.
  mOutputManager.reset();

  return NS_OK;
}
 570
 571 NS_IMETHODIMP
 572 nsPlainTextSerializer::GetOutputLength(uint32_t& aLength) const {
 573   MOZ_ASSERT(mOutputManager);
 574
 575   aLength = mOutputManager->GetOutputLength();
 576
 577   return NS_OK;
 578 }
 579
// Nothing is emitted at the start of a document; this is a no-op that
// always returns NS_OK.
NS_IMETHODIMP
nsPlainTextSerializer::AppendDocumentStart(Document* aDocument) {
  return NS_OK;
}
 584
 585 constexpr int32_t kOlStackDummyValue = 0;
 586
 587 nsresult nsPlainTextSerializer::DoOpenContainer(const nsAtom* aTag) {
 588   if (IsIgnorableRubyAnnotation(aTag)) {
 589     // Ignorable ruby annotation shouldn't be replaced by a placeholder
 590     // character, neither any of its descendants.
 591     mIgnoredChildNodeLevel++;
 592     return NS_OK;
 593   }
 594   if (IsIgnorableScriptOrStyle(mElement)) {
 595     mIgnoredChildNodeLevel++;
 596     return NS_OK;
 597   }
 598
 599   if (mSettings.HasFlag(nsIDocumentEncoder::OutputForPlainTextClipboardCopy)) {
 600     if (mPreformattedBlockBoundary && DoOutput()) {
 601       // Should always end a line, but get no more whitespace
 602       if (mFloatingLines < 0) mFloatingLines = 0;
 603       mLineBreakDue = true;
 604     }
 605     mPreformattedBlockBoundary = false;
 606   }
 607
 608   if (mSettings.HasFlag(nsIDocumentEncoder::OutputRaw)) {
 609     // Raw means raw.  Don't even think about doing anything fancy
 610     // here like indenting, adding line breaks or any other
 611     // characters such as list item bullets, quote characters
 612     // around <q>, etc.
 613
 614     return NS_OK;
 615   }
 616
 617   if (mTagStackIndex < TagStackSize) {
 618     mTagStack[mTagStackIndex++] = aTag;
 619   }
 620
 621   if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
 622     return NS_OK;
 623   }
 624
 625   // Reset this so that <blockquote type=cite> doesn't affect the whitespace
 626   // above random <pre>s below it.
 627   mHasWrittenCiteBlockquote =
 628       mHasWrittenCiteBlockquote && aTag == nsGkAtoms::pre;
 629
 630   bool isInCiteBlockquote = false;
 631
 632   // XXX special-case <blockquote type=cite> so that we don't add additional
 633   // newlines before the text.
 634   if (aTag == nsGkAtoms::blockquote) {
 635     nsAutoString value;
 636     nsresult rv = GetAttributeValue(nsGkAtoms::type, value);
 637     isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
 638   }
 639
 640   if (mLineBreakDue && !isInCiteBlockquote) EnsureVerticalSpace(mFloatingLines);
 641
 642   // Check if this tag's content that should not be output
 643   if ((aTag == nsGkAtoms::noscript &&
 644        !mSettings.HasFlag(nsIDocumentEncoder::OutputNoScriptContent)) ||
 645       ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) &&
 646        !mSettings.HasFlag(nsIDocumentEncoder::OutputNoFramesContent))) {
 647     // Ignore everything that follows the current tag in
 648     // question until a matching end tag is encountered.
 649     mIgnoreAboveIndex = mTagStackIndex - 1;
 650     return NS_OK;
 651   }
 652
 653   if (aTag == nsGkAtoms::body) {
 654     // Try to figure out here whether we have a
 655     // preformatted style attribute set by Thunderbird.
 656     //
 657     // Trigger on the presence of a "pre-wrap" in the
 658     // style attribute. That's a very simplistic way to do
 659     // it, but better than nothing.
 660     nsAutoString style;
 661     int32_t whitespace;
 662     if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) &&
 663         (kNotFound != (whitespace = style.Find(u"white-space:")))) {
 664       if (kNotFound != style.LowerCaseFindASCII("pre-wrap", whitespace)) {
 665 #ifdef DEBUG_preformatted
 666         printf("Set mPreFormattedMail based on style pre-wrap\n");
 667 #endif
 668         mPreFormattedMail = true;
 669       } else if (kNotFound != style.LowerCaseFindASCII("pre", whitespace)) {
 670 #ifdef DEBUG_preformatted
 671         printf("Set mPreFormattedMail based on style pre\n");
 672 #endif
 673         mPreFormattedMail = true;
 674       }
 675     } else {
 676       /* See comment at end of function. */
 677       mInWhitespace = true;
 678       mPreFormattedMail = false;
 679     }
 680
 681     return NS_OK;
 682   }
 683
 684   // Keep this in sync with DoCloseContainer!
 685   if (!DoOutput()) {
 686     return NS_OK;
 687   }
 688
 689   if (aTag == nsGkAtoms::p)
 690     EnsureVerticalSpace(1);
 691   else if (aTag == nsGkAtoms::pre) {
 692     if (GetLastBool(mIsInCiteBlockquote))
 693       EnsureVerticalSpace(0);
 694     else if (mHasWrittenCiteBlockquote) {
 695       EnsureVerticalSpace(0);
 696       mHasWrittenCiteBlockquote = false;
 697     } else
 698       EnsureVerticalSpace(1);
 699   } else if (aTag == nsGkAtoms::tr) {
 700     PushBool(mHasWrittenCellsForRow, false);
 701   } else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) {
 702     // We must make sure that the content of two table cells get a
 703     // space between them.
 704
 705     // To make the separation between cells most obvious and
 706     // importable, we use a TAB.
 707     if (mHasWrittenCellsForRow.IsEmpty()) {
 708       // We don't always see a <tr> (nor a <table>) before the <td> if we're
 709       // copying part of a table
 710       PushBool(mHasWrittenCellsForRow, true);  // will never be popped
 711     } else if (GetLastBool(mHasWrittenCellsForRow)) {
 712       // Bypass |Write| so that the TAB isn't compressed away.
 713       AddToLine(u"\t", 1);
 714       mInWhitespace = true;
 715     } else {
 716       SetLastBool(mHasWrittenCellsForRow, true);
 717     }
 718   } else if (aTag == nsGkAtoms::ul) {
 719     // Indent here to support nested lists, which aren't included in li :-(
 720     EnsureVerticalSpace(IsInOlOrUl() ? 0 : 1);
 721     // Must end the current line before we change indention
 722     mCurrentLine.mIndentation.mLength += kIndentSizeList;
 723     mULCount++;
 724   } else if (aTag == nsGkAtoms::ol) {
 725     EnsureVerticalSpace(IsInOlOrUl() ? 0 : 1);
 726     if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
 727       // Must end the current line before we change indention
 728       nsAutoString startAttr;
 729       int32_t startVal = 1;
 730       if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) {
 731         nsresult rv = NS_OK;
 732         startVal = startAttr.ToInteger(&rv);
 733         if (NS_FAILED(rv)) {
 734           startVal = 1;
 735         }
 736       }
 737       mOLStack.AppendElement(startVal);
 738     } else {
 739       mOLStack.AppendElement(kOlStackDummyValue);
 740     }
 741     mCurrentLine.mIndentation.mLength += kIndentSizeList;  // see ul
 742   } else if (aTag == nsGkAtoms::li &&
 743              mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
 744     if (mTagStackIndex > 1 && IsInOL()) {
 745       if (!mOLStack.IsEmpty()) {
 746         nsAutoString valueAttr;
 747         if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) {
 748           nsresult rv = NS_OK;
 749           int32_t valueAttrVal = valueAttr.ToInteger(&rv);
 750           if (NS_SUCCEEDED(rv)) {
 751             mOLStack.LastElement() = valueAttrVal;
 752           }
 753         }
 754         // This is what nsBulletFrame does for OLs:
 755         mCurrentLine.mIndentation.mHeader.AppendInt(mOLStack.LastElement(), 10);
 756         mOLStack.LastElement()++;
 757       } else {
 758         mCurrentLine.mIndentation.mHeader.Append(char16_t('#'));
 759       }
 760
 761       mCurrentLine.mIndentation.mHeader.Append(char16_t('.'));
 762
 763     } else {
 764       static const char bulletCharArray[] = "*o+#";
 765       uint32_t index = mULCount > 0 ? (mULCount - 1) : 3;
 766       char bulletChar = bulletCharArray[index % 4];
 767       mCurrentLine.mIndentation.mHeader.Append(char16_t(bulletChar));
 768     }
 769
 770     mCurrentLine.mIndentation.mHeader.Append(char16_t(' '));
 771   } else if (aTag == nsGkAtoms::dl) {
 772     EnsureVerticalSpace(1);
 773   } else if (aTag == nsGkAtoms::dt) {
 774     EnsureVerticalSpace(0);
 775   } else if (aTag == nsGkAtoms::dd) {
 776     EnsureVerticalSpace(0);
 777     mCurrentLine.mIndentation.mLength += kIndentSizeDD;
 778   } else if (aTag == nsGkAtoms::span) {
 779     ++mSpanLevel;
 780   } else if (aTag == nsGkAtoms::blockquote) {
 781     // Push
 782     PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
 783     if (isInCiteBlockquote) {
 784       EnsureVerticalSpace(0);
 785       mCurrentLine.mCiteQuoteLevel++;
 786     } else {
 787       EnsureVerticalSpace(1);
 788       mCurrentLine.mIndentation.mLength +=
 789           kTabSize;  // Check for some maximum value?
 790     }
 791   } else if (aTag == nsGkAtoms::q) {
 792     Write(u"\""_ns);
 793   }
 794
 795   // Else make sure we'll separate block level tags,
 796   // even if we're about to leave, before doing any other formatting.
 797   else if (IsCssBlockLevelElement(mElement)) {
 798     EnsureVerticalSpace(0);
 799   }
 800
 801   if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
 802     OpenContainerForOutputFormatted(aTag);
 803   }
 804   return NS_OK;
 805 }
 806
 807 void nsPlainTextSerializer::OpenContainerForOutputFormatted(
 808     const nsAtom* aTag) {
 809   const bool currentNodeIsConverted = IsCurrentNodeConverted();
 810
 811   if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || aTag == nsGkAtoms::h3 ||
 812       aTag == nsGkAtoms::h4 || aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
 813     EnsureVerticalSpace(2);
 814     if (mSettings.GetHeaderStrategy() ==
 815         Settings::HeaderStrategy::kNumberHeadingsAndIndentSlightly) {
 816       mCurrentLine.mIndentation.mLength += kIndentSizeHeaders;
 817       // Caching
 818       int32_t level = HeaderLevel(aTag);
 819       // Increase counter for current level
 820       mHeaderCounter[level]++;
 821       // Reset all lower levels
 822       int32_t i;
 823
 824       for (i = level + 1; i <= 6; i++) {
 825         mHeaderCounter[i] = 0;
 826       }
 827
 828       // Construct numbers
 829       nsAutoString leadup;
 830       for (i = 1; i <= level; i++) {
 831         leadup.AppendInt(mHeaderCounter[i]);
 832         leadup.Append(char16_t('.'));
 833       }
 834       leadup.Append(char16_t(' '));
 835       Write(leadup);
 836     } else if (mSettings.GetHeaderStrategy() ==
 837                Settings::HeaderStrategy::kIndentIncreasedWithHeaderLevel) {
 838       mCurrentLine.mIndentation.mLength += kIndentSizeHeaders;
 839       for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
 840         // for h(x), run x-1 times
 841         mCurrentLine.mIndentation.mLength += kIndentIncrementHeaders;
 842       }
 843     }
 844   } else if (aTag == nsGkAtoms::sup && mSettings.GetStructs() &&
 845              !currentNodeIsConverted) {
 846     Write(u"^"_ns);
 847   } else if (aTag == nsGkAtoms::sub && mSettings.GetStructs() &&
 848              !currentNodeIsConverted) {
 849     Write(u"_"_ns);
 850   } else if (aTag == nsGkAtoms::code && mSettings.GetStructs() &&
 851              !currentNodeIsConverted) {
 852     Write(u"|"_ns);
 853   } else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) &&
 854              mSettings.GetStructs() && !currentNodeIsConverted) {
 855     Write(u"*"_ns);
 856   } else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) &&
 857              mSettings.GetStructs() && !currentNodeIsConverted) {
 858     Write(u"/"_ns);
 859   } else if (aTag == nsGkAtoms::u && mSettings.GetStructs() &&
 860              !currentNodeIsConverted) {
 861     Write(u"_"_ns);
 862   }
 863
 864   /* Container elements are always block elements, so we shouldn't
 865      output any whitespace immediately after the container tag even if
 866      there's extra whitespace there because the HTML is pretty-printed
 867      or something. To ensure that happens, tell the serializer we're
 868      already in whitespace so it won't output more. */
 869   mInWhitespace = true;
 870 }
 871
 872 nsresult nsPlainTextSerializer::DoCloseContainer(const nsAtom* aTag) {
 873   if (IsIgnorableRubyAnnotation(aTag)) {
 874     mIgnoredChildNodeLevel--;
 875     return NS_OK;
 876   }
 877   if (IsIgnorableScriptOrStyle(mElement)) {
 878     mIgnoredChildNodeLevel--;
 879     return NS_OK;
 880   }
 881
 882   if (mSettings.HasFlag(nsIDocumentEncoder::OutputForPlainTextClipboardCopy)) {
 883     if (DoOutput() && IsElementPreformatted() &&
 884         IsCssBlockLevelElement(mElement)) {
 885       // If we're closing a preformatted block element, output a line break
 886       // when we find a new container.
 887       mPreformattedBlockBoundary = true;
 888     }
 889   }
 890
 891   if (mSettings.HasFlag(nsIDocumentEncoder::OutputRaw)) {
 892     // Raw means raw.  Don't even think about doing anything fancy
 893     // here like indenting, adding line breaks or any other
 894     // characters such as list item bullets, quote characters
 895     // around <q>, etc.
 896
 897     return NS_OK;
 898   }
 899
 900   if (mTagStackIndex > 0) {
 901     --mTagStackIndex;
 902   }
 903
 904   if (mTagStackIndex >= mIgnoreAboveIndex) {
 905     if (mTagStackIndex == mIgnoreAboveIndex) {
 906       // We're dealing with the close tag whose matching
 907       // open tag had set the mIgnoreAboveIndex value.
 908       // Reset mIgnoreAboveIndex before discarding this tag.
 909       mIgnoreAboveIndex = (uint32_t)kNotFound;
 910     }
 911     return NS_OK;
 912   }
 913
 914   MOZ_ASSERT(mOutputManager);
 915
 916   // End current line if we're ending a block level tag
 917   if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) {
 918     // We want the output to end with a new line,
 919     // but in preformatted areas like text fields,
 920     // we can't emit newlines that weren't there.
 921     // So add the newline only in the case of formatted output.
 922     if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
 923       EnsureVerticalSpace(0);
 924     } else {
 925       mOutputManager->Flush(mCurrentLine);
 926     }
 927     // We won't want to do anything with these in formatted mode either,
 928     // so just return now:
 929     return NS_OK;
 930   }
 931
 932   // Keep this in sync with DoOpenContainer!
 933   if (!DoOutput()) {
 934     return NS_OK;
 935   }
 936
 937   if (aTag == nsGkAtoms::tr) {
 938     PopBool(mHasWrittenCellsForRow);
 939     // Should always end a line, but get no more whitespace
 940     if (mFloatingLines < 0) mFloatingLines = 0;
 941     mLineBreakDue = true;
 942   } else if (((aTag == nsGkAtoms::li) || (aTag == nsGkAtoms::dt)) &&
 943              mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
 944     // Items that should always end a line, but get no more whitespace
 945     if (mFloatingLines < 0) mFloatingLines = 0;
 946     mLineBreakDue = true;
 947   } else if (aTag == nsGkAtoms::pre) {
 948     mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1;
 949     mLineBreakDue = true;
 950   } else if (aTag == nsGkAtoms::ul) {
 951     mOutputManager->Flush(mCurrentLine);
 952     mCurrentLine.mIndentation.mLength -= kIndentSizeList;
 953     --mULCount;
 954     if (!IsInOlOrUl()) {
 955       mFloatingLines = 1;
 956       mLineBreakDue = true;
 957     }
 958   } else if (aTag == nsGkAtoms::ol) {
 959     mOutputManager->Flush(mCurrentLine);  // Doing this after decreasing
 960                                           // OLStackIndex would be wrong.
 961     mCurrentLine.mIndentation.mLength -= kIndentSizeList;
 962     MOZ_ASSERT(!mOLStack.IsEmpty(), "Wrong OLStack level!");
 963     mOLStack.RemoveLastElement();
 964     if (!IsInOlOrUl()) {
 965       mFloatingLines = 1;
 966       mLineBreakDue = true;
 967     }
 968   } else if (aTag == nsGkAtoms::dl) {
 969     mFloatingLines = 1;
 970     mLineBreakDue = true;
 971   } else if (aTag == nsGkAtoms::dd) {
 972     mOutputManager->Flush(mCurrentLine);
 973     mCurrentLine.mIndentation.mLength -= kIndentSizeDD;
 974   } else if (aTag == nsGkAtoms::span) {
 975     NS_ASSERTION(mSpanLevel, "Span level will be negative!");
 976     --mSpanLevel;
 977   } else if (aTag == nsGkAtoms::div) {
 978     if (mFloatingLines < 0) mFloatingLines = 0;
 979     mLineBreakDue = true;
 980   } else if (aTag == nsGkAtoms::blockquote) {
 981     mOutputManager->Flush(mCurrentLine);  // Is this needed?
 982
 983     // Pop
 984     bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);
 985
 986     if (isInCiteBlockquote) {
 987       NS_ASSERTION(mCurrentLine.mCiteQuoteLevel,
 988                    "CiteQuote level will be negative!");
 989       mCurrentLine.mCiteQuoteLevel--;
 990       mFloatingLines = 0;
 991       mHasWrittenCiteBlockquote = true;
 992     } else {
 993       mCurrentLine.mIndentation.mLength -= kTabSize;
 994       mFloatingLines = 1;
 995     }
 996     mLineBreakDue = true;
 997   } else if (aTag == nsGkAtoms::q) {
 998     Write(u"\""_ns);
 999   } else if (IsCssBlockLevelElement(mElement)) {
1000     // All other blocks get 1 vertical space after them
1001     // in formatted mode, otherwise 0.
1002     // This is hard. Sometimes 0 is a better number, but
1003     // how to know?
1004     if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
1005       EnsureVerticalSpace(1);
1006     } else {
1007       if (mFloatingLines < 0) mFloatingLines = 0;
1008       mLineBreakDue = true;
1009     }
1010   }
1011
1012   if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
1013     CloseContainerForOutputFormatted(aTag);
1014   }
1015
1016   return NS_OK;
1017 }
1018
1019 void nsPlainTextSerializer::CloseContainerForOutputFormatted(
1020     const nsAtom* aTag) {
1021   const bool currentNodeIsConverted = IsCurrentNodeConverted();
1022
1023   if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || aTag == nsGkAtoms::h3 ||
1024       aTag == nsGkAtoms::h4 || aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
1025     using HeaderStrategy = Settings::HeaderStrategy;
1026     if ((mSettings.GetHeaderStrategy() ==
1027          HeaderStrategy::kIndentIncreasedWithHeaderLevel) ||
1028         (mSettings.GetHeaderStrategy() ==
1029          HeaderStrategy::kNumberHeadingsAndIndentSlightly)) {
1030       mCurrentLine.mIndentation.mLength -= kIndentSizeHeaders;
1031     }
1032     if (mSettings.GetHeaderStrategy() ==
1033         HeaderStrategy::kIndentIncreasedWithHeaderLevel) {
1034       for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
1035         // for h(x), run x-1 times
1036         mCurrentLine.mIndentation.mLength -= kIndentIncrementHeaders;
1037       }
1038     }
1039     EnsureVerticalSpace(1);
1040   } else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) {
1041     nsAutoString url;
1042     if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url)) &&
1043         !url.IsEmpty()) {
1044       nsAutoString temp;
1045       temp.AssignLiteral(" <");
1046       temp += url;
1047       temp.Append(char16_t('>'));
1048       Write(temp);
1049     }
1050   } else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub) &&
1051              mSettings.GetStructs() && !currentNodeIsConverted) {
1052     Write(kSpace);
1053   } else if (aTag == nsGkAtoms::code && mSettings.GetStructs() &&
1054              !currentNodeIsConverted) {
1055     Write(u"|"_ns);
1056   } else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) &&
1057              mSettings.GetStructs() && !currentNodeIsConverted) {
1058     Write(u"*"_ns);
1059   } else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) &&
1060              mSettings.GetStructs() && !currentNodeIsConverted) {
1061     Write(u"/"_ns);
1062   } else if (aTag == nsGkAtoms::u && mSettings.GetStructs() &&
1063              !currentNodeIsConverted) {
1064     Write(u"_"_ns);
1065   }
1066 }
1067
1068 bool nsPlainTextSerializer::MustSuppressLeaf() const {
1069   if (mIgnoredChildNodeLevel > 0) {
1070     return true;
1071   }
1072
1073   if ((mTagStackIndex > 1 &&
1074        mTagStack[mTagStackIndex - 2] == nsGkAtoms::select) ||
1075       (mTagStackIndex > 0 &&
1076        mTagStack[mTagStackIndex - 1] == nsGkAtoms::select)) {
1077     // Don't output the contents of SELECT elements;
1078     // Might be nice, eventually, to output just the selected element.
1079     // Read more in bug 31994.
1080     return true;
1081   }
1082
1083   return false;
1084 }
1085
1086 void nsPlainTextSerializer::DoAddText() { DoAddText(true, u""_ns); }
1087
1088 void nsPlainTextSerializer::DoAddText(bool aIsLineBreak,
1089                                       const nsAString& aText) {
1090   // If we don't want any output, just return
1091   if (!DoOutput()) {
1092     return;
1093   }
1094
1095   if (!aIsLineBreak) {
1096     // Make sure to reset this, since it's no longer true.
1097     mHasWrittenCiteBlockquote = false;
1098   }
1099
1100   if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);
1101
1102   if (MustSuppressLeaf()) {
1103     return;
1104   }
1105
1106   if (aIsLineBreak) {
1107     // The only times we want to pass along whitespace from the original
1108     // html source are if we're forced into preformatted mode via flags,
1109     // or if we're prettyprinting and we're inside a <pre>.
1110     // Otherwise, either we're collapsing to minimal text, or we're
1111     // prettyprinting to mimic the html format, and in neither case
1112     // does the formatting of the html source help us.
1113     if (mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted) ||
1114         (mPreFormattedMail && !mSettings.GetWrapColumn()) ||
1115         IsElementPreformatted()) {
1116       EnsureVerticalSpace(mEmptyLines + 1);
1117     } else if (!mInWhitespace) {
1118       Write(kSpace);
1119       mInWhitespace = true;
1120     }
1121     return;
1122   }
1123
1124   Write(aText);
1125 }
1126
1127 void CreateLineOfDashes(nsAString& aResult, const uint32_t aWrapColumn) {
1128   MOZ_ASSERT(aResult.IsEmpty());
1129
1130   const uint32_t width = (aWrapColumn > 0 ? aWrapColumn : 25);
1131   while (aResult.Length() < width) {
1132     aResult.Append(char16_t('-'));
1133   }
1134 }
1135
1136 nsresult nsPlainTextSerializer::DoAddLeaf(const nsAtom* aTag) {
1137   mPreformattedBlockBoundary = false;
1138
1139   if (!DoOutput()) {
1140     return NS_OK;
1141   }
1142
1143   if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);
1144
1145   if (MustSuppressLeaf()) {
1146     return NS_OK;
1147   }
1148
1149   if (aTag == nsGkAtoms::br) {
1150     // Another egregious editor workaround, see bug 38194:
1151     // ignore the bogus br tags that the editor sticks here and there.
1152     // FYI: `brElement` may be `nullptr` if the element is <br> element
1153     //      of non-HTML element.
1154     // XXX Do we need to call `EnsureVerticalSpace()` when the <br> element
1155     //     is not an HTML element?
1156     HTMLBRElement* brElement = HTMLBRElement::FromNodeOrNull(mElement);
1157     if (!brElement || !brElement->IsPaddingForEmptyLastLine()) {
1158       EnsureVerticalSpace(mEmptyLines + 1);
1159     }
1160   } else if (aTag == nsGkAtoms::hr &&
1161              mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
1162     EnsureVerticalSpace(0);
1163
1164     // Make a line of dashes as wide as the wrap width
1165     // XXX honoring percentage would be nice
1166     nsAutoString line;
1167     CreateLineOfDashes(line, mSettings.GetWrapColumn());
1168     Write(line);
1169
1170     EnsureVerticalSpace(0);
1171   } else if (aTag == nsGkAtoms::img) {
1172     /* Output (in decreasing order of preference)
1173        alt, title or nothing */
1174     // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG>
1175     nsAutoString imageDescription;
1176     if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::alt, imageDescription))) {
1177       // If the alt attribute has an empty value (|alt=""|), output nothing
1178     } else if (NS_SUCCEEDED(
1179                    GetAttributeValue(nsGkAtoms::title, imageDescription)) &&
1180                !imageDescription.IsEmpty()) {
1181       imageDescription = u" ["_ns + imageDescription + u"] "_ns;
1182     }
1183
1184     Write(imageDescription);
1185   }
1186
1187   return NS_OK;
1188 }
1189
1190 /**
1191  * Adds as many newline as necessary to get |aNumberOfRows| empty lines
1192  *
1193  * aNumberOfRows = -1    :   Being in the middle of some line of text
1194  * aNumberOfRows =  0    :   Being at the start of a line
1195  * aNumberOfRows =  n>0  :   Having n empty lines before the current line.
1196  */
1197 void nsPlainTextSerializer::EnsureVerticalSpace(const int32_t aNumberOfRows) {
1198   // If we have something in the indent we probably want to output
1199   // it and it's not included in the count for empty lines so we don't
1200   // realize that we should start a new line.
1201   if (aNumberOfRows >= 0 && !mCurrentLine.mIndentation.mHeader.IsEmpty()) {
1202     EndLine(false);
1203     mInWhitespace = true;
1204   }
1205
1206   while (mEmptyLines < aNumberOfRows) {
1207     EndLine(false);
1208     mInWhitespace = true;
1209   }
1210   mLineBreakDue = false;
1211   mFloatingLines = -1;
1212 }
1213
1214 void nsPlainTextSerializer::OutputManager::Flush(CurrentLine& aCurrentLine) {
1215   if (!aCurrentLine.mContent.IsEmpty()) {
1216     aCurrentLine.MaybeReplaceNbspsInContent(mFlags);
1217
1218     Append(aCurrentLine, StripTrailingWhitespaces::kNo);
1219
1220     aCurrentLine.ResetContentAndIndentationHeader();
1221   }
1222 }
1223
1224 static bool IsSpaceStuffable(const char16_t* s) {
1225   return (s[0] == '>' || s[0] == ' ' || s[0] == kNBSP ||
1226           NS_strncmp(s, u"From ", 5) == 0);
1227 }
1228
1229 void nsPlainTextSerializer::MaybeWrapAndOutputCompleteLines() {
1230   if (!mSettings.MayWrap()) {
1231     return;
1232   }
1233
1234   // Yes, wrap!
1235   // The "+4" is to avoid wrap lines that only would be a couple
1236   // of letters too long. We give this bonus only if the
1237   // wrapcolumn is more than 20.
1238   const uint32_t wrapColumn = mSettings.GetWrapColumn();
1239   uint32_t bonuswidth = (wrapColumn > 20) ? 4 : 0;
1240   while (!mCurrentLine.mContent.IsEmpty()) {
1241     const uint32_t prefixwidth = mCurrentLine.DeterminePrefixWidth();
1242     // The width of the line as it will appear on the screen (approx.).
1243     const uint32_t currentLineContentWidth =
1244         GetUnicharStringWidth(mCurrentLine.mContent);
1245     if (currentLineContentWidth + prefixwidth <= wrapColumn + bonuswidth) {
1246       break;
1247     }
1248
1249     const int32_t goodSpace =
1250         mCurrentLine.FindWrapIndexForContent(wrapColumn, mUseLineBreaker);
1251
1252     const int32_t contentLength = mCurrentLine.mContent.Length();
1253     if (goodSpace <= 0 || goodSpace >= contentLength) {
1254       // Nothing to do. Hopefully we get more data later to use for a place to
1255       // break line.
1256       break;
1257     }
1258     // Found a place to break
1259     // -1 (trim a char at the break position) only if the line break was a
1260     // space.
1261     nsAutoString restOfContent;
1262     if (nsCRT::IsAsciiSpace(mCurrentLine.mContent.CharAt(goodSpace))) {
1263       mCurrentLine.mContent.Right(restOfContent, contentLength - goodSpace - 1);
1264     } else {
1265       mCurrentLine.mContent.Right(restOfContent, contentLength - goodSpace);
1266     }
1267     // if breaker was U+0020, it has to consider for delsp=yes support
1268     const bool breakBySpace = mCurrentLine.mContent.CharAt(goodSpace) == ' ';
1269     mCurrentLine.mContent.Truncate(goodSpace);
1270     EndLine(true, breakBySpace);
1271     mCurrentLine.mContent.Truncate();
1272     // Space stuffing a la RFC 2646 (format=flowed)
1273     if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
1274       mCurrentLine.mSpaceStuffed = !restOfContent.IsEmpty() &&
1275                                    IsSpaceStuffable(restOfContent.get()) &&
1276                                    // We space-stuff quoted lines anyway
1277                                    mCurrentLine.mCiteQuoteLevel == 0;
1278     }
1279     mCurrentLine.mContent.Append(restOfContent);
1280     mEmptyLines = -1;
1281   }
1282 }
1283
1284 /**
1285  * This function adds a piece of text to the current stored line. If we are
1286  * wrapping text and the stored line will become too long, a suitable
1287  * location to wrap will be found and the line that's complete will be
1288  * output.
1289  */
1290 void nsPlainTextSerializer::AddToLine(const char16_t* aLineFragment,
1291                                       int32_t aLineFragmentLength) {
1292   if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);
1293
1294   if (mCurrentLine.mContent.IsEmpty()) {
1295     if (0 == aLineFragmentLength) {
1296       return;
1297     }
1298
1299     if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
1300       // Space stuffing a la RFC 2646 (format=flowed).
1301       // We space-stuff quoted lines anyway
1302       mCurrentLine.mSpaceStuffed =
1303           IsSpaceStuffable(aLineFragment) && mCurrentLine.mCiteQuoteLevel == 0;
1304     }
1305     mEmptyLines = -1;
1306   }
1307
1308   mCurrentLine.mContent.Append(aLineFragment, aLineFragmentLength);
1309
1310   MaybeWrapAndOutputCompleteLines();
1311 }
1312
// Signature separator as defined by RFC 2646: two dashes and a space.
constexpr char kSignatureSeparator[] = "-- ";

// Dash-escaped form of the signature separator, as found in inline signed
// messages according to the OpenPGP standard (RFC 2440).
constexpr char kDashEscapedSignatureSeparator[] = "- -- ";
1319
1320 static bool IsSignatureSeparator(const nsAString& aString) {
1321   return aString.EqualsLiteral(kSignatureSeparator) ||
1322          aString.EqualsLiteral(kDashEscapedSignatureSeparator);
1323 }
1324
1325 /**
1326  * Outputs the contents of mCurrentLine.mContent, and resets line
1327  * specific variables. Also adds an indentation and prefix if there is one
1328  * specified. Strips ending spaces from the line if it isn't preformatted.
1329  */
1330 void nsPlainTextSerializer::EndLine(bool aSoftLineBreak, bool aBreakBySpace) {
1331   if (aSoftLineBreak && mCurrentLine.mContent.IsEmpty()) {
1332     // No meaning
1333     return;
1334   }
1335
1336   /* In non-preformatted mode, remove spaces from the end of the line for
1337    * format=flowed compatibility. Don't do this for these special cases:
1338    * "-- ", the signature separator (RFC 2646) shouldn't be touched and
1339    * "- -- ", the OpenPGP dash-escaped signature separator in inline
1340    * signed messages according to the OpenPGP standard (RFC 2440).
1341    */
1342   if (!mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted) &&
1343       (aSoftLineBreak || !IsSignatureSeparator(mCurrentLine.mContent))) {
1344     mCurrentLine.mContent.Trim(" ", false, true, false);
1345   }
1346
1347   if (aSoftLineBreak &&
1348       mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed) &&
1349       !mCurrentLine.mIndentation.mLength) {
1350     // Add the soft part of the soft linebreak (RFC 2646 4.1)
1351     // We only do this when there is no indentation since format=flowed
1352     // lines and indentation doesn't work well together.
1353
1354     // If breaker character is ASCII space with RFC 3676 support (delsp=yes),
1355     // add twice space.
1356     if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatDelSp) &&
1357         aBreakBySpace) {
1358       mCurrentLine.mContent.AppendLiteral("  ");
1359     } else {
1360       mCurrentLine.mContent.Append(char16_t(' '));
1361     }
1362   }
1363
1364   if (aSoftLineBreak) {
1365     mEmptyLines = 0;
1366   } else {
1367     // Hard break
1368     if (mCurrentLine.HasContentOrIndentationHeader()) {
1369       mEmptyLines = 0;
1370     } else {
1371       mEmptyLines++;
1372     }
1373   }
1374
1375   MOZ_ASSERT(mOutputManager);
1376
1377   mCurrentLine.MaybeReplaceNbspsInContent(mSettings.GetFlags());
1378
1379   // If we don't have anything "real" to output we have to
1380   // make sure the indent doesn't end in a space since that
1381   // would trick a format=flowed-aware receiver.
1382   mOutputManager->Append(mCurrentLine,
1383                          OutputManager::StripTrailingWhitespaces::kMaybe);
1384   mOutputManager->AppendLineBreak();
1385   mCurrentLine.ResetContentAndIndentationHeader();
1386   mInWhitespace = true;
1387   mLineBreakDue = false;
1388   mFloatingLines = -1;
1389 }
1390
1391 /**
1392  * Creates the calculated and stored indent and text in the indentation. That is
1393  * quote chars and numbers for numbered lists and such.
1394  */
1395 void nsPlainTextSerializer::CurrentLine::CreateQuotesAndIndent(
1396     nsAString& aResult) const {
1397   // Put the mail quote "> " chars in, if appropriate:
1398   if (mCiteQuoteLevel > 0) {
1399     nsAutoString quotes;
1400     for (int i = 0; i < mCiteQuoteLevel; i++) {
1401       quotes.Append(char16_t('>'));
1402     }
1403     if (!mContent.IsEmpty()) {
1404       /* Better don't output a space here, if the line is empty,
1405          in case a receiving format=flowed-aware UA thinks, this were a flowed
1406          line, which it isn't - it's just empty. (Flowed lines may be joined
1407          with the following one, so the empty line may be lost completely.) */
1408       quotes.Append(char16_t(' '));
1409     }
1410     aResult = quotes;
1411   }
1412
1413   // Indent if necessary
1414   int32_t indentwidth = mIndentation.mLength - mIndentation.mHeader.Length();
1415   if (mSpaceStuffed) {
1416     indentwidth += 1;
1417   }
1418
1419   // Don't make empty lines look flowed
1420   if (indentwidth > 0 && HasContentOrIndentationHeader()) {
1421     nsAutoString spaces;
1422     for (int i = 0; i < indentwidth; ++i) {
1423       spaces.Append(char16_t(' '));
1424     }
1425     aResult += spaces;
1426   }
1427
1428   if (!mIndentation.mHeader.IsEmpty()) {
1429     aResult += mIndentation.mHeader;
1430   }
1431 }
1432
// Returns true if |c| is a line feed, a carriage return, a space or a tab.
static bool IsLineFeedCarriageReturnBlankOrTab(char16_t c) {
  switch (c) {
    case '\n':
    case '\r':
    case ' ':
    case '\t':
      return true;
    default:
      return false;
  }
}
1436
1437 static void ReplaceVisiblyTrailingNbsps(nsAString& aString) {
1438   const int32_t totLen = aString.Length();
1439   for (int32_t i = totLen - 1; i >= 0; i--) {
1440     char16_t c = aString[i];
1441     if (IsLineFeedCarriageReturnBlankOrTab(c)) {
1442       continue;
1443     }
1444     if (kNBSP == c) {
1445       aString.Replace(i, 1, ' ');
1446     } else {
1447       break;
1448     }
1449   }
1450 }
1451
1452 void nsPlainTextSerializer::ConvertToLinesAndOutput(const nsAString& aString) {
1453   const int32_t totLen = aString.Length();
1454   int32_t newline{0};
1455
1456   // Put the mail quote "> " chars in, if appropriate.
1457   // Have to put it in before every line.
1458   int32_t bol = 0;
1459   while (bol < totLen) {
1460     bool outputLineBreak = false;
1461     bool spacesOnly = true;
1462
1463     // Find one of '\n' or '\r' using iterators since nsAString
1464     // doesn't have the old FindCharInSet function.
1465     nsAString::const_iterator iter;
1466     aString.BeginReading(iter);
1467     nsAString::const_iterator done_searching;
1468     aString.EndReading(done_searching);
1469     iter.advance(bol);
1470     int32_t new_newline = bol;
1471     newline = kNotFound;
1472     while (iter != done_searching) {
1473       if ('\n' == *iter || '\r' == *iter) {
1474         newline = new_newline;
1475         break;
1476       }
1477       if (' ' != *iter) {
1478         spacesOnly = false;
1479       }
1480       ++new_newline;
1481       ++iter;
1482     }
1483
1484     // Done searching
1485     nsAutoString stringpart;
1486     if (newline == kNotFound) {
1487       // No new lines.
1488       stringpart.Assign(Substring(aString, bol, totLen - bol));
1489       if (!stringpart.IsEmpty()) {
1490         char16_t lastchar = stringpart.Last();
1491         mInWhitespace = IsLineFeedCarriageReturnBlankOrTab(lastchar);
1492       }
1493       mEmptyLines = -1;
1494       bol = totLen;
1495     } else {
1496       // There is a newline
1497       stringpart.Assign(Substring(aString, bol, newline - bol));
1498       mInWhitespace = true;
1499       outputLineBreak = true;
1500       mEmptyLines = 0;
1501       bol = newline + 1;
1502       if ('\r' == *iter && bol < totLen && '\n' == *++iter) {
1503         // There was a CRLF in the input. This used to be illegal and
1504         // stripped by the parser. Apparently not anymore. Let's skip
1505         // over the LF.
1506         bol++;
1507       }
1508     }
1509
1510     if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
1511       if ((outputLineBreak || !spacesOnly) &&  // bugs 261467,125928
1512           !IsQuotedLine(stringpart) && !IsSignatureSeparator(stringpart)) {
1513         stringpart.Trim(" ", false, true, true);
1514       }
1515       mCurrentLine.mSpaceStuffed =
1516           IsSpaceStuffable(stringpart.get()) && !IsQuotedLine(stringpart);
1517     }
1518     mCurrentLine.mContent.Append(stringpart);
1519
1520     mCurrentLine.MaybeReplaceNbspsInContent(mSettings.GetFlags());
1521
1522     mOutputManager->Append(mCurrentLine,
1523                            OutputManager::StripTrailingWhitespaces::kNo);
1524     if (outputLineBreak) {
1525       mOutputManager->AppendLineBreak();
1526     }
1527
1528     mCurrentLine.ResetContentAndIndentationHeader();
1529   }
1530
1531 #ifdef DEBUG_wrapping
1532   printf("No wrapping: newline is %d, totLen is %d\n", newline, totLen);
1533 #endif
1534 }
1535
1536 /**
1537  * Write a string. This is the highlevel function to use to get text output.
1538  * By using AddToLine, Output, EndLine and other functions it handles quotation,
1539  * line wrapping, indentation, whitespace compression and other things.
1540  */
1541 void nsPlainTextSerializer::Write(const nsAString& aStr) {
1542   // XXX Copy necessary to use nsString methods and gain
1543   // access to underlying buffer
1544   nsAutoString str(aStr);
1545
1546 #ifdef DEBUG_wrapping
1547   printf("Write(%s): wrap col = %d\n", NS_ConvertUTF16toUTF8(str).get(),
1548          mSettings.GetWrapColumn());
1549 #endif
1550
1551   const int32_t totLen = str.Length();
1552
1553   // If the string is empty, do nothing:
1554   if (totLen <= 0) return;
1555
1556   // For Flowed text change nbsp-ses to spaces at end of lines to allow them
1557   // to be cut off along with usual spaces if required. (bug #125928)
1558   if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
1559     ReplaceVisiblyTrailingNbsps(str);
1560   }
1561
1562   // We have two major codepaths here. One that does preformatted text and one
1563   // that does normal formatted text. The one for preformatted text calls
1564   // Output directly while the other code path goes through AddToLine.
1565   if ((mPreFormattedMail && !mSettings.GetWrapColumn()) ||
1566       (IsElementPreformatted() && !mPreFormattedMail) ||
1567       (mSpanLevel > 0 && mEmptyLines >= 0 && IsQuotedLine(str))) {
1568     // No intelligent wrapping.
1569
1570     // This mustn't be mixed with intelligent wrapping without clearing
1571     // the mCurrentLine.mContent buffer before!!!
1572     NS_ASSERTION(mCurrentLine.mContent.IsEmpty() ||
1573                      (IsElementPreformatted() && !mPreFormattedMail),
1574                  "Mixed wrapping data and nonwrapping data on the same line");
1575     MOZ_ASSERT(mOutputManager);
1576
1577     if (!mCurrentLine.mContent.IsEmpty()) {
1578       mOutputManager->Flush(mCurrentLine);
1579     }
1580
1581     ConvertToLinesAndOutput(str);
1582     return;
1583   }
1584
1585   // Intelligent handling of text
1586   // If needed, strip out all "end of lines"
1587   // and multiple whitespace between words
1588   int32_t nextpos;
1589   const char16_t* offsetIntoBuffer = nullptr;
1590
1591   int32_t bol = 0;
1592   while (bol < totLen) {  // Loop over lines
1593     // Find a place where we may have to do whitespace compression
1594     nextpos = str.FindCharInSet(u" \t\n\r", bol);
1595 #ifdef DEBUG_wrapping
1596     nsAutoString remaining;
1597     str.Right(remaining, totLen - bol);
1598     foo = ToNewCString(remaining);
1599     // printf("Next line: bol = %d, newlinepos = %d, totLen = %d, "
1600     //        "string = '%s'\n", bol, nextpos, totLen, foo);
1601     free(foo);
1602 #endif
1603
1604     if (nextpos == kNotFound) {
1605       // The rest of the string
1606       offsetIntoBuffer = str.get() + bol;
1607       AddToLine(offsetIntoBuffer, totLen - bol);
1608       bol = totLen;
1609       mInWhitespace = false;
1610     } else {
1611       // There's still whitespace left in the string
1612       if (nextpos != 0 && (nextpos + 1) < totLen) {
1613         offsetIntoBuffer = str.get() + nextpos;
1614         // skip '\n' if it is between CJ chars
1615         if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) &&
1616             IS_CJ_CHAR(offsetIntoBuffer[1])) {
1617           offsetIntoBuffer = str.get() + bol;
1618           AddToLine(offsetIntoBuffer, nextpos - bol);
1619           bol = nextpos + 1;
1620           continue;
1621         }
1622       }
1623       // If we're already in whitespace and not preformatted, just skip it:
1624       if (mInWhitespace && (nextpos == bol) && !mPreFormattedMail &&
1625           !mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted)) {
1626         // Skip whitespace
1627         bol++;
1628         continue;
1629       }
1630
1631       if (nextpos == bol) {
1632         // Note that we are in whitespace.
1633         mInWhitespace = true;
1634         offsetIntoBuffer = str.get() + nextpos;
1635         AddToLine(offsetIntoBuffer, 1);
1636         bol++;
1637         continue;
1638       }
1639
1640       mInWhitespace = true;
1641
1642       offsetIntoBuffer = str.get() + bol;
1643       if (mPreFormattedMail ||
1644           mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted)) {
1645         // Preserve the real whitespace character
1646         nextpos++;
1647         AddToLine(offsetIntoBuffer, nextpos - bol);
1648         bol = nextpos;
1649       } else {
1650         // Replace the whitespace with a space
1651         AddToLine(offsetIntoBuffer, nextpos - bol);
1652         AddToLine(kSpace.get(), 1);
1653         bol = nextpos + 1;  // Let's eat the whitespace
1654       }
1655     }
1656   }  // Continue looping over the string
1657 }
1658
1659 /**
1660  * Gets the value of an attribute in a string. If the function returns
1661  * NS_ERROR_NOT_AVAILABLE, there was none such attribute specified.
1662  */
1663 nsresult nsPlainTextSerializer::GetAttributeValue(const nsAtom* aName,
1664                                                   nsString& aValueRet) const {
1665   if (mElement) {
1666     if (mElement->GetAttr(aName, aValueRet)) {
1667       return NS_OK;
1668     }
1669   }
1670
1671   return NS_ERROR_NOT_AVAILABLE;
1672 }
1673
1674 /**
1675  * Returns true, if the element was inserted by Moz' TXT->HTML converter.
1676  * In this case, we should ignore it.
1677  */
1678 bool nsPlainTextSerializer::IsCurrentNodeConverted() const {
1679   nsAutoString value;
1680   nsresult rv = GetAttributeValue(nsGkAtoms::_class, value);
1681   return (NS_SUCCEEDED(rv) &&
1682           (StringBeginsWith(value, u"moz-txt"_ns,
1683                             nsASCIICaseInsensitiveStringComparator) ||
1684            StringBeginsWith(value, u"\"moz-txt"_ns,
1685                             nsASCIICaseInsensitiveStringComparator)));
1686 }
1687
1688 // static
1689 nsAtom* nsPlainTextSerializer::GetIdForContent(nsIContent* aContent) {
1690   if (!aContent->IsHTMLElement()) {
1691     return nullptr;
1692   }
1693
1694   nsAtom* localName = aContent->NodeInfo()->NameAtom();
1695   return localName->IsStatic() ? localName : nullptr;
1696 }
1697
1698 bool nsPlainTextSerializer::IsElementPreformatted() const {
1699   return !mPreformatStack.empty() && mPreformatStack.top();
1700 }
1701
1702 bool nsPlainTextSerializer::IsElementPreformatted(Element* aElement) {
1703   RefPtr<const ComputedStyle> computedStyle =
1704       nsComputedDOMStyle::GetComputedStyleNoFlush(aElement);
1705   if (computedStyle) {
1706     const nsStyleText* textStyle = computedStyle->StyleText();
1707     return textStyle->WhiteSpaceOrNewlineIsSignificant();
1708   }
1709   // Fall back to looking at the tag, in case there is no style information.
1710   return GetIdForContent(aElement) == nsGkAtoms::pre;
1711 }
1712
1713 bool nsPlainTextSerializer::IsCssBlockLevelElement(Element* aElement) {
1714   RefPtr<const ComputedStyle> computedStyle =
1715       nsComputedDOMStyle::GetComputedStyleNoFlush(aElement);
1716   if (computedStyle) {
1717     const nsStyleDisplay* displayStyle = computedStyle->StyleDisplay();
1718     return displayStyle->IsBlockOutsideStyle();
1719   }
1720   // Fall back to looking at the tag, in case there is no style information.
1721   return nsContentUtils::IsHTMLBlockLevelElement(aElement);
1722 }
1723
1724 /**
1725  * This method is required only to identify LI's inside OL.
1726  * Returns TRUE if we are inside an OL tag and FALSE otherwise.
1727  */
1728 bool nsPlainTextSerializer::IsInOL() const {
1729   int32_t i = mTagStackIndex;
1730   while (--i >= 0) {
1731     if (mTagStack[i] == nsGkAtoms::ol) return true;
1732     if (mTagStack[i] == nsGkAtoms::ul) {
1733       // If a UL is reached first, LI belongs the UL nested in OL.
1734       return false;
1735     }
1736   }
1737   // We may reach here for orphan LI's.
1738   return false;
1739 }
1740
1741 bool nsPlainTextSerializer::IsInOlOrUl() const {
1742   return (mULCount > 0) || !mOLStack.IsEmpty();
1743 }
1744
1745 /*
1746   @return 0 = no header, 1 = h1, ..., 6 = h6
1747 */
1748 int32_t HeaderLevel(const nsAtom* aTag) {
1749   if (aTag == nsGkAtoms::h1) {
1750     return 1;
1751   }
1752   if (aTag == nsGkAtoms::h2) {
1753     return 2;
1754   }
1755   if (aTag == nsGkAtoms::h3) {
1756     return 3;
1757   }
1758   if (aTag == nsGkAtoms::h4) {
1759     return 4;
1760   }
1761   if (aTag == nsGkAtoms::h5) {
1762     return 5;
1763   }
1764   if (aTag == nsGkAtoms::h6) {
1765     return 6;
1766   }
1767   return 0;
1768 }
1769
1770 /* These functions define the column width of an ISO 10646 character
1771  * as follows:
1772  *
1773  *    - The null character (U+0000) has a column width of 0.
1774  *
1775  *    - Other C0/C1 control characters and DEL will lead to a return
1776  *      value of -1.
1777  *
1778  *    - Non-spacing and enclosing combining characters (general
1779  *      category code Mn or Me in the Unicode database) have a
1780  *      column width of 0.
1781  *
1782  *    - Spacing characters in the East Asian Wide (W) or East Asian
1783  *      FullWidth (F) category as defined in Unicode Technical
1784  *      Report #11 have a column width of 2.
1785  *
1786  *    - All remaining characters (including all printable
1787  *      ISO 8859-1 and WGL4 characters, Unicode control characters,
1788  *      etc.) have a column width of 1.
1789  */
1790
1791 int32_t GetUnicharWidth(char32_t aCh) {
1792   /* test for 8-bit control characters */
1793   if (aCh == 0) {
1794     return 0;
1795   }
1796   if (aCh < 32 || (aCh >= 0x7f && aCh < 0xa0)) {
1797     return -1;
1798   }
1799
1800   /* The first combining char in Unicode is U+0300 */
1801   if (aCh < 0x0300) {
1802     return 1;
1803   }
1804
1805   auto gc = unicode::GetGeneralCategory(aCh);
1806   if (gc == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK ||
1807       gc == HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) {
1808     return 0;
1809   }
1810
1811   /* if we arrive here, ucs is not a combining or C0/C1 control character */
1812
1813   /* fast test for majority of non-wide scripts */
1814   if (aCh < 0x1100) {
1815     return 1;
1816   }
1817
1818   return intl::UnicodeProperties::IsEastAsianWidthFW(aCh) ? 2 : 1;
1819 }
1820
1821 int32_t GetUnicharStringWidth(Span<const char16_t> aString) {
1822   int32_t width = 0;
1823   for (auto iter = aString.begin(); iter != aString.end(); ++iter) {
1824     char32_t c = *iter;
1825     if (NS_IS_HIGH_SURROGATE(c) && (iter + 1) != aString.end() &&
1826         NS_IS_LOW_SURROGATE(*(iter + 1))) {
1827       c = SURROGATE_TO_UCS4(c, *++iter);
1828     }
1829     const int32_t w = GetUnicharWidth(c);
1830     // Taking 1 as the width of non-printable character, for bug 94475.
1831     width += (w < 0 ? 1 : w);
1832   }
1833   return width;
1834 }